1
linux/arch/powerpc/include/asm/imc-pmu.h

173 lines
3.9 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef __ASM_POWERPC_IMC_PMU_H
#define __ASM_POWERPC_IMC_PMU_H
/*
* IMC Nest Performance Monitor counter support.
*
* Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
* (C) 2017 Anju T Sudhakar, IBM Corporation.
* (C) 2017 Hemant K Shaw, IBM Corporation.
*/
#include <linux/perf_event.h>
#include <linux/slab.h>
#include <linux/of.h>
#include <linux/io.h>
#include <asm/opal.h>
/*
* Compatibility macros for IMC devices
*/
#define IMC_DTB_COMPAT "ibm,opal-in-memory-counters"
#define IMC_DTB_UNIT_COMPAT "ibm,imc-counters"
/*
* LDBAR: Counter address and Enable/Disable macro.
* perf/imc-pmu.c has the LDBAR layout information.
*/
#define THREAD_IMC_LDBAR_MASK 0x0003ffffffffe000ULL
#define THREAD_IMC_ENABLE 0x8000000000000000ULL
#define TRACE_IMC_ENABLE 0x4000000000000000ULL
powerpc/powernv: Add debugfs interface for imc-mode and imc-command In memory Collection (IMC) counter pmu driver controls the ucode's execution state. At the system boot, IMC perf driver pause the ucode. Ucode state is changed to "running" only when any of the nest units are monitored or profiled using perf tool. Nest units support only limited set of hardware counters and ucode is always programmed in the "production mode" ("accumulation") mode. This mode is configured to provide key performance metric data for most of the nest units. But ucode also supports other modes which would be used for "debug" to drill down specific nest units. That is, ucode when switched to "powerbus" debug mode (for example), will dynamically reconfigure the nest counters to target only "powerbus" related events in the hardware counters. This allows the IMC nest unit to focus on powerbus related transactions in the system in more detail. At this point, production mode events may or may not be counted. IMC nest counters has both in-band (ucode access) and out of band access to it. Since not all nest counter configurations are supported by ucode, out of band tools are used to characterize other nest counter configurations. Patch provides an interface via "debugfs" to enable the switching of ucode modes in the system. To switch ucode mode, one has to first pause the microcode (imc_cmd), and then write the target mode value to the "imc_mode" file. Proposed Approach: In the proposed approach, the function (export_imc_mode_and_cmd) which creates the debugfs interface for imc mode and command is implemented in opal-imc.c. Thus we can use imc_get_mem_addr() to get the homer base address for each chip. The interface to expose imc mode and command is required only if we have nest pmu units registered. Employing the existing data structures to track whether we have any nest units registered will require to extend data from perf side to opal-imc.c. Instead an integer is introduced to hold that information by counting successful nest unit registration. Debugfs interface is removed based on the integer count. Example for the interface: $ ls /sys/kernel/debug/imc imc_cmd_0 imc_cmd_8 imc_mode_0 imc_mode_8 Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
2017-12-12 23:09:54 -07:00
/*
* For debugfs interface for imc-mode and imc-command
*/
#define IMC_CNTL_BLK_OFFSET 0x3FC00
#define IMC_CNTL_BLK_CMD_OFFSET 8
#define IMC_CNTL_BLK_MODE_OFFSET 32
/*
* Structure to hold memory address information for imc units.
*/
struct imc_mem_info {
u64 *vbase;
u32 id;
};
/*
* Place holder for nest pmu events and values.
*/
struct imc_events {
u32 value;
char *name;
char *unit;
char *scale;
};
/*
* Trace IMC hardware updates a 64bytes record on
* Core Performance Monitoring Counter (CPMC)
* overflow. Here is the layout for the trace imc record
*
* DW 0 : Timebase
* DW 1 : Program Counter
* DW 2 : PIDR information
* DW 3 : CPMC1
* DW 4 : CPMC2
* DW 5 : CPMC3
* Dw 6 : CPMC4
* DW 7 : Timebase
* .....
*
* The following is the data structure to hold trace imc data.
*/
struct trace_imc_data {
__be64 tb1;
__be64 ip;
__be64 val;
__be64 cpmc1;
__be64 cpmc2;
__be64 cpmc3;
__be64 cpmc4;
__be64 tb2;
};
/* Event attribute array index */
#define IMC_FORMAT_ATTR 0
#define IMC_EVENT_ATTR 1
#define IMC_CPUMASK_ATTR 2
#define IMC_NULL_ATTR 3
/* PMU Format attribute macros */
#define IMC_EVENT_OFFSET_MASK 0xffffffffULL
/*
* Macro to mask bits 0:21 of first double word(which is the timebase) to
* compare with 8th double word (timebase) of trace imc record data.
*/
#define IMC_TRACE_RECORD_TB1_MASK 0x3ffffffffffULL
/*
* Bit 0:1 in third DW of IMC trace record
* specifies the MSR[HV PR] values.
*/
#define IMC_TRACE_RECORD_VAL_HVPR(x) ((x) >> 62)
/*
* Device tree parser code detects IMC pmu support and
* registers new IMC pmus. This structure will hold the
* pmu functions, events, counter memory information
* and attrs for each imc pmu and will be referenced at
* the time of pmu registration.
*/
struct imc_pmu {
struct pmu pmu;
struct imc_mem_info *mem_info;
struct imc_events *events;
/*
* Attribute groups for the PMU. Slot 0 used for
* format attribute, slot 1 used for cpusmask attribute,
* slot 2 used for event attribute. Slot 3 keep as
* NULL.
*/
const struct attribute_group *attr_groups[4];
u32 counter_mem_size;
int domain;
/*
* flag to notify whether the memory is mmaped
* or allocated by kernel.
*/
bool imc_counter_mmaped;
};
/*
* Structure to hold id, lock and reference count for the imc events which
* are inited.
*/
struct imc_pmu_ref {
powerpc/imc-pmu: Fix use of mutex in IRQs disabled section Current imc-pmu code triggers a WARNING with CONFIG_DEBUG_ATOMIC_SLEEP and CONFIG_PROVE_LOCKING enabled, while running a thread_imc event. Command to trigger the warning: # perf stat -e thread_imc/CPM_CS_FROM_L4_MEM_X_DPTEG/ sleep 5 Performance counter stats for 'sleep 5': 0 thread_imc/CPM_CS_FROM_L4_MEM_X_DPTEG/ 5.002117947 seconds time elapsed 0.000131000 seconds user 0.001063000 seconds sys Below is snippet of the warning in dmesg: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580 in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 2869, name: perf-exec preempt_count: 2, expected: 0 4 locks held by perf-exec/2869: #0: c00000004325c540 (&sig->cred_guard_mutex){+.+.}-{3:3}, at: bprm_execve+0x64/0xa90 #1: c00000004325c5d8 (&sig->exec_update_lock){++++}-{3:3}, at: begin_new_exec+0x460/0xef0 #2: c0000003fa99d4e0 (&cpuctx_lock){-...}-{2:2}, at: perf_event_exec+0x290/0x510 #3: c000000017ab8418 (&ctx->lock){....}-{2:2}, at: perf_event_exec+0x29c/0x510 irq event stamp: 4806 hardirqs last enabled at (4805): [<c000000000f65b94>] _raw_spin_unlock_irqrestore+0x94/0xd0 hardirqs last disabled at (4806): [<c0000000003fae44>] perf_event_exec+0x394/0x510 softirqs last enabled at (0): [<c00000000013c404>] copy_process+0xc34/0x1ff0 softirqs last disabled at (0): [<0000000000000000>] 0x0 CPU: 36 PID: 2869 Comm: perf-exec Not tainted 6.2.0-rc2-00011-g1247637727f2 #61 Hardware name: 8375-42A POWER9 0x4e1202 opal:v7.0-16-g9b85f7d961 PowerNV Call Trace: dump_stack_lvl+0x98/0xe0 (unreliable) __might_resched+0x2f8/0x310 __mutex_lock+0x6c/0x13f0 thread_imc_event_add+0xf4/0x1b0 event_sched_in+0xe0/0x210 merge_sched_in+0x1f0/0x600 visit_groups_merge.isra.92.constprop.166+0x2bc/0x6c0 ctx_flexible_sched_in+0xcc/0x140 ctx_sched_in+0x20c/0x2a0 ctx_resched+0x104/0x1c0 perf_event_exec+0x340/0x510 begin_new_exec+0x730/0xef0 load_elf_binary+0x3f8/0x1e10 ... do not call blocking ops when !TASK_RUNNING; state=2001 set at [<00000000fd63e7cf>] do_nanosleep+0x60/0x1a0 WARNING: CPU: 36 PID: 2869 at kernel/sched/core.c:9912 __might_sleep+0x9c/0xb0 CPU: 36 PID: 2869 Comm: sleep Tainted: G W 6.2.0-rc2-00011-g1247637727f2 #61 Hardware name: 8375-42A POWER9 0x4e1202 opal:v7.0-16-g9b85f7d961 PowerNV NIP: c000000000194a1c LR: c000000000194a18 CTR: c000000000a78670 REGS: c00000004d2134e0 TRAP: 0700 Tainted: G W (6.2.0-rc2-00011-g1247637727f2) MSR: 9000000000021033 <SF,HV,ME,IR,DR,RI,LE> CR: 48002824 XER: 00000000 CFAR: c00000000013fb64 IRQMASK: 1 The above warning triggered because the current imc-pmu code uses mutex lock in interrupt disabled sections. The function mutex_lock() internally calls __might_resched(), which will check if IRQs are disabled and in case IRQs are disabled, it will trigger the warning. Fix the issue by changing the mutex lock to spinlock. Fixes: 8f95faaac56c ("powerpc/powernv: Detect and create IMC device") Reported-by: Michael Petlan <mpetlan@redhat.com> Reported-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Kajol Jain <kjain@linux.ibm.com> [mpe: Fix comments, trim oops in change log, add reported-by tags] Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20230106065157.182648-1-kjain@linux.ibm.com
2023-01-05 23:51:57 -07:00
spinlock_t lock;
unsigned int id;
int refc;
};
/*
* In-Memory Collection Counters type.
* Data comes from Device tree.
* Three device type are supported.
*/
enum {
IMC_TYPE_THREAD = 0x1,
IMC_TYPE_TRACE = 0x2,
IMC_TYPE_CORE = 0x4,
IMC_TYPE_CHIP = 0x10,
};
/*
* Domains for IMC PMUs
*/
#define IMC_DOMAIN_NEST 1
#define IMC_DOMAIN_CORE 2
#define IMC_DOMAIN_THREAD 3
/* For trace-imc the domain is still thread but it operates in trace-mode */
#define IMC_DOMAIN_TRACE 4
extern int init_imc_pmu(struct device_node *parent,
struct imc_pmu *pmu_ptr, int pmu_id);
extern void thread_imc_disable(void);
extern int get_max_nest_dev(void);
extern void unregister_thread_imc(void);
#endif /* __ASM_POWERPC_IMC_PMU_H */