54aa699e80
Fix typos, most reported by "codespell arch/x86". Only touches comments, no code changes. Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Signed-off-by: Ingo Molnar <mingo@kernel.org> Reviewed-by: Randy Dunlap <rdunlap@infradead.org> Link: https://lore.kernel.org/r/20240103004011.1758650-1-helgaas@kernel.org
432 lines
9.9 KiB
C
432 lines
9.9 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Implement support for AMD Fam19h Branch Sampling feature
|
|
* Based on specifications published in AMD PPR Fam19 Model 01
|
|
*
|
|
* Copyright 2021 Google LLC
|
|
* Contributed by Stephane Eranian <eranian@google.com>
|
|
*/
|
|
#include <linux/kernel.h>
|
|
#include <linux/jump_label.h>
|
|
#include <asm/msr.h>
|
|
#include <asm/cpufeature.h>
|
|
|
|
#include "../perf_event.h"
|
|
|
|
/* Sentinel stored in a branch "to" MSR to mark the limit of valid entries */
#define BRS_POISON	0xFFFFFFFFFFFFFFFEULL
|
|
|
|
/* Debug Extension Configuration register layout */
|
|
union amd_debug_extn_cfg {
|
|
__u64 val;
|
|
struct {
|
|
__u64 rsvd0:2, /* reserved */
|
|
brsmen:1, /* branch sample enable */
|
|
rsvd4_3:2,/* reserved - must be 0x3 */
|
|
vb:1, /* valid branches recorded */
|
|
rsvd2:10, /* reserved */
|
|
msroff:4, /* index of next entry to write */
|
|
rsvd3:4, /* reserved */
|
|
pmc:3, /* #PMC holding the sampling event */
|
|
rsvd4:37; /* reserved */
|
|
};
|
|
};
|
|
|
|
static inline unsigned int brs_from(int idx)
|
|
{
|
|
return MSR_AMD_SAMP_BR_FROM + 2 * idx;
|
|
}
|
|
|
|
/*
 * MSR address holding the branch target of BRS entry @idx: the MSR
 * immediately following the entry's "from" MSR.
 */
static inline unsigned int brs_to(int idx)
{
	return brs_from(idx) + 1;
}
|
|
|
|
/*
 * Write @val to the Debug Extension Configuration MSR, forcing the
 * bits the hardware requires to be set.
 */
static __always_inline void set_debug_extn_cfg(u64 val)
{
	/* bits[4:3] must always be set to 11b */
	const u64 must_be_set = 3ULL << 3;

	__wrmsr(MSR_AMD_DBG_EXTN_CFG, val | must_be_set, val >> 32);
}
|
|
|
|
/* Read the current value of the Debug Extension Configuration MSR */
static __always_inline u64 get_debug_extn_cfg(void)
{
	u64 raw = __rdmsr(MSR_AMD_DBG_EXTN_CFG);

	return raw;
}
|
|
|
|
/*
 * Probe for BRS and, when present on a supported family, fill in the
 * x86_pmu LBR fields used by the BRS code.
 *
 * Returns true if BRS can be used, false otherwise.
 */
static bool __init amd_brs_detect(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_BRS))
		return false;

	/* Only AMD Fam19h (Zen3) is handled */
	if (boot_cpu_data.x86 != 0x19)
		return false;

	x86_pmu.lbr_nr = 16;

	/* No hardware filtering supported */
	x86_pmu.lbr_sel_map = NULL;
	x86_pmu.lbr_sel_mask = 0;

	return true;
}
|
|
|
|
/*
|
|
* Current BRS implementation does not support branch type or privilege level
|
|
* filtering. Therefore, this function simply enforces these limitations. No need for
|
|
* a br_sel_map. Software filtering is not supported because it would not correlate well
|
|
* with a sampling period.
|
|
*/
|
|
static int amd_brs_setup_filter(struct perf_event *event)
|
|
{
|
|
u64 type = event->attr.branch_sample_type;
|
|
|
|
/* No BRS support */
|
|
if (!x86_pmu.lbr_nr)
|
|
return -EOPNOTSUPP;
|
|
|
|
/* Can only capture all branches, i.e., no filtering */
|
|
if ((type & ~PERF_SAMPLE_BRANCH_PLM_ALL) != PERF_SAMPLE_BRANCH_ANY)
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline int amd_is_brs_event(struct perf_event *e)
|
|
{
|
|
return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
|
|
}
|
|
|
|
int amd_brs_hw_config(struct perf_event *event)
|
|
{
|
|
int ret = 0;
|
|
|
|
/*
|
|
* Due to interrupt holding, BRS is not recommended in
|
|
* counting mode.
|
|
*/
|
|
if (!is_sampling_event(event))
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* Due to the way BRS operates by holding the interrupt until
|
|
* lbr_nr entries have been captured, it does not make sense
|
|
* to allow sampling on BRS with an event that does not match
|
|
* what BRS is capturing, i.e., retired taken branches.
|
|
* Otherwise the correlation with the event's period is even
|
|
* more loose:
|
|
*
|
|
* With retired taken branch:
|
|
* Effective P = P + 16 + X
|
|
* With any other event:
|
|
* Effective P = P + Y + X
|
|
*
|
|
* Where X is the number of taken branches due to interrupt
|
|
* skid. Skid is large.
|
|
*
|
|
* Where Y is the occurrences of the event while BRS is
|
|
* capturing the lbr_nr entries.
|
|
*
|
|
* By using retired taken branches, we limit the impact on the
|
|
* Y variable. We know it cannot be more than the depth of
|
|
* BRS.
|
|
*/
|
|
if (!amd_is_brs_event(event))
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* BRS implementation does not work with frequency mode
|
|
* reprogramming of the period.
|
|
*/
|
|
if (event->attr.freq)
|
|
return -EINVAL;
|
|
/*
|
|
* The kernel subtracts BRS depth from period, so it must
|
|
* be big enough.
|
|
*/
|
|
if (event->attr.sample_period <= x86_pmu.lbr_nr)
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* Check if we can allow PERF_SAMPLE_BRANCH_STACK
|
|
*/
|
|
ret = amd_brs_setup_filter(event);
|
|
|
|
/* only set in case of success */
|
|
if (!ret)
|
|
event->hw.flags |= PERF_X86_EVENT_AMD_BRS;
|
|
|
|
return ret;
|
|
}
|
|
|
|
/* tos = top of stack, i.e., last valid entry written */
|
|
static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg)
|
|
{
|
|
/*
|
|
* msroff: index of next entry to write so top-of-stack is one off
|
|
* if BRS is full then msroff is set back to 0.
|
|
*/
|
|
return (cfg->msroff ? cfg->msroff : x86_pmu.lbr_nr) - 1;
|
|
}
|
|
|
|
/*
|
|
* make sure we have a sane BRS offset to begin with
|
|
* especially with kexec
|
|
*/
|
|
void amd_brs_reset(void)
|
|
{
|
|
if (!cpu_feature_enabled(X86_FEATURE_BRS))
|
|
return;
|
|
|
|
/*
|
|
* Reset config
|
|
*/
|
|
set_debug_extn_cfg(0);
|
|
|
|
/*
|
|
* Mark first entry as poisoned
|
|
*/
|
|
wrmsrl(brs_to(0), BRS_POISON);
|
|
}
|
|
|
|
/*
 * Detect BRS at boot and report its depth on the current log line.
 *
 * Returns 0 when BRS is available, -EOPNOTSUPP otherwise.
 */
int __init amd_brs_init(void)
{
	if (amd_brs_detect()) {
		pr_cont("%d-deep BRS, ", x86_pmu.lbr_nr);
		return 0;
	}

	return -EOPNOTSUPP;
}
|
|
|
|
void amd_brs_enable(void)
|
|
{
|
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
|
union amd_debug_extn_cfg cfg;
|
|
|
|
/* Activate only on first user */
|
|
if (++cpuc->brs_active > 1)
|
|
return;
|
|
|
|
cfg.val = 0; /* reset all fields */
|
|
cfg.brsmen = 1; /* enable branch sampling */
|
|
|
|
/* Set enable bit */
|
|
set_debug_extn_cfg(cfg.val);
|
|
}
|
|
|
|
void amd_brs_enable_all(void)
|
|
{
|
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
|
if (cpuc->lbr_users)
|
|
amd_brs_enable();
|
|
}
|
|
|
|
void amd_brs_disable(void)
|
|
{
|
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
|
union amd_debug_extn_cfg cfg;
|
|
|
|
/* Check if active (could be disabled via x86_pmu_disable_all()) */
|
|
if (!cpuc->brs_active)
|
|
return;
|
|
|
|
/* Only disable for last user */
|
|
if (--cpuc->brs_active)
|
|
return;
|
|
|
|
/*
|
|
* Clear the brsmen bit but preserve the others as they contain
|
|
* useful state such as vb and msroff
|
|
*/
|
|
cfg.val = get_debug_extn_cfg();
|
|
|
|
/*
|
|
* When coming in on interrupt and BRS is full, then hw will have
|
|
* already stopped BRS, no need to issue wrmsr again
|
|
*/
|
|
if (cfg.brsmen) {
|
|
cfg.brsmen = 0;
|
|
set_debug_extn_cfg(cfg.val);
|
|
}
|
|
}
|
|
|
|
void amd_brs_disable_all(void)
|
|
{
|
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
|
if (cpuc->lbr_users)
|
|
amd_brs_disable();
|
|
}
|
|
|
|
/*
 * Check the branch target @to against the privilege levels requested
 * in @event's branch_sample_type.
 *
 * Returns true when the branch should be kept: a kernel target
 * requires the KERNEL or HV bit, any other target requires the USER
 * bit.
 */
static bool amd_brs_match_plm(struct perf_event *event, u64 to)
{
	int sample_type = event->attr.branch_sample_type;
	int kernel_mask = PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_HV;
	int user_mask = PERF_SAMPLE_BRANCH_USER;

	if (kernel_ip(to))
		return !!(sample_type & kernel_mask);

	return !!(sample_type & user_mask);
}
|
|
|
|
/*
|
|
* Caller must ensure amd_brs_inuse() is true before calling
|
|
* return:
|
|
*/
|
|
void amd_brs_drain(void)
|
|
{
|
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
|
struct perf_event *event = cpuc->events[0];
|
|
struct perf_branch_entry *br = cpuc->lbr_entries;
|
|
union amd_debug_extn_cfg cfg;
|
|
u32 i, nr = 0, num, tos, start;
|
|
u32 shift = 64 - boot_cpu_data.x86_virt_bits;
|
|
|
|
/*
|
|
* BRS event forced on PMC0,
|
|
* so check if there is an event.
|
|
* It is possible to have lbr_users > 0 but the event
|
|
* not yet scheduled due to long latency PMU irq
|
|
*/
|
|
if (!event)
|
|
goto empty;
|
|
|
|
cfg.val = get_debug_extn_cfg();
|
|
|
|
/* Sanity check [0-x86_pmu.lbr_nr] */
|
|
if (WARN_ON_ONCE(cfg.msroff >= x86_pmu.lbr_nr))
|
|
goto empty;
|
|
|
|
/* No valid branch */
|
|
if (cfg.vb == 0)
|
|
goto empty;
|
|
|
|
/*
|
|
* msr.off points to next entry to be written
|
|
* tos = most recent entry index = msr.off - 1
|
|
* BRS register buffer saturates, so we know we have
|
|
* start < tos and that we have to read from start to tos
|
|
*/
|
|
start = 0;
|
|
tos = amd_brs_get_tos(&cfg);
|
|
|
|
num = tos - start + 1;
|
|
|
|
/*
|
|
* BRS is only one pass (saturation) from MSROFF to depth-1
|
|
* MSROFF wraps to zero when buffer is full
|
|
*/
|
|
for (i = 0; i < num; i++) {
|
|
u32 brs_idx = tos - i;
|
|
u64 from, to;
|
|
|
|
rdmsrl(brs_to(brs_idx), to);
|
|
|
|
/* Entry does not belong to us (as marked by kernel) */
|
|
if (to == BRS_POISON)
|
|
break;
|
|
|
|
/*
|
|
* Sign-extend SAMP_BR_TO to 64 bits, bits 61-63 are reserved.
|
|
* Necessary to generate proper virtual addresses suitable for
|
|
* symbolization
|
|
*/
|
|
to = (u64)(((s64)to << shift) >> shift);
|
|
|
|
if (!amd_brs_match_plm(event, to))
|
|
continue;
|
|
|
|
rdmsrl(brs_from(brs_idx), from);
|
|
|
|
perf_clear_branch_entry_bitfields(br+nr);
|
|
|
|
br[nr].from = from;
|
|
br[nr].to = to;
|
|
|
|
nr++;
|
|
}
|
|
empty:
|
|
/* Record number of sampled branches */
|
|
cpuc->lbr_stack.nr = nr;
|
|
}
|
|
|
|
/*
|
|
* Poison most recent entry to prevent reuse by next task
|
|
* required because BRS entry are not tagged by PID
|
|
*/
|
|
static void amd_brs_poison_buffer(void)
|
|
{
|
|
union amd_debug_extn_cfg cfg;
|
|
unsigned int idx;
|
|
|
|
/* Get current state */
|
|
cfg.val = get_debug_extn_cfg();
|
|
|
|
/* idx is most recently written entry */
|
|
idx = amd_brs_get_tos(&cfg);
|
|
|
|
/* Poison target of entry */
|
|
wrmsrl(brs_to(idx), BRS_POISON);
|
|
}
|
|
|
|
/*
|
|
* On context switch in, we need to make sure no samples from previous user
|
|
* are left in the BRS.
|
|
*
|
|
* On ctxswin, sched_in = true, called after the PMU has started
|
|
* On ctxswout, sched_in = false, called before the PMU is stopped
|
|
*/
|
|
void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
|
|
{
|
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
|
|
|
/* no active users */
|
|
if (!cpuc->lbr_users)
|
|
return;
|
|
|
|
/*
|
|
* On context switch in, we need to ensure we do not use entries
|
|
* from previous BRS user on that CPU, so we poison the buffer as
|
|
* a faster way compared to resetting all entries.
|
|
*/
|
|
if (sched_in)
|
|
amd_brs_poison_buffer();
|
|
}
|
|
|
|
/*
|
|
* called from ACPI processor_idle.c or acpi_pad.c
|
|
* with interrupts disabled
|
|
*/
|
|
void noinstr perf_amd_brs_lopwr_cb(bool lopwr_in)
|
|
{
|
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
|
union amd_debug_extn_cfg cfg;
|
|
|
|
/*
|
|
* on mwait in, we may end up in non C0 state.
|
|
* we must disable branch sampling to avoid holding the NMI
|
|
* for too long. We disable it in hardware but we
|
|
* keep the state in cpuc, so we can re-enable.
|
|
*
|
|
* The hardware will deliver the NMI if needed when brsmen cleared
|
|
*/
|
|
if (cpuc->brs_active) {
|
|
cfg.val = get_debug_extn_cfg();
|
|
cfg.brsmen = !lopwr_in;
|
|
set_debug_extn_cfg(cfg.val);
|
|
}
|
|
}
|
|
|
|
/*
 * Static call used for the low-power callback. amd_brs_lopwr_init()
 * points it at perf_amd_brs_lopwr_cb(); presumably it has no target
 * until then (the _NULL variant) - confirm against static_call.h.
 */
DEFINE_STATIC_CALL_NULL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
|
|
/*
 * Export the perf_lopwr_cb static call trampoline (GPL-only).
 * NOTE(review): presumably so the idle code that invokes the callback
 * can do so when built as a module - confirm against the callers.
 */
EXPORT_STATIC_CALL_TRAMP_GPL(perf_lopwr_cb);
|
|
|
|
/*
 * Install perf_amd_brs_lopwr_cb() as the target of the perf_lopwr_cb
 * static call.
 */
void __init amd_brs_lopwr_init(void)
{
	static_call_update(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
}
|