diff options
author | Linus Torvalds | 2021-06-28 12:03:20 -0700 |
---|---|---|
committer | Linus Torvalds | 2021-06-28 12:03:20 -0700 |
commit | 28a27cbd86076c1a6be311c751b421c4c17a7dd9 (patch) | |
tree | b7c5de7d32b392314d433d5fd54c39e179e35693 | |
parent | a15286c63d113d4296c58867994cd266a28f5d6d (diff) | |
parent | 012669c740e6e2afa8bdb95394d06676f933dd2d (diff) |
Merge tag 'perf-core-2021-06-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events updates from Ingo Molnar:
- Platform PMU driver updates:
- x86 Intel uncore driver updates for Skylake (SNR) and Icelake (ICX) servers
- Fix RDPMC support
- Fix [extended-]PEBS-via-PT support
- Fix Sapphire Rapids event constraints
- Fix :ppp support on Sapphire Rapids
- Fix fixed counter sanity check on Alder Lake & X86_FEATURE_HYBRID_CPU
- Other heterogenous-PMU fixes
- Kprobes:
- Remove the unused and misguided kprobe::fault_handler callbacks.
- Warn about kprobes taking a page fault.
- Fix the 'nmissed' stat counter.
- Misc cleanups and fixes.
* tag 'perf-core-2021-06-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf: Fix task context PMU for Hetero
perf/x86/intel: Fix instructions:ppp support in Sapphire Rapids
perf/x86/intel: Add more events requires FRONTEND MSR on Sapphire Rapids
perf/x86/intel: Fix fixed counter check warning for some Alder Lake
perf/x86/intel: Fix PEBS-via-PT reload base value for Extended PEBS
perf/x86: Reset the dirty counter to prevent the leak for an RDPMC task
kprobes: Do not increment probe miss count in the fault handler
x86,kprobes: WARN if kprobes tries to handle a fault
kprobes: Remove kprobe::fault_handler
uprobes: Update uprobe_write_opcode() kernel-doc comment
perf/hw_breakpoint: Fix DocBook warnings in perf hw_breakpoint
perf/core: Fix DocBook warnings
perf/core: Make local function perf_pmu_snapshot_aux() static
perf/x86/intel/uncore: Enable I/O stacks to IIO PMON mapping on ICX
perf/x86/intel/uncore: Enable I/O stacks to IIO PMON mapping on SNR
perf/x86/intel/uncore: Generalize I/O stacks to PMON mapping procedure
perf/x86/intel/uncore: Drop unnecessary NULL checks after container_of()
29 files changed, 257 insertions, 285 deletions
diff --git a/Documentation/trace/kprobes.rst b/Documentation/trace/kprobes.rst index b757b6dfd3d4..998149ce2fd9 100644 --- a/Documentation/trace/kprobes.rst +++ b/Documentation/trace/kprobes.rst @@ -362,14 +362,11 @@ register_kprobe #include <linux/kprobes.h> int register_kprobe(struct kprobe *kp); -Sets a breakpoint at the address kp->addr. When the breakpoint is -hit, Kprobes calls kp->pre_handler. After the probed instruction -is single-stepped, Kprobe calls kp->post_handler. If a fault -occurs during execution of kp->pre_handler or kp->post_handler, -or during single-stepping of the probed instruction, Kprobes calls -kp->fault_handler. Any or all handlers can be NULL. If kp->flags -is set KPROBE_FLAG_DISABLED, that kp will be registered but disabled, -so, its handlers aren't hit until calling enable_kprobe(kp). +Sets a breakpoint at the address kp->addr. When the breakpoint is hit, Kprobes +calls kp->pre_handler. After the probed instruction is single-stepped, Kprobe +calls kp->post_handler. Any or all handlers can be NULL. If kp->flags is set +KPROBE_FLAG_DISABLED, that kp will be registered but disabled, so, its handlers +aren't hit until calling enable_kprobe(kp). .. note:: @@ -415,17 +412,6 @@ User's post-handler (kp->post_handler):: p and regs are as described for the pre_handler. flags always seems to be zero. -User's fault-handler (kp->fault_handler):: - - #include <linux/kprobes.h> - #include <linux/ptrace.h> - int fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr); - -p and regs are as described for the pre_handler. trapnr is the -architecture-specific trap number associated with the fault (e.g., -on i386, 13 for a general protection fault or 14 for a page fault). -Returns 1 if it successfully handled the exception. - register_kretprobe ------------------ diff --git a/arch/arc/kernel/kprobes.c b/arch/arc/kernel/kprobes.c index cabef45f11df..5f0415fc7328 100644 --- a/arch/arc/kernel/kprobes.c +++ b/arch/arc/kernel/kprobes.c @@ -317,22 +317,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned long trapnr) * caused the fault. */ - /* We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accounting - * these specific fault cases. - */ - kprobes_inc_nmissed_count(cur); - - /* - * We come here because instructions in the pre/post - * handler caused the page_fault, this could happen - * if handler tries to access user space by - * copy_from_user(), get_user() etc. Let the - * user-specified handler try to fix it first. - */ - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - /* * In case the user-specified fault handler returned zero, * try to fix up. diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index a9653117ca0d..27e0af78e88b 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -348,29 +348,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) reset_current_kprobe(); } break; - - case KPROBE_HIT_ACTIVE: - case KPROBE_HIT_SSDONE: - /* - * We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accounting - * these specific fault cases. - */ - kprobes_inc_nmissed_count(cur); - - /* - * We come here because instructions in the pre/post - * handler caused the page_fault, this could happen - * if handler tries to access user space by - * copy_from_user(), get_user() etc. Let the - * user-specified handler try to fix it. - */ - if (cur->fault_handler && cur->fault_handler(cur, regs, fsr)) - return 1; - break; - - default: - break; } return 0; diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index d607c9912025..004b86eff9c2 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -277,23 +277,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: /* - * We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accounting - * these specific fault cases. - */ - kprobes_inc_nmissed_count(cur); - - /* - * We come here because instructions in the pre/post - * handler caused the page_fault, this could happen - * if handler tries to access user space by - * copy_from_user(), get_user() etc. Let the - * user-specified handler try to fix it first. - */ - if (cur->fault_handler && cur->fault_handler(cur, regs, fsr)) - return 1; - - /* * In case the user-specified fault handler returned * zero, try to fix up. */ diff --git a/arch/csky/kernel/probes/kprobes.c b/arch/csky/kernel/probes/kprobes.c index 589f090f48b9..68b22b499aeb 100644 --- a/arch/csky/kernel/probes/kprobes.c +++ b/arch/csky/kernel/probes/kprobes.c @@ -295,23 +295,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr) case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: /* - * We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accounting - * these specific fault cases. - */ - kprobes_inc_nmissed_count(cur); - - /* - * We come here because instructions in the pre/post - * handler caused the page_fault, this could happen - * if handler tries to access user space by - * copy_from_user(), get_user() etc. Let the - * user-specified handler try to fix it first. - */ - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - - /* * In case the user-specified fault handler returned * zero, try to fix up. */ diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index fc1ff8a4d7de..441ed04b1037 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -844,22 +844,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: /* - * We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accounting - * these specific fault cases. - */ - kprobes_inc_nmissed_count(cur); - - /* - * We come here because instructions in the pre/post - * handler caused the page_fault, this could happen - * if handler tries to access user space by - * copy_from_user(), get_user() etc. Let the - * user-specified handler try to fix it first. - */ - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - /* * In case the user-specified fault handler returned * zero, try to fix up. */ diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c index 54dfba8fa77c..75bff0f77319 100644 --- a/arch/mips/kernel/kprobes.c +++ b/arch/mips/kernel/kprobes.c @@ -403,9 +403,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - if (kcb->kprobe_status & KPROBE_HIT_SS) { resume_execution(cur, regs, kcb); regs->cp0_status |= kcb->kprobe_old_SR; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index e8c2a6373157..c64a5feaebbe 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -502,23 +502,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: /* - * We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accounting - * these specific fault cases. - */ - kprobes_inc_nmissed_count(cur); - - /* - * We come here because instructions in the pre/post - * handler caused the page_fault, this could happen - * if handler tries to access user space by - * copy_from_user(), get_user() etc. Let the - * user-specified handler try to fix it first. - */ - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - - /* * In case the user-specified fault handler returned * zero, try to fix up. */ diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index 15cc65ac7ca6..247e33fa5bc7 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -279,23 +279,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr) case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: /* - * We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accounting - * these specific fault cases. - */ - kprobes_inc_nmissed_count(cur); - - /* - * We come here because instructions in the pre/post - * handler caused the page_fault, this could happen - * if handler tries to access user space by - * copy_from_user(), get_user() etc. Let the - * user-specified handler try to fix it first. - */ - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - - /* * In case the user-specified fault handler returned * zero, try to fix up. */ diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index aae24dc75df6..74b0bd2c24d4 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -446,23 +446,6 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr) case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: /* - * We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accounting - * these specific fault cases. - */ - kprobes_inc_nmissed_count(p); - - /* - * We come here because instructions in the pre/post - * handler caused the page_fault, this could happen - * if handler tries to access user space by - * copy_from_user(), get_user() etc. Let the - * user-specified handler try to fix it first. - */ - if (p->fault_handler && p->fault_handler(p, regs, trapnr)) - return 1; - - /* * In case the user-specified fault handler returned * zero, try to fix up. */ diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c index 756100b01e84..1c7f358ef0be 100644 --- a/arch/sh/kernel/kprobes.c +++ b/arch/sh/kernel/kprobes.c @@ -383,23 +383,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: /* - * We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accounting - * these specific fault cases. - */ - kprobes_inc_nmissed_count(cur); - - /* - * We come here because instructions in the pre/post - * handler caused the page_fault, this could happen - * if handler tries to access user space by - * copy_from_user(), get_user() etc. Let the - * user-specified handler try to fix it first. - */ - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - - /* * In case the user-specified fault handler returned * zero, try to fix up. */ diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c index 217c21a6986a..4c05a4ee6a0e 100644 --- a/arch/sparc/kernel/kprobes.c +++ b/arch/sparc/kernel/kprobes.c @@ -346,23 +346,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) case KPROBE_HIT_ACTIVE: case KPROBE_HIT_SSDONE: /* - * We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accounting - * these specific fault cases. - */ - kprobes_inc_nmissed_count(cur); - - /* - * We come here because instructions in the pre/post - * handler caused the page_fault, this could happen - * if handler tries to access user space by - * copy_from_user(), get_user() etc. Let the - * user-specified handler try to fix it first. - */ - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - - /* * In case the user-specified fault handler returned * zero, try to fix up. */ diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 8f71dd72ef95..1eb45139fcc6 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1626,6 +1626,8 @@ static void x86_pmu_del(struct perf_event *event, int flags) if (cpuc->txn_flags & PERF_PMU_TXN_ADD) goto do_del; + __set_bit(event->hw.idx, cpuc->dirty); + /* * Not a TXN, therefore cleanup properly. */ @@ -2474,6 +2476,31 @@ static int x86_pmu_event_init(struct perf_event *event) return err; } +void perf_clear_dirty_counters(void) +{ + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + int i; + + /* Don't need to clear the assigned counter. */ + for (i = 0; i < cpuc->n_events; i++) + __clear_bit(cpuc->assign[i], cpuc->dirty); + + if (bitmap_empty(cpuc->dirty, X86_PMC_IDX_MAX)) + return; + + for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) { + /* Metrics and fake events don't have corresponding HW counters. */ + if (is_metric_idx(i) || (i == INTEL_PMC_IDX_FIXED_VLBR)) + continue; + else if (i >= INTEL_PMC_IDX_FIXED) + wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0); + else + wrmsrl(x86_pmu_event_addr(i), 0); + } + + bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX); +} + static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) { if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) @@ -2497,7 +2524,6 @@ static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm) { - if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) return; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 3a77f66730d0..fca7a6e2242f 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -280,6 +280,8 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE), + INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE), + INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE), EVENT_EXTRA_END }; @@ -4030,8 +4032,10 @@ spr_get_event_constraints(struct cpu_hw_events *cpuc, int idx, * The :ppp indicates the Precise Distribution (PDist) facility, which * is only supported on the GP counter 0. If a :ppp event which is not * available on the GP counter 0, error out. + * Exception: Instruction PDIR is only available on the fixed counter 0. */ - if (event->attr.precise_ip == 3) { + if ((event->attr.precise_ip == 3) && + !constraint_match(&fixed0_constraint, event->hw.config)) { if (c->idxmsk64 & BIT_ULL(0)) return &counter0_constraint; @@ -6163,8 +6167,13 @@ __init int intel_pmu_init(void) pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; pmu->name = "cpu_core"; pmu->cpu_type = hybrid_big; - pmu->num_counters = x86_pmu.num_counters + 2; - pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1; + if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { + pmu->num_counters = x86_pmu.num_counters + 2; + pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1; + } else { + pmu->num_counters = x86_pmu.num_counters; + pmu->num_counters_fixed = x86_pmu.num_counters_fixed; + } pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); pmu->unconstrained = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 1ec8fd311f38..8647713276a7 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1187,6 +1187,9 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; struct debug_store *ds = cpuc->ds; + u64 value = ds->pebs_event_reset[hwc->idx]; + u32 base = MSR_RELOAD_PMC0; + unsigned int idx = hwc->idx; if (!is_pebs_pt(event)) return; @@ -1196,7 +1199,12 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event) cpuc->pebs_enabled |= PEBS_OUTPUT_PT; - wrmsrl(MSR_RELOAD_PMC0 + hwc->idx, ds->pebs_event_reset[hwc->idx]); + if (hwc->idx >= INTEL_PMC_IDX_FIXED) { + base = MSR_RELOAD_FIXED_CTR0; + idx = hwc->idx - INTEL_PMC_IDX_FIXED; + value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx]; + } + wrmsrl(base + idx, value); } void intel_pmu_pebs_enable(struct perf_event *event) @@ -1204,6 +1212,7 @@ void intel_pmu_pebs_enable(struct perf_event *event) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; struct debug_store *ds = cpuc->ds; + unsigned int idx = hwc->idx; hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; @@ -1222,19 +1231,18 @@ void intel_pmu_pebs_enable(struct perf_event *event) } } + if (idx >= INTEL_PMC_IDX_FIXED) + idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED); + /* * Use auto-reload if possible to save a MSR write in the PMI. * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD. */ if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { - unsigned int idx = hwc->idx; - - if (idx >= INTEL_PMC_IDX_FIXED) - idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED); ds->pebs_event_reset[idx] = (u64)(-hwc->sample_period) & x86_pmu.cntval_mask; } else { - ds->pebs_event_reset[hwc->idx] = 0; + ds->pebs_event_reset[idx] = 0; } intel_pmu_pebs_via_pt_enable(event); diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index df7b07d7fdcb..9bf4dbbc26e2 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -801,8 +801,6 @@ static void uncore_pmu_enable(struct pmu *pmu) struct intel_uncore_box *box; uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); - if (!uncore_pmu) - return; box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); if (!box) @@ -818,8 +816,6 @@ static void uncore_pmu_disable(struct pmu *pmu) struct intel_uncore_box *box; uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); - if (!uncore_pmu) - return; box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); if (!box) diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 291791002997..187d7287039c 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -92,6 +92,7 @@ struct intel_uncore_type { /* * Optional callbacks for managing mapping of Uncore units to PMONs */ + int (*get_topology)(struct intel_uncore_type *type); int (*set_mapping)(struct intel_uncore_type *type); void (*cleanup_mapping)(struct intel_uncore_type *type); }; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 3a75a2c601c2..bb6eb1e5569c 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -348,6 +348,13 @@ #define SKX_M2M_PCI_PMON_CTR0 0x200 #define SKX_M2M_PCI_PMON_BOX_CTL 0x258 +/* Memory Map registers device ID */ +#define SNR_ICX_MESH2IIO_MMAP_DID 0x9a2 +#define SNR_ICX_SAD_CONTROL_CFG 0x3f4 + +/* Getting I/O stack id in SAD_COTROL_CFG notation */ +#define SAD_CONTROL_STACK_ID(data) (((data) >> 4) & 0x7) + /* SNR Ubox */ #define SNR_U_MSR_PMON_CTR0 0x1f98 #define SNR_U_MSR_PMON_CTL0 0x1f91 @@ -3682,12 +3689,19 @@ static inline u8 skx_iio_stack(struct intel_uncore_pmu *pmu, int die) } static umode_t -skx_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die) +pmu_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, + int die, int zero_bus_pmu) { struct intel_uncore_pmu *pmu = dev_to_uncore_pmu(kobj_to_dev(kobj)); - /* Root bus 0x00 is valid only for die 0 AND pmu_idx = 0. */ - return (!skx_iio_stack(pmu, die) && pmu->pmu_idx) ? 0 : attr->mode; + return (!skx_iio_stack(pmu, die) && pmu->pmu_idx != zero_bus_pmu) ? 0 : attr->mode; +} + +static umode_t +skx_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die) +{ + /* Root bus 0x00 is valid only for pmu_idx = 0. */ + return pmu_iio_mapping_visible(kobj, attr, die, 0); } static ssize_t skx_iio_mapping_show(struct device *dev, @@ -3772,7 +3786,8 @@ static const struct attribute_group *skx_iio_attr_update[] = { NULL, }; -static int skx_iio_set_mapping(struct intel_uncore_type *type) +static int +pmu_iio_set_mapping(struct intel_uncore_type *type, struct attribute_group *ag) { char buf[64]; int ret; @@ -3780,7 +3795,7 @@ static int skx_iio_set_mapping(struct intel_uncore_type *type) struct attribute **attrs = NULL; struct dev_ext_attribute *eas = NULL; - ret = skx_iio_get_topology(type); + ret = type->get_topology(type); if (ret < 0) goto clear_attr_update; @@ -3807,7 +3822,7 @@ static int skx_iio_set_mapping(struct intel_uncore_type *type) eas[die].var = (void *)die; attrs[die] = &eas[die].attr.attr; } - skx_iio_mapping_group.attrs = attrs; + ag->attrs = attrs; return 0; err: @@ -3821,6 +3836,11 @@ clear_attr_update: return ret; } +static int skx_iio_set_mapping(struct intel_uncore_type *type) +{ + return pmu_iio_set_mapping(type, &skx_iio_mapping_group); +} + static void skx_iio_cleanup_mapping(struct intel_uncore_type *type) { struct attribute **attr = skx_iio_mapping_group.attrs; @@ -3851,6 +3871,7 @@ static struct intel_uncore_type skx_uncore_iio = { .ops = &skx_uncore_iio_ops, .format_group = &skx_uncore_iio_format_group, .attr_update = skx_iio_attr_update, + .get_topology = skx_iio_get_topology, .set_mapping = skx_iio_set_mapping, .cleanup_mapping = skx_iio_cleanup_mapping, }; @@ -4393,6 +4414,91 @@ static const struct attribute_group snr_uncore_iio_format_group = { .attrs = snr_uncore_iio_formats_attr, }; +static umode_t +snr_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die) +{ + /* Root bus 0x00 is valid only for pmu_idx = 1. */ + return pmu_iio_mapping_visible(kobj, attr, die, 1); +} + +static struct attribute_group snr_iio_mapping_group = { + .is_visible = snr_iio_mapping_visible, +}; + +static const struct attribute_group *snr_iio_attr_update[] = { + &snr_iio_mapping_group, + NULL, +}; + +static int sad_cfg_iio_topology(struct intel_uncore_type *type, u8 *sad_pmon_mapping) +{ + u32 sad_cfg; + int die, stack_id, ret = -EPERM; + struct pci_dev *dev = NULL; + + type->topology = kcalloc(uncore_max_dies(), sizeof(*type->topology), + GFP_KERNEL); + if (!type->topology) + return -ENOMEM; + + while ((dev = pci_get_device(PCI_VENDOR_ID_INTEL, SNR_ICX_MESH2IIO_MMAP_DID, dev))) { + ret = pci_read_config_dword(dev, SNR_ICX_SAD_CONTROL_CFG, &sad_cfg); + if (ret) { + ret = pcibios_err_to_errno(ret); + break; + } + + die = uncore_pcibus_to_dieid(dev->bus); + stack_id = SAD_CONTROL_STACK_ID(sad_cfg); + if (die < 0 || stack_id >= type->num_boxes) { + ret = -EPERM; + break; + } + + /* Convert stack id from SAD_CONTROL to PMON notation. */ + stack_id = sad_pmon_mapping[stack_id]; + + ((u8 *)&(type->topology[die].configuration))[stack_id] = dev->bus->number; + type->topology[die].segment = pci_domain_nr(dev->bus); + } + + if (ret) { + kfree(type->topology); + type->topology = NULL; + } + + return ret; +} + +/* + * SNR has a static mapping of stack IDs from SAD_CONTROL_CFG notation to PMON + */ +enum { + SNR_QAT_PMON_ID, + SNR_CBDMA_DMI_PMON_ID, + SNR_NIS_PMON_ID, + SNR_DLB_PMON_ID, + SNR_PCIE_GEN3_PMON_ID +}; + +static u8 snr_sad_pmon_mapping[] = { + SNR_CBDMA_DMI_PMON_ID, + SNR_PCIE_GEN3_PMON_ID, + SNR_DLB_PMON_ID, + SNR_NIS_PMON_ID, + SNR_QAT_PMON_ID +}; + +static int snr_iio_get_topology(struct intel_uncore_type *type) +{ + return sad_cfg_iio_topology(type, snr_sad_pmon_mapping); +} + +static int snr_iio_set_mapping(struct intel_uncore_type *type) +{ + return pmu_iio_set_mapping(type, &snr_iio_mapping_group); +} + static struct intel_uncore_type snr_uncore_iio = { .name = "iio", .num_counters = 4, @@ -4406,6 +4512,10 @@ static struct intel_uncore_type snr_uncore_iio = { .msr_offset = SNR_IIO_MSR_OFFSET, .ops = &ivbep_uncore_msr_ops, .format_group = &snr_uncore_iio_format_group, + .attr_update = snr_iio_attr_update, + .get_topology = snr_iio_get_topology, + .set_mapping = snr_iio_set_mapping, + .cleanup_mapping = skx_iio_cleanup_mapping, }; static struct intel_uncore_type snr_uncore_irp = { @@ -4933,6 +5043,53 @@ static struct event_constraint icx_uncore_iio_constraints[] = { EVENT_CONSTRAINT_END }; +static umode_t +icx_iio_mapping_visible(struct kobject *kobj, struct attribute *attr, int die) +{ + /* Root bus 0x00 is valid only for pmu_idx = 5. */ + return pmu_iio_mapping_visible(kobj, attr, die, 5); +} + +static struct attribute_group icx_iio_mapping_group = { + .is_visible = icx_iio_mapping_visible, +}; + +static const struct attribute_group *icx_iio_attr_update[] = { + &icx_iio_mapping_group, + NULL, +}; + +/* + * ICX has a static mapping of stack IDs from SAD_CONTROL_CFG notation to PMON + */ +enum { + ICX_PCIE1_PMON_ID, + ICX_PCIE2_PMON_ID, + ICX_PCIE3_PMON_ID, + ICX_PCIE4_PMON_ID, + ICX_PCIE5_PMON_ID, + ICX_CBDMA_DMI_PMON_ID +}; + +static u8 icx_sad_pmon_mapping[] = { + ICX_CBDMA_DMI_PMON_ID, + ICX_PCIE1_PMON_ID, + ICX_PCIE2_PMON_ID, + ICX_PCIE3_PMON_ID, + ICX_PCIE4_PMON_ID, + ICX_PCIE5_PMON_ID, +}; + +static int icx_iio_get_topology(struct intel_uncore_type *type) +{ + return sad_cfg_iio_topology(type, icx_sad_pmon_mapping); +} + +static int icx_iio_set_mapping(struct intel_uncore_type *type) +{ + return pmu_iio_set_mapping(type, &icx_iio_mapping_group); +} + static struct intel_uncore_type icx_uncore_iio = { .name = "iio", .num_counters = 4, @@ -4947,6 +5104,10 @@ static struct intel_uncore_type icx_uncore_iio = { .constraints = icx_uncore_iio_constraints, .ops = &skx_uncore_iio_ops, .format_group = &snr_uncore_iio_format_group, + .attr_update = icx_iio_attr_update, + .get_topology = icx_iio_get_topology, + .set_mapping = icx_iio_set_mapping, + .cleanup_mapping = skx_iio_cleanup_mapping, }; static struct intel_uncore_type icx_uncore_irp = { diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index ad87cb36f7c8..2bf1c7ea2758 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -229,6 +229,7 @@ struct cpu_hw_events { */ struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long dirty[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; int enabled; int n_events; /* the # of events in the below arrays */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 544f41a179fb..8fc1b5003713 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -478,6 +478,7 @@ struct x86_pmu_lbr { extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); extern void perf_check_microcode(void); +extern void perf_clear_dirty_counters(void); extern int x86_perf_rdpmc_index(struct perf_event *event); #else static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index d3d65545cb8b..1b3fe0edd329 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -1102,24 +1102,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) restore_previous_kprobe(kcb); else reset_current_kprobe(); - } else if (kcb->kprobe_status == KPROBE_HIT_ACTIVE || - kcb->kprobe_status == KPROBE_HIT_SSDONE) { - /* - * We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accounting - * these specific fault cases. - */ - kprobes_inc_nmissed_count(cur); - - /* - * We come here because instructions in the pre/post - * handler caused the page_fault, this could happen - * if handler tries to access user space by - * copy_from_user(), get_user() etc. Let the - * user-specified handler try to fix it first. - */ - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; } return 0; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 6bda7f67d737..2d27932c9ac7 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1186,7 +1186,7 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code, return; /* kprobes don't want to hook the spurious faults: */ - if (kprobe_page_fault(regs, X86_TRAP_PF)) + if (WARN_ON_ONCE(kprobe_page_fault(regs, X86_TRAP_PF))) return; /* @@ -1239,7 +1239,7 @@ void do_user_addr_fault(struct pt_regs *regs, } /* kprobes don't want to hook the spurious faults: */ - if (unlikely(kprobe_page_fault(regs, X86_TRAP_PF))) + if (WARN_ON_ONCE(kprobe_page_fault(regs, X86_TRAP_PF))) return; /* diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 78804680e923..cfe6b1e85fa6 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -14,6 +14,7 @@ #include <asm/nospec-branch.h> #include <asm/cache.h> #include <asm/apic.h> +#include <asm/perf_event.h> #include "mm_internal.h" @@ -404,9 +405,14 @@ static inline void cr4_update_pce_mm(struct mm_struct *mm) { if (static_branch_unlikely(&rdpmc_always_available_key) || (!static_branch_unlikely(&rdpmc_never_available_key) && - atomic_read(&mm->context.perf_rdpmc_allowed))) + atomic_read(&mm->context.perf_rdpmc_allowed))) { + /* + * Clear the existing dirty counters to + * prevent the leak for an RDPMC task. + */ + perf_clear_dirty_counters(); cr4_set_bits_irqsoff(X86_CR4_PCE); - else + } else cr4_clear_bits_irqsoff(X86_CR4_PCE); } diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 1883a4a9f16a..523ffc7bc3a8 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -54,8 +54,6 @@ struct kretprobe_instance; typedef int (*kprobe_pre_handler_t) (struct kprobe *, struct pt_regs *); typedef void (*kprobe_post_handler_t) (struct kprobe *, struct pt_regs *, unsigned long flags); -typedef int (*kprobe_fault_handler_t) (struct kprobe *, struct pt_regs *, - int trapnr); typedef int (*kretprobe_handler_t) (struct kretprobe_instance *, struct pt_regs *); @@ -83,12 +81,6 @@ struct kprobe { /* Called after addr is executed, unless... */ kprobe_post_handler_t post_handler; - /* - * ... called if executing addr causes a fault (eg. page fault). - * Return 1 if it handled fault, otherwise kernel will see it. - */ - kprobe_fault_handler_t fault_handler; - /* Saved opcode (which has been replaced with breakpoint) */ kprobe_opcode_t opcode; diff --git a/kernel/events/core.c b/kernel/events/core.c index fe88d6eea3c2..ea0e24040691 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -132,6 +132,7 @@ task_function_call(struct task_struct *p, remote_function_f func, void *info) /** * cpu_function_call - call a function on the cpu + * @cpu: target cpu to queue this function * @func: the function to be called * @info: the function call argument * @@ -3821,9 +3822,16 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, struct task_struct *task) { struct perf_cpu_context *cpuctx; - struct pmu *pmu = ctx->pmu; + struct pmu *pmu; cpuctx = __get_cpu_context(ctx); + + /* + * HACK: for HETEROGENEOUS the task context might have switched to a + * different PMU, force (re)set the context, + */ + pmu = ctx->pmu = cpuctx->ctx.pmu; + if (cpuctx->task_ctx == ctx) { if (cpuctx->sched_cb_usage) __perf_pmu_sched_task(cpuctx, true); @@ -6669,10 +6677,10 @@ out: return data->aux_size; } -long perf_pmu_snapshot_aux(struct perf_buffer *rb, - struct perf_event *event, - struct perf_output_handle *handle, - unsigned long size) +static long perf_pmu_snapshot_aux(struct perf_buffer *rb, + struct perf_event *event, + struct perf_output_handle *handle, + unsigned long size) { unsigned long flags; long ret; @@ -11919,6 +11927,7 @@ again: * @pid: target pid * @cpu: target cpu * @group_fd: group leader event fd + * @flags: perf event open flags */ SYSCALL_DEFINE5(perf_event_open, struct perf_event_attr __user *, attr_uptr, @@ -12375,6 +12384,8 @@ err_fd: * @attr: attributes of the counter to create * @cpu: cpu in which the counter is bound * @task: task to profile (NULL for percpu) + * @overflow_handler: callback to trigger when we hit the event + * @context: context data could be used in overflow_handler callback */ struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index b48d7039a015..835973444a1e 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -451,6 +451,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp) * register_user_hw_breakpoint - register a hardware breakpoint for user space * @attr: breakpoint attributes * @triggered: callback to trigger when we hit the breakpoint + * @context: context data could be used in the triggered callback * @tsk: pointer to 'task_struct' of the process to which the address belongs */ struct perf_event * @@ -550,6 +551,7 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); * register_wide_hw_breakpoint - register a wide breakpoint in the kernel * @attr: breakpoint attributes * @triggered: callback to trigger when we hit the breakpoint + * @context: context data could be used in the triggered callback * * @return a set of per_cpu pointers to perf events */ diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 6addc9780319..a481ef696143 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -453,6 +453,7 @@ static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm, * that have fixed length instructions. * * uprobe_write_opcode - write the opcode at a given virtual address. + * @auprobe: arch specific probepoint information. * @mm: the probed process address space. * @vaddr: the virtual address to store the opcode. * @opcode: opcode to be written at @vaddr. diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 745f08fdd7a6..e41385afe79d 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1183,23 +1183,6 @@ static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs, } NOKPROBE_SYMBOL(aggr_post_handler); -static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, - int trapnr) -{ - struct kprobe *cur = __this_cpu_read(kprobe_instance); - - /* - * if we faulted "during" the execution of a user specified - * probe handler, invoke just that probe's fault handler - */ - if (cur && cur->fault_handler) { - if (cur->fault_handler(cur, regs, trapnr)) - return 1; - } - return 0; -} -NOKPROBE_SYMBOL(aggr_fault_handler); - /* Walks the list and increments nmissed count for multiprobe case */ void kprobes_inc_nmissed_count(struct kprobe *p) { @@ -1330,7 +1313,6 @@ static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p) ap->addr = p->addr; ap->flags = p->flags & ~KPROBE_FLAG_OPTIMIZED; ap->pre_handler = aggr_pre_handler; - ap->fault_handler = aggr_fault_handler; /* We don't care the kprobe which has gone. */ if (p->post_handler && !kprobe_gone(p)) ap->post_handler = aggr_post_handler; @@ -2014,7 +1996,6 @@ int register_kretprobe(struct kretprobe *rp) rp->kp.pre_handler = pre_handler_kretprobe; rp->kp.post_handler = NULL; - rp->kp.fault_handler = NULL; /* Pre-allocate memory for max kretprobe instances */ if (rp->maxactive <= 0) { diff --git a/samples/kprobes/kprobe_example.c b/samples/kprobes/kprobe_example.c index c495664c0a9b..4b2f31828951 100644 --- a/samples/kprobes/kprobe_example.c +++ b/samples/kprobes/kprobe_example.c @@ -94,26 +94,11 @@ static void __kprobes handler_post(struct kprobe *p, struct pt_regs *regs, #endif } -/* - * fault_handler: this is called if an exception is generated for any - * instruction within the pre- or post-handler, or when Kprobes - * single-steps the probed instruction. - */ -static int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr) -{ - pr_info("fault_handler: p->addr = 0x%p, trap #%dn", p->addr, trapnr); - /* Return 0 because we don't handle the fault. */ - return 0; -} -/* NOKPROBE_SYMBOL() is also available */ -NOKPROBE_SYMBOL(handler_fault); - static int __init kprobe_init(void) { int ret; kp.pre_handler = handler_pre; kp.post_handler = handler_post; - kp.fault_handler = handler_fault; ret = register_kprobe(&kp); if (ret < 0) { |