diff options
33 files changed, 2017 insertions, 243 deletions
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 8143cd7cdbfb..0dae252f7a33 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -685,6 +685,10 @@ static int alpha_pmu_event_init(struct perf_event *event) { int err; + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + switch (event->attr.type) { case PERF_TYPE_RAW: case PERF_TYPE_HARDWARE: diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index b2abfa18f137..8a89d3b7626b 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -539,6 +539,10 @@ static int armpmu_event_init(struct perf_event *event) int err = 0; atomic_t *active_events = &armpmu->active_events; + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + if (armpmu->map_event(event) == -ENOENT) return -ENOENT; diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index e3b897acfbc0..811084f4e422 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -606,6 +606,10 @@ static int mipspmu_event_init(struct perf_event *event) { int err = 0; + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + switch (event->attr.type) { case PERF_TYPE_RAW: case PERF_TYPE_HARDWARE: diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index f04c2301725e..c2e27ede07ec 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -1084,6 +1084,10 @@ static int power_pmu_event_init(struct perf_event *event) if (!ppmu) return -ENOENT; + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + switch (event->attr.type) { case PERF_TYPE_HARDWARE: ev = event->attr.config; diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 10b14e3a7eb8..068b8a2759b5 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -310,6 +310,10 @@ static int sh_pmu_event_init(struct perf_event *event) { int err; + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + switch (event->attr.type) { case PERF_TYPE_RAW: case PERF_TYPE_HW_CACHE: diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 614da624330c..8e16a4a21582 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1105,6 +1105,10 @@ static int sparc_pmu_event_init(struct perf_event *event) if (atomic_read(&nmi_active) < 0) return -ENODEV; + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + switch (attr->type) { case PERF_TYPE_HARDWARE: if (attr->config >= sparc_pmu->max_events) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index a6962d9161a0..ccb805966f68 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -56,6 +56,13 @@ #define MSR_OFFCORE_RSP_0 0x000001a6 #define MSR_OFFCORE_RSP_1 0x000001a7 +#define MSR_LBR_SELECT 0x000001c8 +#define MSR_LBR_TOS 0x000001c9 +#define MSR_LBR_NHM_FROM 0x00000680 +#define MSR_LBR_NHM_TO 0x000006c0 +#define MSR_LBR_CORE_FROM 0x00000040 +#define MSR_LBR_CORE_TO 0x00000060 + #define MSR_IA32_PEBS_ENABLE 0x000003f1 #define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_PERF_CAPABILITIES 0x00000345 diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index f8bddb5b0600..0a18d16cb58d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -353,6 +353,36 @@ int x86_setup_perfctr(struct perf_event *event) return 0; } +/* + * check that branch_sample_type is compatible with + * settings needed for precise_ip > 1 which implies + * using the LBR to capture ALL taken branches at the + * priv levels of the measurement + */ +static inline int precise_br_compat(struct perf_event *event) +{ + u64 m = event->attr.branch_sample_type; + u64 b = 0; + + /* must capture all branches */ + if (!(m & PERF_SAMPLE_BRANCH_ANY)) + return 0; + + m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER; + + if (!event->attr.exclude_user) + b |= PERF_SAMPLE_BRANCH_USER; + + if (!event->attr.exclude_kernel) + b |= PERF_SAMPLE_BRANCH_KERNEL; + + /* + * ignore PERF_SAMPLE_BRANCH_HV, not supported on x86 + */ + + return m == b; +} + int x86_pmu_hw_config(struct perf_event *event) { if (event->attr.precise_ip) { @@ -369,6 +399,36 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.precise_ip > precise) return -EOPNOTSUPP; + /* + * check that PEBS LBR correction does not conflict with + * whatever the user is asking with attr->branch_sample_type + */ + if (event->attr.precise_ip > 1) { + u64 *br_type = &event->attr.branch_sample_type; + + if (has_branch_stack(event)) { + if (!precise_br_compat(event)) + return -EOPNOTSUPP; + + /* branch_sample_type is compatible */ + + } else { + /* + * user did not specify branch_sample_type + * + * For PEBS fixups, we capture all + * the branches at the priv level of the + * event. + */ + *br_type = PERF_SAMPLE_BRANCH_ANY; + + if (!event->attr.exclude_user) + *br_type |= PERF_SAMPLE_BRANCH_USER; + + if (!event->attr.exclude_kernel) + *br_type |= PERF_SAMPLE_BRANCH_KERNEL; + } + } } /* @@ -426,6 +486,10 @@ static int __x86_pmu_event_init(struct perf_event *event) /* mark unused */ event->hw.extra_reg.idx = EXTRA_REG_NONE; + /* mark not used */ + event->hw.extra_reg.idx = EXTRA_REG_NONE; + event->hw.branch_reg.idx = EXTRA_REG_NONE; + return x86_pmu.hw_config(event); } @@ -1607,25 +1671,32 @@ static const struct attribute_group *x86_pmu_attr_groups[] = { NULL, }; +static void x86_pmu_flush_branch_stack(void) +{ + if (x86_pmu.flush_branch_stack) + x86_pmu.flush_branch_stack(); +} + static struct pmu pmu = { - .pmu_enable = x86_pmu_enable, - .pmu_disable = x86_pmu_disable, + .pmu_enable = x86_pmu_enable, + .pmu_disable = x86_pmu_disable, .attr_groups = x86_pmu_attr_groups, .event_init = x86_pmu_event_init, - .add = x86_pmu_add, - .del = x86_pmu_del, - .start = x86_pmu_start, - .stop = x86_pmu_stop, - .read = x86_pmu_read, + .add = x86_pmu_add, + .del = x86_pmu_del, + .start = x86_pmu_start, + .stop = x86_pmu_stop, + .read = x86_pmu_read, .start_txn = x86_pmu_start_txn, .cancel_txn = x86_pmu_cancel_txn, .commit_txn = x86_pmu_commit_txn, .event_idx = x86_pmu_event_idx, + .flush_branch_stack = x86_pmu_flush_branch_stack, }; void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now) diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 66fda0c26402..8484e77c211e 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -33,6 +33,7 @@ enum extra_reg_type { EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ + EXTRA_REG_LBR = 2, /* lbr_select */ EXTRA_REG_MAX /* number of entries needed */ }; @@ -130,6 +131,8 @@ struct cpu_hw_events { void *lbr_context; struct perf_branch_stack lbr_stack; struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; + struct er_account *lbr_sel; + u64 br_sel; /* * Intel host/guest exclude bits @@ -344,6 +347,7 @@ struct x86_pmu { void (*cpu_starting)(int cpu); void (*cpu_dying)(int cpu); void (*cpu_dead)(int cpu); + void (*flush_branch_stack)(void); /* * Intel Arch Perfmon v2+ @@ -365,6 +369,8 @@ struct x86_pmu { */ unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ int lbr_nr; /* hardware stack size */ + u64 lbr_sel_mask; /* LBR_SELECT valid bits */ + const int *lbr_sel_map; /* lbr_select mappings */ /* * Extra registers for events @@ -478,6 +484,15 @@ extern struct event_constraint emptyconstraint; extern struct event_constraint unconstrained; +static inline bool kernel_ip(unsigned long ip) +{ +#ifdef CONFIG_X86_32 + return ip > PAGE_OFFSET; +#else + return (long)ip < 0; +#endif +} + #ifdef CONFIG_CPU_SUP_AMD int amd_pmu_init(void); @@ -558,6 +573,10 @@ void intel_pmu_lbr_init_nhm(void); void intel_pmu_lbr_init_atom(void); +void intel_pmu_lbr_init_snb(void); + +int intel_pmu_setup_lbr_filter(struct perf_event *event); + int p4_pmu_init(void); int p6_pmu_init(void); diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 67250a52430b..dd002faff7a6 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -139,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event) if (ret) return ret; + if (has_branch_stack(event)) + return -EOPNOTSUPP; + if (event->attr.exclude_host && event->attr.exclude_guest) /* * When HO == GO == 1 the hardware treats that as GO == HO == 0 diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 4bd9c9ef9d42..6a84e7f28f05 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -728,6 +728,19 @@ static __initconst const u64 atom_hw_cache_event_ids }, }; +static inline bool intel_pmu_needs_lbr_smpl(struct perf_event *event) +{ + /* user explicitly requested branch sampling */ + if (has_branch_stack(event)) + return true; + + /* implicit branch sampling to correct PEBS skid */ + if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) + return true; + + return false; +} + static void intel_pmu_disable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -882,6 +895,13 @@ static void intel_pmu_disable_event(struct perf_event *event) cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx); cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx); + /* + * must disable before any actual event + * because any event may be combined with LBR + */ + if (intel_pmu_needs_lbr_smpl(event)) + intel_pmu_lbr_disable(event); + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_disable_fixed(hwc); return; @@ -936,6 +956,12 @@ static void intel_pmu_enable_event(struct perf_event *event) intel_pmu_enable_bts(hwc->config); return; } + /* + * must enabled before any actual event + * because any event may be combined with LBR + */ + if (intel_pmu_needs_lbr_smpl(event)) + intel_pmu_lbr_enable(event); if (event->attr.exclude_host) cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx); @@ -1058,6 +1084,9 @@ again: data.period = event->hw.last_period; + if (has_branch_stack(event)) + data.br_stack = &cpuc->lbr_stack; + if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); } @@ -1124,17 +1153,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx) */ static struct event_constraint * __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, - struct perf_event *event) + struct perf_event *event, + struct hw_perf_event_extra *reg) { struct event_constraint *c = &emptyconstraint; - struct hw_perf_event_extra *reg = &event->hw.extra_reg; struct er_account *era; unsigned long flags; int orig_idx = reg->idx; /* already allocated shared msr */ if (reg->alloc) - return &unconstrained; + return NULL; /* call x86_get_event_constraint() */ again: era = &cpuc->shared_regs->regs[reg->idx]; @@ -1157,14 +1186,10 @@ again: reg->alloc = 1; /* - * All events using extra_reg are unconstrained. - * Avoids calling x86_get_event_constraints() - * - * Must revisit if extra_reg controlling events - * ever have constraints. Worst case we go through - * the regular event constraint table. + * need to call x86_get_event_constraint() + * to check if associated event has constraints */ - c = &unconstrained; + c = NULL; } else if (intel_try_alt_er(event, orig_idx)) { raw_spin_unlock_irqrestore(&era->lock, flags); goto again; @@ -1201,11 +1226,23 @@ static struct event_constraint * intel_shared_regs_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { - struct event_constraint *c = NULL; - - if (event->hw.extra_reg.idx != EXTRA_REG_NONE) - c = __intel_shared_reg_get_constraints(cpuc, event); - + struct event_constraint *c = NULL, *d; + struct hw_perf_event_extra *xreg, *breg; + + xreg = &event->hw.extra_reg; + if (xreg->idx != EXTRA_REG_NONE) { + c = __intel_shared_reg_get_constraints(cpuc, event, xreg); + if (c == &emptyconstraint) + return c; + } + breg = &event->hw.branch_reg; + if (breg->idx != EXTRA_REG_NONE) { + d = __intel_shared_reg_get_constraints(cpuc, event, breg); + if (d == &emptyconstraint) { + __intel_shared_reg_put_constraints(cpuc, xreg); + c = d; + } + } return c; } @@ -1253,6 +1290,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc, reg = &event->hw.extra_reg; if (reg->idx != EXTRA_REG_NONE) __intel_shared_reg_put_constraints(cpuc, reg); + + reg = &event->hw.branch_reg; + if (reg->idx != EXTRA_REG_NONE) + __intel_shared_reg_put_constraints(cpuc, reg); } static void intel_put_event_constraints(struct cpu_hw_events *cpuc, @@ -1295,6 +1336,12 @@ static int intel_pmu_hw_config(struct perf_event *event) event->hw.config = alt_config; } + if (intel_pmu_needs_lbr_smpl(event)) { + ret = intel_pmu_setup_lbr_filter(event); + if (ret) + return ret; + } + if (event->attr.type != PERF_TYPE_RAW) return 0; @@ -1433,7 +1480,7 @@ static int intel_pmu_cpu_prepare(int cpu) { struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - if (!x86_pmu.extra_regs) + if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map)) return NOTIFY_OK; cpuc->shared_regs = allocate_shared_regs(cpu); @@ -1455,22 +1502,28 @@ static void intel_pmu_cpu_starting(int cpu) */ intel_pmu_lbr_reset(); - if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING)) + cpuc->lbr_sel = NULL; + + if (!cpuc->shared_regs) return; - for_each_cpu(i, topology_thread_cpumask(cpu)) { - struct intel_shared_regs *pc; + if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) { + for_each_cpu(i, topology_thread_cpumask(cpu)) { + struct intel_shared_regs *pc; - pc = per_cpu(cpu_hw_events, i).shared_regs; - if (pc && pc->core_id == core_id) { - cpuc->kfree_on_online = cpuc->shared_regs; - cpuc->shared_regs = pc; - break; + pc = per_cpu(cpu_hw_events, i).shared_regs; + if (pc && pc->core_id == core_id) { + cpuc->kfree_on_online = cpuc->shared_regs; + cpuc->shared_regs = pc; + break; + } } + cpuc->shared_regs->core_id = core_id; + cpuc->shared_regs->refcnt++; } - cpuc->shared_regs->core_id = core_id; - cpuc->shared_regs->refcnt++; + if (x86_pmu.lbr_sel_map) + cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR]; } static void intel_pmu_cpu_dying(int cpu) @@ -1488,6 +1541,18 @@ static void intel_pmu_cpu_dying(int cpu) fini_debug_store_on_cpu(cpu); } +static void intel_pmu_flush_branch_stack(void) +{ + /* + * Intel LBR does not tag entries with the + * PID of the current task, then we need to + * flush it on ctxsw + * For now, we simply reset it + */ + if (x86_pmu.lbr_nr) + intel_pmu_lbr_reset(); +} + static __initconst const struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, @@ -1515,6 +1580,7 @@ static __initconst const struct x86_pmu intel_pmu = { .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, .guest_get_msrs = intel_guest_get_msrs, + .flush_branch_stack = intel_pmu_flush_branch_stack, }; static __init void intel_clovertown_quirk(void) @@ -1745,7 +1811,7 @@ __init int intel_pmu_init(void) memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - intel_pmu_lbr_init_nhm(); + intel_pmu_lbr_init_snb(); x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index d6bd49faa40c..7f64df19e7dd 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -3,6 +3,7 @@ #include <linux/slab.h> #include <asm/perf_event.h> +#include <asm/insn.h> #include "perf_event.h" @@ -439,9 +440,6 @@ void intel_pmu_pebs_enable(struct perf_event *event) hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; cpuc->pebs_enabled |= 1ULL << hwc->idx; - - if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) - intel_pmu_lbr_enable(event); } void intel_pmu_pebs_disable(struct perf_event *event) @@ -454,9 +452,6 @@ void intel_pmu_pebs_disable(struct perf_event *event) wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); hwc->config |= ARCH_PERFMON_EVENTSEL_INT; - - if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) - intel_pmu_lbr_disable(event); } void intel_pmu_pebs_enable_all(void) @@ -475,17 +470,6 @@ void intel_pmu_pebs_disable_all(void) wrmsrl(MSR_IA32_PEBS_ENABLE, 0); } -#include <asm/insn.h> - -static inline bool kernel_ip(unsigned long ip) -{ -#ifdef CONFIG_X86_32 - return ip > PAGE_OFFSET; -#else - return (long)ip < 0; -#endif -} - static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -572,6 +556,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, * both formats and we don't use the other fields in this * routine. */ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct pebs_record_core *pebs = __pebs; struct perf_sample_data data; struct pt_regs regs; @@ -602,6 +587,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event, else regs.flags &= ~PERF_EFLAGS_EXACT; + if (has_branch_stack(event)) + data.br_stack = &cpuc->lbr_stack; + if (perf_event_overflow(event, &data, ®s)) x86_pmu_stop(event, 0); } diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 47a7e63bfe54..520b4265fcd2 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -3,6 +3,7 @@ #include <asm/perf_event.h> #include <asm/msr.h> +#include <asm/insn.h> #include "perf_event.h" @@ -14,6 +15,100 @@ enum { }; /* + * Intel LBR_SELECT bits + * Intel Vol3a, April 2011, Section 16.7 Table 16-10 + * + * Hardware branch filter (not available on all CPUs) + */ +#define LBR_KERNEL_BIT 0 /* do not capture at ring0 */ +#define LBR_USER_BIT 1 /* do not capture at ring > 0 */ +#define LBR_JCC_BIT 2 /* do not capture conditional branches */ +#define LBR_REL_CALL_BIT 3 /* do not capture relative calls */ +#define LBR_IND_CALL_BIT 4 /* do not capture indirect calls */ +#define LBR_RETURN_BIT 5 /* do not capture near returns */ +#define LBR_IND_JMP_BIT 6 /* do not capture indirect jumps */ +#define LBR_REL_JMP_BIT 7 /* do not capture relative jumps */ +#define LBR_FAR_BIT 8 /* do not capture far branches */ + +#define LBR_KERNEL (1 << LBR_KERNEL_BIT) +#define LBR_USER (1 << LBR_USER_BIT) +#define LBR_JCC (1 << LBR_JCC_BIT) +#define LBR_REL_CALL (1 << LBR_REL_CALL_BIT) +#define LBR_IND_CALL (1 << LBR_IND_CALL_BIT) +#define LBR_RETURN (1 << LBR_RETURN_BIT) +#define LBR_REL_JMP (1 << LBR_REL_JMP_BIT) +#define LBR_IND_JMP (1 << LBR_IND_JMP_BIT) +#define LBR_FAR (1 << LBR_FAR_BIT) + +#define LBR_PLM (LBR_KERNEL | LBR_USER) + +#define LBR_SEL_MASK 0x1ff /* valid bits in LBR_SELECT */ +#define LBR_NOT_SUPP -1 /* LBR filter not supported */ +#define LBR_IGN 0 /* ignored */ + +#define LBR_ANY \ + (LBR_JCC |\ + LBR_REL_CALL |\ + LBR_IND_CALL |\ + LBR_RETURN |\ + LBR_REL_JMP |\ + LBR_IND_JMP |\ + LBR_FAR) + +#define LBR_FROM_FLAG_MISPRED (1ULL << 63) + +#define for_each_branch_sample_type(x) \ + for ((x) = PERF_SAMPLE_BRANCH_USER; \ + (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1) + +/* + * x86control flow change classification + * x86control flow changes include branches, interrupts, traps, faults + */ +enum { + X86_BR_NONE = 0, /* unknown */ + + X86_BR_USER = 1 << 0, /* branch target is user */ + X86_BR_KERNEL = 1 << 1, /* branch target is kernel */ + + X86_BR_CALL = 1 << 2, /* call */ + X86_BR_RET = 1 << 3, /* return */ + X86_BR_SYSCALL = 1 << 4, /* syscall */ + X86_BR_SYSRET = 1 << 5, /* syscall return */ + X86_BR_INT = 1 << 6, /* sw interrupt */ + X86_BR_IRET = 1 << 7, /* return from interrupt */ + X86_BR_JCC = 1 << 8, /* conditional */ + X86_BR_JMP = 1 << 9, /* jump */ + X86_BR_IRQ = 1 << 10,/* hw interrupt or trap or fault */ + X86_BR_IND_CALL = 1 << 11,/* indirect calls */ +}; + +#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL) + +#define X86_BR_ANY \ + (X86_BR_CALL |\ + X86_BR_RET |\ + X86_BR_SYSCALL |\ + X86_BR_SYSRET |\ + X86_BR_INT |\ + X86_BR_IRET |\ + X86_BR_JCC |\ + X86_BR_JMP |\ + X86_BR_IRQ |\ + X86_BR_IND_CALL) + +#define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY) + +#define X86_BR_ANY_CALL \ + (X86_BR_CALL |\ + X86_BR_IND_CALL |\ + X86_BR_SYSCALL |\ + X86_BR_IRQ |\ + X86_BR_INT) + +static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc); + +/* * We only support LBR implementations that have FREEZE_LBRS_ON_PMI * otherwise it becomes near impossible to get a reliable stack. */ @@ -21,6 +116,10 @@ enum { static void __intel_pmu_lbr_enable(void) { u64 debugctl; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (cpuc->lbr_sel) + wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config); rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); @@ -76,11 +175,11 @@ void intel_pmu_lbr_enable(struct perf_event *event) * Reset the LBR stack if we changed task context to * avoid data leaks. */ - if (event->ctx->task && cpuc->lbr_context != event->ctx) { intel_pmu_lbr_reset(); cpuc->lbr_context = event->ctx; } + cpuc->br_sel = event->hw.branch_reg.reg; cpuc->lbr_users++; } @@ -95,8 +194,11 @@ void intel_pmu_lbr_disable(struct perf_event *event) cpuc->lbr_users--; WARN_ON_ONCE(cpuc->lbr_users < 0); - if (cpuc->enabled && !cpuc->lbr_users) + if (cpuc->enabled && !cpuc->lbr_users) { __intel_pmu_lbr_disable(); + /* avoid stale pointer */ + cpuc->lbr_context = NULL; + } } void intel_pmu_lbr_enable_all(void) @@ -115,6 +217,9 @@ void intel_pmu_lbr_disable_all(void) __intel_pmu_lbr_disable(); } +/* + * TOS = most recently recorded branch + */ static inline u64 intel_pmu_lbr_tos(void) { u64 tos; @@ -142,15 +247,15 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); - cpuc->lbr_entries[i].from = msr_lastbranch.from; - cpuc->lbr_entries[i].to = msr_lastbranch.to; - cpuc->lbr_entries[i].flags = 0; + cpuc->lbr_entries[i].from = msr_lastbranch.from; + cpuc->lbr_entries[i].to = msr_lastbranch.to; + cpuc->lbr_entries[i].mispred = 0; + cpuc->lbr_entries[i].predicted = 0; + cpuc->lbr_entries[i].reserved = 0; } cpuc->lbr_stack.nr = i; } -#define LBR_FROM_FLAG_MISPRED (1ULL << 63) - /* * Due to lack of segmentation in Linux the effective address (offset) * is the same as the linear address, allowing us to merge the LIP and EIP @@ -165,19 +270,22 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) for (i = 0; i < x86_pmu.lbr_nr; i++) { unsigned long lbr_idx = (tos - i) & mask; - u64 from, to, flags = 0; + u64 from, to, mis = 0, pred = 0; rdmsrl(x86_pmu.lbr_from + lbr_idx, from); rdmsrl(x86_pmu.lbr_to + lbr_idx, to); if (lbr_format == LBR_FORMAT_EIP_FLAGS) { - flags = !!(from & LBR_FROM_FLAG_MISPRED); + mis = !!(from & LBR_FROM_FLAG_MISPRED); + pred = !mis; from = (u64)((((s64)from) << 1) >> 1); } - cpuc->lbr_entries[i].from = from; - cpuc->lbr_entries[i].to = to; - cpuc->lbr_entries[i].flags = flags; + cpuc->lbr_entries[i].from = from; + cpuc->lbr_entries[i].to = to; + cpuc->lbr_entries[i].mispred = mis; + cpuc->lbr_entries[i].predicted = pred; + cpuc->lbr_entries[i].reserved = 0; } cpuc->lbr_stack.nr = i; } @@ -193,28 +301,404 @@ void intel_pmu_lbr_read(void) intel_pmu_lbr_read_32(cpuc); else intel_pmu_lbr_read_64(cpuc); + + intel_pmu_lbr_filter(cpuc); +} + +/* + * SW filter is used: + * - in case there is no HW filter + * - in case the HW filter has errata or limitations + */ +static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) +{ + u64 br_type = event->attr.branch_sample_type; + int mask = 0; + + if (br_type & PERF_SAMPLE_BRANCH_USER) + mask |= X86_BR_USER; + + if (br_type & PERF_SAMPLE_BRANCH_KERNEL) + mask |= X86_BR_KERNEL; + + /* we ignore BRANCH_HV here */ + + if (br_type & PERF_SAMPLE_BRANCH_ANY) + mask |= X86_BR_ANY; + + if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL) + mask |= X86_BR_ANY_CALL; + + if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN) + mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET; + + if (br_type & PERF_SAMPLE_BRANCH_IND_CALL) + mask |= X86_BR_IND_CALL; + /* + * stash actual user request into reg, it may + * be used by fixup code for some CPU + */ + event->hw.branch_reg.reg = mask; +} + +/* + * setup the HW LBR filter + * Used only when available, may not be enough to disambiguate + * all branches, may need the help of the SW filter + */ +static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) +{ + struct hw_perf_event_extra *reg; + u64 br_type = event->attr.branch_sample_type; + u64 mask = 0, m; + u64 v; + + for_each_branch_sample_type(m) { + if (!(br_type & m)) + continue; + + v = x86_pmu.lbr_sel_map[m]; + if (v == LBR_NOT_SUPP) + return -EOPNOTSUPP; + + if (v != LBR_IGN) + mask |= v; + } + reg = &event->hw.branch_reg; + reg->idx = EXTRA_REG_LBR; + + /* LBR_SELECT operates in suppress mode so invert mask */ + reg->config = ~mask & x86_pmu.lbr_sel_mask; + + return 0; +} + +int intel_pmu_setup_lbr_filter(struct perf_event *event) +{ + int ret = 0; + + /* + * no LBR on this PMU + */ + if (!x86_pmu.lbr_nr) + return -EOPNOTSUPP; + + /* + * setup SW LBR filter + */ + intel_pmu_setup_sw_lbr_filter(event); + + /* + * setup HW LBR filter, if any + */ + if (x86_pmu.lbr_sel_map) + ret = intel_pmu_setup_hw_lbr_filter(event); + + return ret; } +/* + * return the type of control flow change at address "from" + * intruction is not necessarily a branch (in case of interrupt). + * + * The branch type returned also includes the priv level of the + * target of the control flow change (X86_BR_USER, X86_BR_KERNEL). + * + * If a branch type is unknown OR the instruction cannot be + * decoded (e.g., text page not present), then X86_BR_NONE is + * returned. + */ +static int branch_type(unsigned long from, unsigned long to) +{ + struct insn insn; + void *addr; + int bytes, size = MAX_INSN_SIZE; + int ret = X86_BR_NONE; + int ext, to_plm, from_plm; + u8 buf[MAX_INSN_SIZE]; + int is64 = 0; + + to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER; + from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER; + + /* + * maybe zero if lbr did not fill up after a reset by the time + * we get a PMU interrupt + */ + if (from == 0 || to == 0) + return X86_BR_NONE; + + if (from_plm == X86_BR_USER) { + /* + * can happen if measuring at the user level only + * and we interrupt in a kernel thread, e.g., idle. + */ + if (!current->mm) + return X86_BR_NONE; + + /* may fail if text not present */ + bytes = copy_from_user_nmi(buf, (void __user *)from, size); + if (bytes != size) + return X86_BR_NONE; + + addr = buf; + } else + addr = (void *)from; + + /* + * decoder needs to know the ABI especially + * on 64-bit systems running 32-bit apps + */ +#ifdef CONFIG_X86_64 + is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32); +#endif + insn_init(&insn, addr, is64); + insn_get_opcode(&insn); + + switch (insn.opcode.bytes[0]) { + case 0xf: + switch (insn.opcode.bytes[1]) { + case 0x05: /* syscall */ + case 0x34: /* sysenter */ + ret = X86_BR_SYSCALL; + break; + case 0x07: /* sysret */ + case 0x35: /* sysexit */ + ret = X86_BR_SYSRET; + break; + case 0x80 ... 0x8f: /* conditional */ + ret = X86_BR_JCC; + break; + default: + ret = X86_BR_NONE; + } + break; + case 0x70 ... 0x7f: /* conditional */ + ret = X86_BR_JCC; + break; + case 0xc2: /* near ret */ + case 0xc3: /* near ret */ + case 0xca: /* far ret */ + case 0xcb: /* far ret */ + ret = X86_BR_RET; + break; + case 0xcf: /* iret */ + ret = X86_BR_IRET; + break; + case 0xcc ... 0xce: /* int */ + ret = X86_BR_INT; + break; + case 0xe8: /* call near rel */ + case 0x9a: /* call far absolute */ + ret = X86_BR_CALL; + break; + case 0xe0 ... 0xe3: /* loop jmp */ + ret = X86_BR_JCC; + break; + case 0xe9 ... 0xeb: /* jmp */ + ret = X86_BR_JMP; + break; + case 0xff: /* call near absolute, call far absolute ind */ + insn_get_modrm(&insn); + ext = (insn.modrm.bytes[0] >> 3) & 0x7; + switch (ext) { + case 2: /* near ind call */ + case 3: /* far ind call */ + ret = X86_BR_IND_CALL; + break; + case 4: + case 5: + ret = X86_BR_JMP; + break; + } + break; + default: + ret = X86_BR_NONE; + } + /* + * interrupts, traps, faults (and thus ring transition) may + * occur on any instructions. Thus, to classify them correctly, + * we need to first look at the from and to priv levels. If they + * are different and to is in the kernel, then it indicates + * a ring transition. If the from instruction is not a ring + * transition instr (syscall, systenter, int), then it means + * it was a irq, trap or fault. + * + * we have no way of detecting kernel to kernel faults. + */ + if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL + && ret != X86_BR_SYSCALL && ret != X86_BR_INT) + ret = X86_BR_IRQ; + + /* + * branch priv level determined by target as + * is done by HW when LBR_SELECT is implemented + */ + if (ret != X86_BR_NONE) + ret |= to_plm; + + return ret; +} + +/* + * implement actual branch filter based on user demand. + * Hardware may not exactly satisfy that request, thus + * we need to inspect opcodes. Mismatched branches are + * discarded. Therefore, the number of branches returned + * in PERF_SAMPLE_BRANCH_STACK sample may vary. + */ +static void +intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) +{ + u64 from, to; + int br_sel = cpuc->br_sel; + int i, j, type; + bool compress = false; + + /* if sampling all branches, then nothing to filter */ + if ((br_sel & X86_BR_ALL) == X86_BR_ALL) + return; + + for (i = 0; i < cpuc->lbr_stack.nr; i++) { + + from = cpuc->lbr_entries[i].from; + to = cpuc->lbr_entries[i].to; + + type = branch_type(from, to); + + /* if type does not correspond, then discard */ + if (type == X86_BR_NONE || (br_sel & type) != type) { + cpuc->lbr_entries[i].from = 0; + compress = true; + } + } + + if (!compress) + return; + + /* remove all entries with from=0 */ + for (i = 0; i < cpuc->lbr_stack.nr; ) { + if (!cpuc->lbr_entries[i].from) { + j = i; + while (++j < cpuc->lbr_stack.nr) + cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j]; + cpuc->lbr_stack.nr--; + if (!cpuc->lbr_entries[i].from) + continue; + } + i++; + } +} + +/* + * Map interface branch filters onto LBR filters + */ +static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { + [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, + [PERF_SAMPLE_BRANCH_USER] = LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, + [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP + | LBR_IND_JMP | LBR_FAR, + /* + * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches + */ + [PERF_SAMPLE_BRANCH_ANY_CALL] = + LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR, + /* + * NHM/WSM erratum: must include IND_JMP to capture IND_CALL + */ + [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP, +}; + +static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { + [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, + [PERF_SAMPLE_BRANCH_USER] = LBR_USER, + [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, + [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, + [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR, + [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL + | LBR_FAR, + [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL, +}; + +/* core */ void intel_pmu_lbr_init_core(void) { x86_pmu.lbr_nr = 4; - x86_pmu.lbr_tos = 0x01c9; - x86_pmu.lbr_from = 0x40; - x86_pmu.lbr_to = 0x60; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_CORE_FROM; + x86_pmu.lbr_to = MSR_LBR_CORE_TO; + + /* + * SW branch filter usage: + * - compensate for lack of HW filter + */ + pr_cont("4-deep LBR, "); } +/* nehalem/westmere */ void intel_pmu_lbr_init_nhm(void) { x86_pmu.lbr_nr = 16; - x86_pmu.lbr_tos = 0x01c9; - x86_pmu.lbr_from = 0x680; - x86_pmu.lbr_to = 0x6c0; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_NHM_FROM; + x86_pmu.lbr_to = MSR_LBR_NHM_TO; + + x86_pmu.lbr_sel_mask = LBR_SEL_MASK; + x86_pmu.lbr_sel_map = nhm_lbr_sel_map; + + /* + * SW branch filter usage: + * - workaround LBR_SEL errata (see above) + * - support syscall, sysret capture. + * That requires LBR_FAR but that means far + * jmp need to be filtered out + */ + pr_cont("16-deep LBR, "); +} + +/* sandy bridge */ +void intel_pmu_lbr_init_snb(void) +{ + x86_pmu.lbr_nr = 16; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_NHM_FROM; + x86_pmu.lbr_to = MSR_LBR_NHM_TO; + + x86_pmu.lbr_sel_mask = LBR_SEL_MASK; + x86_pmu.lbr_sel_map = snb_lbr_sel_map; + + /* + * SW branch filter usage: + * - support syscall, sysret capture. + * That requires LBR_FAR but that means far + * jmp need to be filtered out + */ + pr_cont("16-deep LBR, "); } +/* atom */ void intel_pmu_lbr_init_atom(void) { + /* + * only models starting at stepping 10 seems + * to have an operational LBR which can freeze + * on PMU interrupt + */ + if (boot_cpu_data.x86_mask < 10) { + pr_cont("LBR disabled due to erratum"); + return; + } + x86_pmu.lbr_nr = 8; - x86_pmu.lbr_tos = 0x01c9; - x86_pmu.lbr_from = 0x40; - x86_pmu.lbr_to = 0x60; + x86_pmu.lbr_tos = MSR_LBR_TOS; + x86_pmu.lbr_from = MSR_LBR_CORE_FROM; + x86_pmu.lbr_to = MSR_LBR_CORE_TO; + + /* + * SW branch filter usage: + * - compensate for lack of HW filter + */ + pr_cont("8-deep LBR, "); } diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 64426b71381f..bd9f55a5958d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -129,11 +129,40 @@ enum perf_event_sample_format { PERF_SAMPLE_PERIOD = 1U << 8, PERF_SAMPLE_STREAM_ID = 1U << 9, PERF_SAMPLE_RAW = 1U << 10, + PERF_SAMPLE_BRANCH_STACK = 1U << 11, - PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 12, /* non-ABI */ }; /* + * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set + * + * If the user does not pass priv level information via branch_sample_type, + * the kernel uses the event's priv level. Branch and event priv levels do + * not have to match. Branch priv level is checked for permissions. + * + * The branch types can be combined, however BRANCH_ANY covers all types + * of branches and therefore it supersedes all the other types. + */ +enum perf_branch_sample_type { + PERF_SAMPLE_BRANCH_USER = 1U << 0, /* user branches */ + PERF_SAMPLE_BRANCH_KERNEL = 1U << 1, /* kernel branches */ + PERF_SAMPLE_BRANCH_HV = 1U << 2, /* hypervisor branches */ + + PERF_SAMPLE_BRANCH_ANY = 1U << 3, /* any branch types */ + PERF_SAMPLE_BRANCH_ANY_CALL = 1U << 4, /* any call branch */ + PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << 5, /* any return branch */ + PERF_SAMPLE_BRANCH_IND_CALL = 1U << 6, /* indirect calls */ + + PERF_SAMPLE_BRANCH_MAX = 1U << 7, /* non-ABI */ +}; + +#define PERF_SAMPLE_BRANCH_PLM_ALL \ + (PERF_SAMPLE_BRANCH_USER|\ + PERF_SAMPLE_BRANCH_KERNEL|\ + PERF_SAMPLE_BRANCH_HV) + +/* * The format of the data returned by read() on a perf event fd, * as specified by attr.read_format: * @@ -163,6 +192,8 @@ enum perf_event_read_format { }; #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ +#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ +#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -240,6 +271,7 @@ struct perf_event_attr { __u64 bp_len; __u64 config2; /* extension of config1 */ }; + __u64 branch_sample_type; /* enum branch_sample_type */ }; /* @@ -458,6 +490,8 @@ enum perf_event_type { * * { u32 size; * char data[size];}&& PERF_SAMPLE_RAW + * + * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK * }; */ PERF_RECORD_SAMPLE = 9, @@ -530,12 +564,34 @@ struct perf_raw_record { void *data; }; +/* + * single taken branch record layout: + * + * from: source instruction (may not always be a branch insn) + * to: branch target + * mispred: branch target was mispredicted + * predicted: branch target was predicted + * + * support for mispred, predicted is optional. In case it + * is not supported mispred = predicted = 0. + */ struct perf_branch_entry { - __u64 from; - __u64 to; - __u64 flags; + __u64 from; + __u64 to; + __u64 mispred:1, /* target mispredicted */ + predicted:1,/* target predicted */ + reserved:62; }; +/* + * branch stack layout: + * nr: number of taken branches stored in entries[] + * + * Note that nr can vary from sample to sample + * branches (to, from) are stored from most recent + * to least recent, i.e., entries[0] contains the most + * recent branch. + */ struct perf_branch_stack { __u64 nr; struct perf_branch_entry entries[0]; @@ -566,7 +622,9 @@ struct hw_perf_event { unsigned long event_base; int idx; int last_cpu; + struct hw_perf_event_extra extra_reg; + struct hw_perf_event_extra branch_reg; }; struct { /* software */ struct hrtimer hrtimer; @@ -690,6 +748,11 @@ struct pmu { * if no implementation is provided it will default to: event->hw.idx + 1. */ int (*event_idx) (struct perf_event *event); /*optional */ + + /* + * flush branch stack on context-switches (needed in cpu-wide mode) + */ + void (*flush_branch_stack) (void); }; /** @@ -923,7 +986,8 @@ struct perf_event_context { u64 parent_gen; u64 generation; int pin_count; - int nr_cgroups; /* cgroup events present */ + int nr_cgroups; /* cgroup evts */ + int nr_branch_stack; /* branch_stack evt */ struct rcu_head rcu_head; }; @@ -988,6 +1052,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); + struct perf_sample_data { u64 type; @@ -1007,12 +1072,14 @@ struct perf_sample_data { u64 period; struct perf_callchain_entry *callchain; struct perf_raw_record *raw; + struct perf_branch_stack *br_stack; }; static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr) { data->addr = addr; data->raw = NULL; + data->br_stack = NULL; } extern void perf_output_sample(struct perf_output_handle *handle, @@ -1151,6 +1218,11 @@ extern void perf_bp_event(struct perf_event *event, void *data); # define perf_instruction_pointer(regs) instruction_pointer(regs) #endif +static inline bool has_branch_stack(struct perf_event *event) +{ + return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; +} + extern int perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size); extern void perf_output_end(struct perf_output_handle *handle); diff --git a/kernel/events/core.c b/kernel/events/core.c index e8b32ac75ce3..c61234b1a988 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -118,6 +118,13 @@ static int cpu_function_call(int cpu, int (*func) (void *info), void *info) PERF_FLAG_FD_OUTPUT |\ PERF_FLAG_PID_CGROUP) +/* + * branch priv levels that need permission checks + */ +#define PERF_SAMPLE_BRANCH_PERM_PLM \ + (PERF_SAMPLE_BRANCH_KERNEL |\ + PERF_SAMPLE_BRANCH_HV) + enum event_type_t { EVENT_FLEXIBLE = 0x1, EVENT_PINNED = 0x2, @@ -130,6 +137,7 @@ enum event_type_t { */ struct static_key_deferred perf_sched_events __read_mostly; static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); +static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events); static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; @@ -881,6 +889,9 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) if (is_cgroup_event(event)) ctx->nr_cgroups++; + if (has_branch_stack(event)) + ctx->nr_branch_stack++; + list_add_rcu(&event->event_entry, &ctx->event_list); if (!ctx->nr_events) perf_pmu_rotate_start(ctx->pmu); @@ -1020,6 +1031,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) cpuctx->cgrp = NULL; } + if (has_branch_stack(event)) + ctx->nr_branch_stack--; + ctx->nr_events--; if (event->attr.inherit_stat) ctx->nr_stat--; @@ -2195,6 +2209,66 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, } /* + * When sampling the branck stack in system-wide, it may be necessary + * to flush the stack on context switch. This happens when the branch + * stack does not tag its entries with the pid of the current task. + * Otherwise it becomes impossible to associate a branch entry with a + * task. This ambiguity is more likely to appear when the branch stack + * supports priv level filtering and the user sets it to monitor only + * at the user level (which could be a useful measurement in system-wide + * mode). In that case, the risk is high of having a branch stack with + * branch from multiple tasks. Flushing may mean dropping the existing + * entries or stashing them somewhere in the PMU specific code layer. + * + * This function provides the context switch callback to the lower code + * layer. It is invoked ONLY when there is at least one system-wide context + * with at least one active event using taken branch sampling. + */ +static void perf_branch_stack_sched_in(struct task_struct *prev, + struct task_struct *task) +{ + struct perf_cpu_context *cpuctx; + struct pmu *pmu; + unsigned long flags; + + /* no need to flush branch stack if not changing task */ + if (prev == task) + return; + + local_irq_save(flags); + + rcu_read_lock(); + + list_for_each_entry_rcu(pmu, &pmus, entry) { + cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + + /* + * check if the context has at least one + * event using PERF_SAMPLE_BRANCH_STACK + */ + if (cpuctx->ctx.nr_branch_stack > 0 + && pmu->flush_branch_stack) { + + pmu = cpuctx->ctx.pmu; + + perf_ctx_lock(cpuctx, cpuctx->task_ctx); + + perf_pmu_disable(pmu); + + pmu->flush_branch_stack(); + + perf_pmu_enable(pmu); + + perf_ctx_unlock(cpuctx, cpuctx->task_ctx); + } + } + + rcu_read_unlock(); + + local_irq_restore(flags); +} + +/* * Called from scheduler to add the events of the current task * with interrupts disabled. * @@ -2225,6 +2299,10 @@ void __perf_event_task_sched_in(struct task_struct *prev, */ if (atomic_read(&__get_cpu_var(perf_cgroup_events))) perf_cgroup_sched_in(prev, task); + + /* check for system-wide branch_stack events */ + if (atomic_read(&__get_cpu_var(perf_branch_stack_events))) + perf_branch_stack_sched_in(prev, task); } static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) @@ -2791,6 +2869,14 @@ static void free_event(struct perf_event *event) atomic_dec(&per_cpu(perf_cgroup_events, event->cpu)); static_key_slow_dec_deferred(&perf_sched_events); } + + if (has_branch_stack(event)) { + static_key_slow_dec_deferred(&perf_sched_events); + /* is system-wide event */ + if (!(event->attach_state & PERF_ATTACH_TASK)) + atomic_dec(&per_cpu(perf_branch_stack_events, + event->cpu)); + } } if (event->rb) { @@ -3907,6 +3993,24 @@ void perf_output_sample(struct perf_output_handle *handle, } } } + + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + if (data->br_stack) { + size_t size; + + size = data->br_stack->nr + * sizeof(struct perf_branch_entry); + + perf_output_put(handle, data->br_stack->nr); + perf_output_copy(handle, data->br_stack->entries, size); + } else { + /* + * we always store at least the value of nr + */ + u64 nr = 0; + perf_output_put(handle, nr); + } + } } void perf_prepare_sample(struct perf_event_header *header, @@ -3949,6 +4053,15 @@ void perf_prepare_sample(struct perf_event_header *header, WARN_ON_ONCE(size & (sizeof(u64)-1)); header->size += size; } + + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + int size = sizeof(u64); /* nr */ + if (data->br_stack) { + size += data->br_stack->nr + * sizeof(struct perf_branch_entry); + } + header->size += size; + } } static void perf_event_output(struct perf_event *event, @@ -5010,6 +5123,12 @@ static int perf_swevent_init(struct perf_event *event) if (event->attr.type != PERF_TYPE_SOFTWARE) return -ENOENT; + /* + * no branch sampling for software events + */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + switch (event_id) { case PERF_COUNT_SW_CPU_CLOCK: case PERF_COUNT_SW_TASK_CLOCK: @@ -5120,6 +5239,12 @@ static int perf_tp_event_init(struct perf_event *event) if (event->attr.type != PERF_TYPE_TRACEPOINT) return -ENOENT; + /* + * no branch sampling for tracepoint events + */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + err = perf_trace_init(event); if (err) return err; @@ -5345,6 +5470,12 @@ static int cpu_clock_event_init(struct perf_event *event) if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) return -ENOENT; + /* + * no branch sampling for software events + */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + perf_swevent_init_hrtimer(event); return 0; @@ -5419,6 +5550,12 @@ static int task_clock_event_init(struct perf_event *event) if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) return -ENOENT; + /* + * no branch sampling for software events + */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + perf_swevent_init_hrtimer(event); return 0; @@ -5866,6 +6003,12 @@ done: return ERR_PTR(err); } } + if (has_branch_stack(event)) { + static_key_slow_inc(&perf_sched_events.key); + if (!(event->attach_state & PERF_ATTACH_TASK)) + atomic_inc(&per_cpu(perf_branch_stack_events, + event->cpu)); + } } return event; @@ -5935,6 +6078,40 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, if (attr->read_format & ~(PERF_FORMAT_MAX-1)) return -EINVAL; + if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) { + u64 mask = attr->branch_sample_type; + + /* only using defined bits */ + if (mask & ~(PERF_SAMPLE_BRANCH_MAX-1)) + return -EINVAL; + + /* at least one branch bit must be set */ + if (!(mask & ~PERF_SAMPLE_BRANCH_PLM_ALL)) + return -EINVAL; + + /* kernel level capture: check permissions */ + if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM) + && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) + return -EACCES; + + /* propagate priv level, when not set for branch */ + if (!(mask & PERF_SAMPLE_BRANCH_PLM_ALL)) { + + /* exclude_kernel checked on syscall entry */ + if (!attr->exclude_kernel) + mask |= PERF_SAMPLE_BRANCH_KERNEL; + + if (!attr->exclude_user) + mask |= PERF_SAMPLE_BRANCH_USER; + + if (!attr->exclude_hv) + mask |= PERF_SAMPLE_BRANCH_HV; + /* + * adjust user setting (for HW filter setup) + */ + attr->branch_sample_type = mask; + } + } out: return ret; diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index 3330022a7ac1..bb38c4d3ee12 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -581,6 +581,12 @@ static int hw_breakpoint_event_init(struct perf_event *bp) if (bp->attr.type != PERF_TYPE_BREAKPOINT) return -ENOENT; + /* + * no branch sampling for breakpoint events + */ + if (has_branch_stack(bp)) + return -EOPNOTSUPP; + err = register_perf_hw_breakpoint(bp); if (err) return err; diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index a5766b4b0125..a1386b2fff00 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -152,6 +152,36 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha corresponding events, i.e., they always refer to events defined earlier on the command line. +-b:: +--branch-any:: +Enable taken branch stack sampling. Any type of taken branch may be sampled. +This is a shortcut for --branch-filter any. See --branch-filter for more infos. + +-j:: +--branch-filter:: +Enable taken branch stack sampling. Each sample captures a series of consecutive +taken branches. The number of branches captured with each sample depends on the +underlying hardware, the type of branches of interest, and the executed code. +It is possible to select the types of branches captured by enabling filters. The +following filters are defined: + + - any: any type of branches + - any_call: any function call or system call + - any_ret: any function return or system call return + - any_ind: any indirect branch + - u: only when the branch target is at the user level + - k: only when the branch target is in the kernel + - hv: only when the target is at the hypervisor level + ++ +The option requires at least one branch type among any, any_call, any_ret, ind_call. +The privilege levels may be ommitted, in which case, the privilege levels of the associated +event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege +levels are subject to permissions. When sampling on multiple events, branch stack sampling +is enabled for all the sampling events. The sampled branch type is the same for all events. +The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k +Note that this feature may not be available on all processors. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 9b430e98712e..87feeee8b90c 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -153,6 +153,16 @@ OPTIONS information which may be very large and thus may clutter the display. It currently includes: cpu and numa topology of the host system. +-b:: +--branch-stack:: + Use the addresses of sampled taken branches instead of the instruction + address to build the histograms. To generate meaningful output, the + perf.data file must have been obtained using perf record -b or + perf record --branch-filter xxx where xxx is a branch filter option. + perf report is able to auto-detect whether a perf.data file contains + branch stacks and it will automatically switch to the branch view mode, + unless --no-branch-stack is used. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-annotate[1] diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 75d230fef202..be4e1eee782e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv) if (!have_tracepoints(&evsel_list->entries)) perf_header__clear_feat(&session->header, HEADER_TRACE_INFO); + if (!rec->opts.branch_stack) + perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); + if (!rec->file_new) { err = perf_session__read_header(session, output); if (err < 0) @@ -638,6 +641,90 @@ out_delete_session: return err; } +#define BRANCH_OPT(n, m) \ + { .name = n, .mode = (m) } + +#define BRANCH_END { .name = NULL } + +struct branch_mode { + const char *name; + int mode; +}; + +static const struct branch_mode branch_modes[] = { + BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER), + BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL), + BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV), + BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY), + BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL), + BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN), + BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL), + BRANCH_END +}; + +static int +parse_branch_stack(const struct option *opt, const char *str, int unset) +{ +#define ONLY_PLM \ + (PERF_SAMPLE_BRANCH_USER |\ + PERF_SAMPLE_BRANCH_KERNEL |\ + PERF_SAMPLE_BRANCH_HV) + + uint64_t *mode = (uint64_t *)opt->value; + const struct branch_mode *br; + char *s, *os = NULL, *p; + int ret = -1; + + if (unset) + return 0; + + /* + * cannot set it twice, -b + --branch-filter for instance + */ + if (*mode) + return -1; + + /* str may be NULL in case no arg is passed to -b */ + if (str) { + /* because str is read-only */ + s = os = strdup(str); + if (!s) + return -1; + + for (;;) { + p = strchr(s, ','); + if (p) + *p = '\0'; + + for (br = branch_modes; br->name; br++) { + if (!strcasecmp(s, br->name)) + break; + } + if (!br->name) { + ui__warning("unknown branch filter %s," + " check man page\n", s); + goto error; + } + + *mode |= br->mode; + + if (!p) + break; + + s = p + 1; + } + } + ret = 0; + + /* default to any branch */ + if ((*mode & ~ONLY_PLM) == 0) { + *mode = PERF_SAMPLE_BRANCH_ANY; + } +error: + free(os); + return ret; +} + static const char * const record_usage[] = { "perf record [<options>] [<command>]", "perf record [<options>] -- <command> [<options>]", @@ -727,6 +814,14 @@ const struct option record_options[] = { "monitor event in cgroup name only", parse_cgroups), OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"), + + OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack, + "branch any", "sample any taken branches", + parse_branch_stack), + + OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack, + "branch filter mask", "branch stack filter modes", + parse_branch_stack), OPT_END() }; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 25d34d483e49..8e91c6eba18a 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -53,6 +53,82 @@ struct perf_report { DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); }; +static int perf_report__add_branch_hist_entry(struct perf_tool *tool, + struct addr_location *al, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine) +{ + struct perf_report *rep = container_of(tool, struct perf_report, tool); + struct symbol *parent = NULL; + int err = 0; + unsigned i; + struct hist_entry *he; + struct branch_info *bi, *bx; + + if ((sort__has_parent || symbol_conf.use_callchain) + && sample->callchain) { + err = machine__resolve_callchain(machine, evsel, al->thread, + sample->callchain, &parent); + if (err) + return err; + } + + bi = machine__resolve_bstack(machine, al->thread, + sample->branch_stack); + if (!bi) + return -ENOMEM; + + for (i = 0; i < sample->branch_stack->nr; i++) { + if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym)) + continue; + /* + * The report shows the percentage of total branches captured + * and not events sampled. Thus we use a pseudo period of 1. + */ + he = __hists__add_branch_entry(&evsel->hists, al, parent, + &bi[i], 1); + if (he) { + struct annotation *notes; + err = -ENOMEM; + bx = he->branch_info; + if (bx->from.sym && use_browser > 0) { + notes = symbol__annotation(bx->from.sym); + if (!notes->src + && symbol__alloc_hist(bx->from.sym) < 0) + goto out; + + err = symbol__inc_addr_samples(bx->from.sym, + bx->from.map, + evsel->idx, + bx->from.al_addr); + if (err) + goto out; + } + + if (bx->to.sym && use_browser > 0) { + notes = symbol__annotation(bx->to.sym); + if (!notes->src + && symbol__alloc_hist(bx->to.sym) < 0) + goto out; + + err = symbol__inc_addr_samples(bx->to.sym, + bx->to.map, + evsel->idx, + bx->to.al_addr); + if (err) + goto out; + } + evsel->hists.stats.total_period += 1; + hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); + err = 0; + } else + return -ENOMEM; + } +out: + return err; +} + static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, struct addr_location *al, struct perf_sample *sample, @@ -126,14 +202,21 @@ static int process_sample_event(struct perf_tool *tool, if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) return 0; - if (al.map != NULL) - al.map->dso->hit = 1; + if (sort__branch_mode == 1) { + if (perf_report__add_branch_hist_entry(tool, &al, sample, + evsel, machine)) { + pr_debug("problem adding lbr entry, skipping event\n"); + return -1; + } + } else { + if (al.map != NULL) + al.map->dso->hit = 1; - if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) { - pr_debug("problem incrementing symbol period, skipping event\n"); - return -1; + if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) { + pr_debug("problem incrementing symbol period, skipping event\n"); + return -1; + } } - return 0; } @@ -188,6 +271,15 @@ static int perf_report__setup_sample_type(struct perf_report *rep) } } + if (sort__branch_mode == 1) { + if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) { + fprintf(stderr, "selected -b but no branch data." + " Did you call perf record without" + " -b?\n"); + return -1; + } + } + return 0; } @@ -246,7 +338,7 @@ static int __cmd_report(struct perf_report *rep) { int ret = -EINVAL; u64 nr_samples; - struct perf_session *session; + struct perf_session *session = rep->session; struct perf_evsel *pos; struct map *kernel_map; struct kmap *kernel_kmap; @@ -254,13 +346,6 @@ static int __cmd_report(struct perf_report *rep) signal(SIGINT, sig_handler); - session = perf_session__new(rep->input_name, O_RDONLY, - rep->force, false, &rep->tool); - if (session == NULL) - return -ENOMEM; - - rep->session = session; - if (rep->cpu_list) { ret = perf_session__cpu_bitmap(session, rep->cpu_list, rep->cpu_bitmap); @@ -427,9 +512,19 @@ setup: return 0; } +static int +parse_branch_mode(const struct option *opt __used, const char *str __used, int unset) +{ + sort__branch_mode = !unset; + return 0; +} + int cmd_report(int argc, const char **argv, const char *prefix __used) { + struct perf_session *session; struct stat st; + bool has_br_stack = false; + int ret = -1; char callchain_default_opt[] = "fractal,0.5,callee"; const char * const report_usage[] = { "perf report [<options>]", @@ -477,7 +572,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) OPT_BOOLEAN(0, "stdio", &report.use_stdio, "Use the stdio interface"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", - "sort by key(s): pid, comm, dso, symbol, parent"), + "sort by key(s): pid, comm, dso, symbol, parent, dso_to," + " dso_from, symbol_to, symbol_from, mispredict"), OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, "Show sample percentage for different cpu modes"), OPT_STRING('p', "parent", &parent_pattern, "regex", @@ -517,6 +613,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) "Specify disassembler style (e.g. -M intel for intel syntax)"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), + OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "", + "use branch records for histogram filling", parse_branch_mode), OPT_END() }; @@ -536,11 +634,36 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) else report.input_name = "perf.data"; } + session = perf_session__new(report.input_name, O_RDONLY, + report.force, false, &report.tool); + if (session == NULL) + return -ENOMEM; - if (strcmp(report.input_name, "-") != 0) + report.session = session; + + has_br_stack = perf_header__has_feat(&session->header, + HEADER_BRANCH_STACK); + + if (sort__branch_mode == -1 && has_br_stack) + sort__branch_mode = 1; + + /* sort__branch_mode could be 0 if --no-branch-stack */ + if (sort__branch_mode == 1) { + /* + * if no sort_order is provided, then specify + * branch-mode specific order + */ + if (sort_order == default_sort_order) + sort_order = "comm,dso_from,symbol_from," + "dso_to,symbol_to"; + + } + + if (strcmp(report.input_name, "-") != 0) { setup_browser(true); - else + } else { use_browser = 0; + } /* * Only in the newt browser we are doing integrated annotation, @@ -568,13 +691,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) } if (symbol__init() < 0) - return -1; + goto error; setup_sorting(report_usage, options); if (parent_pattern != default_parent_pattern) { if (sort_dimension__add("parent") < 0) - return -1; + goto error; /* * Only show the parent fields if we explicitly @@ -592,9 +715,20 @@ int cmd_report(int argc, const char **argv, const char *prefix __used) if (argc) usage_with_options(report_usage, options); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); - sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); - return __cmd_report(&report); + if (sort__branch_mode == 1) { + sort_entry__setup_elide(&sort_dso_from, symbol_conf.dso_from_list, "dso_from", stdout); + sort_entry__setup_elide(&sort_dso_to, symbol_conf.dso_to_list, "dso_to", stdout); + sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout); + sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout); + } else { + sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); + sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); + } + + ret = __cmd_report(&report); +error: + perf_session__delete(session); + return ret; } diff --git a/tools/perf/perf.h b/tools/perf/perf.h index f0227e93665d..eec392e48067 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -179,6 +179,23 @@ struct ip_callchain { u64 ips[0]; }; +struct branch_flags { + u64 mispred:1; + u64 predicted:1; + u64 reserved:62; +}; + +struct branch_entry { + u64 from; + u64 to; + struct branch_flags flags; +}; + +struct branch_stack { + u64 nr; + struct branch_entry entries[0]; +}; + extern bool perf_host, perf_guest; extern const char perf_version_string[]; @@ -205,6 +222,7 @@ struct perf_record_opts { unsigned int freq; unsigned int mmap_pages; unsigned int user_freq; + int branch_stack; u64 default_interval; u64 user_interval; const char *cpu_list; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index cbdeaad9c5e5..1b197280c621 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -81,6 +81,7 @@ struct perf_sample { u32 raw_size; void *raw_data; struct ip_callchain *callchain; + struct branch_stack *branch_stack; }; #define BUILD_ID_SIZE 20 diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 302d49a9f985..f421f7cbc0d3 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -126,6 +126,10 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts) attr->watermark = 0; attr->wakeup_events = 1; } + if (opts->branch_stack) { + attr->sample_type |= PERF_SAMPLE_BRANCH_STACK; + attr->branch_sample_type = opts->branch_stack; + } attr->mmap = track; attr->comm = track; @@ -576,6 +580,16 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, data->raw_data = (void *) pdata; } + if (type & PERF_SAMPLE_BRANCH_STACK) { + u64 sz; + + data->branch_stack = (struct branch_stack *)array; + array++; /* nr */ + + sz = data->branch_stack->nr * sizeof(struct branch_entry); + sz /= sizeof(u64); + array += sz; + } return 0; } diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 9f867d96c6a5..0d9b6da86a39 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1023,6 +1023,12 @@ write_it: return do_write_string(fd, buffer); } +static int write_branch_stack(int fd __used, struct perf_header *h __used, + struct perf_evlist *evlist __used) +{ + return 0; +} + static void print_hostname(struct perf_header *ph, int fd, FILE *fp) { char *str = do_read_string(fd, ph); @@ -1144,8 +1150,9 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) uint64_t id; void *buf = NULL; char *str; - u32 nre, sz, nr, i, j, msz; - int ret; + u32 nre, sz, nr, i, j; + ssize_t ret; + size_t msz; /* number of events */ ret = read(fd, &nre, sizeof(nre)); @@ -1162,25 +1169,23 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) if (ph->needs_swap) sz = bswap_32(sz); - /* - * ensure it is at least to our ABI rev - */ - if (sz < (u32)sizeof(attr)) - goto error; - memset(&attr, 0, sizeof(attr)); - /* read entire region to sync up to next field */ + /* buffer to hold on file attr struct */ buf = malloc(sz); if (!buf) goto error; msz = sizeof(attr); - if (sz < msz) + if (sz < (ssize_t)msz) msz = sz; for (i = 0 ; i < nre; i++) { + /* + * must read entire on-file attr struct to + * sync up with layout. + */ ret = read(fd, buf, sz); if (ret != (ssize_t)sz) goto error; @@ -1316,6 +1321,12 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp) free(str); } +static void print_branch_stack(struct perf_header *ph __used, int fd __used, + FILE *fp) +{ + fprintf(fp, "# contains samples with branch stack\n"); +} + static int __event_process_build_id(struct build_id_event *bev, char *filename, struct perf_session *session) @@ -1520,6 +1531,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPA(HEADER_CMDLINE, cmdline), FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology), FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology), + FEAT_OPA(HEADER_BRANCH_STACK, branch_stack), }; struct header_print_data { @@ -1804,35 +1816,101 @@ out_free: return err; } -static int check_magic_endian(u64 *magic, struct perf_file_header *header, - struct perf_header *ph) +static const int attr_file_abi_sizes[] = { + [0] = PERF_ATTR_SIZE_VER0, + [1] = PERF_ATTR_SIZE_VER1, + 0, +}; + +/* + * In the legacy file format, the magic number is not used to encode endianness. + * hdr_sz was used to encode endianness. But given that hdr_sz can vary based + * on ABI revisions, we need to try all combinations for all endianness to + * detect the endianness. + */ +static int try_all_file_abis(uint64_t hdr_sz, struct perf_header *ph) { - int ret; + uint64_t ref_size, attr_size; + int i; - /* check for legacy format */ - ret = memcmp(magic, __perf_magic1, sizeof(*magic)); - if (ret == 0) { - pr_debug("legacy perf.data format\n"); - if (!header) - return -1; + for (i = 0 ; attr_file_abi_sizes[i]; i++) { + ref_size = attr_file_abi_sizes[i] + + sizeof(struct perf_file_section); + if (hdr_sz != ref_size) { + attr_size = bswap_64(hdr_sz); + if (attr_size != ref_size) + continue; - if (header->attr_size != sizeof(struct perf_file_attr)) { - u64 attr_size = bswap_64(header->attr_size); + ph->needs_swap = true; + } + pr_debug("ABI%d perf.data file detected, need_swap=%d\n", + i, + ph->needs_swap); + return 0; + } + /* could not determine endianness */ + return -1; +} - if (attr_size != sizeof(struct perf_file_attr)) - return -1; +#define PERF_PIPE_HDR_VER0 16 + +static const size_t attr_pipe_abi_sizes[] = { + [0] = PERF_PIPE_HDR_VER0, + 0, +}; + +/* + * In the legacy pipe format, there is an implicit assumption that endiannesss + * between host recording the samples, and host parsing the samples is the + * same. This is not always the case given that the pipe output may always be + * redirected into a file and analyzed on a different machine with possibly a + * different endianness and perf_event ABI revsions in the perf tool itself. + */ +static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph) +{ + u64 attr_size; + int i; + + for (i = 0 ; attr_pipe_abi_sizes[i]; i++) { + if (hdr_sz != attr_pipe_abi_sizes[i]) { + attr_size = bswap_64(hdr_sz); + if (attr_size != hdr_sz) + continue; ph->needs_swap = true; } + pr_debug("Pipe ABI%d perf.data file detected\n", i); return 0; } + return -1; +} + +static int check_magic_endian(u64 magic, uint64_t hdr_sz, + bool is_pipe, struct perf_header *ph) +{ + int ret; + + /* check for legacy format */ + ret = memcmp(&magic, __perf_magic1, sizeof(magic)); + if (ret == 0) { + pr_debug("legacy perf.data format\n"); + if (is_pipe) + return try_all_pipe_abis(hdr_sz, ph); + + return try_all_file_abis(hdr_sz, ph); + } + /* + * the new magic number serves two purposes: + * - unique number to identify actual perf.data files + * - encode endianness of file + */ - /* check magic number with same endianness */ - if (*magic == __perf_magic2) + /* check magic number with one endianness */ + if (magic == __perf_magic2) return 0; - /* check magic number but opposite endianness */ - if (*magic != __perf_magic2_sw) + /* check magic number with opposite endianness */ + if (magic != __perf_magic2_sw) return -1; ph->needs_swap = true; @@ -1851,8 +1929,11 @@ int perf_file_header__read(struct perf_file_header *header, if (ret <= 0) return -1; - if (check_magic_endian(&header->magic, header, ph) < 0) + if (check_magic_endian(header->magic, + header->attr_size, false, ph) < 0) { + pr_debug("magic/endian check failed\n"); return -1; + } if (ph->needs_swap) { mem_bswap_64(header, offsetof(struct perf_file_header, @@ -1939,21 +2020,17 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header, if (ret <= 0) return -1; - if (check_magic_endian(&header->magic, NULL, ph) < 0) + if (check_magic_endian(header->magic, header->size, true, ph) < 0) { + pr_debug("endian/magic failed\n"); return -1; + } + + if (ph->needs_swap) + header->size = bswap_64(header->size); if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0) return -1; - if (header->size != sizeof(*header)) { - u64 size = bswap_64(header->size); - - if (size != sizeof(*header)) - return -1; - - ph->needs_swap = true; - } - return 0; } @@ -1973,6 +2050,52 @@ static int perf_header__read_pipe(struct perf_session *session, int fd) return 0; } +static int read_attr(int fd, struct perf_header *ph, + struct perf_file_attr *f_attr) +{ + struct perf_event_attr *attr = &f_attr->attr; + size_t sz, left; + size_t our_sz = sizeof(f_attr->attr); + int ret; + + memset(f_attr, 0, sizeof(*f_attr)); + + /* read minimal guaranteed structure */ + ret = readn(fd, attr, PERF_ATTR_SIZE_VER0); + if (ret <= 0) { + pr_debug("cannot read %d bytes of header attr\n", + PERF_ATTR_SIZE_VER0); + return -1; + } + + /* on file perf_event_attr size */ + sz = attr->size; + + if (ph->needs_swap) + sz = bswap_32(sz); + + if (sz == 0) { + /* assume ABI0 */ + sz = PERF_ATTR_SIZE_VER0; + } else if (sz > our_sz) { + pr_debug("file uses a more recent and unsupported ABI" + " (%zu bytes extra)\n", sz - our_sz); + return -1; + } + /* what we have not yet read and that we know about */ + left = sz - PERF_ATTR_SIZE_VER0; + if (left) { + void *ptr = attr; + ptr += PERF_ATTR_SIZE_VER0; + + ret = readn(fd, ptr, left); + } + /* read perf_file_section, ids are read in caller */ + ret = readn(fd, &f_attr->ids, sizeof(f_attr->ids)); + + return ret <= 0 ? -1 : 0; +} + int perf_session__read_header(struct perf_session *session, int fd) { struct perf_header *header = &session->header; @@ -1988,19 +2111,17 @@ int perf_session__read_header(struct perf_session *session, int fd) if (session->fd_pipe) return perf_header__read_pipe(session, fd); - if (perf_file_header__read(&f_header, header, fd) < 0) { - pr_debug("incompatible file format\n"); + if (perf_file_header__read(&f_header, header, fd) < 0) return -EINVAL; - } - nr_attrs = f_header.attrs.size / sizeof(f_attr); + nr_attrs = f_header.attrs.size / f_header.attr_size; lseek(fd, f_header.attrs.offset, SEEK_SET); for (i = 0; i < nr_attrs; i++) { struct perf_evsel *evsel; off_t tmp; - if (readn(fd, &f_attr, sizeof(f_attr)) <= 0) + if (read_attr(fd, header, &f_attr) < 0) goto out_errno; if (header->needs_swap) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index e68f617d082f..21a6be09c129 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -27,7 +27,7 @@ enum { HEADER_EVENT_DESC, HEADER_CPU_TOPOLOGY, HEADER_NUMA_TOPOLOGY, - + HEADER_BRANCH_STACK, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6f505d1abac7..8380c3db1c92 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -50,21 +50,25 @@ static void hists__reset_col_len(struct hists *hists) hists__set_col_len(hists, col, 0); } +static void hists__set_unres_dso_col_len(struct hists *hists, int dso) +{ + const unsigned int unresolved_col_width = BITS_PER_LONG / 4; + + if (hists__col_len(hists, dso) < unresolved_col_width && + !symbol_conf.col_width_list_str && !symbol_conf.field_sep && + !symbol_conf.dso_list) + hists__set_col_len(hists, dso, unresolved_col_width); +} + static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) { + const unsigned int unresolved_col_width = BITS_PER_LONG / 4; u16 len; if (h->ms.sym) - hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen); - else { - const unsigned int unresolved_col_width = BITS_PER_LONG / 4; - - if (hists__col_len(hists, HISTC_DSO) < unresolved_col_width && - !symbol_conf.col_width_list_str && !symbol_conf.field_sep && - !symbol_conf.dso_list) - hists__set_col_len(hists, HISTC_DSO, - unresolved_col_width); - } + hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen + 4); + else + hists__set_unres_dso_col_len(hists, HISTC_DSO); len = thread__comm_len(h->thread); if (hists__new_col_len(hists, HISTC_COMM, len)) @@ -74,6 +78,37 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h) len = dso__name_len(h->ms.map->dso); hists__new_col_len(hists, HISTC_DSO, len); } + + if (h->branch_info) { + int symlen; + /* + * +4 accounts for '[x] ' priv level info + * +2 account of 0x prefix on raw addresses + */ + if (h->branch_info->from.sym) { + symlen = (int)h->branch_info->from.sym->namelen + 4; + hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen); + + symlen = dso__name_len(h->branch_info->from.map->dso); + hists__new_col_len(hists, HISTC_DSO_FROM, symlen); + } else { + symlen = unresolved_col_width + 4 + 2; + hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen); + hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM); + } + + if (h->branch_info->to.sym) { + symlen = (int)h->branch_info->to.sym->namelen + 4; + hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); + + symlen = dso__name_len(h->branch_info->to.map->dso); + hists__new_col_len(hists, HISTC_DSO_TO, symlen); + } else { + symlen = unresolved_col_width + 4 + 2; + hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen); + hists__set_unres_dso_col_len(hists, HISTC_DSO_TO); + } + } } static void hist_entry__add_cpumode_period(struct hist_entry *he, @@ -195,26 +230,14 @@ static u8 symbol__parent_filter(const struct symbol *parent) return 0; } -struct hist_entry *__hists__add_entry(struct hists *hists, +static struct hist_entry *add_hist_entry(struct hists *hists, + struct hist_entry *entry, struct addr_location *al, - struct symbol *sym_parent, u64 period) + u64 period) { struct rb_node **p; struct rb_node *parent = NULL; struct hist_entry *he; - struct hist_entry entry = { - .thread = al->thread, - .ms = { - .map = al->map, - .sym = al->sym, - }, - .cpu = al->cpu, - .ip = al->addr, - .level = al->level, - .period = period, - .parent = sym_parent, - .filtered = symbol__parent_filter(sym_parent), - }; int cmp; pthread_mutex_lock(&hists->lock); @@ -225,7 +248,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, parent = *p; he = rb_entry(parent, struct hist_entry, rb_node_in); - cmp = hist_entry__cmp(&entry, he); + cmp = hist_entry__cmp(entry, he); if (!cmp) { he->period += period; @@ -239,7 +262,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, p = &(*p)->rb_right; } - he = hist_entry__new(&entry); + he = hist_entry__new(entry); if (!he) goto out_unlock; @@ -252,6 +275,51 @@ out_unlock: return he; } +struct hist_entry *__hists__add_branch_entry(struct hists *self, + struct addr_location *al, + struct symbol *sym_parent, + struct branch_info *bi, + u64 period) +{ + struct hist_entry entry = { + .thread = al->thread, + .ms = { + .map = bi->to.map, + .sym = bi->to.sym, + }, + .cpu = al->cpu, + .ip = bi->to.addr, + .level = al->level, + .period = period, + .parent = sym_parent, + .filtered = symbol__parent_filter(sym_parent), + .branch_info = bi, + }; + + return add_hist_entry(self, &entry, al, period); +} + +struct hist_entry *__hists__add_entry(struct hists *self, + struct addr_location *al, + struct symbol *sym_parent, u64 period) +{ + struct hist_entry entry = { + .thread = al->thread, + .ms = { + .map = al->map, + .sym = al->sym, + }, + .cpu = al->cpu, + .ip = al->addr, + .level = al->level, + .period = period, + .parent = sym_parent, + .filtered = symbol__parent_filter(sym_parent), + }; + + return add_hist_entry(self, &entry, al, period); +} + int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 48e5acd1e862..9413f3e31fea 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -42,6 +42,11 @@ enum hist_column { HISTC_COMM, HISTC_PARENT, HISTC_CPU, + HISTC_MISPREDICT, + HISTC_SYMBOL_FROM, + HISTC_SYMBOL_TO, + HISTC_DSO_FROM, + HISTC_DSO_TO, HISTC_NR_COLS, /* Last entry */ }; @@ -74,6 +79,12 @@ int hist_entry__snprintf(struct hist_entry *self, char *bf, size_t size, struct hists *hists); void hist_entry__free(struct hist_entry *); +struct hist_entry *__hists__add_branch_entry(struct hists *self, + struct addr_location *al, + struct symbol *sym_parent, + struct branch_info *bi, + u64 period); + void hists__output_resort(struct hists *self); void hists__output_resort_threaded(struct hists *hists); void hists__collapse_resort(struct hists *self); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 9f833cf9c6a9..002ebbf59f48 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -24,7 +24,7 @@ static int perf_session__open(struct perf_session *self, bool force) self->fd = STDIN_FILENO; if (perf_session__read_header(self, self->fd) < 0) - pr_err("incompatible file format"); + pr_err("incompatible file format (rerun with -v to learn more)"); return 0; } @@ -56,7 +56,7 @@ static int perf_session__open(struct perf_session *self, bool force) } if (perf_session__read_header(self, self->fd) < 0) { - pr_err("incompatible file format"); + pr_err("incompatible file format (rerun with -v to learn more)"); goto out_close; } @@ -229,6 +229,64 @@ static bool symbol__match_parent_regex(struct symbol *sym) return 0; } +static const u8 cpumodes[] = { + PERF_RECORD_MISC_USER, + PERF_RECORD_MISC_KERNEL, + PERF_RECORD_MISC_GUEST_USER, + PERF_RECORD_MISC_GUEST_KERNEL +}; +#define NCPUMODES (sizeof(cpumodes)/sizeof(u8)) + +static void ip__resolve_ams(struct machine *self, struct thread *thread, + struct addr_map_symbol *ams, + u64 ip) +{ + struct addr_location al; + size_t i; + u8 m; + + memset(&al, 0, sizeof(al)); + + for (i = 0; i < NCPUMODES; i++) { + m = cpumodes[i]; + /* + * We cannot use the header.misc hint to determine whether a + * branch stack address is user, kernel, guest, hypervisor. + * Branches may straddle the kernel/user/hypervisor boundaries. + * Thus, we have to try consecutively until we find a match + * or else, the symbol is unknown + */ + thread__find_addr_location(thread, self, m, MAP__FUNCTION, + ip, &al, NULL); + if (al.sym) + goto found; + } +found: + ams->addr = ip; + ams->al_addr = al.addr; + ams->sym = al.sym; + ams->map = al.map; +} + +struct branch_info *machine__resolve_bstack(struct machine *self, + struct thread *thr, + struct branch_stack *bs) +{ + struct branch_info *bi; + unsigned int i; + + bi = calloc(bs->nr, sizeof(struct branch_info)); + if (!bi) + return NULL; + + for (i = 0; i < bs->nr; i++) { + ip__resolve_ams(self, thr, &bi[i].to, bs->entries[i].to); + ip__resolve_ams(self, thr, &bi[i].from, bs->entries[i].from); + bi[i].flags = bs->entries[i].flags; + } + return bi; +} + int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, struct thread *thread, struct ip_callchain *chain, @@ -697,6 +755,18 @@ static void callchain__printf(struct perf_sample *sample) i, sample->callchain->ips[i]); } +static void branch_stack__printf(struct perf_sample *sample) +{ + uint64_t i; + + printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr); + + for (i = 0; i < sample->branch_stack->nr; i++) + printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n", + i, sample->branch_stack->entries[i].from, + sample->branch_stack->entries[i].to); +} + static void perf_session__print_tstamp(struct perf_session *session, union perf_event *event, struct perf_sample *sample) @@ -744,6 +814,9 @@ static void dump_sample(struct perf_session *session, union perf_event *event, if (session->sample_type & PERF_SAMPLE_CALLCHAIN) callchain__printf(sample); + + if (session->sample_type & PERF_SAMPLE_BRANCH_STACK) + branch_stack__printf(sample); } static struct machine * diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index c8d90178e7de..7a5434c00565 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -73,6 +73,10 @@ int perf_session__resolve_callchain(struct perf_session *self, struct perf_evsel struct ip_callchain *chain, struct symbol **parent); +struct branch_info *machine__resolve_bstack(struct machine *self, + struct thread *thread, + struct branch_stack *bs); + bool perf_session__has_traces(struct perf_session *self, const char *msg); void mem_bswap_64(void *src, int byte_size); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 16da30d8d765..88dbcf6f9575 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -8,6 +8,7 @@ const char default_sort_order[] = "comm,dso,symbol"; const char *sort_order = default_sort_order; int sort__need_collapse = 0; int sort__has_parent = 0; +int sort__branch_mode = -1; /* -1 = means not set */ enum sort_type sort__first_dimension; @@ -94,6 +95,26 @@ static int hist_entry__comm_snprintf(struct hist_entry *self, char *bf, return repsep_snprintf(bf, size, "%*s", width, self->thread->comm); } +static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r) +{ + struct dso *dso_l = map_l ? map_l->dso : NULL; + struct dso *dso_r = map_r ? map_r->dso : NULL; + const char *dso_name_l, *dso_name_r; + + if (!dso_l || !dso_r) + return cmp_null(dso_l, dso_r); + + if (verbose) { + dso_name_l = dso_l->long_name; + dso_name_r = dso_r->long_name; + } else { + dso_name_l = dso_l->short_name; + dso_name_r = dso_r->short_name; + } + + return strcmp(dso_name_l, dso_name_r); +} + struct sort_entry sort_comm = { .se_header = "Command", .se_cmp = sort__comm_cmp, @@ -107,36 +128,74 @@ struct sort_entry sort_comm = { static int64_t sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) { - struct dso *dso_l = left->ms.map ? left->ms.map->dso : NULL; - struct dso *dso_r = right->ms.map ? right->ms.map->dso : NULL; - const char *dso_name_l, *dso_name_r; + return _sort__dso_cmp(left->ms.map, right->ms.map); +} - if (!dso_l || !dso_r) - return cmp_null(dso_l, dso_r); - if (verbose) { - dso_name_l = dso_l->long_name; - dso_name_r = dso_r->long_name; - } else { - dso_name_l = dso_l->short_name; - dso_name_r = dso_r->short_name; +static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r, + u64 ip_l, u64 ip_r) +{ + if (!sym_l || !sym_r) + return cmp_null(sym_l, sym_r); + + if (sym_l == sym_r) + return 0; + + if (sym_l) + ip_l = sym_l->start; + if (sym_r) + ip_r = sym_r->start; + + return (int64_t)(ip_r - ip_l); +} + +static int _hist_entry__dso_snprintf(struct map *map, char *bf, + size_t size, unsigned int width) +{ + if (map && map->dso) { + const char *dso_name = !verbose ? map->dso->short_name : + map->dso->long_name; + return repsep_snprintf(bf, size, "%-*s", width, dso_name); } - return strcmp(dso_name_l, dso_name_r); + return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); } static int hist_entry__dso_snprintf(struct hist_entry *self, char *bf, size_t size, unsigned int width) { - if (self->ms.map && self->ms.map->dso) { - const char *dso_name = !verbose ? self->ms.map->dso->short_name : - self->ms.map->dso->long_name; - return repsep_snprintf(bf, size, "%-*s", width, dso_name); + return _hist_entry__dso_snprintf(self->ms.map, bf, size, width); +} + +static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, + u64 ip, char level, char *bf, size_t size, + unsigned int width __used) +{ + size_t ret = 0; + + if (verbose) { + char o = map ? dso__symtab_origin(map->dso) : '!'; + ret += repsep_snprintf(bf, size, "%-#*llx %c ", + BITS_PER_LONG / 4, ip, o); } - return repsep_snprintf(bf, size, "%-*s", width, "[unknown]"); + ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level); + if (sym) + ret += repsep_snprintf(bf + ret, size - ret, "%-*s", + width - ret, + sym->name); + else { + size_t len = BITS_PER_LONG / 4; + ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", + len, ip); + ret += repsep_snprintf(bf + ret, size - ret, "%-*s", + width - ret, ""); + } + + return ret; } + struct sort_entry sort_dso = { .se_header = "Shared Object", .se_cmp = sort__dso_cmp, @@ -144,8 +203,14 @@ struct sort_entry sort_dso = { .se_width_idx = HISTC_DSO, }; -/* --sort symbol */ +static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width __used) +{ + return _hist_entry__sym_snprintf(self->ms.map, self->ms.sym, self->ip, + self->level, bf, size, width); +} +/* --sort symbol */ static int64_t sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -163,31 +228,7 @@ sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) ip_l = left->ms.sym->start; ip_r = right->ms.sym->start; - return (int64_t)(ip_r - ip_l); -} - -static int hist_entry__sym_snprintf(struct hist_entry *self, char *bf, - size_t size, unsigned int width __used) -{ - size_t ret = 0; - - if (verbose) { - char o = self->ms.map ? dso__symtab_origin(self->ms.map->dso) : '!'; - ret += repsep_snprintf(bf, size, "%-#*llx %c ", - BITS_PER_LONG / 4, self->ip, o); - } - - if (!sort_dso.elide) - ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", self->level); - - if (self->ms.sym) - ret += repsep_snprintf(bf + ret, size - ret, "%s", - self->ms.sym->name); - else - ret += repsep_snprintf(bf + ret, size - ret, "%-#*llx", - BITS_PER_LONG / 4, self->ip); - - return ret; + return _sort__sym_cmp(left->ms.sym, right->ms.sym, ip_l, ip_r); } struct sort_entry sort_sym = { @@ -246,19 +287,155 @@ struct sort_entry sort_cpu = { .se_width_idx = HISTC_CPU, }; +static int64_t +sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return _sort__dso_cmp(left->branch_info->from.map, + right->branch_info->from.map); +} + +static int hist_entry__dso_from_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width) +{ + return _hist_entry__dso_snprintf(self->branch_info->from.map, + bf, size, width); +} + +struct sort_entry sort_dso_from = { + .se_header = "Source Shared Object", + .se_cmp = sort__dso_from_cmp, + .se_snprintf = hist_entry__dso_from_snprintf, + .se_width_idx = HISTC_DSO_FROM, +}; + +static int64_t +sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return _sort__dso_cmp(left->branch_info->to.map, + right->branch_info->to.map); +} + +static int hist_entry__dso_to_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width) +{ + return _hist_entry__dso_snprintf(self->branch_info->to.map, + bf, size, width); +} + +static int64_t +sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct addr_map_symbol *from_l = &left->branch_info->from; + struct addr_map_symbol *from_r = &right->branch_info->from; + + if (!from_l->sym && !from_r->sym) + return right->level - left->level; + + return _sort__sym_cmp(from_l->sym, from_r->sym, from_l->addr, + from_r->addr); +} + +static int64_t +sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct addr_map_symbol *to_l = &left->branch_info->to; + struct addr_map_symbol *to_r = &right->branch_info->to; + + if (!to_l->sym && !to_r->sym) + return right->level - left->level; + + return _sort__sym_cmp(to_l->sym, to_r->sym, to_l->addr, to_r->addr); +} + +static int hist_entry__sym_from_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width __used) +{ + struct addr_map_symbol *from = &self->branch_info->from; + return _hist_entry__sym_snprintf(from->map, from->sym, from->addr, + self->level, bf, size, width); + +} + +static int hist_entry__sym_to_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width __used) +{ + struct addr_map_symbol *to = &self->branch_info->to; + return _hist_entry__sym_snprintf(to->map, to->sym, to->addr, + self->level, bf, size, width); + +} + +struct sort_entry sort_dso_to = { + .se_header = "Target Shared Object", + .se_cmp = sort__dso_to_cmp, + .se_snprintf = hist_entry__dso_to_snprintf, + .se_width_idx = HISTC_DSO_TO, +}; + +struct sort_entry sort_sym_from = { + .se_header = "Source Symbol", + .se_cmp = sort__sym_from_cmp, + .se_snprintf = hist_entry__sym_from_snprintf, + .se_width_idx = HISTC_SYMBOL_FROM, +}; + +struct sort_entry sort_sym_to = { + .se_header = "Target Symbol", + .se_cmp = sort__sym_to_cmp, + .se_snprintf = hist_entry__sym_to_snprintf, + .se_width_idx = HISTC_SYMBOL_TO, +}; + +static int64_t +sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right) +{ + const unsigned char mp = left->branch_info->flags.mispred != + right->branch_info->flags.mispred; + const unsigned char p = left->branch_info->flags.predicted != + right->branch_info->flags.predicted; + + return mp || p; +} + +static int hist_entry__mispredict_snprintf(struct hist_entry *self, char *bf, + size_t size, unsigned int width){ + static const char *out = "N/A"; + + if (self->branch_info->flags.predicted) + out = "N"; + else if (self->branch_info->flags.mispred) + out = "Y"; + + return repsep_snprintf(bf, size, "%-*s", width, out); +} + +struct sort_entry sort_mispredict = { + .se_header = "Branch Mispredicted", + .se_cmp = sort__mispredict_cmp, + .se_snprintf = hist_entry__mispredict_snprintf, + .se_width_idx = HISTC_MISPREDICT, +}; + struct sort_dimension { const char *name; struct sort_entry *entry; int taken; }; +#define DIM(d, n, func) [d] = { .name = n, .entry = &(func) } + static struct sort_dimension sort_dimensions[] = { - { .name = "pid", .entry = &sort_thread, }, - { .name = "comm", .entry = &sort_comm, }, - { .name = "dso", .entry = &sort_dso, }, - { .name = "symbol", .entry = &sort_sym, }, - { .name = "parent", .entry = &sort_parent, }, - { .name = "cpu", .entry = &sort_cpu, }, + DIM(SORT_PID, "pid", sort_thread), + DIM(SORT_COMM, "comm", sort_comm), + DIM(SORT_DSO, "dso", sort_dso), + DIM(SORT_DSO_FROM, "dso_from", sort_dso_from), + DIM(SORT_DSO_TO, "dso_to", sort_dso_to), + DIM(SORT_SYM, "symbol", sort_sym), + DIM(SORT_SYM_TO, "symbol_from", sort_sym_from), + DIM(SORT_SYM_FROM, "symbol_to", sort_sym_to), + DIM(SORT_PARENT, "parent", sort_parent), + DIM(SORT_CPU, "cpu", sort_cpu), + DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), }; int sort_dimension__add(const char *tok) @@ -270,7 +447,6 @@ int sort_dimension__add(const char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - if (sd->entry == &sort_parent) { int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); if (ret) { @@ -302,6 +478,16 @@ int sort_dimension__add(const char *tok) sort__first_dimension = SORT_PARENT; else if (!strcmp(sd->name, "cpu")) sort__first_dimension = SORT_CPU; + else if (!strcmp(sd->name, "symbol_from")) + sort__first_dimension = SORT_SYM_FROM; + else if (!strcmp(sd->name, "symbol_to")) + sort__first_dimension = SORT_SYM_TO; + else if (!strcmp(sd->name, "dso_from")) + sort__first_dimension = SORT_DSO_FROM; + else if (!strcmp(sd->name, "dso_to")) + sort__first_dimension = SORT_DSO_TO; + else if (!strcmp(sd->name, "mispredict")) + sort__first_dimension = SORT_MISPREDICT; } list_add_tail(&sd->entry->list, &hist_entry__sort_list); @@ -309,7 +495,6 @@ int sort_dimension__add(const char *tok) return 0; } - return -ESRCH; } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 3f67ae395752..472aa5a63a58 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -31,11 +31,16 @@ extern const char *parent_pattern; extern const char default_sort_order[]; extern int sort__need_collapse; extern int sort__has_parent; +extern int sort__branch_mode; extern char *field_sep; extern struct sort_entry sort_comm; extern struct sort_entry sort_dso; extern struct sort_entry sort_sym; extern struct sort_entry sort_parent; +extern struct sort_entry sort_dso_from; +extern struct sort_entry sort_dso_to; +extern struct sort_entry sort_sym_from; +extern struct sort_entry sort_sym_to; extern enum sort_type sort__first_dimension; /** @@ -72,6 +77,7 @@ struct hist_entry { struct hist_entry *pair; struct rb_root sorted_chain; }; + struct branch_info *branch_info; struct callchain_root callchain[0]; }; @@ -82,6 +88,11 @@ enum sort_type { SORT_SYM, SORT_PARENT, SORT_CPU, + SORT_DSO_FROM, + SORT_DSO_TO, + SORT_SYM_FROM, + SORT_SYM_TO, + SORT_MISPREDICT, }; /* diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 2a683d4fc918..ac49ef208a5f 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -5,6 +5,7 @@ #include <stdbool.h> #include <stdint.h> #include "map.h" +#include "../perf.h" #include <linux/list.h> #include <linux/rbtree.h> #include <stdio.h> @@ -96,7 +97,11 @@ struct symbol_conf { *col_width_list_str; struct strlist *dso_list, *comm_list, - *sym_list; + *sym_list, + *dso_from_list, + *dso_to_list, + *sym_from_list, + *sym_to_list; const char *symfs; }; @@ -120,6 +125,19 @@ struct map_symbol { bool has_children; }; +struct addr_map_symbol { + struct map *map; + struct symbol *sym; + u64 addr; + u64 al_addr; +}; + +struct branch_info { + struct addr_map_symbol from; + struct addr_map_symbol to; + struct branch_flags flags; +}; + struct addr_location { struct thread *thread; struct map *map; diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index bfba0490c098..de8ece8bcce3 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -805,8 +805,11 @@ static struct hist_browser *hist_browser__new(struct hists *hists) self->hists = hists; self->b.refresh = hist_browser__refresh; self->b.seek = ui_browser__hists_seek; - self->b.use_navkeypressed = true, - self->has_symbols = sort_sym.list.next != NULL; + self->b.use_navkeypressed = true; + if (sort__branch_mode == 1) + self->has_symbols = sort_sym_from.list.next != NULL; + else + self->has_symbols = sort_sym.list.next != NULL; } return self; @@ -853,6 +856,16 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size, return printed; } +static inline void free_popup_options(char **options, int n) +{ + int i; + + for (i = 0; i < n; ++i) { + free(options[i]); + options[i] = NULL; + } +} + static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, const char *helpline, const char *ev_name, bool left_exits, @@ -861,7 +874,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, { struct hists *self = &evsel->hists; struct hist_browser *browser = hist_browser__new(self); + struct branch_info *bi; struct pstack *fstack; + char *options[16]; + int nr_options = 0; int key = -1; if (browser == NULL) @@ -873,13 +889,16 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, ui_helpline__push(helpline); + memset(options, 0, sizeof(options)); + while (1) { const struct thread *thread = NULL; const struct dso *dso = NULL; - char *options[16]; - int nr_options = 0, choice = 0, i, + int choice = 0, annotate = -2, zoom_dso = -2, zoom_thread = -2, - browse_map = -2; + annotate_f = -2, annotate_t = -2, browse_map = -2; + + nr_options = 0; key = hist_browser__run(browser, ev_name, timer, arg, delay_secs); @@ -887,7 +906,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, thread = hist_browser__selected_thread(browser); dso = browser->selection->map ? browser->selection->map->dso : NULL; } - switch (key) { case K_TAB: case K_UNTAB: @@ -902,7 +920,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (!browser->has_symbols) { ui_browser__warning(&browser->b, delay_secs * 2, "Annotation is only available for symbolic views, " - "include \"sym\" in --sort to use it."); + "include \"sym*\" in --sort to use it."); continue; } @@ -972,12 +990,34 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (!browser->has_symbols) goto add_exit_option; - if (browser->selection != NULL && - browser->selection->sym != NULL && - !browser->selection->map->dso->annotate_warned && - asprintf(&options[nr_options], "Annotate %s", - browser->selection->sym->name) > 0) - annotate = nr_options++; + if (sort__branch_mode == 1) { + bi = browser->he_selection->branch_info; + if (browser->selection != NULL && + bi && + bi->from.sym != NULL && + !bi->from.map->dso->annotate_warned && + asprintf(&options[nr_options], "Annotate %s", + bi->from.sym->name) > 0) + annotate_f = nr_options++; + + if (browser->selection != NULL && + bi && + bi->to.sym != NULL && + !bi->to.map->dso->annotate_warned && + (bi->to.sym != bi->from.sym || + bi->to.map->dso != bi->from.map->dso) && + asprintf(&options[nr_options], "Annotate %s", + bi->to.sym->name) > 0) + annotate_t = nr_options++; + } else { + + if (browser->selection != NULL && + browser->selection->sym != NULL && + !browser->selection->map->dso->annotate_warned && + asprintf(&options[nr_options], "Annotate %s", + browser->selection->sym->name) > 0) + annotate = nr_options++; + } if (thread != NULL && asprintf(&options[nr_options], "Zoom %s %s(%d) thread", @@ -998,25 +1038,39 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, browse_map = nr_options++; add_exit_option: options[nr_options++] = (char *)"Exit"; - +retry_popup_menu: choice = ui__popup_menu(nr_options, options); - for (i = 0; i < nr_options - 1; ++i) - free(options[i]); - if (choice == nr_options - 1) break; - if (choice == -1) + if (choice == -1) { + free_popup_options(options, nr_options - 1); continue; + } - if (choice == annotate) { + if (choice == annotate || choice == annotate_t || choice == annotate_f) { struct hist_entry *he; int err; do_annotate: he = hist_browser__selected_entry(browser); if (he == NULL) continue; + + /* + * we stash the branch_info symbol + map into the + * the ms so we don't have to rewrite all the annotation + * code to use branch_info. + * in branch mode, the ms struct is not used + */ + if (choice == annotate_f) { + he->ms.sym = he->branch_info->from.sym; + he->ms.map = he->branch_info->from.map; + } else if (choice == annotate_t) { + he->ms.sym = he->branch_info->to.sym; + he->ms.map = he->branch_info->to.map; + } + /* * Don't let this be freed, say, by hists__decay_entry. */ @@ -1024,9 +1078,18 @@ do_annotate: err = hist_entry__tui_annotate(he, evsel->idx, timer, arg, delay_secs); he->used = false; + /* + * offer option to annotate the other branch source or target + * (if they exists) when returning from annotate + */ + if ((err == 'q' || err == CTRL('c')) + && annotate_t != -2 && annotate_f != -2) + goto retry_popup_menu; + ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries); if (err) ui_browser__handle_resize(&browser->b); + } else if (choice == browse_map) map__browse(browser->selection->map); else if (choice == zoom_dso) { @@ -1072,6 +1135,7 @@ out_free_stack: pstack__delete(fstack); out: hist_browser__delete(browser); + free_popup_options(options, nr_options - 1); return key; } |