From 1f043a687e47b9b3c0469ad8d2021708981536af Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:16:45 -0300 Subject: tools headers UAPI: Update tools's copy of drm.h headers Picking the changes from: 0e0dc448005583a6 ("drm/doc: demote old doc-comments in drm.h") Silencing these perf build warnings: Warning: Kernel ABI header at 'tools/include/uapi/drm/drm.h' differs from latest version at 'include/uapi/drm/drm.h' diff -u tools/include/uapi/drm/drm.h include/uapi/drm/drm.h No changes in tooling as these are just C comment documentation changes. Cc: Simon Ser Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/drm.h | 97 ++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 48 deletions(-) diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 808b48a93330..0827037c5484 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -1,11 +1,10 @@ -/** - * \file drm.h +/* * Header for the Direct Rendering Manager * - * \author Rickard E. (Rik) Faith + * Author: Rickard E. (Rik) Faith * - * \par Acknowledgments: - * Dec 1999, Richard Henderson , move to generic \c cmpxchg. + * Acknowledgments: + * Dec 1999, Richard Henderson , move to generic cmpxchg. */ /* @@ -85,7 +84,7 @@ typedef unsigned int drm_context_t; typedef unsigned int drm_drawable_t; typedef unsigned int drm_magic_t; -/** +/* * Cliprect. * * \warning: If you change this structure, make sure you change @@ -101,7 +100,7 @@ struct drm_clip_rect { unsigned short y2; }; -/** +/* * Drawable information. */ struct drm_drawable_info { @@ -109,7 +108,7 @@ struct drm_drawable_info { struct drm_clip_rect *rects; }; -/** +/* * Texture region, */ struct drm_tex_region { @@ -120,7 +119,7 @@ struct drm_tex_region { unsigned int age; }; -/** +/* * Hardware lock. * * The lock structure is a simple cache-line aligned integer. To avoid @@ -132,7 +131,7 @@ struct drm_hw_lock { char padding[60]; /**< Pad to cache line */ }; -/** +/* * DRM_IOCTL_VERSION ioctl argument type. * * \sa drmGetVersion(). @@ -149,7 +148,7 @@ struct drm_version { char __user *desc; /**< User-space buffer to hold desc */ }; -/** +/* * DRM_IOCTL_GET_UNIQUE ioctl argument type. * * \sa drmGetBusid() and drmSetBusId(). @@ -168,7 +167,7 @@ struct drm_block { int unused; }; -/** +/* * DRM_IOCTL_CONTROL ioctl argument type. * * \sa drmCtlInstHandler() and drmCtlUninstHandler(). @@ -183,7 +182,7 @@ struct drm_control { int irq; }; -/** +/* * Type of memory to map. */ enum drm_map_type { @@ -195,7 +194,7 @@ enum drm_map_type { _DRM_CONSISTENT = 5 /**< Consistent memory for PCI DMA */ }; -/** +/* * Memory mapping flags. */ enum drm_map_flags { @@ -214,7 +213,7 @@ struct drm_ctx_priv_map { void *handle; /**< Handle of map */ }; -/** +/* * DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls * argument type. * @@ -231,7 +230,7 @@ struct drm_map { /* Private data */ }; -/** +/* * DRM_IOCTL_GET_CLIENT ioctl argument type. */ struct drm_client { @@ -263,7 +262,7 @@ enum drm_stat_type { /* Add to the *END* of the list */ }; -/** +/* * DRM_IOCTL_GET_STATS ioctl argument type. */ struct drm_stats { @@ -274,7 +273,7 @@ struct drm_stats { } data[15]; }; -/** +/* * Hardware locking flags. */ enum drm_lock_flags { @@ -289,7 +288,7 @@ enum drm_lock_flags { _DRM_HALT_CUR_QUEUES = 0x20 /**< Halt all current queues */ }; -/** +/* * DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type. * * \sa drmGetLock() and drmUnlock(). @@ -299,7 +298,7 @@ struct drm_lock { enum drm_lock_flags flags; }; -/** +/* * DMA flags * * \warning @@ -328,7 +327,7 @@ enum drm_dma_flags { _DRM_DMA_LARGER_OK = 0x40 /**< Larger-than-requested buffers OK */ }; -/** +/* * DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type. * * \sa drmAddBufs(). @@ -351,7 +350,7 @@ struct drm_buf_desc { */ }; -/** +/* * DRM_IOCTL_INFO_BUFS ioctl argument type. */ struct drm_buf_info { @@ -359,7 +358,7 @@ struct drm_buf_info { struct drm_buf_desc __user *list; }; -/** +/* * DRM_IOCTL_FREE_BUFS ioctl argument type. */ struct drm_buf_free { @@ -367,7 +366,7 @@ struct drm_buf_free { int __user *list; }; -/** +/* * Buffer information * * \sa drm_buf_map. @@ -379,7 +378,7 @@ struct drm_buf_pub { void __user *address; /**< Address of buffer */ }; -/** +/* * DRM_IOCTL_MAP_BUFS ioctl argument type. */ struct drm_buf_map { @@ -392,7 +391,7 @@ struct drm_buf_map { struct drm_buf_pub __user *list; /**< Buffer information */ }; -/** +/* * DRM_IOCTL_DMA ioctl argument type. * * Indices here refer to the offset into the buffer list in drm_buf_get. @@ -417,7 +416,7 @@ enum drm_ctx_flags { _DRM_CONTEXT_2DONLY = 0x02 }; -/** +/* * DRM_IOCTL_ADD_CTX ioctl argument type. * * \sa drmCreateContext() and drmDestroyContext(). @@ -427,7 +426,7 @@ struct drm_ctx { enum drm_ctx_flags flags; }; -/** +/* * DRM_IOCTL_RES_CTX ioctl argument type. */ struct drm_ctx_res { @@ -435,14 +434,14 @@ struct drm_ctx_res { struct drm_ctx __user *contexts; }; -/** +/* * DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type. */ struct drm_draw { drm_drawable_t handle; }; -/** +/* * DRM_IOCTL_UPDATE_DRAW ioctl argument type. */ typedef enum { @@ -456,14 +455,14 @@ struct drm_update_draw { unsigned long long data; }; -/** +/* * DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type. */ struct drm_auth { drm_magic_t magic; }; -/** +/* * DRM_IOCTL_IRQ_BUSID ioctl argument type. * * \sa drmGetInterruptFromBusID(). @@ -505,7 +504,7 @@ struct drm_wait_vblank_reply { long tval_usec; }; -/** +/* * DRM_IOCTL_WAIT_VBLANK ioctl argument type. * * \sa drmWaitVBlank(). @@ -518,7 +517,7 @@ union drm_wait_vblank { #define _DRM_PRE_MODESET 1 #define _DRM_POST_MODESET 2 -/** +/* * DRM_IOCTL_MODESET_CTL ioctl argument type * * \sa drmModesetCtl(). @@ -528,7 +527,7 @@ struct drm_modeset_ctl { __u32 cmd; }; -/** +/* * DRM_IOCTL_AGP_ENABLE ioctl argument type. * * \sa drmAgpEnable(). @@ -537,7 +536,7 @@ struct drm_agp_mode { unsigned long mode; /**< AGP mode */ }; -/** +/* * DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type. * * \sa drmAgpAlloc() and drmAgpFree(). @@ -549,7 +548,7 @@ struct drm_agp_buffer { unsigned long physical; /**< Physical used by i810 */ }; -/** +/* * DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type. * * \sa drmAgpBind() and drmAgpUnbind(). @@ -559,7 +558,7 @@ struct drm_agp_binding { unsigned long offset; /**< In bytes -- will round to page boundary */ }; -/** +/* * DRM_IOCTL_AGP_INFO ioctl argument type. * * \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(), @@ -580,7 +579,7 @@ struct drm_agp_info { unsigned short id_device; }; -/** +/* * DRM_IOCTL_SG_ALLOC ioctl argument type. */ struct drm_scatter_gather { @@ -588,7 +587,7 @@ struct drm_scatter_gather { unsigned long handle; /**< Used for mapping / unmapping */ }; -/** +/* * DRM_IOCTL_SET_VERSION ioctl argument type. */ struct drm_set_version { @@ -598,14 +597,14 @@ struct drm_set_version { int drm_dd_minor; }; -/** DRM_IOCTL_GEM_CLOSE ioctl argument type */ +/* DRM_IOCTL_GEM_CLOSE ioctl argument type */ struct drm_gem_close { /** Handle of the object to be closed. */ __u32 handle; __u32 pad; }; -/** DRM_IOCTL_GEM_FLINK ioctl argument type */ +/* DRM_IOCTL_GEM_FLINK ioctl argument type */ struct drm_gem_flink { /** Handle for the object being named */ __u32 handle; @@ -614,7 +613,7 @@ struct drm_gem_flink { __u32 name; }; -/** DRM_IOCTL_GEM_OPEN ioctl argument type */ +/* DRM_IOCTL_GEM_OPEN ioctl argument type */ struct drm_gem_open { /** Name of object being opened */ __u32 name; @@ -652,7 +651,7 @@ struct drm_gem_open { #define DRM_CAP_SYNCOBJ 0x13 #define DRM_CAP_SYNCOBJ_TIMELINE 0x14 -/** DRM_IOCTL_GET_CAP ioctl argument type */ +/* DRM_IOCTL_GET_CAP ioctl argument type */ struct drm_get_cap { __u64 capability; __u64 value; @@ -678,7 +677,9 @@ struct drm_get_cap { /** * DRM_CLIENT_CAP_ATOMIC * - * If set to 1, the DRM core will expose atomic properties to userspace + * If set to 1, the DRM core will expose atomic properties to userspace. This + * implicitly enables &DRM_CLIENT_CAP_UNIVERSAL_PLANES and + * &DRM_CLIENT_CAP_ASPECT_RATIO. */ #define DRM_CLIENT_CAP_ATOMIC 3 @@ -698,7 +699,7 @@ struct drm_get_cap { */ #define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5 -/** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ +/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ struct drm_set_client_cap { __u64 capability; __u64 value; @@ -950,7 +951,7 @@ extern "C" { #define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) -/** +/* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. * Generic IOCTLS restart at 0xA0. @@ -961,7 +962,7 @@ extern "C" { #define DRM_COMMAND_BASE 0x40 #define DRM_COMMAND_END 0xA0 -/** +/* * Header for events written back to userspace on the drm fd. The * type defines the type of event, the length specifies the total * length of the event (including the header), and user_data is -- cgit v1.2.3 From 4a8176fd62aa7fa86599efcbd3631af272b109d8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:21:00 -0300 Subject: tools headers UAPI: Sync drm/i915_drm.h with the kernel sources To pick the changes in: 8c3b1ba0e7ea9a80 ("drm/i915/gt: Track the overall awake/busy time") 348fb0cb0a79bce0 ("drm/i915/pmu: Deprecate I915_PMU_LAST and optimize state tracking") That don't result in any change in tooling: $ tools/perf/trace/beauty/drm_ioctl.sh > before $ cp include/uapi/drm/i915_drm.h tools/include/uapi/drm/i915_drm.h $ tools/perf/trace/beauty/drm_ioctl.sh > after $ diff -u before after $ Only silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h' diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h Cc: Chris Wilson Cc: Tvrtko Ursulin Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/i915_drm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index fa1f3d62f9a6..1987e2ea79a3 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -177,8 +177,9 @@ enum drm_i915_pmu_engine_sample { #define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) #define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) #define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) +#define I915_PMU_SOFTWARE_GT_AWAKE_TIME __I915_PMU_OTHER(4) -#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY +#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY /* Each region is a minimum of 16k, and there are at most 255 of them. */ -- cgit v1.2.3 From ed72adf64979ee2b5aa9f1c74fb45b2bf592ad2a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:44:37 -0300 Subject: tools headers UAPI: Sync openat2.h with the kernel sources To pick the changes in: 99668f618062816c ("fs: expose LOOKUP_CACHED through openat2() RESOLVE_CACHED") That don't result in any change in tooling, only silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/openat2.h' differs from latest version at 'include/uapi/linux/openat2.h' diff -u tools/include/uapi/linux/openat2.h include/uapi/linux/openat2.h Cc: Al Viro Cc: Jens Axboe Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/openat2.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/include/uapi/linux/openat2.h b/tools/include/uapi/linux/openat2.h index 58b1eb711360..a5feb7604948 100644 --- a/tools/include/uapi/linux/openat2.h +++ b/tools/include/uapi/linux/openat2.h @@ -35,5 +35,9 @@ struct open_how { #define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".." be scoped inside the dirfd (similar to chroot(2)). */ +#define RESOLVE_CACHED 0x20 /* Only complete if resolution can be + completed through cached lookup. May + return -EAGAIN if that's not + possible. */ #endif /* _UAPI_LINUX_OPENAT2_H */ -- cgit v1.2.3 From 867a9148298b42dee341b780ddfd706415a1253e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:48:05 -0300 Subject: perf arch powerpc: Sync powerpc syscall.tbl with the kernel sources To get the changes in: fbcee2ebe8edbb6a ("powerpc/32: Always save non volatile GPRs at syscall entry") That shouldn't cause any change in tooling, just silences the following tools/perf/ build warning: Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl' Cc: Christophe Leroy Cc: Michael Ellerman Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index f744eb5cba88..96b2157f0371 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -9,9 +9,7 @@ # 0 nospu restart_syscall sys_restart_syscall 1 nospu exit sys_exit -2 32 fork ppc_fork sys_fork -2 64 fork sys_fork -2 spu fork sys_ni_syscall +2 nospu fork sys_fork 3 common read sys_read 4 common write sys_write 5 common open sys_open compat_sys_open @@ -160,9 +158,7 @@ 119 32 sigreturn sys_sigreturn compat_sys_sigreturn 119 64 sigreturn sys_ni_syscall 119 spu sigreturn sys_ni_syscall -120 32 clone ppc_clone sys_clone -120 64 clone sys_clone -120 spu clone sys_ni_syscall +120 nospu clone sys_clone 121 common setdomainname sys_setdomainname 122 common uname sys_newuname 123 common modify_ldt sys_ni_syscall @@ -244,9 +240,7 @@ 186 spu sendfile sys_sendfile64 187 common getpmsg sys_ni_syscall 188 common putpmsg sys_ni_syscall -189 32 vfork ppc_vfork sys_vfork -189 64 vfork sys_vfork -189 spu vfork sys_ni_syscall +189 nospu vfork sys_vfork 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 common readahead sys_readahead compat_sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 @@ -322,9 +316,7 @@ 248 32 clock_nanosleep sys_clock_nanosleep_time32 248 64 clock_nanosleep sys_clock_nanosleep 248 spu clock_nanosleep sys_clock_nanosleep -249 32 swapcontext ppc_swapcontext compat_sys_swapcontext -249 64 swapcontext sys_swapcontext -249 spu swapcontext sys_ni_syscall +249 nospu swapcontext sys_swapcontext compat_sys_swapcontext 250 common tgkill sys_tgkill 251 32 utimes sys_utimes_time32 251 64 utimes sys_utimes @@ -522,9 +514,7 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open -435 32 clone3 ppc_clone3 sys_clone3 -435 64 clone3 sys_clone3 -435 spu clone3 sys_ni_syscall +435 nospu clone3 sys_clone3 436 common close_range sys_close_range 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd -- cgit v1.2.3 From 20e32b9cb0c61c9264efcb16d8e3a80d3738ff2b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:51:17 -0300 Subject: tools headers UAPI s390: Sync ptrace.h kernel headers To pick up the changes from: 56e62a7370283601 ("s390: convert to generic entry") That only adds two new defines, so shouldn't cause problems when building the BPF selftests. Silencing this perf build warning: Warning: Kernel ABI header at 'tools/arch/s390/include/uapi/asm/ptrace.h' differs from latest version at 'arch/s390/include/uapi/asm/ptrace.h' diff -u tools/arch/s390/include/uapi/asm/ptrace.h arch/s390/include/uapi/asm/ptrace.h Cc: Hendrik Brueckner Cc: Sven Schnelle Cc: Vasily Gorbik Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/s390/include/uapi/asm/ptrace.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/arch/s390/include/uapi/asm/ptrace.h b/tools/arch/s390/include/uapi/asm/ptrace.h index 543dd70e12c8..ad64d673b5e6 100644 --- a/tools/arch/s390/include/uapi/asm/ptrace.h +++ b/tools/arch/s390/include/uapi/asm/ptrace.h @@ -179,8 +179,9 @@ #define ACR_SIZE 4 -#define PTRACE_OLDSETOPTIONS 21 - +#define PTRACE_OLDSETOPTIONS 21 +#define PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 #ifndef __ASSEMBLY__ #include #include -- cgit v1.2.3 From 84b7725536d82e99b7564a079b8627f3be692a13 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:56:50 -0300 Subject: tools headers UAPI: Sync kvm.h headers with the kernel sources To pick the changes in: d9a47edabc4f9481 ("KVM: PPC: Book3S HV: Introduce new capability for 2nd DAWR") 8d4e7e80838f45d3 ("KVM: x86: declare Xen HVM shared info capability and add test case") 40da8ccd724f7ca2 ("KVM: x86/xen: Add event channel interrupt vector upcall") These new IOCTLs are now supported on 'perf trace': $ tools/perf/trace/beauty/kvm_ioctl.sh > before $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h $ tools/perf/trace/beauty/kvm_ioctl.sh > after $ diff -u before after --- before 2021-02-23 09:55:46.229058308 -0300 +++ after 2021-02-23 09:55:57.509308058 -0300 @@ -91,6 +91,10 @@ [0xc1] = "GET_SUPPORTED_HV_CPUID", [0xc6] = "X86_SET_MSR_FILTER", [0xc7] = "RESET_DIRTY_RINGS", + [0xc8] = "XEN_HVM_GET_ATTR", + [0xc9] = "XEN_HVM_SET_ATTR", + [0xca] = "XEN_VCPU_GET_ATTR", + [0xcb] = "XEN_VCPU_SET_ATTR", [0xe0] = "CREATE_DEVICE", [0xe1] = "SET_DEVICE_ATTR", [0xe2] = "GET_DEVICE_ATTR", $ Addressing this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Cc: David Woodhouse Cc: Paul Mackerras Cc: Ravi Bangoria Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 73 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index abb89bbe5635..8b281f722e5b 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -216,6 +216,20 @@ struct kvm_hyperv_exit { } u; }; +struct kvm_xen_exit { +#define KVM_EXIT_XEN_HCALL 1 + __u32 type; + union { + struct { + __u32 longmode; + __u32 cpl; + __u64 input; + __u64 result; + __u64 params[6]; + } hcall; + } u; +}; + #define KVM_S390_GET_SKEYS_NONE 1 #define KVM_S390_SKEYS_MAX 1048576 @@ -252,6 +266,8 @@ struct kvm_hyperv_exit { #define KVM_EXIT_X86_WRMSR 30 #define KVM_EXIT_DIRTY_RING_FULL 31 #define KVM_EXIT_AP_RESET_HOLD 32 +#define KVM_EXIT_X86_BUS_LOCK 33 +#define KVM_EXIT_XEN 34 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -428,6 +444,8 @@ struct kvm_run { __u32 index; /* kernel -> user */ __u64 data; /* kernel <-> user */ } msr; + /* KVM_EXIT_XEN */ + struct kvm_xen_exit xen; /* Fix the size of the union. */ char padding[256]; }; @@ -1058,6 +1076,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #define KVM_CAP_SYS_HYPERV_CPUID 191 #define KVM_CAP_DIRTY_LOG_RING 192 +#define KVM_CAP_X86_BUS_LOCK_EXIT 193 #define KVM_CAP_PPC_DAWR1 194 #ifdef KVM_CAP_IRQ_ROUTING @@ -1132,6 +1151,10 @@ struct kvm_x86_mce { #endif #ifdef KVM_CAP_XEN_HVM +#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) +#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) + struct kvm_xen_hvm_config { __u32 flags; __u32 msr; @@ -1566,6 +1589,45 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_DIRTY_LOG_RING */ #define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7) +/* Per-VM Xen attributes */ +#define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr) +#define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr) + +struct kvm_xen_hvm_attr { + __u16 type; + __u16 pad[3]; + union { + __u8 long_mode; + __u8 vector; + struct { + __u64 gfn; + } shared_info; + __u64 pad[8]; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 +#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 +#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 + +/* Per-vCPU Xen attributes */ +#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) +#define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) + +struct kvm_xen_vcpu_attr { + __u16 type; + __u16 pad[3]; + union { + __u64 gpa; + __u64 pad[8]; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ @@ -1594,6 +1656,8 @@ enum sev_cmd_id { KVM_SEV_DBG_ENCRYPT, /* Guest certificates commands */ KVM_SEV_CERT_EXPORT, + /* Attestation report */ + KVM_SEV_GET_ATTESTATION_REPORT, KVM_SEV_NR_MAX, }; @@ -1646,6 +1710,12 @@ struct kvm_sev_dbg { __u32 len; }; +struct kvm_sev_attestation_report { + __u8 mnonce[16]; + __u64 uaddr; + __u32 len; +}; + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) @@ -1767,4 +1837,7 @@ struct kvm_dirty_gfn { __u64 offset; }; +#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) +#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) + #endif /* __LINUX_KVM_H */ -- cgit v1.2.3 From b5f184fbdb03b4fcc1141de34dd5ec964ca5d99e Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 4 Feb 2021 11:35:23 +0800 Subject: perf tools: Support MIPS unwinding and dwarf-regs Map perf APIs (perf_reg_name/get_arch_regstr/unwind__arch_reg_id) with MIPS specific registers. [ayan@wavecomp.com: repick this patch for unwinding userstack backtrace by perf and libunwind on MIPS based CPU.] [yangtiezhu@loongson.cn: Add sample_reg_masks[] to fix build error, silence some checkpatch errors and warnings, and also separate the original patches into two parts (MIPS kernel and perf tools) to merge easily.] The original patches: https://lore.kernel.org/patchwork/patch/1126521/ https://lore.kernel.org/patchwork/patch/1126520/ Committer notes: Do it as __perf_reg_name() to cope with: 067012974c8ae31a ("perf tools: Fix arm64 build error with gcc-11") Signed-off-by: Tiezhu Yang Cc: Alexander Shishkin Cc: Archer Yan Cc: David Daney Cc: Jianlin Lv Cc: Jiri Olsa Cc: Juxin Gao Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Xuefeng Li Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org Link: http://lore.kernel.org/lkml/1612409724-3516-3-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Archer Yan Signed-off-by: David Daney Signed-off-by: Ralf Baechle Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 6 ++ tools/perf/arch/mips/Makefile | 4 ++ tools/perf/arch/mips/include/dwarf-regs-table.h | 31 +++++++++ tools/perf/arch/mips/include/perf_regs.h | 84 +++++++++++++++++++++++++ tools/perf/arch/mips/util/Build | 3 + tools/perf/arch/mips/util/dwarf-regs.c | 38 +++++++++++ tools/perf/arch/mips/util/perf_regs.c | 6 ++ tools/perf/arch/mips/util/unwind-libunwind.c | 22 +++++++ tools/perf/util/dwarf-regs.c | 3 + 9 files changed, 197 insertions(+) create mode 100644 tools/perf/arch/mips/Makefile create mode 100644 tools/perf/arch/mips/include/dwarf-regs-table.h create mode 100644 tools/perf/arch/mips/include/perf_regs.h create mode 100644 tools/perf/arch/mips/util/Build create mode 100644 tools/perf/arch/mips/util/dwarf-regs.c create mode 100644 tools/perf/arch/mips/util/perf_regs.c create mode 100644 tools/perf/arch/mips/util/unwind-libunwind.c diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index d8e59d31399a..bd0c93c35379 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -87,6 +87,12 @@ ifeq ($(ARCH),s390) CFLAGS += -fPIC -I$(OUTPUT)arch/s390/include/generated endif +ifeq ($(ARCH),mips) + NO_PERF_REGS := 0 + CFLAGS += -I../../arch/mips/include/uapi -I../../arch/mips/include/generated/uapi + LIBUNWIND_LIBS = -lunwind -lunwind-mips +endif + ifeq ($(NO_PERF_REGS),0) $(call detected,CONFIG_PERF_REGS) endif diff --git a/tools/perf/arch/mips/Makefile b/tools/perf/arch/mips/Makefile new file mode 100644 index 000000000000..6e1106fab26e --- /dev/null +++ b/tools/perf/arch/mips/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 +ifndef NO_DWARF +PERF_HAVE_DWARF_REGS := 1 +endif diff --git a/tools/perf/arch/mips/include/dwarf-regs-table.h b/tools/perf/arch/mips/include/dwarf-regs-table.h new file mode 100644 index 000000000000..5badbcd3c5ec --- /dev/null +++ b/tools/perf/arch/mips/include/dwarf-regs-table.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * dwarf-regs-table.h : Mapping of DWARF debug register numbers into + * register names. + * + * Copyright (C) 2013 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifdef DEFINE_DWARF_REGSTR_TABLE +#undef REG_DWARFNUM_NAME +#define REG_DWARFNUM_NAME(reg, idx) [idx] = "$" #reg +static const char * const mips_regstr_tbl[] = { + "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", + "$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19", + "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "%29", + "$30", "$31", + REG_DWARFNUM_NAME(hi, 64), + REG_DWARFNUM_NAME(lo, 65), +}; +#endif diff --git a/tools/perf/arch/mips/include/perf_regs.h b/tools/perf/arch/mips/include/perf_regs.h new file mode 100644 index 000000000000..ee73b36a14d1 --- /dev/null +++ b/tools/perf/arch/mips/include/perf_regs.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ARCH_PERF_REGS_H +#define ARCH_PERF_REGS_H + +#include +#include +#include + +#define PERF_REGS_MAX PERF_REG_MIPS_MAX +#define PERF_REG_IP PERF_REG_MIPS_PC +#define PERF_REG_SP PERF_REG_MIPS_R29 + +#define PERF_REGS_MASK ((1ULL << PERF_REG_MIPS_MAX) - 1) + +static inline const char *__perf_reg_name(int id) +{ + switch (id) { + case PERF_REG_MIPS_PC: + return "PC"; + case PERF_REG_MIPS_R1: + return "$1"; + case PERF_REG_MIPS_R2: + return "$2"; + case PERF_REG_MIPS_R3: + return "$3"; + case PERF_REG_MIPS_R4: + return "$4"; + case PERF_REG_MIPS_R5: + return "$5"; + case PERF_REG_MIPS_R6: + return "$6"; + case PERF_REG_MIPS_R7: + return "$7"; + case PERF_REG_MIPS_R8: + return "$8"; + case PERF_REG_MIPS_R9: + return "$9"; + case PERF_REG_MIPS_R10: + return "$10"; + case PERF_REG_MIPS_R11: + return "$11"; + case PERF_REG_MIPS_R12: + return "$12"; + case PERF_REG_MIPS_R13: + return "$13"; + case PERF_REG_MIPS_R14: + return "$14"; + case PERF_REG_MIPS_R15: + return "$15"; + case PERF_REG_MIPS_R16: + return "$16"; + case PERF_REG_MIPS_R17: + return "$17"; + case PERF_REG_MIPS_R18: + return "$18"; + case PERF_REG_MIPS_R19: + return "$19"; + case PERF_REG_MIPS_R20: + return "$20"; + case PERF_REG_MIPS_R21: + return "$21"; + case PERF_REG_MIPS_R22: + return "$22"; + case PERF_REG_MIPS_R23: + return "$23"; + case PERF_REG_MIPS_R24: + return "$24"; + case PERF_REG_MIPS_R25: + return "$25"; + case PERF_REG_MIPS_R28: + return "$28"; + case PERF_REG_MIPS_R29: + return "$29"; + case PERF_REG_MIPS_R30: + return "$30"; + case PERF_REG_MIPS_R31: + return "$31"; + default: + break; + } + return NULL; +} + +#endif /* ARCH_PERF_REGS_H */ diff --git a/tools/perf/arch/mips/util/Build b/tools/perf/arch/mips/util/Build new file mode 100644 index 000000000000..51c8900a9a10 --- /dev/null +++ b/tools/perf/arch/mips/util/Build @@ -0,0 +1,3 @@ +perf-y += perf_regs.o +perf-$(CONFIG_DWARF) += dwarf-regs.o +perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/mips/util/dwarf-regs.c b/tools/perf/arch/mips/util/dwarf-regs.c new file mode 100644 index 000000000000..25c13a91c2a7 --- /dev/null +++ b/tools/perf/arch/mips/util/dwarf-regs.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. + * + * Copyright (C) 2013 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include + +static const char *mips_gpr_names[32] = { + "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", + "$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19", + "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "$29", + "$30", "$31" +}; + +const char *get_arch_regstr(unsigned int n) +{ + if (n < 32) + return mips_gpr_names[n]; + if (n == 64) + return "hi"; + if (n == 65) + return "lo"; + return NULL; +} diff --git a/tools/perf/arch/mips/util/perf_regs.c b/tools/perf/arch/mips/util/perf_regs.c new file mode 100644 index 000000000000..2864e2e3776d --- /dev/null +++ b/tools/perf/arch/mips/util/perf_regs.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../util/perf_regs.h" + +const struct sample_reg sample_reg_masks[] = { + SMPL_REG_END +}; diff --git a/tools/perf/arch/mips/util/unwind-libunwind.c b/tools/perf/arch/mips/util/unwind-libunwind.c new file mode 100644 index 000000000000..0d8c99c29da6 --- /dev/null +++ b/tools/perf/arch/mips/util/unwind-libunwind.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "perf_regs.h" +#include "../../util/unwind.h" +#include "util/debug.h" + +int libunwind__arch_reg_id(int regnum) +{ + switch (regnum) { + case UNW_MIPS_R1 ... UNW_MIPS_R25: + return regnum - UNW_MIPS_R1 + PERF_REG_MIPS_R1; + case UNW_MIPS_R28 ... UNW_MIPS_R31: + return regnum - UNW_MIPS_R28 + PERF_REG_MIPS_R28; + case UNW_MIPS_PC: + return PERF_REG_MIPS_PC; + default: + pr_err("unwind: invalid reg id %d\n", regnum); + return -EINVAL; + } +} diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c index 1b49ecee5aff..3fa4486742cd 100644 --- a/tools/perf/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs.c @@ -24,6 +24,7 @@ #include "../arch/s390/include/dwarf-regs-table.h" #include "../arch/sparc/include/dwarf-regs-table.h" #include "../arch/xtensa/include/dwarf-regs-table.h" +#include "../arch/mips/include/dwarf-regs-table.h" #define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL) @@ -53,6 +54,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) return __get_dwarf_regstr(sparc_regstr_tbl, n); case EM_XTENSA: return __get_dwarf_regstr(xtensa_regstr_tbl, n); + case EM_MIPS: + return __get_dwarf_regstr(mips_regstr_tbl, n); default: pr_err("ELF MACHINE %x is not supported.\n", machine); } -- cgit v1.2.3 From d9fd5a718977702f2fd112a081b62572e39f24db Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 4 Feb 2021 11:35:24 +0800 Subject: perf tools: Generate mips syscalls_n64.c syscall table Grab a copy of arch/mips/kernel/syscalls/syscall_n64.tbl and use it to generate tools/perf/arch/mips/include/generated/asm/syscalls_n64.c file, this is similar with commit 1b700c997500 ("perf tools: Build syscall table .c header from kernel's syscall_64.tbl") Signed-off-by: Tiezhu Yang Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Juxin Gao Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Bogendoerfer Cc: Xuefeng Li Cc: linux-mips@vger.kernel.org Link: http://lore.kernel.org/lkml/1612409724-3516-4-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 3 +- tools/perf/arch/mips/Makefile | 18 ++ tools/perf/arch/mips/entry/syscalls/mksyscalltbl | 32 ++ .../perf/arch/mips/entry/syscalls/syscall_n64.tbl | 358 +++++++++++++++++++++ tools/perf/check-headers.sh | 1 + tools/perf/util/syscalltbl.c | 4 + 6 files changed, 415 insertions(+), 1 deletion(-) create mode 100644 tools/perf/arch/mips/entry/syscalls/mksyscalltbl create mode 100644 tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index bd0c93c35379..3514fe956ed1 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -32,7 +32,7 @@ ifneq ($(NO_SYSCALL_TABLE),1) NO_SYSCALL_TABLE := 0 endif else - ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390)) + ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390 mips)) NO_SYSCALL_TABLE := 0 endif endif @@ -89,6 +89,7 @@ endif ifeq ($(ARCH),mips) NO_PERF_REGS := 0 + CFLAGS += -I$(OUTPUT)arch/mips/include/generated CFLAGS += -I../../arch/mips/include/uapi -I../../arch/mips/include/generated/uapi LIBUNWIND_LIBS = -lunwind -lunwind-mips endif diff --git a/tools/perf/arch/mips/Makefile b/tools/perf/arch/mips/Makefile index 6e1106fab26e..8bc09072e3d6 100644 --- a/tools/perf/arch/mips/Makefile +++ b/tools/perf/arch/mips/Makefile @@ -2,3 +2,21 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif + +# Syscall table generation for perf +out := $(OUTPUT)arch/mips/include/generated/asm +header := $(out)/syscalls_n64.c +sysprf := $(srctree)/tools/perf/arch/mips/entry/syscalls +sysdef := $(sysprf)/syscall_n64.tbl +systbl := $(sysprf)/mksyscalltbl + +# Create output directory if not already present +_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') + +$(header): $(sysdef) $(systbl) + $(Q)$(SHELL) '$(systbl)' $(sysdef) > $@ + +clean:: + $(call QUIET_CLEAN, mips) $(RM) $(header) + +archheaders: $(header) diff --git a/tools/perf/arch/mips/entry/syscalls/mksyscalltbl b/tools/perf/arch/mips/entry/syscalls/mksyscalltbl new file mode 100644 index 000000000000..fb1f49451af6 --- /dev/null +++ b/tools/perf/arch/mips/entry/syscalls/mksyscalltbl @@ -0,0 +1,32 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate system call table for perf. Derived from +# s390 script. +# +# Author(s): Hendrik Brueckner +# Changed by: Tiezhu Yang + +SYSCALL_TBL=$1 + +if ! test -r $SYSCALL_TBL; then + echo "Could not read input file" >&2 + exit 1 +fi + +create_table() +{ + local max_nr nr abi sc discard + + echo 'static const char *syscalltbl_mips_n64[] = {' + while read nr abi sc discard; do + printf '\t[%d] = "%s",\n' $nr $sc + max_nr=$nr + done + echo '};' + echo "#define SYSCALLTBL_MIPS_N64_MAX_ID $max_nr" +} + +grep -E "^[[:digit:]]+[[:space:]]+(n64)" $SYSCALL_TBL \ + |sort -k1 -n \ + |create_table diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl new file mode 100644 index 000000000000..91649690b52f --- /dev/null +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -0,0 +1,358 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# system call numbers and entry vectors for mips +# +# The format is: +# +# +# The is always "n64" for this file. +# +0 n64 read sys_read +1 n64 write sys_write +2 n64 open sys_open +3 n64 close sys_close +4 n64 stat sys_newstat +5 n64 fstat sys_newfstat +6 n64 lstat sys_newlstat +7 n64 poll sys_poll +8 n64 lseek sys_lseek +9 n64 mmap sys_mips_mmap +10 n64 mprotect sys_mprotect +11 n64 munmap sys_munmap +12 n64 brk sys_brk +13 n64 rt_sigaction sys_rt_sigaction +14 n64 rt_sigprocmask sys_rt_sigprocmask +15 n64 ioctl sys_ioctl +16 n64 pread64 sys_pread64 +17 n64 pwrite64 sys_pwrite64 +18 n64 readv sys_readv +19 n64 writev sys_writev +20 n64 access sys_access +21 n64 pipe sysm_pipe +22 n64 _newselect sys_select +23 n64 sched_yield sys_sched_yield +24 n64 mremap sys_mremap +25 n64 msync sys_msync +26 n64 mincore sys_mincore +27 n64 madvise sys_madvise +28 n64 shmget sys_shmget +29 n64 shmat sys_shmat +30 n64 shmctl sys_old_shmctl +31 n64 dup sys_dup +32 n64 dup2 sys_dup2 +33 n64 pause sys_pause +34 n64 nanosleep sys_nanosleep +35 n64 getitimer sys_getitimer +36 n64 setitimer sys_setitimer +37 n64 alarm sys_alarm +38 n64 getpid sys_getpid +39 n64 sendfile sys_sendfile64 +40 n64 socket sys_socket +41 n64 connect sys_connect +42 n64 accept sys_accept +43 n64 sendto sys_sendto +44 n64 recvfrom sys_recvfrom +45 n64 sendmsg sys_sendmsg +46 n64 recvmsg sys_recvmsg +47 n64 shutdown sys_shutdown +48 n64 bind sys_bind +49 n64 listen sys_listen +50 n64 getsockname sys_getsockname +51 n64 getpeername sys_getpeername +52 n64 socketpair sys_socketpair +53 n64 setsockopt sys_setsockopt +54 n64 getsockopt sys_getsockopt +55 n64 clone __sys_clone +56 n64 fork __sys_fork +57 n64 execve sys_execve +58 n64 exit sys_exit +59 n64 wait4 sys_wait4 +60 n64 kill sys_kill +61 n64 uname sys_newuname +62 n64 semget sys_semget +63 n64 semop sys_semop +64 n64 semctl sys_old_semctl +65 n64 shmdt sys_shmdt +66 n64 msgget sys_msgget +67 n64 msgsnd sys_msgsnd +68 n64 msgrcv sys_msgrcv +69 n64 msgctl sys_old_msgctl +70 n64 fcntl sys_fcntl +71 n64 flock sys_flock +72 n64 fsync sys_fsync +73 n64 fdatasync sys_fdatasync +74 n64 truncate sys_truncate +75 n64 ftruncate sys_ftruncate +76 n64 getdents sys_getdents +77 n64 getcwd sys_getcwd +78 n64 chdir sys_chdir +79 n64 fchdir sys_fchdir +80 n64 rename sys_rename +81 n64 mkdir sys_mkdir +82 n64 rmdir sys_rmdir +83 n64 creat sys_creat +84 n64 link sys_link +85 n64 unlink sys_unlink +86 n64 symlink sys_symlink +87 n64 readlink sys_readlink +88 n64 chmod sys_chmod +89 n64 fchmod sys_fchmod +90 n64 chown sys_chown +91 n64 fchown sys_fchown +92 n64 lchown sys_lchown +93 n64 umask sys_umask +94 n64 gettimeofday sys_gettimeofday +95 n64 getrlimit sys_getrlimit +96 n64 getrusage sys_getrusage +97 n64 sysinfo sys_sysinfo +98 n64 times sys_times +99 n64 ptrace sys_ptrace +100 n64 getuid sys_getuid +101 n64 syslog sys_syslog +102 n64 getgid sys_getgid +103 n64 setuid sys_setuid +104 n64 setgid sys_setgid +105 n64 geteuid sys_geteuid +106 n64 getegid sys_getegid +107 n64 setpgid sys_setpgid +108 n64 getppid sys_getppid +109 n64 getpgrp sys_getpgrp +110 n64 setsid sys_setsid +111 n64 setreuid sys_setreuid +112 n64 setregid sys_setregid +113 n64 getgroups sys_getgroups +114 n64 setgroups sys_setgroups +115 n64 setresuid sys_setresuid +116 n64 getresuid sys_getresuid +117 n64 setresgid sys_setresgid +118 n64 getresgid sys_getresgid +119 n64 getpgid sys_getpgid +120 n64 setfsuid sys_setfsuid +121 n64 setfsgid sys_setfsgid +122 n64 getsid sys_getsid +123 n64 capget sys_capget +124 n64 capset sys_capset +125 n64 rt_sigpending sys_rt_sigpending +126 n64 rt_sigtimedwait sys_rt_sigtimedwait +127 n64 rt_sigqueueinfo sys_rt_sigqueueinfo +128 n64 rt_sigsuspend sys_rt_sigsuspend +129 n64 sigaltstack sys_sigaltstack +130 n64 utime sys_utime +131 n64 mknod sys_mknod +132 n64 personality sys_personality +133 n64 ustat sys_ustat +134 n64 statfs sys_statfs +135 n64 fstatfs sys_fstatfs +136 n64 sysfs sys_sysfs +137 n64 getpriority sys_getpriority +138 n64 setpriority sys_setpriority +139 n64 sched_setparam sys_sched_setparam +140 n64 sched_getparam sys_sched_getparam +141 n64 sched_setscheduler sys_sched_setscheduler +142 n64 sched_getscheduler sys_sched_getscheduler +143 n64 sched_get_priority_max sys_sched_get_priority_max +144 n64 sched_get_priority_min sys_sched_get_priority_min +145 n64 sched_rr_get_interval sys_sched_rr_get_interval +146 n64 mlock sys_mlock +147 n64 munlock sys_munlock +148 n64 mlockall sys_mlockall +149 n64 munlockall sys_munlockall +150 n64 vhangup sys_vhangup +151 n64 pivot_root sys_pivot_root +152 n64 _sysctl sys_ni_syscall +153 n64 prctl sys_prctl +154 n64 adjtimex sys_adjtimex +155 n64 setrlimit sys_setrlimit +156 n64 chroot sys_chroot +157 n64 sync sys_sync +158 n64 acct sys_acct +159 n64 settimeofday sys_settimeofday +160 n64 mount sys_mount +161 n64 umount2 sys_umount +162 n64 swapon sys_swapon +163 n64 swapoff sys_swapoff +164 n64 reboot sys_reboot +165 n64 sethostname sys_sethostname +166 n64 setdomainname sys_setdomainname +167 n64 create_module sys_ni_syscall +168 n64 init_module sys_init_module +169 n64 delete_module sys_delete_module +170 n64 get_kernel_syms sys_ni_syscall +171 n64 query_module sys_ni_syscall +172 n64 quotactl sys_quotactl +173 n64 nfsservctl sys_ni_syscall +174 n64 getpmsg sys_ni_syscall +175 n64 putpmsg sys_ni_syscall +176 n64 afs_syscall sys_ni_syscall +# 177 reserved for security +177 n64 reserved177 sys_ni_syscall +178 n64 gettid sys_gettid +179 n64 readahead sys_readahead +180 n64 setxattr sys_setxattr +181 n64 lsetxattr sys_lsetxattr +182 n64 fsetxattr sys_fsetxattr +183 n64 getxattr sys_getxattr +184 n64 lgetxattr sys_lgetxattr +185 n64 fgetxattr sys_fgetxattr +186 n64 listxattr sys_listxattr +187 n64 llistxattr sys_llistxattr +188 n64 flistxattr sys_flistxattr +189 n64 removexattr sys_removexattr +190 n64 lremovexattr sys_lremovexattr +191 n64 fremovexattr sys_fremovexattr +192 n64 tkill sys_tkill +193 n64 reserved193 sys_ni_syscall +194 n64 futex sys_futex +195 n64 sched_setaffinity sys_sched_setaffinity +196 n64 sched_getaffinity sys_sched_getaffinity +197 n64 cacheflush sys_cacheflush +198 n64 cachectl sys_cachectl +199 n64 sysmips __sys_sysmips +200 n64 io_setup sys_io_setup +201 n64 io_destroy sys_io_destroy +202 n64 io_getevents sys_io_getevents +203 n64 io_submit sys_io_submit +204 n64 io_cancel sys_io_cancel +205 n64 exit_group sys_exit_group +206 n64 lookup_dcookie sys_lookup_dcookie +207 n64 epoll_create sys_epoll_create +208 n64 epoll_ctl sys_epoll_ctl +209 n64 epoll_wait sys_epoll_wait +210 n64 remap_file_pages sys_remap_file_pages +211 n64 rt_sigreturn sys_rt_sigreturn +212 n64 set_tid_address sys_set_tid_address +213 n64 restart_syscall sys_restart_syscall +214 n64 semtimedop sys_semtimedop +215 n64 fadvise64 sys_fadvise64_64 +216 n64 timer_create sys_timer_create +217 n64 timer_settime sys_timer_settime +218 n64 timer_gettime sys_timer_gettime +219 n64 timer_getoverrun sys_timer_getoverrun +220 n64 timer_delete sys_timer_delete +221 n64 clock_settime sys_clock_settime +222 n64 clock_gettime sys_clock_gettime +223 n64 clock_getres sys_clock_getres +224 n64 clock_nanosleep sys_clock_nanosleep +225 n64 tgkill sys_tgkill +226 n64 utimes sys_utimes +227 n64 mbind sys_mbind +228 n64 get_mempolicy sys_get_mempolicy +229 n64 set_mempolicy sys_set_mempolicy +230 n64 mq_open sys_mq_open +231 n64 mq_unlink sys_mq_unlink +232 n64 mq_timedsend sys_mq_timedsend +233 n64 mq_timedreceive sys_mq_timedreceive +234 n64 mq_notify sys_mq_notify +235 n64 mq_getsetattr sys_mq_getsetattr +236 n64 vserver sys_ni_syscall +237 n64 waitid sys_waitid +# 238 was sys_setaltroot +239 n64 add_key sys_add_key +240 n64 request_key sys_request_key +241 n64 keyctl sys_keyctl +242 n64 set_thread_area sys_set_thread_area +243 n64 inotify_init sys_inotify_init +244 n64 inotify_add_watch sys_inotify_add_watch +245 n64 inotify_rm_watch sys_inotify_rm_watch +246 n64 migrate_pages sys_migrate_pages +247 n64 openat sys_openat +248 n64 mkdirat sys_mkdirat +249 n64 mknodat sys_mknodat +250 n64 fchownat sys_fchownat +251 n64 futimesat sys_futimesat +252 n64 newfstatat sys_newfstatat +253 n64 unlinkat sys_unlinkat +254 n64 renameat sys_renameat +255 n64 linkat sys_linkat +256 n64 symlinkat sys_symlinkat +257 n64 readlinkat sys_readlinkat +258 n64 fchmodat sys_fchmodat +259 n64 faccessat sys_faccessat +260 n64 pselect6 sys_pselect6 +261 n64 ppoll sys_ppoll +262 n64 unshare sys_unshare +263 n64 splice sys_splice +264 n64 sync_file_range sys_sync_file_range +265 n64 tee sys_tee +266 n64 vmsplice sys_vmsplice +267 n64 move_pages sys_move_pages +268 n64 set_robust_list sys_set_robust_list +269 n64 get_robust_list sys_get_robust_list +270 n64 kexec_load sys_kexec_load +271 n64 getcpu sys_getcpu +272 n64 epoll_pwait sys_epoll_pwait +273 n64 ioprio_set sys_ioprio_set +274 n64 ioprio_get sys_ioprio_get +275 n64 utimensat sys_utimensat +276 n64 signalfd sys_signalfd +277 n64 timerfd sys_ni_syscall +278 n64 eventfd sys_eventfd +279 n64 fallocate sys_fallocate +280 n64 timerfd_create sys_timerfd_create +281 n64 timerfd_gettime sys_timerfd_gettime +282 n64 timerfd_settime sys_timerfd_settime +283 n64 signalfd4 sys_signalfd4 +284 n64 eventfd2 sys_eventfd2 +285 n64 epoll_create1 sys_epoll_create1 +286 n64 dup3 sys_dup3 +287 n64 pipe2 sys_pipe2 +288 n64 inotify_init1 sys_inotify_init1 +289 n64 preadv sys_preadv +290 n64 pwritev sys_pwritev +291 n64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo +292 n64 perf_event_open sys_perf_event_open +293 n64 accept4 sys_accept4 +294 n64 recvmmsg sys_recvmmsg +295 n64 fanotify_init sys_fanotify_init +296 n64 fanotify_mark sys_fanotify_mark +297 n64 prlimit64 sys_prlimit64 +298 n64 name_to_handle_at sys_name_to_handle_at +299 n64 open_by_handle_at sys_open_by_handle_at +300 n64 clock_adjtime sys_clock_adjtime +301 n64 syncfs sys_syncfs +302 n64 sendmmsg sys_sendmmsg +303 n64 setns sys_setns +304 n64 process_vm_readv sys_process_vm_readv +305 n64 process_vm_writev sys_process_vm_writev +306 n64 kcmp sys_kcmp +307 n64 finit_module sys_finit_module +308 n64 getdents64 sys_getdents64 +309 n64 sched_setattr sys_sched_setattr +310 n64 sched_getattr sys_sched_getattr +311 n64 renameat2 sys_renameat2 +312 n64 seccomp sys_seccomp +313 n64 getrandom sys_getrandom +314 n64 memfd_create sys_memfd_create +315 n64 bpf sys_bpf +316 n64 execveat sys_execveat +317 n64 userfaultfd sys_userfaultfd +318 n64 membarrier sys_membarrier +319 n64 mlock2 sys_mlock2 +320 n64 copy_file_range sys_copy_file_range +321 n64 preadv2 sys_preadv2 +322 n64 pwritev2 sys_pwritev2 +323 n64 pkey_mprotect sys_pkey_mprotect +324 n64 pkey_alloc sys_pkey_alloc +325 n64 pkey_free sys_pkey_free +326 n64 statx sys_statx +327 n64 rseq sys_rseq +328 n64 io_pgetevents sys_io_pgetevents +# 329 through 423 are reserved to sync up with other architectures +424 n64 pidfd_send_signal sys_pidfd_send_signal +425 n64 io_uring_setup sys_io_uring_setup +426 n64 io_uring_enter sys_io_uring_enter +427 n64 io_uring_register sys_io_uring_register +428 n64 open_tree sys_open_tree +429 n64 move_mount sys_move_mount +430 n64 fsopen sys_fsopen +431 n64 fsconfig sys_fsconfig +432 n64 fsmount sys_fsmount +433 n64 fspick sys_fspick +434 n64 pidfd_open sys_pidfd_open +435 n64 clone3 __sys_clone3 +436 n64 close_range sys_close_range +437 n64 openat2 sys_openat2 +438 n64 pidfd_getfd sys_pidfd_getfd +439 n64 faccessat2 sys_faccessat2 +440 n64 process_madvise sys_process_madvise +441 n64 epoll_pwait2 sys_epoll_pwait2 diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index dded93a2bc89..39eada935d4e 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -146,6 +146,7 @@ check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl check_2 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl check_2 tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl +check_2 tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl for i in $BEAUTY_FILES; do beauty_check $i -B diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 03bd99d3be16..a2e906858891 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -34,6 +34,10 @@ static const char **syscalltbl_native = syscalltbl_powerpc_32; #include const int syscalltbl_native_max_id = SYSCALLTBL_ARM64_MAX_ID; static const char **syscalltbl_native = syscalltbl_arm64; +#elif defined(__mips__) +#include +const int syscalltbl_native_max_id = SYSCALLTBL_MIPS_N64_MAX_ID; +static const char **syscalltbl_native = syscalltbl_mips_n64; #endif struct syscall { -- cgit v1.2.3 From 9bb8b74bdb186bd378cd5d510e8261538ca40094 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 19 Nov 2020 22:30:37 -0800 Subject: perf docs: Add man pages to see also Add all other man pages to the "see also" list except for perf-script-perl and perf-script-python that are linked to from perf-script. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20201120063037.3166069-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index c130a3c46a90..9c330cdfa973 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -76,3 +76,15 @@ SEE ALSO linkperf:perf-stat[1], linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-list[1] + +linkperf:perf-annotate[1],linkperf:perf-archive[1], +linkperf:perf-bench[1], linkperf:perf-buildid-cache[1], +linkperf:perf-buildid-list[1], linkperf:perf-c2c[1], +linkperf:perf-config[1], linkperf:perf-data[1], linkperf:perf-diff[1], +linkperf:perf-evlist[1], linkperf:perf-ftrace[1], +linkperf:perf-help[1], linkperf:perf-inject[1], +linkperf:perf-intel-pt[1], linkperf:perf-kallsyms[1], +linkperf:perf-kmem[1], linkperf:perf-kvm[1], linkperf:perf-lock[1], +linkperf:perf-mem[1], linkperf:perf-probe[1], linkperf:perf-sched[1], +linkperf:perf-script[1], linkperf:perf-test[1], +linkperf:perf-trace[1], linkperf:perf-version[1] -- cgit v1.2.3 From 34968b9327c83589e2867af3c5b9fd993666a514 Mon Sep 17 00:00:00 2001 From: Nicholas Fraser Date: Wed, 24 Feb 2021 14:59:16 -0500 Subject: perf buildid-cache: Add test for PE executable This builds on the previous changes to tests/shell/buildid.sh, adding tests for a PE file. It adds it to the build-id cache manually and, if Wine is available, runs it under "perf record" and verifies that it was added automatically. If wine is not installed, only warnings are printed; the test can still exit 0. Signed-off-by: Nicholas Fraser Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Huw Davies Cc: Ian Rogers Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ulrich Czekalla Link: http://lore.kernel.org/lkml/790bfe67-2155-a426-7130-ae7c45cb055b@codeweavers.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/buildid.sh | 65 ++++++++++++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 7 deletions(-) diff --git a/tools/perf/tests/shell/buildid.sh b/tools/perf/tests/shell/buildid.sh index 416af614bbe0..f05670d1e39e 100755 --- a/tools/perf/tests/shell/buildid.sh +++ b/tools/perf/tests/shell/buildid.sh @@ -14,18 +14,56 @@ if ! [ -x "$(command -v cc)" ]; then exit 2 fi +# check what we need to test windows binaries +add_pe=1 +run_pe=1 +if ! perf version --build-options | grep -q 'libbfd: .* on '; then + echo "WARNING: perf not built with libbfd. PE binaries will not be tested." + add_pe=0 + run_pe=0 +fi +if ! which wine > /dev/null; then + echo "WARNING: wine not found. PE binaries will not be run." + run_pe=0 +fi + +# set up wine +if [ ${run_pe} -eq 1 ]; then + wineprefix=$(mktemp -d /tmp/perf.wineprefix.XXX) + export WINEPREFIX=${wineprefix} + # clear display variables to prevent wine from popping up dialogs + unset DISPLAY + unset WAYLAND_DISPLAY +fi + ex_md5=$(mktemp /tmp/perf.ex.MD5.XXX) ex_sha1=$(mktemp /tmp/perf.ex.SHA1.XXX) +ex_pe=$(dirname $0)/../pe-file.exe echo 'int main(void) { return 0; }' | cc -Wl,--build-id=sha1 -o ${ex_sha1} -x c - echo 'int main(void) { return 0; }' | cc -Wl,--build-id=md5 -o ${ex_md5} -x c - -echo "test binaries: ${ex_sha1} ${ex_md5}" +echo "test binaries: ${ex_sha1} ${ex_md5} ${ex_pe}" check() { - id=`readelf -n ${1} 2>/dev/null | grep 'Build ID' | awk '{print $3}'` - + case $1 in + *.exe) + # We don't have a tool that can pull a nicely formatted build-id out of + # a PE file, but we can extract the whole section with objcopy and + # format it ourselves. The .buildid section is a Debug Directory + # containing a CodeView entry: + # https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#debug-directory-image-only + # https://github.com/dotnet/runtime/blob/da94c022576a5c3bbc0e896f006565905eb137f9/docs/design/specs/PE-COFF.md + # The build-id starts at byte 33 and must be rearranged into a GUID. + id=`objcopy -O binary --only-section=.buildid $1 /dev/stdout | \ + cut -c 33-48 | hexdump -ve '/1 "%02x"' | \ + sed 's@^\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(..\)\(.*\)0a$@\4\3\2\1\6\5\8\7\9@'` + ;; + *) + id=`readelf -n ${1} 2>/dev/null | grep 'Build ID' | awk '{print $3}'` + ;; + esac echo "build id: ${id}" link=${build_id_dir}/.build-id/${id:0:2}/${id:2} @@ -50,7 +88,7 @@ check() exit 1 fi - ${perf} buildid-cache -l | grep $id + ${perf} buildid-cache -l | grep ${id} if [ $? -ne 0 ]; then echo "failed: ${id} is not reported by \"perf buildid-cache -l\"" exit 1 @@ -79,16 +117,20 @@ test_record() { data=$(mktemp /tmp/perf.data.XXX) build_id_dir=$(mktemp -d /tmp/perf.debug.XXX) + log=$(mktemp /tmp/perf.log.XXX) perf="perf --buildid-dir ${build_id_dir}" - ${perf} record --buildid-all -o ${data} ${1} + echo "running: perf record $@" + ${perf} record --buildid-all -o ${data} $@ &> ${log} if [ $? -ne 0 ]; then - echo "failed: record ${1}" + echo "failed: record $@" + echo "see log: ${log}" exit 1 fi - check ${1} + check ${@: -1} + rm -f ${log} rm -rf ${build_id_dir} rm -rf ${data} } @@ -96,12 +138,21 @@ test_record() # add binaries manual via perf buildid-cache -a test_add ${ex_sha1} test_add ${ex_md5} +if [ ${add_pe} -eq 1 ]; then + test_add ${ex_pe} +fi # add binaries via perf record post processing test_record ${ex_sha1} test_record ${ex_md5} +if [ ${run_pe} -eq 1 ]; then + test_record wine ${ex_pe} +fi # cleanup rm ${ex_sha1} ${ex_md5} +if [ ${run_pe} -eq 1 ]; then + rm -r ${wineprefix} +fi exit ${err} -- cgit v1.2.3 From 83bf6fb8b076c72fe42e7d0fab5a5c98b5e2a11a Mon Sep 17 00:00:00 2001 From: Paul A. Clarke Date: Wed, 24 Feb 2021 12:14:36 -0600 Subject: perf vendor events power9: Remove unsupported metrics Several metrics are defined based on unsupported / non-existent events, and silently discarded. Remove them for good code hygiene and to avoid confusion. Signed-off-by: Paul A. Clarke Reviewed-by: Kajol Jain Cc: Ananth N Mavinakayanahalli Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Naveen N. Rao Cc: Sukadev Bhattiprolu Link: https://lore.kernel.org/r/20210224181436.782091-1-pc@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/powerpc/power9/metrics.json | 132 --------------------- 1 file changed, 132 deletions(-) diff --git a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json index f8784c608479..140402d2855f 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json @@ -1209,156 +1209,24 @@ "MetricGroup": "instruction_stats_percent_per_ref", "MetricName": "inst_from_rmem_percent" }, - { - "BriefDescription": "%L2 Modified CO Cache read Utilization (4 pclks per disp attempt)", - "MetricExpr": "((PM_L2_CASTOUT_MOD/2)*4)/ PM_RUN_CYC * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_co_m_rd_util" - }, - { - "BriefDescription": "L2 dcache invalidates per run inst (per core)", - "MetricExpr": "(PM_L2_DC_INV / 2) / PM_RUN_INST_CMPL * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_dc_inv_rate_percent" - }, { "BriefDescription": "Demand load misses as a % of L2 LD dispatches (per thread)", "MetricExpr": "PM_L1_DCACHE_RELOAD_VALID / (PM_L2_LD / 2) * 100", "MetricGroup": "l2_stats", "MetricName": "l2_dem_ld_disp_percent" }, - { - "BriefDescription": "L2 Icache invalidates per run inst (per core)", - "MetricExpr": "(PM_L2_IC_INV / 2) / PM_RUN_INST_CMPL * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_ic_inv_rate_percent" - }, - { - "BriefDescription": "L2 Inst misses as a % of total L2 Inst dispatches (per thread)", - "MetricExpr": "PM_L2_INST_MISS / PM_L2_INST * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_inst_miss_ratio_percent" - }, - { - "BriefDescription": "Average number of cycles between L2 Load hits", - "MetricExpr": "(PM_L2_LD_HIT / PM_RUN_CYC) / 2", - "MetricGroup": "l2_stats", - "MetricName": "l2_ld_hit_frequency" - }, - { - "BriefDescription": "Average number of cycles between L2 Load misses", - "MetricExpr": "(PM_L2_LD_MISS / PM_RUN_CYC) / 2", - "MetricGroup": "l2_stats", - "MetricName": "l2_ld_miss_frequency" - }, - { - "BriefDescription": "L2 Load misses as a % of total L2 Load dispatches (per thread)", - "MetricExpr": "PM_L2_LD_MISS / PM_L2_LD * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_ld_miss_ratio_percent" - }, - { - "BriefDescription": "% L2 load disp attempts Cache read Utilization (4 pclks per disp attempt)", - "MetricExpr": "((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_ld_rd_util" - }, - { - "BriefDescription": "L2 load misses that require a cache write (4 pclks per disp attempt) % of pclks", - "MetricExpr": "((( PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4)/ PM_RUN_CYC * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_ldmiss_wr_util" - }, - { - "BriefDescription": "L2 local pump prediction success", - "MetricExpr": "PM_L2_LOC_GUESS_CORRECT / (PM_L2_LOC_GUESS_CORRECT + PM_L2_LOC_GUESS_WRONG) * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_local_pred_correct_percent" - }, - { - "BriefDescription": "L2 COs that were in M,Me,Mu state as a % of all L2 COs", - "MetricExpr": "PM_L2_CASTOUT_MOD / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_mod_co_percent" - }, - { - "BriefDescription": "% of L2 Load RC dispatch atampts that failed because of address collisions and cclass conflicts", - "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR )/ PM_L2_RCLD_DISP * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_rc_ld_disp_addr_fail_percent" - }, - { - "BriefDescription": "% of L2 Load RC dispatch attempts that failed", - "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR + PM_L2_RCLD_DISP_FAIL_OTHER)/ PM_L2_RCLD_DISP * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_rc_ld_disp_fail_percent" - }, - { - "BriefDescription": "% of L2 Store RC dispatch atampts that failed because of address collisions and cclass conflicts", - "MetricExpr": "PM_L2_RCST_DISP_FAIL_ADDR / PM_L2_RCST_DISP * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_rc_st_disp_addr_fail_percent" - }, - { - "BriefDescription": "% of L2 Store RC dispatch attempts that failed", - "MetricExpr": "(PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/ PM_L2_RCST_DISP * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_rc_st_disp_fail_percent" - }, - { - "BriefDescription": "L2 Cache Read Utilization (per core)", - "MetricExpr": "(((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100) + (((PM_L2_RCST_DISP/2)*4)/PM_RUN_CYC * 100) + (((PM_L2_CASTOUT_MOD/2)*4)/PM_RUN_CYC * 100)", - "MetricGroup": "l2_stats", - "MetricName": "l2_rd_util_percent" - }, - { - "BriefDescription": "L2 COs that were in T,Te,Si,S state as a % of all L2 COs", - "MetricExpr": "PM_L2_CASTOUT_SHR / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_shr_co_percent" - }, { "BriefDescription": "L2 Store misses as a % of total L2 Store dispatches (per thread)", "MetricExpr": "PM_L2_ST_MISS / PM_L2_ST * 100", "MetricGroup": "l2_stats", "MetricName": "l2_st_miss_ratio_percent" }, - { - "BriefDescription": "% L2 store disp attempts Cache read Utilization (4 pclks per disp attempt)", - "MetricExpr": "((PM_L2_RCST_DISP/2)*4) / PM_RUN_CYC * 100", - "MetricGroup": "l2_stats", - "MetricName": "l2_st_rd_util" - }, { "BriefDescription": "L2 stores that require a cache write (4 pclks per disp attempt) % of pclks", "MetricExpr": "((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100", "MetricGroup": "l2_stats", "MetricName": "l2_st_wr_util" }, - { - "BriefDescription": "L2 Cache Write Utilization (per core)", - "MetricExpr": "((((PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4) / PM_RUN_CYC * 100) + (((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100)", - "MetricGroup": "l2_stats", - "MetricName": "l2_wr_util_percent" - }, - { - "BriefDescription": "Average number of cycles between L3 Load hits", - "MetricExpr": "(PM_L3_LD_HIT / PM_RUN_CYC) / 2", - "MetricGroup": "l3_stats", - "MetricName": "l3_ld_hit_frequency" - }, - { - "BriefDescription": "Average number of cycles between L3 Load misses", - "MetricExpr": "(PM_L3_LD_MISS / PM_RUN_CYC) / 2", - "MetricGroup": "l3_stats", - "MetricName": "l3_ld_miss_frequency" - }, - { - "BriefDescription": "Average number of Write-in machines used. 1 of 8 WI machines is sampled every L3 cycle", - "MetricExpr": "(PM_L3_WI_USAGE / PM_RUN_CYC) * 8", - "MetricGroup": "l3_stats", - "MetricName": "l3_wi_usage" - }, { "BriefDescription": "Average icache miss latency", "MetricExpr": "PM_IC_DEMAND_CYC / PM_IC_DEMAND_REQ", -- cgit v1.2.3 From 42b2b570b34afb5fb9dc16ac77cb332194136a85 Mon Sep 17 00:00:00 2001 From: Mike Leach Date: Wed, 24 Feb 2021 09:48:30 -0700 Subject: perf cs-etm: Update ETM metadata format The current fixed metadata version format (version 0), means that adding metadata parameter items renders files from a previous version of perf unreadable. Per CPU parameters appear in a fixed order, but there is no field to indicate the number of ETM parameters per CPU. This patch updates the per CPU parameter blocks to include a NR_PARAMs value which indicates the number of parameters in the block. The header version is incremented to 1. Fixed ordering is retained, new ETM parameters are added to the end of the list. The reader code is updated to be able to read current version 0 files, For version 1, the reader will read the number of parameters in the per CPU block. This allows the reader to process older or newer files that may have different numbers of parameters than in use at the time perf was built. Signed-off-by: Mike Leach Reviewed-by: Leo Yan Tested-by: Leo Yan Link: https://lore.kernel.org/r/20210202214040.32349-1-mike.leach@linaro.org Link: https://lore.kernel.org/r/20210224164835.3497311-2-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 7 +- tools/perf/util/cs-etm.c | 235 +++++++++++++++++++++++++++++++------- tools/perf/util/cs-etm.h | 30 ++++- 3 files changed, 223 insertions(+), 49 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index bd446aba64f7..b0470f2a955a 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -572,7 +572,7 @@ static void cs_etm_get_metadata(int cpu, u32 *offset, struct auxtrace_record *itr, struct perf_record_auxtrace_info *info) { - u32 increment; + u32 increment, nr_trc_params; u64 magic; struct cs_etm_recording *ptr = container_of(itr, struct cs_etm_recording, itr); @@ -607,6 +607,7 @@ static void cs_etm_get_metadata(int cpu, u32 *offset, /* How much space was used */ increment = CS_ETMV4_PRIV_MAX; + nr_trc_params = CS_ETMV4_PRIV_MAX - CS_ETMV4_TRCCONFIGR; } else { magic = __perf_cs_etmv3_magic; /* Get configuration register */ @@ -624,11 +625,13 @@ static void cs_etm_get_metadata(int cpu, u32 *offset, /* How much space was used */ increment = CS_ETM_PRIV_MAX; + nr_trc_params = CS_ETM_PRIV_MAX - CS_ETM_ETMCR; } /* Build generic header portion */ info->priv[*offset + CS_ETM_MAGIC] = magic; info->priv[*offset + CS_ETM_CPU] = cpu; + info->priv[*offset + CS_ETM_NR_TRC_PARAMS] = nr_trc_params; /* Where the next CPU entry should start from */ *offset += increment; } @@ -674,7 +677,7 @@ static int cs_etm_info_fill(struct auxtrace_record *itr, /* First fill out the session header */ info->type = PERF_AUXTRACE_CS_ETM; - info->priv[CS_HEADER_VERSION_0] = 0; + info->priv[CS_HEADER_VERSION] = CS_HEADER_CURRENT_VERSION; info->priv[CS_PMU_TYPE_CPUS] = type << 32; info->priv[CS_PMU_TYPE_CPUS] |= nr_cpu; info->priv[CS_ETM_SNAPSHOT] = ptr->snapshot_mode; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index a2a369e2fbb6..ee32d023e9bd 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2435,7 +2435,7 @@ static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm) } static const char * const cs_etm_global_header_fmts[] = { - [CS_HEADER_VERSION_0] = " Header version %llx\n", + [CS_HEADER_VERSION] = " Header version %llx\n", [CS_PMU_TYPE_CPUS] = " PMU type/num cpus %llx\n", [CS_ETM_SNAPSHOT] = " Snapshot %llx\n", }; @@ -2443,6 +2443,7 @@ static const char * const cs_etm_global_header_fmts[] = { static const char * const cs_etm_priv_fmts[] = { [CS_ETM_MAGIC] = " Magic number %llx\n", [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", [CS_ETM_ETMCR] = " ETMCR %llx\n", [CS_ETM_ETMTRACEIDR] = " ETMTRACEIDR %llx\n", [CS_ETM_ETMCCER] = " ETMCCER %llx\n", @@ -2452,6 +2453,7 @@ static const char * const cs_etm_priv_fmts[] = { static const char * const cs_etmv4_priv_fmts[] = { [CS_ETM_MAGIC] = " Magic number %llx\n", [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETM_NR_TRC_PARAMS] = " NR_TRC_PARAMS %llx\n", [CS_ETMV4_TRCCONFIGR] = " TRCCONFIGR %llx\n", [CS_ETMV4_TRCTRACEIDR] = " TRCTRACEIDR %llx\n", [CS_ETMV4_TRCIDR0] = " TRCIDR0 %llx\n", @@ -2461,26 +2463,167 @@ static const char * const cs_etmv4_priv_fmts[] = { [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", }; -static void cs_etm__print_auxtrace_info(__u64 *val, int num) +static const char * const param_unk_fmt = + " Unknown parameter [%d] %llx\n"; +static const char * const magic_unk_fmt = + " Magic number Unknown %llx\n"; + +static int cs_etm__print_cpu_metadata_v0(__u64 *val, int *offset) { - int i, j, cpu = 0; + int i = *offset, j, nr_params = 0, fmt_offset; + __u64 magic; - for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) - fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); + /* check magic value */ + magic = val[i + CS_ETM_MAGIC]; + if ((magic != __perf_cs_etmv3_magic) && + (magic != __perf_cs_etmv4_magic)) { + /* failure - note bad magic value */ + fprintf(stdout, magic_unk_fmt, magic); + return -EINVAL; + } + + /* print common header block */ + fprintf(stdout, cs_etm_priv_fmts[CS_ETM_MAGIC], val[i++]); + fprintf(stdout, cs_etm_priv_fmts[CS_ETM_CPU], val[i++]); + + if (magic == __perf_cs_etmv3_magic) { + nr_params = CS_ETM_NR_TRC_PARAMS_V0; + fmt_offset = CS_ETM_ETMCR; + /* after common block, offset format index past NR_PARAMS */ + for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++) + fprintf(stdout, cs_etm_priv_fmts[j], val[i]); + } else if (magic == __perf_cs_etmv4_magic) { + nr_params = CS_ETMV4_NR_TRC_PARAMS_V0; + fmt_offset = CS_ETMV4_TRCCONFIGR; + /* after common block, offset format index past NR_PARAMS */ + for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++) + fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); + } + *offset = i; + return 0; +} + +static int cs_etm__print_cpu_metadata_v1(__u64 *val, int *offset) +{ + int i = *offset, j, total_params = 0; + __u64 magic; + + magic = val[i + CS_ETM_MAGIC]; + /* total params to print is NR_PARAMS + common block size for v1 */ + total_params = val[i + CS_ETM_NR_TRC_PARAMS] + CS_ETM_COMMON_BLK_MAX_V1; - for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) { - if (val[i] == __perf_cs_etmv3_magic) - for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++) + if (magic == __perf_cs_etmv3_magic) { + for (j = 0; j < total_params; j++, i++) { + /* if newer record - could be excess params */ + if (j >= CS_ETM_PRIV_MAX) + fprintf(stdout, param_unk_fmt, j, val[i]); + else fprintf(stdout, cs_etm_priv_fmts[j], val[i]); - else if (val[i] == __perf_cs_etmv4_magic) - for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++) + } + } else if (magic == __perf_cs_etmv4_magic) { + for (j = 0; j < total_params; j++, i++) { + /* if newer record - could be excess params */ + if (j >= CS_ETMV4_PRIV_MAX) + fprintf(stdout, param_unk_fmt, j, val[i]); + else fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); - else - /* failure.. return */ + } + } else { + /* failure - note bad magic value and error out */ + fprintf(stdout, magic_unk_fmt, magic); + return -EINVAL; + } + *offset = i; + return 0; +} + +static void cs_etm__print_auxtrace_info(__u64 *val, int num) +{ + int i, cpu = 0, version, err; + + /* bail out early on bad header version */ + version = val[0]; + if (version > CS_HEADER_CURRENT_VERSION) { + /* failure.. return */ + fprintf(stdout, " Unknown Header Version = %x, ", version); + fprintf(stdout, "Version supported <= %x\n", CS_HEADER_CURRENT_VERSION); + return; + } + + for (i = 0; i < CS_HEADER_VERSION_MAX; i++) + fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); + + for (i = CS_HEADER_VERSION_MAX; cpu < num; cpu++) { + if (version == 0) + err = cs_etm__print_cpu_metadata_v0(val, &i); + else if (version == 1) + err = cs_etm__print_cpu_metadata_v1(val, &i); + if (err) return; } } +/* + * Read a single cpu parameter block from the auxtrace_info priv block. + * + * For version 1 there is a per cpu nr_params entry. If we are handling + * version 1 file, then there may be less, the same, or more params + * indicated by this value than the compile time number we understand. + * + * For a version 0 info block, there are a fixed number, and we need to + * fill out the nr_param value in the metadata we create. + */ +static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset, + int out_blk_size, int nr_params_v0) +{ + u64 *metadata = NULL; + int hdr_version; + int nr_in_params, nr_out_params, nr_cmn_params; + int i, k; + + metadata = zalloc(sizeof(*metadata) * out_blk_size); + if (!metadata) + return NULL; + + /* read block current index & version */ + i = *buff_in_offset; + hdr_version = buff_in[CS_HEADER_VERSION]; + + if (!hdr_version) { + /* read version 0 info block into a version 1 metadata block */ + nr_in_params = nr_params_v0; + metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC]; + metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU]; + metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params; + /* remaining block params at offset +1 from source */ + for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++) + metadata[k + 1] = buff_in[i + k]; + /* version 0 has 2 common params */ + nr_cmn_params = 2; + } else { + /* read version 1 info block - input and output nr_params may differ */ + /* version 1 has 3 common params */ + nr_cmn_params = 3; + nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS]; + + /* if input has more params than output - skip excess */ + nr_out_params = nr_in_params + nr_cmn_params; + if (nr_out_params > out_blk_size) + nr_out_params = out_blk_size; + + for (k = CS_ETM_MAGIC; k < nr_out_params; k++) + metadata[k] = buff_in[i + k]; + + /* record the actual nr params we copied */ + metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params; + } + + /* adjust in offset by number of in params used */ + i += nr_in_params + nr_cmn_params; + *buff_in_offset = i; + return metadata; +} + int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session) { @@ -2492,11 +2635,12 @@ int cs_etm__process_auxtrace_info(union perf_event *event, int info_header_size; int total_size = auxtrace_info->header.size; int priv_size = 0; - int num_cpu; - int err = 0, idx = -1; - int i, j, k; + int num_cpu, trcidr_idx; + int err = 0; + int i, j; u64 *ptr, *hdr = NULL; u64 **metadata = NULL; + u64 hdr_version; /* * sizeof(auxtrace_info_event::type) + @@ -2512,16 +2656,21 @@ int cs_etm__process_auxtrace_info(union perf_event *event, /* First the global part */ ptr = (u64 *) auxtrace_info->priv; - /* Look for version '0' of the header */ - if (ptr[0] != 0) + /* Look for version of the header */ + hdr_version = ptr[0]; + if (hdr_version > CS_HEADER_CURRENT_VERSION) { + /* print routine will print an error on bad version */ + if (dump_trace) + cs_etm__print_auxtrace_info(auxtrace_info->priv, 0); return -EINVAL; + } - hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX); + hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_MAX); if (!hdr) return -ENOMEM; /* Extract header information - see cs-etm.h for format */ - for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) + for (i = 0; i < CS_HEADER_VERSION_MAX; i++) hdr[i] = ptr[i]; num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff; pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) & @@ -2552,35 +2701,31 @@ int cs_etm__process_auxtrace_info(union perf_event *event, */ for (j = 0; j < num_cpu; j++) { if (ptr[i] == __perf_cs_etmv3_magic) { - metadata[j] = zalloc(sizeof(*metadata[j]) * - CS_ETM_PRIV_MAX); - if (!metadata[j]) { - err = -ENOMEM; - goto err_free_metadata; - } - for (k = 0; k < CS_ETM_PRIV_MAX; k++) - metadata[j][k] = ptr[i + k]; + metadata[j] = + cs_etm__create_meta_blk(ptr, &i, + CS_ETM_PRIV_MAX, + CS_ETM_NR_TRC_PARAMS_V0); /* The traceID is our handle */ - idx = metadata[j][CS_ETM_ETMTRACEIDR]; - i += CS_ETM_PRIV_MAX; + trcidr_idx = CS_ETM_ETMTRACEIDR; + } else if (ptr[i] == __perf_cs_etmv4_magic) { - metadata[j] = zalloc(sizeof(*metadata[j]) * - CS_ETMV4_PRIV_MAX); - if (!metadata[j]) { - err = -ENOMEM; - goto err_free_metadata; - } - for (k = 0; k < CS_ETMV4_PRIV_MAX; k++) - metadata[j][k] = ptr[i + k]; + metadata[j] = + cs_etm__create_meta_blk(ptr, &i, + CS_ETMV4_PRIV_MAX, + CS_ETMV4_NR_TRC_PARAMS_V0); /* The traceID is our handle */ - idx = metadata[j][CS_ETMV4_TRCTRACEIDR]; - i += CS_ETMV4_PRIV_MAX; + trcidr_idx = CS_ETMV4_TRCTRACEIDR; + } + + if (!metadata[j]) { + err = -ENOMEM; + goto err_free_metadata; } /* Get an RB node for this CPU */ - inode = intlist__findnew(traceid_list, idx); + inode = intlist__findnew(traceid_list, metadata[j][trcidr_idx]); /* Something went wrong, no need to continue */ if (!inode) { @@ -2601,7 +2746,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event, } /* - * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and + * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and * CS_ETMV4_PRIV_MAX mark how many double words are in the * global metadata, and each cpu's metadata respectively. * The following tests if the correct number of double words was @@ -2703,6 +2848,12 @@ err_free_traceid_list: intlist__delete(traceid_list); err_free_hdr: zfree(&hdr); - + /* + * At this point, as a minimum we have valid header. Dump the rest of + * the info section - the print routines will error out on structural + * issues. + */ + if (dump_trace) + cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); return err; } diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 4ad925d6d799..e153d02df0de 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -17,23 +17,37 @@ struct perf_session; */ enum { /* Starting with 0x0 */ - CS_HEADER_VERSION_0, + CS_HEADER_VERSION, /* PMU->type (32 bit), total # of CPUs (32 bit) */ CS_PMU_TYPE_CPUS, CS_ETM_SNAPSHOT, - CS_HEADER_VERSION_0_MAX, + CS_HEADER_VERSION_MAX, }; +/* + * Update the version for new format. + * + * New version 1 format adds a param count to the per cpu metadata. + * This allows easy adding of new metadata parameters. + * Requires that new params always added after current ones. + * Also allows client reader to handle file versions that are different by + * checking the number of params in the file vs the number expected. + */ +#define CS_HEADER_CURRENT_VERSION 1 + /* Beginning of header common to both ETMv3 and V4 */ enum { CS_ETM_MAGIC, CS_ETM_CPU, + /* Number of trace config params in following ETM specific block */ + CS_ETM_NR_TRC_PARAMS, + CS_ETM_COMMON_BLK_MAX_V1, }; /* ETMv3/PTM metadata */ enum { /* Dynamic, configurable parameters */ - CS_ETM_ETMCR = CS_ETM_CPU + 1, + CS_ETM_ETMCR = CS_ETM_COMMON_BLK_MAX_V1, CS_ETM_ETMTRACEIDR, /* RO, taken from sysFS */ CS_ETM_ETMCCER, @@ -41,10 +55,13 @@ enum { CS_ETM_PRIV_MAX, }; +/* define fixed version 0 length - allow new format reader to read old files. */ +#define CS_ETM_NR_TRC_PARAMS_V0 (CS_ETM_ETMIDR - CS_ETM_ETMCR + 1) + /* ETMv4 metadata */ enum { /* Dynamic, configurable parameters */ - CS_ETMV4_TRCCONFIGR = CS_ETM_CPU + 1, + CS_ETMV4_TRCCONFIGR = CS_ETM_COMMON_BLK_MAX_V1, CS_ETMV4_TRCTRACEIDR, /* RO, taken from sysFS */ CS_ETMV4_TRCIDR0, @@ -55,6 +72,9 @@ enum { CS_ETMV4_PRIV_MAX, }; +/* define fixed version 0 length - allow new format reader to read old files. */ +#define CS_ETMV4_NR_TRC_PARAMS_V0 (CS_ETMV4_TRCAUTHSTATUS - CS_ETMV4_TRCCONFIGR + 1) + /* * ETMv3 exception encoding number: * See Embedded Trace Macrocell spcification (ARM IHI 0014Q) @@ -162,7 +182,7 @@ struct cs_etm_packet_queue { #define BMVAL(val, lsb, msb) ((val & GENMASK(msb, lsb)) >> lsb) -#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64)) +#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_MAX * sizeof(u64)) #define __perf_cs_etmv3_magic 0x3030303030303030ULL #define __perf_cs_etmv4_magic 0x4040404040404040ULL -- cgit v1.2.3 From 2bb4ccbd95d7fbf58540c8d3d55cbabc8fb95e28 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 24 Feb 2021 09:48:31 -0700 Subject: tools headers UAPI: Update tools' copy of linux/coresight-pmu.h To get the changes in the commit: "coresight: etm-perf: Clarify comment on perf options". Signed-off-by: Leo Yan Reviewed-by: Suzuki K Poulose Reviewed-by: Mathieu Poirier Cc: Mike Leach Link: https://lore.kernel.org/r/20210213113220.292229-2-leo.yan@linaro.org Link: https://lore.kernel.org/r/20210224164835.3497311-3-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/coresight-pmu.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index b0e35eec6499..5dc47cfdcf07 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -10,11 +10,18 @@ #define CORESIGHT_ETM_PMU_NAME "cs_etm" #define CORESIGHT_ETM_PMU_SEED 0x10 -/* ETMv3.5/PTM's ETMCR config bit */ -#define ETM_OPT_CYCACC 12 -#define ETM_OPT_CTXTID 14 -#define ETM_OPT_TS 28 -#define ETM_OPT_RETSTK 29 +/* + * Below are the definition of bit offsets for perf option, and works as + * arbitrary values for all ETM versions. + * + * Most of them are orignally from ETMv3.5/PTM's ETMCR config, therefore, + * ETMv3.5/PTM doesn't define ETMCR config bits with prefix "ETM3_" and + * directly use below macros as config bits. + */ +#define ETM_OPT_CYCACC 12 +#define ETM_OPT_CTXTID 14 +#define ETM_OPT_TS 28 +#define ETM_OPT_RETSTK 29 /* ETMv4 CONFIGR programming bits for the ETM OPTs */ #define ETM4_CFG_BIT_CYCACC 4 -- cgit v1.2.3 From 8c559e8d68630d64d932bada633705f6551427df Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 24 Feb 2021 09:48:32 -0700 Subject: perf cs-etm: Fix bitmap for option When set option with macros ETM_OPT_CTXTID and ETM_OPT_TS, it wrongly takes these two values (14 and 28 prespectively) as bit masks, but actually both are the offset for bits. But this doesn't lead to further failure due to the AND logic operation will be always true for ETM_OPT_CTXTID / ETM_OPT_TS. This patch uses the BIT() macro for option bits, thus it can request the correct bitmaps for "contextid" and "timestamp" when calling cs_etm_set_option(). Signed-off-by: Suzuki K Poulose Reviewed-by: Mathieu Poirier Reviewed-by: Mike Leach Link: https://lore.kernel.org/r/20210213113220.292229-3-leo.yan@linaro.org Link: https://lore.kernel.org/r/20210224164835.3497311-4-mathieu.poirier@linaro.org [Extract the change as a separate patch for easier review] Signed-off-by: Leo Yan Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index b0470f2a955a..5b2bb7fc5ee1 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -169,17 +169,17 @@ static int cs_etm_set_option(struct auxtrace_record *itr, !cpu_map__has(online_cpus, i)) continue; - if (option & ETM_OPT_CTXTID) { + if (option & BIT(ETM_OPT_CTXTID)) { err = cs_etm_set_context_id(itr, evsel, i); if (err) goto out; } - if (option & ETM_OPT_TS) { + if (option & BIT(ETM_OPT_TS)) { err = cs_etm_set_timestamp(itr, evsel, i); if (err) goto out; } - if (option & ~(ETM_OPT_CTXTID | ETM_OPT_TS)) + if (option & ~(BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_TS))) /* Nothing else is currently supported */ goto out; } @@ -406,7 +406,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, evsel__set_sample_bit(cs_etm_evsel, CPU); err = cs_etm_set_option(itr, cs_etm_evsel, - ETM_OPT_CTXTID | ETM_OPT_TS); + BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_TS)); if (err) goto out; } -- cgit v1.2.3 From 30cb76aabfb4deab4ffef54882f86df319b4d862 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 24 Feb 2021 09:48:33 -0700 Subject: perf cs-etm: Support PID tracing in config If the kernel is running at EL2, the pid of a task is exposed via VMID instead of the CONTEXTID. Add support for this in the perf tool. This patch respects user setting if user has specified any configs from "contextid", "contextid1" or "contextid2"; otherwise, it dynamically sets config based on PMU format "contextid". Signed-off-by: Suzuki K Poulose Co-developed-by: Leo Yan Signed-off-by: Leo Yan Reviewed-by: Mike Leach Reviewed-by: Mathieu Poirier Cc: Al Grant Link: https://lore.kernel.org/r/20210213113220.292229-4-leo.yan@linaro.org Link: https://lore.kernel.org/r/20210224164835.3497311-5-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/coresight-pmu.h | 3 ++ tools/perf/arch/arm/util/cs-etm.c | 61 +++++++++++++++++++++++++++++-------- 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index 5dc47cfdcf07..4ac5c081af93 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -20,14 +20,17 @@ */ #define ETM_OPT_CYCACC 12 #define ETM_OPT_CTXTID 14 +#define ETM_OPT_CTXTID2 15 #define ETM_OPT_TS 28 #define ETM_OPT_RETSTK 29 /* ETMv4 CONFIGR programming bits for the ETM OPTs */ #define ETM4_CFG_BIT_CYCACC 4 #define ETM4_CFG_BIT_CTXTID 6 +#define ETM4_CFG_BIT_VMID 7 #define ETM4_CFG_BIT_TS 11 #define ETM4_CFG_BIT_RETSTK 12 +#define ETM4_CFG_BIT_VMID_OPT 15 static inline int coresight_get_trace_id(int cpu) { diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 5b2bb7fc5ee1..911c7f2b3581 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -67,6 +67,7 @@ static int cs_etm_set_context_id(struct auxtrace_record *itr, char path[PATH_MAX]; int err = -EINVAL; u32 val; + u64 contextid; ptr = container_of(itr, struct cs_etm_recording, itr); cs_etm_pmu = ptr->cs_etm_pmu; @@ -86,25 +87,59 @@ static int cs_etm_set_context_id(struct auxtrace_record *itr, goto out; } + /* User has configured for PID tracing, respects it. */ + contextid = evsel->core.attr.config & + (BIT(ETM_OPT_CTXTID) | BIT(ETM_OPT_CTXTID2)); + /* - * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID tracing - * is supported: - * 0b00000 Context ID tracing is not supported. - * 0b00100 Maximum of 32-bit Context ID size. - * All other values are reserved. + * If user doesn't configure the contextid format, parse PMU format and + * enable PID tracing according to the "contextid" format bits: + * + * If bit ETM_OPT_CTXTID is set, trace CONTEXTIDR_EL1; + * If bit ETM_OPT_CTXTID2 is set, trace CONTEXTIDR_EL2. */ - val = BMVAL(val, 5, 9); - if (!val || val != 0x4) { - err = -EINVAL; - goto out; + if (!contextid) + contextid = perf_pmu__format_bits(&cs_etm_pmu->format, + "contextid"); + + if (contextid & BIT(ETM_OPT_CTXTID)) { + /* + * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID + * tracing is supported: + * 0b00000 Context ID tracing is not supported. + * 0b00100 Maximum of 32-bit Context ID size. + * All other values are reserved. + */ + val = BMVAL(val, 5, 9); + if (!val || val != 0x4) { + pr_err("%s: CONTEXTIDR_EL1 isn't supported\n", + CORESIGHT_ETM_PMU_NAME); + err = -EINVAL; + goto out; + } + } + + if (contextid & BIT(ETM_OPT_CTXTID2)) { + /* + * TRCIDR2.VMIDOPT[30:29] != 0 and + * TRCIDR2.VMIDSIZE[14:10] == 0b00100 (32bit virtual contextid) + * We can't support CONTEXTIDR in VMID if the size of the + * virtual context id is < 32bit. + * Any value of VMIDSIZE >= 4 (i.e, > 32bit) is fine for us. + */ + if (!BMVAL(val, 29, 30) || BMVAL(val, 10, 14) < 4) { + pr_err("%s: CONTEXTIDR_EL2 isn't supported\n", + CORESIGHT_ETM_PMU_NAME); + err = -EINVAL; + goto out; + } } /* All good, let the kernel know */ - evsel->core.attr.config |= (1 << ETM_OPT_CTXTID); + evsel->core.attr.config |= contextid; err = 0; out: - return err; } @@ -485,7 +520,9 @@ static u64 cs_etmv4_get_config(struct auxtrace_record *itr) config |= BIT(ETM4_CFG_BIT_TS); if (config_opts & BIT(ETM_OPT_RETSTK)) config |= BIT(ETM4_CFG_BIT_RETSTK); - + if (config_opts & BIT(ETM_OPT_CTXTID2)) + config |= BIT(ETM4_CFG_BIT_VMID) | + BIT(ETM4_CFG_BIT_VMID_OPT); return config; } -- cgit v1.2.3 From 47f0d94c203751ddcfdb296fcf15df20fffcef0c Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 24 Feb 2021 09:48:34 -0700 Subject: perf cs-etm: Add helper cs_etm__get_pid_fmt() This patch adds helper function cs_etm__get_pid_fmt(), by passing parameter "traceID", it returns the PID format. Signed-off-by: Leo Yan Reviewed-by: Mathieu Poirier Reviewed-by: Suzuki K Poulose Cc: Mike Leach Link: https://lore.kernel.org/r/20210213113220.292229-5-leo.yan@linaro.org Link: https://lore.kernel.org/r/20210224164835.3497311-6-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 42 ++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/cs-etm.h | 1 + 2 files changed, 43 insertions(+) diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index ee32d023e9bd..9ac80fc23c58 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -156,6 +157,47 @@ int cs_etm__get_cpu(u8 trace_chan_id, int *cpu) return 0; } +/* + * The returned PID format is presented by two bits: + * + * Bit ETM_OPT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced; + * Bit ETM_OPT_CTXTID2: CONTEXTIDR_EL2 is traced. + * + * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2 + * are enabled at the same time when the session runs on an EL2 kernel. + * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be + * recorded in the trace data, the tool will selectively use + * CONTEXTIDR_EL2 as PID. + */ +int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt) +{ + struct int_node *inode; + u64 *metadata, val; + + inode = intlist__find(traceid_list, trace_chan_id); + if (!inode) + return -EINVAL; + + metadata = inode->priv; + + if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { + val = metadata[CS_ETM_ETMCR]; + /* CONTEXTIDR is traced */ + if (val & BIT(ETM_OPT_CTXTID)) + *pid_fmt = BIT(ETM_OPT_CTXTID); + } else { + val = metadata[CS_ETMV4_TRCCONFIGR]; + /* CONTEXTIDR_EL2 is traced */ + if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT))) + *pid_fmt = BIT(ETM_OPT_CTXTID2); + /* CONTEXTIDR_EL1 is traced */ + else if (val & BIT(ETM4_CFG_BIT_CTXTID)) + *pid_fmt = BIT(ETM_OPT_CTXTID); + } + + return 0; +} + void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, u8 trace_chan_id) { diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index e153d02df0de..85ed11e9d2a7 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -193,6 +193,7 @@ struct cs_etm_packet_queue { int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session); int cs_etm__get_cpu(u8 trace_chan_id, int *cpu); +int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt); int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq, pid_t tid, u8 trace_chan_id); bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq); -- cgit v1.2.3 From 8e1488a46dcf73b1f1916d95421e303dbf773fb4 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 24 Feb 2021 09:48:35 -0700 Subject: perf cs-etm: Detect pid in VMID for kernel running at EL2 The PID of the task could be traced as VMID when the kernel is running at EL2. Teach the decoder to look for VMID when the CONTEXTIDR (Arm32) or CONTEXTIDR_EL1 (Arm64) is invalid but we have a valid VMID. Signed-off-by: Suzuki K Poulose Co-developed-by: Leo Yan Reviewed-by: Mathieu Poirier Cc: Al Grant Cc: Mike Leach Link: https://lore.kernel.org/r/20210213113220.292229-6-leo.yan@linaro.org Link: https://lore.kernel.org/r/20210224164835.3497311-7-mathieu.poirier@linaro.org Signed-off-by: Leo Yan Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 38 ++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 3f4bc4050477..4052c9ce6e2f 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -6,6 +6,7 @@ * Author: Mathieu Poirier */ +#include #include #include #include @@ -491,13 +492,42 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, const ocsd_generic_trace_elem *elem, const uint8_t trace_chan_id) { - pid_t tid; + pid_t tid = -1; + static u64 pid_fmt; + int ret; - /* Ignore PE_CONTEXT packets that don't have a valid contextID */ - if (!elem->context.ctxt_id_valid) + /* + * As all the ETMs run at the same exception level, the system should + * have the same PID format crossing CPUs. So cache the PID format + * and reuse it for sequential decoding. + */ + if (!pid_fmt) { + ret = cs_etm__get_pid_fmt(trace_chan_id, &pid_fmt); + if (ret) + return OCSD_RESP_FATAL_SYS_ERR; + } + + /* + * Process the PE_CONTEXT packets if we have a valid contextID or VMID. + * If the kernel is running at EL2, the PID is traced in CONTEXTIDR_EL2 + * as VMID, Bit ETM_OPT_CTXTID2 is set in this case. + */ + switch (pid_fmt) { + case BIT(ETM_OPT_CTXTID): + if (elem->context.ctxt_id_valid) + tid = elem->context.context_id; + break; + case BIT(ETM_OPT_CTXTID2): + if (elem->context.vmid_valid) + tid = elem->context.vmid; + break; + default: + break; + } + + if (tid == -1) return OCSD_RESP_CONT; - tid = elem->context.context_id; if (cs_etm__etmq_set_tid(etmq, tid, trace_chan_id)) return OCSD_RESP_FATAL_SYS_ERR; -- cgit v1.2.3 From 81db00a4ea625c88925b00df9673472bd1b8c77f Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 25 Feb 2021 14:07:00 +0800 Subject: perf metric: Remove unneeded semicolon Fix the following coccicheck warnings: ./tools/perf/tests/parse-metric.c:101:2-3: Unneeded semicolon. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/1614233220-67326-1-git-send-email-jiapeng.chong@linux.alibaba.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-metric.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 6dc1db1626ad..55bf52e588be 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -98,7 +98,7 @@ static u64 find_value(const char *name, struct value *values) if (!strcmp(name, v->event)) return v->val; v++; - }; + } return 0; } -- cgit v1.2.3 From 2e989f82181cd414599c6afbc5a666356a3d1dd1 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 19 Feb 2021 15:00:05 +0800 Subject: perf report: Create option to disable raw event ordering Warning "dso not found" is reported when using "perf report -D". 66702781413407 0x32c0 [0x30]: PERF_RECORD_SAMPLE(IP, 0x2): 28177/28177: 0x55e493e00563 period: 106578 addr: 0 ... thread: perf:28177 ...... dso: 66702727832429 0x9dd8 [0x38]: PERF_RECORD_COMM exec: triad_loop:28177/28177 The PERF_RECORD_SAMPLE event (timestamp: 66702781413407) should be after the PERF_RECORD_COMM event (timestamp: 66702727832429), but it's early processed. So for most of cases, it makes sense to keep the event ordered even for dump mode. But it would be also useful to disable ordered_events for reporting raw dump to see events as they are stored in the perf.data file. So now, set ordered_events by default to true and add a new option 'disable-order' to disable it. For example, perf report -D --disable-order Fixes: 977f739b7126b ("perf report: Disable ordered_events for raw dump") Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210219070005.12397-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 3 +++ tools/perf/builtin-report.c | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index f546b5e9db05..87112e8d904e 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -224,6 +224,9 @@ OPTIONS --dump-raw-trace:: Dump raw trace in ASCII. +--disable-order:: + Disable raw trace ordering. + -g:: --call-graph=:: Display call chains using type, min percent threshold, print limit, diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2a845d6cac09..0d65c98794a8 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -84,6 +84,7 @@ struct report { bool nonany_branch_mode; bool group_set; bool stitch_lbr; + bool disable_order; int max_stack; struct perf_read_values show_threads_values; struct annotation_options annotation_opts; @@ -1296,6 +1297,8 @@ int cmd_report(int argc, const char **argv) OPTS_EVSWITCH(&report.evswitch), OPT_BOOLEAN(0, "total-cycles", &report.total_cycles_mode, "Sort all blocks by 'Sampled Cycles%'"), + OPT_BOOLEAN(0, "disable-order", &report.disable_order, + "Disable raw trace ordering"), OPT_END() }; struct perf_data data = { @@ -1329,7 +1332,7 @@ int cmd_report(int argc, const char **argv) if (report.mmaps_mode) report.tasks_mode = true; - if (dump_trace) + if (dump_trace && report.disable_order) report.tool.ordered_events = false; if (quiet) -- cgit v1.2.3 From 35276a4f058df23507987ef6d2426ea9673f5e35 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sat, 6 Mar 2021 00:08:38 -0800 Subject: perf skel: Remove some unused variables. Fixes -Wall warnings. Signed-off-by: Ian Rogers Acked-by: Song Liu Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210306080840.3785816-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c index c7cec92d0236..ab12b4c4ece2 100644 --- a/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c +++ b/tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c @@ -52,7 +52,7 @@ int BPF_PROG(fentry_XXX) static inline void fexit_update_maps(struct bpf_perf_event_value *after) { - struct bpf_perf_event_value *before, diff, *accum; + struct bpf_perf_event_value *before, diff; __u32 zero = 0; before = bpf_map_lookup_elem(&fentry_readings, &zero); @@ -78,7 +78,6 @@ int BPF_PROG(fexit_XXX) { struct bpf_perf_event_value reading; __u32 cpu = bpf_get_smp_processor_id(); - __u32 one = 1, zero = 0; int err; /* read all events before updating the maps, to reduce error */ -- cgit v1.2.3 From 7e1df64edeb294767f0976a7784fe1dbbe9f4394 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sat, 6 Mar 2021 00:08:39 -0800 Subject: perf tools: Enable warnings when compiling BPF programs Add -Wall -Werror when compiling BPF skeletons. Signed-off-by: Ian Rogers Acked-by: Song Liu Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210306080840.3785816-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5345ac70cd83..f43d2551f3de 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1029,7 +1029,7 @@ $(BPFTOOL): | $(SKEL_TMP_OUT) OUTPUT=$(SKEL_TMP_OUT)/ bootstrap $(SKEL_TMP_OUT)/%.bpf.o: util/bpf_skel/%.bpf.c $(LIBBPF) | $(SKEL_TMP_OUT) - $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf $(BPF_INCLUDE) \ + $(QUIET_CLANG)$(CLANG) -g -O2 -target bpf -Wall -Werror $(BPF_INCLUDE) \ -c $(filter util/bpf_skel/%.bpf.c,$^) -o $@ && $(LLVM_STRIP) -g $@ $(SKEL_OUT)/%.skel.h: $(SKEL_TMP_OUT)/%.bpf.o | $(BPFTOOL) -- cgit v1.2.3 From 509bbd75f7ff878b5e3f78b9e14ba4b6e16dc3b0 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sat, 6 Mar 2021 00:08:40 -0800 Subject: perf bpf: Minor whitespace cleanup. Missed space after #include. Signed-off-by: Ian Rogers Acked-by: Song Liu Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210306080840.3785816-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_counter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h index 2eca210e5dc1..cb9c532e0a07 100644 --- a/tools/perf/util/bpf_counter.h +++ b/tools/perf/util/bpf_counter.h @@ -38,7 +38,7 @@ int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd); #else /* HAVE_BPF_SKEL */ -#include +#include static inline int bpf_counter__load(struct evsel *evsel __maybe_unused, struct target *target __maybe_unused) -- cgit v1.2.3 From 44e176501c557460de954572435baa4ae34d2a35 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Fri, 26 Feb 2021 11:01:24 +0100 Subject: perf config: Add annotate.demangle{,_kernel} Committer notes: This allows setting this in from the command line: $ perf config annotate.demangle $ perf config annotate.demangle=yes $ perf config annotate.demangle annotate.demangle=yes $ cat ~/.perfconfig # this file is auto-generated. [report] sort-order = srcline [annotate] demangle = yes $ $ $ perf config annotate.demangle_kernel $ perf config annotate.demangle_kernel=yes $ perf config annotate.demangle_kernel annotate.demangle_kernel=yes $ cat ~/.perfconfig # this file is auto-generated. [report] sort-order = srcline [annotate] demangle = yes demangle_kernel = yes $ Signed-off-by: Martin Liška Tested-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/c96aabe7-791f-9503-295f-3147a9d19b60@suse.cz Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 6 ++++++ tools/perf/util/annotate.c | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 153bde14bbe0..154a1ced72b2 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -393,6 +393,12 @@ annotate.*:: This option works with tui, stdio2 browsers. + annotate.demangle:: + Demangle symbol names to human readable form. Default is 'true'. + + annotate.demangle_kernel:: + Demangle kernel symbol names to human readable form. Default is 'true'. + hist.*:: hist.percentage:: This option control the way to calculate overhead of filtered entries - diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e60841b86d27..f54224586a3d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -3144,6 +3144,10 @@ static int annotation__config(const char *var, const char *value, void *data) opt->use_offset = perf_config_bool("use_offset", value); } else if (!strcmp(var, "annotate.disassembler_style")) { opt->disassembler_style = value; + } else if (!strcmp(var, "annotate.demangle")) { + symbol_conf.demangle = perf_config_bool("demangle", value); + } else if (!strcmp(var, "annotate.demangle_kernel")) { + symbol_conf.demangle_kernel = perf_config_bool("demangle_kernel", value); } else { pr_debug("%s variable unknown, ignoring...", var); } -- cgit v1.2.3 From a78e724f4eb487517f03a6044d7a554c8823fe49 Mon Sep 17 00:00:00 2001 From: Xiong Zhenwu Date: Fri, 5 Mar 2021 01:22:12 -0800 Subject: perf bench: Fix misspellings using codespell $ codespell ./tool/perf/bench tools/perf/bench/inject-buildid.c:375: tihs ==> this Fix a typo found by codespell. Signed-off-by: Xiong Zhenwu Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210305092212.204923-1-xiong.zhenwu@zte.com.cn Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/inject-buildid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c index 280227e3ffd7..55d373b75791 100644 --- a/tools/perf/bench/inject-buildid.c +++ b/tools/perf/bench/inject-buildid.c @@ -372,7 +372,7 @@ static int inject_build_id(struct bench_data *data, u64 *max_rss) len += synthesize_flush(data); } - /* tihs makes the child to finish */ + /* this makes the child to finish */ close(data->input_pipe[1]); wait4(data->pid, &status, 0, &rusage); -- cgit v1.2.3 From 2777b81b379df772defd654bc4d3fa82dca17a4b Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Mon, 15 Feb 2021 12:34:46 +0100 Subject: perf annotate: Show full source location with 'l' hotkey Right now, when Line numbers are displayed, one can't easily find a source file that the line corresponds to. When a source line is selected and 'l' is pressed, full source file location is displayed in perf UI footer line. The hotkey works only for source code lines. Signed-off-by: Martin Liška Tested-by: Arnaldo Carvalho de Melo Link: http://lore.kernel.org/lkml/25a6384f-d862-5dda-4fec-8f0555599c75@suse.cz Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 25 +++++++++++++++++++++++-- tools/perf/util/annotate.c | 12 ++++++++++-- tools/perf/util/annotate.h | 2 ++ 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 35b82caf8090..9f75d767b6da 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -381,6 +381,25 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) return true; } +#define SYM_TITLE_MAX_SIZE (PATH_MAX + 64) + +static void annotate_browser__show_full_location(struct ui_browser *browser) +{ + struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); + struct disasm_line *cursor = disasm_line(ab->selection); + struct annotation_line *al = &cursor->al; + + if (al->offset != -1) + ui_helpline__puts("Only available for source code lines."); + else if (al->fileloc == NULL) + ui_helpline__puts("No source file location."); + else { + char help_line[SYM_TITLE_MAX_SIZE]; + sprintf (help_line, "Source file location: %s", al->fileloc); + ui_helpline__puts(help_line); + } +} + static void ui_browser__init_asm_mode(struct ui_browser *browser) { struct annotation *notes = browser__annotation(browser); @@ -388,8 +407,6 @@ static void ui_browser__init_asm_mode(struct ui_browser *browser) browser->nr_entries = notes->nr_asm_entries; } -#define SYM_TITLE_MAX_SIZE (PATH_MAX + 64) - static int sym_title(struct symbol *sym, struct map *map, char *title, size_t sz, int percent_type) { @@ -747,6 +764,7 @@ static int annotate_browser__run(struct annotate_browser *browser, "c Show min/max cycle\n" "/ Search string\n" "k Toggle line numbers\n" + "l Show full source file location\n" "P Print to [symbol_name].annotation file.\n" "r Run available scripts\n" "p Toggle percent type [local/global]\n" @@ -760,6 +778,9 @@ static int annotate_browser__run(struct annotate_browser *browser, case 'k': notes->options->show_linenr = !notes->options->show_linenr; continue; + case 'l': + annotate_browser__show_full_location (&browser->b); + continue; case 'H': nd = browser->curr_hot; break; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index f54224586a3d..18eee25b4976 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1161,6 +1161,7 @@ struct annotate_args { s64 offset; char *line; int line_nr; + char *fileloc; }; static void annotation_line__init(struct annotation_line *al, @@ -1170,6 +1171,7 @@ static void annotation_line__init(struct annotation_line *al, al->offset = args->offset; al->line = strdup(args->line); al->line_nr = args->line_nr; + al->fileloc = args->fileloc; al->data_nr = nr; } @@ -1482,7 +1484,7 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start */ static int symbol__parse_objdump_line(struct symbol *sym, struct annotate_args *args, - char *parsed_line, int *line_nr) + char *parsed_line, int *line_nr, char **fileloc) { struct map *map = args->ms.map; struct annotation *notes = symbol__annotation(sym); @@ -1494,6 +1496,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, /* /filename:linenr ? Save line number and ignore. */ if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { *line_nr = atoi(parsed_line + match[1].rm_so); + *fileloc = strdup(parsed_line); return 0; } @@ -1513,6 +1516,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, args->offset = offset; args->line = parsed_line; args->line_nr = *line_nr; + args->fileloc = *fileloc; args->ms.sym = sym; dl = disasm_line__new(args); @@ -1807,6 +1811,7 @@ static int symbol__disassemble_bpf(struct symbol *sym, args->offset = -1; args->line = strdup(srcline); args->line_nr = 0; + args->fileloc = NULL; args->ms.sym = sym; dl = disasm_line__new(args); if (dl) { @@ -1818,6 +1823,7 @@ static int symbol__disassemble_bpf(struct symbol *sym, args->offset = pc; args->line = buf + prev_buf_size; args->line_nr = 0; + args->fileloc = NULL; args->ms.sym = sym; dl = disasm_line__new(args); if (dl) @@ -1852,6 +1858,7 @@ symbol__disassemble_bpf_image(struct symbol *sym, args->offset = -1; args->line = strdup("to be implemented"); args->line_nr = 0; + args->fileloc = NULL; dl = disasm_line__new(args); if (dl) annotation_line__add(&dl->al, ¬es->src->source); @@ -1933,6 +1940,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) bool delete_extract = false; bool decomp = false; int lineno = 0; + char *fileloc = NULL; int nline; char *line; size_t line_len; @@ -2060,7 +2068,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) * See disasm_line__new() and struct disasm_line::line_nr. */ if (symbol__parse_objdump_line(sym, args, expanded_line, - &lineno) < 0) + &lineno, &fileloc) < 0) break; nline++; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 096cdaf21b01..3757416bcf46 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -84,6 +84,7 @@ struct annotation_options { print_lines, full_path, show_linenr, + show_fileloc, show_nr_jumps, show_minmax_cycle, show_asm_raw, @@ -136,6 +137,7 @@ struct annotation_line { s64 offset; char *line; int line_nr; + char *fileloc; int jump_sources; float ipc; u64 cycles; -- cgit v1.2.3 From 2942a671a37b61186e1073c76a17b1a631111f83 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sun, 7 Mar 2021 14:30:24 -0800 Subject: tools include: Add __sum16 and __wsum definitions. This adds definitions available in the uapi version. Explanation: In the kernel include of types.h the uapi version is included. In tools the uapi/linux/types.h and linux/types.h are distinct. For BPF programs a definition of __wsum is needed by the generated bpf_helpers.h. The definition comes either from a generated vmlinux.h or from that may be transitively included from bpf.h. The perf build prefers linux/types.h over uapi/linux/types.h for *. To allow tools/perf/util/bpf_skel/bpf_prog_profiler.bpf.c to compile with the same include path used for perf then these definitions are necessary. There is likely a wider conversation about exactly how types.h should be specified and the include order used by the perf build - it is somewhat confusing that tools/include/uapi/linux/bpf.h is using the non-uapi types.h. *see tools/perf/Makefile.config: ... INC_FLAGS += -I$(srctree)/tools/include/ INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi INC_FLAGS += -I$(srctree)/tools/include/uapi ... The include directories are scanned from left-to-right: https://gcc.gnu.org/onlinedocs/gcc/Directory-Options.html As tools/include/linux/types.h appears before tools/include/uapi/linux/types.h then I say it is preferred. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Cc: Tiezhu Yang Cc: Yonghong Song Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Link: http://lore.kernel.org/lkml/20210307223024.4081067-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/types.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h index e9c5a215837d..6e14a533ab4e 100644 --- a/tools/include/linux/types.h +++ b/tools/include/linux/types.h @@ -61,6 +61,9 @@ typedef __u32 __bitwise __be32; typedef __u64 __bitwise __le64; typedef __u64 __bitwise __be64; +typedef __u16 __bitwise __sum16; +typedef __u32 __bitwise __wsum; + typedef struct { int counter; } atomic_t; -- cgit v1.2.3 From 210e4c89ef61432040c6cd828fefa441f4887186 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 8 Mar 2021 11:17:51 -0300 Subject: perf symbols: Fix dso__fprintf_symbols_by_name() to return the number of printed chars The 'ret' variable was initialized to zero but then it was not updated from the fprintf() return, fix it. Reported-by: Yang Li cc: Alexander Shishkin cc: Ingo Molnar cc: Jiri Olsa cc: Mark Rutland cc: Namhyung Kim Cc: Peter Zijlstra Cc: Srikar Dronamraju Fixes: 90f18e63fbd00513 ("perf symbols: List symbols in a dso in ascending name order") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol_fprintf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c index 35c936ce33ef..2664fb65e47a 100644 --- a/tools/perf/util/symbol_fprintf.c +++ b/tools/perf/util/symbol_fprintf.c @@ -68,7 +68,7 @@ size_t dso__fprintf_symbols_by_name(struct dso *dso, for (nd = rb_first_cached(&dso->symbol_names); nd; nd = rb_next(nd)) { pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); - fprintf(fp, "%s\n", pos->sym.name); + ret += fprintf(fp, "%s\n", pos->sym.name); } return ret; -- cgit v1.2.3 From 297e69bfa4c7aa27259dd456af1377e868337043 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Mar 2021 08:59:21 -0300 Subject: perf script: Fixup 'struct evsel_script' method prefix They all operate on 'struct evsel_script' instances, so should be prefixed with evsel_script__, not with perf_evsel_script__. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 5915f19cee55..119a5f7e2772 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -314,8 +314,7 @@ static inline struct evsel_script *evsel_script(struct evsel *evsel) return (struct evsel_script *)evsel->priv; } -static struct evsel_script *perf_evsel_script__new(struct evsel *evsel, - struct perf_data *data) +static struct evsel_script *evsel_script__new(struct evsel *evsel, struct perf_data *data) { struct evsel_script *es = zalloc(sizeof(*es)); @@ -335,7 +334,7 @@ out_free: return NULL; } -static void perf_evsel_script__delete(struct evsel_script *es) +static void evsel_script__delete(struct evsel_script *es) { zfree(&es->filename); fclose(es->fp); @@ -343,7 +342,7 @@ static void perf_evsel_script__delete(struct evsel_script *es) free(es); } -static int perf_evsel_script__fprintf(struct evsel_script *es, FILE *fp) +static int evsel_script__fprintf(struct evsel_script *es, FILE *fp) { struct stat st; @@ -2219,8 +2218,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, if (!evsel->priv) { if (scr->per_event_dump) { - evsel->priv = perf_evsel_script__new(evsel, - scr->session->data); + evsel->priv = evsel_script__new(evsel, scr->session->data); } else { es = zalloc(sizeof(*es)); if (!es) @@ -2475,7 +2473,7 @@ static void perf_script__fclose_per_event_dump(struct perf_script *script) evlist__for_each_entry(evlist, evsel) { if (!evsel->priv) break; - perf_evsel_script__delete(evsel->priv); + evsel_script__delete(evsel->priv); evsel->priv = NULL; } } @@ -2495,7 +2493,7 @@ static int perf_script__fopen_per_event_dump(struct perf_script *script) if (evsel->priv != NULL) continue; - evsel->priv = perf_evsel_script__new(evsel, script->session->data); + evsel->priv = evsel_script__new(evsel, script->session->data); if (evsel->priv == NULL) goto out_err_fclose; } @@ -2530,8 +2528,8 @@ static void perf_script__exit_per_event_dump_stats(struct perf_script *script) evlist__for_each_entry(script->session->evlist, evsel) { struct evsel_script *es = evsel->priv; - perf_evsel_script__fprintf(es, stdout); - perf_evsel_script__delete(es); + evsel_script__fprintf(es, stdout); + evsel_script__delete(es); evsel->priv = NULL; } } -- cgit v1.2.3 From 905203411d8b96cf931dacc359982108a5893c9b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Mar 2021 09:03:40 -0300 Subject: perf stat: Fixup __perf_stat_evsel__is() prefix This is a perf_stat_evsel method, so should have that as its prefix, previously it was swapped as __perf_evsel_stat__is(). Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat.c | 3 +-- tools/perf/util/stat.h | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index c400f8dde017..2db46b9bebd0 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -76,8 +76,7 @@ double rel_stddev_stats(double stddev, double avg) return pct; } -bool __perf_evsel_stat__is(struct evsel *evsel, - enum perf_stat_evsel_id id) +bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id) { struct perf_stat_evsel *ps = evsel->stats; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index d85c292148bb..41107b8deac5 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -187,11 +187,10 @@ struct perf_aggr_thread_value { u64 ena; }; -bool __perf_evsel_stat__is(struct evsel *evsel, - enum perf_stat_evsel_id id); +bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id); #define perf_stat_evsel__is(evsel, id) \ - __perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__ ## id) + __perf_stat_evsel__is(evsel, PERF_STAT_EVSEL_ID__ ## id) extern struct runtime_stat rt_stat; extern struct stats walltime_nsecs_stats; -- cgit v1.2.3 From 1f042de2d5c7a69d5ac403a022185164bbe0f51c Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 9 Mar 2021 17:12:25 +0800 Subject: perf tools: use ARRAY_SIZE Fix the following cppcheck warnings: ./tools/perf/tests/demangle-ocaml-test.c:29:34-35: WARNING: Use ARRAY_SIZE. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/1615281145-2122-1-git-send-email-jiapeng.chong@linux.alibaba.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/demangle-ocaml-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/demangle-ocaml-test.c b/tools/perf/tests/demangle-ocaml-test.c index a273ed5163d7..2fac7d762c0d 100644 --- a/tools/perf/tests/demangle-ocaml-test.c +++ b/tools/perf/tests/demangle-ocaml-test.c @@ -26,7 +26,7 @@ int test__demangle_ocaml(struct test *test __maybe_unused, int subtest __maybe_u "Stdlib.bytes.++" }, }; - for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) { + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { buf = ocaml_demangle_sym(test_cases[i].mangled); if ((buf == NULL && test_cases[i].demangled != NULL) || (buf != NULL && test_cases[i].demangled == NULL) -- cgit v1.2.3 From 83ff0f93b0803700b018f6cce06f50439dc1348c Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Tue, 9 Mar 2021 18:11:09 +0800 Subject: perf machine: Assign boolean values to a bool variable Fix the following coccicheck warnings: ./tools/perf/util/machine.c:2041:9-10: WARNING: return of 0/1 in function 'symbol__match_regex' with return type bool. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Yonghong Song Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Link: http://lore.kernel.org/lkml/1615284669-82139-1-git-send-email-jiapeng.chong@linux.alibaba.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b5c2d8be4144..435771eed62b 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2038,8 +2038,8 @@ int machine__process_event(struct machine *machine, union perf_event *event, static bool symbol__match_regex(struct symbol *sym, regex_t *regex) { if (!regexec(regex, sym->name, 0, NULL, 0)) - return 1; - return 0; + return true; + return false; } static void ip__resolve_ams(struct thread *thread, -- cgit v1.2.3 From a7672d1df5737009bd5339b80da429da7ceb9964 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 15 Mar 2021 10:13:22 -0300 Subject: perf evlist: Change the COMM when preparing the workload It was reported that --exclude-perf wasn't working, as tracepoints were appearing in 'perf script' output as having the 'perf' COMM, that is just the window in evlist__prepare_workload() after the fork() and before the execvp() call for workloads specified in the command line. Example: # perf record -e kmem:kmalloc --filter 'bytes_alloc<650 && bytes_alloc>620' --exclude-perf -e kmem:kfree --exclude-perf -aR sleep 30 Then: # perf script perf 15905 [009] 1498.356094: kmem:kfree: call_site=perf_event_mmap+0x279 ptr=(nil) perf 15905 [009] 1498.356116: kmem:kfree: call_site=free_bprm+0x8f ptr=(nil) perf 15905 [009] 1498.356116: kmem:kfree: call_site=do_execveat_common+0x19d ptr=0xffff9cf750421c00 perf 15905 [009] 1498.356138: kmem:kfree: call_site=perf_event_mmap+0x279 ptr=(nil) perf 15905 [009] 1498.356148: kmem:kfree: call_site=free_bprm+0x8f ptr=(nil) perf 15905 [009] 1498.356148: kmem:kfree: call_site=do_execveat_common+0x19d ptr=0xffff9cf750421c00 perf 15905 [009] 1498.356168: kmem:kfree: call_site=perf_event_mmap+0x279 ptr=(nil) perf 15905 [009] 1498.356176: kmem:kfree: call_site=free_bprm+0x8f ptr=(nil) perf 15905 [009] 1498.356348: kmem:kfree: call_site=perf_event_mmap+0x279 ptr=(nil) perf 15905 [014] 1498.356386: kmem:kfree: call_site=security_compute_sid.part.0+0x3b2 ptr=(nil) perf 15905 [014] 1498.356423: kmem:kfree: call_site=load_elf_binary+0x207 ptr=0xffff9cf5b2a34220 perf 15905 [014] 1498.356694: kmem:kfree: call_site=__free_slab+0xb5 ptr=0xffff9cf6d0b3b000 sleep 15905 [014] 1498.356739: kmem:kfree: call_site=perf_event_mmap+0x279 ptr=(nil) Use prctl() to show that that is just the preparation of the workload: # perf script perf-exec 19036 [009] 2199.357582: kmem:kfree: call_site=perf_event_mmap+0x279 ptr=(nil) perf-exec 19036 [009] 2199.357604: kmem:kfree: call_site=free_bprm+0x8f ptr=(nil) perf-exec 19036 [009] 2199.357604: kmem:kfree: call_site=do_execveat_common+0x19d ptr=0xffff9cf786459800 perf-exec 19036 [009] 2199.357630: kmem:kfree: call_site=perf_event_mmap+0x279 ptr=(nil) perf-exec 19036 [000] 2199.358277: kmem:kfree: call_site=__free_slab+0xb5 ptr=0xffff9cf786fb9c00 perf-exec 19036 [000] 2199.358278: kmem:kfree: call_site=__free_slab+0xb5 ptr=0xffff9cf786458200 perf-exec 19036 [000] 2199.358279: kmem:kfree: call_site=__free_slab+0xb5 ptr=0xffff9cf786458600 sleep 19036 [000] 2199.358316: kmem:kfree: call_site=perf_event_mmap+0x279 ptr=(nil) sleep 19036 [000] 2199.358323: kmem:kfree: call_site=perf_event_mmap+0x279 ptr=(nil) sleep 19036 [000] 2199.358330: kmem:kfree: call_site=perf_event_mmap+0x279 ptr=0xffff9cf58be2d000 sleep 19036 [000] 2199.358337: kmem:kfree: call_site=perf_event_mmap+0x279 ptr=0xffff9cf58be2d000 sleep 19036 [000] 2199.358339: kmem:kfree: call_site=perf_event_mmap+0x279 ptr=0xffff9cf58be2d000 sleep 19036 [000] 2199.358341: kmem:kfree: call_site=perf_event_mmap+0x279 ptr=0xffff9cf58be2d000 Reporter: zhanweiw Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=212213 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 882cd1f721d9..435bbfd00551 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -1405,6 +1406,13 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const close(go_pipe[1]); fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC); + /* + * Change the name of this process not to confuse --exclude-perf users + * that sees 'perf' in the window up to the execvp() and thinks that + * perf samples are not being excluded. + */ + prctl(PR_SET_NAME, "perf-exec"); + /* * Tell the parent we're ready to go */ -- cgit v1.2.3 From 8efd1634542d9255023d7c2ec2194e8908450b12 Mon Sep 17 00:00:00 2001 From: Shunsuke Nakamura Date: Mon, 8 Mar 2021 19:53:40 +0900 Subject: perf vendor events arm64: Add more common and uarch events Add the following events.[1] Common architectural events: - L2I_TLB_REFILL - L2I_TLB - SIMD_INST_RETIRED - SVE_INST_RETIRED Common microarchitectural events: - UOP_SPEC - SVE_MATH_SPEC - FP_SPEC - FP_FMA_SPEC - FP_RECPE_SPEC - FP_CVT_SPEC - ASE_SVE_INT_SPEC - SVE_PRED_SPEC - SVE_MOVPRFX_SPEC - SVE_MOVPRFX_U_SPEC - ASE_SVE_LD_SPEC - ASE_SVE_ST_SPEC - PRF_SPEC - BASE_LD_REG_SPEC - BASE_ST_REG_SPEC - SVE_LDR_REG_SPEC - SVE_STR_REG_SPEC - SVE_LDR_PREG_SPEC - SVE_STR_PREG_SPEC - SVE_PRF_CONTIG_SPEC - ASE_SVE_LD_MULTI_SPEC - ASE_SVE_ST_MULTI_SPEC - SVE_LD_GATHER_SPEC - SVE_ST_SCATTER_SPEC - SVE_PRF_GATHER_SPEC - SVE_LDFF_SPEC - FP_SCALE_OPS_SPEC - FP_FIXED_OPS_SPEC - FP_HP_SCALE_OPS_SPEC - FP_HP_FIXED_OPS_SPEC - FP_SP_SCALE_OPS_SPEC - FP_SP_FIXED_OPS_SPEC - FP_DP_SCALE_OPS_SPEC - FP_DP_FIXED_OPS_SPEC Reference document is at the following: [1] https://github.com/fujitsu/A64FX/blob/master/doc/A64FX_PMU_Events_v1.2.pdf Signed-off-by: Nakamura, Shunsuke/中村 俊介 Reviewed-by: John Garry Tested-by: Masayoshi Mizuma Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20210308105342.746940-2-nakamura.shun@fujitsu.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/armv8-common-and-microarch.json | 228 +++++++++++++++++++++ 1 file changed, 228 insertions(+) diff --git a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json b/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json index 75376c7cc072..913fb200ea52 100644 --- a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json +++ b/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json @@ -209,12 +209,24 @@ "EventName": "L2D_TLB_REFILL", "BriefDescription": "Attributable Level 2 data TLB refill" }, + { + "PublicDescription": "Attributable Level 2 instruction TLB refill.", + "EventCode": "0x2E", + "EventName": "L2I_TLB_REFILL", + "BriefDescription": "Attributable Level 2 instruction TLB refill." + }, { "PublicDescription": "Attributable Level 2 data or unified TLB access", "EventCode": "0x2F", "EventName": "L2D_TLB", "BriefDescription": "Attributable Level 2 data or unified TLB access" }, + { + "PublicDescription": "Attributable Level 2 instruction TLB access.", + "EventCode": "0x30", + "EventName": "L2I_TLB", + "BriefDescription": "Attributable Level 2 instruction TLB access." + }, { "PublicDescription": "Access to another socket in a multi-socket system", "EventCode": "0x31", @@ -244,5 +256,221 @@ "EventCode": "0x37", "EventName": "LL_CACHE_MISS_RD", "BriefDescription": "Last level cache miss, read" + }, + { + "PublicDescription": "SIMD Instruction architecturally executed.", + "EventCode": "0x8000", + "EventName": "SIMD_INST_RETIRED", + "BriefDescription": "SIMD Instruction architecturally executed." + }, + { + "PublicDescription": "Instruction architecturally executed, SVE.", + "EventCode": "0x8002", + "EventName": "SVE_INST_RETIRED", + "BriefDescription": "Instruction architecturally executed, SVE." + }, + { + "PublicDescription": "Microarchitectural operation, Operations speculatively executed.", + "EventCode": "0x8008", + "EventName": "UOP_SPEC", + "BriefDescription": "Microarchitectural operation, Operations speculatively executed." + }, + { + "PublicDescription": "SVE Math accelerator Operations speculatively executed.", + "EventCode": "0x800E", + "EventName": "SVE_MATH_SPEC", + "BriefDescription": "SVE Math accelerator Operations speculatively executed." + }, + { + "PublicDescription": "Floating-point Operations speculatively executed.", + "EventCode": "0x8010", + "EventName": "FP_SPEC", + "BriefDescription": "Floating-point Operations speculatively executed." + }, + { + "PublicDescription": "Floating-point FMA Operations speculatively executed.", + "EventCode": "0x8028", + "EventName": "FP_FMA_SPEC", + "BriefDescription": "Floating-point FMA Operations speculatively executed." + }, + { + "PublicDescription": "Floating-point reciprocal estimate Operations speculatively executed.", + "EventCode": "0x8034", + "EventName": "FP_RECPE_SPEC", + "BriefDescription": "Floating-point reciprocal estimate Operations speculatively executed." + }, + { + "PublicDescription": "floating-point convert Operations speculatively executed.", + "EventCode": "0x8038", + "EventName": "FP_CVT_SPEC", + "BriefDescription": "floating-point convert Operations speculatively executed." + }, + { + "PublicDescription": "Advanced SIMD and SVE integer Operations speculatively executed.", + "EventCode": "0x8043", + "EventName": "ASE_SVE_INT_SPEC", + "BriefDescription": "Advanced SIMD and SVE integer Operations speculatively executed." + }, + { + "PublicDescription": "SVE predicated Operations speculatively executed.", + "EventCode": "0x8074", + "EventName": "SVE_PRED_SPEC", + "BriefDescription": "SVE predicated Operations speculatively executed." + }, + { + "PublicDescription": "SVE MOVPRFX Operations speculatively executed.", + "EventCode": "0x807C", + "EventName": "SVE_MOVPRFX_SPEC", + "BriefDescription": "SVE MOVPRFX Operations speculatively executed." + }, + { + "PublicDescription": "SVE MOVPRFX unfused Operations speculatively executed.", + "EventCode": "0x807F", + "EventName": "SVE_MOVPRFX_U_SPEC", + "BriefDescription": "SVE MOVPRFX unfused Operations speculatively executed." + }, + { + "PublicDescription": "Advanced SIMD and SVE load Operations speculatively executed.", + "EventCode": "0x8085", + "EventName": "ASE_SVE_LD_SPEC", + "BriefDescription": "Advanced SIMD and SVE load Operations speculatively executed." + }, + { + "PublicDescription": "Advanced SIMD and SVE store Operations speculatively executed.", + "EventCode": "0x8086", + "EventName": "ASE_SVE_ST_SPEC", + "BriefDescription": "Advanced SIMD and SVE store Operations speculatively executed." + }, + { + "PublicDescription": "Prefetch Operations speculatively executed.", + "EventCode": "0x8087", + "EventName": "PRF_SPEC", + "BriefDescription": "Prefetch Operations speculatively executed." + }, + { + "PublicDescription": "General-purpose register load Operations speculatively executed.", + "EventCode": "0x8089", + "EventName": "BASE_LD_REG_SPEC", + "BriefDescription": "General-purpose register load Operations speculatively executed." + }, + { + "PublicDescription": "General-purpose register store Operations speculatively executed.", + "EventCode": "0x808A", + "EventName": "BASE_ST_REG_SPEC", + "BriefDescription": "General-purpose register store Operations speculatively executed." + }, + { + "PublicDescription": "SVE unpredicated load register Operations speculatively executed.", + "EventCode": "0x8091", + "EventName": "SVE_LDR_REG_SPEC", + "BriefDescription": "SVE unpredicated load register Operations speculatively executed." + }, + { + "PublicDescription": "SVE unpredicated store register Operations speculatively executed.", + "EventCode": "0x8092", + "EventName": "SVE_STR_REG_SPEC", + "BriefDescription": "SVE unpredicated store register Operations speculatively executed." + }, + { + "PublicDescription": "SVE load predicate register Operations speculatively executed.", + "EventCode": "0x8095", + "EventName": "SVE_LDR_PREG_SPEC", + "BriefDescription": "SVE load predicate register Operations speculatively executed." + }, + { + "PublicDescription": "SVE store predicate register Operations speculatively executed.", + "EventCode": "0x8096", + "EventName": "SVE_STR_PREG_SPEC", + "BriefDescription": "SVE store predicate register Operations speculatively executed." + }, + { + "PublicDescription": "SVE contiguous prefetch element Operations speculatively executed.", + "EventCode": "0x809F", + "EventName": "SVE_PRF_CONTIG_SPEC", + "BriefDescription": "SVE contiguous prefetch element Operations speculatively executed." + }, + { + "PublicDescription": "Advanced SIMD and SVE contiguous load multiple vector Operations speculatively executed.", + "EventCode": "0x80A5", + "EventName": "ASE_SVE_LD_MULTI_SPEC", + "BriefDescription": "Advanced SIMD and SVE contiguous load multiple vector Operations speculatively executed." + }, + { + "PublicDescription": "Advanced SIMD and SVE contiguous store multiple vector Operations speculatively executed.", + "EventCode": "0x80A6", + "EventName": "ASE_SVE_ST_MULTI_SPEC", + "BriefDescription": "Advanced SIMD and SVE contiguous store multiple vector Operations speculatively executed." + }, + { + "PublicDescription": "SVE gather-load Operations speculatively executed.", + "EventCode": "0x80AD", + "EventName": "SVE_LD_GATHER_SPEC", + "BriefDescription": "SVE gather-load Operations speculatively executed." + }, + { + "PublicDescription": "SVE scatter-store Operations speculatively executed.", + "EventCode": "0x80AE", + "EventName": "SVE_ST_SCATTER_SPEC", + "BriefDescription": "SVE scatter-store Operations speculatively executed." + }, + { + "PublicDescription": "SVE gather-prefetch Operations speculatively executed.", + "EventCode": "0x80AF", + "EventName": "SVE_PRF_GATHER_SPEC", + "BriefDescription": "SVE gather-prefetch Operations speculatively executed." + }, + { + "PublicDescription": "SVE First-fault load Operations speculatively executed.", + "EventCode": "0x80BC", + "EventName": "SVE_LDFF_SPEC", + "BriefDescription": "SVE First-fault load Operations speculatively executed." + }, + { + "PublicDescription": "Scalable floating-point element Operations speculatively executed.", + "EventCode": "0x80C0", + "EventName": "FP_SCALE_OPS_SPEC", + "BriefDescription": "Scalable floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Non-scalable floating-point element Operations speculatively executed.", + "EventCode": "0x80C1", + "EventName": "FP_FIXED_OPS_SPEC", + "BriefDescription": "Non-scalable floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Scalable half-precision floating-point element Operations speculatively executed.", + "EventCode": "0x80C2", + "EventName": "FP_HP_SCALE_OPS_SPEC", + "BriefDescription": "Scalable half-precision floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Non-scalable half-precision floating-point element Operations speculatively executed.", + "EventCode": "0x80C3", + "EventName": "FP_HP_FIXED_OPS_SPEC", + "BriefDescription": "Non-scalable half-precision floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Scalable single-precision floating-point element Operations speculatively executed.", + "EventCode": "0x80C4", + "EventName": "FP_SP_SCALE_OPS_SPEC", + "BriefDescription": "Scalable single-precision floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Non-scalable single-precision floating-point element Operations speculatively executed.", + "EventCode": "0x80C5", + "EventName": "FP_SP_FIXED_OPS_SPEC", + "BriefDescription": "Non-scalable single-precision floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Scalable double-precision floating-point element Operations speculatively executed.", + "EventCode": "0x80C6", + "EventName": "FP_DP_SCALE_OPS_SPEC", + "BriefDescription": "Scalable double-precision floating-point element Operations speculatively executed." + }, + { + "PublicDescription": "Non-scalable double-precision floating-point element Operations speculatively executed.", + "EventCode": "0x80C7", + "EventName": "FP_DP_FIXED_OPS_SPEC", + "BriefDescription": "Non-scalable double-precision floating-point element Operations speculatively executed." } ] -- cgit v1.2.3 From 5497b23e870c45486e8caf1116ccbb592443bff3 Mon Sep 17 00:00:00 2001 From: Shunsuke Nakamura Date: Mon, 8 Mar 2021 19:53:41 +0900 Subject: perf vendor events arm64: Add Fujitsu A64FX pmu event Add pmu events for A64FX. Documentation source: https://github.com/fujitsu/A64FX/blob/master/doc/A64FX_PMU_Events_v1.2.pdf Signed-off-by: Nakamura, Shunsuke/中村 俊介 Reviewed-by: John Garry Tested-by: Masayoshi Mizuma Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20210308105342.746940-3-nakamura.shun@fujitsu.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/fujitsu/a64fx/branch.json | 8 + .../pmu-events/arch/arm64/fujitsu/a64fx/bus.json | 62 +++++++ .../pmu-events/arch/arm64/fujitsu/a64fx/cache.json | 128 ++++++++++++++ .../pmu-events/arch/arm64/fujitsu/a64fx/cycle.json | 5 + .../arch/arm64/fujitsu/a64fx/exception.json | 29 +++ .../arch/arm64/fujitsu/a64fx/instruction.json | 131 ++++++++++++++ .../arch/arm64/fujitsu/a64fx/memory.json | 8 + .../pmu-events/arch/arm64/fujitsu/a64fx/other.json | 188 ++++++++++++++++++++ .../arch/arm64/fujitsu/a64fx/pipeline.json | 194 +++++++++++++++++++++ .../pmu-events/arch/arm64/fujitsu/a64fx/sve.json | 110 ++++++++++++ tools/perf/pmu-events/arch/arm64/mapfile.csv | 1 + 11 files changed, 864 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/branch.json create mode 100644 tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/bus.json create mode 100644 tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cache.json create mode 100644 tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cycle.json create mode 100644 tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/exception.json create mode 100644 tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/instruction.json create mode 100644 tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/memory.json create mode 100644 tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/other.json create mode 100644 tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/pipeline.json create mode 100644 tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/sve.json diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/branch.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/branch.json new file mode 100644 index 000000000000..b011af11bf94 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/branch.json @@ -0,0 +1,8 @@ +[ + { + "ArchStdEvent": "BR_MIS_PRED" + }, + { + "ArchStdEvent": "BR_PRED" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/bus.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/bus.json new file mode 100644 index 000000000000..084e88d7df73 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/bus.json @@ -0,0 +1,62 @@ +[ + { + "PublicDescription": "This event counts read transactions from tofu controller to measured CMG.", + "EventCode": "0x314", + "EventName": "BUS_READ_TOTAL_TOFU", + "BriefDescription": "This event counts read transactions from tofu controller to measured CMG." + }, + { + "PublicDescription": "This event counts read transactions from PCI controller to measured CMG.", + "EventCode": "0x315", + "EventName": "BUS_READ_TOTAL_PCI", + "BriefDescription": "This event counts read transactions from PCI controller to measured CMG." + }, + { + "PublicDescription": "This event counts read transactions from measured CMG local memory to measured CMG.", + "EventCode": "0x316", + "EventName": "BUS_READ_TOTAL_MEM", + "BriefDescription": "This event counts read transactions from measured CMG local memory to measured CMG." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to CMG0, if measured CMG is not CMG0.", + "EventCode": "0x318", + "EventName": "BUS_WRITE_TOTAL_CMG0", + "BriefDescription": "This event counts write transactions from measured CMG to CMG0, if measured CMG is not CMG0." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to CMG1, if measured CMG is not CMG1.", + "EventCode": "0x319", + "EventName": "BUS_WRITE_TOTAL_CMG1", + "BriefDescription": "This event counts write transactions from measured CMG to CMG1, if measured CMG is not CMG1." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to CMG2, if measured CMG is not CMG2.", + "EventCode": "0x31A", + "EventName": "BUS_WRITE_TOTAL_CMG2", + "BriefDescription": "This event counts write transactions from measured CMG to CMG2, if measured CMG is not CMG2." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to CMG3, if measured CMG is not CMG3.", + "EventCode": "0x31B", + "EventName": "BUS_WRITE_TOTAL_CMG3", + "BriefDescription": "This event counts write transactions from measured CMG to CMG3, if measured CMG is not CMG3." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to tofu controller.", + "EventCode": "0x31C", + "EventName": "BUS_WRITE_TOTAL_TOFU", + "BriefDescription": "This event counts write transactions from measured CMG to tofu controller." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to PCI controller.", + "EventCode": "0x31D", + "EventName": "BUS_WRITE_TOTAL_PCI", + "BriefDescription": "This event counts write transactions from measured CMG to PCI controller." + }, + { + "PublicDescription": "This event counts write transactions from measured CMG to measured CMG local memory.", + "EventCode": "0x31E", + "EventName": "BUS_WRITE_TOTAL_MEM", + "BriefDescription": "This event counts write transactions from measured CMG to measured CMG local memory." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cache.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cache.json new file mode 100644 index 000000000000..2e341a951a10 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cache.json @@ -0,0 +1,128 @@ +[ + { + "ArchStdEvent": "L1I_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1I_TLB_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL" + }, + { + "ArchStdEvent": "L1I_CACHE" + }, + { + "ArchStdEvent": "L1D_CACHE_WB" + }, + { + "ArchStdEvent": "L2D_CACHE" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L2D_CACHE_WB" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL" + }, + { + "ArchStdEvent": "L2I_TLB_REFILL" + }, + { + "ArchStdEvent": "L2D_TLB" + }, + { + "ArchStdEvent": "L2I_TLB" + }, + { + "PublicDescription": "This event counts L1D_CACHE_REFILL caused by software or hardware prefetch.", + "EventCode": "0x49", + "EventName": "L1D_CACHE_REFILL_PRF", + "BriefDescription": "This event counts L1D_CACHE_REFILL caused by software or hardware prefetch." + }, + { + "PublicDescription": "This event counts L2D_CACHE_REFILL caused by software or hardware prefetch.", + "EventCode": "0x59", + "EventName": "L2D_CACHE_REFILL_PRF", + "BriefDescription": "This event counts L2D_CACHE_REFILL caused by software or hardware prefetch." + }, + { + "PublicDescription": "This event counts L1D_CACHE_REFILL caused by demand access.", + "EventCode": "0x200", + "EventName": "L1D_CACHE_REFILL_DM", + "BriefDescription": "This event counts L1D_CACHE_REFILL caused by demand access." + }, + { + "PublicDescription": "This event counts L1D_CACHE_REFILL caused by hardware prefetch.", + "EventCode": "0x202", + "EventName": "L1D_CACHE_REFILL_HWPRF", + "BriefDescription": "This event counts L1D_CACHE_REFILL caused by hardware prefetch." + }, + { + "PublicDescription": "This event counts outstanding L1D cache miss requests per cycle.", + "EventCode": "0x208", + "EventName": "L1_MISS_WAIT", + "BriefDescription": "This event counts outstanding L1D cache miss requests per cycle." + }, + { + "PublicDescription": "This event counts outstanding L1I cache miss requests per cycle.", + "EventCode": "0x209", + "EventName": "L1I_MISS_WAIT", + "BriefDescription": "This event counts outstanding L1I cache miss requests per cycle." + }, + { + "PublicDescription": "This event counts L2D_CACHE_REFILL caused by demand access.", + "EventCode": "0x300", + "EventName": "L2D_CACHE_REFILL_DM", + "BriefDescription": "This event counts L2D_CACHE_REFILL caused by demand access." + }, + { + "PublicDescription": "This event counts L2D_CACHE_REFILL caused by hardware prefetch.", + "EventCode": "0x302", + "EventName": "L2D_CACHE_REFILL_HWPRF", + "BriefDescription": "This event counts L2D_CACHE_REFILL caused by hardware prefetch." + }, + { + "PublicDescription": "This event counts outstanding L2 cache miss requests per cycle.", + "EventCode": "0x308", + "EventName": "L2_MISS_WAIT", + "BriefDescription": "This event counts outstanding L2 cache miss requests per cycle." + }, + { + "PublicDescription": "This event counts the number of times of L2 cache miss.", + "EventCode": "0x309", + "EventName": "L2_MISS_COUNT", + "BriefDescription": "This event counts the number of times of L2 cache miss." + }, + { + "PublicDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch.", + "EventCode": "0x325", + "EventName": "L2D_SWAP_DM", + "BriefDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch." + }, + { + "PublicDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access.", + "EventCode": "0x326", + "EventName": "L2D_CACHE_MIBMCH_PRF", + "BriefDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access." + }, + { + "PublicDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch.", + "EventCode": "0x396", + "EventName": "L2D_CACHE_SWAP_LOCAL", + "BriefDescription": "This event counts operations where demand access hits an L2 cache refill buffer allocated by software or hardware prefetch." + }, + { + "PublicDescription": "This event counts energy consumption per cycle of L2 cache.", + "EventCode": "0x3E0", + "EventName": "EA_L2", + "BriefDescription": "This event counts energy consumption per cycle of L2 cache." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cycle.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cycle.json new file mode 100644 index 000000000000..b16484628290 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/cycle.json @@ -0,0 +1,5 @@ +[ + { + "ArchStdEvent": "CPU_CYCLES" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/exception.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/exception.json new file mode 100644 index 000000000000..348749c154c0 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/exception.json @@ -0,0 +1,29 @@ +[ + { + "ArchStdEvent": "EXC_TAKEN" + }, + { + "ArchStdEvent": "EXC_UNDEF" + }, + { + "ArchStdEvent": "EXC_SVC" + }, + { + "ArchStdEvent": "EXC_PABORT" + }, + { + "ArchStdEvent": "EXC_DABORT" + }, + { + "ArchStdEvent": "EXC_IRQ" + }, + { + "ArchStdEvent": "EXC_FIQ" + }, + { + "ArchStdEvent": "EXC_SMC" + }, + { + "ArchStdEvent": "EXC_HVC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/instruction.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/instruction.json new file mode 100644 index 000000000000..6d258b1080cf --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/instruction.json @@ -0,0 +1,131 @@ +[ + { + "ArchStdEvent": "SW_INCR" + }, + { + "ArchStdEvent": "INST_RETIRED" + }, + { + "ArchStdEvent": "EXC_RETURN" + }, + { + "ArchStdEvent": "CID_WRITE_RETIRED" + }, + { + "ArchStdEvent": "INST_SPEC" + }, + { + "ArchStdEvent": "LDREX_SPEC" + }, + { + "ArchStdEvent": "STREX_SPEC" + }, + { + "ArchStdEvent": "LD_SPEC" + }, + { + "ArchStdEvent": "ST_SPEC" + }, + { + "ArchStdEvent": "LDST_SPEC" + }, + { + "ArchStdEvent": "DP_SPEC" + }, + { + "ArchStdEvent": "ASE_SPEC" + }, + { + "ArchStdEvent": "VFP_SPEC" + }, + { + "ArchStdEvent": "PC_WRITE_SPEC" + }, + { + "ArchStdEvent": "CRYPTO_SPEC" + }, + { + "ArchStdEvent": "BR_IMMED_SPEC" + }, + { + "ArchStdEvent": "BR_RETURN_SPEC" + }, + { + "ArchStdEvent": "BR_INDIRECT_SPEC" + }, + { + "ArchStdEvent": "ISB_SPEC" + }, + { + "ArchStdEvent": "DSB_SPEC" + }, + { + "ArchStdEvent": "DMB_SPEC" + }, + { + "PublicDescription": "This event counts architecturally executed zero blocking operations due to the 'DC ZVA' instruction.", + "EventCode": "0x9F", + "EventName": "DCZVA_SPEC", + "BriefDescription": "This event counts architecturally executed zero blocking operations due to the 'DC ZVA' instruction." + }, + { + "PublicDescription": "This event counts architecturally executed floating-point move operations.", + "EventCode": "0x105", + "EventName": "FP_MV_SPEC", + "BriefDescription": "This event counts architecturally executed floating-point move operations." + }, + { + "PublicDescription": "This event counts architecturally executed operations that using predicate register.", + "EventCode": "0x108", + "EventName": "PRD_SPEC", + "BriefDescription": "This event counts architecturally executed operations that using predicate register." + }, + { + "PublicDescription": "This event counts architecturally executed inter-element manipulation operations.", + "EventCode": "0x109", + "EventName": "IEL_SPEC", + "BriefDescription": "This event counts architecturally executed inter-element manipulation operations." + }, + { + "PublicDescription": "This event counts architecturally executed inter-register manipulation operations.", + "EventCode": "0x10A", + "EventName": "IREG_SPEC", + "BriefDescription": "This event counts architecturally executed inter-register manipulation operations." + }, + { + "PublicDescription": "This event counts architecturally executed NOSIMD load operations that using SIMD&FP registers.", + "EventCode": "0x112", + "EventName": "FP_LD_SPEC", + "BriefDescription": "This event counts architecturally executed NOSIMD load operations that using SIMD&FP registers." + }, + { + "PublicDescription": "This event counts architecturally executed NOSIMD store operations that using SIMD&FP registers.", + "EventCode": "0x113", + "EventName": "FP_ST_SPEC", + "BriefDescription": "This event counts architecturally executed NOSIMD store operations that using SIMD&FP registers." + }, + { + "PublicDescription": "This event counts architecturally executed SIMD broadcast floating-point load operations.", + "EventCode": "0x11A", + "EventName": "BC_LD_SPEC", + "BriefDescription": "This event counts architecturally executed SIMD broadcast floating-point load operations." + }, + { + "PublicDescription": "This event counts architecturally executed instructions, excluding the MOVPRFX instruction.", + "EventCode": "0x121", + "EventName": "EFFECTIVE_INST_SPEC", + "BriefDescription": "This event counts architecturally executed instructions, excluding the MOVPRFX instruction." + }, + { + "PublicDescription": "This event counts architecturally executed operations that uses 'pre-index' as its addressing mode.", + "EventCode": "0x123", + "EventName": "PRE_INDEX_SPEC", + "BriefDescription": "This event counts architecturally executed operations that uses 'pre-index' as its addressing mode." + }, + { + "PublicDescription": "This event counts architecturally executed operations that uses 'post-index' as its addressing mode.", + "EventCode": "0x124", + "EventName": "POST_INDEX_SPEC", + "BriefDescription": "This event counts architecturally executed operations that uses 'post-index' as its addressing mode." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/memory.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/memory.json new file mode 100644 index 000000000000..c1f6479e92b4 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/memory.json @@ -0,0 +1,8 @@ +[ + { + "PublicDescription": "This event counts energy consumption per cycle of CMG local memory.", + "EventCode": "0x3E8", + "EventName": "EA_MEMORY", + "BriefDescription": "This event counts energy consumption per cycle of CMG local memory." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/other.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/other.json new file mode 100644 index 000000000000..10c823ac26cc --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/other.json @@ -0,0 +1,188 @@ +[ + { + "PublicDescription": "This event counts the occurrence count of the micro-operation split.", + "EventCode": "0x139", + "EventName": "UOP_SPLIT", + "BriefDescription": "This event counts the occurrence count of the micro-operation split." + }, + { + "PublicDescription": "This event counts every cycle that no operation was committed because the oldest and uncommitted load/store/prefetch operation waits for memory access.", + "EventCode": "0x180", + "EventName": "LD_COMP_WAIT_L2_MISS", + "BriefDescription": "This event counts every cycle that no operation was committed because the oldest and uncommitted load/store/prefetch operation waits for memory access." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for memory access.", + "EventCode": "0x181", + "EventName": "LD_COMP_WAIT_L2_MISS_EX", + "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for memory access." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L2 cache access.", + "EventCode": "0x182", + "EventName": "LD_COMP_WAIT_L1_MISS", + "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L2 cache access." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L2 cache access.", + "EventCode": "0x183", + "EventName": "LD_COMP_WAIT_L1_MISS_EX", + "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L2 cache access." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L1D cache, L2 cache and memory access.", + "EventCode": "0x184", + "EventName": "LD_COMP_WAIT", + "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted load/store/prefetch operation waits for L1D cache, L2 cache and memory access." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L1D cache, L2 cache and memory access.", + "EventCode": "0x185", + "EventName": "LD_COMP_WAIT_EX", + "BriefDescription": "This event counts every cycle that no instruction was committed because the oldest and uncommitted integer load operation waits for L1D cache, L2 cache and memory access." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed due to the lack of an available prefetch port.", + "EventCode": "0x186", + "EventName": "LD_COMP_WAIT_PFP_BUSY", + "BriefDescription": "This event counts every cycle that no instruction was committed due to the lack of an available prefetch port." + }, + { + "PublicDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by an integer load operation.", + "EventCode": "0x187", + "EventName": "LD_COMP_WAIT_PFP_BUSY_EX", + "BriefDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by an integer load operation." + }, + { + "PublicDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by a software prefetch instruction.", + "EventCode": "0x188", + "EventName": "LD_COMP_WAIT_PFP_BUSY_SWPF", + "BriefDescription": "This event counts the LD_COMP_WAIT_PFP_BUSY caused by a software prefetch instruction." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is an integer or floating-point/SIMD instruction.", + "EventCode": "0x189", + "EventName": "EU_COMP_WAIT", + "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is an integer or floating-point/SIMD instruction." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a floating-point/SIMD instruction.", + "EventCode": "0x18A", + "EventName": "FL_COMP_WAIT", + "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a floating-point/SIMD instruction." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a branch instruction.", + "EventCode": "0x18B", + "EventName": "BR_COMP_WAIT", + "BriefDescription": "This event counts every cycle that no instruction was committed and the oldest and uncommitted instruction is a branch instruction." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the CSE is empty.", + "EventCode": "0x18C", + "EventName": "ROB_EMPTY", + "BriefDescription": "This event counts every cycle that no instruction was committed because the CSE is empty." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed because the CSE is empty and the store port (SP) is full.", + "EventCode": "0x18D", + "EventName": "ROB_EMPTY_STQ_BUSY", + "BriefDescription": "This event counts every cycle that no instruction was committed because the CSE is empty and the store port (SP) is full." + }, + { + "PublicDescription": "This event counts every cycle that the instruction unit is halted by the WFE/WFI instruction.", + "EventCode": "0x18E", + "EventName": "WFE_WFI_CYCLE", + "BriefDescription": "This event counts every cycle that the instruction unit is halted by the WFE/WFI instruction." + }, + { + "PublicDescription": "This event counts every cycle that no instruction was committed, but counts at the time when commits MOVPRFX only.", + "EventCode": "0x190", + "EventName": "_0INST_COMMIT", + "BriefDescription": "This event counts every cycle that no instruction was committed, but counts at the time when commits MOVPRFX only." + }, + { + "PublicDescription": "This event counts every cycle that one instruction is committed.", + "EventCode": "0x191", + "EventName": "_1INST_COMMIT", + "BriefDescription": "This event counts every cycle that one instruction is committed." + }, + { + "PublicDescription": "This event counts every cycle that two instructions are committed.", + "EventCode": "0x192", + "EventName": "_2INST_COMMIT", + "BriefDescription": "This event counts every cycle that two instructions are committed." + }, + { + "PublicDescription": "This event counts every cycle that three instructions are committed.", + "EventCode": "0x193", + "EventName": "_3INST_COMMIT", + "BriefDescription": "This event counts every cycle that three instructions are committed." + }, + { + "PublicDescription": "This event counts every cycle that four instructions are committed.", + "EventCode": "0x194", + "EventName": "_4INST_COMMIT", + "BriefDescription": "This event counts every cycle that four instructions are committed." + }, + { + "PublicDescription": "This event counts every cycle that only any micro-operations are committed.", + "EventCode": "0x198", + "EventName": "UOP_ONLY_COMMIT", + "BriefDescription": "This event counts every cycle that only any micro-operations are committed." + }, + { + "PublicDescription": "This event counts every cycle that only the MOVPRFX instruction is committed.", + "EventCode": "0x199", + "EventName": "SINGLE_MOVPRFX_COMMIT", + "BriefDescription": "This event counts every cycle that only the MOVPRFX instruction is committed." + }, + { + "PublicDescription": "This event counts energy consumption per cycle of core.", + "EventCode": "0x1E0", + "EventName": "EA_CORE", + "BriefDescription": "This event counts energy consumption per cycle of core." + }, + { + "PublicDescription": "This event counts streaming prefetch requests to L1D cache generated by hardware prefetcher.", + "EventCode": "0x230", + "EventName": "L1HWPF_STREAM_PF", + "BriefDescription": "This event counts streaming prefetch requests to L1D cache generated by hardware prefetcher." + }, + { + "PublicDescription": "This event counts allocation type prefetch injection requests to L1D cache generated by hardware prefetcher.", + "EventCode": "0x231", + "EventName": "L1HWPF_INJ_ALLOC_PF", + "BriefDescription": "This event counts allocation type prefetch injection requests to L1D cache generated by hardware prefetcher." + }, + { + "PublicDescription": "This event counts non-allocation type prefetch injection requests to L1D cache generated by hardware prefetcher.", + "EventCode": "0x232", + "EventName": "L1HWPF_INJ_NOALLOC_PF", + "BriefDescription": "This event counts non-allocation type prefetch injection requests to L1D cache generated by hardware prefetcher." + }, + { + "PublicDescription": "This event counts streaming prefetch requests to L2 cache generated by hardware prefecher.", + "EventCode": "0x233", + "EventName": "L2HWPF_STREAM_PF", + "BriefDescription": "This event counts streaming prefetch requests to L2 cache generated by hardware prefecher." + }, + { + "PublicDescription": "This event counts allocation type prefetch injection requests to L2 cache generated by hardware prefetcher.", + "EventCode": "0x234", + "EventName": "L2HWPF_INJ_ALLOC_PF", + "BriefDescription": "This event counts allocation type prefetch injection requests to L2 cache generated by hardware prefetcher." + }, + { + "PublicDescription": "This event counts non-allocation type prefetch injection requests to L2 cache generated by hardware prefetcher.", + "EventCode": "0x235", + "EventName": "L2HWPF_INJ_NOALLOC_PF", + "BriefDescription": "This event counts non-allocation type prefetch injection requests to L2 cache generated by hardware prefetcher." + }, + { + "PublicDescription": "This event counts prefetch requests to L2 cache generated by the other causes.", + "EventCode": "0x236", + "EventName": "L2HWPF_OTHER", + "BriefDescription": "This event counts prefetch requests to L2 cache generated by the other causes." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/pipeline.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/pipeline.json new file mode 100644 index 000000000000..dd7c97a9972b --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/pipeline.json @@ -0,0 +1,194 @@ +[ + { + "ArchStdEvent": "STALL_FRONTEND" + }, + { + "ArchStdEvent": "STALL_BACKEND" + }, + { + "PublicDescription": "This event counts valid cycles of EAGA pipeline.", + "EventCode": "0x1A0", + "EventName": "EAGA_VAL", + "BriefDescription": "This event counts valid cycles of EAGA pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of EAGB pipeline.", + "EventCode": "0x1A1", + "EventName": "EAGB_VAL", + "BriefDescription": "This event counts valid cycles of EAGB pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of EXA pipeline.", + "EventCode": "0x1A2", + "EventName": "EXA_VAL", + "BriefDescription": "This event counts valid cycles of EXA pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of EXB pipeline.", + "EventCode": "0x1A3", + "EventName": "EXB_VAL", + "BriefDescription": "This event counts valid cycles of EXB pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of FLA pipeline.", + "EventCode": "0x1A4", + "EventName": "FLA_VAL", + "BriefDescription": "This event counts valid cycles of FLA pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of FLB pipeline.", + "EventCode": "0x1A5", + "EventName": "FLB_VAL", + "BriefDescription": "This event counts valid cycles of FLB pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of PRX pipeline.", + "EventCode": "0x1A6", + "EventName": "PRX_VAL", + "BriefDescription": "This event counts valid cycles of PRX pipeline." + }, + { + "PublicDescription": "This event counts the number of 1's in the predicate bits of request in FLA pipeline, where it is corrected so that it becomes 16 when all bits are 1.", + "EventCode": "0x1B4", + "EventName": "FLA_VAL_PRD_CNT", + "BriefDescription": "This event counts the number of 1's in the predicate bits of request in FLA pipeline, where it is corrected so that it becomes 16 when all bits are 1." + }, + { + "PublicDescription": "This event counts the number of 1's in the predicate bits of request in FLB pipeline, where it is corrected so that it becomes 16 when all bits are 1.", + "EventCode": "0x1B5", + "EventName": "FLB_VAL_PRD_CNT", + "BriefDescription": "This event counts the number of 1's in the predicate bits of request in FLB pipeline, where it is corrected so that it becomes 16 when all bits are 1." + }, + { + "PublicDescription": "This event counts valid cycles of L1D cache pipeline#0.", + "EventCode": "0x240", + "EventName": "L1_PIPE0_VAL", + "BriefDescription": "This event counts valid cycles of L1D cache pipeline#0." + }, + { + "PublicDescription": "This event counts valid cycles of L1D cache pipeline#1.", + "EventCode": "0x241", + "EventName": "L1_PIPE1_VAL", + "BriefDescription": "This event counts valid cycles of L1D cache pipeline#1." + }, + { + "PublicDescription": "This event counts requests in L1D cache pipeline#0 that its sce bit of tagged address is 1.", + "EventCode": "0x250", + "EventName": "L1_PIPE0_VAL_IU_TAG_ADRS_SCE", + "BriefDescription": "This event counts requests in L1D cache pipeline#0 that its sce bit of tagged address is 1." + }, + { + "PublicDescription": "This event counts requests in L1D cache pipeline#0 that its pfe bit of tagged address is 1.", + "EventCode": "0x251", + "EventName": "L1_PIPE0_VAL_IU_TAG_ADRS_PFE", + "BriefDescription": "This event counts requests in L1D cache pipeline#0 that its pfe bit of tagged address is 1." + }, + { + "PublicDescription": "This event counts requests in L1D cache pipeline#1 that its sce bit of tagged address is 1.", + "EventCode": "0x252", + "EventName": "L1_PIPE1_VAL_IU_TAG_ADRS_SCE", + "BriefDescription": "This event counts requests in L1D cache pipeline#1 that its sce bit of tagged address is 1." + }, + { + "PublicDescription": "This event counts requests in L1D cache pipeline#1 that its pfe bit of tagged address is 1.", + "EventCode": "0x253", + "EventName": "L1_PIPE1_VAL_IU_TAG_ADRS_PFE", + "BriefDescription": "This event counts requests in L1D cache pipeline#1 that its pfe bit of tagged address is 1." + }, + { + "PublicDescription": "This event counts completed requests in L1D cache pipeline#0.", + "EventCode": "0x260", + "EventName": "L1_PIPE0_COMP", + "BriefDescription": "This event counts completed requests in L1D cache pipeline#0." + }, + { + "PublicDescription": "This event counts completed requests in L1D cache pipeline#1.", + "EventCode": "0x261", + "EventName": "L1_PIPE1_COMP", + "BriefDescription": "This event counts completed requests in L1D cache pipeline#1." + }, + { + "PublicDescription": "This event counts completed requests in L1I cache pipeline.", + "EventCode": "0x268", + "EventName": "L1I_PIPE_COMP", + "BriefDescription": "This event counts completed requests in L1I cache pipeline." + }, + { + "PublicDescription": "This event counts valid cycles of L1I cache pipeline.", + "EventCode": "0x269", + "EventName": "L1I_PIPE_VAL", + "BriefDescription": "This event counts valid cycles of L1I cache pipeline." + }, + { + "PublicDescription": "This event counts aborted requests in L1D pipelines that due to store-load interlock.", + "EventCode": "0x274", + "EventName": "L1_PIPE_ABORT_STLD_INTLK", + "BriefDescription": "This event counts aborted requests in L1D pipelines that due to store-load interlock." + }, + { + "PublicDescription": "This event counts requests in L1D cache pipeline#0 that its sector cache ID is not 0.", + "EventCode": "0x2A0", + "EventName": "L1_PIPE0_VAL_IU_NOT_SEC0", + "BriefDescription": "This event counts requests in L1D cache pipeline#0 that its sector cache ID is not 0." + }, + { + "PublicDescription": "This event counts requests in L1D cache pipeline#1 that its sector cache ID is not 0.", + "EventCode": "0x2A1", + "EventName": "L1_PIPE1_VAL_IU_NOT_SEC0", + "BriefDescription": "This event counts requests in L1D cache pipeline#1 that its sector cache ID is not 0." + }, + { + "PublicDescription": "This event counts the number of times where 2 elements of the gather instructions became 2 flows because 2 elements could not be combined.", + "EventCode": "0x2B0", + "EventName": "L1_PIPE_COMP_GATHER_2FLOW", + "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 2 flows because 2 elements could not be combined." + }, + { + "PublicDescription": "This event counts the number of times where 2 elements of the gather instructions became 1 flow because 2 elements could be combined.", + "EventCode": "0x2B1", + "EventName": "L1_PIPE_COMP_GATHER_1FLOW", + "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 1 flow because 2 elements could be combined." + }, + { + "PublicDescription": "This event counts the number of times where 2 elements of the gather instructions became 0 flow because both predicate values are 0.", + "EventCode": "0x2B2", + "EventName": "L1_PIPE_COMP_GATHER_0FLOW", + "BriefDescription": "This event counts the number of times where 2 elements of the gather instructions became 0 flow because both predicate values are 0." + }, + { + "PublicDescription": "This event counts the number of flows of the scatter instructions.", + "EventCode": "0x2B3", + "EventName": "L1_PIPE_COMP_SCATTER_1FLOW", + "BriefDescription": "This event counts the number of flows of the scatter instructions." + }, + { + "PublicDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#0, where it is corrected so that it becomes 16 when all bits are 1.", + "EventCode": "0x2B8", + "EventName": "L1_PIPE0_COMP_PRD_CNT", + "BriefDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#0, where it is corrected so that it becomes 16 when all bits are 1." + }, + { + "PublicDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#1, where it is corrected so that it becomes 16 when all bits are 1.", + "EventCode": "0x2B9", + "EventName": "L1_PIPE1_COMP_PRD_CNT", + "BriefDescription": "This event counts the number of 1's in the predicate bits of request in L1D cache pipeline#1, where it is corrected so that it becomes 16 when all bits are 1." + }, + { + "PublicDescription": "This event counts valid cycles of L2 cache pipeline.", + "EventCode": "0x330", + "EventName": "L2_PIPE_VAL", + "BriefDescription": "This event counts valid cycles of L2 cache pipeline." + }, + { + "PublicDescription": "This event counts completed requests in L2 cache pipeline.", + "EventCode": "0x350", + "EventName": "L2_PIPE_COMP_ALL", + "BriefDescription": "This event counts completed requests in L2 cache pipeline." + }, + { + "PublicDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access.", + "EventCode": "0x370", + "EventName": "L2_PIPE_COMP_PF_L2MIB_MCH", + "BriefDescription": "This event counts operations where software or hardware prefetch hits an L2 cache refill buffer allocated by demand access." + } +] diff --git a/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/sve.json b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/sve.json new file mode 100644 index 000000000000..dc1b95e42c32 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/fujitsu/a64fx/sve.json @@ -0,0 +1,110 @@ +[ + { + "ArchStdEvent": "SIMD_INST_RETIRED" + }, + { + "ArchStdEvent": "SVE_INST_RETIRED" + }, + { + "ArchStdEvent": "UOP_SPEC" + }, + { + "ArchStdEvent": "SVE_MATH_SPEC" + }, + { + "ArchStdEvent": "FP_SPEC" + }, + { + "ArchStdEvent": "FP_FMA_SPEC" + }, + { + "ArchStdEvent": "FP_RECPE_SPEC" + }, + { + "ArchStdEvent": "FP_CVT_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_INT_SPEC" + }, + { + "ArchStdEvent": "SVE_PRED_SPEC" + }, + { + "ArchStdEvent": "SVE_MOVPRFX_SPEC" + }, + { + "ArchStdEvent": "SVE_MOVPRFX_U_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_LD_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_ST_SPEC" + }, + { + "ArchStdEvent": "PRF_SPEC" + }, + { + "ArchStdEvent": "BASE_LD_REG_SPEC" + }, + { + "ArchStdEvent": "BASE_ST_REG_SPEC" + }, + { + "ArchStdEvent": "SVE_LDR_REG_SPEC" + }, + { + "ArchStdEvent": "SVE_STR_REG_SPEC" + }, + { + "ArchStdEvent": "SVE_LDR_PREG_SPEC" + }, + { + "ArchStdEvent": "SVE_STR_PREG_SPEC" + }, + { + "ArchStdEvent": "SVE_PRF_CONTIG_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_LD_MULTI_SPEC" + }, + { + "ArchStdEvent": "ASE_SVE_ST_MULTI_SPEC" + }, + { + "ArchStdEvent": "SVE_LD_GATHER_SPEC" + }, + { + "ArchStdEvent": "SVE_ST_SCATTER_SPEC" + }, + { + "ArchStdEvent": "SVE_PRF_GATHER_SPEC" + }, + { + "ArchStdEvent": "SVE_LDFF_SPEC" + }, + { + "ArchStdEvent": "FP_SCALE_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_FIXED_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_HP_SCALE_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_HP_FIXED_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_SP_SCALE_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_SP_FIXED_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_DP_SCALE_OPS_SPEC" + }, + { + "ArchStdEvent": "FP_DP_FIXED_OPS_SPEC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index 0d609149b82a..c43591d831b8 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -20,5 +20,6 @@ 0x00000000410fd0c0,v1,arm/cortex-a76-n1,core 0x00000000420f5160,v1,cavium/thunderx2,core 0x00000000430f0af0,v1,cavium/thunderx2,core +0x00000000460f0010,v1,fujitsu/a64fx,core 0x00000000480fd010,v1,hisilicon/hip08,core 0x00000000500f0000,v1,ampere/emag,core -- cgit v1.2.3 From 4a03af3ee399e87934e18ae720dda72c52f0050c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 15 Mar 2021 11:27:24 -0300 Subject: perf stat: Elaborate use cases for the -n/--null command line option The existing text was way too terse, pick the intended usage from the cset that introduced this option. Twitter: https://twitter.com/_monoid/status/1371461130175004672?s=20 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 08a1714494f8..3055aad38d46 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -142,7 +142,10 @@ Do not aggregate counts across all monitored CPUs. -n:: --null:: - null run - don't start any counters +null run - Don't start any counters. + +This can be useful to measure just elapsed wall-clock time - or to assess the +raw overhead of perf stat itself, without running any counters. -v:: --verbose:: -- cgit v1.2.3 From 6859bc0e78c6a699599cbb21404fdb6c8125da74 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 15 Mar 2021 22:30:47 +0800 Subject: perf stat: Improve readability of shadow stats This adds function convert_unit_double() and selects appropriate unit for shadow stats between K/M/G. $ sudo perf stat -a -- sleep 1 Before: Unit 'M' is selected even the number is very small. Performance counter stats for 'system wide': 4,003.06 msec cpu-clock # 3.998 CPUs utilized 16,179 context-switches # 0.004 M/sec 161 cpu-migrations # 0.040 K/sec 4,699 page-faults # 0.001 M/sec 6,135,801,925 cycles # 1.533 GHz (83.21%) 5,783,308,491 stalled-cycles-frontend # 94.26% frontend cycles idle (83.21%) 4,543,694,050 stalled-cycles-backend # 74.05% backend cycles idle (66.49%) 4,720,130,587 instructions # 0.77 insn per cycle # 1.23 stalled cycles per insn (83.28%) 753,848,078 branches # 188.318 M/sec (83.61%) 37,457,747 branch-misses # 4.97% of all branches (83.48%) 1.001283725 seconds time elapsed After: $ sudo perf stat -a -- sleep 2 Performance counter stats for 'system wide': 8,005.52 msec cpu-clock # 3.999 CPUs utilized 10,715 context-switches # 1.338 K/sec 785 cpu-migrations # 98.057 /sec 102 page-faults # 12.741 /sec 1,948,202,279 cycles # 0.243 GHz 2,816,470,932 stalled-cycles-frontend # 144.57% frontend cycles idle 2,661,172,207 stalled-cycles-backend # 136.60% backend cycles idle 464,172,105 instructions # 0.24 insn per cycle # 6.07 stalled cycles per insn 91,567,662 branches # 11.438 M/sec 7,756,054 branch-misses # 8.47% of all branches 2.002040043 seconds time elapsed v2: o do not change 'sec' to 'cpu-sec'. o use convert_unit_double to implement convert_unit. Signed-off-by: Changbin Du Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210315143047.3867-1-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 16 +++++++--------- tools/perf/util/units.c | 21 ++++++++++++++------- tools/perf/util/units.h | 1 + 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 6ccf21a72f06..3f800e71126f 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -9,6 +9,7 @@ #include "expr.h" #include "metricgroup.h" #include "cgroup.h" +#include "units.h" #include /* @@ -1270,18 +1271,15 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, evsel->name, evsel->metric_name, NULL, 1, cpu, out, st); } else if (runtime_stat_n(st, STAT_NSECS, cpu, &rsd) != 0) { - char unit = 'M'; - char unit_buf[10]; + char unit = ' '; + char unit_buf[10] = "/sec"; total = runtime_stat_avg(st, STAT_NSECS, cpu, &rsd); - if (total) - ratio = 1000.0 * avg / total; - if (ratio < 0.001) { - ratio *= 1000; - unit = 'K'; - } - snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); + ratio = convert_unit_double(1000000000.0 * avg / total, &unit); + + if (unit != ' ') + snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { print_smi_cost(config, cpu, out, st, &rsd); diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c index a46762aec4c9..32c39cfe209b 100644 --- a/tools/perf/util/units.c +++ b/tools/perf/util/units.c @@ -33,28 +33,35 @@ unsigned long parse_tag_value(const char *str, struct parse_tag *tags) return (unsigned long) -1; } -unsigned long convert_unit(unsigned long value, char *unit) +double convert_unit_double(double value, char *unit) { *unit = ' '; - if (value > 1000) { - value /= 1000; + if (value > 1000.0) { + value /= 1000.0; *unit = 'K'; } - if (value > 1000) { - value /= 1000; + if (value > 1000.0) { + value /= 1000.0; *unit = 'M'; } - if (value > 1000) { - value /= 1000; + if (value > 1000.0) { + value /= 1000.0; *unit = 'G'; } return value; } +unsigned long convert_unit(unsigned long value, char *unit) +{ + double v = convert_unit_double((double)value, unit); + + return (unsigned long)v; +} + int unit_number__scnprintf(char *buf, size_t size, u64 n) { char unit[4] = "BKMG"; diff --git a/tools/perf/util/units.h b/tools/perf/util/units.h index 99263b6a23f7..ea43e74e3240 100644 --- a/tools/perf/util/units.h +++ b/tools/perf/util/units.h @@ -12,6 +12,7 @@ struct parse_tag { unsigned long parse_tag_value(const char *str, struct parse_tag *tags); +double convert_unit_double(double value, char *unit); unsigned long convert_unit(unsigned long value, char *unit); int unit_number__scnprintf(char *buf, size_t size, u64 n); -- cgit v1.2.3 From 87cb88d3c00275d7dfd695b2002eb53411a53cfa Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 16 Mar 2021 17:55:03 -0700 Subject: perf test: Remove unused argument Remove unused argument from daemon_exit. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210317005505.2794804-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/daemon.sh | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tools/perf/tests/shell/daemon.sh b/tools/perf/tests/shell/daemon.sh index 5ad3ca8d681b..66ad56b4e0a5 100755 --- a/tools/perf/tests/shell/daemon.sh +++ b/tools/perf/tests/shell/daemon.sh @@ -115,8 +115,7 @@ daemon_start() daemon_exit() { - local base=$1 - local config=$2 + local config=$1 local line=`perf daemon --config ${config} -x: | head -1` local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'` @@ -171,7 +170,7 @@ EOF ${base}/session-time/ack "0" # stop daemon - daemon_exit ${base} ${config} + daemon_exit ${config} rm -rf ${base} rm -f ${config} @@ -288,7 +287,7 @@ EOF done # stop daemon - daemon_exit ${base} ${config} + daemon_exit ${config} rm -rf ${base} rm -f ${config} @@ -333,7 +332,7 @@ EOF fi # stop daemon - daemon_exit ${base} ${config} + daemon_exit ${config} # check that sessions are gone if [ -d "/proc/${pid_size}" ]; then @@ -374,7 +373,7 @@ EOF perf daemon signal --config ${config} # stop daemon - daemon_exit ${base} ${config} + daemon_exit ${config} # count is 2 perf.data for signals and 1 for perf record finished count=`ls ${base}/session-test/ | grep perf.data | wc -l` @@ -420,7 +419,7 @@ EOF fi # stop daemon - daemon_exit ${base} ${config} + daemon_exit ${config} rm -rf ${base} rm -f ${config} @@ -457,7 +456,7 @@ EOF fi # stop daemon - daemon_exit ${base} ${config} + daemon_exit ${config} rm -rf ${base} rm -f ${config} -- cgit v1.2.3 From 078cbb6f75f16a16fb843a431e83c2f92605bb75 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 16 Mar 2021 17:55:04 -0700 Subject: perf test: Cleanup daemon if test is interrupted. Reorder daemon_start and daemon_exit as the trap handler is added in daemon_start referencing daemon_exit. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210317005505.2794804-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/daemon.sh | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/tools/perf/tests/shell/daemon.sh b/tools/perf/tests/shell/daemon.sh index 66ad56b4e0a5..61d13c4c64b8 100755 --- a/tools/perf/tests/shell/daemon.sh +++ b/tools/perf/tests/shell/daemon.sh @@ -98,6 +98,23 @@ check_line_other() fi } +daemon_exit() +{ + local config=$1 + + local line=`perf daemon --config ${config} -x: | head -1` + local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'` + + # Reset trap handler. + trap - SIGINT SIGTERM + + # stop daemon + perf daemon stop --config ${config} + + # ... and wait for the pid to go away + tail --pid=${pid} -f /dev/null +} + daemon_start() { local config=$1 @@ -105,6 +122,9 @@ daemon_start() perf daemon start --config ${config} + # Clean up daemon if interrupted. + trap "echo 'FAILED: Signal caught'; daemon_exit ${config}; exit 1" SIGINT SIGTERM + # wait for the session to ping local state="FAIL" while [ "${state}" != "OK" ]; do @@ -113,20 +133,6 @@ daemon_start() done } -daemon_exit() -{ - local config=$1 - - local line=`perf daemon --config ${config} -x: | head -1` - local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'` - - # stop daemon - perf daemon stop --config ${config} - - # ... and wait for the pid to go away - tail --pid=${pid} -f /dev/null -} - test_list() { echo "test daemon list" -- cgit v1.2.3 From a6cb06ff49fd522aaeecfee7d5952ac6e2ab9d13 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 16 Mar 2021 17:55:05 -0700 Subject: perf test: Add 30s timeout for wait for daemon start. Retry the ping loop upto 600 times, or approximately 30 seconds, to make sure the test does hang at start up. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210317005505.2794804-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/daemon.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/perf/tests/shell/daemon.sh b/tools/perf/tests/shell/daemon.sh index 61d13c4c64b8..ee4a30ca3f57 100755 --- a/tools/perf/tests/shell/daemon.sh +++ b/tools/perf/tests/shell/daemon.sh @@ -127,9 +127,16 @@ daemon_start() # wait for the session to ping local state="FAIL" + local retries=0 while [ "${state}" != "OK" ]; do state=`perf daemon ping --config ${config} --session ${session} | awk '{ print $1 }'` sleep 0.05 + retries=$((${retries} +1)) + if [ ${retries} -ge 600 ]; then + echo "FAILED: Timeout waiting for daemon to ping" + daemon_exit ${config} + exit 1 + fi done } -- cgit v1.2.3 From 4d39c89f0b94bf4a6e1ccf42702e7d80d210a5fd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 23 Mar 2021 17:09:15 +0100 Subject: perf tools: Fix various typos in comments Fix ~124 single-word typos and a few spelling errors in the perf tooling code, accumulated over the years. Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210321113734.GA248990@gmail.com Link: http://lore.kernel.org/lkml/20210323160915.GA61903@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-buildid-cache.txt | 2 +- tools/perf/Documentation/perf-report.txt | 2 +- tools/perf/Documentation/perf-top.txt | 2 +- tools/perf/arch/arm/util/cs-etm.c | 2 +- tools/perf/arch/arm64/util/machine.c | 6 +++--- tools/perf/arch/arm64/util/perf_regs.c | 2 +- tools/perf/arch/powerpc/util/kvm-stat.c | 2 +- tools/perf/arch/powerpc/util/utils_header.h | 2 +- tools/perf/arch/x86/tests/bp-modify.c | 2 +- tools/perf/arch/x86/util/perf_regs.c | 4 ++-- tools/perf/bench/epoll-wait.c | 4 ++-- tools/perf/bench/numa.c | 2 +- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-diff.c | 2 +- tools/perf/builtin-lock.c | 2 +- tools/perf/builtin-sched.c | 2 +- tools/perf/builtin-script.c | 4 ++-- tools/perf/builtin-stat.c | 4 ++-- tools/perf/builtin-top.c | 2 +- tools/perf/examples/bpf/augmented_raw_syscalls.c | 4 ++-- tools/perf/jvmti/jvmti_agent.c | 4 ++-- tools/perf/pmu-events/arch/powerpc/power8/metrics.json | 12 ++++++------ tools/perf/pmu-events/arch/powerpc/power9/metrics.json | 2 +- tools/perf/pmu-events/jevents.c | 2 +- tools/perf/scripts/python/netdev-times.py | 2 +- tools/perf/tests/bp_signal.c | 6 +++--- tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/hists_cumulate.c | 4 ++-- tools/perf/tests/parse-events.c | 2 +- tools/perf/tests/parse-metric.c | 2 +- tools/perf/tests/topology.c | 2 +- tools/perf/trace/beauty/include/linux/socket.h | 2 +- tools/perf/ui/browsers/annotate.c | 2 +- tools/perf/ui/browsers/hists.c | 2 +- tools/perf/util/bpf-loader.c | 2 +- tools/perf/util/call-path.h | 2 +- tools/perf/util/callchain.c | 2 +- tools/perf/util/config.c | 2 +- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 2 +- tools/perf/util/cs-etm.c | 8 ++++---- tools/perf/util/cs-etm.h | 5 +++-- tools/perf/util/data-convert-bt.c | 2 +- tools/perf/util/demangle-java.c | 4 ++-- tools/perf/util/dso.h | 2 +- tools/perf/util/dwarf-aux.c | 6 +++--- tools/perf/util/dwarf-aux.h | 2 +- tools/perf/util/events_stats.h | 2 +- tools/perf/util/evlist.c | 2 +- tools/perf/util/evsel.c | 4 ++-- tools/perf/util/expr.h | 2 +- tools/perf/util/header.c | 18 +++++++++--------- tools/perf/util/intel-pt.c | 2 +- tools/perf/util/levenshtein.c | 2 +- tools/perf/util/libunwind/arm64.c | 2 +- tools/perf/util/libunwind/x86_32.c | 2 +- tools/perf/util/llvm-utils.c | 2 +- tools/perf/util/machine.c | 8 ++++---- tools/perf/util/map.h | 4 ++-- tools/perf/util/mem-events.h | 2 +- tools/perf/util/metricgroup.c | 2 +- tools/perf/util/parse-events.c | 10 +++++----- tools/perf/util/pmu.c | 4 ++-- tools/perf/util/probe-event.c | 4 ++-- tools/perf/util/probe-finder.c | 6 +++--- tools/perf/util/s390-cpumsf.c | 10 +++++----- tools/perf/util/scripting-engines/trace-event-python.c | 2 +- tools/perf/util/session.c | 4 ++-- tools/perf/util/strbuf.h | 2 +- tools/perf/util/strfilter.h | 4 ++-- tools/perf/util/symbol-elf.c | 2 +- tools/perf/util/synthetic-events.c | 4 ++-- tools/perf/util/unwind-libunwind-local.c | 2 +- 72 files changed, 124 insertions(+), 123 deletions(-) diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index bb167e32a1d7..cd8ce6e8ec12 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -57,7 +57,7 @@ OPTIONS -u:: --update=:: Update specified file of the cache. Note that this doesn't remove - older entires since those may be still needed for annotating old + older entries since those may be still needed for annotating old (or remote) perf.data. Only if there is already a cache which has exactly same build-id, that is replaced by new one. It can be used to update kallsyms and kernel dso to vmlinux in order to support diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 87112e8d904e..822b16f05c15 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -475,7 +475,7 @@ OPTIONS but probably we'll make the default not to show the switch-on/off events on the --group mode and if there is only one event besides the off/on ones, go straight to the histogram browser, just like 'perf report' with no events - explicitely specified does. + explicitly specified does. --itrace:: Options for decoding instruction tracing data. The options are: diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index ee2024691d46..bba5ffb05463 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -317,7 +317,7 @@ Default is to monitor all CPUS. but probably we'll make the default not to show the switch-on/off events on the --group mode and if there is only one event besides the off/on ones, go straight to the histogram browser, just like 'perf top' with no events - explicitely specified does. + explicitly specified does. --stitch-lbr:: Show callgraph with stitched LBRs, which may have more complete diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 0a44bc462cec..d942f118d32c 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -378,7 +378,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); } - /* Snapshost size can't be bigger than the auxtrace area */ + /* Snapshot size can't be bigger than the auxtrace area */ if (opts->auxtrace_snapshot_size > opts->auxtrace_mmap_pages * (size_t)page_size) { pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c index 40c5e0b5bda8..7e7714290a87 100644 --- a/tools/perf/arch/arm64/util/machine.c +++ b/tools/perf/arch/arm64/util/machine.c @@ -6,11 +6,11 @@ #include "debug.h" #include "symbol.h" -/* On arm64, kernel text segment start at high memory address, +/* On arm64, kernel text segment starts at high memory address, * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory - * address, like 0xffff 0000 00ax xxxx. When only samll amount of + * address, like 0xffff 0000 00ax xxxx. When only small amount of * memory is used by modules, gap between end of module's text segment - * and start of kernel text segment may be reach 2G. + * and start of kernel text segment may reach 2G. * Therefore do not fill this gap and do not assign it to the kernel dso map. */ diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c index 2518cde18b34..476b037eea1c 100644 --- a/tools/perf/arch/arm64/util/perf_regs.c +++ b/tools/perf/arch/arm64/util/perf_regs.c @@ -108,7 +108,7 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op) /* [sp], [sp, NUM] or [sp,NUM] */ new_len = 7; /* + ( % s p ) NULL */ - /* If the arugment is [sp], need to fill offset '0' */ + /* If the argument is [sp], need to fill offset '0' */ if (rm[2].rm_so == -1) new_len += 1; else diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c index eed9e5a42935..16510686c138 100644 --- a/tools/perf/arch/powerpc/util/kvm-stat.c +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -176,7 +176,7 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) } /* - * Incase of powerpc architecture, pmu registers are programmable + * In case of powerpc architecture, pmu registers are programmable * by guest kernel. So monitoring guest via host may not provide * valid samples with default 'cycles' event. It is better to use * 'trace_imc/trace_cycles' event for guest profiling, since it diff --git a/tools/perf/arch/powerpc/util/utils_header.h b/tools/perf/arch/powerpc/util/utils_header.h index 5788eb1f1fe3..2baeb1c1ae85 100644 --- a/tools/perf/arch/powerpc/util/utils_header.h +++ b/tools/perf/arch/powerpc/util/utils_header.h @@ -10,6 +10,6 @@ #define SPRN_PVR 0x11F /* Processor Version Register */ #define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */ -#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */ +#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revision field */ #endif /* __PERF_UTIL_HEADER_H */ diff --git a/tools/perf/arch/x86/tests/bp-modify.c b/tools/perf/arch/x86/tests/bp-modify.c index adcacf1b6609..dffcf9b52153 100644 --- a/tools/perf/arch/x86/tests/bp-modify.c +++ b/tools/perf/arch/x86/tests/bp-modify.c @@ -73,7 +73,7 @@ static int bp_modify1(void) /* * The parent does following steps: * - creates a new breakpoint (id 0) for bp_2 function - * - changes that breakponit to bp_1 function + * - changes that breakpoint to bp_1 function * - waits for the breakpoint to hit and checks * it has proper rip of bp_1 function * - detaches the child diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index fca81b39b09f..207c56805c55 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -165,7 +165,7 @@ static int sdt_init_op_regex(void) /* * Max x86 register name length is 5(ex: %r15d). So, 6th char * should always contain NULL. This helps to find register name - * length using strlen, insted of maintaing one more variable. + * length using strlen, instead of maintaining one more variable. */ #define SDT_REG_NAME_SIZE 6 @@ -207,7 +207,7 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op) * and displacement 0 (Both sign and displacement 0 are * optional so it may be empty). Use one more character * to hold last NULL so that strlen can be used to find - * prefix length, instead of maintaing one more variable. + * prefix length, instead of maintaining one more variable. */ char prefix[3] = {0}; diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 0a0ff1247c83..79d13dbc0a47 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -17,7 +17,7 @@ * While the second model, enabled via --multiq option, uses multiple * queueing (which refers to one epoll instance per worker). For example, * short lived tcp connections in a high throughput httpd server will - * ditribute the accept()'ing connections across CPUs. In this case each + * distribute the accept()'ing connections across CPUs. In this case each * worker does a limited amount of processing. * * [queue A] ---> [worker] @@ -198,7 +198,7 @@ static void *workerfn(void *arg) do { /* - * Block undefinitely waiting for the IN event. + * Block indefinitely waiting for the IN event. * In order to stress the epoll_wait(2) syscall, * call it event per event, instead of a larger * batch (max)limit. diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 20b87e29c96f..f2640179ada9 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -42,7 +42,7 @@ #endif /* - * Regular printout to the terminal, supressed if -q is specified: + * Regular printout to the terminal, suppressed if -q is specified: */ #define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index a23ba6bb99b6..0f3a196e5d6e 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -239,7 +239,7 @@ static int evsel__add_sample(struct evsel *evsel, struct perf_sample *sample, } /* - * XXX filtered samples can still have branch entires pointing into our + * XXX filtered samples can still have branch entries pointing into our * symbol and are missed. */ process_branch_stack(sample->branch_stack, al, sample); diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 878e04b1fab7..f52b3a799e76 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1796,7 +1796,7 @@ static int ui_init(void) data__for_each_file(i, d) { /* - * Baseline or compute realted columns: + * Baseline or compute related columns: * * PERF_HPP_DIFF__BASELINE * PERF_HPP_DIFF__DELTA diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index a2f1e53f37a7..01326e370009 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -49,7 +49,7 @@ struct lock_stat { /* * FIXME: evsel__intval() returns u64, - * so address of lockdep_map should be dealed as 64bit. + * so address of lockdep_map should be treated as 64bit. * Is there more better solution? */ void *addr; /* address of lockdep_map, used as ID */ diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 69c769b04a61..954ce2f594e9 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -1712,7 +1712,7 @@ static int perf_sched__process_fork_event(struct perf_tool *tool, { struct perf_sched *sched = container_of(tool, struct perf_sched, tool); - /* run the fork event through the perf machineruy */ + /* run the fork event through the perf machinery */ perf_event__process_fork(tool, event, sample, machine); /* and then run additional processing needed for this command */ diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 119a5f7e2772..1280cbfad4db 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2486,7 +2486,7 @@ static int perf_script__fopen_per_event_dump(struct perf_script *script) /* * Already setup? I.e. we may be called twice in cases like * Intel PT, one for the intel_pt// and dummy events, then - * for the evsels syntheized from the auxtrace info. + * for the evsels synthesized from the auxtrace info. * * Ses perf_script__process_auxtrace_info. */ @@ -3083,7 +3083,7 @@ static int list_available_scripts(const struct option *opt __maybe_unused, * * Fixme: All existing "xxx-record" are all in good formats "-e event ", * which is covered well now. And new parsing code should be added to - * cover the future complexing formats like event groups etc. + * cover the future complex formats like event groups etc. */ static int check_ev_match(char *dir_name, char *scriptname, struct perf_session *session) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2e2e4a8345ea..d140ffbb5b34 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1705,7 +1705,7 @@ static int add_default_attributes(void) bzero(&errinfo, sizeof(errinfo)); if (transaction_run) { /* Handle -T as -M transaction. Once platform specific metrics - * support has been added to the json files, all archictures + * support has been added to the json files, all architectures * will use this approach. To determine transaction support * on an architecture test for such a metric name. */ @@ -2459,7 +2459,7 @@ int cmd_stat(int argc, const char **argv) /* * We synthesize the kernel mmap record just so that older tools * don't emit warnings about not being able to resolve symbols - * due to /proc/sys/kernel/kptr_restrict settings and instear provide + * due to /proc/sys/kernel/kptr_restrict settings and instead provide * a saner message about no samples being in the perf.data file. * * This also serves to suppress a warning about f_header.data.size == 0 diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 3673c04d16b6..173ace43f845 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1607,7 +1607,7 @@ int cmd_top(int argc, const char **argv) if (status) { /* * Some arches do not provide a get_cpuid(), so just use pr_debug, otherwise - * warn the user explicitely. + * warn the user explicitly. */ eprintf(status == ENOSYS ? 1 : 0, verbose, "Couldn't read the cpuid for this machine: %s\n", diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index b80437971d80..a262dcd020f4 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -262,7 +262,7 @@ int sys_enter(struct syscall_enter_args *args) /* * Jump to syscall specific augmenter, even if the default one, * "!raw_syscalls:unaugmented" that will just return 1 to return the - * unagmented tracepoint payload. + * unaugmented tracepoint payload. */ bpf_tail_call(args, &syscalls_sys_enter, augmented_args->args.syscall_nr); @@ -282,7 +282,7 @@ int sys_exit(struct syscall_exit_args *args) /* * Jump to syscall specific return augmenter, even if the default one, * "!raw_syscalls:unaugmented" that will just return 1 to return the - * unagmented tracepoint payload. + * unaugmented tracepoint payload. */ bpf_tail_call(args, &syscalls_sys_exit, exit_args.syscall_nr); /* diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index 88108598d6e9..526dcaf9f079 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -390,7 +390,7 @@ jvmti_write_code(void *agent, char const *sym, rec.p.total_size += size; /* - * If JVM is multi-threaded, nultiple concurrent calls to agent + * If JVM is multi-threaded, multiple concurrent calls to agent * may be possible, so protect file writes */ flockfile(fp); @@ -457,7 +457,7 @@ jvmti_write_debug_info(void *agent, uint64_t code, rec.p.total_size = size; /* - * If JVM is multi-threaded, nultiple concurrent calls to agent + * If JVM is multi-threaded, multiple concurrent calls to agent * may be possible, so protect file writes */ flockfile(fp); diff --git a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json index fc4aa6c2ddc9..4e25525b7da6 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json @@ -885,37 +885,37 @@ "MetricName": "flush_rate_percent" }, { - "BriefDescription": "GCT slot utilization (11 to 14) as a % of cycles this thread had atleast 1 slot valid", + "BriefDescription": "GCT slot utilization (11 to 14) as a % of cycles this thread had at least 1 slot valid", "MetricExpr": "PM_GCT_UTIL_11_14_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", "MetricGroup": "general", "MetricName": "gct_util_11to14_slots_percent" }, { - "BriefDescription": "GCT slot utilization (15 to 17) as a % of cycles this thread had atleast 1 slot valid", + "BriefDescription": "GCT slot utilization (15 to 17) as a % of cycles this thread had at least 1 slot valid", "MetricExpr": "PM_GCT_UTIL_15_17_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", "MetricGroup": "general", "MetricName": "gct_util_15to17_slots_percent" }, { - "BriefDescription": "GCT slot utilization 18+ as a % of cycles this thread had atleast 1 slot valid", + "BriefDescription": "GCT slot utilization 18+ as a % of cycles this thread had at least 1 slot valid", "MetricExpr": "PM_GCT_UTIL_18_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", "MetricGroup": "general", "MetricName": "gct_util_18plus_slots_percent" }, { - "BriefDescription": "GCT slot utilization (1 to 2) as a % of cycles this thread had atleast 1 slot valid", + "BriefDescription": "GCT slot utilization (1 to 2) as a % of cycles this thread had at least 1 slot valid", "MetricExpr": "PM_GCT_UTIL_1_2_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", "MetricGroup": "general", "MetricName": "gct_util_1to2_slots_percent" }, { - "BriefDescription": "GCT slot utilization (3 to 6) as a % of cycles this thread had atleast 1 slot valid", + "BriefDescription": "GCT slot utilization (3 to 6) as a % of cycles this thread had at least 1 slot valid", "MetricExpr": "PM_GCT_UTIL_3_6_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", "MetricGroup": "general", "MetricName": "gct_util_3to6_slots_percent" }, { - "BriefDescription": "GCT slot utilization (7 to 10) as a % of cycles this thread had atleast 1 slot valid", + "BriefDescription": "GCT slot utilization (7 to 10) as a % of cycles this thread had at least 1 slot valid", "MetricExpr": "PM_GCT_UTIL_7_10_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", "MetricGroup": "general", "MetricName": "gct_util_7to10_slots_percent" diff --git a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json index 140402d2855f..db86ba36224d 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json @@ -1691,7 +1691,7 @@ "MetricName": "custom_secs" }, { - "BriefDescription": "Percentage Cycles atleast one instruction dispatched", + "BriefDescription": "Percentage Cycles at least one instruction dispatched", "MetricExpr": "PM_1PLUS_PPC_DISP / PM_CYC * 100", "MetricName": "cycles_atleast_one_inst_dispatched_percent" }, diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index e1f3f5c8c550..33aa3c885eaf 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -1149,7 +1149,7 @@ static int process_one_file(const char *fpath, const struct stat *sb, * and directory tree could result in build failure due to table * names not being found. * - * Atleast for now, be strict with processing JSON file names. + * At least for now, be strict with processing JSON file names. * i.e. if JSON file name cannot be mapped to C-style table name, * fail. */ diff --git a/tools/perf/scripts/python/netdev-times.py b/tools/perf/scripts/python/netdev-times.py index ea0c8b90a783..a0cfc7fe5908 100644 --- a/tools/perf/scripts/python/netdev-times.py +++ b/tools/perf/scripts/python/netdev-times.py @@ -356,7 +356,7 @@ def handle_irq_softirq_exit(event_info): return rec_data = {'sirq_ent_t':sirq_ent_t, 'sirq_ext_t':time, 'irq_list':irq_list, 'event_list':event_list} - # merge information realted to a NET_RX softirq + # merge information related to a NET_RX softirq receive_hunk_list.append(rec_data) def handle_napi_poll(event_info): diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c index cc9fbcedb364..ef37353636d8 100644 --- a/tools/perf/tests/bp_signal.c +++ b/tools/perf/tests/bp_signal.c @@ -225,11 +225,11 @@ int test__bp_signal(struct test *test __maybe_unused, int subtest __maybe_unused * * The test case check following error conditions: * - we get stuck in signal handler because of debug - * exception being triggered receursively due to + * exception being triggered recursively due to * the wrong RF EFLAG management * * - we never trigger the sig_handler breakpoint due - * to the rong RF EFLAG management + * to the wrong RF EFLAG management * */ @@ -242,7 +242,7 @@ int test__bp_signal(struct test *test __maybe_unused, int subtest __maybe_unused ioctl(fd3, PERF_EVENT_IOC_ENABLE, 0); /* - * Kick off the test by trigering 'fd1' + * Kick off the test by triggering 'fd1' * breakpoint. */ test_function(); diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 2fdc7b2f996e..9866cddebf23 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -658,7 +658,7 @@ static int do_test_code_reading(bool try_kcore) /* * Both cpus and threads are now owned by evlist * and will be freed by following perf_evlist__set_maps - * call. Getting refference to keep them alive. + * call. Getting reference to keep them alive. */ perf_cpu_map__get(cpus); perf_thread_map__get(threads); diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 3f2e1a581247..890cb1f5bf53 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -47,7 +47,7 @@ static struct sample fake_samples[] = { }; /* - * Will be casted to struct ip_callchain which has all 64 bit entries + * Will be cast to struct ip_callchain which has all 64 bit entries * of nr and ips[]. */ static u64 fake_callchains[][10] = { @@ -297,7 +297,7 @@ out: return err; } -/* callcain + NO children */ +/* callchain + NO children */ static int test2(struct evsel *evsel, struct machine *machine) { int err; diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index a7f6661e6112..026c54743311 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -20,7 +20,7 @@ #if defined(__s390x__) /* Return true if kvm module is available and loaded. Test this - * and retun success when trace point kvm_s390_create_vm + * and return success when trace point kvm_s390_create_vm * exists. Otherwise this test always fails. */ static bool kvm_s390_create_vm_valid(void) diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 55bf52e588be..4968c4106254 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -186,7 +186,7 @@ static int __compute_metric(const char *name, struct value *vals, *ratio2 = compute_single(&metric_events, evlist, &st, name2); out: - /* ... clenup. */ + /* ... cleanup. */ metricgroup__rblist_exit(&metric_events); runtime_stat__exit(&st); evlist__free_stats(evlist); diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 74748ed75b2c..050489807a47 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -80,7 +80,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) * CPU 1 is on core_id 1 and physical_package_id 3 * * Core_id and physical_package_id are platform and architecture - * dependend and might have higher numbers than the CPU id. + * dependent and might have higher numbers than the CPU id. * This actually depends on the configuration. * * In this case process_cpu_topology() prints error message: diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 385894b4a8bb..b8fc5c53ba6f 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -85,7 +85,7 @@ struct mmsghdr { /* * POSIX 1003.1g - ancillary data object information - * Ancillary data consits of a sequence of pairs of + * Ancillary data consists of a sequence of pairs of * (cmsghdr, cmsg_data[]) */ diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 9f75d767b6da..ad0a70f0edaf 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -415,7 +415,7 @@ static int sym_title(struct symbol *sym, struct map *map, char *title, } /* - * This can be called from external jumps, i.e. jumps from one functon + * This can be called from external jumps, i.e. jumps from one function * to another, like from the kernel's entry_SYSCALL_64 function to the * swapgs_restore_regs_and_return_to_usermode() function. * diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 3b9818ee9546..bcfd0a45953b 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -117,7 +117,7 @@ static void hist_browser__update_rows(struct hist_browser *hb) browser->rows -= browser->extra_title_lines; /* * Verify if we were at the last line and that line isn't - * visibe because we now show the header line(s). + * visible because we now show the header line(s). */ index_row = browser->index - browser->top_idx; if (index_row >= browser->rows) diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 9087f1bffd3d..fbb3c4057c30 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -671,7 +671,7 @@ int bpf__probe(struct bpf_object *obj) * After probing, let's consider prologue, which * adds program fetcher to BPF programs. * - * hook_load_preprocessorr() hooks pre-processor + * hook_load_preprocessor() hooks pre-processor * to bpf_program, let it generate prologue * dynamically during loading. */ diff --git a/tools/perf/util/call-path.h b/tools/perf/util/call-path.h index 6b3229106f16..5875cfc8106e 100644 --- a/tools/perf/util/call-path.h +++ b/tools/perf/util/call-path.h @@ -23,7 +23,7 @@ * @children: tree of call paths of functions called * * In combination with the call_return structure, the call_path structure - * defines a context-sensitve call-graph. + * defines a context-sensitive call-graph. */ struct call_path { struct call_path *parent; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 1b60985690bb..8e2777133bd9 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -877,7 +877,7 @@ append_chain_children(struct callchain_node *root, if (!node) return -1; - /* lookup in childrens */ + /* lookup in children */ while (*p) { enum match_result ret; diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 6984c77068a3..2daeaa9a4a24 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -699,7 +699,7 @@ static int collect_config(const char *var, const char *value, /* perf_config_set can contain both user and system config items. * So we should know where each value is from. * The classification would be needed when a particular config file - * is overwrited by setting feature i.e. set_config(). + * is overwritten by setting feature i.e. set_config(). */ if (strcmp(config_file_name, perf_etc_perfconfig()) == 0) { section->from_system_config = true; diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 4052c9ce6e2f..059bcec3f651 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -317,7 +317,7 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq, * This is the first timestamp we've seen since the beginning of traces * or a discontinuity. Since timestamps packets are generated *after* * range packets have been generated, we need to estimate the time at - * which instructions started by substracting the number of instructions + * which instructions started by subtracting the number of instructions * executed to the timestamp. */ packet_queue->timestamp = elem->timestamp - packet_queue->instr_count; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 9ac80fc23c58..7e63e7dedc33 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -202,7 +202,7 @@ void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq, u8 trace_chan_id) { /* - * Wnen a timestamp packet is encountered the backend code + * When a timestamp packet is encountered the backend code * is stopped so that the front end has time to process packets * that were accumulated in the traceID queue. Since there can * be more than one channel per cs_etm_queue, we need to specify @@ -1697,7 +1697,7 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id, * | 1 1 0 1 1 1 1 1 | imm8 | * +-----------------+--------+ * - * According to the specifiction, it only defines SVC for T32 + * According to the specification, it only defines SVC for T32 * with 16 bits instruction and has no definition for 32bits; * so below only read 2 bytes as instruction size for T32. */ @@ -1929,7 +1929,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, /* * If the previous packet is an exception return packet - * and the return address just follows SVC instuction, + * and the return address just follows SVC instruction, * it needs to calibrate the previous packet sample flags * as PERF_IP_FLAG_SYSCALLRET. */ @@ -2003,7 +2003,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, * contain exception type related info so we cannot decide * the exception type purely based on exception return packet. * If we record the exception number from exception packet and - * reuse it for excpetion return packet, this is not reliable + * reuse it for exception return packet, this is not reliable * due the trace can be discontinuity or the interrupt can * be nested, thus the recorded exception number cannot be * used for exception return packet for these two cases. diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 85ed11e9d2a7..36428918411e 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -12,7 +12,8 @@ struct perf_session; -/* Versionning header in case things need tro change in the future. That way +/* + * Versioning header in case things need to change in the future. That way * decoding of old snapshot is still possible. */ enum { @@ -77,7 +78,7 @@ enum { /* * ETMv3 exception encoding number: - * See Embedded Trace Macrocell spcification (ARM IHI 0014Q) + * See Embedded Trace Macrocell specification (ARM IHI 0014Q) * table 7-12 Encoding of Exception[3:0] for non-ARMv7-M processors. */ enum { diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 8b67bd97d122..a9c375e63e73 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -949,7 +949,7 @@ static char *change_name(char *name, char *orig_name, int dup) /* * Add '_' prefix to potential keywork. According to * Mathieu Desnoyers (https://lore.kernel.org/lkml/1074266107.40857.1422045946295.JavaMail.zimbra@efficios.com), - * futher CTF spec updating may require us to use '$'. + * further CTF spec updating may require us to use '$'. */ if (dup < 0) len = strlen(name) + sizeof("_"); diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c index 39c05200ed65..ddf33d58bcd3 100644 --- a/tools/perf/util/demangle-java.c +++ b/tools/perf/util/demangle-java.c @@ -147,7 +147,7 @@ error: * Demangle Java function signature (openJDK, not GCJ) * input: * str: string to parse. String is not modified - * flags: comobination of JAVA_DEMANGLE_* flags to modify demangling + * flags: combination of JAVA_DEMANGLE_* flags to modify demangling * return: * if input can be demangled, then a newly allocated string is returned. * if input cannot be demangled, then NULL is returned @@ -164,7 +164,7 @@ java_demangle_sym(const char *str, int flags) if (!str) return NULL; - /* find start of retunr type */ + /* find start of return type */ p = strrchr(str, ')'); if (!p) return NULL; diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index cd2fe64a3c5d..52e7101c5609 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -216,7 +216,7 @@ struct dso { /* dso__for_each_symbol - iterate over the symbols of given type * - * @dso: the 'struct dso *' in which symbols itereated + * @dso: the 'struct dso *' in which symbols are iterated * @pos: the 'struct symbol *' to use as a loop cursor * @n: the 'struct rb_node *' to use as a temporary storage */ diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 7b2d471a6419..b2f4920e19a6 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -91,7 +91,7 @@ static Dwarf_Line *cu_getsrc_die(Dwarf_Die *cu_die, Dwarf_Addr addr) return NULL; } while (laddr == addr); l++; - /* Going foward to find the statement line */ + /* Going forward to find the statement line */ do { line = dwarf_onesrcline(lines, l++); if (!line || dwarf_lineaddr(line, &laddr) != 0 || @@ -177,7 +177,7 @@ int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, * die_get_linkage_name - Get the linkage name of the object * @dw_die: A DIE of the object * - * Get the linkage name attiribute of given @dw_die. + * Get the linkage name attribute of given @dw_die. * For C++ binary, the linkage name will be the mangled symbol. */ const char *die_get_linkage_name(Dwarf_Die *dw_die) @@ -739,7 +739,7 @@ static int __die_walk_instances_cb(Dwarf_Die *inst, void *data) * @data: user data * * Walk on the instances of give @in_die. @in_die must be an inlined function - * declartion. This returns the return value of @callback if it returns + * declaration. This returns the return value of @callback if it returns * non-zero value, or -ENOENT if there is no instance. */ int die_walk_instances(Dwarf_Die *or_die, int (*callback)(Dwarf_Die *, void *), diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 506006e0cf66..cb99646843a9 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -22,7 +22,7 @@ const char *cu_get_comp_dir(Dwarf_Die *cu_die); int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, const char **fname, int *lineno); -/* Walk on funcitons at given address */ +/* Walk on functions at given address */ int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, int (*callback)(Dwarf_Die *, void *), void *data); diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h index 859cb34fcff2..631a4af2ed86 100644 --- a/tools/perf/util/events_stats.h +++ b/tools/perf/util/events_stats.h @@ -21,7 +21,7 @@ * all struct perf_record_lost_samples.lost fields reported. * * The total_period is needed because by default auto-freq is used, so - * multipling nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get + * multiplying nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get * the total number of low level events, it is necessary to to sum all struct * perf_record_sample.period and stash the result in total_period. */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 435bbfd00551..f1c79ecf8107 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1210,7 +1210,7 @@ bool evlist__valid_read_format(struct evlist *evlist) } } - /* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */ + /* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */ if ((sample_type & PERF_SAMPLE_READ) && !(read_format & PERF_FORMAT_ID)) { return false; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 7ecbc8e2fbfa..2d2614eeaa20 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -621,7 +621,7 @@ const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_AL #define COP(x) (1 << x) /* - * cache operartion stat + * cache operation stat * L1I : Read and prefetch only * ITLB and BPU : Read-only */ @@ -2275,7 +2275,7 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, /* * Undo swap of u64, then swap on individual u32s, * get the size of the raw area and undo all of the - * swap. The pevent interface handles endianity by + * swap. The pevent interface handles endianness by * itself. */ if (swapped) { diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index dcf8d19b83c8..85df3e4771e4 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -3,7 +3,7 @@ #define PARSE_CTX_H 1 // There are fixes that need to land upstream before we can use libbpf's headers, -// for now use our copy uncoditionally, since the data structures at this point +// for now use our copy unconditionally, since the data structures at this point // are exactly the same, no problem. //#ifdef HAVE_LIBBPF_SUPPORT //#include diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 20effdff76ce..aa1e42518d37 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -127,7 +127,7 @@ static int __do_write_buf(struct feat_fd *ff, const void *buf, size_t size) return 0; } -/* Return: 0 if succeded, -ERR if failed. */ +/* Return: 0 if succeeded, -ERR if failed. */ int do_write(struct feat_fd *ff, const void *buf, size_t size) { if (!ff->buf) @@ -135,7 +135,7 @@ int do_write(struct feat_fd *ff, const void *buf, size_t size) return __do_write_buf(ff, buf, size); } -/* Return: 0 if succeded, -ERR if failed. */ +/* Return: 0 if succeeded, -ERR if failed. */ static int do_write_bitmap(struct feat_fd *ff, unsigned long *set, u64 size) { u64 *p = (u64 *) set; @@ -154,7 +154,7 @@ static int do_write_bitmap(struct feat_fd *ff, unsigned long *set, u64 size) return 0; } -/* Return: 0 if succeded, -ERR if failed. */ +/* Return: 0 if succeeded, -ERR if failed. */ int write_padded(struct feat_fd *ff, const void *bf, size_t count, size_t count_aligned) { @@ -170,7 +170,7 @@ int write_padded(struct feat_fd *ff, const void *bf, #define string_size(str) \ (PERF_ALIGN((strlen(str) + 1), NAME_ALIGN) + sizeof(u32)) -/* Return: 0 if succeded, -ERR if failed. */ +/* Return: 0 if succeeded, -ERR if failed. */ static int do_write_string(struct feat_fd *ff, const char *str) { u32 len, olen; @@ -266,7 +266,7 @@ static char *do_read_string(struct feat_fd *ff) return NULL; } -/* Return: 0 if succeded, -ERR if failed. */ +/* Return: 0 if succeeded, -ERR if failed. */ static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize) { unsigned long *set; @@ -2874,7 +2874,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) int err = -1; if (ff->ph->needs_swap) { - pr_warning("interpreting bpf_prog_info from systems with endianity is not yet supported\n"); + pr_warning("interpreting bpf_prog_info from systems with endianness is not yet supported\n"); return 0; } @@ -2942,7 +2942,7 @@ static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) int err = -1; if (ff->ph->needs_swap) { - pr_warning("interpreting btf from systems with endianity is not yet supported\n"); + pr_warning("interpreting btf from systems with endianness is not yet supported\n"); return 0; } @@ -3481,11 +3481,11 @@ static const size_t attr_pipe_abi_sizes[] = { }; /* - * In the legacy pipe format, there is an implicit assumption that endiannesss + * In the legacy pipe format, there is an implicit assumption that endianness * between host recording the samples, and host parsing the samples is the * same. This is not always the case given that the pipe output may always be * redirected into a file and analyzed on a different machine with possibly a - * different endianness and perf_event ABI revsions in the perf tool itself. + * different endianness and perf_event ABI revisions in the perf tool itself. */ static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph) { diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index f6e28ac231b7..8658d42ce57a 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -3569,7 +3569,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, /* * Since this thread will not be kept in any rbtree not in a * list, initialize its list node so that at thread__put() the - * current thread lifetime assuption is kept and we don't segfault + * current thread lifetime assumption is kept and we don't segfault * at list_del_init(). */ INIT_LIST_HEAD(&pt->unknown_thread->node); diff --git a/tools/perf/util/levenshtein.c b/tools/perf/util/levenshtein.c index a217ecf0359d..6a6712635aa4 100644 --- a/tools/perf/util/levenshtein.c +++ b/tools/perf/util/levenshtein.c @@ -30,7 +30,7 @@ * * It does so by calculating the costs of the path ending in characters * i (in string1) and j (in string2), respectively, given that the last - * operation is a substition, a swap, a deletion, or an insertion. + * operation is a substitution, a swap, a deletion, or an insertion. * * This implementation allows the costs to be weighted: * diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c index 6b4e5a0892f8..c397be0c2e32 100644 --- a/tools/perf/util/libunwind/arm64.c +++ b/tools/perf/util/libunwind/arm64.c @@ -4,7 +4,7 @@ * generic one. * * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch - * name and the defination of this function is included directly from + * name and the definition of this function is included directly from * 'arch/arm64/util/unwind-libunwind.c', to make sure that this function * is defined no matter what arch the host is. * diff --git a/tools/perf/util/libunwind/x86_32.c b/tools/perf/util/libunwind/x86_32.c index 21c216c40a3b..b2b92d030aef 100644 --- a/tools/perf/util/libunwind/x86_32.c +++ b/tools/perf/util/libunwind/x86_32.c @@ -4,7 +4,7 @@ * generic one. * * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch - * name and the defination of this function is included directly from + * name and the definition of this function is included directly from * 'arch/x86/util/unwind-libunwind.c', to make sure that this function * is defined no matter what arch the host is. * diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index dbdffb6673fe..3ceaf7ef3301 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -471,7 +471,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, /* * This is an optional work. Even it fail we can continue our - * work. Needn't to check error return. + * work. Needn't check error return. */ llvm__get_kbuild_opts(&kbuild_dir, &kbuild_include_opts); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 435771eed62b..3ff4936a15a4 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -905,7 +905,7 @@ static struct map *machine__addnew_module_map(struct machine *machine, u64 start maps__insert(&machine->kmaps, map); - /* Put the map here because maps__insert alread got it */ + /* Put the map here because maps__insert already got it */ map__put(map); out: /* put the dso here, corresponding to machine__findnew_module_dso */ @@ -1952,7 +1952,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event * maps because that is what the kernel just did. * * But when synthesizing, this should not be done. If we do, we end up - * with overlapping maps as we process the sythesized MMAP2 events that + * with overlapping maps as we process the synthesized MMAP2 events that * get delivered shortly thereafter. * * Use the FORK event misc flags in an internal way to signal this @@ -2518,7 +2518,7 @@ static bool has_stitched_lbr(struct thread *thread, /* * Check if there are identical LBRs between two samples. - * Identicall LBRs must have same from, to and flags values. Also, + * Identical LBRs must have same from, to and flags values. Also, * they have to be saved in the same LBR registers (same physical * index). * @@ -2588,7 +2588,7 @@ err: } /* - * Recolve LBR callstack chain sample + * Resolve LBR callstack chain sample * Return: * 1 on success get LBR callchain information * 0 no available LBR callchain information, should try fp diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 9f32825c98d8..d32f5b28c1fb 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -75,7 +75,7 @@ struct thread; /* map__for_each_symbol - iterate over the symbols in the given map * - * @map: the 'struct map *' in which symbols itereated + * @map: the 'struct map *' in which symbols are iterated * @pos: the 'struct symbol *' to use as a loop cursor * @n: the 'struct rb_node *' to use as a temporary storage * Note: caller must ensure map->dso is not NULL (map is loaded). @@ -86,7 +86,7 @@ struct thread; /* map__for_each_symbol_with_name - iterate over the symbols in the given map * that have the given name * - * @map: the 'struct map *' in which symbols itereated + * @map: the 'struct map *' in which symbols are iterated * @sym_name: the symbol name * @pos: the 'struct symbol *' to use as a loop cursor */ diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 755cef7e0625..63dd383b6ce2 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -81,7 +81,7 @@ struct c2c_stats { u32 rmt_dram; /* count of loads miss to remote DRAM */ u32 blk_data; /* count of loads blocked by data */ u32 blk_addr; /* count of loads blocked by address conflict */ - u32 nomap; /* count of load/stores with no phys adrs */ + u32 nomap; /* count of load/stores with no phys addrs */ u32 noparse; /* count of unparsable data sources */ }; diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 26c990e32378..6acb44ad439b 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -181,7 +181,7 @@ static bool evsel_same_pmu(struct evsel *ev1, struct evsel *ev2) * @pctx: the parse context for the metric expression. * @metric_no_merge: don't attempt to share events for the metric with other * metrics. - * @has_constraint: is there a contraint on the group of events? In which case + * @has_constraint: is there a constraint on the group of events? In which case * the events won't be grouped. * @metric_events: out argument, null terminated array of evsel's associated * with the metric. diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 42c84adeb2fb..9ecb45bea948 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -843,9 +843,9 @@ split_bpf_config_terms(struct list_head *evt_head_config, struct parse_events_term *term, *temp; /* - * Currectly, all possible user config term + * Currently, all possible user config term * belong to bpf object. parse_events__is_hardcoded_term() - * happends to be a good flag. + * happens to be a good flag. * * See parse_events_config_bpf() and * config_term_tracepoint(). @@ -895,7 +895,7 @@ int parse_events_load_bpf(struct parse_events_state *parse_state, /* * Caller doesn't know anything about obj_head_config, - * so combine them together again before returnning. + * so combine them together again before returning. */ if (head_config) list_splice_tail(&obj_head_config, head_config); @@ -1182,10 +1182,10 @@ do { \ } /* - * Check term availbility after basic checking so + * Check term availability after basic checking so * PARSE_EVENTS__TERM_TYPE_USER can be found and filtered. * - * If check availbility at the entry of this function, + * If check availability at the entry of this function, * user will see "'' is not usable in 'perf stat'" * if an invalid config term is provided for legacy events * (for example, instructions/badterm/...), which is confusing. diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 44ef28302fc7..88da5cf6aee8 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1069,7 +1069,7 @@ int perf_pmu__format_type(struct list_head *formats, const char *name) /* * Sets value based on the format definition (format parameter) - * and unformated value (value parameter). + * and unformatted value (value parameter). */ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, bool zero) @@ -1408,7 +1408,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, } /* - * if no unit or scale foundin aliases, then + * if no unit or scale found in aliases, then * set defaults as for evsel * unit cannot left to NULL */ diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index a9cff3a50ddf..a78c8d59a555 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -3228,7 +3228,7 @@ errout: return err; } -/* Concatinate two arrays */ +/* Concatenate two arrays */ static void *memcat(void *a, size_t sz_a, void *b, size_t sz_b) { void *ret; @@ -3258,7 +3258,7 @@ concat_probe_trace_events(struct probe_trace_event **tevs, int *ntevs, if (*ntevs + ntevs2 > probe_conf.max_probes) ret = -E2BIG; else { - /* Concatinate the array of probe_trace_event */ + /* Concatenate the array of probe_trace_event */ new_tevs = memcat(*tevs, (*ntevs) * sizeof(**tevs), *tevs2, ntevs2 * sizeof(**tevs2)); if (!new_tevs) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 1b118c9c86a6..866f2d514d72 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -164,7 +164,7 @@ static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs) /* * Convert a location into trace_arg. * If tvar == NULL, this just checks variable can be converted. - * If fentry == true and vr_die is a parameter, do huristic search + * If fentry == true and vr_die is a parameter, do heuristic search * for the location fuzzed by function entry mcount. */ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr, @@ -498,7 +498,7 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, " nor array.\n", varname); return -EINVAL; } - /* While prcessing unnamed field, we don't care about this */ + /* While processing unnamed field, we don't care about this */ if (field->ref && dwarf_diename(vr_die)) { pr_err("Semantic error: %s must be referred by '.'\n", field->name); @@ -1832,7 +1832,7 @@ static int line_range_walk_cb(const char *fname, int lineno, (lf->lno_s > lineno || lf->lno_e < lineno)) return 0; - /* Make sure this line can be reversable */ + /* Make sure this line can be reversible */ if (cu_find_lineinfo(&lf->cu_die, addr, &__fname, &__lineno) > 0 && (lineno != __lineno || strcmp(fname, __fname))) return 0; diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 078a71773565..8130b56aa04b 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -45,7 +45,7 @@ * the data portion is mmap()'ed. * * To sort the queues in chronological order, all queue access is controlled - * by the auxtrace_heap. This is basicly a stack, each stack element has two + * by the auxtrace_heap. This is basically a stack, each stack element has two * entries, the queue number and a time stamp. However the stack is sorted by * the time stamps. The highest time stamp is at the bottom the lowest * (nearest) time stamp is at the top. That sort order is maintained at all @@ -65,11 +65,11 @@ * stamp of the last processed entry of the auxtrace_buffer replaces the * current auxtrace_heap top. * - * 3. Auxtrace_queues might run of out data and are feeded by the + * 3. Auxtrace_queues might run of out data and are fed by the * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event(). * * Event Generation - * Each sampling-data entry in the auxilary trace data generates a perf sample. + * Each sampling-data entry in the auxiliary trace data generates a perf sample. * This sample is filled * with data from the auxtrace such as PID/TID, instruction address, CPU state, * etc. This sample is processed with perf_session__deliver_synth_event() to @@ -575,7 +575,7 @@ static unsigned long long get_trailer_time(const unsigned char *buf) * pointer to the queue, the second parameter is the time stamp. This * is the time stamp: * - of the event that triggered this processing. - * - or the time stamp when the last proccesing of this queue stopped. + * - or the time stamp when the last processing of this queue stopped. * In this case it stopped at a 4KB page boundary and record the * position on where to continue processing on the next invocation * (see buffer->use_data and buffer->use_size). @@ -640,7 +640,7 @@ static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts) goto out; } - pos += dsdes; /* Skip diagnositic entry */ + pos += dsdes; /* Skip diagnostic entry */ /* Check for trailer entry */ if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) { diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index c83c2c6564e0..4e4aa4c97ac5 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1531,7 +1531,7 @@ static void set_table_handlers(struct tables *tables) * Attempt to use the call path root from the call return * processor, if the call return processor is in use. Otherwise, * we allocate a new call path root. This prevents exporting - * duplicate call path ids when both are in use simultaniously. + * duplicate call path ids when both are in use simultaneously. */ if (tables->dbe.crp) tables->dbe.cpr = tables->dbe.crp->cpr; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 859832a82496..9a8808507bd9 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1069,7 +1069,7 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample) * in "to" register. * For example, there is a call stack * "A"->"B"->"C"->"D". - * The LBR registers will recorde like + * The LBR registers will be recorded like * "C"->"D", "B"->"C", "A"->"B". * So only the first "to" register and all "from" * registers are needed to construct the whole stack. @@ -1584,7 +1584,7 @@ static s64 perf_session__process_user_event(struct perf_session *session, return tool->event_update(tool, event, &session->evlist); case PERF_RECORD_HEADER_EVENT_TYPE: /* - * Depreceated, but we need to handle it for sake + * Deprecated, but we need to handle it for sake * of old data files create in pipe mode. */ return 0; diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index ea94d8628980..be94d7046fa0 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -12,7 +12,7 @@ * build complex strings/buffers whose final size isn't easily known. * * It is NOT legal to copy the ->buf pointer away. - * `strbuf_detach' is the operation that detachs a buffer from its shell + * `strbuf_detach' is the operation that detaches a buffer from its shell * while keeping the shell valid wrt its invariants. * * 2. the ->buf member is a byte array that has at least ->len + 1 bytes diff --git a/tools/perf/util/strfilter.h b/tools/perf/util/strfilter.h index e0c25a40f796..c05aca9ca582 100644 --- a/tools/perf/util/strfilter.h +++ b/tools/perf/util/strfilter.h @@ -8,8 +8,8 @@ /* A node of string filter */ struct strfilter_node { - struct strfilter_node *l; /* Tree left branche (for &,|) */ - struct strfilter_node *r; /* Tree right branche (for !,&,|) */ + struct strfilter_node *l; /* Tree left branch (for &,|) */ + struct strfilter_node *r; /* Tree right branch (for !,&,|) */ const char *p; /* Operator or rule */ }; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 6dff843fd883..4c56aa837434 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1058,7 +1058,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, curr_dso->symtab_type = dso->symtab_type; maps__insert(kmaps, curr_map); /* - * Add it before we drop the referece to curr_map, i.e. while + * Add it before we drop the reference to curr_map, i.e. while * we still are sure to have a reference to this DSO via * *curr_map->dso. */ diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index b698046ec2db..49c9353ff9fc 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1210,7 +1210,7 @@ static size_t mask_size(struct perf_cpu_map *map, int *max) *max = 0; for (i = 0; i < map->nr; i++) { - /* bit possition of the cpu is + 1 */ + /* bit position of the cpu is + 1 */ int bit = map->map[i] + 1; if (bit > *max) @@ -1236,7 +1236,7 @@ void *cpu_map_data__alloc(struct perf_cpu_map *map, size_t *size, u16 *type, int * mask = size of 'struct perf_record_record_cpu_map' + * maximum cpu bit converted to size of longs * - * and finaly + the size of 'struct perf_record_cpu_map_data'. + * and finally + the size of 'struct perf_record_cpu_map_data'. */ size_cpus = cpus_size(map); size_mask = mask_size(map, max); diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 9aededc0bc06..71a353349181 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -82,7 +82,7 @@ UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug, #define DW_EH_PE_funcrel 0x40 /* start-of-procedure-relative */ #define DW_EH_PE_aligned 0x50 /* aligned pointer */ -/* Flags intentionaly not handled, since they're not needed: +/* Flags intentionally not handled, since they're not needed: * #define DW_EH_PE_indirect 0x80 * #define DW_EH_PE_uleb128 0x01 * #define DW_EH_PE_udata2 0x02 -- cgit v1.2.3 From 7fac83aaf2eecc9e7e7b72da694c49bb4ce7fdfc Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 16 Mar 2021 14:18:35 -0700 Subject: perf stat: Introduce 'bperf' to share hardware PMCs with BPF The perf tool uses performance monitoring counters (PMCs) to monitor system performance. The PMCs are limited hardware resources. For example, Intel CPUs have 3x fixed PMCs and 4x programmable PMCs per cpu. Modern data center systems use these PMCs in many different ways: system level monitoring, (maybe nested) container level monitoring, per process monitoring, profiling (in sample mode), etc. In some cases, there are more active perf_events than available hardware PMCs. To allow all perf_events to have a chance to run, it is necessary to do expensive time multiplexing of events. On the other hand, many monitoring tools count the common metrics (cycles, instructions). It is a waste to have multiple tools create multiple perf_events of "cycles" and occupy multiple PMCs. bperf tries to reduce such wastes by allowing multiple perf_events of "cycles" or "instructions" (at different scopes) to share PMUs. Instead of having each perf-stat session to read its own perf_events, bperf uses BPF programs to read the perf_events and aggregate readings to BPF maps. Then, the perf-stat session(s) reads the values from these BPF maps. Please refer to the comment before the definition of bperf_ops for the description of bperf architecture. bperf is off by default. To enable it, pass --bpf-counters option to perf-stat. bperf uses a BPF hashmap to share information about BPF programs and maps used by bperf. This map is pinned to bpffs. The default path is /sys/fs/bpf/perf_attr_map. The user could change the path with option --bpf-attr-map. Committer testing: # dmesg|grep "Performance Events" -A5 [ 0.225277] Performance Events: Fam17h+ core perfctr, AMD PMU driver. [ 0.225280] ... version: 0 [ 0.225280] ... bit width: 48 [ 0.225281] ... generic registers: 6 [ 0.225281] ... value mask: 0000ffffffffffff [ 0.225281] ... max period: 00007fffffffffff # # for a in $(seq 6) ; do perf stat -a -e cycles,instructions sleep 100000 & done [1] 2436231 [2] 2436232 [3] 2436233 [4] 2436234 [5] 2436235 [6] 2436236 # perf stat -a -e cycles,instructions sleep 0.1 Performance counter stats for 'system wide': 310,326,987 cycles (41.87%) 236,143,290 instructions # 0.76 insn per cycle (41.87%) 0.100800885 seconds time elapsed # We can see that the counters were enabled for this workload 41.87% of the time. Now with --bpf-counters: # for a in $(seq 32) ; do perf stat --bpf-counters -a -e cycles,instructions sleep 100000 & done [1] 2436514 [2] 2436515 [3] 2436516 [4] 2436517 [5] 2436518 [6] 2436519 [7] 2436520 [8] 2436521 [9] 2436522 [10] 2436523 [11] 2436524 [12] 2436525 [13] 2436526 [14] 2436527 [15] 2436528 [16] 2436529 [17] 2436530 [18] 2436531 [19] 2436532 [20] 2436533 [21] 2436534 [22] 2436535 [23] 2436536 [24] 2436537 [25] 2436538 [26] 2436539 [27] 2436540 [28] 2436541 [29] 2436542 [30] 2436543 [31] 2436544 [32] 2436545 # # ls -la /sys/fs/bpf/perf_attr_map -rw-------. 1 root root 0 Mar 23 14:53 /sys/fs/bpf/perf_attr_map # bpftool map | grep bperf | wc -l 64 # # bpftool map | tail 1265: percpu_array name accum_readings flags 0x0 key 4B value 24B max_entries 1 memlock 4096B 1266: hash name filter flags 0x0 key 4B value 4B max_entries 1 memlock 4096B 1267: array name bperf_fo.bss flags 0x400 key 4B value 8B max_entries 1 memlock 4096B btf_id 996 pids perf(2436545) 1268: percpu_array name accum_readings flags 0x0 key 4B value 24B max_entries 1 memlock 4096B 1269: hash name filter flags 0x0 key 4B value 4B max_entries 1 memlock 4096B 1270: array name bperf_fo.bss flags 0x400 key 4B value 8B max_entries 1 memlock 4096B btf_id 997 pids perf(2436541) 1285: array name pid_iter.rodata flags 0x480 key 4B value 4B max_entries 1 memlock 4096B btf_id 1017 frozen pids bpftool(2437504) 1286: array flags 0x0 key 4B value 32B max_entries 1 memlock 4096B # # bpftool map dump id 1268 | tail value (CPU 21): 8f f3 bc ca 00 00 00 00 80 fd 2a d1 4d 00 00 00 80 fd 2a d1 4d 00 00 00 value (CPU 22): 7e d5 64 4d 00 00 00 00 a4 8a 2e ee 4d 00 00 00 a4 8a 2e ee 4d 00 00 00 value (CPU 23): a7 78 3e 06 01 00 00 00 b2 34 94 f6 4d 00 00 00 b2 34 94 f6 4d 00 00 00 Found 1 element # bpftool map dump id 1268 | tail value (CPU 21): c6 8b d9 ca 00 00 00 00 20 c6 fc 83 4e 00 00 00 20 c6 fc 83 4e 00 00 00 value (CPU 22): 9c b4 d2 4d 00 00 00 00 3e 0c df 89 4e 00 00 00 3e 0c df 89 4e 00 00 00 value (CPU 23): 18 43 66 06 01 00 00 00 5b 69 ed 83 4e 00 00 00 5b 69 ed 83 4e 00 00 00 Found 1 element # bpftool map dump id 1268 | tail value (CPU 21): f2 6e db ca 00 00 00 00 92 67 4c ba 4e 00 00 00 92 67 4c ba 4e 00 00 00 value (CPU 22): dc 8e e1 4d 00 00 00 00 d9 32 7a c5 4e 00 00 00 d9 32 7a c5 4e 00 00 00 value (CPU 23): bd 2b 73 06 01 00 00 00 7c 73 87 bf 4e 00 00 00 7c 73 87 bf 4e 00 00 00 Found 1 element # # perf stat --bpf-counters -a -e cycles,instructions sleep 0.1 Performance counter stats for 'system wide': 119,410,122 cycles 152,105,479 instructions # 1.27 insn per cycle 0.101395093 seconds time elapsed # See? We had the counters enabled all the time. Signed-off-by: Song Liu Reviewed-by: Jiri Olsa Acked-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: kernel-team@fb.com Link: http://lore.kernel.org/lkml/20210316211837.910506-2-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 11 + tools/perf/Makefile.perf | 1 + tools/perf/builtin-stat.c | 10 + tools/perf/util/bpf_counter.c | 519 +++++++++++++++++++++++++- tools/perf/util/bpf_skel/bperf.h | 14 + tools/perf/util/bpf_skel/bperf_follower.bpf.c | 69 ++++ tools/perf/util/bpf_skel/bperf_leader.bpf.c | 46 +++ tools/perf/util/bpf_skel/bperf_u.h | 14 + tools/perf/util/evsel.h | 20 +- tools/perf/util/target.h | 4 +- 10 files changed, 701 insertions(+), 7 deletions(-) create mode 100644 tools/perf/util/bpf_skel/bperf.h create mode 100644 tools/perf/util/bpf_skel/bperf_follower.bpf.c create mode 100644 tools/perf/util/bpf_skel/bperf_leader.bpf.c create mode 100644 tools/perf/util/bpf_skel/bperf_u.h diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 3055aad38d46..744211fa8c18 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -93,6 +93,17 @@ report:: 1.102235068 seconds time elapsed +--bpf-counters:: + Use BPF programs to aggregate readings from perf_events. This + allows multiple perf-stat sessions that are counting the same metric (cycles, + instructions, etc.) to share hardware counters. + +--bpf-attr-map:: + With option "--bpf-counters", different perf-stat sessions share + information about shared BPF programs and maps via a pinned hashmap. + Use "--bpf-attr-map" to specify the path of this pinned hashmap. + The default path is /sys/fs/bpf/perf_attr_map. + ifdef::HAVE_LIBPFM[] --pfm-events events:: Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index a7d768fdc8a1..090fb9d62665 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1007,6 +1007,7 @@ python-clean: SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel) SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp) SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h +SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h ifdef BUILD_BPF_SKEL BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d140ffbb5b34..d34e22dce2c8 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -792,6 +792,12 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) } evlist__for_each_cpu (evsel_list, i, cpu) { + /* + * bperf calls evsel__open_per_cpu() in bperf__load(), so + * no need to call it again here. + */ + if (target.use_bpf) + break; affinity__set(&affinity, cpu); evlist__for_each_entry(evsel_list, counter) { @@ -1146,6 +1152,10 @@ static struct option stat_options[] = { #ifdef HAVE_BPF_SKEL OPT_STRING('b', "bpf-prog", &target.bpf_str, "bpf-prog-id", "stat events on existing bpf program id"), + OPT_BOOLEAN(0, "bpf-counters", &target.use_bpf, + "use bpf program to count events"), + OPT_STRING(0, "bpf-attr-map", &target.attr_map, "attr-map-path", + "path to perf_event_attr map"), #endif OPT_BOOLEAN('a', "all-cpus", &target.system_wide, "system-wide collection from all CPUs"), diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 04f89120b323..81d1df3c4ec0 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -12,14 +13,45 @@ #include #include #include +#include #include "bpf_counter.h" #include "counts.h" #include "debug.h" #include "evsel.h" +#include "evlist.h" #include "target.h" +#include "cpumap.h" +#include "thread_map.h" #include "bpf_skel/bpf_prog_profiler.skel.h" +#include "bpf_skel/bperf_u.h" +#include "bpf_skel/bperf_leader.skel.h" +#include "bpf_skel/bperf_follower.skel.h" + +/* + * bperf uses a hashmap, the attr_map, to track all the leader programs. + * The hashmap is pinned in bpffs. flock() on this file is used to ensure + * no concurrent access to the attr_map. The key of attr_map is struct + * perf_event_attr, and the value is struct perf_event_attr_map_entry. + * + * struct perf_event_attr_map_entry contains two __u32 IDs, bpf_link of the + * leader prog, and the diff_map. Each perf-stat session holds a reference + * to the bpf_link to make sure the leader prog is attached to sched_switch + * tracepoint. + * + * Since the hashmap only contains IDs of the bpf_link and diff_map, it + * does not hold any references to the leader program. Once all perf-stat + * sessions of these events exit, the leader prog, its maps, and the + * perf_events will be freed. + */ +struct perf_event_attr_map_entry { + __u32 link_id; + __u32 diff_map_id; +}; + +#define DEFAULT_ATTR_MAP_PATH "fs/bpf/perf_attr_map" +#define ATTR_MAP_SIZE 16 static inline void *u64_to_ptr(__u64 ptr) { @@ -274,17 +306,494 @@ struct bpf_counter_ops bpf_program_profiler_ops = { .install_pe = bpf_program_profiler__install_pe, }; +static __u32 bpf_link_get_id(int fd) +{ + struct bpf_link_info link_info = {0}; + __u32 link_info_len = sizeof(link_info); + + bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len); + return link_info.id; +} + +static __u32 bpf_link_get_prog_id(int fd) +{ + struct bpf_link_info link_info = {0}; + __u32 link_info_len = sizeof(link_info); + + bpf_obj_get_info_by_fd(fd, &link_info, &link_info_len); + return link_info.prog_id; +} + +static __u32 bpf_map_get_id(int fd) +{ + struct bpf_map_info map_info = {0}; + __u32 map_info_len = sizeof(map_info); + + bpf_obj_get_info_by_fd(fd, &map_info, &map_info_len); + return map_info.id; +} + +static int bperf_lock_attr_map(struct target *target) +{ + char path[PATH_MAX]; + int map_fd, err; + + if (target->attr_map) { + scnprintf(path, PATH_MAX, "%s", target->attr_map); + } else { + scnprintf(path, PATH_MAX, "%s/%s", sysfs__mountpoint(), + DEFAULT_ATTR_MAP_PATH); + } + + if (access(path, F_OK)) { + map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, + sizeof(struct perf_event_attr), + sizeof(struct perf_event_attr_map_entry), + ATTR_MAP_SIZE, 0); + if (map_fd < 0) + return -1; + + err = bpf_obj_pin(map_fd, path); + if (err) { + /* someone pinned the map in parallel? */ + close(map_fd); + map_fd = bpf_obj_get(path); + if (map_fd < 0) + return -1; + } + } else { + map_fd = bpf_obj_get(path); + if (map_fd < 0) + return -1; + } + + err = flock(map_fd, LOCK_EX); + if (err) { + close(map_fd); + return -1; + } + return map_fd; +} + +/* trigger the leader program on a cpu */ +static int bperf_trigger_reading(int prog_fd, int cpu) +{ + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .ctx_in = NULL, + .ctx_size_in = 0, + .flags = BPF_F_TEST_RUN_ON_CPU, + .cpu = cpu, + .retval = 0, + ); + + return bpf_prog_test_run_opts(prog_fd, &opts); +} + +static int bperf_check_target(struct evsel *evsel, + struct target *target, + enum bperf_filter_type *filter_type, + __u32 *filter_entry_cnt) +{ + if (evsel->leader->core.nr_members > 1) { + pr_err("bpf managed perf events do not yet support groups.\n"); + return -1; + } + + /* determine filter type based on target */ + if (target->system_wide) { + *filter_type = BPERF_FILTER_GLOBAL; + *filter_entry_cnt = 1; + } else if (target->cpu_list) { + *filter_type = BPERF_FILTER_CPU; + *filter_entry_cnt = perf_cpu_map__nr(evsel__cpus(evsel)); + } else if (target->tid) { + *filter_type = BPERF_FILTER_PID; + *filter_entry_cnt = perf_thread_map__nr(evsel->core.threads); + } else if (target->pid || evsel->evlist->workload.pid != -1) { + *filter_type = BPERF_FILTER_TGID; + *filter_entry_cnt = perf_thread_map__nr(evsel->core.threads); + } else { + pr_err("bpf managed perf events do not yet support these targets.\n"); + return -1; + } + + return 0; +} + +static struct perf_cpu_map *all_cpu_map; + +static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd, + struct perf_event_attr_map_entry *entry) +{ + struct bperf_leader_bpf *skel = bperf_leader_bpf__open(); + int link_fd, diff_map_fd, err; + struct bpf_link *link = NULL; + + if (!skel) { + pr_err("Failed to open leader skeleton\n"); + return -1; + } + + bpf_map__resize(skel->maps.events, libbpf_num_possible_cpus()); + err = bperf_leader_bpf__load(skel); + if (err) { + pr_err("Failed to load leader skeleton\n"); + goto out; + } + + err = -1; + link = bpf_program__attach(skel->progs.on_switch); + if (!link) { + pr_err("Failed to attach leader program\n"); + goto out; + } + + link_fd = bpf_link__fd(link); + diff_map_fd = bpf_map__fd(skel->maps.diff_readings); + entry->link_id = bpf_link_get_id(link_fd); + entry->diff_map_id = bpf_map_get_id(diff_map_fd); + err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, entry, BPF_ANY); + assert(err == 0); + + evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry->link_id); + assert(evsel->bperf_leader_link_fd >= 0); + + /* + * save leader_skel for install_pe, which is called within + * following evsel__open_per_cpu call + */ + evsel->leader_skel = skel; + evsel__open_per_cpu(evsel, all_cpu_map, -1); + +out: + bperf_leader_bpf__destroy(skel); + bpf_link__destroy(link); + return err; +} + +static int bperf__load(struct evsel *evsel, struct target *target) +{ + struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff}; + int attr_map_fd, diff_map_fd = -1, err; + enum bperf_filter_type filter_type; + __u32 filter_entry_cnt, i; + + if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt)) + return -1; + + if (!all_cpu_map) { + all_cpu_map = perf_cpu_map__new(NULL); + if (!all_cpu_map) + return -1; + } + + evsel->bperf_leader_prog_fd = -1; + evsel->bperf_leader_link_fd = -1; + + /* + * Step 1: hold a fd on the leader program and the bpf_link, if + * the program is not already gone, reload the program. + * Use flock() to ensure exclusive access to the perf_event_attr + * map. + */ + attr_map_fd = bperf_lock_attr_map(target); + if (attr_map_fd < 0) { + pr_err("Failed to lock perf_event_attr map\n"); + return -1; + } + + err = bpf_map_lookup_elem(attr_map_fd, &evsel->core.attr, &entry); + if (err) { + err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, &entry, BPF_ANY); + if (err) + goto out; + } + + evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry.link_id); + if (evsel->bperf_leader_link_fd < 0 && + bperf_reload_leader_program(evsel, attr_map_fd, &entry)) + goto out; + + /* + * The bpf_link holds reference to the leader program, and the + * leader program holds reference to the maps. Therefore, if + * link_id is valid, diff_map_id should also be valid. + */ + evsel->bperf_leader_prog_fd = bpf_prog_get_fd_by_id( + bpf_link_get_prog_id(evsel->bperf_leader_link_fd)); + assert(evsel->bperf_leader_prog_fd >= 0); + + diff_map_fd = bpf_map_get_fd_by_id(entry.diff_map_id); + assert(diff_map_fd >= 0); + + /* + * bperf uses BPF_PROG_TEST_RUN to get accurate reading. Check + * whether the kernel support it + */ + err = bperf_trigger_reading(evsel->bperf_leader_prog_fd, 0); + if (err) { + pr_err("The kernel does not support test_run for raw_tp BPF programs.\n" + "Therefore, --use-bpf might show inaccurate readings\n"); + goto out; + } + + /* Step 2: load the follower skeleton */ + evsel->follower_skel = bperf_follower_bpf__open(); + if (!evsel->follower_skel) { + pr_err("Failed to open follower skeleton\n"); + goto out; + } + + /* attach fexit program to the leader program */ + bpf_program__set_attach_target(evsel->follower_skel->progs.fexit_XXX, + evsel->bperf_leader_prog_fd, "on_switch"); + + /* connect to leader diff_reading map */ + bpf_map__reuse_fd(evsel->follower_skel->maps.diff_readings, diff_map_fd); + + /* set up reading map */ + bpf_map__set_max_entries(evsel->follower_skel->maps.accum_readings, + filter_entry_cnt); + /* set up follower filter based on target */ + bpf_map__set_max_entries(evsel->follower_skel->maps.filter, + filter_entry_cnt); + err = bperf_follower_bpf__load(evsel->follower_skel); + if (err) { + pr_err("Failed to load follower skeleton\n"); + bperf_follower_bpf__destroy(evsel->follower_skel); + evsel->follower_skel = NULL; + goto out; + } + + for (i = 0; i < filter_entry_cnt; i++) { + int filter_map_fd; + __u32 key; + + if (filter_type == BPERF_FILTER_PID || + filter_type == BPERF_FILTER_TGID) + key = evsel->core.threads->map[i].pid; + else if (filter_type == BPERF_FILTER_CPU) + key = evsel->core.cpus->map[i]; + else + break; + + filter_map_fd = bpf_map__fd(evsel->follower_skel->maps.filter); + bpf_map_update_elem(filter_map_fd, &key, &i, BPF_ANY); + } + + evsel->follower_skel->bss->type = filter_type; + + err = bperf_follower_bpf__attach(evsel->follower_skel); + +out: + if (err && evsel->bperf_leader_link_fd >= 0) + close(evsel->bperf_leader_link_fd); + if (err && evsel->bperf_leader_prog_fd >= 0) + close(evsel->bperf_leader_prog_fd); + if (diff_map_fd >= 0) + close(diff_map_fd); + + flock(attr_map_fd, LOCK_UN); + close(attr_map_fd); + + return err; +} + +static int bperf__install_pe(struct evsel *evsel, int cpu, int fd) +{ + struct bperf_leader_bpf *skel = evsel->leader_skel; + + return bpf_map_update_elem(bpf_map__fd(skel->maps.events), + &cpu, &fd, BPF_ANY); +} + +/* + * trigger the leader prog on each cpu, so the accum_reading map could get + * the latest readings. + */ +static int bperf_sync_counters(struct evsel *evsel) +{ + int num_cpu, i, cpu; + + num_cpu = all_cpu_map->nr; + for (i = 0; i < num_cpu; i++) { + cpu = all_cpu_map->map[i]; + bperf_trigger_reading(evsel->bperf_leader_prog_fd, cpu); + } + return 0; +} + +static int bperf__enable(struct evsel *evsel) +{ + evsel->follower_skel->bss->enabled = 1; + return 0; +} + +static int bperf__read(struct evsel *evsel) +{ + struct bperf_follower_bpf *skel = evsel->follower_skel; + __u32 num_cpu_bpf = cpu__max_cpu(); + struct bpf_perf_event_value values[num_cpu_bpf]; + int reading_map_fd, err = 0; + __u32 i, j, num_cpu; + + bperf_sync_counters(evsel); + reading_map_fd = bpf_map__fd(skel->maps.accum_readings); + + for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) { + __u32 cpu; + + err = bpf_map_lookup_elem(reading_map_fd, &i, values); + if (err) + goto out; + switch (evsel->follower_skel->bss->type) { + case BPERF_FILTER_GLOBAL: + assert(i == 0); + + num_cpu = all_cpu_map->nr; + for (j = 0; j < num_cpu; j++) { + cpu = all_cpu_map->map[j]; + perf_counts(evsel->counts, cpu, 0)->val = values[cpu].counter; + perf_counts(evsel->counts, cpu, 0)->ena = values[cpu].enabled; + perf_counts(evsel->counts, cpu, 0)->run = values[cpu].running; + } + break; + case BPERF_FILTER_CPU: + cpu = evsel->core.cpus->map[i]; + perf_counts(evsel->counts, i, 0)->val = values[cpu].counter; + perf_counts(evsel->counts, i, 0)->ena = values[cpu].enabled; + perf_counts(evsel->counts, i, 0)->run = values[cpu].running; + break; + case BPERF_FILTER_PID: + case BPERF_FILTER_TGID: + perf_counts(evsel->counts, 0, i)->val = 0; + perf_counts(evsel->counts, 0, i)->ena = 0; + perf_counts(evsel->counts, 0, i)->run = 0; + + for (cpu = 0; cpu < num_cpu_bpf; cpu++) { + perf_counts(evsel->counts, 0, i)->val += values[cpu].counter; + perf_counts(evsel->counts, 0, i)->ena += values[cpu].enabled; + perf_counts(evsel->counts, 0, i)->run += values[cpu].running; + } + break; + default: + break; + } + } +out: + return err; +} + +static int bperf__destroy(struct evsel *evsel) +{ + bperf_follower_bpf__destroy(evsel->follower_skel); + close(evsel->bperf_leader_prog_fd); + close(evsel->bperf_leader_link_fd); + return 0; +} + +/* + * bperf: share hardware PMCs with BPF + * + * perf uses performance monitoring counters (PMC) to monitor system + * performance. The PMCs are limited hardware resources. For example, + * Intel CPUs have 3x fixed PMCs and 4x programmable PMCs per cpu. + * + * Modern data center systems use these PMCs in many different ways: + * system level monitoring, (maybe nested) container level monitoring, per + * process monitoring, profiling (in sample mode), etc. In some cases, + * there are more active perf_events than available hardware PMCs. To allow + * all perf_events to have a chance to run, it is necessary to do expensive + * time multiplexing of events. + * + * On the other hand, many monitoring tools count the common metrics + * (cycles, instructions). It is a waste to have multiple tools create + * multiple perf_events of "cycles" and occupy multiple PMCs. + * + * bperf tries to reduce such wastes by allowing multiple perf_events of + * "cycles" or "instructions" (at different scopes) to share PMUs. Instead + * of having each perf-stat session to read its own perf_events, bperf uses + * BPF programs to read the perf_events and aggregate readings to BPF maps. + * Then, the perf-stat session(s) reads the values from these BPF maps. + * + * || + * shared progs and maps <- || -> per session progs and maps + * || + * --------------- || + * | perf_events | || + * --------------- fexit || ----------------- + * | --------||----> | follower prog | + * --------------- / || --- ----------------- + * cs -> | leader prog |/ ||/ | | + * --> --------------- /|| -------------- ------------------ + * / | | / || | filter map | | accum_readings | + * / ------------ ------------ || -------------- ------------------ + * | | prev map | | diff map | || | + * | ------------ ------------ || | + * \ || | + * = \ ==================================================== | ============ + * \ / user space + * \ / + * \ / + * BPF_PROG_TEST_RUN BPF_MAP_LOOKUP_ELEM + * \ / + * \ / + * \------ perf-stat ----------------------/ + * + * The figure above shows the architecture of bperf. Note that the figure + * is divided into 3 regions: shared progs and maps (top left), per session + * progs and maps (top right), and user space (bottom). + * + * The leader prog is triggered on each context switch (cs). The leader + * prog reads perf_events and stores the difference (current_reading - + * previous_reading) to the diff map. For the same metric, e.g. "cycles", + * multiple perf-stat sessions share the same leader prog. + * + * Each perf-stat session creates a follower prog as fexit program to the + * leader prog. It is possible to attach up to BPF_MAX_TRAMP_PROGS (38) + * follower progs to the same leader prog. The follower prog checks current + * task and processor ID to decide whether to add the value from the diff + * map to its accumulated reading map (accum_readings). + * + * Finally, perf-stat user space reads the value from accum_reading map. + * + * Besides context switch, it is also necessary to trigger the leader prog + * before perf-stat reads the value. Otherwise, the accum_reading map may + * not have the latest reading from the perf_events. This is achieved by + * triggering the event via sys_bpf(BPF_PROG_TEST_RUN) to each CPU. + * + * Comment before the definition of struct perf_event_attr_map_entry + * describes how different sessions of perf-stat share information about + * the leader prog. + */ + +struct bpf_counter_ops bperf_ops = { + .load = bperf__load, + .enable = bperf__enable, + .read = bperf__read, + .install_pe = bperf__install_pe, + .destroy = bperf__destroy, +}; + +static inline bool bpf_counter_skip(struct evsel *evsel) +{ + return list_empty(&evsel->bpf_counter_list) && + evsel->follower_skel == NULL; +} + int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd) { - if (list_empty(&evsel->bpf_counter_list)) + if (bpf_counter_skip(evsel)) return 0; return evsel->bpf_counter_ops->install_pe(evsel, cpu, fd); } int bpf_counter__load(struct evsel *evsel, struct target *target) { - if (target__has_bpf(target)) + if (target->bpf_str) evsel->bpf_counter_ops = &bpf_program_profiler_ops; + else if (target->use_bpf) + evsel->bpf_counter_ops = &bperf_ops; if (evsel->bpf_counter_ops) return evsel->bpf_counter_ops->load(evsel, target); @@ -293,21 +802,21 @@ int bpf_counter__load(struct evsel *evsel, struct target *target) int bpf_counter__enable(struct evsel *evsel) { - if (list_empty(&evsel->bpf_counter_list)) + if (bpf_counter_skip(evsel)) return 0; return evsel->bpf_counter_ops->enable(evsel); } int bpf_counter__read(struct evsel *evsel) { - if (list_empty(&evsel->bpf_counter_list)) + if (bpf_counter_skip(evsel)) return -EAGAIN; return evsel->bpf_counter_ops->read(evsel); } void bpf_counter__destroy(struct evsel *evsel) { - if (list_empty(&evsel->bpf_counter_list)) + if (bpf_counter_skip(evsel)) return; evsel->bpf_counter_ops->destroy(evsel); evsel->bpf_counter_ops = NULL; diff --git a/tools/perf/util/bpf_skel/bperf.h b/tools/perf/util/bpf_skel/bperf.h new file mode 100644 index 000000000000..186a5551ddb9 --- /dev/null +++ b/tools/perf/util/bpf_skel/bperf.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2021 Facebook + +#ifndef __BPERF_STAT_H +#define __BPERF_STAT_H + +typedef struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_perf_event_value)); + __uint(max_entries, 1); +} reading_map; + +#endif /* __BPERF_STAT_H */ diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c new file mode 100644 index 000000000000..b8fa3cb2da23 --- /dev/null +++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2021 Facebook +#include +#include +#include +#include +#include "bperf.h" +#include "bperf_u.h" + +reading_map diff_readings SEC(".maps"); +reading_map accum_readings SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} filter SEC(".maps"); + +enum bperf_filter_type type = 0; +int enabled = 0; + +SEC("fexit/XXX") +int BPF_PROG(fexit_XXX) +{ + struct bpf_perf_event_value *diff_val, *accum_val; + __u32 filter_key, zero = 0; + __u32 *accum_key; + + if (!enabled) + return 0; + + switch (type) { + case BPERF_FILTER_GLOBAL: + accum_key = &zero; + goto do_add; + case BPERF_FILTER_CPU: + filter_key = bpf_get_smp_processor_id(); + break; + case BPERF_FILTER_PID: + filter_key = bpf_get_current_pid_tgid() & 0xffffffff; + break; + case BPERF_FILTER_TGID: + filter_key = bpf_get_current_pid_tgid() >> 32; + break; + default: + return 0; + } + + accum_key = bpf_map_lookup_elem(&filter, &filter_key); + if (!accum_key) + return 0; + +do_add: + diff_val = bpf_map_lookup_elem(&diff_readings, &zero); + if (!diff_val) + return 0; + + accum_val = bpf_map_lookup_elem(&accum_readings, accum_key); + if (!accum_val) + return 0; + + accum_val->counter += diff_val->counter; + accum_val->enabled += diff_val->enabled; + accum_val->running += diff_val->running; + + return 0; +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/perf/util/bpf_skel/bperf_leader.bpf.c b/tools/perf/util/bpf_skel/bperf_leader.bpf.c new file mode 100644 index 000000000000..4f70d1459e86 --- /dev/null +++ b/tools/perf/util/bpf_skel/bperf_leader.bpf.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2021 Facebook +#include +#include +#include +#include +#include "bperf.h" + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(int)); + __uint(map_flags, BPF_F_PRESERVE_ELEMS); +} events SEC(".maps"); + +reading_map prev_readings SEC(".maps"); +reading_map diff_readings SEC(".maps"); + +SEC("raw_tp/sched_switch") +int BPF_PROG(on_switch) +{ + struct bpf_perf_event_value val, *prev_val, *diff_val; + __u32 key = bpf_get_smp_processor_id(); + __u32 zero = 0; + long err; + + prev_val = bpf_map_lookup_elem(&prev_readings, &zero); + if (!prev_val) + return 0; + + diff_val = bpf_map_lookup_elem(&diff_readings, &zero); + if (!diff_val) + return 0; + + err = bpf_perf_event_read_value(&events, key, &val, sizeof(val)); + if (err) + return 0; + + diff_val->counter = val.counter - prev_val->counter; + diff_val->enabled = val.enabled - prev_val->enabled; + diff_val->running = val.running - prev_val->running; + *prev_val = val; + return 0; +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/perf/util/bpf_skel/bperf_u.h b/tools/perf/util/bpf_skel/bperf_u.h new file mode 100644 index 000000000000..1ce0c2c905c1 --- /dev/null +++ b/tools/perf/util/bpf_skel/bperf_u.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +// Copyright (c) 2021 Facebook + +#ifndef __BPERF_STAT_U_H +#define __BPERF_STAT_U_H + +enum bperf_filter_type { + BPERF_FILTER_GLOBAL = 1, + BPERF_FILTER_CPU, + BPERF_FILTER_PID, + BPERF_FILTER_TGID, +}; + +#endif /* __BPERF_STAT_U_H */ diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 6026487353dd..dd4f56f9cfdf 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -20,6 +20,8 @@ union perf_event; struct bpf_counter_ops; struct target; struct hashmap; +struct bperf_leader_bpf; +struct bperf_follower_bpf; typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); @@ -130,8 +132,24 @@ struct evsel { * See also evsel__has_callchain(). */ __u64 synth_sample_type; - struct list_head bpf_counter_list; + + /* + * bpf_counter_ops serves two use cases: + * 1. perf-stat -b counting events used byBPF programs + * 2. perf-stat --use-bpf use BPF programs to aggregate counts + */ struct bpf_counter_ops *bpf_counter_ops; + + /* for perf-stat -b */ + struct list_head bpf_counter_list; + + /* for perf-stat --use-bpf */ + int bperf_leader_prog_fd; + int bperf_leader_link_fd; + union { + struct bperf_leader_bpf *leader_skel; + struct bperf_follower_bpf *follower_skel; + }; }; struct perf_missing_features { diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index f132c6c2eef8..1bce3eb28ef2 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -16,6 +16,8 @@ struct target { bool uses_mmap; bool default_per_cpu; bool per_thread; + bool use_bpf; + const char *attr_map; }; enum target_errno { @@ -66,7 +68,7 @@ static inline bool target__has_cpu(struct target *target) static inline bool target__has_bpf(struct target *target) { - return target->bpf_str; + return target->bpf_str || target->use_bpf; } static inline bool target__none(struct target *target) -- cgit v1.2.3 From 435b46ef1d9fd904089199da16a21ade0701537f Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 16 Mar 2021 14:18:36 -0700 Subject: perf stat: Measure 't0' and 'ref_time' after enable_counters() Take measurements of 't0' and 'ref_time' after enable_counters(), so that they only measure the time consumed when the counters are enabled. Signed-off-by: Song Liu Acked-by: Andi Kleen Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: kernel-team@fb.com Link: http://lore.kernel.org/lkml/20210316211837.910506-3-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d34e22dce2c8..4bb48c6b6698 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -931,15 +931,15 @@ try_again_reset: /* * Enable counters and exec the command: */ - t0 = rdclock(); - clock_gettime(CLOCK_MONOTONIC, &ref_time); - if (forks) { evlist__start_workload(evsel_list); err = enable_counters(); if (err) return -1; + t0 = rdclock(); + clock_gettime(CLOCK_MONOTONIC, &ref_time); + if (interval || timeout || evlist__ctlfd_initialized(evsel_list)) status = dispatch_events(forks, timeout, interval, ×); if (child_pid != -1) { @@ -960,6 +960,10 @@ try_again_reset: err = enable_counters(); if (err) return -1; + + t0 = rdclock(); + clock_gettime(CLOCK_MONOTONIC, &ref_time); + status = dispatch_events(forks, timeout, interval, ×); } -- cgit v1.2.3 From 2c0cb9f56020d2ea006589434d5eb4e702110124 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 23 Mar 2021 17:42:47 -0300 Subject: perf test: Add a shell test for 'perf stat --bpf-counters' new option Add a test to compare the output of perf-stat with and without option --bpf-counters. If the difference is more than 10%, the test is considered as failed. Committer testing: # perf test bpf-counters 86: perf stat --bpf-counters test : Ok # perf test -v bpf-counters 86: perf stat --bpf-counters test : --- start --- test child forked, pid 2433339 test child finished with 0 ---- end ---- perf stat --bpf-counters test: Ok # Signed-off-by: Song Liu Requested-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Acked-by: Namhyung Kim Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/EC00E37D-8587-4662-8E30-7AD5F874FA84@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat_bpf_counters.sh | 31 +++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100755 tools/perf/tests/shell/stat_bpf_counters.sh diff --git a/tools/perf/tests/shell/stat_bpf_counters.sh b/tools/perf/tests/shell/stat_bpf_counters.sh new file mode 100755 index 000000000000..22eb31e48ca7 --- /dev/null +++ b/tools/perf/tests/shell/stat_bpf_counters.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# perf stat --bpf-counters test +# SPDX-License-Identifier: GPL-2.0 + +set -e + +# check whether $2 is within +/- 10% of $1 +compare_number() +{ + first_num=$1 + second_num=$2 + + # upper bound is first_num * 110% + upper=$(( $first_num + $first_num / 10 )) + # lower bound is first_num * 90% + lower=$(( $first_num - $first_num / 10 )) + + if [ $second_num -gt $upper ] || [ $second_num -lt $lower ]; then + echo "The difference between $first_num and $second_num are greater than 10%." + exit 1 + fi +} + +# skip if --bpf-counters is not supported +perf stat --bpf-counters true > /dev/null 2>&1 || exit 2 + +base_cycles=$(perf stat --no-big-num -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}') +bpf_cycles=$(perf stat --no-big-num --bpf-counters -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}') + +compare_number $base_cycles $bpf_cycles +exit 0 -- cgit v1.2.3 From 0bdad97801af5913101179a5de3f54b0eb88deea Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 19 Mar 2021 15:01:55 +0800 Subject: perf stat: Align CSV output for summary mode The 'perf stat' subcommand supports the request for a summary of the interval counter readings. But the summary lines break the CSV output so it's hard for scripts to parse the result. Before: # perf stat -x, -I1000 --interval-count 1 --summary 1.001323097,8013.48,msec,cpu-clock,8013483384,100.00,8.013,CPUs utilized 1.001323097,270,,context-switches,8013513297,100.00,0.034,K/sec 1.001323097,13,,cpu-migrations,8013530032,100.00,0.002,K/sec 1.001323097,184,,page-faults,8013546992,100.00,0.023,K/sec 1.001323097,20574191,,cycles,8013551506,100.00,0.003,GHz 1.001323097,10562267,,instructions,8013564958,100.00,0.51,insn per cycle 1.001323097,2019244,,branches,8013575673,100.00,0.252,M/sec 1.001323097,106152,,branch-misses,8013585776,100.00,5.26,of all branches 8013.48,msec,cpu-clock,8013483384,100.00,7.984,CPUs utilized 270,,context-switches,8013513297,100.00,0.034,K/sec 13,,cpu-migrations,8013530032,100.00,0.002,K/sec 184,,page-faults,8013546992,100.00,0.023,K/sec 20574191,,cycles,8013551506,100.00,0.003,GHz 10562267,,instructions,8013564958,100.00,0.51,insn per cycle 2019244,,branches,8013575673,100.00,0.252,M/sec 106152,,branch-misses,8013585776,100.00,5.26,of all branches The summary line loses the timestamp column, which breaks the CSV output. We add a column at the original 'timestamp' position and it just says 'summary' for the summary line. After: # perf stat -x, -I1000 --interval-count 1 --summary 1.001196053,8012.72,msec,cpu-clock,8012722903,100.00,8.013,CPUs utilized 1.001196053,218,,context-switches,8012753271,100.00,0.027,K/sec 1.001196053,9,,cpu-migrations,8012769767,100.00,0.001,K/sec 1.001196053,0,,page-faults,8012786257,100.00,0.000,K/sec 1.001196053,15004518,,cycles,8012790637,100.00,0.002,GHz 1.001196053,7954691,,instructions,8012804027,100.00,0.53,insn per cycle 1.001196053,1590259,,branches,8012814766,100.00,0.198,M/sec 1.001196053,82601,,branch-misses,8012824365,100.00,5.19,of all branches summary,8012.72,msec,cpu-clock,8012722903,100.00,7.986,CPUs utilized summary,218,,context-switches,8012753271,100.00,0.027,K/sec summary,9,,cpu-migrations,8012769767,100.00,0.001,K/sec summary,0,,page-faults,8012786257,100.00,0.000,K/sec summary,15004518,,cycles,8012790637,100.00,0.002,GHz summary,7954691,,instructions,8012804027,100.00,0.53,insn per cycle summary,1590259,,branches,8012814766,100.00,0.198,M/sec summary,82601,,branch-misses,8012824365,100.00,5.19,of all branches Now it's easy for script to analyse the summary lines. Of course, we also consider not to break possible existing scripts which can continue to use the broken CSV format by using a new '--no-csv-summary.' option. # perf stat -x, -I1000 --interval-count 1 --summary --no-csv-summary 1.001213261,8012.67,msec,cpu-clock,8012672327,100.00,8.013,CPUs utilized 1.001213261,197,,context-switches,8012703742,100.00,24.586,/sec 1.001213261,9,,cpu-migrations,8012720902,100.00,1.123,/sec 1.001213261,644,,page-faults,8012738266,100.00,80.373,/sec 1.001213261,18350698,,cycles,8012744109,100.00,0.002,GHz 1.001213261,12745021,,instructions,8012759001,100.00,0.69,insn per cycle 1.001213261,2458033,,branches,8012770864,100.00,306.768,K/sec 1.001213261,102107,,branch-misses,8012781751,100.00,4.15,of all branches 8012.67,msec,cpu-clock,8012672327,100.00,7.985,CPUs utilized 197,,context-switches,8012703742,100.00,24.586,/sec 9,,cpu-migrations,8012720902,100.00,1.123,/sec 644,,page-faults,8012738266,100.00,80.373,/sec 18350698,,cycles,8012744109,100.00,0.002,GHz 12745021,,instructions,8012759001,100.00,0.69,insn per cycle 2458033,,branches,8012770864,100.00,306.768,K/sec 102107,,branch-misses,8012781751,100.00,4.15,of all branches This option can be enabled in perf config by setting the variable 'stat.no-csv-summary'. # perf config stat.no-csv-summary=true # perf config -l stat.no-csv-summary=true # perf stat -x, -I1000 --interval-count 1 --summary 1.001330198,8013.28,msec,cpu-clock,8013279201,100.00,8.013,CPUs utilized 1.001330198,205,,context-switches,8013308394,100.00,25.583,/sec 1.001330198,10,,cpu-migrations,8013324681,100.00,1.248,/sec 1.001330198,0,,page-faults,8013340926,100.00,0.000,/sec 1.001330198,8027742,,cycles,8013344503,100.00,0.001,GHz 1.001330198,2871717,,instructions,8013356501,100.00,0.36,insn per cycle 1.001330198,553564,,branches,8013366204,100.00,69.081,K/sec 1.001330198,54021,,branch-misses,8013375952,100.00,9.76,of all branches 8013.28,msec,cpu-clock,8013279201,100.00,7.985,CPUs utilized 205,,context-switches,8013308394,100.00,25.583,/sec 10,,cpu-migrations,8013324681,100.00,1.248,/sec 0,,page-faults,8013340926,100.00,0.000,/sec 8027742,,cycles,8013344503,100.00,0.001,GHz 2871717,,instructions,8013356501,100.00,0.36,insn per cycle 553564,,branches,8013366204,100.00,69.081,K/sec 54021,,branch-misses,8013375952,100.00,9.76,of all branches Signed-off-by: Jin Yao Acked-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210319070156.20394-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 9 +++++++++ tools/perf/builtin-stat.c | 7 +++++++ tools/perf/util/config.c | 3 +++ tools/perf/util/stat-display.c | 6 ++++++ tools/perf/util/stat.h | 2 ++ 5 files changed, 27 insertions(+) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 744211fa8c18..6ec5960b08c3 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -482,6 +482,15 @@ convenient for post processing. --summary:: Print summary for interval mode (-I). +--no-csv-summary:: +Don't print 'summary' at the first column for CVS summary output. +This option must be used with -x and --summary. + +This option can be enabled in perf config by setting the variable +'stat.no-csv-summary'. + +$ perf config stat.no-csv-summary=true + EXAMPLES -------- diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 4bb48c6b6698..2a2c15cac80a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1093,6 +1093,11 @@ void perf_stat__set_big_num(int set) stat_config.big_num = (set != 0); } +void perf_stat__set_no_csv_summary(int set) +{ + stat_config.no_csv_summary = (set != 0); +} + static int stat__set_big_num(const struct option *opt __maybe_unused, const char *s __maybe_unused, int unset) { @@ -1249,6 +1254,8 @@ static struct option stat_options[] = { "threads of same physical core"), OPT_BOOLEAN(0, "summary", &stat_config.summary, "print summary for interval mode"), + OPT_BOOLEAN(0, "no-csv-summary", &stat_config.no_csv_summary, + "don't print 'summary' for CSV summary output"), OPT_BOOLEAN(0, "quiet", &stat_config.quiet, "don't print output (useful with record)"), #ifdef HAVE_LIBPFM diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 2daeaa9a4a24..6bcb5ef221f8 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -457,6 +457,9 @@ static int perf_stat_config(const char *var, const char *value) if (!strcmp(var, "stat.big-num")) perf_stat__set_big_num(perf_config_bool(var, value)); + if (!strcmp(var, "stat.no-csv-summary")) + perf_stat__set_no_csv_summary(perf_config_bool(var, value)); + /* Add other config variables here. */ return 0; } diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 7f09cdaf5b60..d3137bc17065 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -439,6 +439,12 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int if (counter->cgrp) os.nfields++; } + + if (!config->no_csv_summary && config->csv_output && + config->summary && !config->interval) { + fprintf(config->output, "%16s%s", "summary", config->csv_sep); + } + if (run == 0 || ena == 0 || counter->counts->scaled == -1) { if (config->metric_only) { pm(config, &os, NULL, "", "", 0); diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 41107b8deac5..48e6a06233fa 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -128,6 +128,7 @@ struct perf_stat_config { bool all_user; bool percore_show_thread; bool summary; + bool no_csv_summary; bool metric_no_group; bool metric_no_merge; bool stop_read_counter; @@ -160,6 +161,7 @@ struct perf_stat_config { }; void perf_stat__set_big_num(int set); +void perf_stat__set_no_csv_summary(int set); void update_stats(struct stats *stats, u64 val); double avg_stats(struct stats *stats); -- cgit v1.2.3 From 0f7ff383937b24a3db72234a37e8b724acda8ad3 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 19 Mar 2021 15:01:56 +0800 Subject: perf test: Add CSV summary test The patch "perf stat: Align CSV output for summary mode" aligned CSV output and added "summary" to the first column of summary lines. Now we check if the "summary" string is added to the CSV output. If we set '--no-csv-summary' option, the "summary" string would not be added, also check with this case. Committer testing: $ perf test csv 84: perf stat csv summary test : Ok $ Signed-off-by: Jin Yao Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210319070156.20394-2-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat+csv_summary.sh | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100755 tools/perf/tests/shell/stat+csv_summary.sh diff --git a/tools/perf/tests/shell/stat+csv_summary.sh b/tools/perf/tests/shell/stat+csv_summary.sh new file mode 100755 index 000000000000..5571ff75eb42 --- /dev/null +++ b/tools/perf/tests/shell/stat+csv_summary.sh @@ -0,0 +1,31 @@ +#!/bin/sh +# perf stat csv summary test +# SPDX-License-Identifier: GPL-2.0 + +set -e + +# +# 1.001364330 9224197 cycles 8012885033 100.00 +# summary 9224197 cycles 8012885033 100.00 +# +perf stat -e cycles -x' ' -I1000 --interval-count 1 --summary 2>&1 | \ +grep -e summary | \ +while read summary num event run pct +do + if [ $summary != "summary" ]; then + exit 1 + fi +done + +# +# 1.001360298 9148534 cycles 8012853854 100.00 +#9148534 cycles 8012853854 100.00 +# +perf stat -e cycles -x' ' -I1000 --interval-count 1 --summary --no-csv-summary 2>&1 | \ +grep -e summary | \ +while read num event run pct +do + exit 1 +done + +exit 0 -- cgit v1.2.3 From e0542cac435ba4bfb3b31da7d28f0df19703bf47 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Mon, 15 Mar 2021 11:56:32 +0800 Subject: MAINTAINERS: Add Mailing list and Web-page for PERFORMANCE EVENTS SUBSYSTEM Add entry "L: linux-perf-users@vger.kernel.org" to archive the related mail on https://lore.kernel.org/linux-perf-users/, add entry "W: https://perf.wiki.kernel.org/" so that newbies could get some useful materials. Signed-off-by: Tiezhu Yang Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/1615780592-21838-1-git-send-email-yangtiezhu@loongson.cn Signed-off-by: Arnaldo Carvalho de Melo --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index d92f85ca831d..607c0348fb61 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14020,8 +14020,10 @@ R: Mark Rutland R: Alexander Shishkin R: Jiri Olsa R: Namhyung Kim +L: linux-perf-users@vger.kernel.org L: linux-kernel@vger.kernel.org S: Supported +W: https://perf.wiki.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core F: arch/*/events/* F: arch/*/events/*/* -- cgit v1.2.3 From 405e07010d375d2123ec9d2e22197490eb698f74 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Thu, 25 Mar 2021 12:39:34 +0800 Subject: perf tools: Remove duplicate struct forward declarations 'struct evlist' has been declared at 10th line. 'struct comm' has been declared at 15th line. Remove the duplicates Signed-off-by: Wan Jiabing Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: kael_w@yeah.net Link: http://lore.kernel.org/lkml/20210325043947.846093-1-wanjiabing@vivo.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.h | 1 - tools/perf/util/thread-stack.h | 1 - 2 files changed, 2 deletions(-) diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index ed1b9392e624..026bbf416c48 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -9,7 +9,6 @@ struct evlist; struct evsel; -struct evlist; struct option; struct rblist; struct pmu_events_map; diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index 3bc47a42af8e..b3cd09beb62f 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -16,7 +16,6 @@ struct comm; struct ip_callchain; struct symbol; struct dso; -struct comm; struct perf_sample; struct addr_location; struct call_path; -- cgit v1.2.3 From 463a7d5a9e6fd3f3b592e09c936d2d07ee0b65b9 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Tue, 23 Mar 2021 13:01:39 +0800 Subject: perf daemon: Remove duplicate includes sys/stat.h has been included at line 23, so remove the duplicate one at line 27. linux/string.h has been included at line 7, so remove the duplicate one at line 9. time.h has been included at line 14, so remove the duplicate one at line 28. Signed-off-by: Wan Jiabing Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: kael_w@yeah.net Link: http://lore.kernel.org/lkml/20210323050139.287461-1-wanjiabing@vivo.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-daemon.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index ace8772a4f03..632ecd010a4f 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -24,8 +23,6 @@ #include #include #include -#include -#include #include "builtin.h" #include "perf.h" #include "debug.h" -- cgit v1.2.3 From 0a606822c4863b2398925e6ff3329d64c4c52bb8 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Mon, 22 Mar 2021 10:57:24 -0400 Subject: perf sort: Add dynamic headers for perf report columns Currently the header string for different columns in perf report is fixed. Some fields of perf sample could have different meaning for different architectures than the meaning conveyed by the header string. An example is the new field 'var2_w' of perf_sample_weight structure. This is presently captured as 'Local INSTR Latency' in perf mem report. But this could be used to denote a different latency cycle in another architecture. Introduce a weak function arch_perf_header_entry() to set the arch specific header string for the fields which can contain dynamic header. If the architecture do not have this function, fall back to the default header string value. Signed-off-by: Athira Rajeev Reviewed-by: Madhavan Srinivasan Acked-by: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Ravi Bangoria Link: https://lore.kernel.org/r/1616425047-1666-3-git-send-email-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.h | 1 + tools/perf/util/sort.c | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index f603edbbbc6f..6106a9c134c9 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -427,5 +427,6 @@ char *get_page_size_name(u64 size, char *str); void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type); void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type); +const char *arch_perf_header_entry(const char *se_header); #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 552b590485bf..eeb03e749181 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -25,6 +25,7 @@ #include #include "mem-events.h" #include "annotate.h" +#include "event.h" #include "time-utils.h" #include "cgroup.h" #include "machine.h" @@ -45,6 +46,7 @@ const char *field_order; regex_t ignore_callees_regex; int have_ignore_callees = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; +const char *dynamic_headers[] = {"local_ins_lat"}; /* * Replaces all occurrences of a char used with the: @@ -1816,6 +1818,16 @@ struct sort_dimension { int taken; }; +const char * __weak arch_perf_header_entry(const char *se_header) +{ + return se_header; +} + +static void sort_dimension_add_dynamic_header(struct sort_dimension *sd) +{ + sd->entry->se_header = arch_perf_header_entry(sd->entry->se_header); +} + #define DIM(d, n, func) [d] = { .name = n, .entry = &(func) } static struct sort_dimension common_sort_dimensions[] = { @@ -2739,7 +2751,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, struct evlist *evlist, int level) { - unsigned int i; + unsigned int i, j; for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) { struct sort_dimension *sd = &common_sort_dimensions[i]; @@ -2747,6 +2759,11 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, if (strncasecmp(tok, sd->name, strlen(tok))) continue; + for (j = 0; j < ARRAY_SIZE(dynamic_headers); j++) { + if (!strcmp(dynamic_headers[j], sd->name)) + sort_dimension_add_dynamic_header(sd); + } + if (sd->entry == &sort_parent) { int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); if (ret) { -- cgit v1.2.3 From ff0bd0a33f257cc01c0b777c1423205e49049777 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Mon, 22 Mar 2021 10:57:25 -0400 Subject: perf powerpc: Add support for PERF_SAMPLE_WEIGHT_STRUCT Add arch specific arch_evsel__set_sample_weight() to set the new sample type for powerpc. Add arch specific arch_perf_parse_sample_weight() to store the sample->weight values depending on the sample type applied. if the new sample type (PERF_SAMPLE_WEIGHT_STRUCT) is applied, store only the lower 32 bits to sample->weight. If sample type is 'PERF_SAMPLE_WEIGHT', store the full 64-bit to sample->weight. Signed-off-by: Athira Rajeev Reviewed-by: Madhavan Srinivasan Acked-by: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Ravi Bangoria Link: https://lore.kernel.org/r/1616425047-1666-4-git-send-email-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/Build | 2 ++ tools/perf/arch/powerpc/util/event.c | 32 ++++++++++++++++++++++++++++++++ tools/perf/arch/powerpc/util/evsel.c | 8 ++++++++ 3 files changed, 42 insertions(+) create mode 100644 tools/perf/arch/powerpc/util/event.c create mode 100644 tools/perf/arch/powerpc/util/evsel.c diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index b7945e5a543b..8a79c4126e5b 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -4,6 +4,8 @@ perf-y += kvm-stat.o perf-y += perf_regs.o perf-y += mem-events.o perf-y += sym-handling.o +perf-y += evsel.o +perf-y += event.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_DWARF) += skip-callchain-idx.o diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c new file mode 100644 index 000000000000..f49d32c2c8ae --- /dev/null +++ b/tools/perf/arch/powerpc/util/event.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +#include "../../../util/event.h" +#include "../../../util/synthetic-events.h" +#include "../../../util/machine.h" +#include "../../../util/tool.h" +#include "../../../util/map.h" +#include "../../../util/debug.h" + +void arch_perf_parse_sample_weight(struct perf_sample *data, + const __u64 *array, u64 type) +{ + union perf_sample_weight weight; + + weight.full = *array; + if (type & PERF_SAMPLE_WEIGHT) + data->weight = weight.full; + else + data->weight = weight.var1_dw; +} + +void arch_perf_synthesize_sample_weight(const struct perf_sample *data, + __u64 *array, u64 type) +{ + *array = data->weight; + + if (type & PERF_SAMPLE_WEIGHT_STRUCT) + *array &= 0xffffffff; +} diff --git a/tools/perf/arch/powerpc/util/evsel.c b/tools/perf/arch/powerpc/util/evsel.c new file mode 100644 index 000000000000..2f733cdc8dbb --- /dev/null +++ b/tools/perf/arch/powerpc/util/evsel.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "util/evsel.h" + +void arch_evsel__set_sample_weight(struct evsel *evsel) +{ + evsel__set_sample_bit(evsel, WEIGHT_STRUCT); +} -- cgit v1.2.3 From 06e5ca746c07380dfe0e4c3e10c34a6daa69eae6 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Mon, 22 Mar 2021 10:57:26 -0400 Subject: perf tools: Support pipeline stage cycles for powerpc The pipeline stage cycles details can be recorded on powerpc from the contents of Performance Monitor Unit (PMU) registers. On ISA v3.1 platform, sampling registers exposes the cycles spent in different pipeline stages. Patch adds perf tools support to present two of the cycle counter information along with memory latency (weight). Re-use the field 'ins_lat' for storing the first pipeline stage cycle. This is stored in 'var2_w' field of 'perf_sample_weight'. Add a new field 'p_stage_cyc' to store the second pipeline stage cycle which is stored in 'var3_w' field of perf_sample_weight. Add new sort function 'Pipeline Stage Cycle' and include this in default_mem_sort_order[]. This new sort function may be used to denote some other pipeline stage in another architecture. So add this to list of sort entries that can have dynamic header string. Signed-off-by: Athira Rajeev Reviewed-by: Madhavan Srinivasan Acked-by: Jiri Olsa Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Ravi Bangoria Link: https://lore.kernel.org/r/1616425047-1666-5-git-send-email-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 2 ++ tools/perf/arch/powerpc/util/event.c | 18 ++++++++++++++++-- tools/perf/util/event.h | 1 + tools/perf/util/hist.c | 11 ++++++++--- tools/perf/util/hist.h | 1 + tools/perf/util/session.c | 4 +++- tools/perf/util/sort.c | 24 ++++++++++++++++++++++-- tools/perf/util/sort.h | 2 ++ 8 files changed, 55 insertions(+), 8 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 822b16f05c15..f51f0000676e 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -112,6 +112,8 @@ OPTIONS - ins_lat: Instruction latency in core cycles. This is the global instruction latency - local_ins_lat: Local instruction latency version + - p_stage_cyc: On powerpc, this presents the number of cycles spent in a + pipeline stage. And currently supported only on powerpc. By default, comm, dso and symbol keys are used. (i.e. --sort comm,dso,symbol) diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c index f49d32c2c8ae..22521bc9481a 100644 --- a/tools/perf/arch/powerpc/util/event.c +++ b/tools/perf/arch/powerpc/util/event.c @@ -18,8 +18,11 @@ void arch_perf_parse_sample_weight(struct perf_sample *data, weight.full = *array; if (type & PERF_SAMPLE_WEIGHT) data->weight = weight.full; - else + else { data->weight = weight.var1_dw; + data->ins_lat = weight.var2_w; + data->p_stage_cyc = weight.var3_w; + } } void arch_perf_synthesize_sample_weight(const struct perf_sample *data, @@ -27,6 +30,17 @@ void arch_perf_synthesize_sample_weight(const struct perf_sample *data, { *array = data->weight; - if (type & PERF_SAMPLE_WEIGHT_STRUCT) + if (type & PERF_SAMPLE_WEIGHT_STRUCT) { *array &= 0xffffffff; + *array |= ((u64)data->ins_lat << 32); + } +} + +const char *arch_perf_header_entry(const char *se_header) +{ + if (!strcmp(se_header, "Local INSTR Latency")) + return "Finish Cyc"; + else if (!strcmp(se_header, "Pipeline Stage Cycle")) + return "Dispatch Cyc"; + return se_header; } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 6106a9c134c9..e5da4a695ff2 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -147,6 +147,7 @@ struct perf_sample { u8 cpumode; u16 misc; u16 ins_lat; + u16 p_stage_cyc; bool no_hw_idx; /* No hw_idx collected in branch_stack */ char insn[MAX_INSN]; void *raw_data; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index c82f5fc26af8..9299ee535518 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -211,6 +211,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10); hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13); hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13); + hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13); if (symbol_conf.nanosecs) hists__new_col_len(hists, HISTC_TIME, 16); else @@ -289,13 +290,14 @@ static long hist_time(unsigned long htime) } static void he_stat__add_period(struct he_stat *he_stat, u64 period, - u64 weight, u64 ins_lat) + u64 weight, u64 ins_lat, u64 p_stage_cyc) { he_stat->period += period; he_stat->weight += weight; he_stat->nr_events += 1; he_stat->ins_lat += ins_lat; + he_stat->p_stage_cyc += p_stage_cyc; } static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) @@ -308,6 +310,7 @@ static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src) dest->nr_events += src->nr_events; dest->weight += src->weight; dest->ins_lat += src->ins_lat; + dest->p_stage_cyc += src->p_stage_cyc; } static void he_stat__decay(struct he_stat *he_stat) @@ -597,6 +600,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, u64 period = entry->stat.period; u64 weight = entry->stat.weight; u64 ins_lat = entry->stat.ins_lat; + u64 p_stage_cyc = entry->stat.p_stage_cyc; bool leftmost = true; p = &hists->entries_in->rb_root.rb_node; @@ -615,11 +619,11 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, if (!cmp) { if (sample_self) { - he_stat__add_period(&he->stat, period, weight, ins_lat); + he_stat__add_period(&he->stat, period, weight, ins_lat, p_stage_cyc); hist_entry__add_callchain_period(he, period); } if (symbol_conf.cumulate_callchain) - he_stat__add_period(he->stat_acc, period, weight, ins_lat); + he_stat__add_period(he->stat_acc, period, weight, ins_lat, p_stage_cyc); /* * This mem info was allocated from sample__resolve_mem @@ -731,6 +735,7 @@ __hists__add_entry(struct hists *hists, .period = sample->period, .weight = sample->weight, .ins_lat = sample->ins_lat, + .p_stage_cyc = sample->p_stage_cyc, }, .parent = sym_parent, .filtered = symbol__parent_filter(sym_parent) | al->filtered, diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 3c537232294b..e2faa745c8d6 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -75,6 +75,7 @@ enum hist_column { HISTC_MEM_BLOCKED, HISTC_LOCAL_INS_LAT, HISTC_GLOBAL_INS_LAT, + HISTC_P_STAGE_CYC, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 9a8808507bd9..eba3769be3f1 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1302,8 +1302,10 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) { printf("... weight: %" PRIu64 "", sample->weight); - if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) + if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) { printf(",0x%"PRIx16"", sample->ins_lat); + printf(",0x%"PRIx16"", sample->p_stage_cyc); + } printf("\n"); } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index eeb03e749181..df7b932b4073 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -37,7 +37,7 @@ const char default_parent_pattern[] = "^sys_|^do_page_fault"; const char *parent_pattern = default_parent_pattern; const char *default_sort_order = "comm,dso,symbol"; const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles"; -const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat"; +const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,p_stage_cyc"; const char default_top_sort_order[] = "dso,symbol"; const char default_diff_sort_order[] = "dso,symbol"; const char default_tracepoint_sort_order[] = "trace"; @@ -46,7 +46,7 @@ const char *field_order; regex_t ignore_callees_regex; int have_ignore_callees = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; -const char *dynamic_headers[] = {"local_ins_lat"}; +const char *dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"}; /* * Replaces all occurrences of a char used with the: @@ -1410,6 +1410,25 @@ struct sort_entry sort_global_ins_lat = { .se_width_idx = HISTC_GLOBAL_INS_LAT, }; +static int64_t +sort__global_p_stage_cyc_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return left->stat.p_stage_cyc - right->stat.p_stage_cyc; +} + +static int hist_entry__p_stage_cyc_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + return repsep_snprintf(bf, size, "%-*u", width, he->stat.p_stage_cyc); +} + +struct sort_entry sort_p_stage_cyc = { + .se_header = "Pipeline Stage Cycle", + .se_cmp = sort__global_p_stage_cyc_cmp, + .se_snprintf = hist_entry__p_stage_cyc_snprintf, + .se_width_idx = HISTC_P_STAGE_CYC, +}; + struct sort_entry sort_mem_daddr_sym = { .se_header = "Data Symbol", .se_cmp = sort__daddr_cmp, @@ -1853,6 +1872,7 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size), DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat), DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat), + DIM(SORT_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 63f67a3f3630..87a092645aa7 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -51,6 +51,7 @@ struct he_stat { u64 period_guest_us; u64 weight; u64 ins_lat; + u64 p_stage_cyc; u32 nr_events; }; @@ -234,6 +235,7 @@ enum sort_type { SORT_CODE_PAGE_SIZE, SORT_LOCAL_INS_LAT, SORT_GLOBAL_INS_LAT, + SORT_PIPELINE_STAGE_CYC, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, -- cgit v1.2.3 From 50fa3a531e8e4b58550171fb159d0aa578c6b52d Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Mon, 22 Mar 2021 10:57:27 -0400 Subject: perf sort: Display sort dimension p_stage_cyc only on supported archs The sort dimension "p_stage_cyc" is used to represent pipeline stage cycle information. Presently, this is used only in powerpc. For unsupported platforms, we don't want to display it in the perf report output columns. Hence add check in sort_dimension__add() and skip the sort key incase it is not applicable for the particular arch. Signed-off-by: Athira Rajeev Reviewed-by: Madhavan Srinivasan Acked-by: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Ravi Bangoria Link: https://lore.kernel.org/r/1616425047-1666-6-git-send-email-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/event.c | 7 +++++++ tools/perf/util/event.h | 1 + tools/perf/util/sort.c | 19 +++++++++++++++++++ 3 files changed, 27 insertions(+) diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c index 22521bc9481a..3bf441257466 100644 --- a/tools/perf/arch/powerpc/util/event.c +++ b/tools/perf/arch/powerpc/util/event.c @@ -44,3 +44,10 @@ const char *arch_perf_header_entry(const char *se_header) return "Dispatch Cyc"; return se_header; } + +int arch_support_sort_key(const char *sort_key) +{ + if (!strcmp(sort_key, "p_stage_cyc")) + return 1; + return 0; +} diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index e5da4a695ff2..8a62fb39e365 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -429,5 +429,6 @@ char *get_page_size_name(u64 size, char *str); void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type); void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type); const char *arch_perf_header_entry(const char *se_header); +int arch_support_sort_key(const char *sort_key); #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index df7b932b4073..88ce47f2547e 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -47,6 +47,7 @@ regex_t ignore_callees_regex; int have_ignore_callees = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; const char *dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"}; +const char *arch_specific_sort_keys[] = {"p_stage_cyc"}; /* * Replaces all occurrences of a char used with the: @@ -1837,6 +1838,11 @@ struct sort_dimension { int taken; }; +int __weak arch_support_sort_key(const char *sort_key __maybe_unused) +{ + return 0; +} + const char * __weak arch_perf_header_entry(const char *se_header) { return se_header; @@ -2773,6 +2779,19 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, { unsigned int i, j; + /* + * Check to see if there are any arch specific + * sort dimensions not applicable for the current + * architecture. If so, Skip that sort key since + * we don't want to display it in the output fields. + */ + for (j = 0; j < ARRAY_SIZE(arch_specific_sort_keys); j++) { + if (!strcmp(arch_specific_sort_keys[j], tok) && + !arch_support_sort_key(tok)) { + return 0; + } + } + for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) { struct sort_dimension *sd = &common_sort_dimensions[i]; -- cgit v1.2.3 From 292c5ed168597df85f53cb03ec3e831b18969b62 Mon Sep 17 00:00:00 2001 From: Fabian Hemmer Date: Fri, 26 Feb 2021 02:52:23 -0500 Subject: perf tools: Preserve identifier id in OCaml demangler Some OCaml developers reported that this bit of information is sometimes useful for disambiguating functions for which the OCaml compiler assigns the same name, e.g. nested or inlined functions. Signed-off-by: Fabian Hemmer Link: http://lore.kernel.org/lkml/20210226075223.p3s5oz4jbxwnqjtv@nyu Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/demangle-ocaml-test.c | 6 +++--- tools/perf/util/demangle-ocaml.c | 12 ------------ 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/tools/perf/tests/demangle-ocaml-test.c b/tools/perf/tests/demangle-ocaml-test.c index 2fac7d762c0d..0043be812355 100644 --- a/tools/perf/tests/demangle-ocaml-test.c +++ b/tools/perf/tests/demangle-ocaml-test.c @@ -19,11 +19,11 @@ int test__demangle_ocaml(struct test *test __maybe_unused, int subtest __maybe_u { "main", NULL }, { "camlStdlib__array__map_154", - "Stdlib.array.map" }, + "Stdlib.array.map_154" }, { "camlStdlib__anon_fn$5bstdlib$2eml$3a334$2c0$2d$2d54$5d_1453", - "Stdlib.anon_fn[stdlib.ml:334,0--54]" }, + "Stdlib.anon_fn[stdlib.ml:334,0--54]_1453" }, { "camlStdlib__bytes__$2b$2b_2205", - "Stdlib.bytes.++" }, + "Stdlib.bytes.++_2205" }, }; for (i = 0; i < ARRAY_SIZE(test_cases); i++) { diff --git a/tools/perf/util/demangle-ocaml.c b/tools/perf/util/demangle-ocaml.c index 3df14e67c622..9d707bb60b4b 100644 --- a/tools/perf/util/demangle-ocaml.c +++ b/tools/perf/util/demangle-ocaml.c @@ -64,17 +64,5 @@ ocaml_demangle_sym(const char *sym) } result[j] = '\0'; - /* scan backwards to remove an "_" followed by decimal digits */ - if (j != 0 && isdigit(result[j - 1])) { - while (--j) { - if (!isdigit(result[j])) { - break; - } - } - if (result[j] == '_') { - result[j] = '\0'; - } - } - return result; } -- cgit v1.2.3 From 3406ac5347dbf64ab9f7b137ed25a18493f5ea2d Mon Sep 17 00:00:00 2001 From: Martin Liška Date: Tue, 30 Mar 2021 20:33:55 +0200 Subject: perf annotate: Add --demangle and --demangle-kernel 'perf annotate' supports --symbol but it's impossible to filter a C++ symbol. With --no-demangle one can filter easily by mangled function name. Signed-off-by: Martin Liška Link: http://lore.kernel.org/lkml/c3c7e959-9f7f-18e2-e795-f604275cbac3@suse.cz Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-annotate.txt | 7 +++++++ tools/perf/builtin-annotate.c | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index 1b5042f134a8..80c1be5d566c 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -124,6 +124,13 @@ OPTIONS --group:: Show event group information together +--demangle:: + Demangle symbol names to human readable form. It's enabled by default, + disable with --no-demangle. + +--demangle-kernel:: + Demangle kernel symbol names to human readable form (for C++ kernels). + --percent-type:: Set annotation percent type from following choices: global-period, local-period, global-hits, local-hits diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 0f3a196e5d6e..021d974c978e 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -538,6 +538,10 @@ int cmd_annotate(int argc, const char **argv) "Strip first N entries of source file path name in programs (with --prefix)"), OPT_STRING(0, "objdump", &annotate.opts.objdump_path, "path", "objdump binary to use for disassembly and annotations"), + OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, + "Enable symbol demangling"), + OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, + "Enable kernel symbol demangling"), OPT_BOOLEAN(0, "group", &symbol_conf.event_group, "Show event group information together"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, -- cgit v1.2.3 From fd6103cb67966ed783b3800110bdbd66edae26a4 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Thu, 1 Apr 2021 14:23:54 +0800 Subject: perf evsel: Remove duplicate 'struct target' forward declaration 'struct target' is declared twice. One has been declared at 21st line. Remove the duplicate. Signed-off-by: Wan Jiabing Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: kael_w@yeah.net Link: http://lore.kernel.org/lkml/20210401062424.991737-1-wanjiabing@vivo.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.h | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index dd4f56f9cfdf..eccc4fd5b3eb 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -175,7 +175,6 @@ struct perf_missing_features { extern struct perf_missing_features perf_missing_features; struct perf_cpu_map; -struct target; struct thread_map; struct record_opts; -- cgit v1.2.3 From 69baf1a2a41a87eb16dc98aa9ddbdadd8070e5b2 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Tue, 6 Apr 2021 18:51:02 +0800 Subject: perf mem-events: Remove unnecessary 'struct mem_info' forward declaration 'struct mem_info' is defined at 22nd line. The declaration here is unnecessary. Remove it. Signed-off-by: Wan Jiabing Cc: Alexander Shishkin Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: kael_w@yeah.net Link: http://lore.kernel.org/lkml/20210406105104.675879-1-wanjiabing@vivo.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem-events.h | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 63dd383b6ce2..cacdebd65b8a 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -44,7 +44,6 @@ bool is_mem_loads_aux_event(struct evsel *leader); void perf_mem_events__list(void); -struct mem_info; int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info); int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info); int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info); -- cgit v1.2.3 From dedb76d3598618e67b3a9af89bf4f418430acbe4 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 7 Apr 2021 18:32:45 +0800 Subject: perf metricgroup: Make find_metric() public with name change Function find_metric() is required for the metric processing in the pmu-events testcase, so make it public. Also change the name to include "metricgroup". Tested-by: Paul A. Clarke Reviewed-by: Kajol Jain Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linuxarm@huawei.com Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/1617791570-165223-2-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 5 +++-- tools/perf/util/metricgroup.h | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 6acb44ad439b..37fe34a5d93d 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -900,7 +900,8 @@ static int __add_metric(struct list_head *metric_list, (match_metric(__pe->metric_group, __metric) || \ match_metric(__pe->metric_name, __metric))) -static struct pmu_event *find_metric(const char *metric, struct pmu_events_map *map) +struct pmu_event *metricgroup__find_metric(const char *metric, + struct pmu_events_map *map) { struct pmu_event *pe; int i; @@ -985,7 +986,7 @@ static int __resolve_metric(struct metric *m, struct expr_id *parent; struct pmu_event *pe; - pe = find_metric(cur->key, map); + pe = metricgroup__find_metric(cur->key, map); if (!pe) continue; diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 026bbf416c48..cc4a92492a61 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -43,7 +43,8 @@ int metricgroup__parse_groups(const struct option *opt, bool metric_no_group, bool metric_no_merge, struct rblist *metric_events); - +struct pmu_event *metricgroup__find_metric(const char *metric, + struct pmu_events_map *map); int metricgroup__parse_groups_test(struct evlist *evlist, struct pmu_events_map *map, const char *str, -- cgit v1.2.3 From a48a995edcde832f2d4c4ec1bfb73e0da93810fb Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 7 Apr 2021 18:32:46 +0800 Subject: perf test: Handle metric reuse in pmu-events parsing test The pmu-events parsing test does not handle metric reuse at all. Introduce some simple handling to resolve metrics who reference other metrics. Reviewed-by: Kajol Jain Signed-off-by: John Garry Tested-by: Paul A. Clarke Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linuxarm@huawei.com Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/1617791570-165223-3-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/pmu-events.c | 81 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 0ca6a5a53523..cb5b25d2fb27 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -12,6 +12,7 @@ #include "util/evlist.h" #include "util/expr.h" #include "util/parse-events.h" +#include "metricgroup.h" struct perf_pmu_test_event { /* used for matching against events from generated pmu-events.c */ @@ -471,6 +472,71 @@ static void expr_failure(const char *msg, pr_debug("On expression %s\n", pe->metric_expr); } +struct metric { + struct list_head list; + struct metric_ref metric_ref; +}; + +static int resolve_metric_simple(struct expr_parse_ctx *pctx, + struct list_head *compound_list, + struct pmu_events_map *map, + const char *metric_name) +{ + struct hashmap_entry *cur, *cur_tmp; + struct metric *metric, *tmp; + size_t bkt; + bool all; + int rc; + + do { + all = true; + hashmap__for_each_entry_safe((&pctx->ids), cur, cur_tmp, bkt) { + struct metric_ref *ref; + struct pmu_event *pe; + + pe = metricgroup__find_metric(cur->key, map); + if (!pe) + continue; + + if (!strcmp(metric_name, (char *)cur->key)) { + pr_warning("Recursion detected for metric %s\n", metric_name); + rc = -1; + goto out_err; + } + + all = false; + + /* The metric key itself needs to go out.. */ + expr__del_id(pctx, cur->key); + + metric = malloc(sizeof(*metric)); + if (!metric) { + rc = -ENOMEM; + goto out_err; + } + + ref = &metric->metric_ref; + ref->metric_name = pe->metric_name; + ref->metric_expr = pe->metric_expr; + list_add_tail(&metric->list, compound_list); + + rc = expr__find_other(pe->metric_expr, NULL, pctx, 0); + if (rc) + goto out_err; + break; /* The hashmap has been modified, so restart */ + } + } while (!all); + + return 0; + +out_err: + list_for_each_entry_safe(metric, tmp, compound_list, list) + free(metric); + + return rc; + +} + static int test_parsing(void) { struct pmu_events_map *cpus_map = perf_pmu__find_map(NULL); @@ -488,7 +554,9 @@ static int test_parsing(void) break; j = 0; for (;;) { + struct metric *metric, *tmp; struct hashmap_entry *cur; + LIST_HEAD(compound_list); size_t bkt; pe = &map->table[j++]; @@ -504,6 +572,13 @@ static int test_parsing(void) continue; } + if (resolve_metric_simple(&ctx, &compound_list, map, + pe->metric_name)) { + expr_failure("Could not resolve metrics", map, pe); + ret++; + goto exit; /* Don't tolerate errors due to severity */ + } + /* * Add all ids with a made up value. The value may * trigger divide by zero when subtracted and so try to @@ -519,6 +594,11 @@ static int test_parsing(void) ret++; } + list_for_each_entry_safe(metric, tmp, &compound_list, list) { + expr__add_ref(&ctx, &metric->metric_ref); + free(metric); + } + if (expr__parse(&result, &ctx, pe->metric_expr, 0)) { expr_failure("Parse failed", map, pe); ret++; @@ -527,6 +607,7 @@ static int test_parsing(void) } } /* TODO: fail when not ok */ +exit: return ret == 0 ? TEST_OK : TEST_SKIP; } -- cgit v1.2.3 From e126bef55f1dfb44440d632f9aae66af3240a435 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 7 Apr 2021 18:32:47 +0800 Subject: perf pmu: Add pmu_events_map__find() function to find the common PMU map for the system Add a function to find the common PMU map for the system. For arm64, a special variant is added. This is because arm64 supports heterogeneous CPU systems. As such, it cannot be guaranteed that the cpumap is same for all CPUs. So in case of heterogeneous systems, don't return a cpumap. Reviewed-by: Kajol Jain Signed-off-by: John Garry Tested-by: Paul A. Clarke Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: https://lore.kernel.org/r/1617791570-165223-4-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/Build | 1 + tools/perf/arch/arm64/util/pmu.c | 25 +++++++++++++++++++++++++ tools/perf/tests/pmu-events.c | 2 +- tools/perf/util/metricgroup.c | 7 +++---- tools/perf/util/pmu.c | 5 +++++ tools/perf/util/pmu.h | 1 + tools/perf/util/s390-sample-raw.c | 4 +--- 7 files changed, 37 insertions(+), 8 deletions(-) create mode 100644 tools/perf/arch/arm64/util/pmu.c diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index ead2f2275eee..9fcb4e68add9 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -2,6 +2,7 @@ perf-y += header.o perf-y += machine.o perf-y += perf_regs.o perf-y += tsc.o +perf-y += pmu.o perf-y += kvm-stat.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c new file mode 100644 index 000000000000..d3259d61ca75 --- /dev/null +++ b/tools/perf/arch/arm64/util/pmu.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "../../util/cpumap.h" +#include "../../util/pmu.h" + +struct pmu_events_map *pmu_events_map__find(void) +{ + struct perf_pmu *pmu = NULL; + + while ((pmu = perf_pmu__scan(pmu))) { + if (!is_pmu_core(pmu->name)) + continue; + + /* + * The cpumap should cover all CPUs. Otherwise, some CPUs may + * not support some events or have different event IDs. + */ + if (pmu->cpus->nr != cpu__max_cpu()) + return NULL; + + return perf_pmu__find_map(pmu); + } + + return NULL; +} diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index cb5b25d2fb27..b8aff8fb50d8 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -539,7 +539,7 @@ out_err: static int test_parsing(void) { - struct pmu_events_map *cpus_map = perf_pmu__find_map(NULL); + struct pmu_events_map *cpus_map = pmu_events_map__find(); struct pmu_events_map *map; struct pmu_event *pe; int i, j, k; diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 37fe34a5d93d..8336dd8e8098 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -618,7 +618,7 @@ static int metricgroup__print_sys_event_iter(struct pmu_event *pe, void *data) void metricgroup__print(bool metrics, bool metricgroups, char *filter, bool raw, bool details) { - struct pmu_events_map *map = perf_pmu__find_map(NULL); + struct pmu_events_map *map = pmu_events_map__find(); struct pmu_event *pe; int i; struct rblist groups; @@ -1254,8 +1254,7 @@ int metricgroup__parse_groups(const struct option *opt, struct rblist *metric_events) { struct evlist *perf_evlist = *(struct evlist **)opt->value; - struct pmu_events_map *map = perf_pmu__find_map(NULL); - + struct pmu_events_map *map = pmu_events_map__find(); return parse_groups(perf_evlist, str, metric_no_group, metric_no_merge, NULL, metric_events, map); @@ -1274,7 +1273,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist, bool metricgroup__has_metric(const char *metric) { - struct pmu_events_map *map = perf_pmu__find_map(NULL); + struct pmu_events_map *map = pmu_events_map__find(); struct pmu_event *pe; int i; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 4e52b94f1ac6..286d5e415bdc 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -717,6 +717,11 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) return map; } +struct pmu_events_map *__weak pmu_events_map__find(void) +{ + return perf_pmu__find_map(NULL); +} + bool pmu_uncore_alias_match(const char *pmu_name, const char *name) { char *tmp = NULL, *tok, *str; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 160b0f561771..1f1749ba830f 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -114,6 +114,7 @@ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, struct pmu_events_map *map); struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); +struct pmu_events_map *pmu_events_map__find(void); bool pmu_uncore_alias_match(const char *pmu_name, const char *name); void perf_pmu_free_alias(struct perf_pmu_alias *alias); diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index cfcf8d534d76..08ec3c3ae0ee 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -160,11 +160,9 @@ static void s390_cpumcfdg_dump(struct perf_sample *sample) const char *color = PERF_COLOR_BLUE; struct cf_ctrset_entry *cep, ce; struct pmu_events_map *map; - struct perf_pmu pmu; u64 *p; - memset(&pmu, 0, sizeof(pmu)); - map = perf_pmu__find_map(&pmu); + map = pmu_events_map__find(); while (offset < len) { cep = (struct cf_ctrset_entry *)(buf + offset); -- cgit v1.2.3 From c4e1dc4a94931805fd4c69de71117dc040d8db2a Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 7 Apr 2021 18:32:48 +0800 Subject: perf vendor events arm64: Add Hisi hip08 L1 metrics Add L1 metrics. Formula is as consistent as possible with MAN pages description for these metrics. Reviewed-by: Kajol Jain Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: https://lore.kernel.org/r/1617791570-165223-5-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/hisilicon/hip08/metrics.json | 30 ++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json new file mode 100644 index 000000000000..dc5ff3051639 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json @@ -0,0 +1,30 @@ +[ + { + "MetricExpr": "FETCH_BUBBLE / (4 * CPU_CYCLES)", + "PublicDescription": "Frontend bound L1 topdown metric", + "BriefDescription": "Frontend bound L1 topdown metric", + "MetricGroup": "TopDownL1", + "MetricName": "frontend_bound" + }, + { + "MetricExpr": "(INST_SPEC - INST_RETIRED) / (4 * CPU_CYCLES)", + "PublicDescription": "Bad Speculation L1 topdown metric", + "BriefDescription": "Bad Speculation L1 topdown metric", + "MetricGroup": "TopDownL1", + "MetricName": "bad_speculation" + }, + { + "MetricExpr": "INST_RETIRED / (CPU_CYCLES * 4)", + "PublicDescription": "Retiring L1 topdown metric", + "BriefDescription": "Retiring L1 topdown metric", + "MetricGroup": "TopDownL1", + "MetricName": "retiring" + }, + { + "MetricExpr": "1 - (frontend_bound + bad_speculation + retiring)", + "PublicDescription": "Backend Bound L1 topdown metric", + "BriefDescription": "Backend Bound L1 topdown metric", + "MetricGroup": "TopDownL1", + "MetricName": "backend_bound" + }, +] -- cgit v1.2.3 From 03837173487a1c664b71f047e97209112be37dd5 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 7 Apr 2021 18:32:49 +0800 Subject: perf vendor events arm64: Add Hisi hip08 L2 metrics Add L2 metrics. Reviewed-by: Kajol Jain Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: https://lore.kernel.org/r/1617791570-165223-6-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/hisilicon/hip08/metrics.json | 42 ++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json index dc5ff3051639..dda898d23c2d 100644 --- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json @@ -27,4 +27,46 @@ "MetricGroup": "TopDownL1", "MetricName": "backend_bound" }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x201d@ / CPU_CYCLES", + "PublicDescription": "Fetch latency bound L2 topdown metric", + "BriefDescription": "Fetch latency bound L2 topdown metric", + "MetricGroup": "TopDownL2", + "MetricName": "fetch_latency_bound" + }, + { + "MetricExpr": "frontend_bound - fetch_latency_bound", + "PublicDescription": "Fetch bandwidth bound L2 topdown metric", + "BriefDescription": "Fetch bandwidth bound L2 topdown metric", + "MetricGroup": "TopDownL2", + "MetricName": "fetch_bandwidth_bound" + }, + { + "MetricExpr": "(bad_speculation * BR_MIS_PRED) / (BR_MIS_PRED + armv8_pmuv3_0@event\\=0x2013@)", + "PublicDescription": "Branch mispredicts L2 topdown metric", + "BriefDescription": "Branch mispredicts L2 topdown metric", + "MetricGroup": "TopDownL2", + "MetricName": "branch_mispredicts" + }, + { + "MetricExpr": "bad_speculation - branch_mispredicts", + "PublicDescription": "Machine clears L2 topdown metric", + "BriefDescription": "Machine clears L2 topdown metric", + "MetricGroup": "TopDownL2", + "MetricName": "machine_clears" + }, + { + "MetricExpr": "(EXE_STALL_CYCLE - (MEM_STALL_ANYLOAD + armv8_pmuv3_0@event\\=0x7005@)) / CPU_CYCLES", + "PublicDescription": "Core bound L2 topdown metric", + "BriefDescription": "Core bound L2 topdown metric", + "MetricGroup": "TopDownL2", + "MetricName": "core_bound" + }, + { + "MetricExpr": "(MEM_STALL_ANYLOAD + armv8_pmuv3_0@event\\=0x7005@) / CPU_CYCLES", + "PublicDescription": "Memory bound L2 topdown metric", + "BriefDescription": "Memory bound L2 topdown metric", + "MetricGroup": "TopDownL2", + "MetricName": "memory_bound" + }, ] -- cgit v1.2.3 From 0cc177cfc95d565e1a458136a592b0bd6d487db0 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 7 Apr 2021 18:32:50 +0800 Subject: perf vendor events arm64: Add Hisi hip08 L3 metrics Add L3 metrics. Reviewed-by: Kajol Jain Signed-off-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Link: https://lore.kernel.org/r/1617791570-165223-7-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/hisilicon/hip08/metrics.json | 161 +++++++++++++++++++++ 1 file changed, 161 insertions(+) diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json index dda898d23c2d..dda8e59149d2 100644 --- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json @@ -69,4 +69,165 @@ "MetricGroup": "TopDownL2", "MetricName": "memory_bound" }, + { + "MetricExpr": "(((L2I_TLB - L2I_TLB_REFILL) * 15) + (L2I_TLB_REFILL * 100)) / CPU_CYCLES", + "PublicDescription": "Idle by itlb miss L3 topdown metric", + "BriefDescription": "Idle by itlb miss L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "idle_by_itlb_miss" + }, + { + "MetricExpr": "(((L2I_CACHE - L2I_CACHE_REFILL) * 15) + (L2I_CACHE_REFILL * 100)) / CPU_CYCLES", + "PublicDescription": "Idle by icache miss L3 topdown metric", + "BriefDescription": "Idle by icache miss L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "idle_by_icache_miss" + }, + { + "MetricExpr": "(BR_MIS_PRED * 5) / CPU_CYCLES", + "PublicDescription": "BP misp flush L3 topdown metric", + "BriefDescription": "BP misp flush L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "bp_misp_flush" + }, + { + "MetricExpr": "(armv8_pmuv3_0@event\\=0x2013@ * 5) / CPU_CYCLES", + "PublicDescription": "OOO flush L3 topdown metric", + "BriefDescription": "OOO flush L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "ooo_flush" + }, + { + "MetricExpr": "(armv8_pmuv3_0@event\\=0x1001@ * 5) / CPU_CYCLES", + "PublicDescription": "Static predictor flush L3 topdown metric", + "BriefDescription": "Static predictor flush L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "sp_flush" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x1010@ / BR_MIS_PRED", + "PublicDescription": "Indirect branch L3 topdown metric", + "BriefDescription": "Indirect branch L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "indirect_branch" + }, + { + "MetricExpr": "(armv8_pmuv3_0@event\\=0x1014@ + armv8_pmuv3_0@event\\=0x1018@) / BR_MIS_PRED", + "PublicDescription": "Push branch L3 topdown metric", + "BriefDescription": "Push branch L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "push_branch" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x100c@ / BR_MIS_PRED", + "PublicDescription": "Pop branch L3 topdown metric", + "BriefDescription": "Pop branch L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "pop_branch" + }, + { + "MetricExpr": "(BR_MIS_PRED - armv8_pmuv3_0@event\\=0x1010@ - armv8_pmuv3_0@event\\=0x1014@ - armv8_pmuv3_0@event\\=0x1018@ - armv8_pmuv3_0@event\\=0x100c@) / BR_MIS_PRED", + "PublicDescription": "Other branch L3 topdown metric", + "BriefDescription": "Other branch L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "other_branch" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x2012@ / armv8_pmuv3_0@event\\=0x2013@", + "PublicDescription": "Nuke flush L3 topdown metric", + "BriefDescription": "Nuke flush L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "nuke_flush" + }, + { + "MetricExpr": "1 - nuke_flush", + "PublicDescription": "Other flush L3 topdown metric", + "BriefDescription": "Other flush L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "other_flush" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x2010@ / CPU_CYCLES", + "PublicDescription": "Sync stall L3 topdown metric", + "BriefDescription": "Sync stall L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "sync_stall" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x2004@ / CPU_CYCLES", + "PublicDescription": "Rob stall L3 topdown metric", + "BriefDescription": "Rob stall L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "rob_stall" + }, + { + "MetricExpr": "(armv8_pmuv3_0@event\\=0x2006@ + armv8_pmuv3_0@event\\=0x2007@ + armv8_pmuv3_0@event\\=0x2008@) / CPU_CYCLES", + "PublicDescription": "Ptag stall L3 topdown metric", + "BriefDescription": "Ptag stall L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "ptag_stall" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x201e@ / CPU_CYCLES", + "PublicDescription": "SaveOpQ stall L3 topdown metric", + "BriefDescription": "SaveOpQ stall L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "saveopq_stall" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x2005@ / CPU_CYCLES", + "PublicDescription": "PC buffer stall L3 topdown metric", + "BriefDescription": "PC buffer stall L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "pc_buffer_stall" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x7002@ / CPU_CYCLES", + "PublicDescription": "Divider L3 topdown metric", + "BriefDescription": "Divider L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "divider" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x7003@ / CPU_CYCLES", + "PublicDescription": "FSU stall L3 topdown metric", + "BriefDescription": "FSU stall L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "fsu_stall" + }, + { + "MetricExpr": "core_bound - divider - fsu_stall", + "PublicDescription": "EXE ports util L3 topdown metric", + "BriefDescription": "EXE ports util L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "exe_ports_util" + }, + { + "MetricExpr": "(MEM_STALL_ANYLOAD - MEM_STALL_L1MISS) / CPU_CYCLES", + "PublicDescription": "L1 bound L3 topdown metric", + "BriefDescription": "L1 bound L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "l1_bound" + }, + { + "MetricExpr": "(MEM_STALL_L1MISS - MEM_STALL_L2MISS) / CPU_CYCLES", + "PublicDescription": "L2 bound L3 topdown metric", + "BriefDescription": "L2 bound L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "l2_bound" + }, + { + "MetricExpr": "MEM_STALL_L2MISS / CPU_CYCLES", + "PublicDescription": "Mem bound L3 topdown metric", + "BriefDescription": "Mem bound L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "mem_bound" + }, + { + "MetricExpr": "armv8_pmuv3_0@event\\=0x7005@ / CPU_CYCLES", + "PublicDescription": "Store bound L3 topdown metric", + "BriefDescription": "Store bound L3 topdown metric", + "MetricGroup": "TopDownL3", + "MetricName": "store_bound" + }, ] -- cgit v1.2.3 From 86c2bc3da769124e3e856b6e9457be3667c30919 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Tue, 6 Apr 2021 16:59:41 -0500 Subject: perf vendor events amd: Fix broken L2 Cache Hits from L2 HWPF metric Commit 08ed77e414ab2342 ("perf vendor events amd: Add recommended events") added the hits event "L2 Cache Hits from L2 HWPF" with the same metric expression as the accesses event "L2 Cache Accesses from L2 HWPF": $ perf list --details ... l2_cache_accesses_from_l2_hwpf [L2 Cache Accesses from L2 HWPF] [l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3] l2_cache_hits_from_l2_hwpf [L2 Cache Hits from L2 HWPF] [l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3] ... This was wrong and led to counting hits the same as accesses. Section 2.1.15.2 "Performance Measurement" of "PPR for AMD Family 17h Model 31h B0 - 55803 Rev 0.54 - Sep 12, 2019", documents the hits event with EventCode 0x70 which is the same as l2_pf_hit_l2. Fix this, and massage the description for l2_pf_hit_l2 as the hits event is now the duplicate of l2_pf_hit_l2. AMD recommends using the recommended event over other events if the duplicate exists and maintain both for consistency. Hence, l2_cache_hits_from_l2_hwpf should override l2_pf_hit_l2. Before: # perf stat -M l2_cache_accesses_from_l2_hwpf,l2_cache_hits_from_l2_hwpf sleep 1 Performance counter stats for 'sleep 1': 1,436 l2_pf_miss_l2_l3 # 11114.00 l2_cache_accesses_from_l2_hwpf # 11114.00 l2_cache_hits_from_l2_hwpf 4,482 l2_pf_hit_l2 5,196 l2_pf_miss_l2_hit_l3 1.001765339 seconds time elapsed After: # perf stat -M l2_cache_accesses_from_l2_hwpf sleep 1 Performance counter stats for 'sleep 1': 1,477 l2_pf_miss_l2_l3 # 10442.00 l2_cache_accesses_from_l2_hwpf 3,978 l2_pf_hit_l2 4,987 l2_pf_miss_l2_hit_l3 1.001491186 seconds time elapsed # perf stat -e l2_cache_hits_from_l2_hwpf sleep 1 Performance counter stats for 'sleep 1': 3,983 l2_cache_hits_from_l2_hwpf 1.001329970 seconds time elapsed Note the difference in performance counter values for the accesses versus the hits after the fix, and the hits event now counting the same as l2_pf_hit_l2. Fixes: 08ed77e414ab ("perf vendor events amd: Add recommended events") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=206537 Reviewed-by: Robert Richter Signed-off-by: Smita Koralahalli Tested-by: Arnaldo Carvalho de Melo # On a 3900X Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kim Phillips Cc: Mark Rutland Cc: Martin Liška Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Vijay Thakkar Cc: linux-perf-users@vger.kernel.org Link: https://lore.kernel.org/r/20210406215944.113332-2-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/amdzen1/cache.json | 2 +- tools/perf/pmu-events/arch/x86/amdzen1/recommended.json | 6 +++--- tools/perf/pmu-events/arch/x86/amdzen2/cache.json | 2 +- tools/perf/pmu-events/arch/x86/amdzen2/recommended.json | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json index 4ea7ec4f496e..008f1683e540 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json @@ -275,7 +275,7 @@ { "EventName": "l2_pf_hit_l2", "EventCode": "0x70", - "BriefDescription": "L2 prefetch hit in L2.", + "BriefDescription": "L2 prefetch hit in L2. Use l2_cache_hits_from_l2_hwpf instead.", "UMask": "0xff" }, { diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json index 2cfe2d2f3bfd..3c954543d1ae 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json @@ -79,10 +79,10 @@ "UMask": "0x70" }, { - "MetricName": "l2_cache_hits_from_l2_hwpf", + "EventName": "l2_cache_hits_from_l2_hwpf", + "EventCode": "0x70", "BriefDescription": "L2 Cache Hits from L2 HWPF", - "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", - "MetricGroup": "l2_cache" + "UMask": "0xff" }, { "EventName": "l3_accesses", diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json index f61b982f83ca..8ba84a48188d 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json @@ -205,7 +205,7 @@ { "EventName": "l2_pf_hit_l2", "EventCode": "0x70", - "BriefDescription": "L2 prefetch hit in L2.", + "BriefDescription": "L2 prefetch hit in L2. Use l2_cache_hits_from_l2_hwpf instead.", "UMask": "0xff" }, { diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json index 2ef91e25e661..1c624cee9ef4 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json @@ -79,10 +79,10 @@ "UMask": "0x70" }, { - "MetricName": "l2_cache_hits_from_l2_hwpf", + "EventName": "l2_cache_hits_from_l2_hwpf", + "EventCode": "0x70", "BriefDescription": "L2 Cache Hits from L2 HWPF", - "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", - "MetricGroup": "l2_cache" + "UMask": "0xff" }, { "EventName": "l3_accesses", -- cgit v1.2.3 From ff64c98195c5c48c4cd98ff1347543cdb0631433 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Tue, 6 Apr 2021 16:59:42 -0500 Subject: perf vendor events amd: Use lowercases for all the eventcodes and umasks The values of event codes and umasks are inconsistent with letter cases. Enforce a unique style and default everything to lower case as this helps in tracking changes of automatically generated event tables. Reviewed-by: Robert Richter Signed-off-by: Smita Koralahalli Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kim Phillips Cc: Mark Rutland Cc: Martin Liška Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Vijay Thakkar Cc: linux-perf-users@vger.kernel.org Link: https://lore.kernel.org/r/20210406215944.113332-3-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/amdzen2/branch.json | 2 +- tools/perf/pmu-events/arch/x86/amdzen2/cache.json | 2 +- tools/perf/pmu-events/arch/x86/amdzen2/memory.json | 16 ++++++++-------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/branch.json b/tools/perf/pmu-events/arch/x86/amdzen2/branch.json index ef4166a66288..f5d16846aa1d 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/branch.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/branch.json @@ -24,7 +24,7 @@ "EventName": "bp_l1_tlb_fetch_hit", "EventCode": "0x94", "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB.", - "UMask": "0xFF" + "UMask": "0xff" }, { "EventName": "bp_l1_tlb_fetch_hit.if1g", diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json index 8ba84a48188d..899ccc81263c 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json @@ -353,7 +353,7 @@ }, { "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs", - "EventCode": "0x9A", + "EventCode": "0x9a", "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.", "UMask": "0x3f", "Unit": "L3PMC" diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/memory.json b/tools/perf/pmu-events/arch/x86/amdzen2/memory.json index 715046b339cb..609d9e3da3f7 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/memory.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/memory.json @@ -60,17 +60,17 @@ }, { "EventName": "ls_smi_rx", - "EventCode": "0x2B", + "EventCode": "0x2b", "BriefDescription": "Number of SMIs received." }, { "EventName": "ls_int_taken", - "EventCode": "0x2C", + "EventCode": "0x2c", "BriefDescription": "Number of interrupts taken." }, { "EventName": "ls_rdtsc", - "EventCode": "0x2D", + "EventCode": "0x2d", "BriefDescription": "Number of reads of the TSC (RDTSC instructions). The count is speculative." }, { @@ -300,31 +300,31 @@ }, { "EventName": "ls_hw_pf_dc_fill.ls_mabresp_rmt_dram", - "EventCode": "0x5A", + "EventCode": "0x5a", "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM (home node remote).", "UMask": "0x40" }, { "EventName": "ls_hw_pf_dc_fill.ls_mabresp_rmt_cache", - "EventCode": "0x5A", + "EventCode": "0x5a", "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From another cache (home node remote).", "UMask": "0x10" }, { "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_dram", - "EventCode": "0x5A", + "EventCode": "0x5a", "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM (home node local).", "UMask": "0x8" }, { "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_cache", - "EventCode": "0x5A", + "EventCode": "0x5a", "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From another cache (home node local).", "UMask": "0x2" }, { "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_l2", - "EventCode": "0x5A", + "EventCode": "0x5a", "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. Local L2 hit.", "UMask": "0x1" }, -- cgit v1.2.3 From e5f2b4e1b8b1c709d32e895c9ca77845b8e71ee3 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Tue, 6 Apr 2021 16:59:43 -0500 Subject: perf vendor events amd: Use 0x%02x format for event code and umask Use 0x%02x format for all event codes and umasks as this helps in tracking changes of automatically generated event tables. Reviewed-by: Robert Richter Signed-off-by: Smita Koralahalli Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kim Phillips Cc: Mark Rutland Cc: Martin Liška Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Vijay Thakkar Cc: linux-perf-users@vger.kernel.org Link: https://lore.kernel.org/r/20210406215944.113332-4-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/amdzen1/cache.json | 46 +++++++------- tools/perf/pmu-events/arch/x86/amdzen1/core.json | 12 ++-- .../arch/x86/amdzen1/floating-point.json | 42 ++++++------- tools/perf/pmu-events/arch/x86/amdzen1/memory.json | 42 ++++++------- tools/perf/pmu-events/arch/x86/amdzen1/other.json | 12 ++-- .../pmu-events/arch/x86/amdzen1/recommended.json | 2 +- tools/perf/pmu-events/arch/x86/amdzen2/branch.json | 6 +- tools/perf/pmu-events/arch/x86/amdzen2/cache.json | 56 ++++++++--------- tools/perf/pmu-events/arch/x86/amdzen2/core.json | 12 ++-- .../arch/x86/amdzen2/floating-point.json | 42 ++++++------- tools/perf/pmu-events/arch/x86/amdzen2/memory.json | 70 +++++++++++----------- tools/perf/pmu-events/arch/x86/amdzen2/other.json | 20 +++---- .../pmu-events/arch/x86/amdzen2/recommended.json | 2 +- 13 files changed, 182 insertions(+), 182 deletions(-) diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json index 008f1683e540..0d46cb82bd52 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json @@ -38,31 +38,31 @@ "EventName": "ic_fetch_stall.ic_stall_any", "EventCode": "0x87", "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ic_fetch_stall.ic_stall_dq_empty", "EventCode": "0x87", "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ic_fetch_stall.ic_stall_back_pressure", "EventCode": "0x87", "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ic_cache_inval.l2_invalidating_probe", "EventCode": "0x8c", "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ic_cache_inval.fill_invalidated", "EventCode": "0x8c", "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "bp_tlb_rel", @@ -97,25 +97,25 @@ "EventName": "l2_request_g1.change_to_x", "EventCode": "0x60", "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. Request change to writable, check L2 for current state.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "l2_request_g1.prefetch_l2_cmd", "EventCode": "0x60", "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "l2_request_g1.l2_hw_pf", "EventCode": "0x60", "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2 hit/miss broken out in a separate perfmon event.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "l2_request_g1.group2", "EventCode": "0x60", "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_request_g1.all_no_prefetch", @@ -150,31 +150,31 @@ "EventName": "l2_request_g2.ic_rd_sized_nc", "EventCode": "0x61", "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "l2_request_g2.smc_inval", "EventCode": "0x61", "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "l2_request_g2.bus_locks_originator", "EventCode": "0x61", "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus locks.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "l2_request_g2.bus_locks_responses", "EventCode": "0x61", "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_latency.l2_cycles_waiting_on_fills", "EventCode": "0x62", "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_wcb_req.wcb_write", @@ -192,13 +192,13 @@ "EventName": "l2_wcb_req.zero_byte_store", "EventCode": "0x63", "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "l2_wcb_req.cl_zero", "EventCode": "0x63", "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_cache_req_stat.ls_rd_blk_cs", @@ -228,37 +228,37 @@ "EventName": "l2_cache_req_stat.ls_rd_blk_c", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache request miss in L2 (all types).", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "l2_cache_req_stat.ic_fill_hit_x", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit modifiable line in L2.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "l2_cache_req_stat.ic_fill_hit_s", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit clean line in L2.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "l2_cache_req_stat.ic_fill_miss", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_cache_req_stat.ic_access_in_l2", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache requests in L2.", - "UMask": "0x7" + "UMask": "0x07" }, { "EventName": "l2_cache_req_stat.ic_dc_miss_in_l2", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2 and Data cache request miss in L2 (all types).", - "UMask": "0x9" + "UMask": "0x09" }, { "EventName": "l2_cache_req_stat.ic_dc_hit_in_l2", @@ -270,7 +270,7 @@ "EventName": "l2_fill_pending.l2_fill_busy", "EventCode": "0x6d", "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_pf_hit_l2", diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/core.json b/tools/perf/pmu-events/arch/x86/amdzen1/core.json index 653b11b23399..4dceeabc4a9f 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/core.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/core.json @@ -68,21 +68,21 @@ "EventCode": "0xcb", "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ex_ret_mmx_fp_instr.mmx_instr", "EventCode": "0xcb", "BriefDescription": "MMX instructions.", "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ex_ret_mmx_fp_instr.x87_instr", "EventCode": "0xcb", "BriefDescription": "x87 instructions.", "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ex_ret_cond", @@ -103,19 +103,19 @@ "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", "EventCode": "0x1cf", "BriefDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", "EventCode": "0x1cf", "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", "EventCode": "0x1cf", "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ex_ret_fus_brnch_inst", diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json b/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json index a35542bd3b36..3995b528ebd6 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json @@ -39,35 +39,35 @@ "EventCode": "0x00", "BriefDescription": "Total number uOps assigned to all fpu pipes.", "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to all pipes.", - "UMask": "0xf" + "UMask": "0x0f" }, { "EventName": "fpu_pipe_assignment.total3", "EventCode": "0x00", "BriefDescription": "Total number of fp uOps on pipe 3.", "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 3.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "fpu_pipe_assignment.total2", "EventCode": "0x00", "BriefDescription": "Total number of fp uOps on pipe 2.", "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 2.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "fpu_pipe_assignment.total1", "EventCode": "0x00", "BriefDescription": "Total number of fp uOps on pipe 1.", "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 1.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "fpu_pipe_assignment.total0", "EventCode": "0x00", "BriefDescription": "Total number of fp uOps on pipe 0.", "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 0.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "fp_sched_empty", @@ -79,28 +79,28 @@ "EventCode": "0x02", "BriefDescription": "All Ops.", "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8.", - "UMask": "0x7" + "UMask": "0x07" }, { "EventName": "fp_retx87_fp_ops.div_sqr_r_ops", "EventCode": "0x02", "BriefDescription": "Divide and square root Ops.", "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Divide and square root Ops.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "fp_retx87_fp_ops.mul_ops", "EventCode": "0x02", "BriefDescription": "Multiply Ops.", "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Multiply Ops.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "fp_retx87_fp_ops.add_sub_ops", "EventCode": "0x02", "BriefDescription": "Add/subtract Ops.", "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Add/subtract Ops.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "fp_ret_sse_avx_ops.all", @@ -142,83 +142,83 @@ "EventCode": "0x03", "BriefDescription": "Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "fp_ret_sse_avx_ops.sp_div_flops", "EventCode": "0x03", "BriefDescription": "Single-precision divide/square root FLOPS.", "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision divide/square root FLOPS.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "fp_ret_sse_avx_ops.sp_mult_flops", "EventCode": "0x03", "BriefDescription": "Single-precision multiply FLOPS.", "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision multiply FLOPS.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "fp_ret_sse_avx_ops.sp_add_sub_flops", "EventCode": "0x03", "BriefDescription": "Single-precision add/subtract FLOPS.", "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision add/subtract FLOPS.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "fp_num_mov_elim_scal_op.optimized", "EventCode": "0x04", "BriefDescription": "Number of Scalar Ops optimized.", "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Scalar Ops optimized.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "fp_num_mov_elim_scal_op.opt_potential", "EventCode": "0x04", "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass).", "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Ops that are candidates for optimization (have Z-bit either set or pass).", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim", "EventCode": "0x04", "BriefDescription": "Number of SSE Move Ops eliminated.", "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops eliminated.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops", "EventCode": "0x04", "BriefDescription": "Number of SSE Move Ops.", "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "fp_retired_ser_ops.x87_ctrl_ret", "EventCode": "0x05", "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.", "PublicDescription": "The number of serializing Ops retired. x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "fp_retired_ser_ops.x87_bot_ret", "EventCode": "0x05", "BriefDescription": "x87 bottom-executing uOps retired.", "PublicDescription": "The number of serializing Ops retired. x87 bottom-executing uOps retired.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "fp_retired_ser_ops.sse_ctrl_ret", "EventCode": "0x05", "BriefDescription": "SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", "PublicDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "fp_retired_ser_ops.sse_bot_ret", "EventCode": "0x05", "BriefDescription": "SSE bottom-executing uOps retired.", "PublicDescription": "The number of serializing Ops retired. SSE bottom-executing uOps retired.", - "UMask": "0x1" + "UMask": "0x01" } ] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/memory.json b/tools/perf/pmu-events/arch/x86/amdzen1/memory.json index b33a3c308019..385022fb026e 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/memory.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/memory.json @@ -3,25 +3,25 @@ "EventName": "ls_locks.bus_lock", "EventCode": "0x25", "BriefDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_dispatch.ld_st_dispatch", "EventCode": "0x29", "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ls_dispatch.store_dispatch", "EventCode": "0x29", "BriefDescription": "Counts the number of stores dispatched to the LS unit. Unit Masks ADDed.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_dispatch.ld_dispatch", "EventCode": "0x29", "BriefDescription": "Counts the number of loads dispatched to the LS unit. Unit Masks ADDed.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_stlf", @@ -37,13 +37,13 @@ "EventName": "ls_mab_alloc.dc_prefetcher", "EventCode": "0x41", "BriefDescription": "LS MAB allocates by type - DC prefetcher.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "ls_mab_alloc.stores", "EventCode": "0x41", "BriefDescription": "LS MAB allocates by type - stores.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_mab_alloc.loads", @@ -85,61 +85,61 @@ "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Reload of a page of 1G size.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Reload of a page of 2M size.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Reload of a page of 32K size.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Reload of a page of 4K size.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_tablewalker.iside", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks on I-side.", - "UMask": "0xc" + "UMask": "0x0c" }, { "EventName": "ls_tablewalker.ic_type1", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks IC Type 1.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "ls_tablewalker.ic_type0", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks IC Type 0.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ls_tablewalker.dside", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks on D-side.", - "UMask": "0x3" + "UMask": "0x03" }, { "EventName": "ls_tablewalker.dc_type1", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks DC Type 1.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_tablewalker.dc_type0", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks DC Type 0.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_misal_accesses", @@ -150,31 +150,31 @@ "EventName": "ls_pref_instr_disp.prefetch_nta", "EventCode": "0x4b", "BriefDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ls_pref_instr_disp.store_prefetch_w", "EventCode": "0x4b", "BriefDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_pref_instr_disp.load_prefetch_w", "EventCode": "0x4b", "BriefDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_inef_sw_pref.mab_mch_cnt", "EventCode": "0x52", "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a match on an already-allocated miss request buffer.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", "EventCode": "0x52", "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a DC hit.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_not_halted_cyc", diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/other.json b/tools/perf/pmu-events/arch/x86/amdzen1/other.json index ff780098d36e..7626986ce1fb 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/other.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/other.json @@ -3,13 +3,13 @@ "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", "EventCode": "0x28a", "BriefDescription": "OC Mode Switch. OC to IC mode switch.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", "EventCode": "0x28a", "BriefDescription": "OC Mode Switch. IC to OC mode switch.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall", @@ -33,24 +33,24 @@ "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall", "EventCode": "0xaf", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3_0 Tokens unavailable.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "de_dis_dispatch_token_stalls0.alsq3_token_stall", "EventCode": "0xaf", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall", "EventCode": "0xaf", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall", "EventCode": "0xaf", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.", - "UMask": "0x1" + "UMask": "0x01" } ] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json index 3c954543d1ae..bf5083c1c260 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json @@ -10,7 +10,7 @@ "EventName": "all_dc_accesses", "EventCode": "0x29", "BriefDescription": "All L1 Data Cache Accesses", - "UMask": "0x7" + "UMask": "0x07" }, { "MetricName": "all_l2_cache_accesses", diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/branch.json b/tools/perf/pmu-events/arch/x86/amdzen2/branch.json index f5d16846aa1d..84fb43fa59ad 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/branch.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/branch.json @@ -30,19 +30,19 @@ "EventName": "bp_l1_tlb_fetch_hit.if1g", "EventCode": "0x94", "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 1GB page.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "bp_l1_tlb_fetch_hit.if2m", "EventCode": "0x94", "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 2MB page.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "bp_l1_tlb_fetch_hit.if4k", "EventCode": "0x94", "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 4KB page.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "bp_tlb_rel", diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json index 899ccc81263c..c858fb9477e3 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json @@ -27,25 +27,25 @@ "EventName": "l2_request_g1.change_to_x", "EventCode": "0x60", "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. Request change to writable, check L2 for current state.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "l2_request_g1.prefetch_l2_cmd", "EventCode": "0x60", "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "l2_request_g1.l2_hw_pf", "EventCode": "0x60", "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2 hit/miss broken out in a separate perfmon event.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "l2_request_g1.group2", "EventCode": "0x60", "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_request_g1.all_no_prefetch", @@ -80,31 +80,31 @@ "EventName": "l2_request_g2.ic_rd_sized_nc", "EventCode": "0x61", "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "l2_request_g2.smc_inval", "EventCode": "0x61", "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "l2_request_g2.bus_locks_originator", "EventCode": "0x61", "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus locks.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "l2_request_g2.bus_locks_responses", "EventCode": "0x61", "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_latency.l2_cycles_waiting_on_fills", "EventCode": "0x62", "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_wcb_req.wcb_write", @@ -122,13 +122,13 @@ "EventName": "l2_wcb_req.zero_byte_store", "EventCode": "0x63", "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "l2_wcb_req.cl_zero", "EventCode": "0x63", "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_cache_req_stat.ls_rd_blk_cs", @@ -158,37 +158,37 @@ "EventName": "l2_cache_req_stat.ls_rd_blk_c", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache request miss in L2 (all types).", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "l2_cache_req_stat.ic_fill_hit_x", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit modifiable line in L2.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "l2_cache_req_stat.ic_fill_hit_s", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit clean line in L2.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "l2_cache_req_stat.ic_fill_miss", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_cache_req_stat.ic_access_in_l2", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache requests in L2.", - "UMask": "0x7" + "UMask": "0x07" }, { "EventName": "l2_cache_req_stat.ic_dc_miss_in_l2", "EventCode": "0x64", "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2 and Data cache request miss in L2 (all types).", - "UMask": "0x9" + "UMask": "0x09" }, { "EventName": "l2_cache_req_stat.ic_dc_hit_in_l2", @@ -200,7 +200,7 @@ "EventName": "l2_fill_pending.l2_fill_busy", "EventCode": "0x6d", "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l2_pf_hit_l2", @@ -255,19 +255,19 @@ "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if1g", "EventCode": "0x85", "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 1GB page.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if2m", "EventCode": "0x85", "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 2MB page.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if4k", "EventCode": "0x85", "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 4KB page.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "bp_snp_re_sync", @@ -278,43 +278,43 @@ "EventName": "ic_fetch_stall.ic_stall_any", "EventCode": "0x87", "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ic_fetch_stall.ic_stall_dq_empty", "EventCode": "0x87", "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ic_fetch_stall.ic_stall_back_pressure", "EventCode": "0x87", "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ic_cache_inval.l2_invalidating_probe", "EventCode": "0x8c", "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ic_cache_inval.fill_invalidated", "EventCode": "0x8c", "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", "EventCode": "0x28a", "BriefDescription": "OC Mode Switch. OC to IC mode switch.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", "EventCode": "0x28a", "BriefDescription": "OC Mode Switch. IC to OC mode switch.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "l3_request_g1.caching_l3_cache_accesses", diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/core.json b/tools/perf/pmu-events/arch/x86/amdzen2/core.json index 4b75183da94a..bed14829f0bc 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/core.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/core.json @@ -68,21 +68,21 @@ "EventCode": "0xcb", "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ex_ret_mmx_fp_instr.mmx_instr", "EventCode": "0xcb", "BriefDescription": "MMX instructions.", "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ex_ret_mmx_fp_instr.x87_instr", "EventCode": "0xcb", "BriefDescription": "x87 instructions.", "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ex_ret_cond", @@ -108,19 +108,19 @@ "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", "EventCode": "0x1cf", "BriefDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", "EventCode": "0x1cf", "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", "EventCode": "0x1cf", "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ex_ret_fus_brnch_inst", diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json b/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json index 622a0c420e46..91ed96f2580b 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json @@ -4,35 +4,35 @@ "EventCode": "0x00", "BriefDescription": "Total number of fp uOps.", "PublicDescription": "Total number of fp uOps. The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS.", - "UMask": "0xf" + "UMask": "0x0f" }, { "EventName": "fpu_pipe_assignment.total3", "EventCode": "0x00", "BriefDescription": "Total number uOps assigned to pipe 3.", "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 3.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "fpu_pipe_assignment.total2", "EventCode": "0x00", "BriefDescription": "Total number uOps assigned to pipe 2.", "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 2.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "fpu_pipe_assignment.total1", "EventCode": "0x00", "BriefDescription": "Total number uOps assigned to pipe 1.", "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 1.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "fpu_pipe_assignment.total0", "EventCode": "0x00", "BriefDescription": "Total number of fp uOps on pipe 0.", "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 0.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "fp_ret_sse_avx_ops.all", @@ -45,96 +45,96 @@ "EventCode": "0x03", "BriefDescription": "Multiply-add FLOPS. Multiply-add counts as 2 FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", "PublicDescription": "", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "fp_ret_sse_avx_ops.div_flops", "EventCode": "0x03", "BriefDescription": "Divide/square root FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "fp_ret_sse_avx_ops.mult_flops", "EventCode": "0x03", "BriefDescription": "Multiply FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "fp_ret_sse_avx_ops.add_sub_flops", "EventCode": "0x03", "BriefDescription": "Add/subtract FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "fp_num_mov_elim_scal_op.optimized", "EventCode": "0x04", "BriefDescription": "Number of Scalar Ops optimized. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "fp_num_mov_elim_scal_op.opt_potential", "EventCode": "0x04", "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass). This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim", "EventCode": "0x04", "BriefDescription": "Number of SSE Move Ops eliminated. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops", "EventCode": "0x04", "BriefDescription": "Number of SSE Move Ops. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "fp_retired_ser_ops.sse_bot_ret", "EventCode": "0x05", "BriefDescription": "SSE bottom-executing uOps retired. The number of serializing Ops retired.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "fp_retired_ser_ops.sse_ctrl_ret", "EventCode": "0x05", "BriefDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "fp_retired_ser_ops.x87_bot_ret", "EventCode": "0x05", "BriefDescription": "x87 bottom-executing uOps retired. The number of serializing Ops retired.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "fp_retired_ser_ops.x87_ctrl_ret", "EventCode": "0x05", "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits. The number of serializing Ops retired.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "fp_disp_faults.ymm_spill_fault", "EventCode": "0x0e", "BriefDescription": "Floating Point Dispatch Faults. YMM spill fault.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "fp_disp_faults.ymm_fill_fault", "EventCode": "0x0e", "BriefDescription": "Floating Point Dispatch Faults. YMM fill fault.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "fp_disp_faults.xmm_fill_fault", "EventCode": "0x0e", "BriefDescription": "Floating Point Dispatch Faults. XMM fill fault.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "fp_disp_faults.x87_fill_fault", "EventCode": "0x0e", "BriefDescription": "Floating Point Dispatch Faults. x87 fill fault.", - "UMask": "0x1" + "UMask": "0x01" } ] diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/memory.json b/tools/perf/pmu-events/arch/x86/amdzen2/memory.json index 609d9e3da3f7..89822b9ddb79 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/memory.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/memory.json @@ -4,31 +4,31 @@ "EventCode": "0x24", "BriefDescription": "Non-forwardable conflict; used to reduce STLI's via software. All reasons. Store To Load Interlock (STLI) are loads that were unable to complete because of a possible match with an older store, and the older store could not do STLF for some reason.", "PublicDescription" : "Store-to-load conflicts: A load was unable to complete due to a non-forwardable conflict with an older store. Most commonly, a load's address range partially but not completely overlaps with an uncompleted older store. Software can avoid this problem by using same-size and same-alignment loads and stores when accessing the same data. Vector/SIMD code is particularly susceptible to this problem; software should construct wide vector stores by manipulating vector elements in registers using shuffle/blend/swap instructions prior to storing to memory, instead of using narrow element-by-element stores.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_locks.spec_lock_hi_spec", "EventCode": "0x25", "BriefDescription": "Retired lock instructions. High speculative cacheable lock speculation succeeded.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "ls_locks.spec_lock_lo_spec", "EventCode": "0x25", "BriefDescription": "Retired lock instructions. Low speculative cacheable lock speculation succeeded.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ls_locks.non_spec_lock", "EventCode": "0x25", "BriefDescription": "Retired lock instructions. Non-speculative lock succeeded.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_locks.bus_lock", "EventCode": "0x25", "BriefDescription": "Retired lock instructions. Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type. Comparable to legacy bus lock.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_ret_cl_flush", @@ -44,19 +44,19 @@ "EventName": "ls_dispatch.ld_st_dispatch", "EventCode": "0x29", "BriefDescription": "Dispatch of a single op that performs a load from and store to the same memory address. Number of single ops that do load/store to an address.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ls_dispatch.store_dispatch", "EventCode": "0x29", "BriefDescription": "Number of stores dispatched. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_dispatch.ld_dispatch", "EventCode": "0x29", "BriefDescription": "Number of loads dispatched. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_smi_rx", @@ -93,19 +93,19 @@ "EventName": "ls_mab_alloc.dc_prefetcher", "EventCode": "0x41", "BriefDescription": "LS MAB Allocates by Type. DC prefetcher.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "ls_mab_alloc.stores", "EventCode": "0x41", "BriefDescription": "LS MAB Allocates by Type. Stores.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_mab_alloc.loads", "EventCode": "0x41", "BriefDescription": "LS MAB Allocates by Type. Loads.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_refills_from_sys.ls_mabresp_rmt_dram", @@ -123,19 +123,19 @@ "EventName": "ls_refills_from_sys.ls_mabresp_lcl_dram", "EventCode": "0x43", "BriefDescription": "Demand Data Cache Fills by Data Source. DRAM or IO from this thread's die.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "ls_refills_from_sys.ls_mabresp_lcl_cache", "EventCode": "0x43", "BriefDescription": "Demand Data Cache Fills by Data Source. Hit in cache; local CCX (not Local L2), or Remote CCX and the address's Home Node is on this thread's die.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_refills_from_sys.ls_mabresp_lcl_l2", "EventCode": "0x43", "BriefDescription": "Demand Data Cache Fills by Data Source. Local L2 hit.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_l1_d_tlb_miss.all", @@ -171,61 +171,61 @@ "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that hit in the L2 TLB.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that hit in the L2 TLB.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Miss. DTLB reload hit a coalesced page.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", "EventCode": "0x45", "BriefDescription": "L1 DTLB Miss. DTLB reload to a 4K page that hit in the L2 TLB.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_tablewalker.iside", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks on I-side.", - "UMask": "0xc" + "UMask": "0x0c" }, { "EventName": "ls_tablewalker.ic_type1", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks IC Type 1.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "ls_tablewalker.ic_type0", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks IC Type 0.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ls_tablewalker.dside", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks on D-side.", - "UMask": "0x3" + "UMask": "0x03" }, { "EventName": "ls_tablewalker.dc_type1", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks DC Type 1.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_tablewalker.dc_type0", "EventCode": "0x46", "BriefDescription": "Total Page Table Walks DC Type 0.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_misal_accesses", @@ -242,31 +242,31 @@ "EventName": "ls_pref_instr_disp.prefetch_nta", "EventCode": "0x4b", "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). PrefetchNTA instruction. See docAPM3 PREFETCHlevel.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "ls_pref_instr_disp.prefetch_w", "EventCode": "0x4b", "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). See docAPM3 PREFETCHW.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_pref_instr_disp.prefetch", "EventCode": "0x4b", "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). Prefetch_T0_T1_T2. PrefetchT0, T1 and T2 instructions. See docAPM3 PREFETCHlevel.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_inef_sw_pref.mab_mch_cnt", "EventCode": "0x52", "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a match on an already-allocated miss request buffer.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", "EventCode": "0x52", "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a DC hit.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_sw_pf_dc_fill.ls_mabresp_rmt_dram", @@ -284,19 +284,19 @@ "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_dram", "EventCode": "0x59", "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. DRAM or IO from this thread's die. From DRAM (home node local).", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_cache", "EventCode": "0x59", "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From another cache (home node local).", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_l2", "EventCode": "0x59", "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. Local L2 hit.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_hw_pf_dc_fill.ls_mabresp_rmt_dram", @@ -314,19 +314,19 @@ "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_dram", "EventCode": "0x5a", "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM (home node local).", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_cache", "EventCode": "0x5a", "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From another cache (home node local).", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_l2", "EventCode": "0x5a", "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. Local L2 hit.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "ls_not_halted_cyc", diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/other.json b/tools/perf/pmu-events/arch/x86/amdzen2/other.json index e94994d4a60e..1bdf106ca785 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/other.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/other.json @@ -14,13 +14,13 @@ "EventName": "de_dis_uops_from_decoder.opcache_dispatched", "EventCode": "0xaa", "BriefDescription": "Count of dispatched Ops from OpCache.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "de_dis_uops_from_decoder.decoder_dispatched", "EventCode": "0xaa", "BriefDescription": "Count of dispatched Ops from Decoder.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "de_dis_dispatch_token_stalls1.fp_misc_rsrc_stall", @@ -50,25 +50,25 @@ "EventName": "de_dis_dispatch_token_stalls1.int_sched_misc_token_stall", "EventCode": "0xae", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Scheduler miscellaneous resource stall.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "de_dis_dispatch_token_stalls1.store_queue_token_stall", "EventCode": "0xae", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Store queue resource stall. Applies to all ops with store semantics.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "de_dis_dispatch_token_stalls1.load_queue_token_stall", "EventCode": "0xae", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Load queue resource stall. Applies to all ops with load semantics.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "de_dis_dispatch_token_stalls1.int_phy_reg_file_token_stall", "EventCode": "0xae", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Physical Register File resource stall. Applies to all ops that have an integer destination register.", - "UMask": "0x1" + "UMask": "0x01" }, { "EventName": "de_dis_dispatch_token_stalls0.sc_agu_dispatch_stall", @@ -92,24 +92,24 @@ "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall", "EventCode": "0xaf", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.", - "UMask": "0x8" + "UMask": "0x08" }, { "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall", "EventCode": "0xaf", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ3_0_TokenStall.", - "UMask": "0x4" + "UMask": "0x04" }, { "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall", "EventCode": "0xaf", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.", - "UMask": "0x2" + "UMask": "0x02" }, { "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall", "EventCode": "0xaf", "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.", - "UMask": "0x1" + "UMask": "0x01" } ] diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json index 1c624cee9ef4..a71694a043ba 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json @@ -10,7 +10,7 @@ "EventName": "all_dc_accesses", "EventCode": "0x29", "BriefDescription": "All L1 Data Cache Accesses", - "UMask": "0x7" + "UMask": "0x07" }, { "MetricName": "all_l2_cache_accesses", -- cgit v1.2.3 From da66658638c947cab0fb157289f03698453ff8d5 Mon Sep 17 00:00:00 2001 From: Smita Koralahalli Date: Tue, 6 Apr 2021 16:59:44 -0500 Subject: perf vendor events amd: Add Zen3 events Add PMU events for AMD Zen3 processors as documented in the AMD Processor Programming Reference for Family 19h and Model 01h [1]. Below are the events which are new on Zen3: PMCx041 ls_mab_alloc.{all_allocations|hardware_prefetcher_allocations|load_store_allocations} PMCx043 ls_dmnd_fills_from_sys.ext_cache_local PMCx044 ls_any_fills_from_sys.{mem_io_remote|ext_cache_remote|mem_io_local|ext_cache_local|int_cache|lcl_l2} PMCx047 ls_misal_loads.{ma4k|ma64} PMCx059 ls_sw_pf_dc_fills.ext_cache_local PMCx05a ls_hw_pf_dc_fills.ext_cache_local PMCx05f ls_alloc_mab_count PMCx085 bp_l1_tlb_miss_l2_tlb_miss.coalesced_4k PMCx0ab de_dis_cops_from_decoder.disp_op_type.{any_integer_dispatch|any_fp_dispatch} PMCx0cc ex_ret_ind_brch_instr PMCx18e ic_tag_hit_miss.{all_instruction_cache_accesses|instruction_cache_miss|instruction_cache_hit} PMCx1c7 ex_ret_msprd_brnch_instr_dir_msmtch PMCx28f op_cache_hit_miss.{all_op_cache_accesses|op_cache_miss|op_cache_hit} Section 2.1.17.2 "Performance Measurement" of "PPR for AMD Family 19h, Model 01h, Revision B1 Processors - 55898 Rev 0.35 - Feb 5, 2021." lists new metrics. Add them. Preserve the events for Zen3 if they are measurable and non-zero as taken from Zen2 directory even if the PPR of Zen3 [1] omits them. Those events are the following: PMCx000 fpu_pipe_assignment.{total|total0|total1|total2|total3} PMCx004 fp_num_mov_elim_scal_op.{optimized|opt_potential|sse_mov_ops_elim|sse_mov_ops} PMCx02D ls_rdtsc PMCx040 ls_dc_accesses PMCx046 ls_tablewalker.{iside|ic_type1|ic_type0|dside|dc_type1|dc_type0} PMCx061 l2_request_g2.{group1|ls_rd_sized|ls_rd_sized_nc|ic_rd_sized|ic_rd_sized_nc|smc_inval|bus_lock_originator|bus_locks_responses} PMCx062 l2_latency.l2_cycles_waiting_on_fills PMCx063 l2_wcb_req.{wcb_write|wcb_close|zero_byte_store|cl_zero} PMCx06d l2_fill_pending.l2_fill_busy PMCx080 ic_fw32 PMCx081 ic_fw32_miss PMCx086 bp_snp_re_sync PMCx087 ic_fetch_stall.{ic_stall_any|ic_stall_dq_empty|ic_stall_back_pressure} PMCx08a bp_l1_btb_correct PMCx08c ic_cache_inval.{l2_invalidating_probe|fill_invalidated} PMCx099 bp_tlb_rel PMCx0a9 de_dis_uop_queue_empty_di0 PMCx0c7 ex_ret_brn_resync PMCx28a ic_oc_mode_switch.{oc_ic_mode_switch|ic_oc_mode_switch} L3PMCx01 l3_request_g1.caching_l3_cache_accesses L3PMCx06 l3_comb_clstr_state.{other_l3_miss_typs|request_miss} [1] Processor Programming Reference (PPR) for AMD Family 19h, Model 01h, Revision B1 Processors - 55898 Rev 0.35 - Feb 5, 2021. [2] Processor Programming Reference (PPR) for AMD Family 17h Model 71h, Revision B0 Processors, 56176 Rev 3.06 - Jul 17, 2019. [3] Processor Programming Reference (PPR) for AMD Family 17h Models 01h,08h, Revision B2 Processors, 54945 Rev 3.03 - Jun 14, 2019. All of the PPRs can be found at: https://bugzilla.kernel.org/show_bug.cgi?id=206537 Reviewed-by: Robert Richter Signed-off-by: Smita Koralahalli Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kim Phillips Cc: Mark Rutland Cc: Martin Liška Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Vijay Thakkar Cc: linux-perf-users@vger.kernel.org Link: https://lore.kernel.org/r/20210406215944.113332-5-Smita.KoralahalliChannabasappa@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/amdzen3/branch.json | 53 +++ tools/perf/pmu-events/arch/x86/amdzen3/cache.json | 402 +++++++++++++++++++ tools/perf/pmu-events/arch/x86/amdzen3/core.json | 137 +++++++ .../pmu-events/arch/x86/amdzen3/data-fabric.json | 98 +++++ .../arch/x86/amdzen3/floating-point.json | 139 +++++++ tools/perf/pmu-events/arch/x86/amdzen3/memory.json | 428 +++++++++++++++++++++ tools/perf/pmu-events/arch/x86/amdzen3/other.json | 103 +++++ .../pmu-events/arch/x86/amdzen3/recommended.json | 214 +++++++++++ tools/perf/pmu-events/arch/x86/mapfile.csv | 2 +- 9 files changed, 1575 insertions(+), 1 deletion(-) create mode 100644 tools/perf/pmu-events/arch/x86/amdzen3/branch.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen3/cache.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen3/core.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen3/data-fabric.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen3/floating-point.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen3/memory.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen3/other.json create mode 100644 tools/perf/pmu-events/arch/x86/amdzen3/recommended.json diff --git a/tools/perf/pmu-events/arch/x86/amdzen3/branch.json b/tools/perf/pmu-events/arch/x86/amdzen3/branch.json new file mode 100644 index 000000000000..018a7fe94fb9 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen3/branch.json @@ -0,0 +1,53 @@ +[ + { + "EventName": "bp_l1_btb_correct", + "EventCode": "0x8a", + "BriefDescription": "L1 Branch Prediction Overrides Existing Prediction (speculative)." + }, + { + "EventName": "bp_l2_btb_correct", + "EventCode": "0x8b", + "BriefDescription": "L2 Branch Prediction Overrides Existing Prediction (speculative)." + }, + { + "EventName": "bp_dyn_ind_pred", + "EventCode": "0x8e", + "BriefDescription": "Dynamic Indirect Predictions.", + "PublicDescription": "The number of times a branch used the indirect predictor to make a prediction." + }, + { + "EventName": "bp_de_redirect", + "EventCode": "0x91", + "BriefDescription": "Decode Redirects", + "PublicDescription": "The number of times the instruction decoder overrides the predicted target." + }, + { + "EventName": "bp_l1_tlb_fetch_hit", + "EventCode": "0x94", + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB.", + "UMask": "0xff" + }, + { + "EventName": "bp_l1_tlb_fetch_hit.if1g", + "EventCode": "0x94", + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. L1 Instruction TLB hit (1G page size).", + "UMask": "0x04" + }, + { + "EventName": "bp_l1_tlb_fetch_hit.if2m", + "EventCode": "0x94", + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. L1 Instruction TLB hit (2M page size).", + "UMask": "0x02" + }, + { + "EventName": "bp_l1_tlb_fetch_hit.if4k", + "EventCode": "0x94", + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. L1 Instrcution TLB hit (4K or 16K page size).", + "UMask": "0x01" + }, + { + "EventName": "bp_tlb_rel", + "EventCode": "0x99", + "BriefDescription": "The number of ITLB reload requests." + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen3/cache.json b/tools/perf/pmu-events/arch/x86/amdzen3/cache.json new file mode 100644 index 000000000000..fa1d7499a2e3 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen3/cache.json @@ -0,0 +1,402 @@ +[ + { + "EventName": "l2_request_g1.rd_blk_l", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache reads (including hardware and software prefetch).", + "UMask": "0x80" + }, + { + "EventName": "l2_request_g1.rd_blk_x", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache stores.", + "UMask": "0x40" + }, + { + "EventName": "l2_request_g1.ls_rd_blk_c_s", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache shared reads.", + "UMask": "0x20" + }, + { + "EventName": "l2_request_g1.cacheable_ic_read", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Instruction cache reads.", + "UMask": "0x10" + }, + { + "EventName": "l2_request_g1.change_to_x", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. Request change to writable, check L2 for current state.", + "UMask": "0x08" + }, + { + "EventName": "l2_request_g1.prefetch_l2_cmd", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.", + "UMask": "0x04" + }, + { + "EventName": "l2_request_g1.l2_hw_pf", + "EventCode": "0x60", + "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2 hit/miss broken out in a separate perfmon event.", + "UMask": "0x02" + }, + { + "EventName": "l2_request_g1.group2", + "EventCode": "0x60", + "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).", + "UMask": "0x01" + }, + { + "EventName": "l2_request_g1.all_no_prefetch", + "EventCode": "0x60", + "UMask": "0xf9" + }, + { + "EventName": "l2_request_g2.group1", + "EventCode": "0x61", + "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g1 (PMCx060).", + "UMask": "0x80" + }, + { + "EventName": "l2_request_g2.ls_rd_sized", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized.", + "UMask": "0x40" + }, + { + "EventName": "l2_request_g2.ls_rd_sized_nc", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized non-cacheable.", + "UMask": "0x20" + }, + { + "EventName": "l2_request_g2.ic_rd_sized", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized.", + "UMask": "0x10" + }, + { + "EventName": "l2_request_g2.ic_rd_sized_nc", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.", + "UMask": "0x08" + }, + { + "EventName": "l2_request_g2.smc_inval", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.", + "UMask": "0x04" + }, + { + "EventName": "l2_request_g2.bus_locks_originator", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus locks.", + "UMask": "0x02" + }, + { + "EventName": "l2_request_g2.bus_locks_responses", + "EventCode": "0x61", + "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response.", + "UMask": "0x01" + }, + { + "EventName": "l2_latency.l2_cycles_waiting_on_fills", + "EventCode": "0x62", + "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", + "UMask": "0x01" + }, + { + "EventName": "l2_wcb_req.wcb_write", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB write requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.", + "UMask": "0x40" + }, + { + "EventName": "l2_wcb_req.wcb_close", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB close requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.", + "UMask": "0x20" + }, + { + "EventName": "l2_wcb_req.zero_byte_store", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", + "UMask": "0x04" + }, + { + "EventName": "l2_wcb_req.cl_zero", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", + "UMask": "0x01" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_cs", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache shared read hit in L2", + "UMask": "0x80" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit in L2. Modifiable.", + "UMask": "0x40" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit non-modifiable line in L2.", + "UMask": "0x20" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_x", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache store or state change hit in L2.", + "UMask": "0x10" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_c", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache request miss in L2 (all types). Use l2_cache_misses_from_dc_misses instead.", + "UMask": "0x08" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_hit_x", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit modifiable line in L2.", + "UMask": "0x04" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_hit_s", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit non-modifiable line in L2.", + "UMask": "0x02" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_miss", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2. Use l2_cache_misses_from_ic_miss instead.", + "UMask": "0x01" + }, + { + "EventName": "l2_cache_req_stat.ic_access_in_l2", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache requests in L2.", + "UMask": "0x07" + }, + { + "EventName": "l2_cache_req_stat.ic_dc_miss_in_l2", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2 and Data cache request miss in L2 (all types).", + "UMask": "0x09" + }, + { + "EventName": "l2_cache_req_stat.ic_dc_hit_in_l2", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request hit in L2 and Data cache request hit in L2 (all types).", + "UMask": "0xf6" + }, + { + "EventName": "l2_fill_pending.l2_fill_busy", + "EventCode": "0x6d", + "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.", + "UMask": "0x01" + }, + { + "EventName": "l2_pf_hit_l2", + "EventCode": "0x70", + "BriefDescription": "L2 prefetch hit in L2. Use l2_cache_hits_from_l2_hwpf instead.", + "UMask": "0xff" + }, + { + "EventName": "l2_pf_miss_l2_hit_l3", + "EventCode": "0x71", + "BriefDescription": "L2 prefetcher hits in L3. Counts all L2 prefetches accepted by the L2 pipeline which miss the L2 cache and hit the L3.", + "UMask": "0xff" + }, + { + "EventName": "l2_pf_miss_l2_l3", + "EventCode": "0x72", + "BriefDescription": "L2 prefetcher misses in L3. Counts all L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3 caches.", + "UMask": "0xff" + }, + { + "EventName": "ic_fw32", + "EventCode": "0x80", + "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)." + }, + { + "EventName": "ic_fw32_miss", + "EventCode": "0x81", + "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag." + }, + { + "EventName": "ic_cache_fill_l2", + "EventCode": "0x82", + "BriefDescription": "Instruction Cache Refills from L2. The number of 64 byte instruction cache line was fulfilled from the L2 cache." + }, + { + "EventName": "ic_cache_fill_sys", + "EventCode": "0x83", + "BriefDescription": "Instruction Cache Refills from System. The number of 64 byte instruction cache line fulfilled from system memory or another cache." + }, + { + "EventName": "bp_l1_tlb_miss_l2_tlb_hit", + "EventCode": "0x84", + "BriefDescription": "L1 ITLB Miss, L2 ITLB Hit. The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB." + }, + { + "EventName": "bp_l1_tlb_miss_l2_tlb_miss", + "EventCode": "0x85", + "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs.", + "UMask": "0xff" + }, + { + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.coalesced_4k", + "EventCode": "0x85", + "BriefDescription": "The number of valid fills into the ITLB originating from the LS Page-Table Walker. Tablewalk requests are issued for L1-ITLB and L2-ITLB misses. Walk for >4K Coalesced page.", + "UMask": "0x08" + }, + { + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if1g", + "EventCode": "0x85", + "BriefDescription": "The number of valid fills into the ITLB originating from the LS Page-Table Walker. Tablewalk requests are issued for L1-ITLB and L2-ITLB misses. Walk for 1G page.", + "UMask": "0x04" + }, + { + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if2m", + "EventCode": "0x85", + "BriefDescription": "The number of valid fills into the ITLB originating from the LS Page-Table Walker. Tablewalk requests are issued for L1-ITLB and L2-ITLB misses. Walk for 2M page.", + "UMask": "0x02" + }, + { + "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if4k", + "EventCode": "0x85", + "BriefDescription": "The number of valid fills into the ITLB originating from the LS Page-Table Walker. Tablewalk requests are issued for L1-ITLB and L2-ITLB misses. Walk to 4K page.", + "UMask": "0x01" + }, + { + "EventName": "bp_snp_re_sync", + "EventCode": "0x86", + "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event." + }, + { + "EventName": "ic_fetch_stall.ic_stall_any", + "EventCode": "0x87", + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", + "UMask": "0x04" + }, + { + "EventName": "ic_fetch_stall.ic_stall_dq_empty", + "EventCode": "0x87", + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", + "UMask": "0x02" + }, + { + "EventName": "ic_fetch_stall.ic_stall_back_pressure", + "EventCode": "0x87", + "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", + "UMask": "0x01" + }, + { + "EventName": "ic_cache_inval.l2_invalidating_probe", + "EventCode": "0x8c", + "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", + "UMask": "0x02" + }, + { + "EventName": "ic_cache_inval.fill_invalidated", + "EventCode": "0x8c", + "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.", + "UMask": "0x01" + }, + { + "EventName": "ic_tag_hit_miss.all_instruction_cache_accesses", + "EventCode": "0x18e", + "BriefDescription": "All Instruction Cache Accesses. Counts various IC tag related hit and miss events.", + "UMask": "0x1f" + }, + { + "EventName": "ic_tag_hit_miss.instruction_cache_miss", + "EventCode": "0x18e", + "BriefDescription": "Instruction Cache Miss. Counts various IC tag related hit and miss events.", + "UMask": "0x18" + }, + { + "EventName": "ic_tag_hit_miss.instruction_cache_hit", + "EventCode": "0x18e", + "BriefDescription": "Instruction Cache Hit. Counts various IC tag related hit and miss events.", + "UMask": "0x07" + }, + { + "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", + "EventCode": "0x28a", + "BriefDescription": "OC Mode Switch. OC to IC mode switch.", + "UMask": "0x02" + }, + { + "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", + "EventCode": "0x28a", + "BriefDescription": "OC Mode Switch. IC to OC mode switch.", + "UMask": "0x01" + }, + { + "EventName": "op_cache_hit_miss.all_op_cache_accesses", + "EventCode": "0x28f", + "BriefDescription": "All Op Cache accesses. Counts Op Cache micro-tag hit/miss events", + "UMask": "0x07" + }, + { + "EventName": "op_cache_hit_miss.op_cache_miss", + "EventCode": "0x28f", + "BriefDescription": "Op Cache Miss. Counts Op Cache micro-tag hit/miss events", + "UMask": "0x04" + }, + { + "EventName": "op_cache_hit_miss.op_cache_hit", + "EventCode": "0x28f", + "BriefDescription": "Op Cache Hit. Counts Op Cache micro-tag hit/miss events", + "UMask": "0x03" + }, + { + "EventName": "l3_request_g1.caching_l3_cache_accesses", + "EventCode": "0x01", + "BriefDescription": "Caching: L3 cache accesses", + "UMask": "0x80", + "Unit": "L3PMC" + }, + { + "EventName": "l3_lookup_state.all_l3_req_typs", + "EventCode": "0x04", + "BriefDescription": "All L3 Request Types. All L3 cache Requests", + "UMask": "0xff", + "Unit": "L3PMC" + }, + { + "EventName": "l3_comb_clstr_state.other_l3_miss_typs", + "EventCode": "0x06", + "BriefDescription": "Other L3 Miss Request Types", + "UMask": "0xfe", + "Unit": "L3PMC" + }, + { + "EventName": "l3_comb_clstr_state.request_miss", + "EventCode": "0x06", + "BriefDescription": "L3 cache misses", + "UMask": "0x01", + "Unit": "L3PMC" + }, + { + "EventName": "xi_sys_fill_latency", + "EventCode": "0x90", + "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.", + "Unit": "L3PMC" + }, + { + "EventName": "xi_ccx_sdp_req1", + "EventCode": "0x9a", + "BriefDescription": "L3 Misses by Request Type. Ignores SliceID, EnAllSlices, CoreID, EnAllCores and ThreadMask. Requires unit mask 0xFF to engage event for counting.", + "UMask": "0xff", + "Unit": "L3PMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen3/core.json b/tools/perf/pmu-events/arch/x86/amdzen3/core.json new file mode 100644 index 000000000000..4e27a2be359e --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen3/core.json @@ -0,0 +1,137 @@ +[ + { + "EventName": "ex_ret_instr", + "EventCode": "0xc0", + "BriefDescription": "Retired Instructions." + }, + { + "EventName": "ex_ret_ops", + "EventCode": "0xc1", + "BriefDescription": "Retired Ops. Use macro_ops_retired instead.", + "PublicDescription": "The number of macro-ops retired." + }, + { + "EventName": "ex_ret_brn", + "EventCode": "0xc2", + "BriefDescription": "Retired Branch Instructions.", + "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts." + }, + { + "EventName": "ex_ret_brn_misp", + "EventCode": "0xc3", + "BriefDescription": "Retired Branch Instructions Mispredicted.", + "PublicDescription": "The number of retired branch instructions, that were mispredicted." + }, + { + "EventName": "ex_ret_brn_tkn", + "EventCode": "0xc4", + "BriefDescription": "Retired Taken Branch Instructions.", + "PublicDescription": "The number of taken branches that were retired. This includes all types of architectural control flow changes, including exceptions and interrupts." + }, + { + "EventName": "ex_ret_brn_tkn_misp", + "EventCode": "0xc5", + "BriefDescription": "Retired Taken Branch Instructions Mispredicted.", + "PublicDescription": "The number of retired taken branch instructions that were mispredicted." + }, + { + "EventName": "ex_ret_brn_far", + "EventCode": "0xc6", + "BriefDescription": "Retired Far Control Transfers.", + "PublicDescription": "The number of far control transfers retired including far call/jump/return, IRET, SYSCALL and SYSRET, plus exceptions and interrupts. Far control transfers are not subject to branch prediction." + }, + { + "EventName": "ex_ret_brn_resync", + "EventCode": "0xc7", + "BriefDescription": "Retired Branch Resyncs.", + "PublicDescription": "The number of resync branches. These reflect pipeline restarts due to certain microcode assists and events such as writes to the active instruction stream, among other things. Each occurrence reflects a restart penalty similar to a branch mispredict. This is relatively rare." + }, + { + "EventName": "ex_ret_near_ret", + "EventCode": "0xc8", + "BriefDescription": "Retired Near Returns.", + "PublicDescription": "The number of near return instructions (RET or RET Iw) retired." + }, + { + "EventName": "ex_ret_near_ret_mispred", + "EventCode": "0xc9", + "BriefDescription": "Retired Near Returns Mispredicted.", + "PublicDescription": "The number of near returns retired that were not correctly predicted by the return address predictor. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction." + }, + { + "EventName": "ex_ret_brn_ind_misp", + "EventCode": "0xca", + "BriefDescription": "Retired Indirect Branch Instructions Mispredicted.", + "PublicDescription": "The number of indirect branches retired that were not correctly predicted. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction. Note that only EX mispredicts are counted." + }, + { + "EventName": "ex_ret_mmx_fp_instr.sse_instr", + "EventCode": "0xcb", + "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS.", + "UMask": "0x04" + }, + { + "EventName": "ex_ret_mmx_fp_instr.mmx_instr", + "EventCode": "0xcb", + "BriefDescription": "MMX instructions.", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.", + "UMask": "0x02" + }, + { + "EventName": "ex_ret_mmx_fp_instr.x87_instr", + "EventCode": "0xcb", + "BriefDescription": "x87 instructions.", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.", + "UMask": "0x01" + }, + { + "EventName": "ex_ret_ind_brch_instr", + "EventCode": "0xcc", + "BriefDescription": "Retired Indirect Branch Instructions. The number of indirect branches retired." + }, + { + "EventName": "ex_ret_cond", + "EventCode": "0xd1", + "BriefDescription": "Retired Conditional Branch Instructions." + }, + { + "EventName": "ex_div_busy", + "EventCode": "0xd3", + "BriefDescription": "Div Cycles Busy count." + }, + { + "EventName": "ex_div_count", + "EventCode": "0xd4", + "BriefDescription": "Div Op Count." + }, + { + "EventName": "ex_ret_msprd_brnch_instr_dir_msmtch", + "EventCode": "0x1c7", + "BriefDescription": "Retired Mispredicted Branch Instructions due to Direction Mismatch", + "PublicDescription": "The number of retired conditional branch instructions that were not correctly predicted because of a branch direction mismatch." + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", + "EventCode": "0x1cf", + "BriefDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", + "UMask": "0x04" + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", + "EventCode": "0x1cf", + "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", + "UMask": "0x02" + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", + "EventCode": "0x1cf", + "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", + "UMask": "0x01" + }, + { + "EventName": "ex_ret_fused_instr", + "EventCode": "0x1d0", + "BriefDescription": "Counts retired Fused Instructions." + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen3/data-fabric.json b/tools/perf/pmu-events/arch/x86/amdzen3/data-fabric.json new file mode 100644 index 000000000000..40271df40015 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen3/data-fabric.json @@ -0,0 +1,98 @@ +[ + { + "EventName": "remote_outbound_data_controller_0", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 0", + "EventCode": "0x7c7", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_outbound_data_controller_1", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 1", + "EventCode": "0x807", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_outbound_data_controller_2", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 2", + "EventCode": "0x847", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_outbound_data_controller_3", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 3", + "EventCode": "0x887", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_0", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x07", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_1", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x47", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_2", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x87", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_3", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0xc7", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_4", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x107", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_5", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x147", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_6", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x187", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_7", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x1c7", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen3/floating-point.json b/tools/perf/pmu-events/arch/x86/amdzen3/floating-point.json new file mode 100644 index 000000000000..98cfcb9c78ec --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen3/floating-point.json @@ -0,0 +1,139 @@ +[ + { + "EventName": "fpu_pipe_assignment.total", + "EventCode": "0x00", + "BriefDescription": "Total number of fp uOps.", + "PublicDescription": "Total number of fp uOps. The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS.", + "UMask": "0x0f" + }, + { + "EventName": "fpu_pipe_assignment.total3", + "EventCode": "0x00", + "BriefDescription": "Total number uOps assigned to pipe 3.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 3.", + "UMask": "0x08" + }, + { + "EventName": "fpu_pipe_assignment.total2", + "EventCode": "0x00", + "BriefDescription": "Total number uOps assigned to pipe 2.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 2.", + "UMask": "0x04" + }, + { + "EventName": "fpu_pipe_assignment.total1", + "EventCode": "0x00", + "BriefDescription": "Total number uOps assigned to pipe 1.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 1.", + "UMask": "0x02" + }, + { + "EventName": "fpu_pipe_assignment.total0", + "EventCode": "0x00", + "BriefDescription": "Total number of fp uOps on pipe 0.", + "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 0.", + "UMask": "0x01" + }, + { + "EventName": "fp_ret_sse_avx_ops.all", + "EventCode": "0x03", + "BriefDescription": "All FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", + "UMask": "0xff" + }, + { + "EventName": "fp_ret_sse_avx_ops.mac_flops", + "EventCode": "0x03", + "BriefDescription": "Multiply-Accumulate FLOPs. Each MAC operation is counted as 2 FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPs. The number of events logged per cycle can vary from 0 to 64. This event requires the use of the MergeEvent since it can count above 15 events per cycle. See 2.1.17.3 [Large Increment per Cycle Events]. It does not provide a useful count without the use of the MergeEvent.", + "UMask": "0x08" + }, + { + "EventName": "fp_ret_sse_avx_ops.div_flops", + "EventCode": "0x03", + "BriefDescription": "Divide/square root FLOPs. This is a retire-based event. The number of retired SSE/AVX FLOPs. The number of events logged per cycle can vary from 0 to 64. This event requires the use of the MergeEvent since it can count above 15 events per cycle. See 2.1.17.3 [Large Increment per Cycle Events]. It does not provide a useful count without the use of the MergeEvent.", + "UMask": "0x04" + }, + { + "EventName": "fp_ret_sse_avx_ops.mult_flops", + "EventCode": "0x03", + "BriefDescription": "Multiply FLOPs. This is a retire-based event. The number of retired SSE/AVX FLOPs. The number of events logged per cycle can vary from 0 to 64. This event requires the use of the MergeEvent since it can count above 15 events per cycle. See 2.1.17.3 [Large Increment per Cycle Events]. It does not provide a useful count without the use of the MergeEvent.", + "UMask": "0x02" + }, + { + "EventName": "fp_ret_sse_avx_ops.add_sub_flops", + "EventCode": "0x03", + "BriefDescription": "Add/subtract FLOPs. This is a retire-based event. The number of retired SSE/AVX FLOPs. The number of events logged per cycle can vary from 0 to 64. This event requires the use of the MergeEvent since it can count above 15 events per cycle. See 2.1.17.3 [Large Increment per Cycle Events]. It does not provide a useful count without the use of the MergeEvent.", + "UMask": "0x01" + }, + { + "EventName": "fp_num_mov_elim_scal_op.optimized", + "EventCode": "0x04", + "BriefDescription": "Number of Scalar Ops optimized. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", + "UMask": "0x08" + }, + { + "EventName": "fp_num_mov_elim_scal_op.opt_potential", + "EventCode": "0x04", + "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass). This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", + "UMask": "0x04" + }, + { + "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim", + "EventCode": "0x04", + "BriefDescription": "Number of SSE Move Ops eliminated. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", + "UMask": "0x02" + }, + { + "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops", + "EventCode": "0x04", + "BriefDescription": "Number of SSE Move Ops. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.", + "UMask": "0x01" + }, + { + "EventName": "fp_retired_ser_ops.sse_bot_ret", + "EventCode": "0x05", + "BriefDescription": "SSE/AVX bottom-executing ops retired. The number of serializing Ops retired.", + "UMask": "0x08" + }, + { + "EventName": "fp_retired_ser_ops.sse_ctrl_ret", + "EventCode": "0x05", + "BriefDescription": "SSE/AVX control word mispredict traps. The number of serializing Ops retired.", + "UMask": "0x04" + }, + { + "EventName": "fp_retired_ser_ops.x87_bot_ret", + "EventCode": "0x05", + "BriefDescription": "x87 bottom-executing ops retired. The number of serializing Ops retired.", + "UMask": "0x02" + }, + { + "EventName": "fp_retired_ser_ops.x87_ctrl_ret", + "EventCode": "0x05", + "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits. The number of serializing Ops retired.", + "UMask": "0x01" + }, + { + "EventName": "fp_disp_faults.ymm_spill_fault", + "EventCode": "0x0e", + "BriefDescription": "Floating Point Dispatch Faults. YMM spill fault.", + "UMask": "0x08" + }, + { + "EventName": "fp_disp_faults.ymm_fill_fault", + "EventCode": "0x0e", + "BriefDescription": "Floating Point Dispatch Faults. YMM fill fault.", + "UMask": "0x04" + }, + { + "EventName": "fp_disp_faults.xmm_fill_fault", + "EventCode": "0x0e", + "BriefDescription": "Floating Point Dispatch Faults. XMM fill fault.", + "UMask": "0x02" + }, + { + "EventName": "fp_disp_faults.x87_fill_fault", + "EventCode": "0x0e", + "BriefDescription": "Floating Point Dispatch Faults. x87 fill fault.", + "UMask": "0x01" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen3/memory.json b/tools/perf/pmu-events/arch/x86/amdzen3/memory.json new file mode 100644 index 000000000000..a2833955dcd2 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen3/memory.json @@ -0,0 +1,428 @@ +[ + { + "EventName": "ls_bad_status2.stli_other", + "EventCode": "0x24", + "BriefDescription": "Non-forwardable conflict; used to reduce STLI's via software. All reasons. Store To Load Interlock (STLI) are loads that were unable to complete because of a possible match with an older store, and the older store could not do STLF for some reason.", + "PublicDescription" : "Store-to-load conflicts: A load was unable to complete due to a non-forwardable conflict with an older store. Most commonly, a load's address range partially but not completely overlaps with an uncompleted older store. Software can avoid this problem by using same-size and same-alignment loads and stores when accessing the same data. Vector/SIMD code is particularly susceptible to this problem; software should construct wide vector stores by manipulating vector elements in registers using shuffle/blend/swap instructions prior to storing to memory, instead of using narrow element-by-element stores.", + "UMask": "0x02" + }, + { + "EventName": "ls_locks.spec_lock_hi_spec", + "EventCode": "0x25", + "BriefDescription": "Retired lock instructions. High speculative cacheable lock speculation succeeded.", + "UMask": "0x08" + }, + { + "EventName": "ls_locks.spec_lock_lo_spec", + "EventCode": "0x25", + "BriefDescription": "Retired lock instructions. Low speculative cacheable lock speculation succeeded.", + "UMask": "0x04" + }, + { + "EventName": "ls_locks.non_spec_lock", + "EventCode": "0x25", + "BriefDescription": "Retired lock instructions. Non-speculative lock succeeded.", + "UMask": "0x02" + }, + { + "EventName": "ls_locks.bus_lock", + "EventCode": "0x25", + "BriefDescription": "Retired lock instructions. Comparable to legacy bus lock.", + "UMask": "0x01" + }, + { + "EventName": "ls_ret_cl_flush", + "EventCode": "0x26", + "BriefDescription": "The number of retired CLFLUSH instructions. This is a non-speculative event." + }, + { + "EventName": "ls_ret_cpuid", + "EventCode": "0x27", + "BriefDescription": "The number of CPUID instructions retired." + }, + { + "EventName": "ls_dispatch.ld_st_dispatch", + "EventCode": "0x29", + "BriefDescription": "Load-op-Store Dispatch. Dispatch of a single op that performs a load from and store to the same memory address. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "UMask": "0x04" + }, + { + "EventName": "ls_dispatch.store_dispatch", + "EventCode": "0x29", + "BriefDescription": "Dispatch of a single op that performs a memory store. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "UMask": "0x02" + }, + { + "EventName": "ls_dispatch.ld_dispatch", + "EventCode": "0x29", + "BriefDescription": "Dispatch of a single op that performs a memory load. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "UMask": "0x01" + }, + { + "EventName": "ls_smi_rx", + "EventCode": "0x2b", + "BriefDescription": "Counts the number of SMIs received." + }, + { + "EventName": "ls_int_taken", + "EventCode": "0x2c", + "BriefDescription": "Counts the number of interrupts taken." + }, + { + "EventName": "ls_rdtsc", + "EventCode": "0x2d", + "BriefDescription": "Number of reads of the TSC (RDTSC instructions). The count is speculative." + }, + { + "EventName": "ls_stlf", + "EventCode": "0x35", + "BriefDescription": "Number of STLF hits." + }, + { + "EventName": "ls_st_commit_cancel2.st_commit_cancel_wcb_full", + "EventCode": "0x37", + "BriefDescription": "A non-cacheable store and the non-cacheable commit buffer is full.", + "UMask": "0x01" + }, + { + "EventName": "ls_dc_accesses", + "EventCode": "0x40", + "BriefDescription": "Number of accesses to the dcache for load/store references.", + "PublicDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event." + }, + { + "EventName": "ls_mab_alloc.all_allocations", + "EventCode": "0x41", + "BriefDescription": "All Allocations. Counts when a LS pipe allocates a MAB entry.", + "UMask": "0x7f" + }, + { + "EventName": "ls_mab_alloc.hardware_prefetcher_allocations", + "EventCode": "0x41", + "BriefDescription": "Hardware Prefetcher Allocations. Counts when a LS pipe allocates a MAB entry.", + "UMask": "0x40" + }, + { + "EventName": "ls_mab_alloc.load_store_allocations", + "EventCode": "0x41", + "BriefDescription": "Load Store Allocations. Counts when a LS pipe allocates a MAB entry.", + "UMask": "0x3f" + }, + { + "EventName": "ls_mab_alloc.dc_prefetcher", + "EventCode": "0x41", + "BriefDescription": "LS MAB Allocates by Type. DC prefetcher.", + "UMask": "0x08" + }, + { + "EventName": "ls_mab_alloc.stores", + "EventCode": "0x41", + "BriefDescription": "LS MAB Allocates by Type. Stores.", + "UMask": "0x02" + }, + { + "EventName": "ls_mab_alloc.loads", + "EventCode": "0x41", + "BriefDescription": "LS MAB Allocates by Type. Loads.", + "UMask": "0x01" + }, + { + "EventName": "ls_dmnd_fills_from_sys.mem_io_remote", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. From DRAM or IO connected in different Node.", + "UMask": "0x40" + }, + { + "EventName": "ls_dmnd_fills_from_sys.ext_cache_remote", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. From CCX Cache in different Node.", + "UMask": "0x10" + }, + { + "EventName": "ls_dmnd_fills_from_sys.mem_io_local", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. From DRAM or IO connected in same node.", + "UMask": "0x08" + }, + { + "EventName": "ls_dmnd_fills_from_sys.ext_cache_local", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. From cache of different CCX in same node.", + "UMask": "0x04" + }, + { + "EventName": "ls_dmnd_fills_from_sys.int_cache", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. From L3 or different L2 in same CCX.", + "UMask": "0x02" + }, + { + "EventName": "ls_dmnd_fills_from_sys.lcl_l2", + "EventCode": "0x43", + "BriefDescription": "Demand Data Cache Fills by Data Source. From Local L2 to the core.", + "UMask": "0x01" + }, + { + "EventName": "ls_any_fills_from_sys.mem_io_remote", + "EventCode": "0x44", + "BriefDescription": "Any Data Cache Fills by Data Source. From DRAM or IO connected in different Node.", + "UMask": "0x40" + }, + { + "EventName": "ls_any_fills_from_sys.ext_cache_remote", + "EventCode": "0x44", + "BriefDescription": "Any Data Cache Fills by Data Source. From CCX Cache in different Node.", + "UMask": "0x10" + }, + { + "EventName": "ls_any_fills_from_sys.mem_io_local", + "EventCode": "0x44", + "BriefDescription": "Any Data Cache Fills by Data Source. From DRAM or IO connected in same node.", + "UMask": "0x08" + }, + { + "EventName": "ls_any_fills_from_sys.ext_cache_local", + "EventCode": "0x44", + "BriefDescription": "Any Data Cache Fills by Data Source. From cache of different CCX in same node.", + "UMask": "0x04" + }, + { + "EventName": "ls_any_fills_from_sys.int_cache", + "EventCode": "0x44", + "BriefDescription": "Any Data Cache Fills by Data Source. From L3 or different L2 in same CCX.", + "UMask": "0x02" + }, + { + "EventName": "ls_any_fills_from_sys.lcl_l2", + "EventCode": "0x44", + "BriefDescription": "Any Data Cache Fills by Data Source. From Local L2 to the core.", + "UMask": "0x01" + }, + { + "EventName": "ls_l1_d_tlb_miss.all", + "EventCode": "0x45", + "BriefDescription": "All L1 DTLB Misses or Reloads. Use l1_dtlb_misses instead.", + "UMask": "0xff" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that also missed in the L2 TLB.", + "UMask": "0x80" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that also missed in the L2 TLB.", + "UMask": "0x40" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload coalesced page that also missed in the L2 TLB.", + "UMask": "0x20" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 4K page that missed the L2 TLB.", + "UMask": "0x10" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that hit in the L2 TLB.", + "UMask": "0x08" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that hit in the L2 TLB.", + "UMask": "0x04" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a coalesced page that hit in the L2 TLB.", + "UMask": "0x02" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss. DTLB reload to a 4K page that hit in the L2 TLB.", + "UMask": "0x01" + }, + { + "EventName": "ls_tablewalker.iside", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks on I-side.", + "UMask": "0x0c" + }, + { + "EventName": "ls_tablewalker.ic_type1", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks IC Type 1.", + "UMask": "0x08" + }, + { + "EventName": "ls_tablewalker.ic_type0", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks IC Type 0.", + "UMask": "0x04" + }, + { + "EventName": "ls_tablewalker.dside", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks on D-side.", + "UMask": "0x03" + }, + { + "EventName": "ls_tablewalker.dc_type1", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks DC Type 1.", + "UMask": "0x02" + }, + { + "EventName": "ls_tablewalker.dc_type0", + "EventCode": "0x46", + "BriefDescription": "Total Page Table Walks DC Type 0.", + "UMask": "0x01" + }, + { + "EventName": "ls_misal_loads.ma4k", + "EventCode": "0x47", + "BriefDescription": "The number of 4KB misaligned (i.e., page crossing) loads.", + "UMask": "0x02" + }, + { + "EventName": "ls_misal_loads.ma64", + "EventCode": "0x47", + "BriefDescription": "The number of 64B misaligned (i.e., cacheline crossing) loads.", + "UMask": "0x01" + }, + { + "EventName": "ls_pref_instr_disp", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative).", + "UMask": "0xff" + }, + { + "EventName": "ls_pref_instr_disp.prefetch_nta", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). PrefetchNTA instruction. See docAPM3 PREFETCHlevel.", + "UMask": "0x04" + }, + { + "EventName": "ls_pref_instr_disp.prefetch_w", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). PrefetchW instruction. See docAPM3 PREFETCHW.", + "UMask": "0x02" + }, + { + "EventName": "ls_pref_instr_disp.prefetch", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). PrefetchT0, T1 and T2 instructions. See docAPM3 PREFETCHlevel.", + "UMask": "0x01" + }, + { + "EventName": "ls_inef_sw_pref.mab_mch_cnt", + "EventCode": "0x52", + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a match on an already-allocated miss request buffer.", + "UMask": "0x02" + }, + { + "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", + "EventCode": "0x52", + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a DC hit.", + "UMask": "0x01" + }, + { + "EventName": "ls_sw_pf_dc_fills.mem_io_remote", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From DRAM or IO connected in different Node.", + "UMask": "0x40" + }, + { + "EventName": "ls_sw_pf_dc_fills.ext_cache_remote", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From CCX Cache in different Node.", + "UMask": "0x10" + }, + { + "EventName": "ls_sw_pf_dc_fills.mem_io_local", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From DRAM or IO connected in same node.", + "UMask": "0x08" + }, + { + "EventName": "ls_sw_pf_dc_fills.ext_cache_local", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From cache of different CCX in same node.", + "UMask": "0x04" + }, + { + "EventName": "ls_sw_pf_dc_fills.int_cache", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From L3 or different L2 in same CCX.", + "UMask": "0x02" + }, + { + "EventName": "ls_sw_pf_dc_fills.lcl_l2", + "EventCode": "0x59", + "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From Local L2 to the core.", + "UMask": "0x01" + }, + { + "EventName": "ls_hw_pf_dc_fills.mem_io_remote", + "EventCode": "0x5a", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM or IO connected in different Node.", + "UMask": "0x40" + }, + { + "EventName": "ls_hw_pf_dc_fills.ext_cache_remote", + "EventCode": "0x5a", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From CCX Cache in different Node.", + "UMask": "0x10" + }, + { + "EventName": "ls_hw_pf_dc_fills.mem_io_local", + "EventCode": "0x5a", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM or IO connected in same node.", + "UMask": "0x08" + }, + { + "EventName": "ls_hw_pf_dc_fills.ext_cache_local", + "EventCode": "0x5a", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From cache of different CCX in same node.", + "UMask": "0x04" + }, + { + "EventName": "ls_hw_pf_dc_fills.int_cache", + "EventCode": "0x5a", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From L3 or different L2 in same CCX.", + "UMask": "0x02" + }, + { + "EventName": "ls_hw_pf_dc_fills.lcl_l2", + "EventCode": "0x5a", + "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From Local L2 to the core.", + "UMask": "0x01" + }, + { + "EventName": "ls_alloc_mab_count", + "EventCode": "0x5f", + "BriefDescription": "Count of Allocated Mabs", + "PublicDescription": "This event counts the in-flight L1 data cache misses (allocated Miss Address Buffers) divided by 4 and rounded down each cycle unless used with the MergeEvent functionality. If the MergeEvent is used, it counts the exact number of outstanding L1 data cache misses. See 2.1.17.3 [Large Increment per Cycle Events]." + }, + { + "EventName": "ls_not_halted_cyc", + "EventCode": "0x76", + "BriefDescription": "Cycles not in Halt." + }, + { + "EventName": "ls_tlb_flush.all_tlb_flushes", + "EventCode": "0x78", + "BriefDescription": "All TLB Flushes. Requires unit mask 0xFF to engage event for counting. Use all_tlbs_flushed instead", + "UMask": "0xff" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen3/other.json b/tools/perf/pmu-events/arch/x86/amdzen3/other.json new file mode 100644 index 000000000000..7da5d0791ea3 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen3/other.json @@ -0,0 +1,103 @@ +[ + { + "EventName": "de_dis_uop_queue_empty_di0", + "EventCode": "0xa9", + "BriefDescription": "Cycles where the Micro-Op Queue is empty." + }, + { + "EventName": "de_dis_cops_from_decoder.disp_op_type.any_integer_dispatch", + "EventCode": "0xab", + "BriefDescription": "Any Integer dispatch. Types of Oops Dispatched from Decoder.", + "UMask": "0x08" + }, + { + "EventName": "de_dis_cops_from_decoder.disp_op_type.any_fp_dispatch", + "EventCode": "0xab", + "BriefDescription": "Any FP dispatch. Types of Oops Dispatched from Decoder.", + "UMask": "0x04" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.fp_flush_recovery_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. FP Flush recovery stall.", + "UMask": "0x80" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.fp_sch_rsrc_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. FP scheduler resource stall. Applies to ops that use the FP scheduler.", + "UMask": "0x40" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.fp_reg_file_rsrc_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. Floating point register file resource stall. Applies to all FP ops that have a destination register.", + "UMask": "0x20" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.taken_brnch_buffer_rsrc", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. Taken branch buffer resource stall.", + "UMask": "0x10" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.int_sched_misc_token_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Scheduler miscellaneous resource stall.", + "UMask": "0x08" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.store_queue_rsrc_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. Store Queue resource stall. Applies to all ops with store semantics.", + "UMask": "0x04" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.load_queue_rsrc_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. Load Queue resource stall. Applies to all ops with load semantics.", + "UMask": "0x02" + }, + { + "EventName": "de_dis_dispatch_token_stalls1.int_phy_reg_file_rsrc_stall", + "EventCode": "0xae", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a Token Stall. Also counts cycles when the thread is not selected to dispatch but would have been stalled due to a Token Stall. Integer Physical Register File resource stall. Integer Physical Register File, applies to all ops that have an integer destination register.", + "UMask": "0x01" + }, + { + "EventName": "de_dis_dispatch_token_stalls2.retire_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Insufficient Retire Queue tokens available.", + "UMask": "0x20" + }, + { + "EventName": "de_dis_dispatch_token_stalls2.agsq_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.", + "UMask": "0x10" + }, + { + "EventName": "de_dis_dispatch_token_stalls2.int_sch3_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. No tokens for Integer Scheduler Queue 3 available.", + "UMask": "0x08" + }, + { + "EventName": "de_dis_dispatch_token_stalls2.int_sch2_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. No tokens for Integer Scheduler Queue 2 available.", + "UMask": "0x04" + }, + { + "EventName": "de_dis_dispatch_token_stalls2.int_sch1_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. No tokens for Integer Scheduler Queue 1 available.", + "UMask": "0x02" + }, + { + "EventName": "de_dis_dispatch_token_stalls2.int_sch0_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. No tokens for Integer Scheduler Queue 0 available.", + "UMask": "0x01" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen3/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen3/recommended.json new file mode 100644 index 000000000000..988cf68ae825 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen3/recommended.json @@ -0,0 +1,214 @@ +[ + { + "MetricName": "branch_misprediction_ratio", + "BriefDescription": "Execution-Time Branch Misprediction Ratio (Non-Speculative)", + "MetricExpr": "d_ratio(ex_ret_brn_misp, ex_ret_brn)", + "MetricGroup": "branch_prediction", + "ScaleUnit": "100%" + }, + { + "EventName": "all_data_cache_accesses", + "EventCode": "0x29", + "BriefDescription": "All L1 Data Cache Accesses", + "UMask": "0x07" + }, + { + "MetricName": "all_l2_cache_accesses", + "BriefDescription": "All L2 Cache Accesses", + "MetricExpr": "l2_request_g1.all_no_prefetch + l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l2_cache_accesses_from_ic_misses", + "EventCode": "0x60", + "BriefDescription": "L2 Cache Accesses from L1 Instruction Cache Misses (including prefetch)", + "UMask": "0x10" + }, + { + "EventName": "l2_cache_accesses_from_dc_misses", + "EventCode": "0x60", + "BriefDescription": "L2 Cache Accesses from L1 Data Cache Misses (including prefetch)", + "UMask": "0xe8" + }, + { + "MetricName": "l2_cache_accesses_from_l2_hwpf", + "BriefDescription": "L2 Cache Accesses from L2 HWPF", + "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "MetricName": "all_l2_cache_misses", + "BriefDescription": "All L2 Cache Misses", + "MetricExpr": "l2_cache_req_stat.ic_dc_miss_in_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l2_cache_misses_from_ic_miss", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Misses from L1 Instruction Cache Misses", + "UMask": "0x01" + }, + { + "EventName": "l2_cache_misses_from_dc_misses", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Misses from L1 Data Cache Misses", + "UMask": "0x08" + }, + { + "MetricName": "l2_cache_misses_from_l2_hwpf", + "BriefDescription": "L2 Cache Misses from L2 Cache HWPF", + "MetricExpr": "l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "MetricName": "all_l2_cache_hits", + "BriefDescription": "All L2 Cache Hits", + "MetricExpr": "l2_cache_req_stat.ic_dc_hit_in_l2 + l2_pf_hit_l2", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l2_cache_hits_from_ic_misses", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Hits from L1 Instruction Cache Misses", + "UMask": "0x06" + }, + { + "EventName": "l2_cache_hits_from_dc_misses", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Hits from L1 Data Cache Misses", + "UMask": "0xf0" + }, + { + "EventName": "l2_cache_hits_from_l2_hwpf", + "EventCode": "0x70", + "BriefDescription": "L2 Cache Hits from L2 Cache HWPF", + "UMask": "0xff" + }, + { + "EventName": "l3_cache_accesses", + "EventCode": "0x04", + "BriefDescription": "L3 Cache Accesses", + "UMask": "0xff", + "Unit": "L3PMC" + }, + { + "EventName": "l3_misses", + "EventCode": "0x04", + "BriefDescription": "L3 Misses (includes cacheline state change requests)", + "UMask": "0x01", + "Unit": "L3PMC" + }, + { + "MetricName": "l3_read_miss_latency", + "BriefDescription": "Average L3 Read Miss Latency (in core clocks)", + "MetricExpr": "(xi_sys_fill_latency * 16) / xi_ccx_sdp_req1", + "MetricGroup": "l3_cache", + "ScaleUnit": "1core clocks" + }, + { + "MetricName": "op_cache_fetch_miss_ratio", + "BriefDescription": "Op Cache (64B) Fetch Miss Ratio", + "MetricExpr": "d_ratio(op_cache_hit_miss.op_cache_miss, op_cache_hit_miss.all_op_cache_accesses)", + "MetricGroup": "l2_cache" + }, + { + "MetricName": "ic_fetch_miss_ratio", + "BriefDescription": "Instruction Cache (32B) Fetch Miss Ratio", + "MetricExpr": "d_ratio(ic_tag_hit_miss.instruction_cache_miss, ic_tag_hit_miss.all_instruction_cache_accesses)", + "MetricGroup": "l2_cache", + "ScaleUnit": "100%" + }, + { + "EventName": "l1_data_cache_fills_from_memory", + "EventCode": "0x44", + "BriefDescription": "L1 Data Cache Fills: From Memory", + "UMask": "0x48" + }, + { + "EventName": "l1_data_cache_fills_from_remote_node", + "EventCode": "0x44", + "BriefDescription": "L1 Data Cache Fills: From Remote Node", + "UMask": "0x50" + }, + { + "EventName": "l1_data_cache_fills_from_within_same_ccx", + "EventCode": "0x44", + "BriefDescription": "L1 Data Cache Fills: From within same CCX", + "UMask": "0x03" + }, + { + "EventName": "l1_data_cache_fills_from_external_ccx_cache", + "EventCode": "0x44", + "BriefDescription": "L1 Data Cache Fills: From External CCX Cache", + "UMask": "0x14" + }, + { + "EventName": "l1_data_cache_fills_all", + "EventCode": "0x44", + "BriefDescription": "L1 Data Cache Fills: All", + "UMask": "0xff" + }, + { + "MetricName": "l1_itlb_misses", + "BriefDescription": "L1 ITLB Misses", + "MetricExpr": "bp_l1_tlb_miss_l2_tlb_hit + bp_l1_tlb_miss_l2_tlb_miss", + "MetricGroup": "tlb" + }, + { + "EventName": "l2_itlb_misses", + "EventCode": "0x85", + "BriefDescription": "L2 ITLB Misses & Instruction page walks", + "UMask": "0x07" + }, + { + "EventName": "l1_dtlb_misses", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Misses", + "UMask": "0xff" + }, + { + "EventName": "l2_dtlb_misses", + "EventCode": "0x45", + "BriefDescription": "L2 DTLB Misses & Data page walks", + "UMask": "0xf0" + }, + { + "EventName": "all_tlbs_flushed", + "EventCode": "0x78", + "BriefDescription": "All TLBs Flushed", + "UMask": "0xff" + }, + { + "MetricName": "macro_ops_dispatched", + "BriefDescription": "Macro-ops Dispatched", + "MetricExpr": "de_dis_cops_from_decoder.disp_op_type.any_integer_dispatch + de_dis_cops_from_decoder.disp_op_type.any_fp_dispatch", + "MetricGroup": "decoder" + }, + { + "EventName": "sse_avx_stalls", + "EventCode": "0x0e", + "BriefDescription": "Mixed SSE/AVX Stalls", + "UMask": "0x0e" + }, + { + "EventName": "macro_ops_retired", + "EventCode": "0xc1", + "BriefDescription": "Macro-ops Retired" + }, + { + "MetricName": "all_remote_links_outbound", + "BriefDescription": "Approximate: Outbound data bytes for all Remote Links for a node (die)", + "MetricExpr": "remote_outbound_data_controller_0 + remote_outbound_data_controller_1 + remote_outbound_data_controller_2 + remote_outbound_data_controller_3", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "3e-5MiB" + }, + { + "MetricName": "nps1_die_to_dram", + "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die) (may need --metric-no-group)", + "MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.1e-5MiB" + } +] diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 2f2a209e87e1..9f97b32533a0 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -38,4 +38,4 @@ GenuineIntel-6-7E,v1,icelake,core GenuineIntel-6-86,v1,tremontx,core AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v2,amdzen1,core AuthenticAMD-23-[[:xdigit:]]+,v1,amdzen2,core -AuthenticAMD-25-[[:xdigit:]]+,v1,amdzen2,core +AuthenticAMD-25-[[:xdigit:]]+,v1,amdzen3,core -- cgit v1.2.3 From 5676dba708bbb1fc94a9d3b2e9c114db9e4c6699 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Fri, 19 Mar 2021 20:35:27 +0800 Subject: perf annotate: Fix sample events lost in stdio mode In hist__find_annotations(), since different 'struct hist_entry' entries may point to same symbol, we free notes->src to signal already processed this symbol in stdio mode; when annotate, entry will skipped if notes->src is NULL to avoid repeated output. However, there is a problem, for example, run the following command: # perf record -e branch-misses -e branch-instructions -a sleep 1 perf.data file contains different types of sample event. If the same IP sample event exists in branch-misses and branch-instructions, this event uses the same symbol. When annotate branch-misses events, notes->src corresponding to this event is set to null, as a result, when annotate branch-instructions events, this event is skipped and no annotate is output. Solution of this patch is to remove zfree in hists__find_annotations and change sort order to "dso,symbol" to avoid duplicate output when different processes correspond to the same symbol. Signed-off-by: Yang Jihong Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Gustavo A. R. Silva Cc: Jin Yao Cc: Jiri Olsa Cc: Mark Rutland Cc: Martin Liška Cc: Peter Zijlstra Cc: zhangjinhao2@huawei.com Link: http://lore.kernel.org/lkml/20210319123527.173883-1-yangjihong1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 021d974c978e..524e6f0dff22 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -374,13 +374,6 @@ find_next: } else { hist_entry__tty_annotate(he, evsel, ann); nd = rb_next(nd); - /* - * Since we have a hist_entry per IP for the same - * symbol, free he->ms.sym->src to signal we already - * processed this symbol. - */ - zfree(¬es->src->cycles_hist); - zfree(¬es->src); } } } @@ -623,14 +616,22 @@ int cmd_annotate(int argc, const char **argv) setup_browser(true); - if ((use_browser == 1 || annotate.use_stdio2) && annotate.has_br_stack) { + /* + * Events of different processes may correspond to the same + * symbol, we do not care about the processes in annotate, + * set sort order to avoid repeated output. + */ + sort_order = "dso,symbol"; + + /* + * Set SORT_MODE__BRANCH so that annotate display IPC/Cycle + * if branch info is in perf data in TUI mode. + */ + if ((use_browser == 1 || annotate.use_stdio2) && annotate.has_br_stack) sort__mode = SORT_MODE__BRANCH; - if (setup_sorting(annotate.session->evlist) < 0) - usage_with_options(annotate_usage, options); - } else { - if (setup_sorting(NULL) < 0) - usage_with_options(annotate_usage, options); - } + + if (setup_sorting(NULL) < 0) + usage_with_options(annotate_usage, options); ret = __cmd_annotate(&annotate); -- cgit v1.2.3 From 9865ea8ab31f2e56be59125099ee251ce573f293 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 14 Apr 2021 10:08:16 -0300 Subject: perf evlist: Add a method to return the list of evsels as a string Add a 'scnprintf' method to obtain the list of evsels in a evlist as a string, excluding the "dummy" event used for things like receiving metadata events (PERF_RECORD_FORK, MMAP, etc) when synthesizing preexisting threads. Will be used to improve the error message for workload failure in 'perf record. Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20210414131628.2064862-2-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 19 +++++++++++++++++++ tools/perf/util/evlist.h | 2 ++ 2 files changed, 21 insertions(+) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f1c79ecf8107..d29a8a118973 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -2138,3 +2138,22 @@ struct evsel *evlist__find_evsel(struct evlist *evlist, int idx) } return NULL; } + +int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf) +{ + struct evsel *evsel; + int printed = 0; + + evlist__for_each_entry(evlist, evsel) { + if (evsel__is_dummy_event(evsel)) + continue; + if (size > (strlen(evsel__name(evsel)) + (printed ? 2 : 1))) { + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "," : "", evsel__name(evsel)); + } else { + printed += scnprintf(bf + printed, size - printed, "%s...", printed ? "," : ""); + break; + } + } + + return printed; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index b695ffaae519..a8b97b50cceb 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -365,4 +365,6 @@ int evlist__ctlfd_ack(struct evlist *evlist); #define EVLIST_DISABLED_MSG "Events disabled\n" struct evsel *evlist__find_evsel(struct evlist *evlist, int idx); + +int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf); #endif /* __PERF_EVLIST_H */ -- cgit v1.2.3 From 3535a6967c0d590381c16d6676c6fdfa60f4d733 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 14 Apr 2021 09:32:14 -0300 Subject: perf record: Improve 'Workload failed' message printing events + what was exec'ed Before: # perf record -a cycles,instructions,cache-misses Workload failed: No such file or directory # After: # perf record -a cycles,instructions,cache-misses Failed to collect 'cycles' for the 'cycles,instructions,cache-misses' workload: No such file or directory # Helps disambiguating other error scenarios: # perf record -a -e cycles,instructions,cache-misses bla Failed to collect 'cycles,instructions,cache-misses' for the 'bla' workload: No such file or directory # perf record -a cycles,instructions,cache-misses sleep 1 Failed to collect 'cycles' for the 'cycles,instructions,cache-misses' workload: No such file or directory # When all goes well we're back to the usual: # perf record -a -e cycles,instructions,cache-misses sleep 1 [ perf record: Woken up 3 times to write data ] [ perf record: Captured and wrote 3.151 MB perf.data (21242 samples) ] # Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20210414131628.2064862-3-acme@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 35465d1db6dd..5fb9665a2ec2 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1977,9 +1977,13 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) record__auxtrace_snapshot_exit(rec); if (forks && workload_exec_errno) { - char msg[STRERR_BUFSIZE]; + char msg[STRERR_BUFSIZE], strevsels[2048]; const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); - pr_err("Workload failed: %s\n", emsg); + + evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels); + + pr_err("Failed to collect '%s' for the '%s' workload: %s\n", + strevsels, argv[0], emsg); err = -1; goto out_child; } -- cgit v1.2.3 From 2e1daee14e67fbf9b27280b974e2c680a22cabea Mon Sep 17 00:00:00 2001 From: Vitaly Chikunov Date: Wed, 14 Apr 2021 21:27:23 +0300 Subject: perf beauty: Fix fsconfig generator After gnulib update sed stopped matching `[[:space:]]*+' as before, causing the following compilation error: In file included from builtin-trace.c:719: trace/beauty/generated/fsconfig_arrays.c:2:3: error: expected expression before ']' token 2 | [] = "", | ^ trace/beauty/generated/fsconfig_arrays.c:2:3: error: array index in initializer not of integer type trace/beauty/generated/fsconfig_arrays.c:2:3: note: (near initialization for 'fsconfig_cmds') Fix this by correcting the regular expression used in the generator. Also, clean up the script by removing redundant egrep, xargs, and printf invocations. Committer testing: Continues to work: $ cat tools/perf/trace/beauty/fsconfig.sh #!/bin/sh # SPDX-License-Identifier: LGPL-2.1 if [ $# -ne 1 ] ; then linux_header_dir=tools/include/uapi/linux else linux_header_dir=$1 fi linux_mount=${linux_header_dir}/mount.h printf "static const char *fsconfig_cmds[] = {\n" ms='[[:space:]]*' sed -nr "s/^${ms}FSCONFIG_([[:alnum:]_]+)${ms}=${ms}([[:digit:]]+)${ms},.*/\t[\2] = \"\1\",/p" \ ${linux_mount} printf "};\n" $ tools/perf/trace/beauty/fsconfig.sh static const char *fsconfig_cmds[] = { [0] = "SET_FLAG", [1] = "SET_STRING", [2] = "SET_BINARY", [3] = "SET_PATH", [4] = "SET_PATH_EMPTY", [5] = "SET_FD", [6] = "CMD_CREATE", [7] = "CMD_RECONFIGURE", }; $ Fixes: d35293004a5e4 ("perf beauty: Add generator for fsconfig's 'cmd' arg values") Signed-off-by: Vitaly Chikunov Co-authored-by: Dmitry V. Levin Tested-by: Arnaldo Carvalho de Melo Link: http://lore.kernel.org/lkml/20210414182723.1670663-1-vt@altlinux.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/fsconfig.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/perf/trace/beauty/fsconfig.sh b/tools/perf/trace/beauty/fsconfig.sh index 83fb24df05c9..bc6ef7bb7a5f 100755 --- a/tools/perf/trace/beauty/fsconfig.sh +++ b/tools/perf/trace/beauty/fsconfig.sh @@ -10,8 +10,7 @@ fi linux_mount=${linux_header_dir}/mount.h printf "static const char *fsconfig_cmds[] = {\n" -regex='^[[:space:]]*+FSCONFIG_([[:alnum:]_]+)[[:space:]]*=[[:space:]]*([[:digit:]]+)[[:space:]]*,[[:space:]]*.*' -egrep $regex ${linux_mount} | \ - sed -r "s/$regex/\2 \1/g" | \ - xargs printf "\t[%s] = \"%s\",\n" +ms='[[:space:]]*' +sed -nr "s/^${ms}FSCONFIG_([[:alnum:]_]+)${ms}=${ms}([[:digit:]]+)${ms},.*/\t[\2] = \"\1\",/p" \ + ${linux_mount} printf "};\n" -- cgit v1.2.3 From 2fc83c2cd77703cfcfc1ffaa092614fb1f837292 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 14 Apr 2021 10:54:09 -0500 Subject: tools include: Add an initial math64.h Add an initial math64.h similar to linux/math64.h with functions mul_u64_u64_div64() and mul_u64_u32_shr(). This isn't a direct copy of include/linux/math64.h as that doesn't define mul_u64_u64_div64(). Implementation was written by Peter Zilkstra based on linux/math64.h and div64.h[1]. The original implementation was not optimal on arm64 as __int128 division is not optimal with a call out to __udivti3, so I dropped the __int128 variant of mul_u64_u64_div64(). [1] https://lore.kernel.org/lkml/20200322101848.GF2452@worktop.programming.kicks-ass.net/ Signed-off-by: Rob Herring Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Catalin Marinas Cc: Itaru Kitayama Cc: Mark Rutland Cc: Peter Zijlstra Cc: Will Deacon Link: http://lore.kernel.org/lkml/20210414155412.3697605-2-robh@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/math64.h | 75 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 tools/include/linux/math64.h diff --git a/tools/include/linux/math64.h b/tools/include/linux/math64.h new file mode 100644 index 000000000000..4ad45d5943dc --- /dev/null +++ b/tools/include/linux/math64.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MATH64_H +#define _LINUX_MATH64_H + +#include + +#ifdef __x86_64__ +static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c) +{ + u64 q; + + asm ("mulq %2; divq %3" : "=a" (q) + : "a" (a), "rm" (b), "rm" (c) + : "rdx"); + + return q; +} +#define mul_u64_u64_div64 mul_u64_u64_div64 +#endif + +#ifdef __SIZEOF_INT128__ +static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift) +{ + return (u64)(((unsigned __int128)a * b) >> shift); +} + +#else + +#ifdef __i386__ +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + u32 high, low; + + asm ("mull %[b]" : "=a" (low), "=d" (high) + : [a] "a" (a), [b] "rm" (b) ); + + return low | ((u64)high) << 32; +} +#else +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + return (u64)a * b; +} +#endif + +static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift) +{ + u32 ah, al; + u64 ret; + + al = a; + ah = a >> 32; + + ret = mul_u32_u32(al, b) >> shift; + if (ah) + ret += mul_u32_u32(ah, b) << (32 - shift); + + return ret; +} + +#endif /* __SIZEOF_INT128__ */ + +#ifndef mul_u64_u64_div64 +static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c) +{ + u64 quot, rem; + + quot = a / c; + rem = a % c; + + return quot * b + (rem * b) / c; +} +#endif + +#endif /* _LINUX_MATH64_H */ -- cgit v1.2.3 From 6cd70754f262e593febc06a02d7ea637c927ea42 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 14 Apr 2021 11:07:37 -0500 Subject: libperf: Add evsel mmap support In order to support usersapce access, an event must be mmapped. While there's already mmap support for evlist, the usecase is a bit different than the self monitoring with userspace access. So let's add new perf_evsel__mmap()/perf_evsel_munmap() functions to mmap/munmap an evsel. This allows implementing userspace access as a fastpath for perf_evsel__read(). The mmapped address is returned by perf_evsel__mmap_base() which primarily for users/tests to check if userspace access is enabled. Signed-off-by: Rob Herring Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Catalin Marinas Cc: Itaru Kitayama Cc: Mark Rutland Cc: Peter Zijlstra Cc: Will Deacon Link: http://lore.kernel.org/lkml/20210414155412.3697605-2-robh@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/Documentation/libperf.txt | 3 ++ tools/lib/perf/evsel.c | 76 ++++++++++++++++++++++++++++++++ tools/lib/perf/include/internal/evsel.h | 1 + tools/lib/perf/include/perf/evsel.h | 3 ++ tools/lib/perf/libperf.map | 3 ++ 5 files changed, 86 insertions(+) diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt index 0c74c30ed23a..63ae5e0195ce 100644 --- a/tools/lib/perf/Documentation/libperf.txt +++ b/tools/lib/perf/Documentation/libperf.txt @@ -136,6 +136,9 @@ SYNOPSIS struct perf_thread_map *threads); void perf_evsel__close(struct perf_evsel *evsel); void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu); + int perf_evsel__mmap(struct perf_evsel *evsel, int pages); + void perf_evsel__munmap(struct perf_evsel *evsel); + void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread); int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count); int perf_evsel__enable(struct perf_evsel *evsel); diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 4dc06289f4c7..cd203998e875 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -11,10 +11,12 @@ #include #include #include +#include #include #include #include #include +#include void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr) { @@ -38,6 +40,7 @@ void perf_evsel__delete(struct perf_evsel *evsel) } #define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y)) +#define MMAP(e, x, y) (e->mmap ? ((struct perf_mmap *) xyarray__entry(e->mmap, x, y)) : NULL) int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) { @@ -55,6 +58,13 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) return evsel->fd != NULL ? 0 : -ENOMEM; } +static int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int nthreads) +{ + evsel->mmap = xyarray__new(ncpus, nthreads, sizeof(struct perf_mmap)); + + return evsel->mmap != NULL ? 0 : -ENOMEM; +} + static int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, int group_fd, @@ -156,6 +166,72 @@ void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu) perf_evsel__close_fd_cpu(evsel, cpu); } +void perf_evsel__munmap(struct perf_evsel *evsel) +{ + int cpu, thread; + + if (evsel->fd == NULL || evsel->mmap == NULL) + return; + + for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { + for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { + int fd = FD(evsel, cpu, thread); + struct perf_mmap *map = MMAP(evsel, cpu, thread); + + if (fd < 0) + continue; + + perf_mmap__munmap(map); + } + } + + xyarray__delete(evsel->mmap); + evsel->mmap = NULL; +} + +int perf_evsel__mmap(struct perf_evsel *evsel, int pages) +{ + int ret, cpu, thread; + struct perf_mmap_param mp = { + .prot = PROT_READ | PROT_WRITE, + .mask = (pages * page_size) - 1, + }; + + if (evsel->fd == NULL || evsel->mmap) + return -EINVAL; + + if (perf_evsel__alloc_mmap(evsel, xyarray__max_x(evsel->fd), xyarray__max_y(evsel->fd)) < 0) + return -ENOMEM; + + for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { + for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { + int fd = FD(evsel, cpu, thread); + struct perf_mmap *map = MMAP(evsel, cpu, thread); + + if (fd < 0) + continue; + + perf_mmap__init(map, NULL, false, NULL); + + ret = perf_mmap__mmap(map, &mp, fd, cpu); + if (ret) { + perf_evsel__munmap(evsel); + return ret; + } + } + } + + return 0; +} + +void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread) +{ + if (FD(evsel, cpu, thread) < 0 || MMAP(evsel, cpu, thread) == NULL) + return NULL; + + return MMAP(evsel, cpu, thread)->base; +} + int perf_evsel__read_size(struct perf_evsel *evsel) { u64 read_format = evsel->attr.read_format; diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index 1ffd083b235e..1c067d088bc6 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -41,6 +41,7 @@ struct perf_evsel { struct perf_cpu_map *own_cpus; struct perf_thread_map *threads; struct xyarray *fd; + struct xyarray *mmap; struct xyarray *sample_id; u64 *id; u32 ids; diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h index c82ec39a4ad0..60eae25076d3 100644 --- a/tools/lib/perf/include/perf/evsel.h +++ b/tools/lib/perf/include/perf/evsel.h @@ -27,6 +27,9 @@ LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map * struct perf_thread_map *threads); LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel); LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu); +LIBPERF_API int perf_evsel__mmap(struct perf_evsel *evsel, int pages); +LIBPERF_API void perf_evsel__munmap(struct perf_evsel *evsel); +LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread); LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count); LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel); diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map index 7be1af8a546c..c0c7ceb11060 100644 --- a/tools/lib/perf/libperf.map +++ b/tools/lib/perf/libperf.map @@ -23,6 +23,9 @@ LIBPERF_0.0.1 { perf_evsel__disable; perf_evsel__open; perf_evsel__close; + perf_evsel__mmap; + perf_evsel__munmap; + perf_evsel__mmap_base; perf_evsel__read; perf_evsel__cpus; perf_evsel__threads; -- cgit v1.2.3 From d3003d9e686890a1e9f0cc7c08aa02ef2953b1f0 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 14 Apr 2021 11:07:38 -0500 Subject: libperf tests: Add support for verbose printing Add __T_VERBOSE() so tests can add verbose output. The verbose output is enabled with the '-v' command line option. Running 'make tests V=1' will enable the '-v' option when running the tests. It'll be used in the next patch, for a user space counter access test. Signed-off-by: Rob Herring Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Catalin Marinas Cc: Itaru Kitayama Cc: Mark Rutland Cc: Peter Zijlstra Cc: Will Deacon Link: http://lore.kernel.org/lkml/20210414155412.3697605-3-robh@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/internal/tests.h | 32 ++++++++++++++++++++++++++++++++ tools/lib/perf/tests/Makefile | 6 ++++-- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/tools/lib/perf/include/internal/tests.h b/tools/lib/perf/include/internal/tests.h index 2093e8868a67..29425c2dabe1 100644 --- a/tools/lib/perf/include/internal/tests.h +++ b/tools/lib/perf/include/internal/tests.h @@ -3,11 +3,32 @@ #define __LIBPERF_INTERNAL_TESTS_H #include +#include int tests_failed; +int tests_verbose; + +static inline int get_verbose(char **argv, int argc) +{ + int c; + int verbose = 0; + + while ((c = getopt(argc, argv, "v")) != -1) { + switch (c) + { + case 'v': + verbose = 1; + break; + default: + break; + } + } + return verbose; +} #define __T_START \ do { \ + tests_verbose = get_verbose(argv, argc); \ fprintf(stdout, "- running %s...", __FILE__); \ fflush(NULL); \ tests_failed = 0; \ @@ -30,4 +51,15 @@ do { } \ } while (0) +#define __T_VERBOSE(...) \ +do { \ + if (tests_verbose) { \ + if (tests_verbose == 1) { \ + fputc('\n', stderr); \ + tests_verbose++; \ + } \ + fprintf(stderr, ##__VA_ARGS__); \ + } \ +} while (0) + #endif /* __LIBPERF_INTERNAL_TESTS_H */ diff --git a/tools/lib/perf/tests/Makefile b/tools/lib/perf/tests/Makefile index 96841775feaf..b536cc9a26dd 100644 --- a/tools/lib/perf/tests/Makefile +++ b/tools/lib/perf/tests/Makefile @@ -5,6 +5,8 @@ TESTS = test-cpumap test-threadmap test-evlist test-evsel TESTS_SO := $(addsuffix -so,$(TESTS)) TESTS_A := $(addsuffix -a,$(TESTS)) +TEST_ARGS := $(if $(V),-v) + # Set compile option CFLAGS ifdef EXTRA_CFLAGS CFLAGS := $(EXTRA_CFLAGS) @@ -28,9 +30,9 @@ all: $(TESTS_A) $(TESTS_SO) run: @echo "running static:" - @for i in $(TESTS_A); do ./$$i; done + @for i in $(TESTS_A); do ./$$i $(TEST_ARGS); done @echo "running dynamic:" - @for i in $(TESTS_SO); do LD_LIBRARY_PATH=../ ./$$i; done + @for i in $(TESTS_SO); do LD_LIBRARY_PATH=../ ./$$i $(TEST_ARGS); done clean: $(call QUIET_CLEAN, tests)$(RM) $(TESTS_A) $(TESTS_SO) -- cgit v1.2.3 From 47d01e7b9999b9591077a59a1ecec11c6ce570de Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 14 Apr 2021 11:07:39 -0500 Subject: libperf: Add support for user space counter access x86 and arm64 can both support direct access of event counters in userspace. The access sequence is less than trivial and currently exists in perf test code (tools/perf/arch/x86/tests/rdpmc.c) with copies in projects such as PAPI and libpfm4. In order to support userspace access, an event must be mmapped first with perf_evsel__mmap(). Then subsequent calls to perf_evsel__read() will use the fast path (assuming the arch supports it). Committer notes: Added a '__maybe_unused' attribute to the read_perf_counter() argument to fix the build on arches other than x86_64 and arm. Committer testing: Building and running the libperf tests in verbose mode (V=1) now shows those "loop = N, count = N" extra lines, testing user space counter access. # make V=1 -C tools/lib/perf tests make: Entering directory '/home/acme/git/perf/tools/lib/perf' make -f /home/acme/git/perf/tools/build/Makefile.build dir=. obj=libperf make -C /home/acme/git/perf/tools/lib/api/ O= libapi.a make -f /home/acme/git/perf/tools/build/Makefile.build dir=./fd obj=libapi make -f /home/acme/git/perf/tools/build/Makefile.build dir=./fs obj=libapi make -C tests gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -o test-cpumap-a test-cpumap.c ../libperf.a /home/acme/git/perf/tools/lib/api/libapi.a gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -o test-threadmap-a test-threadmap.c ../libperf.a /home/acme/git/perf/tools/lib/api/libapi.a gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -o test-evlist-a test-evlist.c ../libperf.a /home/acme/git/perf/tools/lib/api/libapi.a gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -o test-evsel-a test-evsel.c ../libperf.a /home/acme/git/perf/tools/lib/api/libapi.a gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -L.. -o test-cpumap-so test-cpumap.c /home/acme/git/perf/tools/lib/api/libapi.a -lperf gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -L.. -o test-threadmap-so test-threadmap.c /home/acme/git/perf/tools/lib/api/libapi.a -lperf gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -L.. -o test-evlist-so test-evlist.c /home/acme/git/perf/tools/lib/api/libapi.a -lperf gcc -I/home/acme/git/perf/tools/lib/perf/include -I/home/acme/git/perf/tools/include -I/home/acme/git/perf/tools/lib -g -Wall -L.. -o test-evsel-so test-evsel.c /home/acme/git/perf/tools/lib/api/libapi.a -lperf make -C tests run running static: - running test-cpumap.c...OK - running test-threadmap.c...OK - running test-evlist.c...OK - running test-evsel.c... loop = 65536, count = 333926 loop = 131072, count = 655781 loop = 262144, count = 1311141 loop = 524288, count = 2630126 loop = 1048576, count = 5256955 loop = 65536, count = 524594 loop = 131072, count = 1058916 loop = 262144, count = 2097458 loop = 524288, count = 4205429 loop = 1048576, count = 8406606 OK running dynamic: - running test-cpumap.c...OK - running test-threadmap.c...OK - running test-evlist.c...OK - running test-evsel.c... loop = 65536, count = 328102 loop = 131072, count = 655782 loop = 262144, count = 1317494 loop = 524288, count = 2627851 loop = 1048576, count = 5255187 loop = 65536, count = 524601 loop = 131072, count = 1048923 loop = 262144, count = 2107917 loop = 524288, count = 4194606 loop = 1048576, count = 8409322 OK make: Leaving directory '/home/acme/git/perf/tools/lib/perf' # Signed-off-by: Rob Herring Acked-by: Jiri Olsa Acked-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Catalin Marinas Cc: Itaru Kitayama Cc: Mark Rutland Cc: Peter Zijlstra Cc: Will Deacon Link: http://lore.kernel.org/lkml/20210414155412.3697605-4-robh@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evsel.c | 4 ++ tools/lib/perf/include/internal/mmap.h | 3 ++ tools/lib/perf/mmap.c | 88 ++++++++++++++++++++++++++++++++++ tools/lib/perf/tests/test-evsel.c | 66 +++++++++++++++++++++++++ 4 files changed, 161 insertions(+) diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index cd203998e875..bd8c2f19ef74 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -267,6 +267,10 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, if (FD(evsel, cpu, thread) < 0) return -EINVAL; + if (MMAP(evsel, cpu, thread) && + !perf_mmap__read_self(MMAP(evsel, cpu, thread), count)) + return 0; + if (readn(FD(evsel, cpu, thread), count->values, size) <= 0) return -errno; diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h index be7556e0a2b2..5e3422f40ed5 100644 --- a/tools/lib/perf/include/internal/mmap.h +++ b/tools/lib/perf/include/internal/mmap.h @@ -11,6 +11,7 @@ #define PERF_SAMPLE_MAX_SIZE (1 << 16) struct perf_mmap; +struct perf_counts_values; typedef void (*libperf_unmap_cb_t)(struct perf_mmap *map); @@ -52,4 +53,6 @@ void perf_mmap__put(struct perf_mmap *map); u64 perf_mmap__read_head(struct perf_mmap *map); +int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count); + #endif /* __LIBPERF_INTERNAL_MMAP_H */ diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c index 79d5ed6c38cc..c89dfa5f67b3 100644 --- a/tools/lib/perf/mmap.c +++ b/tools/lib/perf/mmap.c @@ -8,9 +8,11 @@ #include #include #include +#include #include #include #include +#include #include "internal.h" void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, @@ -273,3 +275,89 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map) return event; } + +#if defined(__i386__) || defined(__x86_64__) +static u64 read_perf_counter(unsigned int counter) +{ + unsigned int low, high; + + asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter)); + + return low | ((u64)high) << 32; +} + +static u64 read_timestamp(void) +{ + unsigned int low, high; + + asm volatile("rdtsc" : "=a" (low), "=d" (high)); + + return low | ((u64)high) << 32; +} +#else +static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } +static u64 read_timestamp(void) { return 0; } +#endif + +int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count) +{ + struct perf_event_mmap_page *pc = map->base; + u32 seq, idx, time_mult = 0, time_shift = 0; + u64 cnt, cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL; + + if (!pc || !pc->cap_user_rdpmc) + return -1; + + do { + seq = READ_ONCE(pc->lock); + barrier(); + + count->ena = READ_ONCE(pc->time_enabled); + count->run = READ_ONCE(pc->time_running); + + if (pc->cap_user_time && count->ena != count->run) { + cyc = read_timestamp(); + time_mult = READ_ONCE(pc->time_mult); + time_shift = READ_ONCE(pc->time_shift); + time_offset = READ_ONCE(pc->time_offset); + + if (pc->cap_user_time_short) { + time_cycles = READ_ONCE(pc->time_cycles); + time_mask = READ_ONCE(pc->time_mask); + } + } + + idx = READ_ONCE(pc->index); + cnt = READ_ONCE(pc->offset); + if (pc->cap_user_rdpmc && idx) { + s64 evcnt = read_perf_counter(idx - 1); + u16 width = READ_ONCE(pc->pmc_width); + + evcnt <<= 64 - width; + evcnt >>= 64 - width; + cnt += evcnt; + } else + return -1; + + barrier(); + } while (READ_ONCE(pc->lock) != seq); + + if (count->ena != count->run) { + u64 delta; + + /* Adjust for cap_usr_time_short, a nop if not */ + cyc = time_cycles + ((cyc - time_cycles) & time_mask); + + delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift); + + count->ena += delta; + if (idx) + count->run += delta; + + cnt = mul_u64_u64_div64(cnt, count->ena, count->run); + } + + count->val = cnt; + + return 0; +} diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index 0ad82d7a2a51..288b5feaefe2 100644 --- a/tools/lib/perf/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c @@ -120,6 +120,70 @@ static int test_stat_thread_enable(void) return 0; } +static int test_stat_user_read(int event) +{ + struct perf_counts_values counts = { .val = 0 }; + struct perf_thread_map *threads; + struct perf_evsel *evsel; + struct perf_event_mmap_page *pc; + struct perf_event_attr attr = { + .type = PERF_TYPE_HARDWARE, + .config = event, + }; + int err, i; + + threads = perf_thread_map__new_dummy(); + __T("failed to create threads", threads); + + perf_thread_map__set_pid(threads, 0, 0); + + evsel = perf_evsel__new(&attr); + __T("failed to create evsel", evsel); + + err = perf_evsel__open(evsel, NULL, threads); + __T("failed to open evsel", err == 0); + + err = perf_evsel__mmap(evsel, 0); + __T("failed to mmap evsel", err == 0); + + pc = perf_evsel__mmap_base(evsel, 0, 0); + +#if defined(__i386__) || defined(__x86_64__) + __T("userspace counter access not supported", pc->cap_user_rdpmc); + __T("userspace counter access not enabled", pc->index); + __T("userspace counter width not set", pc->pmc_width >= 32); +#endif + + perf_evsel__read(evsel, 0, 0, &counts); + __T("failed to read value for evsel", counts.val != 0); + + for (i = 0; i < 5; i++) { + volatile int count = 0x10000 << i; + __u64 start, end, last = 0; + + __T_VERBOSE("\tloop = %u, ", count); + + perf_evsel__read(evsel, 0, 0, &counts); + start = counts.val; + + while (count--) ; + + perf_evsel__read(evsel, 0, 0, &counts); + end = counts.val; + + __T("invalid counter data", (end - start) > last); + last = end - start; + __T_VERBOSE("count = %llu\n", end - start); + } + + perf_evsel__munmap(evsel); + perf_evsel__close(evsel); + perf_evsel__delete(evsel); + + perf_thread_map__put(threads); + return 0; +} + int main(int argc, char **argv) { __T_START; @@ -129,6 +193,8 @@ int main(int argc, char **argv) test_stat_cpu(); test_stat_thread(); test_stat_thread_enable(); + test_stat_user_read(PERF_COUNT_HW_INSTRUCTIONS); + test_stat_user_read(PERF_COUNT_HW_CPU_CYCLES); __T_END; return tests_failed == 0 ? 0 : -1; -- cgit v1.2.3 From 818869489ba3c4a4ed1360e22b2f66be488ea9f5 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 14 Apr 2021 14:57:58 -0500 Subject: libperf xyarray: Add bounds checks to xyarray__entry() xyarray__entry() is missing any bounds checking yet often the x and y parameters come from external callers. Add bounds checks and an unchecked __xyarray__entry(). Committer notes: Make the 'x' and 'y' arguments to the new xyarray__entry() that does bounds check to be of type 'size_t', so that we cover also the case where 'x' and 'y' could be negative, which is needed anyway as having them as 'int' breaks the build with: /home/acme/git/perf/tools/lib/perf/include/internal/xyarray.h: In function ‘xyarray__entry’: /home/acme/git/perf/tools/lib/perf/include/internal/xyarray.h:28:8: error: comparison of integer expressions of different signedness: ‘int’ and ‘size_t’ {aka ‘long unsigned int’} [-Werror=sign-compare] 28 | if (x >= xy->max_x || y >= xy->max_y) | ^~ /home/acme/git/perf/tools/lib/perf/include/internal/xyarray.h:28:26: error: comparison of integer expressions of different signedness: ‘int’ and ‘size_t’ {aka ‘long unsigned int’} [-Werror=sign-compare] 28 | if (x >= xy->max_x || y >= xy->max_y) | ^~ cc1: all warnings being treated as errors Signed-off-by: Rob Herring Suggested-by: Arnaldo Carvalho de Melo Suggested-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210414195758.4078803-1-robh@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/internal/xyarray.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/lib/perf/include/internal/xyarray.h b/tools/lib/perf/include/internal/xyarray.h index 51e35d6c8ec4..f10af3da7b21 100644 --- a/tools/lib/perf/include/internal/xyarray.h +++ b/tools/lib/perf/include/internal/xyarray.h @@ -18,11 +18,18 @@ struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size); void xyarray__delete(struct xyarray *xy); void xyarray__reset(struct xyarray *xy); -static inline void *xyarray__entry(struct xyarray *xy, int x, int y) +static inline void *__xyarray__entry(struct xyarray *xy, int x, int y) { return &xy->contents[x * xy->row_size + y * xy->entry_size]; } +static inline void *xyarray__entry(struct xyarray *xy, size_t x, size_t y) +{ + if (x >= xy->max_x || y >= xy->max_y) + return NULL; + return __xyarray__entry(xy, x, y); +} + static inline int xyarray__max_y(struct xyarray *xy) { return xy->max_y; -- cgit v1.2.3 From 32daa5d7899e03433429bedf9e20d7963179703a Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Mon, 19 Apr 2021 16:50:01 +0530 Subject: perf vendor events: Initial JSON/events list for power10 platform Patch adds initial JSON/events for POWER10. Signed-off-by: Kajol Jain Reviewed-by: Paul Clarke Tested-by: Paul Clarke Acked-by: Michael Ellerman Cc: Athira Jajeev Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Ravi Bangoria Cc: linuxppc-dev@lists.ozlabs.org Link: http://lore.kernel.org/lkml/20210419112001.71466-1-kjain@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/powerpc/mapfile.csv | 1 + .../pmu-events/arch/powerpc/power10/cache.json | 47 ++++ .../arch/powerpc/power10/floating_point.json | 7 + .../pmu-events/arch/powerpc/power10/frontend.json | 217 +++++++++++++++ .../pmu-events/arch/powerpc/power10/locks.json | 12 + .../pmu-events/arch/powerpc/power10/marked.json | 147 ++++++++++ .../pmu-events/arch/powerpc/power10/memory.json | 192 +++++++++++++ .../pmu-events/arch/powerpc/power10/others.json | 297 +++++++++++++++++++++ .../pmu-events/arch/powerpc/power10/pipeline.json | 297 +++++++++++++++++++++ .../perf/pmu-events/arch/powerpc/power10/pmc.json | 22 ++ .../arch/powerpc/power10/translation.json | 57 ++++ 11 files changed, 1296 insertions(+) create mode 100644 tools/perf/pmu-events/arch/powerpc/power10/cache.json create mode 100644 tools/perf/pmu-events/arch/powerpc/power10/floating_point.json create mode 100644 tools/perf/pmu-events/arch/powerpc/power10/frontend.json create mode 100644 tools/perf/pmu-events/arch/powerpc/power10/locks.json create mode 100644 tools/perf/pmu-events/arch/powerpc/power10/marked.json create mode 100644 tools/perf/pmu-events/arch/powerpc/power10/memory.json create mode 100644 tools/perf/pmu-events/arch/powerpc/power10/others.json create mode 100644 tools/perf/pmu-events/arch/powerpc/power10/pipeline.json create mode 100644 tools/perf/pmu-events/arch/powerpc/power10/pmc.json create mode 100644 tools/perf/pmu-events/arch/powerpc/power10/translation.json diff --git a/tools/perf/pmu-events/arch/powerpc/mapfile.csv b/tools/perf/pmu-events/arch/powerpc/mapfile.csv index 229150e7ab7d..4abdfc3f9692 100644 --- a/tools/perf/pmu-events/arch/powerpc/mapfile.csv +++ b/tools/perf/pmu-events/arch/powerpc/mapfile.csv @@ -15,3 +15,4 @@ # Power8 entries 004[bcd][[:xdigit:]]{4},1,power8,core 004e[[:xdigit:]]{4},1,power9,core +0080[[:xdigit:]]{4},1,power10,core diff --git a/tools/perf/pmu-events/arch/powerpc/power10/cache.json b/tools/perf/pmu-events/arch/powerpc/power10/cache.json new file mode 100644 index 000000000000..616f29098c71 --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power10/cache.json @@ -0,0 +1,47 @@ +[ + { + "EventCode": "1003C", + "EventName": "PM_EXEC_STALL_DMISS_L2L3", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from either the local L2 or local L3." + }, + { + "EventCode": "34056", + "EventName": "PM_EXEC_STALL_LOAD_FINISH", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was finishing a load after its data was reloaded from a data source beyond the local L1; cycles in which the LSU was processing an L1-hit; cycles in which the NTF instruction merged with another load in the LMQ." + }, + { + "EventCode": "3006C", + "EventName": "PM_RUN_CYC_SMT2_MODE", + "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT2 mode." + }, + { + "EventCode": "300F4", + "EventName": "PM_RUN_INST_CMPL_CONC", + "BriefDescription": "PowerPC instructions completed by this thread when all threads in the core had the run-latch set." + }, + { + "EventCode": "4C016", + "EventName": "PM_EXEC_STALL_DMISS_L2L3_CONFLICT", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, with a dispatch conflict." + }, + { + "EventCode": "4D014", + "EventName": "PM_EXEC_STALL_LOAD", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a load instruction executing in the Load Store Unit." + }, + { + "EventCode": "4D016", + "EventName": "PM_EXEC_STALL_PTESYNC", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a PTESYNC instruction executing in the Load Store Unit." + }, + { + "EventCode": "401EA", + "EventName": "PM_THRESH_EXC_128", + "BriefDescription": "Threshold counter exceeded a value of 128." + }, + { + "EventCode": "400F6", + "EventName": "PM_BR_MPRED_CMPL", + "BriefDescription": "A mispredicted branch completed. Includes direction and target." + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json b/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json new file mode 100644 index 000000000000..703cd431ae5b --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power10/floating_point.json @@ -0,0 +1,7 @@ +[ + { + "EventCode": "4016E", + "EventName": "PM_THRESH_NOT_MET", + "BriefDescription": "Threshold counter did not meet threshold." + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/power10/frontend.json b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json new file mode 100644 index 000000000000..eac8609dcc90 --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power10/frontend.json @@ -0,0 +1,217 @@ +[ + { + "EventCode": "10004", + "EventName": "PM_EXEC_STALL_TRANSLATION", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss or ERAT miss and waited for it to resolve." + }, + { + "EventCode": "10010", + "EventName": "PM_PMC4_OVERFLOW", + "BriefDescription": "The event selected for PMC4 caused the event counter to overflow." + }, + { + "EventCode": "10020", + "EventName": "PM_PMC4_REWIND", + "BriefDescription": "The speculative event selected for PMC4 rewinds and the counter for PMC4 is not charged." + }, + { + "EventCode": "10038", + "EventName": "PM_DISP_STALL_TRANSLATION", + "BriefDescription": "Cycles when dispatch was stalled for this thread because the MMU was handling a translation miss." + }, + { + "EventCode": "1003A", + "EventName": "PM_DISP_STALL_BR_MPRED_IC_L2", + "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2 after suffering a branch mispredict." + }, + { + "EventCode": "1E050", + "EventName": "PM_DISP_STALL_HELD_STF_MAPPER_CYC", + "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the STF mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR." + }, + { + "EventCode": "1F054", + "EventName": "PM_DTLB_HIT", + "BriefDescription": "The PTE required by the instruction was resident in the TLB (data TLB access). When MMCR1[16]=0 this event counts only demand hits. When MMCR1[16]=1 this event includes demand and prefetch. Applies to both HPT and RPT." + }, + { + "EventCode": "101E8", + "EventName": "PM_THRESH_EXC_256", + "BriefDescription": "Threshold counter exceeded a count of 256." + }, + { + "EventCode": "101EC", + "EventName": "PM_THRESH_MET", + "BriefDescription": "Threshold exceeded." + }, + { + "EventCode": "100F2", + "EventName": "PM_1PLUS_PPC_CMPL", + "BriefDescription": "Cycles in which at least one instruction is completed by this thread." + }, + { + "EventCode": "100F6", + "EventName": "PM_IERAT_MISS", + "BriefDescription": "IERAT Reloaded to satisfy an IERAT miss. All page sizes are counted by this event." + }, + { + "EventCode": "100F8", + "EventName": "PM_DISP_STALL_CYC", + "BriefDescription": "Cycles the ICT has no itags assigned to this thread (no instructions were dispatched during these cycles)." + }, + { + "EventCode": "20114", + "EventName": "PM_MRK_L2_RC_DISP", + "BriefDescription": "Marked instruction RC dispatched in L2." + }, + { + "EventCode": "2C010", + "EventName": "PM_EXEC_STALL_LSU", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Load Store Unit. This does not include simple fixed point instructions." + }, + { + "EventCode": "2C016", + "EventName": "PM_DISP_STALL_IERAT_ONLY_MISS", + "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction ERAT miss." + }, + { + "EventCode": "2C01E", + "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3", + "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3 after suffering a branch mispredict." + }, + { + "EventCode": "2D01A", + "EventName": "PM_DISP_STALL_IC_MISS", + "BriefDescription": "Cycles when dispatch was stalled for this thread due to an Icache Miss." + }, + { + "EventCode": "2D01C", + "EventName": "PM_CMPL_STALL_STCX", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a stcx waiting for resolution from the nest before completing." + }, + { + "EventCode": "2E018", + "EventName": "PM_DISP_STALL_FETCH", + "BriefDescription": "Cycles when dispatch was stalled for this thread because Fetch was being held." + }, + { + "EventCode": "2E01A", + "EventName": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC", + "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the XVFC mapper/SRB was full." + }, + { + "EventCode": "2C142", + "EventName": "PM_MRK_XFER_FROM_SRC_PMC2", + "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." + }, + { + "EventCode": "24050", + "EventName": "PM_IOPS_DISP", + "BriefDescription": "Internal Operations dispatched. PM_IOPS_DISP / PM_INST_DISP will show the average number of internal operations per PowerPC instruction." + }, + { + "EventCode": "2405E", + "EventName": "PM_ISSUE_CANCEL", + "BriefDescription": "An instruction issued and the issue was later cancelled. Only one cancel per PowerPC instruction." + }, + { + "EventCode": "200FA", + "EventName": "PM_BR_TAKEN_CMPL", + "BriefDescription": "Branch Taken instruction completed." + }, + { + "EventCode": "30012", + "EventName": "PM_FLUSH_COMPLETION", + "BriefDescription": "The instruction that was next to complete (oldest in the pipeline) did not complete because it suffered a flush." + }, + { + "EventCode": "30014", + "EventName": "PM_EXEC_STALL_STORE", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store instruction executing in the Load Store Unit." + }, + { + "EventCode": "30018", + "EventName": "PM_DISP_STALL_HELD_SCOREBOARD_CYC", + "BriefDescription": "Cycles in which the NTC instruction is held at dispatch while waiting on the Scoreboard. This event combines VSCR and FPSCR together." + }, + { + "EventCode": "30026", + "EventName": "PM_EXEC_STALL_STORE_MISS", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a store whose cache line was not resident in the L1 and was waiting for allocation of the missing line into the L1." + }, + { + "EventCode": "3012A", + "EventName": "PM_MRK_L2_RC_DONE", + "BriefDescription": "L2 RC machine completed the transaction for the marked instruction." + }, + { + "EventCode": "3F046", + "EventName": "PM_ITLB_HIT_1G", + "BriefDescription": "Instruction TLB hit (IERAT reload) page size 1G, which implies Radix Page Table translation is in use. When MMCR1[17]=0 this event counts only for demand misses. When MMCR1[17]=1 this event includes demand misses and prefetches." + }, + { + "EventCode": "34058", + "EventName": "PM_DISP_STALL_BR_MPRED_ICMISS", + "BriefDescription": "Cycles when dispatch was stalled after a mispredicted branch resulted in an instruction cache miss." + }, + { + "EventCode": "3D05C", + "EventName": "PM_DISP_STALL_HELD_RENAME_CYC", + "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because the mapper/SRB was full. Includes GPR (count, link, tar), VSR, VMR, FPR and XVFC." + }, + { + "EventCode": "3E052", + "EventName": "PM_DISP_STALL_IC_L3", + "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L3." + }, + { + "EventCode": "3E054", + "EventName": "PM_LD_MISS_L1", + "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load." + }, + { + "EventCode": "301EA", + "EventName": "PM_THRESH_EXC_1024", + "BriefDescription": "Threshold counter exceeded a value of 1024." + }, + { + "EventCode": "300FA", + "EventName": "PM_INST_FROM_L3MISS", + "BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss." + }, + { + "EventCode": "40006", + "EventName": "PM_ISSUE_KILL", + "BriefDescription": "Cycles in which an instruction or group of instructions were cancelled after being issued. This event increments once per occurrence, regardless of how many instructions are included in the issue group." + }, + { + "EventCode": "40116", + "EventName": "PM_MRK_LARX_FIN", + "BriefDescription": "Marked load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock." + }, + { + "EventCode": "4C010", + "EventName": "PM_DISP_STALL_BR_MPRED_IC_L3MISS", + "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from sources beyond the local L3 after suffering a mispredicted branch." + }, + { + "EventCode": "4D01E", + "EventName": "PM_DISP_STALL_BR_MPRED", + "BriefDescription": "Cycles when dispatch was stalled for this thread due to a mispredicted branch." + }, + { + "EventCode": "4E010", + "EventName": "PM_DISP_STALL_IC_L3MISS", + "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from any source beyond the local L3." + }, + { + "EventCode": "4E01A", + "EventName": "PM_DISP_STALL_HELD_CYC", + "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any reason." + }, + { + "EventCode": "44056", + "EventName": "PM_VECTOR_ST_CMPL", + "BriefDescription": "Vector store instructions completed." + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/power10/locks.json b/tools/perf/pmu-events/arch/powerpc/power10/locks.json new file mode 100644 index 000000000000..016d8de0e14a --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power10/locks.json @@ -0,0 +1,12 @@ +[ + { + "EventCode": "1E058", + "EventName": "PM_STCX_FAIL_FIN", + "BriefDescription": "Conditional store instruction (STCX) failed. LARX and STCX are instructions used to acquire a lock." + }, + { + "EventCode": "4E050", + "EventName": "PM_STCX_PASS_FIN", + "BriefDescription": "Conditional store instruction (STCX) passed. LARX and STCX are instructions used to acquire a lock." + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/power10/marked.json b/tools/perf/pmu-events/arch/powerpc/power10/marked.json new file mode 100644 index 000000000000..93a5a5910648 --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power10/marked.json @@ -0,0 +1,147 @@ +[ + { + "EventCode": "1002C", + "EventName": "PM_LD_PREFETCH_CACHE_LINE_MISS", + "BriefDescription": "The L1 cache was reloaded with a line that fulfills a prefetch request." + }, + { + "EventCode": "10132", + "EventName": "PM_MRK_INST_ISSUED", + "BriefDescription": "Marked instruction issued. Note that stores always get issued twice, the address gets issued to the LSU and the data gets issued to the VSU. Also, issues can sometimes get killed/cancelled and cause multiple sequential issues for the same instruction." + }, + { + "EventCode": "101E0", + "EventName": "PM_MRK_INST_DISP", + "BriefDescription": "The thread has dispatched a randomly sampled marked instruction." + }, + { + "EventCode": "101E2", + "EventName": "PM_MRK_BR_TAKEN_CMPL", + "BriefDescription": "Marked Branch Taken instruction completed." + }, + { + "EventCode": "20112", + "EventName": "PM_MRK_NTF_FIN", + "BriefDescription": "The marked instruction became the oldest in the pipeline before it finished. It excludes instructions that finish at dispatch." + }, + { + "EventCode": "2C01C", + "EventName": "PM_EXEC_STALL_DMISS_OFF_CHIP", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a remote chip." + }, + { + "EventCode": "20138", + "EventName": "PM_MRK_ST_NEST", + "BriefDescription": "A store has been sampled/marked and is at the point of execution where it has completed in the core and can no longer be flushed. At this point the store is sent to the L2." + }, + { + "EventCode": "2013A", + "EventName": "PM_MRK_BRU_FIN", + "BriefDescription": "Marked Branch instruction finished." + }, + { + "EventCode": "2C144", + "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC2", + "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[15:27]." + }, + { + "EventCode": "24156", + "EventName": "PM_MRK_STCX_FIN", + "BriefDescription": "Marked conditional store instruction (STCX) finished. LARX and STCX are instructions used to acquire a lock." + }, + { + "EventCode": "24158", + "EventName": "PM_MRK_INST", + "BriefDescription": "An instruction was marked. Includes both Random Instruction Sampling (RIS) at decode time and Random Event Sampling (RES) at the time the configured event happens." + }, + { + "EventCode": "2415C", + "EventName": "PM_MRK_BR_CMPL", + "BriefDescription": "A marked branch completed. All branches are included." + }, + { + "EventCode": "200FD", + "EventName": "PM_L1_ICACHE_MISS", + "BriefDescription": "Demand iCache Miss." + }, + { + "EventCode": "30130", + "EventName": "PM_MRK_INST_FIN", + "BriefDescription": "marked instruction finished. Excludes instructions that finish at dispatch. Note that stores always finish twice since the address gets issued to the LSU and the data gets issued to the VSU." + }, + { + "EventCode": "34146", + "EventName": "PM_MRK_LD_CMPL", + "BriefDescription": "Marked loads completed." + }, + { + "EventCode": "3E158", + "EventName": "PM_MRK_STCX_FAIL", + "BriefDescription": "Marked conditional store instruction (STCX) failed. LARX and STCX are instructions used to acquire a lock." + }, + { + "EventCode": "3E15A", + "EventName": "PM_MRK_ST_FIN", + "BriefDescription": "The marked instruction was a store of any kind." + }, + { + "EventCode": "30068", + "EventName": "PM_L1_ICACHE_RELOADED_PREF", + "BriefDescription": "Counts all Icache prefetch reloads ( includes demand turned into prefetch)." + }, + { + "EventCode": "301E4", + "EventName": "PM_MRK_BR_MPRED_CMPL", + "BriefDescription": "Marked Branch Mispredicted. Includes direction and target." + }, + { + "EventCode": "300F6", + "EventName": "PM_LD_DEMAND_MISS_L1", + "BriefDescription": "The L1 cache was reloaded with a line that fulfills a demand miss request. Counted at reload time, before finish." + }, + { + "EventCode": "300FE", + "EventName": "PM_DATA_FROM_L3MISS", + "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss." + }, + { + "EventCode": "40012", + "EventName": "PM_L1_ICACHE_RELOADED_ALL", + "BriefDescription": "Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch." + }, + { + "EventCode": "40134", + "EventName": "PM_MRK_INST_TIMEO", + "BriefDescription": "Marked instruction finish timeout (instruction was lost)." + }, + { + "EventCode": "4003C", + "EventName": "PM_DISP_STALL_HELD_SYNC_CYC", + "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch." + }, + { + "EventCode": "4505A", + "EventName": "PM_SP_FLOP_CMPL", + "BriefDescription": "Single Precision floating point instructions completed." + }, + { + "EventCode": "4D058", + "EventName": "PM_VECTOR_FLOP_CMPL", + "BriefDescription": "Vector floating point instructions completed." + }, + { + "EventCode": "4D05A", + "EventName": "PM_NON_MATH_FLOP_CMPL", + "BriefDescription": "Non Math instructions completed." + }, + { + "EventCode": "401E0", + "EventName": "PM_MRK_INST_CMPL", + "BriefDescription": "marked instruction completed." + }, + { + "EventCode": "400FE", + "EventName": "PM_DATA_FROM_MEMORY", + "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss." + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/power10/memory.json b/tools/perf/pmu-events/arch/powerpc/power10/memory.json new file mode 100644 index 000000000000..b01141eeebee --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power10/memory.json @@ -0,0 +1,192 @@ +[ + { + "EventCode": "1000A", + "EventName": "PM_PMC3_REWIND", + "BriefDescription": "The speculative event selected for PMC3 rewinds and the counter for PMC3 is not charged." + }, + { + "EventCode": "1C040", + "EventName": "PM_XFER_FROM_SRC_PMC1", + "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." + }, + { + "EventCode": "1C142", + "EventName": "PM_MRK_XFER_FROM_SRC_PMC1", + "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[0:12]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." + }, + { + "EventCode": "1C144", + "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC1", + "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[0:12]." + }, + { + "EventCode": "1C056", + "EventName": "PM_DERAT_MISS_4K", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 4K. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." + }, + { + "EventCode": "1C058", + "EventName": "PM_DTLB_MISS_16G", + "BriefDescription": "Data TLB reload (after a miss) page size 16G. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." + }, + { + "EventCode": "1C05C", + "EventName": "PM_DTLB_MISS_2M", + "BriefDescription": "Data TLB reload (after a miss) page size 2M. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." + }, + { + "EventCode": "1E056", + "EventName": "PM_EXEC_STALL_STORE_PIPE", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the store unit. This does not include cycles spent handling store misses, PTESYNC instructions or TLBIE instructions." + }, + { + "EventCode": "1F150", + "EventName": "PM_MRK_ST_L2_CYC", + "BriefDescription": "Cycles from L2 RC dispatch to L2 RC completion." + }, + { + "EventCode": "10062", + "EventName": "PM_LD_L3MISS_PEND_CYC", + "BriefDescription": "Cycles L3 miss was pending for this thread." + }, + { + "EventCode": "20010", + "EventName": "PM_PMC1_OVERFLOW", + "BriefDescription": "The event selected for PMC1 caused the event counter to overflow." + }, + { + "EventCode": "2001A", + "EventName": "PM_ITLB_HIT", + "BriefDescription": "The PTE required to translate the instruction address was resident in the TLB (instruction TLB access/IERAT reload). Applies to both HPT and RPT. When MMCR1[17]=0 this event counts only for demand misses. When MMCR1[17]=1 this event includes demand misses and prefetches." + }, + { + "EventCode": "2003E", + "EventName": "PM_PTESYNC_FIN", + "BriefDescription": "Ptesync instruction finished in the store unit. Only one ptesync can finish at a time." + }, + { + "EventCode": "2C040", + "EventName": "PM_XFER_FROM_SRC_PMC2", + "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[15:27]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." + }, + { + "EventCode": "2C054", + "EventName": "PM_DERAT_MISS_64K", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." + }, + { + "EventCode": "2C056", + "EventName": "PM_DTLB_MISS_4K", + "BriefDescription": "Data TLB reload (after a miss) page size 4K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." + }, + { + "EventCode": "2D154", + "EventName": "PM_MRK_DERAT_MISS_64K", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." + }, + { + "EventCode": "200F6", + "EventName": "PM_DERAT_MISS", + "BriefDescription": "DERAT Reloaded to satisfy a DERAT miss. All page sizes are counted by this event. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." + }, + { + "EventCode": "3000A", + "EventName": "PM_DISP_STALL_ITLB_MISS", + "BriefDescription": "Cycles when dispatch was stalled while waiting to resolve an instruction TLB miss." + }, + { + "EventCode": "30016", + "EventName": "PM_EXEC_STALL_DERAT_DTLB_MISS", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered a TLB miss and waited for it resolve." + }, + { + "EventCode": "3C040", + "EventName": "PM_XFER_FROM_SRC_PMC3", + "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." + }, + { + "EventCode": "3C142", + "EventName": "PM_MRK_XFER_FROM_SRC_PMC3", + "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[30:42]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." + }, + { + "EventCode": "3C144", + "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC3", + "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[30:42]." + }, + { + "EventCode": "3C054", + "EventName": "PM_DERAT_MISS_16M", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." + }, + { + "EventCode": "3C056", + "EventName": "PM_DTLB_MISS_64K", + "BriefDescription": "Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." + }, + { + "EventCode": "3C058", + "EventName": "PM_LARX_FIN", + "BriefDescription": "Load and reserve instruction (LARX) finished. LARX and STCX are instructions used to acquire a lock." + }, + { + "EventCode": "301E2", + "EventName": "PM_MRK_ST_CMPL", + "BriefDescription": "Marked store completed and sent to nest. Note that this count excludes cache-inhibited stores." + }, + { + "EventCode": "300FC", + "EventName": "PM_DTLB_MISS", + "BriefDescription": "The DPTEG required for the load/store instruction in execution was missing from the TLB. It includes pages of all sizes for demand and prefetch activity." + }, + { + "EventCode": "4D02C", + "EventName": "PM_PMC1_REWIND", + "BriefDescription": "The speculative event selected for PMC1 rewinds and the counter for PMC1 is not charged." + }, + { + "EventCode": "4003E", + "EventName": "PM_LD_CMPL", + "BriefDescription": "Loads completed." + }, + { + "EventCode": "4C040", + "EventName": "PM_XFER_FROM_SRC_PMC4", + "BriefDescription": "The processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." + }, + { + "EventCode": "4C142", + "EventName": "PM_MRK_XFER_FROM_SRC_PMC4", + "BriefDescription": "For a marked data transfer instruction, the processor's L1 data cache was reloaded from the source specified in MMCR3[45:57]. If MMCR1[16|17] is 0 (default), this count includes only lines that were reloaded to satisfy a demand miss. If MMCR1[16|17] is 1, this count includes both demand misses and prefetch reloads." + }, + { + "EventCode": "4C144", + "EventName": "PM_MRK_XFER_FROM_SRC_CYC_PMC4", + "BriefDescription": "Cycles taken for a marked demand miss to reload a line from the source specified in MMCR3[45:57]." + }, + { + "EventCode": "4C056", + "EventName": "PM_DTLB_MISS_16M", + "BriefDescription": "Data TLB reload (after a miss) page size 16M. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." + }, + { + "EventCode": "4C05A", + "EventName": "PM_DTLB_MISS_1G", + "BriefDescription": "Data TLB reload (after a miss) page size 1G. Implies radix translation was used. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." + }, + { + "EventCode": "4C15E", + "EventName": "PM_MRK_DTLB_MISS_64K", + "BriefDescription": "Marked Data TLB reload (after a miss) page size 64K. When MMCR1[16]=0 this event counts only for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." + }, + { + "EventCode": "4D056", + "EventName": "PM_NON_FMA_FLOP_CMPL", + "BriefDescription": "Non FMA instruction completed." + }, + { + "EventCode": "40164", + "EventName": "PM_MRK_DERAT_MISS_2M", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M for a marked instruction. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/power10/others.json b/tools/perf/pmu-events/arch/powerpc/power10/others.json new file mode 100644 index 000000000000..a119e56cbf1c --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power10/others.json @@ -0,0 +1,297 @@ +[ + { + "EventCode": "10016", + "EventName": "PM_VSU0_ISSUE", + "BriefDescription": "VSU instructions issued to VSU pipe 0." + }, + { + "EventCode": "1001C", + "EventName": "PM_ULTRAVISOR_INST_CMPL", + "BriefDescription": "PowerPC instructions that completed while the thread was in ultravisor state." + }, + { + "EventCode": "100F0", + "EventName": "PM_CYC", + "BriefDescription": "Processor cycles." + }, + { + "EventCode": "10134", + "EventName": "PM_MRK_ST_DONE_L2", + "BriefDescription": "Marked stores completed in L2 (RC machine done)." + }, + { + "EventCode": "1505E", + "EventName": "PM_LD_HIT_L1", + "BriefDescription": "Loads that finished without experiencing an L1 miss." + }, + { + "EventCode": "1D05E", + "EventName": "PM_DISP_STALL_HELD_HALT_CYC", + "BriefDescription": "Cycles in which the NTC instruction is held at dispatch because of power management." + }, + { + "EventCode": "1E054", + "EventName": "PM_EXEC_STALL_DMISS_L21_L31", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from another core's L2 or L3 on the same chip." + }, + { + "EventCode": "1E05A", + "EventName": "PM_CMPL_STALL_LWSYNC", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a lwsync waiting to complete." + }, + { + "EventCode": "1F056", + "EventName": "PM_DISP_SS0_2_INSTR_CYC", + "BriefDescription": "Cycles in which Superslice 0 dispatches either 1 or 2 instructions." + }, + { + "EventCode": "1F15C", + "EventName": "PM_MRK_STCX_L2_CYC", + "BriefDescription": "Cycles spent in the nest portion of a marked Stcx instruction. It starts counting when the operation starts to drain to the L2 and it stops counting when the instruction retires from the Instruction Completion Table (ICT) in the Instruction Sequencing Unit (ISU)." + }, + { + "EventCode": "10066", + "EventName": "PM_ADJUNCT_CYC", + "BriefDescription": "Cycles in which the thread is in Adjunct state. MSR[S HV PR] bits = 011." + }, + { + "EventCode": "101E4", + "EventName": "PM_MRK_L1_ICACHE_MISS", + "BriefDescription": "Marked Instruction suffered an icache Miss." + }, + { + "EventCode": "101EA", + "EventName": "PM_MRK_L1_RELOAD_VALID", + "BriefDescription": "Marked demand reload." + }, + { + "EventCode": "100F4", + "EventName": "PM_FLOP_CMPL", + "BriefDescription": "Floating Point Operations Completed. Includes any type. It counts once for each 1, 2, 4 or 8 flop instruction. Use PM_1|2|4|8_FLOP_CMPL events to count flops." + }, + { + "EventCode": "100FA", + "EventName": "PM_RUN_LATCH_ANY_THREAD_CYC", + "BriefDescription": "Cycles when at least one thread has the run latch set." + }, + { + "EventCode": "100FC", + "EventName": "PM_LD_REF_L1", + "BriefDescription": "All L1 D cache load references counted at finish, gated by reject. In P9 and earlier this event counted only cacheable loads but in P10 both cacheable and non-cacheable loads are included." + }, + { + "EventCode": "20006", + "EventName": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC", + "BriefDescription": "Cycles in which the NTC instruction is held at dispatch due to Issue queue full. Includes issue queue and branch queue." + }, + { + "EventCode": "2000C", + "EventName": "PM_RUN_LATCH_ALL_THREADS_CYC", + "BriefDescription": "Cycles when the run latch is set for all threads." + }, + { + "EventCode": "2E010", + "EventName": "PM_ADJUNCT_INST_CMPL", + "BriefDescription": "PowerPC instructions that completed while the thread is in Adjunct state." + }, + { + "EventCode": "2E014", + "EventName": "PM_STCX_FIN", + "BriefDescription": "Conditional store instruction (STCX) finished. LARX and STCX are instructions used to acquire a lock." + }, + { + "EventCode": "20130", + "EventName": "PM_MRK_INST_DECODED", + "BriefDescription": "An instruction was marked at decode time. Random Instruction Sampling (RIS) only." + }, + { + "EventCode": "20132", + "EventName": "PM_MRK_DFU_ISSUE", + "BriefDescription": "The marked instruction was a decimal floating point operation issued to the VSU. Measured at issue time." + }, + { + "EventCode": "20134", + "EventName": "PM_MRK_FXU_ISSUE", + "BriefDescription": "The marked instruction was a fixed point operation issued to the VSU. Measured at issue time." + }, + { + "EventCode": "2505C", + "EventName": "PM_VSU_ISSUE", + "BriefDescription": "At least one VSU instruction was issued to one of the VSU pipes. Up to 4 per cycle. Includes fixed point operations." + }, + { + "EventCode": "2F054", + "EventName": "PM_DISP_SS1_2_INSTR_CYC", + "BriefDescription": "Cycles in which Superslice 1 dispatches either 1 or 2 instructions." + }, + { + "EventCode": "2F056", + "EventName": "PM_DISP_SS1_4_INSTR_CYC", + "BriefDescription": "Cycles in which Superslice 1 dispatches either 3 or 4 instructions." + }, + { + "EventCode": "2006C", + "EventName": "PM_RUN_CYC_SMT4_MODE", + "BriefDescription": "Cycles when this thread's run latch is set and the core is in SMT4 mode." + }, + { + "EventCode": "201E0", + "EventName": "PM_MRK_DATA_FROM_MEMORY", + "BriefDescription": "The processor's data cache was reloaded from local, remote, or distant memory due to a demand miss for a marked load." + }, + { + "EventCode": "201E4", + "EventName": "PM_MRK_DATA_FROM_L3MISS", + "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked load." + }, + { + "EventCode": "201E8", + "EventName": "PM_THRESH_EXC_512", + "BriefDescription": "Threshold counter exceeded a value of 512." + }, + { + "EventCode": "200F2", + "EventName": "PM_INST_DISP", + "BriefDescription": "PowerPC instructions dispatched." + }, + { + "EventCode": "30132", + "EventName": "PM_MRK_VSU_FIN", + "BriefDescription": "VSU marked instructions finished. Excludes simple FX instructions issued to the Store Unit." + }, + { + "EventCode": "30038", + "EventName": "PM_EXEC_STALL_DMISS_LMEM", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local memory, local OpenCapp cache, or local OpenCapp memory." + }, + { + "EventCode": "3F04A", + "EventName": "PM_LSU_ST5_FIN", + "BriefDescription": "LSU Finished an internal operation in ST2 port." + }, + { + "EventCode": "34054", + "EventName": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from the local L2 or local L3, without a dispatch conflict." + }, + { + "EventCode": "3405A", + "EventName": "PM_PRIVILEGED_INST_CMPL", + "BriefDescription": "PowerPC Instructions that completed while the thread is in Privileged state." + }, + { + "EventCode": "3F150", + "EventName": "PM_MRK_ST_DRAIN_CYC", + "BriefDescription": "cycles to drain st from core to L2." + }, + { + "EventCode": "3F054", + "EventName": "PM_DISP_SS0_4_INSTR_CYC", + "BriefDescription": "Cycles in which Superslice 0 dispatches either 3 or 4 instructions." + }, + { + "EventCode": "3F056", + "EventName": "PM_DISP_SS0_8_INSTR_CYC", + "BriefDescription": "Cycles in which Superslice 0 dispatches either 5, 6, 7 or 8 instructions." + }, + { + "EventCode": "30162", + "EventName": "PM_MRK_ISSUE_DEPENDENT_LOAD", + "BriefDescription": "The marked instruction was dependent on a load. It is eligible for issue kill." + }, + { + "EventCode": "40114", + "EventName": "PM_MRK_START_PROBE_NOP_DISP", + "BriefDescription": "Marked Start probe nop dispatched. Instruction AND R0,R0,R0." + }, + { + "EventCode": "4001C", + "EventName": "PM_VSU_FIN", + "BriefDescription": "VSU instructions finished." + }, + { + "EventCode": "4C01A", + "EventName": "PM_EXEC_STALL_DMISS_OFF_NODE", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a distant chip." + }, + { + "EventCode": "4D012", + "EventName": "PM_PMC3_SAVED", + "BriefDescription": "The conditions for the speculative event selected for PMC3 are met and PMC3 is charged." + }, + { + "EventCode": "4D022", + "EventName": "PM_HYPERVISOR_INST_CMPL", + "BriefDescription": "PowerPC instructions that completed while the thread is in hypervisor state." + }, + { + "EventCode": "4D026", + "EventName": "PM_ULTRAVISOR_CYC", + "BriefDescription": "Cycles when the thread is in Ultravisor state. MSR[S HV PR]=110." + }, + { + "EventCode": "4D028", + "EventName": "PM_PRIVILEGED_CYC", + "BriefDescription": "Cycles when the thread is in Privileged state. MSR[S HV PR]=x00." + }, + { + "EventCode": "40030", + "EventName": "PM_INST_FIN", + "BriefDescription": "Instructions finished." + }, + { + "EventCode": "44146", + "EventName": "PM_MRK_STCX_CORE_CYC", + "BriefDescription": "Cycles spent in the core portion of a marked Stcx instruction. It starts counting when the instruction is decoded and stops counting when it drains into the L2." + }, + { + "EventCode": "44054", + "EventName": "PM_VECTOR_LD_CMPL", + "BriefDescription": "Vector load instructions completed." + }, + { + "EventCode": "45054", + "EventName": "PM_FMA_CMPL", + "BriefDescription": "Two floating point instructions completed (FMA class of instructions: fmadd, fnmadd, fmsub, fnmsub). Scalar instructions only." + }, + { + "EventCode": "45056", + "EventName": "PM_SCALAR_FLOP_CMPL", + "BriefDescription": "Scalar floating point instructions completed." + }, + { + "EventCode": "4505C", + "EventName": "PM_MATH_FLOP_CMPL", + "BriefDescription": "Math floating point instructions completed." + }, + { + "EventCode": "4D05E", + "EventName": "PM_BR_CMPL", + "BriefDescription": "A branch completed. All branches are included." + }, + { + "EventCode": "4E15E", + "EventName": "PM_MRK_INST_FLUSHED", + "BriefDescription": "The marked instruction was flushed." + }, + { + "EventCode": "401E6", + "EventName": "PM_MRK_INST_FROM_L3MISS", + "BriefDescription": "The processor's instruction cache was reloaded from a source other than the local core's L1, L2, or L3 due to a demand miss for a marked instruction." + }, + { + "EventCode": "401E8", + "EventName": "PM_MRK_DATA_FROM_L2MISS", + "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss for a marked load." + }, + { + "EventCode": "400F0", + "EventName": "PM_LD_DEMAND_MISS_L1_FIN", + "BriefDescription": "Load Missed L1, counted at finish time." + }, + { + "EventCode": "400FA", + "EventName": "PM_RUN_INST_CMPL", + "BriefDescription": "Completed PowerPC instructions gated by the run latch." + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json new file mode 100644 index 000000000000..b61b5cc157ee --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power10/pipeline.json @@ -0,0 +1,297 @@ +[ + { + "EventCode": "100FE", + "EventName": "PM_INST_CMPL", + "BriefDescription": "PowerPC instructions completed." + }, + { + "EventCode": "10006", + "EventName": "PM_DISP_STALL_HELD_OTHER_CYC", + "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any other reason." + }, + { + "EventCode": "1000C", + "EventName": "PM_LSU_LD0_FIN", + "BriefDescription": "LSU Finished an internal operation in LD0 port." + }, + { + "EventCode": "1000E", + "EventName": "PM_MMA_ISSUED", + "BriefDescription": "MMA instructions issued." + }, + { + "EventCode": "10012", + "EventName": "PM_LSU_ST0_FIN", + "BriefDescription": "LSU Finished an internal operation in ST0 port." + }, + { + "EventCode": "10014", + "EventName": "PM_LSU_ST4_FIN", + "BriefDescription": "LSU Finished an internal operation in ST4 port." + }, + { + "EventCode": "10018", + "EventName": "PM_IC_DEMAND_CYC", + "BriefDescription": "Cycles in which an instruction reload is pending to satisfy a demand miss." + }, + { + "EventCode": "10022", + "EventName": "PM_PMC2_SAVED", + "BriefDescription": "The conditions for the speculative event selected for PMC2 are met and PMC2 is charged." + }, + { + "EventCode": "10024", + "EventName": "PM_PMC5_OVERFLOW", + "BriefDescription": "The event selected for PMC5 caused the event counter to overflow." + }, + { + "EventCode": "10058", + "EventName": "PM_EXEC_STALL_FIN_AT_DISP", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline finished at dispatch and did not require execution in the LSU, BRU or VSU." + }, + { + "EventCode": "1005A", + "EventName": "PM_FLUSH_MPRED", + "BriefDescription": "A flush occurred due to a mispredicted branch. Includes target and direction." + }, + { + "EventCode": "1C05A", + "EventName": "PM_DERAT_MISS_2M", + "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 2M. Implies radix translation. When MMCR1[16]=0 this event counts only DERAT reloads for demand misses. When MMCR1[16]=1 this event includes demand misses and prefetches." + }, + { + "EventCode": "10064", + "EventName": "PM_DISP_STALL_IC_L2", + "BriefDescription": "Cycles when dispatch was stalled while the instruction was fetched from the local L2." + }, + { + "EventCode": "10068", + "EventName": "PM_BR_FIN", + "BriefDescription": "A branch instruction finished. Includes predicted/mispredicted/unconditional." + }, + { + "EventCode": "1006A", + "EventName": "PM_FX_LSU_FIN", + "BriefDescription": "Simple fixed point instruction issued to the store unit. Measured at finish time." + }, + { + "EventCode": "1006C", + "EventName": "PM_RUN_CYC_ST_MODE", + "BriefDescription": "Cycles when the run latch is set and the core is in ST mode." + }, + { + "EventCode": "20004", + "EventName": "PM_ISSUE_STALL", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was dispatched but not issued yet." + }, + { + "EventCode": "2000A", + "EventName": "PM_HYPERVISOR_CYC", + "BriefDescription": "Cycles when the thread is in Hypervisor state. MSR[S HV PR]=010." + }, + { + "EventCode": "2000E", + "EventName": "PM_LSU_LD1_FIN", + "BriefDescription": "LSU Finished an internal operation in LD1 port." + }, + { + "EventCode": "2C014", + "EventName": "PM_CMPL_STALL_SPECIAL", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline required special handling before completing." + }, + { + "EventCode": "2C018", + "EventName": "PM_EXEC_STALL_DMISS_L3MISS", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for a load miss to resolve from a source beyond the local L2 or local L3." + }, + { + "EventCode": "2D010", + "EventName": "PM_LSU_ST1_FIN", + "BriefDescription": "LSU Finished an internal operation in ST1 port." + }, + { + "EventCode": "2D012", + "EventName": "PM_VSU1_ISSUE", + "BriefDescription": "VSU instructions issued to VSU pipe 1." + }, + { + "EventCode": "2D018", + "EventName": "PM_EXEC_STALL_VSU", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the VSU (includes FXU, VSU, CRU)." + }, + { + "EventCode": "2E01E", + "EventName": "PM_EXEC_STALL_NTC_FLUSH", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in any unit before it was flushed. Note that if the flush of the oldest instruction happens after finish, the cycles from dispatch to issue will be included in PM_DISP_STALL and the cycles from issue to finish will be included in PM_EXEC_STALL and its corresponding children." + }, + { + "EventCode": "2013C", + "EventName": "PM_MRK_FX_LSU_FIN", + "BriefDescription": "The marked instruction was simple fixed point that was issued to the store unit. Measured at finish time." + }, + { + "EventCode": "2405A", + "EventName": "PM_NTC_FIN", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. Note that instructions can finish out of order, therefore not all the instructions that finish have a Next-to-complete status." + }, + { + "EventCode": "201E2", + "EventName": "PM_MRK_LD_MISS_L1", + "BriefDescription": "Marked DL1 Demand Miss counted at finish time." + }, + { + "EventCode": "200F4", + "EventName": "PM_RUN_CYC", + "BriefDescription": "Processor cycles gated by the run latch." + }, + { + "EventCode": "30004", + "EventName": "PM_DISP_STALL_FLUSH", + "BriefDescription": "Cycles when dispatch was stalled because of a flush that happened to an instruction(s) that was not yet NTC. PM_EXEC_STALL_NTC_FLUSH only includes instructions that were flushed after becoming NTC." + }, + { + "EventCode": "30008", + "EventName": "PM_EXEC_STALL", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting to finish in one of the execution units (BRU, LSU, VSU). Only cycles between issue and finish are counted in this category." + }, + { + "EventCode": "3001A", + "EventName": "PM_LSU_ST2_FIN", + "BriefDescription": "LSU Finished an internal operation in ST2 port." + }, + { + "EventCode": "30020", + "EventName": "PM_PMC2_REWIND", + "BriefDescription": "The speculative event selected for PMC2 rewinds and the counter for PMC2 is not charged." + }, + { + "EventCode": "30022", + "EventName": "PM_PMC4_SAVED", + "BriefDescription": "The conditions for the speculative event selected for PMC4 are met and PMC4 is charged." + }, + { + "EventCode": "30024", + "EventName": "PM_PMC6_OVERFLOW", + "BriefDescription": "The event selected for PMC6 caused the event counter to overflow." + }, + { + "EventCode": "30028", + "EventName": "PM_CMPL_STALL_MEM_ECC", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC." + }, + { + "EventCode": "30036", + "EventName": "PM_EXEC_STALL_SIMPLE_FX", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a simple fixed point instruction executing in the Load Store Unit." + }, + { + "EventCode": "3003A", + "EventName": "PM_CMPL_STALL_EXCEPTION", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete." + }, + { + "EventCode": "3F044", + "EventName": "PM_VSU2_ISSUE", + "BriefDescription": "VSU instructions issued to VSU pipe 2." + }, + { + "EventCode": "30058", + "EventName": "PM_TLBIE_FIN", + "BriefDescription": "TLBIE instructions finished in the LSU. Two TLBIEs can finish each cycle. All will be counted." + }, + { + "EventCode": "3D058", + "EventName": "PM_SCALAR_FSQRT_FDIV_ISSUE", + "BriefDescription": "Scalar versions of four floating point operations: fdiv,fsqrt (xvdivdp, xvdivsp, xvsqrtdp, xvsqrtsp)." + }, + { + "EventCode": "30066", + "EventName": "PM_LSU_FIN", + "BriefDescription": "LSU Finished an internal operation (up to 4 per cycle)." + }, + { + "EventCode": "40004", + "EventName": "PM_FXU_ISSUE", + "BriefDescription": "A fixed point instruction was issued to the VSU." + }, + { + "EventCode": "40008", + "EventName": "PM_NTC_ALL_FIN", + "BriefDescription": "Cycles in which both instructions in the ICT entry pair show as finished. These are the cycles between finish and completion for the oldest pair of instructions in the pipeline." + }, + { + "EventCode": "40010", + "EventName": "PM_PMC3_OVERFLOW", + "BriefDescription": "The event selected for PMC3 caused the event counter to overflow." + }, + { + "EventCode": "4C012", + "EventName": "PM_EXEC_STALL_DERAT_ONLY_MISS", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline suffered an ERAT miss and waited for it resolve." + }, + { + "EventCode": "4C018", + "EventName": "PM_CMPL_STALL", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline cannot complete because the thread was blocked for any reason." + }, + { + "EventCode": "4C01E", + "EventName": "PM_LSU_ST3_FIN", + "BriefDescription": "LSU Finished an internal operation in ST3 port." + }, + { + "EventCode": "4D018", + "EventName": "PM_EXEC_STALL_BRU", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was executing in the Branch unit." + }, + { + "EventCode": "4D01A", + "EventName": "PM_CMPL_STALL_HWSYNC", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a hwsync waiting for response from L2 before completing." + }, + { + "EventCode": "4D01C", + "EventName": "PM_EXEC_STALL_TLBIEL", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIEL instruction executing in the Load Store Unit. TLBIEL instructions have lower overhead than TLBIE instructions because they don't get set to the nest." + }, + { + "EventCode": "4E012", + "EventName": "PM_EXEC_STALL_UNKNOWN", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline completed without an ntf_type pulse. The ntf_pulse was missed by the ISU because the NTF finishes and completions came too close together." + }, + { + "EventCode": "4D020", + "EventName": "PM_VSU3_ISSUE", + "BriefDescription": "VSU instruction was issued to VSU pipe 3." + }, + { + "EventCode": "40132", + "EventName": "PM_MRK_LSU_FIN", + "BriefDescription": "LSU marked instruction finish." + }, + { + "EventCode": "45058", + "EventName": "PM_IC_MISS_CMPL", + "BriefDescription": "Non-speculative icache miss, counted at completion." + }, + { + "EventCode": "4D050", + "EventName": "PM_VSU_NON_FLOP_CMPL", + "BriefDescription": "Non-floating point VSU instructions completed." + }, + { + "EventCode": "4D052", + "EventName": "PM_2FLOP_CMPL", + "BriefDescription": "Double Precision vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg completed." + }, + { + "EventCode": "400F2", + "EventName": "PM_1PLUS_PPC_DISP", + "BriefDescription": "Cycles at least one Instr Dispatched." + }, + { + "EventCode": "400F8", + "EventName": "PM_FLUSH", + "BriefDescription": "Flush (any type)." + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/power10/pmc.json b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json new file mode 100644 index 000000000000..ea122a91ceb0 --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power10/pmc.json @@ -0,0 +1,22 @@ +[ + { + "EventCode": "301E8", + "EventName": "PM_THRESH_EXC_64", + "BriefDescription": "Threshold counter exceeded a value of 64." + }, + { + "EventCode": "45050", + "EventName": "PM_1FLOP_CMPL", + "BriefDescription": "One floating point instruction completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)." + }, + { + "EventCode": "45052", + "EventName": "PM_4FLOP_CMPL", + "BriefDescription": "Four floating point instructions completed (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg)." + }, + { + "EventCode": "4D054", + "EventName": "PM_8FLOP_CMPL", + "BriefDescription": "Four Double Precision vector instructions completed." + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/power10/translation.json b/tools/perf/pmu-events/arch/powerpc/power10/translation.json new file mode 100644 index 000000000000..5a714e3dd71a --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power10/translation.json @@ -0,0 +1,57 @@ +[ + { + "EventCode": "1F15E", + "EventName": "PM_MRK_START_PROBE_NOP_CMPL", + "BriefDescription": "Marked Start probe nop (AND R0,R0,R0) completed." + }, + { + "EventCode": "20016", + "EventName": "PM_ST_FIN", + "BriefDescription": "Store finish count. Includes speculative activity." + }, + { + "EventCode": "20018", + "EventName": "PM_ST_FWD", + "BriefDescription": "Store forwards that finished." + }, + { + "EventCode": "2011C", + "EventName": "PM_MRK_NTF_CYC", + "BriefDescription": "Cycles during which the marked instruction is the oldest in the pipeline (NTF or NTC)." + }, + { + "EventCode": "2E01C", + "EventName": "PM_EXEC_STALL_TLBIE", + "BriefDescription": "Cycles in which the oldest instruction in the pipeline was a TLBIE instruction executing in the Load Store Unit." + }, + { + "EventCode": "201E6", + "EventName": "PM_THRESH_EXC_32", + "BriefDescription": "Threshold counter exceeded a value of 32." + }, + { + "EventCode": "200F0", + "EventName": "PM_ST_CMPL", + "BriefDescription": "Stores completed from S2Q (2nd-level store queue). This event includes regular stores, stcx and cache inhibited stores. The following operations are excluded (pteupdate, snoop tlbie complete, store atomics, miso, load atomic payloads, tlbie, tlbsync, slbieg, isync, msgsnd, slbiag, cpabort, copy, tcheck, tend, stsync, dcbst, icbi, dcbf, hwsync, lwsync, ptesync, eieio, msgsync)." + }, + { + "EventCode": "200FE", + "EventName": "PM_DATA_FROM_L2MISS", + "BriefDescription": "The processor's data cache was reloaded from a source other than the local core's L1 or L2 due to a demand miss." + }, + { + "EventCode": "30010", + "EventName": "PM_PMC2_OVERFLOW", + "BriefDescription": "The event selected for PMC2 caused the event counter to overflow." + }, + { + "EventCode": "4D010", + "EventName": "PM_PMC1_SAVED", + "BriefDescription": "The conditions for the speculative event selected for PMC1 are met and PMC1 is charged." + }, + { + "EventCode": "4D05C", + "EventName": "PM_DPP_FLOP_CMPL", + "BriefDescription": "Double-Precision or Quad-Precision instructions completed." + } +] -- cgit v1.2.3 From f07952b17969777196512368a216baae1ad45ea6 Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Mon, 19 Apr 2021 12:41:44 +0300 Subject: perf stat: Basic support for iostat in perf Add basic flow for a new iostat mode in perf. Mode is intended to provide four I/O performance metrics per each PCIe root port: Inbound Read, Inbound Write, Outbound Read, Outbound Write. The actual code to compute the metrics and attribute it to root port is in follow-on patches. Signed-off-by: Alexander Antonov Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Alexey V Bayduraev Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210419094147.15909-2-alexander.antonov@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 21 ++++++++++++++++- tools/perf/util/Build | 1 + tools/perf/util/iostat.c | 53 ++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/iostat.h | 47 +++++++++++++++++++++++++++++++++++++ tools/perf/util/stat-display.c | 40 +++++++++++++++++++++++-------- tools/perf/util/stat-shadow.c | 5 +++- tools/perf/util/stat.h | 1 + 7 files changed, 156 insertions(+), 12 deletions(-) create mode 100644 tools/perf/util/iostat.c create mode 100644 tools/perf/util/iostat.h diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2a2c15cac80a..ba5b31aab86b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -68,6 +68,7 @@ #include "util/affinity.h" #include "util/pfm.h" #include "util/bpf_counter.h" +#include "util/iostat.h" #include "asm/bug.h" #include @@ -212,7 +213,8 @@ static struct perf_stat_config stat_config = { .walltime_nsecs_stats = &walltime_nsecs_stats, .big_num = true, .ctl_fd = -1, - .ctl_fd_ack = -1 + .ctl_fd_ack = -1, + .iostat_run = false, }; static bool cpus_map_matched(struct evsel *a, struct evsel *b) @@ -1268,6 +1270,9 @@ static struct option stat_options[] = { "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", parse_control_option), + OPT_CALLBACK_OPTARG(0, "iostat", &evsel_list, &stat_config, "default", + "measure I/O performance metrics provided by arch/platform", + iostat_parse), OPT_END() }; @@ -2341,6 +2346,17 @@ int cmd_stat(int argc, const char **argv) goto out; } + if (stat_config.iostat_run) { + status = iostat_prepare(evsel_list, &stat_config); + if (status) + goto out; + if (iostat_mode == IOSTAT_LIST) { + iostat_list(evsel_list, &stat_config); + goto out; + } else if (verbose) + iostat_list(evsel_list, &stat_config); + } + if (add_default_attributes()) goto out; @@ -2516,6 +2532,9 @@ int cmd_stat(int argc, const char **argv) perf_stat__exit_aggr_mode(); evlist__free_stats(evsel_list); out: + if (stat_config.iostat_run) + iostat_release(evsel_list); + zfree(&stat_config.walltime_run); if (smi_cost && smi_reset) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index e3e12f9d4733..7dd815712d60 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -102,6 +102,7 @@ perf-y += rwsem.o perf-y += thread-stack.o perf-y += spark.o perf-y += topdown.o +perf-y += iostat.o perf-y += stream.o perf-$(CONFIG_AUXTRACE) += auxtrace.o perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ diff --git a/tools/perf/util/iostat.c b/tools/perf/util/iostat.c new file mode 100644 index 000000000000..57dd49da28fe --- /dev/null +++ b/tools/perf/util/iostat.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "util/iostat.h" +#include "util/debug.h" + +enum iostat_mode_t iostat_mode = IOSTAT_NONE; + +__weak int iostat_prepare(struct evlist *evlist __maybe_unused, + struct perf_stat_config *config __maybe_unused) +{ + return -1; +} + +__weak int iostat_parse(const struct option *opt __maybe_unused, + const char *str __maybe_unused, + int unset __maybe_unused) +{ + pr_err("iostat mode is not supported on current platform\n"); + return -1; +} + +__weak void iostat_list(struct evlist *evlist __maybe_unused, + struct perf_stat_config *config __maybe_unused) +{ +} + +__weak void iostat_release(struct evlist *evlist __maybe_unused) +{ +} + +__weak void iostat_print_header_prefix(struct perf_stat_config *config __maybe_unused) +{ +} + +__weak void iostat_print_metric(struct perf_stat_config *config __maybe_unused, + struct evsel *evsel __maybe_unused, + struct perf_stat_output_ctx *out __maybe_unused) +{ +} + +__weak void iostat_prefix(struct evlist *evlist __maybe_unused, + struct perf_stat_config *config __maybe_unused, + char *prefix __maybe_unused, + struct timespec *ts __maybe_unused) +{ +} + +__weak void iostat_print_counters(struct evlist *evlist __maybe_unused, + struct perf_stat_config *config __maybe_unused, + struct timespec *ts __maybe_unused, + char *prefix __maybe_unused, + iostat_print_counter_t print_cnt_cb __maybe_unused) +{ +} diff --git a/tools/perf/util/iostat.h b/tools/perf/util/iostat.h new file mode 100644 index 000000000000..23c1c46a331a --- /dev/null +++ b/tools/perf/util/iostat.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * perf iostat + * + * Copyright (C) 2020, Intel Corporation + * + * Authors: Alexander Antonov + */ + +#ifndef _IOSTAT_H +#define _IOSTAT_H + +#include +#include "util/stat.h" +#include "util/parse-events.h" +#include "util/evlist.h" + +struct option; +struct perf_stat_config; +struct evlist; +struct timespec; + +enum iostat_mode_t { + IOSTAT_NONE = -1, + IOSTAT_RUN = 0, + IOSTAT_LIST = 1 +}; + +extern enum iostat_mode_t iostat_mode; + +typedef void (*iostat_print_counter_t)(struct perf_stat_config *, struct evsel *, char *); + +int iostat_prepare(struct evlist *evlist, struct perf_stat_config *config); +int iostat_parse(const struct option *opt, const char *str, + int unset __maybe_unused); +void iostat_list(struct evlist *evlist, struct perf_stat_config *config); +void iostat_release(struct evlist *evlist); +void iostat_prefix(struct evlist *evlist, struct perf_stat_config *config, + char *prefix, struct timespec *ts); +void iostat_print_header_prefix(struct perf_stat_config *config); +void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel, + struct perf_stat_output_ctx *out); +void iostat_print_counters(struct evlist *evlist, + struct perf_stat_config *config, struct timespec *ts, + char *prefix, iostat_print_counter_t print_cnt_cb); + +#endif /* _IOSTAT_H */ diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index d3137bc17065..98664caef6af 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -17,6 +17,7 @@ #include "cgroup.h" #include #include "util.h" +#include "iostat.h" #define CNTR_NOT_SUPPORTED "" #define CNTR_NOT_COUNTED "" @@ -310,6 +311,11 @@ static void print_metric_header(struct perf_stat_config *config, struct outstate *os = ctx; char tbuf[1024]; + /* In case of iostat, print metric header for first root port only */ + if (config->iostat_run && + os->evsel->priv != os->evsel->evlist->selected->priv) + return; + if (!valid_only_metric(unit)) return; unit = fixunit(tbuf, os->evsel, unit); @@ -958,8 +964,11 @@ static void print_metric_headers(struct perf_stat_config *config, if (config->csv_output) { if (config->interval) fputs("time,", config->output); - fputs(aggr_header_csv[config->aggr_mode], config->output); + if (!config->iostat_run) + fputs(aggr_header_csv[config->aggr_mode], config->output); } + if (config->iostat_run) + iostat_print_header_prefix(config); /* Print metrics headers only */ evlist__for_each_entry(evlist, counter) { @@ -989,7 +998,8 @@ static void print_interval(struct perf_stat_config *config, if (config->interval_clear) puts(CONSOLE_CLEAR); - sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep); + if (!config->iostat_run) + sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep); if ((num_print_interval == 0 && !config->csv_output) || config->interval_clear) { switch (config->aggr_mode) { @@ -1025,9 +1035,11 @@ static void print_interval(struct perf_stat_config *config, break; case AGGR_GLOBAL: default: - fprintf(output, "# time"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); + if (!config->iostat_run) { + fprintf(output, "# time"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); + } case AGGR_UNSET: break; } @@ -1220,6 +1232,9 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf struct evsel *counter; char buf[64], *prefix = NULL; + if (config->iostat_run) + evlist->selected = evlist__first(evlist); + if (interval) print_interval(config, evlist, prefix = buf, ts); else @@ -1232,7 +1247,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf print_metric_headers(config, evlist, prefix, false); if (num_print_iv++ == 25) num_print_iv = 0; - if (config->aggr_mode == AGGR_GLOBAL && prefix) + if (config->aggr_mode == AGGR_GLOBAL && prefix && !config->iostat_run) fprintf(config->output, "%s", prefix); } @@ -1249,11 +1264,16 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf } break; case AGGR_GLOBAL: - evlist__for_each_entry(evlist, counter) { - print_counter_aggr(config, counter, prefix); + if (config->iostat_run) + iostat_print_counters(evlist, config, ts, prefix = buf, + print_counter_aggr); + else { + evlist__for_each_entry(evlist, counter) { + print_counter_aggr(config, counter, prefix); + } + if (metric_only) + fputc('\n', config->output); } - if (metric_only) - fputc('\n', config->output); break; case AGGR_NONE: if (metric_only) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 3f800e71126f..39967a45f55b 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -11,6 +11,7 @@ #include "cgroup.h" #include "units.h" #include +#include "iostat.h" /* * AGGR_GLOBAL: Use CPU 0 @@ -962,7 +963,9 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct metric_event *me; int num = 1; - if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { + if (config->iostat_run) { + iostat_print_metric(config, evsel, out); + } else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { total = runtime_stat_avg(st, STAT_CYCLES, cpu, &rsd); if (total) { diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 48e6a06233fa..32c8527de347 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -133,6 +133,7 @@ struct perf_stat_config { bool metric_no_merge; bool stop_read_counter; bool quiet; + bool iostat_run; FILE *output; unsigned int interval; unsigned int timeout; -- cgit v1.2.3 From 19776d3cede733dc9be79d880339acb9b2f456d6 Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Mon, 19 Apr 2021 12:41:45 +0300 Subject: perf stat: Helper functions for PCIe root ports list in iostat mode Introduce helper functions to control PCIe root ports list. These helpers will be used in the follow-up patch. Signed-off-by: Alexander Antonov Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Alexey V Bayduraev Cc: Andi Kleen Cc: Arnaldo Carvalho de Melo Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210419094147.15909-3-alexander.antonov@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/iostat.c | 110 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 tools/perf/arch/x86/util/iostat.c diff --git a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c new file mode 100644 index 000000000000..c4471f8efa5e --- /dev/null +++ b/tools/perf/arch/x86/util/iostat.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * perf iostat + * + * Copyright (C) 2020, Intel Corporation + * + * Authors: Alexander Antonov + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "util/cpumap.h" +#include "util/debug.h" +#include "util/iostat.h" +#include "util/counts.h" +#include "path.h" + +struct iio_root_port { + u32 domain; + u8 bus; + u8 die; + u8 pmu_idx; + int idx; +}; + +struct iio_root_ports_list { + struct iio_root_port **rps; + int nr_entries; +}; + +static void iio_root_port_show(FILE *output, + const struct iio_root_port * const rp) +{ + if (output && rp) + fprintf(output, "S%d-uncore_iio_%d<%04x:%02x>\n", + rp->die, rp->pmu_idx, rp->domain, rp->bus); +} + +static struct iio_root_port *iio_root_port_new(u32 domain, u8 bus, + u8 die, u8 pmu_idx) +{ + struct iio_root_port *p = calloc(1, sizeof(*p)); + + if (p) { + p->domain = domain; + p->bus = bus; + p->die = die; + p->pmu_idx = pmu_idx; + } + return p; +} + +static void iio_root_ports_list_free(struct iio_root_ports_list *list) +{ + int idx; + + if (list) { + for (idx = 0; idx < list->nr_entries; idx++) + free(list->rps[idx]); + free(list->rps); + free(list); + } +} + +static struct iio_root_port *iio_root_port_find_by_notation( + const struct iio_root_ports_list * const list, u32 domain, u8 bus) +{ + int idx; + struct iio_root_port *rp; + + if (list) { + for (idx = 0; idx < list->nr_entries; idx++) { + rp = list->rps[idx]; + if (rp && rp->domain == domain && rp->bus == bus) + return rp; + } + } + return NULL; +} + +static int iio_root_ports_list_insert(struct iio_root_ports_list *list, + struct iio_root_port * const rp) +{ + struct iio_root_port **tmp_buf; + + if (list && rp) { + rp->idx = list->nr_entries++; + tmp_buf = realloc(list->rps, + list->nr_entries * sizeof(*list->rps)); + if (!tmp_buf) { + pr_err("Failed to realloc memory\n"); + return -ENOMEM; + } + tmp_buf[rp->idx] = rp; + list->rps = tmp_buf; + } + return 0; +} -- cgit v1.2.3 From f9ed693e8bc0e7de9eb766a3c7178590e8bb6cd5 Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Mon, 19 Apr 2021 12:41:46 +0300 Subject: perf stat: Enable iostat mode for x86 platforms This functionality is based on recently introduced sysfs attributes for Intel® Xeon® Scalable processor family (code name Skylake-SP): Commit bb42b3d39781d7fc ("perf/x86/intel/uncore: Expose an Uncore unit to IIO PMON mapping") Mode is intended to provide four I/O performance metrics in MB per each PCIe root port: - Inbound Read: I/O devices below root port read from the host memory - Inbound Write: I/O devices below root port write to the host memory - Outbound Read: CPU reads from I/O devices below root port - Outbound Write: CPU writes to I/O devices below root port Each metric requiries only one uncore event which increments at every 4B transfer in corresponding direction. The formulas to compute metrics are generic: #EventCount * 4B / (1024 * 1024) Acked-by: Namhyung Kim Signed-off-by: Alexander Antonov Cc: Alexander Shishkin Cc: Alexey V Bayduraev Cc: Andi Kleen Cc: Arnaldo Carvalho de Melo Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210419094147.15909-4-alexander.antonov@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-iostat.txt | 88 ++++++++ tools/perf/Makefile.perf | 5 +- tools/perf/arch/x86/util/Build | 1 + tools/perf/arch/x86/util/iostat.c | 360 +++++++++++++++++++++++++++++++ tools/perf/command-list.txt | 1 + tools/perf/perf-iostat.sh | 12 ++ 6 files changed, 466 insertions(+), 1 deletion(-) create mode 100644 tools/perf/Documentation/perf-iostat.txt create mode 100644 tools/perf/perf-iostat.sh diff --git a/tools/perf/Documentation/perf-iostat.txt b/tools/perf/Documentation/perf-iostat.txt new file mode 100644 index 000000000000..165176944031 --- /dev/null +++ b/tools/perf/Documentation/perf-iostat.txt @@ -0,0 +1,88 @@ +perf-iostat(1) +=============== + +NAME +---- +perf-iostat - Show I/O performance metrics + +SYNOPSIS +-------- +[verse] +'perf iostat' list +'perf iostat' -- [] + +DESCRIPTION +----------- +Mode is intended to provide four I/O performance metrics per each PCIe root port: + +- Inbound Read - I/O devices below root port read from the host memory, in MB + +- Inbound Write - I/O devices below root port write to the host memory, in MB + +- Outbound Read - CPU reads from I/O devices below root port, in MB + +- Outbound Write - CPU writes to I/O devices below root port, in MB + +OPTIONS +------- +...:: + Any command you can specify in a shell. + +list:: + List all PCIe root ports. + +:: + Select the root ports for monitoring. Comma-separated list is supported. + +EXAMPLES +-------- + +1. List all PCIe root ports (example for 2-S platform): + + $ perf iostat list + S0-uncore_iio_0<0000:00> + S1-uncore_iio_0<0000:80> + S0-uncore_iio_1<0000:17> + S1-uncore_iio_1<0000:85> + S0-uncore_iio_2<0000:3a> + S1-uncore_iio_2<0000:ae> + S0-uncore_iio_3<0000:5d> + S1-uncore_iio_3<0000:d7> + +2. Collect metrics for all PCIe root ports: + + $ perf iostat -- dd if=/dev/zero of=/dev/nvme0n1 bs=1M oflag=direct + 357708+0 records in + 357707+0 records out + 375083606016 bytes (375 GB, 349 GiB) copied, 215.974 s, 1.7 GB/s + + Performance counter stats for 'system wide': + + port Inbound Read(MB) Inbound Write(MB) Outbound Read(MB) Outbound Write(MB) + 0000:00 1 0 2 3 + 0000:80 0 0 0 0 + 0000:17 352552 43 0 21 + 0000:85 0 0 0 0 + 0000:3a 3 0 0 0 + 0000:ae 0 0 0 0 + 0000:5d 0 0 0 0 + 0000:d7 0 0 0 0 + +3. Collect metrics for comma-separated list of PCIe root ports: + + $ perf iostat 0000:17,0:3a -- dd if=/dev/zero of=/dev/nvme0n1 bs=1M oflag=direct + 357708+0 records in + 357707+0 records out + 375083606016 bytes (375 GB, 349 GiB) copied, 197.08 s, 1.9 GB/s + + Performance counter stats for 'system wide': + + port Inbound Read(MB) Inbound Write(MB) Outbound Read(MB) Outbound Write(MB) + 0000:17 358559 44 0 22 + 0000:3a 3 2 0 0 + + 197.081983474 seconds time elapsed + +SEE ALSO +-------- +linkperf:perf-stat[1] \ No newline at end of file diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 090fb9d62665..6240fbb1646e 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -283,6 +283,7 @@ SCRIPT_SH = SCRIPT_SH += perf-archive.sh SCRIPT_SH += perf-with-kcore.sh +SCRIPT_SH += perf-iostat.sh grep-libs = $(filter -l%,$(1)) strip-libs = $(filter-out -l%,$(1)) @@ -948,6 +949,8 @@ endif $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' $(call QUIET_INSTALL, perf-with-kcore) \ $(INSTALL) $(OUTPUT)perf-with-kcore -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' + $(call QUIET_INSTALL, perf-iostat) \ + $(INSTALL) $(OUTPUT)perf-iostat -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' ifndef NO_LIBAUDIT $(call QUIET_INSTALL, strace/groups) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'; \ @@ -1042,7 +1045,7 @@ bpf-skel-clean: $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean - $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) + $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(OUTPUT)perf-iostat $(LANG_BINDINGS) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 0c72d418932e..dbeb04cb336e 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -9,6 +9,7 @@ perf-y += event.o perf-y += evlist.o perf-y += mem-events.o perf-y += evsel.o +perf-y += iostat.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o diff --git a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c index c4471f8efa5e..d63acb782b63 100644 --- a/tools/perf/arch/x86/util/iostat.c +++ b/tools/perf/arch/x86/util/iostat.c @@ -27,6 +27,36 @@ #include "util/counts.h" #include "path.h" +#ifndef MAX_PATH +#define MAX_PATH 1024 +#endif + +#define UNCORE_IIO_PMU_PATH "devices/uncore_iio_%d" +#define SYSFS_UNCORE_PMU_PATH "%s/"UNCORE_IIO_PMU_PATH +#define PLATFORM_MAPPING_PATH UNCORE_IIO_PMU_PATH"/die%d" + +/* + * Each metric requiries one IIO event which increments at every 4B transfer + * in corresponding direction. The formulas to compute metrics are generic: + * #EventCount * 4B / (1024 * 1024) + */ +static const char * const iostat_metrics[] = { + "Inbound Read(MB)", + "Inbound Write(MB)", + "Outbound Read(MB)", + "Outbound Write(MB)", +}; + +static inline int iostat_metrics_count(void) +{ + return sizeof(iostat_metrics) / sizeof(char *); +} + +static const char *iostat_metric_by_idx(int idx) +{ + return *(iostat_metrics + idx % iostat_metrics_count()); +} + struct iio_root_port { u32 domain; u8 bus; @@ -40,6 +70,8 @@ struct iio_root_ports_list { int nr_entries; }; +static struct iio_root_ports_list *root_ports; + static void iio_root_port_show(FILE *output, const struct iio_root_port * const rp) { @@ -108,3 +140,331 @@ static int iio_root_ports_list_insert(struct iio_root_ports_list *list, } return 0; } + +static int iio_mapping(u8 pmu_idx, struct iio_root_ports_list * const list) +{ + char *buf; + char path[MAX_PATH]; + u32 domain; + u8 bus; + struct iio_root_port *rp; + size_t size; + int ret; + + for (int die = 0; die < cpu__max_node(); die++) { + scnprintf(path, MAX_PATH, PLATFORM_MAPPING_PATH, pmu_idx, die); + if (sysfs__read_str(path, &buf, &size) < 0) { + if (pmu_idx) + goto out; + pr_err("Mode iostat is not supported\n"); + return -1; + } + ret = sscanf(buf, "%04x:%02hhx", &domain, &bus); + free(buf); + if (ret != 2) { + pr_err("Invalid mapping data: iio_%d; die%d\n", + pmu_idx, die); + return -1; + } + rp = iio_root_port_new(domain, bus, die, pmu_idx); + if (!rp || iio_root_ports_list_insert(list, rp)) { + free(rp); + return -ENOMEM; + } + } +out: + return 0; +} + +static u8 iio_pmu_count(void) +{ + u8 pmu_idx = 0; + char path[MAX_PATH]; + const char *sysfs = sysfs__mountpoint(); + + if (sysfs) { + for (;; pmu_idx++) { + snprintf(path, sizeof(path), SYSFS_UNCORE_PMU_PATH, + sysfs, pmu_idx); + if (access(path, F_OK) != 0) + break; + } + } + return pmu_idx; +} + +static int iio_root_ports_scan(struct iio_root_ports_list **list) +{ + int ret = -ENOMEM; + struct iio_root_ports_list *tmp_list; + u8 pmu_count = iio_pmu_count(); + + if (!pmu_count) { + pr_err("Unsupported uncore pmu configuration\n"); + return -1; + } + + tmp_list = calloc(1, sizeof(*tmp_list)); + if (!tmp_list) + goto err; + + for (u8 pmu_idx = 0; pmu_idx < pmu_count; pmu_idx++) { + ret = iio_mapping(pmu_idx, tmp_list); + if (ret) + break; + } +err: + if (!ret) + *list = tmp_list; + else + iio_root_ports_list_free(tmp_list); + + return ret; +} + +static int iio_root_port_parse_str(u32 *domain, u8 *bus, char *str) +{ + int ret; + regex_t regex; + /* + * Expected format domain:bus: + * Valid domain range [0:ffff] + * Valid bus range [0:ff] + * Example: 0000:af, 0:3d, 01:7 + */ + regcomp(®ex, "^([a-f0-9A-F]{1,}):([a-f0-9A-F]{1,2})", REG_EXTENDED); + ret = regexec(®ex, str, 0, NULL, 0); + if (ret || sscanf(str, "%08x:%02hhx", domain, bus) != 2) + pr_warning("Unrecognized root port format: %s\n" + "Please use the following format:\n" + "\t [domain]:[bus]\n" + "\t for example: 0000:3d\n", str); + + regfree(®ex); + return ret; +} + +static int iio_root_ports_list_filter(struct iio_root_ports_list **list, + const char *filter) +{ + char *tok, *tmp, *filter_copy = NULL; + struct iio_root_port *rp; + u32 domain; + u8 bus; + int ret = -ENOMEM; + struct iio_root_ports_list *tmp_list = calloc(1, sizeof(*tmp_list)); + + if (!tmp_list) + goto err; + + filter_copy = strdup(filter); + if (!filter_copy) + goto err; + + for (tok = strtok_r(filter_copy, ",", &tmp); tok; + tok = strtok_r(NULL, ",", &tmp)) { + if (!iio_root_port_parse_str(&domain, &bus, tok)) { + rp = iio_root_port_find_by_notation(*list, domain, bus); + if (rp) { + (*list)->rps[rp->idx] = NULL; + ret = iio_root_ports_list_insert(tmp_list, rp); + if (ret) { + free(rp); + goto err; + } + } else if (!iio_root_port_find_by_notation(tmp_list, + domain, bus)) + pr_warning("Root port %04x:%02x were not found\n", + domain, bus); + } + } + + if (tmp_list->nr_entries == 0) { + pr_err("Requested root ports were not found\n"); + ret = -EINVAL; + } +err: + iio_root_ports_list_free(*list); + if (ret) + iio_root_ports_list_free(tmp_list); + else + *list = tmp_list; + + free(filter_copy); + return ret; +} + +static int iostat_event_group(struct evlist *evl, + struct iio_root_ports_list *list) +{ + int ret; + int idx; + const char *iostat_cmd_template = + "{uncore_iio_%x/event=0x83,umask=0x04,ch_mask=0xF,fc_mask=0x07/,\ + uncore_iio_%x/event=0x83,umask=0x01,ch_mask=0xF,fc_mask=0x07/,\ + uncore_iio_%x/event=0xc0,umask=0x04,ch_mask=0xF,fc_mask=0x07/,\ + uncore_iio_%x/event=0xc0,umask=0x01,ch_mask=0xF,fc_mask=0x07/}"; + const int len_template = strlen(iostat_cmd_template) + 1; + struct evsel *evsel = NULL; + int metrics_count = iostat_metrics_count(); + char *iostat_cmd = calloc(len_template, 1); + + if (!iostat_cmd) + return -ENOMEM; + + for (idx = 0; idx < list->nr_entries; idx++) { + sprintf(iostat_cmd, iostat_cmd_template, + list->rps[idx]->pmu_idx, list->rps[idx]->pmu_idx, + list->rps[idx]->pmu_idx, list->rps[idx]->pmu_idx); + ret = parse_events(evl, iostat_cmd, NULL); + if (ret) + goto err; + } + + evlist__for_each_entry(evl, evsel) { + evsel->priv = list->rps[evsel->idx / metrics_count]; + } + list->nr_entries = 0; +err: + iio_root_ports_list_free(list); + free(iostat_cmd); + return ret; +} + +int iostat_prepare(struct evlist *evlist, struct perf_stat_config *config) +{ + if (evlist->core.nr_entries > 0) { + pr_warning("The -e and -M options are not supported." + "All chosen events/metrics will be dropped\n"); + evlist__delete(evlist); + evlist = evlist__new(); + if (!evlist) + return -ENOMEM; + } + + config->metric_only = true; + config->aggr_mode = AGGR_GLOBAL; + + return iostat_event_group(evlist, root_ports); +} + +int iostat_parse(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + int ret; + struct perf_stat_config *config = (struct perf_stat_config *)opt->data; + + ret = iio_root_ports_scan(&root_ports); + if (!ret) { + config->iostat_run = true; + if (!str) + iostat_mode = IOSTAT_RUN; + else if (!strcmp(str, "list")) + iostat_mode = IOSTAT_LIST; + else { + iostat_mode = IOSTAT_RUN; + ret = iio_root_ports_list_filter(&root_ports, str); + } + } + return ret; +} + +void iostat_list(struct evlist *evlist, struct perf_stat_config *config) +{ + struct evsel *evsel; + struct iio_root_port *rp = NULL; + + evlist__for_each_entry(evlist, evsel) { + if (rp != evsel->priv) { + rp = evsel->priv; + iio_root_port_show(config->output, rp); + } + } +} + +void iostat_release(struct evlist *evlist) +{ + struct evsel *evsel; + struct iio_root_port *rp = NULL; + + evlist__for_each_entry(evlist, evsel) { + if (rp != evsel->priv) { + rp = evsel->priv; + free(evsel->priv); + } + } +} + +void iostat_prefix(struct evlist *evlist, + struct perf_stat_config *config, + char *prefix, struct timespec *ts) +{ + struct iio_root_port *rp = evlist->selected->priv; + + if (rp) { + if (ts) + sprintf(prefix, "%6lu.%09lu%s%04x:%02x%s", + ts->tv_sec, ts->tv_nsec, + config->csv_sep, rp->domain, rp->bus, + config->csv_sep); + else + sprintf(prefix, "%04x:%02x%s", rp->domain, rp->bus, + config->csv_sep); + } +} + +void iostat_print_header_prefix(struct perf_stat_config *config) +{ + if (config->csv_output) + fputs("port,", config->output); + else if (config->interval) + fprintf(config->output, "# time port "); + else + fprintf(config->output, " port "); +} + +void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel, + struct perf_stat_output_ctx *out) +{ + double iostat_value = 0; + u64 prev_count_val = 0; + const char *iostat_metric = iostat_metric_by_idx(evsel->idx); + u8 die = ((struct iio_root_port *)evsel->priv)->die; + struct perf_counts_values *count = perf_counts(evsel->counts, die, 0); + + if (count->run && count->ena) { + if (evsel->prev_raw_counts && !out->force_header) { + struct perf_counts_values *prev_count = + perf_counts(evsel->prev_raw_counts, die, 0); + + prev_count_val = prev_count->val; + prev_count->val = count->val; + } + iostat_value = (count->val - prev_count_val) / + ((double) count->run / count->ena); + } + out->print_metric(config, out->ctx, NULL, "%8.0f", iostat_metric, + iostat_value / (256 * 1024)); +} + +void iostat_print_counters(struct evlist *evlist, + struct perf_stat_config *config, struct timespec *ts, + char *prefix, iostat_print_counter_t print_cnt_cb) +{ + void *perf_device = NULL; + struct evsel *counter = evlist__first(evlist); + + evlist__set_selected(evlist, counter); + iostat_prefix(evlist, config, prefix, ts); + fprintf(config->output, "%s", prefix); + evlist__for_each_entry(evlist, counter) { + perf_device = evlist->selected->priv; + if (perf_device && perf_device != counter->priv) { + evlist__set_selected(evlist, counter); + iostat_prefix(evlist, config, prefix, ts); + fprintf(config->output, "\n%s", prefix); + } + print_cnt_cb(config, counter, prefix); + } + fputc('\n', config->output); +} diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 825a12e8d694..4aa034aefa33 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -14,6 +14,7 @@ perf-config mainporcelain common perf-evlist mainporcelain common perf-ftrace mainporcelain common perf-inject mainporcelain common +perf-iostat mainporcelain common perf-kallsyms mainporcelain common perf-kmem mainporcelain common perf-kvm mainporcelain common diff --git a/tools/perf/perf-iostat.sh b/tools/perf/perf-iostat.sh new file mode 100644 index 000000000000..e562f252d56f --- /dev/null +++ b/tools/perf/perf-iostat.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# perf iostat +# Alexander Antonov + +if [[ "$1" == "list" ]] || [[ "$1" =~ ([a-f0-9A-F]{1,}):([a-f0-9A-F]{1,2})(,)? ]]; then + DELIMITER="=" +else + DELIMITER=" " +fi + +perf stat --iostat$DELIMITER$* -- cgit v1.2.3 From 537f1e38f31a2d2b9941f16d6e2a9ab24cdab086 Mon Sep 17 00:00:00 2001 From: Alexander Antonov Date: Mon, 19 Apr 2021 12:41:47 +0300 Subject: perf: Update .gitignore file After a "make -C tools/perf", git reports the following untracked file: perf-iostat Add this generated file to perf's .gitignore file. Signed-off-by: Alexander Antonov Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Alexey V Bayduraev Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210419094147.15909-5-alexander.antonov@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index f3f84781fd74..e555e9729758 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -20,6 +20,7 @@ perf.data.old output.svg perf-archive perf-with-kcore +perf-iostat tags TAGS cscope* -- cgit v1.2.3 From f89a82a82b20261e5778132f5237971991bad8e6 Mon Sep 17 00:00:00 2001 From: Martin Liška Date: Sun, 21 Feb 2021 13:46:36 +0100 Subject: perf annotate: Add line number like in TUI and source location at EOL The patch changes the output format in 2 ways: - line number is displayed for all source lines (matching TUI mode) - source locations for the hottest lines are printed at the line end in order to preserve layout Before: 0.00 : 405ef1: inc %r15 : tmpsd * (TD + tmpsd * TDD))); 0.01 : 405ef4: vfmadd213sd 0x2b9b3(%rip),%xmm0,%xmm3 # 4318b0 <_IO_stdin_used+0x8b0> : tmpsd * (TC + eff.c:1811 0.67 : 405efd: vfmadd213sd 0x2b9b2(%rip),%xmm0,%xmm3 # 4318b8 <_IO_stdin_used+0x8b8> : TA + tmpsd * (TB + 0.35 : 405f06: vfmadd213sd 0x2b9b1(%rip),%xmm0,%xmm3 # 4318c0 <_IO_stdin_used+0x8c0> : dumbo = eff.c:1809 1.41 : 405f0f: vfmadd213sd 0x2b9b0(%rip),%xmm0,%xmm3 # 4318c8 <_IO_stdin_used+0x8c8> : sumi -= sj * tmpsd * dij2i * dumbo; eff.c:1813 2.58 : 405f18: vmulsd %xmm3,%xmm0,%xmm0 2.81 : 405f1c: vfnmadd213sd 0x30(%rsp),%xmm1,%xmm0 3.78 : 405f23: vmovsd %xmm0,0x30(%rsp) : for (k = 0; k < lpears[i] + upears[i]; k++) { eff.c:1761 0.90 : 405f29: cmp %r15d,%r12d After: 0.00 : 405ef1: inc %r15 : 1812 tmpsd * (TD + tmpsd * TDD))); 0.01 : 405ef4: vfmadd213sd 0x2b9b3(%rip),%xmm0,%xmm3 # 4318b0 <_IO_stdin_used+0x8b0> : 1811 tmpsd * (TC + 0.67 : 405efd: vfmadd213sd 0x2b9b2(%rip),%xmm0,%xmm3 # 4318b8 <_IO_stdin_used+0x8b8> // eff.c:1811 : 1810 TA + tmpsd * (TB + 0.35 : 405f06: vfmadd213sd 0x2b9b1(%rip),%xmm0,%xmm3 # 4318c0 <_IO_stdin_used+0x8c0> : 1809 dumbo = 1.41 : 405f0f: vfmadd213sd 0x2b9b0(%rip),%xmm0,%xmm3 # 4318c8 <_IO_stdin_used+0x8c8> // eff.c:1809 : 1813 sumi -= sj * tmpsd * dij2i * dumbo; 2.58 : 405f18: vmulsd %xmm3,%xmm0,%xmm0 // eff.c:1813 2.81 : 405f1c: vfnmadd213sd 0x30(%rsp),%xmm1,%xmm0 3.78 : 405f23: vmovsd %xmm0,0x30(%rsp) : 1761 for (k = 0; k < lpears[i] + upears[i]; k++) { Where e.g. '// eff.c:1811' shares the same color as the percentantage at the line beginning. Signed-off-by: Martin Liška Link: http://lore.kernel.org/lkml/a0d53f31-f633-5013-c386-a4452391b081@suse.cz Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 18eee25b4976..abe1499a9164 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1368,7 +1368,6 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start { struct disasm_line *dl = container_of(al, struct disasm_line, al); static const char *prev_line; - static const char *prev_color; if (al->offset != -1) { double max_percent = 0.0; @@ -1407,20 +1406,6 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start color = get_percent_color(max_percent); - /* - * Also color the filename and line if needed, with - * the same color than the percentage. Don't print it - * twice for close colored addr with the same filename:line - */ - if (al->path) { - if (!prev_line || strcmp(prev_line, al->path) - || color != prev_color) { - color_fprintf(stdout, color, " %s", al->path); - prev_line = al->path; - prev_color = color; - } - } - for (i = 0; i < nr_percent; i++) { struct annotation_data *data = &al->data[i]; double percent; @@ -1441,6 +1426,19 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start printf(" : "); disasm_line__print(dl, start, addr_fmt_width); + + /* + * Also color the filename and line if needed, with + * the same color than the percentage. Don't print it + * twice for close colored addr with the same filename:line + */ + if (al->path) { + if (!prev_line || strcmp(prev_line, al->path)) { + color_fprintf(stdout, color, " // %s", al->path); + prev_line = al->path; + } + } + printf("\n"); } else if (max_lines && printed >= max_lines) return 1; @@ -1456,7 +1454,7 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start if (!*al->line) printf(" %*s:\n", width, " "); else - printf(" %*s: %*s %s\n", width, " ", addr_fmt_width, " ", al->line); + printf(" %*s: %-*d %s\n", width, " ", addr_fmt_width, al->line_nr, al->line); } return 0; -- cgit v1.2.3 From b96da02bd6b8d7d81b345c94d3d76d8733f5ef60 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 16 Apr 2021 14:41:13 -0700 Subject: perf arm64: Fix off-by-one directory paths. Relative path include works in the regular build due to -I paths but may break in other situations. Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: Stephane Eranian Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: http://lore.kernel.org/lkml/20210416214113.552252-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/kvm-stat.c | 4 ++-- tools/perf/arch/arm64/util/pmu.c | 4 ++-- tools/perf/arch/arm64/util/unwind-libunwind.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/arch/arm64/util/kvm-stat.c b/tools/perf/arch/arm64/util/kvm-stat.c index 50376b9062c1..2303256b7d05 100644 --- a/tools/perf/arch/arm64/util/kvm-stat.c +++ b/tools/perf/arch/arm64/util/kvm-stat.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include "../../util/evsel.h" -#include "../../util/kvm-stat.h" +#include "../../../util/evsel.h" +#include "../../../util/kvm-stat.h" #include "arm64_exception_types.h" #include "debug.h" diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index d3259d61ca75..2234fbd0a912 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 -#include "../../util/cpumap.h" -#include "../../util/pmu.h" +#include "../../../util/cpumap.h" +#include "../../../util/pmu.h" struct pmu_events_map *pmu_events_map__find(void) { diff --git a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c index 1495a9523a23..5aecf88e3de6 100644 --- a/tools/perf/arch/arm64/util/unwind-libunwind.c +++ b/tools/perf/arch/arm64/util/unwind-libunwind.c @@ -4,9 +4,9 @@ #ifndef REMOTE_UNWIND_LIBUNWIND #include #include "perf_regs.h" -#include "../../util/unwind.h" +#include "../../../util/unwind.h" #endif -#include "../../util/debug.h" +#include "../../../util/debug.h" int LIBUNWIND__ARCH_REG_ID(int regnum) { -- cgit v1.2.3 From 59a1a843b028c88b2ed33a459ff2767c737d8d69 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 15 Apr 2021 16:34:16 +0800 Subject: perf data: Fix error return code in perf_data__create_dir() Although 'ret' has been initialized to -1, but it will be reassigned by the "ret = open(...)" statement in the for loop. So that, the value of 'ret' is unknown when asprintf() failed. Reported-by: Hulk Robot Signed-off-by: Zhen Lei Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210415083417.3740-1-thunder.leizhen@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index f29af4fc3d09..8fca4779ae6a 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -35,7 +35,7 @@ void perf_data__close_dir(struct perf_data *data) int perf_data__create_dir(struct perf_data *data, int nr) { struct perf_data_file *files = NULL; - int i, ret = -1; + int i, ret; if (WARN_ON(!data->is_dir)) return -EINVAL; @@ -51,7 +51,8 @@ int perf_data__create_dir(struct perf_data *data, int nr) for (i = 0; i < nr; i++) { struct perf_data_file *file = &files[i]; - if (asprintf(&file->path, "%s/data.%d", data->path, i) < 0) + ret = asprintf(&file->path, "%s/data.%d", data->path, i); + if (ret < 0) goto out_err; ret = open(file->path, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR); -- cgit v1.2.3 From bb7db8699b6cd877c766ce69f3b44ab0830d85a4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 15 Apr 2021 21:15:34 -0300 Subject: perf tools: Add a build-test variant to use in builds from a tarball To use in automated tests inside containers from a tarball generated by 'make perf-tar-src-pkg*', where testing building from a tarball is obviously not needed, so add a 'build-test-tarball' for that case. And don't build with gtk2 as this complicates things for cross builds where we don't always have all the libraries a full perf build requires available for the target arch, ditto for static builds. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 5 ++++- tools/perf/tests/make | 22 ++++++++++++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index b8fc7d972be9..f3fe360a35c6 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -100,7 +100,10 @@ clean: # make -C tools/perf -f tests/make # build-test: - @$(MAKE) SHUF=1 -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile SET_PARALLEL=1 --no-print-directory tarpkg out + @$(MAKE) SHUF=1 -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile SET_PARALLEL=1 --no-print-directory tarpkg make_static make_with_gtk2 out + +build-test-tarball: + @$(MAKE) -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile SET_PARALLEL=1 --no-print-directory out # # All other targets get passed through: diff --git a/tools/perf/tests/make b/tools/perf/tests/make index a90fa043c066..94bd5d215d94 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -155,7 +155,6 @@ run += make_no_syscall_tbl run += make_with_babeltrace run += make_with_clangllvm run += make_with_libpfm4 -run += make_with_gtk2 run += make_help run += make_doc run += make_perf_o @@ -172,7 +171,6 @@ run += make_install_prefix_slash # run += make_install_info # run += make_install_pdf run += make_minimal -run += make_static ifneq ($(call has,ctags),) run += make_tags @@ -307,6 +305,26 @@ $(run): $(call test,$@) && \ rm -rf $@ $$TMP_DEST || (cat $@ ; false) +make_with_gtk2: + $(call clean) + @TMP_DEST=$$(mktemp -d); \ + cmd="cd $(PERF) && $(MAKE_F) $($@) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST"; \ + printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \ + ( eval $$cmd ) >> $@ 2>&1; \ + echo " test: $(call test,$@)" >> $@ 2>&1; \ + $(call test,$@) && \ + rm -rf $@ $$TMP_DEST || (cat $@ ; false) + +make_static: + $(call clean) + @TMP_DEST=$$(mktemp -d); \ + cmd="cd $(PERF) && $(MAKE_F) $($@) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST"; \ + printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \ + ( eval $$cmd ) >> $@ 2>&1; \ + echo " test: $(call test,$@)" >> $@ 2>&1; \ + $(call test,$@) && \ + rm -rf $@ $$TMP_DEST || (cat $@ ; false) + $(run_O): $(call clean) @TMP_O=$$(mktemp -d); \ -- cgit v1.2.3 From a4b0fccfbdb4a2004b97cae3872088570495e274 Mon Sep 17 00:00:00 2001 From: Ray Kinsella Date: Wed, 21 Apr 2021 10:10:09 +0100 Subject: perf tools: Update topdown documentation to permit rdpmc calls Update Topdown documentation to permit calls to rdpmc, and describe interaction with system calls. Signed-off-by: Ray Kinsella Reviewed-by: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Link: http://lore.kernel.org/lkml/20210421091009.1711565-1-mdr@ashroe.eu Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/topdown.txt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/perf/Documentation/topdown.txt b/tools/perf/Documentation/topdown.txt index 10f07f9455b8..c6302df4cf29 100644 --- a/tools/perf/Documentation/topdown.txt +++ b/tools/perf/Documentation/topdown.txt @@ -72,6 +72,7 @@ For example, the perf_event_attr structure can be initialized with The Fixed counter 3 must be the leader of the group. #include +#include #include #include @@ -95,6 +96,11 @@ int slots_fd = perf_event_open(&slots, 0, -1, -1, 0); if (slots_fd < 0) ... error ... +/* Memory mapping the fd permits _rdpmc calls from userspace */ +void *slots_p = mmap(0, getpagesize(), PROT_READ, MAP_SHARED, slots_fd, 0); +if (!slot_p) + .... error ... + /* * Open metrics event file descriptor for current task. * Set slots event as the leader of the group. @@ -110,6 +116,14 @@ int metrics_fd = perf_event_open(&metrics, 0, -1, slots_fd, 0); if (metrics_fd < 0) ... error ... +/* Memory mapping the fd permits _rdpmc calls from userspace */ +void *metrics_p = mmap(0, getpagesize(), PROT_READ, MAP_SHARED, metrics_fd, 0); +if (!metrics_p) + ... error ... + +Note: the file descriptors returned by the perf_event_open calls must be memory +mapped to permit calls to the _rdpmd instruction. Permission may also be granted +by writing the /sys/devices/cpu/rdpmc sysfs node. The RDPMC instruction (or _rdpmc compiler intrinsic) can now be used to read slots and the topdown metrics at different points of the program: @@ -141,6 +155,10 @@ as the parallelism and overlap in the CPU program execution will cause too much measurement inaccuracy. For example instrumenting individual basic blocks is definitely too fine grained. +_rdpmc calls should not be mixed with reading the metrics and slots counters +through system calls, as the kernel will reset these counters after each system +call. + Decoding metrics values ======================= -- cgit v1.2.3 From 464c62f6f6e1c836d7aae68dbf46101de84fdcb7 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Mon, 29 Mar 2021 15:09:03 +0800 Subject: perf vendor events intel: Add missing skylake & icelake model numbers Kernel has supported COMETLAKE/COMETLAKE_L to use the SKYLAKE events and supported TIGERLAKE_L/TIGERLAKE/ROCKETLAKE to use the ICELAKE events. But pmu-events mapfile.csv is missing these model numbers. Now add the missing model numbers to mapfile.csv. Signed-off-by: Jin Yao Reviewed-by: Andi Kleen Cc: Alexander Shishkin Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210329070903.8894-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/mapfile.csv | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 9f97b32533a0..0a6a8c7f937f 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -24,6 +24,7 @@ GenuineIntel-6-1F,v2,nehalemep,core GenuineIntel-6-1A,v2,nehalemep,core GenuineIntel-6-2E,v2,nehalemex,core GenuineIntel-6-[4589]E,v24,skylake,core +GenuineIntel-6-A[56],v24,skylake,core GenuineIntel-6-37,v13,silvermont,core GenuineIntel-6-4D,v13,silvermont,core GenuineIntel-6-4C,v13,silvermont,core @@ -35,6 +36,8 @@ GenuineIntel-6-55-[01234],v1,skylakex,core GenuineIntel-6-55-[56789ABCDEF],v1,cascadelakex,core GenuineIntel-6-7D,v1,icelake,core GenuineIntel-6-7E,v1,icelake,core +GenuineIntel-6-8[CD],v1,icelake,core +GenuineIntel-6-A7,v1,icelake,core GenuineIntel-6-86,v1,tremontx,core AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v2,amdzen1,core AuthenticAMD-23-[[:xdigit:]]+,v1,amdzen2,core -- cgit v1.2.3 From ec8149fba64b719a618b432ce9eea7ce937a523c Mon Sep 17 00:00:00 2001 From: Song Liu Date: Sun, 25 Apr 2021 14:43:29 -0700 Subject: perf util: Move bpf_perf definitions to a libperf header By following the same protocol, other tools can share hardware PMCs with perf. Move perf_event_attr_map_entry and BPF_PERF_DEFAULT_ATTR_MAP_PATH to bpf_perf.h for other tools to use. Signed-off-by: Song Liu Cc: Jiri Olsa Cc: Namhyung Kim Cc: Song Liu Cc: kernel-team@fb.com Link: https://lore.kernel.org/r/20210425214333.1090950-2-song@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/perf/bpf_perf.h | 31 +++++++++++++++++++++++++++++++ tools/perf/util/bpf_counter.c | 27 +++------------------------ 2 files changed, 34 insertions(+), 24 deletions(-) create mode 100644 tools/lib/perf/include/perf/bpf_perf.h diff --git a/tools/lib/perf/include/perf/bpf_perf.h b/tools/lib/perf/include/perf/bpf_perf.h new file mode 100644 index 000000000000..e7cf6ba7b674 --- /dev/null +++ b/tools/lib/perf/include/perf/bpf_perf.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __LIBPERF_BPF_PERF_H +#define __LIBPERF_BPF_PERF_H + +#include /* for __u32 */ + +/* + * bpf_perf uses a hashmap, the attr_map, to track all the leader programs. + * The hashmap is pinned in bpffs. flock() on this file is used to ensure + * no concurrent access to the attr_map. The key of attr_map is struct + * perf_event_attr, and the value is struct perf_event_attr_map_entry. + * + * struct perf_event_attr_map_entry contains two __u32 IDs, bpf_link of the + * leader prog, and the diff_map. Each perf-stat session holds a reference + * to the bpf_link to make sure the leader prog is attached to sched_switch + * tracepoint. + * + * Since the hashmap only contains IDs of the bpf_link and diff_map, it + * does not hold any references to the leader program. Once all perf-stat + * sessions of these events exit, the leader prog, its maps, and the + * perf_events will be freed. + */ +struct perf_event_attr_map_entry { + __u32 link_id; + __u32 diff_map_id; +}; + +/* default attr_map name */ +#define BPF_PERF_DEFAULT_ATTR_MAP_PATH "perf_attr_map" + +#endif /* __LIBPERF_BPF_PERF_H */ diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 81d1df3c4ec0..be484ddbbd5b 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "bpf_counter.h" #include "counts.h" @@ -29,28 +30,6 @@ #include "bpf_skel/bperf_leader.skel.h" #include "bpf_skel/bperf_follower.skel.h" -/* - * bperf uses a hashmap, the attr_map, to track all the leader programs. - * The hashmap is pinned in bpffs. flock() on this file is used to ensure - * no concurrent access to the attr_map. The key of attr_map is struct - * perf_event_attr, and the value is struct perf_event_attr_map_entry. - * - * struct perf_event_attr_map_entry contains two __u32 IDs, bpf_link of the - * leader prog, and the diff_map. Each perf-stat session holds a reference - * to the bpf_link to make sure the leader prog is attached to sched_switch - * tracepoint. - * - * Since the hashmap only contains IDs of the bpf_link and diff_map, it - * does not hold any references to the leader program. Once all perf-stat - * sessions of these events exit, the leader prog, its maps, and the - * perf_events will be freed. - */ -struct perf_event_attr_map_entry { - __u32 link_id; - __u32 diff_map_id; -}; - -#define DEFAULT_ATTR_MAP_PATH "fs/bpf/perf_attr_map" #define ATTR_MAP_SIZE 16 static inline void *u64_to_ptr(__u64 ptr) @@ -341,8 +320,8 @@ static int bperf_lock_attr_map(struct target *target) if (target->attr_map) { scnprintf(path, PATH_MAX, "%s", target->attr_map); } else { - scnprintf(path, PATH_MAX, "%s/%s", sysfs__mountpoint(), - DEFAULT_ATTR_MAP_PATH); + scnprintf(path, PATH_MAX, "%s/fs/bpf/%s", sysfs__mountpoint(), + BPF_PERF_DEFAULT_ATTR_MAP_PATH); } if (access(path, F_OK)) { -- cgit v1.2.3 From fe3dd8263b9f3912a0f3a2f66c0fdb3987d69a1a Mon Sep 17 00:00:00 2001 From: Song Liu Date: Sun, 25 Apr 2021 14:43:30 -0700 Subject: perf bpf: check perf_attr_map is compatible with the perf binary perf_attr_map could be shared among different version of perf binary. Add bperf_attr_map_compatible() to check whether the existing attr_map is compatible with current perf binary. Signed-off-by: Song Liu Cc: Jiri Olsa Cc: Namhyung Kim Cc: Song Liu Cc: kernel-team@fb.com Link: https://lore.kernel.org/r/20210425214333.1090950-3-song@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_counter.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index be484ddbbd5b..5de991ab46af 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -312,6 +312,20 @@ static __u32 bpf_map_get_id(int fd) return map_info.id; } +static bool bperf_attr_map_compatible(int attr_map_fd) +{ + struct bpf_map_info map_info = {0}; + __u32 map_info_len = sizeof(map_info); + int err; + + err = bpf_obj_get_info_by_fd(attr_map_fd, &map_info, &map_info_len); + + if (err) + return false; + return (map_info.key_size == sizeof(struct perf_event_attr)) && + (map_info.value_size == sizeof(struct perf_event_attr_map_entry)); +} + static int bperf_lock_attr_map(struct target *target) { char path[PATH_MAX]; @@ -346,6 +360,11 @@ static int bperf_lock_attr_map(struct target *target) return -1; } + if (!bperf_attr_map_compatible(map_fd)) { + close(map_fd); + return -1; + + } err = flock(map_fd, LOCK_EX); if (err) { close(map_fd); -- cgit v1.2.3 From 112cb56164bc2108a55aee785d841a35aab0616a Mon Sep 17 00:00:00 2001 From: Song Liu Date: Sun, 25 Apr 2021 14:43:31 -0700 Subject: perf stat: Introduce config stat.bpf-counter-events Currently, to use BPF to aggregate perf event counters, the user uses --bpf-counters option. Enable "use bpf by default" events with a config option, stat.bpf-counter-events. Events with name in the option will use BPF. This also enables mixed BPF event and regular event in the same sesssion. For example: perf config stat.bpf-counter-events=instructions perf stat -e instructions,cs The second command will use BPF for "instructions" but not "cs". Signed-off-by: Song Liu Cc: Jiri Olsa Cc: Namhyung Kim Cc: Song Liu Link: https://lore.kernel.org/r/20210425214333.1090950-4-song@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 2 ++ tools/perf/builtin-stat.c | 42 ++++++++++++++++++++-------------- tools/perf/util/bpf_counter.c | 3 ++- tools/perf/util/config.c | 4 ++++ tools/perf/util/evsel.c | 22 ++++++++++++++++++ tools/perf/util/evsel.h | 8 +++++++ tools/perf/util/target.h | 5 ---- 7 files changed, 63 insertions(+), 23 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 6ec5960b08c3..f10e24da23e9 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -97,6 +97,8 @@ report:: Use BPF programs to aggregate readings from perf_events. This allows multiple perf-stat sessions that are counting the same metric (cycles, instructions, etc.) to share hardware counters. + To use BPF programs on common events by default, use + "perf config stat.bpf-counter-events=". --bpf-attr-map:: With option "--bpf-counters", different perf-stat sessions share diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ba5b31aab86b..5ec2190e45cd 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -161,6 +161,7 @@ static const char *smi_cost_attrs = { }; static struct evlist *evsel_list; +static bool all_counters_use_bpf = true; static struct target target = { .uid = UINT_MAX, @@ -401,6 +402,9 @@ static int read_affinity_counters(struct timespec *rs) struct affinity affinity; int i, ncpus, cpu; + if (all_counters_use_bpf) + return 0; + if (affinity__setup(&affinity) < 0) return -1; @@ -415,6 +419,8 @@ static int read_affinity_counters(struct timespec *rs) evlist__for_each_entry(evsel_list, counter) { if (evsel__cpu_iter_skip(counter, cpu)) continue; + if (evsel__is_bpf(counter)) + continue; if (!counter->err) { counter->err = read_counter_cpu(counter, rs, counter->cpu_iter - 1); @@ -431,6 +437,9 @@ static int read_bpf_map_counters(void) int err; evlist__for_each_entry(evsel_list, counter) { + if (!evsel__is_bpf(counter)) + continue; + err = bpf_counter__read(counter); if (err) return err; @@ -441,14 +450,10 @@ static int read_bpf_map_counters(void) static void read_counters(struct timespec *rs) { struct evsel *counter; - int err; if (!stat_config.stop_read_counter) { - if (target__has_bpf(&target)) - err = read_bpf_map_counters(); - else - err = read_affinity_counters(rs); - if (err < 0) + if (read_bpf_map_counters() || + read_affinity_counters(rs)) return; } @@ -537,12 +542,13 @@ static int enable_counters(void) struct evsel *evsel; int err; - if (target__has_bpf(&target)) { - evlist__for_each_entry(evsel_list, evsel) { - err = bpf_counter__enable(evsel); - if (err) - return err; - } + evlist__for_each_entry(evsel_list, evsel) { + if (!evsel__is_bpf(evsel)) + continue; + + err = bpf_counter__enable(evsel); + if (err) + return err; } if (stat_config.initial_delay < 0) { @@ -786,11 +792,11 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) if (affinity__setup(&affinity) < 0) return -1; - if (target__has_bpf(&target)) { - evlist__for_each_entry(evsel_list, counter) { - if (bpf_counter__load(counter, &target)) - return -1; - } + evlist__for_each_entry(evsel_list, counter) { + if (bpf_counter__load(counter, &target)) + return -1; + if (!evsel__is_bpf(counter)) + all_counters_use_bpf = false; } evlist__for_each_cpu (evsel_list, i, cpu) { @@ -807,6 +813,8 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) continue; if (counter->reset_group || counter->errored) continue; + if (evsel__is_bpf(counter)) + continue; try_again: if (create_perf_stat_counter(counter, &stat_config, &target, counter->cpu_iter - 1) < 0) { diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 5de991ab46af..33b1888103df 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -790,7 +790,8 @@ int bpf_counter__load(struct evsel *evsel, struct target *target) { if (target->bpf_str) evsel->bpf_counter_ops = &bpf_program_profiler_ops; - else if (target->use_bpf) + else if (target->use_bpf || + evsel__match_bpf_counter_events(evsel->name)) evsel->bpf_counter_ops = &bperf_ops; if (evsel->bpf_counter_ops) diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 6bcb5ef221f8..63d472b336de 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -18,6 +18,7 @@ #include "util/hist.h" /* perf_hist_config */ #include "util/llvm-utils.h" /* perf_llvm_config */ #include "util/stat.h" /* perf_stat__set_big_num */ +#include "util/evsel.h" /* evsel__hw_names, evsel__use_bpf_counters */ #include "build-id.h" #include "debug.h" #include "config.h" @@ -460,6 +461,9 @@ static int perf_stat_config(const char *var, const char *value) if (!strcmp(var, "stat.no-csv-summary")) perf_stat__set_no_csv_summary(perf_config_bool(var, value)); + if (!strcmp(var, "stat.bpf-counter-events")) + evsel__bpf_counter_events = strdup(value); + /* Add other config variables here. */ return 0; } diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2d2614eeaa20..080ddcfefbcd 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -492,6 +492,28 @@ const char *evsel__hw_names[PERF_COUNT_HW_MAX] = { "ref-cycles", }; +char *evsel__bpf_counter_events; + +bool evsel__match_bpf_counter_events(const char *name) +{ + int name_len; + bool match; + char *ptr; + + if (!evsel__bpf_counter_events) + return false; + + ptr = strstr(evsel__bpf_counter_events, name); + name_len = strlen(name); + + /* check name matches a full token in evsel__bpf_counter_events */ + match = (ptr != NULL) && + ((ptr == evsel__bpf_counter_events) || (*(ptr - 1) == ',')) && + ((*(ptr + name_len) == ',') || (*(ptr + name_len) == '\0')); + + return match; +} + static const char *__evsel__hw_name(u64 config) { if (config < PERF_COUNT_HW_MAX && evsel__hw_names[config]) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index eccc4fd5b3eb..ce4b629d659c 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -239,6 +239,11 @@ void evsel__calc_id_pos(struct evsel *evsel); bool evsel__is_cache_op_valid(u8 type, u8 op); +static inline bool evsel__is_bpf(struct evsel *evsel) +{ + return evsel->bpf_counter_ops != NULL; +} + #define EVSEL__MAX_ALIASES 8 extern const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES]; @@ -246,6 +251,9 @@ extern const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALI extern const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES]; extern const char *evsel__hw_names[PERF_COUNT_HW_MAX]; extern const char *evsel__sw_names[PERF_COUNT_SW_MAX]; +extern char *evsel__bpf_counter_events; +bool evsel__match_bpf_counter_events(const char *name); + int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size); const char *evsel__name(struct evsel *evsel); diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index 1bce3eb28ef2..4ff56217f2a6 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -66,11 +66,6 @@ static inline bool target__has_cpu(struct target *target) return target->system_wide || target->cpu_list; } -static inline bool target__has_bpf(struct target *target) -{ - return target->bpf_str || target->use_bpf; -} - static inline bool target__none(struct target *target) { return !target__has_task(target) && !target__has_cpu(target); -- cgit v1.2.3 From 01bd8efcec444468db0275bbd71b49927f7e1544 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Sun, 25 Apr 2021 14:43:32 -0700 Subject: perf stat: Introduce ':b' modifier Introduce 'b' modifier to event parser, which means use BPF program to manage this event. This is the same as --bpf-counters option, but only applies to this event. For example, perf stat -e cycles:b,cs # use bpf for cycles, but not cs perf stat -e cycles,cs --bpf-counters # use bpf for both cycles and cs Suggested-by: Jiri Olsa Signed-off-by: Song Liu Cc: Namhyung Kim Cc: Song Liu Link: https://lore.kernel.org/r/20210425214333.1090950-5-song@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_counter.c | 2 +- tools/perf/util/evsel.h | 1 + tools/perf/util/parse-events.c | 8 +++++++- tools/perf/util/parse-events.l | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 33b1888103df..f179f5743025 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -790,7 +790,7 @@ int bpf_counter__load(struct evsel *evsel, struct target *target) { if (target->bpf_str) evsel->bpf_counter_ops = &bpf_program_profiler_ops; - else if (target->use_bpf || + else if (target->use_bpf || evsel->bpf_counter || evsel__match_bpf_counter_events(evsel->name)) evsel->bpf_counter_ops = &bperf_ops; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index ce4b629d659c..8f66cdcb338d 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -82,6 +82,7 @@ struct evsel { bool auto_merge_stats; bool collect_stat; bool weak_group; + bool bpf_counter; int bpf_fd; struct bpf_object *bpf_obj; }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 8123d218ad17..46ebd269a98d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1804,6 +1804,7 @@ struct event_modifier { int pinned; int weak; int exclusive; + int bpf_counter; }; static int get_event_modifier(struct event_modifier *mod, char *str, @@ -1824,6 +1825,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, int exclude = eu | ek | eh; int exclude_GH = evsel ? evsel->exclude_GH : 0; int weak = 0; + int bpf_counter = 0; memset(mod, 0, sizeof(*mod)); @@ -1867,6 +1869,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str, exclusive = 1; } else if (*str == 'W') { weak = 1; + } else if (*str == 'b') { + bpf_counter = 1; } else break; @@ -1898,6 +1902,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, mod->sample_read = sample_read; mod->pinned = pinned; mod->weak = weak; + mod->bpf_counter = bpf_counter; mod->exclusive = exclusive; return 0; @@ -1912,7 +1917,7 @@ static int check_modifier(char *str) char *p = str; /* The sizeof includes 0 byte as well. */ - if (strlen(str) > (sizeof("ukhGHpppPSDIWe") - 1)) + if (strlen(str) > (sizeof("ukhGHpppPSDIWeb") - 1)) return -1; while (*p) { @@ -1953,6 +1958,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) evsel->sample_read = mod.sample_read; evsel->precise_max = mod.precise_max; evsel->weak_group = mod.weak; + evsel->bpf_counter = mod.bpf_counter; if (evsel__is_group_leader(evsel)) { evsel->core.attr.pinned = mod.pinned; diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 0b36285a9435..fb8646cc3e83 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -210,7 +210,7 @@ name_tag [\'][a-zA-Z_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\'] name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? /* If you add a modifier you need to update check_modifier() */ -modifier_event [ukhpPGHSDIWe]+ +modifier_event [ukhpPGHSDIWeb]+ modifier_bp [rwx]{1,3} %% -- cgit v1.2.3 From 5508c9dae2a4a111acc7472900164f556ae75346 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Sun, 25 Apr 2021 14:43:33 -0700 Subject: perf stat: Introduce bpf_counter_ops->disable() Introduce bpf_counter_ops->disable(), which is used stop counting the event. Committer notes: Added a dummy bpf_counter__disable() to the python binding to avoid having 'perf test python' failing. bpf_counter isn't supported in the python binding. Signed-off-by: Song Liu Cc: Jiri Olsa Cc: Namhyung Kim Cc: Song Liu Cc: kernel-team@fb.com Link: https://lore.kernel.org/r/20210425214333.1090950-6-song@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_counter.c | 26 ++++++++++++++++++++++++++ tools/perf/util/bpf_counter.h | 7 +++++++ tools/perf/util/evlist.c | 4 ++++ tools/perf/util/python.c | 6 ++++++ 4 files changed, 43 insertions(+) diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index f179f5743025..ddb52f748c8e 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -215,6 +215,17 @@ static int bpf_program_profiler__enable(struct evsel *evsel) return 0; } +static int bpf_program_profiler__disable(struct evsel *evsel) +{ + struct bpf_counter *counter; + + list_for_each_entry(counter, &evsel->bpf_counter_list, list) { + assert(counter->skel != NULL); + bpf_prog_profiler_bpf__detach(counter->skel); + } + return 0; +} + static int bpf_program_profiler__read(struct evsel *evsel) { // perf_cpu_map uses /sys/devices/system/cpu/online @@ -280,6 +291,7 @@ static int bpf_program_profiler__install_pe(struct evsel *evsel, int cpu, struct bpf_counter_ops bpf_program_profiler_ops = { .load = bpf_program_profiler__load, .enable = bpf_program_profiler__enable, + .disable = bpf_program_profiler__disable, .read = bpf_program_profiler__read, .destroy = bpf_program_profiler__destroy, .install_pe = bpf_program_profiler__install_pe, @@ -627,6 +639,12 @@ static int bperf__enable(struct evsel *evsel) return 0; } +static int bperf__disable(struct evsel *evsel) +{ + evsel->follower_skel->bss->enabled = 0; + return 0; +} + static int bperf__read(struct evsel *evsel) { struct bperf_follower_bpf *skel = evsel->follower_skel; @@ -768,6 +786,7 @@ static int bperf__destroy(struct evsel *evsel) struct bpf_counter_ops bperf_ops = { .load = bperf__load, .enable = bperf__enable, + .disable = bperf__disable, .read = bperf__read, .install_pe = bperf__install_pe, .destroy = bperf__destroy, @@ -806,6 +825,13 @@ int bpf_counter__enable(struct evsel *evsel) return evsel->bpf_counter_ops->enable(evsel); } +int bpf_counter__disable(struct evsel *evsel) +{ + if (bpf_counter_skip(evsel)) + return 0; + return evsel->bpf_counter_ops->disable(evsel); +} + int bpf_counter__read(struct evsel *evsel) { if (bpf_counter_skip(evsel)) diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h index cb9c532e0a07..d6d907c3dcf9 100644 --- a/tools/perf/util/bpf_counter.h +++ b/tools/perf/util/bpf_counter.h @@ -18,6 +18,7 @@ typedef int (*bpf_counter_evsel_install_pe_op)(struct evsel *evsel, struct bpf_counter_ops { bpf_counter_evsel_target_op load; bpf_counter_evsel_op enable; + bpf_counter_evsel_op disable; bpf_counter_evsel_op read; bpf_counter_evsel_op destroy; bpf_counter_evsel_install_pe_op install_pe; @@ -32,6 +33,7 @@ struct bpf_counter { int bpf_counter__load(struct evsel *evsel, struct target *target); int bpf_counter__enable(struct evsel *evsel); +int bpf_counter__disable(struct evsel *evsel); int bpf_counter__read(struct evsel *evsel); void bpf_counter__destroy(struct evsel *evsel); int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd); @@ -51,6 +53,11 @@ static inline int bpf_counter__enable(struct evsel *evsel __maybe_unused) return 0; } +static inline int bpf_counter__disable(struct evsel *evsel __maybe_unused) +{ + return 0; +} + static inline int bpf_counter__read(struct evsel *evsel __maybe_unused) { return -EAGAIN; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d29a8a118973..e71041c89010 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -17,6 +17,7 @@ #include "evsel.h" #include "debug.h" #include "units.h" +#include "bpf_counter.h" #include // page_size #include "affinity.h" #include "../perf.h" @@ -421,6 +422,9 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name) if (affinity__setup(&affinity) < 0) return; + evlist__for_each_entry(evlist, pos) + bpf_counter__disable(pos); + /* Disable 'immediate' events last */ for (imm = 0; imm <= 1; imm++) { evlist__for_each_cpu(evlist, i, cpu) { diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 278abecb5bdf..412f8e79e409 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -90,6 +90,7 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, */ void bpf_counter__destroy(struct evsel *evsel); int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd); +int bpf_counter__disable(struct evsel *evsel); void bpf_counter__destroy(struct evsel *evsel __maybe_unused) { @@ -100,6 +101,11 @@ int bpf_counter__install_pe(struct evsel *evsel __maybe_unused, int cpu __maybe_ return 0; } +int bpf_counter__disable(struct evsel *evsel __maybe_unused) +{ + return 0; +} + /* * Support debug printing even though util/debug.c is not linked. That means * implementing 'verbose' and 'eprintf'. -- cgit v1.2.3 From d0713d4ca3e94827de77f8758e3e8045a0d85215 Mon Sep 17 00:00:00 2001 From: Nicholas Fraser Date: Mon, 26 Apr 2021 10:47:16 -0400 Subject: perf data: Add JSON export This adds a feature to export perf data to JSON. The resolved symbols are exported into the JSON so that external tools don't need to load the dsos themselves (or even have access to them at all.) This makes it easy to load and analyze perf data with standalone tools where direct perf or libbabeltrace integration is impractical. The exporter uses a minimal inline JSON encoding without any external dependencies. Currently it only outputs some headers and sample metadata but it's easily extensible. Use it like this: $ perf data convert --to-json out.json Committer notes: Fixup a __printf() bug that broke the build: util/data-convert-json.c:103:11: error: expected ‘)’ before numeric constant 103 | __(printf, 5, 6) | ^~ | ) util/data-convert-json.c: In function ‘output_sample_callchain_entry’: util/data-convert-json.c:124:2: error: implicit declaration of function ‘output_json_key_format’; did you mean ‘output_json_format’? [-Werror=implicit-function-declaration] 124 | output_json_key_format(out, false, 5, "ip", "\"0x%" PRIx64 "\"", ip); | ^~~~~~~~~~~~~~~~~~~~~~ | output_json_format Also had to add this patch to fix errors reported by various versions of clang: - if (al && al->sym && al->sym->name && strlen(al->sym->name) > 0) { + if (al && al->sym && al->sym->namelen) { al->sym->name is a zero sized array, to avoid one extra alloc in the symbol__new() constructor, sym->namelen carries its strlen. Committer testing: $ ls -la out.json ls: cannot access 'out.json': No such file or directory $ perf record sleep 0.1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (8 samples) ] $ perf report --stats | grep -w SAMPLE SAMPLE events: 8 $ perf data convert --to-json out.json [ perf data convert: Converted 'perf.data' into JSON data 'out.json' ] [ perf data convert: Converted and wrote 0.002 MB (8 samples) ] $ ls -la out.json -rw-rw-r--. 1 acme acme 2017 Apr 26 17:29 out.json $ cat out.json { "linux-perf-json-version": 1, "headers": { "header-version": 1, "captured-on": "2021-04-26T20:28:57Z", "data-offset": 432, "data-size": 1016, "feat-offset": 1448, "hostname": "five", "os-release": "5.11.14-200.fc33.x86_64", "arch": "x86_64", "cpu-desc": "AMD Ryzen 9 3900X 12-Core Processor", "cpuid": "AuthenticAMD,23,113,0", "nrcpus-online": 24, "nrcpus-avail": 24, "perf-version": "5.12.gee134f3189bd", "cmdline": [ "/home/acme/bin/perf", "record", "sleep", "0.1" ] }, "samples": [ { "timestamp": 170517539043684, "pid": 375844, "tid": 375844, "comm": "sleep", "callchain": [ { "ip": "0xffffffffa6268827" } ] }, { "timestamp": 170517539048443, "pid": 375844, "tid": 375844, "comm": "sleep", "callchain": [ { "ip": "0xffffffffa661359d" } ] }, { "timestamp": 170517539051018, "pid": 375844, "tid": 375844, "comm": "sleep", "callchain": [ { "ip": "0xffffffffa6311e18" } ] }, { "timestamp": 170517539053652, "pid": 375844, "tid": 375844, "comm": "sleep", "callchain": [ { "ip": "0x7fdb77b4812b", "symbol": "_dl_start", "dso": "ld-2.32.so" } ] }, { "timestamp": 170517539055306, "pid": 375844, "tid": 375844, "comm": "sleep", "callchain": [ { "ip": "0xffffffffa6269286" } ] }, { "timestamp": 170517539057590, "pid": 375844, "tid": 375844, "comm": "sleep", "callchain": [ { "ip": "0xffffffffa62abd8b" } ] }, { "timestamp": 170517539067559, "pid": 375844, "tid": 375844, "comm": "sleep", "callchain": [ { "ip": "0x7fdb77b5e9e9", "symbol": "__GI___tunables_init", "dso": "ld-2.32.so" } ] }, { "timestamp": 170517539282452, "pid": 375844, "tid": 375844, "comm": "sleep", "callchain": [ { "ip": "0x7fdb779978d2", "symbol": "getenv", "dso": "libc-2.32.so" } ] } ] } $ Signed-off-by: Nicholas Fraser Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Changbin Du Cc: Ian Rogers Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Tan Xiaojun Cc: Ulrich Czekalla Link: http://lore.kernel.org/lkml/3884969f-804d-2f53-c648-e2b0bd85edff@codeweavers.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-data.txt | 5 +- tools/perf/builtin-data.c | 26 ++- tools/perf/util/Build | 1 + tools/perf/util/data-convert-bt.c | 2 +- tools/perf/util/data-convert-bt.h | 11 - tools/perf/util/data-convert-json.c | 384 +++++++++++++++++++++++++++++++++ tools/perf/util/data-convert.h | 10 + 7 files changed, 418 insertions(+), 21 deletions(-) delete mode 100644 tools/perf/util/data-convert-bt.h create mode 100644 tools/perf/util/data-convert-json.c diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt index 726b9bc9e1a7..417bf17e265c 100644 --- a/tools/perf/Documentation/perf-data.txt +++ b/tools/perf/Documentation/perf-data.txt @@ -17,7 +17,7 @@ Data file related processing. COMMANDS -------- convert:: - Converts perf data file into another format (only CTF [1] format is support by now). + Converts perf data file into another format. It's possible to set data-convert debug variable to get debug messages from conversion, like: perf --debug data-convert data convert ... @@ -27,6 +27,9 @@ OPTIONS for 'convert' --to-ctf:: Triggers the CTF conversion, specify the path of CTF data directory. +--to-json:: + Triggers JSON conversion. Specify the JSON filename to output. + --tod:: Convert time to wall clock time. diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c index 8d23b8d6ee8e..15ca23675ef0 100644 --- a/tools/perf/builtin-data.c +++ b/tools/perf/builtin-data.c @@ -7,7 +7,6 @@ #include "debug.h" #include #include "data-convert.h" -#include "data-convert-bt.h" typedef int (*data_cmd_fn_t)(int argc, const char **argv); @@ -55,7 +54,8 @@ static const char * const data_convert_usage[] = { static int cmd_data_convert(int argc, const char **argv) { - const char *to_ctf = NULL; + const char *to_json = NULL; + const char *to_ctf = NULL; struct perf_data_convert_opts opts = { .force = false, .all = false, @@ -63,6 +63,7 @@ static int cmd_data_convert(int argc, const char **argv) const struct option options[] = { OPT_INCR('v', "verbose", &verbose, "be more verbose"), OPT_STRING('i', "input", &input_name, "file", "input file name"), + OPT_STRING(0, "to-json", &to_json, NULL, "Convert to JSON format"), #ifdef HAVE_LIBBABELTRACE_SUPPORT OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"), OPT_BOOLEAN(0, "tod", &opts.tod, "Convert time to wall clock time"), @@ -72,11 +73,6 @@ static int cmd_data_convert(int argc, const char **argv) OPT_END() }; -#ifndef HAVE_LIBBABELTRACE_SUPPORT - pr_err("No conversion support compiled in. perf should be compiled with environment variables LIBBABELTRACE=1 and LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n"); - return -1; -#endif - argc = parse_options(argc, argv, options, data_convert_usage, 0); if (argc) { @@ -84,11 +80,25 @@ static int cmd_data_convert(int argc, const char **argv) return -1; } + if (to_json && to_ctf) { + pr_err("You cannot specify both --to-ctf and --to-json.\n"); + return -1; + } + if (!to_json && !to_ctf) { + pr_err("You must specify one of --to-ctf or --to-json.\n"); + return -1; + } + + if (to_json) + return bt_convert__perf2json(input_name, to_json, &opts); + if (to_ctf) { #ifdef HAVE_LIBBABELTRACE_SUPPORT return bt_convert__perf2ctf(input_name, to_ctf, &opts); #else - pr_err("The libbabeltrace support is not compiled in.\n"); + pr_err("The libbabeltrace support is not compiled in. perf should be " + "compiled with environment variables LIBBABELTRACE=1 and " + "LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n"); return -1; #endif } diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 7dd815712d60..827d216a780e 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -165,6 +165,7 @@ perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o +perf-y += data-convert-json.o perf-y += scripting-engines/ diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index a9c375e63e73..cace349fb700 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -21,7 +21,7 @@ #include #include #include "asm/bug.h" -#include "data-convert-bt.h" +#include "data-convert.h" #include "session.h" #include "debug.h" #include "tool.h" diff --git a/tools/perf/util/data-convert-bt.h b/tools/perf/util/data-convert-bt.h deleted file mode 100644 index 821674d63c4e..000000000000 --- a/tools/perf/util/data-convert-bt.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __DATA_CONVERT_BT_H -#define __DATA_CONVERT_BT_H -#include "data-convert.h" -#ifdef HAVE_LIBBABELTRACE_SUPPORT - -int bt_convert__perf2ctf(const char *input_name, const char *to_ctf, - struct perf_data_convert_opts *opts); - -#endif /* HAVE_LIBBABELTRACE_SUPPORT */ -#endif /* __DATA_CONVERT_BT_H */ diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c new file mode 100644 index 000000000000..355cd1948bdf --- /dev/null +++ b/tools/perf/util/data-convert-json.c @@ -0,0 +1,384 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * JSON export. + * + * Copyright (C) 2021, CodeWeavers Inc. + */ + +#include "data-convert.h" + +#include +#include +#include +#include + +#include "linux/compiler.h" +#include "linux/err.h" +#include "util/auxtrace.h" +#include "util/debug.h" +#include "util/dso.h" +#include "util/event.h" +#include "util/evsel.h" +#include "util/evlist.h" +#include "util/header.h" +#include "util/map.h" +#include "util/session.h" +#include "util/symbol.h" +#include "util/thread.h" +#include "util/tool.h" + +struct convert_json { + struct perf_tool tool; + FILE *out; + bool first; + u64 events_count; +}; + +// Outputs a JSON-encoded string surrounded by quotes with characters escaped. +static void output_json_string(FILE *out, const char *s) +{ + fputc('"', out); + while (*s) { + switch (*s) { + + // required escapes with special forms as per RFC 8259 + case '"': fputs("\\\"", out); break; + case '\\': fputs("\\\\", out); break; + case '\b': fputs("\\b", out); break; + case '\f': fputs("\\f", out); break; + case '\n': fputs("\\n", out); break; + case '\r': fputs("\\r", out); break; + case '\t': fputs("\\t", out); break; + + default: + // all other control characters must be escaped by hex code + if (*s <= 0x1f) + fprintf(out, "\\u%04x", *s); + else + fputc(*s, out); + break; + } + + ++s; + } + fputc('"', out); +} + +// Outputs an optional comma, newline and indentation to delimit a new value +// from the previous one in a JSON object or array. +static void output_json_delimiters(FILE *out, bool comma, int depth) +{ + int i; + + if (comma) + fputc(',', out); + fputc('\n', out); + for (i = 0; i < depth; ++i) + fputc('\t', out); +} + +// Outputs a printf format string (with delimiter) as a JSON value. +__printf(4, 5) +static void output_json_format(FILE *out, bool comma, int depth, const char *format, ...) +{ + va_list args; + + output_json_delimiters(out, comma, depth); + va_start(args, format); + vfprintf(out, format, args); + va_end(args); +} + +// Outputs a JSON key-value pair where the value is a string. +static void output_json_key_string(FILE *out, bool comma, int depth, + const char *key, const char *value) +{ + output_json_delimiters(out, comma, depth); + output_json_string(out, key); + fputs(": ", out); + output_json_string(out, value); +} + +// Outputs a JSON key-value pair where the value is a printf format string. +__printf(5, 6) +static void output_json_key_format(FILE *out, bool comma, int depth, + const char *key, const char *format, ...) +{ + va_list args; + + output_json_delimiters(out, comma, depth); + output_json_string(out, key); + fputs(": ", out); + va_start(args, format); + vfprintf(out, format, args); + va_end(args); +} + +static void output_sample_callchain_entry(struct perf_tool *tool, + u64 ip, struct addr_location *al) +{ + struct convert_json *c = container_of(tool, struct convert_json, tool); + FILE *out = c->out; + + output_json_format(out, false, 4, "{"); + output_json_key_format(out, false, 5, "ip", "\"0x%" PRIx64 "\"", ip); + + if (al && al->sym && al->sym->namelen) { + fputc(',', out); + output_json_key_string(out, false, 5, "symbol", al->sym->name); + + if (al->map && al->map->dso) { + const char *dso = al->map->dso->short_name; + + if (dso && strlen(dso) > 0) { + fputc(',', out); + output_json_key_string(out, false, 5, "dso", dso); + } + } + } + + output_json_format(out, false, 4, "}"); +} + +static int process_sample_event(struct perf_tool *tool, + union perf_event *event __maybe_unused, + struct perf_sample *sample, + struct evsel *evsel __maybe_unused, + struct machine *machine) +{ + struct convert_json *c = container_of(tool, struct convert_json, tool); + FILE *out = c->out; + struct addr_location al, tal; + u8 cpumode = PERF_RECORD_MISC_USER; + + if (machine__resolve(machine, &al, sample) < 0) { + pr_err("Sample resolution failed!\n"); + return -1; + } + + ++c->events_count; + + if (c->first) + c->first = false; + else + fputc(',', out); + output_json_format(out, false, 2, "{"); + + output_json_key_format(out, false, 3, "timestamp", "%" PRIi64, sample->time); + output_json_key_format(out, true, 3, "pid", "%i", al.thread->pid_); + output_json_key_format(out, true, 3, "tid", "%i", al.thread->tid); + + if (al.thread->cpu >= 0) + output_json_key_format(out, true, 3, "cpu", "%i", al.thread->cpu); + + output_json_key_string(out, true, 3, "comm", thread__comm_str(al.thread)); + + output_json_key_format(out, true, 3, "callchain", "["); + if (sample->callchain) { + unsigned int i; + bool ok; + bool first_callchain = true; + + for (i = 0; i < sample->callchain->nr; ++i) { + u64 ip = sample->callchain->ips[i]; + + if (ip >= PERF_CONTEXT_MAX) { + switch (ip) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: %" + PRId64 "\n", (s64) ip); + break; + } + continue; + } + + if (first_callchain) + first_callchain = false; + else + fputc(',', out); + + ok = thread__find_symbol(al.thread, cpumode, ip, &tal); + output_sample_callchain_entry(tool, ip, ok ? &tal : NULL); + } + } else { + output_sample_callchain_entry(tool, sample->ip, &al); + } + output_json_format(out, false, 3, "]"); + + output_json_format(out, false, 2, "}"); + return 0; +} + +static void output_headers(struct perf_session *session, struct convert_json *c) +{ + struct stat st; + struct perf_header *header = &session->header; + int ret; + int fd = perf_data__fd(session->data); + int i; + FILE *out = c->out; + + output_json_key_format(out, false, 2, "header-version", "%u", header->version); + + ret = fstat(fd, &st); + if (ret >= 0) { + time_t stctime = st.st_mtime; + char buf[256]; + + strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&stctime)); + output_json_key_string(out, true, 2, "captured-on", buf); + } else { + pr_debug("Failed to get mtime of source file, not writing captured-on"); + } + + output_json_key_format(out, true, 2, "data-offset", "%" PRIu64, header->data_offset); + output_json_key_format(out, true, 2, "data-size", "%" PRIu64, header->data_size); + output_json_key_format(out, true, 2, "feat-offset", "%" PRIu64, header->feat_offset); + + output_json_key_string(out, true, 2, "hostname", header->env.hostname); + output_json_key_string(out, true, 2, "os-release", header->env.os_release); + output_json_key_string(out, true, 2, "arch", header->env.arch); + + output_json_key_string(out, true, 2, "cpu-desc", header->env.cpu_desc); + output_json_key_string(out, true, 2, "cpuid", header->env.cpuid); + output_json_key_format(out, true, 2, "nrcpus-online", "%u", header->env.nr_cpus_online); + output_json_key_format(out, true, 2, "nrcpus-avail", "%u", header->env.nr_cpus_avail); + + if (header->env.clock.enabled) { + output_json_key_format(out, true, 2, "clockid", + "%u", header->env.clock.clockid); + output_json_key_format(out, true, 2, "clock-time", + "%" PRIu64, header->env.clock.clockid_ns); + output_json_key_format(out, true, 2, "real-time", + "%" PRIu64, header->env.clock.tod_ns); + } + + output_json_key_string(out, true, 2, "perf-version", header->env.version); + + output_json_key_format(out, true, 2, "cmdline", "["); + for (i = 0; i < header->env.nr_cmdline; i++) { + output_json_delimiters(out, i != 0, 3); + output_json_string(c->out, header->env.cmdline_argv[i]); + } + output_json_format(out, false, 2, "]"); +} + +int bt_convert__perf2json(const char *input_name, const char *output_name, + struct perf_data_convert_opts *opts __maybe_unused) +{ + struct perf_session *session; + int fd; + int ret = -1; + + struct convert_json c = { + .tool = { + .sample = process_sample_event, + .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, + .comm = perf_event__process_comm, + .namespaces = perf_event__process_namespaces, + .cgroup = perf_event__process_cgroup, + .exit = perf_event__process_exit, + .fork = perf_event__process_fork, + .lost = perf_event__process_lost, + .tracing_data = perf_event__process_tracing_data, + .build_id = perf_event__process_build_id, + .id_index = perf_event__process_id_index, + .auxtrace_info = perf_event__process_auxtrace_info, + .auxtrace = perf_event__process_auxtrace, + .event_update = perf_event__process_event_update, + .ordered_events = true, + .ordering_requires_timestamps = true, + }, + .first = true, + .events_count = 0, + }; + + struct perf_data data = { + .mode = PERF_DATA_MODE_READ, + .path = input_name, + .force = opts->force, + }; + + if (opts->all) { + pr_err("--all is currently unsupported for JSON output.\n"); + goto err; + } + if (opts->tod) { + pr_err("--tod is currently unsupported for JSON output.\n"); + goto err; + } + + fd = open(output_name, O_CREAT | O_WRONLY | (opts->force ? O_TRUNC : O_EXCL), 0666); + if (fd == -1) { + if (errno == EEXIST) + pr_err("Output file exists. Use --force to overwrite it.\n"); + else + pr_err("Error opening output file!\n"); + goto err; + } + + c.out = fdopen(fd, "w"); + if (!c.out) { + fprintf(stderr, "Error opening output file!\n"); + close(fd); + goto err; + } + + session = perf_session__new(&data, false, &c.tool); + if (IS_ERR(session)) { + fprintf(stderr, "Error creating perf session!\n"); + goto err_fclose; + } + + if (symbol__init(&session->header.env) < 0) { + fprintf(stderr, "Symbol init error!\n"); + goto err_session_delete; + } + + // The opening brace is printed manually because it isn't delimited from a + // previous value (i.e. we don't want a leading newline) + fputc('{', c.out); + + // Version number for future-proofing. Most additions should be able to be + // done in a backwards-compatible way so this should only need to be bumped + // if some major breaking change must be made. + output_json_format(c.out, false, 1, "\"linux-perf-json-version\": 1"); + + // Output headers + output_json_format(c.out, true, 1, "\"headers\": {"); + output_headers(session, &c); + output_json_format(c.out, false, 1, "}"); + + // Output samples + output_json_format(c.out, true, 1, "\"samples\": ["); + perf_session__process_events(session); + output_json_format(c.out, false, 1, "]"); + output_json_format(c.out, false, 0, "}"); + fputc('\n', c.out); + + fprintf(stderr, + "[ perf data convert: Converted '%s' into JSON data '%s' ]\n", + data.path, output_name); + + fprintf(stderr, + "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples) ]\n", + (ftell(c.out)) / 1024.0 / 1024.0, c.events_count); + + ret = 0; +err_session_delete: + perf_session__delete(session); +err_fclose: + fclose(c.out); +err: + return ret; +} diff --git a/tools/perf/util/data-convert.h b/tools/perf/util/data-convert.h index feab5f114e37..1b4c5f598415 100644 --- a/tools/perf/util/data-convert.h +++ b/tools/perf/util/data-convert.h @@ -2,10 +2,20 @@ #ifndef __DATA_CONVERT_H #define __DATA_CONVERT_H +#include + struct perf_data_convert_opts { bool force; bool all; bool tod; }; +#ifdef HAVE_LIBBABELTRACE_SUPPORT +int bt_convert__perf2ctf(const char *input_name, const char *to_ctf, + struct perf_data_convert_opts *opts); +#endif /* HAVE_LIBBABELTRACE_SUPPORT */ + +int bt_convert__perf2json(const char *input_name, const char *to_ctf, + struct perf_data_convert_opts *opts); + #endif /* __DATA_CONVERT_H */ -- cgit v1.2.3 From bf8f8587bfb6d1315771a252a1a3be20fda1d783 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 26 Apr 2021 18:37:12 -0700 Subject: perf top: Use evlist->events_stat to count events It's mainly to count lost events for the warning so it should be ok to use the evlist->stats instead. This is needed for changes in the next commit. Reviewed-by: Andi Kleen Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427013717.1651674-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 173ace43f845..69cb3635f5ef 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -328,13 +328,13 @@ static void perf_top__print_sym_table(struct perf_top *top) printf("%-*.*s\n", win_width, win_width, graph_dotted_line); if (!top->record_opts.overwrite && - (hists->stats.nr_lost_warned != - hists->stats.nr_events[PERF_RECORD_LOST])) { - hists->stats.nr_lost_warned = - hists->stats.nr_events[PERF_RECORD_LOST]; + (top->evlist->stats.nr_lost_warned != + top->evlist->stats.nr_events[PERF_RECORD_LOST])) { + top->evlist->stats.nr_lost_warned = + top->evlist->stats.nr_events[PERF_RECORD_LOST]; color_fprintf(stdout, PERF_COLOR_RED, "WARNING: LOST %d chunks, Check IO/CPU overload", - hists->stats.nr_lost_warned); + top->evlist->stats.nr_lost_warned); ++printed; } @@ -852,11 +852,9 @@ static void perf_top__process_lost(struct perf_top *top, union perf_event *event, struct evsel *evsel) { - struct hists *hists = evsel__hists(evsel); - top->lost += event->lost.lost; top->lost_total += event->lost.lost; - hists->stats.total_lost += event->lost.lost; + evsel->evlist->stats.total_lost += event->lost.lost; } static void @@ -864,11 +862,9 @@ perf_top__process_lost_samples(struct perf_top *top, union perf_event *event, struct evsel *evsel) { - struct hists *hists = evsel__hists(evsel); - top->lost += event->lost_samples.lost; top->lost_total += event->lost_samples.lost; - hists->stats.total_lost_samples += event->lost_samples.lost; + evsel->evlist->stats.total_lost_samples += event->lost_samples.lost; } static u64 last_timestamp; @@ -1205,7 +1201,7 @@ static int deliver_event(struct ordered_events *qe, } else if (event->header.type == PERF_RECORD_LOST_SAMPLES) { perf_top__process_lost_samples(top, event, evsel); } else if (event->header.type < PERF_RECORD_MAX) { - hists__inc_nr_events(evsel__hists(evsel), event->header.type); + events_stats__inc(&session->evlist->stats, event->header.type); machine__process_event(machine, event, &sample); } else ++session->evlist->stats.nr_unknown_events; -- cgit v1.2.3 From 0f0abbace3cddc92aaed2db3783c9c501354b3be Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 26 Apr 2021 18:37:13 -0700 Subject: perf hists: Split hists_stats from events_stats Each struct hists have events_stats but most of the fields were not used. It's to count number of samples and periods whether filtered or not. And other fields are used only by evlist. So it'd be better to split hists_stats and events_stats to reduce wasted memory in the struct hists. This makes the output of event statistics in the perf report compact by skipping 0 events in each evsel/hists. Reviewed-by: Andi Kleen Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427013717.1651674-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 2 +- tools/perf/builtin-report.c | 4 ++-- tools/perf/tests/hists_filter.c | 14 +++++++------- tools/perf/ui/browsers/hists.c | 17 +++++++++-------- tools/perf/util/events_stats.h | 10 +++++++--- tools/perf/util/hist.c | 20 ++++++++++++++------ tools/perf/util/hist.h | 4 ++-- 7 files changed, 42 insertions(+), 29 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 524e6f0dff22..717efd78eee6 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -418,7 +418,7 @@ static int __cmd_annotate(struct perf_annotate *ann) total_nr_samples = 0; evlist__for_each_entry(session->evlist, pos) { struct hists *hists = evsel__hists(pos); - u32 nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; + u32 nr_samples = hists->stats.nr_samples; if (nr_samples > 0) { total_nr_samples += nr_samples; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0d65c98794a8..b0b9b60f74e5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -436,7 +436,7 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report { size_t ret; char unit; - unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; + unsigned long nr_samples = hists->stats.nr_samples; u64 nr_events = hists->stats.total_period; struct evsel *evsel = hists_to_evsel(hists); char buf[512]; @@ -464,7 +464,7 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report nr_samples += pos_hists->stats.nr_non_filtered_samples; nr_events += pos_hists->stats.total_non_filtered_period; } else { - nr_samples += pos_hists->stats.nr_events[PERF_RECORD_SAMPLE]; + nr_samples += pos_hists->stats.nr_samples; nr_events += pos_hists->stats.total_period; } } diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 123e07d35b55..ca6120cd1d90 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -150,13 +150,13 @@ int test__hists_filter(struct test *test __maybe_unused, int subtest __maybe_unu } TEST_ASSERT_VAL("Invalid nr samples", - hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10); + hists->stats.nr_samples == 10); TEST_ASSERT_VAL("Invalid nr hist entries", hists->nr_entries == 9); TEST_ASSERT_VAL("Invalid total period", hists->stats.total_period == 1000); TEST_ASSERT_VAL("Unmatched nr samples", - hists->stats.nr_events[PERF_RECORD_SAMPLE] == + hists->stats.nr_samples == hists->stats.nr_non_filtered_samples); TEST_ASSERT_VAL("Unmatched nr hist entries", hists->nr_entries == hists->nr_non_filtered_entries); @@ -175,7 +175,7 @@ int test__hists_filter(struct test *test __maybe_unused, int subtest __maybe_unu /* normal stats should be invariant */ TEST_ASSERT_VAL("Invalid nr samples", - hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10); + hists->stats.nr_samples == 10); TEST_ASSERT_VAL("Invalid nr hist entries", hists->nr_entries == 9); TEST_ASSERT_VAL("Invalid total period", @@ -204,7 +204,7 @@ int test__hists_filter(struct test *test __maybe_unused, int subtest __maybe_unu /* normal stats should be invariant */ TEST_ASSERT_VAL("Invalid nr samples", - hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10); + hists->stats.nr_samples == 10); TEST_ASSERT_VAL("Invalid nr hist entries", hists->nr_entries == 9); TEST_ASSERT_VAL("Invalid total period", @@ -239,7 +239,7 @@ int test__hists_filter(struct test *test __maybe_unused, int subtest __maybe_unu /* normal stats should be invariant */ TEST_ASSERT_VAL("Invalid nr samples", - hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10); + hists->stats.nr_samples == 10); TEST_ASSERT_VAL("Invalid nr hist entries", hists->nr_entries == 9); TEST_ASSERT_VAL("Invalid total period", @@ -268,7 +268,7 @@ int test__hists_filter(struct test *test __maybe_unused, int subtest __maybe_unu /* normal stats should be invariant */ TEST_ASSERT_VAL("Invalid nr samples", - hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10); + hists->stats.nr_samples == 10); TEST_ASSERT_VAL("Invalid nr hist entries", hists->nr_entries == 9); TEST_ASSERT_VAL("Invalid total period", @@ -299,7 +299,7 @@ int test__hists_filter(struct test *test __maybe_unused, int subtest __maybe_unu /* normal stats should be invariant */ TEST_ASSERT_VAL("Invalid nr samples", - hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10); + hists->stats.nr_samples == 10); TEST_ASSERT_VAL("Invalid nr hist entries", hists->nr_entries == 9); TEST_ASSERT_VAL("Invalid total period", diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index bcfd0a45953b..b72ee6822222 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -682,6 +682,7 @@ static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_l switch (key) { case K_TIMER: { struct hist_browser_timer *hbt = browser->hbt; + struct evsel *evsel = hists_to_evsel(browser->hists); u64 nr_entries; WARN_ON_ONCE(!hbt); @@ -696,10 +697,10 @@ static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_l ui_browser__update_nr_entries(&browser->b, nr_entries); if (warn_lost_event && - (browser->hists->stats.nr_lost_warned != - browser->hists->stats.nr_events[PERF_RECORD_LOST])) { - browser->hists->stats.nr_lost_warned = - browser->hists->stats.nr_events[PERF_RECORD_LOST]; + (evsel->evlist->stats.nr_lost_warned != + evsel->evlist->stats.nr_events[PERF_RECORD_LOST])) { + evsel->evlist->stats.nr_lost_warned = + evsel->evlist->stats.nr_events[PERF_RECORD_LOST]; ui_browser__warn_lost_events(&browser->b); } @@ -3416,7 +3417,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser, struct evsel *evsel = list_entry(entry, struct evsel, core.node); struct hists *hists = evsel__hists(evsel); bool current_entry = ui_browser__is_current_entry(browser, row); - unsigned long nr_events = hists->stats.nr_events[PERF_RECORD_SAMPLE]; + unsigned long nr_events = hists->stats.nr_samples; const char *ev_name = evsel__name(evsel); char bf[256], unit; const char *warn = " "; @@ -3432,7 +3433,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser, for_each_group_member(pos, evsel) { struct hists *pos_hists = evsel__hists(pos); - nr_events += pos_hists->stats.nr_events[PERF_RECORD_SAMPLE]; + nr_events += pos_hists->stats.nr_samples; } } @@ -3441,7 +3442,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser, unit, unit == ' ' ? "" : " ", ev_name); ui_browser__printf(browser, "%s", bf); - nr_events = hists->stats.nr_events[PERF_RECORD_LOST]; + nr_events = evsel->evlist->stats.nr_events[PERF_RECORD_LOST]; if (nr_events != 0) { menu->lost_events = true; if (!current_entry) @@ -3647,7 +3648,7 @@ static int block_hists_browser__title(struct hist_browser *browser, char *bf, { struct hists *hists = evsel__hists(browser->block_evsel); const char *evname = evsel__name(browser->block_evsel); - unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; + unsigned long nr_samples = hists->stats.nr_samples; int ret; ret = scnprintf(bf, size, "# Samples: %lu", nr_samples); diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h index 631a4af2ed86..e271c8004c89 100644 --- a/tools/perf/util/events_stats.h +++ b/tools/perf/util/events_stats.h @@ -26,15 +26,12 @@ * perf_record_sample.period and stash the result in total_period. */ struct events_stats { - u64 total_period; - u64 total_non_filtered_period; u64 total_lost; u64 total_lost_samples; u64 total_aux_lost; u64 total_aux_partial; u64 total_invalid_chains; u32 nr_events[PERF_RECORD_HEADER_MAX]; - u32 nr_non_filtered_samples; u32 nr_lost_warned; u32 nr_unknown_events; u32 nr_invalid_chains; @@ -44,6 +41,13 @@ struct events_stats { u32 nr_proc_map_timeout; }; +struct hists_stats { + u64 total_period; + u64 total_non_filtered_period; + u32 nr_samples; + u32 nr_non_filtered_samples; +}; + void events_stats__inc(struct events_stats *stats, u32 type); size_t events_stats__fprintf(struct events_stats *stats, FILE *fp); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 9299ee535518..691a6a777d14 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -2325,14 +2325,19 @@ void events_stats__inc(struct events_stats *stats, u32 type) ++stats->nr_events[type]; } -void hists__inc_nr_events(struct hists *hists, u32 type) +static void hists_stats__inc(struct hists_stats *stats) { - events_stats__inc(&hists->stats, type); + ++stats->nr_samples; +} + +void hists__inc_nr_events(struct hists *hists) +{ + hists_stats__inc(&hists->stats); } void hists__inc_nr_samples(struct hists *hists, bool filtered) { - events_stats__inc(&hists->stats, PERF_RECORD_SAMPLE); + hists_stats__inc(&hists->stats); if (!filtered) hists->stats.nr_non_filtered_samples++; } @@ -2677,8 +2682,11 @@ size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp) size_t ret = 0; evlist__for_each_entry(evlist, pos) { + struct hists *hists = evsel__hists(pos); + ret += fprintf(fp, "%s stats:\n", evsel__name(pos)); - ret += events_stats__fprintf(&evsel__hists(pos)->stats, fp); + ret += fprintf(fp, "%16s events: %10d\n", + "SAMPLE", hists->stats.nr_samples); } return ret; @@ -2698,7 +2706,7 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh const struct dso *dso = hists->dso_filter; struct thread *thread = hists->thread_filter; int socket_id = hists->socket_filter; - unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; + unsigned long nr_samples = hists->stats.nr_samples; u64 nr_events = hists->stats.total_period; struct evsel *evsel = hists_to_evsel(hists); const char *ev_name = evsel__name(evsel); @@ -2725,7 +2733,7 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh nr_samples += pos_hists->stats.nr_non_filtered_samples; nr_events += pos_hists->stats.total_non_filtered_period; } else { - nr_samples += pos_hists->stats.nr_events[PERF_RECORD_SAMPLE]; + nr_samples += pos_hists->stats.nr_samples; nr_events += pos_hists->stats.total_period; } } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index e2faa745c8d6..6b0f708f08ac 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -96,7 +96,7 @@ struct hists { const char *uid_filter_str; const char *symbol_filter_str; pthread_mutex_t lock; - struct events_stats stats; + struct hists_stats stats; u64 event_stream; u16 col_len[HISTC_NR_COLS]; bool has_callchains; @@ -196,7 +196,7 @@ struct hist_entry *hists__get_entry(struct hists *hists, int idx); u64 hists__total_period(struct hists *hists); void hists__reset_stats(struct hists *hists); void hists__inc_stats(struct hists *hists, struct hist_entry *h); -void hists__inc_nr_events(struct hists *hists, u32 type); +void hists__inc_nr_events(struct hists *hists); void hists__inc_nr_samples(struct hists *hists, bool filtered); size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, -- cgit v1.2.3 From 55f754443890043956ee81431faa3c529309ba24 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 26 Apr 2021 18:37:14 -0700 Subject: perf report: Show event sample counts in --stat output To make the output identical with perf report -D, it needs to show per-event sample counts along with the aggregated stat at the end. Reviewed-by: Andi Kleen Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427013717.1651674-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index b0b9b60f74e5..be56f3efa413 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -708,9 +708,22 @@ static void report__output_resort(struct report *rep) ui_progress__finish(); } +static int count_sample_event(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct evsel *evsel, + struct machine *machine __maybe_unused) +{ + struct hists *hists = evsel__hists(evsel); + + hists__inc_nr_events(hists); + return 0; +} + static void stats_setup(struct report *rep) { memset(&rep->tool, 0, sizeof(rep->tool)); + rep->tool.sample = count_sample_event; rep->tool.no_warn = true; } @@ -719,6 +732,7 @@ static int stats_print(struct report *rep) struct perf_session *session = rep->session; perf_session__fprintf_nr_events(session, stdout); + perf_evlist__fprintf_nr_events(session->evlist, stdout); return 0; } -- cgit v1.2.3 From 2775de0b115a6ffab7882c45c755005ee0ac0122 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 26 Apr 2021 18:37:15 -0700 Subject: perf report: Add --skip-empty option to suppress 0 event stat To make the output more readable, I think it's better to remove 0's in the output. Also the dummy event has no event stats so it just wasts the space. Let's use the --skip-empty option to suppress it. $ perf report --stat --skip-empty Aggregated stats: TOTAL events: 16530 MMAP events: 226 COMM events: 1596 EXIT events: 2 THROTTLE events: 121 UNTHROTTLE events: 117 FORK events: 1595 SAMPLE events: 719 MMAP2 events: 12147 CGROUP events: 2 FINISHED_ROUND events: 2 THREAD_MAP events: 1 CPU_MAP events: 1 TIME_CONV events: 1 cycles stats: SAMPLE events: 719 Reviewed-by: Andi Kleen Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427013717.1651674-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 3 +++ tools/perf/builtin-annotate.c | 4 ++-- tools/perf/builtin-report.c | 16 ++++++++++++---- tools/perf/ui/stdio/hist.c | 5 ++++- tools/perf/util/events_stats.h | 3 ++- tools/perf/util/hist.c | 6 +++++- tools/perf/util/hist.h | 3 ++- tools/perf/util/session.c | 5 +++-- tools/perf/util/session.h | 3 ++- 9 files changed, 35 insertions(+), 13 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index f51f0000676e..24efc0583c93 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -571,6 +571,9 @@ include::itrace.txt[] sampled cycles 'Avg Cycles' - block average sampled cycles +--skip-empty:: + Do not print 0 results in the --stat output. + include::callchain-overhead-calculation.txt[] SEE ALSO diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 717efd78eee6..49627a7bed7c 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -404,8 +404,8 @@ static int __cmd_annotate(struct perf_annotate *ann) goto out; if (dump_trace) { - perf_session__fprintf_nr_events(session, stdout); - evlist__fprintf_nr_events(session->evlist, stdout); + perf_session__fprintf_nr_events(session, stdout, false); + evlist__fprintf_nr_events(session->evlist, stdout, false); goto out; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index be56f3efa413..4910194acaa6 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -85,6 +85,7 @@ struct report { bool group_set; bool stitch_lbr; bool disable_order; + bool skip_empty; int max_stack; struct perf_read_values show_threads_values; struct annotation_options annotation_opts; @@ -530,6 +531,9 @@ static int evlist__tty_browse_hists(struct evlist *evlist, struct report *rep, c if (symbol_conf.event_group && !evsel__is_group_leader(pos)) continue; + if (rep->skip_empty && !hists->stats.nr_samples) + continue; + hists__fprintf_nr_sample_events(hists, rep, evname, stdout); if (rep->total_cycles_mode) { @@ -731,8 +735,8 @@ static int stats_print(struct report *rep) { struct perf_session *session = rep->session; - perf_session__fprintf_nr_events(session, stdout); - perf_evlist__fprintf_nr_events(session->evlist, stdout); + perf_session__fprintf_nr_events(session, stdout, rep->skip_empty); + evlist__fprintf_nr_events(session->evlist, stdout, rep->skip_empty); return 0; } @@ -944,8 +948,10 @@ static int __cmd_report(struct report *rep) perf_session__fprintf_dsos(session, stdout); if (dump_trace) { - perf_session__fprintf_nr_events(session, stdout); - evlist__fprintf_nr_events(session->evlist, stdout); + perf_session__fprintf_nr_events(session, stdout, + rep->skip_empty); + evlist__fprintf_nr_events(session->evlist, stdout, + rep->skip_empty); return 0; } } @@ -1313,6 +1319,8 @@ int cmd_report(int argc, const char **argv) "Sort all blocks by 'Sampled Cycles%'"), OPT_BOOLEAN(0, "disable-order", &report.disable_order, "Disable raw trace ordering"), + OPT_BOOLEAN(0, "skip-empty", &report.skip_empty, + "Do not display empty (or dummy) events in the output"), OPT_END() }; struct perf_data data = { diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 2ab2af4d4849..d9e634406175 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -897,7 +897,8 @@ out: return ret; } -size_t events_stats__fprintf(struct events_stats *stats, FILE *fp) +size_t events_stats__fprintf(struct events_stats *stats, FILE *fp, + bool skip_empty) { int i; size_t ret = 0; @@ -908,6 +909,8 @@ size_t events_stats__fprintf(struct events_stats *stats, FILE *fp) name = perf_event__name(i); if (!strcmp(name, "UNKNOWN")) continue; + if (skip_empty && !stats->nr_events[i]) + continue; ret += fprintf(fp, "%16s events: %10d\n", name, stats->nr_events[i]); } diff --git a/tools/perf/util/events_stats.h b/tools/perf/util/events_stats.h index e271c8004c89..3480bafd414b 100644 --- a/tools/perf/util/events_stats.h +++ b/tools/perf/util/events_stats.h @@ -50,6 +50,7 @@ struct hists_stats { void events_stats__inc(struct events_stats *stats, u32 type); -size_t events_stats__fprintf(struct events_stats *stats, FILE *fp); +size_t events_stats__fprintf(struct events_stats *stats, FILE *fp, + bool skip_empty); #endif /* __PERF_EVENTS_STATS_ */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 691a6a777d14..65fe65ba03c2 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -2676,7 +2676,8 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, } } -size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp) +size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp, + bool skip_empty) { struct evsel *pos; size_t ret = 0; @@ -2684,6 +2685,9 @@ size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp) evlist__for_each_entry(evlist, pos) { struct hists *hists = evsel__hists(pos); + if (skip_empty && !hists->stats.nr_samples) + continue; + ret += fprintf(fp, "%s stats:\n", evsel__name(pos)); ret += fprintf(fp, "%16s events: %10d\n", "SAMPLE", hists->stats.nr_samples); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 6b0f708f08ac..5343b62476e6 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -202,7 +202,8 @@ void hists__inc_nr_samples(struct hists *hists, bool filtered); size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, int max_cols, float min_pcnt, FILE *fp, bool ignore_callchains); -size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp); +size_t evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp, + bool skip_empty); void hists__filter_by_dso(struct hists *hists); void hists__filter_by_thread(struct hists *hists); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index eba3769be3f1..a6659b616e6d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2352,7 +2352,8 @@ size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp return machines__fprintf_dsos_buildid(&session->machines, fp, skip, parm); } -size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) +size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp, + bool skip_empty) { size_t ret; const char *msg = ""; @@ -2362,7 +2363,7 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) ret = fprintf(fp, "\nAggregated stats:%s\n", msg); - ret += events_stats__fprintf(&session->evlist->stats, fp); + ret += events_stats__fprintf(&session->evlist->stats, fp, skip_empty); return ret; } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index f76480166d38..e31ba4c92a6c 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -113,7 +113,8 @@ size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp); size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp, bool (fn)(struct dso *dso, int parm), int parm); -size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp); +size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp, + bool skip_empty); struct evsel *perf_session__find_first_evtype(struct perf_session *session, unsigned int type); -- cgit v1.2.3 From 8f08cf3330da0582e7a51bd1b999c820147e19d1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 26 Apr 2021 18:37:16 -0700 Subject: perf report: Make --skip-empty as default so that the compact output is shown by default. Also add 'report.skip-empty' config option to override the default. Users can also use --no-skip-empty command line option to change the behavior anytime. Committer testing: $ perf report --stat Aggregated stats: TOTAL events: 19 COMM events: 2 EXIT events: 1 SAMPLE events: 8 MMAP2 events: 4 FINISHED_ROUND events: 1 THREAD_MAP events: 1 CPU_MAP events: 1 TIME_CONV events: 1 cycles:u stats: SAMPLE events: 8 $ perf config report.skip-empty=false $ perf report --stat Aggregated stats: TOTAL events: 19 MMAP events: 0 LOST events: 0 COMM events: 2 EXIT events: 1 THROTTLE events: 0 UNTHROTTLE events: 0 FORK events: 0 READ events: 0 SAMPLE events: 8 MMAP2 events: 4 AUX events: 0 ITRACE_START events: 0 LOST_SAMPLES events: 0 SWITCH events: 0 SWITCH_CPU_WIDE events: 0 NAMESPACES events: 0 KSYMBOL events: 0 BPF_EVENT events: 0 CGROUP events: 0 TEXT_POKE events: 0 ATTR events: 0 EVENT_TYPE events: 0 TRACING_DATA events: 0 BUILD_ID events: 0 FINISHED_ROUND events: 1 ID_INDEX events: 0 AUXTRACE_INFO events: 0 AUXTRACE events: 0 AUXTRACE_ERROR events: 0 THREAD_MAP events: 1 CPU_MAP events: 1 STAT_CONFIG events: 0 STAT events: 0 STAT_ROUND events: 0 EVENT_UPDATE events: 0 TIME_CONV events: 1 FEATURE events: 0 COMPRESSED events: 0 cycles:u stats: SAMPLE events: 8 $ perf config report.skip-empty report.skip-empty=false $ Reviewed-by: Andi Kleen Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427013717.1651674-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 5 +++++ tools/perf/builtin-report.c | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 154a1ced72b2..b0872c801866 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -123,6 +123,7 @@ Given a $HOME/.perfconfig like this: queue-size = 0 children = true group = true + skip-empty = true [llvm] dump-obj = true @@ -531,6 +532,10 @@ report.*:: 0.07% 0.00% noploop ld-2.15.so [.] strcmp 0.03% 0.00% noploop [kernel.kallsyms] [k] timerqueue_del + report.skip-empty:: + This option can change default stat behavior with empty results. + If it's set true, 'perf report --stat' will not show 0 stats. + top.*:: top.children:: Same as 'report.children'. So if it is enabled, the output of 'top' diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 4910194acaa6..36f9ccfeb38a 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -136,6 +136,11 @@ static int report__config(const char *var, const char *value, void *cb) return 0; } + if (!strcmp(var, "report.skip-empty")) { + rep->skip_empty = perf_config_bool(var, value); + return 0; + } + return 0; } @@ -1160,6 +1165,7 @@ int cmd_report(int argc, const char **argv) .pretty_printing_style = "normal", .socket_filter = -1, .annotation_opts = annotation__default_options, + .skip_empty = true, }; const struct option options[] = { OPT_STRING('i', "input", &input_name, "file", -- cgit v1.2.3 From 462f57dbf9fa1fdcdeae2e0b19a667f7f9989bdb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 26 Apr 2021 18:37:17 -0700 Subject: perf report: Print percentage of each event statistics It's sometimes useful to see how many samples vs other events in the data file with percent values. $ perf report --stat Aggregated stats: TOTAL events: 20064 MMAP events: 239 ( 1.2%) COMM events: 1518 ( 7.6%) EXIT events: 1 ( 0.0%) FORK events: 1517 ( 7.6%) SAMPLE events: 4015 (20.0%) MMAP2 events: 12769 (63.6%) FINISHED_ROUND events: 2 ( 0.0%) THREAD_MAP events: 1 ( 0.0%) CPU_MAP events: 1 ( 0.0%) TIME_CONV events: 1 ( 0.0%) cycles stats: SAMPLE events: 2475 instructions stats: SAMPLE events: 1540 Suggested-by: Andi Kleen Reviewed-by: Andi Kleen Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427013717.1651674-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/stdio/hist.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index d9e634406175..f36270485168 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -902,6 +902,7 @@ size_t events_stats__fprintf(struct events_stats *stats, FILE *fp, { int i; size_t ret = 0; + u32 total = stats->nr_events[0]; for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) { const char *name; @@ -912,7 +913,14 @@ size_t events_stats__fprintf(struct events_stats *stats, FILE *fp, if (skip_empty && !stats->nr_events[i]) continue; - ret += fprintf(fp, "%16s events: %10d\n", name, stats->nr_events[i]); + if (i && total) { + ret += fprintf(fp, "%16s events: %10d (%4.1f%%)\n", + name, stats->nr_events[i], + 100.0 * stats->nr_events[i] / total); + } else { + ret += fprintf(fp, "%16s events: %10d\n", + name, stats->nr_events[i]); + } } return ret; -- cgit v1.2.3 From 412736119116d0161688e9061485fbc3e25f78d5 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:14 +0800 Subject: tools headers uapi: Update tools's copy of linux/perf_event.h To get the changes in: Liang Kan's patch 55bcf6ef314ae8ba ("perf: Extend PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE") Kan's patch is in the tip/perf/core branch. So the next perf tool patches need this interface for hybrid support. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-2-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index ad15e40d7f5d..14332f4cf816 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -37,6 +37,21 @@ enum perf_type_id { PERF_TYPE_MAX, /* non-ABI */ }; +/* + * attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE + * PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA + * AA: hardware event ID + * EEEEEEEE: PMU type ID + * PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB + * BB: hardware cache ID + * CC: hardware cache op ID + * DD: hardware cache op result ID + * EEEEEEEE: PMU type ID + * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied. + */ +#define PERF_PMU_TYPE_SHIFT 32 +#define PERF_HW_EVENT_MASK 0xffffffff + /* * Generalized performance event event_id types, used by the * attr.event_id parameter of the sys_perf_event_open() -- cgit v1.2.3 From 6b64833b9e49fda28b0eb94d865c334b37b4662f Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:15 +0800 Subject: perf jevents: Support unit value "cpu_core" and "cpu_atom" For some Intel platforms, such as Alderlake, which is a hybrid platform and it consists of atom cpu and core cpu. Each cpu has dedicated event list. Part of events are available on core cpu, part of events are available on atom cpu. The kernel exports new cpu pmus: cpu_core and cpu_atom. The event in json is added with a new field "Unit" to indicate which pmu the event is available on. For example, one event in cache.json, { "BriefDescription": "Counts the number of load ops retired that", "CollectPEBSRecord": "2", "Counter": "0,1,2,3", "EventCode": "0xd2", "EventName": "MEM_LOAD_UOPS_RETIRED_MISC.MMIO", "PEBScounters": "0,1,2,3", "SampleAfterValue": "1000003", "UMask": "0x80", "Unit": "cpu_atom" }, The unit "cpu_atom" indicates this event is only available on "cpu_atom". In generated pmu-events.c, we can see: { .name = "mem_load_uops_retired_misc.mmio", .event = "period=1000003,umask=0x80,event=0xd2", .desc = "Counts the number of load ops retired that. Unit: cpu_atom ", .topic = "cache", .pmu = "cpu_atom", }, But if without this patch, the "uncore_" prefix is added before "cpu_atom", such as: .pmu = "uncore_cpu_atom" That would be a wrong pmu. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-3-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 33aa3c885eaf..ed4f0bd72e5a 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -285,6 +285,8 @@ static struct map { { "imx8_ddr", "imx8_ddr" }, { "L3PMC", "amd_l3" }, { "DFPMC", "amd_df" }, + { "cpu_core", "cpu_core" }, + { "cpu_atom", "cpu_atom" }, {} }; -- cgit v1.2.3 From eab35953e67b48c763fbb0e0ffc64dd3152361ea Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:16 +0800 Subject: perf pmu: Simplify arguments of __perf_pmu__new_alias Simplify the arguments of __perf_pmu__new_alias() by passing the whole 'struct pme_event' pointer. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-4-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 286d5e415bdc..8214def7b0f0 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -306,18 +306,25 @@ static bool perf_pmu_merge_alias(struct perf_pmu_alias *newalias, } static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, - char *desc, char *val, - char *long_desc, char *topic, - char *unit, char *perpkg, - char *metric_expr, - char *metric_name, - char *deprecated) + char *desc, char *val, struct pmu_event *pe) { struct parse_events_term *term; struct perf_pmu_alias *alias; int ret; int num; char newval[256]; + char *long_desc = NULL, *topic = NULL, *unit = NULL, *perpkg = NULL, + *metric_expr = NULL, *metric_name = NULL, *deprecated = NULL; + + if (pe) { + long_desc = (char *)pe->long_desc; + topic = (char *)pe->topic; + unit = (char *)pe->unit; + perpkg = (char *)pe->perpkg; + metric_expr = (char *)pe->metric_expr; + metric_name = (char *)pe->metric_name; + deprecated = (char *)pe->deprecated; + } alias = malloc(sizeof(*alias)); if (!alias) @@ -406,8 +413,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI /* Remove trailing newline from sysfs file */ strim(buf); - return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL, NULL, - NULL, NULL, NULL, NULL); + return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL); } static inline bool pmu_alias_info_file(char *name) @@ -798,11 +804,7 @@ new_alias: /* need type casts to override 'const' */ __perf_pmu__new_alias(head, NULL, (char *)pe->name, (char *)pe->desc, (char *)pe->event, - (char *)pe->long_desc, (char *)pe->topic, - (char *)pe->unit, (char *)pe->perpkg, - (char *)pe->metric_expr, - (char *)pe->metric_name, - (char *)pe->deprecated); + pe); } } @@ -869,13 +871,7 @@ static int pmu_add_sys_aliases_iter_fn(struct pmu_event *pe, void *data) (char *)pe->name, (char *)pe->desc, (char *)pe->event, - (char *)pe->long_desc, - (char *)pe->topic, - (char *)pe->unit, - (char *)pe->perpkg, - (char *)pe->metric_expr, - (char *)pe->metric_name, - (char *)pe->deprecated); + pe); } return 0; -- cgit v1.2.3 From 32705de7d45d0ed989517a63454c2b3e5e5ea267 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:17 +0800 Subject: perf pmu: Save pmu name On hybrid platform, one event is available on one pmu (such as, available on cpu_core or on cpu_atom). This patch saves the pmu name to the pmu field of struct perf_pmu_alias. Then next we can know the pmu which the event can be enabled on. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-5-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 10 +++++++++- tools/perf/util/pmu.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 8214def7b0f0..44225838eb03 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -283,6 +283,7 @@ void perf_pmu_free_alias(struct perf_pmu_alias *newalias) zfree(&newalias->str); zfree(&newalias->metric_expr); zfree(&newalias->metric_name); + zfree(&newalias->pmu_name); parse_events_terms__purge(&newalias->terms); free(newalias); } @@ -297,6 +298,10 @@ static bool perf_pmu_merge_alias(struct perf_pmu_alias *newalias, list_for_each_entry(a, alist, list) { if (!strcasecmp(newalias->name, a->name)) { + if (newalias->pmu_name && a->pmu_name && + !strcasecmp(newalias->pmu_name, a->pmu_name)) { + continue; + } perf_pmu_update_alias(a, newalias); perf_pmu_free_alias(newalias); return true; @@ -314,7 +319,8 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, int num; char newval[256]; char *long_desc = NULL, *topic = NULL, *unit = NULL, *perpkg = NULL, - *metric_expr = NULL, *metric_name = NULL, *deprecated = NULL; + *metric_expr = NULL, *metric_name = NULL, *deprecated = NULL, + *pmu_name = NULL; if (pe) { long_desc = (char *)pe->long_desc; @@ -324,6 +330,7 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, metric_expr = (char *)pe->metric_expr; metric_name = (char *)pe->metric_name; deprecated = (char *)pe->deprecated; + pmu_name = (char *)pe->pmu; } alias = malloc(sizeof(*alias)); @@ -389,6 +396,7 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, } alias->per_pkg = perpkg && sscanf(perpkg, "%d", &num) == 1 && num == 1; alias->str = strdup(newval); + alias->pmu_name = pmu_name ? strdup(pmu_name) : NULL; if (deprecated) alias->deprecated = true; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 1f1749ba830f..4f100768c264 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -72,6 +72,7 @@ struct perf_pmu_alias { bool deprecated; char *metric_expr; char *metric_name; + char *pmu_name; }; struct perf_pmu *perf_pmu__find(const char *name); -- cgit v1.2.3 From 444624307c4e06d35de12df1cfe08a4964ac086f Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:18 +0800 Subject: perf pmu: Save detected hybrid pmus to a global pmu list We identify the cpu_core pmu and cpu_atom pmu by explicitly checking following files: For cpu_core, checks: "/sys/bus/event_source/devices/cpu_core/cpus" For cpu_atom, checks: "/sys/bus/event_source/devices/cpu_atom/cpus" If the 'cpus' file exists and it has data, the pmu exists. But in order not to hardcode the "cpu_core" and "cpu_atom", and make the code in a generic way. So if the path "/sys/bus/event_source/devices/cpu_xxx/cpus" exists, the hybrid pmu exists. All the detected hybrid pmus are linked to a global list 'perf_pmu__hybrid_pmus' and then next we just need to iterate the list to get all hybrid pmu by using perf_pmu__for_each_hybrid_pmu. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-6-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/pmu-hybrid.c | 49 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/pmu-hybrid.h | 18 ++++++++++++++++ tools/perf/util/pmu.c | 9 +++++++- tools/perf/util/pmu.h | 4 ++++ 5 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 tools/perf/util/pmu-hybrid.c create mode 100644 tools/perf/util/pmu-hybrid.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 827d216a780e..45bea9911669 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -69,6 +69,7 @@ perf-y += parse-events-bison.o perf-y += pmu.o perf-y += pmu-flex.o perf-y += pmu-bison.o +perf-y += pmu-hybrid.o perf-y += trace-event-read.o perf-y += trace-event-info.o perf-y += trace-event-scripting.o diff --git a/tools/perf/util/pmu-hybrid.c b/tools/perf/util/pmu-hybrid.c new file mode 100644 index 000000000000..8ed0e6e1776d --- /dev/null +++ b/tools/perf/util/pmu-hybrid.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "fncache.h" +#include "pmu-hybrid.h" + +LIST_HEAD(perf_pmu__hybrid_pmus); + +bool perf_pmu__hybrid_mounted(const char *name) +{ + char path[PATH_MAX]; + const char *sysfs; + FILE *file; + int n, cpu; + + if (strncmp(name, "cpu_", 4)) + return false; + + sysfs = sysfs__mountpoint(); + if (!sysfs) + return false; + + snprintf(path, PATH_MAX, CPUS_TEMPLATE_CPU, sysfs, name); + if (!file_available(path)) + return false; + + file = fopen(path, "r"); + if (!file) + return false; + + n = fscanf(file, "%u", &cpu); + fclose(file); + if (n <= 0) + return false; + + return true; +} diff --git a/tools/perf/util/pmu-hybrid.h b/tools/perf/util/pmu-hybrid.h new file mode 100644 index 000000000000..35bed3714438 --- /dev/null +++ b/tools/perf/util/pmu-hybrid.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PMU_HYBRID_H +#define __PMU_HYBRID_H + +#include +#include +#include +#include +#include "pmu.h" + +extern struct list_head perf_pmu__hybrid_pmus; + +#define perf_pmu__for_each_hybrid_pmu(pmu) \ + list_for_each_entry(pmu, &perf_pmu__hybrid_pmus, hybrid_list) + +bool perf_pmu__hybrid_mounted(const char *name); + +#endif /* __PMU_HYBRID_H */ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 44225838eb03..6e49c7b8ad71 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -25,6 +25,7 @@ #include "string2.h" #include "strbuf.h" #include "fncache.h" +#include "pmu-hybrid.h" struct perf_pmu perf_pmu__fake; @@ -613,7 +614,6 @@ static struct perf_cpu_map *__pmu_cpumask(const char *path) */ #define SYS_TEMPLATE_ID "./bus/event_source/devices/%s/identifier" #define CPUS_TEMPLATE_UNCORE "%s/bus/event_source/devices/%s/cpumask" -#define CPUS_TEMPLATE_CPU "%s/bus/event_source/devices/%s/cpus" static struct perf_cpu_map *pmu_cpumask(const char *name) { @@ -645,6 +645,9 @@ static bool pmu_is_uncore(const char *name) char path[PATH_MAX]; const char *sysfs; + if (perf_pmu__hybrid_mounted(name)) + return false; + sysfs = sysfs__mountpoint(); snprintf(path, PATH_MAX, CPUS_TEMPLATE_UNCORE, sysfs, name); return file_available(path); @@ -951,6 +954,7 @@ static struct perf_pmu *pmu_lookup(const char *name) pmu->is_uncore = pmu_is_uncore(name); if (pmu->is_uncore) pmu->id = pmu_id(name); + pmu->is_hybrid = perf_pmu__hybrid_mounted(name); pmu->max_precise = pmu_max_precise(name); pmu_add_cpu_aliases(&aliases, pmu); pmu_add_sys_aliases(&aliases, pmu); @@ -962,6 +966,9 @@ static struct perf_pmu *pmu_lookup(const char *name) list_splice(&aliases, &pmu->aliases); list_add_tail(&pmu->list, &pmus); + if (pmu->is_hybrid) + list_add_tail(&pmu->hybrid_list, &perf_pmu__hybrid_pmus); + pmu->default_config = perf_pmu__get_default_config(pmu); return pmu; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 4f100768c264..9a2f89eeab6f 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "parse-events.h" #include "pmu-events/pmu-events.h" @@ -19,6 +20,7 @@ enum { #define PERF_PMU_FORMAT_BITS 64 #define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" +#define CPUS_TEMPLATE_CPU "%s/bus/event_source/devices/%s/cpus" struct perf_event_attr; @@ -34,6 +36,7 @@ struct perf_pmu { __u32 type; bool selectable; bool is_uncore; + bool is_hybrid; bool auxtrace; int max_precise; struct perf_event_attr *default_config; @@ -42,6 +45,7 @@ struct perf_pmu { struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */ struct list_head caps; /* HEAD struct perf_pmu_caps -> list */ struct list_head list; /* ELEM */ + struct list_head hybrid_list; }; extern struct perf_pmu perf_pmu__fake; -- cgit v1.2.3 From c5a26ea490a16798d973e6fa352c6b8375646bc4 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:19 +0800 Subject: perf pmu: Add hybrid helper functions The functions perf_pmu__is_hybrid and perf_pmu__find_hybrid_pmu can be used to identify the hybrid platform and return the found hybrid cpu pmu. All the detected hybrid pmus have been saved in 'perf_pmu__hybrid_pmus' list. So we just need to search this list. perf_pmu__hybrid_type_to_pmu converts the user specified string to hybrid pmu name. This is used to support the '--cputype' option in next patches. perf_pmu__has_hybrid checks the existing of hybrid pmu. Note that, we have to define it in pmu.c (make pmu-hybrid.c no more symbol dependency), otherwise perf test python would be failed. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-7-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu-hybrid.c | 40 ++++++++++++++++++++++++++++++++++++++++ tools/perf/util/pmu-hybrid.h | 4 ++++ tools/perf/util/pmu.c | 11 +++++++++++ tools/perf/util/pmu.h | 2 ++ 4 files changed, 57 insertions(+) diff --git a/tools/perf/util/pmu-hybrid.c b/tools/perf/util/pmu-hybrid.c index 8ed0e6e1776d..f51ccaac60ee 100644 --- a/tools/perf/util/pmu-hybrid.c +++ b/tools/perf/util/pmu-hybrid.c @@ -47,3 +47,43 @@ bool perf_pmu__hybrid_mounted(const char *name) return true; } + +struct perf_pmu *perf_pmu__find_hybrid_pmu(const char *name) +{ + struct perf_pmu *pmu; + + if (!name) + return NULL; + + perf_pmu__for_each_hybrid_pmu(pmu) { + if (!strcmp(name, pmu->name)) + return pmu; + } + + return NULL; +} + +bool perf_pmu__is_hybrid(const char *name) +{ + return perf_pmu__find_hybrid_pmu(name) != NULL; +} + +char *perf_pmu__hybrid_type_to_pmu(const char *type) +{ + char *pmu_name = NULL; + + if (asprintf(&pmu_name, "cpu_%s", type) < 0) + return NULL; + + if (perf_pmu__is_hybrid(pmu_name)) + return pmu_name; + + /* + * pmu may be not scanned, check the sysfs. + */ + if (perf_pmu__hybrid_mounted(pmu_name)) + return pmu_name; + + free(pmu_name); + return NULL; +} diff --git a/tools/perf/util/pmu-hybrid.h b/tools/perf/util/pmu-hybrid.h index 35bed3714438..d0fa7bc50a76 100644 --- a/tools/perf/util/pmu-hybrid.h +++ b/tools/perf/util/pmu-hybrid.h @@ -15,4 +15,8 @@ extern struct list_head perf_pmu__hybrid_pmus; bool perf_pmu__hybrid_mounted(const char *name); +struct perf_pmu *perf_pmu__find_hybrid_pmu(const char *name); +bool perf_pmu__is_hybrid(const char *name); +char *perf_pmu__hybrid_type_to_pmu(const char *type); + #endif /* __PMU_HYBRID_H */ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 6e49c7b8ad71..88c8ecdc60b0 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -40,6 +40,7 @@ int perf_pmu_parse(struct list_head *list, char *name); extern FILE *perf_pmu_in; static LIST_HEAD(pmus); +static bool hybrid_scanned; /* * Parse & process all the sysfs attributes located under @@ -1861,3 +1862,13 @@ void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, "'%llx' not supported by kernel)!\n", name ?: "N/A", buf, config); } + +bool perf_pmu__has_hybrid(void) +{ + if (!hybrid_scanned) { + hybrid_scanned = true; + perf_pmu__scan(NULL); + } + + return !list_empty(&perf_pmu__hybrid_pmus); +} diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 9a2f89eeab6f..a790ef758171 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -132,4 +132,6 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu); void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, char *name); +bool perf_pmu__has_hybrid(void); + #endif /* __PMU_H */ -- cgit v1.2.3 From 12279429d8620fe0cb2cdc0ba68cae3cc2c826f9 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:20 +0800 Subject: perf stat: Uniquify hybrid event name It would be useful to let user know the pmu which the event belongs to. perf-stat has supported '--no-merge' option and it can print the pmu name after the event name, such as: "cycles [cpu_core]" Now this option is enabled by default for hybrid platform but change the format to: "cpu_core/cycles/" If user configs the name, we still use the user specified name. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra ink: https://lore.kernel.org/r/20210427070139.25256-8-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 4 ++++ tools/perf/util/evsel.h | 1 + tools/perf/util/parse-events.c | 3 +++ tools/perf/util/stat-display.c | 15 +++++++++++++-- 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 5ec2190e45cd..835e3696b9ce 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -69,6 +69,7 @@ #include "util/pfm.h" #include "util/bpf_counter.h" #include "util/iostat.h" +#include "util/pmu-hybrid.h" #include "asm/bug.h" #include @@ -2402,6 +2403,9 @@ int cmd_stat(int argc, const char **argv) evlist__check_cpu_maps(evsel_list); + if (perf_pmu__has_hybrid()) + stat_config.no_merge = true; + /* * Initialize thread_map with comm names, * so we could print it out on output. diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8f66cdcb338d..ad74c83bd35b 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -116,6 +116,7 @@ struct evsel { bool merged_stat; bool reset_group; bool errored; + bool use_config_name; struct hashmap *per_pkg_mask; struct evsel *leader; struct list_head config_terms; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 46ebd269a98d..f4628c886e30 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1567,6 +1567,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (!evsel) return -ENOMEM; + if (evsel->name) + evsel->use_config_name = true; + evsel->pmu_name = name ? strdup(name) : NULL; evsel->use_uncore_alias = use_uncore_alias; evsel->percore = config_term_percore(&evsel->config_terms); diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 98664caef6af..0679129ad05e 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -18,6 +18,7 @@ #include #include "util.h" #include "iostat.h" +#include "pmu-hybrid.h" #define CNTR_NOT_SUPPORTED "" #define CNTR_NOT_COUNTED "" @@ -538,6 +539,7 @@ static void uniquify_event_name(struct evsel *counter) { char *new_name; char *config; + int ret = 0; if (counter->uniquified_name || !counter->pmu_name || !strncmp(counter->name, counter->pmu_name, @@ -552,8 +554,17 @@ static void uniquify_event_name(struct evsel *counter) counter->name = new_name; } } else { - if (asprintf(&new_name, - "%s [%s]", counter->name, counter->pmu_name) > 0) { + if (perf_pmu__has_hybrid()) { + if (!counter->use_config_name) { + ret = asprintf(&new_name, "%s/%s/", + counter->pmu_name, counter->name); + } + } else { + ret = asprintf(&new_name, "%s [%s]", + counter->name, counter->pmu_name); + } + + if (ret) { free(counter->name); counter->name = new_name; } -- cgit v1.2.3 From 9cbfa2f64c04d98ad2bbce93066e2e021d12a24b Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:21 +0800 Subject: perf parse-events: Create two hybrid hardware events Current hardware events has special perf types PERF_TYPE_HARDWARE. But it doesn't pass the PMU type in the user interface. For a hybrid system, the perf kernel doesn't know which PMU the events belong to. So now this type is extended to be PMU aware type. The PMU type ID is stored at attr.config[63:32]. PMU type ID is retrieved from sysfs. root@lkp-adl-d01:/sys/devices/cpu_atom# cat type 8 root@lkp-adl-d01:/sys/devices/cpu_core# cat type 4 When enabling a hybrid hardware event without specified pmu, such as, 'perf stat -e cycles -a', two events are created automatically. One is for atom, the other is for core. # perf stat -e cycles -a -vv -- sleep 1 Control descriptor is not initialized ------------------------------------------------------------ perf_event_attr: size 120 config 0x400000000 sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 = 3 ------------------------------------------------------------ ... ------------------------------------------------------------ perf_event_attr: size 120 config 0x400000000 sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 15 group_fd -1 flags 0x8 = 19 ------------------------------------------------------------ perf_event_attr: size 120 config 0x800000000 sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 16 group_fd -1 flags 0x8 = 20 ------------------------------------------------------------ ... ------------------------------------------------------------ perf_event_attr: size 120 config 0x800000000 sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 23 group_fd -1 flags 0x8 = 27 cycles: 0: 836272 1001525722 1001525722 cycles: 1: 628564 1001580453 1001580453 cycles: 2: 872693 1001605997 1001605997 cycles: 3: 70417 1001641369 1001641369 cycles: 4: 88593 1001726722 1001726722 cycles: 5: 470495 1001752993 1001752993 cycles: 6: 484733 1001840440 1001840440 cycles: 7: 1272477 1001593105 1001593105 cycles: 8: 209185 1001608616 1001608616 cycles: 9: 204391 1001633962 1001633962 cycles: 10: 264121 1001661745 1001661745 cycles: 11: 826104 1001689904 1001689904 cycles: 12: 89935 1001728861 1001728861 cycles: 13: 70639 1001756757 1001756757 cycles: 14: 185266 1001784810 1001784810 cycles: 15: 171094 1001825466 1001825466 cycles: 0: 129624 1001854843 1001854843 cycles: 1: 122533 1001840421 1001840421 cycles: 2: 90055 1001882506 1001882506 cycles: 3: 139607 1001896463 1001896463 cycles: 4: 141791 1001907838 1001907838 cycles: 5: 530927 1001883880 1001883880 cycles: 6: 143246 1001852529 1001852529 cycles: 7: 667769 1001872626 1001872626 cycles: 6744979 16026956922 16026956922 cycles: 1965552 8014991106 8014991106 Performance counter stats for 'system wide': 6,744,979 cpu_core/cycles/ 1,965,552 cpu_atom/cycles/ 1.001882711 seconds time elapsed 0x4 in 0x400000000 indicates the cpu_core pmu. 0x8 in 0x800000000 indicates the cpu_atom pmu. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-9-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/parse-events-hybrid.c | 100 ++++++++++++++++++++++++++++++++++ tools/perf/util/parse-events-hybrid.h | 17 ++++++ tools/perf/util/parse-events.c | 18 ++++++ tools/perf/util/parse-events.h | 5 ++ 5 files changed, 141 insertions(+) create mode 100644 tools/perf/util/parse-events-hybrid.c create mode 100644 tools/perf/util/parse-events-hybrid.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 45bea9911669..34fcbf3c3bce 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -23,6 +23,7 @@ perf-y += llvm-utils.o perf-y += mmap.o perf-y += memswap.o perf-y += parse-events.o +perf-y += parse-events-hybrid.o perf-y += perf_regs.o perf-y += path.o perf-y += print_binary.o diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c new file mode 100644 index 000000000000..8fd7f19a9865 --- /dev/null +++ b/tools/perf/util/parse-events-hybrid.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include "evlist.h" +#include "evsel.h" +#include "parse-events.h" +#include "parse-events-hybrid.h" +#include "debug.h" +#include "pmu.h" +#include "pmu-hybrid.h" +#include "perf.h" + +static void config_hybrid_attr(struct perf_event_attr *attr, + int type, int pmu_type) +{ + /* + * attr.config layout for type PERF_TYPE_HARDWARE and + * PERF_TYPE_HW_CACHE + * + * PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA + * AA: hardware event ID + * EEEEEEEE: PMU type ID + * PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB + * BB: hardware cache ID + * CC: hardware cache op ID + * DD: hardware cache op result ID + * EEEEEEEE: PMU type ID + * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied. + */ + attr->type = type; + attr->config = attr->config | ((__u64)pmu_type << PERF_PMU_TYPE_SHIFT); +} + +static int create_event_hybrid(__u32 config_type, int *idx, + struct list_head *list, + struct perf_event_attr *attr, char *name, + struct list_head *config_terms, + struct perf_pmu *pmu) +{ + struct evsel *evsel; + __u32 type = attr->type; + __u64 config = attr->config; + + config_hybrid_attr(attr, config_type, pmu->type); + evsel = parse_events__add_event_hybrid(list, idx, attr, name, + pmu, config_terms); + if (evsel) + evsel->pmu_name = strdup(pmu->name); + else + return -ENOMEM; + + attr->type = type; + attr->config = config; + return 0; +} + +static int add_hw_hybrid(struct parse_events_state *parse_state, + struct list_head *list, struct perf_event_attr *attr, + char *name, struct list_head *config_terms) +{ + struct perf_pmu *pmu; + int ret; + + perf_pmu__for_each_hybrid_pmu(pmu) { + ret = create_event_hybrid(PERF_TYPE_HARDWARE, + &parse_state->idx, list, attr, name, + config_terms, pmu); + if (ret) + return ret; + } + + return 0; +} + +int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, + struct list_head *list, + struct perf_event_attr *attr, + char *name, struct list_head *config_terms, + bool *hybrid) +{ + *hybrid = false; + if (attr->type == PERF_TYPE_SOFTWARE) + return 0; + + if (!perf_pmu__has_hybrid()) + return 0; + + *hybrid = true; + if (attr->type != PERF_TYPE_RAW) { + return add_hw_hybrid(parse_state, list, attr, name, + config_terms); + } + + return -1; +} diff --git a/tools/perf/util/parse-events-hybrid.h b/tools/perf/util/parse-events-hybrid.h new file mode 100644 index 000000000000..d81a76978480 --- /dev/null +++ b/tools/perf/util/parse-events-hybrid.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_PARSE_EVENTS_HYBRID_H +#define __PERF_PARSE_EVENTS_HYBRID_H + +#include +#include +#include +#include +#include + +int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, + struct list_head *list, + struct perf_event_attr *attr, + char *name, struct list_head *config_terms, + bool *hybrid); + +#endif /* __PERF_PARSE_EVENTS_HYBRID_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f4628c886e30..9109f8b03fe7 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -37,6 +37,7 @@ #include "util/evsel_config.h" #include "util/event.h" #include "util/pfm.h" +#include "util/parse-events-hybrid.h" #include "perf.h" #define MAX_NAME_LEN 100 @@ -1419,6 +1420,8 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, { struct perf_event_attr attr; LIST_HEAD(config_terms); + bool hybrid; + int ret; memset(&attr, 0, sizeof(attr)); attr.type = type; @@ -1433,6 +1436,12 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, return -ENOMEM; } + ret = parse_events__add_numeric_hybrid(parse_state, list, &attr, + get_config_name(head_config), + &config_terms, &hybrid); + if (hybrid) + return ret; + return add_event(list, &parse_state->idx, &attr, get_config_name(head_config), &config_terms); } @@ -3194,3 +3203,12 @@ char *parse_events_formats_error_string(char *additional_terms) fail: return NULL; } + +struct evsel *parse_events__add_event_hybrid(struct list_head *list, int *idx, + struct perf_event_attr *attr, + char *name, struct perf_pmu *pmu, + struct list_head *config_terms) +{ + return __add_event(list, idx, attr, true, name, pmu, + config_terms, false, NULL); +} diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index e80c9b74f2f2..c4f2f96304ce 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -263,4 +263,9 @@ static inline bool is_sdt_event(char *str __maybe_unused) int perf_pmu__test_parse_init(void); +struct evsel *parse_events__add_event_hybrid(struct list_head *list, int *idx, + struct perf_event_attr *attr, + char *name, struct perf_pmu *pmu, + struct list_head *config_terms); + #endif /* __PERF_PARSE_EVENTS_H */ -- cgit v1.2.3 From 30def61f64bac5f5cfe2a3cf96bae5b889403b4c Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:22 +0800 Subject: perf parse-events: Create two hybrid cache events For cache events, they have pre-defined configs. The kernel needs to know where the cache event comes from (e.g. from cpu_core pmu or from cpu_atom pmu). But the perf type PERF_TYPE_HW_CACHE can't carry pmu information. Now the type PERF_TYPE_HW_CACHE is extended to be PMU aware type. The PMU type ID is stored at attr.config[63:32]. When enabling a hybrid cache event without specified pmu, such as, 'perf stat -e LLC-loads -a', two events are created automatically. One is for atom, the other is for core. # perf stat -e LLC-loads -a -vv -- sleep 1 Control descriptor is not initialized ------------------------------------------------------------ perf_event_attr: type 3 size 120 config 0x400000002 sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 = 3 ------------------------------------------------------------ ... ------------------------------------------------------------ perf_event_attr: type 3 size 120 config 0x400000002 sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 15 group_fd -1 flags 0x8 = 19 ------------------------------------------------------------ perf_event_attr: type 3 size 120 config 0x800000002 sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 16 group_fd -1 flags 0x8 = 20 ------------------------------------------------------------ ... ------------------------------------------------------------ perf_event_attr: type 3 size 120 config 0x800000002 sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 23 group_fd -1 flags 0x8 = 27 LLC-loads: 0: 1507 1001800280 1001800280 LLC-loads: 1: 666 1001812250 1001812250 LLC-loads: 2: 3353 1001813453 1001813453 LLC-loads: 3: 514 1001848795 1001848795 LLC-loads: 4: 627 1001952832 1001952832 LLC-loads: 5: 4399 1001451154 1001451154 LLC-loads: 6: 1240 1001481052 1001481052 LLC-loads: 7: 478 1001520348 1001520348 LLC-loads: 8: 691 1001551236 1001551236 LLC-loads: 9: 310 1001578945 1001578945 LLC-loads: 10: 1018 1001594354 1001594354 LLC-loads: 11: 3656 1001622355 1001622355 LLC-loads: 12: 882 1001661416 1001661416 LLC-loads: 13: 506 1001693963 1001693963 LLC-loads: 14: 3547 1001721013 1001721013 LLC-loads: 15: 1399 1001734818 1001734818 LLC-loads: 0: 1314 1001793826 1001793826 LLC-loads: 1: 2857 1001752764 1001752764 LLC-loads: 2: 646 1001830694 1001830694 LLC-loads: 3: 1612 1001864861 1001864861 LLC-loads: 4: 2244 1001912381 1001912381 LLC-loads: 5: 1255 1001943889 1001943889 LLC-loads: 6: 4624 1002021109 1002021109 LLC-loads: 7: 2703 1001959302 1001959302 LLC-loads: 24793 16026838264 16026838264 LLC-loads: 17255 8015078826 8015078826 Performance counter stats for 'system wide': 24,793 cpu_core/LLC-loads/ 17,255 cpu_atom/LLC-loads/ 1.001970988 seconds time elapsed 0x4 in 0x400000002 indicates the cpu_core pmu. 0x8 in 0x800000002 indicates the cpu_atom pmu. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-10-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events-hybrid.c | 23 +++++++++++++++++++++++ tools/perf/util/parse-events-hybrid.h | 5 +++++ tools/perf/util/parse-events.c | 10 +++++++++- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c index 8fd7f19a9865..7a7e065d2b5f 100644 --- a/tools/perf/util/parse-events-hybrid.c +++ b/tools/perf/util/parse-events-hybrid.c @@ -98,3 +98,26 @@ int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, return -1; } + +int parse_events__add_cache_hybrid(struct list_head *list, int *idx, + struct perf_event_attr *attr, char *name, + struct list_head *config_terms, + bool *hybrid) +{ + struct perf_pmu *pmu; + int ret; + + *hybrid = false; + if (!perf_pmu__has_hybrid()) + return 0; + + *hybrid = true; + perf_pmu__for_each_hybrid_pmu(pmu) { + ret = create_event_hybrid(PERF_TYPE_HW_CACHE, idx, list, + attr, name, config_terms, pmu); + if (ret) + return ret; + } + + return 0; +} diff --git a/tools/perf/util/parse-events-hybrid.h b/tools/perf/util/parse-events-hybrid.h index d81a76978480..9ad33cd0cef4 100644 --- a/tools/perf/util/parse-events-hybrid.h +++ b/tools/perf/util/parse-events-hybrid.h @@ -14,4 +14,9 @@ int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, char *name, struct list_head *config_terms, bool *hybrid); +int parse_events__add_cache_hybrid(struct list_head *list, int *idx, + struct perf_event_attr *attr, char *name, + struct list_head *config_terms, + bool *hybrid); + #endif /* __PERF_PARSE_EVENTS_HYBRID_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 9109f8b03fe7..c228bf5c7d88 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -460,7 +460,8 @@ int parse_events_add_cache(struct list_head *list, int *idx, char name[MAX_NAME_LEN], *config_name; int cache_type = -1, cache_op = -1, cache_result = -1; char *op_result[2] = { op_result1, op_result2 }; - int i, n; + int i, n, ret; + bool hybrid; /* * No fallback - if we cannot get a clear cache type @@ -520,6 +521,13 @@ int parse_events_add_cache(struct list_head *list, int *idx, if (get_config_terms(head_config, &config_terms)) return -ENOMEM; } + + ret = parse_events__add_cache_hybrid(list, idx, &attr, + config_name ? : name, &config_terms, + &hybrid); + if (hybrid) + return ret; + return add_event(list, idx, &attr, config_name ? : name, &config_terms); } -- cgit v1.2.3 From 94da591b1c7913880957c3477f6abff563783b33 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:23 +0800 Subject: perf parse-events: Create two hybrid raw events On hybrid platform, same raw event is possible to be available on both cpu_core pmu and cpu_atom pmu. It's supported to create two raw events for one event encoding. For raw events, the attr.type is PMU type. # perf stat -e r3c -a -vv -- sleep 1 Control descriptor is not initialized ------------------------------------------------------------ perf_event_attr: type 4 size 120 config 0x3c sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 = 3 ------------------------------------------------------------ ... ------------------------------------------------------------ perf_event_attr: type 4 size 120 config 0x3c sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 15 group_fd -1 flags 0x8 = 19 ------------------------------------------------------------ perf_event_attr: type 8 size 120 config 0x3c sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 16 group_fd -1 flags 0x8 = 20 ------------------------------------------------------------ ... ------------------------------------------------------------ perf_event_attr: type 8 size 120 config 0x3c sample_type IDENTIFIER read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING disabled 1 inherit 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 23 group_fd -1 flags 0x8 = 27 r3c: 0: 434449 1001412521 1001412521 r3c: 1: 173162 1001482031 1001482031 r3c: 2: 231710 1001524974 1001524974 r3c: 3: 110012 1001563523 1001563523 r3c: 4: 191517 1001593221 1001593221 r3c: 5: 956458 1001628147 1001628147 r3c: 6: 416969 1001715626 1001715626 r3c: 7: 1047527 1001596650 1001596650 r3c: 8: 103877 1001633520 1001633520 r3c: 9: 70571 1001637898 1001637898 r3c: 10: 550284 1001714398 1001714398 r3c: 11: 1257274 1001738349 1001738349 r3c: 12: 107797 1001801432 1001801432 r3c: 13: 67471 1001836281 1001836281 r3c: 14: 286782 1001923161 1001923161 r3c: 15: 815509 1001952550 1001952550 r3c: 0: 95994 1002071117 1002071117 r3c: 1: 105570 1002142438 1002142438 r3c: 2: 115921 1002189147 1002189147 r3c: 3: 72747 1002238133 1002238133 r3c: 4: 103519 1002276753 1002276753 r3c: 5: 121382 1002315131 1002315131 r3c: 6: 80298 1002248050 1002248050 r3c: 7: 466790 1002278221 1002278221 r3c: 6821369 16026754282 16026754282 r3c: 1162221 8017758990 8017758990 Performance counter stats for 'system wide': 6,821,369 cpu_core/r3c/ 1,162,221 cpu_atom/r3c/ 1.002289965 seconds time elapsed Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-11-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events-hybrid.c | 38 ++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c index 7a7e065d2b5f..e27b747080e1 100644 --- a/tools/perf/util/parse-events-hybrid.c +++ b/tools/perf/util/parse-events-hybrid.c @@ -77,6 +77,41 @@ static int add_hw_hybrid(struct parse_events_state *parse_state, return 0; } +static int create_raw_event_hybrid(int *idx, struct list_head *list, + struct perf_event_attr *attr, char *name, + struct list_head *config_terms, + struct perf_pmu *pmu) +{ + struct evsel *evsel; + + attr->type = pmu->type; + evsel = parse_events__add_event_hybrid(list, idx, attr, name, + pmu, config_terms); + if (evsel) + evsel->pmu_name = strdup(pmu->name); + else + return -ENOMEM; + + return 0; +} + +static int add_raw_hybrid(struct parse_events_state *parse_state, + struct list_head *list, struct perf_event_attr *attr, + char *name, struct list_head *config_terms) +{ + struct perf_pmu *pmu; + int ret; + + perf_pmu__for_each_hybrid_pmu(pmu) { + ret = create_raw_event_hybrid(&parse_state->idx, list, attr, + name, config_terms, pmu); + if (ret) + return ret; + } + + return 0; +} + int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, @@ -96,7 +131,8 @@ int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, config_terms); } - return -1; + return add_raw_hybrid(parse_state, list, attr, name, + config_terms); } int parse_events__add_cache_hybrid(struct list_head *list, int *idx, -- cgit v1.2.3 From c93afadc924dbec51a38c4f6f0d07a8adfddd339 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:24 +0800 Subject: perf parse-events: Compare with hybrid pmu name On hybrid platform, user may want to enable event only on one pmu. Following syntax will be supported: cpu_core// cpu_atom// For hardware event, hardware cache event and raw event, two events are created by default. We pass the specified pmu name in parse_state and it would be checked before event creation. So next only the event with the specified pmu would be created. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-12-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events-hybrid.c | 21 ++++++++++++++++++++- tools/perf/util/parse-events-hybrid.h | 3 ++- tools/perf/util/parse-events.c | 5 +++-- tools/perf/util/parse-events.h | 4 +++- tools/perf/util/parse-events.y | 9 ++++++--- 5 files changed, 34 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c index e27b747080e1..10160ab126f9 100644 --- a/tools/perf/util/parse-events-hybrid.c +++ b/tools/perf/util/parse-events-hybrid.c @@ -59,6 +59,15 @@ static int create_event_hybrid(__u32 config_type, int *idx, return 0; } +static int pmu_cmp(struct parse_events_state *parse_state, + struct perf_pmu *pmu) +{ + if (!parse_state->hybrid_pmu_name) + return 0; + + return strcmp(parse_state->hybrid_pmu_name, pmu->name); +} + static int add_hw_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, char *name, struct list_head *config_terms) @@ -67,6 +76,9 @@ static int add_hw_hybrid(struct parse_events_state *parse_state, int ret; perf_pmu__for_each_hybrid_pmu(pmu) { + if (pmu_cmp(parse_state, pmu)) + continue; + ret = create_event_hybrid(PERF_TYPE_HARDWARE, &parse_state->idx, list, attr, name, config_terms, pmu); @@ -103,6 +115,9 @@ static int add_raw_hybrid(struct parse_events_state *parse_state, int ret; perf_pmu__for_each_hybrid_pmu(pmu) { + if (pmu_cmp(parse_state, pmu)) + continue; + ret = create_raw_event_hybrid(&parse_state->idx, list, attr, name, config_terms, pmu); if (ret) @@ -138,7 +153,8 @@ int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, int parse_events__add_cache_hybrid(struct list_head *list, int *idx, struct perf_event_attr *attr, char *name, struct list_head *config_terms, - bool *hybrid) + bool *hybrid, + struct parse_events_state *parse_state) { struct perf_pmu *pmu; int ret; @@ -149,6 +165,9 @@ int parse_events__add_cache_hybrid(struct list_head *list, int *idx, *hybrid = true; perf_pmu__for_each_hybrid_pmu(pmu) { + if (pmu_cmp(parse_state, pmu)) + continue; + ret = create_event_hybrid(PERF_TYPE_HW_CACHE, idx, list, attr, name, config_terms, pmu); if (ret) diff --git a/tools/perf/util/parse-events-hybrid.h b/tools/perf/util/parse-events-hybrid.h index 9ad33cd0cef4..f33bd67aa851 100644 --- a/tools/perf/util/parse-events-hybrid.h +++ b/tools/perf/util/parse-events-hybrid.h @@ -17,6 +17,7 @@ int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, int parse_events__add_cache_hybrid(struct list_head *list, int *idx, struct perf_event_attr *attr, char *name, struct list_head *config_terms, - bool *hybrid); + bool *hybrid, + struct parse_events_state *parse_state); #endif /* __PERF_PARSE_EVENTS_HYBRID_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index c228bf5c7d88..6236513e6369 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -453,7 +453,8 @@ static int config_attr(struct perf_event_attr *attr, int parse_events_add_cache(struct list_head *list, int *idx, char *type, char *op_result1, char *op_result2, struct parse_events_error *err, - struct list_head *head_config) + struct list_head *head_config, + struct parse_events_state *parse_state) { struct perf_event_attr attr; LIST_HEAD(config_terms); @@ -524,7 +525,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, ret = parse_events__add_cache_hybrid(list, idx, &attr, config_name ? : name, &config_terms, - &hybrid); + &hybrid, parse_state); if (hybrid) return ret; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index c4f2f96304ce..bf6e41aa9b6a 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -138,6 +138,7 @@ struct parse_events_state { struct list_head *terms; int stoken; struct perf_pmu *fake_pmu; + char *hybrid_pmu_name; }; void parse_events__handle_error(struct parse_events_error *err, int idx, @@ -188,7 +189,8 @@ int parse_events_add_tool(struct parse_events_state *parse_state, int parse_events_add_cache(struct list_head *list, int *idx, char *type, char *op_result1, char *op_result2, struct parse_events_error *error, - struct list_head *head_config); + struct list_head *head_config, + struct parse_events_state *parse_state); int parse_events_add_breakpoint(struct list_head *list, int *idx, u64 addr, char *type, u64 len); int parse_events_add_pmu(struct parse_events_state *parse_state, diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index d57ac86ce7ca..aba12a4d488e 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -454,7 +454,8 @@ PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_e list = alloc_list(); ABORT_ON(!list); - err = parse_events_add_cache(list, &parse_state->idx, $1, $3, $5, error, $6); + err = parse_events_add_cache(list, &parse_state->idx, $1, $3, $5, error, $6, + parse_state); parse_events_terms__delete($6); free($1); free($3); @@ -475,7 +476,8 @@ PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT opt_event_config list = alloc_list(); ABORT_ON(!list); - err = parse_events_add_cache(list, &parse_state->idx, $1, $3, NULL, error, $4); + err = parse_events_add_cache(list, &parse_state->idx, $1, $3, NULL, error, $4, + parse_state); parse_events_terms__delete($4); free($1); free($3); @@ -495,7 +497,8 @@ PE_NAME_CACHE_TYPE opt_event_config list = alloc_list(); ABORT_ON(!list); - err = parse_events_add_cache(list, &parse_state->idx, $1, NULL, NULL, error, $2); + err = parse_events_add_cache(list, &parse_state->idx, $1, NULL, NULL, error, $2, + parse_state); parse_events_terms__delete($2); free($1); if (err) { -- cgit v1.2.3 From 5e4edd1f73b5d59905aeb0fe43ab74301c39a5c1 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:25 +0800 Subject: perf parse-events: Support event inside hybrid pmu On hybrid platform, user may want to enable events on one pmu. Following syntax are supported: cpu_core// cpu_atom// But the syntax doesn't work for cache event. Before: # perf stat -e cpu_core/LLC-loads/ -a -- sleep 1 event syntax error: 'cpu_core/LLC-loads/' \___ unknown term 'LLC-loads' for pmu 'cpu_core' Cache events are a bit complex. We can't create aliases for them. We use another solution. For example, if we use "cpu_core/LLC-loads/", in parse_events_add_pmu(), term->config is "LLC-loads". Then we create a new parser to scan "LLC-loads". The parse_events_add_cache() would be called during parsing. The parse_state->hybrid_pmu_name is used to identify the pmu where the event should be enabled on. After: # perf stat -e cpu_core/LLC-loads/ -a -- sleep 1 Performance counter stats for 'system wide': 24,593 cpu_core/LLC-loads/ 1.003911601 seconds time elapsed Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-13-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 63 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 6236513e6369..4dad14265b81 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -38,6 +38,7 @@ #include "util/event.h" #include "util/pfm.h" #include "util/parse-events-hybrid.h" +#include "util/pmu-hybrid.h" #include "perf.h" #define MAX_NAME_LEN 100 @@ -48,6 +49,9 @@ extern int parse_events_debug; int parse_events_parse(void *parse_state, void *scanner); static int get_config_terms(struct list_head *head_config, struct list_head *head_terms __maybe_unused); +static int parse_events__with_hybrid_pmu(struct parse_events_state *parse_state, + const char *str, char *pmu_name, + struct list_head *list); static struct perf_pmu_event_symbol *perf_pmu_events_list; /* @@ -1474,6 +1478,33 @@ static bool config_term_percore(struct list_head *config_terms) return false; } +static int parse_events__inside_hybrid_pmu(struct parse_events_state *parse_state, + struct list_head *list, char *name, + struct list_head *head_config) +{ + struct parse_events_term *term; + int ret = -1; + + if (parse_state->fake_pmu || !head_config || list_empty(head_config) || + !perf_pmu__is_hybrid(name)) { + return -1; + } + + /* + * More than one term in list. + */ + if (head_config->next && head_config->next->next != head_config) + return -1; + + term = list_first_entry(head_config, struct parse_events_term, list); + if (term && term->config && strcmp(term->config, "event")) { + ret = parse_events__with_hybrid_pmu(parse_state, term->config, + name, list); + } + + return ret; +} + int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, struct list_head *head_config, @@ -1567,6 +1598,11 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (pmu->default_config && get_config_chgs(pmu, head_config, &config_terms)) return -ENOMEM; + if (!parse_events__inside_hybrid_pmu(parse_state, list, name, + head_config)) { + return 0; + } + if (!parse_state->fake_pmu && perf_pmu__config(pmu, &attr, head_config, parse_state->error)) { struct evsel_config_term *pos, *tmp; @@ -2189,6 +2225,33 @@ int parse_events_terms(struct list_head *terms, const char *str) return ret; } +static int parse_events__with_hybrid_pmu(struct parse_events_state *parse_state, + const char *str, char *pmu_name, + struct list_head *list) +{ + struct parse_events_state ps = { + .list = LIST_HEAD_INIT(ps.list), + .stoken = PE_START_EVENTS, + .hybrid_pmu_name = pmu_name, + .idx = parse_state->idx, + }; + int ret; + + ret = parse_events__scanner(str, &ps); + perf_pmu__parse_cleanup(); + + if (!ret) { + if (!list_empty(&ps.list)) { + list_splice(&ps.list, list); + parse_state->idx = ps.idx; + return 0; + } else + return -1; + } + + return ret; +} + int __parse_events(struct evlist *evlist, const char *str, struct parse_events_error *err, struct perf_pmu *fake_pmu) { -- cgit v1.2.3 From b53a0755d5c2d19b13db897d6faf4969e03e45ae Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:26 +0800 Subject: perf record: Create two hybrid 'cycles' events by default When evlist is empty, for example no '-e' specified in perf record, one default 'cycles' event is added to evlist. While on hybrid platform, it needs to create two default 'cycles' events. One is for cpu_core, the other is for cpu_atom. This patch actually calls evsel__new_cycles() two times to create two 'cycles' events. # ./perf record -vv -a -- sleep 1 ... ------------------------------------------------------------ perf_event_attr: size 120 config 0x400000000 { sample_period, sample_freq } 4000 sample_type IP|TID|TIME|ID|CPU|PERIOD read_format ID disabled 1 inherit 1 freq 1 precise_ip 3 sample_id_all 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 = 5 sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 = 6 sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8 = 7 sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8 = 9 sys_perf_event_open: pid -1 cpu 4 group_fd -1 flags 0x8 = 10 sys_perf_event_open: pid -1 cpu 5 group_fd -1 flags 0x8 = 11 sys_perf_event_open: pid -1 cpu 6 group_fd -1 flags 0x8 = 12 sys_perf_event_open: pid -1 cpu 7 group_fd -1 flags 0x8 = 13 sys_perf_event_open: pid -1 cpu 8 group_fd -1 flags 0x8 = 14 sys_perf_event_open: pid -1 cpu 9 group_fd -1 flags 0x8 = 15 sys_perf_event_open: pid -1 cpu 10 group_fd -1 flags 0x8 = 16 sys_perf_event_open: pid -1 cpu 11 group_fd -1 flags 0x8 = 17 sys_perf_event_open: pid -1 cpu 12 group_fd -1 flags 0x8 = 18 sys_perf_event_open: pid -1 cpu 13 group_fd -1 flags 0x8 = 19 sys_perf_event_open: pid -1 cpu 14 group_fd -1 flags 0x8 = 20 sys_perf_event_open: pid -1 cpu 15 group_fd -1 flags 0x8 = 21 ------------------------------------------------------------ perf_event_attr: size 120 config 0x800000000 { sample_period, sample_freq } 4000 sample_type IP|TID|TIME|ID|CPU|PERIOD read_format ID disabled 1 inherit 1 freq 1 precise_ip 3 sample_id_all 1 exclude_guest 1 ------------------------------------------------------------ sys_perf_event_open: pid -1 cpu 16 group_fd -1 flags 0x8 = 22 sys_perf_event_open: pid -1 cpu 17 group_fd -1 flags 0x8 = 23 sys_perf_event_open: pid -1 cpu 18 group_fd -1 flags 0x8 = 24 sys_perf_event_open: pid -1 cpu 19 group_fd -1 flags 0x8 = 25 sys_perf_event_open: pid -1 cpu 20 group_fd -1 flags 0x8 = 26 sys_perf_event_open: pid -1 cpu 21 group_fd -1 flags 0x8 = 27 sys_perf_event_open: pid -1 cpu 22 group_fd -1 flags 0x8 = 28 sys_perf_event_open: pid -1 cpu 23 group_fd -1 flags 0x8 = 29 ------------------------------------------------------------ We have to create evlist-hybrid.c otherwise due to the symbol dependency the perf test python would be failed. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-14-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 19 +++++++++++++++---- tools/perf/util/Build | 1 + tools/perf/util/evlist-hybrid.c | 41 +++++++++++++++++++++++++++++++++++++++++ tools/perf/util/evlist-hybrid.h | 12 ++++++++++++ tools/perf/util/evlist.c | 5 ++++- tools/perf/util/evsel.c | 6 +++--- tools/perf/util/evsel.h | 2 +- 7 files changed, 77 insertions(+), 9 deletions(-) create mode 100644 tools/perf/util/evlist-hybrid.c create mode 100644 tools/perf/util/evlist-hybrid.h diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 5fb9665a2ec2..6af46c6a4fd8 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -47,6 +47,8 @@ #include "util/util.h" #include "util/pfm.h" #include "util/clockid.h" +#include "util/pmu-hybrid.h" +#include "util/evlist-hybrid.h" #include "asm/bug.h" #include "perf.h" @@ -2790,10 +2792,19 @@ int cmd_record(int argc, const char **argv) if (record.opts.overwrite) record.opts.tail_synthesize = true; - if (rec->evlist->core.nr_entries == 0 && - __evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) { - pr_err("Not enough memory for event selector list\n"); - goto out; + if (rec->evlist->core.nr_entries == 0) { + if (perf_pmu__has_hybrid()) { + err = evlist__add_default_hybrid(rec->evlist, + !record.opts.no_samples); + } else { + err = __evlist__add_default(rec->evlist, + !record.opts.no_samples); + } + + if (err < 0) { + pr_err("Not enough memory for event selector list\n"); + goto out; + } } if (rec->opts.target.tid && !rec->opts.no_inherit_set) diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 34fcbf3c3bce..8c0d9f368ebc 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -10,6 +10,7 @@ perf-y += db-export.o perf-y += env.o perf-y += event.o perf-y += evlist.o +perf-y += evlist-hybrid.o perf-y += sideband_evlist.o perf-y += evsel.o perf-y += evsel_fprintf.o diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c new file mode 100644 index 000000000000..e11998526f2e --- /dev/null +++ b/tools/perf/util/evlist-hybrid.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include "cpumap.h" +#include "evlist.h" +#include "evsel.h" +#include "../perf.h" +#include "util/pmu-hybrid.h" +#include "util/evlist-hybrid.h" +#include +#include +#include +#include +#include +#include +#include + +int evlist__add_default_hybrid(struct evlist *evlist, bool precise) +{ + struct evsel *evsel; + struct perf_pmu *pmu; + __u64 config; + struct perf_cpu_map *cpus; + + perf_pmu__for_each_hybrid_pmu(pmu) { + config = PERF_COUNT_HW_CPU_CYCLES | + ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT); + evsel = evsel__new_cycles(precise, PERF_TYPE_HARDWARE, + config); + if (!evsel) + return -ENOMEM; + + cpus = perf_cpu_map__get(pmu->cpus); + evsel->core.cpus = cpus; + evsel->core.own_cpus = perf_cpu_map__get(cpus); + evsel->pmu_name = strdup(pmu->name); + evlist__add(evlist, evsel); + } + + return 0; +} diff --git a/tools/perf/util/evlist-hybrid.h b/tools/perf/util/evlist-hybrid.h new file mode 100644 index 000000000000..e25861649d8f --- /dev/null +++ b/tools/perf/util/evlist-hybrid.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_EVLIST_HYBRID_H +#define __PERF_EVLIST_HYBRID_H + +#include +#include +#include "evlist.h" +#include + +int evlist__add_default_hybrid(struct evlist *evlist, bool precise); + +#endif /* __PERF_EVLIST_HYBRID_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e71041c89010..6e5c41528c7d 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -26,6 +26,7 @@ #include "util/string2.h" #include "util/perf_api_probe.h" #include "util/evsel_fprintf.h" +#include "util/evlist-hybrid.h" #include #include #include @@ -248,8 +249,10 @@ void evlist__set_leader(struct evlist *evlist) int __evlist__add_default(struct evlist *evlist, bool precise) { - struct evsel *evsel = evsel__new_cycles(precise); + struct evsel *evsel; + evsel = evsel__new_cycles(precise, PERF_TYPE_HARDWARE, + PERF_COUNT_HW_CPU_CYCLES); if (evsel == NULL) return -ENOMEM; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 080ddcfefbcd..0fc9555a2ac4 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -295,11 +295,11 @@ static bool perf_event_can_profile_kernel(void) return perf_event_paranoid_check(1); } -struct evsel *evsel__new_cycles(bool precise) +struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config) { struct perf_event_attr attr = { - .type = PERF_TYPE_HARDWARE, - .config = PERF_COUNT_HW_CPU_CYCLES, + .type = type, + .config = config, .exclude_kernel = !perf_event_can_profile_kernel(), }; struct evsel *evsel; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index ad74c83bd35b..a9cc9563ac59 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -221,7 +221,7 @@ static inline struct evsel *evsel__newtp(const char *sys, const char *name) return evsel__newtp_idx(sys, name, 0); } -struct evsel *evsel__new_cycles(bool precise); +struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config); struct tep_event *event_format__new(const char *sys, const char *name); -- cgit v1.2.3 From ac2dc29edd21f9ec011863336ab1c7c9fe77a1d3 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:27 +0800 Subject: perf stat: Add default hybrid events Previously if '-e' is not specified in perf stat, some software events and hardware events are added to evlist by default. Before: # perf stat -a -- sleep 1 Performance counter stats for 'system wide': 24,044.40 msec cpu-clock # 23.946 CPUs utilized 99 context-switches # 4.117 /sec 24 cpu-migrations # 0.998 /sec 3 page-faults # 0.125 /sec 7,000,244 cycles # 0.000 GHz 2,955,024 instructions # 0.42 insn per cycle 608,941 branches # 25.326 K/sec 31,991 branch-misses # 5.25% of all branches 1.004106859 seconds time elapsed Among the events, cycles, instructions, branches and branch-misses are hardware events. One hybrid platform, two hardware events are created for one hardware event. cpu_core/cycles/, cpu_atom/cycles/, cpu_core/instructions/, cpu_atom/instructions/, cpu_core/branches/, cpu_atom/branches/, cpu_core/branch-misses/, cpu_atom/branch-misses/ These events would be added to evlist on hybrid platform. Since parse_events() has been supported to create two hardware events for one event on hybrid platform, so we just use parse_events(evlist, "cycles,instructions,branches,branch-misses") to create the default events and add them to evlist. After: # perf stat -a -- sleep 1 Performance counter stats for 'system wide': 24,043.99 msec cpu-clock # 23.991 CPUs utilized 139 context-switches # 5.781 /sec 25 cpu-migrations # 1.040 /sec 6 page-faults # 0.250 /sec 10,381,751 cpu_core/cycles/ # 431.782 K/sec 1,264,216 cpu_atom/cycles/ # 52.579 K/sec 3,406,958 cpu_core/instructions/ # 141.697 K/sec 414,588 cpu_atom/instructions/ # 17.243 K/sec 705,149 cpu_core/branches/ # 29.327 K/sec 82,358 cpu_atom/branches/ # 3.425 K/sec 40,821 cpu_core/branch-misses/ # 1.698 K/sec 9,086 cpu_atom/branch-misses/ # 377.891 /sec 1.002228863 seconds time elapsed We can see two events are created for one hardware event. One TODO is, the shadow stats looks a bit different, now it's just 'M/sec'. The perf_stat__update_shadow_stats and perf_stat__print_shadow_stats need to be improved in future if we want to get the original shadow stats. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-15-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 835e3696b9ce..42e60764ad8d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1639,6 +1639,12 @@ static int add_default_attributes(void) { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, +}; + struct perf_event_attr default_sw_attrs[] = { + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, }; /* @@ -1876,6 +1882,28 @@ setup_metrics: } if (!evsel_list->core.nr_entries) { + if (perf_pmu__has_hybrid()) { + const char *hybrid_str = "cycles,instructions,branches,branch-misses"; + + if (target__has_cpu(&target)) + default_sw_attrs[0].config = PERF_COUNT_SW_CPU_CLOCK; + + if (evlist__add_default_attrs(evsel_list, + default_sw_attrs) < 0) { + return -1; + } + + err = parse_events(evsel_list, hybrid_str, &errinfo); + if (err) { + fprintf(stderr, + "Cannot set up hybrid events %s: %d\n", + hybrid_str, err); + parse_events_print_error(&errinfo, hybrid_str); + return -1; + } + return err; + } + if (target__has_cpu(&target)) default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; -- cgit v1.2.3 From 92637cc7295510f4b3cb945cafcaec97c82e42f2 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:28 +0800 Subject: perf stat: Filter out unmatched aggregation for hybrid event perf-stat has supported some aggregation modes, such as --per-core, --per-socket and etc. While for hybrid event, it may only available on part of cpus. So for --per-core, we need to filter out the unavailable cores, for --per-socket, filter out the unavailable sockets, and so on. Before: # perf stat --per-core -e cpu_core/cycles/ -a -- sleep 1 Performance counter stats for 'system wide': S0-D0-C0 2 479,530 cpu_core/cycles/ S0-D0-C4 2 175,007 cpu_core/cycles/ S0-D0-C8 2 166,240 cpu_core/cycles/ S0-D0-C12 2 704,673 cpu_core/cycles/ S0-D0-C16 2 865,835 cpu_core/cycles/ S0-D0-C20 2 2,958,461 cpu_core/cycles/ S0-D0-C24 2 163,988 cpu_core/cycles/ S0-D0-C28 2 164,729 cpu_core/cycles/ S0-D0-C32 0 cpu_core/cycles/ S0-D0-C33 0 cpu_core/cycles/ S0-D0-C34 0 cpu_core/cycles/ S0-D0-C35 0 cpu_core/cycles/ S0-D0-C36 0 cpu_core/cycles/ S0-D0-C37 0 cpu_core/cycles/ S0-D0-C38 0 cpu_core/cycles/ S0-D0-C39 0 cpu_core/cycles/ 1.003597211 seconds time elapsed After: # perf stat --per-core -e cpu_core/cycles/ -a -- sleep 1 Performance counter stats for 'system wide': S0-D0-C0 2 210,428 cpu_core/cycles/ S0-D0-C4 2 444,830 cpu_core/cycles/ S0-D0-C8 2 435,241 cpu_core/cycles/ S0-D0-C12 2 423,976 cpu_core/cycles/ S0-D0-C16 2 859,350 cpu_core/cycles/ S0-D0-C20 2 1,559,589 cpu_core/cycles/ S0-D0-C24 2 163,924 cpu_core/cycles/ S0-D0-C28 2 376,610 cpu_core/cycles/ 1.003621290 seconds time elapsed Signed-off-by: Jin Yao Co-developed-by: Jiri Olsa Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-16-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 0679129ad05e..a76fff5e7d83 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -667,6 +667,9 @@ static void print_counter_aggrdata(struct perf_stat_config *config, if (!collect_data(config, counter, aggr_cb, &ad)) return; + if (perf_pmu__has_hybrid() && ad.ena == 0) + return; + nr = ad.nr; ena = ad.ena; run = ad.run; -- cgit v1.2.3 From 660e533e87ff4e66434f90fca987b929d4eb0059 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:29 +0800 Subject: perf stat: Warn group events from different hybrid PMU If a group has events which are from different hybrid PMUs, shows a warning: "WARNING: events in group from different hybrid PMUs!" This is to remind the user not to put the core event and atom event into one group. Next, just disable grouping. # perf stat -e "{cpu_core/cycles/,cpu_atom/cycles/}" -a -- sleep 1 WARNING: events in group from different hybrid PMUs! WARNING: grouped events cpus do not match, disabling group: anon group { cpu_core/cycles/, cpu_atom/cycles/ } Performance counter stats for 'system wide': 5,438,125 cpu_core/cycles/ 3,914,586 cpu_atom/cycles/ 1.004250966 seconds time elapsed Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-17-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 4 ++++ tools/perf/util/evlist-hybrid.c | 47 ++++++++++++++++++++++++++++++++++++++ tools/perf/util/evlist-hybrid.h | 2 ++ tools/perf/util/evsel.c | 6 +++++ tools/perf/util/evsel.h | 1 + tools/perf/util/python-ext-sources | 2 ++ 6 files changed, 62 insertions(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 42e60764ad8d..5a830ae09418 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -48,6 +48,7 @@ #include "util/pmu.h" #include "util/event.h" #include "util/evlist.h" +#include "util/evlist-hybrid.h" #include "util/evsel.h" #include "util/debug.h" #include "util/color.h" @@ -243,6 +244,9 @@ static void evlist__check_cpu_maps(struct evlist *evlist) struct evsel *evsel, *pos, *leader; char buf[1024]; + if (evlist__has_hybrid(evlist)) + evlist__warn_hybrid_group(evlist); + evlist__for_each_entry(evlist, evsel) { leader = evsel->leader; diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c index e11998526f2e..db3f5fbdebe1 100644 --- a/tools/perf/util/evlist-hybrid.c +++ b/tools/perf/util/evlist-hybrid.c @@ -7,6 +7,7 @@ #include "../perf.h" #include "util/pmu-hybrid.h" #include "util/evlist-hybrid.h" +#include "debug.h" #include #include #include @@ -39,3 +40,49 @@ int evlist__add_default_hybrid(struct evlist *evlist, bool precise) return 0; } + +static bool group_hybrid_conflict(struct evsel *leader) +{ + struct evsel *pos, *prev = NULL; + + for_each_group_evsel(pos, leader) { + if (!evsel__is_hybrid(pos)) + continue; + + if (prev && strcmp(prev->pmu_name, pos->pmu_name)) + return true; + + prev = pos; + } + + return false; +} + +void evlist__warn_hybrid_group(struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel__is_group_leader(evsel) && + evsel->core.nr_members > 1 && + group_hybrid_conflict(evsel)) { + pr_warning("WARNING: events in group from " + "different hybrid PMUs!\n"); + return; + } + } +} + +bool evlist__has_hybrid(struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->pmu_name && + perf_pmu__is_hybrid(evsel->pmu_name)) { + return true; + } + } + + return false; +} diff --git a/tools/perf/util/evlist-hybrid.h b/tools/perf/util/evlist-hybrid.h index e25861649d8f..19f74b4c340a 100644 --- a/tools/perf/util/evlist-hybrid.h +++ b/tools/perf/util/evlist-hybrid.h @@ -8,5 +8,7 @@ #include int evlist__add_default_hybrid(struct evlist *evlist, bool precise); +void evlist__warn_hybrid_group(struct evlist *evlist); +bool evlist__has_hybrid(struct evlist *evlist); #endif /* __PERF_EVLIST_HYBRID_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 0fc9555a2ac4..4a3cd1b5bb33 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -47,6 +47,7 @@ #include "memswap.h" #include "util.h" #include "hashmap.h" +#include "pmu-hybrid.h" #include "../perf-sys.h" #include "util/parse-branch-options.h" #include @@ -2819,3 +2820,8 @@ void evsel__zero_per_pkg(struct evsel *evsel) hashmap__clear(evsel->per_pkg_mask); } } + +bool evsel__is_hybrid(struct evsel *evsel) +{ + return evsel->pmu_name && perf_pmu__is_hybrid(evsel->pmu_name); +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index a9cc9563ac59..75cf5dbfe208 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -462,4 +462,5 @@ struct perf_env *evsel__env(struct evsel *evsel); int evsel__store_ids(struct evsel *evsel, struct evlist *evlist); void evsel__zero_per_pkg(struct evsel *evsel); +bool evsel__is_hybrid(struct evsel *evsel); #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 845dd46e3c61..d7c976671e3a 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -37,3 +37,5 @@ util/units.c util/affinity.c util/rwsem.c util/hashmap.c +util/pmu-hybrid.c +util/fncache.c -- cgit v1.2.3 From 91c0f5ec812f38f5e900b5557254baf563c4a2e3 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:30 +0800 Subject: perf record: Uniquify hybrid event name For perf-record, it would be useful to tell user the pmu which the event belongs to. For example, # perf record -a -- sleep 1 # perf report # To display the perf.data header info, please use --header/--header-only options. # # # Total Lost Samples: 0 # # Samples: 106 of event 'cpu_core/cycles/' # Event count (approx.): 22043448 # # Overhead Command Shared Object Symbol # ........ ............ ....................... ............................ # ... Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-18-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 6af46c6a4fd8..3337b5f93336 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1605,6 +1605,32 @@ static void hit_auxtrace_snapshot_trigger(struct record *rec) } } +static void record__uniquify_name(struct record *rec) +{ + struct evsel *pos; + struct evlist *evlist = rec->evlist; + char *new_name; + int ret; + + if (!perf_pmu__has_hybrid()) + return; + + evlist__for_each_entry(evlist, pos) { + if (!evsel__is_hybrid(pos)) + continue; + + if (strchr(pos->name, '/')) + continue; + + ret = asprintf(&new_name, "%s/%s/", + pos->pmu_name, pos->name); + if (ret) { + free(pos->name); + pos->name = new_name; + } + } +} + static int __cmd_record(struct record *rec, int argc, const char **argv) { int err; @@ -1709,6 +1735,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) if (data->is_pipe && rec->evlist->core.nr_entries == 1) rec->opts.sample_id = true; + record__uniquify_name(rec); + if (record__open(rec) != 0) { err = -1; goto out_child; -- cgit v1.2.3 From 2541cb63ac0c3dfbbe363dd09a16dfdd4096fc88 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:31 +0800 Subject: perf tests: Add hybrid cases for 'Parse event definition strings' test Add basic hybrid test cases for 'Parse event definition strings' test. # perf test 6 6: Parse event definition strings : Ok Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-19-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-events.c | 171 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 026c54743311..0f113b2b36a3 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1512,6 +1512,124 @@ static int test__all_tracepoints(struct evlist *evlist) return test__checkevent_tracepoint_multi(evlist); } +static int test__hybrid_hw_event_with_pmu(struct evlist *evlist) +{ + struct evsel *evsel = evlist__first(evlist); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config); + return 0; +} + +static int test__hybrid_hw_group_event(struct evlist *evlist) +{ + struct evsel *evsel, *leader; + + evsel = leader = evlist__first(evlist); + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + + evsel = evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", 0xc0 == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + return 0; +} + +static int test__hybrid_sw_hw_group_event(struct evlist *evlist) +{ + struct evsel *evsel, *leader; + + evsel = leader = evlist__first(evlist); + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + + evsel = evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + return 0; +} + +static int test__hybrid_hw_sw_group_event(struct evlist *evlist) +{ + struct evsel *evsel, *leader; + + evsel = leader = evlist__first(evlist); + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + + evsel = evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + return 0; +} + +static int test__hybrid_group_modifier1(struct evlist *evlist) +{ + struct evsel *evsel, *leader; + + evsel = leader = evlist__first(evlist); + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", 0x3c == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); + + evsel = evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", 0xc0 == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); + return 0; +} + +static int test__hybrid_raw1(struct evlist *evlist) +{ + struct evsel *evsel = evlist__first(evlist); + + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config); + + /* The type of second event is randome value */ + evsel = evsel__next(evsel); + TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config); + return 0; +} + +static int test__hybrid_raw2(struct evlist *evlist) +{ + struct evsel *evsel = evlist__first(evlist); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config); + return 0; +} + +static int test__hybrid_cache_event(struct evlist *evlist) +{ + struct evsel *evsel = evlist__first(evlist); + + TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", 0x2 == (evsel->core.attr.config & 0xffffffff)); + + evsel = evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", 0x10002 == (evsel->core.attr.config & 0xffffffff)); + return 0; +} + struct evlist_test { const char *name; __u32 type; @@ -1868,6 +1986,54 @@ static struct terms_test test__terms[] = { }, }; +static struct evlist_test test__hybrid_events[] = { + { + .name = "cpu_core/cpu-cycles/", + .check = test__hybrid_hw_event_with_pmu, + .id = 0, + }, + { + .name = "{cpu_core/cpu-cycles/,cpu_core/instructions/}", + .check = test__hybrid_hw_group_event, + .id = 1, + }, + { + .name = "{cpu-clock,cpu_core/cpu-cycles/}", + .check = test__hybrid_sw_hw_group_event, + .id = 2, + }, + { + .name = "{cpu_core/cpu-cycles/,cpu-clock}", + .check = test__hybrid_hw_sw_group_event, + .id = 3, + }, + { + .name = "{cpu_core/cpu-cycles/k,cpu_core/instructions/u}", + .check = test__hybrid_group_modifier1, + .id = 4, + }, + { + .name = "r1a", + .check = test__hybrid_raw1, + .id = 5, + }, + { + .name = "cpu_core/r1a/", + .check = test__hybrid_raw2, + .id = 6, + }, + { + .name = "cpu_core/config=10,config1,config2=3,period=1000/u", + .check = test__checkevent_pmu, + .id = 7, + }, + { + .name = "cpu_core/LLC-loads/,cpu_atom/LLC-load-misses/", + .check = test__hybrid_cache_event, + .id = 8, + }, +}; + static int test_event(struct evlist_test *e) { struct parse_events_error err; @@ -2035,6 +2201,11 @@ do { \ ret2 = ret1; \ } while (0) + if (perf_pmu__has_hybrid()) { + TEST_EVENTS(test__hybrid_events); + return ret2; + } + TEST_EVENTS(test__events); if (test_pmu()) -- cgit v1.2.3 From afff9f312e37c64a789aad0fab1ec597404a500f Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:32 +0800 Subject: perf tests: Add hybrid cases for 'Roundtrip evsel->name' test Since for one hw event, two hybrid events are created. For example, evsel->idx evsel__name(evsel) 0 cycles 1 cycles 2 instructions 3 instructions ... So for comparing the evsel name on hybrid, the evsel->idx needs to be divided by 2. # ./perf test 14 14: Roundtrip evsel->name : Ok Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-20-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/evsel-roundtrip-name.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c index f7f3e5b4c180..b74cf80d1f10 100644 --- a/tools/perf/tests/evsel-roundtrip-name.c +++ b/tools/perf/tests/evsel-roundtrip-name.c @@ -4,6 +4,7 @@ #include "parse-events.h" #include "tests.h" #include "debug.h" +#include "pmu.h" #include #include @@ -62,7 +63,8 @@ static int perf_evsel__roundtrip_cache_name_test(void) return ret; } -static int __perf_evsel__name_array_test(const char *names[], int nr_names) +static int __perf_evsel__name_array_test(const char *names[], int nr_names, + int distance) { int i, err; struct evsel *evsel; @@ -82,9 +84,9 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names) err = 0; evlist__for_each_entry(evlist, evsel) { - if (strcmp(evsel__name(evsel), names[evsel->idx])) { + if (strcmp(evsel__name(evsel), names[evsel->idx / distance])) { --err; - pr_debug("%s != %s\n", evsel__name(evsel), names[evsel->idx]); + pr_debug("%s != %s\n", evsel__name(evsel), names[evsel->idx / distance]); } } @@ -93,18 +95,21 @@ out_delete_evlist: return err; } -#define perf_evsel__name_array_test(names) \ - __perf_evsel__name_array_test(names, ARRAY_SIZE(names)) +#define perf_evsel__name_array_test(names, distance) \ + __perf_evsel__name_array_test(names, ARRAY_SIZE(names), distance) int test__perf_evsel__roundtrip_name_test(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = 0, ret = 0; - err = perf_evsel__name_array_test(evsel__hw_names); + if (perf_pmu__has_hybrid()) + return perf_evsel__name_array_test(evsel__hw_names, 2); + + err = perf_evsel__name_array_test(evsel__hw_names, 1); if (err) ret = err; - err = __perf_evsel__name_array_test(evsel__sw_names, PERF_COUNT_SW_DUMMY + 1); + err = __perf_evsel__name_array_test(evsel__sw_names, PERF_COUNT_SW_DUMMY + 1, 1); if (err) ret = err; -- cgit v1.2.3 From f15da0b1fb7bdff4891218f648d374cfffeb24fa Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:33 +0800 Subject: perf tests: Skip 'Setup struct perf_event_attr' test for hybrid For hybrid, the attr.type consists of pmu type id + original type. There will be much changes for this test. Now we temporarily skip this test case and TODO in future. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-21-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index dd39ce9b0277..9b40a25376ae 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -34,6 +34,7 @@ #include "event.h" #include "util.h" #include "tests.h" +#include "pmu.h" #define ENV "PERF_TEST_ATTR" @@ -184,6 +185,9 @@ int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused) char path_dir[PATH_MAX]; char *exec_path; + if (perf_pmu__has_hybrid()) + return TEST_SKIP; + /* First try development tree tests. */ if (!lstat("./tests", &st)) return run_dir("./tests", "./perf"); -- cgit v1.2.3 From 43eb05d066795bdfea58a6a0cea77bbaa1a09b30 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:34 +0800 Subject: perf tests: Support 'Track with sched_switch' test for hybrid Since for "cycles:u' on hybrid platform, it creates two "cycles". So the number of events in evlist is not expected in next test steps. Now we just use one event "cpu_core/cycles:u/" for hybrid. # ./perf test 35 35: Track with sched_switch : Ok Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-22-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/switch-tracking.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 3ebaa758df77..62c0ec21aaa8 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -18,6 +18,7 @@ #include "record.h" #include "tests.h" #include "util/mmap.h" +#include "pmu.h" static int spin_sleep(void) { @@ -371,7 +372,10 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ cpu_clocks_evsel = evlist__last(evlist); /* Second event */ - err = parse_events(evlist, "cycles:u", NULL); + if (perf_pmu__has_hybrid()) + err = parse_events(evlist, "cpu_core/cycles/u", NULL); + else + err = parse_events(evlist, "cycles:u", NULL); if (err) { pr_debug("Failed to parse event cycles:u\n"); goto out_err; -- cgit v1.2.3 From 6081e876edd3f5d23273385730e482eca0afb2c8 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:35 +0800 Subject: perf tests: Support 'Parse and process metrics' test for hybrid Some events are not supported. Only pick up some cases for hybrid. # ./perf test 68 68: Parse and process metrics : Ok Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-23-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-metric.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 4968c4106254..4f6f4904e852 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -11,6 +11,7 @@ #include "debug.h" #include "expr.h" #include "stat.h" +#include "pmu.h" static struct pmu_event pme_test[] = { { @@ -372,10 +373,13 @@ int test__parse_metric(struct test *test __maybe_unused, int subtest __maybe_unu { TEST_ASSERT_VAL("IPC failed", test_ipc() == 0); TEST_ASSERT_VAL("frontend failed", test_frontend() == 0); - TEST_ASSERT_VAL("cache_miss_cycles failed", test_cache_miss_cycles() == 0); TEST_ASSERT_VAL("DCache_L2 failed", test_dcache_l2() == 0); TEST_ASSERT_VAL("recursion fail failed", test_recursion_fail() == 0); - TEST_ASSERT_VAL("test metric group", test_metric_group() == 0); TEST_ASSERT_VAL("Memory bandwidth", test_memory_bandwidth() == 0); + + if (!perf_pmu__has_hybrid()) { + TEST_ASSERT_VAL("cache_miss_cycles failed", test_cache_miss_cycles() == 0); + TEST_ASSERT_VAL("test metric group", test_metric_group() == 0); + } return 0; } -- cgit v1.2.3 From c102038892f73cf70f8c50e4fafb45d6e5465129 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:36 +0800 Subject: perf tests: Support 'Session topology' test for hybrid Force to create one event "cpu_core/cycles/" by default, otherwise in evlist__valid_sample_type, the checking of 'if (evlist->core.nr_entries == 1)' would be failed. # ./perf test 41 41: Session topology : Ok Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-24-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/topology.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 050489807a47..ec4e3b21b831 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -8,6 +8,7 @@ #include "session.h" #include "evlist.h" #include "debug.h" +#include "pmu.h" #include #define TEMPL "/tmp/perf-test-XXXXXX" @@ -40,8 +41,16 @@ static int session_write_header(char *path) session = perf_session__new(&data, false, NULL); TEST_ASSERT_VAL("can't get session", !IS_ERR(session)); - session->evlist = evlist__new_default(); - TEST_ASSERT_VAL("can't get evlist", session->evlist); + if (!perf_pmu__has_hybrid()) { + session->evlist = evlist__new_default(); + TEST_ASSERT_VAL("can't get evlist", session->evlist); + } else { + struct parse_events_error err; + + session->evlist = evlist__new(); + TEST_ASSERT_VAL("can't get evlist", session->evlist); + parse_events(session->evlist, "cpu_core/cycles/", &err); + } perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY); perf_header__set_feat(&session->header, HEADER_NRCPUS); -- cgit v1.2.3 From d9da6f70eb23511007cc6ed0aba02d9f61b3d6cf Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:37 +0800 Subject: perf tests: Support 'Convert perf time to TSC' test for hybrid Since for "cycles:u' on hybrid platform, it creates two "cycles". So the second evsel in evlist also needs initialization. With this patch, # ./perf test 71 71: Convert perf time to TSC : Ok Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-25-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/perf-time-to-tsc.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index 680c3cffb128..85d75b9b25a1 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -20,6 +20,7 @@ #include "tsc.h" #include "mmap.h" #include "tests.h" +#include "pmu.h" #define CHECK__(x) { \ while ((x) < 0) { \ @@ -88,6 +89,17 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe evsel->core.attr.disabled = 1; evsel->core.attr.enable_on_exec = 0; + /* + * For hybrid "cycles:u", it creates two events. + * Init the second evsel here. + */ + if (perf_pmu__has_hybrid()) { + evsel = evsel__next(evsel); + evsel->core.attr.comm = 1; + evsel->core.attr.disabled = 1; + evsel->core.attr.enable_on_exec = 0; + } + CHECK__(evlist__open(evlist)); CHECK__(evlist__mmap(evlist, UINT_MAX)); -- cgit v1.2.3 From a37f3b885610f89c3f2285756eb3f386288c3d41 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:38 +0800 Subject: perf tests: Skip 'perf stat metrics (shadow stat) test' for hybrid Currently we don't support shadow stat for hybrid. root@ssp-pwrt-002:~# ./perf stat -e cycles,instructions -a -- sleep 1 Performance counter stats for 'system wide': 12,883,109,591 cpu_core/cycles/ 6,405,163,221 cpu_atom/cycles/ 555,553,778 cpu_core/instructions/ 841,158,734 cpu_atom/instructions/ 1.002644773 seconds time elapsed Now there is no shadow stat 'insn per cycle' reported. We will support it later and now just skip the 'perf stat metrics (shadow stat) test'. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-26-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat+shadow_stat.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/tests/shell/stat+shadow_stat.sh b/tools/perf/tests/shell/stat+shadow_stat.sh index ebebd3596cf9..e6e35fc6c882 100755 --- a/tools/perf/tests/shell/stat+shadow_stat.sh +++ b/tools/perf/tests/shell/stat+shadow_stat.sh @@ -7,6 +7,9 @@ set -e # skip if system-wide mode is forbidden perf stat -a true > /dev/null 2>&1 || exit 2 +# skip if on hybrid platform +perf stat -a -e cycles sleep 1 2>&1 | grep -e cpu_core && exit 2 + test_global_aggr() { perf stat -a --no-big-num -e cycles,instructions sleep 1 2>&1 | \ -- cgit v1.2.3 From 2750ce1d4df2e70630d76bc53da160ca43a80d22 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 27 Apr 2021 15:01:39 +0800 Subject: perf Documentation: Document intel-hybrid support Add some words and examples to help understanding of Intel hybrid perf support. Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210427070139.25256-27-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/intel-hybrid.txt | 214 ++++++++++++++++++++++++++++++ tools/perf/Documentation/perf-record.txt | 1 + tools/perf/Documentation/perf-stat.txt | 2 + 3 files changed, 217 insertions(+) create mode 100644 tools/perf/Documentation/intel-hybrid.txt diff --git a/tools/perf/Documentation/intel-hybrid.txt b/tools/perf/Documentation/intel-hybrid.txt new file mode 100644 index 000000000000..07f0aa3bf682 --- /dev/null +++ b/tools/perf/Documentation/intel-hybrid.txt @@ -0,0 +1,214 @@ +Intel hybrid support +-------------------- +Support for Intel hybrid events within perf tools. + +For some Intel platforms, such as AlderLake, which is hybrid platform and +it consists of atom cpu and core cpu. Each cpu has dedicated event list. +Part of events are available on core cpu, part of events are available +on atom cpu and even part of events are available on both. + +Kernel exports two new cpu pmus via sysfs: +/sys/devices/cpu_core +/sys/devices/cpu_atom + +The 'cpus' files are created under the directories. For example, + +cat /sys/devices/cpu_core/cpus +0-15 + +cat /sys/devices/cpu_atom/cpus +16-23 + +It indicates cpu0-cpu15 are core cpus and cpu16-cpu23 are atom cpus. + +Quickstart + +List hybrid event +----------------- + +As before, use perf-list to list the symbolic event. + +perf list + +inst_retired.any + [Fixed Counter: Counts the number of instructions retired. Unit: cpu_atom] +inst_retired.any + [Number of instructions retired. Fixed Counter - architectural event. Unit: cpu_core] + +The 'Unit: xxx' is added to brief description to indicate which pmu +the event is belong to. Same event name but with different pmu can +be supported. + +Enable hybrid event with a specific pmu +--------------------------------------- + +To enable a core only event or atom only event, following syntax is supported: + + cpu_core// +or + cpu_atom// + +For example, count the 'cycles' event on core cpus. + + perf stat -e cpu_core/cycles/ + +Create two events for one hardware event automatically +------------------------------------------------------ + +When creating one event and the event is available on both atom and core, +two events are created automatically. One is for atom, the other is for +core. Most of hardware events and cache events are available on both +cpu_core and cpu_atom. + +For hardware events, they have pre-defined configs (e.g. 0 for cycles). +But on hybrid platform, kernel needs to know where the event comes from +(from atom or from core). The original perf event type PERF_TYPE_HARDWARE +can't carry pmu information. So now this type is extended to be PMU aware +type. The PMU type ID is stored at attr.config[63:32]. + +PMU type ID is retrieved from sysfs. +/sys/devices/cpu_atom/type +/sys/devices/cpu_core/type + +The new attr.config layout for PERF_TYPE_HARDWARE: + +PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA + AA: hardware event ID + EEEEEEEE: PMU type ID + +Cache event is similar. The type PERF_TYPE_HW_CACHE is extended to be +PMU aware type. The PMU type ID is stored at attr.config[63:32]. + +The new attr.config layout for PERF_TYPE_HW_CACHE: + +PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB + BB: hardware cache ID + CC: hardware cache op ID + DD: hardware cache op result ID + EEEEEEEE: PMU type ID + +When enabling a hardware event without specified pmu, such as, +perf stat -e cycles -a (use system-wide in this example), two events +are created automatically. + + ------------------------------------------------------------ + perf_event_attr: + size 120 + config 0x400000000 + sample_type IDENTIFIER + read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING + disabled 1 + inherit 1 + exclude_guest 1 + ------------------------------------------------------------ + +and + + ------------------------------------------------------------ + perf_event_attr: + size 120 + config 0x800000000 + sample_type IDENTIFIER + read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING + disabled 1 + inherit 1 + exclude_guest 1 + ------------------------------------------------------------ + +type 0 is PERF_TYPE_HARDWARE. +0x4 in 0x400000000 indicates it's cpu_core pmu. +0x8 in 0x800000000 indicates it's cpu_atom pmu (atom pmu type id is random). + +The kernel creates 'cycles' (0x400000000) on cpu0-cpu15 (core cpus), +and create 'cycles' (0x800000000) on cpu16-cpu23 (atom cpus). + +For perf-stat result, it displays two events: + + Performance counter stats for 'system wide': + + 6,744,979 cpu_core/cycles/ + 1,965,552 cpu_atom/cycles/ + +The first 'cycles' is core event, the second 'cycles' is atom event. + +Thread mode example: +-------------------- + +perf-stat reports the scaled counts for hybrid event and with a percentage +displayed. The percentage is the event's running time/enabling time. + +One example, 'triad_loop' runs on cpu16 (atom core), while we can see the +scaled value for core cycles is 160,444,092 and the percentage is 0.47%. + +perf stat -e cycles -- taskset -c 16 ./triad_loop + +As previous, two events are created. + +------------------------------------------------------------ +perf_event_attr: + size 120 + config 0x400000000 + sample_type IDENTIFIER + read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING + disabled 1 + inherit 1 + enable_on_exec 1 + exclude_guest 1 +------------------------------------------------------------ + +and + +------------------------------------------------------------ +perf_event_attr: + size 120 + config 0x800000000 + sample_type IDENTIFIER + read_format TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING + disabled 1 + inherit 1 + enable_on_exec 1 + exclude_guest 1 +------------------------------------------------------------ + + Performance counter stats for 'taskset -c 16 ./triad_loop': + + 233,066,666 cpu_core/cycles/ (0.43%) + 604,097,080 cpu_atom/cycles/ (99.57%) + +perf-record: +------------ + +If there is no '-e' specified in perf record, on hybrid platform, +it creates two default 'cycles' and adds them to event list. One +is for core, the other is for atom. + +perf-stat: +---------- + +If there is no '-e' specified in perf stat, on hybrid platform, +besides of software events, following events are created and +added to event list in order. + +cpu_core/cycles/, +cpu_atom/cycles/, +cpu_core/instructions/, +cpu_atom/instructions/, +cpu_core/branches/, +cpu_atom/branches/, +cpu_core/branch-misses/, +cpu_atom/branch-misses/ + +Of course, both perf-stat and perf-record support to enable +hybrid event with a specific pmu. + +e.g. +perf stat -e cpu_core/cycles/ +perf stat -e cpu_atom/cycles/ +perf stat -e cpu_core/r1a/ +perf stat -e cpu_atom/L1-icache-loads/ +perf stat -e cpu_core/cycles/,cpu_atom/instructions/ +perf stat -e '{cpu_core/cycles/,cpu_core/instructions/}' + +But '{cpu_core/cycles/,cpu_atom/instructions/}' will return +warning and disable grouping, because the pmus in group are +not matched (cpu_core vs. cpu_atom). diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index f3161c9673e9..d71bac847936 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -695,6 +695,7 @@ measurements: wait -n ${perf_pid} exit $? +include::intel-hybrid.txt[] SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index f10e24da23e9..45c2467e4eb2 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -552,6 +552,8 @@ The fields are in this order: Additional metrics may be printed with all earlier fields being empty. +include::intel-hybrid.txt[] + SEE ALSO -------- linkperf:perf-top[1], linkperf:perf-list[1] -- cgit v1.2.3 From 56d32d4cac645bac05fa70d935fa5040e3ab6bb3 Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Wed, 28 Apr 2021 11:20:23 +0200 Subject: perf tools: Enable libtraceevent dynamic linking Currently we support only static linking with kernel's libtraceevent (tools/lib/traceevent). This patch adds libtraceevent package detection and support to link perf with it dynamically. The libtraceevent package status is displayed with: $ make VF=1 LIBTRACEEVENT_DYNAMIC=1 ... ... libtraceevent: [ on ] Default behavior remains the same (static linking). Committer testing: $ make LIBTRACEEVENT_DYNAMIC=1 VF=1 O=/tmp/build/perf -C tools/perf install-bin |& grep traceevent Makefile.config:1090: *** Error: No libtraceevent devel library found, please install libtraceevent-devel. Stop. $ Signed-off-by: Michael Petlan Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa LPU-Reference: 20210428092023.4009-1-mpetlan@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 1 + tools/build/feature/Makefile | 4 ++++ tools/build/feature/test-libtraceevent.c | 12 ++++++++++++ tools/perf/Makefile.config | 9 +++++++++ tools/perf/Makefile.perf | 8 ++++++-- 5 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 tools/build/feature/test-libtraceevent.c diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 74e255d58d8d..b3221a43fa65 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -52,6 +52,7 @@ FEATURE_TESTS_BASIC := \ libpython-version \ libslang \ libslang-include-subdir \ + libtraceevent \ libcrypto \ libunwind \ pthread-attr-setaffinity-np \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 3e55edb3ea54..ec203e28407f 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -36,6 +36,7 @@ FILES= \ test-libpython-version.bin \ test-libslang.bin \ test-libslang-include-subdir.bin \ + test-libtraceevent.bin \ test-libcrypto.bin \ test-libunwind.bin \ test-libunwind-debug-frame.bin \ @@ -196,6 +197,9 @@ $(OUTPUT)test-libslang.bin: $(OUTPUT)test-libslang-include-subdir.bin: $(BUILD) -lslang +$(OUTPUT)test-libtraceevent.bin: + $(BUILD) -ltraceevent + $(OUTPUT)test-libcrypto.bin: $(BUILD) -lcrypto diff --git a/tools/build/feature/test-libtraceevent.c b/tools/build/feature/test-libtraceevent.c new file mode 100644 index 000000000000..416b11ffd4b4 --- /dev/null +++ b/tools/build/feature/test-libtraceevent.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +int main(void) +{ + int rv = 0; + struct trace_seq s; + trace_seq_init(&s); + rv += !(s.state == TRACE_SEQ__GOOD); + trace_seq_destroy(&s); + return rv; +} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 3514fe956ed1..99c73a7b6a26 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -1079,6 +1079,15 @@ ifdef LIBPFM4 endif endif +ifdef LIBTRACEEVENT_DYNAMIC + $(call feature_check,libtraceevent) + ifeq ($(feature-libtraceevent), 1) + EXTLIBS += -ltraceevent + else + dummy := $(error Error: No libtraceevent devel library found, please install libtraceevent-devel); + endif +endif + # Among the variables below, these: # perfexecdir # perf_include_dir diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 6240fbb1646e..e47f04e5b51e 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -128,6 +128,8 @@ include ../scripts/utilities.mak # # Define BUILD_BPF_SKEL to enable BPF skeletons # +# Define LIBTRACEEVENT_DYNAMIC to enable libtraceevent dynamic linking +# # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL @@ -310,7 +312,6 @@ endif LIBTRACEEVENT = $(TE_PATH)libtraceevent.a export LIBTRACEEVENT - LIBTRACEEVENT_DYNAMIC_LIST = $(PLUGINS_PATH)libtraceevent-dynamic-list # @@ -375,12 +376,15 @@ endif export PERL_PATH -PERFLIBS = $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD) $(LIBPERF) +PERFLIBS = $(LIBAPI) $(LIBSUBCMD) $(LIBPERF) ifndef NO_LIBBPF ifndef LIBBPF_DYNAMIC PERFLIBS += $(LIBBPF) endif endif +ifndef LIBTRACEEVENT_DYNAMIC + PERFLIBS += $(LIBTRACEEVENT) +endif # We choose to avoid "if .. else if .. else .. endif endif" # because maintaining the nesting to match is a pain. If -- cgit v1.2.3 From e1d380ea8b00db4bb14d1f513000d4b62aa9d3f0 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 28 Apr 2021 20:09:12 +0800 Subject: perf tools: Change fields type in perf_record_time_conv C standard claims "An object declared as type _Bool is large enough to store the values 0 and 1", bool type size can be 1 byte or larger than 1 byte. Thus it's uncertian for bool type size with different compilers. This patch changes the bool type in structure perf_record_time_conv to __u8 type, and pads extra bytes for 8-byte alignment; this can give reliable structure size. Fixes: d110162cafc8 ("perf tsc: Support cap_user_time_short for event TIME_CONV") Suggested-by: Adrian Hunter Signed-off-by: Leo Yan Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Gustavo A. R. Silva Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steve MacLean Cc: Yonatan Goldschmidt Link: https://lore.kernel.org/r/20210428120915.7123-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/perf/event.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index d82054225fcc..48583e441d9b 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -346,8 +346,9 @@ struct perf_record_time_conv { __u64 time_zero; __u64 time_cycles; __u64 time_mask; - bool cap_user_time_zero; - bool cap_user_time_short; + __u8 cap_user_time_zero; + __u8 cap_user_time_short; + __u8 reserved[6]; /* For alignment */ }; struct perf_record_header_feature { -- cgit v1.2.3 From aa616f5a8a2d22a179d5502ebd85045af66fa656 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 28 Apr 2021 20:09:13 +0800 Subject: perf jit: Let convert_timestamp() to be backwards-compatible Commit d110162cafc80dad ("perf tsc: Support cap_user_time_short for event TIME_CONV") supports the extended parameters for event TIME_CONV, but it broke the backwards compatibility, so any perf data file with old event format fails to convert timestamp. This patch introduces a helper event_contains() to check if an event contains a specific member or not. For the backwards-compatibility, if the event size confirms the extended parameters are supported in the event TIME_CONV, then copies these parameters. Committer notes: To make this compiler backwards compatible add this patch: - struct perf_tsc_conversion tc = { 0 }; + struct perf_tsc_conversion tc = { .time_shift = 0, }; Fixes: d110162cafc8 ("perf tsc: Support cap_user_time_short for event TIME_CONV") Signed-off-by: Leo Yan Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Gustavo A. R. Silva Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steve MacLean Cc: Yonatan Goldschmidt Link: https://lore.kernel.org/r/20210428120915.7123-3-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/include/perf/event.h | 2 ++ tools/perf/util/jitdump.c | 30 ++++++++++++++++++++---------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 48583e441d9b..4d0c02ba3f7d 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -8,6 +8,8 @@ #include #include /* pid_t */ +#define event_contains(obj, mem) ((obj).header.size > offsetof(typeof(obj), mem)) + struct perf_record_mmap { struct perf_event_header header; __u32 pid, tid; diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 9760d8e7b386..917a9c707371 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -396,21 +396,31 @@ static pid_t jr_entry_tid(struct jit_buf_desc *jd, union jr_entry *jr) static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp) { - struct perf_tsc_conversion tc; + struct perf_tsc_conversion tc = { .time_shift = 0, }; + struct perf_record_time_conv *time_conv = &jd->session->time_conv; if (!jd->use_arch_timestamp) return timestamp; - tc.time_shift = jd->session->time_conv.time_shift; - tc.time_mult = jd->session->time_conv.time_mult; - tc.time_zero = jd->session->time_conv.time_zero; - tc.time_cycles = jd->session->time_conv.time_cycles; - tc.time_mask = jd->session->time_conv.time_mask; - tc.cap_user_time_zero = jd->session->time_conv.cap_user_time_zero; - tc.cap_user_time_short = jd->session->time_conv.cap_user_time_short; + tc.time_shift = time_conv->time_shift; + tc.time_mult = time_conv->time_mult; + tc.time_zero = time_conv->time_zero; - if (!tc.cap_user_time_zero) - return 0; + /* + * The event TIME_CONV was extended for the fields from "time_cycles" + * when supported cap_user_time_short, for backward compatibility, + * checks the event size and assigns these extended fields if these + * fields are contained in the event. + */ + if (event_contains(*time_conv, time_cycles)) { + tc.time_cycles = time_conv->time_cycles; + tc.time_mask = time_conv->time_mask; + tc.cap_user_time_zero = time_conv->cap_user_time_zero; + tc.cap_user_time_short = time_conv->cap_user_time_short; + + if (!tc.cap_user_time_zero) + return 0; + } return tsc_to_perf_time(timestamp, &tc); } -- cgit v1.2.3 From 050ffc449008eeeafc187dec337d9cf1518f89bc Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 28 Apr 2021 20:09:14 +0800 Subject: perf session: Add swap operation for event TIME_CONV Since commit d110162cafc8 ("perf tsc: Support cap_user_time_short for event TIME_CONV"), the event PERF_RECORD_TIME_CONV has extended the data structure for clock parameters. To be backwards-compatible, this patch adds a dedicated swap operation for the event PERF_RECORD_TIME_CONV, based on checking if the event contains field "time_cycles", it can support both for the old and new event formats. Fixes: d110162cafc8 ("perf tsc: Support cap_user_time_short for event TIME_CONV") Signed-off-by: Leo Yan Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Gustavo A. R. Silva Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steve MacLean Cc: Yonatan Goldschmidt Link: https://lore.kernel.org/r/20210428120915.7123-4-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index a6659b616e6d..cc954b1cfa86 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -949,6 +949,19 @@ static void perf_event__stat_round_swap(union perf_event *event, event->stat_round.time = bswap_64(event->stat_round.time); } +static void perf_event__time_conv_swap(union perf_event *event, + bool sample_id_all __maybe_unused) +{ + event->time_conv.time_shift = bswap_64(event->time_conv.time_shift); + event->time_conv.time_mult = bswap_64(event->time_conv.time_mult); + event->time_conv.time_zero = bswap_64(event->time_conv.time_zero); + + if (event_contains(event->time_conv, time_cycles)) { + event->time_conv.time_cycles = bswap_64(event->time_conv.time_cycles); + event->time_conv.time_mask = bswap_64(event->time_conv.time_mask); + } +} + typedef void (*perf_event__swap_op)(union perf_event *event, bool sample_id_all); @@ -985,7 +998,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_STAT] = perf_event__stat_swap, [PERF_RECORD_STAT_ROUND] = perf_event__stat_round_swap, [PERF_RECORD_EVENT_UPDATE] = perf_event__event_update_swap, - [PERF_RECORD_TIME_CONV] = perf_event__all64_swap, + [PERF_RECORD_TIME_CONV] = perf_event__time_conv_swap, [PERF_RECORD_HEADER_MAX] = NULL, }; -- cgit v1.2.3 From 81e70d7ee4ae13d60800958bca9d3c7675de16c9 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Wed, 28 Apr 2021 20:09:15 +0800 Subject: perf session: Dump PERF_RECORD_TIME_CONV event Now perf tool uses the common stub function process_event_op2_stub() for dumping TIME_CONV event, thus it doesn't output the clock parameters contained in the event. This patch adds the callback function for dumping the hardware clock parameters in TIME_CONV event. Before: # perf report -D 0x978 [0x38]: event: 79 . . ... raw event: size 56 bytes . 0000: 4f 00 00 00 00 00 38 00 15 00 00 00 00 00 00 00 O.....8......... . 0010: 00 00 40 01 00 00 00 00 86 89 0b bf df ff ff ff ..@........ . 0020: d1 c1 b2 39 03 00 00 00 ff ff ff ff ff ff ff 00 9..... . 0030: 01 01 00 00 00 00 00 00 ........ 0 0 0x978 [0x38]: PERF_RECORD_TIME_CONV : unhandled! [...] After: # perf report -D 0x978 [0x38]: event: 79 . . ... raw event: size 56 bytes . 0000: 4f 00 00 00 00 00 38 00 15 00 00 00 00 00 00 00 O.....8......... . 0010: 00 00 40 01 00 00 00 00 86 89 0b bf df ff ff ff ..@........ . 0020: d1 c1 b2 39 03 00 00 00 ff ff ff ff ff ff ff 00 9..... . 0030: 01 01 00 00 00 00 00 00 ........ 0 0 0x978 [0x38]: PERF_RECORD_TIME_CONV ... Time Shift 21 ... Time Muliplier 20971520 ... Time Zero 18446743935180835206 ... Time Cycles 13852918225 ... Time Mask 0xffffffffffffff ... Cap Time Zero 1 ... Cap Time Short 1 : unhandled! [...] Signed-off-by: Leo Yan Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Gustavo A. R. Silva Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steve MacLean Cc: Yonatan Goldschmidt Link: https://lore.kernel.org/r/20210428120915.7123-5-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 13 ++++++++++++- tools/perf/util/tsc.c | 30 ++++++++++++++++++++++++++++++ tools/perf/util/tsc.h | 4 ++++ 3 files changed, 46 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index cc954b1cfa86..a12cf4f0e97a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -29,6 +29,7 @@ #include "thread-stack.h" #include "sample-raw.h" #include "stat.h" +#include "tsc.h" #include "ui/progress.h" #include "../perf.h" #include "arch/common.h" @@ -451,6 +452,16 @@ static int process_stat_round_stub(struct perf_session *perf_session __maybe_unu return 0; } +static int process_event_time_conv_stub(struct perf_session *perf_session __maybe_unused, + union perf_event *event) +{ + if (dump_trace) + perf_event__fprintf_time_conv(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + static int perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused, union perf_event *event __maybe_unused, u64 file_offset __maybe_unused) @@ -532,7 +543,7 @@ void perf_tool__fill_defaults(struct perf_tool *tool) if (tool->stat_round == NULL) tool->stat_round = process_stat_round_stub; if (tool->time_conv == NULL) - tool->time_conv = process_event_op2_stub; + tool->time_conv = process_event_time_conv_stub; if (tool->feature == NULL) tool->feature = process_event_op2_stub; if (tool->compressed == NULL) diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c index 62b4c75c966c..f19791d46e99 100644 --- a/tools/perf/util/tsc.c +++ b/tools/perf/util/tsc.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include +#include #include #include @@ -110,3 +112,31 @@ u64 __weak rdtsc(void) { return 0; } + +size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp) +{ + struct perf_record_time_conv *tc = (struct perf_record_time_conv *)event; + size_t ret; + + ret = fprintf(fp, "\n... Time Shift %" PRI_lu64 "\n", tc->time_shift); + ret += fprintf(fp, "... Time Muliplier %" PRI_lu64 "\n", tc->time_mult); + ret += fprintf(fp, "... Time Zero %" PRI_lu64 "\n", tc->time_zero); + + /* + * The event TIME_CONV was extended for the fields from "time_cycles" + * when supported cap_user_time_short, for backward compatibility, + * prints the extended fields only if they are contained in the event. + */ + if (event_contains(*tc, time_cycles)) { + ret += fprintf(fp, "... Time Cycles %" PRI_lu64 "\n", + tc->time_cycles); + ret += fprintf(fp, "... Time Mask %#" PRI_lx64 "\n", + tc->time_mask); + ret += fprintf(fp, "... Cap Time Zero %" PRId32 "\n", + tc->cap_user_time_zero); + ret += fprintf(fp, "... Cap Time Short %" PRId32 "\n", + tc->cap_user_time_short); + } + + return ret; +} diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h index 72a15419f3b3..7d83a31732a7 100644 --- a/tools/perf/util/tsc.h +++ b/tools/perf/util/tsc.h @@ -4,6 +4,8 @@ #include +#include "event.h" + struct perf_tsc_conversion { u16 time_shift; u32 time_mult; @@ -24,4 +26,6 @@ u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc); u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc); u64 rdtsc(void); +size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp); + #endif // __PERF_TSC_H -- cgit v1.2.3 From fbed59f844912f377b83cc25594c692b5f6ebae2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 26 Apr 2021 15:27:32 -0300 Subject: perf build: Regenerate the FEATURE_DUMP file after extra feature checks Feature detection is done in tools/build/Makefile.feature, we may exit there with some features not detected and then, in tools/perf/Makefile.config try adding extra libraries to link and then do extra feature checks to see if we now find the feature. This is the case with the disassembler-four-args that checks if the diassembler() function in libopcodes (binutils) has a signature with one or with four arguments, as this is not ABI and they changed it at some point. This is not a problem when doing normal builds, for instance: $ make -C tools/perf O=/tmp/build/perf As we don't use what is in FEATURE-DUMP at that point, but is a problem if we pass FEATURE_DUMP=/previously-detected-features as we do in 'make -C tools/perf build-test' to reuse the feature detection in the many build combinations we test there. When that is done feature-disassembler-four-args will be set to 0, but opensuse 15.1 has the four arguments function signature in disassembler(). The build thus fails. Fix it by rewriting the FEATURE-DUMP file at the end of tools/perf/Makefile.config to register features we retested in that make file. Signed-off-by: Jiri Olsa Reported-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 99c73a7b6a26..94e31fa75f42 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -1224,3 +1224,9 @@ $(call detected_var,LIBDIR) $(call detected_var,GTK_CFLAGS) $(call detected_var,PERL_EMBED_CCOPTS) $(call detected_var,PYTHON_EMBED_CCOPTS) + +# re-generate FEATURE-DUMP as we may have called feature_check, found out +# extra libraries to add to LDFLAGS of some other test and then redo those +# tests, see the block about libbfd, disassembler-four-args, for instance. +$(shell rm -f $(FEATURE_DUMP_FILENAME)) +$(foreach feat,$(FEATURE_TESTS),$(shell echo "$(call feature_assign,$(feat))" >> $(FEATURE_DUMP_FILENAME))) -- cgit v1.2.3 From 19177bc3da7e52bc7fb7e603556f98f06e074092 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 26 Apr 2021 17:01:24 -0300 Subject: tools build: Allow deferring printing the results of feature detection By setting FEATURE_DISPLAY_DEFERRED=1 a tool may ask for the printout of the detected features in tools/build/Makefile.feature to be done later adter extra feature checks are done that are tool specific. The perf tool will do it via its tools/perf/Makefile.config, as it performs such extra feature checks. Acked-by: Jiri Olsa Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index b3221a43fa65..04a8e3db8a54 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -240,17 +240,24 @@ ifeq ($(VF),1) feature_verbose := 1 endif -ifeq ($(feature_display),1) - $(info ) - $(info Auto-detecting system features:) - $(foreach feat,$(FEATURE_DISPLAY),$(call feature_print_status,$(feat),)) - ifneq ($(feature_verbose),1) +feature_display_entries = $(eval $(feature_display_entries_code)) +define feature_display_entries_code + ifeq ($(feature_display),1) $(info ) + $(info Auto-detecting system features:) + $(foreach feat,$(FEATURE_DISPLAY),$(call feature_print_status,$(feat),)) + ifneq ($(feature_verbose),1) + $(info ) + endif endif -endif -ifeq ($(feature_verbose),1) - TMP := $(filter-out $(FEATURE_DISPLAY),$(FEATURE_TESTS)) - $(foreach feat,$(TMP),$(call feature_print_status,$(feat),)) - $(info ) + ifeq ($(feature_verbose),1) + TMP := $(filter-out $(FEATURE_DISPLAY),$(FEATURE_TESTS)) + $(foreach feat,$(TMP),$(call feature_print_status,$(feat),)) + $(info ) + endif +endef + +ifeq ($(FEATURE_DISPLAY_DEFERRED),) + $(call feature_display_entries) endif -- cgit v1.2.3 From c6e3bf437184d41d885ba679eab0ddd43f95db56 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 26 Apr 2021 15:47:39 -0300 Subject: perf build: Defer printing detected features to the end of all feature checks We were doing it in tools/build/Makefile.feature, after running the feature checks, but then in tools/perf/Makefile.config we can call more feature checks when we notice that some feature check failed, like when libbfd wasn't detected and we add libraries to the LDFLAGS of its feature check to try again, etc. Acked-by: Jiri Olsa Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 94e31fa75f42..0d6619064a83 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -299,6 +299,9 @@ ifneq ($(TCMALLOC),) endif ifeq ($(FEATURES_DUMP),) +# We will display at the end of this Makefile.config, using $(call feature_display_entries) +# As we may retry some feature detection here, see the disassembler-four-args case, for instance + FEATURE_DISPLAY_DEFERRED := 1 include $(srctree)/tools/build/Makefile.feature else include $(FEATURES_DUMP) @@ -1230,3 +1233,7 @@ $(call detected_var,PYTHON_EMBED_CCOPTS) # tests, see the block about libbfd, disassembler-four-args, for instance. $(shell rm -f $(FEATURE_DUMP_FILENAME)) $(foreach feat,$(FEATURE_TESTS),$(shell echo "$(call feature_assign,$(feat))" >> $(FEATURE_DUMP_FILENAME))) + +ifeq ($(feature_display),1) + $(call feature_display_entries) +endif -- cgit v1.2.3