From 3149733584c8f0ab828eada539df7aa488c023a9 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Sat, 11 Sep 2021 10:08:54 +0530 Subject: perf annotate: Add fusion logic for AMD microarchs AMD family 15h and above microarchs fuse a subset of cmp/test/ALU instructions with branch instructions[1][2]. Add perf annotate fused instruction support for these microarchs. Before: │ testb $0x80,0x51(%rax) │ ┌──jne 5b3 0.78 │ │ mov %r13,%rdi │ │→ callq mark_page_accessed 1.08 │5b3:└─→mov 0x8(%r13),%rax After: │ ┌──testb $0x80,0x51(%rax) │ ├──jne 5b3 0.78 │ │ mov %r13,%rdi │ │→ callq mark_page_accessed 1.08 │5b3:└─→mov 0x8(%r13),%rax [1] https://bugzilla.kernel.org/attachment.cgi?id=298553 [2] https://bugzilla.kernel.org/attachment.cgi?id=298555 Committer testing: On a: $ grep -m1 "model name" /proc/cpuinfo model name : AMD Ryzen 9 3900X 12-Core Processor $ Samples: 44K of event 'cycles', 4000 Hz, Event count (approx.): 7533249650 _int_malloc /usr/lib64/libc-2.33.so [Percent: local period] Percent│ ┌──test %eax,%eax │ ├──jne 884 │ │↓ jmpq 943 │ │ nop │878:│ add $0x10,%rdx 0.64 │ │ add %eax,%eax 0.57 │ │↓ je cc9 0.77 │884:└─→test %esi,%eax │ ↑ je 878 │ mov 0x18(%rdx),%r15 Reported-by: Kim Phillips Signed-off-by: Ravi Bangoria Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jin Yao Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/r/20210911043854.8373-2-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/annotate/instructions.c | 28 +++++++++++++++++++++++++++- tools/perf/util/annotate.c | 1 - 2 files changed, 27 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c index 24ea12ec7e02..305872692bfd 100644 --- a/tools/perf/arch/x86/annotate/instructions.c +++ b/tools/perf/arch/x86/annotate/instructions.c @@ -144,8 +144,31 @@ static struct ins x86__instructions[] = { { .name = "xorps", .ops = &mov_ops, }, 
}; -static bool x86__ins_is_fused(struct arch *arch, const char *ins1, +static bool amd__ins_is_fused(struct arch *arch, const char *ins1, const char *ins2) +{ + if (strstr(ins2, "jmp")) + return false; + + /* Family >= 15h supports cmp/test + branch fusion */ + if (arch->family >= 0x15 && (strstarts(ins1, "test") || + (strstarts(ins1, "cmp") && !strstr(ins1, "xchg")))) { + return true; + } + + /* Family >= 19h supports some ALU + branch fusion */ + if (arch->family >= 0x19 && (strstarts(ins1, "add") || + strstarts(ins1, "sub") || strstarts(ins1, "and") || + strstarts(ins1, "inc") || strstarts(ins1, "dec") || + strstarts(ins1, "or") || strstarts(ins1, "xor"))) { + return true; + } + + return false; +} + +static bool intel__ins_is_fused(struct arch *arch, const char *ins1, + const char *ins2) { if (arch->family != 6 || arch->model < 0x1e || strstr(ins2, "jmp")) return false; @@ -184,6 +207,9 @@ static int x86__cpuid_parse(struct arch *arch, char *cpuid) if (ret == 3) { arch->family = family; arch->model = model; + arch->ins_is_fused = strstarts(cpuid, "AuthenticAMD") ? + amd__ins_is_fused : + intel__ins_is_fused; return 0; } diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 0bae061b2d6d..b55f35485e43 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -183,7 +183,6 @@ static struct arch architectures[] = { .init = x86__annotate_init, .instructions = x86__instructions, .nr_instructions = ARRAY_SIZE(x86__instructions), - .ins_is_fused = x86__ins_is_fused, .objdump = { .comment_char = '#', }, -- cgit v1.2.3 From ddf0d4dee4cbcabdf5161da3fe744b6cb149db88 Mon Sep 17 00:00:00 2001 From: Muhammad Falak R Wani Date: Sun, 15 Aug 2021 16:06:10 +0530 Subject: perf bpf: Deprecate bpf_map__resize() in favor of bpf_map__set_max_entries() As a part of libbpf 1.0 plan[0], this patch deprecates use of bpf_map__resize in favour of bpf_map__set_max_entries. 
Reference: https://github.com/libbpf/libbpf/issues/304 [0]: https://github.com/libbpf/libbpf/wiki/Libbpf:-the-road-to-v1.0#libbpfh-high-level-apis Signed-off-by: Muhammad Falak R Wani Acked-by: Andrii Nakryiko Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Muhammad Falak R Wani Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Quentin Monnet Cc: Song Liu Cc: Yu Kuai Link: http://lore.kernel.org/lkml/20210815103610.27887-1-falakreyaz@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_counter.c | 8 ++++---- tools/perf/util/bpf_counter_cgroup.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index ba0f20853651..ced2dac31dcf 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -127,9 +127,9 @@ static int bpf_program_profiler_load_one(struct evsel *evsel, u32 prog_id) skel->rodata->num_cpu = evsel__nr_cpus(evsel); - bpf_map__resize(skel->maps.events, evsel__nr_cpus(evsel)); - bpf_map__resize(skel->maps.fentry_readings, 1); - bpf_map__resize(skel->maps.accum_readings, 1); + bpf_map__set_max_entries(skel->maps.events, evsel__nr_cpus(evsel)); + bpf_map__set_max_entries(skel->maps.fentry_readings, 1); + bpf_map__set_max_entries(skel->maps.accum_readings, 1); prog_name = bpf_target_prog_name(prog_fd); if (!prog_name) { @@ -399,7 +399,7 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd, return -1; } - bpf_map__resize(skel->maps.events, libbpf_num_possible_cpus()); + bpf_map__set_max_entries(skel->maps.events, libbpf_num_possible_cpus()); err = bperf_leader_bpf__load(skel); if (err) { pr_err("Failed to load leader skeleton\n"); diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c index 89aa5e71db1a..cbc6c2bca488 100644 --- 
a/tools/perf/util/bpf_counter_cgroup.c +++ b/tools/perf/util/bpf_counter_cgroup.c @@ -65,14 +65,14 @@ static int bperf_load_program(struct evlist *evlist) /* we need one copy of events per cpu for reading */ map_size = total_cpus * evlist->core.nr_entries / nr_cgroups; - bpf_map__resize(skel->maps.events, map_size); - bpf_map__resize(skel->maps.cgrp_idx, nr_cgroups); + bpf_map__set_max_entries(skel->maps.events, map_size); + bpf_map__set_max_entries(skel->maps.cgrp_idx, nr_cgroups); /* previous result is saved in a per-cpu array */ map_size = evlist->core.nr_entries / nr_cgroups; - bpf_map__resize(skel->maps.prev_readings, map_size); + bpf_map__set_max_entries(skel->maps.prev_readings, map_size); /* cgroup result needs all events (per-cpu) */ map_size = evlist->core.nr_entries; - bpf_map__resize(skel->maps.cgrp_readings, map_size); + bpf_map__set_max_entries(skel->maps.cgrp_readings, map_size); set_max_rlimit(); -- cgit v1.2.3 From 00e0ca3721cf2ddcb38cf676a3de61933640d31d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 14 Sep 2021 10:00:04 -0700 Subject: perf bpf: Ignore deprecation warning when using libbpf's btf__get_from_id() Perf code re-implements libbpf's btf__load_from_kernel_by_id() API as a weak function, presumably to dynamically link against old version of libbpf shared library. Unfortunately this causes compilation warning when perf is compiled against libbpf v0.6+. For now, just ignore deprecation warning, but there might be a better solution, depending on perf's needs. 
Signed-off-by: Andrii Nakryiko Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: kernel-team@fb.com LPU-Reference: 20210914170004.4185659-1-andrii@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-event.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 683f6d63560e..1a7112a87736 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -24,7 +24,10 @@ struct btf * __weak btf__load_from_kernel_by_id(__u32 id) { struct btf *btf; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" int err = btf__get_from_id(id, &btf); +#pragma GCC diagnostic pop return err ? ERR_PTR(err) : btf; } -- cgit v1.2.3 From 8228e9361e2a7447eaed87499123e85871c8bc18 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 15 Sep 2021 14:14:28 -0700 Subject: perf parse-events: Avoid enum forward declaration. Enum forward declarations aren't allowed as the size can't be implied. Switch to just using an int. 
This fixes a clang warning: In file included from tools/perf/bench/evlist-open-close.c:13: tools/perf/bench/../util/parse-events.h:185:6: error: redeclaration of already-defined enum 'perf_tool_event' is a GNU extension [-Werror,-Wgnu-redeclared-enum] enum perf_tool_event; ^ tools/perf/bench/../util/evsel.h:28:6: note: previous definition is here enum perf_tool_event { ^ Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210915211428.1773567-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 2 +- tools/perf/util/parse-events.h | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 51a2219df601..5d1346aa0627 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1471,7 +1471,7 @@ out_free_terms: int parse_events_add_tool(struct parse_events_state *parse_state, struct list_head *list, - enum perf_tool_event tool_event) + int tool_event) { return add_event_tool(list, &parse_state->idx, tool_event); } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index bf6e41aa9b6a..b32ed3064c49 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -182,10 +182,9 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, struct list_head *list, u32 type, u64 config, struct list_head *head_config); -enum perf_tool_event; int parse_events_add_tool(struct parse_events_state *parse_state, struct list_head *list, - enum perf_tool_event tool_event); + int tool_event); int parse_events_add_cache(struct list_head *list, int *idx, char *type, char *op_result1, char *op_result2, struct parse_events_error *error, -- cgit v1.2.3 From 84111b9c950ec9a8b31166973e79aa77ddcee7e3 Mon Sep 17 00:00:00 2001 From: 
Namhyung Kim Date: Tue, 10 Aug 2021 21:46:57 -0700 Subject: perf tools: Allow controlling synthesizing PERF_RECORD_ metadata events during record Depending on the use case, it might require some kind of synthesizing and some not. Make it controllable to turn off heavy operations like MMAP for all tasks. Currently all users are converted to enable all the synthesis by default. It'll be updated in the later patch. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jin Yao Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210811044658.1313391-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/synthesize.c | 4 ++-- tools/perf/builtin-kvm.c | 2 +- tools/perf/builtin-record.c | 6 +++-- tools/perf/builtin-top.c | 2 +- tools/perf/builtin-trace.c | 4 ++-- tools/perf/tests/code-reading.c | 3 ++- tools/perf/tests/mmap-thread-lookup.c | 4 ++-- tools/perf/util/synthetic-events.c | 45 ++++++++++++++++++++--------------- tools/perf/util/synthetic-events.h | 8 +++---- 9 files changed, 44 insertions(+), 34 deletions(-) (limited to 'tools') diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c index 05f7c923c745..7401ebbac100 100644 --- a/tools/perf/bench/synthesize.c +++ b/tools/perf/bench/synthesize.c @@ -80,7 +80,7 @@ static int do_run_single_threaded(struct perf_session *session, NULL, target, threads, process_synthesized_event, - data_mmap, + true, data_mmap, nr_threads_synthesize); if (err) return err; @@ -171,7 +171,7 @@ static int do_run_multi_threaded(struct target *target, NULL, target, NULL, process_synthesized_event, - false, + true, false, nr_threads_synthesize); if (err) { perf_session__delete(session); diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index aa1b127ffb5b..c6f352ee57e6 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1456,7 +1456,7 @@ static int kvm_events_live(struct 
perf_kvm_stat *kvm, perf_session__set_id_hdr_size(kvm->session); ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true); machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target, - kvm->evlist->core.threads, false, 1); + kvm->evlist->core.threads, true, false, 1); err = kvm_live_open_events(kvm); if (err) goto out; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index b3509d9d20cc..0263e383332f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1266,6 +1266,7 @@ static int record__synthesize_workload(struct record *rec, bool tail) err = perf_event__synthesize_thread_map(&rec->tool, thread_map, process_synthesized_event, &rec->session->machines.host, + true, rec->opts.sample_address); perf_thread_map__put(thread_map); return err; @@ -1480,8 +1481,9 @@ static int record__synthesize(struct record *rec, bool tail) f = process_locked_synthesized_event; } - err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads, - f, opts->sample_address, + err = __machine__synthesize_threads(machine, tool, &opts->target, + rec->evlist->core.threads, + f, true, opts->sample_address, rec->opts.nr_threads_synthesize); if (rec->opts.nr_threads_synthesize > 1) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index a3ae9176a83e..020c4f110c10 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1271,7 +1271,7 @@ static int __cmd_top(struct perf_top *top) pr_debug("Couldn't synthesize cgroup events.\n"); machine__synthesize_threads(&top->session->machines.host, &opts->target, - top->evlist->core.threads, false, + top->evlist->core.threads, true, false, top->nr_threads_synthesize); if (top->nr_threads_synthesize > 1) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 2bf21194c7b3..2f1d20553a0a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1628,8 +1628,8 @@ static int 
trace__symbols_init(struct trace *trace, struct evlist *evlist) goto out; err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, - evlist->core.threads, trace__tool_process, false, - 1); + evlist->core.threads, trace__tool_process, + true, false, 1); out: if (err) symbol__exit(); diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 9866cddebf23..3a4d932e7ffc 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -606,7 +606,8 @@ static int do_test_code_reading(bool try_kcore) } ret = perf_event__synthesize_thread_map(NULL, threads, - perf_event__process, machine, false); + perf_event__process, machine, + true, false); if (ret < 0) { pr_debug("perf_event__synthesize_thread_map failed\n"); goto out_err; diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index 8d9d4cbff76d..6f2da7a72f67 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -135,7 +135,7 @@ static int synth_all(struct machine *machine) { return perf_event__synthesize_threads(NULL, perf_event__process, - machine, 0, 1); + machine, 1, 0, 1); } static int synth_process(struct machine *machine) @@ -147,7 +147,7 @@ static int synth_process(struct machine *machine) err = perf_event__synthesize_thread_map(NULL, map, perf_event__process, - machine, 0); + machine, 1, 0); perf_thread_map__put(map); return err; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index a7e981b2d7de..a7a2825356d6 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -715,7 +715,8 @@ static int __event__synthesize_thread(union perf_event *comm_event, union perf_event *fork_event, union perf_event *namespaces_event, pid_t pid, int full, perf_event__handler_t process, - struct perf_tool *tool, struct machine *machine, bool mmap_data) + struct perf_tool *tool, struct machine *machine, + bool 
needs_mmap, bool mmap_data) { char filename[PATH_MAX]; struct dirent **dirent; @@ -739,7 +740,7 @@ static int __event__synthesize_thread(union perf_event *comm_event, * send mmap only for thread group leader * see thread__init_maps() */ - if (pid == tgid && + if (pid == tgid && needs_mmap && perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, process, machine, mmap_data)) return -1; @@ -786,7 +787,7 @@ static int __event__synthesize_thread(union perf_event *comm_event, break; rc = 0; - if (_pid == pid && !kernel_thread) { + if (_pid == pid && !kernel_thread && needs_mmap) { /* process the parent's maps too */ rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, process, machine, mmap_data); @@ -806,7 +807,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, - bool mmap_data) + bool needs_mmap, bool mmap_data) { union perf_event *comm_event, *mmap_event, *fork_event; union perf_event *namespaces_event; @@ -836,7 +837,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, fork_event, namespaces_event, perf_thread_map__pid(threads, thread), 0, process, tool, machine, - mmap_data)) { + needs_mmap, mmap_data)) { err = -1; break; } @@ -862,7 +863,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, fork_event, namespaces_event, comm_event->comm.pid, 0, process, tool, machine, - mmap_data)) { + needs_mmap, mmap_data)) { err = -1; break; } @@ -882,6 +883,7 @@ out: static int __perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, + bool needs_mmap, bool mmap_data, struct dirent **dirent, int start, @@ -926,7 +928,7 @@ static int __perf_event__synthesize_threads(struct perf_tool *tool, */ __event__synthesize_thread(comm_event, mmap_event, fork_event, namespaces_event, pid, 1, process, - tool, machine, mmap_data); + tool, machine, needs_mmap, mmap_data); } err = 0; 
@@ -945,6 +947,7 @@ struct synthesize_threads_arg { struct perf_tool *tool; perf_event__handler_t process; struct machine *machine; + bool needs_mmap; bool mmap_data; struct dirent **dirent; int num; @@ -956,7 +959,8 @@ static void *synthesize_threads_worker(void *arg) struct synthesize_threads_arg *args = arg; __perf_event__synthesize_threads(args->tool, args->process, - args->machine, args->mmap_data, + args->machine, + args->needs_mmap, args->mmap_data, args->dirent, args->start, args->num); return NULL; @@ -965,7 +969,7 @@ static void *synthesize_threads_worker(void *arg) int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, - bool mmap_data, + bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize) { struct synthesize_threads_arg *args = NULL; @@ -994,7 +998,8 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (thread_nr <= 1) { err = __perf_event__synthesize_threads(tool, process, - machine, mmap_data, + machine, + needs_mmap, mmap_data, dirent, base, n); goto free_dirent; } @@ -1015,6 +1020,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, args[i].tool = tool; args[i].process = process; args[i].machine = machine; + args[i].needs_mmap = needs_mmap; args[i].mmap_data = mmap_data; args[i].dirent = dirent; } @@ -1775,26 +1781,27 @@ out_err: int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, struct target *target, struct perf_thread_map *threads, - perf_event__handler_t process, bool data_mmap, - unsigned int nr_threads_synthesize) + perf_event__handler_t process, bool needs_mmap, + bool data_mmap, unsigned int nr_threads_synthesize) { if (target__has_task(target)) - return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap); + return perf_event__synthesize_thread_map(tool, threads, process, machine, + needs_mmap, data_mmap); else if (target__has_cpu(target)) - return perf_event__synthesize_threads(tool, 
process, - machine, data_mmap, + return perf_event__synthesize_threads(tool, process, machine, + needs_mmap, data_mmap, nr_threads_synthesize); /* command specified */ return 0; } int machine__synthesize_threads(struct machine *machine, struct target *target, - struct perf_thread_map *threads, bool data_mmap, - unsigned int nr_threads_synthesize) + struct perf_thread_map *threads, bool needs_mmap, + bool data_mmap, unsigned int nr_threads_synthesize) { return __machine__synthesize_threads(machine, NULL, target, threads, - perf_event__process, data_mmap, - nr_threads_synthesize); + perf_event__process, needs_mmap, + data_mmap, nr_threads_synthesize); } static struct perf_record_event_update *event_update_event__new(size_t size, u64 type, u64 id) diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h index c845e2b9b444..44f72d56ca4d 100644 --- a/tools/perf/util/synthetic-events.h +++ b/tools/perf/util/synthetic-events.h @@ -53,8 +53,8 @@ int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct p int perf_event__synthesize_stat_round(struct perf_tool *tool, u64 time, u64 type, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_stat(struct perf_tool *tool, u32 cpu, u32 thread, u64 id, struct perf_counts_values *count, perf_event__handler_t process, struct machine *machine); int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine); -int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, struct machine *machine, bool mmap_data); -int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool mmap_data, unsigned int nr_threads_synthesize); +int perf_event__synthesize_thread_map(struct perf_tool *tool, struct perf_thread_map *threads, perf_event__handler_t process, 
struct machine *machine, bool needs_mmap, bool mmap_data); +int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool needs_mmap, bool mmap_data, unsigned int nr_threads_synthesize); int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist, perf_event__handler_t process); int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, perf_event__handler_t process, struct machine *machine); @@ -65,10 +65,10 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, struct target *target, struct perf_thread_map *threads, - perf_event__handler_t process, bool data_mmap, + perf_event__handler_t process, bool needs_mmap, bool data_mmap, unsigned int nr_threads_synthesize); int machine__synthesize_threads(struct machine *machine, struct target *target, - struct perf_thread_map *threads, bool data_mmap, + struct perf_thread_map *threads, bool needs_mmap, bool data_mmap, unsigned int nr_threads_synthesize); #ifdef HAVE_AUXTRACE_SUPPORT -- cgit v1.2.3 From 41b740b6e8a994e5830daa5e15785522874f7456 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 10 Aug 2021 21:46:58 -0700 Subject: perf record: Add --synth option Add an option to control the synthesizing behavior. --synth Fine-tune event synthesis: default=all This can be useful when we know it doesn't need some synthesis like in a specific usecase and/or when using pipe: $ perf record -a --all-cgroups --synth cgroup -o- sleep 1 | \ > perf report -i- -s cgroup Committer notes: Added a clarification to the man page entry for --synth that this is about pre-existing threads. 
Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jin Yao Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210811044658.1313391-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 16 +++++++++++ tools/perf/builtin-record.c | 48 ++++++++++++++++++++++++++------ tools/perf/util/record.h | 1 + tools/perf/util/synthetic-events.c | 28 +++++++++++++++++++ tools/perf/util/synthetic-events.h | 12 ++++++++ 5 files changed, 96 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index f1079ee7f2ec..2d7df8703cf2 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -596,6 +596,22 @@ options. 'perf record --dry-run -e' can act as a BPF script compiler if llvm.dump-obj in config file is set to true. +--synth=TYPE:: +Collect and synthesize given type of events (comma separated). Note that +this option controls the synthesis from the /proc filesystem which represent +task status for pre-existing threads. + +Kernel (and some other) events are recorded regardless of the +choice in this option. For example, --synth=no would have MMAP events for +kernel and modules. + +Available types are: + 'task' - synthesize FORK and COMM events for each task + 'mmap' - synthesize MMAP events for each process (implies 'task') + 'cgroup' - synthesize CGROUP events for each cgroup + 'all' - synthesize all events (default) + 'no' - do not synthesize any of the above events + --tail-synthesize:: Instead of collecting non-sample events (for example, fork, comm, mmap) at the beginning of record, collect them during finalizing an output file. 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0263e383332f..41bb884f5a74 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1255,6 +1255,7 @@ static int record__synthesize_workload(struct record *rec, bool tail) { int err; struct perf_thread_map *thread_map; + bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; if (rec->opts.tail_synthesize != tail) return 0; @@ -1266,7 +1267,7 @@ static int record__synthesize_workload(struct record *rec, bool tail) err = perf_event__synthesize_thread_map(&rec->tool, thread_map, process_synthesized_event, &rec->session->machines.host, - true, + needs_mmap, rec->opts.sample_address); perf_thread_map__put(thread_map); return err; @@ -1471,20 +1472,26 @@ static int record__synthesize(struct record *rec, bool tail) if (err < 0) pr_warning("Couldn't synthesize bpf events.\n"); - err = perf_event__synthesize_cgroups(tool, process_synthesized_event, - machine); - if (err < 0) - pr_warning("Couldn't synthesize cgroup events.\n"); + if (rec->opts.synth & PERF_SYNTH_CGROUP) { + err = perf_event__synthesize_cgroups(tool, process_synthesized_event, + machine); + if (err < 0) + pr_warning("Couldn't synthesize cgroup events.\n"); + } if (rec->opts.nr_threads_synthesize > 1) { perf_set_multithreaded(); f = process_locked_synthesized_event; } - err = __machine__synthesize_threads(machine, tool, &opts->target, - rec->evlist->core.threads, - f, true, opts->sample_address, - rec->opts.nr_threads_synthesize); + if (rec->opts.synth & PERF_SYNTH_TASK) { + bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP; + + err = __machine__synthesize_threads(machine, tool, &opts->target, + rec->evlist->core.threads, + f, needs_mmap, opts->sample_address, + rec->opts.nr_threads_synthesize); + } if (rec->opts.nr_threads_synthesize > 1) perf_set_singlethreaded(); @@ -2393,6 +2400,26 @@ static int process_timestamp_boundary(struct perf_tool *tool, return 0; } +static int parse_record_synth_option(const struct 
option *opt, + const char *str, + int unset __maybe_unused) +{ + struct record_opts *opts = opt->value; + char *p = strdup(str); + + if (p == NULL) + return -1; + + opts->synth = parse_synth_opt(p); + free(p); + + if (opts->synth < 0) { + pr_err("Invalid synth option: %s\n", str); + return -1; + } + return 0; +} + /* * XXX Ideally would be local to cmd_record() and passed to a record__new * because we need to have access to it in record__exit, that is called @@ -2418,6 +2445,7 @@ static struct record record = { .nr_threads_synthesize = 1, .ctl_fd = -1, .ctl_fd_ack = -1, + .synth = PERF_SYNTH_ALL, }, .tool = { .sample = process_sample_event, @@ -2633,6 +2661,8 @@ static struct option __record_options[] = { "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", parse_control_option), + OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup", + "Fine-tune event synthesis: default=all", parse_record_synth_option), OPT_END() }; diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 68f471d9a88b..ef6c2715fdd9 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -77,6 +77,7 @@ struct record_opts { int ctl_fd; int ctl_fd_ack; bool ctl_fd_close; + int synth; }; extern const char * const *record_usage; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index a7a2825356d6..198982109f0f 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -2237,3 +2237,31 @@ int perf_event__synthesize_for_pipe(struct perf_tool *tool, return ret; } + +int parse_synth_opt(char *synth) +{ + char *p, *q; + int ret = 0; + + if (synth == NULL) + return -1; + + for (q = synth; (p = strsep(&q, ",")); p = q) { + if (!strcasecmp(p, "no") || !strcasecmp(p, "none")) + return 0; + + if (!strcasecmp(p, "all")) + return PERF_SYNTH_ALL; + + if (!strcasecmp(p, "task")) + ret 
|= PERF_SYNTH_TASK; + else if (!strcasecmp(p, "mmap")) + ret |= PERF_SYNTH_TASK | PERF_SYNTH_MMAP; + else if (!strcasecmp(p, "cgroup")) + ret |= PERF_SYNTH_CGROUP; + else + return -1; + } + + return ret; +} diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h index 44f72d56ca4d..c931433bacbf 100644 --- a/tools/perf/util/synthetic-events.h +++ b/tools/perf/util/synthetic-events.h @@ -27,6 +27,18 @@ struct target; union perf_event; +enum perf_record_synth { + PERF_SYNTH_TASK = 1 << 0, + PERF_SYNTH_MMAP = 1 << 1, + PERF_SYNTH_CGROUP = 1 << 2, + + /* last element */ + PERF_SYNTH_MAX = 1 << 3, +}; +#define PERF_SYNTH_ALL (PERF_SYNTH_MAX - 1) + +int parse_synth_opt(char *str); + typedef int (*perf_event__handler_t)(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); -- cgit v1.2.3 From b28e5e439109fe6fd9fa047654ae99d0b7bc5ccc Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 16 Jun 2021 22:55:54 -0700 Subject: perf daemon: Avoid msan warnings on send_cmd As a full union is always sent, ensure all bytes of the union are initialized with memset to avoid msan warnings of use of uninitialized memory. 
An example warning from the daemon test: Uninitialized bytes in __interceptor_write at offset 71 inside [0x7ffd98da6280, 72) ==11602==WARNING: MemorySanitizer: use-of-uninitialized-value #0 0x5597edccdbe4 in ion tools/lib/perf/lib.c:18:6 #1 0x5597edccdbe4 in writen tools/lib/perf/lib.c:47:9 #2 0x5597ed221d30 in send_cmd tools/perf/builtin-daemon.c:1376:22 #3 0x5597ed21b48c in cmd_daemon tools/perf/builtin-daemon.c #4 0x5597ed1d6b67 in run_builtin tools/perf/perf.c:313:11 #5 0x5597ed1d6036 in handle_internal_command tools/perf/perf.c:365:8 #6 0x5597ed1d6036 in run_argv tools/perf/perf.c:409:2 #7 0x5597ed1d6036 in main tools/perf/perf.c:539:3 SUMMARY: MemorySanitizer: use-of-uninitialized-value tools/lib/perf/lib.c:18:6 in ion Exiting Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210617055554.1917997-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-daemon.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index 61929f63a047..c13201fb09c3 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -1403,8 +1403,10 @@ out: static int send_cmd_list(struct daemon *daemon) { - union cmd cmd = { .cmd = CMD_LIST, }; + union cmd cmd; + memset(&cmd, 0, sizeof(cmd)); + cmd.list.cmd = CMD_LIST; cmd.list.verbose = verbose; cmd.list.csv_sep = daemon->csv_sep ? 
*daemon->csv_sep : 0; @@ -1432,6 +1434,7 @@ static int __cmd_signal(struct daemon *daemon, struct option parent_options[], return -1; } + memset(&cmd, 0, sizeof(cmd)); cmd.signal.cmd = CMD_SIGNAL, cmd.signal.sig = SIGUSR2; strncpy(cmd.signal.name, name, sizeof(cmd.signal.name) - 1); @@ -1446,7 +1449,7 @@ static int __cmd_stop(struct daemon *daemon, struct option parent_options[], OPT_PARENT(parent_options), OPT_END() }; - union cmd cmd = { .cmd = CMD_STOP, }; + union cmd cmd; argc = parse_options(argc, argv, start_options, daemon_usage, 0); if (argc) @@ -1457,6 +1460,8 @@ static int __cmd_stop(struct daemon *daemon, struct option parent_options[], return -1; } + memset(&cmd, 0, sizeof(cmd)); + cmd.cmd = CMD_STOP; return send_cmd(daemon, &cmd); } @@ -1470,7 +1475,7 @@ static int __cmd_ping(struct daemon *daemon, struct option parent_options[], OPT_PARENT(parent_options), OPT_END() }; - union cmd cmd = { .cmd = CMD_PING, }; + union cmd cmd; argc = parse_options(argc, argv, ping_options, daemon_usage, 0); if (argc) @@ -1481,6 +1486,8 @@ static int __cmd_ping(struct daemon *daemon, struct option parent_options[], return -1; } + memset(&cmd, 0, sizeof(cmd)); + cmd.cmd = CMD_PING; scnprintf(cmd.ping.name, sizeof(cmd.ping.name), "%s", name); return send_cmd(daemon, &cmd); } -- cgit v1.2.3 From cb7bfb1da6f609bf954d5f164733ff35b1cb4d4e Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 27 Jan 2021 10:46:29 -0800 Subject: perf parse-events: Remove unnecessary #includes Minor cleanup motivated by trying to separately fuzz test parse-events. 
Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210127184629.516169-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 2 -- tools/perf/util/parse-events.l | 1 - 2 files changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5d1346aa0627..85d3d77d3c6a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -19,8 +19,6 @@ #include #include "string2.h" #include "strlist.h" -#include "symbol.h" -#include "header.h" #include "bpf-loader.h" #include "debug.h" #include diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 923849024b15..47da7a0c5df4 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -12,7 +12,6 @@ #include #include #include -#include "../perf.h" #include "parse-events.h" #include "parse-events-bison.h" #include "evsel.h" -- cgit v1.2.3 From 6c93f39f2f435d822c2f765650f405acebdc49fc Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 9 Sep 2021 14:18:44 +0800 Subject: perf list: Display pmu prefix for partially supported hybrid cache events Part of hardware cache events are only available on one CPU PMU. For example, 'L1-dcache-load-misses' is only available on cpu_core. perf list should clearly report this info. 
root@otcpl-adl-s-2:~# ./perf list Before: L1-dcache-load-misses [Hardware cache event] L1-dcache-loads [Hardware cache event] L1-dcache-stores [Hardware cache event] L1-icache-load-misses [Hardware cache event] L1-icache-loads [Hardware cache event] LLC-load-misses [Hardware cache event] LLC-loads [Hardware cache event] LLC-store-misses [Hardware cache event] LLC-stores [Hardware cache event] branch-load-misses [Hardware cache event] branch-loads [Hardware cache event] dTLB-load-misses [Hardware cache event] dTLB-loads [Hardware cache event] dTLB-store-misses [Hardware cache event] dTLB-stores [Hardware cache event] iTLB-load-misses [Hardware cache event] node-load-misses [Hardware cache event] node-loads [Hardware cache event] node-store-misses [Hardware cache event] node-stores [Hardware cache event] After: L1-dcache-loads [Hardware cache event] L1-dcache-stores [Hardware cache event] L1-icache-load-misses [Hardware cache event] LLC-load-misses [Hardware cache event] LLC-loads [Hardware cache event] LLC-store-misses [Hardware cache event] LLC-stores [Hardware cache event] branch-load-misses [Hardware cache event] branch-loads [Hardware cache event] cpu_atom/L1-icache-loads/ [Hardware cache event] cpu_core/L1-dcache-load-misses/ [Hardware cache event] cpu_core/node-load-misses/ [Hardware cache event] cpu_core/node-loads/ [Hardware cache event] dTLB-load-misses [Hardware cache event] dTLB-loads [Hardware cache event] dTLB-store-misses [Hardware cache event] dTLB-stores [Hardware cache event] iTLB-load-misses [Hardware cache event] Now we can clearly see 'L1-dcache-load-misses' is only available on cpu_core. If without pmu prefix, it indicates the event is available on both cpu_core and cpu_atom. 
Signed-off-by: Jin Yao Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210909061844.10221-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 76 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 68 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 85d3d77d3c6a..067f830dea4b 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2701,7 +2701,7 @@ next: return 0; } -static bool is_event_supported(u8 type, unsigned config) +static bool is_event_supported(u8 type, u64 config) { bool ret = true; int open_return; @@ -2821,10 +2821,18 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob, int print_hwcache_events(const char *event_glob, bool name_only) { - unsigned int type, op, i, evt_i = 0, evt_num = 0; - char name[64]; - char **evt_list = NULL; + unsigned int type, op, i, evt_i = 0, evt_num = 0, npmus = 0; + char name[64], new_name[128]; + char **evt_list = NULL, **evt_pmus = NULL; bool evt_num_known = false; + struct perf_pmu *pmu = NULL; + + if (perf_pmu__has_hybrid()) { + npmus = perf_pmu__hybrid_pmu_num(); + evt_pmus = zalloc(sizeof(char *) * npmus); + if (!evt_pmus) + goto out_enomem; + } restart: if (evt_num_known) { @@ -2840,20 +2848,61 @@ restart: continue; for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { + unsigned int hybrid_supported = 0, j; + bool supported; + __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); if (event_glob != NULL && !strglobmatch(name, event_glob)) continue; - if (!is_event_supported(PERF_TYPE_HW_CACHE, - type | (op << 8) | (i << 16))) - continue; + if (!perf_pmu__has_hybrid()) { + if (!is_event_supported(PERF_TYPE_HW_CACHE, + type | (op << 8) | (i << 16))) { + continue; + } + } else { + perf_pmu__for_each_hybrid_pmu(pmu) { + if 
(!evt_num_known) { + evt_num++; + continue; + } + + supported = is_event_supported( + PERF_TYPE_HW_CACHE, + type | (op << 8) | (i << 16) | + ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT)); + if (supported) { + snprintf(new_name, sizeof(new_name), "%s/%s/", + pmu->name, name); + evt_pmus[hybrid_supported] = strdup(new_name); + hybrid_supported++; + } + } + + if (hybrid_supported == 0) + continue; + } if (!evt_num_known) { evt_num++; continue; } - evt_list[evt_i] = strdup(name); + if ((hybrid_supported == 0) || + (hybrid_supported == npmus)) { + evt_list[evt_i] = strdup(name); + if (npmus > 0) { + for (j = 0; j < npmus; j++) + zfree(&evt_pmus[j]); + } + } else { + for (j = 0; j < hybrid_supported; j++) { + evt_list[evt_i++] = evt_pmus[j]; + evt_pmus[j] = NULL; + } + continue; + } + if (evt_list[evt_i] == NULL) goto out_enomem; evt_i++; @@ -2865,6 +2914,13 @@ restart: evt_num_known = true; goto restart; } + + for (evt_i = 0; evt_i < evt_num; evt_i++) { + if (!evt_list[evt_i]) + break; + } + + evt_num = evt_i; qsort(evt_list, evt_num, sizeof(char *), cmp_string); evt_i = 0; while (evt_i < evt_num) { @@ -2883,6 +2939,10 @@ out_free: for (evt_i = 0; evt_i < evt_num; evt_i++) zfree(&evt_list[evt_i]); zfree(&evt_list); + + for (evt_i = 0; evt_i < npmus; evt_i++) + zfree(&evt_pmus[evt_i]); + zfree(&evt_pmus); return evt_num; out_enomem: -- cgit v1.2.3 From 0ba37e05c240c7b38e5a327a96f404798a8698ff Mon Sep 17 00:00:00 2001 From: William Cohen Date: Sun, 26 Sep 2021 20:51:15 -0400 Subject: perf annotate: Add riscv64 support This patch adds basic arch initialization and instruction associate support for the riscv64 CPU architecture. 
Example output: $ perf annotate --stdio2 Samples: 122K of event 'task-clock:u', 4000 Hz, Event count (approx.): 30637250000, [percent: local period] strcmp() /usr/lib64/libc-2.32.so Percent Disassembly of section .text: 0000000000069a30 : __GI_strcmp(): const unsigned char *s2 = (const unsigned char *) p2; unsigned char c1, c2; do { c1 = (unsigned char) *s1++; 37.30 lbu a5,0(a0) c2 = (unsigned char) *s2++; 1.23 addi a1,a1,1 c1 = (unsigned char) *s1++; 18.68 addi a0,a0,1 c2 = (unsigned char) *s2++; 1.37 lbu a4,-1(a1) if (c1 == '\0') 18.71 ↓ beqz a5,18 return c1 - c2; } Signed-off-by: William Cohen Cc: Albert Ou Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Peter Zijlstra Cc: linux-riscv@lists.infradead.org Link: http://lore.kernel.org/lkml/20210927005115.610264-1-wcohen@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/riscv64/annotate/instructions.c | 34 +++++++++++++++++++++++++ tools/perf/util/annotate.c | 5 ++++ 2 files changed, 39 insertions(+) create mode 100644 tools/perf/arch/riscv64/annotate/instructions.c (limited to 'tools') diff --git a/tools/perf/arch/riscv64/annotate/instructions.c b/tools/perf/arch/riscv64/annotate/instructions.c new file mode 100644 index 000000000000..869a0eb28953 --- /dev/null +++ b/tools/perf/arch/riscv64/annotate/instructions.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 + +static +struct ins_ops *riscv64__associate_ins_ops(struct arch *arch, const char *name) +{ + struct ins_ops *ops = NULL; + + if (!strncmp(name, "jal", 3) || + !strncmp(name, "jr", 2) || + !strncmp(name, "call", 4)) + ops = &call_ops; + else if (!strncmp(name, "ret", 3)) + ops = &ret_ops; + else if (name[0] == 'j' || name[0] == 'b') + ops = &jump_ops; + else + return NULL; + + arch__associate_ins_ops(arch, name, ops); + + return ops; +} + +static +int riscv64__annotate_init(struct arch *arch, char *cpuid __maybe_unused) +{ + if (!arch->initialized) { + 
arch->associate_instruction_ops = riscv64__associate_ins_ops; + arch->initialized = true; + arch->objdump.comment_char = '#'; + } + + return 0; +} diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index b55f35485e43..4bab2273303a 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -151,6 +151,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i #include "arch/mips/annotate/instructions.c" #include "arch/x86/annotate/instructions.c" #include "arch/powerpc/annotate/instructions.c" +#include "arch/riscv64/annotate/instructions.c" #include "arch/s390/annotate/instructions.c" #include "arch/sparc/annotate/instructions.c" @@ -191,6 +192,10 @@ static struct arch architectures[] = { .name = "powerpc", .init = powerpc__annotate_init, }, + { + .name = "riscv64", + .init = riscv64__annotate_init, + }, { .name = "s390", .init = s390__annotate_init, -- cgit v1.2.3 From 0e46c8307574a8e2dac8d7ba97e0f6f4bbee67a5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 28 Sep 2021 14:15:01 -0300 Subject: perf jevents: Add __maybe_unused attribute to unused function arg The tools/perf/pmu-events/jevents.c file isn't being compiled with -Werror and -Wextra, which will be the case soon, so before we turn those compiler flags on, fix what it would flag. 
Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Like Xu Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Signed-off-by: Arnaldo Carvalho de Melo To: John Garry --- tools/perf/pmu-events/jevents.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 6731b3cf0c2f..323e1dfe2436 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -45,6 +45,7 @@ #include /* getrlimit */ #include #include +#include #include #include "jsmn.h" #include "json.h" @@ -470,7 +471,7 @@ static void free_arch_std_events(void) } } -static int save_arch_std_events(void *data, struct json_event *je) +static int save_arch_std_events(void *data __maybe_unused, struct json_event *je) { struct event_struct *es; -- cgit v1.2.3 From 4a87dea9e60fe10079f01e06a58c4f9dfb667940 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 17 Sep 2021 11:42:39 -0700 Subject: perf test: Workload test of metric and metricgroups Test every metric and metricgroup with 'true' as a workload. For metrics, check that we see the metric printed or get unsupported. If the 'true' workload executes too quickly retry with 'perf bench internals synthesize'. v3. Fix test condition (thanks to Paul A. Clarke). Add a fallback case of a larger workload so that we don't ignore "<not counted>". v2. Switched the workload to something faster. 
Signed-off-by: Ian Rogers Reviewed-by: John Garry Cc: Alexander Shishkin Cc: Jin Yao Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210917184240.2181186-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat_all_metricgroups.sh | 12 ++++++++++++ tools/perf/tests/shell/stat_all_metrics.sh | 22 ++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100755 tools/perf/tests/shell/stat_all_metricgroups.sh create mode 100755 tools/perf/tests/shell/stat_all_metrics.sh (limited to 'tools') diff --git a/tools/perf/tests/shell/stat_all_metricgroups.sh b/tools/perf/tests/shell/stat_all_metricgroups.sh new file mode 100755 index 000000000000..de24d374ce24 --- /dev/null +++ b/tools/perf/tests/shell/stat_all_metricgroups.sh @@ -0,0 +1,12 @@ +#!/bin/sh +# perf all metricgroups test +# SPDX-License-Identifier: GPL-2.0 + +set -e + +for m in $(perf list --raw-dump metricgroups); do + echo "Testing $m" + perf stat -M "$m" true +done + +exit 0 diff --git a/tools/perf/tests/shell/stat_all_metrics.sh b/tools/perf/tests/shell/stat_all_metrics.sh new file mode 100755 index 000000000000..7f4ba3cad632 --- /dev/null +++ b/tools/perf/tests/shell/stat_all_metrics.sh @@ -0,0 +1,22 @@ +#!/bin/sh +# perf all metrics test +# SPDX-License-Identifier: GPL-2.0 + +set -e + +for m in $(perf list --raw-dump metrics); do + echo "Testing $m" + result=$(perf stat -M "$m" true 2>&1) + if [[ ! "$result" =~ "$m" ]] && [[ ! "$result" =~ "<not supported>" ]]; then + # We failed to see the metric and the events are support. Possibly the + # workload was too small so retry with something longer. + result=$(perf stat -M "$m" perf bench internals synthesize 2>&1) + if [[ ! 
"$result" =~ "$m" ]]; then + echo "Metric '$m' not printed in:" + echo "$result" + exit 1 + fi + fi +done + +exit 0 -- cgit v1.2.3 From 3d5ac9effcc640d5d66bc6d833e1dcc9faa279aa Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 17 Sep 2021 11:42:40 -0700 Subject: perf test: Workload test of all PMUs Iterate over the list of PMUs and run the 'true' workload on them. If the event isn't printed then run the large 'perf bench internals synthesize' workload and check the event is counted. On a Skylake this test takes 1m15s mainly running the 'true' workload. Suggested-by: John Garry Signed-off-by: Ian Rogers Reviewed-by: John Garry Cc: Alexander Shishkin Cc: Jin Yao Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210917184240.2181186-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/stat_all_pmu.sh | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100755 tools/perf/tests/shell/stat_all_pmu.sh (limited to 'tools') diff --git a/tools/perf/tests/shell/stat_all_pmu.sh b/tools/perf/tests/shell/stat_all_pmu.sh new file mode 100755 index 000000000000..2de7fd0394fd --- /dev/null +++ b/tools/perf/tests/shell/stat_all_pmu.sh @@ -0,0 +1,22 @@ +#!/bin/sh +# perf all PMU test +# SPDX-License-Identifier: GPL-2.0 + +set -e + +for p in $(perf list --raw-dump pmu); do + echo "Testing $p" + result=$(perf stat -e "$p" true 2>&1) + if [[ ! "$result" =~ "$p" ]] && [[ ! "$result" =~ "<not supported>" ]]; then + # We failed to see the event and it is supported. Possibly the workload was + # too small so retry with something longer. + result=$(perf stat -e "$p" perf bench internals synthesize 2>&1) + if [[ ! 
"$result" =~ "$p" ]]; then + echo "Event '$p' not printed in:" + echo "$result" + exit 1 + fi + fi +done + +exit 0 -- cgit v1.2.3 From b758a61b391fb5ed749f4848f444d8223ae0a324 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 22 Sep 2021 17:10:21 -0700 Subject: perf tools: Enable libtracefs dynamic linking Currently libtracefs isn't used by perf, but there are potential improvements by using it as identified Steven Rostedt's e-mail: https://lore.kernel.org/lkml/20210610154759.1ef958f0@oasis.local.home/ This change is modelled on the dynamic libtraceevent patch by Michael Petlan: https://lore.kernel.org/linux-perf-users/20210428092023.4009-1-mpetlan@redhat.com/ v3. Adds file missed in v1 and v2 spotted by Jiri Olsa. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt (VMware) Cc: Tzvetomir Stoyanov (VMware) Cc: linux-trace-devel@vger.kernel.org Link: http://lore.kernel.org/lkml/20210923001024.550263-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 1 + tools/build/feature/Makefile | 4 ++++ tools/build/feature/test-libtracefs.c | 10 ++++++++++ tools/perf/Makefile.config | 9 +++++++++ tools/perf/Makefile.perf | 2 ++ 5 files changed, 26 insertions(+) create mode 100644 tools/build/feature/test-libtracefs.c (limited to 'tools') diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 3dd2f68366f9..45a9a59828c3 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -52,6 +52,7 @@ FEATURE_TESTS_BASIC := \ libslang \ libslang-include-subdir \ libtraceevent \ + libtracefs \ libcrypto \ libunwind \ pthread-attr-setaffinity-np \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index eff55d287db1..d024b5204ba0 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -36,6 +36,7 @@ FILES= \ test-libslang.bin \ 
test-libslang-include-subdir.bin \ test-libtraceevent.bin \ + test-libtracefs.bin \ test-libcrypto.bin \ test-libunwind.bin \ test-libunwind-debug-frame.bin \ @@ -199,6 +200,9 @@ $(OUTPUT)test-libslang-include-subdir.bin: $(OUTPUT)test-libtraceevent.bin: $(BUILD) -ltraceevent +$(OUTPUT)test-libtracefs.bin: + $(BUILD) -ltracefs + $(OUTPUT)test-libcrypto.bin: $(BUILD) -lcrypto diff --git a/tools/build/feature/test-libtracefs.c b/tools/build/feature/test-libtracefs.c new file mode 100644 index 000000000000..8eff16c0c10b --- /dev/null +++ b/tools/build/feature/test-libtracefs.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <tracefs/tracefs.h> + +int main(void) +{ + struct tracefs_instance *inst = tracefs_instance_create("dummy"); + + tracefs_instance_destroy(inst); + return 0; +} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 446180401e26..00ec900ddbca 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -1098,6 +1098,15 @@ ifdef LIBTRACEEVENT_DYNAMIC endif endif +ifdef LIBTRACEFS_DYNAMIC + $(call feature_check,libtracefs) + ifeq ($(feature-libtracefs), 1) + EXTLIBS += -ltracefs + else + dummy := $(error Error: No libtracefs devel library found, please install libtracefs-dev); + endif +endif + # Among the variables below, these: # perfexecdir # perf_include_dir diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index e04313c4d840..7df13e74450c 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -130,6 +130,8 @@ include ../scripts/utilities.mak # # Define LIBTRACEEVENT_DYNAMIC to enable libtraceevent dynamic linking # +# Define LIBTRACEFS_DYNAMIC to enable libtracefs dynamic linking +# # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL -- cgit v1.2.3 From 569715164ba2125cf5014aa6de7154ebdb95fef4 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 22 Sep 2021 17:10:22 -0700 Subject: perf tools: Add define for libtraceevent version The definition is derived 
from pkg-config as discussed in: https://lore.kernel.org/lkml/20210610155915.20a252d3@oasis.local.home/ The definition is computed using expr rather than passed to be computed in C code, this avoids complications with quote in the variable expansions. For example see the target python/perf.so in Makefile.perf. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt (VMware) Cc: Tzvetomir Stoyanov (VMware) Cc: linux-trace-devel@vger.kernel.org Link: http://lore.kernel.org/lkml/20210923001024.550263-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 00ec900ddbca..2001c315f0db 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -1093,6 +1093,12 @@ ifdef LIBTRACEEVENT_DYNAMIC $(call feature_check,libtraceevent) ifeq ($(feature-libtraceevent), 1) EXTLIBS += -ltraceevent + LIBTRACEEVENT_VERSION := $(shell $(PKG_CONFIG) --modversion libtraceevent) + LIBTRACEEVENT_VERSION_1 := $(word 1, $(subst ., ,$(LIBTRACEEVENT_VERSION))) + LIBTRACEEVENT_VERSION_2 := $(word 2, $(subst ., ,$(LIBTRACEEVENT_VERSION))) + LIBTRACEEVENT_VERSION_3 := $(word 3, $(subst ., ,$(LIBTRACEEVENT_VERSION))) + LIBTRACEEVENT_VERSION_CPP := $(shell expr $(LIBTRACEEVENT_VERSION_1) \* 255 \* 255 + $(LIBTRACEEVENT_VERSION_2) \* 255 + $(LIBTRACEEVENT_VERSION_3)) + CFLAGS += -DLIBTRACEEVENT_VERSION=$(LIBTRACEEVENT_VERSION_CPP) else dummy := $(error Error: No libtraceevent devel library found, please install libtraceevent-devel); endif -- cgit v1.2.3 From 359cad09e40bedf927ea6046d27ccf977c83b71a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 22 Sep 2021 17:10:23 -0700 Subject: perf tools: Add define for libtracefs version This will allow version specific support of libtracefs. 
Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt (VMware) Cc: Tzvetomir Stoyanov (VMware) Cc: linux-trace-devel@vger.kernel.org Link: http://lore.kernel.org/lkml/20210923001024.550263-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 2001c315f0db..0ae2e3d8b832 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -1108,6 +1108,12 @@ ifdef LIBTRACEFS_DYNAMIC $(call feature_check,libtracefs) ifeq ($(feature-libtracefs), 1) EXTLIBS += -ltracefs + LIBTRACEFS_VERSION := $(shell $(PKG_CONFIG) --modversion libtracefs) + LIBTRACEFS_VERSION_1 := $(word 1, $(subst ., ,$(LIBTRACEFS_VERSION))) + LIBTRACEFS_VERSION_2 := $(word 2, $(subst ., ,$(LIBTRACEFS_VERSION))) + LIBTRACEFS_VERSION_3 := $(word 3, $(subst ., ,$(LIBTRACEFS_VERSION))) + LIBTRACEFS_VERSION_CPP := $(shell expr $(LIBTRACEFS_VERSION_1) \* 255 \* 255 + $(LIBTRACEFS_VERSION_2) \* 255 + $(LIBTRACEFS_VERSION_3)) + CFLAGS += -DLIBTRACEFS_VERSION=$(LIBTRACEFS_VERSION_CPP) else dummy := $(error Error: No libtracefs devel library found, please install libtracefs-dev); endif -- cgit v1.2.3 From 08efcb4a638d260ef7fcbae64ecf7ceceb3f1841 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 22 Sep 2021 17:10:24 -0700 Subject: libtraceevent: Increase libtraceevent logging when verbose libtraceevent has added more levels of debug printout and with changes like: https://lore.kernel.org/linux-trace-devel/20210507095022.1079364-3-tz.stoyanov@gmail.com previously generated output like "registering plugin" is no longer displayed. This change makes it so that if perf's verbose debug output is enabled then the debug and info libtraceevent messages can be displayed. 
The code is conditionally enabled based on the libtraceevent version as discussed in the RFC: https://lore.kernel.org/lkml/20210610060643.595673-1-irogers@google.com/ v2. Is a rebase and handles the case of building without LIBTRACEEVENT_DYNAMIC. Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Steven Rostedt (VMware) Cc: Tzvetomir Stoyanov (VMware) Cc: linux-trace-devel@vger.kernel.org Link: http://lore.kernel.org/lkml/20210923001024.550263-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/debug.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 2c06abf6dcd2..c7a9fa0ffae9 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -24,6 +24,16 @@ #include "util/parse-sublevel-options.h" #include <linux/ctype.h> +#include <traceevent/event-parse.h> + +#define MAKE_LIBTRACEEVENT_VERSION(a, b, c) ((a)*255*255+(b)*255+(c)) +#ifndef LIBTRACEEVENT_VERSION +/* + * If LIBTRACEEVENT_VERSION wasn't computed then set to version 1.1.0 that ships + * with the Linux kernel tools. + */ +#define LIBTRACEEVENT_VERSION MAKE_LIBTRACEEVENT_VERSION(1, 1, 0) +#endif int verbose; int debug_peo_args; @@ -228,6 +238,15 @@ int perf_debug_option(const char *str) /* Allow only verbose value in range (0, 10), otherwise set 0. */ verbose = (verbose < 0) || (verbose > 10) ? 
0 : verbose; +#if MAKE_LIBTRACEEVENT_VERSION(1, 3, 0) <= LIBTRACEEVENT_VERSION + if (verbose == 1) + tep_set_loglevel(TEP_LOG_INFO); + else if (verbose == 2) + tep_set_loglevel(TEP_LOG_DEBUG); + else if (verbose >= 3) + tep_set_loglevel(TEP_LOG_ALL); +#endif + return 0; } -- cgit v1.2.3 From 4f9d4f8aa7328068a2f25cfb3d1d04c1b6fa54ac Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 16 Sep 2021 20:34:21 +0800 Subject: perf parse-events: Set numeric term config For numeric terms, the config field may be NULL as it is not set from the l+y parsing. Fix by setting the term config from the term type name. Also fix up the pmu-events test to set the alias strings to set the period term properly, and fix up parse-events test to check the term config string. Signed-off-by: John Garry Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Cc: liuqi115@huawei.com Link: https://lore.kernel.org/r/1631795665-240946-2-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-events.c | 8 ++++---- tools/perf/tests/pmu-events.c | 6 +++--- tools/perf/util/parse-events.c | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index fd3556cc9ad4..8875e388563e 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -605,7 +605,7 @@ static int test__checkterms_simple(struct list_head *terms) TEST_ASSERT_VAL("wrong type val", term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); TEST_ASSERT_VAL("wrong val", term->val.num == 10); - TEST_ASSERT_VAL("wrong config", !term->config); + TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "config")); /* config1 */ term = list_entry(term->list.next, struct parse_events_term, 
list); @@ -614,7 +614,7 @@ static int test__checkterms_simple(struct list_head *terms) TEST_ASSERT_VAL("wrong type val", term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); TEST_ASSERT_VAL("wrong val", term->val.num == 1); - TEST_ASSERT_VAL("wrong config", !term->config); + TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "config1")); /* config2=3 */ term = list_entry(term->list.next, struct parse_events_term, list); @@ -623,7 +623,7 @@ static int test__checkterms_simple(struct list_head *terms) TEST_ASSERT_VAL("wrong type val", term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); TEST_ASSERT_VAL("wrong val", term->val.num == 3); - TEST_ASSERT_VAL("wrong config", !term->config); + TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "config2")); /* umask=1*/ term = list_entry(term->list.next, struct parse_events_term, list); @@ -661,7 +661,7 @@ static int test__checkterms_simple(struct list_head *terms) TEST_ASSERT_VAL("wrong type val", term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); TEST_ASSERT_VAL("wrong val", term->val.num == 0xead); - TEST_ASSERT_VAL("wrong config", !term->config); + TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "config")); return 0; } diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 43743cf719ef..8c5a6ba1cb14 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -67,7 +67,7 @@ static const struct perf_pmu_test_event segment_reg_loads_any = { .desc = "Number of segment register loads", .topic = "other", }, - .alias_str = "umask=0x80,(null)=0x30d40,event=0x6", + .alias_str = "umask=0x80,period=0x30d40,event=0x6", .alias_long_desc = "Number of segment register loads", }; @@ -78,7 +78,7 @@ static const struct perf_pmu_test_event dispatch_blocked_any = { .desc = "Memory cluster signals to block micro-op dispatch for any reason", .topic = "other", }, - .alias_str = "umask=0x20,(null)=0x30d40,event=0x9", + .alias_str = "umask=0x20,period=0x30d40,event=0x9", .alias_long_desc = "Memory 
cluster signals to block micro-op dispatch for any reason", }; @@ -89,7 +89,7 @@ static const struct perf_pmu_test_event eist_trans = { .desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions", .topic = "other", }, - .alias_str = "umask=0,(null)=0x30d40,event=0x3a", + .alias_str = "umask=0,period=0x30d40,event=0x3a", .alias_long_desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions", }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 067f830dea4b..1acac3e13b32 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -3141,7 +3141,7 @@ int parse_events_term__num(struct parse_events_term **term, struct parse_events_term temp = { .type_val = PARSE_EVENTS__TERM_TYPE_NUM, .type_term = type_term, - .config = config, + .config = config ? : strdup(config_term_names[type_term]), .no_value = no_value, .err_term = loc_term ? loc_term->first_column : 0, .err_val = loc_val ? loc_val->first_column : 0, -- cgit v1.2.3 From d60bad10c4ae42ef96b5b7dfd5924c54f3f257a6 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 16 Sep 2021 20:34:22 +0800 Subject: perf jevents: Support ConfigCode Some PMUs use "config=XXX" for eventcodes, like: more /sys/bus/event_source/devices/hisi_sccl1_ddrc3/events/act_cmd config=0x5 However jevents would give an alias with .event field "event=0x5" for this event. This is handled without issue by the parse events code, but the pmu alias code gets a bit confused, as it warns about assigning "event=0x5" over "config=0x5" in perf_pmu_assign_str() when merging aliases: ./perf stat -v -e act_cmd ... alias act_cmd differs in field 'value' ... To make things a bit more straightforward, allow jevents to support "config=XXX" as well, by supporting a "ConfigCode" field. 
Signed-off-by: John Garry Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Cc: liuqi115@huawei.com Link: https://lore.kernel.org/r/1631795665-240946-3-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 323e1dfe2436..19497e4f8a86 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -576,10 +576,12 @@ static int json_events(const char *fn, struct json_event je = {}; char *arch_std = NULL; unsigned long long eventcode = 0; + unsigned long long configcode = 0; struct msrmap *msr = NULL; jsmntok_t *msrval = NULL; jsmntok_t *precise = NULL; jsmntok_t *obj = tok++; + bool configcode_present = false; EXPECT(obj->type == JSMN_OBJECT, obj, "expected object"); for (j = 0; j < obj->size; j += 2) { @@ -602,6 +604,12 @@ static int json_events(const char *fn, addfield(map, &code, "", "", val); eventcode |= strtoul(code, NULL, 0); free(code); + } else if (json_streq(map, field, "ConfigCode")) { + char *code = NULL; + addfield(map, &code, "", "", val); + configcode |= strtoul(code, NULL, 0); + free(code); + configcode_present = true; } else if (json_streq(map, field, "ExtSel")) { char *code = NULL; addfield(map, &code, "", "", val); @@ -683,7 +691,10 @@ static int json_events(const char *fn, addfield(map, &extra_desc, " ", "(Precise event)", NULL); } - snprintf(buf, sizeof buf, "event=%#llx", eventcode); + if (configcode_present) + snprintf(buf, sizeof buf, "config=%#llx", configcode); + else + snprintf(buf, sizeof buf, "event=%#llx", eventcode); addfield(map, &event, ",", buf, NULL); if (je.desc && extra_desc) 
addfield(map, &je.desc, " ", extra_desc, NULL); -- cgit v1.2.3 From 56be05103a408b7f56c5e75717b2b23a5adf1ddb Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 16 Sep 2021 20:34:23 +0800 Subject: perf test: Verify more event members in pmu-events test Function compare_pmu_events() does not compare all struct pmu_event members, so add tests for missing members "name", "event", "aggr_mode", "metric_constraint", and "metric_group", and re-order the tests to match current struct pmu_event member ordering. Also fix uncore_hisi_l3c_rd_hit_cpipe.event member, now that we're actually testing it. Signed-off-by: John Garry Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Cc: liuqi115@huawei.com Link: https://lore.kernel.org/r/1631795665-240946-4-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/pmu-events.c | 50 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 40 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 8c5a6ba1cb14..adfc17f51c7b 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -146,7 +146,7 @@ static const struct perf_pmu_test_event unc_cbo_xsnp_response_miss_eviction = { static const struct perf_pmu_test_event uncore_hisi_l3c_rd_hit_cpipe = { .event = { .name = "uncore_hisi_l3c.rd_hit_cpipe", - .event = "event=0x2", + .event = "event=0x7", .desc = "Total read hits. 
Unit: hisi_sccl,l3c ", .topic = "uncore", .long_desc = "Total read hits", @@ -255,6 +255,24 @@ static struct pmu_event *__test_pmu_get_sys_events_table(void) static int compare_pmu_events(struct pmu_event *e1, const struct pmu_event *e2) { + if (!is_same(e1->name, e2->name)) { + pr_debug2("testing event e1 %s: mismatched name string, %s vs %s\n", + e1->name, e1->name, e2->name); + return -1; + } + + if (!is_same(e1->compat, e2->compat)) { + pr_debug2("testing event e1 %s: mismatched compat string, %s vs %s\n", + e1->name, e1->compat, e2->compat); + return -1; + } + + if (!is_same(e1->event, e2->event)) { + pr_debug2("testing event e1 %s: mismatched event, %s vs %s\n", + e1->name, e1->event, e2->event); + return -1; + } + if (!is_same(e1->desc, e2->desc)) { pr_debug2("testing event e1 %s: mismatched desc, %s vs %s\n", e1->name, e1->desc, e2->desc); @@ -273,6 +291,12 @@ static int compare_pmu_events(struct pmu_event *e1, const struct pmu_event *e2) return -1; } + if (!is_same(e1->pmu, e2->pmu)) { + pr_debug2("testing event e1 %s: mismatched pmu string, %s vs %s\n", + e1->name, e1->pmu, e2->pmu); + return -1; + } + if (!is_same(e1->unit, e2->unit)) { pr_debug2("testing event e1 %s: mismatched unit, %s vs %s\n", e1->name, e1->unit, e2->unit); @@ -285,6 +309,12 @@ static int compare_pmu_events(struct pmu_event *e1, const struct pmu_event *e2) return -1; } + if (!is_same(e1->aggr_mode, e2->aggr_mode)) { + pr_debug2("testing event e1 %s: mismatched aggr_mode, %s vs %s\n", + e1->name, e1->aggr_mode, e2->aggr_mode); + return -1; + } + if (!is_same(e1->metric_expr, e2->metric_expr)) { pr_debug2("testing event e1 %s: mismatched metric_expr, %s vs %s\n", e1->name, e1->metric_expr, e2->metric_expr); @@ -297,21 +327,21 @@ static int compare_pmu_events(struct pmu_event *e1, const struct pmu_event *e2) return -1; } - if (!is_same(e1->deprecated, e2->deprecated)) { - pr_debug2("testing event e1 %s: mismatched deprecated, %s vs %s\n", - e1->name, e1->deprecated, e2->deprecated); + 
if (!is_same(e1->metric_group, e2->metric_group)) { + pr_debug2("testing event e1 %s: mismatched metric_group, %s vs %s\n", + e1->name, e1->metric_group, e2->metric_group); return -1; } - if (!is_same(e1->pmu, e2->pmu)) { - pr_debug2("testing event e1 %s: mismatched pmu string, %s vs %s\n", - e1->name, e1->pmu, e2->pmu); + if (!is_same(e1->deprecated, e2->deprecated)) { + pr_debug2("testing event e1 %s: mismatched deprecated, %s vs %s\n", + e1->name, e1->deprecated, e2->deprecated); return -1; } - if (!is_same(e1->compat, e2->compat)) { - pr_debug2("testing event e1 %s: mismatched compat string, %s vs %s\n", - e1->name, e1->compat, e2->compat); + if (!is_same(e1->metric_constraint, e2->metric_constraint)) { + pr_debug2("testing event e1 %s: mismatched metric_constant, %s vs %s\n", + e1->name, e1->metric_constraint, e2->metric_constraint); return -1; } -- cgit v1.2.3 From b8b350afaa4bd007261c1ce367123444f6953a42 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 16 Sep 2021 20:34:24 +0800 Subject: perf test: Add pmu-event test for event described as "config=" Add a new test event for a system event whose event member is in form "config=". 
Signed-off-by: John Garry Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Cc: liuqi115@huawei.com Link: https://lore.kernel.org/r/1631795665-240946-5-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/test/test_soc/sys/uncore.json | 7 ++++++ tools/perf/tests/pmu-events.c | 25 ++++++++++++++++++++++ 2 files changed, 32 insertions(+) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/test/test_soc/sys/uncore.json b/tools/perf/pmu-events/arch/test/test_soc/sys/uncore.json index 0f681a6e10ea..c7e7528db315 100644 --- a/tools/perf/pmu-events/arch/test/test_soc/sys/uncore.json +++ b/tools/perf/pmu-events/arch/test/test_soc/sys/uncore.json @@ -6,4 +6,11 @@ "Unit": "sys_ddr_pmu", "Compat": "v8" }, + { + "BriefDescription": "ccn read-cycles event", + "ConfigCode": "0x2c", + "EventName": "sys_ccn_pmu.read_cycles", + "Unit": "sys_ccn_pmu", + "Compat": "0x01" + } ] diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index adfc17f51c7b..f14266a4c513 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -208,8 +208,23 @@ static const struct perf_pmu_test_event sys_ddr_pmu_write_cycles = { .matching_pmu = "uncore_sys_ddr_pmu", }; +static const struct perf_pmu_test_event sys_ccn_pmu_read_cycles = { + .event = { + .name = "sys_ccn_pmu.read_cycles", + .event = "config=0x2c", + .desc = "ccn read-cycles event. Unit: uncore_sys_ccn_pmu ", + .topic = "uncore", + .pmu = "uncore_sys_ccn_pmu", + .compat = "0x01", + }, + .alias_str = "config=0x2c", + .alias_long_desc = "ccn read-cycles event. 
Unit: uncore_sys_ccn_pmu ", + .matching_pmu = "uncore_sys_ccn_pmu", +}; + static const struct perf_pmu_test_event *sys_events[] = { &sys_ddr_pmu_write_cycles, + &sys_ccn_pmu_read_cycles, NULL }; @@ -677,6 +692,16 @@ static struct perf_pmu_test_pmu test_pmus[] = { &sys_ddr_pmu_write_cycles, }, }, + { + .pmu = { + .name = (char *)"uncore_sys_ccn_pmu4", + .is_uncore = 1, + .id = (char *)"0x01", + }, + .aliases = { + &sys_ccn_pmu_read_cycles, + }, + }, }; /* Test that aliases generated are as expected */ -- cgit v1.2.3 From c801612875909cbce823dfc276c58c7155f95b01 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 16 Sep 2021 20:34:25 +0800 Subject: perf vendor events arm64: Revise hip08 uncore events To improve alias matching, remove the PMU name prefix from the EventName. This will mean that the pmu code will merge aliases, such that we no longer get a huge list of per-PMU events - see perf_pmu_merge_alias(). Also make the following associated changes: - Use "ConfigCode" rather than "EventCode", so the pmu code is not so disagreeable about inconsistent event codes - Add undocumented HHA event codes to allow alias merging (for those events) Signed-off-by: John Garry Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Cc: linuxarm@huawei.com Cc: liuqi115@huawei.com Link: https://lore.kernel.org/r/1631795665-240946-6-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/hisilicon/hip08/uncore-ddrc.json | 32 +++--- .../arch/arm64/hisilicon/hip08/uncore-hha.json | 120 +++++++++++++++++---- .../arch/arm64/hisilicon/hip08/uncore-l3c.json | 52 ++++----- 3 files changed, 142 insertions(+), 62 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json 
b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json index 61514d38601b..2b3cb55df288 100644 --- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json @@ -1,56 +1,56 @@ [ { - "EventCode": "0x00", - "EventName": "uncore_hisi_ddrc.flux_wr", + "ConfigCode": "0x00", + "EventName": "flux_wr", "BriefDescription": "DDRC total write operations", "PublicDescription": "DDRC total write operations", "Unit": "hisi_sccl,ddrc" }, { - "EventCode": "0x01", - "EventName": "uncore_hisi_ddrc.flux_rd", + "ConfigCode": "0x01", + "EventName": "flux_rd", "BriefDescription": "DDRC total read operations", "PublicDescription": "DDRC total read operations", "Unit": "hisi_sccl,ddrc" }, { - "EventCode": "0x02", - "EventName": "uncore_hisi_ddrc.flux_wcmd", + "ConfigCode": "0x02", + "EventName": "flux_wcmd", "BriefDescription": "DDRC write commands", "PublicDescription": "DDRC write commands", "Unit": "hisi_sccl,ddrc" }, { - "EventCode": "0x03", - "EventName": "uncore_hisi_ddrc.flux_rcmd", + "ConfigCode": "0x03", + "EventName": "flux_rcmd", "BriefDescription": "DDRC read commands", "PublicDescription": "DDRC read commands", "Unit": "hisi_sccl,ddrc" }, { - "EventCode": "0x04", - "EventName": "uncore_hisi_ddrc.pre_cmd", + "ConfigCode": "0x04", + "EventName": "pre_cmd", "BriefDescription": "DDRC precharge commands", "PublicDescription": "DDRC precharge commands", "Unit": "hisi_sccl,ddrc" }, { - "EventCode": "0x05", - "EventName": "uncore_hisi_ddrc.act_cmd", + "ConfigCode": "0x05", + "EventName": "act_cmd", "BriefDescription": "DDRC active commands", "PublicDescription": "DDRC active commands", "Unit": "hisi_sccl,ddrc" }, { - "EventCode": "0x06", - "EventName": "uncore_hisi_ddrc.rnk_chg", + "ConfigCode": "0x06", + "EventName": "rnk_chg", "BriefDescription": "DDRC rank commands", "PublicDescription": "DDRC rank commands", "Unit": "hisi_sccl,ddrc" }, { - "EventCode": "0x07", - "EventName": 
"uncore_hisi_ddrc.rw_chg", + "ConfigCode": "0x07", + "EventName": "rw_chg", "BriefDescription": "DDRC read and write changes", "PublicDescription": "DDRC read and write changes", "Unit": "hisi_sccl,ddrc" diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json index ada86782933f..9a7ec7af2060 100644 --- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json @@ -1,72 +1,152 @@ [ { - "EventCode": "0x00", - "EventName": "uncore_hisi_hha.rx_ops_num", + "ConfigCode": "0x00", + "EventName": "rx_ops_num", "BriefDescription": "The number of all operations received by the HHA", "PublicDescription": "The number of all operations received by the HHA", "Unit": "hisi_sccl,hha" }, { - "EventCode": "0x01", - "EventName": "uncore_hisi_hha.rx_outer", + "ConfigCode": "0x01", + "EventName": "rx_outer", "BriefDescription": "The number of all operations received by the HHA from another socket", "PublicDescription": "The number of all operations received by the HHA from another socket", "Unit": "hisi_sccl,hha" }, { - "EventCode": "0x02", - "EventName": "uncore_hisi_hha.rx_sccl", + "ConfigCode": "0x02", + "EventName": "rx_sccl", "BriefDescription": "The number of all operations received by the HHA from another SCCL in this socket", "PublicDescription": "The number of all operations received by the HHA from another SCCL in this socket", "Unit": "hisi_sccl,hha" }, { - "EventCode": "0x03", - "EventName": "uncore_hisi_hha.rx_ccix", + "ConfigCode": "0x03", + "EventName": "rx_ccix", "BriefDescription": "Count of the number of operations that HHA has received from CCIX", "PublicDescription": "Count of the number of operations that HHA has received from CCIX", "Unit": "hisi_sccl,hha" }, { - "EventCode": "0x1c", - "EventName": "uncore_hisi_hha.rd_ddr_64b", + "ConfigCode": "0x4", + "EventName": "rx_wbi", + "Unit": "hisi_sccl,hha" 
+ }, + { + "ConfigCode": "0x5", + "EventName": "rx_wbip", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x11", + "EventName": "rx_wtistash", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x1c", + "EventName": "rd_ddr_64b", "BriefDescription": "The number of read operations sent by HHA to DDRC which size is 64 bytes", "PublicDescription": "The number of read operations sent by HHA to DDRC which size is 64bytes", "Unit": "hisi_sccl,hha" }, { - "EventCode": "0x1d", - "EventName": "uncore_hisi_hha.wr_ddr_64b", + "ConfigCode": "0x1d", + "EventName": "wr_ddr_64b", "BriefDescription": "The number of write operations sent by HHA to DDRC which size is 64 bytes", "PublicDescription": "The number of write operations sent by HHA to DDRC which size is 64 bytes", "Unit": "hisi_sccl,hha" }, { - "EventCode": "0x1e", - "EventName": "uncore_hisi_hha.rd_ddr_128b", + "ConfigCode": "0x1e", + "EventName": "rd_ddr_128b", "BriefDescription": "The number of read operations sent by HHA to DDRC which size is 128 bytes", "PublicDescription": "The number of read operations sent by HHA to DDRC which size is 128 bytes", "Unit": "hisi_sccl,hha" }, { - "EventCode": "0x1f", - "EventName": "uncore_hisi_hha.wr_ddr_128b", + "ConfigCode": "0x1f", + "EventName": "wr_ddr_128b", "BriefDescription": "The number of write operations sent by HHA to DDRC which size is 128 bytes", "PublicDescription": "The number of write operations sent by HHA to DDRC which size is 128 bytes", "Unit": "hisi_sccl,hha" }, { - "EventCode": "0x20", - "EventName": "uncore_hisi_hha.spill_num", + "ConfigCode": "0x20", + "EventName": "spill_num", "BriefDescription": "Count of the number of spill operations that the HHA has sent", "PublicDescription": "Count of the number of spill operations that the HHA has sent", "Unit": "hisi_sccl,hha" }, { - "EventCode": "0x21", - "EventName": "uncore_hisi_hha.spill_success", + "ConfigCode": "0x21", + "EventName": "spill_success", "BriefDescription": "Count of the number of successful 
spill operations that the HHA has sent", "PublicDescription": "Count of the number of successful spill operations that the HHA has sent", "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x23", + "EventName": "bi_num", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x32", + "EventName": "mediated_num", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x33", + "EventName": "tx_snp_num", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x34", + "EventName": "tx_snp_outer", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x35", + "EventName": "tx_snp_ccix", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x38", + "EventName": "rx_snprspdata", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x3c", + "EventName": "rx_snprsp_outer", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x40", + "EventName": "sdir-lookup", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x41", + "EventName": "edir-lookup", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x42", + "EventName": "sdir-hit", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x43", + "EventName": "edir-hit", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x4c", + "EventName": "sdir-home-migrate", + "Unit": "hisi_sccl,hha" + }, + { + "ConfigCode": "0x4d", + "EventName": "edir-home-migrate", + "Unit": "hisi_sccl,hha" } ] diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json index 67ab19e8cf3a..e3479b65be9a 100644 --- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json @@ -1,91 +1,91 @@ [ { - "EventCode": "0x00", - "EventName": "uncore_hisi_l3c.rd_cpipe", + "ConfigCode": "0x00", + "EventName": "rd_cpipe", "BriefDescription": "Total read accesses", "PublicDescription": "Total read accesses", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x01", - "EventName": "uncore_hisi_l3c.wr_cpipe", + "ConfigCode": 
"0x01", + "EventName": "wr_cpipe", "BriefDescription": "Total write accesses", "PublicDescription": "Total write accesses", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x02", - "EventName": "uncore_hisi_l3c.rd_hit_cpipe", + "ConfigCode": "0x02", + "EventName": "rd_hit_cpipe", "BriefDescription": "Total read hits", "PublicDescription": "Total read hits", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x03", - "EventName": "uncore_hisi_l3c.wr_hit_cpipe", + "ConfigCode": "0x03", + "EventName": "wr_hit_cpipe", "BriefDescription": "Total write hits", "PublicDescription": "Total write hits", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x04", - "EventName": "uncore_hisi_l3c.victim_num", + "ConfigCode": "0x04", + "EventName": "victim_num", "BriefDescription": "l3c precharge commands", "PublicDescription": "l3c precharge commands", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x20", - "EventName": "uncore_hisi_l3c.rd_spipe", + "ConfigCode": "0x20", + "EventName": "rd_spipe", "BriefDescription": "Count of the number of read lines that come from this cluster of CPU core in spipe", "PublicDescription": "Count of the number of read lines that come from this cluster of CPU core in spipe", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x21", - "EventName": "uncore_hisi_l3c.wr_spipe", + "ConfigCode": "0x21", + "EventName": "wr_spipe", "BriefDescription": "Count of the number of write lines that come from this cluster of CPU core in spipe", "PublicDescription": "Count of the number of write lines that come from this cluster of CPU core in spipe", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x22", - "EventName": "uncore_hisi_l3c.rd_hit_spipe", + "ConfigCode": "0x22", + "EventName": "rd_hit_spipe", "BriefDescription": "Count of the number of read lines that hits in spipe of this L3C", "PublicDescription": "Count of the number of read lines that hits in spipe of this L3C", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x23", - "EventName": "uncore_hisi_l3c.wr_hit_spipe", + 
"ConfigCode": "0x23", + "EventName": "wr_hit_spipe", "BriefDescription": "Count of the number of write lines that hits in spipe of this L3C", "PublicDescription": "Count of the number of write lines that hits in spipe of this L3C", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x29", - "EventName": "uncore_hisi_l3c.back_invalid", + "ConfigCode": "0x29", + "EventName": "back_invalid", "BriefDescription": "Count of the number of L3C back invalid operations", "PublicDescription": "Count of the number of L3C back invalid operations", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x40", - "EventName": "uncore_hisi_l3c.retry_cpu", + "ConfigCode": "0x40", + "EventName": "retry_cpu", "BriefDescription": "Count of the number of retry that L3C suppresses the CPU operations", "PublicDescription": "Count of the number of retry that L3C suppresses the CPU operations", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x41", - "EventName": "uncore_hisi_l3c.retry_ring", + "ConfigCode": "0x41", + "EventName": "retry_ring", "BriefDescription": "Count of the number of retry that L3C suppresses the ring operations", "PublicDescription": "Count of the number of retry that L3C suppresses the ring operations", "Unit": "hisi_sccl,l3c" }, { - "EventCode": "0x42", - "EventName": "uncore_hisi_l3c.prefetch_drop", + "ConfigCode": "0x42", + "EventName": "prefetch_drop", "BriefDescription": "Count of the number of prefetch drops from this L3C", "PublicDescription": "Count of the number of prefetch drops from this L3C", "Unit": "hisi_sccl,l3c" -- cgit v1.2.3 From cb94a02e7494c001fa8b5a4c5e16693fafd98530 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 23 Sep 2021 00:46:04 -0700 Subject: perf metric: Restructure struct expr_parse_ctx. A later change to parsing the ids out (in expr__find_other) will potentially drop hashmaps and so it is more convenient to move expr_parse_ctx to have a hashmap pointer rather than a struct value. 
As this pointer must be freed, rather than just going out of scope, add expr__ctx_new and expr__ctx_free to manage expr_parse_ctx memory. Adjust use of struct expr_parse_ctx accordingly. Reviewed-by: Andi Kleen Signed-off-by: Ian Rogers Tested-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jin Yao Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Sandeep Dasgupta Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210923074616.674826-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expr.c | 81 ++++++++++++++++++++++--------------------- tools/perf/tests/pmu-events.c | 47 +++++++++++++++---------- tools/perf/util/expr.c | 39 ++++++++++++++++----- tools/perf/util/expr.h | 5 +-- tools/perf/util/metricgroup.c | 44 ++++++++++++----------- tools/perf/util/stat-shadow.c | 50 ++++++++++++++++---------- 6 files changed, 159 insertions(+), 107 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 4d01051951cd..b0a3b5fd0c00 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -22,67 +22,70 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) const char *p; double val; int ret; - struct expr_parse_ctx ctx; + struct expr_parse_ctx *ctx; - expr__ctx_init(&ctx); - expr__add_id_val(&ctx, strdup("FOO"), 1); - expr__add_id_val(&ctx, strdup("BAR"), 2); + ctx = expr__ctx_new(); + TEST_ASSERT_VAL("expr__ctx_new", ctx); + expr__add_id_val(ctx, strdup("FOO"), 1); + expr__add_id_val(ctx, strdup("BAR"), 2); - ret = test(&ctx, "1+1", 2); - ret |= test(&ctx, "FOO+BAR", 3); - ret |= test(&ctx, "(BAR/2)%2", 1); - ret |= test(&ctx, "1 - -4", 5); - ret |= test(&ctx, "(FOO-1)*2 + (BAR/2)%2 - -4", 5); - ret |= test(&ctx, "1-1 | 1", 1); - ret |= test(&ctx, "1-1 & 1", 0); - ret |= test(&ctx, "min(1,2) + 1", 2); - ret |= test(&ctx, "max(1,2) + 1", 3); - ret |= test(&ctx, "1+1 if 3*4 else 0", 2); - ret |= 
test(&ctx, "1.1 + 2.1", 3.2); - ret |= test(&ctx, ".1 + 2.", 2.1); - ret |= test(&ctx, "d_ratio(1, 2)", 0.5); - ret |= test(&ctx, "d_ratio(2.5, 0)", 0); - ret |= test(&ctx, "1.1 < 2.2", 1); - ret |= test(&ctx, "2.2 > 1.1", 1); - ret |= test(&ctx, "1.1 < 1.1", 0); - ret |= test(&ctx, "2.2 > 2.2", 0); - ret |= test(&ctx, "2.2 < 1.1", 0); - ret |= test(&ctx, "1.1 > 2.2", 0); + ret = test(ctx, "1+1", 2); + ret |= test(ctx, "FOO+BAR", 3); + ret |= test(ctx, "(BAR/2)%2", 1); + ret |= test(ctx, "1 - -4", 5); + ret |= test(ctx, "(FOO-1)*2 + (BAR/2)%2 - -4", 5); + ret |= test(ctx, "1-1 | 1", 1); + ret |= test(ctx, "1-1 & 1", 0); + ret |= test(ctx, "min(1,2) + 1", 2); + ret |= test(ctx, "max(1,2) + 1", 3); + ret |= test(ctx, "1+1 if 3*4 else 0", 2); + ret |= test(ctx, "1.1 + 2.1", 3.2); + ret |= test(ctx, ".1 + 2.", 2.1); + ret |= test(ctx, "d_ratio(1, 2)", 0.5); + ret |= test(ctx, "d_ratio(2.5, 0)", 0); + ret |= test(ctx, "1.1 < 2.2", 1); + ret |= test(ctx, "2.2 > 1.1", 1); + ret |= test(ctx, "1.1 < 1.1", 0); + ret |= test(ctx, "2.2 > 2.2", 0); + ret |= test(ctx, "2.2 < 1.1", 0); + ret |= test(ctx, "1.1 > 2.2", 0); - if (ret) + if (ret) { + expr__ctx_free(ctx); return ret; + } p = "FOO/0"; - ret = expr__parse(&val, &ctx, p, 1); + ret = expr__parse(&val, ctx, p, 1); TEST_ASSERT_VAL("division by zero", ret == -1); p = "BAR/"; - ret = expr__parse(&val, &ctx, p, 1); + ret = expr__parse(&val, ctx, p, 1); TEST_ASSERT_VAL("missing operand", ret == -1); - expr__ctx_clear(&ctx); + expr__ctx_clear(ctx); TEST_ASSERT_VAL("find other", expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", - &ctx, 1) == 0); - TEST_ASSERT_VAL("find other", hashmap__size(&ctx.ids) == 3); - TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BAR", + ctx, 1) == 0); + TEST_ASSERT_VAL("find other", hashmap__size(ctx->ids) == 3); + TEST_ASSERT_VAL("find other", hashmap__find(ctx->ids, "BAR", (void **)&val_ptr)); - TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BAZ", + TEST_ASSERT_VAL("find other", 
hashmap__find(ctx->ids, "BAZ", (void **)&val_ptr)); - TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BOZO", + TEST_ASSERT_VAL("find other", hashmap__find(ctx->ids, "BOZO", (void **)&val_ptr)); - expr__ctx_clear(&ctx); + expr__ctx_clear(ctx); TEST_ASSERT_VAL("find other", expr__find_other("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@", - NULL, &ctx, 3) == 0); - TEST_ASSERT_VAL("find other", hashmap__size(&ctx.ids) == 2); - TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "EVENT1,param=3/", + NULL, ctx, 3) == 0); + TEST_ASSERT_VAL("find other", hashmap__size(ctx->ids) == 2); + TEST_ASSERT_VAL("find other", hashmap__find(ctx->ids, "EVENT1,param=3/", (void **)&val_ptr)); - TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "EVENT2,param=3/", + TEST_ASSERT_VAL("find other", hashmap__find(ctx->ids, "EVENT2,param=3/", (void **)&val_ptr)); - expr__ctx_clear(&ctx); + expr__ctx_free(ctx); return 0; } diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index f14266a4c513..91076ab0514b 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -836,7 +836,7 @@ static int resolve_metric_simple(struct expr_parse_ctx *pctx, do { all = true; - hashmap__for_each_entry_safe((&pctx->ids), cur, cur_tmp, bkt) { + hashmap__for_each_entry_safe(pctx->ids, cur, cur_tmp, bkt) { struct metric_ref *ref; struct pmu_event *pe; @@ -890,9 +890,14 @@ static int test_parsing(void) struct pmu_event *pe; int i, j, k; int ret = 0; - struct expr_parse_ctx ctx; + struct expr_parse_ctx *ctx; double result; + ctx = expr__ctx_new(); + if (!ctx) { + pr_debug("expr__ctx_new failed"); + return TEST_FAIL; + } i = 0; for (;;) { map = &pmu_events_map[i++]; @@ -910,15 +915,15 @@ static int test_parsing(void) break; if (!pe->metric_expr) continue; - expr__ctx_init(&ctx); - if (expr__find_other(pe->metric_expr, NULL, &ctx, 0) + expr__ctx_clear(ctx); + if (expr__find_other(pe->metric_expr, NULL, ctx, 0) < 0) { expr_failure("Parse other failed", map, pe); 
ret++; continue; } - if (resolve_metric_simple(&ctx, &compound_list, map, + if (resolve_metric_simple(ctx, &compound_list, map, pe->metric_name)) { expr_failure("Could not resolve metrics", map, pe); ret++; @@ -931,27 +936,27 @@ static int test_parsing(void) * make them unique. */ k = 1; - hashmap__for_each_entry((&ctx.ids), cur, bkt) - expr__add_id_val(&ctx, strdup(cur->key), k++); + hashmap__for_each_entry(ctx->ids, cur, bkt) + expr__add_id_val(ctx, strdup(cur->key), k++); - hashmap__for_each_entry((&ctx.ids), cur, bkt) { + hashmap__for_each_entry(ctx->ids, cur, bkt) { if (check_parse_cpu(cur->key, map == cpus_map, pe)) ret++; } list_for_each_entry_safe(metric, tmp, &compound_list, list) { - expr__add_ref(&ctx, &metric->metric_ref); + expr__add_ref(ctx, &metric->metric_ref); free(metric); } - if (expr__parse(&result, &ctx, pe->metric_expr, 0)) { + if (expr__parse(&result, ctx, pe->metric_expr, 0)) { expr_failure("Parse failed", map, pe); ret++; } - expr__ctx_clear(&ctx); } } + expr__ctx_free(ctx); /* TODO: fail when not ok */ exit: return ret == 0 ? TEST_OK : TEST_SKIP; @@ -971,7 +976,7 @@ static struct test_metric metrics[] = { static int metric_parse_fake(const char *str) { - struct expr_parse_ctx ctx; + struct expr_parse_ctx *ctx; struct hashmap_entry *cur; double result; int ret = -1; @@ -980,8 +985,12 @@ static int metric_parse_fake(const char *str) pr_debug("parsing '%s'\n", str); - expr__ctx_init(&ctx); - if (expr__find_other(str, NULL, &ctx, 0) < 0) { + ctx = expr__ctx_new(); + if (!ctx) { + pr_debug("expr__ctx_new failed"); + return TEST_FAIL; + } + if (expr__find_other(str, NULL, ctx, 0) < 0) { pr_err("expr__find_other failed\n"); return -1; } @@ -992,23 +1001,23 @@ static int metric_parse_fake(const char *str) * make them unique. 
*/ i = 1; - hashmap__for_each_entry((&ctx.ids), cur, bkt) - expr__add_id_val(&ctx, strdup(cur->key), i++); + hashmap__for_each_entry(ctx->ids, cur, bkt) + expr__add_id_val(ctx, strdup(cur->key), i++); - hashmap__for_each_entry((&ctx.ids), cur, bkt) { + hashmap__for_each_entry(ctx->ids, cur, bkt) { if (check_parse_fake(cur->key)) { pr_err("check_parse_fake failed\n"); goto out; } } - if (expr__parse(&result, &ctx, str, 0)) + if (expr__parse(&result, ctx, str, 0)) pr_err("expr__parse failed\n"); else ret = 0; out: - expr__ctx_clear(&ctx); + expr__ctx_free(ctx); return ret; } diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index a850fd0be3ee..7b1c06772a49 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -73,7 +73,7 @@ int expr__add_id(struct expr_parse_ctx *ctx, const char *id) data_ptr->parent = ctx->parent; data_ptr->kind = EXPR_ID_DATA__PARENT; - ret = hashmap__set(&ctx->ids, id, data_ptr, + ret = hashmap__set(ctx->ids, id, data_ptr, (const void **)&old_key, (void **)&old_data); if (ret) free(data_ptr); @@ -95,7 +95,7 @@ int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val) data_ptr->val = val; data_ptr->kind = EXPR_ID_DATA__VALUE; - ret = hashmap__set(&ctx->ids, id, data_ptr, + ret = hashmap__set(ctx->ids, id, data_ptr, (const void **)&old_key, (void **)&old_data); if (ret) free(data_ptr); @@ -140,7 +140,7 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) data_ptr->ref.metric_expr = ref->metric_expr; data_ptr->kind = EXPR_ID_DATA__REF; - ret = hashmap__set(&ctx->ids, name, data_ptr, + ret = hashmap__set(ctx->ids, name, data_ptr, (const void **)&old_key, (void **)&old_data); if (ret) free(data_ptr); @@ -156,7 +156,7 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref) int expr__get_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **data) { - return hashmap__find(&ctx->ids, id, (void **)data) ? 
0 : -1; + return hashmap__find(ctx->ids, id, (void **)data) ? 0 : -1; } int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, @@ -205,15 +205,23 @@ void expr__del_id(struct expr_parse_ctx *ctx, const char *id) struct expr_id_data *old_val = NULL; char *old_key = NULL; - hashmap__delete(&ctx->ids, id, + hashmap__delete(ctx->ids, id, (const void **)&old_key, (void **)&old_val); free(old_key); free(old_val); } -void expr__ctx_init(struct expr_parse_ctx *ctx) +struct expr_parse_ctx *expr__ctx_new(void) { - hashmap__init(&ctx->ids, key_hash, key_equal, NULL); + struct expr_parse_ctx *ctx; + + ctx = malloc(sizeof(struct expr_parse_ctx)); + if (!ctx) + return NULL; + + ctx->ids = hashmap__new(key_hash, key_equal, NULL); + ctx->parent = NULL; + return ctx; } void expr__ctx_clear(struct expr_parse_ctx *ctx) @@ -221,11 +229,24 @@ void expr__ctx_clear(struct expr_parse_ctx *ctx) struct hashmap_entry *cur; size_t bkt; - hashmap__for_each_entry((&ctx->ids), cur, bkt) { + hashmap__for_each_entry(ctx->ids, cur, bkt) { + free((char *)cur->key); + free(cur->value); + } + hashmap__clear(ctx->ids); +} + +void expr__ctx_free(struct expr_parse_ctx *ctx) +{ + struct hashmap_entry *cur; + size_t bkt; + + hashmap__for_each_entry(ctx->ids, cur, bkt) { free((char *)cur->key); free(cur->value); } - hashmap__clear(&ctx->ids); + hashmap__free(ctx->ids); + free(ctx); } static int diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 85df3e4771e4..5fa394f10418 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -19,7 +19,7 @@ struct expr_id { }; struct expr_parse_ctx { - struct hashmap ids; + struct hashmap *ids; struct expr_id *parent; }; @@ -30,8 +30,9 @@ struct expr_scanner_ctx { int runtime; }; -void expr__ctx_init(struct expr_parse_ctx *ctx); +struct expr_parse_ctx *expr__ctx_new(void); void expr__ctx_clear(struct expr_parse_ctx *ctx); +void expr__ctx_free(struct expr_parse_ctx *ctx); void expr__del_id(struct expr_parse_ctx *ctx, const char *id); int 
expr__add_id(struct expr_parse_ctx *ctx, const char *id); int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val); diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 29b747ac31c1..b7924a2f1f45 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -118,7 +118,7 @@ struct metric_ref_node { struct metric { struct list_head nd; - struct expr_parse_ctx pctx; + struct expr_parse_ctx *pctx; const char *metric_name; const char *metric_expr; const char *metric_unit; @@ -198,7 +198,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, struct evsel *ev, *current_leader = NULL; struct expr_id_data *val_ptr; int i = 0, matched_events = 0, events_to_match; - const int idnum = (int)hashmap__size(&pctx->ids); + const int idnum = (int)hashmap__size(pctx->ids); /* * duration_time is always grouped separately, when events are grouped @@ -206,7 +206,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, * add it to metric_events at the end. */ if (!has_constraint && - hashmap__find(&pctx->ids, "duration_time", (void **)&val_ptr)) + hashmap__find(pctx->ids, "duration_time", (void **)&val_ptr)) events_to_match = idnum - 1; else events_to_match = idnum; @@ -242,7 +242,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, if (contains_event(metric_events, matched_events, ev->name)) continue; /* Does this event belong to the parse context? 
*/ - if (hashmap__find(&pctx->ids, ev->name, (void **)&val_ptr)) + if (hashmap__find(pctx->ids, ev->name, (void **)&val_ptr)) metric_events[matched_events++] = ev; if (matched_events == events_to_match) @@ -322,12 +322,12 @@ static int metricgroup__setup_events(struct list_head *groups, struct metric_ref *metric_refs = NULL; metric_events = calloc(sizeof(void *), - hashmap__size(&m->pctx.ids) + 1); + hashmap__size(m->pctx->ids) + 1); if (!metric_events) { ret = -ENOMEM; break; } - evsel = find_evsel_group(perf_evlist, &m->pctx, + evsel = find_evsel_group(perf_evlist, m->pctx, metric_no_merge, m->has_constraint, metric_events, evlist_used); @@ -693,7 +693,7 @@ static void metricgroup__add_metric_weak_group(struct strbuf *events, size_t bkt; bool no_group = true, has_duration = false; - hashmap__for_each_entry((&ctx->ids), cur, bkt) { + hashmap__for_each_entry(ctx->ids, cur, bkt) { pr_debug("found event %s\n", (const char *)cur->key); /* * Duration time maps to a software event and can make @@ -724,7 +724,7 @@ static void metricgroup__add_metric_non_group(struct strbuf *events, size_t bkt; bool first = true; - hashmap__for_each_entry((&ctx->ids), cur, bkt) { + hashmap__for_each_entry(ctx->ids, cur, bkt) { if (!first) strbuf_addf(events, ","); strbuf_addf(events, "%s", (const char *)cur->key); @@ -799,7 +799,11 @@ static int __add_metric(struct list_head *metric_list, if (!m) return -ENOMEM; - expr__ctx_init(&m->pctx); + m->pctx = expr__ctx_new(); + if (!m->pctx) { + free(m); + return -ENOMEM; + } m->metric_name = pe->metric_name; m->metric_expr = pe->metric_expr; m->metric_unit = pe->unit; @@ -847,15 +851,15 @@ static int __add_metric(struct list_head *metric_list, /* Force all found IDs in metric to have us as parent ID. */ WARN_ON_ONCE(!parent); - m->pctx.parent = parent; + m->pctx->parent = parent; /* * For both the parent and referenced metrics, we parse * all the metric's IDs and add it to the parent context. 
*/ - if (expr__find_other(pe->metric_expr, NULL, &m->pctx, runtime) < 0) { + if (expr__find_other(pe->metric_expr, NULL, m->pctx, runtime) < 0) { if (m->metric_refs_cnt == 0) { - expr__ctx_clear(&m->pctx); + expr__ctx_free(m->pctx); free(m); *mp = NULL; } @@ -878,8 +882,8 @@ static int __add_metric(struct list_head *metric_list, list_for_each_prev(pos, metric_list) { struct metric *old = list_entry(pos, struct metric, nd); - if (hashmap__size(&m->pctx.ids) <= - hashmap__size(&old->pctx.ids)) + if (hashmap__size(m->pctx->ids) <= + hashmap__size(old->pctx->ids)) break; } list_add(&m->nd, pos); @@ -927,7 +931,7 @@ static int recursion_check(struct metric *m, const char *id, struct expr_id **pa * if we already processed 'id', if we did, it's recursion * and we fail. */ - ret = expr__get_id(&m->pctx, id, &data); + ret = expr__get_id(m->pctx, id, &data); if (ret) return ret; @@ -982,7 +986,7 @@ static int __resolve_metric(struct metric *m, */ do { all = true; - hashmap__for_each_entry((&m->pctx.ids), cur, bkt) { + hashmap__for_each_entry(m->pctx->ids, cur, bkt) { struct expr_id *parent; struct pmu_event *pe; @@ -996,7 +1000,7 @@ static int __resolve_metric(struct metric *m, all = false; /* The metric key itself needs to go out.. */ - expr__del_id(&m->pctx, cur->key); + expr__del_id(m->pctx, cur->key); /* ... and it gets resolved to the parent context. 
*/ ret = add_metric(metric_list, pe, metric_no_group, &m, parent, ids); @@ -1144,10 +1148,10 @@ static int metricgroup__add_metric(const char *metric, bool metric_no_group, if (m->has_constraint) { metricgroup__add_metric_non_group(events, - &m->pctx); + m->pctx); } else { metricgroup__add_metric_weak_group(events, - &m->pctx); + m->pctx); } } @@ -1210,7 +1214,7 @@ static void metricgroup__free_metrics(struct list_head *metric_list) list_for_each_entry_safe (m, tmp, metric_list, nd) { metric__free_refs(m); - expr__ctx_clear(&m->pctx); + expr__ctx_free(m->pctx); list_del_init(&m->nd); free(m); } diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 34a7f5c1fff7..c9fa07e49e72 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include "evsel.h" #include "stat.h" #include "color.h" +#include "debug.h" #include "pmu.h" #include "rblist.h" #include "evlist.h" @@ -370,12 +372,16 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) { struct evsel *counter, *leader, **metric_events, *oc; bool found; - struct expr_parse_ctx ctx; + struct expr_parse_ctx *ctx; struct hashmap_entry *cur; size_t bkt; int i; - expr__ctx_init(&ctx); + ctx = expr__ctx_new(); + if (!ctx) { + pr_debug("expr__ctx_new failed"); + return; + } evlist__for_each_entry(evsel_list, counter) { bool invalid = false; @@ -383,25 +389,25 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) if (!counter->metric_expr) continue; - expr__ctx_clear(&ctx); + expr__ctx_clear(ctx); metric_events = counter->metric_events; if (!metric_events) { if (expr__find_other(counter->metric_expr, counter->name, - &ctx, 1) < 0) + ctx, 1) < 0) continue; metric_events = calloc(sizeof(struct evsel *), - hashmap__size(&ctx.ids) + 1); + hashmap__size(ctx->ids) + 1); if (!metric_events) { - expr__ctx_clear(&ctx); + expr__ctx_free(ctx); return; } counter->metric_events = metric_events; } i 
= 0; - hashmap__for_each_entry((&ctx.ids), cur, bkt) { + hashmap__for_each_entry(ctx->ids, cur, bkt) { const char *metric_name = (const char *)cur->key; found = false; @@ -453,7 +459,7 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) counter->metric_expr = NULL; } } - expr__ctx_clear(&ctx); + expr__ctx_free(ctx); } static double runtime_stat_avg(struct runtime_stat *st, @@ -818,7 +824,6 @@ static int prepare_metric(struct evsel **metric_events, char *n, *pn; int i, j, ret; - expr__ctx_init(pctx); for (i = 0; metric_events[i]; i++) { struct saved_value *v; struct stats *stats; @@ -880,17 +885,22 @@ static void generic_metric(struct perf_stat_config *config, struct runtime_stat *st) { print_metric_t print_metric = out->print_metric; - struct expr_parse_ctx pctx; + struct expr_parse_ctx *pctx; double ratio, scale; int i; void *ctxp = out->ctx; - i = prepare_metric(metric_events, metric_refs, &pctx, cpu, st); - if (i < 0) + pctx = expr__ctx_new(); + if (!pctx) return; + i = prepare_metric(metric_events, metric_refs, pctx, cpu, st); + if (i < 0) { + expr__ctx_free(pctx); + return; + } if (!metric_events[i]) { - if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) { + if (expr__parse(&ratio, pctx, metric_expr, runtime) == 0) { char *unit; char metric_bf[64]; @@ -926,22 +936,26 @@ static void generic_metric(struct perf_stat_config *config, (metric_name ? 
metric_name : name) : "", 0); } - expr__ctx_clear(&pctx); + expr__ctx_free(pctx); } double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st) { - struct expr_parse_ctx pctx; + struct expr_parse_ctx *pctx; double ratio = 0.0; - if (prepare_metric(mexp->metric_events, mexp->metric_refs, &pctx, cpu, st) < 0) + pctx = expr__ctx_new(); + if (!pctx) + return NAN; + + if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu, st) < 0) goto out; - if (expr__parse(&ratio, &pctx, mexp->metric_expr, 1)) + if (expr__parse(&ratio, pctx, mexp->metric_expr, 1)) ratio = 0.0; out: - expr__ctx_clear(&pctx); + expr__ctx_free(pctx); return ratio; } -- cgit v1.2.3 From edfe7f554ab8f083556c718ecbcefda509c46851 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 23 Sep 2021 00:46:05 -0700 Subject: perf metric: Use NAN for missing event IDs. If during computing a metric an event (id) is missing the parsing aborts. A later patch will make it so that events that aren't used in the output are deliberately omitted, in which case we don't want the abort. Modify the missing ID case to report NAN for these cases. 
Reviewed-by: Andi Kleen Signed-off-by: Ian Rogers Tested-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jin Yao Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Sandeep Dasgupta Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210923074616.674826-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.y | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index b2ada8f8309a..41c9cd4efadd 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -1,6 +1,7 @@ /* Simple expression parser */ %{ #define YYDEBUG 1 +#include #include #include "util.h" #include "util/debug.h" @@ -88,12 +89,10 @@ expr: NUMBER | ID { struct expr_id_data *data; - if (expr__resolve_id(ctx, $1, &data)) { - free($1); - YYABORT; - } + $$ = NAN; + if (expr__resolve_id(ctx, $1, &data) == 0) + $$ = expr_id_data__value(data); - $$ = expr_id_data__value(data); free($1); } | expr '|' expr { $$ = (long)$1 | (long)$3; } -- cgit v1.2.3 From 7f8fdcbbbefb2d95259aa76a362689affafa4e4b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 23 Sep 2021 00:46:06 -0700 Subject: perf expr: Remove unused headers and inline d_ratio No functional change. Inlining d_ratio makes it easier to special case for constants in a later patch. 
Signed-off-by: Ian Rogers Tested-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Kajol Jain Cc: Jin Yao Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Sandeep Dasgupta Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210923074616.674826-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.y | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index 41c9cd4efadd..e6005450feae 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -2,23 +2,10 @@ %{ #define YYDEBUG 1 #include -#include -#include "util.h" #include "util/debug.h" -#include // strtod() +#include "smt.h" #define IN_EXPR_Y 1 #include "expr.h" -#include "smt.h" -#include - -static double d_ratio(double val0, double val1) -{ - if (val1 == 0) { - return 0; - } - return val0 / val1; -} - %} %define api.pure full @@ -120,7 +107,12 @@ expr: NUMBER | MIN '(' expr ',' expr ')' { $$ = $3 < $5 ? $3 : $5; } | MAX '(' expr ',' expr ')' { $$ = $3 > $5 ? $3 : $5; } | SMT_ON { $$ = smt_on() > 0; } - | D_RATIO '(' expr ',' expr ')' { $$ = d_ratio($3,$5); } + | D_RATIO '(' expr ',' expr ')' { if ($5 == 0) { + $$ = 0; + } else { + $$ = $3 / $5; + } + } ; %% -- cgit v1.2.3 From aed0d6f8c6edab48be649a071e18d28efb1a203a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 23 Sep 2021 00:46:07 -0700 Subject: perf expr: Separate token declaration from type No functional change, so the type of expr remains <num>. A later patch will change the computation to be an aggregate type and making this change makes that later change smaller. 
Signed-off-by: Ian Rogers Tested-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jin Yao Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Sandeep Dasgupta Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210923074616.674826-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.y | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index e6005450feae..68b122e59b3f 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -20,11 +20,7 @@ char *str; } -%token EXPR_PARSE EXPR_OTHER EXPR_ERROR -%token NUMBER -%token ID -%destructor { free ($$); } -%token MIN MAX IF ELSE SMT_ON D_RATIO +%token ID NUMBER MIN MAX IF ELSE SMT_ON D_RATIO EXPR_ERROR EXPR_PARSE EXPR_OTHER %left MIN MAX IF %left '|' %left '^' @@ -33,6 +29,9 @@ %left '-' '+' %left '*' '/' '%' %left NEG NOT +%type NUMBER +%type ID +%destructor { free ($$); } %type expr if_expr %{ -- cgit v1.2.3 From e87576c5ac14e038ac96145d289bf0134eb08506 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 23 Sep 2021 00:46:08 -0700 Subject: perf expr: Use macros for operators No functional change, switch the operators to use macros so that additional complexity for constants can be added in a later change. 
Signed-off-by: Ian Rogers Tested-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jin Yao Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Sandeep Dasgupta Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210923074616.674826-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.y | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index 68b122e59b3f..5535badeef0a 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -43,6 +43,12 @@ static void expr_error(double *final_val __maybe_unused, pr_debug("%s\n", s); } +#define BINARY_LONG_OP(RESULT, OP, LHS, RHS) \ + RESULT = (long)LHS OP (long)RHS; + +#define BINARY_OP(RESULT, OP, LHS, RHS) \ + RESULT = LHS OP RHS; + %} %% @@ -81,14 +87,14 @@ expr: NUMBER free($1); } - | expr '|' expr { $$ = (long)$1 | (long)$3; } - | expr '&' expr { $$ = (long)$1 & (long)$3; } - | expr '^' expr { $$ = (long)$1 ^ (long)$3; } - | expr '<' expr { $$ = $1 < $3; } - | expr '>' expr { $$ = $1 > $3; } - | expr '+' expr { $$ = $1 + $3; } - | expr '-' expr { $$ = $1 - $3; } - | expr '*' expr { $$ = $1 * $3; } + | expr '|' expr { BINARY_LONG_OP($$, |, $1, $3); } + | expr '&' expr { BINARY_LONG_OP($$, &, $1, $3); } + | expr '^' expr { BINARY_LONG_OP($$, ^, $1, $3); } + | expr '<' expr { BINARY_OP($$, <, $1, $3); } + | expr '>' expr { BINARY_OP($$, >, $1, $3); } + | expr '+' expr { BINARY_OP($$, +, $1, $3); } + | expr '-' expr { BINARY_OP($$, -, $1, $3); } + | expr '*' expr { BINARY_OP($$, *, $1, $3); } | expr '/' expr { if ($3 == 0) { pr_debug("division by zero\n"); YYABORT; -- cgit v1.2.3 From c924e0cc0576b4e45b9c495174cd785aa41d51ad Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 23 Sep 2021 00:46:09 -0700 Subject: perf expr: Move actions to the left. No functional change, just modifying whitespace. 
This creates additional space for adding logic to actions in later changes. Signed-off-by: Ian Rogers Tested-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jin Yao Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Sandeep Dasgupta Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210923074616.674826-7-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.y | 124 ++++++++++++++++++++++++++++++------------------- 1 file changed, 75 insertions(+), 49 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index 5535badeef0a..78cbe377eb0e 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -70,54 +70,80 @@ MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/ '<' | '>' | D_RATIO all_expr: if_expr { *final_val = $1; } - ; - -if_expr: - expr IF expr ELSE expr { $$ = $3 ? $1 : $5; } - | expr - ; - -expr: NUMBER - | ID { - struct expr_id_data *data; - - $$ = NAN; - if (expr__resolve_id(ctx, $1, &data) == 0) - $$ = expr_id_data__value(data); - - free($1); - } - | expr '|' expr { BINARY_LONG_OP($$, |, $1, $3); } - | expr '&' expr { BINARY_LONG_OP($$, &, $1, $3); } - | expr '^' expr { BINARY_LONG_OP($$, ^, $1, $3); } - | expr '<' expr { BINARY_OP($$, <, $1, $3); } - | expr '>' expr { BINARY_OP($$, >, $1, $3); } - | expr '+' expr { BINARY_OP($$, +, $1, $3); } - | expr '-' expr { BINARY_OP($$, -, $1, $3); } - | expr '*' expr { BINARY_OP($$, *, $1, $3); } - | expr '/' expr { if ($3 == 0) { - pr_debug("division by zero\n"); - YYABORT; - } - $$ = $1 / $3; - } - | expr '%' expr { if ((long)$3 == 0) { - pr_debug("division by zero\n"); - YYABORT; - } - $$ = (long)$1 % (long)$3; - } - | '-' expr %prec NEG { $$ = -$2; } - | '(' if_expr ')' { $$ = $2; } - | MIN '(' expr ',' expr ')' { $$ = $3 < $5 ? $3 : $5; } - | MAX '(' expr ',' expr ')' { $$ = $3 > $5 ? 
$3 : $5; } - | SMT_ON { $$ = smt_on() > 0; } - | D_RATIO '(' expr ',' expr ')' { if ($5 == 0) { - $$ = 0; - } else { - $$ = $3 / $5; - } - } - ; + +if_expr: expr IF expr ELSE expr +{ + $$ = $3 ? $1 : $5; +} +| expr +; + +expr: NUMBER +{ + $$ = $1; +} +| ID +{ + struct expr_id_data *data; + + $$ = NAN; + if (expr__resolve_id(ctx, $1, &data) == 0) + $$ = expr_id_data__value(data); + + free($1); +} +| expr '|' expr { BINARY_LONG_OP($$, |, $1, $3); } +| expr '&' expr { BINARY_LONG_OP($$, &, $1, $3); } +| expr '^' expr { BINARY_LONG_OP($$, ^, $1, $3); } +| expr '<' expr { BINARY_OP($$, <, $1, $3); } +| expr '>' expr { BINARY_OP($$, >, $1, $3); } +| expr '+' expr { BINARY_OP($$, +, $1, $3); } +| expr '-' expr { BINARY_OP($$, -, $1, $3); } +| expr '*' expr { BINARY_OP($$, *, $1, $3); } +| expr '/' expr +{ + if ($3 == 0) { + pr_debug("division by zero\n"); + YYABORT; + } + $$ = $1 / $3; +} +| expr '%' expr +{ + if ((long)$3 == 0) { + pr_debug("division by zero\n"); + YYABORT; + } + $$ = (long)$1 % (long)$3; +} +| D_RATIO '(' expr ',' expr ')' +{ + if ($5 == 0) { + $$ = 0; + } else { + $$ = $3 / $5; + } +} +| '-' expr %prec NEG +{ + $$ = -$2; +} +| '(' if_expr ')' +{ + $$ = $2; +} +| MIN '(' expr ',' expr ')' +{ + $$ = $3 < $5 ? $3 : $5; +} +| MAX '(' expr ',' expr ')' +{ + $$ = $3 > $5 ? $3 : $5; +} +| SMT_ON +{ + $$ = smt_on() > 0 ? 1.0 : 0.0; +} +; %% -- cgit v1.2.3 From 7e06a5e30a0c5155291efab8cf866ffea052f829 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 23 Sep 2021 00:46:10 -0700 Subject: perf metric: Rename expr__find_other. A later change will remove the notion of other, rename the function to expr__find_ids as this is what it populates. 
Signed-off-by: Ian Rogers Tested-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jin Yao Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Sandeep Dasgupta Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210923074616.674826-8-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expr.c | 26 +++++++++++++------------- tools/perf/tests/pmu-events.c | 11 +++++------ tools/perf/util/expr.c | 4 ++-- tools/perf/util/expr.h | 2 +- tools/perf/util/metricgroup.c | 2 +- tools/perf/util/stat-shadow.c | 6 +++--- 6 files changed, 25 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index b0a3b5fd0c00..7ccb97c73347 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -64,25 +64,25 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) TEST_ASSERT_VAL("missing operand", ret == -1); expr__ctx_clear(ctx); - TEST_ASSERT_VAL("find other", - expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", - ctx, 1) == 0); - TEST_ASSERT_VAL("find other", hashmap__size(ctx->ids) == 3); - TEST_ASSERT_VAL("find other", hashmap__find(ctx->ids, "BAR", + TEST_ASSERT_VAL("find ids", + expr__find_ids("FOO + BAR + BAZ + BOZO", "FOO", + ctx, 1) == 0); + TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 3); + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BAR", (void **)&val_ptr)); - TEST_ASSERT_VAL("find other", hashmap__find(ctx->ids, "BAZ", + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BAZ", (void **)&val_ptr)); - TEST_ASSERT_VAL("find other", hashmap__find(ctx->ids, "BOZO", + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BOZO", (void **)&val_ptr)); expr__ctx_clear(ctx); - TEST_ASSERT_VAL("find other", - expr__find_other("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@", - NULL, ctx, 3) == 0); - TEST_ASSERT_VAL("find other", hashmap__size(ctx->ids) == 2); - 
TEST_ASSERT_VAL("find other", hashmap__find(ctx->ids, "EVENT1,param=3/", + TEST_ASSERT_VAL("find ids", + expr__find_ids("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@", + NULL, ctx, 3) == 0); + TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 2); + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT1,param=3/", (void **)&val_ptr)); - TEST_ASSERT_VAL("find other", hashmap__find(ctx->ids, "EVENT2,param=3/", + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT2,param=3/", (void **)&val_ptr)); expr__ctx_free(ctx); diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 91076ab0514b..d3534960ed25 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -866,7 +866,7 @@ static int resolve_metric_simple(struct expr_parse_ctx *pctx, ref->metric_expr = pe->metric_expr; list_add_tail(&metric->list, compound_list); - rc = expr__find_other(pe->metric_expr, NULL, pctx, 0); + rc = expr__find_ids(pe->metric_expr, NULL, pctx, 0); if (rc) goto out_err; break; /* The hashmap has been modified, so restart */ @@ -916,9 +916,8 @@ static int test_parsing(void) if (!pe->metric_expr) continue; expr__ctx_clear(ctx); - if (expr__find_other(pe->metric_expr, NULL, ctx, 0) - < 0) { - expr_failure("Parse other failed", map, pe); + if (expr__find_ids(pe->metric_expr, NULL, ctx, 0) < 0) { + expr_failure("Parse find ids failed", map, pe); ret++; continue; } @@ -990,8 +989,8 @@ static int metric_parse_fake(const char *str) pr_debug("expr__ctx_new failed"); return TEST_FAIL; } - if (expr__find_other(str, NULL, ctx, 0) < 0) { - pr_err("expr__find_other failed\n"); + if (expr__find_ids(str, NULL, ctx, 0) < 0) { + pr_err("expr__find_ids failed\n"); return -1; } diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 7b1c06772a49..adf16bb7571a 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -288,8 +288,8 @@ int expr__parse(double *final_val, struct expr_parse_ctx *ctx, return __expr__parse(final_val, ctx, 
expr, EXPR_PARSE, runtime) ? -1 : 0; } -int expr__find_other(const char *expr, const char *one, - struct expr_parse_ctx *ctx, int runtime) +int expr__find_ids(const char *expr, const char *one, + struct expr_parse_ctx *ctx, int runtime) { int ret = __expr__parse(NULL, ctx, expr, EXPR_OTHER, runtime); diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 5fa394f10418..de109c2ab917 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -43,7 +43,7 @@ int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **datap); int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr, int runtime); -int expr__find_other(const char *expr, const char *one, +int expr__find_ids(const char *expr, const char *one, struct expr_parse_ctx *ids, int runtime); double expr_id_data__value(const struct expr_id_data *data); diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index b7924a2f1f45..046fb3fe1700 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -857,7 +857,7 @@ static int __add_metric(struct list_head *metric_list, * For both the parent and referenced metrics, we parse * all the metric's IDs and add it to the parent context. 
*/ - if (expr__find_other(pe->metric_expr, NULL, m->pctx, runtime) < 0) { + if (expr__find_ids(pe->metric_expr, NULL, m->pctx, runtime) < 0) { if (m->metric_refs_cnt == 0) { expr__ctx_free(m->pctx); free(m); diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index c9fa07e49e72..9bc841e09a0c 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -392,9 +392,9 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) expr__ctx_clear(ctx); metric_events = counter->metric_events; if (!metric_events) { - if (expr__find_other(counter->metric_expr, - counter->name, - ctx, 1) < 0) + if (expr__find_ids(counter->metric_expr, + counter->name, + ctx, 1) < 0) continue; metric_events = calloc(sizeof(struct evsel *), -- cgit v1.2.3 From 114a9d6e396eeb061fa532803ff9a6fd3a966ad8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 23 Sep 2021 00:46:11 -0700 Subject: perf metric: Add utilities to work on ids map. Add utilities to new/free an ids hashmap, as well as to union. Add testing of the union. Unioning hashmaps will be used when parsing the metric, if a value is known then the hashmap is unnecessary, otherwise we need to union together all the event ids to compute their values for reporting. 
Signed-off-by: Ian Rogers Tested-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jin Yao Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Sandeep Dasgupta Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210923074616.674826-9-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expr.c | 47 ++++++++++++++++++++++++++++++++ tools/perf/util/expr.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++--- tools/perf/util/expr.h | 12 +++++++++ 3 files changed, 126 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 7ccb97c73347..1c881bea7fca 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -6,6 +6,51 @@ #include #include +static int test_ids_union(void) +{ + struct hashmap *ids1, *ids2; + + /* Empty union. */ + ids1 = ids__new(); + TEST_ASSERT_VAL("ids__new", ids1); + ids2 = ids__new(); + TEST_ASSERT_VAL("ids__new", ids2); + + ids1 = ids__union(ids1, ids2); + TEST_ASSERT_EQUAL("union", (int)hashmap__size(ids1), 0); + + /* Union {foo, bar} against {}. */ + ids2 = ids__new(); + TEST_ASSERT_VAL("ids__new", ids2); + + TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids1, strdup("foo"), NULL), 0); + TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids1, strdup("bar"), NULL), 0); + + ids1 = ids__union(ids1, ids2); + TEST_ASSERT_EQUAL("union", (int)hashmap__size(ids1), 2); + + /* Union {foo, bar} against {foo}. */ + ids2 = ids__new(); + TEST_ASSERT_VAL("ids__new", ids2); + TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids2, strdup("foo"), NULL), 0); + + ids1 = ids__union(ids1, ids2); + TEST_ASSERT_EQUAL("union", (int)hashmap__size(ids1), 2); + + /* Union {foo, bar} against {bar,baz}. 
*/ + ids2 = ids__new(); + TEST_ASSERT_VAL("ids__new", ids2); + TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids2, strdup("bar"), NULL), 0); + TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids2, strdup("baz"), NULL), 0); + + ids1 = ids__union(ids1, ids2); + TEST_ASSERT_EQUAL("union", (int)hashmap__size(ids1), 3); + + ids__free(ids1); + + return 0; +} + static int test(struct expr_parse_ctx *ctx, const char *e, double val2) { double val; @@ -24,6 +69,8 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) int ret; struct expr_parse_ctx *ctx; + TEST_ASSERT_EQUAL("ids_union", test_ids_union(), 0); + ctx = expr__ctx_new(); TEST_ASSERT_VAL("expr__ctx_new", ctx); expr__add_id_val(ctx, strdup("FOO"), 1); diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index adf16bb7571a..81101be51044 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -59,8 +59,29 @@ static bool key_equal(const void *key1, const void *key2, return !strcmp((const char *)key1, (const char *)key2); } -/* Caller must make sure id is allocated */ -int expr__add_id(struct expr_parse_ctx *ctx, const char *id) +struct hashmap *ids__new(void) +{ + return hashmap__new(key_hash, key_equal, NULL); +} + +void ids__free(struct hashmap *ids) +{ + struct hashmap_entry *cur; + size_t bkt; + + if (ids == NULL) + return; + + hashmap__for_each_entry(ids, cur, bkt) { + free((char *)cur->key); + free(cur->value); + } + + hashmap__free(ids); +} + +int ids__insert(struct hashmap *ids, const char *id, + struct expr_id *parent) { struct expr_id_data *data_ptr = NULL, *old_data = NULL; char *old_key = NULL; @@ -70,10 +91,10 @@ int expr__add_id(struct expr_parse_ctx *ctx, const char *id) if (!data_ptr) return -ENOMEM; - data_ptr->parent = ctx->parent; + data_ptr->parent = parent; data_ptr->kind = EXPR_ID_DATA__PARENT; - ret = hashmap__set(ctx->ids, id, data_ptr, + ret = hashmap__set(ids, id, data_ptr, (const void **)&old_key, (void **)&old_data); if (ret) free(data_ptr); @@ -82,6 
+103,48 @@ int expr__add_id(struct expr_parse_ctx *ctx, const char *id) return ret; } +struct hashmap *ids__union(struct hashmap *ids1, struct hashmap *ids2) +{ + size_t bkt; + struct hashmap_entry *cur; + int ret; + struct expr_id_data *old_data = NULL; + char *old_key = NULL; + + if (!ids1) + return ids2; + + if (!ids2) + return ids1; + + if (hashmap__size(ids1) < hashmap__size(ids2)) { + struct hashmap *tmp = ids1; + + ids1 = ids2; + ids2 = tmp; + } + hashmap__for_each_entry(ids2, cur, bkt) { + ret = hashmap__set(ids1, cur->key, cur->value, + (const void **)&old_key, (void **)&old_data); + free(old_key); + free(old_data); + + if (ret) { + hashmap__free(ids1); + hashmap__free(ids2); + return NULL; + } + } + hashmap__free(ids2); + return ids1; +} + +/* Caller must make sure id is allocated */ +int expr__add_id(struct expr_parse_ctx *ctx, const char *id) +{ + return ids__insert(ctx->ids, id, ctx->parent); +} + /* Caller must make sure id is allocated */ int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val) { diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index de109c2ab917..4ed186bd1f13 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -30,9 +30,19 @@ struct expr_scanner_ctx { int runtime; }; +struct hashmap *ids__new(void); +void ids__free(struct hashmap *ids); +int ids__insert(struct hashmap *ids, const char *id, struct expr_id *parent); +/* + * Union two sets of ids (hashmaps) and construct a third, freeing ids1 and + * ids2. 
+ */ +struct hashmap *ids__union(struct hashmap *ids1, struct hashmap *ids2); + struct expr_parse_ctx *expr__ctx_new(void); void expr__ctx_clear(struct expr_parse_ctx *ctx); void expr__ctx_free(struct expr_parse_ctx *ctx); + void expr__del_id(struct expr_parse_ctx *ctx, const char *id); int expr__add_id(struct expr_parse_ctx *ctx, const char *id); int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val); @@ -41,8 +51,10 @@ int expr__get_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **data); int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **datap); + int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr, int runtime); + int expr__find_ids(const char *expr, const char *one, struct expr_parse_ctx *ids, int runtime); -- cgit v1.2.3 From 762a05c561bcc36ccde6a25e268888383e6adb83 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 23 Sep 2021 00:46:12 -0700 Subject: perf metric: Allow metrics with no events A metric may be a constant value, for example, some SMT metrics are constant 0 if #smt_on is 0. If we eliminate all the events then there is no printing. Fix this by forcing metrics like this to have a duration_time tool event, previously the metric would fail when parsing the events with a parse error. 
Signed-off-by: Ian Rogers Tested-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jin Yao Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Sandeep Dasgupta Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210923074616.674826-10-irogers@google.com [ Reflow one __parse_events() call so that a ternary operation gets in a single line ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 106 ++++++++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 50 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 046fb3fe1700..8ba5370f5f64 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -198,65 +198,69 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, struct evsel *ev, *current_leader = NULL; struct expr_id_data *val_ptr; int i = 0, matched_events = 0, events_to_match; - const int idnum = (int)hashmap__size(pctx->ids); + int idnum = (int)hashmap__size(pctx->ids); - /* - * duration_time is always grouped separately, when events are grouped - * (ie has_constraint is false) then ignore it in the matching loop and - * add it to metric_events at the end. - */ - if (!has_constraint && - hashmap__find(pctx->ids, "duration_time", (void **)&val_ptr)) - events_to_match = idnum - 1; - else - events_to_match = idnum; - - evlist__for_each_entry (perf_evlist, ev) { + if (idnum != 0) { /* - * Events with a constraint aren't grouped and match the first - * events available. + * duration_time is always grouped separately, when events are + * grouped (ie has_constraint is false) then ignore it in the + * matching loop and add it to metric_events at the end. */ - if (has_constraint && ev->weak_group) - continue; - /* Ignore event if already used and merging is disabled. 
*/ - if (metric_no_merge && test_bit(ev->core.idx, evlist_used)) - continue; - if (!has_constraint && !evsel__has_leader(ev, current_leader)) { + events_to_match = idnum; + if (!has_constraint && hashmap__find(pctx->ids, "duration_time", (void **)&val_ptr)) + events_to_match--; + + evlist__for_each_entry(perf_evlist, ev) { + /* + * Events with a constraint aren't grouped and match the + * first events available. + */ + if (has_constraint && ev->weak_group) + continue; + /* Ignore event if already used and merging is disabled. */ + if (metric_no_merge && test_bit(ev->core.idx, evlist_used)) + continue; + if (!has_constraint && !evsel__has_leader(ev, current_leader)) { + /* + * Start of a new group, discard the whole match + * and start again. + */ + matched_events = 0; + memset(metric_events, 0, sizeof(struct evsel *) * idnum); + current_leader = evsel__leader(ev); + } /* - * Start of a new group, discard the whole match and - * start again. + * Check for duplicate events with the same name. For + * example, uncore_imc/cas_count_read/ will turn into 6 + * events per socket on skylakex. Only the first such + * event is placed in metric_events. If events aren't + * grouped then this also ensures that the same event in + * different sibling groups aren't both added to + * metric_events. */ - matched_events = 0; - memset(metric_events, 0, - sizeof(struct evsel *) * idnum); - current_leader = evsel__leader(ev); + if (contains_event(metric_events, matched_events, ev->name)) + continue; + /* Does this event belong to the parse context? */ + if (hashmap__find(pctx->ids, ev->name, (void **)&val_ptr)) + metric_events[matched_events++] = ev; + + if (matched_events == events_to_match) + break; } + } else { /* - * Check for duplicate events with the same name. For example, - * uncore_imc/cas_count_read/ will turn into 6 events per socket - * on skylakex. Only the first such event is placed in - * metric_events. 
If events aren't grouped then this also - * ensures that the same event in different sibling groups - * aren't both added to metric_events. + * There are no events to match, but we need to associate the + * metric with an event for printing. A duration_time event was + * parsed for this. */ - if (contains_event(metric_events, matched_events, ev->name)) - continue; - /* Does this event belong to the parse context? */ - if (hashmap__find(pctx->ids, ev->name, (void **)&val_ptr)) - metric_events[matched_events++] = ev; - - if (matched_events == events_to_match) - break; + idnum = 1; + events_to_match = 0; } - if (events_to_match != idnum) { /* Add the first duration_time. */ - evlist__for_each_entry(perf_evlist, ev) { - if (!strcmp(ev->name, "duration_time")) { - metric_events[matched_events++] = ev; - break; - } - } + ev = evlist__find_evsel_by_str(perf_evlist, "duration_time"); + if (ev) + metric_events[matched_events++] = ev; } if (matched_events != idnum) { @@ -320,9 +324,10 @@ static int metricgroup__setup_events(struct list_head *groups, list_for_each_entry (m, groups, nd) { struct evsel **metric_events; struct metric_ref *metric_refs = NULL; + const size_t ids_size = hashmap__size(m->pctx->ids); metric_events = calloc(sizeof(void *), - hashmap__size(m->pctx->ids) + 1); + ids_size == 0 ? 2 : ids_size + 1); if (!metric_events) { ret = -ENOMEM; break; @@ -1240,7 +1245,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, goto out; pr_debug("adding %s\n", extra_events.buf); bzero(&parse_error, sizeof(parse_error)); - ret = __parse_events(perf_evlist, extra_events.buf, &parse_error, fake_pmu); + ret = __parse_events(perf_evlist, extra_events.len > 0 ? 
extra_events.buf : "duration_time", + &parse_error, fake_pmu); if (ret) { parse_events_print_error(&parse_error, extra_events.buf); goto out; -- cgit v1.2.3 From 3f965a7df09d7eebde0020cefe427219afe7df4a Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 23 Sep 2021 00:46:13 -0700 Subject: perf expr: Merge find_ids and regular parsing Add a new option to parsing that the set of IDs being used should be computed, this means every action needs to handle the compute_ids and regular case. This means actions yield a new ids type, which is either a set of ids or the value being computed. Use the compute_ids case to replace find IDs parsing. Signed-off-by: Ian Rogers Tested-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jin Yao Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Sandeep Dasgupta Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210923074616.674826-11-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.c | 9 ++- tools/perf/util/expr.h | 1 - tools/perf/util/expr.l | 9 --- tools/perf/util/expr.y | 176 ++++++++++++++++++++++++++++++++++++------------- 4 files changed, 136 insertions(+), 59 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 81101be51044..db2445677c8c 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -314,10 +314,9 @@ void expr__ctx_free(struct expr_parse_ctx *ctx) static int __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr, - int start, int runtime) + bool compute_ids, int runtime) { struct expr_scanner_ctx scanner_ctx = { - .start_token = start, .runtime = runtime, }; YY_BUFFER_STATE buffer; @@ -337,7 +336,7 @@ __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr, expr_set_debug(1, scanner); #endif - ret = expr_parse(val, ctx, scanner); + ret = expr_parse(val, ctx, compute_ids, scanner); expr__flush_buffer(buffer, scanner); 
expr__delete_buffer(buffer, scanner); @@ -348,13 +347,13 @@ __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr, int expr__parse(double *final_val, struct expr_parse_ctx *ctx, const char *expr, int runtime) { - return __expr__parse(final_val, ctx, expr, EXPR_PARSE, runtime) ? -1 : 0; + return __expr__parse(final_val, ctx, expr, /*compute_ids=*/false, runtime) ? -1 : 0; } int expr__find_ids(const char *expr, const char *one, struct expr_parse_ctx *ctx, int runtime) { - int ret = __expr__parse(NULL, ctx, expr, EXPR_OTHER, runtime); + int ret = __expr__parse(NULL, ctx, expr, /*compute_ids=*/true, runtime); if (one) expr__del_id(ctx, one); diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 4ed186bd1f13..b20513f0ae59 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -26,7 +26,6 @@ struct expr_parse_ctx { struct expr_id_data; struct expr_scanner_ctx { - int start_token; int runtime; }; diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l index 13e5e3c75f56..702fdf6456ca 100644 --- a/tools/perf/util/expr.l +++ b/tools/perf/util/expr.l @@ -91,15 +91,6 @@ symbol ({spec}|{sym})+ %% struct expr_scanner_ctx *sctx = expr_get_extra(yyscanner); - { - int start_token = sctx->start_token; - - if (sctx->start_token) { - sctx->start_token = 0; - return start_token; - } - } - d_ratio { return D_RATIO; } max { return MAX; } min { return MIN; } diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index 78cbe377eb0e..6aeead54760a 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -1,6 +1,7 @@ /* Simple expression parser */ %{ #define YYDEBUG 1 +#include #include #include "util/debug.h" #include "smt.h" @@ -12,15 +13,31 @@ %parse-param { double *final_val } %parse-param { struct expr_parse_ctx *ctx } +%parse-param { bool compute_ids } %parse-param {void *scanner} %lex-param {void* scanner} %union { double num; char *str; + struct ids { + /* + * When creating ids, holds the working set of event ids. 
NULL + * implies the set is empty. + */ + struct hashmap *ids; + /* + * The metric value. When not creating ids this is the value + * read from a counter, a constant or some computed value. When + * creating ids the value is either a constant or BOTTOM. NAN is + * used as the special BOTTOM value, representing a "set of all + * values" case. + */ + double val; + } ids; } -%token ID NUMBER MIN MAX IF ELSE SMT_ON D_RATIO EXPR_ERROR EXPR_PARSE EXPR_OTHER +%token ID NUMBER MIN MAX IF ELSE SMT_ON D_RATIO EXPR_ERROR %left MIN MAX IF %left '|' %left '^' @@ -32,65 +49,109 @@ %type NUMBER %type ID %destructor { free ($$); } -%type expr if_expr +%type expr if_expr +%destructor { ids__free($$.ids); } %{ static void expr_error(double *final_val __maybe_unused, struct expr_parse_ctx *ctx __maybe_unused, + bool compute_ids __maybe_unused, void *scanner, const char *s) { pr_debug("%s\n", s); } +/* + * During compute ids, the special "bottom" value uses NAN to represent the set + * of all values. NAN is selected as it isn't a useful constant value. 
+ */ +#define BOTTOM NAN + +static struct ids union_expr(struct ids ids1, struct ids ids2) +{ + struct ids result = { + .val = BOTTOM, + .ids = ids__union(ids1.ids, ids2.ids), + }; + return result; +} + #define BINARY_LONG_OP(RESULT, OP, LHS, RHS) \ - RESULT = (long)LHS OP (long)RHS; + if (!compute_ids) { \ + RESULT.val = (long)LHS.val OP (long)RHS.val; \ + RESULT.ids = NULL; \ + } else { \ + RESULT = union_expr(LHS, RHS); \ + } #define BINARY_OP(RESULT, OP, LHS, RHS) \ - RESULT = LHS OP RHS; + if (!compute_ids) { \ + RESULT.val = LHS.val OP RHS.val; \ + RESULT.ids = NULL; \ + } else { \ + RESULT = union_expr(LHS, RHS); \ + } %} %% -start: -EXPR_PARSE all_expr -| -EXPR_OTHER all_other - -all_other: all_other other -| - -other: ID +start: if_expr { - expr__add_id(ctx, $1); -} -| -MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' | ',' -| -'<' | '>' | D_RATIO + if (compute_ids) + ctx->ids = ids__union($1.ids, ctx->ids); -all_expr: if_expr { *final_val = $1; } + if (final_val) + *final_val = $1.val; +} +; if_expr: expr IF expr ELSE expr { - $$ = $3 ? $1 : $5; + if (!compute_ids) { + $$.ids = NULL; + if (fpclassify($3.val) == FP_ZERO) { + $$.val = $5.val; + } else { + $$.val = $1.val; + } + } else { + $$ = union_expr($1, union_expr($3, $5)); + } } | expr ; expr: NUMBER { - $$ = $1; + $$.val = $1; + $$.ids = NULL; } | ID { - struct expr_id_data *data; - - $$ = NAN; - if (expr__resolve_id(ctx, $1, &data) == 0) - $$ = expr_id_data__value(data); - - free($1); + if (!compute_ids) { + /* + * Compute the event's value from ID. If the ID isn't known then + * it isn't used to compute the formula so set to NAN. + */ + struct expr_id_data *data; + + $$.val = NAN; + if (expr__resolve_id(ctx, $1, &data) == 0) + $$.val = expr_id_data__value(data); + + $$.ids = NULL; + free($1); + } else { + /* + * Set the value to BOTTOM to show that any value is possible + * when the event is computed. Create a set of just the ID. 
+ */ + $$.val = BOTTOM; + $$.ids = ids__new(); + if (!$$.ids || ids__insert($$.ids, $1, ctx->parent)) + YYABORT; + } } | expr '|' expr { BINARY_LONG_OP($$, |, $1, $3); } | expr '&' expr { BINARY_LONG_OP($$, &, $1, $3); } @@ -102,31 +163,47 @@ expr: NUMBER | expr '*' expr { BINARY_OP($$, *, $1, $3); } | expr '/' expr { - if ($3 == 0) { - pr_debug("division by zero\n"); - YYABORT; + if (!compute_ids) { + if (fpclassify($3.val) == FP_ZERO) { + pr_debug("division by zero\n"); + YYABORT; + } + $$.val = $1.val / $3.val; + $$.ids = NULL; + } else { + $$ = union_expr($1, $3); } - $$ = $1 / $3; } | expr '%' expr { - if ((long)$3 == 0) { - pr_debug("division by zero\n"); - YYABORT; + if (!compute_ids) { + if (fpclassify($3.val) == FP_ZERO) { + pr_debug("division by zero\n"); + YYABORT; + } + $$.val = (long)$1.val % (long)$3.val; + $$.ids = NULL; + } else { + $$ = union_expr($1, $3); } - $$ = (long)$1 % (long)$3; } | D_RATIO '(' expr ',' expr ')' { - if ($5 == 0) { - $$ = 0; + if (!compute_ids) { + $$.ids = NULL; + if (fpclassify($5.val) == FP_ZERO) { + $$.val = 0.0; + } else { + $$.val = $3.val / $5.val; + } } else { - $$ = $3 / $5; + $$ = union_expr($3, $5); } } | '-' expr %prec NEG { - $$ = -$2; + $$.val = -$2.val; + $$.ids = $2.ids; } | '(' if_expr ')' { @@ -134,15 +211,26 @@ expr: NUMBER } | MIN '(' expr ',' expr ')' { - $$ = $3 < $5 ? $3 : $5; + if (!compute_ids) { + $$.val = $3.val < $5.val ? $3.val : $5.val; + $$.ids = NULL; + } else { + $$ = union_expr($3, $5); + } } | MAX '(' expr ',' expr ')' { - $$ = $3 > $5 ? $3 : $5; + if (!compute_ids) { + $$.val = $3.val > $5.val ? $3.val : $5.val; + $$.ids = NULL; + } else { + $$ = union_expr($3, $5); + } } | SMT_ON { - $$ = smt_on() > 0 ? 1.0 : 0.0; + $$.val = smt_on() > 0 ? 
1.0 : 0.0; + $$.ids = NULL; } ; -- cgit v1.2.3 From 970f7afe55ee3e9d6d1c73cd138c5f023bb0beba Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 23 Sep 2021 00:46:14 -0700 Subject: perf expr: Propagate constants for binary operations When we're computing ID values, if we have constant values then compute the constant result. For example: 1 + 2 Previously .val would be set to BOTTOM by union_expr, meaning that all values are possible. With this change .val is set to 3. Later changes will use the constant values to hopefully eliminate ID values that don't need to be computed. Signed-off-by: Ian Rogers Tested-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jin Yao Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Sandeep Dasgupta Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210923074616.674826-12-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.y | 63 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index 6aeead54760a..5a295e385914 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -68,6 +68,12 @@ static void expr_error(double *final_val __maybe_unused, */ #define BOTTOM NAN +/* During computing ids, does val represent a constant (non-BOTTOM) value? */ +static bool is_const(double val) +{ + return isfinite(val); +} + static struct ids union_expr(struct ids ids1, struct ids ids2) { struct ids result = { @@ -77,8 +83,15 @@ static struct ids union_expr(struct ids ids1, struct ids ids2) return result; } +/* + * If we're not computing ids or $1 and $3 are constants, compute the new + * constant value using OP. Its invariant that there are no ids. If computing + * ids for non-constants union the set of IDs that must be computed. 
+ */ #define BINARY_LONG_OP(RESULT, OP, LHS, RHS) \ - if (!compute_ids) { \ + if (!compute_ids || (is_const(LHS.val) && is_const(RHS.val))) { \ + assert(LHS.ids == NULL); \ + assert(RHS.ids == NULL); \ RESULT.val = (long)LHS.val OP (long)RHS.val; \ RESULT.ids = NULL; \ } else { \ @@ -86,7 +99,9 @@ static struct ids union_expr(struct ids ids1, struct ids ids2) } #define BINARY_OP(RESULT, OP, LHS, RHS) \ - if (!compute_ids) { \ + if (!compute_ids || (is_const(LHS.val) && is_const(RHS.val))) { \ + assert(LHS.ids == NULL); \ + assert(RHS.ids == NULL); \ RESULT.val = LHS.val OP RHS.val; \ RESULT.ids = NULL; \ } else { \ @@ -163,40 +178,52 @@ expr: NUMBER | expr '*' expr { BINARY_OP($$, *, $1, $3); } | expr '/' expr { - if (!compute_ids) { - if (fpclassify($3.val) == FP_ZERO) { - pr_debug("division by zero\n"); - YYABORT; - } + if (fpclassify($3.val) == FP_ZERO) { + pr_debug("division by zero\n"); + YYABORT; + } else if (!compute_ids || (is_const($1.val) && is_const($3.val))) { + assert($1.ids == NULL); + assert($3.ids == NULL); $$.val = $1.val / $3.val; $$.ids = NULL; } else { + /* LHS and/or RHS need computing from event IDs so union. */ $$ = union_expr($1, $3); } } | expr '%' expr { - if (!compute_ids) { - if (fpclassify($3.val) == FP_ZERO) { - pr_debug("division by zero\n"); - YYABORT; - } + if (fpclassify($3.val) == FP_ZERO) { + pr_debug("division by zero\n"); + YYABORT; + } else if (!compute_ids || (is_const($1.val) && is_const($3.val))) { + assert($1.ids == NULL); + assert($3.ids == NULL); $$.val = (long)$1.val % (long)$3.val; $$.ids = NULL; } else { + /* LHS and/or RHS need computing from event IDs so union. */ $$ = union_expr($1, $3); } } | D_RATIO '(' expr ',' expr ')' { - if (!compute_ids) { + if (fpclassify($5.val) == FP_ZERO) { + /* + * Division by constant zero always yields zero and no events + * are necessary. 
+ */ + assert($5.ids == NULL); + $$.val = 0.0; + $$.ids = NULL; + ids__free($3.ids); + } else if (!compute_ids || (is_const($3.val) && is_const($5.val))) { + assert($3.ids == NULL); + assert($5.ids == NULL); + $$.val = $3.val / $5.val; $$.ids = NULL; - if (fpclassify($5.val) == FP_ZERO) { - $$.val = 0.0; - } else { - $$.val = $3.val / $5.val; - } } else { + /* LHS and/or RHS need computing from event IDs so union. */ $$ = union_expr($3, $5); } } -- cgit v1.2.3 From a8e4e880834b5dc53ff6b4cfc9f4268e61399976 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 23 Sep 2021 00:46:15 -0700 Subject: perf metric: Don't compute unused events For a metric like: EVENT1 if #smt_on else EVENT2 currently EVENT1 and EVENT2 will be measured and then when the metric is reported EVENT1 or EVENT2 will be printed depending on the value from smt_on() during the expr parsing. Computing both events is unnecessary and can lead to multiplexing as discussed in this thread: https://lore.kernel.org/lkml/20201110100346.2527031-1-irogers@google.com/ If the input is constant to certain operators like: IDS1 if CONST else IDS2 then the result will be either IDS1 or IDS2 depending on CONST (which may be evaluated from an entire expression), and so IDS1 or IDS2 may be discarded avoiding events from being programmed. 
Signed-off-by: Ian Rogers Tested-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jin Yao Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Sandeep Dasgupta Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210923074616.674826-13-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expr.c | 11 +++++++++++ tools/perf/util/expr.y | 30 +++++++++++++++++++++++------- 2 files changed, 34 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 1c881bea7fca..287989321d2a 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "util/debug.h" #include "util/expr.h" +#include "util/smt.h" #include "tests.h" #include #include @@ -132,6 +133,16 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT2,param=3/", (void **)&val_ptr)); + /* Only EVENT1 or EVENT2 need be measured depending on the value of smt_on. */ + expr__ctx_clear(ctx); + TEST_ASSERT_VAL("find ids", + expr__find_ids("EVENT1 if #smt_on else EVENT2", + NULL, ctx, 0) == 0); + TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 1); + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, + smt_on() ? "EVENT1" : "EVENT2", + (void **)&val_ptr)); + expr__ctx_free(ctx); return 0; diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index 5a295e385914..5b878f044f22 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -123,14 +123,30 @@ start: if_expr if_expr: expr IF expr ELSE expr { - if (!compute_ids) { - $$.ids = NULL; - if (fpclassify($3.val) == FP_ZERO) { - $$.val = $5.val; - } else { - $$.val = $1.val; - } + if (fpclassify($3.val) == FP_ZERO) { + /* + * The IF expression evaluated to 0 so treat as false, take the + * ELSE and discard everything else. 
+ */ + $$.val = $5.val; + $$.ids = $5.ids; + ids__free($1.ids); + ids__free($3.ids); + } else if (!compute_ids || is_const($3.val)) { + /* + * If ids aren't computed then treat the expression as true. If + * ids are being computed and the IF expr is a non-zero + * constant, then also evaluate the true case. + */ + $$.val = $1.val; + $$.ids = $1.ids; + ids__free($3.ids); + ids__free($5.ids); } else { + /* + * Value is either the LHS or RHS and we need the IF expression + * to compute it. + */ $$ = union_expr($1, union_expr($3, $5)); } } -- cgit v1.2.3 From 94886961e324d5b6bae8e206b227c6eeb0f22c2c Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 23 Sep 2021 00:46:16 -0700 Subject: perf metric: Avoid events for an 'if' constant result For a metric like: CONST if expr else CONST if the values of CONST are identical then neither expr nor its events need evaluating in order to compute a result. Signed-off-by: Ian Rogers Tested-by: John Garry Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ingo Molnar Cc: Jin Yao Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Paul Clarke Cc: Peter Zijlstra Cc: Sandeep Dasgupta Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210923074616.674826-14-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expr.c | 7 +++++++ tools/perf/util/expr.y | 10 ++++++++++ 2 files changed, 17 insertions(+) (limited to 'tools') diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 287989321d2a..f1d8411fce12 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -143,6 +143,13 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) smt_on() ? "EVENT1" : "EVENT2", (void **)&val_ptr)); + /* The expression is a constant 1.0 without needing to evaluate EVENT1. 
*/ + expr__ctx_clear(ctx); + TEST_ASSERT_VAL("find ids", + expr__find_ids("1.0 if EVENT1 > 100.0 else 1.0", + NULL, ctx, 0) == 0); + TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 0); + expr__ctx_free(ctx); return 0; diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index 5b878f044f22..ba7d3b667fcb 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -142,6 +142,16 @@ if_expr: expr IF expr ELSE expr $$.ids = $1.ids; ids__free($3.ids); ids__free($5.ids); + } else if ($1.val == $5.val) { + /* + * LHS == RHS, so both are an identical constant. No need to + * evaluate any events. + */ + $$.val = $1.val; + $$.ids = NULL; + ids__free($1.ids); + ids__free($3.ids); + ids__free($5.ids); } else { /* * Value is either the LHS or RHS and we need the IF expression -- cgit v1.2.3 From 2b775152bbe838c9de0055eb5bdb530c2c88235b Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Wed, 22 Sep 2021 17:27:06 +0200 Subject: perf tests vmlinux-kallsyms: Ignore hidden symbols Certain kernel symbols are purposely hidden from kallsyms. The function is_ignored_symbol() from scripts/kallsyms.c decides if a symbol should be hidden or not. The perf test "vmlinux symtab matches kallsyms" fails in case perf finds some of the hidden symbols in its machine image and can't match them to kallsyms. Let's add a filter to check if a symbol not found isn't one of these before failing the test. The function is_ignored_symbol() has been copied from scripts/kallsyms.c and needs to be updated along with the original. 
Signed-off-by: Michael Petlan Acked-by: Ian Rogers Cc: Jiri Olsa LPU-Reference: 20210922152706.23655-1-mpetlan@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/vmlinux-kallsyms.c | 102 ++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) (limited to 'tools') diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 193b7c91b4e2..4f884aabc7f4 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include "dso.h" #include "map.h" @@ -14,6 +15,102 @@ #define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x)) +static bool is_ignored_symbol(const char *name, char type) +{ + /* Symbol names that exactly match to the following are ignored.*/ + static const char * const ignored_symbols[] = { + /* + * Symbols which vary between passes. Passes 1 and 2 must have + * identical symbol lists. The kallsyms_* symbols below are + * only added after pass 1, they would be included in pass 2 + * when --all-symbols is specified so exclude them to get a + * stable symbol list. + */ + "kallsyms_addresses", + "kallsyms_offsets", + "kallsyms_relative_base", + "kallsyms_num_syms", + "kallsyms_names", + "kallsyms_markers", + "kallsyms_token_table", + "kallsyms_token_index", + /* Exclude linker generated symbols which vary between passes */ + "_SDA_BASE_", /* ppc */ + "_SDA2_BASE_", /* ppc */ + NULL + }; + + /* Symbol names that begin with the following are ignored.*/ + static const char * const ignored_prefixes[] = { + "$", /* local symbols for ARM, MIPS, etc. 
*/ + ".LASANPC", /* s390 kasan local symbols */ + "__crc_", /* modversions */ + "__efistub_", /* arm64 EFI stub namespace */ + "__kvm_nvhe_", /* arm64 non-VHE KVM namespace */ + "__AArch64ADRPThunk_", /* arm64 lld */ + "__ARMV5PILongThunk_", /* arm lld */ + "__ARMV7PILongThunk_", + "__ThumbV7PILongThunk_", + "__LA25Thunk_", /* mips lld */ + "__microLA25Thunk_", + NULL + }; + + /* Symbol names that end with the following are ignored.*/ + static const char * const ignored_suffixes[] = { + "_from_arm", /* arm */ + "_from_thumb", /* arm */ + "_veneer", /* arm */ + NULL + }; + + /* Symbol names that contain the following are ignored.*/ + static const char * const ignored_matches[] = { + ".long_branch.", /* ppc stub */ + ".plt_branch.", /* ppc stub */ + NULL + }; + + const char * const *p; + + for (p = ignored_symbols; *p; p++) + if (!strcmp(name, *p)) + return true; + + for (p = ignored_prefixes; *p; p++) + if (!strncmp(name, *p, strlen(*p))) + return true; + + for (p = ignored_suffixes; *p; p++) { + int l = strlen(name) - strlen(*p); + + if (l >= 0 && !strcmp(name + l, *p)) + return true; + } + + for (p = ignored_matches; *p; p++) { + if (strstr(name, *p)) + return true; + } + + if (type == 'U' || type == 'u') + return true; + /* exclude debugging symbols */ + if (type == 'N' || type == 'n') + return true; + + if (toupper(type) == 'A') { + /* Keep these useful absolute symbols */ + if (strcmp(name, "__kernel_syscall_via_break") && + strcmp(name, "__kernel_syscall_via_epc") && + strcmp(name, "__kernel_sigtramp") && + strcmp(name, "__gp")) + return true; + } + + return false; +} + int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest __maybe_unused) { int err = -1; @@ -169,6 +266,11 @@ next_pair: * such as __indirect_thunk_end. 
*/ continue; + } else if (is_ignored_symbol(sym->name, sym->type)) { + /* + * Ignore hidden symbols, see scripts/kallsyms.c for the details + */ + continue; } else { pr_debug("ERR : %#" PRIx64 ": %s not on kallsyms\n", mem_start, sym->name); -- cgit v1.2.3 From be8ecc57f180415e8a7c1cc5620c5236be2a7e56 Mon Sep 17 00:00:00 2001 From: Tony Garnock-Jones Date: Thu, 16 Sep 2021 14:09:39 +0200 Subject: perf srcline: Use long-running addr2line per DSO Invoking addr2line in a separate subprocess, one for each required lookup, takes a terribly long time. This patch introduces a long-running addr2line process for each DSO, *DRAMATICALLY* speeding up runs of perf. What used to take tens of minutes now takes tens of seconds. Debian bug report about this issue: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=911815 Signed-off-by: Tony Garnock-Jones Tested-by: Ian Rogers Cc: Ingo Molnar Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20210916120939.453536-1-tonyg@leastfixedpoint.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/srcline.c | 338 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 250 insertions(+), 88 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 5b7d6c16d33f..af468e3bb6fa 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include #include +#include #include #include @@ -15,6 +17,7 @@ #include "srcline.h" #include "string2.h" #include "symbol.h" +#include "subcmd/run-command.h" bool srcline_full_filename; @@ -119,6 +122,8 @@ static struct symbol *new_inline_sym(struct dso *dso, return inline_sym; } +#define MAX_INLINE_NEST 1024 + #ifdef HAVE_LIBBFD_SUPPORT /* @@ -273,8 +278,6 @@ static void addr2line_cleanup(struct a2l_data *a2l) free(a2l); } -#define MAX_INLINE_NEST 1024 - static int inline_list__append_dso_a2l(struct dso *dso, struct inline_node *node, struct symbol *sym) 
@@ -361,26 +364,14 @@ void dso__free_a2l(struct dso *dso) dso->a2l = NULL; } -static struct inline_node *addr2inlines(const char *dso_name, u64 addr, - struct dso *dso, struct symbol *sym) -{ - struct inline_node *node; - - node = zalloc(sizeof(*node)); - if (node == NULL) { - perror("not enough memory for the inline node"); - return NULL; - } - - INIT_LIST_HEAD(&node->val); - node->addr = addr; - - addr2line(dso_name, addr, NULL, NULL, dso, true, node, sym); - return node; -} - #else /* HAVE_LIBBFD_SUPPORT */ +struct a2l_subprocess { + struct child_process addr2line; + FILE *to_child; + FILE *from_child; +}; + static int filename_split(char *filename, unsigned int *line_nr) { char *sep; @@ -402,114 +393,285 @@ static int filename_split(char *filename, unsigned int *line_nr) return 0; } -static int addr2line(const char *dso_name, u64 addr, - char **file, unsigned int *line_nr, - struct dso *dso __maybe_unused, - bool unwind_inlines __maybe_unused, - struct inline_node *node __maybe_unused, - struct symbol *sym __maybe_unused) +static void addr2line_subprocess_cleanup(struct a2l_subprocess *a2l) { - FILE *fp; - char cmd[PATH_MAX]; - char *filename = NULL; - size_t len; - int ret = 0; + if (a2l->addr2line.pid != -1) { + kill(a2l->addr2line.pid, SIGKILL); + finish_command(&a2l->addr2line); /* ignore result, we don't care */ + a2l->addr2line.pid = -1; + } - scnprintf(cmd, sizeof(cmd), "addr2line -e %s %016"PRIx64, - dso_name, addr); + if (a2l->to_child != NULL) { + fclose(a2l->to_child); + a2l->to_child = NULL; + } - fp = popen(cmd, "r"); - if (fp == NULL) { - pr_warning("popen failed for %s\n", dso_name); - return 0; + if (a2l->from_child != NULL) { + fclose(a2l->from_child); + a2l->from_child = NULL; + } + + free(a2l); +} + +static struct a2l_subprocess *addr2line_subprocess_init(const char *path) +{ + const char *argv[] = { "addr2line", "-e", path, "-i", "-f", NULL }; + struct a2l_subprocess *a2l = zalloc(sizeof(*a2l)); + int start_command_status = 0; + + if (a2l == 
NULL) + goto out; + + a2l->to_child = NULL; + a2l->from_child = NULL; + + a2l->addr2line.pid = -1; + a2l->addr2line.in = -1; + a2l->addr2line.out = -1; + a2l->addr2line.no_stderr = 1; + + a2l->addr2line.argv = argv; + start_command_status = start_command(&a2l->addr2line); + a2l->addr2line.argv = NULL; /* it's not used after start_command; avoid dangling pointers */ + + if (start_command_status != 0) { + pr_warning("could not start addr2line for %s: start_command return code %d\n", + path, + start_command_status); + goto out; } - if (getline(&filename, &len, fp) < 0 || !len) { - pr_warning("addr2line has no output for %s\n", dso_name); + a2l->to_child = fdopen(a2l->addr2line.in, "w"); + if (a2l->to_child == NULL) { + pr_warning("could not open write-stream to addr2line of %s\n", path); goto out; } - ret = filename_split(filename, line_nr); - if (ret != 1) { - free(filename); + a2l->from_child = fdopen(a2l->addr2line.out, "r"); + if (a2l->from_child == NULL) { + pr_warning("could not open read-stream from addr2line of %s\n", path); goto out; } - *file = filename; + return a2l; out: - pclose(fp); - return ret; + if (a2l) + addr2line_subprocess_cleanup(a2l); + + return NULL; } -void dso__free_a2l(struct dso *dso __maybe_unused) +static int read_addr2line_record(struct a2l_subprocess *a2l, + char **function, + char **filename, + unsigned int *line_nr) { + /* + * Returns: + * -1 ==> error + * 0 ==> sentinel (or other ill-formed) record read + * 1 ==> a genuine record read + */ + char *line = NULL; + size_t line_len = 0; + unsigned int dummy_line_nr = 0; + int ret = -1; + + if (function != NULL) + zfree(function); + + if (filename != NULL) + zfree(filename); + + if (line_nr != NULL) + *line_nr = 0; + + if (getline(&line, &line_len, a2l->from_child) < 0 || !line_len) + goto error; + + if (function != NULL) + *function = strdup(strim(line)); + + zfree(&line); + line_len = 0; + + if (getline(&line, &line_len, a2l->from_child) < 0 || !line_len) + goto error; + + if 
(filename_split(line, line_nr == NULL ? &dummy_line_nr : line_nr) == 0) { + ret = 0; + goto error; + } + + if (filename != NULL) + *filename = strdup(line); + + zfree(&line); + line_len = 0; + + return 1; + +error: + free(line); + if (function != NULL) + zfree(function); + if (filename != NULL) + zfree(filename); + return ret; } -static struct inline_node *addr2inlines(const char *dso_name, u64 addr, - struct dso *dso __maybe_unused, - struct symbol *sym) +static int inline_list__append_record(struct dso *dso, + struct inline_node *node, + struct symbol *sym, + const char *function, + const char *filename, + unsigned int line_nr) { - FILE *fp; - char cmd[PATH_MAX]; - struct inline_node *node; - char *filename = NULL; - char *funcname = NULL; - size_t filelen, funclen; - unsigned int line_nr = 0; + struct symbol *inline_sym = new_inline_sym(dso, sym, function); - scnprintf(cmd, sizeof(cmd), "addr2line -e %s -i -f %016"PRIx64, - dso_name, addr); + return inline_list__append(inline_sym, srcline_from_fileline(filename, line_nr), node); +} - fp = popen(cmd, "r"); - if (fp == NULL) { - pr_err("popen failed for %s\n", dso_name); - return NULL; +static int addr2line(const char *dso_name, u64 addr, + char **file, unsigned int *line_nr, + struct dso *dso, + bool unwind_inlines, + struct inline_node *node, + struct symbol *sym __maybe_unused) +{ + struct a2l_subprocess *a2l = dso->a2l; + char *record_function = NULL; + char *record_filename = NULL; + unsigned int record_line_nr = 0; + int record_status = -1; + int ret = 0; + size_t inline_count = 0; + + if (!a2l) { + dso->a2l = addr2line_subprocess_init(dso_name); + a2l = dso->a2l; } - node = zalloc(sizeof(*node)); - if (node == NULL) { - perror("not enough memory for the inline node"); + if (a2l == NULL) { + if (!symbol_conf.disable_add2line_warn) + pr_warning("%s %s: addr2line_subprocess_init failed\n", __func__, dso_name); goto out; } - INIT_LIST_HEAD(&node->val); - node->addr = addr; - - /* addr2line -f generates two 
lines for each inlined functions */ - while (getline(&funcname, &funclen, fp) != -1) { - char *srcline; - struct symbol *inline_sym; + /* + * Send our request and then *deliberately* send something that can't be interpreted as + * a valid address to ask addr2line about (namely, ","). This causes addr2line to first + * write out the answer to our request, in an unbounded/unknown number of records, and + * then to write out the lines "??" and "??:0", so that we can detect when it has + * finished giving us anything useful. We have to be careful about the first record, + * though, because it may be genuinely unknown, in which case we'll get two sets of + * "??"/"??:0" lines. + */ + if (fprintf(a2l->to_child, "%016"PRIx64"\n,\n", addr) < 0 || fflush(a2l->to_child) != 0) { + pr_warning("%s %s: could not send request\n", __func__, dso_name); + goto out; + } - strim(funcname); + switch (read_addr2line_record(a2l, &record_function, &record_filename, &record_line_nr)) { + case -1: + pr_warning("%s %s: could not read first record\n", __func__, dso_name); + goto out; + case 0: + /* + * The first record was invalid, so return failure, but first read another + * record, since we asked a junk question and have to clear the answer out. + */ + switch (read_addr2line_record(a2l, NULL, NULL, NULL)) { + case -1: + pr_warning("%s %s: could not read delimiter record\n", __func__, dso_name); + break; + case 0: + /* As expected. 
*/ + break; + default: + pr_warning("%s %s: unexpected record instead of sentinel", + __func__, dso_name); + break; + } + goto out; + default: + break; + } - if (getline(&filename, &filelen, fp) == -1) - goto out; + if (file) { + *file = strdup(record_filename); + ret = 1; + } + if (line_nr) + *line_nr = record_line_nr; - if (filename_split(filename, &line_nr) != 1) + if (unwind_inlines) { + if (node && inline_list__append_record(dso, node, sym, + record_function, + record_filename, + record_line_nr)) { + ret = 0; goto out; + } + } - srcline = srcline_from_fileline(filename, line_nr); - inline_sym = new_inline_sym(dso, sym, funcname); - - if (inline_list__append(inline_sym, srcline, node) != 0) { - free(srcline); - if (inline_sym && inline_sym->inlined) - symbol__delete(inline_sym); - goto out; + /* We have to read the records even if we don't care about the inline info. */ + while ((record_status = read_addr2line_record(a2l, + &record_function, + &record_filename, + &record_line_nr)) == 1) { + if (unwind_inlines && node && inline_count++ < MAX_INLINE_NEST) { + if (inline_list__append_record(dso, node, sym, + record_function, + record_filename, + record_line_nr)) { + ret = 0; + goto out; + } + ret = 1; /* found at least one inline frame */ } } out: - pclose(fp); - free(filename); - free(funcname); + free(record_function); + free(record_filename); + return ret; +} - return node; +void dso__free_a2l(struct dso *dso) +{ + struct a2l_subprocess *a2l = dso->a2l; + + if (!a2l) + return; + + addr2line_subprocess_cleanup(a2l); + + dso->a2l = NULL; } #endif /* HAVE_LIBBFD_SUPPORT */ +static struct inline_node *addr2inlines(const char *dso_name, u64 addr, + struct dso *dso, struct symbol *sym) +{ + struct inline_node *node; + + node = zalloc(sizeof(*node)); + if (node == NULL) { + perror("not enough memory for the inline node"); + return NULL; + } + + INIT_LIST_HEAD(&node->val); + node->addr = addr; + + addr2line(dso_name, addr, NULL, NULL, dso, true, node, sym); + return 
node; +} + /* * Number of addr2line failures (without success) before disabling it for that * dso. -- cgit v1.2.3 From 73e40c9bd44ce91fd0fcee6f0a3480dcd97963b6 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Sat, 21 Aug 2021 11:19:07 +0200 Subject: libperf cpumap: Use binary search in perf_cpu_map__idx() as array are sorted Since 7074674e7338863e ("perf cpumap: Maintain cpumaps ordered and without dups") perf_cpu_map elements are sorted in ascending order. This patch improves perf_cpu_map__idx() by using a binary search. Signed-off-by: Riccardo Mancini Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/f1543c15797169c21e8b205a4a6751159180580d.1629490974.git.rickyman7@gmail.com [ Removed 'else' after if + return, declared variables where needed ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/cpumap.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index 6d8e521c59e1..adaad3dddf6e 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -270,11 +270,19 @@ bool perf_cpu_map__empty(const struct perf_cpu_map *map) int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu) { - int i; + int low = 0, high = cpus->nr; - for (i = 0; i < cpus->nr; ++i) { - if (cpus->map[i] == cpu) - return i; + while (low < high) { + int idx = (low + high) / 2, + cpu_at_idx = cpus->map[idx]; + + if (cpu_at_idx == cpu) + return idx; + + if (cpu_at_idx > cpu) + high = idx; + else + low = idx + 1; } return -1; -- cgit v1.2.3 From 6bd006c6eb7fdadbe36de84377af4447da4ffabb Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Sat, 21 Aug 2021 11:19:10 +0200 Subject: perf mmap: Introduce mmap_cpu_mask__duplicate() This patch adds a new function in util/mmap.c to duplicate a mmap_cpu_mask. This new function will be used in patches in the workqueue patchkit. 
Signed-off-by: Riccardo Mancini Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/8943a548ef7a3dd3e015095afad7e9a8b2154c05.1629490974.git.rickyman7@gmail.com [ bitmap_alloc() was renamed to bitmap_zalloc() ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mmap.c | 11 +++++++++++ tools/perf/util/mmap.h | 3 +++ 2 files changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 512dc8b9c168..23ecdba9e670 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -350,3 +350,14 @@ int perf_mmap__push(struct mmap *md, void *to, out: return rc; } + +int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original, struct mmap_cpu_mask *clone) +{ + clone->nbits = original->nbits; + clone->bits = bitmap_zalloc(original->nbits); + if (!clone->bits) + return -ENOMEM; + + memcpy(clone->bits, original->bits, MMAP_CPU_MASK_BYTES(original)); + return 0; +} diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index af33118354dd..8e259b9610f8 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -64,4 +64,7 @@ size_t mmap__mmap_len(struct mmap *map); void mmap_cpu_mask__scnprintf(struct mmap_cpu_mask *mask, const char *tag); +int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original, + struct mmap_cpu_mask *clone); + #endif /*__PERF_MMAP_H */ -- cgit v1.2.3 From c2d4fab01f5e69adbe697c82f35cc31cf200bf93 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Sat, 21 Aug 2021 11:19:42 +0200 Subject: perf test evlist-open-close: Use inline func to convert timeval to usec This patch introduces a new inline function to convert a timeval to usec. This function will be used also in the next patch. 
Signed-off-by: Riccardo Mancini Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/b95035ec4a125355be8ea843f7275c4580da6398.1629490974.git.rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/evlist-open-close.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/bench/evlist-open-close.c b/tools/perf/bench/evlist-open-close.c index 83e9897c64a1..75a53919126b 100644 --- a/tools/perf/bench/evlist-open-close.c +++ b/tools/perf/bench/evlist-open-close.c @@ -25,6 +25,11 @@ static int iterations = 100; static int nr_events = 1; static const char *event_string = "dummy"; +static inline u64 timeval2usec(struct timeval *tv) +{ + return tv->tv_sec * USEC_PER_SEC + tv->tv_usec; +} + static struct record_opts opts = { .sample_time = true, .mmap_pages = UINT_MAX, @@ -167,7 +172,7 @@ static int bench_evlist_open_close__run(char *evstr) gettimeofday(&end, NULL); timersub(&end, &start, &diff); - runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + runtime_us = timeval2usec(&diff); update_stats(&time_stats, runtime_us); evlist__delete(evlist); -- cgit v1.2.3 From c6c00900c751c2fab9e34ee27f5e883fc1a20fc6 Mon Sep 17 00:00:00 2001 From: Guo Zhengkui Date: Wed, 6 Oct 2021 14:22:34 +0800 Subject: perf daemon: Remove duplicate sys/file.h include There is a "#include <sys/file.h>" in line 10, so remove a duplicate one in line 1124. 
Signed-off-by: Guo Zhengkui Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20211006062235.6364-1-guozhengkui@vivo.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-daemon.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index c13201fb09c3..6cb3f6cc36d0 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -1121,8 +1121,6 @@ static int setup_config(struct daemon *daemon) #ifndef F_TLOCK #define F_TLOCK 2 -#include <sys/file.h> - static int lockf(int fd, int cmd, off_t len) { if (cmd != F_TLOCK || len != 0) -- cgit v1.2.3 From 08f3e0873ac203449465c2b8473d684e2f9f41d1 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 7 Oct 2021 12:05:40 +0100 Subject: perf vendor-events: Fix all remaining invalid JSON files Remove trailing commas. A later commit will make the parser more strict and these will not be valid anymore. 
Reviewed-by: Andi Kleen Reviewed-by: John Garry Reviewed-by: Kajol Jain Signed-off-by: James Clark Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andrew.Kilroy@arm.com Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Nick.Forrington@arm.com Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211007110543.564963-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/arm64/ampere/emag/bus.json | 2 +- .../pmu-events/arch/arm64/ampere/emag/cache.json | 20 ++++++++++---------- .../pmu-events/arch/arm64/ampere/emag/clock.json | 2 +- .../pmu-events/arch/arm64/ampere/emag/exception.json | 4 ++-- .../arch/arm64/ampere/emag/instruction.json | 10 +++++----- .../pmu-events/arch/arm64/ampere/emag/memory.json | 4 ++-- .../arch/arm64/hisilicon/hip08/metrics.json | 2 +- tools/perf/pmu-events/arch/nds32/n13/atcpmu.json | 2 +- tools/perf/pmu-events/arch/s390/cf_z10/basic.json | 2 +- tools/perf/pmu-events/arch/s390/cf_z10/crypto.json | 2 +- tools/perf/pmu-events/arch/s390/cf_z10/extended.json | 2 +- tools/perf/pmu-events/arch/s390/cf_z13/basic.json | 2 +- tools/perf/pmu-events/arch/s390/cf_z13/crypto.json | 2 +- tools/perf/pmu-events/arch/s390/cf_z13/extended.json | 2 +- tools/perf/pmu-events/arch/s390/cf_z14/basic.json | 2 +- tools/perf/pmu-events/arch/s390/cf_z14/crypto.json | 2 +- tools/perf/pmu-events/arch/s390/cf_z14/extended.json | 2 +- tools/perf/pmu-events/arch/s390/cf_z15/basic.json | 2 +- tools/perf/pmu-events/arch/s390/cf_z15/crypto.json | 2 +- tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json | 2 +- tools/perf/pmu-events/arch/s390/cf_z15/extended.json | 2 +- tools/perf/pmu-events/arch/s390/cf_z196/basic.json | 2 +- tools/perf/pmu-events/arch/s390/cf_z196/crypto.json | 2 +- .../perf/pmu-events/arch/s390/cf_z196/extended.json | 2 +- tools/perf/pmu-events/arch/s390/cf_zec12/basic.json | 2 +- tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json | 2 +- 
.../perf/pmu-events/arch/s390/cf_zec12/extended.json | 2 +- .../pmu-events/arch/test/test_soc/cpu/uncore.json | 2 +- .../pmu-events/arch/x86/icelakex/icx-metrics.json | 2 +- 29 files changed, 44 insertions(+), 44 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json index 9bea1ba1c4d2..cf48d0dfc759 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json @@ -18,6 +18,6 @@ "ArchStdEvent": "BUS_ACCESS_PERIPH" }, { - "ArchStdEvent": "BUS_ACCESS", + "ArchStdEvent": "BUS_ACCESS" } ] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json index 1e25f2ae4ae0..4cc50b7da526 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json @@ -39,31 +39,31 @@ "ArchStdEvent": "L2D_CACHE_INVAL" }, { - "ArchStdEvent": "L1I_CACHE_REFILL", + "ArchStdEvent": "L1I_CACHE_REFILL" }, { - "ArchStdEvent": "L1I_TLB_REFILL", + "ArchStdEvent": "L1I_TLB_REFILL" }, { - "ArchStdEvent": "L1D_CACHE_REFILL", + "ArchStdEvent": "L1D_CACHE_REFILL" }, { - "ArchStdEvent": "L1D_CACHE", + "ArchStdEvent": "L1D_CACHE" }, { - "ArchStdEvent": "L1D_TLB_REFILL", + "ArchStdEvent": "L1D_TLB_REFILL" }, { - "ArchStdEvent": "L1I_CACHE", + "ArchStdEvent": "L1I_CACHE" }, { - "ArchStdEvent": "L2D_CACHE", + "ArchStdEvent": "L2D_CACHE" }, { - "ArchStdEvent": "L2D_CACHE_REFILL", + "ArchStdEvent": "L2D_CACHE_REFILL" }, { - "ArchStdEvent": "L2D_CACHE_WB", + "ArchStdEvent": "L2D_CACHE_WB" }, { "PublicDescription": "This event counts any load or store operation which accesses the data L1 TLB", @@ -72,7 +72,7 @@ }, { "PublicDescription": "This event counts any instruction fetch which accesses the instruction L1 TLB", - "ArchStdEvent": "L1I_TLB", + "ArchStdEvent": "L1I_TLB" }, { "PublicDescription": "Level 2 access to 
data TLB that caused a page table walk. This event counts on any data access which causes L2D_TLB_REFILL to count", diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json index 9076ca2daf9e..927a6f629a03 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json @@ -1,7 +1,7 @@ [ { "PublicDescription": "The number of core clock cycles", - "ArchStdEvent": "CPU_CYCLES", + "ArchStdEvent": "CPU_CYCLES" }, { "PublicDescription": "FSU clocking gated off cycle", diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json index 9761433ad329..ada052e19632 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json @@ -36,9 +36,9 @@ "ArchStdEvent": "EXC_TRAP_FIQ" }, { - "ArchStdEvent": "EXC_TAKEN", + "ArchStdEvent": "EXC_TAKEN" }, { - "ArchStdEvent": "EXC_RETURN", + "ArchStdEvent": "EXC_RETURN" } ] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json index 482aa3f19e58..62f6276e3016 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json @@ -44,25 +44,25 @@ "BriefDescription": "Software increment" }, { - "ArchStdEvent": "INST_RETIRED", + "ArchStdEvent": "INST_RETIRED" }, { "ArchStdEvent": "CID_WRITE_RETIRED", "BriefDescription": "Write to CONTEXTIDR" }, { - "ArchStdEvent": "INST_SPEC", + "ArchStdEvent": "INST_SPEC" }, { - "ArchStdEvent": "TTBR_WRITE_RETIRED", + "ArchStdEvent": "TTBR_WRITE_RETIRED" }, { "PublicDescription": "This event counts all branches, taken or not. 
This excludes exception entries, debug entries and CCFAIL branches", - "ArchStdEvent": "BR_RETIRED", + "ArchStdEvent": "BR_RETIRED" }, { "PublicDescription": "This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush", - "ArchStdEvent": "BR_MIS_PRED_RETIRED", + "ArchStdEvent": "BR_MIS_PRED_RETIRED" }, { "PublicDescription": "Operation speculatively executed, NOP", diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json index 2e7555696caf..50157e8c2005 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json @@ -15,10 +15,10 @@ "ArchStdEvent": "UNALIGNED_LDST_SPEC" }, { - "ArchStdEvent": "MEM_ACCESS", + "ArchStdEvent": "MEM_ACCESS" }, { "PublicDescription": "This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs", - "ArchStdEvent": "MEMORY_ERROR", + "ArchStdEvent": "MEMORY_ERROR" } ] diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json index dda8e59149d2..6970203cb247 100644 --- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json @@ -229,5 +229,5 @@ "BriefDescription": "Store bound L3 topdown metric", "MetricGroup": "TopDownL3", "MetricName": "store_bound" - }, + } ] diff --git a/tools/perf/pmu-events/arch/nds32/n13/atcpmu.json b/tools/perf/pmu-events/arch/nds32/n13/atcpmu.json index 5347350c360c..3e7ac409d894 100644 --- a/tools/perf/pmu-events/arch/nds32/n13/atcpmu.json +++ b/tools/perf/pmu-events/arch/nds32/n13/atcpmu.json @@ -286,5 +286,5 @@ "EventCode": "0x21e", "EventName": "pop25_inst", "BriefDescription": "V3 POP25 instructions" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z10/basic.json 
b/tools/perf/pmu-events/arch/s390/cf_z10/basic.json index 2dd8dafff2ef..783de7f1aeaa 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z10/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_z10/basic.json @@ -82,5 +82,5 @@ "EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES", "BriefDescription": "Problem-State L1D Penalty Cycles", "PublicDescription": "Problem-State Level-1 D-Cache Penalty Cycle Count" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json index db286f19e7b6..3f28007d3892 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json @@ -110,5 +110,5 @@ "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z10/extended.json b/tools/perf/pmu-events/arch/s390/cf_z10/extended.json index b6b7f29ca831..86bd8ba9391d 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z10/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z10/extended.json @@ -124,5 +124,5 @@ "EventName": "L2C_STORES_SENT", "BriefDescription": "L2C Stores Sent", "PublicDescription": "Incremented by one for every store sent to Level-2 (L1.5) cache" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/basic.json b/tools/perf/pmu-events/arch/s390/cf_z13/basic.json index 2dd8dafff2ef..783de7f1aeaa 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z13/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_z13/basic.json @@ -82,5 +82,5 @@ "EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES", "BriefDescription": "Problem-State L1D Penalty Cycles", "PublicDescription": "Problem-State Level-1 D-Cache Penalty Cycle Count" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json 
b/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json index db286f19e7b6..3f28007d3892 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json @@ -110,5 +110,5 @@ "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/extended.json b/tools/perf/pmu-events/arch/s390/cf_z13/extended.json index 5da8296b667e..1a5e4f89c57e 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z13/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z13/extended.json @@ -390,5 +390,5 @@ "EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE", "BriefDescription": "Cycle count with two threads active", "PublicDescription": "Cycle count with two threads active" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/basic.json b/tools/perf/pmu-events/arch/s390/cf_z14/basic.json index 17fb5241928b..fc762e9f1d6e 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z14/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_z14/basic.json @@ -54,5 +54,5 @@ "EventName": "PROBLEM_STATE_INSTRUCTIONS", "BriefDescription": "Problem-State Instructions", "PublicDescription": "Problem-State Instruction Count" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json index db286f19e7b6..3f28007d3892 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json @@ -110,5 +110,5 @@ "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" - }, + } ] diff --git 
a/tools/perf/pmu-events/arch/s390/cf_z14/extended.json b/tools/perf/pmu-events/arch/s390/cf_z14/extended.json index 89e070727e1b..4942b20a1ea1 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z14/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z14/extended.json @@ -369,5 +369,5 @@ "EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE", "BriefDescription": "Cycle count with two threads active", "PublicDescription": "Cycle count with two threads active" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/basic.json b/tools/perf/pmu-events/arch/s390/cf_z15/basic.json index 17fb5241928b..fc762e9f1d6e 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z15/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/basic.json @@ -54,5 +54,5 @@ "EventName": "PROBLEM_STATE_INSTRUCTIONS", "BriefDescription": "Problem-State Instructions", "PublicDescription": "Problem-State Instruction Count" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z15/crypto.json index db286f19e7b6..3f28007d3892 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z15/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/crypto.json @@ -110,5 +110,5 @@ "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json index c998e4f1d1d2..ad79189050a0 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json @@ -26,5 +26,5 @@ "EventName": "ECC_BLOCKED_CYCLES_COUNT", "BriefDescription": "ECC Blocked Cycles Count", "PublicDescription": "This counter counts the total number of CPU cycles blocked for the elliptic-curve cryptography (ECC) functions issued by the 
CPU because the ECC coprocessor is busy performing a function issued by another CPU." - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json index 24c4ba2a9ae5..8ac61f8f286b 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json @@ -397,5 +397,5 @@ "EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE", "BriefDescription": "Cycle count with two threads active", "PublicDescription": "Cycle count with two threads active" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z196/basic.json b/tools/perf/pmu-events/arch/s390/cf_z196/basic.json index 2dd8dafff2ef..783de7f1aeaa 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z196/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_z196/basic.json @@ -82,5 +82,5 @@ "EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES", "BriefDescription": "Problem-State L1D Penalty Cycles", "PublicDescription": "Problem-State Level-1 D-Cache Penalty Cycle Count" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json index db286f19e7b6..3f28007d3892 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json @@ -110,5 +110,5 @@ "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_z196/extended.json b/tools/perf/pmu-events/arch/s390/cf_z196/extended.json index b7b42a870bb0..86b29fd181cf 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z196/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z196/extended.json @@ -166,5 +166,5 @@ "EventName": "L1I_OFFCHIP_L3_SOURCED_WRITES", "BriefDescription": "L1I Off-Chip L3 
Sourced Writes", "PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json b/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json index 2dd8dafff2ef..783de7f1aeaa 100644 --- a/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json @@ -82,5 +82,5 @@ "EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES", "BriefDescription": "Problem-State L1D Penalty Cycles", "PublicDescription": "Problem-State Level-1 D-Cache Penalty Cycle Count" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json b/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json index db286f19e7b6..3f28007d3892 100644 --- a/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json @@ -110,5 +110,5 @@ "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" - }, + } ] diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json b/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json index 162251037219..f40cbed89418 100644 --- a/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json @@ -243,5 +243,5 @@ "EventName": "TX_C_TABORT_SPECIAL", "BriefDescription": "Aborted transactions in constrained TX mode using special completion logic", "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete" - }, + } ] diff --git a/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json b/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json index 
788766f45dbc..73089c682f80 100644 --- a/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json +++ b/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json @@ -38,5 +38,5 @@ "BriefDescription": "Total cache hits", "PublicDescription": "Total cache hits", "Unit": "imc" - }, + } ] diff --git a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json index 57ddbb9f9b31..14b9a8ab15b9 100644 --- a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json @@ -311,5 +311,5 @@ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", "MetricGroup": "Power", "MetricName": "C6_Pkg_Residency" - }, + } ] -- cgit v1.2.3 From 21813684e46df1c979f714b20b369b3474f5e933 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 7 Oct 2021 12:05:41 +0100 Subject: perf tools: Make the JSON parser more conformant when in strict mode Return an error when a trailing comma is found or a new item is encountered before a comma or an opening brace. 
This ensures that the perf JSON files conform more closely to the spec at https://www.json.org Reviewed-by: Andi Kleen Reviewed-by: Kajol Jain Signed-off-by: James Clark Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andrew.Kilroy@arm.com Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Nick.Forrington@arm.com Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211007110543.564963-3-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jsmn.c | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/jsmn.c b/tools/perf/pmu-events/jsmn.c index 11d1fa18bfa5..8124d2d3ff0c 100644 --- a/tools/perf/pmu-events/jsmn.c +++ b/tools/perf/pmu-events/jsmn.c @@ -176,6 +176,14 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, jsmnerr_t r; int i; jsmntok_t *token; +#ifdef JSMN_STRICT + /* + * Keeps track of whether a new object/list/primitive is expected. New items are only + * allowed after an opening brace, comma or colon. A closing brace after a comma is not + * valid JSON. + */ + int expecting_item = 1; +#endif for (; parser->pos < len; parser->pos++) { char c; @@ -185,6 +193,10 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, switch (c) { case '{': case '[': +#ifdef JSMN_STRICT + if (!expecting_item) + return JSMN_ERROR_INVAL; +#endif token = jsmn_alloc_token(parser, tokens, num_tokens); if (token == NULL) return JSMN_ERROR_NOMEM; @@ -196,6 +208,10 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, break; case '}': case ']': +#ifdef JSMN_STRICT + if (expecting_item) + return JSMN_ERROR_INVAL; +#endif type = (c == '}' ? 
JSMN_OBJECT : JSMN_ARRAY); for (i = parser->toknext - 1; i >= 0; i--) { token = &tokens[i]; @@ -219,6 +235,11 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, } break; case '\"': +#ifdef JSMN_STRICT + if (!expecting_item) + return JSMN_ERROR_INVAL; + expecting_item = 0; +#endif r = jsmn_parse_string(parser, js, len, tokens, num_tokens); if (r < 0) @@ -229,11 +250,15 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, case '\t': case '\r': case '\n': - case ':': - case ',': case ' ': break; #ifdef JSMN_STRICT + case ':': + case ',': + if (expecting_item) + return JSMN_ERROR_INVAL; + expecting_item = 1; + break; /* * In strict mode primitives are: * numbers and booleans. @@ -253,6 +278,9 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, case 'f': case 'n': #else + case ':': + case ',': + break; /* * In non-strict mode every unquoted value * is a primitive. @@ -260,6 +288,12 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, /*FALL THROUGH */ default: #endif + +#ifdef JSMN_STRICT + if (!expecting_item) + return JSMN_ERROR_INVAL; + expecting_item = 0; +#endif r = jsmn_parse_primitive(parser, js, len, tokens, num_tokens); if (r < 0) @@ -282,7 +316,11 @@ jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, return JSMN_ERROR_PART; } +#ifdef JSMN_STRICT + return expecting_item ? JSMN_ERROR_INVAL : JSMN_SUCCESS; +#else return JSMN_SUCCESS; +#endif } /* -- cgit v1.2.3 From eda1a84cb4e9375984563c23a950377f915e7e1b Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 7 Oct 2021 12:05:42 +0100 Subject: perf tools: Enable strict JSON parsing This is to ensure that the PMU event files can always be parsed by other tools. 
Testing ======= * There are no errors when parsing files for all architectures: # pmu-events/jevents nds32 pmu-events/arch/ test # pmu-events/jevents s390 pmu-events/arch/ test # pmu-events/jevents powerpc pmu-events/arch/ test # pmu-events/jevents arm64 pmu-events/arch/ test # pmu-events/jevents test pmu-events/arch/ test # pmu-events/jevents x86 pmu-events/arch/ test * Trailing and leading commas now cause a parse error * Double commas now cause a parse error * Compilation and parsing works with strict mode disabled and enabled * A diff of the output files shows no changes Reviewed-by: Andi Kleen Reviewed-by: Kajol Jain Signed-off-by: James Clark Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andrew.Kilroy@arm.com Cc: John Garry Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Nick.Forrington@arm.com Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211007110543.564963-4-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jsmn.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/pmu-events/jsmn.c b/tools/perf/pmu-events/jsmn.c index 8124d2d3ff0c..831dc44c4558 100644 --- a/tools/perf/pmu-events/jsmn.c +++ b/tools/perf/pmu-events/jsmn.c @@ -24,6 +24,7 @@ #include <stdlib.h> #include "jsmn.h" +#define JSMN_STRICT /* * Allocates a fresh unused token from the token pool. -- cgit v1.2.3 From f792cf8a094eac29e1d4e3d588dffbe68c7741a6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 22 Sep 2021 14:20:31 -0700 Subject: perf kmem: Improve man page for record options Since: https://lore.kernel.org/lkml/20200708183919.4141023-1-irogers@google.com/ The output option works for 'perf kmem', however, it must appear after 'record'. This is different to 'stat' where '-i' for the input must appear before. Try to capture this complication in the man page. 
Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210922212031.485950-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-kmem.txt | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt index 85b8ac695c87..f378ac59353d 100644 --- a/tools/perf/Documentation/perf-kmem.txt +++ b/tools/perf/Documentation/perf-kmem.txt @@ -8,22 +8,25 @@ perf-kmem - Tool to trace/measure kernel memory properties SYNOPSIS -------- [verse] -'perf kmem' {record|stat} [<options>] +'perf kmem' [<options>] {record|stat} DESCRIPTION ----------- There are two variants of perf kmem: - 'perf kmem record <command>' to record the kmem events - of an arbitrary workload. + 'perf kmem [<options>] record [<perf-record-options>] <command>' to + record the kmem events of an arbitrary workload. Additional 'perf + record' options may be specified after record, such as '-o' to + change the output file name. - 'perf kmem stat' to report kernel memory statistics. + 'perf kmem [<options>] stat' to report kernel memory statistics. OPTIONS ------- -i <file>:: --input=<file>:: - Select the input file (default: perf.data unless stdin is a fifo) + For stat, select the input file (default: perf.data unless stdin is a + fifo) -f:: --force:: -- cgit v1.2.3 From 92ec3cc94c2cb60db21cc16b469c0a7366b86742 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 Oct 2021 10:21:12 -0700 Subject: tools lib: Adopt list_sort() from the kernel sources Add list_sort.[ch] from the main kernel tree. The linux/bug.h #include is removed due to conflicting definitions. Add check-headers and modify perf build accordingly. MANIFEST and python-ext-sources fixes suggested by Arnaldo. 
Suggested-by: Arnaldo Carvalho de Melo Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Kees Kook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/list_sort.h | 14 +++ tools/lib/list_sort.c | 252 +++++++++++++++++++++++++++++++++++++ tools/perf/MANIFEST | 1 + tools/perf/check-headers.sh | 2 + tools/perf/util/Build | 5 + tools/perf/util/python-ext-sources | 1 + 6 files changed, 275 insertions(+) create mode 100644 tools/include/linux/list_sort.h create mode 100644 tools/lib/list_sort.c (limited to 'tools') diff --git a/tools/include/linux/list_sort.h b/tools/include/linux/list_sort.h new file mode 100644 index 000000000000..453105f74e05 --- /dev/null +++ b/tools/include/linux/list_sort.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_LIST_SORT_H +#define _LINUX_LIST_SORT_H + +#include <linux/types.h> + +struct list_head; + +typedef int __attribute__((nonnull(2,3))) (*list_cmp_func_t)(void *, + const struct list_head *, const struct list_head *); + +__attribute__((nonnull(2,3))) +void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp); +#endif diff --git a/tools/lib/list_sort.c b/tools/lib/list_sort.c new file mode 100644 index 000000000000..10c067e3a8d2 --- /dev/null +++ b/tools/lib/list_sort.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> 
+#include <linux/compiler.h> +#include <linux/export.h> +#include <linux/string.h> +#include <linux/list_sort.h> +#include <linux/list.h> + +/* + * Returns a list organized in an intermediate format suited + * to chaining of merge() calls: null-terminated, no reserved or + * sentinel head node, "prev" links not maintained. + */ +__attribute__((nonnull(2,3,4))) +static struct list_head *merge(void *priv, list_cmp_func_t cmp, + struct list_head *a, struct list_head *b) +{ + struct list_head *head, **tail = &head; + + for (;;) { + /* if equal, take 'a' -- important for sort stability */ + if (cmp(priv, a, b) <= 0) { + *tail = a; + tail = &a->next; + a = a->next; + if (!a) { + *tail = b; + break; + } + } else { + *tail = b; + tail = &b->next; + b = b->next; + if (!b) { + *tail = a; + break; + } + } + } + return head; +} + +/* + * Combine final list merge with restoration of standard doubly-linked + * list structure. This approach duplicates code from merge(), but + * runs faster than the tidier alternatives of either a separate final + * prev-link restoration pass, or maintaining the prev links + * throughout. + */ +__attribute__((nonnull(2,3,4,5))) +static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head, + struct list_head *a, struct list_head *b) +{ + struct list_head *tail = head; + u8 count = 0; + + for (;;) { + /* if equal, take 'a' -- important for sort stability */ + if (cmp(priv, a, b) <= 0) { + tail->next = a; + a->prev = tail; + tail = a; + a = a->next; + if (!a) + break; + } else { + tail->next = b; + b->prev = tail; + tail = b; + b = b->next; + if (!b) { + b = a; + break; + } + } + } + + /* Finish linking remainder of list b on to tail */ + tail->next = b; + do { + /* + * If the merge is highly unbalanced (e.g. the input is + * already sorted), this loop may run many iterations. + * Continue callbacks to the client even though no + * element comparison is needed, so the client's cmp() + * routine can invoke cond_resched() periodically. 
+ */ + if (unlikely(!++count)) + cmp(priv, b, b); + b->prev = tail; + tail = b; + b = b->next; + } while (b); + + /* And the final links to make a circular doubly-linked list */ + tail->next = head; + head->prev = tail; +} + +/** + * list_sort - sort a list + * @priv: private data, opaque to list_sort(), passed to @cmp + * @head: the list to sort + * @cmp: the elements comparison function + * + * The comparison function @cmp must return > 0 if @a should sort after + * @b ("@a > @b" if you want an ascending sort), and <= 0 if @a should + * sort before @b *or* their original order should be preserved. It is + * always called with the element that came first in the input in @a, + * and list_sort is a stable sort, so it is not necessary to distinguish + * the @a < @b and @a == @b cases. + * + * This is compatible with two styles of @cmp function: + * - The traditional style which returns <0 / =0 / >0, or + * - Returning a boolean 0/1. + * The latter offers a chance to save a few cycles in the comparison + * (which is used by e.g. plug_ctx_cmp() in block/blk-mq.c). + * + * A good way to write a multi-word comparison is:: + * + * if (a->high != b->high) + * return a->high > b->high; + * if (a->middle != b->middle) + * return a->middle > b->middle; + * return a->low > b->low; + * + * + * This mergesort is as eager as possible while always performing at least + * 2:1 balanced merges. Given two pending sublists of size 2^k, they are + * merged to a size-2^(k+1) list as soon as we have 2^k following elements. + * + * Thus, it will avoid cache thrashing as long as 3*2^k elements can + * fit into the cache. Not quite as good as a fully-eager bottom-up + * mergesort, but it does use 0.2*n fewer comparisons, so is faster in + * the common case that everything fits into L1. + * + * + * The merging is controlled by "count", the number of elements in the + * pending lists. This is beautifully simple code, but rather subtle. 
+ * + * Each time we increment "count", we set one bit (bit k) and clear + * bits k-1 .. 0. Each time this happens (except the very first time + * for each bit, when count increments to 2^k), we merge two lists of + * size 2^k into one list of size 2^(k+1). + * + * This merge happens exactly when the count reaches an odd multiple of + * 2^k, which is when we have 2^k elements pending in smaller lists, + * so it's safe to merge away two lists of size 2^k. + * + * After this happens twice, we have created two lists of size 2^(k+1), + * which will be merged into a list of size 2^(k+2) before we create + * a third list of size 2^(k+1), so there are never more than two pending. + * + * The number of pending lists of size 2^k is determined by the + * state of bit k of "count" plus two extra pieces of information: + * + * - The state of bit k-1 (when k == 0, consider bit -1 always set), and + * - Whether the higher-order bits are zero or non-zero (i.e. + * is count >= 2^(k+1)). + * + * There are six states we distinguish. "x" represents some arbitrary + * bits, and "y" represents some arbitrary non-zero bits: + * 0: 00x: 0 pending of size 2^k; x pending of sizes < 2^k + * 1: 01x: 0 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k + * 2: x10x: 0 pending of size 2^k; 2^k + x pending of sizes < 2^k + * 3: x11x: 1 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k + * 4: y00x: 1 pending of size 2^k; 2^k + x pending of sizes < 2^k + * 5: y01x: 2 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k + * (merge and loop back to state 2) + * + * We gain lists of size 2^k in the 2->3 and 4->5 transitions (because + * bit k-1 is set while the more significant bits are non-zero) and + * merge them away in the 5->2 transition. Note in particular that just + * before the 5->2 transition, all lower-order bits are 11 (state 3), + * so there is one list of each smaller size. 
+ * + * When we reach the end of the input, we merge all the pending + * lists, from smallest to largest. If you work through cases 2 to + * 5 above, you can see that the number of elements we merge with a list + * of size 2^k varies from 2^(k-1) (cases 3 and 5 when x == 0) to + * 2^(k+1) - 1 (second merge of case 5 when x == 2^(k-1) - 1). + */ +__attribute__((nonnull(2,3))) +void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp) +{ + struct list_head *list = head->next, *pending = NULL; + size_t count = 0; /* Count of pending */ + + if (list == head->prev) /* Zero or one elements */ + return; + + /* Convert to a null-terminated singly-linked list. */ + head->prev->next = NULL; + + /* + * Data structure invariants: + * - All lists are singly linked and null-terminated; prev + * pointers are not maintained. + * - pending is a prev-linked "list of lists" of sorted + * sublists awaiting further merging. + * - Each of the sorted sublists is power-of-two in size. + * - Sublists are sorted by size and age, smallest & newest at front. + * - There are zero to two sublists of each size. + * - A pair of pending sublists are merged as soon as the number + * of following pending elements equals their size (i.e. + * each time count reaches an odd multiple of that size). + * That ensures each later final merge will be at worst 2:1. + * - Each round consists of: + * - Merging the two sublists selected by the highest bit + * which flips when count is incremented, and + * - Adding an element from the input as a size-1 sublist. 
+ */ + do { + size_t bits; + struct list_head **tail = &pending; + + /* Find the least-significant clear bit in count */ + for (bits = count; bits & 1; bits >>= 1) + tail = &(*tail)->prev; + /* Do the indicated merge */ + if (likely(bits)) { + struct list_head *a = *tail, *b = a->prev; + + a = merge(priv, cmp, b, a); + /* Install the merged result in place of the inputs */ + a->prev = b->prev; + *tail = a; + } + + /* Move one element from input list to pending */ + list->prev = pending; + pending = list; + list = list->next; + pending->next = NULL; + count++; + } while (list); + + /* End of input; merge together all the pending lists. */ + list = pending; + pending = pending->prev; + for (;;) { + struct list_head *next = pending->prev; + + if (!next) + break; + list = merge(priv, cmp, pending, list); + pending = next; + } + /* The final merge, rebuilding prev links */ + merge_final(priv, cmp, head, pending, list); +} +EXPORT_SYMBOL(list_sort); diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index f05c4d48fd7e..e728615a3830 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -17,6 +17,7 @@ tools/lib/symbol/kallsyms.c tools/lib/symbol/kallsyms.h tools/lib/find_bit.c tools/lib/bitmap.c +tools/lib/list_sort.c tools/lib/str_error_r.c tools/lib/vsprintf.c tools/lib/zalloc.c diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index f1e46277e822..30ecf3a0f68b 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -26,6 +26,7 @@ include/vdso/bits.h include/linux/const.h include/vdso/const.h include/linux/hash.h +include/linux/list-sort.h include/uapi/linux/hw_breakpoint.h arch/x86/include/asm/disabled-features.h arch/x86/include/asm/required-features.h @@ -150,6 +151,7 @@ check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"' check include/linux/ctype.h '-I "isdigit("' check lib/ctype.c '-I "^EXPORT_SYMBOL" -I 
"^#include " -B' +check lib/list_sort.c '-I "^#include "' # diff non-symmetric files check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl diff --git a/tools/perf/util/Build b/tools/perf/util/Build index f2914d5bed6e..15b2366ad384 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -138,6 +138,7 @@ perf-y += expr.o perf-y += branch.o perf-y += mem2node.o perf-y += clockid.o +perf-y += list_sort.o perf-$(CONFIG_LIBBPF) += bpf-loader.o perf-$(CONFIG_LIBBPF) += bpf_map.o @@ -315,3 +316,7 @@ $(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE $(OUTPUT)util/vsprintf.o: ../lib/vsprintf.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/list_sort.o: ../lib/list_sort.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index d7c976671e3a..a685d20165f7 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -18,6 +18,7 @@ util/mmap.c util/namespaces.c ../lib/bitmap.c ../lib/find_bit.c +../lib/list_sort.c ../lib/hweight.c ../lib/string.c ../lib/vsprintf.c -- cgit v1.2.3 From 0ec43c08376fe3b827bdb42cc22c6b589869a558 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 Oct 2021 10:21:13 -0700 Subject: perf pmu: Add const to pmu_events_map. The pmu_events_map is generated at compile time and used for lookup. For testing purposes we need to swap the map being used. Having the pmu_events_map be non-const is misleading as it may be an out argument. Make it const and update uses so they work on const too. 
Reviewed-by: John Garry Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kajol Jain Cc: Kan Liang Cc: Kees Kook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/pmu.c | 2 +- tools/perf/pmu-events/jevents.c | 2 +- tools/perf/pmu-events/pmu-events.h | 2 +- tools/perf/tests/expand-cgroup.c | 2 +- tools/perf/tests/parse-metric.c | 2 +- tools/perf/tests/pmu-events.c | 18 +++++++++--------- tools/perf/util/metricgroup.c | 20 ++++++++++---------- tools/perf/util/metricgroup.h | 4 ++-- tools/perf/util/pmu.c | 10 +++++----- tools/perf/util/pmu.h | 6 +++--- tools/perf/util/s390-sample-raw.c | 4 ++-- 11 files changed, 36 insertions(+), 36 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index 2234fbd0a912..d3a18f9c85f6 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -3,7 +3,7 @@ #include "../../../util/cpumap.h" #include "../../../util/pmu.h" -struct pmu_events_map *pmu_events_map__find(void) +const struct pmu_events_map *pmu_events_map__find(void) { struct perf_pmu *pmu = NULL; diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index b7c8aae6bcf1..bff8f92733fe 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -798,7 +798,7 @@ static bool is_sys_dir(char *fname) static 
void print_mapping_table_prefix(FILE *outfp) { - fprintf(outfp, "struct pmu_events_map pmu_events_map[] = {\n"); + fprintf(outfp, "const struct pmu_events_map pmu_events_map[] = {\n"); } static void print_mapping_table_suffix(FILE *outfp) diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 5c2bf7275c1c..42c6db6bedec 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -53,7 +53,7 @@ struct pmu_sys_events { * Global table mapping each known CPU for the architecture to its * table of PMU events. */ -extern struct pmu_events_map pmu_events_map[]; +extern const struct pmu_events_map pmu_events_map[]; extern struct pmu_sys_events pmu_sys_event_tables[]; #endif diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c index 0e46aeb843ce..aaad51aba12f 100644 --- a/tools/perf/tests/expand-cgroup.c +++ b/tools/perf/tests/expand-cgroup.c @@ -193,7 +193,7 @@ static int expand_metric_events(void) .metric_name = NULL, }, }; - struct pmu_events_map ev_map = { + const struct pmu_events_map ev_map = { .cpuid = "test", .version = "1", .type = "core", diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 4f6f4904e852..dfc797ecc750 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -79,7 +79,7 @@ static struct pmu_event pme_test[] = { } }; -static struct pmu_events_map map = { +static const struct pmu_events_map map = { .cpuid = "test", .version = "1", .type = "core", diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index d3534960ed25..8a1fdcd072f5 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -242,9 +242,9 @@ static bool is_same(const char *reference, const char *test) return !strcmp(reference, test); } -static struct pmu_events_map *__test_pmu_get_events_map(void) +static const struct pmu_events_map *__test_pmu_get_events_map(void) { - struct pmu_events_map 
*map; + const struct pmu_events_map *map; for (map = &pmu_events_map[0]; map->cpuid; map++) { if (!strcmp(map->cpuid, "testcpu")) @@ -421,7 +421,7 @@ static int compare_alias_to_test_event(struct perf_pmu_alias *alias, static int test_pmu_event_table(void) { struct pmu_event *sys_event_tables = __test_pmu_get_sys_events_table(); - struct pmu_events_map *map = __test_pmu_get_events_map(); + const struct pmu_events_map *map = __test_pmu_get_events_map(); struct pmu_event *table; int map_events = 0, expected_events; @@ -518,7 +518,7 @@ static int __test_core_pmu_event_aliases(char *pmu_name, int *count) struct perf_pmu *pmu; LIST_HEAD(aliases); int res = 0; - struct pmu_events_map *map = __test_pmu_get_events_map(); + const struct pmu_events_map *map = __test_pmu_get_events_map(); struct perf_pmu_alias *a, *tmp; if (!map) @@ -571,7 +571,7 @@ static int __test_uncore_pmu_event_aliases(struct perf_pmu_test_pmu *test_pmu) struct perf_pmu *pmu = &test_pmu->pmu; const char *pmu_name = pmu->name; struct perf_pmu_alias *a, *tmp, *alias; - struct pmu_events_map *map; + const struct pmu_events_map *map; LIST_HEAD(aliases); int res = 0; @@ -825,7 +825,7 @@ struct metric { static int resolve_metric_simple(struct expr_parse_ctx *pctx, struct list_head *compound_list, - struct pmu_events_map *map, + const struct pmu_events_map *map, const char *metric_name) { struct hashmap_entry *cur, *cur_tmp; @@ -885,8 +885,8 @@ out_err: static int test_parsing(void) { - struct pmu_events_map *cpus_map = pmu_events_map__find(); - struct pmu_events_map *map; + const struct pmu_events_map *cpus_map = pmu_events_map__find(); + const struct pmu_events_map *map; struct pmu_event *pe; int i, j, k; int ret = 0; @@ -1027,7 +1027,7 @@ out: */ static int test_parsing_fake(void) { - struct pmu_events_map *map; + const struct pmu_events_map *map; struct pmu_event *pe; unsigned int i, j; int err = 0; diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 8ba5370f5f64..74ea0a3540ce 
100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -623,7 +623,7 @@ static int metricgroup__print_sys_event_iter(struct pmu_event *pe, void *data) void metricgroup__print(bool metrics, bool metricgroups, char *filter, bool raw, bool details) { - struct pmu_events_map *map = pmu_events_map__find(); + const struct pmu_events_map *map = pmu_events_map__find(); struct pmu_event *pe; int i; struct rblist groups; @@ -910,7 +910,7 @@ static int __add_metric(struct list_head *metric_list, match_metric(__pe->metric_name, __metric))) struct pmu_event *metricgroup__find_metric(const char *metric, - struct pmu_events_map *map) + const struct pmu_events_map *map) { struct pmu_event *pe; int i; @@ -977,7 +977,7 @@ static int add_metric(struct list_head *metric_list, static int __resolve_metric(struct metric *m, bool metric_no_group, struct list_head *metric_list, - struct pmu_events_map *map, + const struct pmu_events_map *map, struct expr_ids *ids) { struct hashmap_entry *cur; @@ -1025,7 +1025,7 @@ static int __resolve_metric(struct metric *m, static int resolve_metric(bool metric_no_group, struct list_head *metric_list, - struct pmu_events_map *map, + const struct pmu_events_map *map, struct expr_ids *ids) { struct metric *m; @@ -1099,7 +1099,7 @@ out: static int metricgroup__add_metric(const char *metric, bool metric_no_group, struct strbuf *events, struct list_head *metric_list, - struct pmu_events_map *map) + const struct pmu_events_map *map) { struct expr_ids ids = { .cnt = 0, }; struct pmu_event *pe; @@ -1173,7 +1173,7 @@ out: static int metricgroup__add_metric_list(const char *list, bool metric_no_group, struct strbuf *events, struct list_head *metric_list, - struct pmu_events_map *map) + const struct pmu_events_map *map) { char *llist, *nlist, *p; int ret = -EINVAL; @@ -1230,7 +1230,7 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, bool metric_no_merge, struct perf_pmu *fake_pmu, struct rblist *metric_events, - 
struct pmu_events_map *map) + const struct pmu_events_map *map) { struct parse_events_error parse_error; struct strbuf extra_events; @@ -1266,14 +1266,14 @@ int metricgroup__parse_groups(const struct option *opt, struct rblist *metric_events) { struct evlist *perf_evlist = *(struct evlist **)opt->value; - struct pmu_events_map *map = pmu_events_map__find(); + const struct pmu_events_map *map = pmu_events_map__find(); return parse_groups(perf_evlist, str, metric_no_group, metric_no_merge, NULL, metric_events, map); } int metricgroup__parse_groups_test(struct evlist *evlist, - struct pmu_events_map *map, + const struct pmu_events_map *map, const char *str, bool metric_no_group, bool metric_no_merge, @@ -1285,7 +1285,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist, bool metricgroup__has_metric(const char *metric) { - struct pmu_events_map *map = pmu_events_map__find(); + const struct pmu_events_map *map = pmu_events_map__find(); struct pmu_event *pe; int i; diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index cc4a92492a61..c931596557bf 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -44,9 +44,9 @@ int metricgroup__parse_groups(const struct option *opt, bool metric_no_merge, struct rblist *metric_events); struct pmu_event *metricgroup__find_metric(const char *metric, - struct pmu_events_map *map); + const struct pmu_events_map *map); int metricgroup__parse_groups_test(struct evlist *evlist, - struct pmu_events_map *map, + const struct pmu_events_map *map, const char *str, bool metric_no_group, bool metric_no_merge, diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index bdabd62170d2..4bcdc595ce5e 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -710,9 +710,9 @@ static char *perf_pmu__getcpuid(struct perf_pmu *pmu) return cpuid; } -struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) +const struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) { - 
struct pmu_events_map *map; + const struct pmu_events_map *map; char *cpuid = perf_pmu__getcpuid(pmu); int i; @@ -737,7 +737,7 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) return map; } -struct pmu_events_map *__weak pmu_events_map__find(void) +const struct pmu_events_map *__weak pmu_events_map__find(void) { return perf_pmu__find_map(NULL); } @@ -824,7 +824,7 @@ out: * as aliases. */ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, - struct pmu_events_map *map) + const struct pmu_events_map *map) { int i; const char *name = pmu->name; @@ -859,7 +859,7 @@ new_alias: static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) { - struct pmu_events_map *map; + const struct pmu_events_map *map; map = perf_pmu__find_map(pmu); if (!map) diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 394898b07fd9..dd5cdde6a3d0 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -120,10 +120,10 @@ int perf_pmu__test(void); struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu); void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, - struct pmu_events_map *map); + const struct pmu_events_map *map); -struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); -struct pmu_events_map *pmu_events_map__find(void); +const struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); +const struct pmu_events_map *pmu_events_map__find(void); bool pmu_uncore_alias_match(const char *pmu_name, const char *name); void perf_pmu_free_alias(struct perf_pmu_alias *alias); diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 08ec3c3ae0ee..13f33d1ddb78 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -135,7 +135,7 @@ static int get_counterset_start(int setnr) * the name of this counter. * If no match is found a NULL pointer is returned. 
*/ -static const char *get_counter_name(int set, int nr, struct pmu_events_map *map) +static const char *get_counter_name(int set, int nr, const struct pmu_events_map *map) { int rc, event_nr, wanted = get_counterset_start(set) + nr; @@ -159,7 +159,7 @@ static void s390_cpumcfdg_dump(struct perf_sample *sample) unsigned char *buf = sample->raw_data; const char *color = PERF_COLOR_BLUE; struct cf_ctrset_entry *cep, ce; - struct pmu_events_map *map; + const struct pmu_events_map *map; u64 *p; map = pmu_events_map__find(); -- cgit v1.2.3 From 857974a6422de25ce115204fbc5bafd3871c4871 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 Oct 2021 10:21:14 -0700 Subject: perf pmu: Make pmu_sys_event_tables const. Make the lookup nature of the data structures clearer through their type. Reviewed-by: John Garry Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kajol Jain Cc: Kan Liang Cc: Kees Cook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-4-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 2 +- tools/perf/pmu-events/pmu-events.h | 2 +- tools/perf/tests/pmu-events.c | 2 +- tools/perf/util/pmu.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index bff8f92733fe..cff06604f169 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -832,7 +832,7
@@ static void print_mapping_test_table(FILE *outfp) static void print_system_event_mapping_table_prefix(FILE *outfp) { - fprintf(outfp, "\nstruct pmu_sys_events pmu_sys_event_tables[] = {"); + fprintf(outfp, "\nconst struct pmu_sys_events pmu_sys_event_tables[] = {"); } static void print_system_event_mapping_table_suffix(FILE *outfp) diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 42c6db6bedec..f6c9c9fc4ab2 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -54,6 +54,6 @@ struct pmu_sys_events { * table of PMU events. */ extern const struct pmu_events_map pmu_events_map[]; -extern struct pmu_sys_events pmu_sys_event_tables[]; +extern const struct pmu_sys_events pmu_sys_event_tables[]; #endif diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 8a1fdcd072f5..c0f8b61871c8 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -258,7 +258,7 @@ static const struct pmu_events_map *__test_pmu_get_events_map(void) static struct pmu_event *__test_pmu_get_sys_events_table(void) { - struct pmu_sys_events *tables = &pmu_sys_event_tables[0]; + const struct pmu_sys_events *tables = &pmu_sys_event_tables[0]; for ( ; tables->name; tables++) { if (!strcmp("pme_test_soc_sys", tables->name)) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 4bcdc595ce5e..c04a89cc7cef 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -873,7 +873,7 @@ void pmu_for_each_sys_event(pmu_sys_event_iter_fn fn, void *data) int i = 0; while (1) { - struct pmu_sys_events *event_table; + const struct pmu_sys_events *event_table; int j = 0; event_table = &pmu_sys_event_tables[i++]; -- cgit v1.2.3 From 47f572aad5f4d9f58abe323912c4477e5bc67751 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 Oct 2021 10:21:15 -0700 Subject: perf pmu: Make pmu_event tables const. Make lookup nature of data structures clearer through their type. 
Reduce the scope of architecture-specific pmu_event tables by making them static. Suggested-by: John Garry Reviewed-by: John Garry Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kajol Jain Cc: Kan Liang Cc: Kees Cook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/header.c | 2 +- tools/perf/pmu-events/jevents.c | 2 +- tools/perf/pmu-events/pmu-events.h | 4 ++-- tools/perf/tests/pmu-events.c | 16 ++++++++-------- tools/perf/util/metricgroup.c | 36 +++++++++++++++++------------------ tools/perf/util/metricgroup.h | 6 +++--- tools/perf/util/pmu.c | 8 ++++---- tools/perf/util/pmu.h | 2 +- tools/perf/util/s390-sample-raw.c | 2 +- 9 files changed, 39 insertions(+), 39 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index 58b2d610aadb..e8fe36b10d20 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -40,7 +40,7 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused) return bufp; } -int arch_get_runtimeparam(struct pmu_event *pe) +int arch_get_runtimeparam(const struct pmu_event *pe) { int count; char path[PATH_MAX] = "/devices/hv_24x7/interface/"; diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index cff06604f169..26b0ba6b6395 100644 --- a/tools/perf/pmu-events/jevents.c +++
b/tools/perf/pmu-events/jevents.c @@ -362,7 +362,7 @@ static int close_table; static void print_events_table_prefix(FILE *fp, const char *tblname) { - fprintf(fp, "struct pmu_event %s[] = {\n", tblname); + fprintf(fp, "static const struct pmu_event %s[] = {\n", tblname); close_table = 1; } diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index f6c9c9fc4ab2..6efe73976440 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -41,12 +41,12 @@ struct pmu_events_map { const char *cpuid; const char *version; const char *type; /* core, uncore etc */ - struct pmu_event *table; + const struct pmu_event *table; }; struct pmu_sys_events { const char *name; - struct pmu_event *table; + const struct pmu_event *table; }; /* diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index c0f8b61871c8..cc5cea141beb 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -256,7 +256,7 @@ static const struct pmu_events_map *__test_pmu_get_events_map(void) return NULL; } -static struct pmu_event *__test_pmu_get_sys_events_table(void) +static const struct pmu_event *__test_pmu_get_sys_events_table(void) { const struct pmu_sys_events *tables = &pmu_sys_event_tables[0]; @@ -268,7 +268,7 @@ static struct pmu_event *__test_pmu_get_sys_events_table(void) return NULL; } -static int compare_pmu_events(struct pmu_event *e1, const struct pmu_event *e2) +static int compare_pmu_events(const struct pmu_event *e1, const struct pmu_event *e2) { if (!is_same(e1->name, e2->name)) { pr_debug2("testing event e1 %s: mismatched name string, %s vs %s\n", @@ -420,9 +420,9 @@ static int compare_alias_to_test_event(struct perf_pmu_alias *alias, /* Verify generated events from pmu-events.c are as expected */ static int test_pmu_event_table(void) { - struct pmu_event *sys_event_tables = __test_pmu_get_sys_events_table(); + const struct pmu_event *sys_event_tables = 
__test_pmu_get_sys_events_table(); const struct pmu_events_map *map = __test_pmu_get_events_map(); - struct pmu_event *table; + const struct pmu_event *table; int map_events = 0, expected_events; /* ignore 3x sentinels */ @@ -774,7 +774,7 @@ static int check_parse_id(const char *id, struct parse_events_error *error, return ret; } -static int check_parse_cpu(const char *id, bool same_cpu, struct pmu_event *pe) +static int check_parse_cpu(const char *id, bool same_cpu, const struct pmu_event *pe) { struct parse_events_error error = { .idx = 0, }; @@ -838,7 +838,7 @@ static int resolve_metric_simple(struct expr_parse_ctx *pctx, all = true; hashmap__for_each_entry_safe(pctx->ids, cur, cur_tmp, bkt) { struct metric_ref *ref; - struct pmu_event *pe; + const struct pmu_event *pe; pe = metricgroup__find_metric(cur->key, map); if (!pe) @@ -887,7 +887,7 @@ static int test_parsing(void) { const struct pmu_events_map *cpus_map = pmu_events_map__find(); const struct pmu_events_map *map; - struct pmu_event *pe; + const struct pmu_event *pe; int i, j, k; int ret = 0; struct expr_parse_ctx *ctx; @@ -1028,7 +1028,7 @@ out: static int test_parsing_fake(void) { const struct pmu_events_map *map; - struct pmu_event *pe; + const struct pmu_event *pe; unsigned int i, j; int err = 0; diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 74ea0a3540ce..b60ccbbf0829 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -427,7 +427,7 @@ static bool match_metric(const char *n, const char *list) return false; } -static bool match_pe_metric(struct pmu_event *pe, const char *metric) +static bool match_pe_metric(const struct pmu_event *pe, const char *metric) { return match_metric(pe->metric_group, metric) || match_metric(pe->metric_name, metric); @@ -511,7 +511,7 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw) putchar('\n'); } -static int metricgroup__print_pmu_event(struct pmu_event *pe, +static int 
metricgroup__print_pmu_event(const struct pmu_event *pe, bool metricgroups, char *filter, bool raw, bool details, struct rblist *groups, @@ -586,14 +586,14 @@ struct metricgroup_print_sys_idata { bool details; }; -typedef int (*metricgroup_sys_event_iter_fn)(struct pmu_event *pe, void *); +typedef int (*metricgroup_sys_event_iter_fn)(const struct pmu_event *pe, void *); struct metricgroup_iter_data { metricgroup_sys_event_iter_fn fn; void *data; }; -static int metricgroup__sys_event_iter(struct pmu_event *pe, void *data) +static int metricgroup__sys_event_iter(const struct pmu_event *pe, void *data) { struct metricgroup_iter_data *d = data; struct perf_pmu *pmu = NULL; @@ -612,7 +612,7 @@ static int metricgroup__sys_event_iter(struct pmu_event *pe, void *data) return 0; } -static int metricgroup__print_sys_event_iter(struct pmu_event *pe, void *data) +static int metricgroup__print_sys_event_iter(const struct pmu_event *pe, void *data) { struct metricgroup_print_sys_idata *d = data; @@ -624,7 +624,7 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, bool raw, bool details) { const struct pmu_events_map *map = pmu_events_map__find(); - struct pmu_event *pe; + const struct pmu_event *pe; int i; struct rblist groups; struct rb_node *node, *next; @@ -756,7 +756,7 @@ static void metricgroup___watchdog_constraint_hint(const char *name, bool foot) " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); } -static bool metricgroup__has_constraint(struct pmu_event *pe) +static bool metricgroup__has_constraint(const struct pmu_event *pe) { if (!pe->metric_constraint) return false; @@ -770,7 +770,7 @@ static bool metricgroup__has_constraint(struct pmu_event *pe) return false; } -int __weak arch_get_runtimeparam(struct pmu_event *pe __maybe_unused) +int __weak arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused) { return 1; } @@ -785,7 +785,7 @@ struct metricgroup_add_iter_data { }; static int __add_metric(struct list_head *metric_list, - struct 
pmu_event *pe, + const struct pmu_event *pe, bool metric_no_group, int runtime, struct metric **mp, @@ -909,10 +909,10 @@ static int __add_metric(struct list_head *metric_list, (match_metric(__pe->metric_group, __metric) || \ match_metric(__pe->metric_name, __metric))) -struct pmu_event *metricgroup__find_metric(const char *metric, - const struct pmu_events_map *map) +const struct pmu_event *metricgroup__find_metric(const char *metric, + const struct pmu_events_map *map) { - struct pmu_event *pe; + const struct pmu_event *pe; int i; map_for_each_event(pe, i, map) { @@ -968,7 +968,7 @@ static int recursion_check(struct metric *m, const char *id, struct expr_id **pa } static int add_metric(struct list_head *metric_list, - struct pmu_event *pe, + const struct pmu_event *pe, bool metric_no_group, struct metric **mp, struct expr_id *parent, @@ -993,7 +993,7 @@ static int __resolve_metric(struct metric *m, all = true; hashmap__for_each_entry(m->pctx->ids, cur, bkt) { struct expr_id *parent; - struct pmu_event *pe; + const struct pmu_event *pe; pe = metricgroup__find_metric(cur->key, map); if (!pe) @@ -1040,7 +1040,7 @@ static int resolve_metric(bool metric_no_group, } static int add_metric(struct list_head *metric_list, - struct pmu_event *pe, + const struct pmu_event *pe, bool metric_no_group, struct metric **m, struct expr_id *parent, @@ -1070,7 +1070,7 @@ static int add_metric(struct list_head *metric_list, return ret; } -static int metricgroup__add_metric_sys_event_iter(struct pmu_event *pe, +static int metricgroup__add_metric_sys_event_iter(const struct pmu_event *pe, void *data) { struct metricgroup_add_iter_data *d = data; @@ -1102,7 +1102,7 @@ static int metricgroup__add_metric(const char *metric, bool metric_no_group, const struct pmu_events_map *map) { struct expr_ids ids = { .cnt = 0, }; - struct pmu_event *pe; + const struct pmu_event *pe; struct metric *m; LIST_HEAD(list); int i, ret; @@ -1286,7 +1286,7 @@ int metricgroup__parse_groups_test(struct evlist 
*evlist, bool metricgroup__has_metric(const char *metric) { const struct pmu_events_map *map = pmu_events_map__find(); - struct pmu_event *pe; + const struct pmu_event *pe; int i; if (!map) diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index c931596557bf..88ba939a3082 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -43,8 +43,8 @@ int metricgroup__parse_groups(const struct option *opt, bool metric_no_group, bool metric_no_merge, struct rblist *metric_events); -struct pmu_event *metricgroup__find_metric(const char *metric, - const struct pmu_events_map *map); +const struct pmu_event *metricgroup__find_metric(const char *metric, + const struct pmu_events_map *map); int metricgroup__parse_groups_test(struct evlist *evlist, const struct pmu_events_map *map, const char *str, @@ -55,7 +55,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist, void metricgroup__print(bool metrics, bool groups, char *filter, bool raw, bool details); bool metricgroup__has_metric(const char *metric); -int arch_get_runtimeparam(struct pmu_event *pe __maybe_unused); +int arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused); void metricgroup__rblist_exit(struct rblist *metric_events); int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index c04a89cc7cef..cdd6c3f6caf1 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -315,7 +315,7 @@ static bool perf_pmu_merge_alias(struct perf_pmu_alias *newalias, } static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, - char *desc, char *val, struct pmu_event *pe) + char *desc, char *val, const struct pmu_event *pe) { struct parse_events_term *term; struct perf_pmu_alias *alias; @@ -834,7 +834,7 @@ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu, i = 0; while (1) { const char *cpu_name = is_arm_pmu_core(name) ? 
name : "cpu"; - struct pmu_event *pe = &map->table[i++]; + const struct pmu_event *pe = &map->table[i++]; const char *pname = pe->pmu ? pe->pmu : cpu_name; if (!pe->name) { @@ -882,7 +882,7 @@ void pmu_for_each_sys_event(pmu_sys_event_iter_fn fn, void *data) break; while (1) { - struct pmu_event *pe = &event_table->table[j++]; + const struct pmu_event *pe = &event_table->table[j++]; int ret; if (!pe->name && !pe->metric_group && !pe->metric_name) @@ -900,7 +900,7 @@ struct pmu_sys_event_iter_data { struct perf_pmu *pmu; }; -static int pmu_add_sys_aliases_iter_fn(struct pmu_event *pe, void *data) +static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe, void *data) { struct pmu_sys_event_iter_data *idata = data; struct perf_pmu *pmu = idata->pmu; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index dd5cdde6a3d0..cc9f9e001347 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -127,7 +127,7 @@ const struct pmu_events_map *pmu_events_map__find(void); bool pmu_uncore_alias_match(const char *pmu_name, const char *name); void perf_pmu_free_alias(struct perf_pmu_alias *alias); -typedef int (*pmu_sys_event_iter_fn)(struct pmu_event *pe, void *data); +typedef int (*pmu_sys_event_iter_fn)(const struct pmu_event *pe, void *data); void pmu_for_each_sys_event(pmu_sys_event_iter_fn fn, void *data); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 13f33d1ddb78..cd3a34840389 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -140,7 +140,7 @@ static const char *get_counter_name(int set, int nr, const struct pmu_events_map int rc, event_nr, wanted = get_counterset_start(set) + nr; if (map) { - struct pmu_event *evp = map->table; + const struct pmu_event *evp = map->table; for (; evp->name || evp->event || evp->desc; ++evp) { if (evp->name == NULL || evp->event == NULL) -- cgit v1.2.3 From 
fa831fbb430853ad8c1abb18001dc87bed3cf52b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 Oct 2021 10:21:16 -0700 Subject: perf metric: Move runtime value to the expr context The runtime value is needed when recursively parsing metrics; currently a value of 1 is passed, which is incorrect. Rather than add more arguments to the bison parser, add runtime to the context. Update the call sites so that they no longer pass a value. The runtime value is defaulted to 0, which is arbitrary. In some places this replaces a value of 1, which was also arbitrary. This shouldn't affect anything other than PPC. The choice of 0 or 1 shouldn't matter, as a proper runtime value would be needed in any case where it did matter. Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Kees Cook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-6-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expr.c | 15 ++++++++------- tools/perf/tests/pmu-events.c | 10 +++++----- tools/perf/util/expr.c | 15 ++++++++------- tools/perf/util/expr.h | 5 +++-- tools/perf/util/metricgroup.c | 7 +++---- tools/perf/util/stat-shadow.c | 7 ++++--- 6 files changed, 31 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index f1d8411fce12..3c16f3df1980 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -56,7 +56,7 @@ static int test(struct
expr_parse_ctx *ctx, const char *e, double val2) { double val; - if (expr__parse(&val, ctx, e, 1)) + if (expr__parse(&val, ctx, e)) TEST_ASSERT_VAL("parse test failed", 0); TEST_ASSERT_VAL("unexpected value", val == val2); return 0; @@ -104,17 +104,17 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) } p = "FOO/0"; - ret = expr__parse(&val, ctx, p, 1); + ret = expr__parse(&val, ctx, p); TEST_ASSERT_VAL("division by zero", ret == -1); p = "BAR/"; - ret = expr__parse(&val, ctx, p, 1); + ret = expr__parse(&val, ctx, p); TEST_ASSERT_VAL("missing operand", ret == -1); expr__ctx_clear(ctx); TEST_ASSERT_VAL("find ids", expr__find_ids("FOO + BAR + BAZ + BOZO", "FOO", - ctx, 1) == 0); + ctx) == 0); TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 3); TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "BAR", (void **)&val_ptr)); @@ -124,9 +124,10 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) (void **)&val_ptr)); expr__ctx_clear(ctx); + ctx->runtime = 3; TEST_ASSERT_VAL("find ids", expr__find_ids("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@", - NULL, ctx, 3) == 0); + NULL, ctx) == 0); TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 2); TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT1,param=3/", (void **)&val_ptr)); @@ -137,7 +138,7 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) expr__ctx_clear(ctx); TEST_ASSERT_VAL("find ids", expr__find_ids("EVENT1 if #smt_on else EVENT2", - NULL, ctx, 0) == 0); + NULL, ctx) == 0); TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 1); TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, smt_on() ? 
"EVENT1" : "EVENT2", @@ -147,7 +148,7 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) expr__ctx_clear(ctx); TEST_ASSERT_VAL("find ids", expr__find_ids("1.0 if EVENT1 > 100.0 else 1.0", - NULL, ctx, 0) == 0); + NULL, ctx) == 0); TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 0); expr__ctx_free(ctx); diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index cc5cea141beb..71b08c296410 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -866,7 +866,7 @@ static int resolve_metric_simple(struct expr_parse_ctx *pctx, ref->metric_expr = pe->metric_expr; list_add_tail(&metric->list, compound_list); - rc = expr__find_ids(pe->metric_expr, NULL, pctx, 0); + rc = expr__find_ids(pe->metric_expr, NULL, pctx); if (rc) goto out_err; break; /* The hashmap has been modified, so restart */ @@ -916,7 +916,7 @@ static int test_parsing(void) if (!pe->metric_expr) continue; expr__ctx_clear(ctx); - if (expr__find_ids(pe->metric_expr, NULL, ctx, 0) < 0) { + if (expr__find_ids(pe->metric_expr, NULL, ctx) < 0) { expr_failure("Parse find ids failed", map, pe); ret++; continue; @@ -949,7 +949,7 @@ static int test_parsing(void) free(metric); } - if (expr__parse(&result, ctx, pe->metric_expr, 0)) { + if (expr__parse(&result, ctx, pe->metric_expr)) { expr_failure("Parse failed", map, pe); ret++; } @@ -989,7 +989,7 @@ static int metric_parse_fake(const char *str) pr_debug("expr__ctx_new failed"); return TEST_FAIL; } - if (expr__find_ids(str, NULL, ctx, 0) < 0) { + if (expr__find_ids(str, NULL, ctx) < 0) { pr_err("expr__find_ids failed\n"); return -1; } @@ -1010,7 +1010,7 @@ static int metric_parse_fake(const char *str) } } - if (expr__parse(&result, ctx, str, 0)) + if (expr__parse(&result, ctx, str)) pr_err("expr__parse failed\n"); else ret = 0; diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index db2445677c8c..62fb39fd4d9d 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ 
-246,7 +246,7 @@ int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, data->ref.metric_name); pr_debug("processing metric: %s ENTRY\n", id); data->kind = EXPR_ID_DATA__REF_VALUE; - if (expr__parse(&data->ref.val, ctx, data->ref.metric_expr, 1)) { + if (expr__parse(&data->ref.val, ctx, data->ref.metric_expr)) { pr_debug("%s failed to count\n", id); return -1; } @@ -284,6 +284,7 @@ struct expr_parse_ctx *expr__ctx_new(void) ctx->ids = hashmap__new(key_hash, key_equal, NULL); ctx->parent = NULL; + ctx->runtime = 0; return ctx; } @@ -314,10 +315,10 @@ void expr__ctx_free(struct expr_parse_ctx *ctx) static int __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr, - bool compute_ids, int runtime) + bool compute_ids) { struct expr_scanner_ctx scanner_ctx = { - .runtime = runtime, + .runtime = ctx->runtime, }; YY_BUFFER_STATE buffer; void *scanner; @@ -345,15 +346,15 @@ __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr, } int expr__parse(double *final_val, struct expr_parse_ctx *ctx, - const char *expr, int runtime) + const char *expr) { - return __expr__parse(final_val, ctx, expr, /*compute_ids=*/false, runtime) ? -1 : 0; + return __expr__parse(final_val, ctx, expr, /*compute_ids=*/false) ? 
-1 : 0; } int expr__find_ids(const char *expr, const char *one, - struct expr_parse_ctx *ctx, int runtime) + struct expr_parse_ctx *ctx) { - int ret = __expr__parse(NULL, ctx, expr, /*compute_ids=*/true, runtime); + int ret = __expr__parse(NULL, ctx, expr, /*compute_ids=*/true); if (one) expr__del_id(ctx, one); diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index b20513f0ae59..124475a4f245 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -21,6 +21,7 @@ struct expr_id { struct expr_parse_ctx { struct hashmap *ids; struct expr_id *parent; + int runtime; }; struct expr_id_data; @@ -52,10 +53,10 @@ int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **datap); int expr__parse(double *final_val, struct expr_parse_ctx *ctx, - const char *expr, int runtime); + const char *expr); int expr__find_ids(const char *expr, const char *one, - struct expr_parse_ctx *ids, int runtime); + struct expr_parse_ctx *ids); double expr_id_data__value(const struct expr_id_data *data); struct expr_id *expr_id_data__parent(struct expr_id_data *data); diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index b60ccbbf0829..139f4a793f92 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -124,7 +124,6 @@ struct metric { const char *metric_unit; struct list_head metric_refs; int metric_refs_cnt; - int runtime; bool has_constraint; }; @@ -391,7 +390,7 @@ static int metricgroup__setup_events(struct list_head *groups, expr->metric_name = m->metric_name; expr->metric_unit = m->metric_unit; expr->metric_events = metric_events; - expr->runtime = m->runtime; + expr->runtime = m->pctx->runtime; list_add(&expr->nd, &me->head); } @@ -812,7 +811,7 @@ static int __add_metric(struct list_head *metric_list, m->metric_name = pe->metric_name; m->metric_expr = pe->metric_expr; m->metric_unit = pe->unit; - m->runtime = runtime; + m->pctx->runtime = runtime; m->has_constraint = metric_no_group || 
metricgroup__has_constraint(pe); INIT_LIST_HEAD(&m->metric_refs); m->metric_refs_cnt = 0; @@ -862,7 +861,7 @@ static int __add_metric(struct list_head *metric_list, * For both the parent and referenced metrics, we parse * all the metric's IDs and add it to the parent context. */ - if (expr__find_ids(pe->metric_expr, NULL, m->pctx, runtime) < 0) { + if (expr__find_ids(pe->metric_expr, NULL, m->pctx) < 0) { if (m->metric_refs_cnt == 0) { expr__ctx_free(m->pctx); free(m); diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 9bc841e09a0c..20f1b9d0f272 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -394,7 +394,7 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) if (!metric_events) { if (expr__find_ids(counter->metric_expr, counter->name, - ctx, 1) < 0) + ctx) < 0) continue; metric_events = calloc(sizeof(struct evsel *), @@ -894,13 +894,14 @@ static void generic_metric(struct perf_stat_config *config, if (!pctx) return; + pctx->runtime = runtime; i = prepare_metric(metric_events, metric_refs, pctx, cpu, st); if (i < 0) { expr__ctx_free(pctx); return; } if (!metric_events[i]) { - if (expr__parse(&ratio, pctx, metric_expr, runtime) == 0) { + if (expr__parse(&ratio, pctx, metric_expr) == 0) { char *unit; char metric_bf[64]; @@ -951,7 +952,7 @@ double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_sta if (prepare_metric(mexp->metric_events, mexp->metric_refs, pctx, cpu, st) < 0) goto out; - if (expr__parse(&ratio, pctx, mexp->metric_expr, 1)) + if (expr__parse(&ratio, pctx, mexp->metric_expr)) ratio = 0.0; out: -- cgit v1.2.3 From 68074811dfb9529bb7cade0e67d42c7f7bf209e6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 Oct 2021 10:21:17 -0700 Subject: perf metric: Add documentation and rename a variable. Documentation to make current functionality clearer. 
Rename the variable 'metric' to 'metric_name', because 'metric' leaves it ambiguous whether the string is the name of a metric or its expression. Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Kees Cook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-7-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 59 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 54 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 139f4a793f92..3e5f02938452 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -776,13 +776,27 @@ int __weak arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused) struct metricgroup_add_iter_data { struct list_head *metric_list; - const char *metric; + const char *metric_name; struct expr_ids *ids; int *ret; bool *has_match; bool metric_no_group; }; +/** + * __add_metric - Add a metric to metric_list. + * @metric_list: The list the metric is added to. + * @pe: The pmu_event containing the metric to be added. + * @metric_no_group: Should events written to events be grouped "{}" or + * global. Grouping is the default but due to multiplexing the + * user may override. + * @runtime: A special argument for the parser only known at runtime.
+ * @mp: The pointer to a location holding the first metric added to metric + * list. It is initialized here if this is the first metric. + * @parent: The last entry in a linked list of metrics being + * added/resolved. This is maintained to detect recursion. + * @ids: Storage for parent list. + */ static int __add_metric(struct list_head *metric_list, const struct pmu_event *pe, bool metric_no_group, @@ -1076,7 +1090,7 @@ static int metricgroup__add_metric_sys_event_iter(const struct pmu_event *pe, struct metric *m = NULL; int ret; - if (!match_pe_metric(pe, d->metric)) + if (!match_pe_metric(pe, d->metric_name)) return 0; ret = add_metric(d->metric_list, pe, d->metric_no_group, &m, NULL, d->ids); @@ -1095,7 +1109,22 @@ out: return ret; } -static int metricgroup__add_metric(const char *metric, bool metric_no_group, +/** + * metricgroup__add_metric - Find and add a metric, or a metric group. + * @metric_name: The name of the metric or metric group. For example, "IPC" + * could be the name of a metric and "TopDownL1" the name of a + * metric group. + * @metric_no_group: Should events written to events be grouped "{}" or + * global. Grouping is the default but due to multiplexing the + * user may override. + * @events: an out argument string of events that need to be parsed and + * associated with the metric. For example, the metric "IPC" would + * create an events string like "{instructions,cycles}:W". + * @metric_list: The list that the metric or metric group are added to. + * @map: The map that is searched for metrics, most commonly the table for the + * architecture perf is running upon. 
+ */ +static int metricgroup__add_metric(const char *metric_name, bool metric_no_group, struct strbuf *events, struct list_head *metric_list, const struct pmu_events_map *map) @@ -1107,7 +1136,11 @@ static int metricgroup__add_metric(const char *metric, bool metric_no_group, int i, ret; bool has_match = false; - map_for_each_metric(pe, i, map, metric) { + /* + * Iterate over all metrics seeing if metric matches either the name or + * group. When it does add the metric to the list. + */ + map_for_each_metric(pe, i, map, metric_name) { has_match = true; m = NULL; @@ -1130,7 +1163,7 @@ static int metricgroup__add_metric(const char *metric, bool metric_no_group, .fn = metricgroup__add_metric_sys_event_iter, .data = (void *) &(struct metricgroup_add_iter_data) { .metric_list = &list, - .metric = metric, + .metric_name = metric_name, .metric_no_group = metric_no_group, .ids = &ids, .has_match = &has_match, @@ -1169,6 +1202,22 @@ out: return ret; } +/** + * metricgroup__add_metric_list - Find and add metrics, or metric groups, + * specified in a list. + * @list: the list of metrics or metric groups. For example, "IPC,CPI,TopDownL1" + * would match the IPC and CPI metrics, and TopDownL1 would match all + * the metrics in the TopDownL1 group. + * @metric_no_group: Should events written to events be grouped "{}" or + * global. Grouping is the default but due to multiplexing the + * user may override. + * @events: an out argument string of events that need to be parsed and + * associated with the metric. For example, the metric "IPC" would + * create an events string like "{instructions,cycles}:W". + * @metric_list: The list that metrics are added to. + * @map: The map that is searched for metrics, most commonly the table for the + * architecture perf is running upon. 
+ */ static int metricgroup__add_metric_list(const char *list, bool metric_no_group, struct strbuf *events, struct list_head *metric_list, -- cgit v1.2.3 From 3d81d761a518c6f5d5a084a7356470d5dbb0d870 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 Oct 2021 10:21:18 -0700 Subject: perf metric: Add metric new() and free() methods Metrics are complex enough that a new/free reduces the risk of memory leaks. Move static functions used in new. Reviewed-by: John Garry Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kajol Jain Cc: Kan Liang Cc: Kees Kook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-8-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 137 +++++++++++++++++++++++------------------- 1 file changed, 75 insertions(+), 62 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 3e5f02938452..e4ce19389258 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -127,6 +127,78 @@ struct metric { bool has_constraint; }; +static void metricgroup___watchdog_constraint_hint(const char *name, bool foot) +{ + static bool violate_nmi_constraint; + + if (!foot) { + pr_warning("Splitting metric group %s into standalone metrics.\n", name); + violate_nmi_constraint = true; + return; + } + + if (!violate_nmi_constraint) + return; + + pr_warning("Try disabling the NMI watchdog to comply 
NO_NMI_WATCHDOG metric constraint:\n" + " echo 0 > /proc/sys/kernel/nmi_watchdog\n" + " perf stat ...\n" + " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); +} + +static bool metricgroup__has_constraint(const struct pmu_event *pe) +{ + if (!pe->metric_constraint) + return false; + + if (!strcmp(pe->metric_constraint, "NO_NMI_WATCHDOG") && + sysctl__nmi_watchdog_enabled()) { + metricgroup___watchdog_constraint_hint(pe->metric_name, false); + return true; + } + + return false; +} + +static struct metric *metric__new(const struct pmu_event *pe, + bool metric_no_group, + int runtime) +{ + struct metric *m; + + m = zalloc(sizeof(*m)); + if (!m) + return NULL; + + m->pctx = expr__ctx_new(); + if (!m->pctx) { + free(m); + return NULL; + } + + m->metric_name = pe->metric_name; + m->metric_expr = pe->metric_expr; + m->metric_unit = pe->unit; + m->pctx->runtime = runtime; + m->has_constraint = metric_no_group || metricgroup__has_constraint(pe); + INIT_LIST_HEAD(&m->metric_refs); + m->metric_refs_cnt = 0; + + return m; +} + +static void metric__free(struct metric *m) +{ + struct metric_ref_node *ref, *tmp; + + list_for_each_entry_safe(ref, tmp, &m->metric_refs, list) { + list_del(&ref->list); + free(ref); + } + expr__ctx_free(m->pctx); + free(m); +} + #define RECURSION_ID_MAX 1000 struct expr_ids { @@ -736,39 +808,6 @@ static void metricgroup__add_metric_non_group(struct strbuf *events, } } -static void metricgroup___watchdog_constraint_hint(const char *name, bool foot) -{ - static bool violate_nmi_constraint; - - if (!foot) { - pr_warning("Splitting metric group %s into standalone metrics.\n", name); - violate_nmi_constraint = true; - return; - } - - if (!violate_nmi_constraint) - return; - - pr_warning("Try disabling the NMI watchdog to comply NO_NMI_WATCHDOG metric constraint:\n" - " echo 0 > /proc/sys/kernel/nmi_watchdog\n" - " perf stat ...\n" - " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); -} - -static bool metricgroup__has_constraint(const struct pmu_event *pe) -{ - if 
(!pe->metric_constraint) - return false; - - if (!strcmp(pe->metric_constraint, "NO_NMI_WATCHDOG") && - sysctl__nmi_watchdog_enabled()) { - metricgroup___watchdog_constraint_hint(pe->metric_name, false); - return true; - } - - return false; -} - int __weak arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused) { return 1; @@ -813,23 +852,10 @@ static int __add_metric(struct list_head *metric_list, * We got in here for the parent group, * allocate it and put it on the list. */ - m = zalloc(sizeof(*m)); + m = metric__new(pe, metric_no_group, runtime); if (!m) return -ENOMEM; - m->pctx = expr__ctx_new(); - if (!m->pctx) { - free(m); - return -ENOMEM; - } - m->metric_name = pe->metric_name; - m->metric_expr = pe->metric_expr; - m->metric_unit = pe->unit; - m->pctx->runtime = runtime; - m->has_constraint = metric_no_group || metricgroup__has_constraint(pe); - INIT_LIST_HEAD(&m->metric_refs); - m->metric_refs_cnt = 0; - parent = expr_ids__alloc(ids); if (!parent) { free(m); @@ -877,8 +903,7 @@ static int __add_metric(struct list_head *metric_list, */ if (expr__find_ids(pe->metric_expr, NULL, m->pctx) < 0) { if (m->metric_refs_cnt == 0) { - expr__ctx_free(m->pctx); - free(m); + metric__free(m); *mp = NULL; } return -EINVAL; @@ -1251,25 +1276,13 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group, return ret; } -static void metric__free_refs(struct metric *metric) -{ - struct metric_ref_node *ref, *tmp; - - list_for_each_entry_safe(ref, tmp, &metric->metric_refs, list) { - list_del(&ref->list); - free(ref); - } -} - static void metricgroup__free_metrics(struct list_head *metric_list) { struct metric *m, *tmp; list_for_each_entry_safe (m, tmp, metric_list, nd) { - metric__free_refs(m); - expr__ctx_free(m->pctx); list_del_init(&m->nd); - free(m); + metric__free(m); } } -- cgit v1.2.3 From a3de76903dd0786a8661e9e6eb9054a7519e10e7 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 Oct 2021 10:21:19 -0700 Subject: perf metric: Only 
add a referenced metric once If a metric references other metrics then the same other metrics may be referenced more than once, but the events and metric ref are only needed once. An example of this is in tests/parse-metric.c where DCache_L2_Hits references the metric DCache_L2_All_Hits twice, once directly and once through DCache_L2_All. Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Kees Kook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-9-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index e4ce19389258..6c4c51e35aa7 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -870,12 +870,18 @@ static int __add_metric(struct list_head *metric_list, *mp = m; } else { /* - * We got here for the referenced metric, via the - * recursive metricgroup__add_metric call, add - * it to the parent group. + * This metric was referenced in a metric higher in the + * tree. Check if the same metric is already resolved in the + * metric_refs list. */ m = *mp; + list_for_each_entry(ref, &m->metric_refs, list) { + if (!strcmp(pe->metric_name, ref->metric_name)) + return 0; + } + + /*Add the new referenced metric to the pare the parent group. 
*/ ref = malloc(sizeof(*ref)); if (!ref) return -ENOMEM; -- cgit v1.2.3 From 80be6434c36f40d82c26035b949d78d845fec044 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 Oct 2021 10:21:20 -0700 Subject: perf metric: Modify resolution and recursion check Modify resolution. Rather than resolving a list of metrics, resolve a metric immediately after it is added. This simplifies knowing the root of the metric's tree so that IDs may be associated with it. A bug in the current implementation is that all the IDs were placed on the first metric in a metric group. Rather than maintain data on IDs' parents to detect cycles, maintain a list of visited metrics and detect cycles if the same metric is visited twice. Only place the root metric onto the list of metrics. Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Kees Kook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-10-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expr.c | 10 +- tools/perf/util/expr.c | 26 +-- tools/perf/util/expr.h | 9 +- tools/perf/util/expr.y | 2 +- tools/perf/util/metricgroup.c | 402 ++++++++++++++++++------------------------ 5 files changed, 179 insertions(+), 270 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 3c16f3df1980..718c13e5a0f4 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -24,8 
+24,8 @@ static int test_ids_union(void) ids2 = ids__new(); TEST_ASSERT_VAL("ids__new", ids2); - TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids1, strdup("foo"), NULL), 0); - TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids1, strdup("bar"), NULL), 0); + TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids1, strdup("foo")), 0); + TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids1, strdup("bar")), 0); ids1 = ids__union(ids1, ids2); TEST_ASSERT_EQUAL("union", (int)hashmap__size(ids1), 2); @@ -33,7 +33,7 @@ static int test_ids_union(void) /* Union {foo, bar} against {foo}. */ ids2 = ids__new(); TEST_ASSERT_VAL("ids__new", ids2); - TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids2, strdup("foo"), NULL), 0); + TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids2, strdup("foo")), 0); ids1 = ids__union(ids1, ids2); TEST_ASSERT_EQUAL("union", (int)hashmap__size(ids1), 2); @@ -41,8 +41,8 @@ static int test_ids_union(void) /* Union {foo, bar} against {bar,baz}. */ ids2 = ids__new(); TEST_ASSERT_VAL("ids__new", ids2); - TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids2, strdup("bar"), NULL), 0); - TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids2, strdup("baz"), NULL), 0); + TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids2, strdup("bar")), 0); + TEST_ASSERT_EQUAL("ids__insert", ids__insert(ids2, strdup("baz")), 0); ids1 = ids__union(ids1, ids2); TEST_ASSERT_EQUAL("union", (int)hashmap__size(ids1), 3); diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 62fb39fd4d9d..5657222aaa25 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -25,7 +25,6 @@ struct expr_id_data { const char *metric_name; const char *metric_expr; } ref; - struct expr_id *parent; }; enum { @@ -35,8 +34,6 @@ struct expr_id_data { EXPR_ID_DATA__REF, /* A reference but the value has been computed. */ EXPR_ID_DATA__REF_VALUE, - /* A parent is remembered for the recursion check. 
*/ - EXPR_ID_DATA__PARENT, } kind; }; @@ -80,20 +77,12 @@ void ids__free(struct hashmap *ids) hashmap__free(ids); } -int ids__insert(struct hashmap *ids, const char *id, - struct expr_id *parent) +int ids__insert(struct hashmap *ids, const char *id) { struct expr_id_data *data_ptr = NULL, *old_data = NULL; char *old_key = NULL; int ret; - data_ptr = malloc(sizeof(*data_ptr)); - if (!data_ptr) - return -ENOMEM; - - data_ptr->parent = parent; - data_ptr->kind = EXPR_ID_DATA__PARENT; - ret = hashmap__set(ids, id, data_ptr, (const void **)&old_key, (void **)&old_data); if (ret) @@ -142,7 +131,7 @@ struct hashmap *ids__union(struct hashmap *ids1, struct hashmap *ids2) /* Caller must make sure id is allocated */ int expr__add_id(struct expr_parse_ctx *ctx, const char *id) { - return ids__insert(ctx->ids, id, ctx->parent); + return ids__insert(ctx->ids, id); } /* Caller must make sure id is allocated */ @@ -238,9 +227,6 @@ int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, case EXPR_ID_DATA__VALUE: pr_debug2("lookup(%s): val %f\n", id, data->val); break; - case EXPR_ID_DATA__PARENT: - pr_debug2("lookup(%s): parent %s\n", id, data->parent->id); - break; case EXPR_ID_DATA__REF: pr_debug2("lookup(%s): ref metric name %s\n", id, data->ref.metric_name); @@ -283,8 +269,8 @@ struct expr_parse_ctx *expr__ctx_new(void) return NULL; ctx->ids = hashmap__new(key_hash, key_equal, NULL); - ctx->parent = NULL; ctx->runtime = 0; + return ctx; } @@ -369,9 +355,3 @@ double expr_id_data__value(const struct expr_id_data *data) assert(data->kind == EXPR_ID_DATA__REF_VALUE); return data->ref.val; } - -struct expr_id *expr_id_data__parent(struct expr_id_data *data) -{ - assert(data->kind == EXPR_ID_DATA__PARENT); - return data->parent; -} diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index 124475a4f245..c6e534f633c3 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -13,14 +13,8 @@ struct metric_ref; -struct expr_id { - char *id; - struct expr_id 
*parent; -}; - struct expr_parse_ctx { struct hashmap *ids; - struct expr_id *parent; int runtime; }; @@ -32,7 +26,7 @@ struct expr_scanner_ctx { struct hashmap *ids__new(void); void ids__free(struct hashmap *ids); -int ids__insert(struct hashmap *ids, const char *id, struct expr_id *parent); +int ids__insert(struct hashmap *ids, const char *id); /* * Union two sets of ids (hashmaps) and construct a third, freeing ids1 and * ids2. @@ -59,6 +53,5 @@ int expr__find_ids(const char *expr, const char *one, struct expr_parse_ctx *ids); double expr_id_data__value(const struct expr_id_data *data); -struct expr_id *expr_id_data__parent(struct expr_id_data *data); #endif diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y index ba7d3b667fcb..f969dfa525bd 100644 --- a/tools/perf/util/expr.y +++ b/tools/perf/util/expr.y @@ -190,7 +190,7 @@ expr: NUMBER */ $$.val = BOTTOM; $$.ids = ids__new(); - if (!$$.ids || ids__insert($$.ids, $1, ctx->parent)) + if (!$$.ids || ids__insert($$.ids, $1)) YYABORT; } } diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 6c4c51e35aa7..c96f9fe163f9 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -18,6 +18,7 @@ #include "strlist.h" #include #include +#include #include #include #include @@ -199,28 +200,6 @@ static void metric__free(struct metric *m) free(m); } -#define RECURSION_ID_MAX 1000 - -struct expr_ids { - struct expr_id id[RECURSION_ID_MAX]; - int cnt; -}; - -static struct expr_id *expr_ids__alloc(struct expr_ids *ids) -{ - if (ids->cnt >= RECURSION_ID_MAX) - return NULL; - return &ids->id[ids->cnt++]; -} - -static void expr_ids__exit(struct expr_ids *ids) -{ - int i; - - for (i = 0; i < ids->cnt; i++) - free(ids->id[i].id); -} - static bool contains_event(struct evsel **metric_events, int num_events, const char *event_name) { @@ -813,15 +792,106 @@ int __weak arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused) return 1; } +/* + * A singly linked list on the 
stack of the names of metrics being + * processed. Used to identify recursion. + */ +struct visited_metric { + const char *name; + const struct visited_metric *parent; +}; + struct metricgroup_add_iter_data { struct list_head *metric_list; const char *metric_name; - struct expr_ids *ids; int *ret; bool *has_match; bool metric_no_group; + struct metric *root_metric; + const struct visited_metric *visited; + const struct pmu_events_map *map; }; +static int add_metric(struct list_head *metric_list, + const struct pmu_event *pe, + bool metric_no_group, + struct metric *root_metric, + const struct visited_metric *visited, + const struct pmu_events_map *map); + +/** + * resolve_metric - Locate metrics within the root metric and recursively add + * references to them. + * @metric_list: The list the metric is added to. + * @metric_no_group: Should events written to events be grouped "{}" or + * global. Grouping is the default but due to multiplexing the + * user may override. + * @root_metric: Metrics may reference other metrics to form a tree. In this + * case the root_metric holds all the IDs and a list of referenced + * metrics. When adding a root this argument is NULL. + * @visited: A singly linked list of metric names being added that is used to + * detect recursion. + * @map: The map that is searched for metrics, most commonly the table for the + * architecture perf is running upon. + */ +static int resolve_metric(struct list_head *metric_list, + bool metric_no_group, + struct metric *root_metric, + const struct visited_metric *visited, + const struct pmu_events_map *map) +{ + struct hashmap_entry *cur; + size_t bkt; + struct to_resolve { + /* The metric to resolve. */ + const struct pmu_event *pe; + /* + * The key in the IDs map, this may differ from in case, + * etc. from pe->metric_name. 
+ */ + const char *key; + } *pending = NULL; + int i, ret = 0, pending_cnt = 0; + + /* + * Iterate all the parsed IDs and if there's a matching metric and it to + * the pending array. + */ + hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) { + const struct pmu_event *pe; + + pe = metricgroup__find_metric(cur->key, map); + if (pe) { + pending = realloc(pending, + (pending_cnt + 1) * sizeof(struct to_resolve)); + if (!pending) + return -ENOMEM; + + pending[pending_cnt].pe = pe; + pending[pending_cnt].key = cur->key; + pending_cnt++; + } + } + + /* Remove the metric IDs from the context. */ + for (i = 0; i < pending_cnt; i++) + expr__del_id(root_metric->pctx, pending[i].key); + + /* + * Recursively add all the metrics, IDs are added to the root metric's + * context. + */ + for (i = 0; i < pending_cnt; i++) { + ret = add_metric(metric_list, pending[i].pe, metric_no_group, + root_metric, visited, map); + if (ret) + break; + } + + free(pending); + return ret; +} + /** * __add_metric - Add a metric to metric_list. * @metric_list: The list the metric is added to. @@ -830,58 +900,59 @@ struct metricgroup_add_iter_data { * global. Grouping is the default but due to multiplexing the * user may override. * @runtime: A special argument for the parser only known at runtime. - * @mp: The pointer to a location holding the first metric added to metric - * list. It is initialized here if this is the first metric. - * @parent: The last entry in a linked list of metrics being - * added/resolved. This is maintained to detect recursion. - * @ids: Storage for parent list. + * @root_metric: Metrics may reference other metrics to form a tree. In this + * case the root_metric holds all the IDs and a list of referenced + * metrics. When adding a root this argument is NULL. + * @visited: A singly linked list of metric names being added that is used to + * detect recursion. 
+ * @map: The map that is searched for metrics, most commonly the table for the + * architecture perf is running upon. */ static int __add_metric(struct list_head *metric_list, const struct pmu_event *pe, bool metric_no_group, int runtime, - struct metric **mp, - struct expr_id *parent, - struct expr_ids *ids) + struct metric *root_metric, + const struct visited_metric *visited, + const struct pmu_events_map *map) { struct metric_ref_node *ref; - struct metric *m; + const struct visited_metric *vm; + int ret; + bool is_root = !root_metric; + struct visited_metric visited_node = { + .name = pe->metric_name, + .parent = visited, + }; - if (*mp == NULL) { + for (vm = visited; vm; vm = vm->parent) { + if (!strcmp(pe->metric_name, vm->name)) { + pr_err("failed: recursion detected for %s\n", pe->metric_name); + return -1; + } + } + + if (is_root) { /* - * We got in here for the parent group, - * allocate it and put it on the list. + * This metric is the root of a tree and may reference other + * metrics that are added recursively. */ - m = metric__new(pe, metric_no_group, runtime); - if (!m) + root_metric = metric__new(pe, metric_no_group, runtime); + if (!root_metric) return -ENOMEM; - parent = expr_ids__alloc(ids); - if (!parent) { - free(m); - return -EINVAL; - } - - parent->id = strdup(pe->metric_name); - if (!parent->id) { - free(m); - return -ENOMEM; - } - *mp = m; } else { /* * This metric was referenced in a metric higher in the * tree. Check if the same metric is already resolved in the * metric_refs list. */ - m = *mp; - - list_for_each_entry(ref, &m->metric_refs, list) { + list_for_each_entry(ref, &root_metric->metric_refs, list) { if (!strcmp(pe->metric_name, ref->metric_name)) return 0; } - /*Add the new referenced metric to the pare the parent group. 
*/ + /* Create reference */ ref = malloc(sizeof(*ref)); if (!ref) return -ENOMEM; @@ -895,50 +966,31 @@ static int __add_metric(struct list_head *metric_list, ref->metric_name = pe->metric_name; ref->metric_expr = pe->metric_expr; - list_add(&ref->list, &m->metric_refs); - m->metric_refs_cnt++; + list_add(&ref->list, &root_metric->metric_refs); + root_metric->metric_refs_cnt++; } - /* Force all found IDs in metric to have us as parent ID. */ - WARN_ON_ONCE(!parent); - m->pctx->parent = parent; - /* * For both the parent and referenced metrics, we parse - * all the metric's IDs and add it to the parent context. + * all the metric's IDs and add it to the root context. */ - if (expr__find_ids(pe->metric_expr, NULL, m->pctx) < 0) { - if (m->metric_refs_cnt == 0) { - metric__free(m); - *mp = NULL; - } - return -EINVAL; + if (expr__find_ids(pe->metric_expr, NULL, root_metric->pctx) < 0) { + /* Broken metric. */ + ret = -EINVAL; + } else { + /* Resolve referenced metrics. */ + ret = resolve_metric(metric_list, metric_no_group, root_metric, + &visited_node, map); } - /* - * We add new group only in the 'parent' call, - * so bail out for referenced metric case. - */ - if (m->metric_refs_cnt) - return 0; - - if (list_empty(metric_list)) - list_add(&m->nd, metric_list); - else { - struct list_head *pos; - - /* Place the largest groups at the front. 
*/ - list_for_each_prev(pos, metric_list) { - struct metric *old = list_entry(pos, struct metric, nd); + if (ret) { + if (is_root) + metric__free(root_metric); - if (hashmap__size(m->pctx->ids) <= - hashmap__size(old->pctx->ids)) - break; - } - list_add(&m->nd, pos); - } + } else if (is_root) + list_add(&root_metric->nd, metric_list); - return 0; + return ret; } #define map_for_each_event(__pe, __idx, __map) \ @@ -967,136 +1019,20 @@ const struct pmu_event *metricgroup__find_metric(const char *metric, return NULL; } -static int recursion_check(struct metric *m, const char *id, struct expr_id **parent, - struct expr_ids *ids) -{ - struct expr_id_data *data; - struct expr_id *p; - int ret; - - /* - * We get the parent referenced by 'id' argument and - * traverse through all the parent object IDs to check - * if we already processed 'id', if we did, it's recursion - * and we fail. - */ - ret = expr__get_id(m->pctx, id, &data); - if (ret) - return ret; - - p = expr_id_data__parent(data); - - while (p->parent) { - if (!strcmp(p->id, id)) { - pr_err("failed: recursion detected for %s\n", id); - return -1; - } - p = p->parent; - } - - /* - * If we are over the limit of static entris, the metric - * is too difficult/nested to process, fail as well. - */ - p = expr_ids__alloc(ids); - if (!p) { - pr_err("failed: too many nested metrics\n"); - return -EINVAL; - } - - p->id = strdup(id); - p->parent = expr_id_data__parent(data); - *parent = p; - - return p->id ? 
0 : -ENOMEM; -} - static int add_metric(struct list_head *metric_list, const struct pmu_event *pe, bool metric_no_group, - struct metric **mp, - struct expr_id *parent, - struct expr_ids *ids); - -static int __resolve_metric(struct metric *m, - bool metric_no_group, - struct list_head *metric_list, - const struct pmu_events_map *map, - struct expr_ids *ids) + struct metric *root_metric, + const struct visited_metric *visited, + const struct pmu_events_map *map) { - struct hashmap_entry *cur; - size_t bkt; - bool all; - int ret; - - /* - * Iterate all the parsed IDs and if there's metric, - * add it to the context. - */ - do { - all = true; - hashmap__for_each_entry(m->pctx->ids, cur, bkt) { - struct expr_id *parent; - const struct pmu_event *pe; - - pe = metricgroup__find_metric(cur->key, map); - if (!pe) - continue; - - ret = recursion_check(m, cur->key, &parent, ids); - if (ret) - return ret; - - all = false; - /* The metric key itself needs to go out.. */ - expr__del_id(m->pctx, cur->key); - - /* ... and it gets resolved to the parent context. */ - ret = add_metric(metric_list, pe, metric_no_group, &m, parent, ids); - if (ret) - return ret; - - /* - * We added new metric to hashmap, so we need - * to break the iteration and start over. 
- */ - break; - } - } while (!all); - - return 0; -} - -static int resolve_metric(bool metric_no_group, - struct list_head *metric_list, - const struct pmu_events_map *map, - struct expr_ids *ids) -{ - struct metric *m; - int err; - - list_for_each_entry(m, metric_list, nd) { - err = __resolve_metric(m, metric_no_group, metric_list, map, ids); - if (err) - return err; - } - return 0; -} - -static int add_metric(struct list_head *metric_list, - const struct pmu_event *pe, - bool metric_no_group, - struct metric **m, - struct expr_id *parent, - struct expr_ids *ids) -{ - struct metric *orig = *m; int ret = 0; pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); if (!strstr(pe->metric_expr, "?")) { - ret = __add_metric(metric_list, pe, metric_no_group, 1, m, parent, ids); + ret = __add_metric(metric_list, pe, metric_no_group, 0, + root_metric, visited, map); } else { int j, count; @@ -1107,8 +1043,9 @@ static int add_metric(struct list_head *metric_list, * those events to metric_list. 
*/ - for (j = 0; j < count && !ret; j++, *m = orig) - ret = __add_metric(metric_list, pe, metric_no_group, j, m, parent, ids); + for (j = 0; j < count && !ret; j++) + ret = __add_metric(metric_list, pe, metric_no_group, j, + root_metric, visited, map); } return ret; @@ -1118,18 +1055,13 @@ static int metricgroup__add_metric_sys_event_iter(const struct pmu_event *pe, void *data) { struct metricgroup_add_iter_data *d = data; - struct metric *m = NULL; int ret; if (!match_pe_metric(pe, d->metric_name)) return 0; - ret = add_metric(d->metric_list, pe, d->metric_no_group, &m, NULL, d->ids); - if (ret) - goto out; - - ret = resolve_metric(d->metric_no_group, - d->metric_list, NULL, d->ids); + ret = add_metric(d->metric_list, pe, d->metric_no_group, + d->root_metric, d->visited, d->map); if (ret) goto out; @@ -1140,6 +1072,15 @@ out: return ret; } +static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, + const struct list_head *r) +{ + const struct metric *left = container_of(l, struct metric, nd); + const struct metric *right = container_of(r, struct metric, nd); + + return hashmap__size(right->pctx->ids) - hashmap__size(left->pctx->ids); +} + /** * metricgroup__add_metric - Find and add a metric, or a metric group. * @metric_name: The name of the metric or metric group. For example, "IPC" @@ -1160,7 +1101,6 @@ static int metricgroup__add_metric(const char *metric_name, bool metric_no_group struct list_head *metric_list, const struct pmu_events_map *map) { - struct expr_ids ids = { .cnt = 0, }; const struct pmu_event *pe; struct metric *m; LIST_HEAD(list); @@ -1173,18 +1113,9 @@ static int metricgroup__add_metric(const char *metric_name, bool metric_no_group */ map_for_each_metric(pe, i, map, metric_name) { has_match = true; - m = NULL; - - ret = add_metric(&list, pe, metric_no_group, &m, NULL, &ids); - if (ret) - goto out; - - /* - * Process any possible referenced metrics - * included in the expression. 
- */ - ret = resolve_metric(metric_no_group, - &list, map, &ids); + ret = add_metric(&list, pe, metric_no_group, + /*root_metric=*/NULL, + /*visited_metrics=*/NULL, map); if (ret) goto out; } @@ -1196,9 +1127,9 @@ static int metricgroup__add_metric(const char *metric_name, bool metric_no_group .metric_list = &list, .metric_name = metric_name, .metric_no_group = metric_no_group, - .ids = &ids, .has_match = &has_match, .ret = &ret, + .map = map, }, }; @@ -1210,6 +1141,9 @@ static int metricgroup__add_metric(const char *metric_name, bool metric_no_group goto out; } + /* Sort metrics from largest to smallest. */ + list_sort(NULL, &list, metric_list_cmp); + list_for_each_entry(m, &list, nd) { if (events->len > 0) strbuf_addf(events, ","); @@ -1229,7 +1163,9 @@ out: * even if it's failed */ list_splice(&list, metric_list); - expr_ids__exit(&ids); + + /* Sort metrics from largest to smallest. */ + list_sort(NULL, metric_list, metric_list_cmp); return ret; } -- cgit v1.2.3 From 4d61aef93d96353c00a3cee715dcd4ccdbdd80c4 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 Oct 2021 10:21:21 -0700 Subject: perf metric: Comment data structures Document the data structures maintained by metricgroup.c and used by stat-shadow.c for metric output. 
Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Kees Kook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-11-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 88ba939a3082..3a51a84f440b 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -14,24 +14,51 @@ struct rblist; struct pmu_events_map; struct cgroup; +/** + * A node in a rblist keyed by the evsel. The global rblist of metric events + * generally exists in perf_stat_config. The evsel is looked up in the rblist + * yielding a list of metric_expr. + */ struct metric_event { struct rb_node nd; struct evsel *evsel; struct list_head head; /* list of metric_expr */ }; +/** + * A metric referenced by a metric_expr. When parsing a metric expression IDs + * will be looked up, matching either a value (from metric_events) or a + * metric_ref. A metric_ref will then be parsed recursively. The metric_refs and + * metric_events need to be known before parsing so that their values may be + * placed in the parse context for lookup. + */ struct metric_ref { const char *metric_name; const char *metric_expr; }; +/** + * One in a list of metric_expr associated with an evsel. 
The data is used to + * generate a metric value during stat output. + */ struct metric_expr { struct list_head nd; + /** The expression to parse, for example, "instructions/cycles". */ const char *metric_expr; + /** The name of the metric such as "IPC". */ const char *metric_name; + /** + * The "ScaleUnit" that scales and adds a unit to the metric during + * output. For example, "6.4e-05MiB" means to scale the resulting metric + * by 6.4e-05 (typically converting a unit like cache lines to something + * more human intelligible) and then add "MiB" afterward when displayed. + */ const char *metric_unit; + /** Null terminated array of events used by the metric. */ struct evsel **metric_events; + /** Null terminated array of referenced metrics. */ struct metric_ref *metric_refs; + /** A value substituted for '?' during parsing. */ int runtime; }; -- cgit v1.2.3 From 485fcaed98ef1601fbab1cbec6dbbe4a4349d188 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 Oct 2021 10:21:22 -0700 Subject: perf metric: Document the internal 'struct metric' Add documentation as part of code tidying. 
Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Kees Kook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-12-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index c96f9fe163f9..632867cedbae 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -117,14 +117,34 @@ struct metric_ref_node { struct list_head list; }; +/** + * The metric under construction. The data held here will be placed in a + * metric_expr. + */ struct metric { struct list_head nd; + /** + * The expression parse context importantly holding the IDs contained + * within the expression. + */ struct expr_parse_ctx *pctx; + /** The name of the metric such as "IPC". */ const char *metric_name; + /** The expression to parse, for example, "instructions/cycles". */ const char *metric_expr; + /** + * The "ScaleUnit" that scales and adds a unit to the metric during + * output. + */ const char *metric_unit; + /** The list of metrics referenced by this one. */ struct list_head metric_refs; + /** The size of the metric_refs list. */ int metric_refs_cnt; + /** + * Is there a constraint on the group of events? In which case the + * events won't be grouped. 
+ */ bool has_constraint; }; -- cgit v1.2.3 From 46bdc0bf8d21940f950749fc3017c744f30fd55f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 Oct 2021 10:21:23 -0700 Subject: perf metric: Simplify metric_refs calculation Don't build a list and then turn to an array, just directly build the array. The size of the array is known due to the search for a duplicate. Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Kees Kook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-13-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 77 +++++++++++++------------------------------ 1 file changed, 23 insertions(+), 54 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 632867cedbae..b48836d7c080 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -137,10 +137,8 @@ struct metric { * output. */ const char *metric_unit; - /** The list of metrics referenced by this one. */ - struct list_head metric_refs; - /** The size of the metric_refs list. */ - int metric_refs_cnt; + /** Optional null terminated array of referenced metrics. */ + struct metric_ref *metric_refs; /** * Is there a constraint on the group of events? In which case the * events won't be grouped. 
@@ -202,20 +200,14 @@ static struct metric *metric__new(const struct pmu_event *pe, m->metric_unit = pe->unit; m->pctx->runtime = runtime; m->has_constraint = metric_no_group || metricgroup__has_constraint(pe); - INIT_LIST_HEAD(&m->metric_refs); - m->metric_refs_cnt = 0; + m->metric_refs = NULL; return m; } static void metric__free(struct metric *m) { - struct metric_ref_node *ref, *tmp; - - list_for_each_entry_safe(ref, tmp, &m->metric_refs, list) { - list_del(&ref->list); - free(ref); - } + free(m->metric_refs); expr__ctx_free(m->pctx); free(m); } @@ -393,7 +385,6 @@ static int metricgroup__setup_events(struct list_head *groups, list_for_each_entry (m, groups, nd) { struct evsel **metric_events; - struct metric_ref *metric_refs = NULL; const size_t ids_size = hashmap__size(m->pctx->ids); metric_events = calloc(sizeof(void *), @@ -427,36 +418,8 @@ static int metricgroup__setup_events(struct list_head *groups, break; } - /* - * Collect and store collected nested expressions - * for metric processing. - */ - if (m->metric_refs_cnt) { - struct metric_ref_node *ref; - - metric_refs = zalloc(sizeof(struct metric_ref) * (m->metric_refs_cnt + 1)); - if (!metric_refs) { - ret = -ENOMEM; - free(metric_events); - free(expr); - break; - } - - i = 0; - list_for_each_entry(ref, &m->metric_refs, list) { - /* - * Intentionally passing just const char pointers, - * originally from 'struct pmu_event' object. - * We don't need to change them, so there's no - * need to create our own copy. 
- */ - metric_refs[i].metric_name = ref->metric_name; - metric_refs[i].metric_expr = ref->metric_expr; - i++; - } - } - - expr->metric_refs = metric_refs; + expr->metric_refs = m->metric_refs; + m->metric_refs = NULL; expr->metric_expr = m->metric_expr; expr->metric_name = m->metric_name; expr->metric_unit = m->metric_unit; @@ -936,7 +899,6 @@ static int __add_metric(struct list_head *metric_list, const struct visited_metric *visited, const struct pmu_events_map *map) { - struct metric_ref_node *ref; const struct visited_metric *vm; int ret; bool is_root = !root_metric; @@ -962,19 +924,25 @@ static int __add_metric(struct list_head *metric_list, return -ENOMEM; } else { + int cnt = 0; + /* * This metric was referenced in a metric higher in the * tree. Check if the same metric is already resolved in the * metric_refs list. */ - list_for_each_entry(ref, &root_metric->metric_refs, list) { - if (!strcmp(pe->metric_name, ref->metric_name)) - return 0; + if (root_metric->metric_refs) { + for (; root_metric->metric_refs[cnt].metric_name; cnt++) { + if (!strcmp(pe->metric_name, + root_metric->metric_refs[cnt].metric_name)) + return 0; + } } - /* Create reference */ - ref = malloc(sizeof(*ref)); - if (!ref) + /* Create reference. Need space for the entry and the terminator. */ + root_metric->metric_refs = realloc(root_metric->metric_refs, + (cnt + 2) * sizeof(struct metric_ref)); + if (!root_metric->metric_refs) return -ENOMEM; /* @@ -983,11 +951,12 @@ static int __add_metric(struct list_head *metric_list, * need to change them, so there's no need to create * our own copy. */ - ref->metric_name = pe->metric_name; - ref->metric_expr = pe->metric_expr; + root_metric->metric_refs[cnt].metric_name = pe->metric_name; + root_metric->metric_refs[cnt].metric_expr = pe->metric_expr; - list_add(&ref->list, &root_metric->metric_refs); - root_metric->metric_refs_cnt++; + /* Null terminate array. 
*/ + root_metric->metric_refs[cnt+1].metric_name = NULL; + root_metric->metric_refs[cnt+1].metric_expr = NULL; } /* -- cgit v1.2.3 From 8e8bbfb311a26a17834f1839e15e2c29ea5e58c6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 Oct 2021 10:21:24 -0700 Subject: perf parse-events: Add const to evsel name The evsel name is strdup-ed before assignment and so can be const. A later change will add another similar string. Using const makes it clearer that these are not out arguments. Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Kees Kook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-14-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events-hybrid.c | 15 +++++++++------ tools/perf/util/parse-events-hybrid.h | 6 ++++-- tools/perf/util/parse-events.c | 15 ++++++++------- tools/perf/util/parse-events.h | 7 ++++--- tools/perf/util/pmu.c | 2 +- tools/perf/util/pmu.h | 2 +- 6 files changed, 27 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c index b234d95fb10a..7e44deee1343 100644 --- a/tools/perf/util/parse-events-hybrid.c +++ b/tools/perf/util/parse-events-hybrid.c @@ -38,7 +38,7 @@ static void config_hybrid_attr(struct perf_event_attr *attr, static int create_event_hybrid(__u32 config_type, int *idx, struct list_head *list, - struct 
perf_event_attr *attr, char *name, + struct perf_event_attr *attr, const char *name, struct list_head *config_terms, struct perf_pmu *pmu) { @@ -70,7 +70,7 @@ static int pmu_cmp(struct parse_events_state *parse_state, static int add_hw_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, - char *name, struct list_head *config_terms) + const char *name, struct list_head *config_terms) { struct perf_pmu *pmu; int ret; @@ -94,7 +94,8 @@ static int add_hw_hybrid(struct parse_events_state *parse_state, } static int create_raw_event_hybrid(int *idx, struct list_head *list, - struct perf_event_attr *attr, char *name, + struct perf_event_attr *attr, + const char *name, struct list_head *config_terms, struct perf_pmu *pmu) { @@ -113,7 +114,7 @@ static int create_raw_event_hybrid(int *idx, struct list_head *list, static int add_raw_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, - char *name, struct list_head *config_terms) + const char *name, struct list_head *config_terms) { struct perf_pmu *pmu; int ret; @@ -138,7 +139,8 @@ static int add_raw_hybrid(struct parse_events_state *parse_state, int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, - char *name, struct list_head *config_terms, + const char *name, + struct list_head *config_terms, bool *hybrid) { *hybrid = false; @@ -159,7 +161,8 @@ int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, } int parse_events__add_cache_hybrid(struct list_head *list, int *idx, - struct perf_event_attr *attr, char *name, + struct perf_event_attr *attr, + const char *name, struct list_head *config_terms, bool *hybrid, struct parse_events_state *parse_state) diff --git a/tools/perf/util/parse-events-hybrid.h b/tools/perf/util/parse-events-hybrid.h index f33bd67aa851..25a4a4f73f3a 100644 --- a/tools/perf/util/parse-events-hybrid.h +++ 
b/tools/perf/util/parse-events-hybrid.h @@ -11,11 +11,13 @@ int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, - char *name, struct list_head *config_terms, + const char *name, + struct list_head *config_terms, bool *hybrid); int parse_events__add_cache_hybrid(struct list_head *list, int *idx, - struct perf_event_attr *attr, char *name, + struct perf_event_attr *attr, + const char *name, struct list_head *config_terms, bool *hybrid, struct parse_events_state *parse_state); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 1acac3e13b32..88f181a985b7 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -337,7 +337,7 @@ static int parse_events__is_name_term(struct parse_events_term *term) return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME; } -static char *get_config_name(struct list_head *head_terms) +static const char *get_config_name(struct list_head *head_terms) { struct parse_events_term *term; @@ -355,7 +355,7 @@ static struct evsel * __add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, bool init_attr, - char *name, struct perf_pmu *pmu, + const char *name, struct perf_pmu *pmu, struct list_head *config_terms, bool auto_merge_stats, const char *cpu_list) { @@ -394,14 +394,14 @@ __add_event(struct list_head *list, int *idx, } struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, - char *name, struct perf_pmu *pmu) + const char *name, struct perf_pmu *pmu) { return __add_event(NULL, &idx, attr, false, name, pmu, NULL, false, NULL); } static int add_event(struct list_head *list, int *idx, - struct perf_event_attr *attr, char *name, + struct perf_event_attr *attr, const char *name, struct list_head *config_terms) { return __add_event(list, idx, attr, true, name, NULL, config_terms, @@ -464,7 +464,8 @@ int parse_events_add_cache(struct list_head *list, int *idx, { struct 
perf_event_attr attr; LIST_HEAD(config_terms); - char name[MAX_NAME_LEN], *config_name; + char name[MAX_NAME_LEN]; + const char *config_name; int cache_type = -1, cache_op = -1, cache_result = -1; char *op_result[2] = { op_result1, op_result2 }; int i, n, ret; @@ -2027,7 +2028,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) return 0; } -int parse_events_name(struct list_head *list, char *name) +int parse_events_name(struct list_head *list, const char *name) { struct evsel *evsel; @@ -3344,7 +3345,7 @@ fail: struct evsel *parse_events__add_event_hybrid(struct list_head *list, int *idx, struct perf_event_attr *attr, - char *name, struct perf_pmu *pmu, + const char *name, struct perf_pmu *pmu, struct list_head *config_terms) { return __add_event(list, idx, attr, true, name, pmu, diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index b32ed3064c49..54d24c24d074 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -162,7 +162,7 @@ void parse_events_terms__purge(struct list_head *terms); void parse_events__clear_array(struct parse_events_array *a); int parse_events__modifier_event(struct list_head *list, char *str, bool add); int parse_events__modifier_group(struct list_head *list, char *event_mod); -int parse_events_name(struct list_head *list, char *name); +int parse_events_name(struct list_head *list, const char *name); int parse_events_add_tracepoint(struct list_head *list, int *idx, const char *sys, const char *event, struct parse_events_error *error, @@ -199,7 +199,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, bool use_alias); struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, - char *name, struct perf_pmu *pmu); + const char *name, struct perf_pmu *pmu); int parse_events_multi_pmu_add(struct parse_events_state *parse_state, char *str, @@ -266,7 +266,8 @@ int perf_pmu__test_parse_init(void); struct evsel 
*parse_events__add_event_hybrid(struct list_head *list, int *idx, struct perf_event_attr *attr, - char *name, struct perf_pmu *pmu, + const char *name, + struct perf_pmu *pmu, struct list_head *config_terms); #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index cdd6c3f6caf1..9b5039bf909a 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1906,7 +1906,7 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu) } void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, - char *name) + const char *name) { struct perf_pmu_format *format; __u64 masks = 0, bits; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index cc9f9e001347..f9743eab07b6 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -134,7 +134,7 @@ int perf_pmu__convert_scale(const char *scale, char **end, double *sval); int perf_pmu__caps_parse(struct perf_pmu *pmu); void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, - char *name); + const char *name); bool perf_pmu__has_hybrid(void); int perf_pmu__match(char *pattern, char *name, char *tok); -- cgit v1.2.3 From 2b62b3a611715d3ca612e3225cf436277ed9648b Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 Oct 2021 10:21:25 -0700 Subject: perf parse-events: Add new "metric-id" term Add a new "metric-id" term to events so that metric parsing can set an ID that can be reliably looked up. Metric parsing currently will turn a metric like "instructions/cycles" into a parse events string of "{instructions,cycles}:W". However, parse-events may change "instructions" into "instructions:u" if perf_event_paranoid=2. When this happens expr__resolve_id currently fails as stat-shadow adds the ID "instructions:u" to match with the counter value and the metric tries to look up the ID just "instructions". A later patch will use the new term. 
An example of the current problem: $ echo -1 > /proc/sys/kernel/perf_event_paranoid $ perf stat -M IPC /bin/true Performance counter stats for '/bin/true': 1,217,161 inst_retired.any # 0.97 IPC 1,250,389 cpu_clk_unhalted.thread 0.002064773 seconds time elapsed 0.002378000 seconds user 0.000000000 seconds sys $ echo 2 > /proc/sys/kernel/perf_event_paranoid $ perf stat -M IPC /bin/true Performance counter stats for '/bin/true': 150,298 inst_retired.any:u # nan IPC 187,095 cpu_clk_unhalted.thread:u 0.002042731 seconds time elapsed 0.000000000 seconds user 0.002377000 seconds sys Note: nan IPC is printed as an effect of "perf metric: Use NAN for missing event IDs." but earlier versions of perf just fail with a parse error and display no value. Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Kees Kook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-15-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 17 +++++++ tools/perf/util/evsel.h | 2 + tools/perf/util/parse-events-hybrid.c | 25 +++++---- tools/perf/util/parse-events-hybrid.h | 4 +- tools/perf/util/parse-events.c | 95 +++++++++++++++++++++++------------ tools/perf/util/parse-events.h | 5 +- tools/perf/util/parse-events.l | 1 + tools/perf/util/pfm.c | 3 +- 8 files changed, 107 insertions(+), 45 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c 
b/tools/perf/util/evsel.c index dbfeceb2546c..96ef6a4a7c14 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -410,6 +410,11 @@ struct evsel *evsel__clone(struct evsel *orig) if (evsel->filter == NULL) goto out_err; } + if (orig->metric_id) { + evsel->metric_id = strdup(orig->metric_id); + if (evsel->metric_id == NULL) + goto out_err; + } evsel->cgrp = cgroup__get(orig->cgrp); evsel->tp_format = orig->tp_format; evsel->handler = orig->handler; @@ -779,6 +784,17 @@ out_unknown: return "unknown"; } +const char *evsel__metric_id(const struct evsel *evsel) +{ + if (evsel->metric_id) + return evsel->metric_id; + + if (evsel->core.attr.type == PERF_TYPE_SOFTWARE && evsel->tool_event) + return "duration_time"; + + return "unknown"; +} + const char *evsel__group_name(struct evsel *evsel) { return evsel->group_name ?: "anon group"; @@ -1423,6 +1439,7 @@ void evsel__exit(struct evsel *evsel) zfree(&evsel->group_name); zfree(&evsel->name); zfree(&evsel->pmu_name); + zfree(&evsel->metric_id); evsel__zero_per_pkg(evsel); hashmap__free(evsel->per_pkg_mask); evsel->per_pkg_mask = NULL; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 1f7edfa8568a..45476a888942 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -68,6 +68,7 @@ struct evsel { double scale; const char *unit; struct cgroup *cgrp; + const char *metric_id; enum perf_tool_event tool_event; /* parse modifier helper */ int exclude_GH; @@ -261,6 +262,7 @@ bool evsel__match_bpf_counter_events(const char *name); int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size); const char *evsel__name(struct evsel *evsel); +const char *evsel__metric_id(const struct evsel *evsel); const char *evsel__group_name(struct evsel *evsel); int evsel__group_desc(struct evsel *evsel, char *buf, size_t size); diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c index 7e44deee1343..9fc86971027b 100644 --- 
a/tools/perf/util/parse-events-hybrid.c +++ b/tools/perf/util/parse-events-hybrid.c @@ -39,6 +39,7 @@ static void config_hybrid_attr(struct perf_event_attr *attr, static int create_event_hybrid(__u32 config_type, int *idx, struct list_head *list, struct perf_event_attr *attr, const char *name, + const char *metric_id, struct list_head *config_terms, struct perf_pmu *pmu) { @@ -47,7 +48,7 @@ static int create_event_hybrid(__u32 config_type, int *idx, __u64 config = attr->config; config_hybrid_attr(attr, config_type, pmu->type); - evsel = parse_events__add_event_hybrid(list, idx, attr, name, + evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id, pmu, config_terms); if (evsel) evsel->pmu_name = strdup(pmu->name); @@ -70,7 +71,8 @@ static int pmu_cmp(struct parse_events_state *parse_state, static int add_hw_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, - const char *name, struct list_head *config_terms) + const char *name, const char *metric_id, + struct list_head *config_terms) { struct perf_pmu *pmu; int ret; @@ -84,7 +86,7 @@ static int add_hw_hybrid(struct parse_events_state *parse_state, copy_config_terms(&terms, config_terms); ret = create_event_hybrid(PERF_TYPE_HARDWARE, &parse_state->idx, list, attr, name, - &terms, pmu); + metric_id, &terms, pmu); free_config_terms(&terms); if (ret) return ret; @@ -96,13 +98,14 @@ static int add_hw_hybrid(struct parse_events_state *parse_state, static int create_raw_event_hybrid(int *idx, struct list_head *list, struct perf_event_attr *attr, const char *name, + const char *metric_id, struct list_head *config_terms, struct perf_pmu *pmu) { struct evsel *evsel; attr->type = pmu->type; - evsel = parse_events__add_event_hybrid(list, idx, attr, name, + evsel = parse_events__add_event_hybrid(list, idx, attr, name, metric_id, pmu, config_terms); if (evsel) evsel->pmu_name = strdup(pmu->name); @@ -114,7 +117,8 @@ static int create_raw_event_hybrid(int *idx, 
struct list_head *list, static int add_raw_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, - const char *name, struct list_head *config_terms) + const char *name, const char *metric_id, + struct list_head *config_terms) { struct perf_pmu *pmu; int ret; @@ -127,7 +131,7 @@ static int add_raw_hybrid(struct parse_events_state *parse_state, copy_config_terms(&terms, config_terms); ret = create_raw_event_hybrid(&parse_state->idx, list, attr, - name, &terms, pmu); + name, metric_id, &terms, pmu); free_config_terms(&terms); if (ret) return ret; @@ -139,7 +143,7 @@ static int add_raw_hybrid(struct parse_events_state *parse_state, int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, - const char *name, + const char *name, const char *metric_id, struct list_head *config_terms, bool *hybrid) { @@ -152,17 +156,18 @@ int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, *hybrid = true; if (attr->type != PERF_TYPE_RAW) { - return add_hw_hybrid(parse_state, list, attr, name, + return add_hw_hybrid(parse_state, list, attr, name, metric_id, config_terms); } - return add_raw_hybrid(parse_state, list, attr, name, + return add_raw_hybrid(parse_state, list, attr, name, metric_id, config_terms); } int parse_events__add_cache_hybrid(struct list_head *list, int *idx, struct perf_event_attr *attr, const char *name, + const char *metric_id, struct list_head *config_terms, bool *hybrid, struct parse_events_state *parse_state) @@ -183,7 +188,7 @@ int parse_events__add_cache_hybrid(struct list_head *list, int *idx, copy_config_terms(&terms, config_terms); ret = create_event_hybrid(PERF_TYPE_HW_CACHE, idx, list, - attr, name, &terms, pmu); + attr, name, metric_id, &terms, pmu); free_config_terms(&terms); if (ret) return ret; diff --git a/tools/perf/util/parse-events-hybrid.h b/tools/perf/util/parse-events-hybrid.h index 
25a4a4f73f3a..cbc05fec02a2 100644 --- a/tools/perf/util/parse-events-hybrid.h +++ b/tools/perf/util/parse-events-hybrid.h @@ -11,13 +11,13 @@ int parse_events__add_numeric_hybrid(struct parse_events_state *parse_state, struct list_head *list, struct perf_event_attr *attr, - const char *name, + const char *name, const char *metric_id, struct list_head *config_terms, bool *hybrid); int parse_events__add_cache_hybrid(struct list_head *list, int *idx, struct perf_event_attr *attr, - const char *name, + const char *name, const char *metric_id, struct list_head *config_terms, bool *hybrid, struct parse_events_state *parse_state); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 88f181a985b7..89494b6213a6 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -332,12 +332,7 @@ const char *event_type(int type) return "unknown"; } -static int parse_events__is_name_term(struct parse_events_term *term) -{ - return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME; -} - -static const char *get_config_name(struct list_head *head_terms) +static char *get_config_str(struct list_head *head_terms, int type_term) { struct parse_events_term *term; @@ -345,17 +340,27 @@ static const char *get_config_name(struct list_head *head_terms) return NULL; list_for_each_entry(term, head_terms, list) - if (parse_events__is_name_term(term)) + if (term->type_term == type_term) return term->val.str; return NULL; } +static char *get_config_metric_id(struct list_head *head_terms) +{ + return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_METRIC_ID); +} + +static char *get_config_name(struct list_head *head_terms) +{ + return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_NAME); +} + static struct evsel * __add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, bool init_attr, - const char *name, struct perf_pmu *pmu, + const char *name, const char *metric_id, struct perf_pmu *pmu, struct list_head *config_terms, 
bool auto_merge_stats, const char *cpu_list) { @@ -384,6 +389,9 @@ __add_event(struct list_head *list, int *idx, if (name) evsel->name = strdup(name); + if (metric_id) + evsel->metric_id = strdup(metric_id); + if (config_terms) list_splice_init(config_terms, &evsel->config_terms); @@ -394,18 +402,21 @@ __add_event(struct list_head *list, int *idx, } struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, - const char *name, struct perf_pmu *pmu) + const char *name, const char *metric_id, + struct perf_pmu *pmu) { - return __add_event(NULL, &idx, attr, false, name, pmu, NULL, false, - NULL); + return __add_event(/*list=*/NULL, &idx, attr, /*init_attr=*/false, name, + metric_id, pmu, /*config_terms=*/NULL, + /*auto_merge_stats=*/false, /*cpu_list=*/NULL); } static int add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, const char *name, - struct list_head *config_terms) + const char *metric_id, struct list_head *config_terms) { - return __add_event(list, idx, attr, true, name, NULL, config_terms, - false, NULL) ? 0 : -ENOMEM; + return __add_event(list, idx, attr, /*init_attr*/true, name, metric_id, + /*pmu=*/NULL, config_terms, + /*auto_merge_stats=*/false, /*cpu_list=*/NULL) ? 
0 : -ENOMEM; } static int add_event_tool(struct list_head *list, int *idx, @@ -417,8 +428,10 @@ static int add_event_tool(struct list_head *list, int *idx, .config = PERF_COUNT_SW_DUMMY, }; - evsel = __add_event(list, idx, &attr, true, NULL, NULL, NULL, false, - "0"); + evsel = __add_event(list, idx, &attr, /*init_attr=*/true, /*name=*/NULL, + /*metric_id=*/NULL, /*pmu=*/NULL, + /*config_terms=*/NULL, /*auto_merge_stats=*/false, + /*cpu_list=*/"0"); if (!evsel) return -ENOMEM; evsel->tool_event = tool_event; @@ -465,7 +478,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, struct perf_event_attr attr; LIST_HEAD(config_terms); char name[MAX_NAME_LEN]; - const char *config_name; + const char *config_name, *metric_id; int cache_type = -1, cache_op = -1, cache_result = -1; char *op_result[2] = { op_result1, op_result2 }; int i, n, ret; @@ -530,13 +543,17 @@ int parse_events_add_cache(struct list_head *list, int *idx, return -ENOMEM; } + metric_id = get_config_metric_id(head_config); ret = parse_events__add_cache_hybrid(list, idx, &attr, - config_name ? : name, &config_terms, + config_name ? : name, + metric_id, + &config_terms, &hybrid, parse_state); if (hybrid) goto out_free_terms; - ret = add_event(list, idx, &attr, config_name ? : name, &config_terms); + ret = add_event(list, idx, &attr, config_name ? 
: name, metric_id, + &config_terms); out_free_terms: free_config_terms(&config_terms); return ret; @@ -1013,7 +1030,8 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx, attr.type = PERF_TYPE_BREAKPOINT; attr.sample_period = 1; - return add_event(list, idx, &attr, NULL, NULL); + return add_event(list, idx, &attr, /*name=*/NULL, /*mertic_id=*/NULL, + /*config_terms=*/NULL); } static int check_type_val(struct parse_events_term *term, @@ -1058,6 +1076,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_PERCORE] = "percore", [PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT] = "aux-output", [PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE] = "aux-sample-size", + [PARSE_EVENTS__TERM_TYPE_METRIC_ID] = "metric-id", }; static bool config_term_shrinked; @@ -1080,6 +1099,7 @@ config_term_avail(int term_type, struct parse_events_error *err) case PARSE_EVENTS__TERM_TYPE_CONFIG1: case PARSE_EVENTS__TERM_TYPE_CONFIG2: case PARSE_EVENTS__TERM_TYPE_NAME: + case PARSE_EVENTS__TERM_TYPE_METRIC_ID: case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: case PARSE_EVENTS__TERM_TYPE_PERCORE: return true; @@ -1170,6 +1190,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_NAME: CHECK_TYPE_VAL(STR); break; + case PARSE_EVENTS__TERM_TYPE_METRIC_ID: + CHECK_TYPE_VAL(STR); + break; case PARSE_EVENTS__TERM_TYPE_MAX_STACK: CHECK_TYPE_VAL(NUM); break; @@ -1439,6 +1462,7 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, { struct perf_event_attr attr; LIST_HEAD(config_terms); + const char *name, *metric_id; bool hybrid; int ret; @@ -1455,14 +1479,16 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, return -ENOMEM; } + name = get_config_name(head_config); + metric_id = get_config_metric_id(head_config); ret = parse_events__add_numeric_hybrid(parse_state, list, &attr, - get_config_name(head_config), + name, metric_id, &config_terms, &hybrid); if (hybrid) goto out_free_terms; - ret = add_event(list, &parse_state->idx, &attr, - 
get_config_name(head_config), &config_terms); + ret = add_event(list, &parse_state->idx, &attr, name, metric_id, + &config_terms); out_free_terms: free_config_terms(&config_terms); return ret; @@ -1563,8 +1589,11 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (!head_config) { attr.type = pmu->type; - evsel = __add_event(list, &parse_state->idx, &attr, true, NULL, - pmu, NULL, auto_merge_stats, NULL); + evsel = __add_event(list, &parse_state->idx, &attr, + /*init_attr=*/true, /*name=*/NULL, + /*metric_id=*/NULL, pmu, + /*config_terms=*/NULL, auto_merge_stats, + /*cpu_list=*/NULL); if (evsel) { evsel->pmu_name = name ? strdup(name) : NULL; evsel->use_uncore_alias = use_uncore_alias; @@ -1617,9 +1646,10 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, return -EINVAL; } - evsel = __add_event(list, &parse_state->idx, &attr, true, - get_config_name(head_config), pmu, - &config_terms, auto_merge_stats, NULL); + evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true, + get_config_name(head_config), + get_config_metric_id(head_config), pmu, + &config_terms, auto_merge_stats, /*cpu_list=*/NULL); if (!evsel) return -ENOMEM; @@ -3345,9 +3375,12 @@ fail: struct evsel *parse_events__add_event_hybrid(struct list_head *list, int *idx, struct perf_event_attr *attr, - const char *name, struct perf_pmu *pmu, + const char *name, + const char *metric_id, + struct perf_pmu *pmu, struct list_head *config_terms) { - return __add_event(list, idx, attr, true, name, pmu, - config_terms, false, NULL); + return __add_event(list, idx, attr, /*init_attr=*/true, name, metric_id, + pmu, config_terms, /*auto_merge_stats=*/false, + /*cpu_list=*/NULL); } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 54d24c24d074..c6c8343d311b 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -87,6 +87,7 @@ enum { PARSE_EVENTS__TERM_TYPE_PERCORE, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT, 
PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE, + PARSE_EVENTS__TERM_TYPE_METRIC_ID, __PARSE_EVENTS__TERM_TYPE_NR, }; @@ -199,7 +200,8 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, bool use_alias); struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, - const char *name, struct perf_pmu *pmu); + const char *name, const char *metric_id, + struct perf_pmu *pmu); int parse_events_multi_pmu_add(struct parse_events_state *parse_state, char *str, @@ -267,6 +269,7 @@ int perf_pmu__test_parse_init(void); struct evsel *parse_events__add_event_hybrid(struct list_head *list, int *idx, struct perf_event_attr *attr, const char *name, + const char *metric_id, struct perf_pmu *pmu, struct list_head *config_terms); diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 47da7a0c5df4..b1e29b97d261 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -294,6 +294,7 @@ no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); } percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); } aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); } aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); } +metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); } r{num_raw_hex} { return raw(yyscanner); } r0x{num_raw_hex} { return raw(yyscanner); } , { return ','; } diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index 756295dedccc..f0bcfcab1a93 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -87,7 +87,8 @@ int parse_libpfm_events_option(const struct option *opt, const char *str, pmu = perf_pmu__find_by_type((unsigned int)attr.type); evsel = parse_events__add_event(evlist->core.nr_entries, - &attr, q, pmu); + &attr, q, /*metric_id=*/NULL, + pmu); if (evsel == NULL) goto error; -- cgit v1.2.3 From fb0811535e92c6c1e093d7f59eb9d66426653b39 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: 
Fri, 15 Oct 2021 10:21:26 -0700 Subject: perf parse-events: Allow config on kernel PMU events An event like inst_retired.any on an Intel skylake is found in the pmu-events code created from the pipeline event JSON. The event is an alias for cpu/event=0xc0,period=2000003/ and parse-events recognizes the event with the token PE_KERNEL_PMU_EVENT. The parser doesn't currently allow extra configuration on such events, except for modifiers, so: $ perf stat -e inst_retired.any// /bin/true event syntax error: 'inst_retired.any//' \___ parser error Run 'perf list' for a list of valid events Usage: perf stat [<options>] [<command>] -e, --event <event> event selector. use 'perf list' to list available events This patch adds configuration to these events which can be useful for a number of parameters like name and call-graph: $ sudo perf record -e inst_retired.any/call-graph=lbr/ -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.856 MB perf.data (44 samples) ] It is necessary for the metric code so that we may add metric-id values to these events before they are parsed. 
Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Kees Kook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-16-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 66 +++++++++++++++++++++++------------------- tools/perf/util/parse-events.h | 1 + tools/perf/util/parse-events.y | 17 +++++++++-- 3 files changed, 52 insertions(+), 32 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 89494b6213a6..006a7f721549 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1673,44 +1673,50 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, } int parse_events_multi_pmu_add(struct parse_events_state *parse_state, - char *str, struct list_head **listp) + char *str, struct list_head *head, + struct list_head **listp) { struct parse_events_term *term; - struct list_head *list; + struct list_head *list = NULL; struct perf_pmu *pmu = NULL; int ok = 0; + char *config; *listp = NULL; + + if (!head) { + head = malloc(sizeof(struct list_head)); + if (!head) + goto out_err; + + INIT_LIST_HEAD(head); + } + config = strdup(str); + if (!config) + goto out_err; + + if (parse_events_term__num(&term, + PARSE_EVENTS__TERM_TYPE_USER, + config, 1, false, &config, + NULL) < 0) { + free(config); + goto out_err; + } + list_add_tail(&term->list, head); + + 
/* Add it for all PMUs that support the alias */ list = malloc(sizeof(struct list_head)); if (!list) - return -1; + goto out_err; + INIT_LIST_HEAD(list); + while ((pmu = perf_pmu__scan(pmu)) != NULL) { struct perf_pmu_alias *alias; list_for_each_entry(alias, &pmu->aliases, list) { if (!strcasecmp(alias->name, str)) { - struct list_head *head; - char *config; - - head = malloc(sizeof(struct list_head)); - if (!head) - return -1; - INIT_LIST_HEAD(head); - config = strdup(str); - if (!config) - return -1; - if (parse_events_term__num(&term, - PARSE_EVENTS__TERM_TYPE_USER, - config, 1, false, &config, - NULL) < 0) { - free(list); - free(config); - return -1; - } - list_add_tail(&term->list, head); - if (!parse_events_add_pmu(parse_state, list, pmu->name, head, true, true)) { @@ -1718,17 +1724,17 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, pmu->name, alias->str); ok++; } - - parse_events_terms__delete(head); } } } - if (!ok) { +out_err: + if (ok) + *listp = list; + else free(list); - return -1; - } - *listp = list; - return 0; + + parse_events_terms__delete(head); + return ok ? 
0 : -1; } int parse_events__modifier_group(struct list_head *list, diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index c6c8343d311b..07f879e525fe 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -205,6 +205,7 @@ struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, int parse_events_multi_pmu_add(struct parse_events_state *parse_state, char *str, + struct list_head *head_config, struct list_head **listp); int parse_events_copy_term_list(struct list_head *old, diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index d94e48e1ff9b..17c8c66f3f51 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -342,7 +342,20 @@ PE_KERNEL_PMU_EVENT sep_dc struct list_head *list; int err; - err = parse_events_multi_pmu_add(_parse_state, $1, &list); + err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list); + free($1); + if (err < 0) + YYABORT; + $$ = list; +} +| +PE_KERNEL_PMU_EVENT opt_pmu_config +{ + struct list_head *list; + int err; + + /* frees $2 */ + err = parse_events_multi_pmu_add(_parse_state, $1, $2, &list); free($1); if (err < 0) YYABORT; @@ -357,7 +370,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc snprintf(pmu_name, sizeof(pmu_name), "%s-%s", $1, $3); free($1); free($3); - if (parse_events_multi_pmu_add(_parse_state, pmu_name, &list) < 0) + if (parse_events_multi_pmu_add(_parse_state, pmu_name, NULL, &list) < 0) YYABORT; $$ = list; } -- cgit v1.2.3 From ec5c5b3d2c21b3f332fdc9c026c42723fb8a0ce6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 Oct 2021 10:21:27 -0700 Subject: perf metric: Encode and use metric-id as qualifier For a metric like IPC a group of events like {instructions,cycles}:W would be formed. If the events names were changed in parsing then the metric expression parser would fail to find them. 
This change makes the event encoding be something like: {instructions/metric-id=instructions/, cycles/metric-id=cycles/} and then uses the evsel's stable metric-id value to locate the events. This fixes the case that an event is restricted to user because of the paranoia setting: $ echo 2 > /proc/sys/kernel/perf_event_paranoid $ perf stat -M IPC /bin/true Performance counter stats for '/bin/true': 150,298 inst_retired.any:u # 0.77 IPC 187,095 cpu_clk_unhalted.thread:u 0.002042731 seconds time elapsed 0.000000000 seconds user 0.002377000 seconds sys Adding the metric-id as a qualifier has a complication in that qualifiers will become embedded in qualifiers. For example, msr/tsc/ could become msr/tsc,metric-id=msr/tsc// which will fail parse-events. To solve this problem the metric is encoded and decoded for the metric-id with ! standing in for an encoded value. Previously ! wasn't parsed. With this msr/tsc/ becomes msr/tsc,metric-id=msr!3tsc!3/ The metric expression parser is changed so that @ isn't changed to /, instead this is done when the ID is encoded for parse events. metricgroup__add_metric_non_group() and metricgroup__add_metric_weak_group() need to inject the metric-id qualifier, so to avoid repetition they are merged into a single metricgroup__build_event_string with error codes more rigorously checked. stat-shadow's prepare_metric() uses the metric-id to match the metricgroup code. As "metric-id=..." is added to all events, it is added during testing with the fake PMU. This complicates pmu_str_check code as PE_PMU_EVENT_FAKE won't match as part of a configuration. The testing fake PMU case is fixed so that if a known qualifier with an ! is parsed then it isn't reported as a fake PMU. This is sufficient to pass all testing but it and the original mechanism are somewhat brittle. 
Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Kees Kook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-17-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expr.c | 4 +- tools/perf/tests/pmu-events.c | 13 +- tools/perf/util/expr.l | 6 +- tools/perf/util/metricgroup.c | 263 +++++++++++++++++++++++++++++++++-------- tools/perf/util/parse-events.l | 17 ++- tools/perf/util/stat-shadow.c | 20 +--- 6 files changed, 242 insertions(+), 81 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 718c13e5a0f4..077783223ce0 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -129,9 +129,9 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) expr__find_ids("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@", NULL, ctx) == 0); TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 2); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT1,param=3/", + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT1,param=3@", (void **)&val_ptr)); - TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT2,param=3/", + TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT2,param=3@", (void **)&val_ptr)); /* Only EVENT1 or EVENT2 need be measured depending on the value of smt_on. 
*/ diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 71b08c296410..50b1299fe643 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -761,6 +761,7 @@ static int check_parse_id(const char *id, struct parse_events_error *error, { struct evlist *evlist; int ret; + char *dup, *cur; /* Numbers are always valid. */ if (is_number(id)) @@ -769,7 +770,17 @@ static int check_parse_id(const char *id, struct parse_events_error *error, evlist = evlist__new(); if (!evlist) return -ENOMEM; - ret = __parse_events(evlist, id, error, fake_pmu); + + dup = strdup(id); + if (!dup) + return -ENOMEM; + + for (cur = strchr(dup, '@') ; cur; cur = strchr(++cur, '@')) + *cur = '/'; + + ret = __parse_events(evlist, dup, error, fake_pmu); + free(dup); + evlist__delete(evlist); return ret; } diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l index 702fdf6456ca..bd20f33418ba 100644 --- a/tools/perf/util/expr.l +++ b/tools/perf/util/expr.l @@ -41,11 +41,9 @@ static char *normalize(char *str, int runtime) char *dst = str; while (*str) { - if (*str == '@') - *dst++ = '/'; - else if (*str == '\\') + if (*str == '\\') *dst++ = *++str; - else if (*str == '?') { + else if (*str == '?') { char *paramval; int i = 0; int size = asprintf(&paramval, "%d", runtime); diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index b48836d7c080..9c16a956fd2c 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -212,13 +212,13 @@ static void metric__free(struct metric *m) free(m); } -static bool contains_event(struct evsel **metric_events, int num_events, - const char *event_name) +static bool contains_metric_id(struct evsel **metric_events, int num_events, + const char *metric_id) { int i; for (i = 0; i < num_events; i++) { - if (!strcmp(metric_events[i]->name, event_name)) + if (!strcmp(evsel__metric_id(metric_events[i]), metric_id)) return true; } return false; @@ -259,6 +259,7 @@ static struct 
evsel *find_evsel_group(struct evlist *perf_evlist, { struct evsel *ev, *current_leader = NULL; struct expr_id_data *val_ptr; + const char *metric_id; int i = 0, matched_events = 0, events_to_match; int idnum = (int)hashmap__size(pctx->ids); @@ -300,10 +301,11 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, * different sibling groups aren't both added to * metric_events. */ - if (contains_event(metric_events, matched_events, ev->name)) + metric_id = evsel__metric_id(ev); + if (contains_metric_id(metric_events, matched_events, metric_id)) continue; /* Does this event belong to the parse context? */ - if (hashmap__find(pctx->ids, ev->name, (void **)&val_ptr)) + if (hashmap__find(pctx->ids, metric_id, (void **)&val_ptr)) metric_events[matched_events++] = ev; if (matched_events == events_to_match) @@ -347,6 +349,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, * for each pmu. Set the metric leader of such events to be the * event that appears in metric_events. 
*/ + metric_id = evsel__metric_id(ev); evlist__for_each_entry_continue(perf_evlist, ev) { /* * If events are grouped then the search can terminate @@ -356,7 +359,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, ev->core.leader != metric_events[i]->core.leader && evsel_same_pmu_or_none(evsel__leader(ev), evsel__leader(metric_events[i]))) break; - if (!strcmp(metric_events[i]->name, ev->name)) { + if (!strcmp(evsel__metric_id(metric_events[i]), metric_id)) { set_bit(ev->core.idx, evlist_used); ev->metric_leader = metric_events[i]; } @@ -724,50 +727,191 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, strlist__delete(metriclist); } -static void metricgroup__add_metric_weak_group(struct strbuf *events, - struct expr_parse_ctx *ctx) +static const char *code_characters = ",-=@"; + +static int encode_metric_id(struct strbuf *sb, const char *x) +{ + char *c; + int ret = 0; + + for (; *x; x++) { + c = strchr(code_characters, *x); + if (c) { + ret = strbuf_addch(sb, '!'); + if (ret) + break; + + ret = strbuf_addch(sb, '0' + (c - code_characters)); + if (ret) + break; + } else { + ret = strbuf_addch(sb, *x); + if (ret) + break; + } + } + return ret; +} + +static int decode_metric_id(struct strbuf *sb, const char *x) +{ + const char *orig = x; + size_t i; + char c; + int ret; + + for (; *x; x++) { + c = *x; + if (*x == '!') { + x++; + i = *x - '0'; + if (i > strlen(code_characters)) { + pr_err("Bad metric-id encoding in: '%s'", orig); + return -1; + } + c = code_characters[i]; + } + ret = strbuf_addch(sb, c); + if (ret) + return ret; + } + return 0; +} + +static int decode_all_metric_ids(struct evlist *perf_evlist) +{ + struct evsel *ev; + struct strbuf sb = STRBUF_INIT; + char *cur; + int ret = 0; + + evlist__for_each_entry(perf_evlist, ev) { + if (!ev->metric_id) + continue; + + ret = strbuf_setlen(&sb, 0); + if (ret) + break; + + ret = decode_metric_id(&sb, ev->metric_id); + if (ret) + break; + + free((char *)ev->metric_id); + 
ev->metric_id = strdup(sb.buf); + if (!ev->metric_id) { + ret = -ENOMEM; + break; + } + /* + * If the name is just the parsed event, use the metric-id to + * give a more friendly display version. + */ + if (strstr(ev->name, "metric-id=")) { + free(ev->name); + for (cur = strchr(sb.buf, '@') ; cur; cur = strchr(++cur, '@')) + *cur = '/'; + + ev->name = strdup(sb.buf); + if (!ev->name) { + ret = -ENOMEM; + break; + } + } + } + strbuf_release(&sb); + return ret; +} + +static int metricgroup__build_event_string(struct strbuf *events, + const struct expr_parse_ctx *ctx, + bool has_constraint) { struct hashmap_entry *cur; size_t bkt; bool no_group = true, has_duration = false; + int ret = 0; + +#define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0) hashmap__for_each_entry(ctx->ids, cur, bkt) { - pr_debug("found event %s\n", (const char *)cur->key); + const char *sep, *rsep, *id = cur->key; + + pr_debug("found event %s\n", id); /* * Duration time maps to a software event and can make * groups not count. Always use it outside a * group. */ - if (!strcmp(cur->key, "duration_time")) { + if (!strcmp(id, "duration_time")) { has_duration = true; continue; } - strbuf_addf(events, "%s%s", - no_group ? "{" : ",", - (const char *)cur->key); - no_group = false; - } - if (!no_group) { - strbuf_addf(events, "}:W"); - if (has_duration) - strbuf_addf(events, ",duration_time"); - } else if (has_duration) - strbuf_addf(events, "duration_time"); -} - -static void metricgroup__add_metric_non_group(struct strbuf *events, - struct expr_parse_ctx *ctx) -{ - struct hashmap_entry *cur; - size_t bkt; - bool first = true; + /* Separate events with commas and open the group if necessary. 
*/ + if (no_group) { + if (!has_constraint) { + ret = strbuf_addch(events, '{'); + RETURN_IF_NON_ZERO(ret); + } - hashmap__for_each_entry(ctx->ids, cur, bkt) { - if (!first) - strbuf_addf(events, ","); - strbuf_addf(events, "%s", (const char *)cur->key); - first = false; + no_group = false; + } else { + ret = strbuf_addch(events, ','); + RETURN_IF_NON_ZERO(ret); + } + /* + * Encode the ID as an event string. Add a qualifier for + * metric_id that is the original name except with characters + * that parse-events can't parse replaced. For example, + * 'msr@tsc@' gets added as msr/tsc,metric-id=msr!3tsc!3/ + */ + sep = strchr(id, '@'); + if (sep != NULL) { + ret = strbuf_add(events, id, sep - id); + RETURN_IF_NON_ZERO(ret); + ret = strbuf_addch(events, '/'); + RETURN_IF_NON_ZERO(ret); + rsep = strrchr(sep, '@'); + ret = strbuf_add(events, sep + 1, rsep - sep - 1); + RETURN_IF_NON_ZERO(ret); + ret = strbuf_addstr(events, ",metric-id="); + RETURN_IF_NON_ZERO(ret); + sep = rsep; + } else { + sep = strchr(id, ':'); + if (sep != NULL) { + ret = strbuf_add(events, id, sep - id); + RETURN_IF_NON_ZERO(ret); + } else { + ret = strbuf_addstr(events, id); + RETURN_IF_NON_ZERO(ret); + } + ret = strbuf_addstr(events, "/metric-id="); + RETURN_IF_NON_ZERO(ret); + } + ret = encode_metric_id(events, id); + RETURN_IF_NON_ZERO(ret); + ret = strbuf_addstr(events, "/"); + RETURN_IF_NON_ZERO(ret); + + if (sep != NULL) { + ret = strbuf_addstr(events, sep + 1); + RETURN_IF_NON_ZERO(ret); + } } + if (has_duration) { + if (no_group) { + /* Strange case of a metric of just duration_time. 
*/ + ret = strbuf_addf(events, "duration_time"); + } else if (!has_constraint) + ret = strbuf_addf(events, "}:W,duration_time"); + else + ret = strbuf_addf(events, ",duration_time"); + } else if (!no_group && !has_constraint) + ret = strbuf_addf(events, "}:W"); + + return ret; +#undef RETURN_IF_NON_ZERO } int __weak arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused) @@ -1134,16 +1278,17 @@ static int metricgroup__add_metric(const char *metric_name, bool metric_no_group list_sort(NULL, &list, metric_list_cmp); list_for_each_entry(m, &list, nd) { - if (events->len > 0) - strbuf_addf(events, ","); - - if (m->has_constraint) { - metricgroup__add_metric_non_group(events, - m->pctx); - } else { - metricgroup__add_metric_weak_group(events, - m->pctx); + if (events->len > 0) { + ret = strbuf_addf(events, ","); + if (ret) + break; } + + ret = metricgroup__build_event_string(events, + m->pctx, + m->has_constraint); + if (ret) + break; } out: @@ -1180,30 +1325,40 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group, const struct pmu_events_map *map) { char *llist, *nlist, *p; - int ret = -EINVAL; + int ret, count = 0; nlist = strdup(list); if (!nlist) return -ENOMEM; llist = nlist; - strbuf_init(events, 100); - strbuf_addf(events, "%s", ""); + ret = strbuf_init(events, 100); + if (ret) + return ret; while ((p = strsep(&llist, ",")) != NULL) { ret = metricgroup__add_metric(p, metric_no_group, events, metric_list, map); - if (ret == -EINVAL) { - fprintf(stderr, "Cannot find metric or group `%s'\n", - p); + if (ret == -EINVAL) + fprintf(stderr, "Cannot find metric or group `%s'\n", p); + + if (ret) break; - } + + count++; } free(nlist); - if (!ret) + if (!ret) { + /* + * Warn about nmi_watchdog if any parsed metrics had the + * NO_NMI_WATCHDOG constraint. + */ metricgroup___watchdog_constraint_hint(NULL, true); - + /* No metrics. 
*/ + if (count == 0) + return -EINVAL; + } return ret; } @@ -1243,6 +1398,10 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, parse_events_print_error(&parse_error, extra_events.buf); goto out; } + ret = decode_all_metric_ids(perf_evlist); + if (ret) + goto out; + ret = metricgroup__setup_events(&metric_list, metric_no_merge, perf_evlist, metric_events); out: diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index b1e29b97d261..4efe9872c667 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -138,18 +138,23 @@ static int pmu_str_check(yyscan_t scanner, struct parse_events_state *parse_stat yylval->str = strdup(text); - if (parse_state->fake_pmu) - return PE_PMU_EVENT_FAKE; - + /* + * If we're not testing then parse check determines the PMU event type + * which if it isn't a PMU returns PE_NAME. When testing the result of + * parse check can't be trusted so we return PE_PMU_EVENT_FAKE unless + * an '!' is present in which case the text can't be a PMU name. + */ switch (perf_pmu__parse_check(text)) { case PMU_EVENT_SYMBOL_PREFIX: return PE_PMU_EVENT_PRE; case PMU_EVENT_SYMBOL_SUFFIX: return PE_PMU_EVENT_SUF; case PMU_EVENT_SYMBOL: - return PE_KERNEL_PMU_EVENT; + return parse_state->fake_pmu + ? PE_PMU_EVENT_FAKE : PE_KERNEL_PMU_EVENT; default: - return PE_NAME; + return parse_state->fake_pmu && !strchr(text,'!') + ? PE_PMU_EVENT_FAKE : PE_NAME; } } @@ -204,7 +209,7 @@ bpf_source [^,{}]+\.c[a-zA-Z0-9._]* num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ num_raw_hex [a-fA-F0-9]+ -name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]]* +name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]!]* name_tag [\'][a-zA-Z_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\'] name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? 
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 20f1b9d0f272..69f3cf3b4a44 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -821,7 +821,7 @@ static int prepare_metric(struct evsel **metric_events, struct runtime_stat *st) { double scale; - char *n, *pn; + char *n; int i, j, ret; for (i = 0; metric_events[i]; i++) { @@ -844,23 +844,11 @@ static int prepare_metric(struct evsel **metric_events, if (v->metric_other) metric_total = v->metric_total; } - - n = strdup(metric_events[i]->name); + n = strdup(evsel__metric_id(metric_events[i])); if (!n) return -ENOMEM; - /* - * This display code with --no-merge adds [cpu] postfixes. - * These are not supported by the parser. Remove everything - * after the space. - */ - pn = strchr(n, ' '); - if (pn) - *pn = 0; - - if (metric_total) - expr__add_id_val(pctx, n, metric_total); - else - expr__add_id_val(pctx, n, avg_stats(stats)*scale); + + expr__add_id_val(pctx, n, metric_total ? : avg_stats(stats) * scale); } for (j = 0; metric_refs && metric_refs[j].metric_name; j++) { -- cgit v1.2.3 From 798c3f4a668e9281bb4060cbaf3b7c7bf25a8c6f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 Oct 2021 10:21:28 -0700 Subject: perf expr: Add subset_of_ids() utility Add a helper that returns true if all the IDs in needles are present in haystack. Later this will be used in sharing events between metrics. 
Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Kees Kook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-18-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/expr.c | 15 +++++++++++++++ tools/perf/util/expr.h | 2 ++ 2 files changed, 17 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 5657222aaa25..77c6ad81a923 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -211,6 +211,21 @@ int expr__get_id(struct expr_parse_ctx *ctx, const char *id, return hashmap__find(ctx->ids, id, (void **)data) ? 
0 : -1; } +bool expr__subset_of_ids(struct expr_parse_ctx *haystack, + struct expr_parse_ctx *needles) +{ + struct hashmap_entry *cur; + size_t bkt; + struct expr_id_data *data; + + hashmap__for_each_entry(needles->ids, cur, bkt) { + if (expr__get_id(haystack, cur->key, &data)) + return false; + } + return true; +} + + int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **datap) { diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index c6e534f633c3..cf81f9166dbb 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -43,6 +43,8 @@ int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val); int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref); int expr__get_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **data); +bool expr__subset_of_ids(struct expr_parse_ctx *haystack, + struct expr_parse_ctx *needles); int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id, struct expr_id_data **datap); -- cgit v1.2.3 From 5ecd5a0c7d1cca79f1431093d12e4cd9893b0331 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 Oct 2021 10:21:29 -0700 Subject: perf metrics: Modify setup and deduplication Previously find_evsel_group was trying to share events while mark-sweeping to eliminate unused events, this was complicated and had issues around uncore events and grouped sharing. This was further complicated by the event string being created while metrics and metric groups were being added, with the string affecting the evlist order. This change moves deduplication before event parsing. Ungrouped events are placed in a single combined set. Groups are checked to see if an earlier (larger) group can support their events. As the deduplication and sharing detection is done on metric IDs before parsing, wildcard expansion problems with uncore events are avoided. Overall the code is simpler while working better. 
An example of failing to deduplicate can be seen with a list of metrics like the following, where in the after case multiplexing has been avoided: Before: $ perf stat -M Bad_Speculation,Backend_Bound,Frontend_Bound,Retiring -a sleep 2 Performance counter stats for 'system wide': 959,620,872 uops_issued.any # 0.06 Bad_Speculation (50.03%) 2,163,072,261 cycles # 0.09 Retiring (50.03%) 735,827,436 uops_retired.retire_slots (50.03%) 74,676,484 int_misc.recovery_cycles (50.03%) 987,062,794 uops_issued.any # 0.50 Backend_Bound (49.97%) 2,203,734,187 cycles # 0.35 Frontend_Bound (49.97%) 3,085,016,091 idq_uops_not_delivered.core (49.97%) 758,599,232 uops_retired.retire_slots (49.97%) 75,807,526 int_misc.recovery_cycles (49.97%) 2.002103760 seconds time elapsed After: $ sudo perf stat -M Bad_Speculation,Backend_Bound,Frontend_Bound,Retiring -a sleep 2 Performance counter stats for 'system wide': 769,694,676 uops_issued.any # 0.08 Bad_Speculation # 0.41 Backend_Bound 1,087,548,633 cycles # 0.38 Frontend_Bound # 0.14 Retiring 1,642,085,777 idq_uops_not_delivered.core 603,112,590 uops_retired.retire_slots 43,787,854 int_misc.recovery_cycles 2.003844383 seconds time elapsed Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Kees Kook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-19-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 513 
+++++++++++++++++++++--------------------- 1 file changed, 262 insertions(+), 251 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 9c16a956fd2c..988f9e95dded 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -144,6 +144,12 @@ struct metric { * events won't be grouped. */ bool has_constraint; + /** + * Parsed events for the metric. Optional as events may be taken from a + * different metric whose group contains all the IDs necessary for this + * one. + */ + struct evlist *evlist; }; static void metricgroup___watchdog_constraint_hint(const char *name, bool foot) @@ -201,6 +207,7 @@ static struct metric *metric__new(const struct pmu_event *pe, m->pctx->runtime = runtime; m->has_constraint = metric_no_group || metricgroup__has_constraint(pe); m->metric_refs = NULL; + m->evlist = NULL; return m; } @@ -224,222 +231,82 @@ static bool contains_metric_id(struct evsel **metric_events, int num_events, return false; } -static bool evsel_same_pmu_or_none(struct evsel *ev1, struct evsel *ev2) -{ - if (!ev1->pmu_name || !ev2->pmu_name) - return true; - - return !strcmp(ev1->pmu_name, ev2->pmu_name); -} - /** - * Find a group of events in perf_evlist that correspond to those from a parsed - * metric expression. Note, as find_evsel_group is called in the same order as - * perf_evlist was constructed, metric_no_merge doesn't need to test for - * underfilling a group. - * @perf_evlist: a list of events something like: {metric1 leader, metric1 - * sibling, metric1 sibling}:W,duration_time,{metric2 leader, metric2 sibling, - * metric2 sibling}:W,duration_time - * @pctx: the parse context for the metric expression. - * @metric_no_merge: don't attempt to share events for the metric with other - * metrics. - * @has_constraint: is there a constraint on the group of events? In which case - * the events won't be grouped. 
- * @metric_events: out argument, null terminated array of evsel's associated - * with the metric. - * @evlist_used: in/out argument, bitmap tracking which evlist events are used. - * @return the first metric event or NULL on failure. + * setup_metric_events - Find a group of events in metric_evlist that correspond + * to the IDs from a parsed metric expression. + * @ids: the metric IDs to match. + * @metric_evlist: the list of perf events. + * @out_metric_events: holds the created metric events array. */ -static struct evsel *find_evsel_group(struct evlist *perf_evlist, - struct expr_parse_ctx *pctx, - bool metric_no_merge, - bool has_constraint, - struct evsel **metric_events, - unsigned long *evlist_used) +static int setup_metric_events(struct hashmap *ids, + struct evlist *metric_evlist, + struct evsel ***out_metric_events) { - struct evsel *ev, *current_leader = NULL; - struct expr_id_data *val_ptr; + struct evsel **metric_events; const char *metric_id; - int i = 0, matched_events = 0, events_to_match; - int idnum = (int)hashmap__size(pctx->ids); + struct evsel *ev; + size_t ids_size, matched_events, i; - if (idnum != 0) { - /* - * duration_time is always grouped separately, when events are - * grouped (ie has_constraint is false) then ignore it in the - * matching loop and add it to metric_events at the end. - */ - events_to_match = idnum; - if (!has_constraint && hashmap__find(pctx->ids, "duration_time", (void **)&val_ptr)) - events_to_match--; + *out_metric_events = NULL; + ids_size = hashmap__size(ids); - evlist__for_each_entry(perf_evlist, ev) { - /* - * Events with a constraint aren't grouped and match the - * first events available. - */ - if (has_constraint && ev->weak_group) - continue; - /* Ignore event if already used and merging is disabled. 
*/ - if (metric_no_merge && test_bit(ev->core.idx, evlist_used)) - continue; - if (!has_constraint && !evsel__has_leader(ev, current_leader)) { - /* - * Start of a new group, discard the whole match - * and start again. - */ - matched_events = 0; - memset(metric_events, 0, sizeof(struct evsel *) * idnum); - current_leader = evsel__leader(ev); - } - /* - * Check for duplicate events with the same name. For - * example, uncore_imc/cas_count_read/ will turn into 6 - * events per socket on skylakex. Only the first such - * event is placed in metric_events. If events aren't - * grouped then this also ensures that the same event in - * different sibling groups aren't both added to - * metric_events. - */ - metric_id = evsel__metric_id(ev); - if (contains_metric_id(metric_events, matched_events, metric_id)) - continue; - /* Does this event belong to the parse context? */ - if (hashmap__find(pctx->ids, metric_id, (void **)&val_ptr)) - metric_events[matched_events++] = ev; + metric_events = calloc(sizeof(void *), ids_size + 1); + if (!metric_events) + return -ENOMEM; + + matched_events = 0; + evlist__for_each_entry(metric_evlist, ev) { + struct expr_id_data *val_ptr; - if (matched_events == events_to_match) - break; - } - } else { /* - * There are no events to match, but we need to associate the - * metric with an event for printing. A duration_time event was - * parsed for this. + * Check for duplicate events with the same name. For + * example, uncore_imc/cas_count_read/ will turn into 6 + * events per socket on skylakex. Only the first such + * event is placed in metric_events. */ - idnum = 1; - events_to_match = 0; - } - if (events_to_match != idnum) { - /* Add the first duration_time. */ - ev = evlist__find_evsel_by_str(perf_evlist, "duration_time"); - if (ev) + metric_id = evsel__metric_id(ev); + if (contains_metric_id(metric_events, matched_events, metric_id)) + continue; + /* + * Does this event belong to the parse context? 
For + * combined or shared groups, this metric may not care + * about this event. + */ + if (hashmap__find(ids, metric_id, (void **)&val_ptr)) { metric_events[matched_events++] = ev; - } - if (matched_events != idnum) { - /* Not a whole match */ - return NULL; + if (matched_events >= ids_size) + break; + } } - - metric_events[idnum] = NULL; - - for (i = 0; i < idnum; i++) { + if (matched_events < ids_size) { + free(metric_events); + return -EINVAL; + } + for (i = 0; i < ids_size; i++) { ev = metric_events[i]; - /* Don't free the used events. */ - set_bit(ev->core.idx, evlist_used); + ev->collect_stat = true; + /* - * The metric leader points to the identically named event in - * metric_events. + * The metric leader points to the identically named + * event in metric_events. */ ev->metric_leader = ev; /* - * Mark two events with identical names in the same group (or - * globally) as being in use as uncore events may be duplicated - * for each pmu. Set the metric leader of such events to be the - * event that appears in metric_events. + * Mark two events with identical names in the same + * group (or globally) as being in use as uncore events + * may be duplicated for each pmu. Set the metric leader + * of such events to be the event that appears in + * metric_events. */ metric_id = evsel__metric_id(ev); - evlist__for_each_entry_continue(perf_evlist, ev) { - /* - * If events are grouped then the search can terminate - * when then group is left. 
- */ - if (!has_constraint && - ev->core.leader != metric_events[i]->core.leader && - evsel_same_pmu_or_none(evsel__leader(ev), evsel__leader(metric_events[i]))) - break; - if (!strcmp(evsel__metric_id(metric_events[i]), metric_id)) { - set_bit(ev->core.idx, evlist_used); + evlist__for_each_entry_continue(metric_evlist, ev) { + if (!strcmp(evsel__metric_id(metric_events[i]), metric_id)) ev->metric_leader = metric_events[i]; - } - } - } - - return metric_events[0]; -} - -static int metricgroup__setup_events(struct list_head *groups, - bool metric_no_merge, - struct evlist *perf_evlist, - struct rblist *metric_events_list) -{ - struct metric_event *me; - struct metric_expr *expr; - int i = 0; - int ret = 0; - struct metric *m; - struct evsel *evsel, *tmp; - unsigned long *evlist_used; - - evlist_used = bitmap_zalloc(perf_evlist->core.nr_entries); - if (!evlist_used) - return -ENOMEM; - - list_for_each_entry (m, groups, nd) { - struct evsel **metric_events; - const size_t ids_size = hashmap__size(m->pctx->ids); - - metric_events = calloc(sizeof(void *), - ids_size == 0 ? 
2 : ids_size + 1); - if (!metric_events) { - ret = -ENOMEM; - break; - } - evsel = find_evsel_group(perf_evlist, m->pctx, - metric_no_merge, - m->has_constraint, metric_events, - evlist_used); - if (!evsel) { - pr_debug("Cannot resolve %s: %s\n", - m->metric_name, m->metric_expr); - free(metric_events); - continue; - } - for (i = 0; metric_events[i]; i++) - metric_events[i]->collect_stat = true; - me = metricgroup__lookup(metric_events_list, evsel, true); - if (!me) { - ret = -ENOMEM; - free(metric_events); - break; - } - expr = malloc(sizeof(struct metric_expr)); - if (!expr) { - ret = -ENOMEM; - free(metric_events); - break; - } - - expr->metric_refs = m->metric_refs; - m->metric_refs = NULL; - expr->metric_expr = m->metric_expr; - expr->metric_name = m->metric_name; - expr->metric_unit = m->metric_unit; - expr->metric_events = metric_events; - expr->runtime = m->pctx->runtime; - list_add(&expr->nd, &me->head); - } - - evlist__for_each_entry_safe(perf_evlist, tmp, evsel) { - if (!test_bit(evsel->core.idx, evlist_used)) { - evlist__remove(perf_evlist, evsel); - evsel__delete(evsel); } } - bitmap_free(evlist_used); - - return ret; + *out_metric_events = metric_events; + return 0; } static bool match_metric(const char *n, const char *list) @@ -1222,20 +1089,15 @@ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, * @metric_no_group: Should events written to events be grouped "{}" or * global. Grouping is the default but due to multiplexing the * user may override. - * @events: an out argument string of events that need to be parsed and - * associated with the metric. For example, the metric "IPC" would - * create an events string like "{instructions,cycles}:W". * @metric_list: The list that the metric or metric group are added to. * @map: The map that is searched for metrics, most commonly the table for the * architecture perf is running upon. 
*/ static int metricgroup__add_metric(const char *metric_name, bool metric_no_group, - struct strbuf *events, struct list_head *metric_list, const struct pmu_events_map *map) { const struct pmu_event *pe; - struct metric *m; LIST_HEAD(list); int i, ret; bool has_match = false; @@ -1269,27 +1131,8 @@ static int metricgroup__add_metric(const char *metric_name, bool metric_no_group pmu_for_each_sys_event(metricgroup__sys_event_iter, &data); } /* End of pmu events. */ - if (!has_match) { + if (!has_match) ret = -EINVAL; - goto out; - } - - /* Sort metrics from largest to smallest. */ - list_sort(NULL, &list, metric_list_cmp); - - list_for_each_entry(m, &list, nd) { - if (events->len > 0) { - ret = strbuf_addf(events, ","); - if (ret) - break; - } - - ret = metricgroup__build_event_string(events, - m->pctx, - m->has_constraint); - if (ret) - break; - } out: /* @@ -1297,9 +1140,6 @@ out: * even if it's failed */ list_splice(&list, metric_list); - - /* Sort metrics from largest to smallest. */ - list_sort(NULL, metric_list, metric_list_cmp); return ret; } @@ -1312,15 +1152,11 @@ out: * @metric_no_group: Should events written to events be grouped "{}" or * global. Grouping is the default but due to multiplexing the * user may override. - * @events: an out argument string of events that need to be parsed and - * associated with the metric. For example, the metric "IPC" would - * create an events string like "{instructions,cycles}:W". * @metric_list: The list that metrics are added to. * @map: The map that is searched for metrics, most commonly the table for the * architecture perf is running upon. 
*/ static int metricgroup__add_metric_list(const char *list, bool metric_no_group, - struct strbuf *events, struct list_head *metric_list, const struct pmu_events_map *map) { @@ -1332,13 +1168,9 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group, return -ENOMEM; llist = nlist; - ret = strbuf_init(events, 100); - if (ret) - return ret; - while ((p = strsep(&llist, ",")) != NULL) { - ret = metricgroup__add_metric(p, metric_no_group, events, - metric_list, map); + ret = metricgroup__add_metric(p, metric_no_group, metric_list, + map); if (ret == -EINVAL) fprintf(stderr, "Cannot find metric or group `%s'\n", p); @@ -1372,41 +1204,220 @@ static void metricgroup__free_metrics(struct list_head *metric_list) } } +/** + * build_combined_expr_ctx - Make an expr_parse_ctx with all has_constraint + * metric IDs, as the IDs are held in a set, + * duplicates will be removed. + * @metric_list: List to take metrics from. + * @combined: Out argument for result. + */ +static int build_combined_expr_ctx(const struct list_head *metric_list, + struct expr_parse_ctx **combined) +{ + struct hashmap_entry *cur; + size_t bkt; + struct metric *m; + char *dup; + int ret; + + *combined = expr__ctx_new(); + if (!*combined) + return -ENOMEM; + + list_for_each_entry(m, metric_list, nd) { + if (m->has_constraint) { + hashmap__for_each_entry(m->pctx->ids, cur, bkt) { + dup = strdup(cur->key); + if (!dup) { + ret = -ENOMEM; + goto err_out; + } + ret = expr__add_id(*combined, dup); + if (ret) + goto err_out; + } + } + } + return 0; +err_out: + expr__ctx_free(*combined); + *combined = NULL; + return ret; +} + +/** + * parse_ids - Build the event string for the ids and parse them creating an + * evlist. The encoded metric_ids are decoded. + * @fake_pmu: used when testing metrics not supported by the current CPU. + * @ids: the event identifiers parsed from a metric. + * @has_constraint: false if events should be placed in a weak group. 
+ * @out_evlist: the created list of events. + */ +static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids, + bool has_constraint, struct evlist **out_evlist) +{ + struct parse_events_error parse_error; + struct evlist *parsed_evlist; + struct strbuf events = STRBUF_INIT; + int ret; + + *out_evlist = NULL; + if (hashmap__size(ids->ids) == 0) { + char *tmp; + /* + * No ids/events in the expression parsing context. Events may + * have been removed because of constant evaluation, e.g.: + * event1 if #smt_on else 0 + * Add a duration_time event to avoid a parse error on an empty + * string. + */ + tmp = strdup("duration_time"); + if (!tmp) + return -ENOMEM; + + ids__insert(ids->ids, tmp); + } + ret = metricgroup__build_event_string(&events, ids, has_constraint); + if (ret) + return ret; + + parsed_evlist = evlist__new(); + if (!parsed_evlist) { + ret = -ENOMEM; + goto err_out; + } + pr_debug("Parsing metric events '%s'\n", events.buf); + bzero(&parse_error, sizeof(parse_error)); + ret = __parse_events(parsed_evlist, events.buf, &parse_error, fake_pmu); + if (ret) { + parse_events_print_error(&parse_error, events.buf); + goto err_out; + } + ret = decode_all_metric_ids(parsed_evlist); + if (ret) + goto err_out; + + *out_evlist = parsed_evlist; + parsed_evlist = NULL; +err_out: + evlist__delete(parsed_evlist); + strbuf_release(&events); + return ret; +} + static int parse_groups(struct evlist *perf_evlist, const char *str, bool metric_no_group, bool metric_no_merge, struct perf_pmu *fake_pmu, - struct rblist *metric_events, + struct rblist *metric_events_list, const struct pmu_events_map *map) { - struct parse_events_error parse_error; - struct strbuf extra_events; + struct evlist *combined_evlist = NULL; LIST_HEAD(metric_list); + struct metric *m; int ret; - if (metric_events->nr_entries == 0) - metricgroup__rblist_init(metric_events); + if (metric_events_list->nr_entries == 0) + metricgroup__rblist_init(metric_events_list); ret = 
metricgroup__add_metric_list(str, metric_no_group, - &extra_events, &metric_list, map); + &metric_list, map); if (ret) goto out; - pr_debug("adding %s\n", extra_events.buf); - bzero(&parse_error, sizeof(parse_error)); - ret = __parse_events(perf_evlist, extra_events.len > 0 ? extra_events.buf : "duration_time", - &parse_error, fake_pmu); - if (ret) { - parse_events_print_error(&parse_error, extra_events.buf); - goto out; + + /* Sort metrics from largest to smallest. */ + list_sort(NULL, &metric_list, metric_list_cmp); + + if (!metric_no_merge) { + struct expr_parse_ctx *combined = NULL; + + ret = build_combined_expr_ctx(&metric_list, &combined); + + if (!ret && combined && hashmap__size(combined->ids)) { + ret = parse_ids(fake_pmu, combined, /*has_constraint=*/true, + &combined_evlist); + } + if (combined) + expr__ctx_free(combined); + + if (ret) + goto out; + } + + list_for_each_entry(m, &metric_list, nd) { + struct metric_event *me; + struct evsel **metric_events; + struct evlist *metric_evlist = NULL; + struct metric *n; + struct metric_expr *expr; + + if (combined_evlist && m->has_constraint) { + metric_evlist = combined_evlist; + } else if (!metric_no_merge) { + /* + * See if the IDs for this metric are a subset of an + * earlier metric. 
+ */ + list_for_each_entry(n, &metric_list, nd) { + if (m == n) + break; + + if (n->evlist == NULL) + continue; + + if (expr__subset_of_ids(n->pctx, m->pctx)) { + pr_debug("Events in '%s' fully contained within '%s'\n", + m->metric_name, n->metric_name); + metric_evlist = n->evlist; + break; + } + + } + } + if (!metric_evlist) { + ret = parse_ids(fake_pmu, m->pctx, m->has_constraint, + &m->evlist); + if (ret) + goto out; + + metric_evlist = m->evlist; + } + ret = setup_metric_events(m->pctx->ids, metric_evlist, &metric_events); + if (ret) { + pr_debug("Cannot resolve IDs for %s: %s\n", + m->metric_name, m->metric_expr); + goto out; + } + + me = metricgroup__lookup(metric_events_list, metric_events[0], true); + + expr = malloc(sizeof(struct metric_expr)); + if (!expr) { + ret = -ENOMEM; + free(metric_events); + goto out; + } + + expr->metric_refs = m->metric_refs; + m->metric_refs = NULL; + expr->metric_expr = m->metric_expr; + expr->metric_name = m->metric_name; + expr->metric_unit = m->metric_unit; + expr->metric_events = metric_events; + expr->runtime = m->pctx->runtime; + list_add(&expr->nd, &me->head); + } + + + if (combined_evlist) + evlist__splice_list_tail(perf_evlist, &combined_evlist->core.entries); + + list_for_each_entry(m, &metric_list, nd) { + if (m->evlist) + evlist__splice_list_tail(perf_evlist, &m->evlist->core.entries); } - ret = decode_all_metric_ids(perf_evlist); - if (ret) - goto out; - ret = metricgroup__setup_events(&metric_list, metric_no_merge, - perf_evlist, metric_events); out: metricgroup__free_metrics(&metric_list); - strbuf_release(&extra_events); return ret; } -- cgit v1.2.3 From e068c25671accfe762524f01ee24dff750849623 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 Oct 2021 10:21:30 -0700 Subject: perf metric: Switch fprintf() to pr_err() There's no clear reason for the inconsistency that stems from the initial commit. 
Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Kees Kook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-20-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 988f9e95dded..b6ba1317973e 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -1172,7 +1172,7 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group, ret = metricgroup__add_metric(p, metric_no_group, metric_list, map); if (ret == -EINVAL) - fprintf(stderr, "Cannot find metric or group `%s'\n", p); + pr_err("Cannot find metric or group `%s'\n", p); if (ret) break; -- cgit v1.2.3 From eabd4523395e4a8f2b049165642801f2ab8ff893 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 Oct 2021 10:21:31 -0700 Subject: perf parse-events: Identify broken modifiers Previously the broken modifier causes a usage message to be printed but nothing else. After: $ perf stat -e 'cycles:kk' -a sleep 2 event syntax error: 'cycles:kk' \___ Bad modifier Run 'perf list' for a list of valid events Usage: perf stat [<options>] [<command>] -e, --event <event> event selector. 
use 'perf list' to list available events $ perf stat -e '{instructions,cycles}:kk' -a sleep 2 event syntax error: '..ns,cycles}:kk' \___ Bad modifier Run 'perf list' for a list of valid events Usage: perf stat [<options>] [<command>] -e, --event <event> event selector. use 'perf list' to list available events Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Kees Kook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-21-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 17c8c66f3f51..2d60f3cbe42b 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -183,6 +183,11 @@ group_def ':' PE_MODIFIER_EVENT err = parse_events__modifier_group(list, $3); free($3); if (err) { + struct parse_events_state *parse_state = _parse_state; + struct parse_events_error *error = parse_state->error; + + parse_events__handle_error(error, @3.first_column, + strdup("Bad modifier"), NULL); free_list_evsel(list); YYABORT; } @@ -240,6 +245,11 @@ event_name PE_MODIFIER_EVENT err = parse_events__modifier_event(list, $2, false); free($2); if (err) { + struct parse_events_state *parse_state = _parse_state; + struct parse_events_error *error = parse_state->error; + + parse_events__handle_error(error, 
@2.first_column, + strdup("Bad modifier"), NULL); free_list_evsel(list); YYABORT; } -- cgit v1.2.3 From b85a4d61d30226080ff6fec9ea4096d369fd6852 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 15 Oct 2021 10:21:32 -0700 Subject: perf metric: Allow modifiers on metrics By allowing modifiers on metrics we can, for example, gather the same metric for kernel and user mode. On a SkylakeX with TopDownL1 this gives: $ perf stat -M TopDownL1:u,TopDownL1:k -a sleep 2 Performance counter stats for 'system wide': 849,855,577 uops_issued.any:k # 0.06 Bad_Speculation:k # 0.51 Backend_Bound:k (16.71%) 1,995,257,996 cycles:k # 7981031984.00 SLOTS:k # 0.35 Frontend_Bound:k # 0.08 Retiring:k (16.71%) 2,791,940,753 idq_uops_not_delivered.core:k (16.71%) 641,961,928 uops_retired.retire_slots:k (16.71%) 72,239,337 int_misc.recovery_cycles:k (16.71%) 2,294,413,647 uops_issued.any:u # 0.04 Bad_Speculation:u # 0.39 Backend_Bound:u (16.78%) 1,333,248,940 cycles:u # 5332995760.00 SLOTS:u # 0.16 Frontend_Bound:u # 0.40 Retiring:u (16.78%) 858,517,081 idq_uops_not_delivered.core:u (16.78%) 2,153,789,582 uops_retired.retire_slots:u (16.78%) 19,373,627 int_misc.recovery_cycles:u (16.78%) 31,503,661 cpu_clk_unhalted.one_thread_active:k # 0.18 CoreIPC_SMT:k (16.73%) 315,454,104 inst_retired.any:k # 315454104.00 Instructions:k (16.73%) 42,533,729 cpu_clk_unhalted.ref_xclk:k (16.73%) 2,043,119,037 cpu_clk_unhalted.thread:k (16.73%) 28,843,803 cpu_clk_unhalted.one_thread_active:u # 1.55 CoreIPC_SMT:u (16.60%) 2,153,353,869 inst_retired.any:u # 2153353869.00 Instructions:u (16.60%) 28,844,743 cpu_clk_unhalted.ref_xclk:u (16.60%) 1,387,544,378 cpu_clk_unhalted.thread:u (16.60%) 308,031,603 inst_retired.any:k # 0.15 CoreIPC:k (33.19%) 2,036,774,753 cycles:k (33.19%) 1,994,344,281 inst_retired.any:u # 1.59 CoreIPC:u (33.18%) 1,251,538,227 cycles:u (33.18%) 2.000342948 seconds time elapsed Modifiers are naively copied and pasted onto events; this can yield errors like: $ perf stat -M 
Kernel_Utilization:k -a sleep 2 event syntax error: '..d.thread:k/kk,cpu_clk_unhalted.thread/metric-id=cpu_clk_unhalted.thread/k..' \___ Bad modifier Usage: perf stat [<options>] [<command>] -M, --metrics <metric/metric group list> monitor specified metrics or metric groups (separated by ,) When modifiers are present with constraints, from --metric-no-group or the NMI watchdog, they are no longer placed in the same set - which may miss deduplicating events. Signed-off-by: Ian Rogers Acked-by: Andi Kleen Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Andrew Kilroy Cc: Andrew Morton Cc: Changbin Du Cc: Denys Zagorui Cc: Fabian Hemmer Cc: Felix Fietkau Cc: Heiko Carstens Cc: Ingo Molnar Cc: Jacob Keller Cc: Jiapeng Chong Cc: Jin Yao Cc: Jiri Olsa Cc: Joakim Zhang Cc: John Garry Cc: Kajol Jain Cc: Kan Liang Cc: Kees Kook Cc: Mark Rutland Cc: Namhyung Kim Cc: Nicholas Fraser Cc: Nick Desaulniers Cc: Paul Clarke Cc: Peter Zijlstra Cc: Riccardo Mancini Cc: Sami Tolvanen Cc: ShihCheng Tu Cc: Song Liu Cc: Stephane Eranian Cc: Sumanth Korikkar Cc: Thomas Richter Cc: Wan Jiabing Cc: Zhen Lei Link: https://lore.kernel.org/r/20211015172132.1162559-22-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 125 +++++++++++++++++++++++++++++++++--------- 1 file changed, 98 insertions(+), 27 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index b6ba1317973e..c34a3683e719 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -85,6 +85,7 @@ static void metric_event_delete(struct rblist *rblist __maybe_unused, struct metric_expr *expr, *tmp; list_for_each_entry_safe(expr, tmp, &me->head, nd) { + free((char *)expr->metric_name); free(expr->metric_refs); free(expr->metric_events); free(expr); @@ -130,6 +131,8 @@ struct metric { struct expr_parse_ctx *pctx; /** The name of the metric such as "IPC". */ const char *metric_name; + /** Modifier on the metric such as "u" or NULL for none. 
*/ + const char *modifier; /** The expression to parse, for example, "instructions/cycles". */ const char *metric_expr; /** @@ -186,6 +189,7 @@ static bool metricgroup__has_constraint(const struct pmu_event *pe) } static struct metric *metric__new(const struct pmu_event *pe, + const char *modifier, bool metric_no_group, int runtime) { @@ -202,6 +206,12 @@ static struct metric *metric__new(const struct pmu_event *pe, } m->metric_name = pe->metric_name; + m->modifier = modifier ? strdup(modifier) : NULL; + if (modifier && !m->modifier) { + free(m); + expr__ctx_free(m->pctx); + return NULL; + } m->metric_expr = pe->metric_expr; m->metric_unit = pe->unit; m->pctx->runtime = runtime; @@ -216,6 +226,7 @@ static void metric__free(struct metric *m) { free(m->metric_refs); expr__ctx_free(m->pctx); + free((char *)m->modifier); free(m); } @@ -645,7 +656,7 @@ static int decode_metric_id(struct strbuf *sb, const char *x) return 0; } -static int decode_all_metric_ids(struct evlist *perf_evlist) +static int decode_all_metric_ids(struct evlist *perf_evlist, const char *modifier) { struct evsel *ev; struct strbuf sb = STRBUF_INIT; @@ -675,10 +686,24 @@ static int decode_all_metric_ids(struct evlist *perf_evlist) * give a more friendly display version. 
*/ if (strstr(ev->name, "metric-id=")) { + bool has_slash = false; + free(ev->name); - for (cur = strchr(sb.buf, '@') ; cur; cur = strchr(++cur, '@')) + for (cur = strchr(sb.buf, '@') ; cur; cur = strchr(++cur, '@')) { *cur = '/'; + has_slash = true; + } + if (modifier) { + if (!has_slash && !strchr(sb.buf, ':')) { + ret = strbuf_addch(&sb, ':'); + if (ret) + break; + } + ret = strbuf_addstr(&sb, modifier); + if (ret) + break; + } ev->name = strdup(sb.buf); if (!ev->name) { ret = -ENOMEM; @@ -692,6 +717,7 @@ static int decode_all_metric_ids(struct evlist *perf_evlist) static int metricgroup__build_event_string(struct strbuf *events, const struct expr_parse_ctx *ctx, + const char *modifier, bool has_constraint) { struct hashmap_entry *cur; @@ -765,6 +791,10 @@ static int metricgroup__build_event_string(struct strbuf *events, ret = strbuf_addstr(events, sep + 1); RETURN_IF_NON_ZERO(ret); } + if (modifier) { + ret = strbuf_addstr(events, modifier); + RETURN_IF_NON_ZERO(ret); + } } if (has_duration) { if (no_group) { @@ -798,6 +828,7 @@ struct visited_metric { struct metricgroup_add_iter_data { struct list_head *metric_list; const char *metric_name; + const char *modifier; int *ret; bool *has_match; bool metric_no_group; @@ -808,6 +839,7 @@ struct metricgroup_add_iter_data { static int add_metric(struct list_head *metric_list, const struct pmu_event *pe, + const char *modifier, bool metric_no_group, struct metric *root_metric, const struct visited_metric *visited, @@ -817,6 +849,7 @@ static int add_metric(struct list_head *metric_list, * resolve_metric - Locate metrics within the root metric and recursively add * references to them. * @metric_list: The list the metric is added to. + * @modifier: if non-null event modifiers like "u". * @metric_no_group: Should events written to events be grouped "{}" or * global. Grouping is the default but due to multiplexing the * user may override. 
@@ -829,6 +862,7 @@ static int add_metric(struct list_head *metric_list, * architecture perf is running upon. */ static int resolve_metric(struct list_head *metric_list, + const char *modifier, bool metric_no_group, struct metric *root_metric, const struct visited_metric *visited, @@ -876,7 +910,7 @@ static int resolve_metric(struct list_head *metric_list, * context. */ for (i = 0; i < pending_cnt; i++) { - ret = add_metric(metric_list, pending[i].pe, metric_no_group, + ret = add_metric(metric_list, pending[i].pe, modifier, metric_no_group, root_metric, visited, map); if (ret) break; @@ -890,6 +924,7 @@ static int resolve_metric(struct list_head *metric_list, * __add_metric - Add a metric to metric_list. * @metric_list: The list the metric is added to. * @pe: The pmu_event containing the metric to be added. + * @modifier: if non-null event modifiers like "u". * @metric_no_group: Should events written to events be grouped "{}" or * global. Grouping is the default but due to multiplexing the * user may override. @@ -904,6 +939,7 @@ static int resolve_metric(struct list_head *metric_list, */ static int __add_metric(struct list_head *metric_list, const struct pmu_event *pe, + const char *modifier, bool metric_no_group, int runtime, struct metric *root_metric, @@ -930,7 +966,7 @@ static int __add_metric(struct list_head *metric_list, * This metric is the root of a tree and may reference other * metrics that are added recursively. */ - root_metric = metric__new(pe, metric_no_group, runtime); + root_metric = metric__new(pe, modifier, metric_no_group, runtime); if (!root_metric) return -ENOMEM; @@ -979,7 +1015,7 @@ static int __add_metric(struct list_head *metric_list, ret = -EINVAL; } else { /* Resolve referenced metrics. 
*/ - ret = resolve_metric(metric_list, metric_no_group, root_metric, + ret = resolve_metric(metric_list, modifier, metric_no_group, root_metric, &visited_node, map); } @@ -1021,6 +1057,7 @@ const struct pmu_event *metricgroup__find_metric(const char *metric, static int add_metric(struct list_head *metric_list, const struct pmu_event *pe, + const char *modifier, bool metric_no_group, struct metric *root_metric, const struct visited_metric *visited, @@ -1031,7 +1068,7 @@ static int add_metric(struct list_head *metric_list, pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); if (!strstr(pe->metric_expr, "?")) { - ret = __add_metric(metric_list, pe, metric_no_group, 0, + ret = __add_metric(metric_list, pe, modifier, metric_no_group, 0, root_metric, visited, map); } else { int j, count; @@ -1044,7 +1081,7 @@ static int add_metric(struct list_head *metric_list, */ for (j = 0; j < count && !ret; j++) - ret = __add_metric(metric_list, pe, metric_no_group, j, + ret = __add_metric(metric_list, pe, modifier, metric_no_group, j, root_metric, visited, map); } @@ -1060,7 +1097,7 @@ static int metricgroup__add_metric_sys_event_iter(const struct pmu_event *pe, if (!match_pe_metric(pe, d->metric_name)) return 0; - ret = add_metric(d->metric_list, pe, d->metric_no_group, + ret = add_metric(d->metric_list, pe, d->modifier, d->metric_no_group, d->root_metric, d->visited, d->map); if (ret) goto out; @@ -1086,6 +1123,7 @@ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, * @metric_name: The name of the metric or metric group. For example, "IPC" * could be the name of a metric and "TopDownL1" the name of a * metric group. + * @modifier: if non-null event modifiers like "u". * @metric_no_group: Should events written to events be grouped "{}" or * global. Grouping is the default but due to multiplexing the * user may override. 
@@ -1093,7 +1131,8 @@ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, * @map: The map that is searched for metrics, most commonly the table for the * architecture perf is running upon. */ -static int metricgroup__add_metric(const char *metric_name, bool metric_no_group, +static int metricgroup__add_metric(const char *metric_name, const char *modifier, + bool metric_no_group, struct list_head *metric_list, const struct pmu_events_map *map) { @@ -1108,7 +1147,7 @@ static int metricgroup__add_metric(const char *metric_name, bool metric_no_group */ map_for_each_metric(pe, i, map, metric_name) { has_match = true; - ret = add_metric(&list, pe, metric_no_group, + ret = add_metric(&list, pe, modifier, metric_no_group, /*root_metric=*/NULL, /*visited_metrics=*/NULL, map); if (ret) @@ -1121,6 +1160,7 @@ static int metricgroup__add_metric(const char *metric_name, bool metric_no_group .data = (void *) &(struct metricgroup_add_iter_data) { .metric_list = &list, .metric_name = metric_name, + .modifier = modifier, .metric_no_group = metric_no_group, .has_match = &has_match, .ret = &ret, @@ -1160,26 +1200,31 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group, struct list_head *metric_list, const struct pmu_events_map *map) { - char *llist, *nlist, *p; + char *list_itr, *list_copy, *metric_name, *modifier; int ret, count = 0; - nlist = strdup(list); - if (!nlist) + list_copy = strdup(list); + if (!list_copy) return -ENOMEM; - llist = nlist; + list_itr = list_copy; - while ((p = strsep(&llist, ",")) != NULL) { - ret = metricgroup__add_metric(p, metric_no_group, metric_list, + while ((metric_name = strsep(&list_itr, ",")) != NULL) { + modifier = strchr(metric_name, ':'); + if (modifier) + *modifier++ = '\0'; + + ret = metricgroup__add_metric(metric_name, modifier, + metric_no_group, metric_list, map); if (ret == -EINVAL) - pr_err("Cannot find metric or group `%s'\n", p); + pr_err("Cannot find metric or group `%s'\n", 
metric_name); if (ret) break; count++; } - free(nlist); + free(list_copy); if (!ret) { /* @@ -1225,7 +1270,7 @@ static int build_combined_expr_ctx(const struct list_head *metric_list, return -ENOMEM; list_for_each_entry(m, metric_list, nd) { - if (m->has_constraint) { + if (m->has_constraint && !m->modifier) { hashmap__for_each_entry(m->pctx->ids, cur, bkt) { dup = strdup(cur->key); if (!dup) { @@ -1250,11 +1295,12 @@ err_out: * evlist. The encoded metric_ids are decoded. * @fake_pmu: used when testing metrics not supported by the current CPU. * @ids: the event identifiers parsed from a metric. + * @modifier: any modifiers added to the events. * @has_constraint: false if events should be placed in a weak group. * @out_evlist: the created list of events. */ static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids, - bool has_constraint, struct evlist **out_evlist) + const char *modifier, bool has_constraint, struct evlist **out_evlist) { struct parse_events_error parse_error; struct evlist *parsed_evlist; @@ -1277,7 +1323,8 @@ static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids, ids__insert(ids->ids, tmp); } - ret = metricgroup__build_event_string(&events, ids, has_constraint); + ret = metricgroup__build_event_string(&events, ids, modifier, + has_constraint); if (ret) return ret; @@ -1293,7 +1340,7 @@ static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids, parse_events_print_error(&parse_error, events.buf); goto err_out; } - ret = decode_all_metric_ids(parsed_evlist); + ret = decode_all_metric_ids(parsed_evlist, modifier); if (ret) goto err_out; @@ -1333,7 +1380,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, ret = build_combined_expr_ctx(&metric_list, &combined); if (!ret && combined && hashmap__size(combined->ids)) { - ret = parse_ids(fake_pmu, combined, /*has_constraint=*/true, + ret = parse_ids(fake_pmu, combined, /*modifier=*/NULL, + /*has_constraint=*/true, 
&combined_evlist); } if (combined) @@ -1364,6 +1412,12 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, if (n->evlist == NULL) continue; + if ((!m->modifier && n->modifier) || + (m->modifier && !n->modifier) || + (m->modifier && n->modifier && + strcmp(m->modifier, n->modifier))) + continue; + if (expr__subset_of_ids(n->pctx, m->pctx)) { pr_debug("Events in '%s' fully contained within '%s'\n", m->metric_name, n->metric_name); @@ -1374,8 +1428,8 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, } } if (!metric_evlist) { - ret = parse_ids(fake_pmu, m->pctx, m->has_constraint, - &m->evlist); + ret = parse_ids(fake_pmu, m->pctx, m->modifier, + m->has_constraint, &m->evlist); if (ret) goto out; @@ -1400,7 +1454,21 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, expr->metric_refs = m->metric_refs; m->metric_refs = NULL; expr->metric_expr = m->metric_expr; - expr->metric_name = m->metric_name; + if (m->modifier) { + char *tmp; + + if (asprintf(&tmp, "%s:%s", m->metric_name, m->modifier) < 0) + expr->metric_name = NULL; + else + expr->metric_name = tmp; + } else + expr->metric_name = strdup(m->metric_name); + + if (!expr->metric_name) { + ret = -ENOMEM; + free(metric_events); + goto out; + } expr->metric_unit = m->metric_unit; expr->metric_events = metric_events; expr->runtime = m->pctx->runtime; @@ -1500,7 +1568,10 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, return -ENOMEM; new_expr->metric_expr = old_expr->metric_expr; - new_expr->metric_name = old_expr->metric_name; + new_expr->metric_name = strdup(old_expr->metric_name); + if (!new_expr->metric_name) + return -ENOMEM; + new_expr->metric_unit = old_expr->metric_unit; new_expr->runtime = old_expr->runtime; -- cgit v1.2.3 From 25bc4793dc89b845be8c1f1f37c00ee315363ce4 Mon Sep 17 00:00:00 2001 From: Andrew Kilroy Date: Wed, 6 Oct 2021 09:11:03 +0100 Subject: perf vendor events: Syntax corrections in Neoverse N1 json 
There are some syntactical mistakes in the json files for the Cortex A76 N1 (Neoverse N1). This was obstructing parsing from an external tool. This patch fixes the erroneous placement of commas causing the problems. Reviewed-by: John Garry Signed-off-by: Andrew Kilroy Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211006081106.8649-1-andrew.kilroy@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/arm/cortex-a76-n1/branch.json | 4 +-- .../arch/arm64/arm/cortex-a76-n1/bus.json | 12 ++++---- .../arch/arm64/arm/cortex-a76-n1/cache.json | 34 +++++++++++----------- .../arch/arm64/arm/cortex-a76-n1/exception.json | 4 +-- .../arch/arm64/arm/cortex-a76-n1/instruction.json | 18 ++++++------ .../arch/arm64/arm/cortex-a76-n1/memory.json | 2 +- .../arch/arm64/arm/cortex-a76-n1/other.json | 2 +- .../arch/arm64/arm/cortex-a76-n1/pipeline.json | 4 +-- 8 files changed, 40 insertions(+), 40 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json index ec0dc92288ab..db68de188390 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/branch.json @@ -1,10 +1,10 @@ [ { "PublicDescription": "This event counts any predictable branch instruction which is mispredicted either due to dynamic misprediction or because the MMU is off and the branches are statically predicted not taken", - "ArchStdEvent": "BR_MIS_PRED", + "ArchStdEvent": "BR_MIS_PRED" }, { "PublicDescription": "This event counts all predictable branches.", - "ArchStdEvent": "BR_PRED", + "ArchStdEvent": "BR_PRED" } ] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json index 
6263929efce2..e0875d3a685d 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/bus.json @@ -1,21 +1,21 @@ [ { - "PublicDescription": "The number of core clock cycles" + "PublicDescription": "The number of core clock cycles", "ArchStdEvent": "CPU_CYCLES", "BriefDescription": "The number of core clock cycles." }, { "PublicDescription": "This event counts for every beat of data transferred over the data channels between the core and the SCU. If both read and write data beats are transferred on a given cycle, this event is counted twice on that cycle. This event counts the sum of BUS_ACCESS_RD and BUS_ACCESS_WR.", - "ArchStdEvent": "BUS_ACCESS", + "ArchStdEvent": "BUS_ACCESS" }, { - "PublicDescription": "This event duplicates CPU_CYCLES." - "ArchStdEvent": "BUS_CYCLES", + "PublicDescription": "This event duplicates CPU_CYCLES.", + "ArchStdEvent": "BUS_CYCLES" }, { - "ArchStdEvent": "BUS_ACCESS_RD", + "ArchStdEvent": "BUS_ACCESS_RD" }, { - "ArchStdEvent": "BUS_ACCESS_WR", + "ArchStdEvent": "BUS_ACCESS_WR" } ] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json index cd67bb9df139..fc448c2d5ea4 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/cache.json @@ -1,47 +1,47 @@ [ { "PublicDescription": "This event counts any instruction fetch which misses in the cache.", - "ArchStdEvent": "L1I_CACHE_REFILL", + "ArchStdEvent": "L1I_CACHE_REFILL" }, { "PublicDescription": "This event counts any refill of the instruction L1 TLB from the L2 TLB. 
This includes refills that result in a translation fault.", - "ArchStdEvent": "L1I_TLB_REFILL", + "ArchStdEvent": "L1I_TLB_REFILL" }, { "PublicDescription": "This event counts any load or store operation or page table walk access which causes data to be read from outside the L1, including accesses which do not allocate into L1.", - "ArchStdEvent": "L1D_CACHE_REFILL", + "ArchStdEvent": "L1D_CACHE_REFILL" }, { "PublicDescription": "This event counts any load or store operation or page table walk access which looks up in the L1 data cache. In particular, any access which could count the L1D_CACHE_REFILL event causes this event to count.", - "ArchStdEvent": "L1D_CACHE", + "ArchStdEvent": "L1D_CACHE" }, { "PublicDescription": "This event counts any refill of the data L1 TLB from the L2 TLB. This includes refills that result in a translation fault.", - "ArchStdEvent": "L1D_TLB_REFILL", + "ArchStdEvent": "L1D_TLB_REFILL" }, - {, + { "PublicDescription": "Level 1 instruction cache access or Level 0 Macro-op cache access. This event counts any instruction fetch which accesses the L1 instruction cache or L0 Macro-op cache.", - "ArchStdEvent": "L1I_CACHE", + "ArchStdEvent": "L1I_CACHE" }, { "PublicDescription": "This event counts any write-back of data from the L1 data cache to L2 or L3. This counts both victim line evictions and snoops, including cache maintenance operations.", - "ArchStdEvent": "L1D_CACHE_WB", + "ArchStdEvent": "L1D_CACHE_WB" }, { "PublicDescription": "This event counts any transaction from L1 which looks up in the L2 cache, and any write-back from the L1 to the L2. Snoops from outside the core and cache maintenance operations are not counted.", - "ArchStdEvent": "L2D_CACHE", + "ArchStdEvent": "L2D_CACHE" }, { "PublicDescription": "L2 data cache refill. This event counts any cacheable transaction from L1 which causes data to be read from outside the core. 
L2 refills caused by stashes into L2 should not be counted", - "ArchStdEvent": "L2D_CACHE_REFILL", + "ArchStdEvent": "L2D_CACHE_REFILL" }, { "PublicDescription": "This event counts any write-back of data from the L2 cache to outside the core. This includes snoops to the L2 which return data, regardless of whether they cause an invalidation. Invalidations from the L2 which do not write data outside of the core and snoops which return data from the L1 are not counted", - "ArchStdEvent": "L2D_CACHE_WB", + "ArchStdEvent": "L2D_CACHE_WB" }, { "PublicDescription": "This event counts any full cache line write into the L2 cache which does not cause a linefill, including write-backs from L1 to L2 and full-line writes which do not allocate into L1.", - "ArchStdEvent": "L2D_CACHE_ALLOCATE", + "ArchStdEvent": "L2D_CACHE_ALLOCATE" }, { "PublicDescription": "This event counts any load or store operation which accesses the data L1 TLB. If both a load and a store are executed on a cycle, this event counts twice. This event counts regardless of whether the MMU is enabled.", @@ -75,21 +75,21 @@ }, { "PublicDescription": "This event counts on any access to the L2 TLB (caused by a refill of any of the L1 TLBs). 
This event does not count if the MMU is disabled.", - "ArchStdEvent": "L2D_TLB", + "ArchStdEvent": "L2D_TLB" }, { "PublicDescription": "This event counts on any data access which causes L2D_TLB_REFILL to count.", - "ArchStdEvent": "DTLB_WALK", + "ArchStdEvent": "DTLB_WALK" }, { "PublicDescription": "This event counts on any instruction access which causes L2D_TLB_REFILL to count.", - "ArchStdEvent": "ITLB_WALK", + "ArchStdEvent": "ITLB_WALK" }, { - "ArchStdEvent": "LL_CACHE_RD", + "ArchStdEvent": "LL_CACHE_RD" }, { - "ArchStdEvent": "LL_CACHE_MISS_RD", + "ArchStdEvent": "LL_CACHE_MISS_RD" }, { "ArchStdEvent": "L1D_CACHE_INVAL" diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json index ea4631db41b5..ce942324ee60 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/exception.json @@ -1,10 +1,10 @@ [ { - "ArchStdEvent": "EXC_TAKEN", + "ArchStdEvent": "EXC_TAKEN" }, { "PublicDescription": "This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs", - "ArchStdEvent": "MEMORY_ERROR", + "ArchStdEvent": "MEMORY_ERROR" }, { "ArchStdEvent": "EXC_DABORT" diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json index 8e59566cba8b..b0b439a36ae9 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/instruction.json @@ -1,32 +1,32 @@ [ { - "ArchStdEvent": "SW_INCR", + "ArchStdEvent": "SW_INCR" }, { "PublicDescription": "This event counts all retired instructions, including those that fail their condition check.", - "ArchStdEvent": "INST_RETIRED", + "ArchStdEvent": "INST_RETIRED" }, { - "ArchStdEvent": "EXC_RETURN", + "ArchStdEvent": "EXC_RETURN" }, { "PublicDescription": "This event 
only counts writes to CONTEXTIDR in AArch32 state, and via the CONTEXTIDR_EL1 mnemonic in AArch64 state.", - "ArchStdEvent": "CID_WRITE_RETIRED", + "ArchStdEvent": "CID_WRITE_RETIRED" }, { - "ArchStdEvent": "INST_SPEC", + "ArchStdEvent": "INST_SPEC" }, { "PublicDescription": "This event only counts writes to TTBR0/TTBR1 in AArch32 state and TTBR0_EL1/TTBR1_EL1 in AArch64 state.", - "ArchStdEvent": "TTBR_WRITE_RETIRED", + "ArchStdEvent": "TTBR_WRITE_RETIRED" }, - {, + { "PublicDescription": "This event counts all branches, taken or not. This excludes exception entries, debug entries and CCFAIL branches.", - "ArchStdEvent": "BR_RETIRED", + "ArchStdEvent": "BR_RETIRED" }, { "PublicDescription": "This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush.", - "ArchStdEvent": "BR_MIS_PRED_RETIRED", + "ArchStdEvent": "BR_MIS_PRED_RETIRED" }, { "ArchStdEvent": "ASE_SPEC" diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json index f06f399051c1..20a929e7728d 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/memory.json @@ -1,7 +1,7 @@ [ { "PublicDescription": "This event counts memory accesses due to load or store instructions. 
This event counts the sum of MEM_ACCESS_RD and MEM_ACCESS_WR.", - "ArchStdEvent": "MEM_ACCESS", + "ArchStdEvent": "MEM_ACCESS" }, { "ArchStdEvent": "MEM_ACCESS_RD" diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json index c2ccbf6fbfa0..20d8365756c5 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/other.json @@ -1,5 +1,5 @@ [ { - "ArchStdEvent": "REMOTE_ACCESS", + "ArchStdEvent": "REMOTE_ACCESS" } ] diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json index d79f0aeaf7f1..b4e96551d51a 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json +++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a76-n1/pipeline.json @@ -1,10 +1,10 @@ [ { "PublicDescription": "The counter counts on any cycle when there are no fetched instructions available to dispatch.", - "ArchStdEvent": "STALL_FRONTEND", + "ArchStdEvent": "STALL_FRONTEND" }, { "PublicDescription": "The counter counts on any cycle fetched instructions are not dispatched due to resource constraints.", - "ArchStdEvent": "STALL_BACKEND", + "ArchStdEvent": "STALL_BACKEND" } ] -- cgit v1.2.3 From e166fc328b100b6e8ac7272b45d77494f79aab5f Mon Sep 17 00:00:00 2001 From: Andrew Kilroy Date: Wed, 6 Oct 2021 09:11:04 +0100 Subject: perf vendor events arm64: Add new armv8 pmu events Add new armv8 common events for use by Arm Neoverse V1 cores in a later commit. 
These are defined in the ArmV8 architecture reference manual available from https://developer.arm.com/documentation/ddi0487/gb/?lang=en Reviewed-by: John Garry Signed-off-by: Andrew Kilroy Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211006081106.8649-2-andrew.kilroy@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/armv8-common-and-microarch.json | 72 ++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json b/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json index 913fb200ea52..423767510aff 100644 --- a/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json +++ b/tools/perf/pmu-events/arch/arm64/armv8-common-and-microarch.json @@ -257,6 +257,78 @@ "EventName": "LL_CACHE_MISS_RD", "BriefDescription": "Last level cache miss, read" }, + { + "PublicDescription": "Level 1 data cache long-latency read miss. The counter counts each memory read access counted by L1D_CACHE that incurs additional latency because it returns data from outside the Level 1 data or unified cache of this processing element.", + "EventCode": "0x39", + "EventName": "L1D_CACHE_LMISS_RD", + "BriefDescription": "Level 1 data cache long-latency read miss" + }, + { + "PublicDescription": "Micro-operation architecturally executed. The counter counts each operation counted by OP_SPEC that would be executed in a simple sequential execution of the program.", + "EventCode": "0x3A", + "EventName": "OP_RETIRED", + "BriefDescription": "Micro-operation architecturally executed" + }, + { + "PublicDescription": "Micro-operation speculatively executed. 
The counter counts the number of operations executed by the processing element, including those that are executed speculatively and would not be executed in a simple sequential execution of the program.", + "EventCode": "0x3B", + "EventName": "OP_SPEC", + "BriefDescription": "Micro-operation speculatively executed" + }, + { + "PublicDescription": "No operation sent for execution. The counter counts every attributable cycle on which no attributable instruction or operation was sent for execution on this processing element.", + "EventCode": "0x3C", + "EventName": "STALL", + "BriefDescription": "No operation sent for execution" + }, + { + "PublicDescription": "No operation sent for execution on a slot due to the backend. Counts each slot counted by STALL_SLOT where no attributable instruction or operation was sent for execution because the backend is unable to accept it.", + "EventCode": "0x3D", + "EventName": "STALL_SLOT_BACKEND", + "BriefDescription": "No operation sent for execution on a slot due to the backend" + }, + { + "PublicDescription": "No operation sent for execution on a slot due to the frontend. Counts each slot counted by STALL_SLOT where no attributable instruction or operation was sent for execution because there was no attributable instruction or operation available to issue from the processing element from the frontend for the slot.", + "EventCode": "0x3E", + "EventName": "STALL_SLOT_FRONTEND", + "BriefDescription": "No operation sent for execution on a slot due to the frontend" + }, + { + "PublicDescription": "No operation sent for execution on a slot. The counter counts on each attributable cycle the number of instruction or operation slots that were not occupied by an instruction or operation attributable to the processing element.", + "EventCode": "0x3F", + "EventName": "STALL_SLOT", + "BriefDescription": "No operation sent for execution on a slot" + }, + { + "PublicDescription": "Constant frequency cycles. 
The counter increments at a constant frequency equal to the rate of increment of the system counter, CNTPCT_EL0.", + "EventCode": "0x4004", + "EventName": "CNT_CYCLES", + "BriefDescription": "Constant frequency cycles" + }, + { + "PublicDescription": "Memory stall cycles. The counter counts each cycle counted by STALL_BACKEND where there is a cache miss in the last level of cache within the processing element clock domain", + "EventCode": "0x4005", + "EventName": "STALL_BACKEND_MEM", + "BriefDescription": "Memory stall cycles" + }, + { + "PublicDescription": "Level 1 instruction cache long-latency read miss. If the L1I_CACHE_RD event is implemented, the counter counts each access counted by L1I_CACHE_RD that incurs additional latency because it returns instructions from outside of the Level 1 instruction cache of this PE. If the L1I_CACHE_RD event is not implemented, the counter counts each access counted by L1I_CACHE that incurs additional latency because it returns instructions from outside the Level 1 instruction cache of this PE. The event indicates to software that the access missed in the Level 1 instruction cache and might have a significant performance impact due to the additional latency, compared to the latency of an access that hits in the Level 1 instruction cache.", + "EventCode": "0x4006", + "EventName": "L1I_CACHE_LMISS", + "BriefDescription": "Level 1 instruction cache long-latency read miss" + }, + { + "PublicDescription": "Level 2 data cache long-latency read miss. The counter counts each memory read access counted by L2D_CACHE that incurs additional latency because it returns data from outside the Level 2 data or unified cache of this processing element. 
The event indicates to software that the access missed in the Level 2 data or unified cache and might have a significant performance impact compared to the latency of an access that hits in the Level 2 data or unified cache.", + "EventCode": "0x4009", + "EventName": "L2D_CACHE_LMISS_RD", + "BriefDescription": "Level 2 data cache long-latency read miss" + }, + { + "PublicDescription": "Level 3 data cache long-latency read miss. The counter counts each memory read access counted by L3D_CACHE that incurs additional latency because it returns data from outside the Level 3 data or unified cache of this processing element. The event indicates to software that the access missed in the Level 3 data or unified cache and might have a significant performance impact compared to the latency of an access that hits in the Level 3 data or unified cache.", + "EventCode": "0x400B", + "EventName": "L3D_CACHE_LMISS_RD", + "BriefDescription": "Level 3 data cache long-latency read miss" + }, { "PublicDescription": "SIMD Instruction architecturally executed.", "EventCode": "0x8000", -- cgit v1.2.3 From 70ae034d499d4bd7ac3d0db20505ff9644a6b959 Mon Sep 17 00:00:00 2001 From: Andrew Kilroy Date: Wed, 6 Oct 2021 09:11:05 +0100 Subject: perf vendor events arm64: Categorise the Neoverse V1 counters This is so they are categorised in the perf list output. 
The pmus all exist in the armv8-common-and-microarch.json and arm-recommended.json files, so this commit places them into each category's own file under tools/perf/pmu-events/arch/arm64/arm/neoverse-v1 Also add the Neoverse V1 to the arm64 mapfile Reviewed-by: John Garry Signed-off-by: Andrew Kilroy Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20211006081106.8649-3-andrew.kilroy@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/arm/neoverse-v1/branch.json | 8 ++ .../pmu-events/arch/arm64/arm/neoverse-v1/bus.json | 20 +++ .../arch/arm64/arm/neoverse-v1/cache.json | 155 +++++++++++++++++++++ .../arch/arm64/arm/neoverse-v1/exception.json | 47 +++++++ .../arch/arm64/arm/neoverse-v1/instruction.json | 89 ++++++++++++ .../arch/arm64/arm/neoverse-v1/memory.json | 20 +++ .../arch/arm64/arm/neoverse-v1/other.json | 5 + .../arch/arm64/arm/neoverse-v1/pipeline.json | 23 +++ tools/perf/pmu-events/arch/arm64/mapfile.csv | 1 + 9 files changed, 368 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/branch.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/bus.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/cache.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/exception.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/instruction.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/memory.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/other.json create mode 100644 tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/pipeline.json (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/branch.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/branch.json new file mode 100644 index 000000000000..79f2016c53b0 --- /dev/null 
+++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/branch.json @@ -0,0 +1,8 @@ +[ + { + "ArchStdEvent": "BR_MIS_PRED" + }, + { + "ArchStdEvent": "BR_PRED" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/bus.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/bus.json new file mode 100644 index 000000000000..579c1c993d17 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/bus.json @@ -0,0 +1,20 @@ +[ + { + "ArchStdEvent": "CPU_CYCLES" + }, + { + "ArchStdEvent": "BUS_ACCESS" + }, + { + "ArchStdEvent": "BUS_CYCLES" + }, + { + "ArchStdEvent": "BUS_ACCESS_RD" + }, + { + "ArchStdEvent": "BUS_ACCESS_WR" + }, + { + "ArchStdEvent": "CNT_CYCLES" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/cache.json new file mode 100644 index 000000000000..0141f749bff3 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/cache.json @@ -0,0 +1,155 @@ +[ + { + "ArchStdEvent": "L1I_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1I_TLB_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L1D_CACHE" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL" + }, + { + "ArchStdEvent": "L1I_CACHE" + }, + { + "ArchStdEvent": "L1D_CACHE_WB" + }, + { + "ArchStdEvent": "L2D_CACHE" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L2D_CACHE_WB" + }, + { + "ArchStdEvent": "L2D_CACHE_ALLOCATE" + }, + { + "ArchStdEvent": "L1D_TLB" + }, + { + "ArchStdEvent": "L1I_TLB" + }, + { + "ArchStdEvent": "L3D_CACHE_ALLOCATE" + }, + { + "ArchStdEvent": "L3D_CACHE_REFILL" + }, + { + "ArchStdEvent": "L3D_CACHE" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL" + }, + { + "ArchStdEvent": "L2D_TLB" + }, + { + "ArchStdEvent": "DTLB_WALK" + }, + { + "ArchStdEvent": "ITLB_WALK" + }, + { + "ArchStdEvent": "LL_CACHE_RD" + }, + { + "ArchStdEvent": "LL_CACHE_MISS_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_LMISS_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_RD" 
+ }, + { + "ArchStdEvent": "L1D_CACHE_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_INNER" + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_OUTER" + }, + { + "ArchStdEvent": "L1D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L1D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L1D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L1D_TLB_RD" + }, + { + "ArchStdEvent": "L1D_TLB_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_VICTIM" + }, + { + "ArchStdEvent": "L2D_CACHE_WB_CLEAN" + }, + { + "ArchStdEvent": "L2D_CACHE_INVAL" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_RD" + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_WR" + }, + { + "ArchStdEvent": "L2D_TLB_RD" + }, + { + "ArchStdEvent": "L2D_TLB_WR" + }, + { + "ArchStdEvent": "L3D_CACHE_RD" + }, + { + "ArchStdEvent": "L1I_CACHE_LMISS" + }, + { + "ArchStdEvent": "L2D_CACHE_LMISS_RD" + }, + { + "ArchStdEvent": "L3D_CACHE_LMISS_RD" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/exception.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/exception.json new file mode 100644 index 000000000000..344a2d552ad5 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/exception.json @@ -0,0 +1,47 @@ +[ + { + "ArchStdEvent": "EXC_TAKEN" + }, + { + "ArchStdEvent": "MEMORY_ERROR" + }, + { + "ArchStdEvent": "EXC_UNDEF" + }, + { + "ArchStdEvent": "EXC_SVC" + }, + { + "ArchStdEvent": "EXC_PABORT" + }, + { + "ArchStdEvent": "EXC_DABORT" + }, + { + "ArchStdEvent": "EXC_IRQ" + }, + { + "ArchStdEvent": "EXC_FIQ" + }, + { + "ArchStdEvent": "EXC_SMC" + }, + { + "ArchStdEvent": "EXC_HVC" + }, + { + "ArchStdEvent": "EXC_TRAP_PABORT" + }, + { + 
"ArchStdEvent": "EXC_TRAP_DABORT" + }, + { + "ArchStdEvent": "EXC_TRAP_OTHER" + }, + { + "ArchStdEvent": "EXC_TRAP_IRQ" + }, + { + "ArchStdEvent": "EXC_TRAP_FIQ" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/instruction.json new file mode 100644 index 000000000000..25825e14c535 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/instruction.json @@ -0,0 +1,89 @@ +[ + { + "ArchStdEvent": "SW_INCR" + }, + { + "ArchStdEvent": "INST_RETIRED" + }, + { + "ArchStdEvent": "EXC_RETURN" + }, + { + "ArchStdEvent": "CID_WRITE_RETIRED" + }, + { + "ArchStdEvent": "INST_SPEC" + }, + { + "ArchStdEvent": "TTBR_WRITE_RETIRED" + }, + { + "ArchStdEvent": "BR_RETIRED" + }, + { + "ArchStdEvent": "BR_MIS_PRED_RETIRED" + }, + { + "ArchStdEvent": "OP_RETIRED" + }, + { + "ArchStdEvent": "OP_SPEC" + }, + { + "ArchStdEvent": "LDREX_SPEC" + }, + { + "ArchStdEvent": "STREX_PASS_SPEC" + }, + { + "ArchStdEvent": "STREX_FAIL_SPEC" + }, + { + "ArchStdEvent": "STREX_SPEC" + }, + { + "ArchStdEvent": "LD_SPEC" + }, + { + "ArchStdEvent": "ST_SPEC" + }, + { + "ArchStdEvent": "DP_SPEC" + }, + { + "ArchStdEvent": "ASE_SPEC" + }, + { + "ArchStdEvent": "VFP_SPEC" + }, + { + "ArchStdEvent": "PC_WRITE_SPEC" + }, + { + "ArchStdEvent": "CRYPTO_SPEC" + }, + { + "ArchStdEvent": "BR_IMMED_SPEC" + }, + { + "ArchStdEvent": "BR_RETURN_SPEC" + }, + { + "ArchStdEvent": "BR_INDIRECT_SPEC" + }, + { + "ArchStdEvent": "ISB_SPEC" + }, + { + "ArchStdEvent": "DSB_SPEC" + }, + { + "ArchStdEvent": "DMB_SPEC" + }, + { + "ArchStdEvent": "RC_LD_SPEC" + }, + { + "ArchStdEvent": "RC_ST_SPEC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/memory.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/memory.json new file mode 100644 index 000000000000..e3d08f1f7c92 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/memory.json @@ -0,0 +1,20 @@ +[ + { + "ArchStdEvent": "MEM_ACCESS" + }, + 
{ + "ArchStdEvent": "MEM_ACCESS_RD" + }, + { + "ArchStdEvent": "MEM_ACCESS_WR" + }, + { + "ArchStdEvent": "UNALIGNED_LD_SPEC" + }, + { + "ArchStdEvent": "UNALIGNED_ST_SPEC" + }, + { + "ArchStdEvent": "UNALIGNED_LDST_SPEC" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/other.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/other.json new file mode 100644 index 000000000000..20d8365756c5 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/other.json @@ -0,0 +1,5 @@ +[ + { + "ArchStdEvent": "REMOTE_ACCESS" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/pipeline.json new file mode 100644 index 000000000000..f9fae15f7555 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-v1/pipeline.json @@ -0,0 +1,23 @@ +[ + { + "ArchStdEvent": "STALL_FRONTEND" + }, + { + "ArchStdEvent": "STALL_BACKEND" + }, + { + "ArchStdEvent": "STALL" + }, + { + "ArchStdEvent": "STALL_SLOT_BACKEND" + }, + { + "ArchStdEvent": "STALL_SLOT_FRONTEND" + }, + { + "ArchStdEvent": "STALL_SLOT" + }, + { + "ArchStdEvent": "STALL_BACKEND_MEM" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index c43591d831b8..31d8b57ca9bb 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -18,6 +18,7 @@ 0x00000000410fd080,v1,arm/cortex-a57-a72,core 0x00000000410fd0b0,v1,arm/cortex-a76-n1,core 0x00000000410fd0c0,v1,arm/cortex-a76-n1,core +0x00000000410fd400,v1,arm/neoverse-v1,core 0x00000000420f5160,v1,cavium/thunderx2,core 0x00000000430f0af0,v1,cavium/thunderx2,core 0x00000000460f0010,v1,fujitsu/a64fx,core -- cgit v1.2.3 From 61750473589b6f8adc35007c8261986043907f13 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 7 Sep 2021 19:39:02 +0300 Subject: perf tools: Add support for PERF_RECORD_AUX_OUTPUT_HW_ID The PERF_RECORD_AUX_OUTPUT_HW_ID event provides 
a way to match AUX output data like Intel PT PEBS-via-PT back to the event that it came from, by providing a hardware ID that is present in the AUX output. Reviewed-by: Alexander Shishkin Reviewed-by: Andi Kleen Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: x86@kernel.org Link: http://lore.kernel.org/lkml/20210907163903.11820-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 15 +++++++++++++++ tools/lib/perf/include/perf/event.h | 6 ++++++ tools/perf/builtin-inject.c | 4 +++- tools/perf/builtin-record.c | 2 +- tools/perf/util/event.c | 18 ++++++++++++++++++ tools/perf/util/event.h | 5 +++++ tools/perf/util/machine.c | 10 ++++++++++ tools/perf/util/machine.h | 2 ++ tools/perf/util/session.c | 5 +++++ tools/perf/util/tool.h | 1 + 10 files changed, 66 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index f92880a15645..c89535de1ec8 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1141,6 +1141,21 @@ enum perf_event_type { */ PERF_RECORD_TEXT_POKE = 20, + /* + * Data written to the AUX area by hardware due to aux_output, may need + * to be matched to the event by an architecture-specific hardware ID. + * This records the hardware ID, but requires sample_id to provide the + * event ID. e.g. Intel PT uses this record to disambiguate PEBS-via-PT + * records from multiple events. 
+ * + * struct { + * struct perf_event_header header; + * u64 hw_id; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_AUX_OUTPUT_HW_ID = 21, + PERF_RECORD_MAX, /* non-ABI */ }; diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 4d0c02ba3f7d..75ee385fb078 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -289,6 +289,11 @@ struct perf_record_itrace_start { __u32 tid; }; +struct perf_record_aux_output_hw_id { + struct perf_event_header header; + __u64 hw_id; +}; + struct perf_record_thread_map_entry { __u64 pid; char comm[16]; @@ -414,6 +419,7 @@ union perf_event { struct perf_record_auxtrace_error auxtrace_error; struct perf_record_aux aux; struct perf_record_itrace_start itrace_start; + struct perf_record_aux_output_hw_id aux_output_hw_id; struct perf_record_switch context_switch; struct perf_record_thread_map thread_map; struct perf_record_cpu_map cpu_map; diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 6ad191e731fc..ac6c570029e3 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -815,7 +815,8 @@ static int __cmd_inject(struct perf_inject *inject) inject->tool.auxtrace_info = perf_event__process_auxtrace_info; inject->tool.auxtrace = perf_event__process_auxtrace; inject->tool.aux = perf_event__drop_aux; - inject->tool.itrace_start = perf_event__drop_aux, + inject->tool.itrace_start = perf_event__drop_aux; + inject->tool.aux_output_hw_id = perf_event__drop_aux; inject->tool.ordered_events = true; inject->tool.ordering_requires_timestamps = true; /* Allow space in the header for new attributes */ @@ -882,6 +883,7 @@ int cmd_inject(int argc, const char **argv) .lost_samples = perf_event__repipe, .aux = perf_event__repipe, .itrace_start = perf_event__repipe, + .aux_output_hw_id = perf_event__repipe, .context_switch = perf_event__repipe, .throttle = perf_event__repipe, .unthrottle = perf_event__repipe, diff --git 
a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 41bb884f5a74..78185c982ebf 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1411,7 +1411,7 @@ static int record__synthesize(struct record *rec, bool tail) goto out; /* Synthesize id_index before auxtrace_info */ - if (rec->opts.auxtrace_sample_mode) { + if (rec->opts.auxtrace_sample_mode || rec->opts.full_auxtrace) { err = perf_event__synthesize_id_index(tool, process_synthesized_event, session->evlist, machine); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index ac706304afe9..fe24801f8e9f 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -57,6 +57,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_BPF_EVENT] = "BPF_EVENT", [PERF_RECORD_CGROUP] = "CGROUP", [PERF_RECORD_TEXT_POKE] = "TEXT_POKE", + [PERF_RECORD_AUX_OUTPUT_HW_ID] = "AUX_OUTPUT_HW_ID", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", @@ -237,6 +238,14 @@ int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused, return machine__process_itrace_start_event(machine, event); } +int perf_event__process_aux_output_hw_id(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine) +{ + return machine__process_aux_output_hw_id_event(machine, event); +} + int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -407,6 +416,12 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp) event->itrace_start.pid, event->itrace_start.tid); } +size_t perf_event__fprintf_aux_output_hw_id(union perf_event *event, FILE *fp) +{ + return fprintf(fp, " hw_id: %#"PRI_lx64"\n", + event->aux_output_hw_id.hw_id); +} + size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp) { bool out = event->header.misc 
& PERF_RECORD_MISC_SWITCH_OUT; @@ -534,6 +549,9 @@ size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FIL case PERF_RECORD_TEXT_POKE: ret += perf_event__fprintf_text_poke(event, machine, fp); break; + case PERF_RECORD_AUX_OUTPUT_HW_ID: + ret += perf_event__fprintf_aux_output_hw_id(event, fp); + break; default: ret += fprintf(fp, "\n"); } diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 19ad64f2bd83..95ffed66369c 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -330,6 +330,10 @@ int perf_event__process_itrace_start(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_aux_output_hw_id(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int perf_event__process_switch(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -397,6 +401,7 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp); size_t perf_event__fprintf_task(union perf_event *event, FILE *fp); size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp); size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_aux_output_hw_id(union perf_event *event, FILE *fp); size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp); size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp); size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 44e40bad0e33..fb8496df8432 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -755,6 +755,14 @@ int machine__process_itrace_start_event(struct machine *machine __maybe_unused, return 0; } +int machine__process_aux_output_hw_id_event(struct machine *machine __maybe_unused, + union perf_event *event) +{ + if (dump_trace) + 
perf_event__fprintf_aux_output_hw_id(event, stdout); + return 0; +} + int machine__process_switch_event(struct machine *machine __maybe_unused, union perf_event *event) { @@ -2028,6 +2036,8 @@ int machine__process_event(struct machine *machine, union perf_event *event, ret = machine__process_bpf(machine, event, sample); break; case PERF_RECORD_TEXT_POKE: ret = machine__process_text_poke(machine, event, sample); break; + case PERF_RECORD_AUX_OUTPUT_HW_ID: + ret = machine__process_aux_output_hw_id_event(machine, event); break; default: ret = -1; break; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 7377ed6efdf1..a143087eeb47 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -124,6 +124,8 @@ int machine__process_aux_event(struct machine *machine, union perf_event *event); int machine__process_itrace_start_event(struct machine *machine, union perf_event *event); +int machine__process_aux_output_hw_id_event(struct machine *machine, + union perf_event *event); int machine__process_switch_event(struct machine *machine, union perf_event *event); int machine__process_namespaces_event(struct machine *machine, diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 069c2cfdd3be..bdf1da9ea418 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -509,6 +509,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->bpf = perf_event__process_bpf; if (tool->text_poke == NULL) tool->text_poke = perf_event__process_text_poke; + if (tool->aux_output_hw_id == NULL) + tool->aux_output_hw_id = perf_event__process_aux_output_hw_id; if (tool->read == NULL) tool->read = process_event_sample_stub; if (tool->throttle == NULL) @@ -1000,6 +1002,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_NAMESPACES] = perf_event__namespaces_swap, [PERF_RECORD_CGROUP] = perf_event__cgroup_swap, [PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap, + [PERF_RECORD_AUX_OUTPUT_HW_ID] = 
perf_event__all64_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, @@ -1556,6 +1559,8 @@ static int machines__deliver_event(struct machines *machines, return tool->bpf(tool, event, sample, machine); case PERF_RECORD_TEXT_POKE: return tool->text_poke(tool, event, sample, machine); + case PERF_RECORD_AUX_OUTPUT_HW_ID: + return tool->aux_output_hw_id(tool, event, sample, machine); default: ++evlist->stats.nr_unknown_events; return -1; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index bbbc0dcd461f..ef873f2cc38f 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -53,6 +53,7 @@ struct perf_tool { lost_samples, aux, itrace_start, + aux_output_hw_id, context_switch, throttle, unthrottle, -- cgit v1.2.3 From dedcc0ea6ddcafcfb0f7e91b73deb546cb622d0c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 7 Sep 2021 19:39:03 +0300 Subject: perf intel-pt: Add support for PERF_RECORD_AUX_OUTPUT_HW_ID Originally, software only supported redirecting at most one PEBS event to Intel PT (PEBS-via-PT) because it was not able to differentiate one event from another. To overcome that, add support for the PERF_RECORD_AUX_OUTPUT_HW_ID side-band event. Committer notes: Cast the pointer arg to for_each_set_bit() to (unsigned long *), to fix the build on 32-bit systems. 
Reviewed-by: Alexander Shishkin Reviewed-by: Andi Kleen Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: x86@kernel.org Link: http://lore.kernel.org/lkml/20210907163903.11820-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-intel-pt.txt | 7 ++- tools/perf/util/intel-pt.c | 85 ++++++++++++++++++++++++++++-- 2 files changed, 87 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index db465fa7ee91..553c3e08fa4a 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -1144,7 +1144,12 @@ Recording is selected by using the aux-output config term e.g. perf record -c 10000 -e '{intel_pt/branch=0/,cycles/aux-output/ppp}' uname -Note that currently, software only supports redirecting at most one PEBS event. +Originally, software only supported redirecting at most one PEBS event because it +was not able to differentiate one event from another. To overcome that, more recent +kernels and perf tools add support for the PERF_RECORD_AUX_OUTPUT_HW_ID side-band event. +To check for the presence of that event in a PEBS-via-PT trace: + + perf script -D --no-itrace | grep PERF_RECORD_AUX_OUTPUT_HW_ID To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g. 
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 6f852b305e92..c9542fada8fb 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -111,6 +111,7 @@ struct intel_pt { u64 cbr_id; u64 psb_id; + bool single_pebs; bool sample_pebs; struct evsel *pebs_evsel; @@ -148,6 +149,14 @@ enum switch_state { INTEL_PT_SS_EXPECTING_SWITCH_IP, }; +/* applicable_counters is 64-bits */ +#define INTEL_PT_MAX_PEBS 64 + +struct intel_pt_pebs_event { + struct evsel *evsel; + u64 id; +}; + struct intel_pt_queue { struct intel_pt *pt; unsigned int queue_nr; @@ -189,6 +198,7 @@ struct intel_pt_queue { u64 last_br_cyc_cnt; unsigned int cbr_seen; char insn[INTEL_PT_INSN_BUF_SZ]; + struct intel_pt_pebs_event pebs[INTEL_PT_MAX_PEBS]; }; static void intel_pt_dump(struct intel_pt *pt __maybe_unused, @@ -1978,15 +1988,13 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack, } } -static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) +static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id) { const struct intel_pt_blk_items *items = &ptq->state->items; struct perf_sample sample = { .ip = 0, }; union perf_event *event = ptq->event_buf; struct intel_pt *pt = ptq->pt; - struct evsel *evsel = pt->pebs_evsel; u64 sample_type = evsel->core.attr.sample_type; - u64 id = evsel->core.id[0]; u8 cpumode; u64 regs[8 * sizeof(sample.intr_regs.mask)]; @@ -2112,6 +2120,45 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) return intel_pt_deliver_synth_event(pt, event, &sample, sample_type); } +static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq) +{ + struct intel_pt *pt = ptq->pt; + struct evsel *evsel = pt->pebs_evsel; + u64 id = evsel->core.id[0]; + + return intel_pt_do_synth_pebs_sample(ptq, evsel, id); +} + +static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) +{ + const struct intel_pt_blk_items *items = &ptq->state->items; + struct intel_pt_pebs_event 
*pe; + struct intel_pt *pt = ptq->pt; + int err = -EINVAL; + int hw_id; + + if (!items->has_applicable_counters || !items->applicable_counters) { + if (!pt->single_pebs) + pr_err("PEBS-via-PT record with no applicable_counters\n"); + return intel_pt_synth_single_pebs_sample(ptq); + } + + for_each_set_bit(hw_id, (unsigned long *)&items->applicable_counters, INTEL_PT_MAX_PEBS) { + pe = &ptq->pebs[hw_id]; + if (!pe->evsel) { + if (!pt->single_pebs) + pr_err("PEBS-via-PT record with no matching event, hw_id %d\n", + hw_id); + return intel_pt_synth_single_pebs_sample(ptq); + } + err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id); + if (err) + return err; + } + + return err; +} + static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, pid_t pid, pid_t tid, u64 ip, u64 timestamp) { @@ -2882,6 +2929,30 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt, event->itrace_start.tid); } +static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, + union perf_event *event, + struct perf_sample *sample) +{ + u64 hw_id = event->aux_output_hw_id.hw_id; + struct auxtrace_queue *queue; + struct intel_pt_queue *ptq; + struct evsel *evsel; + + queue = auxtrace_queues__sample_queue(&pt->queues, sample, pt->session); + evsel = evlist__id2evsel_strict(pt->session->evlist, sample->id); + if (!queue || !queue->priv || !evsel || hw_id >= INTEL_PT_MAX_PEBS) { + pr_err("Bad AUX output hardware ID\n"); + return -EINVAL; + } + + ptq = queue->priv; + + ptq->pebs[hw_id].evsel = evsel; + ptq->pebs[hw_id].id = sample->id; + + return 0; +} + static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al) { @@ -3009,6 +3080,8 @@ static int intel_pt_process_event(struct perf_session *session, err = intel_pt_process_switch(pt, sample); else if (event->header.type == PERF_RECORD_ITRACE_START) err = intel_pt_process_itrace_start(pt, event, sample); + else if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) + err = 
intel_pt_process_aux_output_hw_id(pt, event, sample); else if (event->header.type == PERF_RECORD_SWITCH || event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) err = intel_pt_context_switch(pt, event, sample); @@ -3393,9 +3466,13 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt) evlist__for_each_entry(pt->session->evlist, evsel) { if (evsel->core.attr.aux_output && evsel->core.id) { + if (pt->single_pebs) { + pt->single_pebs = false; + return; + } + pt->single_pebs = true; pt->sample_pebs = true; pt->pebs_evsel = evsel; - return; } } } -- cgit v1.2.3 From 529b6fbca03e1d8c101d041ffda5cc90e8f3fa4c Mon Sep 17 00:00:00 2001 From: Alexey Bayduraev Date: Wed, 13 Oct 2021 12:06:35 +0300 Subject: perf session: Move all state items to reader object We need all the state info about reader in separate object to load data from multiple files, so we can keep multiple readers at the same time. Moving all items that need to be kept from reader__process_events to the reader object. Introducing mmap_cur to keep current mapping. 
Suggested-by: Jiri Olsa Reviewed-by: Jiri Olsa Reviewed-by: Riccardo Mancini Signed-off-by: Alexey Bayduraev Tested-by: Riccardo Mancini Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Andi Kleen Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/5c7bdebfaadd7fcb729bd999b181feccaa292e8e.1634113027.git.alexey.v.bayduraev@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 63 ++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 28 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index bdf1da9ea418..f927bcd39b0a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2176,6 +2176,13 @@ struct reader { u64 data_offset; reader_cb_t process; bool in_place_update; + char *mmaps[NUM_MMAPS]; + size_t mmap_size; + int mmap_idx; + char *mmap_cur; + u64 file_pos; + u64 file_offset; + u64 head; }; static int @@ -2183,28 +2190,27 @@ reader__process_events(struct reader *rd, struct perf_session *session, struct ui_progress *prog) { u64 data_size = rd->data_size; - u64 head, page_offset, file_offset, file_pos, size; - int err = 0, mmap_prot, mmap_flags, map_idx = 0; - size_t mmap_size; - char *buf, *mmaps[NUM_MMAPS]; + u64 page_offset, size; + int err = 0, mmap_prot, mmap_flags; + char *buf, **mmaps = rd->mmaps; union perf_event *event; s64 skip; page_offset = page_size * (rd->data_offset / page_size); - file_offset = page_offset; - head = rd->data_offset - page_offset; + rd->file_offset = page_offset; + rd->head = rd->data_offset - page_offset; ui_progress__init_size(prog, data_size, "Processing events..."); data_size += rd->data_offset; - mmap_size = MMAP_SIZE; - if (mmap_size > data_size) { - mmap_size = data_size; + rd->mmap_size = MMAP_SIZE; + if (rd->mmap_size > data_size) { + rd->mmap_size = data_size; session->one_mmap = true; } - memset(mmaps, 
0, sizeof(mmaps)); + memset(mmaps, 0, sizeof(rd->mmaps)); mmap_prot = PROT_READ; mmap_flags = MAP_SHARED; @@ -2216,35 +2222,36 @@ reader__process_events(struct reader *rd, struct perf_session *session, mmap_flags = MAP_PRIVATE; } remap: - buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, rd->fd, - file_offset); + buf = mmap(NULL, rd->mmap_size, mmap_prot, mmap_flags, rd->fd, + rd->file_offset); if (buf == MAP_FAILED) { pr_err("failed to mmap file\n"); err = -errno; goto out; } - mmaps[map_idx] = buf; - map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1); - file_pos = file_offset + head; + mmaps[rd->mmap_idx] = rd->mmap_cur = buf; + rd->mmap_idx = (rd->mmap_idx + 1) & (ARRAY_SIZE(rd->mmaps) - 1); + rd->file_pos = rd->file_offset + rd->head; if (session->one_mmap) { session->one_mmap_addr = buf; - session->one_mmap_offset = file_offset; + session->one_mmap_offset = rd->file_offset; } more: - event = fetch_mmaped_event(head, mmap_size, buf, session->header.needs_swap); + event = fetch_mmaped_event(rd->head, rd->mmap_size, rd->mmap_cur, + session->header.needs_swap); if (IS_ERR(event)) return PTR_ERR(event); if (!event) { - if (mmaps[map_idx]) { - munmap(mmaps[map_idx], mmap_size); - mmaps[map_idx] = NULL; + if (mmaps[rd->mmap_idx]) { + munmap(mmaps[rd->mmap_idx], rd->mmap_size); + mmaps[rd->mmap_idx] = NULL; } - page_offset = page_size * (head / page_size); - file_offset += page_offset; - head -= page_offset; + page_offset = page_size * (rd->head / page_size); + rd->file_offset += page_offset; + rd->head -= page_offset; goto remap; } @@ -2253,9 +2260,9 @@ more: skip = -EINVAL; if (size < sizeof(struct perf_event_header) || - (skip = rd->process(session, event, file_pos)) < 0) { + (skip = rd->process(session, event, rd->file_pos)) < 0) { pr_err("%#" PRIx64 " [%#x]: failed to process type: %d [%s]\n", - file_offset + head, event->header.size, + rd->file_offset + rd->head, event->header.size, event->header.type, strerror(-skip)); err = skip; goto out; @@ -2264,8 +2271,8 
@@ more: if (skip) size += skip; - head += size; - file_pos += size; + rd->head += size; + rd->file_pos += size; err = __perf_session__process_decomp_events(session); if (err) @@ -2276,7 +2283,7 @@ more: if (session_done()) goto out; - if (file_pos < data_size) + if (rd->file_pos < data_size) goto more; out: -- cgit v1.2.3 From 3a3535e67dfdc29b4a2b455220244fb776f1df61 Mon Sep 17 00:00:00 2001 From: Alexey Bayduraev Date: Wed, 13 Oct 2021 12:06:36 +0300 Subject: perf session: Introduce decompressor in reader object Introduce a decompressor data structure with pointers to decomp objects and to zstd object. We cannot just move session->zstd_data to decomp_data as session->zstd_data is not only used for decompression. Adding decompressor data object to reader object and introducing active_decomp into perf_session object to select current decompressor. Thus decompression could be executed separately for each data file. Reviewed-by: Jiri Olsa Reviewed-by: Riccardo Mancini Signed-off-by: Alexey Bayduraev Tested-by: Riccardo Mancini Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Andi Kleen Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/0eee270cb52aebcbd029c8445d9009fd17709d53.1634113027.git.alexey.v.bayduraev@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 39 +++++++++++++++++++++++++-------------- tools/perf/util/session.h | 10 ++++++++-- 2 files changed, 33 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f927bcd39b0a..79a447130193 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -44,7 +44,7 @@ static int perf_session__process_compressed_event(struct perf_session *session, size_t decomp_size, src_size; u64 decomp_last_rem = 0; size_t mmap_len, decomp_len = session->header.env.comp_mmap_len; - struct decomp *decomp, *decomp_last = 
session->decomp_last; + struct decomp *decomp, *decomp_last = session->active_decomp->decomp_last; if (decomp_last) { decomp_last_rem = decomp_last->size - decomp_last->head; @@ -71,7 +71,7 @@ static int perf_session__process_compressed_event(struct perf_session *session, src = (void *)event + sizeof(struct perf_record_compressed); src_size = event->pack.header.size - sizeof(struct perf_record_compressed); - decomp_size = zstd_decompress_stream(&(session->zstd_data), src, src_size, + decomp_size = zstd_decompress_stream(session->active_decomp->zstd_decomp, src, src_size, &(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem); if (!decomp_size) { munmap(decomp, mmap_len); @@ -81,13 +81,12 @@ static int perf_session__process_compressed_event(struct perf_session *session, decomp->size += decomp_size; - if (session->decomp == NULL) { - session->decomp = decomp; - session->decomp_last = decomp; - } else { - session->decomp_last->next = decomp; - session->decomp_last = decomp; - } + if (session->active_decomp->decomp == NULL) + session->active_decomp->decomp = decomp; + else + session->active_decomp->decomp_last->next = decomp; + + session->active_decomp->decomp_last = decomp; pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size); @@ -197,6 +196,8 @@ struct perf_session *__perf_session__new(struct perf_data *data, session->repipe = repipe; session->tool = tool; + session->decomp_data.zstd_decomp = &session->zstd_data; + session->active_decomp = &session->decomp_data; INIT_LIST_HEAD(&session->auxtrace_index); machines__init(&session->machines); ordered_events__init(&session->ordered_events, @@ -276,11 +277,11 @@ static void perf_session__delete_threads(struct perf_session *session) machine__delete_threads(&session->machines.host); } -static void perf_session__release_decomp_events(struct perf_session *session) +static void perf_decomp__release_events(struct decomp *next) { - struct decomp *next, *decomp; + struct decomp *decomp; size_t mmap_len; - next = 
session->decomp; + do { decomp = next; if (decomp == NULL) @@ -299,7 +300,7 @@ void perf_session__delete(struct perf_session *session) auxtrace_index__free(&session->auxtrace_index); perf_session__destroy_kernel_maps(session); perf_session__delete_threads(session); - perf_session__release_decomp_events(session); + perf_decomp__release_events(session->decomp_data.decomp); perf_env__exit(&session->header.env); machines__exit(&session->machines); if (session->data) { @@ -2122,7 +2123,7 @@ static int __perf_session__process_decomp_events(struct perf_session *session) { s64 skip; u64 size, file_pos = 0; - struct decomp *decomp = session->decomp_last; + struct decomp *decomp = session->active_decomp->decomp_last; if (!decomp) return 0; @@ -2183,6 +2184,8 @@ struct reader { u64 file_pos; u64 file_offset; u64 head; + struct zstd_data zstd_data; + struct decomp_data decomp_data; }; static int @@ -2212,6 +2215,11 @@ reader__process_events(struct reader *rd, struct perf_session *session, memset(mmaps, 0, sizeof(rd->mmaps)); + if (zstd_init(&rd->zstd_data, 0)) + return -1; + rd->decomp_data.zstd_decomp = &rd->zstd_data; + session->active_decomp = &rd->decomp_data; + mmap_prot = PROT_READ; mmap_flags = MAP_SHARED; @@ -2287,6 +2295,7 @@ more: goto more; out: + session->active_decomp = &session->decomp_data; return err; } @@ -2339,6 +2348,8 @@ out_err: */ ordered_events__reinit(&session->ordered_events); auxtrace__free_events(session); + perf_decomp__release_events(rd.decomp_data.decomp); + zstd_fini(&rd.zstd_data); session->one_mmap = false; return err; } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 5d8bd14a0a39..46c854292ad6 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -20,6 +20,12 @@ struct thread; struct auxtrace; struct itrace_synth_opts; +struct decomp_data { + struct decomp *decomp; + struct decomp *decomp_last; + struct zstd_data *zstd_decomp; +}; + struct perf_session { struct perf_header header; struct machines 
machines; @@ -39,8 +45,8 @@ struct perf_session { u64 bytes_transferred; u64 bytes_compressed; struct zstd_data zstd_data; - struct decomp *decomp; - struct decomp *decomp_last; + struct decomp_data decomp_data; + struct decomp_data *active_decomp; }; struct decomp { -- cgit v1.2.3 From 5965063094944be751a7bff6ccd3e404c14b65cc Mon Sep 17 00:00:00 2001 From: Alexey Bayduraev Date: Wed, 13 Oct 2021 12:06:37 +0300 Subject: perf session: Move init/release code to separate functions Separate init/release code into reader__init() and reader__release_decomp() functions. Remove a duplicate call to ui_progress__init_size(), the same call can be found in __perf_session__process_events(). For multiple traces ui_progress should be initialized by total size before reader__init() calls. Suggested-by: Jiri Olsa Reviewed-by: Jiri Olsa Reviewed-by: Riccardo Mancini Signed-off-by: Alexey Bayduraev Tested-by: Riccardo Mancini Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Andi Kleen Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/8bacf247de220be8e57af1d2b796322175f5e257.1634113027.git.alexey.v.bayduraev@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 45 ++++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 79a447130193..b315febea052 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2189,28 +2189,23 @@ struct reader { }; static int -reader__process_events(struct reader *rd, struct perf_session *session, - struct ui_progress *prog) +reader__init(struct reader *rd, bool *one_mmap) { u64 data_size = rd->data_size; - u64 page_offset, size; - int err = 0, mmap_prot, mmap_flags; - char *buf, **mmaps = rd->mmaps; - union perf_event *event; - s64 skip; + u64 page_offset; + char **mmaps = rd->mmaps; 
page_offset = page_size * (rd->data_offset / page_size); rd->file_offset = page_offset; rd->head = rd->data_offset - page_offset; - ui_progress__init_size(prog, data_size, "Processing events..."); - data_size += rd->data_offset; rd->mmap_size = MMAP_SIZE; if (rd->mmap_size > data_size) { rd->mmap_size = data_size; - session->one_mmap = true; + if (one_mmap) + *one_mmap = true; } memset(mmaps, 0, sizeof(rd->mmaps)); @@ -2218,6 +2213,31 @@ reader__process_events(struct reader *rd, struct perf_session *session, if (zstd_init(&rd->zstd_data, 0)) return -1; rd->decomp_data.zstd_decomp = &rd->zstd_data; + + return 0; +} + +static void +reader__release_decomp(struct reader *rd) +{ + perf_decomp__release_events(rd->decomp_data.decomp); + zstd_fini(&rd->zstd_data); +} + +static int +reader__process_events(struct reader *rd, struct perf_session *session, + struct ui_progress *prog) +{ + u64 page_offset, size; + int err = 0, mmap_prot, mmap_flags; + char *buf, **mmaps = rd->mmaps; + union perf_event *event; + s64 skip; + + err = reader__init(rd, &session->one_mmap); + if (err) + goto out; + session->active_decomp = &rd->decomp_data; mmap_prot = PROT_READ; @@ -2291,7 +2311,7 @@ more: if (session_done()) goto out; - if (rd->file_pos < data_size) + if (rd->file_pos < rd->data_size + rd->data_offset) goto more; out: @@ -2348,8 +2368,7 @@ out_err: */ ordered_events__reinit(&session->ordered_events); auxtrace__free_events(session); - perf_decomp__release_events(rd.decomp_data.decomp); - zstd_fini(&rd.zstd_data); + reader__release_decomp(&rd); session->one_mmap = false; return err; } -- cgit v1.2.3 From 06763e7b30d9b502c48ad982851a195781aeff81 Mon Sep 17 00:00:00 2001 From: Alexey Bayduraev Date: Wed, 13 Oct 2021 12:06:38 +0300 Subject: perf session: Move reader map code to a separate function Move the mapping code into a separate reader__mmap() function. 
Suggested-by: Jiri Olsa Reviewed-by: Jiri Olsa Reviewed-by: Riccardo Mancini Signed-off-by: Alexey Bayduraev Tested-by: Riccardo Mancini Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Andi Kleen Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/e445de5bb85bbd91287986802d6ed0ce1b419b5a.1634113027.git.alexey.v.bayduraev@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index b315febea052..1abe870223f6 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2225,20 +2225,10 @@ reader__release_decomp(struct reader *rd) } static int -reader__process_events(struct reader *rd, struct perf_session *session, - struct ui_progress *prog) +reader__mmap(struct reader *rd, struct perf_session *session) { - u64 page_offset, size; - int err = 0, mmap_prot, mmap_flags; + int mmap_prot, mmap_flags; char *buf, **mmaps = rd->mmaps; - union perf_event *event; - s64 skip; - - err = reader__init(rd, &session->one_mmap); - if (err) - goto out; - - session->active_decomp = &rd->decomp_data; mmap_prot = PROT_READ; mmap_flags = MAP_SHARED; @@ -2249,13 +2239,12 @@ reader__process_events(struct reader *rd, struct perf_session *session, mmap_prot |= PROT_WRITE; mmap_flags = MAP_PRIVATE; } -remap: + buf = mmap(NULL, rd->mmap_size, mmap_prot, mmap_flags, rd->fd, rd->file_offset); if (buf == MAP_FAILED) { pr_err("failed to mmap file\n"); - err = -errno; - goto out; + return -errno; } mmaps[rd->mmap_idx] = rd->mmap_cur = buf; rd->mmap_idx = (rd->mmap_idx + 1) & (ARRAY_SIZE(rd->mmaps) - 1); @@ -2265,6 +2254,30 @@ remap: session->one_mmap_offset = rd->file_offset; } + return 0; +} + +static int +reader__process_events(struct reader *rd, struct perf_session *session, + 
struct ui_progress *prog) +{ + u64 page_offset, size; + int err = 0; + char **mmaps = rd->mmaps; + union perf_event *event; + s64 skip; + + err = reader__init(rd, &session->one_mmap); + if (err) + goto out; + + session->active_decomp = &rd->decomp_data; + +remap: + err = reader__mmap(rd, session); + if (err) + goto out; + more: event = fetch_mmaped_event(rd->head, rd->mmap_size, rd->mmap_cur, session->header.needs_swap); -- cgit v1.2.3 From de096489d00f7764934906d0b6688783be6b9dc0 Mon Sep 17 00:00:00 2001 From: Alexey Bayduraev Date: Wed, 13 Oct 2021 12:06:39 +0300 Subject: perf session: Move unmap code to reader__mmap Move the unmapping code to reader__mmap(), so that the mmap code is located together. Move the head/file_offset computation to reader__mmap(), so all the offset computation is located together and in one place only. Suggested-by: Jiri Olsa Reviewed-by: Jiri Olsa Reviewed-by: Riccardo Mancini Signed-off-by: Alexey Bayduraev Tested-by: Riccardo Mancini Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Andi Kleen Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/f1c5e17cfa1ecfe912d10b411be203b55d148bc7.1634113027.git.alexey.v.bayduraev@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1abe870223f6..4cbe66366d17 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2192,13 +2192,9 @@ static int reader__init(struct reader *rd, bool *one_mmap) { u64 data_size = rd->data_size; - u64 page_offset; char **mmaps = rd->mmaps; - page_offset = page_size * (rd->data_offset / page_size); - rd->file_offset = page_offset; - rd->head = rd->data_offset - page_offset; - + rd->head = rd->data_offset; data_size += rd->data_offset; rd->mmap_size = 
MMAP_SIZE; @@ -2229,6 +2225,7 @@ reader__mmap(struct reader *rd, struct perf_session *session) { int mmap_prot, mmap_flags; char *buf, **mmaps = rd->mmaps; + u64 page_offset; mmap_prot = PROT_READ; mmap_flags = MAP_SHARED; @@ -2240,6 +2237,15 @@ reader__mmap(struct reader *rd, struct perf_session *session) mmap_flags = MAP_PRIVATE; } + if (mmaps[rd->mmap_idx]) { + munmap(mmaps[rd->mmap_idx], rd->mmap_size); + mmaps[rd->mmap_idx] = NULL; + } + + page_offset = page_size * (rd->head / page_size); + rd->file_offset += page_offset; + rd->head -= page_offset; + buf = mmap(NULL, rd->mmap_size, mmap_prot, mmap_flags, rd->fd, rd->file_offset); if (buf == MAP_FAILED) { @@ -2261,9 +2267,8 @@ static int reader__process_events(struct reader *rd, struct perf_session *session, struct ui_progress *prog) { - u64 page_offset, size; + u64 size; int err = 0; - char **mmaps = rd->mmaps; union perf_event *event; s64 skip; @@ -2284,17 +2289,8 @@ more: if (IS_ERR(event)) return PTR_ERR(event); - if (!event) { - if (mmaps[rd->mmap_idx]) { - munmap(mmaps[rd->mmap_idx], rd->mmap_size); - mmaps[rd->mmap_idx] = NULL; - } - - page_offset = page_size * (rd->head / page_size); - rd->file_offset += page_offset; - rd->head -= page_offset; + if (!event) goto remap; - } size = event->header.size; -- cgit v1.2.3 From 5c10dc9244fe37855002f43297ff338d0fd253e2 Mon Sep 17 00:00:00 2001 From: Alexey Bayduraev Date: Wed, 13 Oct 2021 12:06:40 +0300 Subject: perf session: Move the event read code to a separate function Separate the reading code of a single event to a new reader__read_event() function. 
Suggested-by: Jiri Olsa Reviewed-by: Jiri Olsa Reviewed-by: Riccardo Mancini Signed-off-by: Alexey Bayduraev Tested-by: Riccardo Mancini Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Andi Kleen Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/ffe570d937138dd24f282978ce7ed9c46a06ff9b.1634113027.git.alexey.v.bayduraev@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 4cbe66366d17..d5a8811cf8ce 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2264,33 +2264,21 @@ reader__mmap(struct reader *rd, struct perf_session *session) } static int -reader__process_events(struct reader *rd, struct perf_session *session, - struct ui_progress *prog) +reader__read_event(struct reader *rd, struct perf_session *session, + struct ui_progress *prog) { u64 size; int err = 0; union perf_event *event; s64 skip; - err = reader__init(rd, &session->one_mmap); - if (err) - goto out; - - session->active_decomp = &rd->decomp_data; - -remap: - err = reader__mmap(rd, session); - if (err) - goto out; - -more: event = fetch_mmaped_event(rd->head, rd->mmap_size, rd->mmap_cur, session->header.needs_swap); if (IS_ERR(event)) return PTR_ERR(event); if (!event) - goto remap; + return 1; size = event->header.size; @@ -2317,6 +2305,34 @@ more: ui_progress__update(prog, size); +out: + return err; +} + +static int +reader__process_events(struct reader *rd, struct perf_session *session, + struct ui_progress *prog) +{ + int err; + + err = reader__init(rd, &session->one_mmap); + if (err) + goto out; + + session->active_decomp = &rd->decomp_data; + +remap: + err = reader__mmap(rd, session); + if (err) + goto out; + +more: + err = reader__read_event(rd, session, 
prog); + if (err < 0) + goto out; + else if (err == 1) + goto remap; + if (session_done()) goto out; -- cgit v1.2.3 From 4c0028864cd937ae604f0d62282899108061a6f1 Mon Sep 17 00:00:00 2001 From: Alexey Bayduraev Date: Wed, 13 Oct 2021 12:06:41 +0300 Subject: perf session: Introduce reader return codes Add READER_OK and READER_NODATA return codes to make the code more clear. Suggested-by: Jiri Olsa Reviewed-by: Jiri Olsa Reviewed-by: Riccardo Mancini Signed-off-by: Alexey Bayduraev Tested-by: Riccardo Mancini Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Andi Kleen Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/5fca481e91c3c5d2ba033d4c6e9b969f8033ab0f.1634113027.git.alexey.v.bayduraev@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index d5a8811cf8ce..fee7a7171f1a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2263,12 +2263,17 @@ reader__mmap(struct reader *rd, struct perf_session *session) return 0; } +enum { + READER_OK, + READER_NODATA, +}; + static int reader__read_event(struct reader *rd, struct perf_session *session, struct ui_progress *prog) { u64 size; - int err = 0; + int err = READER_OK; union perf_event *event; s64 skip; @@ -2278,7 +2283,7 @@ reader__read_event(struct reader *rd, struct perf_session *session, return PTR_ERR(event); if (!event) - return 1; + return READER_NODATA; size = event->header.size; @@ -2330,7 +2335,7 @@ more: err = reader__read_event(rd, session, prog); if (err < 0) goto out; - else if (err == 1) + else if (err == READER_NODATA) goto remap; if (session_done()) -- cgit v1.2.3 From 25900ea85ceef35e19234682e7c9dfc8ca2addbe Mon Sep 17 00:00:00 2001 From: Alexey Bayduraev Date: Wed, 13 Oct 2021 12:06:42 +0300 Subject: perf 
session: Introduce reader EOF function Introduce function to check end-of-file status. Reviewed-by: Jiri Olsa Signed-off-by: Alexey Bayduraev Cc: Adrian Hunter Cc: Alexander Antonov Cc: Alexander Shishkin Cc: Alexei Budankov Cc: Andi Kleen Cc: Ingo Molnar Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Riccardo Mancini Link: https://lore.kernel.org/r/b3b0e0904da01f9ec84d4ae9368df99ecd231598.1634113027.git.alexey.v.bayduraev@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index fee7a7171f1a..51bf2efceb7f 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2314,6 +2314,12 @@ out: return err; } +static inline bool +reader__eof(struct reader *rd) +{ + return (rd->file_pos >= rd->data_size + rd->data_offset); +} + static int reader__process_events(struct reader *rd, struct perf_session *session, struct ui_progress *prog) @@ -2341,7 +2347,7 @@ more: if (session_done()) goto out; - if (rd->file_pos < rd->data_size + rd->data_offset) + if (!reader__eof(rd)) goto more; out: -- cgit v1.2.3 From 637b8b90fe0d10036eb4adfdcb9a592d62fd0112 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Mon, 18 Oct 2021 17:19:47 +0530 Subject: perf powerpc: Refactor the code definition of perf reg extended mask in tools side header file PERF_REG_PMU_MASK_300 and PERF_REG_PMU_MASK_31 define the mask values for extended registers. The current definition of these mask values uses hex constants and does not refer to registers by name, making it less readable. This patch refactors the macro values in the perf tools side header file by OR-ing together the actual register value constants.
Suggested-by: Michael Ellerman Reviewed-by: Kajol Jain Signed-off-by: Athira Jajeev Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Nageswara R Sastry Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20211018114948.16830-2-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/powerpc/include/uapi/asm/perf_regs.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h b/tools/arch/powerpc/include/uapi/asm/perf_regs.h index 578b3ee86105..085094553f3b 100644 --- a/tools/arch/powerpc/include/uapi/asm/perf_regs.h +++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h @@ -61,27 +61,32 @@ enum perf_event_powerpc_regs { PERF_REG_POWERPC_PMC4, PERF_REG_POWERPC_PMC5, PERF_REG_POWERPC_PMC6, - /* Max regs without the extended regs */ + /* Max mask value for interrupt regs w/o extended regs */ PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1, + /* Max mask value for interrupt regs including extended regs */ + PERF_REG_EXTENDED_MAX = PERF_REG_POWERPC_PMC6 + 1, }; #define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) -/* Exclude MMCR3, SIER2, SIER3 for CPU_FTR_ARCH_300 */ -#define PERF_EXCLUDE_REG_EXT_300 (7ULL << PERF_REG_POWERPC_MMCR3) - /* * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 * includes 9 SPRS from MMCR0 to PMC6 excluding the - * unsupported SPRS in PERF_EXCLUDE_REG_EXT_300. + * unsupported SPRS MMCR3, SIER2 and SIER3. 
*/ -#define PERF_REG_PMU_MASK_300 ((0xfffULL << PERF_REG_POWERPC_MMCR0) - PERF_EXCLUDE_REG_EXT_300) +#define PERF_REG_PMU_MASK_300 \ + ((1ULL << PERF_REG_POWERPC_MMCR0) | (1ULL << PERF_REG_POWERPC_MMCR1) | \ + (1ULL << PERF_REG_POWERPC_MMCR2) | (1ULL << PERF_REG_POWERPC_PMC1) | \ + (1ULL << PERF_REG_POWERPC_PMC2) | (1ULL << PERF_REG_POWERPC_PMC3) | \ + (1ULL << PERF_REG_POWERPC_PMC4) | (1ULL << PERF_REG_POWERPC_PMC5) | \ + (1ULL << PERF_REG_POWERPC_PMC6)) /* * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 * includes 12 SPRs from MMCR0 to PMC6. */ -#define PERF_REG_PMU_MASK_31 (0xfffULL << PERF_REG_POWERPC_MMCR0) +#define PERF_REG_PMU_MASK_31 \ + (PERF_REG_PMU_MASK_300 | (1ULL << PERF_REG_POWERPC_MMCR3) | \ + (1ULL << PERF_REG_POWERPC_SIER2) | (1ULL << PERF_REG_POWERPC_SIER3)) -#define PERF_REG_EXTENDED_MAX (PERF_REG_POWERPC_PMC6 + 1) #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */ -- cgit v1.2.3 From 83e1ada67a597caa4c9693512991520e923c2b9a Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Mon, 18 Oct 2021 17:19:48 +0530 Subject: perf powerpc: Add support to expose instruction and data address registers as part of extended regs This patch enables presenting Sampled Instruction Address Register (SIAR) and Sampled Data Address Register (SDAR) SPRs as part of extended registers for the perf tool. Add these SPR's to sample_reg_mask in the tool side (to use with -I? option). 
Reviewed-by: Kajol Jain Signed-off-by: Athira Jajeev Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: linuxppc-dev@lists.ozlabs.org Link: https://lore.kernel.org/r/20211018114948.16830-3-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/powerpc/include/uapi/asm/perf_regs.h | 11 +++++++---- tools/perf/arch/powerpc/include/perf_regs.h | 2 ++ tools/perf/arch/powerpc/util/perf_regs.c | 2 ++ 3 files changed, 11 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h b/tools/arch/powerpc/include/uapi/asm/perf_regs.h index 085094553f3b..749a2e3af89e 100644 --- a/tools/arch/powerpc/include/uapi/asm/perf_regs.h +++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h @@ -61,17 +61,19 @@ enum perf_event_powerpc_regs { PERF_REG_POWERPC_PMC4, PERF_REG_POWERPC_PMC5, PERF_REG_POWERPC_PMC6, + PERF_REG_POWERPC_SDAR, + PERF_REG_POWERPC_SIAR, /* Max mask value for interrupt regs w/o extended regs */ PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1, /* Max mask value for interrupt regs including extended regs */ - PERF_REG_EXTENDED_MAX = PERF_REG_POWERPC_PMC6 + 1, + PERF_REG_EXTENDED_MAX = PERF_REG_POWERPC_SIAR + 1, }; #define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1) /* * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 - * includes 9 SPRS from MMCR0 to PMC6 excluding the + * includes 11 SPRS from MMCR0 to SIAR excluding the * unsupported SPRS MMCR3, SIER2 and SIER3. 
*/ #define PERF_REG_PMU_MASK_300 \ @@ -79,11 +81,12 @@ enum perf_event_powerpc_regs { (1ULL << PERF_REG_POWERPC_MMCR2) | (1ULL << PERF_REG_POWERPC_PMC1) | \ (1ULL << PERF_REG_POWERPC_PMC2) | (1ULL << PERF_REG_POWERPC_PMC3) | \ (1ULL << PERF_REG_POWERPC_PMC4) | (1ULL << PERF_REG_POWERPC_PMC5) | \ - (1ULL << PERF_REG_POWERPC_PMC6)) + (1ULL << PERF_REG_POWERPC_PMC6) | (1ULL << PERF_REG_POWERPC_SDAR) | \ + (1ULL << PERF_REG_POWERPC_SIAR)) /* * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 - * includes 12 SPRs from MMCR0 to PMC6. + * includes 14 SPRs from MMCR0 to SIAR. */ #define PERF_REG_PMU_MASK_31 \ (PERF_REG_PMU_MASK_300 | (1ULL << PERF_REG_POWERPC_MMCR3) | \ diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h index 04e5dc07e93f..93339d17acc4 100644 --- a/tools/perf/arch/powerpc/include/perf_regs.h +++ b/tools/perf/arch/powerpc/include/perf_regs.h @@ -77,6 +77,8 @@ static const char *reg_names[] = { [PERF_REG_POWERPC_PMC4] = "pmc4", [PERF_REG_POWERPC_PMC5] = "pmc5", [PERF_REG_POWERPC_PMC6] = "pmc6", + [PERF_REG_POWERPC_SDAR] = "sdar", + [PERF_REG_POWERPC_SIAR] = "siar", }; static inline const char *__perf_reg_name(int id) diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c index 8116a253f91f..8d07a78e742a 100644 --- a/tools/perf/arch/powerpc/util/perf_regs.c +++ b/tools/perf/arch/powerpc/util/perf_regs.c @@ -74,6 +74,8 @@ const struct sample_reg sample_reg_masks[] = { SMPL_REG(pmc4, PERF_REG_POWERPC_PMC4), SMPL_REG(pmc5, PERF_REG_POWERPC_PMC5), SMPL_REG(pmc6, PERF_REG_POWERPC_PMC6), + SMPL_REG(sdar, PERF_REG_POWERPC_SDAR), + SMPL_REG(siar, PERF_REG_POWERPC_SIAR), SMPL_REG_END }; -- cgit v1.2.3 From 0e0ae8742207c3b477cf0357b8115cec7b19612c Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 3 Sep 2021 10:52:39 +0800 Subject: perf list: Display hybrid PMU events with cpu type Add a new option '--cputype' to 'perf list' to display core-only PMU events or atom-only PMU 
events. Each hybrid PMU event has been assigned with a PMU name, this patch compares the PMU name before listing the result. For example: perf list --cputype atom ... cache: core_reject_l2q.any [Counts the number of request that were not accepted into the L2Q because the L2Q is FULL. Unit: cpu_atom] ... The "Unit: cpu_atom" is displayed in the brief description section to indicate this is an atom event. Signed-off-by: Jin Yao Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210903025239.22754-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 4 ++++ tools/perf/builtin-list.c | 42 ++++++++++++++++++++++++---------- tools/perf/util/metricgroup.c | 7 +++++- tools/perf/util/metricgroup.h | 2 +- tools/perf/util/parse-events.c | 8 ++++--- tools/perf/util/parse-events.h | 3 ++- tools/perf/util/pmu.c | 29 +++++++++++++++++++---- tools/perf/util/pmu.h | 2 +- 8 files changed, 73 insertions(+), 24 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 4c7db1da8fcc..4dc8d0af19df 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -39,6 +39,10 @@ any extra expressions computed by perf stat. --deprecated:: Print deprecated events. By default the deprecated events are hidden. +--cputype:: +Print events applying cpu with this type for hybrid platform +(e.g. 
--cputype core or --cputype atom) + [[EVENT_MODIFIERS]] EVENT MODIFIERS --------------- diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 10ab5e40a34f..468958154ed9 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -12,6 +12,7 @@ #include "util/parse-events.h" #include "util/pmu.h" +#include "util/pmu-hybrid.h" #include "util/debug.h" #include "util/metricgroup.h" #include @@ -20,13 +21,15 @@ static bool desc_flag = true; static bool details_flag; +static const char *hybrid_type; int cmd_list(int argc, const char **argv) { - int i; + int i, ret = 0; bool raw_dump = false; bool long_desc_flag = false; bool deprecated = false; + char *pmu_name = NULL; struct option list_options[] = { OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"), OPT_BOOLEAN('d', "desc", &desc_flag, @@ -37,6 +40,9 @@ int cmd_list(int argc, const char **argv) "Print information on the perf event names and expressions used internally by events."), OPT_BOOLEAN(0, "deprecated", &deprecated, "Print deprecated events."), + OPT_STRING(0, "cputype", &hybrid_type, "hybrid cpu type", + "Print events applying cpu with this type for hybrid platform " + "(e.g. 
core or atom)"), OPT_INCR(0, "debug", &verbose, "Enable debugging output"), OPT_END() @@ -56,10 +62,16 @@ int cmd_list(int argc, const char **argv) if (!raw_dump && pager_in_use()) printf("\nList of pre-defined events (to be used in -e):\n\n"); + if (hybrid_type) { + pmu_name = perf_pmu__hybrid_type_to_pmu(hybrid_type); + if (!pmu_name) + pr_warning("WARNING: hybrid cputype is not supported!\n"); + } + if (argc == 0) { print_events(NULL, raw_dump, !desc_flag, long_desc_flag, - details_flag, deprecated); - return 0; + details_flag, deprecated, pmu_name); + goto out; } for (i = 0; i < argc; ++i) { @@ -82,25 +94,27 @@ int cmd_list(int argc, const char **argv) else if (strcmp(argv[i], "pmu") == 0) print_pmu_events(NULL, raw_dump, !desc_flag, long_desc_flag, details_flag, - deprecated); + deprecated, pmu_name); else if (strcmp(argv[i], "sdt") == 0) print_sdt_events(NULL, NULL, raw_dump); else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0) - metricgroup__print(true, false, NULL, raw_dump, details_flag); + metricgroup__print(true, false, NULL, raw_dump, details_flag, pmu_name); else if (strcmp(argv[i], "metricgroup") == 0 || strcmp(argv[i], "metricgroups") == 0) - metricgroup__print(false, true, NULL, raw_dump, details_flag); + metricgroup__print(false, true, NULL, raw_dump, details_flag, pmu_name); else if ((sep = strchr(argv[i], ':')) != NULL) { int sep_idx; sep_idx = sep - argv[i]; s = strdup(argv[i]); - if (s == NULL) - return -1; + if (s == NULL) { + ret = -1; + goto out; + } s[sep_idx] = '\0'; print_tracepoint_events(s, s + sep_idx + 1, raw_dump); print_sdt_events(s, s + sep_idx + 1, raw_dump); - metricgroup__print(true, true, s, raw_dump, details_flag); + metricgroup__print(true, true, s, raw_dump, details_flag, pmu_name); free(s); } else { if (asprintf(&s, "*%s*", argv[i]) < 0) { @@ -116,12 +130,16 @@ int cmd_list(int argc, const char **argv) print_pmu_events(s, raw_dump, !desc_flag, long_desc_flag, details_flag, - deprecated); + 
deprecated, + pmu_name); print_tracepoint_events(NULL, s, raw_dump); print_sdt_events(NULL, s, raw_dump); - metricgroup__print(true, true, s, raw_dump, details_flag); + metricgroup__print(true, true, s, raw_dump, details_flag, pmu_name); free(s); } } - return 0; + +out: + free(pmu_name); + return ret; } diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index c34a3683e719..4917e9704765 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -11,6 +11,7 @@ #include "evsel.h" #include "strbuf.h" #include "pmu.h" +#include "pmu-hybrid.h" #include "expr.h" #include "rblist.h" #include @@ -535,7 +536,7 @@ static int metricgroup__print_sys_event_iter(const struct pmu_event *pe, void *d } void metricgroup__print(bool metrics, bool metricgroups, char *filter, - bool raw, bool details) + bool raw, bool details, const char *pmu_name) { const struct pmu_events_map *map = pmu_events_map__find(); const struct pmu_event *pe; @@ -561,6 +562,10 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, break; if (!pe->metric_expr) continue; + if (pmu_name && perf_pmu__is_hybrid(pe->pmu) && + strcmp(pmu_name, pe->pmu)) { + continue; + } if (metricgroup__print_pmu_event(pe, metricgroups, filter, raw, details, &groups, metriclist) < 0) diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 3a51a84f440b..2b42b778d1bf 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -80,7 +80,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist, struct rblist *metric_events); void metricgroup__print(bool metrics, bool groups, char *filter, - bool raw, bool details); + bool raw, bool details, const char *pmu_name); bool metricgroup__has_metric(const char *metric); int arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused); void metricgroup__rblist_exit(struct rblist *metric_events); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c 
index 006a7f721549..278199ed788b 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -3089,7 +3089,8 @@ out_enomem: * Print the help text for the event symbols: */ void print_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc, bool details_flag, bool deprecated) + bool long_desc, bool details_flag, bool deprecated, + const char *pmu_name) { print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); @@ -3101,7 +3102,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_hwcache_events(event_glob, name_only); print_pmu_events(event_glob, name_only, quiet_flag, long_desc, - details_flag, deprecated); + details_flag, deprecated, pmu_name); if (event_glob != NULL) return; @@ -3127,7 +3128,8 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_sdt_events(NULL, NULL, name_only); - metricgroup__print(true, true, NULL, name_only, details_flag); + metricgroup__print(true, true, NULL, name_only, details_flag, + pmu_name); print_libpfm_events(name_only, long_desc); } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 07f879e525fe..f60a661a2247 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -221,7 +221,8 @@ void parse_events_evlist_error(struct parse_events_state *parse_state, int idx, const char *str); void print_events(const char *event_glob, bool name_only, bool quiet, - bool long_desc, bool details_flag, bool deprecated); + bool long_desc, bool details_flag, bool deprecated, + const char *pmu_name); struct event_symbol { const char *symbol; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 9b5039bf909a..f3072c71d132 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1608,6 +1608,7 @@ static int cmp_sevent(const void *a, const void *b) { const struct sevent *as = a; const struct sevent *bs = b; + int ret; /* Put 
extra events last */ if (!!as->desc != !!bs->desc) @@ -1623,7 +1624,13 @@ static int cmp_sevent(const void *a, const void *b) if (as->is_cpu != bs->is_cpu) return bs->is_cpu - as->is_cpu; - return strcmp(as->name, bs->name); + ret = strcmp(as->name, bs->name); + if (!ret) { + if (as->pmu && bs->pmu) + return strcmp(as->pmu, bs->pmu); + } + + return ret; } static void wordwrap(char *s, int start, int max, int corr) @@ -1653,7 +1660,8 @@ bool is_pmu_core(const char *name) } void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, - bool long_desc, bool details_flag, bool deprecated) + bool long_desc, bool details_flag, bool deprecated, + const char *pmu_name) { struct perf_pmu *pmu; struct perf_pmu_alias *alias; @@ -1679,10 +1687,16 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, pmu = NULL; j = 0; while ((pmu = perf_pmu__scan(pmu)) != NULL) { + if (pmu_name && perf_pmu__is_hybrid(pmu->name) && + strcmp(pmu_name, pmu->name)) { + continue; + } + list_for_each_entry(alias, &pmu->aliases, list) { char *name = alias->desc ? 
alias->name : format_alias(buf, sizeof(buf), pmu, alias); - bool is_cpu = is_pmu_core(pmu->name); + bool is_cpu = is_pmu_core(pmu->name) || + perf_pmu__is_hybrid(pmu->name); if (alias->deprecated && !deprecated) continue; @@ -1730,8 +1744,13 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, qsort(aliases, len, sizeof(struct sevent), cmp_sevent); for (j = 0; j < len; j++) { /* Skip duplicates */ - if (j > 0 && !strcmp(aliases[j].name, aliases[j - 1].name)) - continue; + if (j > 0 && !strcmp(aliases[j].name, aliases[j - 1].name)) { + if (!aliases[j].pmu || !aliases[j - 1].pmu || + !strcmp(aliases[j].pmu, aliases[j - 1].pmu)) { + continue; + } + } + if (name_only) { printf("%s ", aliases[j].name); continue; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index f9743eab07b6..15bbec3a9959 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -111,7 +111,7 @@ struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu); bool is_pmu_core(const char *name); void print_pmu_events(const char *event_glob, bool name_only, bool quiet, bool long_desc, bool details_flag, - bool deprecated); + bool deprecated, const char *pmu_name); bool pmu_have_event(const char *pname, const char *name); int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4); -- cgit v1.2.3 From e277ac28df1dc5d7496f598910a721bfce1d862a Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 30 Sep 2021 09:28:49 +0300 Subject: perf build: Suppress 'rm dlfilter' build message The following build message: rm dlfilters/dlfilter-test-api-v0.o is unwanted. The object file is being treated as an intermediate file and being automatically removed. Mark the object file as .SECONDARY to prevent removal and hence the message. 
Requested-by: Arnaldo Carvalho de Melo Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lore.kernel.org/lkml/20210930062849.110416-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 6b6154a52c4e..1d2b73f99172 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -789,6 +789,8 @@ $(OUTPUT)dlfilters/%.o: dlfilters/%.c include/perf/perf_dlfilter.h $(Q)$(MKDIR) -p $(OUTPUT)dlfilters $(QUIET_CC)$(CC) -c -Iinclude $(EXTRA_CFLAGS) -o $@ -fpic $< +.SECONDARY: $(DLFILTERS:.so=.o) + $(OUTPUT)dlfilters/%.so: $(OUTPUT)dlfilters/%.o $(QUIET_LINK)$(CC) $(EXTRA_CFLAGS) -shared -o $@ $< -- cgit v1.2.3 From d4145960e52cf76c4150b790227f94d7bb7faf6a Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 21 Oct 2021 12:27:00 +0100 Subject: perf dso: Fix /proc/kcore access on 32 bit systems Because _LARGEFILE64_SOURCE is set in perf, file offset sizes can be 64 bits. If a workflow needs to open /proc/kcore on a 32 bit system (for example to decode Arm ETM kernel trace) then the size value will be wrapped to 32 bits in the function file_size() at this line: dso->data.file_size = st.st_size; Setting the file_size member to be u64 fixes the issue and allows /proc/kcore to be opened. 
Reported-by: Denis Nikitin Signed-off-by: James Clark Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20211021112700.112499-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 83723ba11dc8..011da3924fc1 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -193,7 +193,7 @@ struct dso { int fd; int status; u32 status_seen; - size_t file_size; + u64 file_size; struct list_head open_entry; u64 debug_frame_offset; u64 eh_frame_hdr_offset; -- cgit v1.2.3 From 342cb7ebf5e29fff4dc09ab2c8f37d710f8f5206 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 21 Oct 2021 17:16:44 +0800 Subject: perf jevents: Fix some would-be warnings Before enabling warnings through HOSTCFLAGS, fix the would-be warnings: HOSTCC pmu-events/jevents.o pmu-events/jevents.c:74:22: warning: no previous prototype for ‘convert’ [-Wmissing-prototypes] 74 | enum aggr_mode_class convert(const char *aggr_mode) | ^~~~~~~ pmu-events/jevents.c: In function ‘print_events_table_entry’: pmu-events/jevents.c:373:8: warning: declaration of ‘topic’ shadows a global declaration [-Wshadow] 373 | char *topic = pd->topic; | ^~~~~ pmu-events/jevents.c:316:14: note: shadowed declaration is here 316 | static char *topic; | ^~~~~ pmu-events/jevents.c: In function ‘json_events’: pmu-events/jevents.c:554:9: warning: declaration of ‘func’ shadows a global declaration [-Wshadow] 554 | int (*func)(void *data, struct json_event *je), | ~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pmu-events/jevents.c:85:15: note: shadowed declaration is here 85 | typedef int (*func)(void *data, struct json_event *je); | ^~~~ pmu-events/jevents.c: In function ‘main’: pmu-events/jevents.c:1211:25: warning: initialization discards ‘const’ qualifier from pointer target type [-Wdiscarded-qualifiers] 1211 | 
char *err_string_ext = ""; | ^~ pmu-events/jevents.c:1304:17: warning: assignment discards ‘const’ qualifier from pointer target type [-Wdiscarded-qualifiers] 1304 | err_string_ext = " for std arch event"; | ^ Signed-off-by: John Garry Cc: Alexander Shishkin Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: Kajol Jain Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/1634807805-40093-2-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 26b0ba6b6395..2e7c4153875b 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -71,7 +71,7 @@ struct json_event { char *metric_constraint; }; -enum aggr_mode_class convert(const char *aggr_mode) +static enum aggr_mode_class convert(const char *aggr_mode) { if (!strcmp(aggr_mode, "PerCore")) return PerCore; @@ -82,8 +82,6 @@ enum aggr_mode_class convert(const char *aggr_mode) return -1; } -typedef int (*func)(void *data, struct json_event *je); - static LIST_HEAD(sys_event_tables); struct sys_event_table { @@ -370,7 +368,7 @@ static int print_events_table_entry(void *data, struct json_event *je) { struct perf_entry_data *pd = data; FILE *outfp = pd->outfp; - char *topic = pd->topic; + char *topic_local = pd->topic; /* * TODO: Remove formatting chars after debugging to reduce @@ -385,7 +383,7 @@ static int print_events_table_entry(void *data, struct json_event *je) fprintf(outfp, "\t.desc = \"%s\",\n", je->desc); if (je->compat) fprintf(outfp, "\t.compat = \"%s\",\n", je->compat); - fprintf(outfp, "\t.topic = \"%s\",\n", topic); + fprintf(outfp, "\t.topic = \"%s\",\n", topic_local); if (je->long_desc && je->long_desc[0]) fprintf(outfp, "\t.long_desc = \"%s\",\n", je->long_desc); if (je->pmu) @@ -1208,7 +1206,7 @@ int main(int argc, char 
*argv[]) const char *arch; const char *output_file; const char *start_dirname; - char *err_string_ext = ""; + const char *err_string_ext = ""; struct stat stbuf; prog = basename(argv[0]); -- cgit v1.2.3 From cf95f85e27bb5001f1cd8fc2ee9d3ee13e580ab1 Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 21 Sep 2021 14:10:07 +0100 Subject: perf test: Fix record+script_probe_vfs_getname.sh /tmp cleanup The temp files are only cleaned up if the test is not skipped, so delay making them until after the skip so they don't get left behind in /tmp. Signed-off-by: James Clark Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/r/20210921131009.390810-1-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/record+script_probe_vfs_getname.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh index bf9e729b3ecf..8d9c04e450ae 100755 --- a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh +++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh @@ -15,9 +15,6 @@ skip_if_no_perf_probe || exit 2 . $(dirname $0)/lib/probe_vfs_getname.sh -perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) -file=$(mktemp /tmp/temporary_file.XXXXX) - record_open_file() { echo "Recording open file:" perf record -o ${perfdata} -e probe:vfs_getname\* touch $file @@ -35,6 +32,9 @@ if [ $err -ne 0 ] ; then exit $err fi +perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +file=$(mktemp /tmp/temporary_file.XXXXX) + record_open_file && perf_script_filenames err=$? 
rm -f ${perfdata} -- cgit v1.2.3 From 39c534889e8c988867693ed8407ee0284c936941 Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 21 Sep 2021 14:10:08 +0100 Subject: perf tests: Fix trace+probe_vfs_getname.sh /tmp cleanup The temp file is only cleaned up if the test is not skipped, so delay making it until after the skip so it doesn't get left behind in /tmp. Signed-off-by: James Clark Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/r/20210921131009.390810-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/trace+probe_vfs_getname.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 3d31c1d560d6..3d60e993d2b8 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -17,8 +17,6 @@ skip_if_no_perf_trace || exit 2 . $(dirname $0)/lib/probe_vfs_getname.sh -file=$(mktemp /tmp/temporary_file.XXXXX) - trace_open_vfs_getname() { evts=$(echo $(perf list syscalls:sys_enter_open* 2>/dev/null | egrep 'open(at)? 
' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') perf trace -e $evts touch $file 2>&1 | \ @@ -32,6 +30,8 @@ if [ $err -ne 0 ] ; then exit $err fi +file=$(mktemp /tmp/temporary_file.XXXXX) + # Do not use whatever ~/.perfconfig file, it may change the output # via trace.{show_timestamp,show_prefix,etc} export PERF_CONFIG=/dev/null -- cgit v1.2.3 From 133fe2e617e48ca0948983329f43877064ffda3e Mon Sep 17 00:00:00 2001 From: James Clark Date: Tue, 21 Sep 2021 14:10:09 +0100 Subject: perf tests: Improve temp file cleanup in test_arm_coresight.sh Cleanup perf.data.old files which are also dropped by perf, handle sigint and propagate it to the parent in case the test is run in a bash while loop and don't create the temp files if the test will be skipped. Reviewed-by: Leo Yan Signed-off-by: James Clark Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/r/20210921131009.390810-3-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/test_arm_coresight.sh | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index c9eef0bba6f1..6de53b7ef5ff 100755 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -9,8 +9,6 @@ # SPDX-License-Identifier: GPL-2.0 # Leo Yan , 2020 -perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) -file=$(mktemp /tmp/temporary_file.XXXXX) glb_err=0 skip_if_no_cs_etm_event() { @@ -22,13 +20,20 @@ skip_if_no_cs_etm_event() { skip_if_no_cs_etm_event || exit 2 +perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +file=$(mktemp /tmp/temporary_file.XXXXX) + cleanup_files() { rm -f ${perfdata} rm -f ${file} + rm -f "${perfdata}.old" + trap - exit term int + kill -2 $$ + exit $glb_err } -trap cleanup_files exit +trap cleanup_files exit term int record_touch_file() { 
echo "Recording trace (only user mode) with path: CPU$2 => $1" -- cgit v1.2.3 From 432d7f52825ccdd6b1003d77ab7a33a8ebb0a80d Mon Sep 17 00:00:00 2001 From: John Keeping Date: Mon, 25 Oct 2021 18:23:13 +0100 Subject: tools build: Drop needless slang include path in test-all Commit cbefd24f0aee3a5d ("tools build: Add test to check if slang.h is in /usr/include/slang/") added a proper test to check whether slang.h is in a subdirectory, and commit 1955c8cf5e26b1f7 ("perf tools: Don't hardcode host include path for libslang") removed the include path for test-libslang.bin but missed test-all.bin. Apply the same change to test-all.bin. Reviewed-by: Florian Fainelli Fixes: 1955c8cf5e26 ("perf tools: Don't hardcode host include path for libslang") Signed-off-by: John Keeping Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Nick Terrell Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20211025172314.3766032-1-john@metanate.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index d024b5204ba0..71390309f4d7 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -91,7 +91,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$( ############################### $(OUTPUT)test-all.bin: - $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd -lcap + $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd -lcap $(OUTPUT)test-hello.bin: $(BUILD) -- cgit v1.2.3 From cf14013b6ccce97901fa91eae4934f3fb4380401 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 27 Oct 2021 
11:03:29 +0300 Subject: perf auxtrace: Add missing Z option to ITRACE_HELP ITRACE_HELP is used by perf commands to display help text for the --itrace option. Add missing Z option. Reviewed-by: Andi Kleen Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: https://lore.kernel.org/r/20211027080334.365596-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/auxtrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 5f383908ca6e..20dc78d86d54 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -649,6 +649,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, " L[len]: synthesize last branch entries on existing event records\n" \ " sNUMBER: skip initial number of events\n" \ " q: quicker (less detailed) decoding\n" \ +" Z: prefer to ignore timestamps (so-called \"timeless\" decoding)\n" \ " PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ " concatenate multiple options. Default is ibxwpe or cewp\n" -- cgit v1.2.3 From b6778fe1bbe486e65439a50226b5c7b70dc11d94 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 27 Oct 2021 11:03:30 +0300 Subject: perf auxtrace: Add itrace A option to approximate IPC Add an option to specify that synthesized IPC can be approximate, rather than completely accurate. 
Reviewed-by: Andi Kleen Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: https://lore.kernel.org/r/20211027080334.365596-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/itrace.txt | 1 + tools/perf/util/auxtrace.c | 3 +++ tools/perf/util/auxtrace.h | 3 +++ 3 files changed, 7 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index 2d586fe5e4c5..141449e97bed 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -20,6 +20,7 @@ L synthesize last branch entries on existing event records s skip initial number of events q quicker (less detailed) decoding + A approximate IPC Z prefer to ignore timestamps (so-called "timeless" decoding) The default is all events i.e. the same as --itrace=ibxwpe, diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 8d2865b9ade2..c679394b898d 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1564,6 +1564,9 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, case 'q': synth_opts->quick += 1; break; + case 'A': + synth_opts->approx_ipc = true; + break; case 'Z': synth_opts->timeless_decoding = true; break; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 20dc78d86d54..889f976ea1a0 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -84,6 +84,7 @@ enum itrace_period_type { * @thread_stack: feed branches to the thread_stack * @last_branch: add branch context to 'instruction' events * @add_last_branch: add branch context to existing event records + * @approx_ipc: approximate IPC * @flc: whether to synthesize first level cache events * @llc: whether to synthesize last level cache events * @tlb: whether to synthesize TLB events @@ -127,6 +128,7 @@ struct itrace_synth_opts { bool thread_stack; bool last_branch; bool add_last_branch; + bool approx_ipc; bool flc; bool llc; bool 
tlb; @@ -649,6 +651,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, " L[len]: synthesize last branch entries on existing event records\n" \ " sNUMBER: skip initial number of events\n" \ " q: quicker (less detailed) decoding\n" \ +" A: approximate IPC\n" \ " Z: prefer to ignore timestamps (so-called \"timeless\" decoding)\n" \ " PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ " concatenate multiple options. Default is ibxwpe or cewp\n" -- cgit v1.2.3 From f2b91386ffe66dba0860c03c9dec1c6b45c2daba Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 27 Oct 2021 11:03:31 +0300 Subject: perf intel-pt: Support itrace A option to approximate IPC Normally, for cycle-accurate mode, IPC values are an exact number of instructions and cycles. Due to the granularity of timestamps, that happens only when a CYC packet correlates to the event. Support the itrace 'A' option, to use instead, the number of cycles associated with the current timestamp. This provides IPC information for every change of timestamp, but at the expense of accuracy. Due to the granularity of timestamps, the actual number of cycles increases even though the cycles reported does not. The number of instructions is known, but if IPC is reported, cycles can be too low and so IPC is too high. Note that inaccuracy decreases as the period of sampling increases i.e. if the number of cycles is too low by a small amount, that becomes less significant if the number of cycles is large. Furthermore, it can be used in conjunction with dlfilter-show-cycles.so to provide higher granularity cycle information. 
Reviewed-by: Andi Kleen Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: https://lore.kernel.org/r/20211027080334.365596-4-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-intel-pt.txt | 10 ++++++++++ tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 1 + tools/perf/util/intel-pt-decoder/intel-pt-decoder.h | 1 + tools/perf/util/intel-pt.c | 16 ++++++++++++---- 4 files changed, 24 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index 553c3e08fa4a..8914335db84b 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -157,6 +157,15 @@ of instructions and number of cycles since the last update, and thus represent the average IPC since the last IPC for that event type. Note IPC for "branches" events is calculated separately from IPC for "instructions" events. +Even with the 'cyc' config term, it is possible to produce IPC information for +every change of timestamp, but at the expense of accuracy. That is selected by +specifying the itrace 'A' option. Due to the granularity of timestamps, the +actual number of cycles increases even though the cycles reported does not. +The number of instructions is known, but if IPC is reported, cycles can be too +low and so IPC is too high. Note that inaccuracy decreases as the period of +sampling increases i.e. if the number of cycles is too low by a small amount, +that becomes less significant if the number of cycles is large. + Also note that the IPC instruction count may or may not include the current instruction. If the cycle count is associated with an asynchronous branch (e.g. 
page fault or interrupt), then the instruction count does not include the @@ -873,6 +882,7 @@ The letters are: L synthesize last branch entries on existing event records s skip initial number of events q quicker (less detailed) decoding + A approximate IPC Z prefer to ignore timestamps (so-called "timeless" decoding) "Instructions" events look like they were recorded by "perf record -e diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 5ab631702769..5f83937bf8f3 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -608,6 +608,7 @@ static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder) { decoder->sample_timestamp = decoder->timestamp; decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + decoder->state.cycles = decoder->tot_cyc_cnt; } static void intel_pt_reposition(struct intel_pt_decoder *decoder) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index 4b5e79fcf557..8fd68f7a0963 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -218,6 +218,7 @@ struct intel_pt_state { uint64_t to_ip; uint64_t tot_insn_cnt; uint64_t tot_cyc_cnt; + uint64_t cycles; uint64_t timestamp; uint64_t est_timestamp; uint64_t trace_nr; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index c9542fada8fb..0ee5005e9837 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -172,6 +172,7 @@ struct intel_pt_queue { bool step_through_buffers; bool use_buffer_pid_tid; bool sync_switch; + bool sample_ipc; pid_t pid, tid; int cpu; int switch_state; @@ -1581,7 +1582,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) sample.branch_stack = (struct branch_stack *)&dummy_bs; } - if (ptq->state->flags & INTEL_PT_SAMPLE_IPC) + if 
(ptq->sample_ipc) sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; if (sample.cyc_cnt) { sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt; @@ -1632,7 +1633,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) else sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; - if (ptq->state->flags & INTEL_PT_SAMPLE_IPC) + if (ptq->sample_ipc) sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; if (sample.cyc_cnt) { sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt; @@ -2245,8 +2246,15 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->have_sample = false; - ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; - ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; + if (pt->synth_opts.approx_ipc) { + ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; + ptq->ipc_cyc_cnt = ptq->state->cycles; + ptq->sample_ipc = true; + } else { + ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; + ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; + ptq->sample_ipc = ptq->state->flags & INTEL_PT_SAMPLE_IPC; + } /* * Do PEBS first to allow for the possibility that the PEBS timestamp -- cgit v1.2.3 From c3afd6e50fce824f551914c690f6b905787783cf Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 27 Oct 2021 11:03:32 +0300 Subject: perf dlfilter: Add dlfilter-show-cycles Add a new dlfilter to show cycles. Cycle counts are accumulated per CPU (or per thread if CPU is not recorded) from IPC information, and printed together with the change since the last print, at the start of each line. Separate counts are kept for branches, instructions or other events. Note also, the itrace A option can be useful to provide higher granularity cycle information. 
Example: $ perf record -e intel_pt/cyc/u uname Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.044 MB perf.data ] $ perf script --itrace=A --call-trace --dlfilter dlfilter-show-cycles.so --deltatime | head 0 perf-exec 8509 [001] 0.000000000: psb offs: 0 0 perf-exec 8509 [001] 0.000000000: cbr: 42 freq: 4219 MHz (156%) 833 833 uname 8509 [001] 0.000047689: (/usr/lib/x86_64-linux-gnu/ld-2.31.so ) _start 833 uname 8509 [001] 0.000003261: (/usr/lib/x86_64-linux-gnu/ld-2.31.so ) _dl_start 2015 1182 uname 8509 [001] 0.000000282: (/usr/lib/x86_64-linux-gnu/ld-2.31.so ) _dl_start 2676 661 uname 8509 [001] 0.000002629: (/usr/lib/x86_64-linux-gnu/ld-2.31.so ) _dl_start 3612 936 uname 8509 [001] 0.000001232: (/usr/lib/x86_64-linux-gnu/ld-2.31.so ) _dl_start 4579 967 uname 8509 [001] 0.000002519: (/usr/lib/x86_64-linux-gnu/ld-2.31.so ) _dl_start 6145 1566 uname 8509 [001] 0.000001050: (/usr/lib/x86_64-linux-gnu/ld-2.31.so ) _dl_setup_hash 6239 94 uname 8509 [001] 0.000000023: (/usr/lib/x86_64-linux-gnu/ld-2.31.so ) _dl_sysdep_start Reviewed-by: Andi Kleen Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: https://lore.kernel.org/r/20211027080334.365596-5-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-intel-pt.txt | 19 +++- tools/perf/Makefile.perf | 2 +- tools/perf/dlfilters/dlfilter-show-cycles.c | 144 ++++++++++++++++++++++++++++ 3 files changed, 163 insertions(+), 2 deletions(-) create mode 100644 tools/perf/dlfilters/dlfilter-show-cycles.c (limited to 'tools') diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index 8914335db84b..c792fa7b59d3 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -164,7 +164,9 @@ actual number of cycles increases even though the cycles reported does not. 
The number of instructions is known, but if IPC is reported, cycles can be too low and so IPC is too high. Note that inaccuracy decreases as the period of sampling increases i.e. if the number of cycles is too low by a small amount, -that becomes less significant if the number of cycles is large. +that becomes less significant if the number of cycles is large. It may also be +useful to use the 'A' option in conjunction with dlfilter-show-cycles.so to +provide higher granularity cycle information. Also note that the IPC instruction count may or may not include the current instruction. If the cycle count is associated with an asynchronous branch @@ -1082,6 +1084,21 @@ The Z option is equivalent to having recorded a trace without TSC decoding a trace of a virtual machine. +dlfilter-show-cycles.so +~~~~~~~~~~~~~~~~~~~~~~~ + +Cycles can be displayed using dlfilter-show-cycles.so in which case the itrace A +option can be useful to provide higher granularity cycle information: + + perf script --itrace=A --call-trace --dlfilter dlfilter-show-cycles.so + +To see a list of dlfilters: + + perf script -v --list-dlfilters + +See also linkperf:perf-dlfilters[1] + + dump option ~~~~~~~~~~~ diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 1d2b73f99172..a3966f290297 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -362,7 +362,7 @@ ifndef NO_JVMTI PROGRAMS += $(OUTPUT)$(LIBJVMTI) endif -DLFILTERS := dlfilter-test-api-v0.so +DLFILTERS := dlfilter-test-api-v0.so dlfilter-show-cycles.so DLFILTERS := $(patsubst %,$(OUTPUT)dlfilters/%,$(DLFILTERS)) # what 'all' will build and 'install' will install, in perfexecdir diff --git a/tools/perf/dlfilters/dlfilter-show-cycles.c b/tools/perf/dlfilters/dlfilter-show-cycles.c new file mode 100644 index 000000000000..9eccc97bff82 --- /dev/null +++ b/tools/perf/dlfilters/dlfilter-show-cycles.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * dlfilter-show-cycles.c: Print the number of cycles 
at the start of each line + * Copyright (c) 2021, Intel Corporation. + */ +#include +#include +#include + +#define MAX_CPU 4096 + +enum { + INSTR_CYC, + BRNCH_CYC, + OTHER_CYC, + MAX_ENTRY +}; + +static __u64 cycles[MAX_CPU][MAX_ENTRY]; +static __u64 cycles_rpt[MAX_CPU][MAX_ENTRY]; + +#define BITS 16 +#define TABLESZ (1 << BITS) +#define TABLEMAX (TABLESZ / 2) +#define MASK (TABLESZ - 1) + +static struct entry { + __u32 used; + __s32 tid; + __u64 cycles[MAX_ENTRY]; + __u64 cycles_rpt[MAX_ENTRY]; +} table[TABLESZ]; + +static int tid_cnt; + +static int event_entry(const char *event) +{ + if (!event) + return OTHER_CYC; + if (!strncmp(event, "instructions", 12)) + return INSTR_CYC; + if (!strncmp(event, "branches", 8)) + return BRNCH_CYC; + return OTHER_CYC; +} + +static struct entry *find_entry(__s32 tid) +{ + __u32 pos = tid & MASK; + struct entry *e; + + e = &table[pos]; + while (e->used) { + if (e->tid == tid) + return e; + if (++pos == TABLESZ) + pos = 0; + e = &table[pos]; + } + + if (tid_cnt >= TABLEMAX) { + fprintf(stderr, "Too many threads\n"); + return NULL; + } + + tid_cnt += 1; + e->used = 1; + e->tid = tid; + return e; +} + +static void add_entry(__s32 tid, int pos, __u64 cnt) +{ + struct entry *e = find_entry(tid); + + if (e) + e->cycles[pos] += cnt; +} + +int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, void *ctx) +{ + __s32 cpu = sample->cpu; + __s32 tid = sample->tid; + int pos; + + if (!sample->cyc_cnt) + return 0; + + pos = event_entry(sample->event); + + if (cpu >= 0 && cpu < MAX_CPU) + cycles[cpu][pos] += sample->cyc_cnt; + else if (tid != -1) + add_entry(tid, pos, sample->cyc_cnt); + return 0; +} + +static void print_vals(__u64 cycles, __u64 delta) +{ + if (delta) + printf("%10llu %10llu ", cycles, delta); + else + printf("%10llu %10s ", cycles, ""); +} + +int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx) +{ + __s32 cpu = sample->cpu; + __s32 tid = sample->tid; + int pos; + + pos = 
event_entry(sample->event); + + if (cpu >= 0 && cpu < MAX_CPU) { + print_vals(cycles[cpu][pos], cycles[cpu][pos] - cycles_rpt[cpu][pos]); + cycles_rpt[cpu][pos] = cycles[cpu][pos]; + return 0; + } + + if (tid != -1) { + struct entry *e = find_entry(tid); + + if (e) { + print_vals(e->cycles[pos], e->cycles[pos] - e->cycles_rpt[pos]); + e->cycles_rpt[pos] = e->cycles[pos]; + return 0; + } + } + + printf("%22s", ""); + return 0; +} + +const char *filter_description(const char **long_description) +{ + static char *long_desc = "Cycle counts are accumulated per CPU (or " + "per thread if CPU is not recorded) from IPC information, and " + "printed together with the change since the last print, at the " + "start of each line. Separate counts are kept for branches, " + "instructions or other events."; + + *long_description = long_desc; + return "Print the number of cycles at the start of each line"; +} -- cgit v1.2.3 From 4b2b2c6a7d244464b6142cdbfef441de1499e7ff Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 27 Oct 2021 11:03:33 +0300 Subject: perf auxtrace: Add itrace d+o option to direct debug log to stdout It can be useful to see debug output in between normal output. Add 'o' to the flags of debug option 'd', so that '--itrace=d+o' can specify output of the debug log to stdout. Reviewed-by: Andi Kleen Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: https://lore.kernel.org/r/20211027080334.365596-6-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/itrace.txt | 1 + tools/perf/util/auxtrace.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index 141449e97bed..c52755481e2f 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -62,5 +62,6 @@ debug messages will or will not be logged. Each flag must be preceded by either '+' or '-'. 
The flags are: a all perf events + o output to stdout If supported, the 'q' option may be repeated to increase the effect. diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 889f976ea1a0..bbf0d78c6401 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -59,6 +59,7 @@ enum itrace_period_type { #define AUXTRACE_ERR_FLG_DATA_LOST (1 << ('l' - 'a')) #define AUXTRACE_LOG_FLG_ALL_PERF_EVTS (1 << ('a' - 'a')) +#define AUXTRACE_LOG_FLG_USE_STDOUT (1 << ('o' - 'a')) /** * struct itrace_synth_opts - AUX area tracing synthesis options. @@ -641,6 +642,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, " d[flags]: create a debug log\n" \ " each flag must be preceded by + or -\n" \ " log flags are: a (all perf events)\n" \ +" o (output to stdout)\n" \ " f: synthesize first level cache events\n" \ " m: synthesize last level cache events\n" \ " t: synthesize TLB events\n" \ -- cgit v1.2.3 From 624ff63abfd368fef5fa18ce34e025bc4e37dee5 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 27 Oct 2021 11:03:34 +0300 Subject: perf intel-pt: Support itrace d+o option to direct debug log to stdout It can be useful to see debug output in between normal output. Add support for AUXTRACE_LOG_FLG_USE_STDOUT to Intel PT. 
Reviewed-by: Andi Kleen Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: https://lore.kernel.org/r/20211027080334.365596-7-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-intel-pt.txt | 1 + tools/perf/util/intel-pt-decoder/intel-pt-log.c | 8 ++++---- tools/perf/util/intel-pt.c | 5 +++-- 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index c792fa7b59d3..cbb920f5d056 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -953,6 +953,7 @@ by flags which affect what debug messages will or will not be logged. Each flag must be preceded by either '+' or '-'. The flags support by Intel PT are: -a Suppress logging of perf events +a Log all perf events + +o Output to stdout instead of "intel_pt.log" By default, logged perf events are filtered by any specified time ranges, but flag +a overrides that. 
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c index 09feb5b07d32..5f5dfc8753f3 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c @@ -82,10 +82,10 @@ static int intel_pt_log_open(void) if (f) return 0; - if (!log_name[0]) - return -1; - - f = fopen(log_name, "w+"); + if (log_name[0]) + f = fopen(log_name, "w+"); + else + f = stdout; if (!f) { intel_pt_enable_logging = false; return -1; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 0ee5005e9837..556a893508da 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -3736,8 +3736,6 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (err) goto err_free; - intel_pt_log_set_name(INTEL_PT_PMU_NAME); - if (session->itrace_synth_opts->set) { pt->synth_opts = *session->itrace_synth_opts; } else { @@ -3752,6 +3750,9 @@ int intel_pt_process_auxtrace_info(union perf_event *event, pt->synth_opts.thread_stack = opts->thread_stack; } + if (!(pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_USE_STDOUT)) + intel_pt_log_set_name(INTEL_PT_PMU_NAME); + pt->session = session; pt->machine = &session->machines.host; /* No kvm support */ pt->auxtrace_type = auxtrace_info->type; -- cgit v1.2.3 From fec5c3a5155911b750753975c425a24ce70d06cf Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Fri, 22 Oct 2021 11:33:40 +1000 Subject: perf bench futex: Call the futex syscall from a function In preparation for a more complex futex() function let's convert the current macro into two functions. We need two functions to avoid compiler failures as the macro is overloaded. This will allow us to include pre-processor conditionals in the futex syscall functions. 
Signed-off-by: Alistair Francis Acked-by: Davidlohr Bueso Cc: Alexander Shishkin Cc: Alistair Francis Cc: Arnd Bergmann Cc: Atish Patra Cc: Darren Hart Cc: Davidlohr Bueso Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-riscv@lists.infradead.org Link: http://lore.kernel.org/lkml/20211022013343.2262938-1-alistair.francis@opensource.wdc.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex.h | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index b3853aac3021..ebdc2b032afc 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -28,7 +28,7 @@ struct bench_futex_parameters { }; /** - * futex() - SYS_futex syscall wrapper + * futex_syscall() - SYS_futex syscall wrapper * @uaddr: address of first futex * @op: futex op code * @val: typically expected value of uaddr, but varies by op @@ -38,17 +38,26 @@ struct bench_futex_parameters { * @val3: varies by op * @opflags: flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG * - * futex() is used by all the following futex op wrappers. It can also be + * futex_syscall() is used by all the following futex op wrappers. It can also be * used for misuse and abuse testing. Generally, the specific op wrappers - * should be used instead. It is a macro instead of an static inline function as - * some of the types over overloaded (timeout is used for nr_requeue for - * example). + * should be used instead. * * These argument descriptions are the defaults for all * like-named arguments in the following wrappers except where noted below. 
*/ -#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \ - syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3) +static inline int +futex_syscall(volatile u_int32_t *uaddr, int op, u_int32_t val, struct timespec *timeout, + volatile u_int32_t *uaddr2, int val3, int opflags) +{ + return syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3); +} + +static inline int +futex_syscall_nr_requeue(volatile u_int32_t *uaddr, int op, u_int32_t val, int nr_requeue, + volatile u_int32_t *uaddr2, int val3, int opflags) +{ + return syscall(SYS_futex, uaddr, op | opflags, val, nr_requeue, uaddr2, val3); +} /** * futex_wait() - block on uaddr with optional timeout @@ -57,7 +66,7 @@ struct bench_futex_parameters { static inline int futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflags) { - return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); + return futex_syscall(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); } /** @@ -67,7 +76,7 @@ futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflag static inline int futex_wake(u_int32_t *uaddr, int nr_wake, int opflags) { - return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags); + return futex_syscall(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags); } /** @@ -76,7 +85,7 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags) static inline int futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int opflags) { - return futex(uaddr, FUTEX_LOCK_PI, 0, timeout, NULL, 0, opflags); + return futex_syscall(uaddr, FUTEX_LOCK_PI, 0, timeout, NULL, 0, opflags); } /** @@ -85,7 +94,7 @@ futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int opflags) static inline int futex_unlock_pi(u_int32_t *uaddr, int opflags) { - return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags); + return futex_syscall(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags); } /** @@ -97,8 +106,8 @@ static inline int 
futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wake, int nr_requeue, int opflags) { - return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2, - val, opflags); + return futex_syscall_nr_requeue(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2, + val, opflags); } /** @@ -113,8 +122,8 @@ static inline int futex_wait_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, struct timespec *timeout, int opflags) { - return futex(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0, - opflags); + return futex_syscall(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0, + opflags); } /** @@ -130,8 +139,8 @@ static inline int futex_cmp_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_requeue, int opflags) { - return futex(uaddr, FUTEX_CMP_REQUEUE_PI, 1, nr_requeue, uaddr2, - val, opflags); + return futex_syscall_nr_requeue(uaddr, FUTEX_CMP_REQUEUE_PI, 1, nr_requeue, uaddr2, + val, opflags); } #endif /* _FUTEX_H */ -- cgit v1.2.3 From c1ff12dac4657e0134c972978479b97f652711a2 Mon Sep 17 00:00:00 2001 From: Alistair Francis Date: Fri, 22 Oct 2021 11:33:41 +1000 Subject: perf bench futex: Add support for 32-bit systems with 64-bit time_t Some 32-bit architectures (such as 32-bit RISC-V) only have a 64-bit time_t and as such don't have the SYS_futex syscall. This patch will allow us to use the SYS_futex_time64 syscall on those platforms. This also converts the futex calls to be y2038 safe (when built for a 5.1+ kernel).
Signed-off-by: Alistair Francis Reviewed-by: Arnd Bergmann Acked-by: Davidlohr Bueso Cc: Alexander Shishkin Cc: Alistair Francis Cc: Atish Patra Cc: Darren Hart Cc: Davidlohr Bueso Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-riscv@lists.infradead.org Link: http://lore.kernel.org/lkml/20211022013343.2262938-2-alistair.francis@opensource.wdc.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex.h | 43 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index ebdc2b032afc..6a7dd86871eb 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -8,10 +8,12 @@ #ifndef _FUTEX_H #define _FUTEX_H +#include #include #include #include #include +#include struct bench_futex_parameters { bool silent; @@ -28,7 +30,7 @@ struct bench_futex_parameters { }; /** - * futex_syscall() - SYS_futex syscall wrapper + * futex_syscall() - __NR_futex syscall wrapper * @uaddr: address of first futex * @op: futex op code * @val: typically expected value of uaddr, but varies by op @@ -49,14 +51,49 @@ static inline int futex_syscall(volatile u_int32_t *uaddr, int op, u_int32_t val, struct timespec *timeout, volatile u_int32_t *uaddr2, int val3, int opflags) { - return syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3); +#if defined(__NR_futex_time64) + if (sizeof(*timeout) != sizeof(struct __kernel_old_timespec)) { + int ret = syscall(__NR_futex_time64, uaddr, op | opflags, val, timeout, + uaddr2, val3); + if (ret == 0 || errno != ENOSYS) + return ret; + } +#endif + +#if defined(__NR_futex) + if (sizeof(*timeout) == sizeof(struct __kernel_old_timespec)) + return syscall(__NR_futex, uaddr, op | opflags, val, timeout, uaddr2, val3); + + if (timeout && timeout->tv_sec == (long)timeout->tv_sec) { + struct __kernel_old_timespec ts32; + + ts32.tv_sec = (__kernel_long_t) 
timeout->tv_sec; + ts32.tv_nsec = (__kernel_long_t) timeout->tv_nsec; + + return syscall(__NR_futex, uaddr, op | opflags, val, ts32, uaddr2, val3); + } else if (!timeout) { + return syscall(__NR_futex, uaddr, op | opflags, val, NULL, uaddr2, val3); + } +#endif + + errno = ENOSYS; + return -1; } static inline int futex_syscall_nr_requeue(volatile u_int32_t *uaddr, int op, u_int32_t val, int nr_requeue, volatile u_int32_t *uaddr2, int val3, int opflags) { - return syscall(SYS_futex, uaddr, op | opflags, val, nr_requeue, uaddr2, val3); +#if defined(__NR_futex_time64) + int ret = syscall(__NR_futex_time64, uaddr, op | opflags, val, nr_requeue, + uaddr2, val3); + if (ret == 0 || errno != ENOSYS) + return ret; +#endif + +#if defined(__NR_futex) + return syscall(__NR_futex, uaddr, op | opflags, val, nr_requeue, uaddr2, val3); +#endif } /** -- cgit v1.2.3 From 57d7ecfd1133e303fcab67521fea3398df8c5b62 Mon Sep 17 00:00:00 2001 From: Lexi Shao Date: Tue, 19 Oct 2021 15:24:17 +0800 Subject: perf script: Show binary offsets for userspace addr Show binary offsets for userspace addr with map in perf script output with callchain. In commit 19610184693c("perf script: Show virtual addresses instead of offsets"), the addr shown in perf script output with callchain is changed from binary offsets to virtual address to fix the incorrectness when displaying symbol offset. This is inconvenient in scenarios where the binary is stripped and symbols cannot be resolved. If someone wants to further resolve symbols for specific binaries later, they would need an extra step to translate virtual address to binary offset with mapping information recorded in perf.data, which can be difficult for people not familiar with perf. This patch modifies function sample__fprintf_callchain to print binary offset for userspace addr with dsos, and virtual address otherwise. It does not affect symbol offset calculation so symoff remains correct.
Before applying this patch: test 1512 78.711307: 533129 cycles: aaaae0da07f4 [unknown] (/tmp/test) aaaae0da0704 [unknown] (/tmp/test) ffffbe9f7ef4 __libc_start_main+0xe4 (/lib64/libc-2.31.so) After this patch: test 1519 111.330127: 406953 cycles: 7f4 [unknown] (/tmp/test) 704 [unknown] (/tmp/test) 20ef4 __libc_start_main+0xe4 (/lib64/libc-2.31.so) Fixes: 19610184693c("perf script: Show virtual addresses instead of offsets") Signed-off-by: Lexi Shao Cc: Alexander Shishkin Acked-by: Jiri Olsa Tested-by: Jiri Olsa Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: QiuXi Cc: Wangbing Cc: Xiaoming Ni Link: http://lore.kernel.org/lkml/20211019072417.122576-1-shaolexi@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel_fprintf.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index bfedd7b23521..8c2ea8001329 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -11,6 +11,7 @@ #include "strlist.h" #include "symbol.h" #include "srcline.h" +#include "dso.h" static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) 
{ @@ -144,12 +145,17 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, if (print_arrow && !first) printed += fprintf(fp, " <-"); - if (print_ip) - printed += fprintf(fp, "%c%16" PRIx64, s, node->ip); - if (map) addr = map->map_ip(map, node->ip); + if (print_ip) { + /* Show binary offset for userspace addr */ + if (map && !map->dso->kernel) + printed += fprintf(fp, "%c%16" PRIx64, s, addr); + else + printed += fprintf(fp, "%c%16" PRIx64, s, node->ip); + } + if (print_sym) { printed += fprintf(fp, " "); node_al.addr = addr; -- cgit v1.2.3 From 6ea5d1a3e301a3d1f0364dfd481210aa6aa3cf17 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 29 Sep 2021 08:38:14 -0700 Subject: perf script: Support instruction latency The instruction latency information can be recorded on some platforms, e.g., the Intel Sapphire Rapids server. With both memory latency (weight) and the new instruction latency information, users can easily locate the expensive load instructions, and also understand the time spent in different stages. The users can optimize their applications in different pipeline stages. Add a new field "ins_lat" to filter the instruction latency information, which is available with sample type PERF_SAMPLE_WEIGHT_STRUCT. 
Signed-off-by: Kan Liang Cc: Andi Kleen Cc: Jiri Olsa Cc: Joe Mario Link: https://lore.kernel.org/r/1632929894-102778-2-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 2 +- tools/perf/builtin-script.c | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index c80515243560..b0070718784d 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -130,7 +130,7 @@ OPTIONS comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff, callindent, insn, insnlen, synth, phys_addr, - metric, misc, srccode, ipc, data_page_size, code_page_size. + metric, misc, srccode, ipc, data_page_size, code_page_size, ins_lat. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. 
e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 6211d0b84b7a..a6258f6f816c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -122,6 +122,7 @@ enum perf_output_field { PERF_OUTPUT_TOD = 1ULL << 32, PERF_OUTPUT_DATA_PAGE_SIZE = 1ULL << 33, PERF_OUTPUT_CODE_PAGE_SIZE = 1ULL << 34, + PERF_OUTPUT_INS_LAT = 1ULL << 35, }; struct perf_script { @@ -188,6 +189,7 @@ struct output_option { {.str = "tod", .field = PERF_OUTPUT_TOD}, {.str = "data_page_size", .field = PERF_OUTPUT_DATA_PAGE_SIZE}, {.str = "code_page_size", .field = PERF_OUTPUT_CODE_PAGE_SIZE}, + {.str = "ins_lat", .field = PERF_OUTPUT_INS_LAT}, }; enum { @@ -262,7 +264,8 @@ static struct { PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR | - PERF_OUTPUT_DATA_PAGE_SIZE | PERF_OUTPUT_CODE_PAGE_SIZE, + PERF_OUTPUT_DATA_PAGE_SIZE | PERF_OUTPUT_CODE_PAGE_SIZE | + PERF_OUTPUT_INS_LAT, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, @@ -522,6 +525,10 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) evsel__check_stype(evsel, PERF_SAMPLE_CODE_PAGE_SIZE, "CODE_PAGE_SIZE", PERF_OUTPUT_CODE_PAGE_SIZE)) return -EINVAL; + if (PRINT_FIELD(INS_LAT) && + evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT_STRUCT, "WEIGHT_STRUCT", PERF_OUTPUT_INS_LAT)) + return -EINVAL; + return 0; } @@ -2039,6 +2046,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(WEIGHT)) fprintf(fp, "%16" PRIu64, sample->weight); + if (PRINT_FIELD(INS_LAT)) + fprintf(fp, "%16" PRIu16, sample->ins_lat); + if (PRINT_FIELD(IP)) { struct callchain_cursor *cursor = NULL; @@ -3715,7 +3725,7 @@ int cmd_script(int argc, const char **argv) "addr,symoff,srcline,period,iregs,uregs,brstack," "brstacksym,flags,bpf-output,brstackinsn,brstackoff," "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod," - 
"data_page_size,code_page_size,ins_lat", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), -- cgit v1.2.3 From 63c12ae2f246dcdc30895ec7c980365a5133433d Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Thu, 28 Oct 2021 17:07:13 +0530 Subject: perf evsel: Add bitfield_swap() to handle branch_stack endian issue The branch_stack struct has bit field definition which produces different bit ordering for big/little endian. Because of this, when branch_stack sample is collected in a BE system and viewed/reported in a LE system, bit fields of the branch stack are not presented properly. To address this issue, evsel__bitfield_swap_branch_flags() is defined and introduced in evsel__parse_sample. Signed-off-by: Madhavan Srinivasan Acked-by: Jiri Olsa Cc: Athira Jajeev Cc: Kajol Jain Cc: Mark Rutland Cc: Michael Ellerman Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211028113714.600549-1-maddy@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++-- tools/perf/util/evsel.h | 13 +++++++++ 2 files changed, 88 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 96ef6a4a7c14..2cfc2935d1d2 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2238,6 +2238,54 @@ void __weak arch_perf_parse_sample_weight(struct perf_sample *data, data->weight = *array; } +u64 evsel__bitfield_swap_branch_flags(u64 value) +{ + u64 new_val = 0; + + /* + * branch_flags + * union { + * u64 values; + * struct { + * mispred:1 //target mispredicted + * predicted:1 //target predicted + * in_tx:1 //in transaction + * abort:1 //transaction abort + * cycles:16 //cycle count to last branch + * type:4 //branch type + * reserved:40 + * } + * } + * + * Avoid bswap64() the entire branch_flag.value, + * as it has variable
bit-field sizes. Instead the + * macro takes the bit-field position/size, + * swaps it based on the host endianness. + * + * tep_is_bigendian() is used here instead of + * bigendian() to avoid python test fails. + */ + if (tep_is_bigendian()) { + new_val = bitfield_swap(value, 0, 1); + new_val |= bitfield_swap(value, 1, 1); + new_val |= bitfield_swap(value, 2, 1); + new_val |= bitfield_swap(value, 3, 1); + new_val |= bitfield_swap(value, 4, 16); + new_val |= bitfield_swap(value, 20, 4); + new_val |= bitfield_swap(value, 24, 40); + } else { + new_val = bitfield_swap(value, 63, 1); + new_val |= bitfield_swap(value, 62, 1); + new_val |= bitfield_swap(value, 61, 1); + new_val |= bitfield_swap(value, 60, 1); + new_val |= bitfield_swap(value, 44, 16); + new_val |= bitfield_swap(value, 40, 4); + new_val |= bitfield_swap(value, 0, 40); + } + + return new_val; +} + int evsel__parse_sample(struct evsel *evsel, union perf_event *event, struct perf_sample *data) { @@ -2425,6 +2473,8 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, if (type & PERF_SAMPLE_BRANCH_STACK) { const u64 max_branch_nr = UINT64_MAX / sizeof(struct branch_entry); + struct branch_entry *e; + unsigned int i; OVERFLOW_CHECK_u64(array); data->branch_stack = (struct branch_stack *)array++; @@ -2433,10 +2483,33 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, return -EFAULT; sz = data->branch_stack->nr * sizeof(struct branch_entry); - if (evsel__has_branch_hw_idx(evsel)) + if (evsel__has_branch_hw_idx(evsel)) { sz += sizeof(u64); - else + e = &data->branch_stack->entries[0]; + } else { data->no_hw_idx = true; + /* + * if the PERF_SAMPLE_BRANCH_HW_INDEX is not applied, + * only nr and entries[] will be output by kernel. + */ + e = (struct branch_entry *)&data->branch_stack->hw_idx; + } + + if (swapped) { + /* + * struct branch_flag does not have endian + * specific bit field definition. And bswap + * will not resolve the issue, since these + * are bit fields. 
+ * + * evsel__bitfield_swap_branch_flags() uses a + * bitfield_swap macro to swap the bit position + * based on the host endians. + */ + for (i = 0; i < data->branch_stack->nr; i++, e++) + e->flags.value = evsel__bitfield_swap_branch_flags(e->flags.value); + } + OVERFLOW_CHECK(array, sz, max_size); array = (void *)array + sz; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 45476a888942..846c827934de 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -484,4 +484,17 @@ struct evsel *evsel__leader(struct evsel *evsel); bool evsel__has_leader(struct evsel *evsel, struct evsel *leader); bool evsel__is_leader(struct evsel *evsel); void evsel__set_leader(struct evsel *evsel, struct evsel *leader); + +/* + * Macro to swap the bit-field postition and size. + * Used when, + * - dont need to swap the entire u64 && + * - when u64 has variable bit-field sizes && + * - when presented in a host endian which is different + * than the source endian of the perf.data file + */ +#define bitfield_swap(src, pos, size) \ + ((((src) >> (pos)) & ((1ull << (size)) - 1)) << (63 - ((pos) + (size) - 1))) + +u64 evsel__bitfield_swap_branch_flags(u64 value); #endif /* __PERF_EVSEL_H */ -- cgit v1.2.3 From 10269a2ca2b08cbdda9232771e59ba901b87a074 Mon Sep 17 00:00:00 2001 From: Madhavan Srinivasan Date: Thu, 28 Oct 2021 17:07:14 +0530 Subject: perf test sample-parsing: Add endian test for struct branch_flags Extend the sample-parsing test to include a branch_flag bitfield-endian swap test. This patch adds a include for "util/trace-event.h" in the sample-parsing test for importing tep_is_bigendian() and extends samples_same() to include "needs_swap" to detect/enable check for bitfield-endian swap. 
Signed-off-by: Madhavan Srinivasan Acked-by: Jiri Olsa Cc: Athira Jajeev Cc: Kajol Jain Cc: Mark Rutland Cc: Michael Ellerman Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211028113714.600549-2-maddy@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sample-parsing.c | 43 ++++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 8fd8a4ef97da..c83a11514129 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -13,6 +13,7 @@ #include "evsel.h" #include "debug.h" #include "util/synthetic-events.h" +#include "util/trace-event.h" #include "tests.h" @@ -30,9 +31,18 @@ } \ } while (0) +/* + * Hardcode the expected values for branch_entry flags. + * These are based on the input value (213) specified + * in branch_stack variable. + */ +#define BS_EXPECTED_BE 0xa00d000000000000 +#define BS_EXPECTED_LE 0xd5000000 +#define FLAG(s) s->branch_stack->entries[i].flags + static bool samples_same(const struct perf_sample *s1, const struct perf_sample *s2, - u64 type, u64 read_format) + u64 type, u64 read_format, bool needs_swap) { size_t i; @@ -100,8 +110,14 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_BRANCH_STACK) { COMP(branch_stack->nr); COMP(branch_stack->hw_idx); - for (i = 0; i < s1->branch_stack->nr; i++) - MCOMP(branch_stack->entries[i]); + for (i = 0; i < s1->branch_stack->nr; i++) { + if (needs_swap) + return ((tep_is_bigendian()) ? 
(FLAG(s2).value == BS_EXPECTED_BE) : + (FLAG(s2).value == BS_EXPECTED_LE)); + else + MCOMP(branch_stack->entries[i]); + } } if (type & PERF_SAMPLE_REGS_USER) { @@ -248,7 +264,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) }, }; struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},}; - struct perf_sample sample_out; + struct perf_sample sample_out, sample_out_endian; size_t i, sz, bufsz; int err, ret = -1; @@ -313,12 +329,29 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) goto out_free; } - if (!samples_same(&sample, &sample_out, sample_type, read_format)) { + if (!samples_same(&sample, &sample_out, sample_type, read_format, evsel.needs_swap)) { pr_debug("parsing failed for sample_type %#"PRIx64"\n", sample_type); goto out_free; } + if (sample_type == PERF_SAMPLE_BRANCH_STACK) { + evsel.needs_swap = true; + evsel.sample_size = __evsel__sample_size(sample_type); + err = evsel__parse_sample(&evsel, event, &sample_out_endian); + if (err) { + pr_debug("%s failed for sample_type %#"PRIx64", error %d\n", + "evsel__parse_sample", sample_type, err); + goto out_free; + } + + if (!samples_same(&sample, &sample_out_endian, sample_type, read_format, evsel.needs_swap)) { + pr_debug("parsing failed for sample_type %#"PRIx64"\n", + sample_type); + goto out_free; + } + } + ret = 0; out_free: free(event); -- cgit v1.2.3 From ba4026b09d83acf56c040b6933eac7916c27e728 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 1 Nov 2021 11:38:02 -0300 Subject: Revert "perf bench futex: Add support for 32-bit systems with 64-bit time_t" This reverts commit c1ff12dac4657e0134c972978479b97f652711a2. This commit makes the build break on ubuntu 20.04 and other older systems and it also has indentation problems, so let's revert it until we get these problems fixed.
Test results: 1 78.36 almalinux:8 : Ok gcc (GCC) 8.4.1 20200928 (Red Hat 8.4.1-1) , clang version 11.0.0 (Red Hat 11.0.0-1.module_el8.4.0+2107+39fed697) 2 8.40 alpine:3.4 : FAIL gcc version 5.3.0 (Alpine 5.3.0) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 3 8.89 alpine:3.5 : FAIL gcc version 6.2.1 20160822 (Alpine 6.2.1) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory #include ^ compilation terminated. In file included from bench/futex-wake.c:25:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory #include ^ compilation terminated. make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 4 8.59 alpine:3.6 : FAIL gcc version 6.3.0 (Alpine 6.3.0) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory #include ^ compilation terminated. In file included from bench/futex-wake.c:25:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory #include ^ compilation terminated. make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 5 9.01 alpine:3.7 : FAIL gcc version 6.4.0 (Alpine 6.4.0) In file included from bench/futex-wake.c:25:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory #include ^ compilation terminated. In file included from bench/futex-hash.c:29:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory #include ^ compilation terminated. 
make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 6 8.70 alpine:3.8 : FAIL gcc version 6.4.0 (Alpine 6.4.0) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory #include ^ compilation terminated. In file included from bench/futex-wake.c:25:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory #include ^ compilation terminated. make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 7 9.70 alpine:3.9 : FAIL gcc version 8.3.0 (Alpine 8.3.0) In file included from bench/futex-wake.c:25: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-wake-parallel.c:31: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-hash.c:29: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 8 9.40 alpine:3.10 : FAIL gcc version 8.3.0 (Alpine 8.3.0) In file included from bench/futex-hash.c:29: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-wake.c:25: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 9 9.81 alpine:3.11 : FAIL gcc version 9.3.0 (Alpine 9.3.0) In file included from bench/futex-hash.c:29: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory 16 | #include | ^~~~~~~~~~~~~~~~~~~~ compilation terminated. 
In file included from bench/futex-wake.c:25: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory 16 | #include | ^~~~~~~~~~~~~~~~~~~~ compilation terminated. make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 10 10.32 alpine:3.12 : FAIL gcc version 9.3.0 (Alpine 9.3.0) bench/futex.h: In function 'futex_syscall': bench/futex.h:64:33: error: invalid application of 'sizeof' to incomplete type 'struct __kernel_old_timespec' 64 | if (sizeof(*timeout) == sizeof(struct __kernel_old_timespec)) | ^~~~~~ bench/futex.h:68:32: error: storage size of 'ts32' isn't known 68 | struct __kernel_old_timespec ts32; | ^~~~ bench/futex.h:68:32: error: unused variable 'ts32' [-Werror=unused-variable] cc1: all warnings being treated as errors make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 11 99.82 alpine:3.13 : Ok gcc (Alpine 10.2.1_pre1) 10.2.1 20201203 , Alpine clang version 10.0.1 12 87.39 alpine:3.14 : Ok gcc (Alpine 10.3.1_git20210424) 10.3.1 20210424 , Alpine clang version 11.1.0 13 86.89 alpine:edge : Ok gcc (Alpine 10.3.1_git20210921) 10.3.1 20210921 , Alpine clang version 12.0.1 14 7.30 alt:p8 : FAIL gcc version 5.3.1 20151207 (ALT p8 5.3.1-alt3.M80P.1) (GCC) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. make[3]: *** [bench] Error 2 15 63.92 alt:p9 : Ok x86_64-alt-linux-gcc (GCC) 8.4.1 20200305 (ALT p9 8.4.1-alt0.p9.1) , clang version 10.0.0 16 61.42 alt:sisyphus : Ok x86_64-alt-linux-gcc (GCC) 11.2.1 20210911 (ALT Sisyphus 11.2.1-alt1) , ALT Linux Team clang version 12.0.1 17 8.30 amazonlinux:1 : FAIL gcc version 7.2.1 20170915 (Red Hat 7.2.1-2) (GCC) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. 
make[3]: *** [bench] Error 2 18 8.71 amazonlinux:2 : FAIL gcc version 7.3.1 20180712 (Red Hat 7.3.1-13) (GCC) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-wake.c:25:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. make[3]: *** [bench] Error 2 19 79.56 centos:8 : Ok gcc (GCC) 8.4.1 20200928 (Red Hat 8.4.1-1) , clang version 11.0.0 (Red Hat 11.0.0-1.module_el8.4.0+587+5187cac0) 20 82.28 centos:stream : Ok gcc (GCC) 8.5.0 20210514 (Red Hat 8.5.0-3) , clang version 12.0.1 (Red Hat 12.0.1-2.module_el8.6.0+937+1cafe22c) 21 55.24 clearlinux:latest : Ok gcc (Clear Linux OS for Intel Architecture) 11.2.1 20211020 releases/gcc-11.2.0-375-g40b209e340 , clang version 11.1.0 22 7.41 debian:9 : FAIL gcc version 6.3.0 20170516 (Debian 6.3.0-18+deb9u1) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory #include ^ compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 23 7.90 debian:10 : FAIL gcc version 8.3.0 (Debian 8.3.0-6) In file included from bench/futex-hash.c:29: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-wake.c:25: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. 
make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 24 60.32 debian:11 : Ok gcc (Debian 10.2.1-6) 10.2.1 20210110 , Debian clang version 11.0.1-2 25 59.42 debian:experimental : Ok gcc (Debian 11.2.0-10) 11.2.0 , Debian clang version 11.1.0-4 26 23.76 debian:experimental-x-arm64 : Ok aarch64-linux-gnu-gcc (Debian 11.2.0-9) 11.2.0 27 19.25 debian:experimental-x-mips : Ok mips-linux-gnu-gcc (Debian 10.2.1-6) 10.2.1 20210110 28 21.25 debian:experimental-x-mips64 : Ok mips64-linux-gnuabi64-gcc (Debian 10.2.1-6) 10.2.1 20210110 29 21.88 debian:experimental-x-mipsel : Ok mipsel-linux-gnu-gcc (Debian 11.2.0-9) 11.2.0 30 8.20 fedora:22 : FAIL gcc version 5.3.1 20160406 (Red Hat 5.3.1-6) (GCC) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. In file included from bench/futex-wake.c:25:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 31 8.20 fedora:23 : FAIL gcc version 5.3.1 20160406 (Red Hat 5.3.1-6) (GCC) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 32 8.59 fedora:24 : FAIL gcc version 6.3.1 20161221 (Red Hat 6.3.1-1) (GCC) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory #include ^ compilation terminated. 
/git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 33 6.60 fedora:24-x-ARC-uClibc : FAIL gcc version 7.1.1 20170710 (ARCompact ISA Linux uClibc toolchain 2017.09-rc2) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ In file included from bench/futex-wake.c:25:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 34 8.59 fedora:25 : FAIL gcc version 6.4.1 20170727 (Red Hat 6.4.1-1) (GCC) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory #include ^ compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 35 14.61 fedora:26 : FAIL gcc version 7.3.1 20180130 (Red Hat 7.3.1-2) (GCC) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 36 8.79 fedora:27 : FAIL gcc version 7.3.1 20180712 (Red Hat 7.3.1-6) (GCC) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-wake.c:25:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. 
make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 37 15.12 fedora:28 : FAIL gcc version 8.3.1 20190223 (Red Hat 8.3.1-2) (GCC) In file included from bench/futex-wake.c:25: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-hash.c:29: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-wake-parallel.c:31: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 38 9.60 fedora:29 : FAIL gcc version 8.3.1 20190223 (Red Hat 8.3.1-2) (GCC) bench/futex.h: In function 'futex_syscall': bench/futex.h:64:33: error: invalid application of 'sizeof' to incomplete type 'struct __kernel_old_timespec' if (sizeof(*timeout) == sizeof(struct __kernel_old_timespec)) ^~~~~~ bench/futex.h:68:32: error: storage size of 'ts32' isn't known struct __kernel_old_timespec ts32; ^~~~ bench/futex.h:68:32: error: unused variable 'ts32' [-Werror=unused-variable] cc1: all warnings being treated as errors make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 39 101.90 fedora:30 : Ok gcc (GCC) 9.3.1 20200408 (Red Hat 9.3.1-2) , clang version 8.0.0 (Fedora 8.0.0-3.fc30) 40 99.30 fedora:31 : Ok gcc (GCC) 9.3.1 20200408 (Red Hat 9.3.1-2) , clang version 9.0.1 (Fedora 9.0.1-4.fc31) 41 82.46 fedora:32 : Ok gcc (GCC) 10.3.1 20210422 (Red Hat 10.3.1-1) , clang version 10.0.1 (Fedora 10.0.1-3.fc32) 42 81.32 fedora:33 : Ok gcc (GCC) 10.3.1 20210422 (Red Hat 10.3.1-1) , clang version 11.0.0 (Fedora 11.0.0-3.fc33) 43 84.07 fedora:34 : Ok gcc (GCC) 11.2.1 20210728 (Red Hat 11.2.1-1) , clang version 12.0.1 (Fedora 12.0.1-1.fc34) 44 7.09 fedora:34-x-ARC-glibc : FAIL gcc version 
8.3.1 20190225 (ARC HS GNU/Linux glibc toolchain 2019.03-rc1) In file included from bench/futex-hash.c:29: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-wake.c:25: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-wake-parallel.c:31: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 45 6.29 fedora:34-x-ARC-uClibc : FAIL gcc version 8.3.1 20190225 (ARCv2 ISA Linux uClibc toolchain 2019.03-rc1) In file included from bench/futex-hash.c:29: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 46 74.74 fedora:35 : Ok gcc (GCC) 11.2.1 20210728 (Red Hat 11.2.1-1) , clang version 13.0.0 (Fedora 13.0.0~rc1-1.fc35) 47 73.13 fedora:rawhide : Ok gcc (GCC) 11.2.1 20211019 (Red Hat 11.2.1-6) , clang version 13.0.0 (Fedora 13.0.0-4.fc36) 48 28.17 gentoo-stage3:latest : Ok gcc (Gentoo 11.2.0 p1) 11.2.0 49 9.10 mageia:6 : FAIL gcc version 5.5.0 (Mageia 5.5.0-1.mga6) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. In file included from bench/futex-wake.c:25:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. In file included from bench/futex-wake-parallel.c:31:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. 
/git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 50 38.60 mageia:7 : FAIL clang version 8.0.0 (Mageia 8.0.0-1.mga7) yychar = yylex (&yylval, &yylloc, scanner); ^ #define yylex parse_events_lex ^ 1 error generated. make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: util] Error 2 51 6.18 openmandriva:cooker : FAIL gcc version 11.2.0 20210728 (OpenMandriva) (GCC) In file included from builtin-bench.c:22: bench/bench.h:66:19: error: conflicting types for 'pthread_attr_setaffinity_np'; have 'int(pthread_attr_t *, size_t, cpu_set_t *)' {aka 'int(pthread_attr_t *, long unsigned int, cpu_set_t *)'} 66 | static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr __maybe_unused, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from bench/bench.h:64, from builtin-bench.c:22: /usr/include/pthread.h:394:12: note: previous declaration of 'pthread_attr_setaffinity_np' with type 'int(pthread_attr_t *, size_t, const cpu_set_t *)' {aka 'int(pthread_attr_t *, long unsigned int, const cpu_set_t *)'} 394 | extern int pthread_attr_setaffinity_np (pthread_attr_t *__attr, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ file: Compiled magic version [540] does not match with shared library magic version [539] ld: warning: -r and --gc-sections may not be used together, disabling --gc-sections ld: warning: -r and --icf may not be used together, disabling --icf ld: warning: -r and --gc-sections may not be used together, disabling --gc-sections ld: warning: -r and --icf may not be used together, disabling --icf file: Compiled magic version [540] does not match with shared library magic version [539] file: Compiled magic version [540] does not match with shared library magic version [539] ld: warning: -r and --gc-sections may not be used together, disabling --gc-sections ld: warning: -r and --icf may not be used together, disabling --icf 52 12.51 opensuse:15.0 : FAIL gcc version 7.4.1 20190905 [gcc-7-branch revision 275407] (SUSE 
Linux) Makefile.config:999: No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev update-alternatives: error: no alternatives for java update-alternatives: error: no alternatives for java Makefile.config:1043: No openjdk development package found, please install JDK package, e.g. openjdk-8-jdk, java-1.8.0-openjdk-devel Auto-detecting system features: ... dwarf: [ on ] ... dwarf_getlocations: [ on ] ... glibc: [ on ] ... libbfd: [ OFF ] ... libbfd-buildid: [ OFF ] ... libcap: [ on ] ... libelf: [ on ] ... libnuma: [ on ] ... numa_num_possible_cpus: [ on ] ... libperl: [ on ] ... libpython: [ on ] ... libcrypto: [ on ] ... libunwind: [ on ] ... libdw-dwarf-unwind: [ on ] ... zlib: [ on ] ... lzma: [ on ] ... get_cpuid: [ on ] ... bpf: [ on ] ... libaio: [ on ] ... libzstd: [ on ] ... disassembler-four-args: [ on ] PERF_VERSION = 5.15.g875eaa399042 GEN perf-archive GEN perf-with-kcore GEN perf-iostat -- In file included from bench/futex-wake.c:25:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-hash.c:29:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-wake-parallel.c:31:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-requeue.c:26:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. 
make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 53 12.41 opensuse:15.1 : FAIL gcc version 7.5.0 (SUSE Linux) Makefile.config:999: No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev update-alternatives: error: no alternatives for java update-alternatives: error: no alternatives for java Makefile.config:1043: No openjdk development package found, please install JDK package, e.g. openjdk-8-jdk, java-1.8.0-openjdk-devel Auto-detecting system features: ... dwarf: [ on ] ... dwarf_getlocations: [ on ] ... glibc: [ on ] ... libbfd: [ OFF ] ... libbfd-buildid: [ OFF ] ... libcap: [ on ] ... libelf: [ on ] ... libnuma: [ on ] ... numa_num_possible_cpus: [ on ] ... libperl: [ on ] ... libpython: [ on ] ... libcrypto: [ on ] ... libunwind: [ on ] ... libdw-dwarf-unwind: [ on ] ... zlib: [ on ] ... lzma: [ on ] ... get_cpuid: [ on ] ... bpf: [ on ] ... libaio: [ on ] ... libzstd: [ on ] ... disassembler-four-args: [ on ] PERF_VERSION = 5.15.g875eaa399042 GEN perf-archive GEN perf-with-kcore GEN perf-iostat -- In file included from bench/futex-hash.c:29:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-wake.c:25:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-wake-parallel.c:31:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-requeue.c:26:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. 
make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 54 12.20 opensuse:15.2 : FAIL gcc version 7.5.0 (SUSE Linux) Makefile.config:999: No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev update-alternatives: error: no alternatives for java update-alternatives: error: no alternatives for java Makefile.config:1043: No openjdk development package found, please install JDK package, e.g. openjdk-8-jdk, java-1.8.0-openjdk-devel Auto-detecting system features: ... dwarf: [ on ] ... dwarf_getlocations: [ on ] ... glibc: [ on ] ... libbfd: [ OFF ] ... libbfd-buildid: [ OFF ] ... libcap: [ on ] ... libelf: [ on ] ... libnuma: [ on ] ... numa_num_possible_cpus: [ on ] ... libperl: [ on ] ... libpython: [ on ] ... libcrypto: [ on ] ... libunwind: [ on ] ... libdw-dwarf-unwind: [ on ] ... zlib: [ on ] ... lzma: [ on ] ... get_cpuid: [ on ] ... bpf: [ on ] ... libaio: [ on ] ... libzstd: [ on ] ... disassembler-four-args: [ on ] PERF_VERSION = 5.15.g875eaa399042 GEN perf-archive GEN perf-with-kcore GEN perf-iostat -- bench/futex.h: In function 'futex_syscall': bench/futex.h:64:33: error: invalid application of 'sizeof' to incomplete type 'struct __kernel_old_timespec' if (sizeof(*timeout) == sizeof(struct __kernel_old_timespec)) ^~~~~~ bench/futex.h:68:32: error: storage size of 'ts32' isn't known struct __kernel_old_timespec ts32; ^~~~ bench/futex.h:68:32: error: unused variable 'ts32' [-Werror=unused-variable] cc1: all warnings being treated as errors In file included from bench/futex-wake.c:25:0: bench/futex.h: In function 'futex_syscall': bench/futex.h:64:33: error: invalid application of 'sizeof' to incomplete type 'struct __kernel_old_timespec' if (sizeof(*timeout) == sizeof(struct __kernel_old_timespec)) ^~~~~~ bench/futex.h:68:32: error: storage size of 'ts32' isn't known struct __kernel_old_timespec ts32; ^~~~ bench/futex.h:68:32: error: unused variable 'ts32' 
[-Werror=unused-variable] cc1: all warnings being treated as errors In file included from bench/futex-wake-parallel.c:31:0: bench/futex.h: In function 'futex_syscall': bench/futex.h:64:33: error: invalid application of 'sizeof' to incomplete type 'struct __kernel_old_timespec' if (sizeof(*timeout) == sizeof(struct __kernel_old_timespec)) ^~~~~~ bench/futex.h:68:32: error: storage size of 'ts32' isn't known struct __kernel_old_timespec ts32; ^~~~ bench/futex.h:68:32: error: unused variable 'ts32' [-Werror=unused-variable] cc1: all warnings being treated as errors make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 55 12.30 opensuse:15.3 : FAIL gcc version 7.5.0 (SUSE Linux) Makefile.config:999: No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev update-alternatives: error: no alternatives for java update-alternatives: error: no alternatives for java Makefile.config:1043: No openjdk development package found, please install JDK package, e.g. openjdk-8-jdk, java-1.8.0-openjdk-devel Auto-detecting system features: ... dwarf: [ on ] ... dwarf_getlocations: [ on ] ... glibc: [ on ] ... libbfd: [ OFF ] ... libbfd-buildid: [ OFF ] ... libcap: [ on ] ... libelf: [ on ] ... libnuma: [ on ] ... numa_num_possible_cpus: [ on ] ... libperl: [ on ] ... libpython: [ on ] ... libcrypto: [ on ] ... libunwind: [ on ] ... libdw-dwarf-unwind: [ on ] ... zlib: [ on ] ... lzma: [ on ] ... get_cpuid: [ on ] ... bpf: [ on ] ... libaio: [ on ] ... libzstd: [ on ] ... 
disassembler-four-args: [ on ] PERF_VERSION = 5.15.g875eaa399042 GEN perf-archive GEN perf-with-kcore GEN perf-iostat -- bench/futex.h: In function 'futex_syscall': bench/futex.h:64:33: error: invalid application of 'sizeof' to incomplete type 'struct __kernel_old_timespec' if (sizeof(*timeout) == sizeof(struct __kernel_old_timespec)) ^~~~~~ bench/futex.h:68:32: error: storage size of 'ts32' isn't known struct __kernel_old_timespec ts32; ^~~~ bench/futex.h:68:32: error: unused variable 'ts32' [-Werror=unused-variable] In file included from bench/futex-hash.c:29:0: bench/futex.h: In function 'futex_syscall': bench/futex.h:64:33: error: invalid application of 'sizeof' to incomplete type 'struct __kernel_old_timespec' if (sizeof(*timeout) == sizeof(struct __kernel_old_timespec)) ^~~~~~ bench/futex.h:68:32: error: storage size of 'ts32' isn't known struct __kernel_old_timespec ts32; ^~~~ bench/futex.h:68:32: error: unused variable 'ts32' [-Werror=unused-variable] cc1: all warnings being treated as errors cc1: all warnings being treated as errors In file included from bench/futex-wake-parallel.c:31:0: bench/futex.h: In function 'futex_syscall': bench/futex.h:64:33: error: invalid application of 'sizeof' to incomplete type 'struct __kernel_old_timespec' if (sizeof(*timeout) == sizeof(struct __kernel_old_timespec)) ^~~~~~ bench/futex.h:68:32: error: storage size of 'ts32' isn't known struct __kernel_old_timespec ts32; ^~~~ bench/futex.h:68:32: error: unused variable 'ts32' [-Werror=unused-variable] cc1: all warnings being treated as errors make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 56 92.79 opensuse:tumbleweed : Ok gcc (SUSE Linux) 11.2.1 20210816 [revision 056e324ce46a7924b5cf10f61010cf9dd2ca10e9] , clang version 13.0.0 57 78.85 oraclelinux:8 : Ok gcc (GCC) 8.4.1 20200928 (Red Hat 8.4.1-1.0.4) , clang version 11.0.0 (Red Hat 11.0.0-1.0.1.module+el8.4.0+20046+39fed697) 58 78.47 rockylinux:8 : Ok gcc (GCC) 8.4.1 20200928 (Red Hat 8.4.1-1) 
, clang version 11.0.0 (Red Hat 11.0.0-1.module+el8.4.0+412+05cf643f) 59 8.32 ubuntu:16.04 : FAIL gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.12) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. In file included from bench/futex-wake.c:25:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 60 7.19 ubuntu:16.04-x-arm : FAIL gcc version 5.4.0 20160609 (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.9) In file included from bench/futex-wake.c:25:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. In file included from bench/futex-hash.c:29:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 61 18.14 ubuntu:16.04-x-arm64 : FAIL gcc version 5.4.0 20160609 (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.9) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 62 6.99 ubuntu:16.04-x-powerpc : FAIL gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.9) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. 
/git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 63 7.29 ubuntu:16.04-x-powerpc64 : FAIL gcc version 5.4.0 20160609 (Ubuntu/IBM 5.4.0-6ubuntu1~16.04.9) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. In file included from bench/futex-wake.c:25:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. In file included from bench/futex-wake-parallel.c:31:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. In file included from bench/futex-requeue.c:26:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. In file included from bench/futex-lock-pi.c:19:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 64 7.29 ubuntu:16.04-x-powerpc64el : FAIL gcc version 5.4.0 20160609 (Ubuntu/IBM 5.4.0-6ubuntu1~16.04.9) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 65 6.59 ubuntu:16.04-x-s390 : FAIL gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.9) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. In file included from bench/futex-wake.c:25:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. 
In file included from bench/futex-wake-parallel.c:31:0: bench/futex.h:16:30: fatal error: linux/time_types.h: No such file or directory compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 66 9.00 ubuntu:18.04 : FAIL gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04) In file included from bench/futex-wake.c:25:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-hash.c:29:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 67 7.49 ubuntu:18.04-x-arm : FAIL gcc version 7.5.0 (Ubuntu/Linaro 7.5.0-3ubuntu1~18.04) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-wake.c:25:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 68 7.49 ubuntu:18.04-x-arm64 : FAIL gcc version 7.5.0 (Ubuntu/Linaro 7.5.0-3ubuntu1~18.04) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-wake.c:25:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. 
/git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 69 6.09 ubuntu:18.04-x-m68k : FAIL gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-wake.c:25:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-wake-parallel.c:31:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 70 7.40 ubuntu:18.04-x-powerpc : FAIL gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 71 8.00 ubuntu:18.04-x-powerpc64 : FAIL gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 72 7.99 ubuntu:18.04-x-powerpc64el : FAIL gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. 
/git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 73 6.89 ubuntu:18.04-x-riscv64 : FAIL gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-wake.c:25:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 74 6.69 ubuntu:18.04-x-s390 : FAIL gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 75 7.29 ubuntu:18.04-x-sh4 : FAIL gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. In file included from bench/futex-wake.c:25:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 76 6.69 ubuntu:18.04-x-sparc64 : FAIL gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04) In file included from bench/futex-hash.c:29:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. 
In file included from bench/futex-wake.c:25:0: bench/futex.h:16:10: fatal error: linux/time_types.h: No such file or directory #include ^~~~~~~~~~~~~~~~~~~~ compilation terminated. /git/perf-5.15.0/tools/build/Makefile.build:139: recipe for target 'bench' failed make[3]: *** [bench] Error 2 77 9.59 ubuntu:20.04 : FAIL gcc version 9.3.0 (Ubuntu 9.3.0-17ubuntu1~20.04) bench/futex.h: In function 'futex_syscall': bench/futex.h:64:33: error: invalid application of 'sizeof' to incomplete type 'struct __kernel_old_timespec' 64 | if (sizeof(*timeout) == sizeof(struct __kernel_old_timespec)) | ^~~~~~ bench/futex.h:68:32: error: storage size of 'ts32' isn't known 68 | struct __kernel_old_timespec ts32; | ^~~~ bench/futex.h:68:32: error: unused variable 'ts32' [-Werror=unused-variable] cc1: all warnings being treated as errors In file included from bench/futex-wake.c:25: bench/futex.h: In function 'futex_syscall': bench/futex.h:64:33: error: invalid application of 'sizeof' to incomplete type 'struct __kernel_old_timespec' 64 | if (sizeof(*timeout) == sizeof(struct __kernel_old_timespec)) | ^~~~~~ bench/futex.h:68:32: error: storage size of 'ts32' isn't known 68 | struct __kernel_old_timespec ts32; | ^~~~ bench/futex.h:68:32: error: unused variable 'ts32' [-Werror=unused-variable] cc1: all warnings being treated as errors In file included from bench/futex-wake-parallel.c:31: bench/futex.h: In function 'futex_syscall': bench/futex.h:64:33: error: invalid application of 'sizeof' to incomplete type 'struct __kernel_old_timespec' 64 | if (sizeof(*timeout) == sizeof(struct __kernel_old_timespec)) | ^~~~~~ bench/futex.h:68:32: error: storage size of 'ts32' isn't known 68 | struct __kernel_old_timespec ts32; | ^~~~ bench/futex.h:68:32: error: unused variable 'ts32' [-Werror=unused-variable] cc1: all warnings being treated as errors make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 78 8.29 ubuntu:20.04-x-powerpc64el : FAIL gcc version 10.3.0 (Ubuntu 
10.3.0-1ubuntu1~20.04) bench/futex.h: In function 'futex_syscall': bench/futex.h:64:33: error: invalid application of 'sizeof' to incomplete type 'struct __kernel_old_timespec' 64 | if (sizeof(*timeout) == sizeof(struct __kernel_old_timespec)) | ^~~~~~ bench/futex.h:68:32: error: storage size of 'ts32' isn't known 68 | struct __kernel_old_timespec ts32; | ^~~~ bench/futex.h:68:32: error: unused variable 'ts32' [-Werror=unused-variable] cc1: all warnings being treated as errors In file included from bench/futex-wake.c:25: bench/futex.h: In function 'futex_syscall': bench/futex.h:64:33: error: invalid application of 'sizeof' to incomplete type 'struct __kernel_old_timespec' 64 | if (sizeof(*timeout) == sizeof(struct __kernel_old_timespec)) | ^~~~~~ bench/futex.h:68:32: error: storage size of 'ts32' isn't known 68 | struct __kernel_old_timespec ts32; | ^~~~ bench/futex.h:68:32: error: unused variable 'ts32' [-Werror=unused-variable] In file included from bench/futex-requeue.c:26: bench/futex.h: In function 'futex_syscall': bench/futex.h:64:33: error: invalid application of 'sizeof' to incomplete type 'struct __kernel_old_timespec' 64 | if (sizeof(*timeout) == sizeof(struct __kernel_old_timespec)) | ^~~~~~ bench/futex.h:68:32: error: storage size of 'ts32' isn't known 68 | struct __kernel_old_timespec ts32; | ^~~~ In file included from bench/futex-wake-parallel.c:31: bench/futex.h: In function 'futex_syscall': bench/futex.h:68:32: error: unused variable 'ts32' [-Werror=unused-variable] bench/futex.h:64:33: error: invalid application of 'sizeof' to incomplete type 'struct __kernel_old_timespec' 64 | if (sizeof(*timeout) == sizeof(struct __kernel_old_timespec)) | ^~~~~~ bench/futex.h:68:32: error: storage size of 'ts32' isn't known 68 | struct __kernel_old_timespec ts32; | ^~~~ bench/futex.h:68:32: error: unused variable 'ts32' [-Werror=unused-variable] cc1: all warnings being treated as errors cc1: all warnings being treated as errors cc1: all warnings being treated as 
errors make[3]: *** [/git/perf-5.15.0/tools/build/Makefile.build:139: bench] Error 2 79 65.92 ubuntu:20.10 : Ok gcc (Ubuntu 10.3.0-1ubuntu1~20.10) 10.3.0 , Ubuntu clang version 11.0.0-2 80 65.91 ubuntu:21.04 : Ok gcc (Ubuntu 10.3.0-1ubuntu1) 10.3.0 , Ubuntu clang version 12.0.0-3ubuntu1~21.04.2 81 68.12 ubuntu:21.10 : Ok gcc (Ubuntu 11.2.0-7ubuntu2) 11.2.0 , Ubuntu clang version 13.0.0-2 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex.h | 43 +++---------------------------------------- 1 file changed, 3 insertions(+), 40 deletions(-) (limited to 'tools') diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index 6a7dd86871eb..ebdc2b032afc 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -8,12 +8,10 @@ #ifndef _FUTEX_H #define _FUTEX_H -#include #include #include #include #include -#include struct bench_futex_parameters { bool silent; @@ -30,7 +28,7 @@ struct bench_futex_parameters { }; /** - * futex_syscall() - __NR_futex syscall wrapper + * futex_syscall() - SYS_futex syscall wrapper * @uaddr: address of first futex * @op: futex op code * @val: typically expected value of uaddr, but varies by op @@ -51,49 +49,14 @@ static inline int futex_syscall(volatile u_int32_t *uaddr, int op, u_int32_t val, struct timespec *timeout, volatile u_int32_t *uaddr2, int val3, int opflags) { -#if defined(__NR_futex_time64) - if (sizeof(*timeout) != sizeof(struct __kernel_old_timespec)) { - int ret = syscall(__NR_futex_time64, uaddr, op | opflags, val, timeout, - uaddr2, val3); - if (ret == 0 || errno != ENOSYS) - return ret; - } -#endif - -#if defined(__NR_futex) - if (sizeof(*timeout) == sizeof(struct __kernel_old_timespec)) - return syscall(__NR_futex, uaddr, op | opflags, val, timeout, uaddr2, val3); - - if (timeout && timeout->tv_sec == (long)timeout->tv_sec) { - struct __kernel_old_timespec ts32; - - ts32.tv_sec = (__kernel_long_t) timeout->tv_sec; - ts32.tv_nsec = (__kernel_long_t) timeout->tv_nsec; - - return 
syscall(__NR_futex, uaddr, op | opflags, val, ts32, uaddr2, val3); - } else if (!timeout) { - return syscall(__NR_futex, uaddr, op | opflags, val, NULL, uaddr2, val3); - } -#endif - - errno = ENOSYS; - return -1; + return syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3); } static inline int futex_syscall_nr_requeue(volatile u_int32_t *uaddr, int op, u_int32_t val, int nr_requeue, volatile u_int32_t *uaddr2, int val3, int opflags) { -#if defined(__NR_futex_time64) - int ret = syscall(__NR_futex_time64, uaddr, op | opflags, val, nr_requeue, - uaddr2, val3); - if (ret == 0 || errno != ENOSYS) - return ret; -#endif - -#if defined(__NR_futex) - return syscall(__NR_futex, uaddr, op | opflags, val, nr_requeue, uaddr2, val3); -#endif + return syscall(SYS_futex, uaddr, op | opflags, val, nr_requeue, uaddr2, val3); } /** -- cgit v1.2.3 From 6ac22d036f86c4e2d38bea1108672f947f7facca Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Mon, 11 Oct 2021 01:20:30 -0700 Subject: perf bpf: Pull in bpf_program__get_prog_info_linear() To prepare for impending deprecation of libbpf's bpf_program__get_prog_info_linear(), pull in the function and associated helpers into the perf codebase and migrate existing uses to the perf copy. Since libbpf's deprecated definitions will still be visible to perf, it is necessary to rename perf's definitions. 
Signed-off-by: Dave Marchevsky Acked-by: Andrii Nakryiko Acked-by: Song Liu Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Ingo Molnar Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20211011082031.4148337-4-davemarchevsky@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf.data-file-format.txt | 2 +- tools/perf/util/Build | 1 + tools/perf/util/annotate.c | 3 +- tools/perf/util/bpf-event.c | 41 ++-- tools/perf/util/bpf-event.h | 2 +- tools/perf/util/bpf-utils.c | 261 +++++++++++++++++++++ tools/perf/util/bpf-utils.h | 76 ++++++ tools/perf/util/bpf_counter.c | 6 +- tools/perf/util/dso.c | 1 + tools/perf/util/env.c | 1 + tools/perf/util/header.c | 13 +- 11 files changed, 374 insertions(+), 33 deletions(-) create mode 100644 tools/perf/util/bpf-utils.c create mode 100644 tools/perf/util/bpf-utils.h (limited to 'tools') diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index e6ff8c898ada..f56d0e0fbff6 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -346,7 +346,7 @@ to special needs. HEADER_BPF_PROG_INFO = 25, -struct bpf_prog_info_linear, which contains detailed information about +struct perf_bpil, which contains detailed information about a BPF program, including type, id, tag, jited/xlated instructions, etc. 
HEADER_BPF_BTF = 26, diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 15b2366ad384..2e5bfbb69960 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -202,6 +202,7 @@ endif perf-y += perf-hooks.o perf-$(CONFIG_LIBBPF) += bpf-event.o +perf-$(CONFIG_LIBBPF) += bpf-utils.o perf-$(CONFIG_CXX) += c++/ diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 4bab2273303a..8511af55fc3a 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -28,6 +28,7 @@ #include "evsel.h" #include "evlist.h" #include "bpf-event.h" +#include "bpf-utils.h" #include "block-range.h" #include "string2.h" #include "util/event.h" @@ -1704,12 +1705,12 @@ static int symbol__disassemble_bpf(struct symbol *sym, { struct annotation *notes = symbol__annotation(sym); struct annotation_options *opts = args->options; - struct bpf_prog_info_linear *info_linear; struct bpf_prog_linfo *prog_linfo = NULL; struct bpf_prog_info_node *info_node; int len = sym->end - sym->start; disassembler_ftype disassemble; struct map *map = args->ms.map; + struct perf_bpil *info_linear; struct disassemble_info info; struct dso *dso = map->dso; int pc = 0, count, sub_id; diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 1a7112a87736..bac6dcf1fe81 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -10,6 +10,7 @@ #include #include #include "bpf-event.h" +#include "bpf-utils.h" #include "debug.h" #include "dso.h" #include "symbol.h" @@ -32,8 +33,6 @@ struct btf * __weak btf__load_from_kernel_by_id(__u32 id) return err ? 
ERR_PTR(err) : btf; } -#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) - static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len) { int ret = 0; @@ -48,9 +47,9 @@ static int machine__process_bpf_event_load(struct machine *machine, union perf_event *event, struct perf_sample *sample __maybe_unused) { - struct bpf_prog_info_linear *info_linear; struct bpf_prog_info_node *info_node; struct perf_env *env = machine->env; + struct perf_bpil *info_linear; int id = event->bpf.id; unsigned int i; @@ -175,9 +174,9 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, { struct perf_record_ksymbol *ksymbol_event = &event->ksymbol; struct perf_record_bpf_event *bpf_event = &event->bpf; - struct bpf_prog_info_linear *info_linear; struct perf_tool *tool = session->tool; struct bpf_prog_info_node *info_node; + struct perf_bpil *info_linear; struct bpf_prog_info *info; struct btf *btf = NULL; struct perf_env *env; @@ -191,15 +190,15 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, */ env = session->data ? &session->header.env : &perf_env; - arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS; - arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; - arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; - arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS; - arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS; - arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; - arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; + arrays = 1UL << PERF_BPIL_JITED_KSYMS; + arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS; + arrays |= 1UL << PERF_BPIL_FUNC_INFO; + arrays |= 1UL << PERF_BPIL_PROG_TAGS; + arrays |= 1UL << PERF_BPIL_JITED_INSNS; + arrays |= 1UL << PERF_BPIL_LINE_INFO; + arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO; - info_linear = bpf_program__get_prog_info_linear(fd, arrays); + info_linear = get_bpf_prog_info_linear(fd, arrays); if (IS_ERR_OR_NULL(info_linear)) { info_linear = NULL; pr_debug("%s: failed to get BPF program info. 
aborting\n", __func__); @@ -452,8 +451,8 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, static void perf_env__add_bpf_info(struct perf_env *env, u32 id) { - struct bpf_prog_info_linear *info_linear; struct bpf_prog_info_node *info_node; + struct perf_bpil *info_linear; struct btf *btf = NULL; u64 arrays; u32 btf_id; @@ -463,15 +462,15 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) if (fd < 0) return; - arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS; - arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; - arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; - arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS; - arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS; - arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; - arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; + arrays = 1UL << PERF_BPIL_JITED_KSYMS; + arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS; + arrays |= 1UL << PERF_BPIL_FUNC_INFO; + arrays |= 1UL << PERF_BPIL_PROG_TAGS; + arrays |= 1UL << PERF_BPIL_JITED_INSNS; + arrays |= 1UL << PERF_BPIL_LINE_INFO; + arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO; - info_linear = bpf_program__get_prog_info_linear(fd, arrays); + info_linear = get_bpf_prog_info_linear(fd, arrays); if (IS_ERR_OR_NULL(info_linear)) { pr_debug("%s: failed to get BPF program info. 
aborting\n", __func__); goto out; diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index 68f315c3df5b..144a8a24cc69 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -19,7 +19,7 @@ struct evlist; struct target; struct bpf_prog_info_node { - struct bpf_prog_info_linear *info_linear; + struct perf_bpil *info_linear; struct rb_node rb_node; }; diff --git a/tools/perf/util/bpf-utils.c b/tools/perf/util/bpf-utils.c new file mode 100644 index 000000000000..e271e05e51bc --- /dev/null +++ b/tools/perf/util/bpf-utils.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include +#include +#include +#include +#include +#include "bpf-utils.h" +#include "debug.h" + +struct bpil_array_desc { + int array_offset; /* e.g. offset of jited_prog_insns */ + int count_offset; /* e.g. offset of jited_prog_len */ + int size_offset; /* > 0: offset of rec size, + * < 0: fix size of -size_offset + */ +}; + +static struct bpil_array_desc bpil_array_desc[] = { + [PERF_BPIL_JITED_INSNS] = { + offsetof(struct bpf_prog_info, jited_prog_insns), + offsetof(struct bpf_prog_info, jited_prog_len), + -1, + }, + [PERF_BPIL_XLATED_INSNS] = { + offsetof(struct bpf_prog_info, xlated_prog_insns), + offsetof(struct bpf_prog_info, xlated_prog_len), + -1, + }, + [PERF_BPIL_MAP_IDS] = { + offsetof(struct bpf_prog_info, map_ids), + offsetof(struct bpf_prog_info, nr_map_ids), + -(int)sizeof(__u32), + }, + [PERF_BPIL_JITED_KSYMS] = { + offsetof(struct bpf_prog_info, jited_ksyms), + offsetof(struct bpf_prog_info, nr_jited_ksyms), + -(int)sizeof(__u64), + }, + [PERF_BPIL_JITED_FUNC_LENS] = { + offsetof(struct bpf_prog_info, jited_func_lens), + offsetof(struct bpf_prog_info, nr_jited_func_lens), + -(int)sizeof(__u32), + }, + [PERF_BPIL_FUNC_INFO] = { + offsetof(struct bpf_prog_info, func_info), + offsetof(struct bpf_prog_info, nr_func_info), + offsetof(struct bpf_prog_info, 
func_info_rec_size), + }, + [PERF_BPIL_LINE_INFO] = { + offsetof(struct bpf_prog_info, line_info), + offsetof(struct bpf_prog_info, nr_line_info), + offsetof(struct bpf_prog_info, line_info_rec_size), + }, + [PERF_BPIL_JITED_LINE_INFO] = { + offsetof(struct bpf_prog_info, jited_line_info), + offsetof(struct bpf_prog_info, nr_jited_line_info), + offsetof(struct bpf_prog_info, jited_line_info_rec_size), + }, + [PERF_BPIL_PROG_TAGS] = { + offsetof(struct bpf_prog_info, prog_tags), + offsetof(struct bpf_prog_info, nr_prog_tags), + -(int)sizeof(__u8) * BPF_TAG_SIZE, + }, + +}; + +static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info, + int offset) +{ + __u32 *array = (__u32 *)info; + + if (offset >= 0) + return array[offset / sizeof(__u32)]; + return -(int)offset; +} + +static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info, + int offset) +{ + __u64 *array = (__u64 *)info; + + if (offset >= 0) + return array[offset / sizeof(__u64)]; + return -(int)offset; +} + +static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset, + __u32 val) +{ + __u32 *array = (__u32 *)info; + + if (offset >= 0) + array[offset / sizeof(__u32)] = val; +} + +static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset, + __u64 val) +{ + __u64 *array = (__u64 *)info; + + if (offset >= 0) + array[offset / sizeof(__u64)] = val; +} + +struct perf_bpil * +get_bpf_prog_info_linear(int fd, __u64 arrays) +{ + struct bpf_prog_info info = {}; + struct perf_bpil *info_linear; + __u32 info_len = sizeof(info); + __u32 data_len = 0; + int i, err; + void *ptr; + + if (arrays >> PERF_BPIL_LAST_ARRAY) + return ERR_PTR(-EINVAL); + + /* step 1: get array dimensions */ + err = bpf_obj_get_info_by_fd(fd, &info, &info_len); + if (err) { + pr_debug("can't get prog info: %s", strerror(errno)); + return ERR_PTR(-EFAULT); + } + + /* step 2: calculate total size of all arrays */ + for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { + bool 
include_array = (arrays & (1UL << i)) > 0; + struct bpil_array_desc *desc; + __u32 count, size; + + desc = bpil_array_desc + i; + + /* kernel is too old to support this field */ + if (info_len < desc->array_offset + sizeof(__u32) || + info_len < desc->count_offset + sizeof(__u32) || + (desc->size_offset > 0 && info_len < (__u32)desc->size_offset)) + include_array = false; + + if (!include_array) { + arrays &= ~(1UL << i); /* clear the bit */ + continue; + } + + count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + + data_len += count * size; + } + + /* step 3: allocate continuous memory */ + data_len = roundup(data_len, sizeof(__u64)); + info_linear = malloc(sizeof(struct perf_bpil) + data_len); + if (!info_linear) + return ERR_PTR(-ENOMEM); + + /* step 4: fill data to info_linear->info */ + info_linear->arrays = arrays; + memset(&info_linear->info, 0, sizeof(info)); + ptr = info_linear->data; + + for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { + struct bpil_array_desc *desc; + __u32 count, size; + + if ((arrays & (1UL << i)) == 0) + continue; + + desc = bpil_array_desc + i; + count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + bpf_prog_info_set_offset_u32(&info_linear->info, + desc->count_offset, count); + bpf_prog_info_set_offset_u32(&info_linear->info, + desc->size_offset, size); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, + ptr_to_u64(ptr)); + ptr += count * size; + } + + /* step 5: call syscall again to get required arrays */ + err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len); + if (err) { + pr_debug("can't get prog info: %s", strerror(errno)); + free(info_linear); + return ERR_PTR(-EFAULT); + } + + /* step 6: verify the data */ + for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { + struct bpil_array_desc *desc; + __u32 v1, v2; 
+ + if ((arrays & (1UL << i)) == 0) + continue; + + desc = bpil_array_desc + i; + v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + desc->count_offset); + if (v1 != v2) + pr_warning("%s: mismatch in element count\n", __func__); + + v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + desc->size_offset); + if (v1 != v2) + pr_warning("%s: mismatch in rec size\n", __func__); + } + + /* step 7: update info_len and data_len */ + info_linear->info_len = sizeof(struct bpf_prog_info); + info_linear->data_len = data_len; + + return info_linear; +} + +void bpil_addr_to_offs(struct perf_bpil *info_linear) +{ + int i; + + for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { + struct bpil_array_desc *desc; + __u64 addr, offs; + + if ((info_linear->arrays & (1UL << i)) == 0) + continue; + + desc = bpil_array_desc + i; + addr = bpf_prog_info_read_offset_u64(&info_linear->info, + desc->array_offset); + offs = addr - ptr_to_u64(info_linear->data); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, offs); + } +} + +void bpil_offs_to_addr(struct perf_bpil *info_linear) +{ + int i; + + for (i = PERF_BPIL_FIRST_ARRAY; i < PERF_BPIL_LAST_ARRAY; ++i) { + struct bpil_array_desc *desc; + __u64 addr, offs; + + if ((info_linear->arrays & (1UL << i)) == 0) + continue; + + desc = bpil_array_desc + i; + offs = bpf_prog_info_read_offset_u64(&info_linear->info, + desc->array_offset); + addr = offs + ptr_to_u64(info_linear->data); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, addr); + } +} diff --git a/tools/perf/util/bpf-utils.h b/tools/perf/util/bpf-utils.h new file mode 100644 index 000000000000..86a5055cdfad --- /dev/null +++ b/tools/perf/util/bpf-utils.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +#ifndef __PERF_BPF_UTILS_H +#define __PERF_BPF_UTILS_H + 
+#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) + +#ifdef HAVE_LIBBPF_SUPPORT + +#include + +/* + * Get bpf_prog_info in continuous memory + * + * struct bpf_prog_info has multiple arrays. The user has option to choose + * arrays to fetch from kernel. The following APIs provide an uniform way to + * fetch these data. All arrays in bpf_prog_info are stored in a single + * continuous memory region. This makes it easy to store the info in a + * file. + * + * Before writing perf_bpil to files, it is necessary to + * translate pointers in bpf_prog_info to offsets. Helper functions + * bpil_addr_to_offs() and bpil_offs_to_addr() + * are introduced to switch between pointers and offsets. + * + * Examples: + * # To fetch map_ids and prog_tags: + * __u64 arrays = (1UL << PERF_BPIL_MAP_IDS) | + * (1UL << PERF_BPIL_PROG_TAGS); + * struct perf_bpil *info_linear = + * get_bpf_prog_info_linear(fd, arrays); + * + * # To save data in file + * bpil_addr_to_offs(info_linear); + * write(f, info_linear, sizeof(*info_linear) + info_linear->data_len); + * + * # To read data from file + * read(f, info_linear, ); + * bpil_offs_to_addr(info_linear); + */ +enum perf_bpil_array_types { + PERF_BPIL_FIRST_ARRAY = 0, + PERF_BPIL_JITED_INSNS = 0, + PERF_BPIL_XLATED_INSNS, + PERF_BPIL_MAP_IDS, + PERF_BPIL_JITED_KSYMS, + PERF_BPIL_JITED_FUNC_LENS, + PERF_BPIL_FUNC_INFO, + PERF_BPIL_LINE_INFO, + PERF_BPIL_JITED_LINE_INFO, + PERF_BPIL_PROG_TAGS, + PERF_BPIL_LAST_ARRAY, +}; + +struct perf_bpil { + /* size of struct bpf_prog_info, when the tool is compiled */ + __u32 info_len; + /* total bytes allocated for data, round up to 8 bytes */ + __u32 data_len; + /* which arrays are included in data */ + __u64 arrays; + struct bpf_prog_info info; + __u8 data[]; +}; + +struct perf_bpil * +get_bpf_prog_info_linear(int fd, __u64 arrays); + +void +bpil_addr_to_offs(struct perf_bpil *info_linear); + +void +bpil_offs_to_addr(struct perf_bpil *info_linear); + +#endif /* HAVE_LIBBPF_SUPPORT */ +#endif /* 
__PERF_BPF_UTILS_H */ diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index ced2dac31dcf..c17d4a43ce06 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -13,6 +13,7 @@ #include #include "bpf_counter.h" +#include "bpf-utils.h" #include "counts.h" #include "debug.h" #include "evsel.h" @@ -61,14 +62,13 @@ static int bpf_program_profiler__destroy(struct evsel *evsel) static char *bpf_target_prog_name(int tgt_fd) { - struct bpf_prog_info_linear *info_linear; struct bpf_func_info *func_info; + struct perf_bpil *info_linear; const struct btf_type *t; struct btf *btf = NULL; char *name = NULL; - info_linear = bpf_program__get_prog_info_linear( - tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); + info_linear = get_bpf_prog_info_linear(tgt_fd, 1UL << PERF_BPIL_FUNC_INFO); if (IS_ERR_OR_NULL(info_linear)) { pr_debug("failed to get info_linear for prog FD %d\n", tgt_fd); return NULL; diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 9ed9a5676d35..9cc8a1772b4b 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -14,6 +14,7 @@ #ifdef HAVE_LIBBPF_SUPPORT #include #include "bpf-event.h" +#include "bpf-utils.h" #endif #include "compress.h" #include "env.h" diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index cf773f0dec38..17f1dd0680b4 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -16,6 +16,7 @@ struct perf_env perf_env; #ifdef HAVE_LIBBPF_SUPPORT #include "bpf-event.h" +#include "bpf-utils.h" #include void perf_env__insert_bpf_prog_info(struct perf_env *env, diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 1c7414f66655..56511db8fa03 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -48,6 +48,7 @@ #include "util/util.h" // perf_exe() #include "cputopo.h" #include "bpf-event.h" +#include "bpf-utils.h" #include "clockid.h" #include "pmu-hybrid.h" @@ -1006,17 +1007,17 @@ static int write_bpf_prog_info(struct feat_fd *ff, node = 
rb_entry(next, struct bpf_prog_info_node, rb_node); next = rb_next(&node->rb_node); - len = sizeof(struct bpf_prog_info_linear) + + len = sizeof(struct perf_bpil) + node->info_linear->data_len; /* before writing to file, translate address to offset */ - bpf_program__bpil_addr_to_offs(node->info_linear); + bpil_addr_to_offs(node->info_linear); ret = do_write(ff, node->info_linear, len); /* * translate back to address even when do_write() fails, * so that this function never changes the data. */ - bpf_program__bpil_offs_to_addr(node->info_linear); + bpil_offs_to_addr(node->info_linear); if (ret < 0) goto out; } @@ -3018,9 +3019,9 @@ static int process_dir_format(struct feat_fd *ff, #ifdef HAVE_LIBBPF_SUPPORT static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) { - struct bpf_prog_info_linear *info_linear; struct bpf_prog_info_node *info_node; struct perf_env *env = &ff->ph->env; + struct perf_bpil *info_linear; u32 count, i; int err = -1; @@ -3049,7 +3050,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) goto out; } - info_linear = malloc(sizeof(struct bpf_prog_info_linear) + + info_linear = malloc(sizeof(struct perf_bpil) + data_len); if (!info_linear) goto out; @@ -3071,7 +3072,7 @@ static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) goto out; /* after reading from file, translate offset to address */ - bpf_program__bpil_offs_to_addr(info_linear); + bpil_offs_to_addr(info_linear); info_node->info_linear = info_linear; perf_env__insert_bpf_prog_info(env, info_node); } -- cgit v1.2.3 From d0d0f0c12461daf2f642d2779abe566e00716069 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 11 Oct 2021 19:13:20 -0700 Subject: tools: Bump minimum LLVM C++ std to GNU++14 LLVM 9 (current release is LLVM 13) moved the minimum C++ version to GNU++14. Bump the version numbers in the feature test and perf build. 
Reviewed-by: Fangrui Song Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Daniel Borkmann Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Sedat Dilek Cc: llvm@lists.linux.dev Link: https://lore.kernel.org/r/20211012021321.291635-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/Makefile | 6 +++--- tools/perf/Makefile.config | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 71390309f4d7..0a3244ad9673 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -300,7 +300,7 @@ $(OUTPUT)test-jvmti-cmlr.bin: $(BUILD) $(OUTPUT)test-llvm.bin: - $(BUILDXX) -std=gnu++11 \ + $(BUILDXX) -std=gnu++14 \ -I$(shell $(LLVM_CONFIG) --includedir) \ -L$(shell $(LLVM_CONFIG) --libdir) \ $(shell $(LLVM_CONFIG) --libs Core BPF) \ @@ -308,12 +308,12 @@ $(OUTPUT)test-llvm.bin: > $(@:.bin=.make.output) 2>&1 $(OUTPUT)test-llvm-version.bin: - $(BUILDXX) -std=gnu++11 \ + $(BUILDXX) -std=gnu++14 \ -I$(shell $(LLVM_CONFIG) --includedir) \ > $(@:.bin=.make.output) 2>&1 $(OUTPUT)test-clang.bin: - $(BUILDXX) -std=gnu++11 \ + $(BUILDXX) -std=gnu++14 \ -I$(shell $(LLVM_CONFIG) --includedir) \ -L$(shell $(LLVM_CONFIG) --libdir) \ -Wl,--start-group -lclangBasic -lclangDriver \ diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 4a9baed28f2e..07e65a061fd3 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -285,7 +285,7 @@ CORE_CFLAGS += -Wall CORE_CFLAGS += -Wextra CORE_CFLAGS += -std=gnu99 -CXXFLAGS += -std=gnu++11 -fno-exceptions -fno-rtti +CXXFLAGS += -std=gnu++14 -fno-exceptions -fno-rtti CXXFLAGS += -Wall CXXFLAGS += -fno-omit-frame-pointer CXXFLAGS += -ggdb3 -- cgit v1.2.3 From 32f7aa2731b24ad8393f26d63df959d74844345f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 11 
Oct 2021 19:13:21 -0700 Subject: perf clang: Fixes for more recent LLVM/clang The parameters to two functions and the location of a variable have changed in more recent LLVM/clang releases. Remove the unnecessary -fmessage-length and -ferror-limit flags, the former causes failures like: 58: builtin clang support : 58.1: builtin clang compile C source to IR : --- start --- test child forked, pid 279307 error: unknown argument: '-fmessage-length' 1 error generated. test child finished with -1 Tested with LLVM 6, 8, 9, 10 and 11. Reviewed-by: Fangrui Song Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Daniel Borkmann Cc: Ingo Molnar Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Sedat Dilek , Cc: llvm@lists.linux.dev Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/c++/clang.cpp | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp index c8885dfa3667..df7b18fb6b6e 100644 --- a/tools/perf/util/c++/clang.cpp +++ b/tools/perf/util/c++/clang.cpp @@ -43,8 +43,6 @@ createCompilerInvocation(llvm::opt::ArgStringList CFlags, StringRef& Path, "-cc1", "-triple", "bpf-pc-linux", "-fsyntax-only", - "-ferror-limit", "19", - "-fmessage-length", "127", "-O2", "-nostdsysteminc", "-nobuiltininc", @@ -55,7 +53,11 @@ createCompilerInvocation(llvm::opt::ArgStringList CFlags, StringRef& Path, "-x", "c"}; CCArgs.append(CFlags.begin(), CFlags.end()); - CompilerInvocation *CI = tooling::newInvocation(&Diags, CCArgs); + CompilerInvocation *CI = tooling::newInvocation(&Diags, CCArgs +#if CLANG_VERSION_MAJOR >= 11 + ,/*BinaryName=*/nullptr +#endif + ); FrontendOptions& Opts = CI->getFrontendOpts(); Opts.Inputs.clear(); @@ -151,13 +153,16 @@ getBPFObjectFromModule(llvm::Module *Module) legacy::PassManager PM; bool NotAdded; -#if CLANG_VERSION_MAJOR < 7 - NotAdded = 
TargetMachine->addPassesToEmitFile(PM, ostream, - TargetMachine::CGFT_ObjectFile); + NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream +#if CLANG_VERSION_MAJOR >= 7 + , /*DwoOut=*/nullptr +#endif +#if CLANG_VERSION_MAJOR < 10 + , TargetMachine::CGFT_ObjectFile #else - NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream, nullptr, - TargetMachine::CGFT_ObjectFile); + , llvm::CGFT_ObjectFile #endif + ); if (NotAdded) { llvm::errs() << "TargetMachine can't emit a file of this type\n"; return std::unique_ptr>(nullptr); -- cgit v1.2.3 From 6da2a45e15af4f706fed211f8eb57a40cc7abfc7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 19 Jun 2021 10:09:08 -0300 Subject: perf beauty: Update copy of linux/socket.h with the kernel sources To pick the changes in: 99ce45d5e7dbde39 ("mctp: Implement extended addressing") 55c42fa7fa331f98 ("mptcp: add MPTCP_INFO getsockopt") That don't result in any changes in the tables generated from that header. A table generator for setsockopt is needed, probably will be done in the 5.16 cycle. This silences this perf build warning: Warning: Kernel ABI header at 'tools/perf/trace/beauty/include/linux/socket.h' differs from latest version at 'include/linux/socket.h' diff -u tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h Cc: David S. 
Miller Cc: Florian Westphal Cc: Jeremy Kerr Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/trace/beauty/include/linux/socket.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 041d6032a348..8ef26d89ef49 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -364,6 +364,8 @@ struct ucred { #define SOL_KCM 281 #define SOL_TLS 282 #define SOL_XDP 283 +#define SOL_MPTCP 284 +#define SOL_MCTP 285 /* IPX options */ #define IPX_TYPE 1 -- cgit v1.2.3 From 88c42f4d6cb249eb68524282f8d4cc32f9059984 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 5 Nov 2021 22:37:33 -0700 Subject: perf bpf: Add missing free to bpf_event__print_bpf_prog_info() If btf__new() is called then there needs to be a corresponding btf__free(). Fixes: f8dfeae009effc0b ("perf bpf: Show more BPF program info in print_bpf_prog_info()") Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Cc: Tiezhu Yang Cc: Yonghong Song Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Link: http://lore.kernel.org/lkml/20211106053733.3580931-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-event.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index a27badb0a53a..4d3b4cdce176 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -575,7 +575,7 @@ void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, synthesize_bpf_prog_name(name, KSYM_NAME_LEN, info, btf, 0); fprintf(fp, "# bpf_prog_info %u: %s addr 0x%llx size %u\n", info->id, name, prog_addrs[0], 
prog_lens[0]); - return; + goto out; } fprintf(fp, "# bpf_prog_info %u:\n", info->id); @@ -585,4 +585,6 @@ void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, fprintf(fp, "# \tsub_prog %u: %s addr 0x%llx size %u\n", i, name, prog_addrs[i], prog_lens[i]); } +out: + btf__free(btf); } -- cgit v1.2.3 From 3500eeebeda842e8499617b8983a4c55fd6bdfe3 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 5 Nov 2021 13:58:47 -0700 Subject: perf evsel: Fix missing exclude_{host,guest} setting The current logic for the perf missing feature has a bug that it can wrongly clear some modifiers like G or H. Actually some PMUs don't support any filtering or exclusion while others do. But we check it as a global feature. For example, the cycles event can have 'G' modifier to enable it only in the guest mode on x86. When you don't run any VMs it'll return 0. # perf stat -a -e cycles:G sleep 1 Performance counter stats for 'system wide': 0 cycles:G 1.000721670 seconds time elapsed But when it's used with other pmu events that don't support G modifier, it'll be reset and return non-zero values. # perf stat -a -e cycles:G,msr/tsc/ sleep 1 Performance counter stats for 'system wide': 538,029,960 cycles:G 16,924,010,738 msr/tsc/ 1.001815327 seconds time elapsed This is because of the missing feature detection logic being global. Add a hashmap to set pmu-specific exclude_host/guest features. Committer notes: Fix 'perf test python' by adding a stub for evsel__find_pmu() in tools/perf/util/python.c, document that it is used so far only for the above reasons so that if anybody needs this in the python binding usecases, we can revisit this. 
Reported-by: Stephane Eranian Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20211105205847.120950-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 27 ++++++++++++++++++++++----- tools/perf/util/evsel.h | 4 ++++ tools/perf/util/pmu.h | 4 ++++ tools/perf/util/python.c | 12 ++++++++++++ 4 files changed, 42 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2cfc2935d1d2..3cc1f8fcf15c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1824,7 +1824,7 @@ static void evsel__disable_missing_features(struct evsel *evsel) evsel->open_flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC; if (perf_missing_features.mmap2) evsel->core.attr.mmap2 = 0; - if (perf_missing_features.exclude_guest) + if (evsel->pmu && evsel->pmu->missing_features.exclude_guest) evsel->core.attr.exclude_guest = evsel->core.attr.exclude_host = 0; if (perf_missing_features.lbr_flags) evsel->core.attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS | @@ -1917,10 +1917,27 @@ bool evsel__detect_missing_features(struct evsel *evsel) perf_missing_features.mmap2 = true; pr_debug2_peo("switching off mmap2\n"); return true; - } else if (!perf_missing_features.exclude_guest && - (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host)) { - perf_missing_features.exclude_guest = true; - pr_debug2_peo("switching off exclude_guest, exclude_host\n"); + } else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) && + (evsel->pmu == NULL || evsel->pmu->missing_features.exclude_guest)) { + if (evsel->pmu == NULL) { + evsel->pmu = evsel__find_pmu(evsel); + if (evsel->pmu) + evsel->pmu->missing_features.exclude_guest = true; + else { + /* we cannot find PMU, disable attrs now */ + evsel->core.attr.exclude_host = false; + evsel->core.attr.exclude_guest = false; + } + } + + if 
(evsel->exclude_GH) { + pr_debug2_peo("PMU has no exclude_host/guest support, bailing out\n"); + return false; + } + if (!perf_missing_features.exclude_guest) { + perf_missing_features.exclude_guest = true; + pr_debug2_peo("switching off exclude_guest, exclude_host\n"); + } return true; } else if (!perf_missing_features.sample_id_all) { perf_missing_features.sample_id_all = true; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 846c827934de..dcc87c2881b8 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -22,6 +22,7 @@ struct target; struct hashmap; struct bperf_leader_bpf; struct bperf_follower_bpf; +struct perf_pmu; typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); @@ -153,6 +154,9 @@ struct evsel { }; unsigned long open_flags; int precise_ip_original; + + /* for missing_features */ + struct perf_pmu *pmu; }; struct perf_missing_features { diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 15bbec3a9959..541889fa9f9c 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -49,6 +49,10 @@ struct perf_pmu { struct list_head caps; /* HEAD struct perf_pmu_caps -> list */ struct list_head list; /* ELEM */ struct list_head hybrid_list; + + struct { + bool exclude_guest; + } missing_features; }; extern struct perf_pmu perf_pmu__fake; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 8feef3a05af7..563a9ba8954f 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -69,6 +69,18 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) { } +/* + * This one is needed not to drag the PMU bandwagon, jevents generated + * pmu_sys_event_tables, etc and evsel__find_pmu() is used so far just for + * doing per PMU perf_event_attr.exclude_guest handling, not really needed, so + * far, for the perf python binding known usecases, revisit if this become + * necessary. 
+ */ +struct perf_pmu *evsel__find_pmu(struct evsel *evsel __maybe_unused) +{ + return NULL; +} + /* * Add this one here not to drag util/metricgroup.c */ -- cgit v1.2.3 From eb39bf325631f9ae185abd16281079b7b9858737 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 2 Nov 2021 11:01:12 +0530 Subject: perf evsel: Don't set exclude_guest by default Perf tool sets exclude_guest by default while calling perf_event_open(). Because IBS does not have filtering capability, it always gets rejected by IBS PMU driver and thus perf falls back to non-precise sampling. Fix it by not setting exclude_guest by default on AMD. Before: $ sudo ./perf record -C 0 -vvv true |& grep precise precise_ip 3 decreasing precise_ip by one (2) precise_ip 2 decreasing precise_ip by one (1) precise_ip 1 decreasing precise_ip by one (0) After: $ sudo ./perf record -C 0 -vvv true |& grep precise precise_ip 3 decreasing precise_ip by one (2) precise_ip 2 Committer notes: Fixup init to zero for perf_env in older compilers: arch/x86/util/evsel.c:15:26: error: missing field 'os_release' initializer [-Werror,-Wmissing-field-initializers] struct perf_env env = {0}; ^ Committer notes: Namhyung remarked: It'd be nice if it can cover explicit "-e cycles:pp" as well. Ravi clarified: For explicit :pp modifier, evsel->precise_max does not get set and thus perf does not try with different attr->precise_ip values while exclude_guest set. 
So no issue with explicit :pp: $ sudo ./perf record -C 0 -e cycles:pp -vvv |& grep "precise_ip\|exclude_guest" precise_ip 2 exclude_guest 1 precise_ip 2 exclude_guest 1 switching off exclude_guest, exclude_host precise_ip 2 ^C Also, with :P modifier, evsel->precise_max gets set but exclude_guest does not and thus :P also works fine: $ sudo ./perf record -C 0 -e cycles:P -vvv |& grep "precise_ip\|exclude_guest" precise_ip 3 decreasing precise_ip by one (2) precise_ip 2 ^C Reported-by: Kim Phillips Signed-off-by: Ravi Bangoria Acked-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211103072112.32312-1-ravi.bangoria@amd.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/evsel.c | 23 +++++++++++++++++++++++ tools/perf/util/evsel.c | 12 +++++++----- tools/perf/util/evsel.h | 1 + 3 files changed, 31 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c index 2f733cdc8dbb..ac2899a25b7a 100644 --- a/tools/perf/arch/x86/util/evsel.c +++ b/tools/perf/arch/x86/util/evsel.c @@ -1,8 +1,31 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include "util/evsel.h" +#include "util/env.h" +#include "linux/string.h" void arch_evsel__set_sample_weight(struct evsel *evsel) { evsel__set_sample_bit(evsel, WEIGHT_STRUCT); } + +void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr) +{ + struct perf_env env = { .total_mem = 0, } ; + + if (!perf_env__cpuid(&env)) + return; + + /* + * On AMD, precise cycles event sampling internally uses IBS pmu. + * But IBS does not have filtering capabilities and perf by default + * sets exclude_guest = 1. This makes IBS pmu event init fail and + * thus perf ends up doing non-precise sampling. Avoid it by clearing + * exclude_guest. 
+ */ + if (env.cpuid && strstarts(env.cpuid, "AuthenticAMD")) + attr->exclude_guest = 0; + + free(env.cpuid); +} diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 3cc1f8fcf15c..ec967fb8d7d9 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -294,7 +294,7 @@ static bool perf_event_can_profile_kernel(void) return perf_event_paranoid_check(1); } -struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config) +struct evsel *evsel__new_cycles(bool precise __maybe_unused, __u32 type, __u64 config) { struct perf_event_attr attr = { .type = type, @@ -305,18 +305,16 @@ struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config) event_attr_init(&attr); - if (!precise) - goto new_event; - /* * Now let the usual logic to set up the perf_event_attr defaults * to kick in when we return and before perf_evsel__open() is called. */ -new_event: evsel = evsel__new(&attr); if (evsel == NULL) goto out; + arch_evsel__fixup_new_cycles(&evsel->core.attr); + evsel->precise_max = true; /* use asprintf() because free(evsel) assumes name is allocated */ @@ -1063,6 +1061,10 @@ void __weak arch_evsel__set_sample_weight(struct evsel *evsel) evsel__set_sample_bit(evsel, WEIGHT); } +void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_unused) +{ +} + /* * The enable_on_exec/disabled value strategy: * diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index dcc87c2881b8..3ea687141afa 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -283,6 +283,7 @@ void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_forma void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier); void arch_evsel__set_sample_weight(struct evsel *evsel); +void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr); int evsel__set_filter(struct evsel *evsel, const char *filter); int evsel__append_tp_filter(struct evsel *evsel, const char *filter); -- cgit v1.2.3 From 
1a86f4ba5cf1c19b55a12be8e5e9235a23921f8d Mon Sep 17 00:00:00 2001 From: Lexi Shao Date: Fri, 29 Oct 2021 14:50:37 +0800 Subject: perf symbols: Ignore $a/$d symbols for ARM modules On an ARM machine, kernel symbols from modules can be resolved to $a instead of printing the actual symbol name. Ignore symbols starting with "$" when building kallsyms rbtree. A sample stacktrace is shown as follows: c0f2e39c schedule_hrtimeout+0x14 ([kernel.kallsyms]) bf4a66d8 $a+0x78 ([test_module]) c0a4f5f4 kthread+0x15c ([kernel.kallsyms]) c0a001f8 ret_from_fork+0x14 ([kernel.kallsyms]) On an ARM machine, $a/$d symbols are used by the compiler to mark the beginning of code/data part in code section. These symbols are filtered out when linking vmlinux(see scripts/kallsyms.c ignored_prefixes), but are left on modules. So there are $a symbols in /proc/kallsyms which share the same addresses with the actual module symbols and confuses perf when resolving symbols. After this patch, the module symbol name is printed: c0f2e39c schedule_hrtimeout+0x14 ([kernel.kallsyms]) bf4a66d8 test_func+0x78 ([test_module]) c0a4f5f4 kthread+0x15c ([kernel.kallsyms]) c0a001f8 ret_from_fork+0x14 ([kernel.kallsyms]) Reviewed-by: James Clark Signed-off-by: Lexi Shao Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Ingo Molnar Cc: Jessica Yu Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: QiuXi Cc: Song Liu Cc: Wangbing Cc: Xiaoming Ni Cc: Yonghong Song Cc: bpf@vger.kernel.org Cc: clang-built-linux@googlegroups.com Link: https://lore.kernel.org/r/20211029065038.39449-2-shaolexi@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0fc9a5410739..35116aed74eb 100644 --- a/tools/perf/util/symbol.c +++ 
b/tools/perf/util/symbol.c @@ -702,6 +702,10 @@ static int map__process_kallsym_symbol(void *arg, const char *name, if (!symbol_type__filter(type)) return 0; + /* Ignore local symbols for ARM modules */ + if (name[0] == '$') + return 0; + /* * module symbols are not sorted so we add all * symbols, setting length to 0, and rely on -- cgit v1.2.3 From a3df50abeb7372fd0f1973f885fb8d634ac4e739 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 18 Oct 2021 14:48:41 +0100 Subject: perf tools: Refactor out kernel symbol argument sanity checking User supplied values for vmlinux and kallsyms are checked before continuing. Refactor this into a function so that it can be used elsewhere. Reviewed-by: Denis Nikitin Signed-off-by: James Clark Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/r/20211018134844.2627174-2-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 13 ++----------- tools/perf/util/symbol.c | 22 ++++++++++++++++++++++ tools/perf/util/symbol.h | 2 ++ 3 files changed, 26 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index a0316ce910db..8167ebfe776a 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1378,18 +1378,9 @@ int cmd_report(int argc, const char **argv) if (quiet) perf_quiet_option(); - if (symbol_conf.vmlinux_name && - access(symbol_conf.vmlinux_name, R_OK)) { - pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name); - ret = -EINVAL; - goto exit; - } - if (symbol_conf.kallsyms_name && - access(symbol_conf.kallsyms_name, R_OK)) { - pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name); - ret = -EINVAL; + ret = symbol__validate_sym_arguments(); + if (ret) goto exit; - } if (report.inverted_callchain) callchain_param.order = ORDER_CALLER; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 35116aed74eb..aa1b7c12fd61 100644 --- 
a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2634,3 +2634,25 @@ struct mem_info *mem_info__new(void) refcount_set(&mi->refcnt, 1); return mi; } + +/* + * Checks that user supplied symbol kernel files are accessible because + * the default mechanism for accessing elf files fails silently. i.e. if + * debug syms for a build ID aren't found perf carries on normally. When + * they are user supplied we should assume that the user doesn't want to + * silently fail. + */ +int symbol__validate_sym_arguments(void) +{ + if (symbol_conf.vmlinux_name && + access(symbol_conf.vmlinux_name, R_OK)) { + pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name); + return -EINVAL; + } + if (symbol_conf.kallsyms_name && + access(symbol_conf.kallsyms_name, R_OK)) { + pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name); + return -EINVAL; + } + return 0; +} diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 954d6a049ee2..166196686f2e 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -286,4 +286,6 @@ static inline void __mem_info__zput(struct mem_info **mi) #define mem_info__zput(mi) __mem_info__zput(&mi) +int symbol__validate_sym_arguments(void); + #endif /* __PERF_SYMBOL */ -- cgit v1.2.3 From 7cc72553ac03ec20afe2dec91dce4624ccd379b8 Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 18 Oct 2021 14:48:42 +0100 Subject: perf tools: Check vmlinux/kallsyms arguments in all tools Only perf report checked the validity of these arguments so apply the same check to all tools that read them for consistency. 
Signed-off-by: James Clark Cc: Alexander Shishkin Cc: Denis Nikitin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/r/20211018134844.2627174-3-james.clark@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 4 ++++ tools/perf/builtin-c2c.c | 4 ++++ tools/perf/builtin-probe.c | 5 +++++ tools/perf/builtin-record.c | 4 ++++ tools/perf/builtin-sched.c | 4 ++++ tools/perf/builtin-script.c | 3 +++ tools/perf/builtin-top.c | 4 ++++ 7 files changed, 28 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 05eb098cb0e3..490bb9b8cf17 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -591,6 +591,10 @@ int cmd_annotate(int argc, const char **argv) return ret; } + ret = symbol__validate_sym_arguments(); + if (ret) + return ret; + if (quiet) perf_quiet_option(); diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index a192014fa52b..b5c67ef73862 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2768,6 +2768,10 @@ static int perf_c2c__report(int argc, const char **argv) if (c2c.stats_only) c2c.use_stdio = true; + err = symbol__validate_sym_arguments(); + if (err) + goto out; + if (!input_name || !strlen(input_name)) input_name = "perf.data"; diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index e1dd51f2874b..c31627af75d4 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -21,6 +21,7 @@ #include "util/build-id.h" #include "util/strlist.h" #include "util/strfilter.h" +#include "util/symbol.h" #include "util/symbol_conf.h" #include "util/debug.h" #include @@ -629,6 +630,10 @@ __cmd_probe(int argc, const char **argv) params.command = 'a'; } + ret = symbol__validate_sym_arguments(); + if (ret) + return ret; + if (params.quiet) { if (verbose != 0) { pr_err(" Error: -v and -q are exclusive.\n"); diff --git a/tools/perf/builtin-record.c 
b/tools/perf/builtin-record.c index 78185c982ebf..0338b813585a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -2712,6 +2712,10 @@ int cmd_record(int argc, const char **argv) if (quiet) perf_quiet_option(); + err = symbol__validate_sym_arguments(); + if (err) + return err; + /* Make system wide (-a) the default target. */ if (!argc && target__none(&rec->opts.target)) rec->opts.target.system_wide = true; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 635a6b5a9ec9..4527f632ebe4 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -3538,6 +3538,7 @@ int cmd_sched(int argc, const char **argv) .fork_event = replay_fork_event, }; unsigned int i; + int ret; for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++) sched.curr_pid[i] = -1; @@ -3598,6 +3599,9 @@ int cmd_sched(int argc, const char **argv) parse_options_usage(NULL, timehist_options, "n", true); return -EINVAL; } + ret = symbol__validate_sym_arguments(); + if (ret) + return ret; return perf_sched__timehist(&sched); } else { diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index f3d07cfab550..9434367af166 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3846,6 +3846,9 @@ int cmd_script(int argc, const char **argv) data.path = input_name; data.force = symbol_conf.force; + if (symbol__validate_sym_arguments()) + return -1; + if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { rec_script_path = get_script_path(argv[1], RECORD_SUFFIX); if (!rec_script_path) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 020c4f110c10..1fc390f136dd 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1618,6 +1618,10 @@ int cmd_top(int argc, const char **argv) if (argc) usage_with_options(top_usage, options); + status = symbol__validate_sym_arguments(); + if (status) + goto out_delete_evlist; + if (annotate_check_args(&top.annotation_opts) < 0) goto 
out_delete_evlist; -- cgit v1.2.3 From b3a018fc31fea05ffd034952b4b6e9e1eb0812bc Mon Sep 17 00:00:00 2001 From: James Clark Date: Mon, 18 Oct 2021 14:48:43 +0100 Subject: perf inject: Add vmlinux and ignore-vmlinux arguments Other perf tools allow specifying the path to vmlinux. 'perf inject' didn't have this argument which made some auxtrace workflows difficult. Also add --ignore-vmlinux for consistency with other tools. Suggested-by: Denis Nikitin Signed-off-by: James Clark Tested-by: Denis Nikitin Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/r/20211018134844.2627174-4-james.clark@arm.com [ Added the perf-inject man page entries for these options, as noted by Denis ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-inject.txt | 7 +++++++ tools/perf/builtin-inject.c | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index 91108fe3ad5f..0570a1ccd344 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -45,6 +45,13 @@ OPTIONS tasks slept. sched_switch contains a callchain where a task slept and sched_stat contains a timeslice how long a task slept. +-k:: +--vmlinux=:: + vmlinux pathname + +--ignore-vmlinux:: + Ignore vmlinux files. 
+ --kallsyms=:: kallsyms pathname diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index ac6c570029e3..bc5259db5fd9 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -940,6 +940,10 @@ int cmd_inject(int argc, const char **argv) #endif OPT_INCR('v', "verbose", &verbose, "be more verbose (show build ids, etc)"), + OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, + "file", "vmlinux pathname"), + OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux, + "don't load vmlinux even if found"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", "kallsyms pathname"), OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"), @@ -974,6 +978,9 @@ int cmd_inject(int argc, const char **argv) return -1; } + if (symbol__validate_sym_arguments()) + return -1; + if (inject.in_place_update) { if (!strcmp(inject.input_name, "-")) { pr_err("Input file name required for in-place updating\n"); -- cgit v1.2.3 From 4e88118c20fc5fa7890230da2d26f0235dd904f5 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 4 Nov 2021 14:23:11 +0100 Subject: perf tools: Use __BYTE_ORDER__ Switch from the libc-defined __BYTE_ORDER to the compiler-defined __BYTE_ORDER__ in order to make endianness detection more robust, like it was done for libbpf. 
Signed-off-by: Ilya Leoshkevich Suggested-by: Arnaldo Carvalho de Melo Cc: Heiko Carstens Cc: Thomas Richter Cc: Vasily Gorbik Link: https://lore.kernel.org/r/20211104132311.984703-1-iii@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c | 2 +- tools/perf/util/data-convert-bt.c | 2 +- tools/perf/util/genelf.h | 2 +- tools/perf/util/intel-bts.c | 2 +- tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c | 2 +- tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c | 2 +- tools/perf/util/s390-cpumsf.c | 8 ++++---- 7 files changed, 10 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 2e5eff4f8f03..2f311189c6e8 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -13,7 +13,7 @@ #include "arm-spe-pkt-decoder.h" -#if __BYTE_ORDER == __BIG_ENDIAN +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define le16_to_cpu bswap_16 #define le32_to_cpu bswap_32 #define le64_to_cpu bswap_64 diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index aa862a26d95c..8f7705bbc2da 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -1437,7 +1437,7 @@ static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex) bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL)) goto err; -#if __BYTE_ORDER == __BIG_ENDIAN +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN); #else bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN); diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index d4137559be05..3db3293213a9 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -42,7 +42,7 @@ int jit_add_debug_info(Elf *e, uint64_t 
code_addr, void *debug, int nr_debug_ent #error "unsupported architecture" #endif -#if __BYTE_ORDER == __BIG_ENDIAN +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define GEN_ELF_ENDIAN ELFDATA2MSB #else #define GEN_ELF_ENDIAN ELFDATA2LSB diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index af1e78d76228..2c8147a62203 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -35,7 +35,7 @@ #define INTEL_BTS_ERR_NOINSN 5 #define INTEL_BTS_ERR_LOST 9 -#if __BYTE_ORDER == __BIG_ENDIAN +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define le64_to_cpu bswap_64 #else #define le64_to_cpu diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 593f20e9774c..9d5e65cec89b 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -143,7 +143,7 @@ static void intel_pt_insn_decoder(struct insn *insn, if (branch == INTEL_PT_BR_CONDITIONAL || branch == INTEL_PT_BR_UNCONDITIONAL) { -#if __BYTE_ORDER == __BIG_ENDIAN +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ switch (insn->immediate.nbytes) { case 1: intel_pt_insn->rel = insn->immediate.value; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index 02a3395d6ce3..4bd154848cad 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -16,7 +16,7 @@ #define BIT63 ((uint64_t)1 << 63) -#if __BYTE_ORDER == __BIG_ENDIAN +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define le16_to_cpu bswap_16 #define le32_to_cpu bswap_32 #define le64_to_cpu bswap_64 diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 8130b56aa04b..f3fdad28a852 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -244,7 +244,7 @@ static bool s390_cpumsf_basic_show(const char 
*color, size_t pos, struct hws_basic_entry *basicp) { struct hws_basic_entry *basic = basicp; -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ struct hws_basic_entry local; unsigned long long word = be64toh(*(unsigned long long *)basicp); @@ -288,7 +288,7 @@ static bool s390_cpumsf_diag_show(const char *color, size_t pos, struct hws_diag_entry *diagp) { struct hws_diag_entry *diag = diagp; -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ struct hws_diag_entry local; unsigned long long word = be64toh(*(unsigned long long *)diagp); @@ -322,7 +322,7 @@ static unsigned long long trailer_timestamp(struct hws_trailer_entry *te, static bool s390_cpumsf_trailer_show(const char *color, size_t pos, struct hws_trailer_entry *te) { -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ struct hws_trailer_entry local; const unsigned long long flags = be64toh(te->flags); @@ -552,7 +552,7 @@ static unsigned long long get_trailer_time(const unsigned char *buf) te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ - sizeof(*te)); -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ clock_base = be64toh(te->progusage[0]) >> 63 & 0x1; progusage2 = be64toh(te->progusage[1]); #else -- cgit v1.2.3 From e4e290791d87b95b2b1fa991e504fba89cbe2a03 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sun, 7 Nov 2021 01:54:44 -0700 Subject: perf stat: Fix memory leak on error path strdup() is used to deduplicate, ensure it isn't leaking an already created string by freeing first. 
Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211107085444.3781604-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 69f3cf3b4a44..e4fb02b05130 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -444,6 +444,7 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list) "Add %s event to groups to get metric expression for %s\n", metric_name, counter->name); + free(printed); printed = strdup(metric_name); } invalid = true; -- cgit v1.2.3 From 6c1912898ed21bef2d7f8b52902b8bc3c0e5c2b5 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sun, 7 Nov 2021 01:00:00 -0800 Subject: perf parse-events: Rename parse_events_error functions Group error functions and name after the data type they manipulate. 
Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211107090002.3784612-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/kvm-stat.c | 2 +- tools/perf/bench/evlist-open-close.c | 2 +- tools/perf/builtin-stat.c | 10 +-- tools/perf/builtin-trace.c | 2 +- tools/perf/tests/expand-cgroup.c | 2 +- tools/perf/tests/parse-events.c | 2 +- tools/perf/util/metricgroup.c | 2 +- tools/perf/util/parse-events.c | 116 ++++++++++++++++---------------- tools/perf/util/parse-events.h | 8 +-- tools/perf/util/parse-events.y | 4 +- tools/perf/util/pmu.c | 8 +-- 11 files changed, 79 insertions(+), 79 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c index 16510686c138..dc644ca01dc6 100644 --- a/tools/perf/arch/powerpc/util/kvm-stat.c +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -116,7 +116,7 @@ static int is_tracepoint_available(const char *str, struct evlist *evlist) bzero(&err, sizeof(err)); ret = parse_events(evlist, str, &err); if (err.str) - parse_events_print_error(&err, "tracepoint"); + parse_events_error__print(&err, "tracepoint"); return ret; } diff --git a/tools/perf/bench/evlist-open-close.c b/tools/perf/bench/evlist-open-close.c index 75a53919126b..3f9518936367 100644 --- a/tools/perf/bench/evlist-open-close.c +++ b/tools/perf/bench/evlist-open-close.c @@ -89,7 +89,7 @@ static struct evlist *bench__create_evlist(char *evstr) ret = parse_events(evlist, evstr, &err); if (ret) { - parse_events_print_error(&err, evstr); + parse_events_error__print(&err, evstr); pr_err("Run 'perf list' for a list of valid events\n"); ret = 1; goto out_delete_evlist; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f0ecfda34ece..af447a179d84 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ 
-1782,7 +1782,7 @@ static int add_default_attributes(void) &errinfo); if (err) { fprintf(stderr, "Cannot set up transaction events\n"); - parse_events_print_error(&errinfo, transaction_attrs); + parse_events_error__print(&errinfo, transaction_attrs); return -1; } return 0; @@ -1812,11 +1812,11 @@ static int add_default_attributes(void) } else { fprintf(stderr, "To measure SMI cost, it needs " "msr/aperf/, msr/smi/ and cpu/cycles/ support\n"); - parse_events_print_error(&errinfo, smi_cost_attrs); + parse_events_error__print(&errinfo, smi_cost_attrs); return -1; } if (err) { - parse_events_print_error(&errinfo, smi_cost_attrs); + parse_events_error__print(&errinfo, smi_cost_attrs); fprintf(stderr, "Cannot set up SMI cost events\n"); return -1; } @@ -1883,7 +1883,7 @@ setup_metrics: fprintf(stderr, "Cannot set up top down events %s: %d\n", str, err); - parse_events_print_error(&errinfo, str); + parse_events_error__print(&errinfo, str); free(str); return -1; } @@ -1911,7 +1911,7 @@ setup_metrics: fprintf(stderr, "Cannot set up hybrid events %s: %d\n", hybrid_str, err); - parse_events_print_error(&errinfo, hybrid_str); + parse_events_error__print(&errinfo, hybrid_str); return -1; } return err; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 2f1d20553a0a..7f0acc94e9ac 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -4928,7 +4928,7 @@ int cmd_trace(int argc, const char **argv) bzero(&parse_err, sizeof(parse_err)); err = parse_events(trace.evlist, trace.perfconfig_events, &parse_err); if (err) { - parse_events_print_error(&parse_err, trace.perfconfig_events); + parse_events_error__print(&parse_err, trace.perfconfig_events); goto out; } } diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c index aaad51aba12f..57b4c5f30324 100644 --- a/tools/perf/tests/expand-cgroup.c +++ b/tools/perf/tests/expand-cgroup.c @@ -128,7 +128,7 @@ static int expand_group_events(void) if (ret < 0) { 
pr_debug("failed to parse event '%s', err %d, str '%s'\n", event_str, ret, err.str); - parse_events_print_error(&err, event_str); + parse_events_error__print(&err, event_str); goto out; } diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 8875e388563e..e200af986613 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -2059,7 +2059,7 @@ static int test_event(struct evlist_test *e) if (ret) { pr_debug("failed to parse event '%s', err %d, str '%s'\n", e->name, ret, err.str); - parse_events_print_error(&err, e->name); + parse_events_error__print(&err, e->name); } else { ret = e->check(evlist); } diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 4917e9704765..edd7180b24e4 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -1342,7 +1342,7 @@ static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids, bzero(&parse_error, sizeof(parse_error)); ret = __parse_events(parsed_evlist, events.buf, &parse_error, fake_pmu); if (ret) { - parse_events_print_error(&parse_error, events.buf); + parse_events_error__print(&parse_error, events.buf); goto err_out; } ret = decode_all_metric_ids(parsed_evlist, modifier); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 278199ed788b..75cafb9a0720 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -191,39 +191,6 @@ static int tp_event_has_id(const char *dir_path, struct dirent *evt_dir) #define MAX_EVENT_LENGTH 512 -void parse_events__handle_error(struct parse_events_error *err, int idx, - char *str, char *help) -{ - if (WARN(!str, "WARNING: failed to provide error string\n")) { - free(help); - return; - } - switch (err->num_errors) { - case 0: - err->idx = idx; - err->str = str; - err->help = help; - break; - case 1: - err->first_idx = err->idx; - err->idx = idx; - err->first_str = err->str; - err->str = str; - err->first_help = 
err->help; - err->help = help; - break; - default: - pr_debug("Multiple errors dropping message: %s (%s)\n", - err->str, err->help); - free(err->str); - err->str = str; - free(err->help); - err->help = help; - break; - } - err->num_errors++; -} - struct tracepoint_path *tracepoint_id_to_path(u64 config) { struct tracepoint_path *path = NULL; @@ -587,7 +554,7 @@ static void tracepoint_error(struct parse_events_error *e, int err, } tracing_path__strerror_open_tp(err, help, sizeof(help), sys, name); - parse_events__handle_error(e, 0, strdup(str), strdup(help)); + parse_events_error__handle(e, 0, strdup(str), strdup(help)); } static int add_tracepoint(struct list_head *list, int *idx, @@ -811,7 +778,7 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state, return 0; errout: - parse_events__handle_error(parse_state->error, 0, + parse_events_error__handle(parse_state->error, 0, strdup(errbuf), strdup("(add -v to see detail)")); return err; } @@ -831,7 +798,7 @@ parse_events_config_bpf(struct parse_events_state *parse_state, int err; if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) { - parse_events__handle_error(parse_state->error, term->err_term, + parse_events_error__handle(parse_state->error, term->err_term, strdup("Invalid config term for BPF object"), NULL); return -EINVAL; @@ -851,7 +818,7 @@ parse_events_config_bpf(struct parse_events_state *parse_state, else idx = term->err_term + error_pos; - parse_events__handle_error(parse_state->error, idx, + parse_events_error__handle(parse_state->error, idx, strdup(errbuf), strdup( "Hint:\tValid config terms:\n" @@ -923,7 +890,7 @@ int parse_events_load_bpf(struct parse_events_state *parse_state, -err, errbuf, sizeof(errbuf)); - parse_events__handle_error(parse_state->error, 0, + parse_events_error__handle(parse_state->error, 0, strdup(errbuf), strdup("(add -v to see detail)")); return err; } @@ -947,7 +914,7 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state, struct bpf_object *obj 
__maybe_unused, struct list_head *head_config __maybe_unused) { - parse_events__handle_error(parse_state->error, 0, + parse_events_error__handle(parse_state->error, 0, strdup("BPF support is not compiled"), strdup("Make sure libbpf-devel is available at build time.")); return -ENOTSUP; @@ -959,7 +926,7 @@ int parse_events_load_bpf(struct parse_events_state *parse_state, bool source __maybe_unused, struct list_head *head_config __maybe_unused) { - parse_events__handle_error(parse_state->error, 0, + parse_events_error__handle(parse_state->error, 0, strdup("BPF support is not compiled"), strdup("Make sure libbpf-devel is available at build time.")); return -ENOTSUP; @@ -1042,7 +1009,7 @@ static int check_type_val(struct parse_events_term *term, return 0; if (err) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, type == PARSE_EVENTS__TERM_TYPE_NUM ? strdup("expected numeric value") : strdup("expected string value"), @@ -1087,7 +1054,7 @@ config_term_avail(int term_type, struct parse_events_error *err) char *err_str; if (term_type < 0 || term_type >= __PARSE_EVENTS__TERM_TYPE_NR) { - parse_events__handle_error(err, -1, + parse_events_error__handle(err, -1, strdup("Invalid term_type"), NULL); return false; } @@ -1110,7 +1077,7 @@ config_term_avail(int term_type, struct parse_events_error *err) /* term_type is validated so indexing is safe */ if (asprintf(&err_str, "'%s' is not usable in 'perf stat'", config_term_names[term_type]) >= 0) - parse_events__handle_error(err, -1, err_str, NULL); + parse_events_error__handle(err, -1, err_str, NULL); return false; } } @@ -1154,7 +1121,7 @@ do { \ if (strcmp(term->val.str, "no") && parse_branch_str(term->val.str, &attr->branch_sample_type)) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, strdup("invalid branch sample type"), NULL); return -EINVAL; @@ -1163,7 +1130,7 @@ do { \ case PARSE_EVENTS__TERM_TYPE_TIME: 
CHECK_TYPE_VAL(NUM); if (term->val.num > 1) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, strdup("expected 0 or 1"), NULL); return -EINVAL; @@ -1202,7 +1169,7 @@ do { \ case PARSE_EVENTS__TERM_TYPE_PERCORE: CHECK_TYPE_VAL(NUM); if ((unsigned int)term->val.num > 1) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, strdup("expected 0 or 1"), NULL); return -EINVAL; @@ -1214,14 +1181,14 @@ do { \ case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: CHECK_TYPE_VAL(NUM); if (term->val.num > UINT_MAX) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, strdup("too big"), NULL); return -EINVAL; } break; default: - parse_events__handle_error(err, term->err_term, + parse_events_error__handle(err, term->err_term, strdup("unknown term"), parse_events_formats_error_string(NULL)); return -EINVAL; @@ -1275,7 +1242,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr, return config_term_common(attr, term, err); default: if (err) { - parse_events__handle_error(err, term->err_term, + parse_events_error__handle(err, term->err_term, strdup("unknown term"), strdup("valid terms: call-graph,stack-size\n")); } @@ -1574,7 +1541,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (asprintf(&err_str, "Cannot find PMU `%s'. 
Missing kernel support?", name) >= 0) - parse_events__handle_error(err, 0, err_str, NULL); + parse_events_error__handle(err, 0, err_str, NULL); return -EINVAL; } @@ -2334,6 +2301,39 @@ int __parse_events(struct evlist *evlist, const char *str, return ret; } +void parse_events_error__handle(struct parse_events_error *err, int idx, + char *str, char *help) +{ + if (WARN(!str, "WARNING: failed to provide error string\n")) { + free(help); + return; + } + switch (err->num_errors) { + case 0: + err->idx = idx; + err->str = str; + err->help = help; + break; + case 1: + err->first_idx = err->idx; + err->idx = idx; + err->first_str = err->str; + err->str = str; + err->first_help = err->help; + err->help = help; + break; + default: + pr_debug("Multiple errors dropping message: %s (%s)\n", + err->str, err->help); + free(err->str); + err->str = str; + free(err->help); + err->help = help; + break; + } + err->num_errors++; +} + #define MAX_WIDTH 1000 static int get_term_width(void) { @@ -2343,8 +2343,8 @@ static int get_term_width(void) return ws.ws_col > MAX_WIDTH ? 
MAX_WIDTH : ws.ws_col; } -static void __parse_events_print_error(int err_idx, const char *err_str, - const char *err_help, const char *event) +static void __parse_events_error__print(int err_idx, const char *err_str, + const char *err_help, const char *event) { const char *str = "invalid or unsupported event: "; char _buf[MAX_WIDTH]; @@ -2398,19 +2398,19 @@ static void __parse_events_print_error(int err_idx, const char *err_str, } } -void parse_events_print_error(struct parse_events_error *err, - const char *event) +void parse_events_error__print(struct parse_events_error *err, + const char *event) { if (!err->num_errors) return; - __parse_events_print_error(err->idx, err->str, err->help, event); + __parse_events_error__print(err->idx, err->str, err->help, event); zfree(&err->str); zfree(&err->help); if (err->num_errors > 1) { fputs("\nInitial error:\n", stderr); - __parse_events_print_error(err->first_idx, err->first_str, + __parse_events_error__print(err->first_idx, err->first_str, err->first_help, event); zfree(&err->first_str); zfree(&err->first_help); @@ -2430,7 +2430,7 @@ int parse_events_option(const struct option *opt, const char *str, ret = parse_events(evlist, str, &err); if (ret) { - parse_events_print_error(&err, str); + parse_events_error__print(&err, str); fprintf(stderr, "Run 'perf list' for a list of valid events\n"); } @@ -3324,7 +3324,7 @@ void parse_events_evlist_error(struct parse_events_state *parse_state, if (!parse_state->error) return; - parse_events__handle_error(parse_state->error, idx, strdup(str), NULL); + parse_events_error__handle(parse_state->error, idx, strdup(str), NULL); } static void config_terms_list(char *buf, size_t buf_sz) diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index f60a661a2247..52ac26b3720a 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -142,8 +142,6 @@ struct parse_events_state { char *hybrid_pmu_name; }; -void parse_events__handle_error(struct 
parse_events_error *err, int idx, - char *str, char *help); void parse_events__shrink_config_terms(void); int parse_events__is_hardcoded_term(struct parse_events_term *term); int parse_events_term__num(struct parse_events_term **term, @@ -244,8 +242,10 @@ int is_valid_tracepoint(const char *event_string); int valid_event_mount(const char *eventfs); char *parse_events_formats_error_string(char *additional_terms); -void parse_events_print_error(struct parse_events_error *err, - const char *event); +void parse_events_error__handle(struct parse_events_error *err, int idx, + char *str, char *help); +void parse_events_error__print(struct parse_events_error *err, + const char *event); #ifdef HAVE_LIBELF_SUPPORT /* diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 2d60f3cbe42b..174158982fae 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -186,7 +186,7 @@ group_def ':' PE_MODIFIER_EVENT struct parse_events_state *parse_state = _parse_state; struct parse_events_error *error = parse_state->error; - parse_events__handle_error(error, @3.first_column, + parse_events_error__handle(error, @3.first_column, strdup("Bad modifier"), NULL); free_list_evsel(list); YYABORT; @@ -248,7 +248,7 @@ event_name PE_MODIFIER_EVENT struct parse_events_state *parse_state = _parse_state; struct parse_events_error *error = parse_state->error; - parse_events__handle_error(error, @2.first_column, + parse_events_error__handle(error, @2.first_column, strdup("Bad modifier"), NULL); free_list_evsel(list); YYABORT; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index f3072c71d132..6ae58406f4fc 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1283,7 +1283,7 @@ static int pmu_config_term(const char *pmu_name, unknown_term = NULL; help_msg = parse_events_formats_error_string(pmu_term); if (err) { - parse_events__handle_error(err, term->err_term, + parse_events_error__handle(err, term->err_term, unknown_term, 
help_msg); } else { @@ -1316,7 +1316,7 @@ static int pmu_config_term(const char *pmu_name, if (term->no_value && bitmap_weight(format->bits, PERF_PMU_FORMAT_BITS) > 1) { if (err) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, strdup("no value assigned for term"), NULL); } @@ -1331,7 +1331,7 @@ static int pmu_config_term(const char *pmu_name, term->config, term->val.str); } if (err) { - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, strdup("expected numeric value"), NULL); } @@ -1348,7 +1348,7 @@ static int pmu_config_term(const char *pmu_name, if (err) { char *err_str; - parse_events__handle_error(err, term->err_val, + parse_events_error__handle(err, term->err_val, asprintf(&err_str, "value too big for format, maximum is %llu", (unsigned long long)max_val) < 0 -- cgit v1.2.3 From 07eafd4e053a41d72611848b8758df0752b53ee4 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sun, 7 Nov 2021 01:00:01 -0800 Subject: perf parse-event: Add init and exit to parse_event_error parse_events() may succeed but leave string memory allocations reachable in the error. Add an init/exit that must be called to initialize and clean up the error. This fixes a leak in metricgroup parse_ids. 
Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211107090002.3784612-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/kvm-stat.c | 3 ++- tools/perf/bench/evlist-open-close.c | 6 ++++-- tools/perf/builtin-stat.c | 38 +++++++++++++++++++-------------- tools/perf/builtin-trace.c | 17 ++++++--------- tools/perf/tests/backward-ring-buffer.c | 3 ++- tools/perf/tests/bpf.c | 3 ++- tools/perf/tests/expand-cgroup.c | 2 ++ tools/perf/tests/parse-events.c | 4 ++-- tools/perf/tests/pmu-events.c | 22 +++++++++---------- tools/perf/tests/topology.c | 2 ++ tools/perf/util/metricgroup.c | 3 ++- tools/perf/util/parse-events.c | 20 ++++++++++++----- tools/perf/util/parse-events.h | 2 ++ 13 files changed, 74 insertions(+), 51 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c index dc644ca01dc6..1a9b40ea92a5 100644 --- a/tools/perf/arch/powerpc/util/kvm-stat.c +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -113,10 +113,11 @@ static int is_tracepoint_available(const char *str, struct evlist *evlist) struct parse_events_error err; int ret; - bzero(&err, sizeof(err)); + parse_events_error__init(&err); ret = parse_events(evlist, str, &err); if (err.str) parse_events_error__print(&err, "tracepoint"); + parse_events_error__exit(&err); return ret; } diff --git a/tools/perf/bench/evlist-open-close.c b/tools/perf/bench/evlist-open-close.c index 3f9518936367..482738e9bdad 100644 --- a/tools/perf/bench/evlist-open-close.c +++ b/tools/perf/bench/evlist-open-close.c @@ -78,7 +78,7 @@ static int evlist__count_evsel_fds(struct evlist *evlist) static struct evlist *bench__create_evlist(char *evstr) { - struct parse_events_error err = { .idx = 0, }; + struct parse_events_error err; struct evlist *evlist = evlist__new(); int ret; @@ -87,14 
+87,16 @@ static struct evlist *bench__create_evlist(char *evstr) return NULL; } + parse_events_error__init(&err); ret = parse_events(evlist, evstr, &err); if (ret) { parse_events_error__print(&err, evstr); + parse_events_error__exit(&err); pr_err("Run 'perf list' for a list of valid events\n"); ret = 1; goto out_delete_evlist; } - + parse_events_error__exit(&err); ret = evlist__create_maps(evlist, &opts.target); if (ret < 0) { pr_err("Not enough memory to create thread/cpu maps\n"); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index af447a179d84..7974933dbc77 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1750,14 +1750,12 @@ static int add_default_attributes(void) (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, }; - struct parse_events_error errinfo; - /* Set attrs if no event is selected and !null_run: */ if (stat_config.null_run) return 0; - bzero(&errinfo, sizeof(errinfo)); if (transaction_run) { + struct parse_events_error errinfo; /* Handle -T as -M transaction. Once platform specific metrics * support has been added to the json files, all architectures * will use this approach. To determine transaction support @@ -1772,6 +1770,7 @@ static int add_default_attributes(void) &stat_config.metric_events); } + parse_events_error__init(&errinfo); if (pmu_have_event("cpu", "cycles-ct") && pmu_have_event("cpu", "el-start")) err = parse_events(evsel_list, transaction_attrs, @@ -1783,12 +1782,13 @@ static int add_default_attributes(void) if (err) { fprintf(stderr, "Cannot set up transaction events\n"); parse_events_error__print(&errinfo, transaction_attrs); - return -1; } - return 0; + parse_events_error__exit(&errinfo); + return err ? 
-1 : 0; } if (smi_cost) { + struct parse_events_error errinfo; int smi; if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) { @@ -1804,23 +1804,23 @@ static int add_default_attributes(void) smi_reset = true; } - if (pmu_have_event("msr", "aperf") && - pmu_have_event("msr", "smi")) { - if (!force_metric_only) - stat_config.metric_only = true; - err = parse_events(evsel_list, smi_cost_attrs, &errinfo); - } else { + if (!pmu_have_event("msr", "aperf") || + !pmu_have_event("msr", "smi")) { fprintf(stderr, "To measure SMI cost, it needs " "msr/aperf/, msr/smi/ and cpu/cycles/ support\n"); - parse_events_error__print(&errinfo, smi_cost_attrs); return -1; } + if (!force_metric_only) + stat_config.metric_only = true; + + parse_events_error__init(&errinfo); + err = parse_events(evsel_list, smi_cost_attrs, &errinfo); if (err) { parse_events_error__print(&errinfo, smi_cost_attrs); fprintf(stderr, "Cannot set up SMI cost events\n"); - return -1; } - return 0; + parse_events_error__exit(&errinfo); + return err ? 
-1 : 0; } if (topdown_run) { @@ -1875,18 +1875,22 @@ static int add_default_attributes(void) return -1; } if (topdown_attrs[0] && str) { + struct parse_events_error errinfo; if (warn) arch_topdown_group_warn(); setup_metrics: + parse_events_error__init(&errinfo); err = parse_events(evsel_list, str, &errinfo); if (err) { fprintf(stderr, "Cannot set up top down events %s: %d\n", str, err); parse_events_error__print(&errinfo, str); + parse_events_error__exit(&errinfo); free(str); return -1; } + parse_events_error__exit(&errinfo); } else { fprintf(stderr, "System does not support topdown\n"); return -1; @@ -1896,6 +1900,7 @@ setup_metrics: if (!evsel_list->core.nr_entries) { if (perf_pmu__has_hybrid()) { + struct parse_events_error errinfo; const char *hybrid_str = "cycles,instructions,branches,branch-misses"; if (target__has_cpu(&target)) @@ -1906,15 +1911,16 @@ setup_metrics: return -1; } + parse_events_error__init(&errinfo); err = parse_events(evsel_list, hybrid_str, &errinfo); if (err) { fprintf(stderr, "Cannot set up hybrid events %s: %d\n", hybrid_str, err); parse_events_error__print(&errinfo, hybrid_str); - return -1; } - return err; + parse_events_error__exit(&errinfo); + return err ? 
-1 : 0; } if (target__has_cpu(&target)) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7f0acc94e9ac..624ea12ce5ca 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3063,15 +3063,11 @@ static bool evlist__add_vfs_getname(struct evlist *evlist) struct parse_events_error err; int ret; - bzero(&err, sizeof(err)); + parse_events_error__init(&err); ret = parse_events(evlist, "probe:vfs_getname*", &err); - if (ret) { - free(err.str); - free(err.help); - free(err.first_str); - free(err.first_help); + parse_events_error__exit(&err); + if (ret) return false; - } evlist__for_each_entry_safe(evlist, evsel, tmp) { if (!strstarts(evsel__name(evsel), "probe:vfs_getname")) @@ -4925,12 +4921,13 @@ int cmd_trace(int argc, const char **argv) if (trace.perfconfig_events != NULL) { struct parse_events_error parse_err; - bzero(&parse_err, sizeof(parse_err)); + parse_events_error__init(&parse_err); err = parse_events(trace.evlist, trace.perfconfig_events, &parse_err); - if (err) { + if (err) parse_events_error__print(&parse_err, trace.perfconfig_events); + parse_events_error__exit(&parse_err); + if (err) goto out; - } } if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) { diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index b4b9a9488d51..7447a4478991 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -115,12 +115,13 @@ int test__backward_ring_buffer(struct test *test __maybe_unused, int subtest __m goto out_delete_evlist; } - bzero(&parse_error, sizeof(parse_error)); + parse_events_error__init(&parse_error); /* * Set backward bit, ring buffer should be writing from end. 
Record * it in aux evlist */ err = parse_events(evlist, "syscalls:sys_enter_prctl/overwrite/", &parse_error); + parse_events_error__exit(&parse_error); if (err) { pr_debug("Failed to parse tracepoint event, try use root\n"); ret = TEST_SKIP; diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index fa03ff0dc083..2bf146e49ce8 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -123,12 +123,13 @@ static int do_test(struct bpf_object *obj, int (*func)(void), struct parse_events_state parse_state; struct parse_events_error parse_error; - bzero(&parse_error, sizeof(parse_error)); + parse_events_error__init(&parse_error); bzero(&parse_state, sizeof(parse_state)); parse_state.error = &parse_error; INIT_LIST_HEAD(&parse_state.list); err = parse_events_load_bpf_obj(&parse_state, &parse_state.list, obj, NULL); + parse_events_error__exit(&parse_error); if (err || list_empty(&parse_state.list)) { pr_debug("Failed to add events selected by BPF\n"); return TEST_FAIL; diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c index 57b4c5f30324..80cff8a3558c 100644 --- a/tools/perf/tests/expand-cgroup.c +++ b/tools/perf/tests/expand-cgroup.c @@ -124,6 +124,7 @@ static int expand_group_events(void) evlist = evlist__new(); TEST_ASSERT_VAL("failed to get evlist", evlist); + parse_events_error__init(&err); ret = parse_events(evlist, event_str, &err); if (ret < 0) { pr_debug("failed to parse event '%s', err %d, str '%s'\n", @@ -135,6 +136,7 @@ static int expand_group_events(void) rblist__init(&metric_events); ret = test_expand_events(evlist, &metric_events); out: + parse_events_error__exit(&err); evlist__delete(evlist); return ret; } diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index e200af986613..6af94639b14a 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -2045,7 +2045,6 @@ static int test_event(struct evlist_test *e) struct evlist *evlist; int ret; - bzero(&err, 
sizeof(err)); if (e->valid && !e->valid()) { pr_debug("... SKIP"); return 0; @@ -2055,6 +2054,7 @@ static int test_event(struct evlist_test *e) if (evlist == NULL) return -ENOMEM; + parse_events_error__init(&err); ret = parse_events(evlist, e->name, &err); if (ret) { pr_debug("failed to parse event '%s', err %d, str '%s'\n", @@ -2063,7 +2063,7 @@ static int test_event(struct evlist_test *e) } else { ret = e->check(evlist); } - + parse_events_error__exit(&err); evlist__delete(evlist); return ret; diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 50b1299fe643..9ae894c406d8 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -787,9 +787,11 @@ static int check_parse_id(const char *id, struct parse_events_error *error, static int check_parse_cpu(const char *id, bool same_cpu, const struct pmu_event *pe) { - struct parse_events_error error = { .idx = 0, }; + struct parse_events_error error; + int ret; - int ret = check_parse_id(id, &error, NULL); + parse_events_error__init(&error); + ret = check_parse_id(id, &error, NULL); if (ret && same_cpu) { pr_warning("Parse event failed metric '%s' id '%s' expr '%s'\n", pe->metric_name, id, pe->metric_expr); @@ -800,22 +802,18 @@ static int check_parse_cpu(const char *id, bool same_cpu, const struct pmu_event id, pe->metric_name, pe->metric_expr); ret = 0; } - free(error.str); - free(error.help); - free(error.first_str); - free(error.first_help); + parse_events_error__exit(&error); return ret; } static int check_parse_fake(const char *id) { - struct parse_events_error error = { .idx = 0, }; - int ret = check_parse_id(id, &error, &perf_pmu__fake); + struct parse_events_error error; + int ret; - free(error.str); - free(error.help); - free(error.first_str); - free(error.first_help); + parse_events_error__init(&error); + ret = check_parse_id(id, &error, &perf_pmu__fake); + parse_events_error__exit(&error); return ret; } diff --git a/tools/perf/tests/topology.c 
b/tools/perf/tests/topology.c index b9028e304ddd..4574c46260d9 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -49,7 +49,9 @@ static int session_write_header(char *path) session->evlist = evlist__new(); TEST_ASSERT_VAL("can't get evlist", session->evlist); + parse_events_error__init(&err); parse_events(session->evlist, "cpu_core/cycles/", &err); + parse_events_error__exit(&err); } perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY); diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index edd7180b24e4..1b43cbc1961d 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -1339,7 +1339,7 @@ static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids, goto err_out; } pr_debug("Parsing metric events '%s'\n", events.buf); - bzero(&parse_error, sizeof(parse_error)); + parse_events_error__init(&parse_error); ret = __parse_events(parsed_evlist, events.buf, &parse_error, fake_pmu); if (ret) { parse_events_error__print(&parse_error, events.buf); @@ -1352,6 +1352,7 @@ static int parse_ids(struct perf_pmu *fake_pmu, struct expr_parse_ctx *ids, *out_evlist = parsed_evlist; parsed_evlist = NULL; err_out: + parse_events_error__exit(&parse_error); evlist__delete(parsed_evlist); strbuf_release(&events); return ret; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 75cafb9a0720..5bfb6f892489 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2301,6 +2301,19 @@ int __parse_events(struct evlist *evlist, const char *str, return ret; } +void parse_events_error__init(struct parse_events_error *err) +{ + bzero(err, sizeof(*err)); +} + +void parse_events_error__exit(struct parse_events_error *err) +{ + zfree(&err->str); + zfree(&err->help); + zfree(&err->first_str); + zfree(&err->first_help); +} + void parse_events_error__handle(struct parse_events_error *err, int idx, char *str, char *help) { @@ -2405,15 +2418,11 @@ void 
parse_events_error__print(struct parse_events_error *err, return; __parse_events_error__print(err->idx, err->str, err->help, event); - zfree(&err->str); - zfree(&err->help); if (err->num_errors > 1) { fputs("\nInitial error:\n", stderr); __parse_events_error__print(err->first_idx, err->first_str, err->first_help, event); - zfree(&err->first_str); - zfree(&err->first_help); } } @@ -2426,13 +2435,14 @@ int parse_events_option(const struct option *opt, const char *str, struct parse_events_error err; int ret; - bzero(&err, sizeof(err)); + parse_events_error__init(&err); ret = parse_events(evlist, str, &err); if (ret) { parse_events_error__print(&err, str); fprintf(stderr, "Run 'perf list' for a list of valid events\n"); } + parse_events_error__exit(&err); return ret; } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 52ac26b3720a..c7fc93f54577 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -242,6 +242,8 @@ int is_valid_tracepoint(const char *event_string); int valid_event_mount(const char *eventfs); char *parse_events_formats_error_string(char *additional_terms); +void parse_events_error__init(struct parse_events_error *err); +void parse_events_error__exit(struct parse_events_error *err); void parse_events_error__handle(struct parse_events_error *err, int idx, char *str, char *help); void parse_events_error__print(struct parse_events_error *err, -- cgit v1.2.3 From aba8c5e38075fa0e0a5463b73b0788f71bb4c78d Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Sun, 7 Nov 2021 01:00:02 -0800 Subject: perf metric: Fix memory leaks Certain error paths may leak memory as caught by address sanitizer. Ensure this is cleaned up to make sure address/leak sanitizer is happy. 
Fixes: 5ecd5a0c7d1cca79 ("perf metrics: Modify setup and deduplication") Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: John Garry Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20211107090002.3784612-3-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 1b43cbc1961d..fffe02aae3ed 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -228,6 +228,7 @@ static void metric__free(struct metric *m) free(m->metric_refs); expr__ctx_free(m->pctx); free((char *)m->modifier); + evlist__delete(m->evlist); free(m); } @@ -1482,8 +1483,10 @@ static int parse_groups(struct evlist *perf_evlist, const char *str, } - if (combined_evlist) + if (combined_evlist) { evlist__splice_list_tail(perf_evlist, &combined_evlist->core.entries); + evlist__delete(combined_evlist); + } list_for_each_entry(m, &metric_list, nd) { if (m->evlist) -- cgit v1.2.3 From f174940488dd7409e3d4dc96403380e67783d05d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 7 Nov 2021 12:43:20 -0300 Subject: perf MANIFEST: Add bpftool files to allow building with BUILD_BPF_SKEL=1 We need bpftool and required kernel/bpf/disasm.[ch] to bootstrap the cgroups, bperf and other BPF skels used by perf. 
Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/MANIFEST | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index e728615a3830..f5d72f936a6b 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -22,3 +22,6 @@ tools/lib/str_error_r.c tools/lib/vsprintf.c tools/lib/zalloc.c scripts/bpf_doc.py +tools/bpf/bpftool +kernel/bpf/disasm.c +kernel/bpf/disasm.h -- cgit v1.2.3 From 6b491a86b77c0dc323ca49f3a29a0f67178b75f8 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Sun, 7 Nov 2021 00:24:45 +0000 Subject: perf build: Install libbpf headers locally when building API headers from libbpf should not be accessed directly from the library's source directory. Instead, they should be exported with "make install_headers". Let's adjust perf's Makefile to install those headers locally when building libbpf. v2: - Fix $(LIBBPF_OUTPUT) when $(OUTPUT) is null. - Make sure the recipe for $(LIBBPF_OUTPUT) is not under a "ifdef". 
Signed-off-by: Quentin Monnet Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Namhyung Kim Cc: Song Liu Cc: bpf@vger.kernel.org Cc: netdev@vger.kernel.org Link: http://lore.kernel.org/lkml/20211107002445.4790-1-quentin@isovalent.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index a3966f290297..0777748b6da8 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -243,7 +243,7 @@ else # force_fixdep LIB_DIR = $(srctree)/tools/lib/api/ TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ -BPF_DIR = $(srctree)/tools/lib/bpf/ +LIBBPF_DIR = $(srctree)/tools/lib/bpf/ SUBCMD_DIR = $(srctree)/tools/lib/subcmd/ LIBPERF_DIR = $(srctree)/tools/lib/perf/ DOC_DIR = $(srctree)/tools/perf/Documentation/ @@ -295,7 +295,6 @@ strip-libs = $(filter-out -l%,$(1)) ifneq ($(OUTPUT),) TE_PATH=$(OUTPUT) PLUGINS_PATH=$(OUTPUT) - BPF_PATH=$(OUTPUT) SUBCMD_PATH=$(OUTPUT) LIBPERF_PATH=$(OUTPUT) ifneq ($(subdir),) @@ -307,7 +306,6 @@ else TE_PATH=$(TRACE_EVENT_DIR) PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/ API_PATH=$(LIB_DIR) - BPF_PATH=$(BPF_DIR) SUBCMD_PATH=$(SUBCMD_DIR) LIBPERF_PATH=$(LIBPERF_DIR) endif @@ -326,7 +324,14 @@ LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DY LIBAPI = $(API_PATH)libapi.a export LIBAPI -LIBBPF = $(BPF_PATH)libbpf.a +ifneq ($(OUTPUT),) + LIBBPF_OUTPUT = $(abspath $(OUTPUT))/libbpf +else + LIBBPF_OUTPUT = $(CURDIR)/libbpf +endif +LIBBPF_DESTDIR = $(LIBBPF_OUTPUT) +LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include +LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a LIBSUBCMD = $(SUBCMD_PATH)libsubcmd.a @@ -831,12 +836,14 @@ $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -$(LIBBPF): FORCE - $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a 
FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) +$(LIBBPF): FORCE | $(LIBBPF_OUTPUT) + $(Q)$(MAKE) -C $(LIBBPF_DIR) FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) \ + O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \ + $@ install_headers $(LIBBPF)-clean: $(call QUIET_CLEAN, libbpf) - $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) clean >/dev/null + $(Q)$(RM) -r -- $(LIBBPF_OUTPUT) $(LIBPERF): FORCE $(Q)$(MAKE) -C $(LIBPERF_DIR) EXTRA_CFLAGS="$(LIBPERF_CFLAGS)" O=$(OUTPUT) $(OUTPUT)libperf.a @@ -1036,16 +1043,15 @@ SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h +$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT): + $(Q)$(MKDIR) -p $@ + ifdef BUILD_BPF_SKEL BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool -LIBBPF_SRC := $(abspath ../lib/bpf) -BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(BPF_PATH) -I$(LIBBPF_SRC)/.. - -$(SKEL_TMP_OUT): - $(Q)$(MKDIR) -p $@ +BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(LIBBPF_INCLUDE) $(BPFTOOL): | $(SKEL_TMP_OUT) - CFLAGS= $(MAKE) -C ../bpf/bpftool \ + $(Q)CFLAGS= $(MAKE) -C ../bpf/bpftool \ OUTPUT=$(SKEL_TMP_OUT)/ bootstrap VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ -- cgit v1.2.3