From 0f59d7a352c11712de0f226b46cb82775b4fcece Mon Sep 17 00:00:00 2001 From: David Ahern Date: Fri, 1 Sep 2017 10:49:12 -0700 Subject: perf sched timehist: Add pid and tid options Add options to only show event for specific pid(s) and tid(s). Signed-off-by: David Ahern Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1504288152-19690-1-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-sched.txt | 8 ++++++++ tools/perf/builtin-sched.c | 4 ++++ 2 files changed, 12 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index a092a2499e8f..55b67338548e 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -106,6 +106,14 @@ OPTIONS for 'perf sched timehist' --max-stack:: Maximum number of functions to display in backtrace, default 5. +-p=:: +--pid=:: + Only show events for given process ID (comma separated list). + +-t=:: +--tid=:: + Only show events for given thread ID (comma separated list). + -s:: --summary:: Show only a summary of scheduling by thread with min, max, and average diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 322b4def8411..b7e8812ee80c 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -3363,6 +3363,10 @@ int cmd_sched(int argc, const char **argv) OPT_STRING(0, "time", &sched.time_str, "str", "Time span for analysis (start,stop)"), OPT_BOOLEAN(0, "state", &sched.show_state, "Show task state when sched-out"), + OPT_STRING('p', "pid", &symbol_conf.pid_list_str, "pid[,pid...]", + "analyze events only for given process id(s)"), + OPT_STRING('t', "tid", &symbol_conf.tid_list_str, "tid[,tid...]", + "analyze events only for given thread id(s)"), OPT_PARENT(sched_options) }; -- cgit v1.2.3 From 5a5dfe4b8548d806bf433090995ee0ee4c139f11 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:26 -0700 Subject: perf tools: Support weak groups in 'perf stat' Setting up groups can be complicated due to the complicated scheduling restrictions of different PMUs. User tools usually don't understand all these restrictions. Still in many cases it is useful to set up groups and they work most of the time. However if the group is set up wrong some members will not report any value because they never get scheduled. Add a concept of a 'weak group': try to set up a group, but if it's not schedulable fallback to not using a group. That gives us the best of both worlds: groups if they work, but still a usable fallback if they don't. In theory it would be possible to have more complex fallback strategies (e.g. try to split the group in half), but the simple fallback of not using a group seems to work for now. So far the weak group is only implemented for perf stat, not for record. Here's an unschedulable group (on IvyBridge with SMT on) % perf stat -e '{branches,branch-misses,l1d.replacement,l2_lines_in.all,l2_rqsts.all_code_rd}' -a sleep 1 73,806,067 branches 4,848,144 branch-misses # 6.57% of all branches 14,754,458 l1d.replacement 24,905,558 l2_lines_in.all l2_rqsts.all_code_rd <------- will never report anything With the weak group: % perf stat -e '{branches,branch-misses,l1d.replacement,l2_lines_in.all,l2_rqsts.all_code_rd}:W' -a sleep 1 125,366,055 branches (80.02%) 9,208,402 branch-misses # 7.35% of all branches (80.01%) 24,560,249 l1d.replacement (80.00%) 43,174,971 l2_lines_in.all (80.05%) 31,891,457 l2_rqsts.all_code_rd (79.92%) The extra event scheduled with some extra multiplexing v2: Move fallback code to separate function. Add comment on for_each_group_member Adjust to new perf_evsel__close interface v3: Fix debug print out. Committer testing: Before: # perf stat -e '{branches,branch-misses,l1d.replacement,l2_lines_in.all,l2_rqsts.all_code_rd}' -a sleep 1 Performance counter stats for 'system wide': branches branch-misses l1d.replacement l2_lines_in.all l2_rqsts.all_code_rd 1.002147212 seconds time elapsed # perf stat -e '{branches,l1d.replacement,l2_lines_in.all,l2_rqsts.all_code_rd}' -a sleep 1 Performance counter stats for 'system wide': 83,207,892 branches 11,065,444 l1d.replacement 28,484,024 l2_lines_in.all 12,186,179 l2_rqsts.all_code_rd 1.001739493 seconds time elapsed After: # perf stat -e '{branches,branch-misses,l1d.replacement,l2_lines_in.all,l2_rqsts.all_code_rd}':W -a sleep 1 Performance counter stats for 'system wide': 543,323,909 branches (80.01%) 27,100,512 branch-misses # 4.99% of all branches (80.02%) 50,402,905 l1d.replacement (80.03%) 67,385,892 l2_lines_in.all (80.01%) 21,352,885 l2_rqsts.all_code_rd (79.94%) 1.001086658 seconds time elapsed # Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/20170831194036.30146-2-andi@firstfloor.org [ Add a "'perf stat' only, for now" comment in the man page, suggested by Jiri ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 2 ++ tools/perf/builtin-stat.c | 35 ++++++++++++++++++++++++++++++++++ tools/perf/util/evsel.h | 1 + tools/perf/util/parse-events.c | 8 +++++++- tools/perf/util/parse-events.l | 2 +- 5 files changed, 46 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index f709de54707b..75fc17f47298 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -47,6 +47,8 @@ counted. The following modifiers exist: P - use maximum detected precise level S - read sample value (PERF_SAMPLE_READ) D - pin the event to the PMU + W - group is weak and will fallback to non-group if not schedulable, + only supported in 'perf stat' for now. The 'p' modifier can be used for specifying how precise the instruction address should be. The 'p' modifier can be specified multiple times: diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 69523ed55894..7cc61eb0d83b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -582,6 +582,32 @@ static bool perf_evsel__should_store_id(struct perf_evsel *counter) return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID; } +static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel) +{ + struct perf_evsel *c2, *leader; + bool is_open = true; + + leader = evsel->leader; + pr_debug("Weak group for %s/%d failed\n", + leader->name, leader->nr_members); + + /* + * for_each_group_member doesn't work here because it doesn't + * include the first entry. + */ + evlist__for_each_entry(evsel_list, c2) { + if (c2 == evsel) + is_open = false; + if (c2->leader == leader) { + if (is_open) + perf_evsel__close(c2); + c2->leader = c2; + c2->nr_members = 0; + } + } + return leader; +} + static int __run_perf_stat(int argc, const char **argv) { int interval = stat_config.interval; @@ -618,6 +644,15 @@ static int __run_perf_stat(int argc, const char **argv) evlist__for_each_entry(evsel_list, counter) { try_again: if (create_perf_stat_counter(counter) < 0) { + + /* Weak group failed. Reset the group. */ + if (errno == EINVAL && + counter->leader != counter && + counter->weak_group) { + counter = perf_evsel__reset_weak_group(counter); + goto try_again; + } + /* * PPC returns ENXIO for HW counters until 2.6.37 * (behavior changed with commit b0a873e). diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index dd2c4b5112a5..db658785d828 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -137,6 +137,7 @@ struct perf_evsel { const char * metric_name; struct perf_evsel **metric_events; bool collect_stat; + bool weak_group; }; union u64_swap { diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f6257fb4f08c..57d7acf890e0 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1366,6 +1366,7 @@ struct event_modifier { int exclude_GH; int sample_read; int pinned; + int weak; }; static int get_event_modifier(struct event_modifier *mod, char *str, @@ -1384,6 +1385,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, int exclude = eu | ek | eh; int exclude_GH = evsel ? evsel->exclude_GH : 0; + int weak = 0; memset(mod, 0, sizeof(*mod)); @@ -1421,6 +1423,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str, sample_read = 1; } else if (*str == 'D') { pinned = 1; + } else if (*str == 'W') { + weak = 1; } else break; @@ -1451,6 +1455,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, mod->exclude_GH = exclude_GH; mod->sample_read = sample_read; mod->pinned = pinned; + mod->weak = weak; return 0; } @@ -1464,7 +1469,7 @@ static int check_modifier(char *str) char *p = str; /* The sizeof includes 0 byte as well. */ - if (strlen(str) > (sizeof("ukhGHpppPSDI") - 1)) + if (strlen(str) > (sizeof("ukhGHpppPSDIW") - 1)) return -1; while (*p) { @@ -1504,6 +1509,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) evsel->exclude_GH = mod.exclude_GH; evsel->sample_read = mod.sample_read; evsel->precise_max = mod.precise_max; + evsel->weak_group = mod.weak; if (perf_evsel__is_group_leader(evsel)) evsel->attr.pinned = mod.pinned; diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index c42edeac451f..fdb5bb52f01f 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -161,7 +161,7 @@ name [a-zA-Z_*?][a-zA-Z0-9_*?.]* name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? /* If you add a modifier you need to update check_modifier() */ -modifier_event [ukhpPGHSDI]+ +modifier_event [ukhpPGHSDIW]+ modifier_bp [rwx]{1,3} %% -- cgit v1.2.3 From 3ba36d3620d08be31f5ee9ae20abb9bf3bdeb05a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:27 -0700 Subject: perf vendor events: Support metric_group and no event name in JSON parser Some enhancements to the JSON parser to prepare for metrics support - Parse the new MetricGroup field - Support JSON events with no event name, that have only MetricName. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170831194036.30146-3-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 24 ++++++++++++++++++------ tools/perf/pmu-events/jevents.h | 2 +- tools/perf/pmu-events/pmu-events.h | 1 + 3 files changed, 20 insertions(+), 7 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index d51dc9ca8861..9eb7047bafe4 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -292,7 +292,7 @@ static int print_events_table_entry(void *data, char *name, char *event, char *desc, char *long_desc, char *pmu, char *unit, char *perpkg, char *metric_expr, - char *metric_name) + char *metric_name, char *metric_group) { struct perf_entry_data *pd = data; FILE *outfp = pd->outfp; @@ -304,8 +304,10 @@ static int print_events_table_entry(void *data, char *name, char *event, */ fprintf(outfp, "{\n"); - fprintf(outfp, "\t.name = \"%s\",\n", name); - fprintf(outfp, "\t.event = \"%s\",\n", event); + if (name) + fprintf(outfp, "\t.name = \"%s\",\n", name); + if (event) + fprintf(outfp, "\t.event = \"%s\",\n", event); fprintf(outfp, "\t.desc = \"%s\",\n", desc); fprintf(outfp, "\t.topic = \"%s\",\n", topic); if (long_desc && long_desc[0]) @@ -320,6 +322,8 @@ static int print_events_table_entry(void *data, char *name, char *event, fprintf(outfp, "\t.metric_expr = \"%s\",\n", metric_expr); if (metric_name) fprintf(outfp, "\t.metric_name = \"%s\",\n", metric_name); + if (metric_group) + fprintf(outfp, "\t.metric_group = \"%s\",\n", metric_group); fprintf(outfp, "},\n"); return 0; @@ -357,6 +361,9 @@ static char *real_event(const char *name, char *event) { int i; + if (!name) + return NULL; + for (i = 0; fixed[i].name; i++) if (!strcasecmp(name, fixed[i].name)) return (char *)fixed[i].event; @@ -369,7 +376,7 @@ int json_events(const char *fn, char *long_desc, char *pmu, char *unit, char *perpkg, char *metric_expr, - char *metric_name), + char *metric_name, char *metric_group), void *data) { int err = -EIO; @@ -397,6 +404,7 @@ int json_events(const char *fn, char *unit = NULL; char *metric_expr = NULL; char *metric_name = NULL; + char *metric_group = NULL; unsigned long long eventcode = 0; struct msrmap *msr = NULL; jsmntok_t *msrval = NULL; @@ -476,6 +484,8 @@ int json_events(const char *fn, addfield(map, &perpkg, "", "", val); } else if (json_streq(map, field, "MetricName")) { addfield(map, &metric_name, "", "", val); + } else if (json_streq(map, field, "MetricGroup")) { + addfield(map, &metric_group, "", "", val); } else if (json_streq(map, field, "MetricExpr")) { addfield(map, &metric_expr, "", "", val); for (s = metric_expr; *s; s++) @@ -501,10 +511,11 @@ int json_events(const char *fn, addfield(map, &event, ",", filter, NULL); if (msr != NULL) addfield(map, &event, ",", msr->pname, msrval); - fixname(name); + if (name) + fixname(name); err = func(data, name, real_event(name, event), desc, long_desc, - pmu, unit, perpkg, metric_expr, metric_name); + pmu, unit, perpkg, metric_expr, metric_name, metric_group); free(event); free(desc); free(name); @@ -516,6 +527,7 @@ int json_events(const char *fn, free(unit); free(metric_expr); free(metric_name); + free(metric_group); if (err) break; tok += j; diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h index 611fac01913d..557994754410 100644 --- a/tools/perf/pmu-events/jevents.h +++ b/tools/perf/pmu-events/jevents.h @@ -6,7 +6,7 @@ int json_events(const char *fn, char *long_desc, char *pmu, char *unit, char *perpkg, char *metric_expr, - char *metric_name), + char *metric_name, char *metric_group), void *data); char *get_cpu_str(void); diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 569eab3688dd..94fa1720f6fd 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -15,6 +15,7 @@ struct pmu_event { const char *perpkg; const char *metric_expr; const char *metric_name; + const char *metric_group; }; /* -- cgit v1.2.3 From bba49af87393ebc8960bf8abdcbb9af53bf1aba1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:28 -0700 Subject: perf stat: Factor out generic metric printing The 'perf stat' shadow metric printing already supports generic metrics. Factor out the code doing that into a separate function that can be re-used in a later patch. No behavior changes. v2: Fix indentation Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170831194036.30146-4-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 69 ++++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 27 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index a04cf56d3517..96aa6cbf24d6 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -627,6 +627,46 @@ static void print_smi_cost(int cpu, struct perf_evsel *evsel, out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num); } +static void generic_metric(const char *metric_expr, + struct perf_evsel **metric_events, + char *name, + const char *metric_name, + double avg, + int cpu, + int ctx, + struct perf_stat_output_ctx *out) +{ + print_metric_t print_metric = out->print_metric; + struct parse_ctx pctx; + double ratio; + int i; + void *ctxp = out->ctx; + + expr__ctx_init(&pctx); + expr__add_id(&pctx, name, avg); + for (i = 0; metric_events[i]; i++) { + struct saved_value *v; + + v = saved_value_lookup(metric_events[i], cpu, ctx, false); + if (!v) + break; + expr__add_id(&pctx, metric_events[i]->name, avg_stats(&v->stats)); + } + if (!metric_events[i]) { + const char *p = metric_expr; + + if (expr__parse(&ratio, &pctx, &p) == 0) + print_metric(ctxp, NULL, "%8.1f", + metric_name ? + metric_name : + out->force_header ? name : "", + ratio); + else + print_metric(ctxp, NULL, NULL, "", 0); + } else + print_metric(ctxp, NULL, NULL, "", 0); +} + void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out) @@ -819,33 +859,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, else print_metric(ctxp, NULL, NULL, name, 0); } else if (evsel->metric_expr) { - struct parse_ctx pctx; - int i; - - expr__ctx_init(&pctx); - expr__add_id(&pctx, evsel->name, avg); - for (i = 0; evsel->metric_events[i]; i++) { - struct saved_value *v; - - v = saved_value_lookup(evsel->metric_events[i], cpu, ctx, false); - if (!v) - break; - expr__add_id(&pctx, evsel->metric_events[i]->name, - avg_stats(&v->stats)); - } - if (!evsel->metric_events[i]) { - const char *p = evsel->metric_expr; - - if (expr__parse(&ratio, &pctx, &p) == 0) - print_metric(ctxp, NULL, "%8.1f", - evsel->metric_name ? - evsel->metric_name : - out->force_header ? evsel->name : "", - ratio); - else - print_metric(ctxp, NULL, NULL, "", 0); - } else - print_metric(ctxp, NULL, NULL, "", 0); + generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name, + evsel->metric_name, avg, cpu, ctx, out); } else if (runtime_nsecs_stats[cpu].n != 0) { char unit = 'M'; char unit_buf[10]; -- cgit v1.2.3 From 4ed962eb38c8a33b8b6ded911410afaefa1ca48c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:29 -0700 Subject: perf stat: Print generic metric header even for failed expressions Print the generic metric header even when the expression evaluation failed. Otherwise an expression that fails on the first collections due to division by zero may suddenly reappear later without an header. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170831194036.30146-5-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 96aa6cbf24d6..8c7ab29169b9 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -662,7 +662,9 @@ static void generic_metric(const char *metric_expr, out->force_header ? name : "", ratio); else - print_metric(ctxp, NULL, NULL, "", 0); + print_metric(ctxp, NULL, NULL, + out->force_header ? + (metric_name ? metric_name : name) : "", 0); } else print_metric(ctxp, NULL, NULL, "", 0); } -- cgit v1.2.3 From d77ade9f4199c77c63e2ae382a8c8fbe0582ede2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:30 -0700 Subject: perf pmu: Extract function to get JSON alias map Extract the code to get the per cpu JSON alias into a separate function for reuse. No behavior changes. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170831194036.30146-6-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 49 +++++++++++++++++++++++++++++++++---------------- tools/perf/util/pmu.h | 2 ++ 2 files changed, 35 insertions(+), 16 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ac16a9db1fb5..ed25d7f88731 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -516,16 +516,8 @@ char * __weak get_cpuid_str(void) return NULL; } -/* - * From the pmu_events_map, find the table of PMU events that corresponds - * to the current running CPU. Then, add all PMU events from that table - * as aliases. - */ -static void pmu_add_cpu_aliases(struct list_head *head, const char *name) +static char *perf_pmu__getcpuid(void) { - int i; - struct pmu_events_map *map; - struct pmu_event *pe; char *cpuid; static bool printed; @@ -535,22 +527,50 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name) if (!cpuid) cpuid = get_cpuid_str(); if (!cpuid) - return; + return NULL; if (!printed) { pr_debug("Using CPUID %s\n", cpuid); printed = true; } + return cpuid; +} + +struct pmu_events_map *perf_pmu__find_map(void) +{ + struct pmu_events_map *map; + char *cpuid = perf_pmu__getcpuid(); + int i; i = 0; - while (1) { + for (;;) { map = &pmu_events_map[i++]; - if (!map->table) - goto out; + if (!map->table) { + map = NULL; + break; + } if (!strcmp(map->cpuid, cpuid)) break; } + free(cpuid); + return map; +} + +/* + * From the pmu_events_map, find the table of PMU events that corresponds + * to the current running CPU. Then, add all PMU events from that table + * as aliases. + */ +static void pmu_add_cpu_aliases(struct list_head *head, const char *name) +{ + int i; + struct pmu_events_map *map; + struct pmu_event *pe; + + map = perf_pmu__find_map(); + if (!map) + return; /* * Found a matching PMU events table. Create aliases @@ -575,9 +595,6 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name) (char *)pe->metric_expr, (char *)pe->metric_name); } - -out: - free(cpuid); } struct perf_event_attr * __weak diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 389e9729331f..060f6abba8ed 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -90,4 +90,6 @@ int perf_pmu__test(void); struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu); +struct pmu_events_map *perf_pmu__find_map(void); + #endif /* __PMU_H */ -- cgit v1.2.3 From b18f3e365019de1a5b26a851e123f0aedcce881f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:31 -0700 Subject: perf stat: Support JSON metrics in perf stat Add generic support for standalone metrics specified in JSON files to perf stat. A metric is a formula that uses multiple events to compute a higher level result (e.g. IPC). Previously metrics were always tied to an event and automatically enabled with that event. But now change it that we can have standalone metrics. They are in the same JSON data structure as events, but don't have an event name. We also allow to organize the metrics in metric groups, which allows a short cut to select several related metrics at once. Add a new -M / --metrics option to perf stat that adds the metrics or metric groups specified. Add the core code to manage and parse the metric groups. They are collected from the JSON data structures into a separate rblist. When computing shadow values look for metrics in that list. Then they are computed using the existing saved values infrastructure in stat-shadow.c The actual JSON metrics are in a separate pull request. % perf stat -M Summary --metric-only -a sleep 1 Performance counter stats for 'system wide': Instructions CLKS CPU_Utilization GFLOPs SMT_2T_Utilization Kernel_Utilization 317614222.0 1392930775.0 0.0 0.0 0.2 0.1 1.001497549 seconds time elapsed % perf stat -M GFLOPs flops Performance counter stats for 'flops': 3,999,541,471 fp_comp_ops_exe.sse_scalar_single # 1.2 GFLOPs (66.65%) 14 fp_comp_ops_exe.sse_scalar_double (66.65%) 0 fp_comp_ops_exe.sse_packed_double (66.67%) 0 fp_comp_ops_exe.sse_packed_single (66.70%) 0 simd_fp_256.packed_double (66.70%) 0 simd_fp_256.packed_single (66.67%) 0 duration_time 3.238372845 seconds time elapsed v2: Add missing header file v3: Move find_map to pmu.c Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170831194036.30146-7-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 7 + tools/perf/builtin-stat.c | 18 +- tools/perf/util/Build | 1 + tools/perf/util/metricgroup.c | 313 +++++++++++++++++++++++++++++++++ tools/perf/util/metricgroup.h | 31 ++++ tools/perf/util/pmu.c | 5 +- tools/perf/util/stat-shadow.c | 22 ++- tools/perf/util/stat.h | 4 +- 8 files changed, 395 insertions(+), 6 deletions(-) create mode 100644 tools/perf/util/metricgroup.c create mode 100644 tools/perf/util/metricgroup.h (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index c37d61682dfb..823fce7674bb 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -199,6 +199,13 @@ Aggregate counts per processor socket for system-wide mode measurements. --per-core:: Aggregate counts per physical processor for system-wide mode measurements. +-M:: +--metrics:: +Print metrics or metricgroups specified in a comma separated list. +For a group all metrics from the group are added. +The events from the metrics are automatically measured. +See perf list output for the possble metrics and metricgroups. + -A:: --no-aggr:: Do not aggregate counts across all monitored CPUs. diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7cc61eb0d83b..874bc6dd8d60 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -65,6 +65,7 @@ #include "util/tool.h" #include "util/group.h" #include "util/string2.h" +#include "util/metricgroup.h" #include "asm/bug.h" #include @@ -133,6 +134,8 @@ static const char *smi_cost_attrs = { static struct perf_evlist *evsel_list; +static struct rblist metric_events; + static struct target target = { .uid = UINT_MAX, }; @@ -1234,7 +1237,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, perf_stat__print_shadow_stats(counter, uval, first_shadow_cpu(counter, id), - &out); + &out, &metric_events); if (!csv_output && !metric_only) { print_noise(counter, noise); print_running(run, ena); @@ -1565,7 +1568,8 @@ static void print_metric_headers(const char *prefix, bool no_indent) os.evsel = counter; perf_stat__print_shadow_stats(counter, 0, 0, - &out); + &out, + &metric_events); } fputc('\n', stat_config.output); } @@ -1789,6 +1793,13 @@ static int enable_metric_only(const struct option *opt __maybe_unused, return 0; } +static int parse_metric_groups(const struct option *opt, + const char *str, + int unset __maybe_unused) +{ + return metricgroup__parse_groups(opt, str, &metric_events); +} + static const struct option stat_options[] = { OPT_BOOLEAN('T', "transaction", &transaction_run, "hardware transaction statistics"), @@ -1854,6 +1865,9 @@ static const struct option stat_options[] = { "measure topdown level 1 statistics"), OPT_BOOLEAN(0, "smi-cost", &smi_cost, "measure SMI cost"), + OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list", + "monitor specified metrics or metric groups (separated by ,)", + parse_metric_groups), OPT_END() }; diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 94518c1bf8b6..71ab8466714d 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -34,6 +34,7 @@ libperf-y += dso.o libperf-y += symbol.o libperf-y += symbol_fprintf.o libperf-y += color.o +libperf-y += metricgroup.o libperf-y += header.o libperf-y += callchain.o libperf-y += values.o diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c new file mode 100644 index 000000000000..7516b1746594 --- /dev/null +++ b/tools/perf/util/metricgroup.c @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2017, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +/* Manage metrics and groups of metrics from JSON files */ + +#include "metricgroup.h" +#include "evlist.h" +#include "strbuf.h" +#include "pmu.h" +#include "expr.h" +#include "rblist.h" +#include "pmu.h" +#include +#include +#include +#include "pmu-events/pmu-events.h" +#include "strbuf.h" +#include "strlist.h" +#include +#include + +struct metric_event *metricgroup__lookup(struct rblist *metric_events, + struct perf_evsel *evsel, + bool create) +{ + struct rb_node *nd; + struct metric_event me = { + .evsel = evsel + }; + nd = rblist__find(metric_events, &me); + if (nd) + return container_of(nd, struct metric_event, nd); + if (create) { + rblist__add_node(metric_events, &me); + nd = rblist__find(metric_events, &me); + if (nd) + return container_of(nd, struct metric_event, nd); + } + return NULL; +} + +static int metric_event_cmp(struct rb_node *rb_node, const void *entry) +{ + struct metric_event *a = container_of(rb_node, + struct metric_event, + nd); + const struct metric_event *b = entry; + + if (a->evsel == b->evsel) + return 0; + if ((char *)a->evsel < (char *)b->evsel) + return -1; + return +1; +} + +static struct rb_node *metric_event_new(struct rblist *rblist __maybe_unused, + const void *entry) +{ + struct metric_event *me = malloc(sizeof(struct metric_event)); + + if (!me) + return NULL; + memcpy(me, entry, sizeof(struct metric_event)); + me->evsel = ((struct metric_event *)entry)->evsel; + INIT_LIST_HEAD(&me->head); + return &me->nd; +} + +static void metricgroup__rblist_init(struct rblist *metric_events) +{ + rblist__init(metric_events); + metric_events->node_cmp = metric_event_cmp; + metric_events->node_new = metric_event_new; +} + +struct egroup { + struct list_head nd; + int idnum; + const char **ids; + const char *metric_name; + const char *metric_expr; +}; + +static struct perf_evsel *find_evsel(struct perf_evlist *perf_evlist, + const char **ids, + int idnum, + struct perf_evsel **metric_events) +{ + struct perf_evsel *ev, *start = NULL; + int ind = 0; + + evlist__for_each_entry (perf_evlist, ev) { + if (!strcmp(ev->name, ids[ind])) { + metric_events[ind] = ev; + if (ind == 0) + start = ev; + if (++ind == idnum) { + metric_events[ind] = NULL; + return start; + } + } else { + ind = 0; + start = NULL; + } + } + /* + * This can happen when an alias expands to multiple + * events, like for uncore events. + * We don't support this case for now. + */ + return NULL; +} + +static int metricgroup__setup_events(struct list_head *groups, + struct perf_evlist *perf_evlist, + struct rblist *metric_events_list) +{ + struct metric_event *me; + struct metric_expr *expr; + int i = 0; + int ret = 0; + struct egroup *eg; + struct perf_evsel *evsel; + + list_for_each_entry (eg, groups, nd) { + struct perf_evsel **metric_events; + + metric_events = calloc(sizeof(void *), eg->idnum + 1); + if (!metric_events) { + ret = -ENOMEM; + break; + } + evsel = find_evsel(perf_evlist, eg->ids, eg->idnum, + metric_events); + if (!evsel) { + pr_debug("Cannot resolve %s: %s\n", + eg->metric_name, eg->metric_expr); + continue; + } + for (i = 0; i < eg->idnum; i++) + metric_events[i]->collect_stat = true; + me = metricgroup__lookup(metric_events_list, evsel, true); + if (!me) { + ret = -ENOMEM; + break; + } + expr = malloc(sizeof(struct metric_expr)); + if (!expr) { + ret = -ENOMEM; + break; + } + expr->metric_expr = eg->metric_expr; + expr->metric_name = eg->metric_name; + expr->metric_events = metric_events; + list_add(&expr->nd, &me->head); + } + return ret; +} + +static bool match_metric(const char *n, const char *list) +{ + int len; + char *m; + + if (!list) + return false; + if (!strcmp(list, "all")) + return true; + if (!n) + return !strcasecmp(list, "No_group"); + len = strlen(list); + m = strcasestr(n, list); + if (!m) + return false; + if ((m == n || m[-1] == ';' || m[-1] == ' ') && + (m[len] == 0 || m[len] == ';')) + return true; + return false; +} + +static int metricgroup__add_metric(const char *metric, struct strbuf *events, + struct list_head *group_list) +{ + struct pmu_events_map *map = perf_pmu__find_map(); + struct pmu_event *pe; + int ret = -EINVAL; + int i, j; + + strbuf_init(events, 100); + strbuf_addf(events, "%s", ""); + + if (!map) + return 0; + + for (i = 0; ; i++) { + pe = &map->table[i]; + + if (!pe->name && !pe->metric_group && !pe->metric_name) + break; + if (!pe->metric_expr) + continue; + if (match_metric(pe->metric_group, metric) || + match_metric(pe->metric_name, metric)) { + const char **ids; + int idnum; + struct egroup *eg; + + pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); + + if (expr__find_other(pe->metric_expr, + NULL, &ids, &idnum) < 0) + continue; + if (events->len > 0) + strbuf_addf(events, ","); + for (j = 0; j < idnum; j++) { + pr_debug("found event %s\n", ids[j]); + strbuf_addf(events, "%s%s", + j == 0 ? "{" : ",", + ids[j]); + } + strbuf_addf(events, "}:W"); + + eg = malloc(sizeof(struct egroup)); + if (!eg) { + ret = -ENOMEM; + break; + } + eg->ids = ids; + eg->idnum = idnum; + eg->metric_name = pe->metric_name; + eg->metric_expr = pe->metric_expr; + list_add_tail(&eg->nd, group_list); + ret = 0; + } + } + return ret; +} + +static int metricgroup__add_metric_list(const char *list, struct strbuf *events, + struct list_head *group_list) +{ + char *llist, *nlist, *p; + int ret = -EINVAL; + + nlist = strdup(list); + if (!nlist) + return -ENOMEM; + llist = nlist; + while ((p = strsep(&llist, ",")) != NULL) { + ret = metricgroup__add_metric(p, events, group_list); + if (ret == -EINVAL) { + fprintf(stderr, "Cannot find metric or group `%s'\n", + p); + break; + } + } + free(nlist); + return ret; +} + +static void metricgroup__free_egroups(struct list_head *group_list) +{ + struct egroup *eg, *egtmp; + int i; + + list_for_each_entry_safe (eg, egtmp, group_list, nd) { + for (i = 0; i < eg->idnum; i++) + free((char *)eg->ids[i]); + free(eg->ids); + free(eg); + } +} + +int metricgroup__parse_groups(const struct option *opt, + const char *str, + struct rblist *metric_events) +{ + struct parse_events_error parse_error; + struct perf_evlist *perf_evlist = *(struct perf_evlist **)opt->value; + struct strbuf extra_events; + LIST_HEAD(group_list); + int ret; + + if (metric_events->nr_entries == 0) + metricgroup__rblist_init(metric_events); + ret = metricgroup__add_metric_list(str, &extra_events, &group_list); + if (ret) + return ret; + pr_debug("adding %s\n", extra_events.buf); + memset(&parse_error, 0, sizeof(struct parse_events_error)); + ret = parse_events(perf_evlist, extra_events.buf, &parse_error); + if (ret) { + pr_err("Cannot set up events %s\n", extra_events.buf); + goto out; + } + strbuf_release(&extra_events); + ret = metricgroup__setup_events(&group_list, perf_evlist, + metric_events); +out: + metricgroup__free_egroups(&group_list); + return ret; +} diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h new file mode 100644 index 000000000000..06854e125ee7 --- /dev/null +++ b/tools/perf/util/metricgroup.h @@ -0,0 +1,31 @@ +#ifndef METRICGROUP_H +#define METRICGROUP_H 1 + +#include "linux/list.h" +#include "rblist.h" +#include +#include "evlist.h" +#include "strbuf.h" + +struct metric_event { + struct rb_node nd; + struct perf_evsel *evsel; + struct list_head head; /* list of metric_expr */ +}; + +struct metric_expr { + struct list_head nd; + const char *metric_expr; + const char *metric_name; + struct perf_evsel **metric_events; +}; + +struct metric_event *metricgroup__lookup(struct rblist *metric_events, + struct perf_evsel *evsel, + bool create); +int metricgroup__parse_groups(const struct option *opt, + const char *str, + struct rblist *metric_events); + +void metricgroup__print(bool metrics, bool groups, char *filter, bool raw); +#endif diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ed25d7f88731..7070638ab600 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -580,8 +580,11 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name) const char *pname; pe = &map->table[i++]; - if (!pe->name) + if (!pe->name) { + if (pe->metric_group || pe->metric_name) + continue; break; + } pname = pe->pmu ? pe->pmu : "cpu"; if (strncmp(pname, name, strlen(pname))) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 8c7ab29169b9..42e6c17be7ff 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -6,6 +6,7 @@ #include "rblist.h" #include "evlist.h" #include "expr.h" +#include "metricgroup.h" enum { CTX_BIT_USER = 1 << 0, @@ -671,13 +672,16 @@ static void generic_metric(const char *metric_expr, void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct rblist *metric_events) { void *ctxp = out->ctx; print_metric_t print_metric = out->print_metric; double total, ratio = 0.0, total2; const char *color = NULL; int ctx = evsel_context(evsel); + struct metric_event *me; + int num = 1; if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { total = avg_stats(&runtime_cycles_stats[ctx][cpu]); @@ -880,6 +884,20 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { print_smi_cost(cpu, evsel, out); } else { - print_metric(ctxp, NULL, NULL, NULL, 0); + num = 0; } + + if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) { + struct metric_expr *mexp; + + list_for_each_entry (mexp, &me->head, nd) { + if (num++ > 0) + out->new_line(ctxp); + generic_metric(mexp->metric_expr, mexp->metric_events, + evsel->name, mexp->metric_name, + avg, cpu, ctx, out); + } + } + if (num == 0) + print_metric(ctxp, NULL, NULL, NULL, 0); } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index eacaf958e19d..47915df346fb 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -91,9 +91,11 @@ struct perf_stat_output_ctx { bool force_header; }; +struct rblist; void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, - struct perf_stat_output_ctx *out); + struct perf_stat_output_ctx *out, + struct rblist *metric_events); void perf_stat__collect_metric_expr(struct perf_evlist *); int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); -- cgit v1.2.3 From 71b0acce78d12e99eeda6fd6642ba89cc2b2b49c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:32 -0700 Subject: perf list: Add metric groups to perf list Add code to perf list to print metric groups, and metrics that don't have an event name. The metricgroup code collects the eventgroups and events into a rblist, and then prints them according to the configured filters. The metricgroups are printed by default, but can be limited by perf list metric or perf list metricgroup % perf list metricgroup .. Metric Groups: DSB: DSB_Coverage [Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)] FLOPS: GFLOPs [Giga Floating Point Operations Per Second] Frontend: IFetch_Line_Utilization [Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions] Frontend_Bandwidth: DSB_Coverage [Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)] Memory_BW: MLP [Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)] v2: Check return value of asprintf to fix warning on FC26 Fix key in lookup/addition for the groups list Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170831194036.30146-8-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-list.txt | 7 +- tools/perf/builtin-list.c | 7 ++ tools/perf/util/metricgroup.c | 176 +++++++++++++++++++++++++++++++++ tools/perf/util/parse-events.c | 3 + 4 files changed, 192 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 75fc17f47298..24679aed90b7 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -8,7 +8,8 @@ perf-list - List all symbolic event types SYNOPSIS -------- [verse] -'perf list' [--no-desc] [--long-desc] [hw|sw|cache|tracepoint|pmu|sdt|event_glob] +'perf list' [--no-desc] [--long-desc] + [hw|sw|cache|tracepoint|pmu|sdt|metric|metricgroup|event_glob] DESCRIPTION ----------- @@ -248,6 +249,10 @@ To limit the list use: . 'sdt' to list all Statically Defined Tracepoint events. +. 'metric' to list metrics + +. 'metricgroup' to list metricgroups with metrics. + . If none of the above is matched, it will apply the supplied glob to all events, printing the ones that match. diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 4bf2cb4d25aa..b2d2ad3dd478 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -15,6 +15,7 @@ #include "util/cache.h" #include "util/pmu.h" #include "util/debug.h" +#include "util/metricgroup.h" #include static bool desc_flag = true; @@ -79,6 +80,10 @@ int cmd_list(int argc, const char **argv) long_desc_flag, details_flag); else if (strcmp(argv[i], "sdt") == 0) print_sdt_events(NULL, NULL, raw_dump); + else if (strcmp(argv[i], "metric") == 0) + metricgroup__print(true, false, NULL, raw_dump); + else if (strcmp(argv[i], "metricgroup") == 0) + metricgroup__print(false, true, NULL, raw_dump); else if ((sep = strchr(argv[i], ':')) != NULL) { int sep_idx; @@ -96,6 +101,7 @@ int cmd_list(int argc, const char **argv) s[sep_idx] = '\0'; print_tracepoint_events(s, s + sep_idx + 1, raw_dump); print_sdt_events(s, s + sep_idx + 1, raw_dump); + metricgroup__print(true, true, s, raw_dump); free(s); } else { if (asprintf(&s, "*%s*", argv[i]) < 0) { @@ -112,6 +118,7 @@ int cmd_list(int argc, const char **argv) details_flag); print_tracepoint_events(NULL, s, raw_dump); print_sdt_events(NULL, s, raw_dump); + metricgroup__print(true, true, NULL, raw_dump); free(s); } } diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 7516b1746594..2d60114f1870 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -189,6 +189,182 @@ static bool match_metric(const char *n, const char *list) return false; } +struct mep { + struct rb_node nd; + const char *name; + struct strlist *metrics; +}; + +static int mep_cmp(struct rb_node *rb_node, const void *entry) +{ + struct mep *a = container_of(rb_node, struct mep, nd); + struct mep *b = (struct mep *)entry; + + return strcmp(a->name, b->name); +} + +static struct rb_node *mep_new(struct rblist *rl __maybe_unused, + const void *entry) +{ + struct mep *me = malloc(sizeof(struct mep)); + + if (!me) + return NULL; + memcpy(me, entry, sizeof(struct mep)); + me->name = strdup(me->name); + if (!me->name) + goto out_me; + me->metrics = strlist__new(NULL, NULL); + if (!me->metrics) + goto out_name; + return &me->nd; +out_name: + free((char *)me->name); +out_me: + free(me); + return NULL; +} + +static struct mep *mep_lookup(struct rblist *groups, const char *name) +{ + struct rb_node *nd; + struct mep me = { + .name = name + }; + nd = rblist__find(groups, &me); + if (nd) + return container_of(nd, struct mep, nd); + rblist__add_node(groups, &me); + nd = rblist__find(groups, &me); + if (nd) + return container_of(nd, struct mep, nd); + return NULL; +} + +static void mep_delete(struct rblist *rl __maybe_unused, + struct rb_node *nd) +{ + struct mep *me = container_of(nd, struct mep, nd); + + strlist__delete(me->metrics); + free((void *)me->name); + free(me); +} + +static void metricgroup__print_strlist(struct strlist *metrics, bool raw) +{ + struct str_node *sn; + int n = 0; + + strlist__for_each_entry (sn, metrics) { + if (raw) + printf("%s%s", n > 0 ? " " : "", sn->s); + else + printf(" %s\n", sn->s); + n++; + } + if (raw) + putchar('\n'); +} + +void metricgroup__print(bool metrics, bool metricgroups, char *filter, + bool raw) +{ + struct pmu_events_map *map = perf_pmu__find_map(); + struct pmu_event *pe; + int i; + struct rblist groups; + struct rb_node *node, *next; + struct strlist *metriclist = NULL; + + if (!map) + return; + + if (!metricgroups) { + metriclist = strlist__new(NULL, NULL); + if (!metriclist) + return; + } + + rblist__init(&groups); + groups.node_new = mep_new; + groups.node_cmp = mep_cmp; + groups.node_delete = mep_delete; + for (i = 0; ; i++) { + const char *g; + pe = &map->table[i]; + + if (!pe->name && !pe->metric_group && !pe->metric_name) + break; + if (!pe->metric_expr) + continue; + g = pe->metric_group; + if (!g && pe->metric_name) { + if (pe->name) + continue; + g = "No_group"; + } + if (g) { + char *omg; + char *mg = strdup(g); + + if (!mg) + return; + omg = mg; + while ((g = strsep(&mg, ";")) != NULL) { + struct mep *me; + char *s; + + if (*g == 0) + g = "No_group"; + while (isspace(*g)) + g++; + if (filter && !strstr(g, filter)) + continue; + if (raw) + s = (char *)pe->metric_name; + else { + if (asprintf(&s, "%s\n\t[%s]", + pe->metric_name, pe->desc) < 0) + return; + } + + if (!s) + continue; + + if (!metricgroups) { + strlist__add(metriclist, s); + } else { + me = mep_lookup(&groups, g); + if (!me) + continue; + strlist__add(me->metrics, s); + } + } + free(omg); + } + } + + if (metricgroups && !raw) + printf("\nMetric Groups:\n\n"); + else if (metrics && !raw) + printf("\nMetrics:\n\n"); + + for (node = rb_first(&groups.entries); node; node = next) { + struct mep *me = container_of(node, struct mep, nd); + + if (metricgroups) + printf("%s%s%s", me->name, metrics ? ":" : "", raw ? " " : "\n"); + if (metrics) + metricgroup__print_strlist(me->metrics, raw); + next = rb_next(node); + rblist__remove_node(&groups, node); + } + if (!metricgroups) + metricgroup__print_strlist(metriclist, raw); + strlist__delete(metriclist); +} + static int metricgroup__add_metric(const char *metric, struct strbuf *events, struct list_head *group_list) { diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 57d7acf890e0..75588920fccc 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -28,6 +28,7 @@ #include "probe-file.h" #include "asm/bug.h" #include "util/parse-branch-options.h" +#include "metricgroup.h" #define MAX_NAME_LEN 100 @@ -2380,6 +2381,8 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, print_tracepoint_events(NULL, NULL, name_only); print_sdt_events(NULL, NULL, name_only); + + metricgroup__print(true, true, NULL, name_only); } int parse_events__is_hardcoded_term(struct parse_events_term *term) -- cgit v1.2.3 From 4e1a096380e3b558ef021afc08e193ce5d1be478 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:33 -0700 Subject: perf stat: Don't use ctx for saved values lookup We don't need to use ctx to look up events for saved values. The context is already part of the evsel pointer, which is the primary key. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170831194036.30146-9-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 42e6c17be7ff..664f49a9b012 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -56,7 +56,6 @@ struct saved_value { struct rb_node rb_node; struct perf_evsel *evsel; int cpu; - int ctx; struct stats stats; }; @@ -67,8 +66,6 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry) rb_node); const struct saved_value *b = entry; - if (a->ctx != b->ctx) - return a->ctx - b->ctx; if (a->cpu != b->cpu) return a->cpu - b->cpu; if (a->evsel == b->evsel) @@ -90,13 +87,12 @@ static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused, } static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, - int cpu, int ctx, + int cpu, bool create) { struct rb_node *nd; struct saved_value dm = { .cpu = cpu, - .ctx = ctx, .evsel = evsel, }; nd = rblist__find(&runtime_saved_values, &dm); @@ -232,8 +228,7 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, update_stats(&runtime_aperf_stats[ctx][cpu], count[0]); if (counter->collect_stat) { - struct saved_value *v = saved_value_lookup(counter, cpu, ctx, - true); + struct saved_value *v = saved_value_lookup(counter, cpu, true); update_stats(&v->stats, count[0]); } } @@ -634,7 +629,6 @@ static void generic_metric(const char *metric_expr, const char *metric_name, double avg, int cpu, - int ctx, struct perf_stat_output_ctx *out) { print_metric_t print_metric = out->print_metric; @@ -648,7 +642,7 @@ static void generic_metric(const char *metric_expr, for (i = 0; metric_events[i]; i++) { struct saved_value *v; - v = saved_value_lookup(metric_events[i], cpu, ctx, false); + v = saved_value_lookup(metric_events[i], cpu, false); if (!v) break; expr__add_id(&pctx, metric_events[i]->name, avg_stats(&v->stats)); @@ -866,7 +860,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, print_metric(ctxp, NULL, NULL, name, 0); } else if (evsel->metric_expr) { generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name, - evsel->metric_name, avg, cpu, ctx, out); + evsel->metric_name, avg, cpu, out); } else if (runtime_nsecs_stats[cpu].n != 0) { char unit = 'M'; char unit_buf[10]; @@ -895,7 +889,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, out->new_line(ctxp); generic_metric(mexp->metric_expr, mexp->metric_events, evsel->name, mexp->metric_name, - avg, cpu, ctx, out); + avg, cpu, out); } } if (num == 0) -- cgit v1.2.3 From fd48aad9b0f3f7654433dfae3a72ceda36e2de28 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:34 -0700 Subject: perf stat: Support duration_time for metrics Some of the metrics formulas (like GFLOPs) need to know how long the measurement period is. Support an internal event called duration_time, which reports time in second. It maps to the dummy event, but is special cased for statistics to report the walltime duration. So far it is not printed, but only used internally for metrics. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170831194036.30146-10-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.l | 1 + tools/perf/util/stat-shadow.c | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index fdb5bb52f01f..ea2426daf7e8 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -288,6 +288,7 @@ cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COU alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); } emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } +duration_time { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } /* diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 664f49a9b012..a2c12d1ef32a 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -641,11 +641,20 @@ static void generic_metric(const char *metric_expr, expr__add_id(&pctx, name, avg); for (i = 0; metric_events[i]; i++) { struct saved_value *v; + struct stats *stats; + double scale; - v = saved_value_lookup(metric_events[i], cpu, false); - if (!v) - break; - expr__add_id(&pctx, metric_events[i]->name, avg_stats(&v->stats)); + if (!strcmp(metric_events[i]->name, "duration_time")) { + stats = &walltime_nsecs_stats; + scale = 1e-9; + } else { + v = saved_value_lookup(metric_events[i], cpu, false); + if (!v) + break; + stats = &v->stats; + scale = 1.0; + } + expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale); } if (!metric_events[i]) { const char *p = metric_expr; -- cgit v1.2.3 From e864c5ca145e49bfce4847bd14b47b5f8549b2b1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:35 -0700 Subject: perf stat: Hide internal duration_time counter Some perf stat metrics use an internal "duration_time" metric. It is not correctly printed however. So hide it during output to avoid confusing users with 0 counts. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170831194036.30146-11-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 874bc6dd8d60..855890e0b70b 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -195,6 +195,11 @@ static struct perf_stat_config stat_config = { .scale = true, }; +static bool is_duration_time(struct perf_evsel *evsel) +{ + return !strcmp(evsel->name, "duration_time"); +} + static inline void diff_timespec(struct timespec *r, struct timespec *a, struct timespec *b) { @@ -1363,6 +1368,9 @@ static void print_aggr(char *prefix) ad.id = id = aggr_map->map[s]; first = true; evlist__for_each_entry(evsel_list, counter) { + if (is_duration_time(counter)) + continue; + ad.val = ad.ena = ad.run = 0; ad.nr = 0; if (!collect_data(counter, aggr_cb, &ad)) @@ -1506,6 +1514,8 @@ static void print_no_aggr_metric(char *prefix) if (prefix) fputs(prefix, stat_config.output); evlist__for_each_entry(evsel_list, counter) { + if (is_duration_time(counter)) + continue; if (first) { aggr_printout(counter, cpu, 0); first = false; @@ -1560,6 +1570,8 @@ static void print_metric_headers(const char *prefix, bool no_indent) /* Print metrics headers only */ evlist__for_each_entry(evsel_list, counter) { + if (is_duration_time(counter)) + continue; os.evsel = counter; out.ctx = &os; out.print_metric = print_metric_header; @@ -1707,12 +1719,18 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) print_aggr(prefix); break; case AGGR_THREAD: - evlist__for_each_entry(evsel_list, counter) + evlist__for_each_entry(evsel_list, counter) { + if (is_duration_time(counter)) + continue; print_aggr_thread(counter, prefix); + } break; case AGGR_GLOBAL: - evlist__for_each_entry(evsel_list, counter) + evlist__for_each_entry(evsel_list, counter) { + if (is_duration_time(counter)) + continue; print_counter_aggr(counter, prefix); + } if (metric_only) fputc('\n', stat_config.output); break; @@ -1720,8 +1738,11 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) if (metric_only) print_no_aggr_metric(prefix); else { - evlist__for_each_entry(evsel_list, counter) + evlist__for_each_entry(evsel_list, counter) { + if (is_duration_time(counter)) + continue; print_counter(counter, prefix); + } } break; case AGGR_UNSET: -- cgit v1.2.3 From b90f1333ef08d2a497ae239798868b046f4e3a97 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 31 Aug 2017 12:40:36 -0700 Subject: perf stat: Update walltime_nsecs_stats in interval mode Some metrics (like GFLOPs) need walltime_nsecs_stats for each interval. Compute it for each interval instead of only at the end. Pointed out by Jiri. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170831194036.30146-12-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/perf') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 855890e0b70b..88f1d5fbdb48 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -415,6 +415,8 @@ static void process_interval(void) pr_err("failed to write stat round event\n"); } + init_stats(&walltime_nsecs_stats); + update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000); print_counters(&rs, 0, NULL); } -- cgit v1.2.3 From 84c417422798c897f637b0249f64a52807b4a61b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Sep 2017 10:00:28 -0700 Subject: perf record: Support direct --user-regs arguments USER_REGS can currently only collected implicitely with call graph recording. Sometimes it is useful to see them separately, and filter them. Add a new --user-regs option to record that is similar to --intr-regs, but acts on user regs. Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170905170029.19722-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 2 ++ tools/perf/builtin-record.c | 3 +++ tools/perf/perf.h | 1 + tools/perf/util/evsel.c | 7 ++++++- 4 files changed, 12 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index e397453e5a46..68a1ffb0a8a5 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -377,6 +377,8 @@ symbolic names, e.g. on x86, ax, si. To list the available registers use --intr-regs=\?. To name registers, pass a comma separated list such as --intr-regs=ax,bx. The list of register is architecture dependent. +--user-regs:: +Capture user registers at sample time. Same arguments as -I. --running-time:: Record running and enabled time for read events (:S) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 56f8142ff97f..9b379f3a3d99 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1643,6 +1643,9 @@ static struct option __record_options[] = { OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", "sample selected machine registers on interrupt," " use -I ? to list register names", parse_regs), + OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", + "sample selected machine registers on interrupt," + " use -I ? to list register names", parse_regs), OPT_BOOLEAN(0, "running-time", &record.opts.running_time, "Record running/enabled time of read (:S) events"), OPT_CALLBACK('k', "clockid", &record.opts, diff --git a/tools/perf/perf.h b/tools/perf/perf.h index dc442ba21bf6..fbb0a9cd0ac6 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -65,6 +65,7 @@ struct record_opts { unsigned int user_freq; u64 branch_stack; u64 sample_intr_regs; + u64 sample_user_regs; u64 default_interval; u64 user_interval; size_t auxtrace_snapshot_size; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4bb89373eb52..7389746c0dc4 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -678,7 +678,7 @@ void perf_evsel__config_callchain(struct perf_evsel *evsel, if (!function) { perf_evsel__set_sample_bit(evsel, REGS_USER); perf_evsel__set_sample_bit(evsel, STACK_USER); - attr->sample_regs_user = PERF_REGS_MASK; + attr->sample_regs_user |= PERF_REGS_MASK; attr->sample_stack_user = param->dump_size; attr->exclude_callchain_user = 1; } else { @@ -931,6 +931,11 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, perf_evsel__set_sample_bit(evsel, REGS_INTR); } + if (opts->sample_user_regs) { + attr->sample_regs_user |= opts->sample_user_regs; + perf_evsel__set_sample_bit(evsel, REGS_USER); + } + if (target__has_cpu(&opts->target) || opts->sample_cpu) perf_evsel__set_sample_bit(evsel, CPU); -- cgit v1.2.3 From b1491ace8eb2e92677cd9ee966763f8f53d29d16 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Sep 2017 11:40:57 -0700 Subject: perf script: Support user regs Teach perf script to print user regs. % perf record --user-regs=ip,sp ... % perf script -F ip,sym,uregs ... ffffffff9e060c24 native_write_msr ABI:2 SP:0x7ffd0ea06c38 IP:0x7fe77f55b637 ffffffff9e060c24 native_write_msr ABI:2 SP:0x7ffd0ea06c38 IP:0x7fe77f55b637 ffffffff9e060c24 native_write_msr ABI:2 SP:0x7ffd0ea06c38 IP:0x7fe77f55b637 ffffffff9e060c24 native_write_msr ABI:2 SP:0x7ffd0ea06c38 IP:0x7fe77f55b637 ffffffff9e00cc12 intel_pmu_handle_irq ABI:2 SP:0x7ffd0ea06c38 IP:0x7fe77f55b637 v2: Rebased on top of phys-addr patches Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/20170905184057.26135-1-andi@firstfloor.org [ Use PRIu64 for regs->abi in print_sample_uregs() ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 4 ++-- tools/perf/builtin-script.c | 30 +++++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 18dfcfa38454..bcc1ba35a2d8 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -116,8 +116,8 @@ OPTIONS --fields:: Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, - srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff, - callindent, insn, insnlen, synth, phys_addr. + srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, + brstackoff, callindent, insn, insnlen, synth, phys_addr. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 3d4c3b5e1868..725dbd3dd104 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -88,6 +88,7 @@ enum perf_output_field { PERF_OUTPUT_BRSTACKOFF = 1U << 24, PERF_OUTPUT_SYNTH = 1U << 25, PERF_OUTPUT_PHYS_ADDR = 1U << 26, + PERF_OUTPUT_UREGS = 1U << 27, }; struct output_option { @@ -109,6 +110,7 @@ struct output_option { {.str = "srcline", .field = PERF_OUTPUT_SRCLINE}, {.str = "period", .field = PERF_OUTPUT_PERIOD}, {.str = "iregs", .field = PERF_OUTPUT_IREGS}, + {.str = "uregs", .field = PERF_OUTPUT_UREGS}, {.str = "brstack", .field = PERF_OUTPUT_BRSTACK}, {.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM}, {.str = "data_src", .field = PERF_OUTPUT_DATA_SRC}, @@ -385,6 +387,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, PERF_OUTPUT_IREGS)) return -EINVAL; + if (PRINT_FIELD(UREGS) && + perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_USER, "UREGS", + PERF_OUTPUT_UREGS)) + return -EINVAL; + if (PRINT_FIELD(PHYS_ADDR) && perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR)) @@ -509,6 +516,24 @@ static void print_sample_iregs(struct perf_sample *sample, } } +static void print_sample_uregs(struct perf_sample *sample, + struct perf_event_attr *attr) +{ + struct regs_dump *regs = &sample->user_regs; + uint64_t mask = attr->sample_regs_user; + unsigned i = 0, r; + + if (!regs || !regs->regs) + return; + + printf(" ABI:%" PRIu64 " ", regs->abi); + + for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) { + u64 val = regs->regs[i++]; + printf("%5s:0x%"PRIx64" ", perf_reg_name(r), val); + } +} + static void print_sample_start(struct perf_sample *sample, struct thread *thread, struct perf_evsel *evsel) @@ -1444,6 +1469,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(IREGS)) print_sample_iregs(sample, attr); + if (PRINT_FIELD(UREGS)) + print_sample_uregs(sample, attr); + if (PRINT_FIELD(BRSTACK)) print_sample_brstack(sample, thread, attr); else if (PRINT_FIELD(BRSTACKSYM)) @@ -2739,7 +2767,7 @@ int cmd_script(int argc, const char **argv) "+field to add and -field to remove." "Valid types: hw,sw,trace,raw,synth. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," - "addr,symoff,period,iregs,brstack,brstacksym,flags," + "addr,symoff,period,iregs,uregs,brstack,brstacksym,flags," "bpf-output,callindent,insn,insnlen,brstackinsn,synth,phys_addr", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, -- cgit v1.2.3 From 80f873557112fc163f011cd131d4cfe4959100a6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 8 Sep 2017 10:46:21 +0200 Subject: perf tools: Add python-clean target To be able to cleanup only python related binaries. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170908084621.31595-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 91ef44bfaf3e..1df93b4c4648 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -173,7 +173,7 @@ AWK = awk # non-config cases config := 1 -NON_CONFIG_TARGETS := clean TAGS tags cscope help install-doc install-man install-html install-info install-pdf doc man html info pdf +NON_CONFIG_TARGETS := clean python-clean TAGS tags cscope help install-doc install-man install-html install-info install-pdf doc man html info pdf ifdef MAKECMDGOALS ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),) @@ -802,7 +802,10 @@ config-clean: $(call QUIET_CLEAN, config) $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null -clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean +python-clean: + $(python-clean) + +clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean python-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected @@ -819,7 +822,6 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)$(vhost_virtio_ioctl_array) \ $(OUTPUT)$(perf_ioctl_array) $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean - $(python-clean) # # To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY) -- cgit v1.2.3 From 25cc4eb44b0c840eff0e5a46a85b9ccbde77401b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 8 Sep 2017 14:05:09 +0200 Subject: perf ui progress: Add ui specific init function Adding ui specific init function allowing to setup the progress bar width based on current screen scales. Adding TUI init function to get more grained update of the progress bar. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170908120510.22515-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/progress.c | 2 ++ tools/perf/ui/progress.h | 1 + tools/perf/ui/tui/progress.c | 9 +++++++-- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c index ae91c8148edf..3e2b5d64c55e 100644 --- a/tools/perf/ui/progress.c +++ b/tools/perf/ui/progress.c @@ -34,6 +34,8 @@ void ui_progress__init(struct ui_progress *p, u64 total, const char *title) p->total = total; p->title = title; + if (ui_progress__ops->init) + ui_progress__ops->init(p); } void ui_progress__finish(void) diff --git a/tools/perf/ui/progress.h b/tools/perf/ui/progress.h index 717d39d3052b..e5f434a2070b 100644 --- a/tools/perf/ui/progress.h +++ b/tools/perf/ui/progress.h @@ -14,6 +14,7 @@ void ui_progress__init(struct ui_progress *p, u64 total, const char *title); void ui_progress__update(struct ui_progress *p, u64 adv); struct ui_progress_ops { + void (*init)(struct ui_progress *p); void (*update)(struct ui_progress *p); void (*finish)(void); }; diff --git a/tools/perf/ui/tui/progress.c b/tools/perf/ui/tui/progress.c index c4b99008e2c9..f6b8f52aad7e 100644 --- a/tools/perf/ui/tui/progress.c +++ b/tools/perf/ui/tui/progress.c @@ -5,6 +5,11 @@ #include "tui.h" #include "../browser.h" +static void __tui_progress__init(struct ui_progress *p) +{ + p->next = p->step = p->total / (SLtt_Screen_Cols - 2) ?: 1; +} + static void tui_progress__update(struct ui_progress *p) { int bar, y; @@ -49,8 +54,8 @@ static void tui_progress__finish(void) pthread_mutex_unlock(&ui__lock); } -static struct ui_progress_ops tui_progress__ops = -{ +static struct ui_progress_ops tui_progress__ops = { + .init = __tui_progress__init, .update = tui_progress__update, .finish = tui_progress__finish, }; -- cgit v1.2.3 From 8233822f403b67bbaa1d58e8fa6b8f821fe7626d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 8 Sep 2017 14:05:10 +0200 Subject: perf ui progress: Add size info into progress bar Adding the size values '[current/total]' into progress bar, to show more detailed progress of data reading. Adding new ui_progress__init_size function to specify we want to display the size. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170908120510.22515-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/progress.c | 4 +++- tools/perf/ui/progress.h | 11 ++++++++++- tools/perf/ui/tui/progress.c | 23 ++++++++++++++++++++++- tools/perf/util/session.c | 2 +- 4 files changed, 36 insertions(+), 4 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c index 3e2b5d64c55e..7ade387d511c 100644 --- a/tools/perf/ui/progress.c +++ b/tools/perf/ui/progress.c @@ -27,12 +27,14 @@ void ui_progress__update(struct ui_progress *p, u64 adv) } } -void ui_progress__init(struct ui_progress *p, u64 total, const char *title) +void __ui_progress__init(struct ui_progress *p, u64 total, + const char *title, bool size) { p->curr = 0; p->next = p->step = total / 16 ?: 1; p->total = total; p->title = title; + p->size = size; if (ui_progress__ops->init) ui_progress__ops->init(p); diff --git a/tools/perf/ui/progress.h b/tools/perf/ui/progress.h index e5f434a2070b..fbaa1507ebfe 100644 --- a/tools/perf/ui/progress.h +++ b/tools/perf/ui/progress.h @@ -8,9 +8,18 @@ void ui_progress__finish(void); struct ui_progress { const char *title; u64 curr, next, step, total; + bool size; }; -void ui_progress__init(struct ui_progress *p, u64 total, const char *title); +void __ui_progress__init(struct ui_progress *p, u64 total, + const char *title, bool size); + +#define ui_progress__init(p, total, title) \ + __ui_progress__init(p, total, title, false) + +#define ui_progress__init_size(p, total, title) \ + __ui_progress__init(p, total, title, true) + void ui_progress__update(struct ui_progress *p, u64 adv); struct ui_progress_ops { diff --git a/tools/perf/ui/tui/progress.c b/tools/perf/ui/tui/progress.c index f6b8f52aad7e..68f6144ea603 100644 --- a/tools/perf/ui/tui/progress.c +++ b/tools/perf/ui/tui/progress.c @@ -1,8 +1,10 @@ +#include #include "../cache.h" #include "../progress.h" #include "../libslang.h" #include "../ui.h" #include "tui.h" +#include "units.h" #include "../browser.h" static void __tui_progress__init(struct ui_progress *p) @@ -10,8 +12,22 @@ static void __tui_progress__init(struct ui_progress *p) p->next = p->step = p->total / (SLtt_Screen_Cols - 2) ?: 1; } +static int get_title(struct ui_progress *p, char *buf, size_t size) +{ + char buf_cur[20]; + char buf_tot[20]; + int ret; + + ret = unit_number__scnprintf(buf_cur, sizeof(buf_cur), p->curr); + ret += unit_number__scnprintf(buf_tot, sizeof(buf_tot), p->total); + + return ret + scnprintf(buf, size, "%s [%s/%s]", + p->title, buf_cur, buf_tot); +} + static void tui_progress__update(struct ui_progress *p) { + char buf[100], *title = (char *) p->title; int bar, y; /* * FIXME: We should have a per UI backend way of showing progress, @@ -23,13 +39,18 @@ static void tui_progress__update(struct ui_progress *p) if (p->total == 0) return; + if (p->size) { + get_title(p, buf, sizeof(buf)); + title = buf; + } + ui__refresh_dimensions(false); pthread_mutex_lock(&ui__lock); y = SLtt_Screen_Rows / 2 - 2; SLsmg_set_color(0); SLsmg_draw_box(y, 0, 3, SLtt_Screen_Cols); SLsmg_gotorc(y++, 1); - SLsmg_write_string((char *)p->title); + SLsmg_write_string(title); SLsmg_fill_region(y, 1, 1, SLtt_Screen_Cols - 2, ' '); SLsmg_set_color(HE_COLORSET_SELECTED); bar = ((SLtt_Screen_Cols - 2) * p->curr) / p->total; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index a7ebd9fe8e40..ceac0848469d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1847,7 +1847,7 @@ static int __perf_session__process_events(struct perf_session *session, if (data_offset + data_size < file_size) file_size = data_offset + data_size; - ui_progress__init(&prog, file_size, "Processing events..."); + ui_progress__init_size(&prog, file_size, "Processing events..."); mmap_size = MMAP_SIZE; if (mmap_size > file_size) { -- cgit v1.2.3 From ecdad24d7a4480c9af0ff6dbe00ac8bbae720d19 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 7 Sep 2017 10:55:46 -0700 Subject: perf tools: Use scandir() to replace readdir() In perf_event__synthesize_threads() perf goes through all proc files serially by readdir. scandir() does a snapshoot of /proc, which is multithreading friendly. It's possible that some threads which are added during event synthesize. But the number of lost threads should be small. They should not impact the final analysis. Signed-off-by: Kan Liang Cc: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Lukasz Odzioba Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1504806954-150842-3-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 1c905ba3641b..17c21ea68a72 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -683,12 +683,14 @@ int perf_event__synthesize_threads(struct perf_tool *tool, bool mmap_data, unsigned int proc_map_timeout) { - DIR *proc; - char proc_path[PATH_MAX]; - struct dirent *dirent; union perf_event *comm_event, *mmap_event, *fork_event; union perf_event *namespaces_event; + char proc_path[PATH_MAX]; + struct dirent **dirent; int err = -1; + char *end; + pid_t pid; + int n, i; if (machine__is_default_guest(machine)) return 0; @@ -712,29 +714,32 @@ int perf_event__synthesize_threads(struct perf_tool *tool, goto out_free_fork; snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir); - proc = opendir(proc_path); + n = scandir(proc_path, &dirent, 0, alphasort); - if (proc == NULL) + if (n < 0) goto out_free_namespaces; - while ((dirent = readdir(proc)) != NULL) { - char *end; - pid_t pid = strtol(dirent->d_name, &end, 10); - - if (*end) /* only interested in proper numerical dirents */ + for (i = 0; i < n; i++) { + if (!isdigit(dirent[i]->d_name[0])) continue; - /* - * We may race with exiting thread, so don't stop just because - * one thread couldn't be synthesized. - */ - __event__synthesize_thread(comm_event, mmap_event, fork_event, - namespaces_event, pid, 1, process, - tool, machine, mmap_data, - proc_map_timeout); - } + pid = (pid_t)strtol(dirent[i]->d_name, &end, 10); + /* only interested in proper numerical dirents */ + if (!*end) { + /* + * We may race with exiting thread, so don't stop just because + * one thread couldn't be synthesized. + */ + __event__synthesize_thread(comm_event, mmap_event, fork_event, + namespaces_event, pid, 1, process, + tool, machine, mmap_data, + proc_map_timeout); + } + free(dirent[i]); + } + free(dirent); err = 0; - closedir(proc); + out_free_namespaces: free(namespaces_event); out_free_fork: -- cgit v1.2.3 From 5c2615556d4410baebc9b336f14befe0bb32cde4 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Thu, 7 Sep 2017 12:18:51 +0900 Subject: perf config: Write a config file just once Currently set_config() can be repeatedly called for each input config on the below case: $ perf config kmem.default=slab report.children=false ... But it's a waste, so only once write a config file gathering all given config key=value pairs. Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1504754331-9776-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-config.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index a1d82e33282c..b89417d9305e 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -34,8 +34,7 @@ static struct option config_options[] = { OPT_END() }; -static int set_config(struct perf_config_set *set, const char *file_name, - const char *var, const char *value) +static int set_config(struct perf_config_set *set, const char *file_name) { struct perf_config_section *section = NULL; struct perf_config_item *item = NULL; @@ -49,7 +48,6 @@ static int set_config(struct perf_config_set *set, const char *file_name, if (!fp) return -1; - perf_config_set__collect(set, file_name, var, value); fprintf(fp, "%s\n", first_line); /* overwrite configvariables */ @@ -161,6 +159,7 @@ int cmd_config(int argc, const char **argv) struct perf_config_set *set; char *user_config = mkpath("%s/.perfconfig", getenv("HOME")); const char *config_filename; + bool changed = false; argc = parse_options(argc, argv, config_options, config_usage, PARSE_OPT_STOP_AT_NON_OPTION); @@ -231,15 +230,26 @@ int cmd_config(int argc, const char **argv) goto out_err; } } else { - if (set_config(set, config_filename, var, value) < 0) { - pr_err("Failed to set '%s=%s' on %s\n", - var, value, config_filename); + if (perf_config_set__collect(set, config_filename, + var, value) < 0) { + pr_err("Failed to add '%s=%s'\n", + var, value); free(arg); goto out_err; } + changed = true; } free(arg); } + + if (!changed) + break; + + if (set_config(set, config_filename) < 0) { + pr_err("Failed to set the configs on %s\n", + config_filename); + goto out_err; + } } ret = 0; -- cgit v1.2.3 From 55421b4fb7054f85274b1b6a321e204dac696133 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Thu, 7 Sep 2017 12:18:56 +0900 Subject: perf config: Allow creating empty config set for config file autogeneration When there isn't a config file (e.g. ~/.perfconfig) or it has nothing, the config set wasn't created. If the config set does not exist, a config file can't be autogenerated. So allow creating a empty config set in the above case, then we can support the config file autogeneration. Before: $ rm -f ~/.perfconfig $ perf config --user report.children=false $ cat ~/.perfconfig cat: /root/.perfconfig: No such file or directory But I think it should work even if there isn't a config file. After: $ rm -f ~/.perfconfig $ perf config --user report.children=false $ cat ~/.perfconfig # this file is auto-generated. [report] children = false NOTE: As a result, if perf_config_set__init() fails, it looks as if the config set isn't freed. But it isn't a problem. Because the config set will be freed by perf_config_set__delete() at the end of cmd_config(). Signed-off-by: Taeung Song Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1504754336-9824-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/config.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index bc75596f9e79..d2b6983b1779 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -700,10 +700,7 @@ struct perf_config_set *perf_config_set__new(void) if (set) { INIT_LIST_HEAD(&set->sections); - if (perf_config_set__init(set) < 0) { - perf_config_set__delete(set); - set = NULL; - } + perf_config_set__init(set); } return set; -- cgit v1.2.3 From c23c2a0f236601c635d9a9d18d7993641e72aa8c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Sep 2017 10:50:26 -0300 Subject: perf tools: Make copyfile_offset() static There are no usage outside util.c and this is the only remaining reason for fcntl.h to be included in util.h, to get the loff_t definition in Alpine Linux, so make it static. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-2dzlsao7k6ihozs5karw6kpx@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 1 + tools/perf/util/data.c | 1 + tools/perf/util/dso.c | 1 + tools/perf/util/event.c | 1 + tools/perf/util/evlist.h | 1 + tools/perf/util/namespaces.c | 1 + tools/perf/util/pmu.c | 1 + tools/perf/util/probe-file.c | 1 + tools/perf/util/util.c | 3 ++- tools/perf/util/util.h | 2 -- tools/perf/util/zlib.c | 1 + 11 files changed, 11 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 377bea009163..d0fee35db0e7 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -3,6 +3,7 @@ * * Builtin regression testing command: ever growing number of sanity tests */ +#include #include #include #include diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 263f5a906ba5..1123b30e3033 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index b9e087fb8247..ffd723179a4b 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "compress.h" #include "path.h" #include "symbol.h" diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 17c21ea68a72..10366b87d0b5 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index bf2c4936e35f..b1c14f1fdc27 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "../perf.h" #include "event.h" diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index a58e91197729..5be021701f34 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -11,6 +11,7 @@ #include "event.h" #include #include +#include #include #include #include diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 7070638ab600..0b11dfc0af44 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index cdf8d83a484c..4ae1123c6794 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -15,6 +15,7 @@ * */ #include +#include #include #include #include diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 4c360daa4e24..3c9c39711343 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -174,7 +175,7 @@ out: return err; } -int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size) +static int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size) { void *ptr; loff_t pgoff; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index b136c271125f..03946d5e4741 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -5,7 +5,6 @@ /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */ #define _DEFAULT_SOURCE 1 -#include #include #include #include @@ -35,7 +34,6 @@ bool lsdir_no_dot_filter(const char *name, struct dirent *d); int copyfile(const char *from, const char *to); int copyfile_mode(const char *from, const char *to, mode_t mode); int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi); -int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size); ssize_t readn(int fd, void *buf, size_t n); ssize_t writen(int fd, const void *buf, size_t n); diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c index 1329d843eb7b..7c1175310a12 100644 --- a/tools/perf/util/zlib.c +++ b/tools/perf/util/zlib.c @@ -1,3 +1,4 @@ +#include #include #include #include -- cgit v1.2.3 From 35c1980eb3d1acb3cac11c38252339399dca77e3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Sep 2017 14:13:24 -0700 Subject: perf stat: Fall weak group back even for EBADF It's not possible to run a package event and a per cpu event in the same group. This is used by some of the power metrics. They work correctly when not using a group. Normally weak groups should handle that, but in this case EBADF is returned instead of the normal EINVAL. $ strace -e perf_event_open ./perf stat -v -e '{cstate_pkg/c2-residency/,msr/tsc/}:W' -a sleep 1 Using CPUID GenuineIntel-6-3E perf_event_open({type=0x17 /* PERF_TYPE_??? */, size=PERF_ATTR_SIZE_VER5, config=0, ...}, -1, 0, -1, PERF_FLAG_FD_CLOEXEC) = -1 EINVAL (Invalid argument) perf_event_open({type=0x17 /* PERF_TYPE_??? */, size=PERF_ATTR_SIZE_VER5, config=0, ...}, -1, 0, -1, 0) = -1 EINVAL (Invalid argument) perf_event_open({type=0x17 /* PERF_TYPE_??? */, size=PERF_ATTR_SIZE_VER5, config=0, ...}, -1, 0, -1, 0) = -1 EINVAL (Invalid argument) perf_event_open({type=0x17 /* PERF_TYPE_??? */, size=PERF_ATTR_SIZE_VER5, config=0, ...}, -1, 0, -1, 0) = -1 EINVAL (Invalid argument) perf_event_open({type=0x17 /* PERF_TYPE_??? */, size=PERF_ATTR_SIZE_VER5, config=0, ...}, -1, 0, -1, 0) = 3 perf_event_open({type=0x7 /* PERF_TYPE_??? */, size=PERF_ATTR_SIZE_VER5, config=0, ...}, -1, 0, 3, 0) = 4 perf_event_open({type=0x7 /* PERF_TYPE_??? */, size=PERF_ATTR_SIZE_VER5, config=0, ...}, -1, 1, 0, 0) = -1 EBADF (Bad file descriptor) and perf errors out. Make weak groups trigger a fall back for EBADF too. Then this case works correctly: $ perf stat -v -e '{cstate_pkg/c2-residency/,msr/tsc/}:W' -a sleep 1 Using CPUID GenuineIntel-6-3E Weak group for cstate_pkg/c2-residency//2 failed cstate_pkg/c2-residency/: 476709882 1000598460 1000598460 msr/tsc/: 39625837911 12007369110 12007369110 Performance counter stats for 'system wide': 476,709,882 cstate_pkg/c2-residency/ 39,625,837,911 msr/tsc/ 1.000697588 seconds time elapsed This fixes perf stat -M Power ... $ perf stat -M Power --metric-only -a sleep 1 Performance counter stats for 'system wide': Turbo_Utilization C3_Core_Residency C6_Core_Residency C7_Core_Residency C2_Pkg_Residency C3_Pkg_Residency C6_Pkg_Residency C7_Pkg_Residency 1.0 0.7 30.0 0.0 0.9 0.1 0.4 0.0 1.001240740 seconds time elapsed Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170905211324.32427-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 88f1d5fbdb48..dd525417880a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -656,7 +656,7 @@ try_again: if (create_perf_stat_counter(counter) < 0) { /* Weak group failed. Reset the group. */ - if (errno == EINVAL && + if ((errno == EINVAL || errno == EBADF) && counter->leader != counter && counter->weak_group) { counter = perf_evsel__reset_weak_group(counter); -- cgit v1.2.3 From cf97962308ba6b6afdeb038505032c7c0972bdfa Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 23 Jul 2017 21:53:25 -0700 Subject: perf vendor events: Add JSON metrics for Broadwell Add JSON metrics for Broadwell. Commiter testing: # uname -a Linux jouet 4.13.0-rc7+ #3 SMP Sat Sep 2 09:04:44 -03 2017 x86_64 x86_64 x86_64 GNU/Linux # grep "model name" /proc/cpuinfo | head -1 model name : Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz # perf list metricgroup List of pre-defined events (to be used in -e): Metric Groups: DSB FLOPS Frontend Frontend_Bandwidth Memory_BW Memory_Bound Memory_Lat Pipeline Ports_Utilization Power SMT Summary TLB TopDownL1 Unknown_Branches # perf stat -M Power --metric-only -a sleep 1 Performance counter stats for 'system wide': Turbo_Utilization C3_Core_Residency C6_Core_Residency C7_Core_Residency C2_Pkg_Residency C3_Pkg_Residency C6_Pkg_Residency C7_Pkg_Residency 1.1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.003502904 seconds time elapsed # # perf stat -M Memory_BW --metric-only -a sleep 1 Performance counter stats for 'system wide': MLP 1.7 1.001364525 seconds time elapsed # # perf stat -M TLB --metric-only -a sleep 1 Performance counter stats for 'system wide': Page_Walks_Utilization 0.1 1.005962198 seconds time elapsed # # perf stat -M Summary --metric-only -a sleep 1 Performance counter stats for 'system wide': Instructions CPI CLKS CPU_Utilization GFLOPs SMT_2T_Utilization Kernel_Utilization 7281856697.0 0.0 11150898087.0 1.0 0.0 1.0 0.7 1.012134025 seconds time elapsed # Running in verbose mode shows which counters and expressions are being used: # perf stat -v -M Summary --metric-only -a sleep 1 Using CPUID GenuineIntel-6-3D metric expr 1 / inst_retired.any / cycles for CPI found event inst_retired.any found event cycles metric expr cpu_clk_unhalted.thread for CLKS found event cpu_clk_unhalted.thread metric expr inst_retired.any for Instructions found event inst_retired.any metric expr cpu_clk_unhalted.ref_tsc / msr@tsc@ for CPU_Utilization found event cpu_clk_unhalted.ref_tsc found event msr/tsc/ metric expr ( 1*( fp_arith_inst_retired.scalar_single + fp_arith_inst_retired.scalar_double ) + 2* fp_arith_inst_retired.128b_packed_double + 4*( fp_arith_inst_retired.128b_packed_single + fp_arith_inst_retired.256b_packed_double ) + 8* fp_arith_inst_retired.256b_packed_single ) / 1000000000 / duration_time for GFLOPs found event fp_arith_inst_retired.scalar_single found event fp_arith_inst_retired.scalar_double found event fp_arith_inst_retired.128b_packed_double found event fp_arith_inst_retired.128b_packed_single found event fp_arith_inst_retired.256b_packed_double found event fp_arith_inst_retired.256b_packed_single found event duration_time metric expr 1 - cpu_clk_thread_unhalted.one_thread_active / ( cpu_clk_thread_unhalted.ref_xclk_any / 2 ) if #smt_on else 0 for SMT_2T_Utilization found event cpu_clk_thread_unhalted.one_thread_active found event cpu_clk_thread_unhalted.ref_xclk_any metric expr cpu_clk_unhalted.ref_tsc:u / cpu_clk_unhalted.ref_tsc for Kernel_Utilization found event cpu_clk_unhalted.ref_tsc:u found event cpu_clk_unhalted.ref_tsc adding {inst_retired.any,cycles}:W,{cpu_clk_unhalted.thread}:W,{inst_retired.any}:W,{cpu_clk_unhalted.ref_tsc,msr/tsc/}:W,{fp_arith_inst_retired.scalar_single,fp_arith_inst_retired.scalar_double,fp_arith_inst_retired.128b_packed_double,fp_arith_inst_retired.128b_packed_single,fp_arith_inst_retired.256b_packed_double,fp_arith_inst_retired.256b_packed_single,duration_time}:W,{cpu_clk_thread_unhalted.one_thread_active,cpu_clk_thread_unhalted.ref_xclk_any}:W,{cpu_clk_unhalted.ref_tsc:u,cpu_clk_unhalted.ref_tsc}:W inst_retired.any -> cpu/event=0xc0/ cpu_clk_unhalted.thread -> cpu/event=0x3c/ inst_retired.any -> cpu/event=0xc0/ cpu_clk_unhalted.ref_tsc -> cpu/umask=0x3,period=2000003,event=0/ fp_arith_inst_retired.scalar_single -> cpu/umask=0x2,period=2000003,event=0xc7/ fp_arith_inst_retired.scalar_double -> cpu/umask=0x1,period=2000003,event=0xc7/ fp_arith_inst_retired.128b_packed_double -> cpu/umask=0x4,period=2000003,event=0xc7/ fp_arith_inst_retired.128b_packed_single -> cpu/umask=0x8,period=2000003,event=0xc7/ fp_arith_inst_retired.256b_packed_double -> cpu/umask=0x10,period=2000003,event=0xc7/ fp_arith_inst_retired.256b_packed_single -> cpu/umask=0x20,period=2000003,event=0xc7/ cpu_clk_thread_unhalted.one_thread_active -> cpu/umask=0x2,period=2000003,event=0x3c/ cpu_clk_thread_unhalted.ref_xclk_any -> cpu/umask=0x1,any=1,period=2000003,event=0x3c/ cpu_clk_unhalted.ref_tsc -> cpu/umask=0x3,period=2000003,event=0/ cpu_clk_unhalted.ref_tsc -> cpu/umask=0x3,period=2000003,event=0/ Weak group for fp_arith_inst_retired.scalar_single/7 failed Weak group for cpu_clk_unhalted.ref_tsc:u/2 failed inst_retired.any: 8704146437 4026374016 619883741 cycles: 11180800018 4026374016 619883741 cpu_clk_unhalted.thread: 11140030295 4026323772 931621933 inst_retired.any: 8643115117 4026260510 1243595906 cpu_clk_unhalted.ref_tsc: 10201638510 4026184297 1247351077 msr/tsc/: 10378022785 4026184297 1247351077 fp_arith_inst_retired.scalar_single: 134697 4026102728 1559210545 fp_arith_inst_retired.scalar_double: 274339 4026007348 1870014984 fp_arith_inst_retired.128b_packed_double: 1639 4025886054 1866736918 fp_arith_inst_retired.128b_packed_single: 0 4025776614 2175106569 fp_arith_inst_retired.256b_packed_double: 0 4025681734 1235551129 fp_arith_inst_retired.256b_packed_single: 0 4025582962 1232398454 duration_time: 0 4025552913 4025552913 cpu_clk_thread_unhalted.one_thread_active: 10505 4025474649 923893076 cpu_clk_thread_unhalted.ref_xclk_any: 394992110 4025474649 923893076 cpu_clk_unhalted.ref_tsc:u: 5341421014 4025360315 1231634198 cpu_clk_unhalted.ref_tsc: 10258278508 4025252611 307909362 Performance counter stats for 'system wide': Instructions CPI CLKS CPU_Utilization GFLOPs SMT_2T_Utilization Kernel_Utilization 8704146437.0 0.0 11140030295.0 1.0 0.0 1.0 0.5 1.006783654 seconds time elapsed # Signed-off-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170905195235.GW2482@two.firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/broadwell/bdw-metrics.json | 164 +++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json new file mode 100644 index 000000000000..49c5f123d811 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json @@ -0,0 +1,164 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", + "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED ) ) / RS_EVENTS.EMPTY_END", + "MetricGroup": "Unknown_Branches", + "MetricName": "BAClear_Cost" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "MetricGroup": "Memory_Bound;Memory_Lat", + "MetricName": "Load_Miss_Real_Latency" + }, + { + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricGroup": "Memory_Bound;Memory_BW", + "MetricName": "MLP" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TLB", + "MetricName": "Page_Walks_Utilization" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricGroup": "FLOPS;Summary", + "MetricName": "GFLOPs" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 2e006a24127ad88422632f9e5d6a8039a40f01da Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 23 Jul 2017 21:54:49 -0700 Subject: perf vendor events: Add JSON metrics for Skylake Add JSON metrics for Skylake. Committer testing: # grep "model name" /proc/cpuinfo | head -1 model name : Intel(R) Core(TM) i5-7500 CPU @ 3.40GHz # uname -a Linux seventh 4.12.0-rc6+ #1 SMP Fri Jun 30 16:40:55 -03 2017 x86_64 x86_64 x86_64 GNU/Linux # perf stat --metric-only -M Summary -a sleep 1 Performance counter stats for 'system wide': Instructions CPI CLKS CPU_Utilization GFLOPs SMT_2T_Utilization Kernel_Utilization 34021097.0 0.0 119424171.0 0.0 0.0 0.0 0.0 1.001001793 seconds time elapsed # perf list metricgroup List of pre-defined events (to be used in -e): Metric Groups: DSB FLOPS Frontend Frontend_Bandwidth Memory_BW Memory_Bound Memory_Lat Pipeline Ports_Utilization Power SMT Summary TLB TopDownL1 Unknown_Branches # perf stat --metric-only -M Ports_Utilization -a sleep 1 Performance counter stats for 'system wide': ILP 1475828.0 1.000688547 seconds time elapsed # perf stat -v --metric-only -M Ports_Utilization -a sleep 1 Using CPUID GenuineIntel-6-9E metric expr uops_executed.thread / ( uops_executed.core_cycles_ge_1 / 2) if #smt_on else uops_executed.core_cycles_ge_1 for ILP found event uops_executed.thread found event uops_executed.core_cycles_ge_1 adding {uops_executed.thread,uops_executed.core_cycles_ge_1}:W uops_executed.thread -> cpu/umask=0x1,period=2000003,event=0xb1/ uops_executed.core_cycles_ge_1 -> cpu/umask=0x2,period=2000003,cmask=1,event=0xb1/ uops_executed.thread: 8115271 4002547654 4002547654 uops_executed.core_cycles_ge_1: 3282969 4002547654 4002547654 Performance counter stats for 'system wide': ILP 3282969.0 1.000719870 seconds time elapsed # Signed-off-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170905195235.GW2482@two.firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/skylake/skl-metrics.json | 164 +++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json new file mode 100644 index 000000000000..411f9411ec9e --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json @@ -0,0 +1,164 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", + "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END", + "MetricGroup": "Unknown_Branches", + "MetricName": "BAClear_Cost" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", + "MetricGroup": "Memory_Bound;Memory_Lat", + "MetricName": "Load_Miss_Real_Latency" + }, + { + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricGroup": "Memory_Bound;Memory_BW", + "MetricName": "MLP" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles )", + "MetricGroup": "TLB", + "MetricName": "Page_Walks_Utilization" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricGroup": "FLOPS;Summary", + "MetricName": "GFLOPs" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 97dca6715d0a058a6af028a3019432740b4a0011 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 23 Jul 2017 21:50:34 -0700 Subject: perf vendor events: Add JSON metrics for Sandy Bridge Add JSON metrics for Sandy Bridge. Committer testing: # grep "model name" /proc/cpuinfo | head -1 model name : Intel(R) Core(TM) i5-2400 CPU @ 3.10GHz # perf list metricgroup List of pre-defined events (to be used in -e): Metric Groups: DSB FLOPS Frontend Frontend_Bandwidth Pipeline Ports_Utilization Power SMT Summary TopDownL1 # perf stat -M Power --metric-only -a sleep 1 Performance counter stats for 'system wide': Turbo_Utilization C3_Core_Residency C6_Core_Residency C7_Core_Residency C2_Pkg_Residency C3_Pkg_Residency C6_Pkg_Residency C7_Pkg_Residency 0.8 0.0 98.1 0.0 0.0 0.0 23.4 0.0 1.001153658 seconds time elapsed # perf stat -v -M Power --metric-only -a sleep 1 Using CPUID GenuineIntel-6-2A metric expr cpu_clk_unhalted.thread / cpu_clk_unhalted.ref_tsc for Turbo_Utilization found event cpu_clk_unhalted.thread found event cpu_clk_unhalted.ref_tsc metric expr (cstate_core@c3\-residency@ / msr@tsc@) * 100 for C3_Core_Residency found event cstate_core/c3-residency/ found event msr/tsc/ metric expr (cstate_core@c6\-residency@ / msr@tsc@) * 100 for C6_Core_Residency found event cstate_core/c6-residency/ found event msr/tsc/ metric expr (cstate_core@c7\-residency@ / msr@tsc@) * 100 for C7_Core_Residency found event cstate_core/c7-residency/ found event msr/tsc/ metric expr (cstate_pkg@c2\-residency@ / msr@tsc@) * 100 for C2_Pkg_Residency found event cstate_pkg/c2-residency/ found event msr/tsc/ metric expr (cstate_pkg@c3\-residency@ / msr@tsc@) * 100 for C3_Pkg_Residency found event cstate_pkg/c3-residency/ found event msr/tsc/ metric expr (cstate_pkg@c6\-residency@ / msr@tsc@) * 100 for C6_Pkg_Residency found event cstate_pkg/c6-residency/ found event msr/tsc/ metric expr (cstate_pkg@c7\-residency@ / msr@tsc@) * 100 for C7_Pkg_Residency found event cstate_pkg/c7-residency/ found event msr/tsc/ adding {cpu_clk_unhalted.thread,cpu_clk_unhalted.ref_tsc}:W,{cstate_core/c3-residency/,msr/tsc/}:W,{cstate_core/c6-residency/,msr/tsc/}:W,{cstate_core/c7-residency/,msr/tsc/}:W,{cstate_pkg/c2-residency/,msr/tsc/}:W,{cstate_pkg/c3-residency/,msr/tsc/}:W,{cstate_pkg/c6-residency/,msr/tsc/}:W,{cstate_pkg/c7-residency/,msr/tsc/}:W cpu_clk_unhalted.thread -> cpu/event=0x3c/ cpu_clk_unhalted.ref_tsc -> cpu/umask=0x3,period=2000003,event=0/ Weak group for cstate_pkg/c2-residency//2 failed Weak group for cstate_pkg/c3-residency//2 failed Weak group for cstate_pkg/c6-residency//2 failed Weak group for cstate_pkg/c7-residency//2 failed cpu_clk_unhalted.thread: 5564185 4002833569 4002833569 cpu_clk_unhalted.ref_tsc: 7325424 4002833569 4002833569 cstate_core/c3-residency/: 68293 4003027101 4003027101 msr/tsc/: 12451294472 4003027101 4003027101 cstate_core/c6-residency/: 12238830163 4003260984 4003260984 msr/tsc/: 12452017806 4003260984 4003260984 cstate_core/c7-residency/: 0 4003489648 4003489648 msr/tsc/: 12452725162 4003489648 4003489648 cstate_pkg/c2-residency/: 1830054 1000913138 1000913138 msr/tsc/: 12453441079 4003717513 4003717513 cstate_pkg/c3-residency/: 0 1000973570 1000973570 msr/tsc/: 12454177865 4003954758 4003954758 cstate_pkg/c6-residency/: 2940448859 1001032370 1001032370 msr/tsc/: 12454833890 4004166118 4004166118 cstate_pkg/c7-residency/: 0 1001049818 1001049818 msr/tsc/: 12454919470 4004194204 4004194204 Performance counter stats for 'system wide': Turbo_Utilization C3_Core_Residency C6_Core_Residency C7_Core_Residency C2_Pkg_Residency C3_Pkg_Residency C6_Pkg_Residency C7_Pkg_Residency 0.8 0.0 98.3 0.0 0.0 0.0 23.6 0.0 1.001126519 seconds time elapsed # Signed-off-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170905195235.GW2482@two.firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/x86/sandybridge/snb-metrics.json | 140 +++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json new file mode 100644 index 000000000000..b35b1c153c8a --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json @@ -0,0 +1,140 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "UOPS_DISPATCHED.THREAD / ( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricGroup": "FLOPS;Summary", + "MetricName": "GFLOPs" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 28bc0ddb3a89a09b6f2d4dc97b85e28b2a70db1e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 23 Jul 2017 21:51:07 -0700 Subject: perf vendor events: Add JSON metrics for Sandy Bridge EP Add JSON metrics for Sandy Bridge EP. Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170908180133.GA20128@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/jaketown/jkt-metrics.json | 140 +++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json new file mode 100644 index 000000000000..b35b1c153c8a --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json @@ -0,0 +1,140 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "UOPS_DISPATCHED.THREAD / ( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricGroup": "FLOPS;Summary", + "MetricName": "GFLOPs" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 8853d2de0efe25572d3a7033bbb87c9b1208391e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 23 Jul 2017 21:51:25 -0700 Subject: perf vendor events: Add JSON metrics for Ivy Bridge Add JSON metrics for Ivy Bridge. Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170908180133.GA20128@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/ivybridge/ivb-metrics.json | 164 +++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json new file mode 100644 index 000000000000..057a832f1a6a --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json @@ -0,0 +1,164 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", + "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END", + "MetricGroup": "Unknown_Branches", + "MetricName": "BAClear_Cost" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "MetricGroup": "Memory_Bound;Memory_Lat", + "MetricName": "Load_Miss_Real_Latency" + }, + { + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricGroup": "Memory_Bound;Memory_BW", + "MetricName": "MLP" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TLB", + "MetricName": "Page_Walks_Utilization" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricGroup": "FLOPS;Summary", + "MetricName": "GFLOPs" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 2099f51d1851c8955ed3406683be0b961c7792cb Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 23 Jul 2017 21:51:47 -0700 Subject: perf vendor events: Add JSON metrics for Haswell Add JSON metrics for Haswell. Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170908180133.GA20128@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/haswell/hsw-metrics.json | 158 +++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json new file mode 100644 index 000000000000..03d894988f0f --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json @@ -0,0 +1,158 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / ( cpu@uops_executed.core\\,cmask\\=1@ / 2)) if #SMT_on else (UOPS_EXECUTED.CORE / cpu@uops_executed.core\\,cmask\\=1@)", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", + "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION ) ) / RS_EVENTS.EMPTY_END", + "MetricGroup": "Unknown_Branches", + "MetricName": "BAClear_Cost" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "MetricGroup": "Memory_Bound;Memory_Lat", + "MetricName": "Load_Miss_Real_Latency" + }, + { + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricGroup": "Memory_Bound;Memory_BW", + "MetricName": "MLP" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TLB", + "MetricName": "Page_Walks_Utilization" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 43fd36a19d501483d5ec243dd28268646f717cde Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 23 Jul 2017 21:55:16 -0700 Subject: perf vendor events: Add JSON metrics for Ivy Town Add JSON metrics for Ivy Town. Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170908180133.GA20128@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/ivytown/ivt-metrics.json | 164 +++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json new file mode 100644 index 000000000000..057a832f1a6a --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json @@ -0,0 +1,164 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", + "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END", + "MetricGroup": "Unknown_Branches", + "MetricName": "BAClear_Cost" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "MetricGroup": "Memory_Bound;Memory_Lat", + "MetricName": "Load_Miss_Real_Latency" + }, + { + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricGroup": "Memory_Bound;Memory_BW", + "MetricName": "MLP" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TLB", + "MetricName": "Page_Walks_Utilization" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricGroup": "FLOPS;Summary", + "MetricName": "GFLOPs" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 5e49f7321b06428ac68c51096cff9a4a55c3d4d6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 23 Jul 2017 21:55:39 -0700 Subject: perf vendor events: Add JSON metrics for Haswell EP Add JSON metrics for Haswell EP. Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170908180133.GA20128@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/haswellx/hsx-metrics.json | 158 +++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json new file mode 100644 index 000000000000..0bae5eda9fb3 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json @@ -0,0 +1,158 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / ( cpu@uops_executed.core\\,cmask\\=1@ / 2)) if #SMT_on else UOPS_EXECUTED.CORE / cpu@uops_executed.core\\,cmask\\=1@", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", + "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION ) ) / RS_EVENTS.EMPTY_END", + "MetricGroup": "Unknown_Branches", + "MetricName": "BAClear_Cost" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "MetricGroup": "Memory_Bound;Memory_Lat", + "MetricName": "Load_Miss_Real_Latency" + }, + { + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricGroup": "Memory_Bound;Memory_BW", + "MetricName": "MLP" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TLB", + "MetricName": "Page_Walks_Utilization" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 6d75abd3e84596933aa2ca91751744271cfec6cb Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 23 Jul 2017 21:55:59 -0700 Subject: perf vendor events: Add JSON metrics for Broadwell Server Add JSON metrics for Broadwell Server. Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170908180133.GA20128@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/x86/broadwellx/bdx-metrics.json | 164 +++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json new file mode 100644 index 000000000000..4248b029d199 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json @@ -0,0 +1,164 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", + "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED ) ) / RS_EVENTS.EMPTY_END", + "MetricGroup": "Unknown_Branches", + "MetricName": "BAClear_Cost" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "MetricGroup": "Memory_Bound;Memory_Lat", + "MetricName": "Load_Miss_Real_Latency" + }, + { + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricGroup": "Memory_Bound;Memory_BW", + "MetricName": "MLP" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED) ) / (2*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)", + "MetricGroup": "TLB", + "MetricName": "Page_Walks_Utilization" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricGroup": "FLOPS;Summary", + "MetricName": "GFLOPs" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 69e932139db1d23e978256652dbb179d6ed75204 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 23 Jul 2017 22:00:42 -0700 Subject: perf vendor events: Add JSON metrics for Broadwell DE Add JSON metrics for Broadwell DE Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170908180133.GA20128@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/x86/broadwellde/bdwde-metrics.json | 164 +++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json new file mode 100644 index 000000000000..49c5f123d811 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json @@ -0,0 +1,164 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots", + "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", + "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED ) ) / RS_EVENTS.EMPTY_END", + "MetricGroup": "Unknown_Branches", + "MetricName": "BAClear_Cost" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )", + "MetricGroup": "Memory_Bound;Memory_Lat", + "MetricName": "Load_Miss_Real_Latency" + }, + { + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricGroup": "Memory_Bound;Memory_BW", + "MetricName": "MLP" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles", + "MetricGroup": "TLB", + "MetricName": "Page_Walks_Utilization" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time", + "MetricGroup": "FLOPS;Summary", + "MetricName": "GFLOPs" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 56de5b63ffaff859f75c19aff057ee10f20c6c07 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Sep 2017 16:26:13 -0700 Subject: perf vendor events: Add JSON metrics for Skylake server Add JSON metrics for Skylake server Signed-off-by: Andi Kleen Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20170908180133.GA20128@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/skylakex/skx-metrics.json | 182 +++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json (limited to 'tools/perf') diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json new file mode 100644 index 000000000000..68f12a26c816 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -0,0 +1,182 @@ +[ + { + "BriefDescription": "Instructions Per Cycle (per logical thread)", + "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "TopDownL1", + "MetricName": "IPC" + }, + { + "BriefDescription": "Uops Per Instruction", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY", + "MetricGroup": "Pipeline", + "MetricName": "UPI" + }, + { + "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions", + "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )", + "MetricGroup": "Frontend", + "MetricName": "IFetch_Line_Utilization" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )", + "MetricGroup": "DSB; Frontend_Bandwidth", + "MetricName": "DSB_Coverage" + }, + { + "BriefDescription": "Cycles Per Instruction (threaded)", + "MetricExpr": "1 / INST_RETIRED.ANY / cycles", + "MetricGroup": "Pipeline;Summary", + "MetricName": "CPI" + }, + { + "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD", + "MetricGroup": "Summary", + "MetricName": "CLKS" + }, + { + "BriefDescription": "Total issue-pipeline slots (per-core)", + "MetricExpr": "4*cycles if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if #EBS_Mode else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 )", + "MetricGroup": "TopDownL1", + "MetricName": "SLOTS" + }, + { + "BriefDescription": "Total number of retired Instructions", + "MetricExpr": "INST_RETIRED.ANY", + "MetricGroup": "Summary", + "MetricName": "Instructions" + }, + { + "BriefDescription": "Instructions Per Cycle (per physical core)", + "MetricExpr": "INST_RETIRED.ANY / cycles if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if #EBS_Mode else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 )", + "MetricGroup": "SMT", + "MetricName": "CoreIPC" + }, + { + "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", + "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1", + "MetricGroup": "Pipeline;Ports_Utilization", + "MetricName": "ILP" + }, + { + "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)", + "MetricExpr": "( RS_EVENTS.EMPTY_CYCLES - (ICACHE_16B.IFDATA_STALL +2* ICACHE_16B.IFDATA_STALL:c1:e1) - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END", + "MetricGroup": "Unknown_Branches", + "MetricName": "BAClear_Cost" + }, + { + "BriefDescription": "Core actual clocks when any thread is active on the physical core", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if 1 else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 )", + "MetricGroup": "SMT", + "MetricName": "CORE_CLKS" + }, + { + "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS_PS + MEM_LOAD_RETIRED.FB_HIT_PS )", + "MetricGroup": "Memory_Bound;Memory_Lat", + "MetricName": "Load_Miss_Real_Latency" + }, + { + "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)", + "MetricExpr": "L1D_PEND_MISS.PENDING / ( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES", + "MetricGroup": "Memory_Bound;Memory_BW", + "MetricName": "MLP" + }, + { + "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles if not #SMT_on else (( CPU_CLK_UNHALTED.THREAD / 2) * (CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK )) if #EBS_Mode else ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) )", + "MetricGroup": "TLB", + "MetricName": "Page_Walks_Utilization" + }, + { + "BriefDescription": "L1 cache miss per kilo instruction for demand loads", + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS_PS / INST_RETIRED.ANY", + "MetricGroup": "Cache_Misses;", + "MetricName": "L1MPKI" + }, + { + "BriefDescription": "L2 cache miss per kilo instruction for demand loads", + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS_PS / INST_RETIRED.ANY", + "MetricGroup": "Cache_Misses;", + "MetricName": "L2MPKI" + }, + { + "BriefDescription": "L3 cache miss per kilo instruction for demand loads", + "MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS_PS / INST_RETIRED.ANY", + "MetricGroup": "Cache_Misses;", + "MetricName": "L3MPKI" + }, + { + "BriefDescription": "Average CPU Utilization", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@", + "MetricGroup": "Summary", + "MetricName": "CPU_Utilization" + }, + { + "BriefDescription": "Giga Floating Point Operations Per Second", + "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* (FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16* FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / 1000000000 / duration_time", + "MetricGroup": "FLOPS;Summary", + "MetricName": "GFLOPs" + }, + { + "BriefDescription": "Average Frequency Utilization relative nominal frequency", + "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Power", + "MetricName": "Turbo_Utilization" + }, + { + "BriefDescription": "Fraction of cycles where both hardware threads were active", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricGroup": "SMT;Summary", + "MetricName": "SMT_2T_Utilization" + }, + { + "BriefDescription": "Fraction of cycles spent in Kernel mode", + "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC", + "MetricGroup": "Summary", + "MetricName": "Kernel_Utilization" + }, + { + "BriefDescription": "C3 residency percent per core", + "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Core_Residency" + }, + { + "BriefDescription": "C6 residency percent per core", + "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Core_Residency" + }, + { + "BriefDescription": "C7 residency percent per core", + "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Core_Residency" + }, + { + "BriefDescription": "C2 residency percent per package", + "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C2_Pkg_Residency" + }, + { + "BriefDescription": "C3 residency percent per package", + "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C3_Pkg_Residency" + }, + { + "BriefDescription": "C6 residency percent per package", + "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C6_Pkg_Residency" + }, + { + "BriefDescription": "C7 residency percent per package", + "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100", + "MetricGroup": "Power", + "MetricName": "C7_Pkg_Residency" + } +] -- cgit v1.2.3 From 91e467bc568f15da2eac688e131010601e889184 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Sun, 10 Sep 2017 19:23:14 -0700 Subject: perf machine: Use hashtable for machine threads To process any events, it needs to find the thread in the machine first. The machine maintains a rb tree to store all threads. The rb tree is protected by a rw lock. It is not a problem for current perf which serially processing events. However, it will have scalability performance issue to process events in parallel, especially on a heavy load system which have many threads. Introduce a hashtable to divide the big rb tree into many samll rb tree for threads. The index is thread id % hashtable size. It can reduce the lock contention. Committer notes: Renamed some variables and function names to reduce semantic confusion: 'struct threads' pointers: thread -> threads threads hastable index: tid -> hash_bucket struct threads *machine__thread() -> machine__threads() Cast tid to (unsigned int) to handle -1 in machine__threads() (Kan Liang) Signed-off-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Andi Kleen Cc: Jiri Olsa Cc: Lukasz Odzioba Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1505096603-215017-2-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 19 ++++--- tools/perf/util/machine.c | 136 +++++++++++++++++++++++++++----------------- tools/perf/util/machine.h | 23 ++++++-- tools/perf/util/rb_resort.h | 5 +- 4 files changed, 117 insertions(+), 66 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 771ddab94bb0..ee8c6e814437 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2730,20 +2730,23 @@ DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_event static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) { - DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host); size_t printed = trace__fprintf_threads_header(fp); struct rb_node *nd; + int i; - if (threads == NULL) { - fprintf(fp, "%s", "Error sorting output by nr_events!\n"); - return 0; - } + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i); - resort_rb__for_each_entry(nd, threads) - printed += trace__fprintf_thread(fp, threads_entry->thread, trace); + if (threads == NULL) { + fprintf(fp, "%s", "Error sorting output by nr_events!\n"); + return 0; + } - resort_rb__delete(threads); + resort_rb__for_each_entry(nd, threads) + printed += trace__fprintf_thread(fp, threads_entry->thread, trace); + resort_rb__delete(threads); + } return printed; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index df709363ef69..f4f926753209 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -33,6 +33,20 @@ static void dsos__init(struct dsos *dsos) pthread_rwlock_init(&dsos->lock, NULL); } +static void machine__threads_init(struct machine *machine) +{ + int i; + + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads *threads = &machine->threads[i]; + threads->entries = RB_ROOT; + pthread_rwlock_init(&threads->lock, NULL); + threads->nr = 0; + INIT_LIST_HEAD(&threads->dead); + threads->last_match = NULL; + } +} + int machine__init(struct machine *machine, const char *root_dir, pid_t pid) { memset(machine, 0, sizeof(*machine)); @@ -40,11 +54,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) RB_CLEAR_NODE(&machine->rb_node); dsos__init(&machine->dsos); - machine->threads = RB_ROOT; - pthread_rwlock_init(&machine->threads_lock, NULL); - machine->nr_threads = 0; - INIT_LIST_HEAD(&machine->dead_threads); - machine->last_match = NULL; + machine__threads_init(machine); machine->vdso_info = NULL; machine->env = NULL; @@ -141,27 +151,37 @@ static void dsos__exit(struct dsos *dsos) void machine__delete_threads(struct machine *machine) { struct rb_node *nd; + int i; - pthread_rwlock_wrlock(&machine->threads_lock); - nd = rb_first(&machine->threads); - while (nd) { - struct thread *t = rb_entry(nd, struct thread, rb_node); + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads *threads = &machine->threads[i]; + pthread_rwlock_wrlock(&threads->lock); + nd = rb_first(&threads->entries); + while (nd) { + struct thread *t = rb_entry(nd, struct thread, rb_node); - nd = rb_next(nd); - __machine__remove_thread(machine, t, false); + nd = rb_next(nd); + __machine__remove_thread(machine, t, false); + } + pthread_rwlock_unlock(&threads->lock); } - pthread_rwlock_unlock(&machine->threads_lock); } void machine__exit(struct machine *machine) { + int i; + machine__destroy_kernel_maps(machine); map_groups__exit(&machine->kmaps); dsos__exit(&machine->dsos); machine__exit_vdso(machine); zfree(&machine->root_dir); zfree(&machine->current_tid); - pthread_rwlock_destroy(&machine->threads_lock); + + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads *threads = &machine->threads[i]; + pthread_rwlock_destroy(&threads->lock); + } } void machine__delete(struct machine *machine) @@ -382,7 +402,8 @@ static struct thread *____machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid, bool create) { - struct rb_node **p = &machine->threads.rb_node; + struct threads *threads = machine__threads(machine, tid); + struct rb_node **p = &threads->entries.rb_node; struct rb_node *parent = NULL; struct thread *th; @@ -391,14 +412,14 @@ static struct thread *____machine__findnew_thread(struct machine *machine, * so most of the time we dont have to look up * the full rbtree: */ - th = machine->last_match; + th = threads->last_match; if (th != NULL) { if (th->tid == tid) { machine__update_thread_pid(machine, th, pid); return thread__get(th); } - machine->last_match = NULL; + threads->last_match = NULL; } while (*p != NULL) { @@ -406,7 +427,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, th = rb_entry(parent, struct thread, rb_node); if (th->tid == tid) { - machine->last_match = th; + threads->last_match = th; machine__update_thread_pid(machine, th, pid); return thread__get(th); } @@ -423,7 +444,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, th = thread__new(pid, tid); if (th != NULL) { rb_link_node(&th->rb_node, parent, p); - rb_insert_color(&th->rb_node, &machine->threads); + rb_insert_color(&th->rb_node, &threads->entries); /* * We have to initialize map_groups separately @@ -434,7 +455,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, * leader and that would screwed the rb tree. */ if (thread__init_map_groups(th, machine)) { - rb_erase_init(&th->rb_node, &machine->threads); + rb_erase_init(&th->rb_node, &threads->entries); RB_CLEAR_NODE(&th->rb_node); thread__put(th); return NULL; @@ -443,8 +464,8 @@ static struct thread *____machine__findnew_thread(struct machine *machine, * It is now in the rbtree, get a ref */ thread__get(th); - machine->last_match = th; - ++machine->nr_threads; + threads->last_match = th; + ++threads->nr; } return th; @@ -458,21 +479,24 @@ struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid) { + struct threads *threads = machine__threads(machine, tid); struct thread *th; - pthread_rwlock_wrlock(&machine->threads_lock); + pthread_rwlock_wrlock(&threads->lock); th = __machine__findnew_thread(machine, pid, tid); - pthread_rwlock_unlock(&machine->threads_lock); + pthread_rwlock_unlock(&threads->lock); return th; } struct thread *machine__find_thread(struct machine *machine, pid_t pid, pid_t tid) { + struct threads *threads = machine__threads(machine, tid); struct thread *th; - pthread_rwlock_rdlock(&machine->threads_lock); + + pthread_rwlock_rdlock(&threads->lock); th = ____machine__findnew_thread(machine, pid, tid, false); - pthread_rwlock_unlock(&machine->threads_lock); + pthread_rwlock_unlock(&threads->lock); return th; } @@ -719,21 +743,24 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp) size_t machine__fprintf(struct machine *machine, FILE *fp) { - size_t ret; struct rb_node *nd; + size_t ret; + int i; - pthread_rwlock_rdlock(&machine->threads_lock); - - ret = fprintf(fp, "Threads: %u\n", machine->nr_threads); + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads *threads = &machine->threads[i]; + pthread_rwlock_rdlock(&threads->lock); - for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { - struct thread *pos = rb_entry(nd, struct thread, rb_node); + ret = fprintf(fp, "Threads: %u\n", threads->nr); - ret += thread__fprintf(pos, fp); - } + for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { + struct thread *pos = rb_entry(nd, struct thread, rb_node); - pthread_rwlock_unlock(&machine->threads_lock); + ret += thread__fprintf(pos, fp); + } + pthread_rwlock_unlock(&threads->lock); + } return ret; } @@ -1479,23 +1506,25 @@ out_problem: static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock) { - if (machine->last_match == th) - machine->last_match = NULL; + struct threads *threads = machine__threads(machine, th->tid); + + if (threads->last_match == th) + threads->last_match = NULL; BUG_ON(refcount_read(&th->refcnt) == 0); if (lock) - pthread_rwlock_wrlock(&machine->threads_lock); - rb_erase_init(&th->rb_node, &machine->threads); + pthread_rwlock_wrlock(&threads->lock); + rb_erase_init(&th->rb_node, &threads->entries); RB_CLEAR_NODE(&th->rb_node); - --machine->nr_threads; + --threads->nr; /* * Move it first to the dead_threads list, then drop the reference, * if this is the last reference, then the thread__delete destructor * will be called and we will remove it from the dead_threads list. */ - list_add_tail(&th->node, &machine->dead_threads); + list_add_tail(&th->node, &threads->dead); if (lock) - pthread_rwlock_unlock(&machine->threads_lock); + pthread_rwlock_unlock(&threads->lock); thread__put(th); } @@ -2140,21 +2169,26 @@ int machine__for_each_thread(struct machine *machine, int (*fn)(struct thread *thread, void *p), void *priv) { + struct threads *threads; struct rb_node *nd; struct thread *thread; int rc = 0; + int i; - for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { - thread = rb_entry(nd, struct thread, rb_node); - rc = fn(thread, priv); - if (rc != 0) - return rc; - } + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + threads = &machine->threads[i]; + for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { + thread = rb_entry(nd, struct thread, rb_node); + rc = fn(thread, priv); + if (rc != 0) + return rc; + } - list_for_each_entry(thread, &machine->dead_threads, node) { - rc = fn(thread, priv); - if (rc != 0) - return rc; + list_for_each_entry(thread, &threads->dead, node) { + rc = fn(thread, priv); + if (rc != 0) + return rc; + } } return rc; } diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 3cdb1340f917..fe2f05848050 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -23,6 +23,17 @@ extern const char *ref_reloc_sym_names[]; struct vdso_info; +#define THREADS__TABLE_BITS 8 +#define THREADS__TABLE_SIZE (1 << THREADS__TABLE_BITS) + +struct threads { + struct rb_root entries; + pthread_rwlock_t lock; + unsigned int nr; + struct list_head dead; + struct thread *last_match; +}; + struct machine { struct rb_node rb_node; pid_t pid; @@ -30,11 +41,7 @@ struct machine { bool comm_exec; bool kptr_restrict_warned; char *root_dir; - struct rb_root threads; - pthread_rwlock_t threads_lock; - unsigned int nr_threads; - struct list_head dead_threads; - struct thread *last_match; + struct threads threads[THREADS__TABLE_SIZE]; struct vdso_info *vdso_info; struct perf_env *env; struct dsos dsos; @@ -48,6 +55,12 @@ struct machine { }; }; +static inline struct threads *machine__threads(struct machine *machine, pid_t tid) +{ + /* Cast it to handle tid == -1 */ + return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE]; +} + static inline struct map *__machine__kernel_map(struct machine *machine, enum map_type type) { diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h index 808cc45611fe..b30746f5f613 100644 --- a/tools/perf/util/rb_resort.h +++ b/tools/perf/util/rb_resort.h @@ -143,7 +143,8 @@ struct __name##_sorted *__name = __name##_sorted__new __ilist->rblist.nr_entries) /* For 'struct machine->threads' */ -#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine) \ - DECLARE_RESORT_RB(__name)(&__machine->threads, __machine->nr_threads) +#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket) \ + DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries, \ + __machine->threads[hash_bucket].nr) #endif /* _PERF_RESORT_RB_H_ */ -- cgit v1.2.3 From 75e45e432052c7b1a5da866cff88192db8be1445 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Sep 2017 16:16:34 -0300 Subject: perf machine: Optimize a bit the machine__findnew_thread() methods In some cases we already have calculated the hash bucket, so reuse it. Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Kan Liang Cc: Lukasz Odzioba Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-800zehjsyy03er4s4jf0e99v@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index f4f926753209..ddeea05eae86 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -399,10 +399,10 @@ out_err: * lookup/new thread inserted. */ static struct thread *____machine__findnew_thread(struct machine *machine, + struct threads *threads, pid_t pid, pid_t tid, bool create) { - struct threads *threads = machine__threads(machine, tid); struct rb_node **p = &threads->entries.rb_node; struct rb_node *parent = NULL; struct thread *th; @@ -473,7 +473,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid) { - return ____machine__findnew_thread(machine, pid, tid, true); + return ____machine__findnew_thread(machine, machine__threads(machine, tid), pid, tid, true); } struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, @@ -495,7 +495,7 @@ struct thread *machine__find_thread(struct machine *machine, pid_t pid, struct thread *th; pthread_rwlock_rdlock(&threads->lock); - th = ____machine__findnew_thread(machine, pid, tid, false); + th = ____machine__findnew_thread(machine, threads, pid, tid, false); pthread_rwlock_unlock(&threads->lock); return th; } -- cgit v1.2.3 From 333b566559019b146905c623bde7f455c1d5add3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 13 Sep 2017 14:50:06 -0700 Subject: perf pmu: Improve error messages for missing PMUs When a PMU is missing print a better error message mentioning the missing PMU. % mkdir empty % mount --bind empty /sys/devices/msr % perf stat -M Summary true event syntax error: '{inst_retired.any,cycles}:W,{cpu_clk_unhalted.thread}:W,{inst_retired.any}:W,{cpu_clk_unhalted.ref_tsc,msr/tsc/}:W,{fp_comp_ops_exe.sse_scalar..' \___ Cannot find PMU `msr'. Missing kernel support? It still cannot find the right column for aliases, but it's already a vast improvement. v2: Check asprintf Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170913215006.32222-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 2 +- tools/perf/util/parse-events.c | 18 ++++++++++++------ tools/perf/util/parse-events.h | 3 +++ 3 files changed, 16 insertions(+), 7 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 2d60114f1870..fa37ef79517a 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -477,7 +477,7 @@ int metricgroup__parse_groups(const struct option *opt, memset(&parse_error, 0, sizeof(struct parse_events_error)); ret = parse_events(perf_evlist, extra_events.buf, &parse_error); if (ret) { - pr_err("Cannot set up events %s\n", extra_events.buf); + parse_events_print_error(&parse_error, extra_events.buf); goto out; } strbuf_release(&extra_events); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 75588920fccc..9183913a6174 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1219,11 +1219,17 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state, struct perf_pmu_info info; struct perf_pmu *pmu; struct perf_evsel *evsel; + struct parse_events_error *err = parse_state->error; LIST_HEAD(config_terms); pmu = perf_pmu__find(name); - if (!pmu) + if (!pmu) { + if (asprintf(&err->str, + "Cannot find PMU `%s'. Missing kernel support?", + name) < 0) + err->str = NULL; return -EINVAL; + } if (pmu->default_config) { memcpy(&attr, pmu->default_config, @@ -1733,8 +1739,8 @@ static int get_term_width(void) return ws.ws_col > MAX_WIDTH ? MAX_WIDTH : ws.ws_col; } -static void parse_events_print_error(struct parse_events_error *err, - const char *event) +void parse_events_print_error(struct parse_events_error *err, + const char *event) { const char *str = "invalid or unsupported event: "; char _buf[MAX_WIDTH]; @@ -1789,8 +1795,6 @@ static void parse_events_print_error(struct parse_events_error *err, zfree(&err->str); zfree(&err->help); } - - fprintf(stderr, "Run 'perf list' for a list of valid events\n"); } #undef MAX_WIDTH @@ -1802,8 +1806,10 @@ int parse_events_option(const struct option *opt, const char *str, struct parse_events_error err = { .idx = 0, }; int ret = parse_events(evlist, str, &err); - if (ret) + if (ret) { parse_events_print_error(&err, str); + fprintf(stderr, "Run 'perf list' for a list of valid events\n"); + } return ret; } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 635135125111..3909ca0639f2 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -202,6 +202,9 @@ int is_valid_tracepoint(const char *event_string); int valid_event_mount(const char *eventfs); char *parse_events_formats_error_string(char *additional_terms); +void parse_events_print_error(struct parse_events_error *err, + const char *event); + #ifdef HAVE_LIBELF_SUPPORT /* * If the probe point starts with '%', -- cgit v1.2.3 From c896f85a7c15ab9d040ffac8b8003e47996602a2 Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Wed, 13 Sep 2017 21:14:19 +0200 Subject: perf tools: Fix leaking rec_argv in error cases Let's free the allocated rec_argv in case we return early, in order to avoid leaking memory. This adds free() at a few very similar places across the tree where it was missing. Signed-off-by: Martin Kepplinger Cc: Alexander Shishkin Cc: Martin kepplinger Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170913191419.29806-1-martink@posteo.de Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 1 + tools/perf/builtin-mem.c | 1 + tools/perf/builtin-timechart.c | 4 +++- tools/perf/builtin-trace.c | 1 + 4 files changed, 6 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 475999e48f66..bb1ee22bd221 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2732,6 +2732,7 @@ static int perf_c2c__record(int argc, const char **argv) if (!perf_mem_events[j].supported) { pr_err("failed: event '%s' not supported\n", perf_mem_events[j].name); + free(rec_argv); return -1; } diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 0f15634ef82c..6940490bc3f9 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -112,6 +112,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) if (!perf_mem_events[j].supported) { pr_err("failed: event '%s' not supported\n", perf_mem_events__name(j)); + free(rec_argv); return -1; } diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 4e2e61695986..01de01ca14f2 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -1732,8 +1732,10 @@ static int timechart__io_record(int argc, const char **argv) if (rec_argv == NULL) return -ENOMEM; - if (asprintf(&filter, "common_pid != %d", getpid()) < 0) + if (asprintf(&filter, "common_pid != %d", getpid()) < 0) { + free(rec_argv); return -ENOMEM; + } p = rec_argv; for (i = 0; i < common_args_nr; i++) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ee8c6e814437..967bd351b58d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2078,6 +2078,7 @@ static int trace__record(struct trace *trace, int argc, const char **argv) rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit"; else { pr_err("Neither raw_syscalls nor syscalls events exist.\n"); + free(rec_argv); return -1; } } -- cgit v1.2.3 From 411bc316f3365363959c2c895af2618389534583 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Sep 2017 13:57:35 -0700 Subject: perf stat: Fix adding multiple event groups The -M metric group parser threw away the events of earlier groups when multiple groups were specified. Fix this here by not overwriting the string incorrectly. Now this works correctly: % perf stat -M Summary,SMT --metric-only -a sleep 1 Performance counter stats for 'system wide': Instructions CPI CLKS CPU_Utilization GFLOPs SMT_2T_Utilization SMT_2T_Utilization Kernel_Utilization CoreIPC CORE_CLKS 900907376.0 2.7 2398954144.0 0.1 0.0 0.2 0.2 0.1 0.4 2080822855.5 while previously it would only show the SMT metrics. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20170914205735.18431-1-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/metricgroup.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index fa37ef79517a..0ddd9c199227 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -373,9 +373,6 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events, int ret = -EINVAL; int i, j; - strbuf_init(events, 100); - strbuf_addf(events, "%s", ""); - if (!map) return 0; @@ -433,6 +430,10 @@ static int metricgroup__add_metric_list(const char *list, struct strbuf *events, if (!nlist) return -ENOMEM; llist = nlist; + + strbuf_init(events, 100); + strbuf_addf(events, "%s", ""); + while ((p = strsep(&llist, ",")) != NULL) { ret = metricgroup__add_metric(p, events, group_list); if (ret == -EINVAL) { -- cgit v1.2.3 From 5c9295bfe6f5f59f3f2eee78f58b0523d117897e Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Tue, 19 Sep 2017 12:57:37 +0800 Subject: perf tests: Remove Intel CQM perf test Intel CQM perf test is obsolete for perf PMU code has been removed in commit c39a0e2c8850 ("x86/perf/cqm: Wipe out perf based cqm"). Signed-off-by: Xiaochen Shen Cc: Alexander Shishkin Cc: Fenghua Yu Cc: Matt Fleming Cc: Pei P Jia Cc: Peter Zijlstra Cc: Tony Luck Cc: Vikas Shivappa Link: http://lkml.kernel.org/r/1505797057-16300-1-git-send-email-xiaochen.shen@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/include/arch-tests.h | 1 - tools/perf/arch/x86/tests/Build | 1 - tools/perf/arch/x86/tests/arch-tests.c | 4 - tools/perf/arch/x86/tests/intel-cqm.c | 127 ------------------------------- 4 files changed, 133 deletions(-) delete mode 100644 tools/perf/arch/x86/tests/intel-cqm.c (limited to 'tools/perf') diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index 4e0b806a7a0f..01ad4208bcdf 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -8,7 +8,6 @@ struct test; int test__rdpmc(struct test *test __maybe_unused, int subtest); int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest); int test__insn_x86(struct test *test __maybe_unused, int subtest); -int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subtest); #ifdef HAVE_DWARF_UNWIND_SUPPORT struct thread; diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index cbb7e978166b..8e2c5a38c3b9 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -5,4 +5,3 @@ libperf-y += arch-tests.o libperf-y += rdpmc.o libperf-y += perf-time-to-tsc.o libperf-$(CONFIG_AUXTRACE) += insn-x86.o -libperf-y += intel-cqm.o diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index 99d66191e56c..271c0a0f95d7 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -23,10 +23,6 @@ struct test arch_tests[] = { .func = test__insn_x86, }, #endif - { - .desc = "Intel cqm nmi context read", - .func = test__intel_cqm_count_nmi_context, - }, { .func = NULL, }, diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c deleted file mode 100644 index 57f86b6e7d6f..000000000000 --- a/tools/perf/arch/x86/tests/intel-cqm.c +++ /dev/null @@ -1,127 +0,0 @@ -#include "tests/tests.h" -#include "perf.h" -#include "cloexec.h" -#include "debug.h" -#include "evlist.h" -#include "evsel.h" -#include "arch-tests.h" - -#include -#include -#include -#include -#include - -static pid_t spawn(void) -{ - pid_t pid; - - pid = fork(); - if (pid) - return pid; - - while(1) - sleep(5); - return 0; -} - -/* - * Create an event group that contains both a sampled hardware - * (cpu-cycles) and software (intel_cqm/llc_occupancy/) event. We then - * wait for the hardware perf counter to overflow and generate a PMI, - * which triggers an event read for both of the events in the group. - * - * Since reading Intel CQM event counters requires sending SMP IPIs, the - * CQM pmu needs to handle the above situation gracefully, and return - * the last read counter value to avoid triggering a WARN_ON_ONCE() in - * smp_call_function_many() caused by sending IPIs from NMI context. - */ -int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subtest __maybe_unused) -{ - struct perf_evlist *evlist = NULL; - struct perf_evsel *evsel = NULL; - struct perf_event_attr pe; - int i, fd[2], flag, ret; - size_t mmap_len; - void *event; - pid_t pid; - int err = TEST_FAIL; - - flag = perf_event_open_cloexec_flag(); - - evlist = perf_evlist__new(); - if (!evlist) { - pr_debug("perf_evlist__new failed\n"); - return TEST_FAIL; - } - - ret = parse_events(evlist, "intel_cqm/llc_occupancy/", NULL); - if (ret) { - pr_debug("parse_events failed, is \"intel_cqm/llc_occupancy/\" available?\n"); - err = TEST_SKIP; - goto out; - } - - evsel = perf_evlist__first(evlist); - if (!evsel) { - pr_debug("perf_evlist__first failed\n"); - goto out; - } - - memset(&pe, 0, sizeof(pe)); - pe.size = sizeof(pe); - - pe.type = PERF_TYPE_HARDWARE; - pe.config = PERF_COUNT_HW_CPU_CYCLES; - pe.read_format = PERF_FORMAT_GROUP; - - pe.sample_period = 128; - pe.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_READ; - - pid = spawn(); - - fd[0] = sys_perf_event_open(&pe, pid, -1, -1, flag); - if (fd[0] < 0) { - pr_debug("failed to open event\n"); - goto out; - } - - memset(&pe, 0, sizeof(pe)); - pe.size = sizeof(pe); - - pe.type = evsel->attr.type; - pe.config = evsel->attr.config; - - fd[1] = sys_perf_event_open(&pe, pid, -1, fd[0], flag); - if (fd[1] < 0) { - pr_debug("failed to open event\n"); - goto out; - } - - /* - * Pick a power-of-two number of pages + 1 for the meta-data - * page (struct perf_event_mmap_page). See tools/perf/design.txt. - */ - mmap_len = page_size * 65; - - event = mmap(NULL, mmap_len, PROT_READ, MAP_SHARED, fd[0], 0); - if (event == (void *)(-1)) { - pr_debug("failed to mmap %d\n", errno); - goto out; - } - - sleep(1); - - err = TEST_OK; - - munmap(event, mmap_len); - - for (i = 0; i < 2; i++) - close(fd[i]); - - kill(pid, SIGKILL); - wait(NULL); -out: - perf_evlist__delete(evlist); - return err; -} -- cgit v1.2.3 From 5a54c2f5e1717264f6e24687f703937eaca5f55d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 20 Sep 2017 12:30:36 -0300 Subject: perf trace beauty madvise: Generate 'behavior' string table from kernel headers This is one more case where the way that syscall parameter values are defined in kernel headers are easy to parse using a shell script that will then generate the string table that gets used by the madvise 'behaviour' argument beautifier. This way as soon as the header syncronization mechanism in perf's build system detects a change in a copy of a kernel ABI header and that file is syncronized, we get 'perf trace' updated automagically. So, when we syncronize this: Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/mman-common.h' differs from latest version at 'include/uapi/asm-generic/mman-common.h' We'll get these: #define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ #define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Rik van Riel Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-dolb0ghds4ui7wc1npgkchvc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 9 +++++++ tools/perf/trace/beauty/madvise_behavior.sh | 10 ++++++++ tools/perf/trace/beauty/mmap.c | 38 +++++++++-------------------- 3 files changed, 31 insertions(+), 26 deletions(-) create mode 100755 tools/perf/trace/beauty/madvise_behavior.sh (limited to 'tools/perf') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 1df93b4c4648..5f7408118a2d 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -441,6 +441,13 @@ perf_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/perf_ioctl.sh $(perf_ioctl_array): $(perf_hdr_dir)/perf_event.h $(perf_ioctl_tbl) $(Q)$(SHELL) '$(perf_ioctl_tbl)' $(perf_hdr_dir) > $@ +madvise_behavior_array := $(beauty_outdir)/madvise_behavior_array.c +madvise_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/ +madvise_behavior_tbl := $(srctree)/tools/perf/trace/beauty/madvise_behavior.sh + +$(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_tbl) + $(Q)$(SHELL) '$(madvise_behavior_tbl)' $(madvise_hdr_dir) > $@ + all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) @@ -541,6 +548,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(sndrv_ctl_ioctl_array) \ $(kvm_ioctl_array) \ $(vhost_virtio_ioctl_array) \ + $(madvise_behavior_array) \ $(perf_ioctl_array) $(OUTPUT)%.o: %.c prepare FORCE @@ -814,6 +822,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)util/intel-pt-decoder/inat-tables.c \ $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ $(OUTPUT)pmu-events/pmu-events.c \ + $(OUTPUT)$(madvise_behavior_array) \ $(OUTPUT)$(drm_ioctl_array) \ $(OUTPUT)$(pkey_alloc_access_rights_array) \ $(OUTPUT)$(sndrv_ctl_ioctl_array) \ diff --git a/tools/perf/trace/beauty/madvise_behavior.sh b/tools/perf/trace/beauty/madvise_behavior.sh new file mode 100755 index 000000000000..60ef8640ee70 --- /dev/null +++ b/tools/perf/trace/beauty/madvise_behavior.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +header_dir=$1 + +printf "static const char *madvise_advices[] = {\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MADV_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*' +egrep $regex ${header_dir}/mman-common.h | \ + sed -r "s/$regex/\2 \1/g" | \ + sort -n | xargs printf "\t[%s] = \"%s\",\n" +printf "};\n" diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index 754558f9009d..45b1e0c0018f 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -94,35 +94,21 @@ static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size, #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags +static size_t madvise__scnprintf_behavior(int behavior, char *bf, size_t size) +{ +#include "trace/beauty/generated/madvise_behavior_array.c" + static DEFINE_STRARRAY(madvise_advices); + + if (behavior < strarray__madvise_advices.nr_entries && strarray__madvise_advices.entries[behavior] != NULL) + return scnprintf(bf, size, "MADV_%s", strarray__madvise_advices.entries[behavior]); + + return scnprintf(bf, size, "%#", behavior); +} + static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size, struct syscall_arg *arg) { - int behavior = arg->val; - - switch (behavior) { -#define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n) - P_MADV_BHV(NORMAL); - P_MADV_BHV(RANDOM); - P_MADV_BHV(SEQUENTIAL); - P_MADV_BHV(WILLNEED); - P_MADV_BHV(DONTNEED); - P_MADV_BHV(FREE); - P_MADV_BHV(REMOVE); - P_MADV_BHV(DONTFORK); - P_MADV_BHV(DOFORK); - P_MADV_BHV(HWPOISON); - P_MADV_BHV(SOFT_OFFLINE); - P_MADV_BHV(MERGEABLE); - P_MADV_BHV(UNMERGEABLE); - P_MADV_BHV(HUGEPAGE); - P_MADV_BHV(NOHUGEPAGE); - P_MADV_BHV(DONTDUMP); - P_MADV_BHV(DODUMP); -#undef P_MADV_BHV - default: break; - } - - return scnprintf(bf, size, "%#x", behavior); + return madvise__scnprintf_behavior(arg->val, bf, size); } #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior -- cgit v1.2.3 From 0e1eed80885fdb09993bffb16f5662f4b53c1a08 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 20 Sep 2017 16:41:34 -0300 Subject: perf tools: Get all of tools/{arch,include}/ in the MANIFEST Now that I'm switching the container builds from using a local volume pointing to the kernel repository with the perf sources, instead getting a detached tarball to be able to use a container cluster, some places broke because I forgot to put some of the required files in tools/perf/MANIFEST, namely some bitsperlong.h files. So, to fix it do the same as for tools/build/ and pack the whole tools/arch/ directory. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-wmenpjfjsobwdnfde30qqncj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/MANIFEST | 87 ++--------------------------------------------------- 1 file changed, 2 insertions(+), 85 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 62072822dc85..627b7cada144 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -1,34 +1,8 @@ tools/perf -tools/arch/alpha/include/asm/barrier.h -tools/arch/arm/include/asm/barrier.h -tools/arch/arm64/include/asm/barrier.h -tools/arch/ia64/include/asm/barrier.h -tools/arch/mips/include/asm/barrier.h -tools/arch/powerpc/include/asm/barrier.h -tools/arch/s390/include/asm/barrier.h -tools/arch/sh/include/asm/barrier.h -tools/arch/sparc/include/asm/barrier.h -tools/arch/sparc/include/asm/barrier_32.h -tools/arch/sparc/include/asm/barrier_64.h -tools/arch/tile/include/asm/barrier.h -tools/arch/x86/include/asm/barrier.h -tools/arch/x86/include/asm/cmpxchg.h -tools/arch/x86/include/asm/cpufeatures.h -tools/arch/x86/include/asm/disabled-features.h -tools/arch/x86/include/asm/required-features.h -tools/arch/x86/include/uapi/asm/svm.h -tools/arch/x86/include/uapi/asm/vmx.h -tools/arch/x86/include/uapi/asm/kvm.h -tools/arch/x86/include/uapi/asm/kvm_perf.h -tools/arch/x86/lib/memcpy_64.S -tools/arch/x86/lib/memset_64.S -tools/arch/s390/include/uapi/asm/kvm_perf.h -tools/arch/s390/include/uapi/asm/sie.h -tools/arch/xtensa/include/asm/barrier.h +tools/arch tools/scripts tools/build -tools/arch/x86/include/asm/atomic.h -tools/arch/x86/include/asm/rmwcc.h +tools/include tools/lib/traceevent tools/lib/api tools/lib/bpf @@ -42,60 +16,3 @@ tools/lib/find_bit.c tools/lib/bitmap.c tools/lib/str_error_r.c tools/lib/vsprintf.c -tools/include/asm/alternative-asm.h -tools/include/asm/atomic.h -tools/include/asm/barrier.h -tools/include/asm/bug.h -tools/include/asm-generic/atomic-gcc.h -tools/include/asm-generic/barrier.h -tools/include/asm-generic/bitops/arch_hweight.h -tools/include/asm-generic/bitops/atomic.h -tools/include/asm-generic/bitops/const_hweight.h -tools/include/asm-generic/bitops/__ffs.h -tools/include/asm-generic/bitops/__ffz.h -tools/include/asm-generic/bitops/__fls.h -tools/include/asm-generic/bitops/find.h -tools/include/asm-generic/bitops/fls64.h -tools/include/asm-generic/bitops/fls.h -tools/include/asm-generic/bitops/hweight.h -tools/include/asm-generic/bitops.h -tools/include/linux/atomic.h -tools/include/linux/bitops.h -tools/include/linux/compiler.h -tools/include/linux/compiler-gcc.h -tools/include/linux/coresight-pmu.h -tools/include/linux/bug.h -tools/include/linux/filter.h -tools/include/linux/hash.h -tools/include/linux/kernel.h -tools/include/linux/list.h -tools/include/linux/log2.h -tools/include/uapi/asm-generic/fcntl.h -tools/include/uapi/asm-generic/ioctls.h -tools/include/uapi/asm-generic/mman-common.h -tools/include/uapi/asm-generic/mman.h -tools/include/uapi/drm/drm.h -tools/include/uapi/drm/i915_drm.h -tools/include/uapi/linux/bpf.h -tools/include/uapi/linux/bpf_common.h -tools/include/uapi/linux/fcntl.h -tools/include/uapi/linux/hw_breakpoint.h -tools/include/uapi/linux/kvm.h -tools/include/uapi/linux/mman.h -tools/include/uapi/linux/perf_event.h -tools/include/uapi/linux/sched.h -tools/include/uapi/linux/stat.h -tools/include/uapi/linux/vhost.h -tools/include/uapi/sound/asound.h -tools/include/linux/poison.h -tools/include/linux/rbtree.h -tools/include/linux/rbtree_augmented.h -tools/include/linux/refcount.h -tools/include/linux/string.h -tools/include/linux/stringify.h -tools/include/linux/types.h -tools/include/linux/err.h -tools/include/linux/bitmap.h -tools/include/linux/time64.h -tools/arch/*/include/uapi/asm/mman.h -tools/arch/*/include/uapi/asm/perf_regs.h -- cgit v1.2.3 From 0a7c74eae307894c6c95316c382f118aef8481e8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 4 Apr 2017 13:15:04 -0300 Subject: perf tools: Provide mutex wrappers for pthreads rwlocks Andi reported a performance drop in single threaded perf tools such as 'perf script' due to the growing number of locks being put in place to allow for multithreaded tools, so wrap the POSIX threads rwlock routines with the names used for such kinds of locks in the Linux kernel and then allow for tools to ask for those locks to be used or not. I.e. a tool may have a multithreaded phase and then switch to single threaded, like the upcoming patches for the synthesizing of PERF_RECORD_{FORK,MMAP,etc} for pre-existing processes to then switch to single threaded mode in 'perf top'. The init routines will not be conditional, this way starting as single threaded to then move to multi threaded mode should be possible. Reported-by: Andi Kleen Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/r/20170404161739.GH12903@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 1 - tools/perf/builtin-script.c | 2 ++ tools/perf/util/Build | 1 + tools/perf/util/dso.c | 12 +++++------ tools/perf/util/dso.h | 4 ++-- tools/perf/util/machine.c | 41 +++++++++++++++++++------------------- tools/perf/util/machine.h | 3 ++- tools/perf/util/map.c | 34 +++++++++++++++---------------- tools/perf/util/map.h | 3 ++- tools/perf/util/rwsem.c | 32 +++++++++++++++++++++++++++++ tools/perf/util/rwsem.h | 19 ++++++++++++++++++ tools/perf/util/symbol.c | 8 ++++---- tools/perf/util/thread.c | 4 ++-- tools/perf/util/trace-event-info.c | 1 - tools/perf/util/trace-event-read.c | 1 - tools/perf/util/util.c | 13 ++++++++++++ tools/perf/util/util.h | 5 +++++ tools/perf/util/vdso.c | 4 ++-- 18 files changed, 130 insertions(+), 58 deletions(-) create mode 100644 tools/perf/util/rwsem.c create mode 100644 tools/perf/util/rwsem.h (limited to 'tools/perf') diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index f309c3773522..c747a1af49fe 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -34,7 +34,6 @@ #include #include #include -#include #include static const char *get_filename_for_perf_kvm(void) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 725dbd3dd104..9092de0f7238 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2829,6 +2829,8 @@ int cmd_script(int argc, const char **argv) NULL }; + perf_set_singlethreaded(); + setup_scripting(); argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage, diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 71ab8466714d..369c3163e68c 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -79,6 +79,7 @@ libperf-y += data.o libperf-y += tsc.o libperf-y += cloexec.o libperf-y += call-path.o +libperf-y += rwsem.o libperf-y += thread-stack.o libperf-$(CONFIG_AUXTRACE) += auxtrace.o libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index ffd723179a4b..339e52971380 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1366,9 +1366,9 @@ void __dsos__add(struct dsos *dsos, struct dso *dso) void dsos__add(struct dsos *dsos, struct dso *dso) { - pthread_rwlock_wrlock(&dsos->lock); + down_write(&dsos->lock); __dsos__add(dsos, dso); - pthread_rwlock_unlock(&dsos->lock); + up_write(&dsos->lock); } struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short) @@ -1387,9 +1387,9 @@ struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short) struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short) { struct dso *dso; - pthread_rwlock_rdlock(&dsos->lock); + down_read(&dsos->lock); dso = __dsos__find(dsos, name, cmp_short); - pthread_rwlock_unlock(&dsos->lock); + up_read(&dsos->lock); return dso; } @@ -1416,9 +1416,9 @@ struct dso *__dsos__findnew(struct dsos *dsos, const char *name) struct dso *dsos__findnew(struct dsos *dsos, const char *name) { struct dso *dso; - pthread_rwlock_wrlock(&dsos->lock); + down_write(&dsos->lock); dso = dso__get(__dsos__findnew(dsos, name)); - pthread_rwlock_unlock(&dsos->lock); + up_write(&dsos->lock); return dso; } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index f886141678eb..a2bbb21f301c 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include "rwsem.h" #include #include #include "map.h" @@ -129,7 +129,7 @@ struct dso_cache { struct dsos { struct list_head head; struct rb_root root; /* rbtree root sorted by long name */ - pthread_rwlock_t lock; + struct rw_semaphore lock; }; struct auxtrace_cache; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index ddeea05eae86..585b4a3d64a4 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -30,7 +30,7 @@ static void dsos__init(struct dsos *dsos) { INIT_LIST_HEAD(&dsos->head); dsos->root = RB_ROOT; - pthread_rwlock_init(&dsos->lock, NULL); + init_rwsem(&dsos->lock); } static void machine__threads_init(struct machine *machine) @@ -40,7 +40,7 @@ static void machine__threads_init(struct machine *machine) for (i = 0; i < THREADS__TABLE_SIZE; i++) { struct threads *threads = &machine->threads[i]; threads->entries = RB_ROOT; - pthread_rwlock_init(&threads->lock, NULL); + init_rwsem(&threads->lock); threads->nr = 0; INIT_LIST_HEAD(&threads->dead); threads->last_match = NULL; @@ -130,7 +130,7 @@ static void dsos__purge(struct dsos *dsos) { struct dso *pos, *n; - pthread_rwlock_wrlock(&dsos->lock); + down_write(&dsos->lock); list_for_each_entry_safe(pos, n, &dsos->head, node) { RB_CLEAR_NODE(&pos->rb_node); @@ -139,13 +139,13 @@ static void dsos__purge(struct dsos *dsos) dso__put(pos); } - pthread_rwlock_unlock(&dsos->lock); + up_write(&dsos->lock); } static void dsos__exit(struct dsos *dsos) { dsos__purge(dsos); - pthread_rwlock_destroy(&dsos->lock); + exit_rwsem(&dsos->lock); } void machine__delete_threads(struct machine *machine) @@ -155,7 +155,7 @@ void machine__delete_threads(struct machine *machine) for (i = 0; i < THREADS__TABLE_SIZE; i++) { struct threads *threads = &machine->threads[i]; - pthread_rwlock_wrlock(&threads->lock); + down_write(&threads->lock); nd = rb_first(&threads->entries); while (nd) { struct thread *t = rb_entry(nd, struct thread, rb_node); @@ -163,7 +163,7 @@ void machine__delete_threads(struct machine *machine) nd = rb_next(nd); __machine__remove_thread(machine, t, false); } - pthread_rwlock_unlock(&threads->lock); + up_write(&threads->lock); } } @@ -180,7 +180,7 @@ void machine__exit(struct machine *machine) for (i = 0; i < THREADS__TABLE_SIZE; i++) { struct threads *threads = &machine->threads[i]; - pthread_rwlock_destroy(&threads->lock); + exit_rwsem(&threads->lock); } } @@ -482,9 +482,9 @@ struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, struct threads *threads = machine__threads(machine, tid); struct thread *th; - pthread_rwlock_wrlock(&threads->lock); + down_write(&threads->lock); th = __machine__findnew_thread(machine, pid, tid); - pthread_rwlock_unlock(&threads->lock); + up_write(&threads->lock); return th; } @@ -494,9 +494,9 @@ struct thread *machine__find_thread(struct machine *machine, pid_t pid, struct threads *threads = machine__threads(machine, tid); struct thread *th; - pthread_rwlock_rdlock(&threads->lock); + down_read(&threads->lock); th = ____machine__findnew_thread(machine, threads, pid, tid, false); - pthread_rwlock_unlock(&threads->lock); + up_read(&threads->lock); return th; } @@ -588,7 +588,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine, { struct dso *dso; - pthread_rwlock_wrlock(&machine->dsos.lock); + down_write(&machine->dsos.lock); dso = __dsos__find(&machine->dsos, m->name, true); if (!dso) { @@ -602,7 +602,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine, dso__get(dso); out_unlock: - pthread_rwlock_unlock(&machine->dsos.lock); + up_write(&machine->dsos.lock); return dso; } @@ -749,7 +749,8 @@ size_t machine__fprintf(struct machine *machine, FILE *fp) for (i = 0; i < THREADS__TABLE_SIZE; i++) { struct threads *threads = &machine->threads[i]; - pthread_rwlock_rdlock(&threads->lock); + + down_read(&threads->lock); ret = fprintf(fp, "Threads: %u\n", threads->nr); @@ -759,7 +760,7 @@ size_t machine__fprintf(struct machine *machine, FILE *fp) ret += thread__fprintf(pos, fp); } - pthread_rwlock_unlock(&threads->lock); + up_read(&threads->lock); } return ret; } @@ -1319,7 +1320,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, struct dso *kernel = NULL; struct dso *dso; - pthread_rwlock_rdlock(&machine->dsos.lock); + down_read(&machine->dsos.lock); list_for_each_entry(dso, &machine->dsos.head, node) { @@ -1349,7 +1350,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, break; } - pthread_rwlock_unlock(&machine->dsos.lock); + up_read(&machine->dsos.lock); if (kernel == NULL) kernel = machine__findnew_dso(machine, kmmap_prefix); @@ -1513,7 +1514,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th, BUG_ON(refcount_read(&th->refcnt) == 0); if (lock) - pthread_rwlock_wrlock(&threads->lock); + down_write(&threads->lock); rb_erase_init(&th->rb_node, &threads->entries); RB_CLEAR_NODE(&th->rb_node); --threads->nr; @@ -1524,7 +1525,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th, */ list_add_tail(&th->node, &threads->dead); if (lock) - pthread_rwlock_unlock(&threads->lock); + up_write(&threads->lock); thread__put(th); } diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index fe2f05848050..b1cd516f2025 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -6,6 +6,7 @@ #include "map.h" #include "dso.h" #include "event.h" +#include "rwsem.h" struct addr_location; struct branch_stack; @@ -28,7 +29,7 @@ struct vdso_info; struct threads { struct rb_root entries; - pthread_rwlock_t lock; + struct rw_semaphore lock; unsigned int nr; struct list_head dead; struct thread *last_match; diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index bdaa0a4edc17..5792d7a78152 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -488,7 +488,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip) static void maps__init(struct maps *maps) { maps->entries = RB_ROOT; - pthread_rwlock_init(&maps->lock, NULL); + init_rwsem(&maps->lock); } void map_groups__init(struct map_groups *mg, struct machine *machine) @@ -517,9 +517,9 @@ static void __maps__purge(struct maps *maps) static void maps__exit(struct maps *maps) { - pthread_rwlock_wrlock(&maps->lock); + down_write(&maps->lock); __maps__purge(maps); - pthread_rwlock_unlock(&maps->lock); + up_write(&maps->lock); } void map_groups__exit(struct map_groups *mg) @@ -586,7 +586,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct symbol *sym; struct rb_node *nd; - pthread_rwlock_rdlock(&maps->lock); + down_read(&maps->lock); for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) { struct map *pos = rb_entry(nd, struct map, rb_node); @@ -602,7 +602,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, sym = NULL; out: - pthread_rwlock_unlock(&maps->lock); + up_read(&maps->lock); return sym; } @@ -638,7 +638,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp) size_t printed = 0; struct rb_node *nd; - pthread_rwlock_rdlock(&maps->lock); + down_read(&maps->lock); for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) { struct map *pos = rb_entry(nd, struct map, rb_node); @@ -650,7 +650,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp) } } - pthread_rwlock_unlock(&maps->lock); + up_read(&maps->lock); return printed; } @@ -682,7 +682,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp struct rb_node *next; int err = 0; - pthread_rwlock_wrlock(&maps->lock); + down_write(&maps->lock); root = &maps->entries; next = rb_first(root); @@ -750,7 +750,7 @@ put_map: err = 0; out: - pthread_rwlock_unlock(&maps->lock); + up_write(&maps->lock); return err; } @@ -771,7 +771,7 @@ int map_groups__clone(struct thread *thread, struct map *map; struct maps *maps = &parent->maps[type]; - pthread_rwlock_rdlock(&maps->lock); + down_read(&maps->lock); for (map = maps__first(maps); map; map = map__next(map)) { struct map *new = map__clone(map); @@ -788,7 +788,7 @@ int map_groups__clone(struct thread *thread, err = 0; out_unlock: - pthread_rwlock_unlock(&maps->lock); + up_read(&maps->lock); return err; } @@ -815,9 +815,9 @@ static void __maps__insert(struct maps *maps, struct map *map) void maps__insert(struct maps *maps, struct map *map) { - pthread_rwlock_wrlock(&maps->lock); + down_write(&maps->lock); __maps__insert(maps, map); - pthread_rwlock_unlock(&maps->lock); + up_write(&maps->lock); } static void __maps__remove(struct maps *maps, struct map *map) @@ -828,9 +828,9 @@ static void __maps__remove(struct maps *maps, struct map *map) void maps__remove(struct maps *maps, struct map *map) { - pthread_rwlock_wrlock(&maps->lock); + down_write(&maps->lock); __maps__remove(maps, map); - pthread_rwlock_unlock(&maps->lock); + up_write(&maps->lock); } struct map *maps__find(struct maps *maps, u64 ip) @@ -838,7 +838,7 @@ struct map *maps__find(struct maps *maps, u64 ip) struct rb_node **p, *parent = NULL; struct map *m; - pthread_rwlock_rdlock(&maps->lock); + down_read(&maps->lock); p = &maps->entries.rb_node; while (*p != NULL) { @@ -854,7 +854,7 @@ struct map *maps__find(struct maps *maps, u64 ip) m = NULL; out: - pthread_rwlock_unlock(&maps->lock); + up_read(&maps->lock); return m; } diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 73aacf7a7dc4..d5d7442dac7a 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -9,6 +9,7 @@ #include #include #include +#include "rwsem.h" enum map_type { MAP__FUNCTION = 0, @@ -61,7 +62,7 @@ struct kmap { struct maps { struct rb_root entries; - pthread_rwlock_t lock; + struct rw_semaphore lock; }; struct map_groups { diff --git a/tools/perf/util/rwsem.c b/tools/perf/util/rwsem.c new file mode 100644 index 000000000000..5e52e7baa7b6 --- /dev/null +++ b/tools/perf/util/rwsem.c @@ -0,0 +1,32 @@ +#include "util.h" +#include "rwsem.h" + +int init_rwsem(struct rw_semaphore *sem) +{ + return pthread_rwlock_init(&sem->lock, NULL); +} + +int exit_rwsem(struct rw_semaphore *sem) +{ + return pthread_rwlock_destroy(&sem->lock); +} + +int down_read(struct rw_semaphore *sem) +{ + return perf_singlethreaded ? 0 : pthread_rwlock_rdlock(&sem->lock); +} + +int up_read(struct rw_semaphore *sem) +{ + return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock); +} + +int down_write(struct rw_semaphore *sem) +{ + return perf_singlethreaded ? 0 : pthread_rwlock_wrlock(&sem->lock); +} + +int up_write(struct rw_semaphore *sem) +{ + return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock); +} diff --git a/tools/perf/util/rwsem.h b/tools/perf/util/rwsem.h new file mode 100644 index 000000000000..94565ad4d494 --- /dev/null +++ b/tools/perf/util/rwsem.h @@ -0,0 +1,19 @@ +#ifndef _PERF_RWSEM_H +#define _PERF_RWSEM_H + +#include + +struct rw_semaphore { + pthread_rwlock_t lock; +}; + +int init_rwsem(struct rw_semaphore *sem); +int exit_rwsem(struct rw_semaphore *sem); + +int down_read(struct rw_semaphore *sem); +int up_read(struct rw_semaphore *sem); + +int down_write(struct rw_semaphore *sem); +int up_write(struct rw_semaphore *sem); + +#endif /* _PERF_RWSEM_H */ diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 5909ee4c7ade..066e38aa4063 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -226,7 +226,7 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) struct maps *maps = &mg->maps[type]; struct map *next, *curr; - pthread_rwlock_wrlock(&maps->lock); + down_write(&maps->lock); curr = maps__first(maps); if (curr == NULL) @@ -246,7 +246,7 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) curr->end = ~0ULL; out_unlock: - pthread_rwlock_unlock(&maps->lock); + up_write(&maps->lock); } struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name) @@ -1671,7 +1671,7 @@ struct map *map_groups__find_by_name(struct map_groups *mg, struct maps *maps = &mg->maps[type]; struct map *map; - pthread_rwlock_rdlock(&maps->lock); + down_read(&maps->lock); for (map = maps__first(maps); map; map = map__next(map)) { if (map->dso && strcmp(map->dso->short_name, name) == 0) @@ -1681,7 +1681,7 @@ struct map *map_groups__find_by_name(struct map_groups *mg, map = NULL; out_unlock: - pthread_rwlock_unlock(&maps->lock); + up_read(&maps->lock); return map; } diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index aee9a42102ba..c09bdb509d82 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -264,7 +264,7 @@ static int __thread__prepare_access(struct thread *thread) struct maps *maps = &thread->mg->maps[i]; struct map *map; - pthread_rwlock_rdlock(&maps->lock); + down_read(&maps->lock); for (map = maps__first(maps); map; map = map__next(map)) { err = unwind__prepare_access(thread, map, &initialized); @@ -272,7 +272,7 @@ static int __thread__prepare_access(struct thread *thread) break; } - pthread_rwlock_unlock(&maps->lock); + up_read(&maps->lock); } return err; diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index e7d60d05596d..d7f2113462fb 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 8a9a677f7576..40b425949aa3 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 3c9c39711343..97e0c8e26477 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -23,6 +23,19 @@ /* * XXX We need to find a better place for these things... */ + +bool perf_singlethreaded = true; + +void perf_set_singlethreaded(void) +{ + perf_singlethreaded = true; +} + +void perf_set_multithreaded(void) +{ + perf_singlethreaded = false; +} + unsigned int page_size; int cacheline_size; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 03946d5e4741..6c7e6cc902bb 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -62,4 +62,9 @@ int sched_getcpu(void); int setns(int fd, int nstype); #endif +extern bool perf_singlethreaded; + +void perf_set_singlethreaded(void); +void perf_set_multithreaded(void); + #endif /* GIT_COMPAT_UTIL_H */ diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index d3c39eec89a8..f5f843d3c22f 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -319,7 +319,7 @@ struct dso *machine__findnew_vdso(struct machine *machine, struct vdso_info *vdso_info; struct dso *dso = NULL; - pthread_rwlock_wrlock(&machine->dsos.lock); + down_write(&machine->dsos.lock); if (!machine->vdso_info) machine->vdso_info = vdso_info__new(); @@ -347,7 +347,7 @@ struct dso *machine__findnew_vdso(struct machine *machine, out_unlock: dso__get(dso); - pthread_rwlock_unlock(&machine->dsos.lock); + up_write(&machine->dsos.lock); return dso; } -- cgit v1.2.3