diff options
-rw-r--r-- | tools/lib/perf/include/perf/event.h | 14 | ||||
-rw-r--r-- | tools/perf/tests/cpumap.c | 52 | ||||
-rw-r--r-- | tools/perf/util/cpumap.c | 31 | ||||
-rw-r--r-- | tools/perf/util/session.c | 5 | ||||
-rw-r--r-- | tools/perf/util/synthetic-events.c | 151 |
5 files changed, 166 insertions, 87 deletions
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index e147e6183292..e282faf8fd75 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -153,6 +153,7 @@ struct perf_record_header_attr { enum { PERF_CPU_MAP__CPUS = 0, PERF_CPU_MAP__MASK = 1, + PERF_CPU_MAP__RANGE_CPUS = 2, }; /* @@ -195,6 +196,17 @@ struct perf_record_mask_cpu_map64 { #pragma GCC diagnostic ignored "-Wpacked" #pragma GCC diagnostic ignored "-Wattributes" +/* + * An encoding of a CPU map for a range starting at start_cpu through to + * end_cpu. If any_cpu is 1, an any CPU (-1) value (aka dummy value) is present. + */ +struct perf_record_range_cpu_map { + __u8 any_cpu; + __u8 __pad; + __u16 start_cpu; + __u16 end_cpu; +}; + struct __packed perf_record_cpu_map_data { __u16 type; union { @@ -204,6 +216,8 @@ struct __packed perf_record_cpu_map_data { struct perf_record_mask_cpu_map32 mask32_data; /* Used when type == PERF_CPU_MAP__MASK and long_size == 8. */ struct perf_record_mask_cpu_map64 mask64_data; + /* Used when type == PERF_CPU_MAP__RANGE_CPUS. */ + struct perf_record_range_cpu_map range_cpu_data; }; }; diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 7ea150cdc137..7c873c6ae3eb 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -19,7 +19,6 @@ static int process_event_mask(struct perf_tool *tool __maybe_unused, struct perf_record_cpu_map *map_event = &event->cpu_map; struct perf_record_cpu_map_data *data; struct perf_cpu_map *map; - int i; unsigned int long_size; data = &map_event->data; @@ -32,16 +31,17 @@ static int process_event_mask(struct perf_tool *tool __maybe_unused, TEST_ASSERT_VAL("wrong nr", data->mask32_data.nr == 1); - for (i = 0; i < 20; i++) { + TEST_ASSERT_VAL("wrong cpu", perf_record_cpu_map_data__test_bit(0, data)); + TEST_ASSERT_VAL("wrong cpu", !perf_record_cpu_map_data__test_bit(1, data)); + for (int i = 2; i <= 20; i++) TEST_ASSERT_VAL("wrong cpu", perf_record_cpu_map_data__test_bit(i, data)); - } map = cpu_map__new_data(data); TEST_ASSERT_VAL("wrong nr", perf_cpu_map__nr(map) == 20); - for (i = 0; i < 20; i++) { - TEST_ASSERT_VAL("wrong cpu", perf_cpu_map__cpu(map, i).cpu == i); - } + TEST_ASSERT_VAL("wrong cpu", perf_cpu_map__cpu(map, 0).cpu == 0); + for (int i = 2; i <= 20; i++) + TEST_ASSERT_VAL("wrong cpu", perf_cpu_map__cpu(map, i - 1).cpu == i); perf_cpu_map__put(map); return 0; @@ -73,26 +73,60 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, return 0; } +static int process_event_range_cpus(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct perf_record_cpu_map *map_event = &event->cpu_map; + struct perf_record_cpu_map_data *data; + struct perf_cpu_map *map; + + data = &map_event->data; + + TEST_ASSERT_VAL("wrong type", data->type == PERF_CPU_MAP__RANGE_CPUS); + + TEST_ASSERT_VAL("wrong any_cpu", data->range_cpu_data.any_cpu == 0); + TEST_ASSERT_VAL("wrong start_cpu", data->range_cpu_data.start_cpu == 1); + TEST_ASSERT_VAL("wrong end_cpu", data->range_cpu_data.end_cpu == 256); + + map = cpu_map__new_data(data); + TEST_ASSERT_VAL("wrong nr", perf_cpu_map__nr(map) == 256); + TEST_ASSERT_VAL("wrong cpu", perf_cpu_map__cpu(map, 0).cpu == 1); + TEST_ASSERT_VAL("wrong cpu", perf_cpu_map__max(map).cpu == 256); + TEST_ASSERT_VAL("wrong refcnt", refcount_read(&map->refcnt) == 1); + perf_cpu_map__put(map); + return 0; +} + static int test__cpu_map_synthesize(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { struct perf_cpu_map *cpus; - /* This one is better stores in mask. */ - cpus = perf_cpu_map__new("0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19"); + /* This one is better stored in a mask. */ + cpus = perf_cpu_map__new("0,2-20"); TEST_ASSERT_VAL("failed to synthesize map", !perf_event__synthesize_cpu_map(NULL, cpus, process_event_mask, NULL)); perf_cpu_map__put(cpus); - /* This one is better stores in cpu values. */ + /* This one is better stored in cpu values. */ cpus = perf_cpu_map__new("1,256"); TEST_ASSERT_VAL("failed to synthesize map", !perf_event__synthesize_cpu_map(NULL, cpus, process_event_cpus, NULL)); perf_cpu_map__put(cpus); + + /* This one is better stored as a range. */ + cpus = perf_cpu_map__new("1-256"); + + TEST_ASSERT_VAL("failed to synthesize map", + !perf_event__synthesize_cpu_map(NULL, cpus, process_event_range_cpus, NULL)); + + perf_cpu_map__put(cpus); return 0; } diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index ae43fb88f444..2389bd3e19b8 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -112,12 +112,39 @@ static struct perf_cpu_map *cpu_map__from_mask(const struct perf_record_cpu_map_ } +static struct perf_cpu_map *cpu_map__from_range(const struct perf_record_cpu_map_data *data) +{ + struct perf_cpu_map *map; + unsigned int i = 0; + + map = perf_cpu_map__empty_new(data->range_cpu_data.end_cpu - + data->range_cpu_data.start_cpu + 1 + data->range_cpu_data.any_cpu); + if (!map) + return NULL; + + if (data->range_cpu_data.any_cpu) + map->map[i++].cpu = -1; + + for (int cpu = data->range_cpu_data.start_cpu; cpu <= data->range_cpu_data.end_cpu; + i++, cpu++) + map->map[i].cpu = cpu; + + return map; +} + struct perf_cpu_map *cpu_map__new_data(const struct perf_record_cpu_map_data *data) { - if (data->type == PERF_CPU_MAP__CPUS) + switch (data->type) { + case PERF_CPU_MAP__CPUS: return cpu_map__from_entries(data); - else + case PERF_CPU_MAP__MASK: return cpu_map__from_mask(data); + case PERF_CPU_MAP__RANGE_CPUS: + return cpu_map__from_range(data); + default: + pr_err("cpu_map__new_data unknown type %d\n", data->type); + return NULL; + } } size_t cpu_map__fprintf(struct perf_cpu_map *map, FILE *fp) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 47d5a50e616a..1a4f10de29ff 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -943,6 +943,11 @@ static void perf_event__cpu_map_swap(union perf_event *event, default: pr_err("cpu_map swap: unsupported long size\n"); } + break; + case PERF_CPU_MAP__RANGE_CPUS: + data->range_cpu_data.start_cpu = bswap_16(data->range_cpu_data.start_cpu); + data->range_cpu_data.end_cpu = bswap_16(data->range_cpu_data.end_cpu); + break; default: break; } diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 851d11a64b0d..289ea17ac5f7 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1195,93 +1195,97 @@ int perf_event__synthesize_thread_map2(struct perf_tool *tool, return err; } -static void synthesize_cpus(struct perf_record_cpu_map_data *data, - const struct perf_cpu_map *map) -{ - int i, map_nr = perf_cpu_map__nr(map); - - data->cpus_data.nr = map_nr; +struct synthesize_cpu_map_data { + const struct perf_cpu_map *map; + int nr; + int min_cpu; + int max_cpu; + int has_any_cpu; + int type; + size_t size; + struct perf_record_cpu_map_data *data; +}; - for (i = 0; i < map_nr; i++) - data->cpus_data.cpu[i] = perf_cpu_map__cpu(map, i).cpu; +static void synthesize_cpus(struct synthesize_cpu_map_data *data) +{ + data->data->type = PERF_CPU_MAP__CPUS; + data->data->cpus_data.nr = data->nr; + for (int i = 0; i < data->nr; i++) + data->data->cpus_data.cpu[i] = perf_cpu_map__cpu(data->map, i).cpu; } -static void synthesize_mask(struct perf_record_cpu_map_data *data, - const struct perf_cpu_map *map, int max) +static void synthesize_mask(struct synthesize_cpu_map_data *data) { int idx; struct perf_cpu cpu; /* Due to padding, the 4bytes per entry mask variant is always smaller. */ - data->mask32_data.nr = BITS_TO_U32(max); - data->mask32_data.long_size = 4; + data->data->type = PERF_CPU_MAP__MASK; + data->data->mask32_data.nr = BITS_TO_U32(data->max_cpu); + data->data->mask32_data.long_size = 4; - perf_cpu_map__for_each_cpu(cpu, idx, map) { + perf_cpu_map__for_each_cpu(cpu, idx, data->map) { int bit_word = cpu.cpu / 32; - __u32 bit_mask = 1U << (cpu.cpu & 31); + u32 bit_mask = 1U << (cpu.cpu & 31); - data->mask32_data.mask[bit_word] |= bit_mask; + data->data->mask32_data.mask[bit_word] |= bit_mask; } } -static size_t cpus_size(const struct perf_cpu_map *map) +static void synthesize_range_cpus(struct synthesize_cpu_map_data *data) { - return sizeof(struct cpu_map_entries) + perf_cpu_map__nr(map) * sizeof(u16); + data->data->type = PERF_CPU_MAP__RANGE_CPUS; + data->data->range_cpu_data.any_cpu = data->has_any_cpu; + data->data->range_cpu_data.start_cpu = data->min_cpu; + data->data->range_cpu_data.end_cpu = data->max_cpu; } -static size_t mask_size(const struct perf_cpu_map *map, int *max) -{ - *max = perf_cpu_map__max(map).cpu; - return sizeof(struct perf_record_mask_cpu_map32) + BITS_TO_U32(*max) * sizeof(__u32); -} - -static void *cpu_map_data__alloc(const struct perf_cpu_map *map, size_t *size, - u16 *type, int *max) +static void *cpu_map_data__alloc(struct synthesize_cpu_map_data *syn_data, + size_t header_size) { size_t size_cpus, size_mask; - bool is_dummy = perf_cpu_map__empty(map); - /* - * Both array and mask data have variable size based - * on the number of cpus and their actual values. - * The size of the 'struct perf_record_cpu_map_data' is: - * - * array = size of 'struct cpu_map_entries' + - * number of cpus * sizeof(u64) - * - * mask = size of 'struct perf_record_record_cpu_map' + - * maximum cpu bit converted to size of longs - * - * and finally + the size of 'struct perf_record_cpu_map_data'. - */ - size_cpus = cpus_size(map); - size_mask = mask_size(map, max); + syn_data->nr = perf_cpu_map__nr(syn_data->map); + syn_data->has_any_cpu = (perf_cpu_map__cpu(syn_data->map, 0).cpu == -1) ? 1 : 0; - if (is_dummy || (size_cpus < size_mask)) { - *size += size_cpus; - *type = PERF_CPU_MAP__CPUS; - } else { - *size += size_mask; - *type = PERF_CPU_MAP__MASK; + syn_data->min_cpu = perf_cpu_map__cpu(syn_data->map, syn_data->has_any_cpu).cpu; + syn_data->max_cpu = perf_cpu_map__max(syn_data->map).cpu; + if (syn_data->max_cpu - syn_data->min_cpu + 1 == syn_data->nr - syn_data->has_any_cpu) { + /* A consecutive range of CPUs can be encoded using a range. */ + assert(sizeof(u16) + sizeof(struct perf_record_range_cpu_map) == sizeof(u64)); + syn_data->type = PERF_CPU_MAP__RANGE_CPUS; + syn_data->size = header_size + sizeof(u64); + return zalloc(syn_data->size); } - *size += sizeof(__u16); /* For perf_record_cpu_map_data.type. */ - *size = PERF_ALIGN(*size, sizeof(u64)); - return zalloc(*size); + size_cpus = sizeof(u16) + sizeof(struct cpu_map_entries) + syn_data->nr * sizeof(u16); + /* Due to padding, the 4bytes per entry mask variant is always smaller. */ + size_mask = sizeof(u16) + sizeof(struct perf_record_mask_cpu_map32) + + BITS_TO_U32(syn_data->max_cpu) * sizeof(__u32); + if (syn_data->has_any_cpu || size_cpus < size_mask) { + /* Follow the CPU map encoding. */ + syn_data->type = PERF_CPU_MAP__CPUS; + syn_data->size = header_size + PERF_ALIGN(size_cpus, sizeof(u64)); + return zalloc(syn_data->size); + } + /* Encode using a bitmask. */ + syn_data->type = PERF_CPU_MAP__MASK; + syn_data->size = header_size + PERF_ALIGN(size_mask, sizeof(u64)); + return zalloc(syn_data->size); } -static void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, - const struct perf_cpu_map *map, - u16 type, int max) +static void cpu_map_data__synthesize(struct synthesize_cpu_map_data *data) { - data->type = type; - - switch (type) { + switch (data->type) { case PERF_CPU_MAP__CPUS: - synthesize_cpus(data, map); + synthesize_cpus(data); break; case PERF_CPU_MAP__MASK: - synthesize_mask(data, map, max); + synthesize_mask(data); + break; + case PERF_CPU_MAP__RANGE_CPUS: + synthesize_range_cpus(data); + break; default: break; } @@ -1289,23 +1293,22 @@ static void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, static struct perf_record_cpu_map *cpu_map_event__new(const struct perf_cpu_map *map) { - size_t size = sizeof(struct perf_event_header); + struct synthesize_cpu_map_data syn_data = { .map = map }; struct perf_record_cpu_map *event; - int max; - u16 type; - event = cpu_map_data__alloc(map, &size, &type, &max); + + event = cpu_map_data__alloc(&syn_data, sizeof(struct perf_event_header)); if (!event) return NULL; + syn_data.data = &event->data; event->header.type = PERF_RECORD_CPU_MAP; - event->header.size = size; - event->data.type = type; - - cpu_map_data__synthesize(&event->data, map, type, max); + event->header.size = syn_data.size; + cpu_map_data__synthesize(&syn_data); return event; } + int perf_event__synthesize_cpu_map(struct perf_tool *tool, const struct perf_cpu_map *map, perf_event__handler_t process, @@ -1998,24 +2001,20 @@ int perf_event__synthesize_event_update_name(struct perf_tool *tool, struct evse int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, struct evsel *evsel, perf_event__handler_t process) { - size_t size = sizeof(struct perf_event_header) + sizeof(u64) + sizeof(u64); + struct synthesize_cpu_map_data syn_data = { .map = evsel->core.own_cpus }; struct perf_record_event_update *ev; - int max, err; - u16 type; - - if (!evsel->core.own_cpus) - return 0; + int err; - ev = cpu_map_data__alloc(evsel->core.own_cpus, &size, &type, &max); + ev = cpu_map_data__alloc(&syn_data, sizeof(struct perf_event_header) + 2 * sizeof(u64)); if (!ev) return -ENOMEM; + syn_data.data = &ev->cpus.cpus; ev->header.type = PERF_RECORD_EVENT_UPDATE; - ev->header.size = (u16)size; + ev->header.size = (u16)syn_data.size; ev->type = PERF_EVENT_UPDATE__CPUS; ev->id = evsel->core.id[0]; - - cpu_map_data__synthesize(&ev->cpus.cpus, evsel->core.own_cpus, type, max); + cpu_map_data__synthesize(&syn_data); err = process(tool, (union perf_event *)ev, NULL, NULL); free(ev); |