aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/kernel/cpu/perf_event.c14
-rw-r--r--include/linux/perf_event.h2
-rw-r--r--include/uapi/linux/perf_event.h6
-rw-r--r--kernel/events/core.c77
4 files changed, 91 insertions, 8 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ac41b3ad1fc9..0420ebcac116 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1978,13 +1978,23 @@ void arch_perf_update_userpage(struct perf_event *event,
data = cyc2ns_read_begin();
+ /*
+ * Internal timekeeping for enabled/running/stopped times
+ * is always in the local_clock domain.
+ */
userpg->cap_user_time = 1;
userpg->time_mult = data->cyc2ns_mul;
userpg->time_shift = data->cyc2ns_shift;
userpg->time_offset = data->cyc2ns_offset - now;
- userpg->cap_user_time_zero = 1;
- userpg->time_zero = data->cyc2ns_offset;
+ /*
+ * cap_user_time_zero doesn't make sense when we're using a different
+ * time base for the records.
+ */
+ if (event->clock == &local_clock) {
+ userpg->cap_user_time_zero = 1;
+ userpg->time_zero = data->cyc2ns_offset;
+ }
cyc2ns_read_end(data);
}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b16eac5f54ce..401554074de9 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -173,6 +173,7 @@ struct perf_event;
* pmu::capabilities flags
*/
#define PERF_PMU_CAP_NO_INTERRUPT 0x01
+#define PERF_PMU_CAP_NO_NMI 0x02
/**
* struct pmu - generic performance monitoring unit
@@ -457,6 +458,7 @@ struct perf_event {
struct pid_namespace *ns;
u64 id;
+ u64 (*clock)(void);
perf_overflow_handler_t overflow_handler;
void *overflow_handler_context;
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 1e3cd07cf76e..3bb40ddadbe5 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -326,7 +326,8 @@ struct perf_event_attr {
exclude_callchain_user : 1, /* exclude user callchains */
mmap2 : 1, /* include mmap with inode data */
comm_exec : 1, /* flag comm events that are due to an exec */
- __reserved_1 : 39;
+ use_clockid : 1, /* use @clockid for time fields */
+ __reserved_1 : 38;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -355,8 +356,7 @@ struct perf_event_attr {
*/
__u32 sample_stack_user;
- /* Align to u64. */
- __u32 __reserved_2;
+ __s32 clockid;
/*
* Defines set of regs to dump for each sample
* state captured on:
diff --git a/kernel/events/core.c b/kernel/events/core.c
index bb1a7c36e794..c40c2cac2d8e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -327,6 +327,11 @@ static inline u64 perf_clock(void)
return local_clock();
}
+static inline u64 perf_event_clock(struct perf_event *event)
+{
+ return event->clock();
+}
+
static inline struct perf_cpu_context *
__get_cpu_context(struct perf_event_context *ctx)
{
@@ -4762,7 +4767,7 @@ static void __perf_event_header__init_id(struct perf_event_header *header,
}
if (sample_type & PERF_SAMPLE_TIME)
- data->time = perf_clock();
+ data->time = perf_event_clock(event);
if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
data->id = primary_event_id(event);
@@ -5340,6 +5345,8 @@ static void perf_event_task_output(struct perf_event *event,
task_event->event_id.tid = perf_event_tid(event, task);
task_event->event_id.ptid = perf_event_tid(event, current);
+ task_event->event_id.time = perf_event_clock(event);
+
perf_output_put(&handle, task_event->event_id);
perf_event__output_id_sample(event, &handle, &sample);
@@ -5373,7 +5380,7 @@ static void perf_event_task(struct task_struct *task,
/* .ppid */
/* .tid */
/* .ptid */
- .time = perf_clock(),
+ /* .time */
},
};
@@ -5749,7 +5756,7 @@ static void perf_log_throttle(struct perf_event *event, int enable)
.misc = 0,
.size = sizeof(throttle_event),
},
- .time = perf_clock(),
+ .time = perf_event_clock(event),
.id = primary_event_id(event),
.stream_id = event->id,
};
@@ -6293,6 +6300,8 @@ static int perf_swevent_init(struct perf_event *event)
static struct pmu perf_swevent = {
.task_ctx_nr = perf_sw_context,
+ .capabilities = PERF_PMU_CAP_NO_NMI,
+
.event_init = perf_swevent_init,
.add = perf_swevent_add,
.del = perf_swevent_del,
@@ -6636,6 +6645,8 @@ static int cpu_clock_event_init(struct perf_event *event)
static struct pmu perf_cpu_clock = {
.task_ctx_nr = perf_sw_context,
+ .capabilities = PERF_PMU_CAP_NO_NMI,
+
.event_init = cpu_clock_event_init,
.add = cpu_clock_event_add,
.del = cpu_clock_event_del,
@@ -6715,6 +6726,8 @@ static int task_clock_event_init(struct perf_event *event)
static struct pmu perf_task_clock = {
.task_ctx_nr = perf_sw_context,
+ .capabilities = PERF_PMU_CAP_NO_NMI,
+
.event_init = task_clock_event_init,
.add = task_clock_event_add,
.del = task_clock_event_del,
@@ -7200,6 +7213,10 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
event->hw.target = task;
}
+ event->clock = &local_clock;
+ if (parent_event)
+ event->clock = parent_event->clock;
+
if (!overflow_handler && parent_event) {
overflow_handler = parent_event->overflow_handler;
context = parent_event->overflow_handler_context;
@@ -7422,6 +7439,12 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
if (output_event->cpu == -1 && output_event->ctx != event->ctx)
goto out;
+ /*
+ * Mixing clocks in the same buffer is trouble you don't need.
+ */
+ if (output_event->clock != event->clock)
+ goto out;
+
set:
mutex_lock(&event->mmap_mutex);
/* Can't redirect output if we've got an active mmap() */
@@ -7454,6 +7477,43 @@ static void mutex_lock_double(struct mutex *a, struct mutex *b)
mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
}
+static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
+{
+ bool nmi_safe = false;
+
+ switch (clk_id) {
+ case CLOCK_MONOTONIC:
+ event->clock = &ktime_get_mono_fast_ns;
+ nmi_safe = true;
+ break;
+
+ case CLOCK_MONOTONIC_RAW:
+ event->clock = &ktime_get_raw_fast_ns;
+ nmi_safe = true;
+ break;
+
+ case CLOCK_REALTIME:
+ event->clock = &ktime_get_real_ns;
+ break;
+
+ case CLOCK_BOOTTIME:
+ event->clock = &ktime_get_boot_ns;
+ break;
+
+ case CLOCK_TAI:
+ event->clock = &ktime_get_tai_ns;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ if (!nmi_safe && !(event->pmu->capabilities & PERF_PMU_CAP_NO_NMI))
+ return -EINVAL;
+
+ return 0;
+}
+
/**
* sys_perf_event_open - open a performance event, associate it to a task/cpu
*
@@ -7569,6 +7629,12 @@ SYSCALL_DEFINE5(perf_event_open,
*/
pmu = event->pmu;
+ if (attr.use_clockid) {
+ err = perf_event_set_clock(event, attr.clockid);
+ if (err)
+ goto err_alloc;
+ }
+
if (group_leader &&
(is_software_event(event) != is_software_event(group_leader))) {
if (is_software_event(event)) {
@@ -7618,6 +7684,11 @@ SYSCALL_DEFINE5(perf_event_open,
*/
if (group_leader->group_leader != group_leader)
goto err_context;
+
+ /* All events in a group should have the same clock */
+ if (group_leader->clock != event->clock)
+ goto err_context;
+
/*
* Do not allow to attach to a group in a different
* task or CPU context: