From ba9372a8f306c4e53a5f61dcbcd6c1e4a8c2e9ac Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 13 Mar 2009 10:48:52 +0100 Subject: x86, hw-branch-tracer: keep resources on stop Distinguish init/reset and start/stop: init/reset will allocate and release bts tracing resources stop/start will suspend and resume bts tracing Return an error on init() if no cpu can be traced. Signed-off-by: Markus Metzger LKML-Reference: <20090313104852.A30168@sedona.ch.intel.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_hw_branches.c | 119 ++++++++++++++++++++++++++++----------- 1 file changed, 85 insertions(+), 34 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index 7bfdf4c2347f..a99a04c5e9cd 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -19,7 +19,7 @@ #include "trace_output.h" -#define SIZEOF_BTS (1 << 13) +#define BTS_BUFFER_SIZE (1 << 13) /* * The tracer lock protects the below per-cpu tracer array. @@ -33,53 +33,68 @@ */ static DEFINE_SPINLOCK(bts_tracer_lock); static DEFINE_PER_CPU(struct bts_tracer *, tracer); -static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer); +static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer); #define this_tracer per_cpu(tracer, smp_processor_id()) #define this_buffer per_cpu(buffer, smp_processor_id()) -static int __read_mostly trace_hw_branches_enabled; +static int trace_hw_branches_enabled __read_mostly; +static int trace_hw_branches_suspended __read_mostly; static struct trace_array *hw_branch_trace __read_mostly; /* - * Start tracing on the current cpu. + * Initialize the tracer for the current cpu. * The argument is ignored. * * pre: bts_tracer_lock must be locked. */ -static void bts_trace_start_cpu(void *arg) +static void bts_trace_init_cpu(void *arg) { if (this_tracer) ds_release_bts(this_tracer); - this_tracer = - ds_request_bts(/* task = */ NULL, this_buffer, SIZEOF_BTS, - /* ovfl = */ NULL, /* th = */ (size_t)-1, - BTS_KERNEL); + this_tracer = ds_request_bts(NULL, this_buffer, BTS_BUFFER_SIZE, + NULL, (size_t)-1, BTS_KERNEL); if (IS_ERR(this_tracer)) { this_tracer = NULL; return; } } -static void bts_trace_start(struct trace_array *tr) +static int bts_trace_init(struct trace_array *tr) { + int cpu, avail; + spin_lock(&bts_tracer_lock); - on_each_cpu(bts_trace_start_cpu, NULL, 1); - trace_hw_branches_enabled = 1; + hw_branch_trace = tr; + + on_each_cpu(bts_trace_init_cpu, NULL, 1); + + /* Check on how many cpus we could enable tracing */ + avail = 0; + for_each_online_cpu(cpu) + if (per_cpu(tracer, cpu)) + avail++; + + trace_hw_branches_enabled = (avail ? 1 : 0); + trace_hw_branches_suspended = 0; spin_unlock(&bts_tracer_lock); + + + /* If we could not enable tracing on a single cpu, we fail. */ + return avail ? 0 : -EOPNOTSUPP; } /* - * Stop tracing on the current cpu. + * Release the tracer for the current cpu. * The argument is ignored. * * pre: bts_tracer_lock must be locked. */ -static void bts_trace_stop_cpu(void *arg) +static void bts_trace_release_cpu(void *arg) { if (this_tracer) { ds_release_bts(this_tracer); @@ -87,12 +102,57 @@ static void bts_trace_stop_cpu(void *arg) } } -static void bts_trace_stop(struct trace_array *tr) +static void bts_trace_reset(struct trace_array *tr) { spin_lock(&bts_tracer_lock); + on_each_cpu(bts_trace_release_cpu, NULL, 1); trace_hw_branches_enabled = 0; - on_each_cpu(bts_trace_stop_cpu, NULL, 1); + trace_hw_branches_suspended = 0; + + spin_unlock(&bts_tracer_lock); +} + +/* + * Resume tracing on the current cpu. + * The argument is ignored. + * + * pre: bts_tracer_lock must be locked. + */ +static void bts_trace_resume_cpu(void *arg) +{ + if (this_tracer) + ds_resume_bts(this_tracer); +} + +static void bts_trace_start(struct trace_array *tr) +{ + spin_lock(&bts_tracer_lock); + + on_each_cpu(bts_trace_resume_cpu, NULL, 1); + trace_hw_branches_suspended = 0; + + spin_unlock(&bts_tracer_lock); +} + +/* + * Suspend tracing on the current cpu. + * The argument is ignored. + * + * pre: bts_tracer_lock must be locked. + */ +static void bts_trace_suspend_cpu(void *arg) +{ + if (this_tracer) + ds_suspend_bts(this_tracer); +} + +static void bts_trace_stop(struct trace_array *tr) +{ + spin_lock(&bts_tracer_lock); + + on_each_cpu(bts_trace_suspend_cpu, NULL, 1); + trace_hw_branches_suspended = 1; spin_unlock(&bts_tracer_lock); } @@ -110,10 +170,14 @@ static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb, switch (action) { case CPU_ONLINE: case CPU_DOWN_FAILED: - smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1); + smp_call_function_single(cpu, bts_trace_init_cpu, NULL, 1); + + if (trace_hw_branches_suspended) + smp_call_function_single(cpu, bts_trace_suspend_cpu, + NULL, 1); break; case CPU_DOWN_PREPARE: - smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1); + smp_call_function_single(cpu, bts_trace_release_cpu, NULL, 1); break; } @@ -126,20 +190,6 @@ static struct notifier_block bts_hotcpu_notifier __cpuinitdata = { .notifier_call = bts_hotcpu_handler }; -static int bts_trace_init(struct trace_array *tr) -{ - hw_branch_trace = tr; - - bts_trace_start(tr); - - return 0; -} - -static void bts_trace_reset(struct trace_array *tr) -{ - bts_trace_stop(tr); -} - static void bts_trace_print_header(struct seq_file *m) { seq_puts(m, "# CPU# TO <- FROM\n"); @@ -228,7 +278,7 @@ static void trace_bts_at(const struct bts_trace *trace, void *at) */ static void trace_bts_cpu(void *arg) { - struct trace_array *tr = (struct trace_array *) arg; + struct trace_array *tr = (struct trace_array *)arg; const struct bts_trace *trace; unsigned char *at; @@ -276,7 +326,8 @@ void trace_hw_branch_oops(void) { spin_lock(&bts_tracer_lock); - trace_bts_cpu(hw_branch_trace); + if (trace_hw_branches_enabled) + trace_bts_cpu(hw_branch_trace); spin_unlock(&bts_tracer_lock); } -- cgit v1.2.3 From 321bb5e1ac461c04b6a93f795010d6eb01d8c5ca Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 13 Mar 2009 10:50:27 +0100 Subject: x86, hw-branch-tracer: add selftest Add a selftest for the hw-branch-tracer. Signed-off-by: Markus Metzger LKML-Reference: <20090313105027.A30183@sedona.ch.intel.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace.h | 2 ++ kernel/trace/trace_hw_branches.c | 5 +++- kernel/trace/trace_selftest.c | 53 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 56ce34d90b03..e7fbc826f1e9 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -576,6 +576,8 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr); +extern int trace_selftest_startup_hw_branches(struct tracer *trace, + struct trace_array *tr); #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index a99a04c5e9cd..4ca82700c04e 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -342,7 +342,10 @@ struct tracer bts_tracer __read_mostly = .start = bts_trace_start, .stop = bts_trace_stop, .open = trace_bts_prepare, - .close = trace_bts_close + .close = trace_bts_close, +#ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_hw_branches, +#endif /* CONFIG_FTRACE_SELFTEST */ }; __init static int init_bts_trace(void) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index f907a2b29028..3c7b797d0d28 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -16,6 +16,7 @@ static inline int trace_valid_entry(struct trace_entry *entry) case TRACE_BRANCH: case TRACE_GRAPH_ENT: case TRACE_GRAPH_RET: + case TRACE_HW_BRANCHES: return 1; } return 0; @@ -691,3 +692,55 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) return ret; } #endif /* CONFIG_BRANCH_TRACER */ + +#ifdef CONFIG_HW_BRANCH_TRACER +int +trace_selftest_startup_hw_branches(struct tracer *trace, + struct trace_array *tr) +{ + unsigned long count; + int ret; + struct trace_iterator iter; + struct tracer tracer; + + if (!trace->open) { + printk(KERN_CONT "missing open function..."); + return -1; + } + + ret = tracer_init(trace, tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + + /* + * The hw-branch tracer needs to collect the trace from the various + * cpu trace buffers - before tracing is stopped. + */ + memset(&iter, 0, sizeof(iter)); + memcpy(&tracer, trace, sizeof(tracer)); + + iter.trace = &tracer; + iter.tr = tr; + iter.pos = -1; + mutex_init(&iter.mutex); + + trace->open(&iter); + + mutex_destroy(&iter.mutex); + + tracing_stop(); + + ret = trace_test_buffer(tr, &count); + trace->reset(tr); + tracing_start(); + + if (!ret && !count) { + printk(KERN_CONT "no entries found.."); + ret = -1; + } + + return ret; +} +#endif /* CONFIG_HW_BRANCH_TRACER */ -- cgit v1.2.3 From e9a22d1fb94050b7d600019c32e6b672d539054b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 13 Mar 2009 11:54:40 +0100 Subject: x86, bts: cleanups Impact: cleanup, no code changed Cc: Markus Metzger LKML-Reference: <20090313104218.A30096@sedona.ch.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/ds.c | 142 ++++++++++++++++++++++----------------- arch/x86/kernel/ds_selftest.h | 2 +- kernel/trace/trace_hw_branches.c | 6 +- kernel/trace/trace_selftest.c | 5 +- 4 files changed, 87 insertions(+), 68 deletions(-) (limited to 'kernel/trace') diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index d9cab7168058..7363e01ba082 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -19,43 +19,52 @@ * Markus Metzger , 2007-2009 */ - -#include - -#include +#include #include -#include +#include #include +#include #include -#include + +#include #include "ds_selftest.h" /* - * The configuration for a particular DS hardware implementation. + * The configuration for a particular DS hardware implementation: */ struct ds_configuration { - /* The name of the configuration. */ - const char *name; - /* The size of pointer-typed fields in DS, BTS, and PEBS. */ - unsigned char sizeof_ptr_field; - /* The size of a BTS/PEBS record in bytes. */ - unsigned char sizeof_rec[2]; - /* Control bit-masks indexed by enum ds_feature. */ - unsigned long ctl[dsf_ctl_max]; + /* The name of the configuration: */ + const char *name; + + /* The size of pointer-typed fields in DS, BTS, and PEBS: */ + unsigned char sizeof_ptr_field; + + /* The size of a BTS/PEBS record in bytes: */ + unsigned char sizeof_rec[2]; + + /* Control bit-masks indexed by enum ds_feature: */ + unsigned long ctl[dsf_ctl_max]; }; static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); #define ds_cfg per_cpu(ds_cfg_array, smp_processor_id()) -#define MAX_SIZEOF_DS (12 * 8) /* Maximal size of a DS configuration. */ -#define MAX_SIZEOF_BTS (3 * 8) /* Maximal size of a BTS record. */ -#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment. */ +/* Maximal size of a DS configuration: */ +#define MAX_SIZEOF_DS (12 * 8) -#define BTS_CONTROL \ - (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\ - ds_cfg.ctl[dsf_bts_overflow]) +/* Maximal size of a BTS record: */ +#define MAX_SIZEOF_BTS (3 * 8) +/* BTS and PEBS buffer alignment: */ +#define DS_ALIGNMENT (1 << 3) + +/* Mask of control bits in the DS MSR register: */ +#define BTS_CONTROL \ + ( ds_cfg.ctl[dsf_bts] | \ + ds_cfg.ctl[dsf_bts_kernel] | \ + ds_cfg.ctl[dsf_bts_user] | \ + ds_cfg.ctl[dsf_bts_overflow] ) /* * A BTS or PEBS tracer. @@ -65,28 +74,32 @@ static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); */ struct ds_tracer { /* The DS context (partially) owned by this tracer. */ - struct ds_context *context; + struct ds_context *context; /* The buffer provided on ds_request() and its size in bytes. */ - void *buffer; - size_t size; + void *buffer; + size_t size; }; struct bts_tracer { - /* The common DS part. */ - struct ds_tracer ds; - /* The trace including the DS configuration. */ - struct bts_trace trace; - /* Buffer overflow notification function. */ - bts_ovfl_callback_t ovfl; + /* The common DS part: */ + struct ds_tracer ds; + + /* The trace including the DS configuration: */ + struct bts_trace trace; + + /* Buffer overflow notification function: */ + bts_ovfl_callback_t ovfl; }; struct pebs_tracer { - /* The common DS part. */ - struct ds_tracer ds; - /* The trace including the DS configuration. */ - struct pebs_trace trace; - /* Buffer overflow notification function. */ - pebs_ovfl_callback_t ovfl; + /* The common DS part: */ + struct ds_tracer ds; + + /* The trace including the DS configuration: */ + struct pebs_trace trace; + + /* Buffer overflow notification function: */ + pebs_ovfl_callback_t ovfl; }; /* @@ -95,6 +108,7 @@ struct pebs_tracer { * * The DS configuration consists of the following fields; different * architetures vary in the size of those fields. + * * - double-word aligned base linear address of the BTS buffer * - write pointer into the BTS buffer * - end linear address of the BTS buffer (one byte beyond the end of @@ -133,19 +147,20 @@ enum ds_field { }; enum ds_qualifier { - ds_bts = 0, + ds_bts = 0, ds_pebs }; -static inline unsigned long ds_get(const unsigned char *base, - enum ds_qualifier qual, enum ds_field field) +static inline unsigned long +ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field) { base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); return *(unsigned long *)base; } -static inline void ds_set(unsigned char *base, enum ds_qualifier qual, - enum ds_field field, unsigned long value) +static inline void +ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field, + unsigned long value) { base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); (*(unsigned long *)base) = value; @@ -157,7 +172,6 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual, */ static DEFINE_SPINLOCK(ds_lock); - /* * We either support (system-wide) per-cpu or per-thread allocation. * We distinguish the two based on the task_struct pointer, where a @@ -211,17 +225,21 @@ static inline int check_tracer(struct task_struct *task) * deallocated when the last user puts the context. */ struct ds_context { - /* The DS configuration; goes into MSR_IA32_DS_AREA. */ - unsigned char ds[MAX_SIZEOF_DS]; - /* The owner of the BTS and PEBS configuration, respectively. */ - struct bts_tracer *bts_master; - struct pebs_tracer *pebs_master; - /* Use count. */ + /* The DS configuration; goes into MSR_IA32_DS_AREA: */ + unsigned char ds[MAX_SIZEOF_DS]; + + /* The owner of the BTS and PEBS configuration, respectively: */ + struct bts_tracer *bts_master; + struct pebs_tracer *pebs_master; + + /* Use count: */ unsigned long count; - /* Pointer to the context pointer field. */ - struct ds_context **this; - /* The traced task; NULL for current cpu. */ - struct task_struct *task; + + /* Pointer to the context pointer field: */ + struct ds_context **this; + + /* The traced task; NULL for current cpu: */ + struct task_struct *task; }; static DEFINE_PER_CPU(struct ds_context *, system_context_array); @@ -328,9 +346,9 @@ static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) * The remainder of any partially written record is zeroed out. * * context: the DS context - * qual: the buffer type - * record: the data to write - * size: the size of the data + * qual: the buffer type + * record: the data to write + * size: the size of the data */ static int ds_write(struct ds_context *context, enum ds_qualifier qual, const void *record, size_t size) @@ -429,12 +447,12 @@ enum bts_field { bts_to, bts_flags, - bts_qual = bts_from, - bts_jiffies = bts_to, - bts_pid = bts_flags, + bts_qual = bts_from, + bts_jiffies = bts_to, + bts_pid = bts_flags, - bts_qual_mask = (bts_qual_max - 1), - bts_escape = ((unsigned long)-1 & ~bts_qual_mask) + bts_qual_mask = (bts_qual_max - 1), + bts_escape = ((unsigned long)-1 & ~bts_qual_mask) }; static inline unsigned long bts_get(const char *base, enum bts_field field) @@ -461,8 +479,8 @@ static inline void bts_set(char *base, enum bts_field field, unsigned long val) * * return: bytes read/written on success; -Eerrno, otherwise */ -static int bts_read(struct bts_tracer *tracer, const void *at, - struct bts_struct *out) +static int +bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out) { if (!tracer) return -EINVAL; diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h index 0e6e19d4c7d2..2ba8745c6663 100644 --- a/arch/x86/kernel/ds_selftest.h +++ b/arch/x86/kernel/ds_selftest.h @@ -12,4 +12,4 @@ extern int ds_selftest_pebs(void); #else static inline int ds_selftest_bts(void) { return 0; } static inline int ds_selftest_pebs(void) { return 0; } -#endif /* CONFIG_X86_DS_SELFTEST */ +#endif diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index 4ca82700c04e..8b2109a6c61c 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -1,5 +1,5 @@ /* - * h/w branch tracer for x86 based on bts + * h/w branch tracer for x86 based on BTS * * Copyright (C) 2008-2009 Intel Corporation. * Markus Metzger , 2008-2009 @@ -15,8 +15,8 @@ #include -#include "trace.h" #include "trace_output.h" +#include "trace.h" #define BTS_BUFFER_SIZE (1 << 13) @@ -197,10 +197,10 @@ static void bts_trace_print_header(struct seq_file *m) static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) { + unsigned long symflags = TRACE_ITER_SYM_OFFSET; struct trace_entry *entry = iter->ent; struct trace_seq *seq = &iter->seq; struct hw_branch_entry *it; - unsigned long symflags = TRACE_ITER_SYM_OFFSET; trace_assign_type(it, entry); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 3c7b797d0d28..b91091267067 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -189,6 +189,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, #else # define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; }) #endif /* CONFIG_DYNAMIC_FTRACE */ + /* * Simple verification test of ftrace function tracer. * Enable ftrace, sleep 1/10 second, and then read the trace @@ -698,10 +699,10 @@ int trace_selftest_startup_hw_branches(struct tracer *trace, struct trace_array *tr) { - unsigned long count; - int ret; struct trace_iterator iter; struct tracer tracer; + unsigned long count; + int ret; if (!trace->open) { printk(KERN_CONT "missing open function..."); -- cgit v1.2.3 From de79f54f5347ad7ec6ff55ccbb6d4ab2a21f6a93 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:40 +0200 Subject: x86, bts, hw-branch-tracer: add _noirq variants to the debug store interface The hw-branch-tracer uses debug store functions from an on_each_cpu() context, which is simply wrong since the functions may sleep. Add _noirq variants for most functions, which may be called with interrupts disabled. Separate per-cpu and per-task tracing and allow per-cpu tracing to be controlled from any cpu. Make the hw-branch-tracer use the new debug store interface, synchronize with hotplug cpu event using get/put_online_cpus(), and remove the unnecessary spinlock. Make the ptrace bts and the ds selftest code use the new interface. Defer the ds selftest. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144555.658136000@intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ds.h | 57 ++++- arch/x86/kernel/ds.c | 474 ++++++++++++++++++++++++++++++--------- arch/x86/kernel/ds_selftest.c | 9 +- arch/x86/kernel/ptrace.c | 5 +- kernel/trace/trace_hw_branches.c | 193 ++++++---------- 5 files changed, 492 insertions(+), 246 deletions(-) (limited to 'kernel/trace') diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index 772f141afb9a..413e127e567d 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -15,8 +15,8 @@ * - buffer allocation (memory accounting) * * - * Copyright (C) 2007-2008 Intel Corporation. - * Markus Metzger , 2007-2008 + * Copyright (C) 2007-2009 Intel Corporation. + * Markus Metzger , 2007-2009 */ #ifndef _ASM_X86_DS_H @@ -83,8 +83,10 @@ enum ds_feature { * The interrupt threshold is independent from the overflow callback * to allow users to use their own overflow interrupt handling mechanism. * - * task: the task to request recording for; - * NULL for per-cpu recording on the current cpu + * The function might sleep. + * + * task: the task to request recording for + * cpu: the cpu to request recording for * base: the base pointer for the (non-pageable) buffer; * size: the size of the provided buffer in bytes * ovfl: pointer to a function to be called on buffer overflow; @@ -93,19 +95,28 @@ enum ds_feature { * -1 if no interrupt threshold is requested. * flags: a bit-mask of the above flags */ -extern struct bts_tracer *ds_request_bts(struct task_struct *task, - void *base, size_t size, - bts_ovfl_callback_t ovfl, - size_t th, unsigned int flags); -extern struct pebs_tracer *ds_request_pebs(struct task_struct *task, - void *base, size_t size, - pebs_ovfl_callback_t ovfl, - size_t th, unsigned int flags); +extern struct bts_tracer *ds_request_bts_task(struct task_struct *task, + void *base, size_t size, + bts_ovfl_callback_t ovfl, + size_t th, unsigned int flags); +extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, + bts_ovfl_callback_t ovfl, + size_t th, unsigned int flags); +extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, + void *base, size_t size, + pebs_ovfl_callback_t ovfl, + size_t th, unsigned int flags); +extern struct pebs_tracer *ds_request_pebs_cpu(int cpu, + void *base, size_t size, + pebs_ovfl_callback_t ovfl, + size_t th, unsigned int flags); /* * Release BTS or PEBS resources * Suspend and resume BTS or PEBS tracing * + * Must be called with irq's enabled. + * * tracer: the tracer handle returned from ds_request_~() */ extern void ds_release_bts(struct bts_tracer *tracer); @@ -115,6 +126,28 @@ extern void ds_release_pebs(struct pebs_tracer *tracer); extern void ds_suspend_pebs(struct pebs_tracer *tracer); extern void ds_resume_pebs(struct pebs_tracer *tracer); +/* + * Release BTS or PEBS resources + * Suspend and resume BTS or PEBS tracing + * + * Cpu tracers must call this on the traced cpu. + * Task tracers must call ds_release_~_noirq() for themselves. + * + * May be called with irq's disabled. + * + * Returns 0 if successful; + * -EPERM if the cpu tracer does not trace the current cpu. + * -EPERM if the task tracer does not trace itself. + * + * tracer: the tracer handle returned from ds_request_~() + */ +extern int ds_release_bts_noirq(struct bts_tracer *tracer); +extern int ds_suspend_bts_noirq(struct bts_tracer *tracer); +extern int ds_resume_bts_noirq(struct bts_tracer *tracer); +extern int ds_release_pebs_noirq(struct pebs_tracer *tracer); +extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer); +extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer); + /* * The raw DS buffer state as it is used for BTS and PEBS recording. diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 2071b992c35c..21a3852abf68 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -245,60 +245,50 @@ struct ds_context { struct pebs_tracer *pebs_master; /* Use count: */ - unsigned long count; + unsigned long count; /* Pointer to the context pointer field: */ struct ds_context **this; - /* The traced task; NULL for current cpu: */ + /* The traced task; NULL for cpu tracing: */ struct task_struct *task; -}; -static DEFINE_PER_CPU(struct ds_context *, system_context_array); + /* The traced cpu; only valid if task is NULL: */ + int cpu; +}; -#define system_context per_cpu(system_context_array, smp_processor_id()) +static DEFINE_PER_CPU(struct ds_context *, cpu_context); -static inline struct ds_context *ds_get_context(struct task_struct *task) +static struct ds_context *ds_get_context(struct task_struct *task, int cpu) { struct ds_context **p_context = - (task ? &task->thread.ds_ctx : &system_context); + (task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu)); struct ds_context *context = NULL; struct ds_context *new_context = NULL; - unsigned long irq; - /* - * Chances are small that we already have a context. - * - * Contexts for per-cpu tracing are allocated using - * smp_call_function(). We must not sleep. - */ - new_context = kzalloc(sizeof(*new_context), GFP_ATOMIC); + /* Chances are small that we already have a context. */ + new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); if (!new_context) return NULL; - spin_lock_irqsave(&ds_lock, irq); + spin_lock_irq(&ds_lock); context = *p_context; - if (!context) { + if (likely(!context)) { context = new_context; context->this = p_context; context->task = task; + context->cpu = cpu; context->count = 0; - if (task) - set_tsk_thread_flag(task, TIF_DS_AREA_MSR); - - if (!task || (task == current)) - wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds); - *p_context = context; } context->count++; - spin_unlock_irqrestore(&ds_lock, irq); + spin_unlock_irq(&ds_lock); if (context != new_context) kfree(new_context); @@ -306,7 +296,7 @@ static inline struct ds_context *ds_get_context(struct task_struct *task) return context; } -static inline void ds_put_context(struct ds_context *context) +static void ds_put_context(struct ds_context *context) { struct task_struct *task; unsigned long irq; @@ -328,8 +318,15 @@ static inline void ds_put_context(struct ds_context *context) if (task) clear_tsk_thread_flag(task, TIF_DS_AREA_MSR); - if (!task || (task == current)) - wrmsrl(MSR_IA32_DS_AREA, 0); + /* + * We leave the (now dangling) pointer to the DS configuration in + * the DS_AREA msr. This is as good or as bad as replacing it with + * NULL - the hardware would crash if we enabled tracing. + * + * This saves us some problems with having to write an msr on a + * different cpu while preventing others from doing the same for the + * next context for that same cpu. + */ spin_unlock_irqrestore(&ds_lock, irq); @@ -340,6 +337,31 @@ static inline void ds_put_context(struct ds_context *context) kfree(context); } +static void ds_install_ds_area(struct ds_context *context) +{ + unsigned long ds; + + ds = (unsigned long)context->ds; + + /* + * There is a race between the bts master and the pebs master. + * + * The thread/cpu access is synchronized via get/put_cpu() for + * task tracing and via wrmsr_on_cpu for cpu tracing. + * + * If bts and pebs are collected for the same task or same cpu, + * the same confiuration is written twice. + */ + if (context->task) { + get_cpu(); + if (context->task == current) + wrmsrl(MSR_IA32_DS_AREA, ds); + set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); + put_cpu(); + } else + wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA, + (u32)((u64)ds), (u32)((u64)ds >> 32)); +} /* * Call the tracer's callback on a buffer overflow. @@ -622,6 +644,7 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, * The value for 'no threshold' is -1, which will set the * threshold outside of the buffer, just like we want it. */ + ith *= ds_cfg.sizeof_rec[qual]; trace->ith = (void *)(buffer + size - ith); trace->flags = flags; @@ -630,7 +653,7 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, enum ds_qualifier qual, struct task_struct *task, - void *base, size_t size, size_t th, unsigned int flags) + int cpu, void *base, size_t size, size_t th) { struct ds_context *context; int error; @@ -643,7 +666,7 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, if (!base) goto out; - /* We require some space to do alignment adjustments below. */ + /* We need space for alignment adjustments in ds_init_ds_trace(). */ error = -EINVAL; if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) goto out; @@ -660,25 +683,27 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, tracer->size = size; error = -ENOMEM; - context = ds_get_context(task); + context = ds_get_context(task, cpu); if (!context) goto out; tracer->context = context; - ds_init_ds_trace(trace, qual, base, size, th, flags); + /* + * Defer any tracer-specific initialization work for the context until + * context ownership has been clarified. + */ error = 0; out: return error; } -struct bts_tracer *ds_request_bts(struct task_struct *task, - void *base, size_t size, - bts_ovfl_callback_t ovfl, size_t th, - unsigned int flags) +static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu, + void *base, size_t size, + bts_ovfl_callback_t ovfl, size_t th, + unsigned int flags) { struct bts_tracer *tracer; - unsigned long irq; int error; /* Buffer overflow notification is not yet implemented. */ @@ -690,42 +715,46 @@ struct bts_tracer *ds_request_bts(struct task_struct *task, if (error < 0) goto out; - /* - * Per-cpu tracing is typically requested using smp_call_function(). - * We must not sleep. - */ error = -ENOMEM; - tracer = kzalloc(sizeof(*tracer), GFP_ATOMIC); + tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); if (!tracer) goto out_put_tracer; tracer->ovfl = ovfl; + /* Do some more error checking and acquire a tracing context. */ error = ds_request(&tracer->ds, &tracer->trace.ds, - ds_bts, task, base, size, th, flags); + ds_bts, task, cpu, base, size, th); if (error < 0) goto out_tracer; - - spin_lock_irqsave(&ds_lock, irq); + /* Claim the bts part of the tracing context we acquired above. */ + spin_lock_irq(&ds_lock); error = -EPERM; if (tracer->ds.context->bts_master) goto out_unlock; tracer->ds.context->bts_master = tracer; - spin_unlock_irqrestore(&ds_lock, irq); + spin_unlock_irq(&ds_lock); + /* + * Now that we own the bts part of the context, let's complete the + * initialization for that part. + */ + ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags); + ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); + ds_install_ds_area(tracer->ds.context); tracer->trace.read = bts_read; tracer->trace.write = bts_write; - ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); + /* Start tracing. */ ds_resume_bts(tracer); return tracer; out_unlock: - spin_unlock_irqrestore(&ds_lock, irq); + spin_unlock_irq(&ds_lock); ds_put_context(tracer->ds.context); out_tracer: kfree(tracer); @@ -735,13 +764,27 @@ struct bts_tracer *ds_request_bts(struct task_struct *task, return ERR_PTR(error); } -struct pebs_tracer *ds_request_pebs(struct task_struct *task, - void *base, size_t size, - pebs_ovfl_callback_t ovfl, size_t th, - unsigned int flags) +struct bts_tracer *ds_request_bts_task(struct task_struct *task, + void *base, size_t size, + bts_ovfl_callback_t ovfl, + size_t th, unsigned int flags) +{ + return ds_request_bts(task, 0, base, size, ovfl, th, flags); +} + +struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, + bts_ovfl_callback_t ovfl, + size_t th, unsigned int flags) +{ + return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags); +} + +static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu, + void *base, size_t size, + pebs_ovfl_callback_t ovfl, size_t th, + unsigned int flags) { struct pebs_tracer *tracer; - unsigned long irq; int error; /* Buffer overflow notification is not yet implemented. */ @@ -753,37 +796,43 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task, if (error < 0) goto out; - /* - * Per-cpu tracing is typically requested using smp_call_function(). - * We must not sleep. - */ error = -ENOMEM; - tracer = kzalloc(sizeof(*tracer), GFP_ATOMIC); + tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); if (!tracer) goto out_put_tracer; tracer->ovfl = ovfl; + /* Do some more error checking and acquire a tracing context. */ error = ds_request(&tracer->ds, &tracer->trace.ds, - ds_pebs, task, base, size, th, flags); + ds_pebs, task, cpu, base, size, th); if (error < 0) goto out_tracer; - spin_lock_irqsave(&ds_lock, irq); + /* Claim the pebs part of the tracing context we acquired above. */ + spin_lock_irq(&ds_lock); error = -EPERM; if (tracer->ds.context->pebs_master) goto out_unlock; tracer->ds.context->pebs_master = tracer; - spin_unlock_irqrestore(&ds_lock, irq); + spin_unlock_irq(&ds_lock); + /* + * Now that we own the pebs part of the context, let's complete the + * initialization for that part. + */ + ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags); ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); + ds_install_ds_area(tracer->ds.context); + + /* Start tracing. */ ds_resume_pebs(tracer); return tracer; out_unlock: - spin_unlock_irqrestore(&ds_lock, irq); + spin_unlock_irq(&ds_lock); ds_put_context(tracer->ds.context); out_tracer: kfree(tracer); @@ -793,16 +842,26 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task, return ERR_PTR(error); } -void ds_release_bts(struct bts_tracer *tracer) +struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, + void *base, size_t size, + pebs_ovfl_callback_t ovfl, + size_t th, unsigned int flags) { - struct task_struct *task; + return ds_request_pebs(task, 0, base, size, ovfl, th, flags); +} - if (!tracer) - return; +struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size, + pebs_ovfl_callback_t ovfl, + size_t th, unsigned int flags) +{ + return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags); +} - task = tracer->ds.context->task; +static void ds_free_bts(struct bts_tracer *tracer) +{ + struct task_struct *task; - ds_suspend_bts(tracer); + task = tracer->ds.context->task; WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); tracer->ds.context->bts_master = NULL; @@ -817,9 +876,69 @@ void ds_release_bts(struct bts_tracer *tracer) kfree(tracer); } +void ds_release_bts(struct bts_tracer *tracer) +{ + might_sleep(); + + if (!tracer) + return; + + ds_suspend_bts(tracer); + ds_free_bts(tracer); +} + +int ds_release_bts_noirq(struct bts_tracer *tracer) +{ + struct task_struct *task; + unsigned long irq; + int error; + + if (!tracer) + return 0; + + task = tracer->ds.context->task; + + local_irq_save(irq); + + error = -EPERM; + if (!task && + (tracer->ds.context->cpu != smp_processor_id())) + goto out; + + error = -EPERM; + if (task && (task != current)) + goto out; + + ds_suspend_bts_noirq(tracer); + ds_free_bts(tracer); + + error = 0; + out: + local_irq_restore(irq); + return error; +} + +static void update_task_debugctlmsr(struct task_struct *task, + unsigned long debugctlmsr) +{ + task->thread.debugctlmsr = debugctlmsr; + + get_cpu(); + if (task == current) + update_debugctlmsr(debugctlmsr); + + if (task->thread.debugctlmsr) + set_tsk_thread_flag(task, TIF_DEBUGCTLMSR); + else + clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); + put_cpu(); +} + void ds_suspend_bts(struct bts_tracer *tracer) { struct task_struct *task; + unsigned long debugctlmsr; + int cpu; if (!tracer) return; @@ -827,29 +946,60 @@ void ds_suspend_bts(struct bts_tracer *tracer) tracer->flags = 0; task = tracer->ds.context->task; + cpu = tracer->ds.context->cpu; - if (!task || (task == current)) - update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL); + WARN_ON(!task && irqs_disabled()); - if (task) { - task->thread.debugctlmsr &= ~BTS_CONTROL; + debugctlmsr = (task ? + task->thread.debugctlmsr : + get_debugctlmsr_on_cpu(cpu)); + debugctlmsr &= ~BTS_CONTROL; - if (!task->thread.debugctlmsr) - clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); - } + if (task) + update_task_debugctlmsr(task, debugctlmsr); + else + update_debugctlmsr_on_cpu(cpu, debugctlmsr); } -void ds_resume_bts(struct bts_tracer *tracer) +int ds_suspend_bts_noirq(struct bts_tracer *tracer) { struct task_struct *task; - unsigned long control; + unsigned long debugctlmsr, irq; + int cpu, error = 0; if (!tracer) - return; + return 0; - tracer->flags = tracer->trace.ds.flags; + tracer->flags = 0; task = tracer->ds.context->task; + cpu = tracer->ds.context->cpu; + + local_irq_save(irq); + + error = -EPERM; + if (!task && (cpu != smp_processor_id())) + goto out; + + debugctlmsr = (task ? + task->thread.debugctlmsr : + get_debugctlmsr()); + debugctlmsr &= ~BTS_CONTROL; + + if (task) + update_task_debugctlmsr(task, debugctlmsr); + else + update_debugctlmsr(debugctlmsr); + + error = 0; + out: + local_irq_restore(irq); + return error; +} + +static unsigned long ds_bts_control(struct bts_tracer *tracer) +{ + unsigned long control; control = ds_cfg.ctl[dsf_bts]; if (!(tracer->trace.ds.flags & BTS_KERNEL)) @@ -857,25 +1007,77 @@ void ds_resume_bts(struct bts_tracer *tracer) if (!(tracer->trace.ds.flags & BTS_USER)) control |= ds_cfg.ctl[dsf_bts_user]; - if (task) { - task->thread.debugctlmsr |= control; - set_tsk_thread_flag(task, TIF_DEBUGCTLMSR); - } - - if (!task || (task == current)) - update_debugctlmsr(get_debugctlmsr() | control); + return control; } -void ds_release_pebs(struct pebs_tracer *tracer) +void ds_resume_bts(struct bts_tracer *tracer) { struct task_struct *task; + unsigned long debugctlmsr; + int cpu; if (!tracer) return; + tracer->flags = tracer->trace.ds.flags; + task = tracer->ds.context->task; + cpu = tracer->ds.context->cpu; - ds_suspend_pebs(tracer); + WARN_ON(!task && irqs_disabled()); + + debugctlmsr = (task ? + task->thread.debugctlmsr : + get_debugctlmsr_on_cpu(cpu)); + debugctlmsr |= ds_bts_control(tracer); + + if (task) + update_task_debugctlmsr(task, debugctlmsr); + else + update_debugctlmsr_on_cpu(cpu, debugctlmsr); +} + +int ds_resume_bts_noirq(struct bts_tracer *tracer) +{ + struct task_struct *task; + unsigned long debugctlmsr, irq; + int cpu, error = 0; + + if (!tracer) + return 0; + + tracer->flags = tracer->trace.ds.flags; + + task = tracer->ds.context->task; + cpu = tracer->ds.context->cpu; + + local_irq_save(irq); + + error = -EPERM; + if (!task && (cpu != smp_processor_id())) + goto out; + + debugctlmsr = (task ? + task->thread.debugctlmsr : + get_debugctlmsr()); + debugctlmsr |= ds_bts_control(tracer); + + if (task) + update_task_debugctlmsr(task, debugctlmsr); + else + update_debugctlmsr(debugctlmsr); + + error = 0; + out: + local_irq_restore(irq); + return error; +} + +static void ds_free_pebs(struct pebs_tracer *tracer) +{ + struct task_struct *task; + + task = tracer->ds.context->task; WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); tracer->ds.context->pebs_master = NULL; @@ -886,16 +1088,68 @@ void ds_release_pebs(struct pebs_tracer *tracer) kfree(tracer); } +void ds_release_pebs(struct pebs_tracer *tracer) +{ + might_sleep(); + + if (!tracer) + return; + + ds_suspend_pebs(tracer); + ds_free_pebs(tracer); +} + +int ds_release_pebs_noirq(struct pebs_tracer *tracer) +{ + struct task_struct *task; + unsigned long irq; + int error; + + if (!tracer) + return 0; + + task = tracer->ds.context->task; + + local_irq_save(irq); + + error = -EPERM; + if (!task && + (tracer->ds.context->cpu != smp_processor_id())) + goto out; + + error = -EPERM; + if (task && (task != current)) + goto out; + + ds_suspend_pebs_noirq(tracer); + ds_free_pebs(tracer); + + error = 0; + out: + local_irq_restore(irq); + return error; +} + void ds_suspend_pebs(struct pebs_tracer *tracer) { } +int ds_suspend_pebs_noirq(struct pebs_tracer *tracer) +{ + return 0; +} + void ds_resume_pebs(struct pebs_tracer *tracer) { } +int ds_resume_pebs_noirq(struct pebs_tracer *tracer) +{ + return 0; +} + const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) { if (!tracer) @@ -1004,26 +1258,6 @@ ds_configure(const struct ds_configuration *cfg, printk(KERN_INFO "[ds] pebs not available\n"); } - if (ds_cfg.sizeof_rec[ds_bts]) { - int error; - - error = ds_selftest_bts(); - if (error) { - WARN(1, "[ds] selftest failed. disabling bts.\n"); - ds_cfg.sizeof_rec[ds_bts] = 0; - } - } - - if (ds_cfg.sizeof_rec[ds_pebs]) { - int error; - - error = ds_selftest_pebs(); - if (error) { - WARN(1, "[ds] selftest failed. disabling pebs.\n"); - ds_cfg.sizeof_rec[ds_pebs] = 0; - } - } - printk(KERN_INFO "[ds] sizes: address: %u bit, ", 8 * ds_cfg.sizeof_ptr_field); printk("bts/pebs record: %u/%u bytes\n", @@ -1127,3 +1361,29 @@ void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) void ds_exit_thread(struct task_struct *tsk) { } + +static __init int ds_selftest(void) +{ + if (ds_cfg.sizeof_rec[ds_bts]) { + int error; + + error = ds_selftest_bts(); + if (error) { + WARN(1, "[ds] selftest failed. disabling bts.\n"); + ds_cfg.sizeof_rec[ds_bts] = 0; + } + } + + if (ds_cfg.sizeof_rec[ds_pebs]) { + int error; + + error = ds_selftest_pebs(); + if (error) { + WARN(1, "[ds] selftest failed. disabling pebs.\n"); + ds_cfg.sizeof_rec[ds_pebs] = 0; + } + } + + return 0; +} +device_initcall(ds_selftest); diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c index 8c46fbf38c46..e5a263c8a14c 100644 --- a/arch/x86/kernel/ds_selftest.c +++ b/arch/x86/kernel/ds_selftest.c @@ -10,11 +10,12 @@ #include #include +#include #include -#define DS_SELFTEST_BUFFER_SIZE 1021 /* Intentionally chose an odd size. */ +#define BUFFER_SIZE 1021 /* Intentionally chose an odd size. */ static int ds_selftest_bts_consistency(const struct bts_trace *trace) @@ -125,12 +126,12 @@ int ds_selftest_bts(void) struct bts_tracer *tracer; int error = 0; void *top; - unsigned char buffer[DS_SELFTEST_BUFFER_SIZE]; + unsigned char buffer[BUFFER_SIZE]; printk(KERN_INFO "[ds] bts selftest..."); - tracer = ds_request_bts(NULL, buffer, DS_SELFTEST_BUFFER_SIZE, - NULL, (size_t)-1, BTS_KERNEL); + tracer = ds_request_bts_cpu(smp_processor_id(), buffer, BUFFER_SIZE, + NULL, (size_t)-1, BTS_KERNEL); if (IS_ERR(tracer)) { error = PTR_ERR(tracer); tracer = NULL; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7c21d1e8cae7..adbb24322d8f 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -800,8 +800,9 @@ static int ptrace_bts_config(struct task_struct *child, if (cfg.flags & PTRACE_BTS_O_SCHED) flags |= BTS_TIMESTAMPS; - context->tracer = ds_request_bts(child, context->buffer, context->size, - NULL, (size_t)-1, flags); + context->tracer = + ds_request_bts_task(child, context->buffer, context->size, + NULL, (size_t)-1, flags); if (unlikely(IS_ERR(context->tracer))) { int error = PTR_ERR(context->tracer); diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index 8b2109a6c61c..50565d8cd2ed 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -4,7 +4,6 @@ * Copyright (C) 2008-2009 Intel Corporation. * Markus Metzger , 2008-2009 */ -#include #include #include #include @@ -21,168 +20,113 @@ #define BTS_BUFFER_SIZE (1 << 13) -/* - * The tracer lock protects the below per-cpu tracer array. - * It needs to be held to: - * - start tracing on all cpus - * - stop tracing on all cpus - * - start tracing on a single hotplug cpu - * - stop tracing on a single hotplug cpu - * - read the trace from all cpus - * - read the trace from a single cpu - */ -static DEFINE_SPINLOCK(bts_tracer_lock); static DEFINE_PER_CPU(struct bts_tracer *, tracer); static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer); #define this_tracer per_cpu(tracer, smp_processor_id()) -#define this_buffer per_cpu(buffer, smp_processor_id()) static int trace_hw_branches_enabled __read_mostly; static int trace_hw_branches_suspended __read_mostly; static struct trace_array *hw_branch_trace __read_mostly; -/* - * Initialize the tracer for the current cpu. - * The argument is ignored. - * - * pre: bts_tracer_lock must be locked. - */ -static void bts_trace_init_cpu(void *arg) +static void bts_trace_init_cpu(int cpu) { - if (this_tracer) - ds_release_bts(this_tracer); + per_cpu(tracer, cpu) = + ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE, + NULL, (size_t)-1, BTS_KERNEL); - this_tracer = ds_request_bts(NULL, this_buffer, BTS_BUFFER_SIZE, - NULL, (size_t)-1, BTS_KERNEL); - if (IS_ERR(this_tracer)) { - this_tracer = NULL; - return; - } + if (IS_ERR(per_cpu(tracer, cpu))) + per_cpu(tracer, cpu) = NULL; } static int bts_trace_init(struct trace_array *tr) { - int cpu, avail; - - spin_lock(&bts_tracer_lock); + int cpu; hw_branch_trace = tr; + trace_hw_branches_enabled = 0; - on_each_cpu(bts_trace_init_cpu, NULL, 1); - - /* Check on how many cpus we could enable tracing */ - avail = 0; - for_each_online_cpu(cpu) - if (per_cpu(tracer, cpu)) - avail++; + get_online_cpus(); + for_each_online_cpu(cpu) { + bts_trace_init_cpu(cpu); - trace_hw_branches_enabled = (avail ? 1 : 0); + if (likely(per_cpu(tracer, cpu))) + trace_hw_branches_enabled = 1; + } trace_hw_branches_suspended = 0; - - spin_unlock(&bts_tracer_lock); - + put_online_cpus(); /* If we could not enable tracing on a single cpu, we fail. */ - return avail ? 0 : -EOPNOTSUPP; -} - -/* - * Release the tracer for the current cpu. - * The argument is ignored. - * - * pre: bts_tracer_lock must be locked. - */ -static void bts_trace_release_cpu(void *arg) -{ - if (this_tracer) { - ds_release_bts(this_tracer); - this_tracer = NULL; - } + return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP; } static void bts_trace_reset(struct trace_array *tr) { - spin_lock(&bts_tracer_lock); + int cpu; - on_each_cpu(bts_trace_release_cpu, NULL, 1); + get_online_cpus(); + for_each_online_cpu(cpu) { + if (likely(per_cpu(tracer, cpu))) { + ds_release_bts(per_cpu(tracer, cpu)); + per_cpu(tracer, cpu) = NULL; + } + } trace_hw_branches_enabled = 0; trace_hw_branches_suspended = 0; - - spin_unlock(&bts_tracer_lock); -} - -/* - * Resume tracing on the current cpu. - * The argument is ignored. - * - * pre: bts_tracer_lock must be locked. - */ -static void bts_trace_resume_cpu(void *arg) -{ - if (this_tracer) - ds_resume_bts(this_tracer); + put_online_cpus(); } static void bts_trace_start(struct trace_array *tr) { - spin_lock(&bts_tracer_lock); + int cpu; - on_each_cpu(bts_trace_resume_cpu, NULL, 1); + get_online_cpus(); + for_each_online_cpu(cpu) + if (likely(per_cpu(tracer, cpu))) + ds_resume_bts(per_cpu(tracer, cpu)); trace_hw_branches_suspended = 0; - - spin_unlock(&bts_tracer_lock); -} - -/* - * Suspend tracing on the current cpu. - * The argument is ignored. - * - * pre: bts_tracer_lock must be locked. - */ -static void bts_trace_suspend_cpu(void *arg) -{ - if (this_tracer) - ds_suspend_bts(this_tracer); + put_online_cpus(); } static void bts_trace_stop(struct trace_array *tr) { - spin_lock(&bts_tracer_lock); + int cpu; - on_each_cpu(bts_trace_suspend_cpu, NULL, 1); + get_online_cpus(); + for_each_online_cpu(cpu) + if (likely(per_cpu(tracer, cpu))) + ds_suspend_bts(per_cpu(tracer, cpu)); trace_hw_branches_suspended = 1; - - spin_unlock(&bts_tracer_lock); + put_online_cpus(); } static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb, unsigned long action, void *hcpu) { - unsigned int cpu = (unsigned long)hcpu; - - spin_lock(&bts_tracer_lock); - - if (!trace_hw_branches_enabled) - goto out; + int cpu = (long)hcpu; switch (action) { case CPU_ONLINE: case CPU_DOWN_FAILED: - smp_call_function_single(cpu, bts_trace_init_cpu, NULL, 1); - - if (trace_hw_branches_suspended) - smp_call_function_single(cpu, bts_trace_suspend_cpu, - NULL, 1); + /* The notification is sent with interrupts enabled. */ + if (trace_hw_branches_enabled) { + bts_trace_init_cpu(cpu); + + if (trace_hw_branches_suspended && + likely(per_cpu(tracer, cpu))) + ds_suspend_bts(per_cpu(tracer, cpu)); + } break; + case CPU_DOWN_PREPARE: - smp_call_function_single(cpu, bts_trace_release_cpu, NULL, 1); - break; + /* The notification is sent with interrupts enabled. */ + if (likely(per_cpu(tracer, cpu))) { + ds_release_bts(per_cpu(tracer, cpu)); + per_cpu(tracer, cpu) = NULL; + } } - out: - spin_unlock(&bts_tracer_lock); return NOTIFY_DONE; } @@ -274,7 +218,7 @@ static void trace_bts_at(const struct bts_trace *trace, void *at) /* * Collect the trace on the current cpu and write it into the ftrace buffer. * - * pre: bts_tracer_lock must be locked + * pre: tracing must be suspended on the current cpu */ static void trace_bts_cpu(void *arg) { @@ -291,10 +235,9 @@ static void trace_bts_cpu(void *arg) if (unlikely(!this_tracer)) return; - ds_suspend_bts(this_tracer); trace = ds_read_bts(this_tracer); if (!trace) - goto out; + return; for (at = trace->ds.top; (void *)at < trace->ds.end; at += trace->ds.size) @@ -303,18 +246,27 @@ static void trace_bts_cpu(void *arg) for (at = trace->ds.begin; (void *)at < trace->ds.top; at += trace->ds.size) trace_bts_at(trace, at); - -out: - ds_resume_bts(this_tracer); } static void trace_bts_prepare(struct trace_iterator *iter) { - spin_lock(&bts_tracer_lock); + int cpu; + get_online_cpus(); + for_each_online_cpu(cpu) + if (likely(per_cpu(tracer, cpu))) + ds_suspend_bts(per_cpu(tracer, cpu)); + /* + * We need to collect the trace on the respective cpu since ftrace + * implicitly adds the record for the current cpu. + * Once that is more flexible, we could collect the data from any cpu. + */ on_each_cpu(trace_bts_cpu, iter->tr, 1); - spin_unlock(&bts_tracer_lock); + for_each_online_cpu(cpu) + if (likely(per_cpu(tracer, cpu))) + ds_resume_bts(per_cpu(tracer, cpu)); + put_online_cpus(); } static void trace_bts_close(struct trace_iterator *iter) @@ -324,12 +276,11 @@ static void trace_bts_close(struct trace_iterator *iter) void trace_hw_branch_oops(void) { - spin_lock(&bts_tracer_lock); - - if (trace_hw_branches_enabled) + if (this_tracer) { + ds_suspend_bts_noirq(this_tracer); trace_bts_cpu(hw_branch_trace); - - spin_unlock(&bts_tracer_lock); + ds_resume_bts_noirq(this_tracer); + } } struct tracer bts_tracer __read_mostly = -- cgit v1.2.3 From 4d657e51dfc042216febd4a007c6f36881f9256d Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 3 Apr 2009 16:43:41 +0200 Subject: x86, hw-branch-tracer: allocate selftest iterator on heap Allocate the trace_iterator for the hw-branch-tracer selftest on the heap. Signed-off-by: Markus Metzger Cc: roland@redhat.com Cc: eranian@googlemail.com Cc: oleg@redhat.com Cc: juan.villacis@intel.com Cc: ak@linux.jf.intel.com LKML-Reference: <20090403144556.578777000@intel.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_selftest.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 499d01c44cd1..00dd6485bdd7 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -757,7 +757,7 @@ int trace_selftest_startup_hw_branches(struct tracer *trace, struct trace_array *tr) { - struct trace_iterator iter; + struct trace_iterator *iter; struct tracer tracer; unsigned long count; int ret; @@ -777,17 +777,21 @@ trace_selftest_startup_hw_branches(struct tracer *trace, * The hw-branch tracer needs to collect the trace from the various * cpu trace buffers - before tracing is stopped. */ - memset(&iter, 0, sizeof(iter)); + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return -ENOMEM; + memcpy(&tracer, trace, sizeof(tracer)); - iter.trace = &tracer; - iter.tr = tr; - iter.pos = -1; - mutex_init(&iter.mutex); + iter->trace = &tracer; + iter->tr = tr; + iter->pos = -1; + mutex_init(&iter->mutex); - trace->open(&iter); + trace->open(iter); - mutex_destroy(&iter.mutex); + mutex_destroy(&iter->mutex); + kfree(iter); tracing_stop(); -- cgit v1.2.3 From 56449f437add737a1e5e1cb7e00f63ac8ead1938 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 14 Apr 2009 11:24:36 +0200 Subject: tracing: make the trace clocks available generally Jeremy Fitzhardinge reported this build failure: LD .tmp_vmlinux1 arch/x86/kernel/built-in.o: In function `ds_take_timestamp': git/linux/arch/x86/kernel/ds.c:1380: undefined reference to `trace_clock_global' git/linux/arch/x86/kernel/ds.c:1380: undefined reference to `trace_clock_global' Which is due to !CONFIG_TRACING && CONFIG_X86_DS=y. Expose the trace clock code to CONFIG_X86_DS as well. [ Unfortunately librarizing doesnt work well - ancient architectures with no raw_local_irq_save() primitive break the build. ] Reported-by: Jeremy Fitzhardinge LKML-Reference: <49E4413F.7070700@goop.org> Signed-off-by: Ingo Molnar --- kernel/Makefile | 1 + kernel/trace/Makefile | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/Makefile b/kernel/Makefile index bab1dffe37e9..c8e1be5f0b00 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -92,6 +92,7 @@ obj-$(CONFIG_LATENCYTOP) += latencytop.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o obj-$(CONFIG_FUNCTION_TRACER) += trace/ obj-$(CONFIG_TRACING) += trace/ +obj-$(CONFIG_X86_DS) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_SLOW_WORK) += slow-work.o diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 2630f5121ec1..ecc671e9f142 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -15,11 +15,16 @@ ifdef CONFIG_TRACING_BRANCHES KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING endif +# +# Make the trace clocks available generally: it's infrastructure +# relied on by ptrace for example: +# +obj-y += trace_clock.o + obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o obj-$(CONFIG_RING_BUFFER) += ring_buffer.o obj-$(CONFIG_TRACING) += trace.o -obj-$(CONFIG_TRACING) += trace_clock.o obj-$(CONFIG_TRACING) += trace_output.o obj-$(CONFIG_TRACING) += trace_stat.o obj-$(CONFIG_TRACING) += trace_printk.o -- cgit v1.2.3