From dd17c8f72993f9461e9c19250e3f155d6d99df22 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 29 Oct 2009 22:34:15 +0900 Subject: percpu: remove per_cpu__ prefix. Now that the return from alloc_percpu is compatible with the address of per-cpu vars, it makes sense to hand around the address of per-cpu variables. To make this sane, we remove the per_cpu__ prefix we created to stop people accidentally using these vars directly. Now that we have sparse, we can use that (next patch). tj: * Updated to convert stuff which was missed by or added after the original patch. * Kill per_cpu_var() macro. Signed-off-by: Rusty Russell Signed-off-by: Tejun Heo Reviewed-by: Christoph Lameter --- kernel/trace/trace.c | 6 +++--- kernel/trace/trace_functions_graph.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 85a5ed70b5b2..b808177af816 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -91,12 +91,12 @@ DEFINE_PER_CPU(int, ftrace_cpu_disabled); static inline void ftrace_disable_cpu(void) { preempt_disable(); - __this_cpu_inc(per_cpu_var(ftrace_cpu_disabled)); + __this_cpu_inc(ftrace_cpu_disabled); } static inline void ftrace_enable_cpu(void) { - __this_cpu_dec(per_cpu_var(ftrace_cpu_disabled)); + __this_cpu_dec(ftrace_cpu_disabled); preempt_enable(); } @@ -1085,7 +1085,7 @@ trace_function(struct trace_array *tr, struct ftrace_entry *entry; /* If we are reading the ring buffer, don't trace */ - if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) + if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) return; event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 90a6daa10962..8614e3241ff8 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -176,7 +176,7 @@ static int __trace_graph_entry(struct trace_array *tr, struct ring_buffer *buffer = tr->buffer; struct ftrace_graph_ent_entry *entry; - if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) + if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) return 0; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_ENT, @@ -240,7 +240,7 @@ static void __trace_graph_return(struct trace_array *tr, struct ring_buffer *buffer = tr->buffer; struct ftrace_graph_ret_entry *entry; - if (unlikely(__this_cpu_read(per_cpu_var(ftrace_cpu_disabled)))) + if (unlikely(__this_cpu_read(ftrace_cpu_disabled))) return; event = trace_buffer_lock_reserve(buffer, TRACE_GRAPH_RET, -- cgit v1.2.3 From 79615760f380ec86cd58204744e774c33fab9211 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 5 Jan 2010 15:34:50 +0900 Subject: local_t: Move local.h include to ringbuffer.c and ring_buffer_benchmark.c ringbuffer*.c are the last users of local.h. Remove the include from module.h and add it to the ringbuffer files.
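For context, asm/local.h is what provides the local_t type that the ring buffer uses for its per-cpu bookkeeping. A minimal sketch of that API (the counter and function names here are illustrative, not taken from the patch):

#include <asm/local.h>

/* A cpu-local counter: updates are atomic with respect to interrupts
 * on the owning CPU, but cheaper than a full atomic_t because no
 * cross-CPU ordering is required. */
static local_t entries_committed = LOCAL_INIT(0);

static void note_commit(void)
{
	local_inc(&entries_committed);	/* IRQ-safe on this CPU */
}

static long commits_seen(void)
{
	return local_read(&entries_committed);
}
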
Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/module.h | 1 - kernel/trace/ring_buffer.c | 1 + kernel/trace/ring_buffer_benchmark.c | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/include/linux/module.h b/include/linux/module.h index 2302f09ea2d9..7e74ae0051cc 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -17,7 +17,6 @@ #include #include -#include #include #include diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2326b04c95c4..eb6c8988c31a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -20,6 +20,7 @@ #include #include +#include #include "trace.h" /* diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c index b2477caf09c2..df74c7982255 100644 --- a/kernel/trace/ring_buffer_benchmark.c +++ b/kernel/trace/ring_buffer_benchmark.c @@ -8,6 +8,7 @@ #include #include #include +#include struct rb_page { u64 ts; -- cgit v1.2.3 From 809826a389040e0ad9d646b587bccc0e34691afd Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:34 +0800 Subject: tracing: Have __dynamic_array() define a field This is part of a patch set that removes the show_format method in the ftrace event macros. This patch set requires that all fields are added to the ftrace_event_call->fields. This patch changes __dynamic_array() to call trace_define_field() to include fields that use __dynamic_array(). Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D36.8090100@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_export.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index d4fa5dc1ee4e..9978a4f40090 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -175,7 +175,12 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ return ret; #undef __dynamic_array -#define __dynamic_array(type, item) +#define __dynamic_array(type, item) \ + ret = trace_define_field(event_call, #type, #item, \ + offsetof(typeof(field), item), \ + 0, is_signed_type(type), FILTER_OTHER);\ + if (ret) \ + return ret; #undef FTRACE_ENTRY #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ -- cgit v1.2.3 From 509e760cd91c831983097ae174cb6c0b8c6c8e6b Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:42 +0800 Subject: tracing: Add print_fmt field This is part of a patch set that removes the show_format method in the ftrace event macros. The print_fmt field is added to hold the string that shows the print_fmt in the event format files. This patch only adds the field but it is currently not used. Later patches will use this field to enable us to remove the show_format field and function. 
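To illustrate what the field will eventually hold, consider a hypothetical event and the string that the TP_printk() redefinition in the diff below generates for it (the event name and its fields are invented for the example):

/* In the event definition: */
TP_printk("irq=%d ret=%d", __entry->irq, __entry->ret)

/* TP_printk(fmt, args...) expands to "\"" fmt "\", " __stringify(args),
 * and __entry is mapped to REC, so the generated constant is: */
static const char print_fmt_my_event[] =
	"\"irq=%d ret=%d\", REC->irq, REC->ret";
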
Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D3E.2000704@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 1 + include/trace/ftrace.h | 28 +++++++++++++++++++++++++++- kernel/trace/trace_export.c | 7 +++++++ 3 files changed, 35 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 2233c98d80df..bd23d8e52f02 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -121,6 +121,7 @@ struct ftrace_event_call { int (*regfunc)(struct ftrace_event_call *); void (*unregfunc)(struct ftrace_event_call *); int id; + const char *print_fmt; int (*raw_init)(struct ftrace_event_call *); int (*show_format)(struct ftrace_event_call *, struct trace_seq *); diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index c6fe03e902ca..3351d85c83a3 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -722,8 +722,20 @@ static struct trace_event ftrace_event_type_##call = { \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +#undef __entry +#define __entry REC + +#undef __print_flags +#undef __print_symbolic +#undef __get_dynamic_array +#undef __get_str + +#undef TP_printk +#define TP_printk(fmt, args...) "\"" fmt "\", " __stringify(args) + #undef DECLARE_EVENT_CLASS -#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ +static const char print_fmt_##call[] = print; #undef DEFINE_EVENT #define DEFINE_EVENT(template, call, proto, args) \ @@ -737,6 +749,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .raw_init = trace_event_raw_init, \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ + .print_fmt = print_fmt_##template, \ .show_format = ftrace_format_##template, \ .define_fields = ftrace_define_fields_##template, \ _TRACE_PROFILE_INIT(call) \ @@ -745,6 +758,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef DEFINE_EVENT_PRINT #define DEFINE_EVENT_PRINT(template, call, proto, args, print) \ \ +static const char print_fmt_##call[] = print; \ + \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ @@ -754,6 +769,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .raw_init = trace_event_raw_init, \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ + .print_fmt = print_fmt_##call, \ .show_format = ftrace_format_##call, \ .define_fields = ftrace_define_fields_##template, \ _TRACE_PROFILE_INIT(call) \ @@ -837,6 +853,16 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #ifdef CONFIG_EVENT_PROFILE +#undef __entry +#define __entry entry + +#undef __get_dynamic_array +#define __get_dynamic_array(field) \ + ((void *)__entry + (__entry->__data_loc_##field & 0xffff)) + +#undef __get_str +#define __get_str(field) (char *)__get_dynamic_array(field) + #undef __perf_addr #define __perf_addr(a) __addr = (a) diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 9978a4f40090..95d14b640a66 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -203,6 +203,9 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call) return 0; } +#undef __entry +#define __entry REC + #undef __field #define __field(type, item) @@ -218,6 +221,9 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call) #undef 
__dynamic_array #define __dynamic_array(type, item) +#undef F_printk +#define F_printk(fmt, args...) #fmt ", " __stringify(args) + #undef FTRACE_ENTRY #define FTRACE_ENTRY(call, struct_name, type, tstruct, print) \ \ @@ -228,6 +234,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .id = type, \ .system = __stringify(TRACE_SYSTEM), \ .raw_init = ftrace_raw_init_event, \ + .print_fmt = print, \ .show_format = ftrace_format_##call, \ .define_fields = ftrace_define_fields_##call, \ }; \ -- cgit v1.2.3 From 50307a45f8515f6244e3b08e6b19824b9fbfe293 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:45 +0800 Subject: tracing/syscalls: Init print_fmt for syscall events This is part of a patch set that removes the show_format method in the ftrace event macros. Add the print_fmt initialization to the syscall events. The print_fmt is still not used, but will be in the follow up patches. Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D41.609@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_syscalls.c | 68 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 75289f372dd2..1352b0a36fac 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -191,6 +191,67 @@ int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) return trace_seq_putc(s, '\n'); } +static +int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) +{ + int i; + int pos = 0; + + /* When len=0, we just calculate the needed length */ +#define LEN_OR_ZERO (len ? len - pos : 0) + + pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); + for (i = 0; i < entry->nb_args; i++) { + pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s", + entry->args[i], sizeof(unsigned long), + i == entry->nb_args - 1 ? 
"" : ", "); + } + pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); + + for (i = 0; i < entry->nb_args; i++) { + pos += snprintf(buf + pos, LEN_OR_ZERO, + ", ((unsigned long)(REC->%s))", entry->args[i]); + } + +#undef LEN_OR_ZERO + + /* return the length of print_fmt */ + return pos; +} + +static int set_syscall_print_fmt(struct ftrace_event_call *call) +{ + char *print_fmt; + int len; + struct syscall_metadata *entry = call->data; + + if (entry->enter_event != call) { + call->print_fmt = "\"0x%lx\", REC->ret"; + return 0; + } + + /* First: called with 0 length to calculate the needed length */ + len = __set_enter_print_fmt(entry, NULL, 0); + + print_fmt = kmalloc(len + 1, GFP_KERNEL); + if (!print_fmt) + return -ENOMEM; + + /* Second: actually write the @print_fmt */ + __set_enter_print_fmt(entry, print_fmt, len + 1); + call->print_fmt = print_fmt; + + return 0; +} + +static void free_syscall_print_fmt(struct ftrace_event_call *call) +{ + struct syscall_metadata *entry = call->data; + + if (entry->enter_event == call) + kfree(call->print_fmt); +} + int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) { int ret; @@ -386,9 +447,14 @@ int init_syscall_trace(struct ftrace_event_call *call) { int id; + if (set_syscall_print_fmt(call) < 0) + return -ENOMEM; + id = register_ftrace_event(call->event); - if (!id) + if (!id) { + free_syscall_print_fmt(call); return -ENODEV; + } call->id = id; INIT_LIST_HEAD(&call->fields); return 0; -- cgit v1.2.3 From a342a0280b981c130e32dbb94dbd3a57959c4d04 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:49 +0800 Subject: tracing/kprobes: Init print_fmt for kprobe events This is part of a patch set that removes the show_format method in the ftrace event macros. Add the print_fmt initialization to the kprobe events. The print_fmt is still not used, but will be in the follow up patches. Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D45.3080100@cn.fujitsu.com> Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- kernel/trace/trace_kprobe.c | 64 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 6ea90c0e2c96..147491dccead 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1250,6 +1250,62 @@ static int kretprobe_event_show_format(struct ftrace_event_call *call, ", REC->" FIELD_STRING_RETIP); } +static int __set_print_fmt(struct trace_probe *tp, char *buf, int len) +{ + int i; + int pos = 0; + + const char *fmt, *arg; + + if (!probe_is_return(tp)) { + fmt = "(%lx)"; + arg = "REC->" FIELD_STRING_IP; + } else { + fmt = "(%lx <- %lx)"; + arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP; + } + + /* When len=0, we just calculate the needed length */ +#define LEN_OR_ZERO (len ? 
len - pos : 0) + pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt); + + for (i = 0; i < tp->nr_args; i++) { + pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%%lx", + tp->args[i].name); + } + + pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); + + for (i = 0; i < tp->nr_args; i++) { + pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", + tp->args[i].name); + } + +#undef LEN_OR_ZERO + + /* return the length of print_fmt */ + return pos; +} + +static int set_print_fmt(struct trace_probe *tp) +{ + int len; + char *print_fmt; + + /* First: called with 0 length to calculate the needed length */ + len = __set_print_fmt(tp, NULL, 0); + print_fmt = kmalloc(len + 1, GFP_KERNEL); + if (!print_fmt) + return -ENOMEM; + + /* Second: actually write the @print_fmt */ + __set_print_fmt(tp, print_fmt, len + 1); + tp->call.print_fmt = print_fmt; + + return 0; +} + #ifdef CONFIG_EVENT_PROFILE /* Kprobe profile handler */ @@ -1456,10 +1512,14 @@ static int register_probe_event(struct trace_probe *tp) call->show_format = kprobe_event_show_format; call->define_fields = kprobe_event_define_fields; } + if (set_print_fmt(tp) < 0) + return -ENOMEM; call->event = &tp->event; call->id = register_ftrace_event(&tp->event); - if (!call->id) + if (!call->id) { + kfree(call->print_fmt); return -ENODEV; + } call->enabled = 0; call->regfunc = probe_event_enable; call->unregfunc = probe_event_disable; @@ -1472,6 +1532,7 @@ static int register_probe_event(struct trace_probe *tp) ret = trace_add_event_call(call); if (ret) { pr_info("Failed to register kprobe event: %s\n", call->name); + kfree(call->print_fmt); unregister_ftrace_event(&tp->event); } return ret; @@ -1481,6 +1542,7 @@ static void unregister_probe_event(struct trace_probe *tp) { /* tp->event is unregistered in trace_remove_event_call() */ trace_remove_event_call(&tp->call); + kfree(tp->call.print_fmt); } /* Make a debugfs interface for controling probe points */ -- cgit v1.2.3 From c7ef3a9004201bca90626db246a19dadd2c29c9b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 28 Dec 2009 21:13:59 -0500 Subject: tracing: Have syscall tracing call its own init function In the cleanup of having all events call one specific function, the syscall event init was changed to call this helper function. With the new print_fmt updates, the syscalls need to do special initializations. This patch converts the syscall events to call their own init function again.
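That init function's main new job is set_syscall_print_fmt(), which, like the kprobe set_print_fmt()/__set_print_fmt() pair above, uses a two-pass snprintf() idiom: call once with a NULL buffer and zero length to measure, allocate, then call again to fill. A self-contained userspace sketch of the idiom, with an invented single-argument format:

#include <stdio.h>
#include <stdlib.h>

static int build_fmt(char *buf, int len, const char *name)
{
	int pos = 0;

	/* With len == 0 this pass only measures: snprintf() gets a zero
	 * size, writes nothing, and pos accumulates the needed length. */
#define LEN_OR_ZERO (len ? len - pos : 0)
	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%%lx %s=%%lx\"", name);
	pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", name);
#undef LEN_OR_ZERO

	return pos;	/* length needed, excluding the trailing NUL */
}

int main(void)
{
	int len = build_fmt(NULL, 0, "ip");	/* first pass: measure */
	char *fmt = malloc(len + 1);

	if (!fmt)
		return 1;
	build_fmt(fmt, len + 1, "ip");		/* second pass: fill */
	printf("%s\n", fmt);	/* prints: "%lx ip=%lx", REC->ip */
	free(fmt);
	return 0;
}
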
Cc: Lai Jiangshan Cc: Li Zefan Signed-off-by: Steven Rostedt --- include/linux/syscalls.h | 4 ++-- kernel/trace/trace_syscalls.c | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 207466a49f3d..ed353d274a77 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -143,7 +143,7 @@ struct perf_event_attr; .name = "sys_enter"#sname, \ .system = "syscalls", \ .event = &enter_syscall_print_##sname, \ - .raw_init = trace_event_raw_init, \ + .raw_init = init_syscall_trace, \ .show_format = syscall_enter_format, \ .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ @@ -165,7 +165,7 @@ struct perf_event_attr; .name = "sys_exit"#sname, \ .system = "syscalls", \ .event = &exit_syscall_print_##sname, \ - .raw_init = trace_event_raw_init, \ + .raw_init = init_syscall_trace, \ .show_format = syscall_exit_format, \ .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 1352b0a36fac..a78e86349ecb 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -450,14 +450,14 @@ int init_syscall_trace(struct ftrace_event_call *call) if (set_syscall_print_fmt(call) < 0) return -ENOMEM; - id = register_ftrace_event(call->event); - if (!id) { + id = trace_event_raw_init(call); + + if (id < 0) { free_syscall_print_fmt(call); - return -ENODEV; + return id; } - call->id = id; - INIT_LIST_HEAD(&call->fields); - return 0; + + return id; } int __init init_ftrace_syscalls(void) -- cgit v1.2.3 From 5a65e956220efc2421e21ee56d6153fd5c533a95 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:53 +0800 Subject: tracing: Use defined fields and print_fmt to print formats The calls ftrace_format_##call() and ftrace_define_fields_##call() are almost duplicates in functionality. With the addition of the print_fmt in previous patches, these two functions can be merged into one. The trace_define_field() defines the fields and links them into the struct ftrace_event_call. The previous patches introduced the print_fmt field and this can now be used with the trace_define_field() to create the event format file fields and print_fmt field. The struct ftrace_event_call->fields are used to print the fields. The struct ftrace_event_call->print_fmt is used to print the "print fmt: XXXXXXXXXXX" line.
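The end result in an event's format file has this general shape; the event_format_read() rework in the diff below walks call->fields for the field lines and then appends call->print_fmt (the event shown here, with its offsets and sizes, is illustrative):

name: my_event
ID: 42
format:
	field:unsigned short common_type;	offset:0;	size:2;	signed:0;
	field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
	...
	field:int irq;	offset:12;	size:4;	signed:1;

print fmt: "irq=%d", REC->irq
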
Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D49.5000006@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace_events.c | 65 ++++++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 24 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 189b09baf4fb..250ec865d5f5 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -528,33 +528,16 @@ extern char *__bad_type_size(void); #type, "common_" #name, offsetof(typeof(field), name), \ sizeof(field.name), is_signed_type(type) -static int trace_write_header(struct trace_seq *s) -{ - struct trace_entry field; - - /* struct trace_entry */ - return trace_seq_printf(s, - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\tsigned:%u;\n" - "\n", - FIELD(unsigned short, type), - FIELD(unsigned char, flags), - FIELD(unsigned char, preempt_count), - FIELD(int, pid), - FIELD(int, lock_depth)); -} - static ssize_t event_format_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) { struct ftrace_event_call *call = filp->private_data; + struct ftrace_event_field *field; struct trace_seq *s; + int common_field_count = 5; char *buf; - int r; + int r = 0; if (*ppos) return 0; @@ -565,14 +548,48 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); - /* If any of the first writes fail, so will the show_format. */ - trace_seq_printf(s, "name: %s\n", call->name); trace_seq_printf(s, "ID: %d\n", call->id); trace_seq_printf(s, "format:\n"); - trace_write_header(s); - r = call->show_format(call, s); + list_for_each_entry_reverse(field, &call->fields, link) { + /* + * Smartly shows the array type(except dynamic array). + * Normal: + * field:TYPE VAR + * If TYPE := TYPE[LEN], it is shown: + * field:TYPE VAR[LEN] + */ + const char *array_descriptor = strchr(field->type, '['); + + if (!strncmp(field->type, "__data_loc", 10)) + array_descriptor = NULL; + + if (!array_descriptor) { + r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;" + "\tsize:%u;\tsigned:%d;\n", + field->type, field->name, field->offset, + field->size, !!field->is_signed); + } else { + r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;" + "\tsize:%u;\tsigned:%d;\n", + (int)(array_descriptor - field->type), + field->type, field->name, + array_descriptor, field->offset, + field->size, !!field->is_signed); + } + + if (--common_field_count == 0) + r = trace_seq_printf(s, "\n"); + + if (!r) + break; + } + + if (r) + r = trace_seq_printf(s, "\nprint fmt: %s\n", + call->print_fmt); + if (!r) { /* * ug! The format output is bigger than a PAGE!! -- cgit v1.2.3 From 0fa0edaf32b9a78b9854f1da98d4511a501089b0 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:57 +0800 Subject: tracing: Remove show_format and related macros from TRACE_EVENT The previous patches added the use of print_fmt string and changes the trace_define_field() function to also create the fields and format output for the event format files. text data bss dec hex filename 5857201 1355780 9336808 16549789 fc879d vmlinux 5884589 1351684 9337896 16574169 fce6d9 vmlinux-orig The above shows the size of the vmlinux after this patch set compared to the vmlinux-orig which is before the patch set. 
This saves us 27k on text, 1k on bss and adds just 4k of data. The total savings of 24k in size. Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D4D.40604@cn.fujitsu.com> Acked-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 2 - include/linux/syscalls.h | 2 - include/trace/ftrace.h | 133 ++---------------------------------------- include/trace/syscall.h | 4 -- kernel/trace/trace_events.c | 12 ---- kernel/trace/trace_export.c | 73 ----------------------- kernel/trace/trace_kprobe.c | 78 ------------------------- kernel/trace/trace_syscalls.c | 66 --------------------- 8 files changed, 6 insertions(+), 364 deletions(-) (limited to 'kernel/trace') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index bd23d8e52f02..84a5629adfd8 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -123,8 +123,6 @@ struct ftrace_event_call { int id; const char *print_fmt; int (*raw_init)(struct ftrace_event_call *); - int (*show_format)(struct ftrace_event_call *, - struct trace_seq *); int (*define_fields)(struct ftrace_event_call *); struct list_head fields; int filter_active; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index ed353d274a77..7b219696ad24 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -144,7 +144,6 @@ struct perf_event_attr; .system = "syscalls", \ .event = &enter_syscall_print_##sname, \ .raw_init = init_syscall_trace, \ - .show_format = syscall_enter_format, \ .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ @@ -166,7 +165,6 @@ struct perf_event_attr; .system = "syscalls", \ .event = &exit_syscall_print_##sname, \ .raw_init = init_syscall_trace, \ - .show_format = syscall_exit_format, \ .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ .unregfunc = unreg_event_syscall_exit, \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 3351d85c83a3..df65b99880b1 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -130,130 +130,6 @@ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -/* - * Setup the showing format of trace point. - * - * int - * ftrace_format_##call(struct trace_seq *s) - * { - * struct ftrace_raw_##call field; - * int ret; - * - * ret = trace_seq_printf(s, #type " " #item ";" - * " offset:%u; size:%u;\n", - * offsetof(struct ftrace_raw_##call, item), - * sizeof(field.type)); - * - * } - */ - -#undef TP_STRUCT__entry -#define TP_STRUCT__entry(args...) 
args - -#undef __field -#define __field(type, item) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%u;\tsize:%u;\tsigned:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item), \ - (unsigned int)is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __field_ext -#define __field_ext(type, item, filter_type) __field(type, item) - -#undef __array -#define __array(type, item, len) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%u;\tsize:%u;\tsigned:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item), \ - (unsigned int)is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __dynamic_array -#define __dynamic_array(type, item, len) \ - ret = trace_seq_printf(s, "\tfield:__data_loc " #type "[] " #item ";\t"\ - "offset:%u;\tsize:%u;\tsigned:%u;\n", \ - (unsigned int)offsetof(typeof(field), \ - __data_loc_##item), \ - (unsigned int)sizeof(field.__data_loc_##item), \ - (unsigned int)is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __string -#define __string(item, src) __dynamic_array(char, item, -1) - -#undef __entry -#define __entry REC - -#undef __print_symbolic -#undef __get_dynamic_array -#undef __get_str - -#undef TP_printk -#define TP_printk(fmt, args...) "\"%s\", %s\n", fmt, __stringify(args) - -#undef TP_fast_assign -#define TP_fast_assign(args...) args - -#undef TP_perf_assign -#define TP_perf_assign(args...) - -#undef DECLARE_EVENT_CLASS -#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \ -static int \ -ftrace_format_setup_##call(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ -{ \ - struct ftrace_raw_##call field __attribute__((unused)); \ - int ret = 0; \ - \ - tstruct; \ - \ - return ret; \ -} \ - \ -static int \ -ftrace_format_##call(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ -{ \ - int ret = 0; \ - \ - ret = ftrace_format_setup_##call(unused, s); \ - if (!ret) \ - return ret; \ - \ - ret = trace_seq_printf(s, "\nprint fmt: " print); \ - \ - return ret; \ -} - -#undef DEFINE_EVENT -#define DEFINE_EVENT(template, name, proto, args) - -#undef DEFINE_EVENT_PRINT -#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ -static int \ -ftrace_format_##name(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ -{ \ - int ret = 0; \ - \ - ret = ftrace_format_setup_##template(unused, s); \ - if (!ret) \ - return ret; \ - \ - trace_seq_printf(s, "\nprint fmt: " print); \ - \ - return ret; \ -} - -#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) - /* * Stage 3 of the trace events. * @@ -622,7 +498,6 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ * .raw_init = trace_event_raw_init, * .regfunc = ftrace_reg_event_, * .unregfunc = ftrace_unreg_event_, - * .show_format = ftrace_format_, * } * */ @@ -657,6 +532,12 @@ static void ftrace_profile_disable_##name(struct ftrace_event_call *unused)\ #define __assign_str(dst, src) \ strcpy(__get_str(dst), src); +#undef TP_fast_assign +#define TP_fast_assign(args...) args + +#undef TP_perf_assign +#define TP_perf_assign(args...) 
+ #undef DECLARE_EVENT_CLASS #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ \ @@ -750,7 +631,6 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ .print_fmt = print_fmt_##template, \ - .show_format = ftrace_format_##template, \ .define_fields = ftrace_define_fields_##template, \ _TRACE_PROFILE_INIT(call) \ } @@ -770,7 +650,6 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .regfunc = ftrace_raw_reg_event_##call, \ .unregfunc = ftrace_raw_unreg_event_##call, \ .print_fmt = print_fmt_##call, \ - .show_format = ftrace_format_##call, \ .define_fields = ftrace_define_fields_##template, \ _TRACE_PROFILE_INIT(call) \ } diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 961fda3556bb..8cd410254456 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -34,10 +34,6 @@ struct syscall_metadata { extern unsigned long arch_syscall_addr(int nr); extern int init_syscall_trace(struct ftrace_event_call *call); -extern int syscall_enter_format(struct ftrace_event_call *call, - struct trace_seq *s); -extern int syscall_exit_format(struct ftrace_event_call *call, - struct trace_seq *s); extern int syscall_enter_define_fields(struct ftrace_event_call *call); extern int syscall_exit_define_fields(struct ftrace_event_call *call); extern int reg_event_syscall_enter(struct ftrace_event_call *call); diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 250ec865d5f5..c2a3077b7353 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -520,14 +520,6 @@ out: return ret; } -extern char *__bad_type_size(void); - -#undef FIELD -#define FIELD(type, name) \ - sizeof(type) != sizeof(field.name) ? 
__bad_type_size() : \ - #type, "common_" #name, offsetof(typeof(field), name), \ - sizeof(field.name), is_signed_type(type) - static ssize_t event_format_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -965,10 +957,6 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, filter); } - /* A trace may not want to export its format */ - if (!call->show_format) - return 0; - trace_create_file("format", 0444, call->dir, call, format); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 95d14b640a66..e091f64ba6ce 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -62,78 +62,6 @@ static void __always_unused ____ftrace_check_##name(void) \ #include "trace_entries.h" - -#undef __field -#define __field(type, item) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ - offsetof(typeof(field), item), \ - sizeof(field.item), is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __field_desc -#define __field_desc(type, container, item) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ - offsetof(typeof(field), container.item), \ - sizeof(field.container.item), \ - is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __array -#define __array(type, item, len) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ - offsetof(typeof(field), item), \ - sizeof(field.item), is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __array_desc -#define __array_desc(type, container, item, len) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%zu;\tsize:%zu;\tsigned:%u;\n", \ - offsetof(typeof(field), container.item), \ - sizeof(field.container.item), \ - is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef __dynamic_array -#define __dynamic_array(type, item) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%zu;\tsize:0;\tsigned:%u;\n", \ - offsetof(typeof(field), item), \ - is_signed_type(type)); \ - if (!ret) \ - return 0; - -#undef F_printk -#define F_printk(fmt, args...) 
"%s, %s\n", #fmt, __stringify(args) - -#undef __entry -#define __entry REC - -#undef FTRACE_ENTRY -#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \ -static int \ -ftrace_format_##name(struct ftrace_event_call *unused, \ - struct trace_seq *s) \ -{ \ - struct struct_name field __attribute__((unused)); \ - int ret = 0; \ - \ - tstruct; \ - \ - trace_seq_printf(s, "\nprint fmt: " print); \ - \ - return ret; \ -} - -#include "trace_entries.h" - #undef __field #define __field(type, item) \ ret = trace_define_field(event_call, #type, #item, \ @@ -235,7 +163,6 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ .system = __stringify(TRACE_SYSTEM), \ .raw_init = ftrace_raw_init_event, \ .print_fmt = print, \ - .show_format = ftrace_format_##call, \ .define_fields = ftrace_define_fields_##call, \ }; \ diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 147491dccead..c99029916c76 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1174,82 +1174,6 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) return 0; } -static int __probe_event_show_format(struct trace_seq *s, - struct trace_probe *tp, const char *fmt, - const char *arg) -{ - int i; - - /* Show format */ - if (!trace_seq_printf(s, "\nprint fmt: \"%s", fmt)) - return 0; - - for (i = 0; i < tp->nr_args; i++) - if (!trace_seq_printf(s, " %s=%%lx", tp->args[i].name)) - return 0; - - if (!trace_seq_printf(s, "\", %s", arg)) - return 0; - - for (i = 0; i < tp->nr_args; i++) - if (!trace_seq_printf(s, ", REC->%s", tp->args[i].name)) - return 0; - - return trace_seq_puts(s, "\n"); -} - -#undef SHOW_FIELD -#define SHOW_FIELD(type, item, name) \ - do { \ - ret = trace_seq_printf(s, "\tfield:" #type " %s;\t" \ - "offset:%u;\tsize:%u;\tsigned:%d;\n", name,\ - (unsigned int)offsetof(typeof(field), item),\ - (unsigned int)sizeof(type), \ - is_signed_type(type)); \ - if (!ret) \ - return 0; \ - } while (0) - -static int kprobe_event_show_format(struct ftrace_event_call *call, - struct trace_seq *s) -{ - struct kprobe_trace_entry field __attribute__((unused)); - int ret, i; - struct trace_probe *tp = (struct trace_probe *)call->data; - - SHOW_FIELD(unsigned long, ip, FIELD_STRING_IP); - SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); - - /* Show fields */ - for (i = 0; i < tp->nr_args; i++) - SHOW_FIELD(unsigned long, args[i], tp->args[i].name); - trace_seq_puts(s, "\n"); - - return __probe_event_show_format(s, tp, "(%lx)", - "REC->" FIELD_STRING_IP); -} - -static int kretprobe_event_show_format(struct ftrace_event_call *call, - struct trace_seq *s) -{ - struct kretprobe_trace_entry field __attribute__((unused)); - int ret, i; - struct trace_probe *tp = (struct trace_probe *)call->data; - - SHOW_FIELD(unsigned long, func, FIELD_STRING_FUNC); - SHOW_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP); - SHOW_FIELD(int, nargs, FIELD_STRING_NARGS); - - /* Show fields */ - for (i = 0; i < tp->nr_args; i++) - SHOW_FIELD(unsigned long, args[i], tp->args[i].name); - trace_seq_puts(s, "\n"); - - return __probe_event_show_format(s, tp, "(%lx <- %lx)", - "REC->" FIELD_STRING_FUNC - ", REC->" FIELD_STRING_RETIP); -} - static int __set_print_fmt(struct trace_probe *tp, char *buf, int len) { int i; @@ -1504,12 +1428,10 @@ static int register_probe_event(struct trace_probe *tp) if (probe_is_return(tp)) { tp->event.trace = print_kretprobe_event; call->raw_init = probe_event_raw_init; - call->show_format = kretprobe_event_show_format; call->define_fields = 
kretprobe_event_define_fields; } else { tp->event.trace = print_kprobe_event; call->raw_init = probe_event_raw_init; - call->show_format = kprobe_event_show_format; call->define_fields = kprobe_event_define_fields; } if (set_print_fmt(tp) < 0) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index a78e86349ecb..49cea70fbf6d 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -143,54 +143,6 @@ extern char *__bad_type_size(void); #type, #name, offsetof(typeof(trace), name), \ sizeof(trace.name), is_signed_type(type) -int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) -{ - int i; - int ret; - struct syscall_metadata *entry = call->data; - struct syscall_trace_enter trace; - int offset = offsetof(struct syscall_trace_enter, args); - - ret = trace_seq_printf(s, "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" - "\tsigned:%u;\n", - SYSCALL_FIELD(int, nr)); - if (!ret) - return 0; - - for (i = 0; i < entry->nb_args; i++) { - ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i], - entry->args[i]); - if (!ret) - return 0; - ret = trace_seq_printf(s, "\toffset:%d;\tsize:%zu;" - "\tsigned:%u;\n", offset, - sizeof(unsigned long), - is_signed_type(unsigned long)); - if (!ret) - return 0; - offset += sizeof(unsigned long); - } - - trace_seq_puts(s, "\nprint fmt: \""); - for (i = 0; i < entry->nb_args; i++) { - ret = trace_seq_printf(s, "%s: 0x%%0%zulx%s", entry->args[i], - sizeof(unsigned long), - i == entry->nb_args - 1 ? "" : ", "); - if (!ret) - return 0; - } - trace_seq_putc(s, '"'); - - for (i = 0; i < entry->nb_args; i++) { - ret = trace_seq_printf(s, ", ((unsigned long)(REC->%s))", - entry->args[i]); - if (!ret) - return 0; - } - - return trace_seq_putc(s, '\n'); -} - static int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) { @@ -252,24 +204,6 @@ static void free_syscall_print_fmt(struct ftrace_event_call *call) kfree(call->print_fmt); } -int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) -{ - int ret; - struct syscall_trace_exit trace; - - ret = trace_seq_printf(s, - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" - "\tsigned:%u;\n" - "\tfield:%s %s;\toffset:%zu;\tsize:%zu;" - "\tsigned:%u;\n", - SYSCALL_FIELD(int, nr), - SYSCALL_FIELD(long, ret)); - if (!ret) - return 0; - - return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n"); -} - int syscall_enter_define_fields(struct ftrace_event_call *call) { struct syscall_trace_enter trace; -- cgit v1.2.3 From 7e53bd42d14c75192b99674c40fcc359392da59d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 6 Jan 2010 20:08:50 +0800 Subject: tracing: Consolidate protection of reader access to the ring buffer At the beginning, access to the ring buffer was fully serialized by trace_types_lock. Patch d7350c3f4569 gave more freedom to readers, and patch b04cc6b1f6 added code to protect trace_pipe and cpu#/trace_pipe. But that is still not enough: ring buffer readers are not always read-only; they may consume data. This patch serializes accesses to trace, trace_pipe, trace_pipe_raw, cpu#/trace, cpu#/trace_pipe and cpu#/trace_pipe_raw. It also removes tracing_reader_cpumask, which was used to protect trace_pipe. Details: The ring buffer serializes readers, but that is low-level protection. The validity of the events (returned by ring_buffer_peek() etc.) is not protected by the ring buffer.
The content of events may become garbage if we allow another process to consume these events concurrently: A) the page of the consumed events may become a normal page (not reader page) in ring buffer, and this page will be rewritten by the events producer. B) The page of the consumed events may become a page for splice_read, and this page will be returned to system. This patch adds trace_access_lock() and trace_access_unlock() primitives. These primitives allow multi process access to different cpu ring buffers concurrently. These primitives don't distinguish read-only and read-consume access. Multi read-only access is also serialized. And we don't use these primitives when we open files, we only use them when we read files. Signed-off-by: Lai Jiangshan LKML-Reference: <4B447D52.1050602@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 136 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 97 insertions(+), 39 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0df1b0f2cb9e..abdd333a0825 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -102,9 +103,6 @@ static inline void ftrace_enable_cpu(void) static cpumask_var_t __read_mostly tracing_buffer_mask; -/* Define which cpu buffers are currently read in trace_pipe */ -static cpumask_var_t tracing_reader_cpumask; - #define for_each_tracing_cpu(cpu) \ for_each_cpu(cpu, tracing_buffer_mask) @@ -243,12 +241,91 @@ static struct tracer *current_trace __read_mostly; /* * trace_types_lock is used to protect the trace_types list. - * This lock is also used to keep user access serialized. - * Accesses from userspace will grab this lock while userspace - * activities happen inside the kernel. */ static DEFINE_MUTEX(trace_types_lock); +/* + * serialize the access of the ring buffer + * + * ring buffer serializes readers, but it is low level protection. + * The validity of the events (which returns by ring_buffer_peek() ..etc) + * are not protected by ring buffer. + * + * The content of events may become garbage if we allow other process consumes + * these events concurrently: + * A) the page of the consumed events may become a normal page + * (not reader page) in ring buffer, and this page will be rewrited + * by events producer. + * B) The page of the consumed events may become a page for splice_read, + * and this page will be returned to system. + * + * These primitives allow multi process access to different cpu ring buffer + * concurrently. + * + * These primitives don't distinguish read-only and read-consume access. + * Multi read-only access are also serialized. + */ + +#ifdef CONFIG_SMP +static DECLARE_RWSEM(all_cpu_access_lock); +static DEFINE_PER_CPU(struct mutex, cpu_access_lock); + +static inline void trace_access_lock(int cpu) +{ + if (cpu == TRACE_PIPE_ALL_CPU) { + /* gain it for accessing the whole ring buffer. */ + down_write(&all_cpu_access_lock); + } else { + /* gain it for accessing a cpu ring buffer. */ + + /* Firstly block other trace_access_lock(TRACE_PIPE_ALL_CPU). */ + down_read(&all_cpu_access_lock); + + /* Secondly block other access to this @cpu ring buffer. 
*/ + mutex_lock(&per_cpu(cpu_access_lock, cpu)); + } +} + +static inline void trace_access_unlock(int cpu) +{ + if (cpu == TRACE_PIPE_ALL_CPU) { + up_write(&all_cpu_access_lock); + } else { + mutex_unlock(&per_cpu(cpu_access_lock, cpu)); + up_read(&all_cpu_access_lock); + } +} + +static inline void trace_access_lock_init(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + mutex_init(&per_cpu(cpu_access_lock, cpu)); +} + +#else + +static DEFINE_MUTEX(access_lock); + +static inline void trace_access_lock(int cpu) +{ + (void)cpu; + mutex_lock(&access_lock); +} + +static inline void trace_access_unlock(int cpu) +{ + (void)cpu; + mutex_unlock(&access_lock); +} + +static inline void trace_access_lock_init(void) +{ +} + +#endif + /* trace_wait is a waitqueue for tasks blocked on trace_poll */ static DECLARE_WAIT_QUEUE_HEAD(trace_wait); @@ -1580,12 +1657,6 @@ static void tracing_iter_reset(struct trace_iterator *iter, int cpu) } /* - * No necessary locking here. The worst thing which can - * happen is loosing events consumed at the same time - * by a trace_pipe reader. - * Other than that, we don't risk to crash the ring buffer - * because it serializes the readers. - * * The current tracer is copied to avoid a global locking * all around. */ @@ -1640,12 +1711,16 @@ static void *s_start(struct seq_file *m, loff_t *pos) } trace_event_read_lock(); + trace_access_lock(cpu_file); return p; } static void s_stop(struct seq_file *m, void *p) { + struct trace_iterator *iter = m->private; + atomic_dec(&trace_record_cmdline_disabled); + trace_access_unlock(iter->cpu_file); trace_event_read_unlock(); } @@ -2836,22 +2911,6 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) mutex_lock(&trace_types_lock); - /* We only allow one reader per cpu */ - if (cpu_file == TRACE_PIPE_ALL_CPU) { - if (!cpumask_empty(tracing_reader_cpumask)) { - ret = -EBUSY; - goto out; - } - cpumask_setall(tracing_reader_cpumask); - } else { - if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask)) - cpumask_set_cpu(cpu_file, tracing_reader_cpumask); - else { - ret = -EBUSY; - goto out; - } - } - /* create a buffer to store the information to pass to userspace */ iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) { @@ -2907,12 +2966,6 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) mutex_lock(&trace_types_lock); - if (iter->cpu_file == TRACE_PIPE_ALL_CPU) - cpumask_clear(tracing_reader_cpumask); - else - cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask); - - if (iter->trace->pipe_close) iter->trace->pipe_close(iter); @@ -3074,6 +3127,7 @@ waitagain: iter->pos = -1; trace_event_read_lock(); + trace_access_lock(iter->cpu_file); while (find_next_entry_inc(iter) != NULL) { enum print_line_t ret; int len = iter->seq.len; @@ -3090,6 +3144,7 @@ waitagain: if (iter->seq.len >= cnt) break; } + trace_access_unlock(iter->cpu_file); trace_event_read_unlock(); /* Now copy what we have to the user */ @@ -3215,6 +3270,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, } trace_event_read_lock(); + trace_access_lock(iter->cpu_file); /* Fill as many pages as possible. 
*/ for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { @@ -3238,6 +3294,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, trace_seq_init(&iter->seq); } + trace_access_unlock(iter->cpu_file); trace_event_read_unlock(); mutex_unlock(&iter->mutex); @@ -3539,10 +3596,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, info->read = 0; + trace_access_lock(info->cpu); ret = ring_buffer_read_page(info->tr->buffer, &info->spare, count, info->cpu, 0); + trace_access_unlock(info->cpu); if (ret < 0) return 0; @@ -3670,6 +3729,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, len &= PAGE_MASK; } + trace_access_lock(info->cpu); entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) { @@ -3717,6 +3777,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu); } + trace_access_unlock(info->cpu); spd.nr_pages = i; /* did we read anything? */ @@ -4153,6 +4214,8 @@ static __init int tracer_init_debugfs(void) struct dentry *d_tracer; int cpu; + trace_access_lock_init(); + d_tracer = tracing_init_dentry(); trace_create_file("tracing_enabled", 0644, d_tracer, @@ -4387,9 +4450,6 @@ __init static int tracer_alloc_buffers(void) if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) goto out_free_buffer_mask; - if (!zalloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL)) - goto out_free_tracing_cpumask; - /* To save memory, keep the ring buffer size to its minimum */ if (ring_buffer_expanded) ring_buf_size = trace_buf_size; @@ -4447,8 +4507,6 @@ __init static int tracer_alloc_buffers(void) return 0; out_free_cpumask: - free_cpumask_var(tracing_reader_cpumask); -out_free_tracing_cpumask: free_cpumask_var(tracing_cpumask); out_free_buffer_mask: free_cpumask_var(tracing_buffer_mask); -- cgit v1.2.3 From d931369b74b3d6f2044f595af6f3dd074f65d9cf Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 6 Jan 2010 17:27:11 -0500 Subject: tracing: Add stack dump to trace_printk if stacktrace option is set If the ftrace stacktrace option is set, then add the stack dumps to trace_printk. 
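A usage sketch, assuming debugfs is mounted at /sys/kernel/debug (stacktrace is the existing trace option this patch keys off; once it is set, each trace_printk() hit is followed by a stack dump in the trace output):

# cd /sys/kernel/debug/tracing
# echo stacktrace > trace_options
# ... exercise the code path containing the trace_printk() ...
# cat trace
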
Requested-by: Peter Zijlstra Signed-off-by: Steven Rostedt --- kernel/trace/trace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index abdd333a0825..5314c90bbc83 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1392,8 +1392,10 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) entry->fmt = fmt; memcpy(entry->buf, trace_buf, sizeof(u32) * len); - if (!filter_check_discard(call, entry, buffer, event)) + if (!filter_check_discard(call, entry, buffer, event)) { ring_buffer_unlock_commit(buffer, event); + ftrace_trace_stack(buffer, flags, 6, pc); + } out_unlock: arch_spin_unlock(&trace_buf_lock); @@ -1466,8 +1468,10 @@ int trace_array_vprintk(struct trace_array *tr, memcpy(&entry->buf, trace_buf, len); entry->buf[len] = '\0'; - if (!filter_check_discard(call, entry, buffer, event)) + if (!filter_check_discard(call, entry, buffer, event)) { ring_buffer_unlock_commit(buffer, event); + ftrace_trace_stack(buffer, irq_flags, 6, pc); + } out_unlock: arch_spin_unlock(&trace_buf_lock); -- cgit v1.2.3 From 24a53652e361321b09df5040711e69387344ce09 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 14 Jan 2010 08:23:15 +0100 Subject: tracing: Drop the tr check from the graph tracing path Each time we save a function entry from the function graph tracer, we check if the trace array is set, which is wasteful because it is set anyway before we start the tracer. All we need is to ensure we have good read and write orderings. When we set the trace array, we just need to guarantee it to be visible before starting tracing. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Cc: Lai Jiangshan Cc: Paul E. McKenney LKML-Reference: <1263453795-7496-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_functions_graph.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index b1342c5d37cf..f2252296607c 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -212,9 +212,6 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) int cpu; int pc; - if (unlikely(!tr)) - return 0; - if (!ftrace_trace_task(current)) return 0; @@ -287,11 +284,20 @@ void trace_graph_return(struct ftrace_graph_ret *trace) local_irq_restore(flags); } +void set_graph_array(struct trace_array *tr) +{ + graph_array = tr; + + /* Make graph_array visible before we start tracing */ + + smp_mb(); +} + static int graph_trace_init(struct trace_array *tr) { int ret; - graph_array = tr; + set_graph_array(tr); ret = register_ftrace_graph(&trace_graph_return, &trace_graph_entry); if (ret) @@ -301,11 +307,6 @@ static int graph_trace_init(struct trace_array *tr) return 0; } -void set_graph_array(struct trace_array *tr) -{ - graph_array = tr; -} - static void graph_trace_reset(struct trace_array *tr) { tracing_stop_cmdline_record(); -- cgit v1.2.3 From ea2c68a08fedb5053ba312d661e47df9f4d72411 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Wed, 13 Jan 2010 19:38:30 +0800 Subject: tracing: Simplify test for function_graph tracing start point In the function graph tracer, a calling function is to be traced only when it is enabled through the set_graph_function file, or when it is nested in an enabled function. Current code uses TSK_TRACE_FL_GRAPH to test whether it is nested or not. 
Looking at the code, we can get this: (trace->depth > 0) <==> (TSK_TRACE_FL_GRAPH is set) trace->depth is more explicit to tell that it is nested. So we use trace->depth directly and simplify the code. No functionality is changed. TSK_TRACE_FL_GRAPH is not removed yet, it is left for future usage. Signed-off-by: Lai Jiangshan Cc: Ingo Molnar Cc: Steven Rostedt LKML-Reference: <4B4DB0B6.7040607@cn.fujitsu.com> Signed-off-by: Frederic Weisbecker --- kernel/trace/trace.h | 2 +- kernel/trace/trace_functions_graph.c | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'kernel/trace') diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 4df6a77eb196..ce077fbbf552 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -504,7 +504,7 @@ static inline int ftrace_graph_addr(unsigned long addr) { int i; - if (!ftrace_graph_count || test_tsk_trace_graph(current)) + if (!ftrace_graph_count) return 1; for (i = 0; i < ftrace_graph_count; i++) { diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index f2252296607c..616b135c9eb9 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -215,7 +215,8 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) if (!ftrace_trace_task(current)) return 0; - if (!ftrace_graph_addr(trace->func)) + /* trace it when it is-nested-in or is a function enabled. */ + if (!(trace->depth || ftrace_graph_addr(trace->func))) return 0; local_irq_save(flags); @@ -228,9 +229,6 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) } else { ret = 0; } - /* Only do the atomic if it is not already set */ - if (!test_tsk_trace_graph(current)) - set_tsk_trace_graph(current); atomic_dec(&data->disabled); local_irq_restore(flags); @@ -278,8 +276,6 @@ void trace_graph_return(struct ftrace_graph_ret *trace) pc = preempt_count(); __trace_graph_return(tr, trace, flags, pc); } - if (!trace->depth) - clear_tsk_trace_graph(current); atomic_dec(&data->disabled); local_irq_restore(flags); } -- cgit v1.2.3 From ede55c9d78101fef0d8e620940a5163f14b02f29 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 27 Jan 2010 11:25:54 -0500 Subject: tracing: Add correct/incorrect to sort keys for branch annotation output The branch annotation is a bit difficult to see the worst offenders because it only sorts by percentage: correct incorrect % Function File Line ------- --------- - -------- ---- ---- 0 163 100 qdisc_restart sch_generic.c 179 0 163 100 pfifo_fast_dequeue sch_generic.c 447 0 4 100 pskb_trim_rcsum skbuff.h 1689 0 4 100 llc_rcv llc_input.c 170 0 18 100 psmouse_interrupt psmouse-base.c 304 0 3 100 atkbd_interrupt atkbd.c 389 0 5 100 usb_alloc_dev usb.c 437 0 11 100 vsscanf vsprintf.c 1897 0 2 100 IS_ERR err.h 34 0 23 100 __rmqueue_fallback page_alloc.c 865 0 4 100 probe_wakeup_sched_switch trace_sched_wakeup.c 142 0 3 100 move_masked_irq migration.c 11 Adding the incorrect and correct values as sort keys makes this file a bit more informative: correct incorrect % Function File Line ------- --------- - -------- ---- ---- 0 366541 100 audit_syscall_entry auditsc.c 1637 0 366538 100 audit_syscall_exit auditsc.c 1685 0 115839 100 sched_info_switch sched_stats.h 269 0 74567 100 sched_info_queued sched_stats.h 222 0 66578 100 sched_info_dequeued sched_stats.h 177 0 15113 100 trace_workqueue_insertion workqueue.h 38 0 15107 100 trace_workqueue_execution workqueue.h 45 0 3622 100 syscall_trace_leave ptrace.c 1772 0 2750 100 sched_move_task sched.c 10100 0 2750 100 sched_move_task 
                                                sched.c             10110
       0     1815 100 pre_schedule_rt                sched_rt.c           1462
       0      837 100 audit_alloc                    auditsc.c             879
       0      814 100 tcp_mss_split_point            tcp_output.c         1302

Signed-off-by: Steven Rostedt
---
 kernel/trace/trace_branch.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index 4a194f08f88c..b9bc4d470177 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -307,8 +307,23 @@ static int annotated_branch_stat_cmp(void *p1, void *p2)
 		return -1;
 	if (percent_a > percent_b)
 		return 1;
-	else
-		return 0;
+
+	if (a->incorrect < b->incorrect)
+		return -1;
+	if (a->incorrect > b->incorrect)
+		return 1;
+
+	/*
+	 * Since the above shows worse (incorrect) cases
+	 * first, we continue that by showing best (correct)
+	 * cases last.
+	 */
+	if (a->correct > b->correct)
+		return -1;
+	if (a->correct < b->correct)
+		return 1;
+
+	return 0;
 }
 
 static struct tracer_stat annotated_branch_stats = {
--
cgit v1.2.3


From c7c6b1fe9f942c1a30585ec2210a09dfff238506 Mon Sep 17 00:00:00 2001
From: Li Zefan
Date: Wed, 10 Feb 2010 15:43:04 +0800
Subject: ftrace: Allow to remove a single function from function graph filter

I don't see why we can only clear all functions from the filter.

After patching:

  # echo sys_open > set_graph_function
  # echo sys_close >> set_graph_function
  # cat set_graph_function
  sys_open
  sys_close
  # echo '!sys_close' >> set_graph_function
  # cat set_graph_function
  sys_open

Signed-off-by: Li Zefan
LKML-Reference: <4B726388.2000408@cn.fujitsu.com>
Signed-off-by: Steven Rostedt
---
 kernel/trace/ftrace.c | 51 +++++++++++++++++++++++++++++++--------------------
 kernel/trace/trace.h  |  3 ++-
 2 files changed, 33 insertions(+), 21 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7968762c8167..43bec993c864 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2426,6 +2426,7 @@ static const struct file_operations ftrace_notrace_fops = {
 static DEFINE_MUTEX(graph_lock);
 
 int ftrace_graph_count;
+int ftrace_graph_filter_enabled;
 unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
 
 static void *
@@ -2448,7 +2449,7 @@ static void *g_start(struct seq_file *m, loff_t *pos)
 	mutex_lock(&graph_lock);
 
 	/* Nothing, tell g_show to print all functions are enabled */
-	if (!ftrace_graph_count && !*pos)
+	if (!ftrace_graph_filter_enabled && !*pos)
 		return (void *)1;
 
 	return __g_next(m, pos);
@@ -2494,6 +2495,7 @@ ftrace_graph_open(struct inode *inode, struct file *file)
 	mutex_lock(&graph_lock);
 	if ((file->f_mode & FMODE_WRITE) &&
 	    (file->f_flags & O_TRUNC)) {
+		ftrace_graph_filter_enabled = 0;
 		ftrace_graph_count = 0;
 		memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
 	}
@@ -2519,7 +2521,7 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
 	struct dyn_ftrace *rec;
 	struct ftrace_page *pg;
 	int search_len;
-	int found = 0;
+	int fail = 1;
 	int type, not;
 	char *search;
 	bool exists;
@@ -2530,37 +2532,51 @@ ftrace_set_func(unsigned long *array, int *idx, char *buffer)
 
 	/* decode regex */
 	type = filter_parse_regex(buffer, strlen(buffer), &search, &not);
-	if (not)
-		return -EINVAL;
+	if (!not && *idx >= FTRACE_GRAPH_MAX_FUNCS)
+		return -EBUSY;
 
 	search_len = strlen(search);
 
 	mutex_lock(&ftrace_lock);
 	do_for_each_ftrace_rec(pg, rec) {
 
-		if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
-			break;
-
 		if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE))
 			continue;
 
 		if (ftrace_match_record(rec, search, search_len, type)) {
-			/* ensure it is not already in the array */
+			/* if it is in the array */
 			exists = false;
-			for (i = 0; i < *idx; i++)
+			for (i = 0; i < *idx; i++) {
 				if (array[i] == rec->ip) {
 					exists = true;
 					break;
 				}
-			if (!exists)
-				array[(*idx)++] = rec->ip;
-			found = 1;
+			}
+
+			if (!not) {
+				fail = 0;
+				if (!exists) {
+					array[(*idx)++] = rec->ip;
+					if (*idx >= FTRACE_GRAPH_MAX_FUNCS)
+						goto out;
+				}
+			} else {
+				if (exists) {
+					array[i] = array[--(*idx)];
+					array[*idx] = 0;
+					fail = 0;
+				}
+			}
 		}
 	} while_for_each_ftrace_rec();
-
+out:
 	mutex_unlock(&ftrace_lock);
 
-	return found ? 0 : -EINVAL;
+	if (fail)
+		return -EINVAL;
+
+	ftrace_graph_filter_enabled = 1;
+	return 0;
 }
 
 static ssize_t
@@ -2570,16 +2586,11 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
 	struct trace_parser parser;
 	ssize_t read, ret;
 
-	if (!cnt || cnt < 0)
+	if (!cnt)
 		return 0;
 
 	mutex_lock(&graph_lock);
 
-	if (ftrace_graph_count >= FTRACE_GRAPH_MAX_FUNCS) {
-		ret = -EBUSY;
-		goto out_unlock;
-	}
-
 	if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
 		ret = -ENOMEM;
 		goto out_unlock;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index ce077fbbf552..b477fce41edf 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -497,6 +497,7 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s);
 #ifdef CONFIG_DYNAMIC_FTRACE
 /* TODO: make this variable */
 #define FTRACE_GRAPH_MAX_FUNCS		32
+extern int ftrace_graph_filter_enabled;
 extern int ftrace_graph_count;
 extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
 
@@ -504,7 +505,7 @@ static inline int ftrace_graph_addr(unsigned long addr)
 {
 	int i;
 
-	if (!ftrace_graph_count)
+	if (!ftrace_graph_filter_enabled)
 		return 1;
 
 	for (i = 0; i < ftrace_graph_count; i++) {
--
cgit v1.2.3


From e7b8e675d9c71b868b66f62f725a948047514719 Mon Sep 17 00:00:00 2001
From: Mike Frysinger
Date: Tue, 26 Jan 2010 04:40:03 -0500
Subject: tracing: Unify arch_syscall_addr() implementations

Most implementations of arch_syscall_addr() are the same, so create a
default version in common code and move the one piece that differs (the
syscall table) to asm/syscall.h.  New arch ports don't have to waste
time copying & pasting this simple function.

The s390/sparc versions need to be different, so document why.

Signed-off-by: Mike Frysinger
Acked-by: David S. Miller
Acked-by: Paul Mundt
Acked-by: Heiko Carstens
Cc: Steven Rostedt
LKML-Reference: <1264498803-17278-1-git-send-email-vapier@gentoo.org>
Signed-off-by: Frederic Weisbecker
---
 Documentation/trace/ftrace-design.txt |  5 ++---
 arch/s390/include/asm/syscall.h       |  7 +++++++
 arch/s390/kernel/ftrace.c             | 10 ----------
 arch/sh/include/asm/syscall.h         |  2 ++
 arch/sh/kernel/ftrace.c               |  9 ---------
 arch/sparc/include/asm/syscall.h      |  7 +++++++
 arch/sparc/kernel/ftrace.c            | 11 -----------
 arch/x86/include/asm/syscall.h        |  2 ++
 arch/x86/kernel/ftrace.c              | 10 ----------
 include/linux/ftrace.h                |  6 ++++++
 kernel/trace/trace_syscalls.c         |  5 +++++
 11 files changed, 31 insertions(+), 43 deletions(-)

(limited to 'kernel/trace')

diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt
index 239f14b2b55a..99df1101d2a5 100644
--- a/Documentation/trace/ftrace-design.txt
+++ b/Documentation/trace/ftrace-design.txt
@@ -218,11 +218,10 @@ HAVE_SYSCALL_TRACEPOINTS
 
 You need very few things to get the syscalls tracing in an arch.
 
+- Support HAVE_ARCH_TRACEHOOK (see arch/Kconfig).
 - Have a NR_syscalls variable in <asm/unistd.h> that provides the number
   of syscalls supported by the arch.
-- Implement arch_syscall_addr() that resolves a syscall address from a
-  syscall number.
-- Support the TIF_SYSCALL_TRACEPOINT thread flags
+- Support the TIF_SYSCALL_TRACEPOINT thread flags.
 - Put the trace_sys_enter() and trace_sys_exit() tracepoints calls from ptrace
   in the ptrace syscalls tracing path.
 - Tag this arch as HAVE_SYSCALL_TRACEPOINTS.
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index e0a73d3eb837..8429686951f9 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -15,6 +15,13 @@
 #include
 #include
 
+/*
+ * The syscall table always contains 32 bit pointers since we know that the
+ * address of the function to be called is (way) below 4GB.  So the "int"
+ * type here is what we want [need] for both 32 bit and 64 bit systems.
+ */
+extern const unsigned int sys_call_table[];
+
 static inline long syscall_get_nr(struct task_struct *task,
 				  struct pt_regs *regs)
 {
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 5a82bc68193e..9e69449e77ad 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -200,13 +200,3 @@ out:
 	return parent;
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#ifdef CONFIG_FTRACE_SYSCALLS
-
-extern unsigned int sys_call_table[];
-
-unsigned long __init arch_syscall_addr(int nr)
-{
-	return (unsigned long)sys_call_table[nr];
-}
-#endif
diff --git a/arch/sh/include/asm/syscall.h b/arch/sh/include/asm/syscall.h
index 6a381429ee9d..aa7777bdc370 100644
--- a/arch/sh/include/asm/syscall.h
+++ b/arch/sh/include/asm/syscall.h
@@ -1,6 +1,8 @@
 #ifndef __ASM_SH_SYSCALL_H
 #define __ASM_SH_SYSCALL_H
 
+extern const unsigned long sys_call_table[];
+
 #ifdef CONFIG_SUPERH32
 # include "syscall_32.h"
 #else
diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c
index a48cdedc73b5..30e13196d35b 100644
--- a/arch/sh/kernel/ftrace.c
+++ b/arch/sh/kernel/ftrace.c
@@ -399,12 +399,3 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 	}
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#ifdef CONFIG_FTRACE_SYSCALLS
-extern unsigned long *sys_call_table;
-
-unsigned long __init arch_syscall_addr(int nr)
-{
-	return (unsigned long)sys_call_table[nr];
-}
-#endif /* CONFIG_FTRACE_SYSCALLS */
diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h
index 7486c605e23c..025a02ad2e31 100644
--- a/arch/sparc/include/asm/syscall.h
+++ b/arch/sparc/include/asm/syscall.h
@@ -5,6 +5,13 @@
 #include
 #include
 
+/*
+ * The syscall table always contains 32 bit pointers since we know that the
+ * address of the function to be called is (way) below 4GB.  So the "int"
+ * type here is what we want [need] for both 32 bit and 64 bit systems.
+ */
+extern const unsigned int sys_call_table[];
+
 /* The system call number is given by the user in %g1 */
 static inline long syscall_get_nr(struct task_struct *task,
 				  struct pt_regs *regs)
diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c
index 29973daa9930..9103a56b39e8 100644
--- a/arch/sparc/kernel/ftrace.c
+++ b/arch/sparc/kernel/ftrace.c
@@ -91,14 +91,3 @@ int __init ftrace_dyn_arch_init(void *data)
 	return 0;
 }
 #endif
-
-#ifdef CONFIG_FTRACE_SYSCALLS
-
-extern unsigned int sys_call_table[];
-
-unsigned long __init arch_syscall_addr(int nr)
-{
-	return (unsigned long)sys_call_table[nr];
-}
-
-#endif
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 8d33bc5462d1..c4a348f7bd43 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -16,6 +16,8 @@
 #include
 #include
 
+extern const unsigned long sys_call_table[];
+
 /*
  * Only the low 32 bits of orig_ax are meaningful, so we return int.
  * This importantly ignores the high bits on 64-bit, so comparisons
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 309689245431..0d93a941934c 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -484,13 +484,3 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 	}
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#ifdef CONFIG_FTRACE_SYSCALLS
-
-extern unsigned long *sys_call_table;
-
-unsigned long __init arch_syscall_addr(int nr)
-{
-	return (unsigned long)(&sys_call_table)[nr];
-}
-#endif
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 0b4f97d24d7f..1cbb36f2759c 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -511,4 +511,10 @@ static inline void trace_hw_branch_oops(void) {}
 
 #endif /* CONFIG_HW_BRANCH_TRACER */
 
+#ifdef CONFIG_FTRACE_SYSCALLS
+
+unsigned long arch_syscall_addr(int nr);
+
+#endif /* CONFIG_FTRACE_SYSCALLS */
+
 #endif /* _LINUX_FTRACE_H */
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 49cea70fbf6d..ecf00782b46c 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -394,6 +394,11 @@ int init_syscall_trace(struct ftrace_event_call *call)
 	return id;
 }
 
+unsigned long __init arch_syscall_addr(int nr)
+{
+	return (unsigned long)sys_call_table[nr];
+}
+
 int __init init_ftrace_syscalls(void)
 {
 	struct syscall_metadata *meta;
--
cgit v1.2.3


From f850c30c8b426ba1688cb63b1a3e534eed03a138 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Wed, 10 Feb 2010 17:25:17 +0100
Subject: tracing/kprobes: Make Kconfig dependencies generic

KPROBES_EVENT actually depends on the regs and stack access API
(b1cf540f) and not on x86.
So introduce a new config option which architectures can select if
they have the API implemented and switch x86.
Signed-off-by: Heiko Carstens
Acked-by: Masami Hiramatsu
Cc: Ingo Molnar
Cc: Martin Schwidefsky
LKML-Reference: <20100210162517.GB6933@osiris.boeblingen.de.ibm.com>
Signed-off-by: Frederic Weisbecker
---
 arch/Kconfig         | 3 +++
 arch/x86/Kconfig     | 1 +
 kernel/trace/Kconfig | 2 +-
 3 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'kernel/trace')

diff --git a/arch/Kconfig b/arch/Kconfig
index 9d055b4f0585..04e3aa77da25 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -121,6 +121,9 @@ config HAVE_DMA_ATTRS
 config USE_GENERIC_SMP_HELPERS
 	bool
 
+config HAVE_REGS_AND_STACK_ACCESS_API
+	bool
+
 config HAVE_CLK
 	bool
 	help
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 55298e891571..07baa12929b4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -45,6 +45,7 @@ config X86
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select USER_STACKTRACE_SUPPORT
+	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_DMA_API_DEBUG
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 6c22d8a2f289..40fef552f012 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -451,7 +451,7 @@ config BLK_DEV_IO_TRACE
 
 config KPROBE_EVENT
 	depends on KPROBES
-	depends on X86
+	depends on HAVE_REGS_AND_STACK_ACCESS_API
 	bool "Enable kprobes-based dynamic events"
 	select TRACING
 	default y
--
cgit v1.2.3


From 86c38a31aa7f2dd6e74a262710bf8ebf7455acc5 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney
Date: Wed, 24 Feb 2010 13:59:23 -0500
Subject: tracing: Fix ftrace_event_call alignment for use with gcc 4.5

GCC 4.5 introduces behavior that forces the alignment of structures to
use the largest possible value.  The default value is 32 bytes, so if
some structures are defined with a 4-byte alignment and others aren't
declared with an alignment constraint at all - it will align at
32-bytes.

For things like the ftrace events, this results in a non-standard
array.  When initializing the ftrace subsystem, we traverse the
_ftrace_events section and call the initialization callback for each
event.  When the structures are misaligned, we could be treating
another part of the structure (or the zeroed out space between them)
as a function pointer.

This patch forces the alignment for all the ftrace_event_call
structures to 4 bytes.

Without this patch, the kernel fails to boot very early when built
with gcc 4.5.

It's trivial to check the alignment of the members of the array, so
it might be worthwhile to add something to the build system to do that
automatically.  Unfortunately, that only covers this case.  I've asked
one of the gcc developers about adding a warning when this condition
is seen.
Cc: stable@kernel.org
Signed-off-by: Jeff Mahoney
LKML-Reference: <4B85770B.6010901@suse.com>
Signed-off-by: Steven Rostedt
---
 include/linux/syscalls.h | 6 ++++--
 include/trace/ftrace.h   | 3 ++-
 kernel/trace/trace.h     | 3 ++-
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'kernel/trace')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 7b219696ad24..91bd7d78a07d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -132,7 +132,8 @@ struct perf_event_attr;
 
 #define SYSCALL_TRACE_ENTER_EVENT(sname)				\
 	static const struct syscall_metadata __syscall_meta_##sname;	\
-	static struct ftrace_event_call event_enter_##sname;		\
+	static struct ftrace_event_call					\
+	__attribute__((__aligned__(4))) event_enter_##sname;		\
 	static struct trace_event enter_syscall_print_##sname = {	\
 		.trace		= print_syscall_enter,			\
 	};								\
@@ -153,7 +154,8 @@ struct perf_event_attr;
 
 #define SYSCALL_TRACE_EXIT_EVENT(sname)					\
 	static const struct syscall_metadata __syscall_meta_##sname;	\
-	static struct ftrace_event_call event_exit_##sname;		\
+	static struct ftrace_event_call					\
+	__attribute__((__aligned__(4))) event_exit_##sname;		\
 	static struct trace_event exit_syscall_print_##sname = {	\
 		.trace		= print_syscall_exit,			\
 	};								\
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 09fd9afc0859..f23a0ca6910a 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -65,7 +65,8 @@
 	};
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, name, proto, args)	\
-	static struct ftrace_event_call event_##name
+	static struct ftrace_event_call			\
+	__attribute__((__aligned__(4))) event_##name
 
 #undef DEFINE_EVENT_PRINT
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b477fce41edf..fd05bcaf91b0 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -792,7 +792,8 @@ extern const char *__stop___trace_bprintk_fmt[];
 
 #undef FTRACE_ENTRY
 #define FTRACE_ENTRY(call, struct_name, id, tstruct, print)		\
-	extern struct ftrace_event_call event_##call;
+	extern struct ftrace_event_call					\
+	__attribute__((__aligned__(4))) event_##call;
 #undef FTRACE_ENTRY_DUP
 #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print)	\
 	FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
--
cgit v1.2.3


From 1ab83a89411556c4d5914dcf0d5da190178ae7db Mon Sep 17 00:00:00 2001
From: Li Zefan
Date: Wed, 24 Feb 2010 15:28:14 +0800
Subject: tracing: Remove CONFIG_TRACE_POWER from kernel config

The power tracer has been converted to power trace events.

Acked-by: Frederic Weisbecker
Signed-off-by: Li Zefan
LKML-Reference: <4B84D50E.4070806@cn.fujitsu.com>
Signed-off-by: Steven Rostedt
---
 kernel/trace/Kconfig | 9 ---------
 1 file changed, 9 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 6c22d8a2f289..ca2d3a8778b6 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -330,15 +330,6 @@ config BRANCH_TRACER
 
 	  Say N if unsure.
 
-config POWER_TRACER
-	bool "Trace power consumption behavior"
-	depends on X86
-	select GENERIC_TRACER
-	help
-	  This tracer helps developers to analyze and optimize the kernel's
-	  power management decisions, specifically the C-state and P-state
-	  behavior.
-
 config KSYM_TRACER
 	bool "Trace read and write access on kernel memory locations"
 	depends on HAVE_HW_BREAKPOINT
--
cgit v1.2.3


From 6574658b3bc7c408581629de5efb809f125cce8c Mon Sep 17 00:00:00 2001
From: Wenji Huang
Date: Wed, 24 Feb 2010 15:40:22 +0800
Subject: tracing: Fix typo in prof_sysexit_enable()

Signed-off-by: Wenji Huang
LKML-Reference: <1266997226-6833-1-git-send-email-wenji.huang@oracle.com>
Signed-off-by: Steven Rostedt
---
 kernel/trace/trace_syscalls.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 49cea70fbf6d..8cdda95da81a 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -603,7 +603,7 @@ int prof_sysexit_enable(struct ftrace_event_call *call)
 		ret = register_trace_sys_exit(prof_syscall_exit);
 	if (ret) {
 		pr_info("event trace: Could not activate"
-				"syscall entry trace point");
+				"syscall exit trace point");
 	} else {
 		set_bit(num, enabled_prof_exit_syscalls);
 		sys_prof_refcount_exit++;
--
cgit v1.2.3


From a5efd925115cbc1f90195dca9a25f7b8daa10c37 Mon Sep 17 00:00:00 2001
From: Wenji Huang
Date: Wed, 24 Feb 2010 15:40:23 +0800
Subject: tracing: Fix typo of info text in trace_kprobe.c

Signed-off-by: Wenji Huang
LKML-Reference: <1266997226-6833-2-git-send-email-wenji.huang@oracle.com>
Signed-off-by: Steven Rostedt
---
 kernel/trace/trace_kprobe.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index c99029916c76..8d4bd16d6f70 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -651,12 +651,12 @@ static int create_trace_probe(int argc, char **argv)
 		event = strchr(group, '/') + 1;
 		event[-1] = '\0';
 		if (strlen(group) == 0) {
-			pr_info("Group name is not specifiled\n");
+			pr_info("Group name is not specified\n");
 			return -EINVAL;
 		}
 	}
 	if (strlen(event) == 0) {
-		pr_info("Event name is not specifiled\n");
+		pr_info("Event name is not specified\n");
 		return -EINVAL;
 	}
 }
--
cgit v1.2.3


From c85f3a91f84d5a85f179c2504bb7a39370c82b41 Mon Sep 17 00:00:00 2001
From: Wenji Huang
Date: Wed, 24 Feb 2010 15:40:24 +0800
Subject: tracing: Remove unnecessary variable in print_graph_return

The "cpu" variable is declared at the start of the function and also
within a branch, with the exact same initialization.

Remove the local variable of the same name in the branch.

Signed-off-by: Wenji Huang
LKML-Reference: <1266997226-6833-3-git-send-email-wenji.huang@oracle.com>
Signed-off-by: Steven Rostedt
---
 kernel/trace/trace_functions_graph.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 616b135c9eb9..112561df2a0a 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -855,7 +855,6 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
 	int i;
 
 	if (data) {
-		int cpu = iter->cpu;
 		int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
 
 		/*
--
cgit v1.2.3


From 7b60997f73865b019e595720185c85285ca3df9a Mon Sep 17 00:00:00 2001
From: Wenji Huang
Date: Wed, 24 Feb 2010 15:40:26 +0800
Subject: tracing: Simplify memory recycle of trace_define_field

Discard freeing field->type since it is not necessary.
Reviewed-by: Li Zefan
Signed-off-by: Wenji Huang
LKML-Reference: <1266997226-6833-5-git-send-email-wenji.huang@oracle.com>
Signed-off-by: Steven Rostedt
---
 kernel/trace/trace_events.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index c2a3077b7353..3f972ad98d04 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -60,10 +60,8 @@ int trace_define_field(struct ftrace_event_call *call, const char *type,
 	return 0;
 
 err:
-	if (field) {
+	if (field)
 		kfree(field->name);
-		kfree(field->type);
-	}
 	kfree(field);
 
 	return -ENOMEM;
--
cgit v1.2.3


From f1c7f517a5dc23bce07efa5ed55e2c074ed9d4ba Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Fri, 26 Feb 2010 17:08:16 -0500
Subject: ftrace: Add function names to dangling } in function graph tracer

The function graph tracer is currently the most invasive tracer in the
ftrace family.  It can easily overflow the buffer even with 10megs per
CPU.  This means that events can often be lost.

On start up, or after events are lost, if the function return is
recorded but the function enter was lost, all we get to see is the
exiting '}'.

Here is how a typical trace output starts:

 [tracing] cat trace
 # tracer: function_graph
 #
 # CPU  DURATION                  FUNCTION CALLS
 # |     |   |                     |   |   |   |
  0) + 91.897 us   |                  }
  0) ! 567.961 us  |                }
  0)   <========== |
  0) ! 579.083 us  |                _raw_spin_lock_irqsave();
  0)   4.694 us    |                _raw_spin_unlock_irqrestore();
  0) ! 594.862 us  |              }
  0) ! 603.361 us  |            }
  0) ! 613.574 us  |          }
  0) ! 623.554 us  |        }
  0)   3.653 us    |  fget_light();
  0)               |  sock_poll() {

There are a series of '}' with no matching "func() {".  There's no
information to what functions these ending brackets belong to.

This patch adds a stack on the per cpu structure used in outputting
the function graph tracer to keep track of what function was
outputted.  Then on a function exit event, it checks the depth to
see if the function exit has a matching entry event.  If it does,
then it only prints the '}', otherwise it adds the function name
after the '}'.

This allows function exit events to show what function they belong
to at trace output startup, when the entry was lost due to ring
buffer overflow, or even after a new task is scheduled in.

Here is what the above trace will look like after this patch:

 [tracing] cat trace
 # tracer: function_graph
 #
 # CPU  DURATION                  FUNCTION CALLS
 # |     |   |                     |   |   |   |
  0) + 91.897 us   |                  } (irq_exit)
  0) ! 567.961 us  |                } (smp_apic_timer_interrupt)
  0)   <========== |
  0) ! 579.083 us  |                _raw_spin_lock_irqsave();
  0)   4.694 us    |                _raw_spin_unlock_irqrestore();
  0) ! 594.862 us  |              } (add_wait_queue)
  0) ! 603.361 us  |            } (__pollwait)
  0) ! 613.574 us  |          } (tcp_poll)
  0) ! 623.554 us  |        } (sock_poll)
  0)   3.653 us    |  fget_light();
  0)               |  sock_poll() {

Signed-off-by: Steven Rostedt
---
 kernel/trace/trace_functions_graph.c | 52 +++++++++++++++++++++++++++++-------
 1 file changed, 43 insertions(+), 9 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 112561df2a0a..e998a824e9db 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -18,6 +18,7 @@ struct fgraph_cpu_data {
 	pid_t		last_pid;
 	int		depth;
 	int		ignore;
+	unsigned long	enter_funcs[FTRACE_RETFUNC_DEPTH];
 };
 
 struct fgraph_data {
@@ -670,15 +671,21 @@ print_graph_entry_leaf(struct trace_iterator *iter,
 	duration = graph_ret->rettime - graph_ret->calltime;
 
 	if (data) {
+		struct fgraph_cpu_data *cpu_data;
 		int cpu = iter->cpu;
-		int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
+
+		cpu_data = per_cpu_ptr(data->cpu_data, cpu);
 
 		/*
 		 * Comments display at + 1 to depth. Since
 		 * this is a leaf function, keep the comments
 		 * equal to this depth.
 		 */
-		*depth = call->depth - 1;
+		cpu_data->depth = call->depth - 1;
+
+		/* No need to keep this function around for this depth */
+		if (call->depth < FTRACE_RETFUNC_DEPTH)
+			cpu_data->enter_funcs[call->depth] = 0;
 	}
 
 	/* Overhead */
@@ -718,10 +725,15 @@ print_graph_entry_nested(struct trace_iterator *iter,
 	int i;
 
 	if (data) {
+		struct fgraph_cpu_data *cpu_data;
 		int cpu = iter->cpu;
-		int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
 
-		*depth = call->depth;
+		cpu_data = per_cpu_ptr(data->cpu_data, cpu);
+		cpu_data->depth = call->depth;
+
+		/* Save this function pointer to see if the exit matches */
+		if (call->depth < FTRACE_RETFUNC_DEPTH)
+			cpu_data->enter_funcs[call->depth] = call->func;
 	}
 
 	/* No overhead */
@@ -851,18 +863,28 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
 	struct fgraph_data *data = iter->private;
 	pid_t pid = ent->pid;
 	int cpu = iter->cpu;
+	int func_match = 1;
 	int ret;
 	int i;
 
 	if (data) {
-		int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth);
+		struct fgraph_cpu_data *cpu_data;
+		int cpu = iter->cpu;
+
+		cpu_data = per_cpu_ptr(data->cpu_data, cpu);
 
 		/*
 		 * Comments display at + 1 to depth. This is the
 		 * return from a function, we now want the comments
 		 * to display at the same level of the bracket.
 		 */
-		*depth = trace->depth - 1;
+		cpu_data->depth = trace->depth - 1;
+
+		if (trace->depth < FTRACE_RETFUNC_DEPTH) {
+			if (cpu_data->enter_funcs[trace->depth] != trace->func)
+				func_match = 0;
+			cpu_data->enter_funcs[trace->depth] = 0;
+		}
 	}
 
 	if (print_graph_prologue(iter, s, 0, 0))
@@ -887,9 +909,21 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
 
-	ret = trace_seq_printf(s, "}\n");
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
+	/*
+	 * If the return function does not have a matching entry,
+	 * then the entry was lost. Instead of just printing
+	 * the '}' and letting the user guess what function this
+	 * belongs to, write out the function name.
+	 */
+	if (func_match) {
+		ret = trace_seq_printf(s, "}\n");
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+	} else {
+		ret = trace_seq_printf(s, "} (%ps)\n", (void *)trace->func);
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+	}
 
 	/* Overrun */
 	if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERRUN) {
--
cgit v1.2.3


From 9a8c28c8311e30ba97499447d5a11662f5aea094 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov
Date: Sat, 27 Feb 2010 20:35:13 +0300
Subject: blktrace: perform cleanup after setup error

Currently, even if the BLKTRACESETUP ioctl has failed, the user must
call BLKTRACETEARDOWN to be sure that all state has been cleaned up,
which is counter-intuitive.  Let the setup ioctl perform the necessary
cleanup by itself.

Signed-off-by: Dmitry Monakhov
Signed-off-by: Jens Axboe
---
 kernel/trace/blktrace.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'kernel/trace')

diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index d9d6206e0b14..07f945a99430 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -540,9 +540,10 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 	if (ret)
 		return ret;
 
-	if (copy_to_user(arg, &buts, sizeof(buts)))
+	if (copy_to_user(arg, &buts, sizeof(buts))) {
+		blk_trace_remove(q);
 		return -EFAULT;
-
+	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(blk_trace_setup);
--
cgit v1.2.3
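
A side note on the dangling-'}' patch above: the bookkeeping it introduces is easy to model outside the kernel as a per-depth table of entered functions that the return path consults before printing. The stand-alone C sketch below is illustrative only; MAX_DEPTH and the on_entry()/on_return() harness are invented stand-ins, while the real code keys enter_funcs[] by function address (not strings) and prints through trace_seq_printf(), as the diff shows.

    /*
     * Illustrative user-space sketch of the per-depth entry/exit
     * matching added by f1c7f517. Assumptions: MAX_DEPTH stands in
     * for FTRACE_RETFUNC_DEPTH; on_entry()/on_return() stand in for
     * the trace output callbacks.
     */
    #include <stdio.h>
    #include <stddef.h>

    #define MAX_DEPTH 50

    static const char *enter_funcs[MAX_DEPTH];

    static void on_entry(int depth, const char *func)
    {
            /* remember which function was entered at this depth */
            if (depth < MAX_DEPTH)
                    enter_funcs[depth] = func;
    }

    static void on_return(int depth, const char *func)
    {
            int func_match = 1;

            if (depth < MAX_DEPTH) {
                    /* no matching entry recorded: it was lost */
                    if (enter_funcs[depth] != func)
                            func_match = 0;
                    enter_funcs[depth] = NULL;
            }

            if (func_match)
                    printf("}\n");
            else
                    printf("} (%s)\n", func); /* annotate the dangling '}' */
    }

    int main(void)
    {
            static const char irq_exit[] = "irq_exit";
            static const char sock_poll[] = "sock_poll";

            on_return(0, irq_exit);   /* entry lost: prints "} (irq_exit)" */

            on_entry(0, sock_poll);   /* matched pair: prints a bare "}" */
            on_return(0, sock_poll);
            return 0;
    }

Indexing by depth instead of pushing and popping a real stack is what makes a lost entry event harmless here: the return side never underflows, it simply fails the comparison and annotates the bracket.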