From e7b8e675d9c71b868b66f62f725a948047514719 Mon Sep 17 00:00:00 2001
From: Mike Frysinger
Date: Tue, 26 Jan 2010 04:40:03 -0500
Subject: tracing: Unify arch_syscall_addr() implementations

Most implementations of arch_syscall_addr() are the same, so create a
default version in common code and move the one piece that differs (the
syscall table) to asm/syscall.h. New arch ports don't have to waste time
copying & pasting this simple function.

The s390/sparc versions need to be different, so document why.

Signed-off-by: Mike Frysinger
Acked-by: David S. Miller
Acked-by: Paul Mundt
Acked-by: Heiko Carstens
Cc: Steven Rostedt
LKML-Reference: <1264498803-17278-1-git-send-email-vapier@gentoo.org>
Signed-off-by: Frederic Weisbecker
---
 arch/s390/include/asm/syscall.h  |  7 +++++++
 arch/s390/kernel/ftrace.c        | 10 ----------
 arch/sh/include/asm/syscall.h    |  2 ++
 arch/sh/kernel/ftrace.c          |  9 ---------
 arch/sparc/include/asm/syscall.h |  7 +++++++
 arch/sparc/kernel/ftrace.c       | 11 -----------
 arch/x86/include/asm/syscall.h   |  2 ++
 arch/x86/kernel/ftrace.c         | 10 ----------
 8 files changed, 18 insertions(+), 40 deletions(-)
(limited to 'arch')

diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index e0a73d3eb837..8429686951f9 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -15,6 +15,13 @@
 #include
 #include
 
+/*
+ * The syscall table always contains 32 bit pointers since we know that the
+ * address of the function to be called is (way) below 4GB. So the "int"
+ * type here is what we want [need] for both 32 bit and 64 bit systems.
+ */
+extern const unsigned int sys_call_table[];
+
 static inline long syscall_get_nr(struct task_struct *task,
                                   struct pt_regs *regs)
 {
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 5a82bc68193e..9e69449e77ad 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -200,13 +200,3 @@ out:
 	return parent;
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#ifdef CONFIG_FTRACE_SYSCALLS
-
-extern unsigned int sys_call_table[];
-
-unsigned long __init arch_syscall_addr(int nr)
-{
-	return (unsigned long)sys_call_table[nr];
-}
-#endif
diff --git a/arch/sh/include/asm/syscall.h b/arch/sh/include/asm/syscall.h
index 6a381429ee9d..aa7777bdc370 100644
--- a/arch/sh/include/asm/syscall.h
+++ b/arch/sh/include/asm/syscall.h
@@ -1,6 +1,8 @@
 #ifndef __ASM_SH_SYSCALL_H
 #define __ASM_SH_SYSCALL_H
 
+extern const unsigned long sys_call_table[];
+
 #ifdef CONFIG_SUPERH32
 # include "syscall_32.h"
 #else
diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c
index a48cdedc73b5..30e13196d35b 100644
--- a/arch/sh/kernel/ftrace.c
+++ b/arch/sh/kernel/ftrace.c
@@ -399,12 +399,3 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 	}
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#ifdef CONFIG_FTRACE_SYSCALLS
-extern unsigned long *sys_call_table;
-
-unsigned long __init arch_syscall_addr(int nr)
-{
-	return (unsigned long)sys_call_table[nr];
-}
-#endif /* CONFIG_FTRACE_SYSCALLS */
diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h
index 7486c605e23c..025a02ad2e31 100644
--- a/arch/sparc/include/asm/syscall.h
+++ b/arch/sparc/include/asm/syscall.h
@@ -5,6 +5,13 @@
 #include
 #include
 
+/*
+ * The syscall table always contains 32 bit pointers since we know that the
+ * address of the function to be called is (way) below 4GB. So the "int"
+ * type here is what we want [need] for both 32 bit and 64 bit systems.
+ */
+extern const unsigned int sys_call_table[];
+
 /* The system call number is given by the user in %g1 */
 static inline long syscall_get_nr(struct task_struct *task,
                                   struct pt_regs *regs)
diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c
index 29973daa9930..9103a56b39e8 100644
--- a/arch/sparc/kernel/ftrace.c
+++ b/arch/sparc/kernel/ftrace.c
@@ -91,14 +91,3 @@ int __init ftrace_dyn_arch_init(void *data)
 	return 0;
 }
 #endif
-
-#ifdef CONFIG_FTRACE_SYSCALLS
-
-extern unsigned int sys_call_table[];
-
-unsigned long __init arch_syscall_addr(int nr)
-{
-	return (unsigned long)sys_call_table[nr];
-}
-
-#endif
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 8d33bc5462d1..c4a348f7bd43 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -16,6 +16,8 @@
 #include
 #include
 
+extern const unsigned long sys_call_table[];
+
 /*
  * Only the low 32 bits of orig_ax are meaningful, so we return int.
  * This importantly ignores the high bits on 64-bit, so comparisons
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 309689245431..0d93a941934c 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -484,13 +484,3 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 	}
 }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#ifdef CONFIG_FTRACE_SYSCALLS
-
-extern unsigned long *sys_call_table;
-
-unsigned long __init arch_syscall_addr(int nr)
-{
-	return (unsigned long)(&sys_call_table)[nr];
-}
-#endif
--
cgit v1.2.3
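
The common-code half of this change falls outside the arch-limited view
above. Every per-arch copy being removed performs the same table lookup, so
the shared default presumably reduces to that one lookup; a sketch, assuming
the generic definition simply mirrors the removed ones:

  /*
   * Presumed common-code default (not shown in the diff above, which is
   * limited to arch/). The element type of sys_call_table now comes from
   * each arch's asm/syscall.h, and the cast to unsigned long absorbs both
   * the 32-bit entries on s390/sparc and the pointer-width entries on
   * sh/x86.
   */
  unsigned long __init arch_syscall_addr(int nr)
  {
          return (unsigned long)sys_call_table[nr];
  }
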
From f850c30c8b426ba1688cb63b1a3e534eed03a138 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Wed, 10 Feb 2010 17:25:17 +0100
Subject: tracing/kprobes: Make Kconfig dependencies generic

KPROBE_EVENT actually depends on the regs and stack access API
(b1cf540f) and not on x86. So introduce a new config option which
architectures can select if they have the API implemented, and switch
x86 over to it.

Signed-off-by: Heiko Carstens
Acked-by: Masami Hiramatsu
Cc: Ingo Molnar
Cc: Martin Schwidefsky
LKML-Reference: <20100210162517.GB6933@osiris.boeblingen.de.ibm.com>
Signed-off-by: Frederic Weisbecker
---
 arch/Kconfig         | 3 +++
 arch/x86/Kconfig     | 1 +
 kernel/trace/Kconfig | 2 +-
 3 files changed, 5 insertions(+), 1 deletion(-)
(limited to 'arch')

diff --git a/arch/Kconfig b/arch/Kconfig
index 9d055b4f0585..04e3aa77da25 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -121,6 +121,9 @@ config HAVE_DMA_ATTRS
 config USE_GENERIC_SMP_HELPERS
 	bool
 
+config HAVE_REGS_AND_STACK_ACCESS_API
+	bool
+
 config HAVE_CLK
 	bool
 	help
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 55298e891571..07baa12929b4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -45,6 +45,7 @@ config X86
 	select HAVE_GENERIC_DMA_COHERENT if X86_32
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select USER_STACKTRACE_SUPPORT
+	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_DMA_API_DEBUG
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 6c22d8a2f289..40fef552f012 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -451,7 +451,7 @@ config BLK_DEV_IO_TRACE
 
 config KPROBE_EVENT
 	depends on KPROBES
-	depends on X86
+	depends on HAVE_REGS_AND_STACK_ACCESS_API
 	bool "Enable kprobes-based dynamic events"
 	select TRACING
 	default y
--
cgit v1.2.3
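
For a new architecture, the hookup this patch enables is a one-line select;
a hypothetical sketch ("foo" is an invented arch, not part of this series),
assuming the arch already provides the pt_regs accessors:

  # Hypothetical arch/foo/Kconfig fragment: with the select in place,
  # KPROBE_EVENT becomes available through its new
  # HAVE_REGS_AND_STACK_ACCESS_API dependency above.
  config FOO
          def_bool y
          select HAVE_KPROBES
          select HAVE_REGS_AND_STACK_ACCESS_API
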
From 952974ac61f686896bd4134dae106a886a5589f1 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Fri, 12 Feb 2010 13:38:40 +0100
Subject: s390: Add pt_regs register and stack access API

This API is needed for the kprobe-based event tracer.

Signed-off-by: Heiko Carstens
Reviewed-by: Masami Hiramatsu
Cc: Martin Schwidefsky
LKML-Reference: <20100212123840.GB27548@osiris.boeblingen.de.ibm.com>
Signed-off-by: Frederic Weisbecker
---
 arch/s390/Kconfig              |  1 +
 arch/s390/include/asm/ptrace.h | 13 +++++++++-
 arch/s390/kernel/ptrace.c      | 58 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+), 1 deletion(-)
(limited to 'arch')

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index c80235206c01..2590ce20157d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -87,6 +87,7 @@ config S390
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_DEFAULT_NO_SPIN_MUTEXES
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 95dcf183a28d..dd2d913afcae 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -492,13 +492,24 @@ struct user_regs_struct
 struct task_struct;
 extern void user_enable_single_step(struct task_struct *);
 extern void user_disable_single_step(struct task_struct *);
+extern void show_regs(struct pt_regs * regs);
 
 #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
 #define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
 #define user_stack_pointer(regs)((regs)->gprs[15])
 #define regs_return_value(regs)((regs)->gprs[2])
 #define profile_pc(regs) instruction_pointer(regs)
-extern void show_regs(struct pt_regs * regs);
+
+int regs_query_register_offset(const char *name);
+const char *regs_query_register_name(unsigned int offset);
+unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset);
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n);
+
+static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+{
+	return regs->gprs[15] & PSW_ADDR_INSN;
+}
+
 #endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 13815d39f7dd..1720f380add5 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -984,3 +984,61 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
 #endif
 	return &user_s390_view;
 }
+
+static const char *gpr_names[NUM_GPRS] = {
+	"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
+	"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+};
+
+unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset)
+{
+	if (offset >= NUM_GPRS)
+		return 0;
+	return regs->gprs[offset];
+}
+
+int regs_query_register_offset(const char *name)
+{
+	unsigned long offset;
+
+	if (!name || *name != 'r')
+		return -EINVAL;
+	if (strict_strtoul(name + 1, 10, &offset))
+		return -EINVAL;
+	if (offset >= NUM_GPRS)
+		return -EINVAL;
+	return offset;
+}
+
+const char *regs_query_register_name(unsigned int offset)
+{
+	if (offset >= NUM_GPRS)
+		return NULL;
+	return gpr_names[offset];
+}
+
+static int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
+{
+	unsigned long ksp = kernel_stack_pointer(regs);
+
+	return (addr & ~(THREAD_SIZE - 1)) == (ksp & ~(THREAD_SIZE - 1));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs:	pt_regs which contains kernel stack pointer.
+ * @n:		stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
+ * is specified by @regs. If the @n th entry is NOT in the kernel stack,
+ * this returns 0.
+ */
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+	unsigned long addr;
+
+	addr = kernel_stack_pointer(regs) + n * sizeof(long);
+	if (!regs_within_kernel_stack(regs, addr))
+		return 0;
+	return *(unsigned long *)addr;
+}
--
cgit v1.2.3
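
A rough picture of how the kprobe event tracer consumes this API: a register
name is resolved to an offset once, when a probe is defined, and the cheap
offset-based accessors then run at every probe hit. An illustrative consumer
(demo_* names are invented; this is not the tracer's actual code):

  /* Illustrative use of the API added above. */
  static int demo_offset;

  static int __init demo_init(void)
  {
          /* Resolve once: r2 holds the return value per regs_return_value(). */
          demo_offset = regs_query_register_offset("r2");
          return demo_offset < 0 ? demo_offset : 0;
  }

  static void demo_probe_hit(struct pt_regs *regs)
  {
          unsigned long val = regs_get_register(regs, demo_offset);
          unsigned long top = regs_get_kernel_stack_nth(regs, 0);

          pr_info("%s=%lx stack[0]=%lx\n",
                  regs_query_register_name(demo_offset), val, top);
  }
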
From e01292b1fd68ff2abe234d584b06e64344d2c1de Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Thu, 18 Feb 2010 14:25:21 +0100
Subject: tracing/kprobes: Add short documentation for HAVE_REGS_AND_STACK_ACCESS_API

So that arch developers know how to implement it without the need to dig
into changelogs.

Reported-by: Mike Frysinger
Signed-off-by: Heiko Carstens
Acked-by: Masami Hiramatsu
Cc: Martin Schwidefsky
Cc: "David S . Miller"
Cc: Paul Mundt
Cc: Steven Rostedt
LKML-Reference: <20100218132521.GB2406@osiris.boeblingen.de.ibm.com>
[added reference to ptrace.h in the config help]
Signed-off-by: Frederic Weisbecker
---
 arch/Kconfig | 5 +++++
 1 file changed, 5 insertions(+)
(limited to 'arch')

diff --git a/arch/Kconfig b/arch/Kconfig
index 04e3aa77da25..50877ef25844 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -123,6 +123,11 @@ config USE_GENERIC_SMP_HELPERS
 config HAVE_REGS_AND_STACK_ACCESS_API
 	bool
+	help
+	  This symbol should be selected by an architecture if it supports
+	  the API needed to access registers and stack entries from pt_regs,
+	  declared in asm/ptrace.h.
+	  For example the kprobes-based event tracer needs this API.
 
 config HAVE_CLK
 	bool
 	help
--
cgit v1.2.3
From 0c54dd341fb701928b8e5dca91ced1870c55b05b Mon Sep 17 00:00:00 2001
From: Steven Rostedt
Date: Thu, 25 Feb 2010 08:42:06 -0500
Subject: ftrace: Remove memory barriers from NMI code when not needed

The code in stop_machine that modifies the kernel text has a bit of logic
to handle the case of NMIs. stop_machine does not prevent NMIs from
executing, and if an NMI were to trigger on another CPU as the modifying
CPU is changing the NMI text, a GPF could result.

To prevent the GPF, the NMI calls ftrace_nmi_enter(), which may modify the
code first; any other NMIs will then just change the text to the same
content, which does no harm. The code that stop_machine calls must wait
for NMIs to finish while it changes each location in the kernel. That code
may also change the text to what the NMI changed it to. The key is that
the text will never change content while another CPU is executing it.

To make the above work, the call to ftrace_nmi_enter() must also do a
smp_mb() as well as atomic_inc(). But for applications like perf that
require a high number of NMIs for profiling, this can have a dramatic
effect on the system. Not only is it doing a full memory barrier on both
nmi_enter() and nmi_exit(), it is also modifying a global variable with an
atomic operation. This kills performance on large SMP machines.

Since the memory barriers are only needed when ftrace is in the process of
modifying the text (which is seldom), this patch adds a "modifying_code"
variable that gets set before stop_machine is executed and cleared
afterwards.

The NMIs check this variable and store it in a per-CPU
"save_modifying_code" variable, which they use to decide whether to do the
memory barriers and atomic dec on NMI exit.

Acked-by: Peter Zijlstra
Signed-off-by: Steven Rostedt
---
 arch/x86/kernel/ftrace.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
(limited to 'arch')

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 309689245431..605ef196fdd6 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -30,14 +30,32 @@
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
+/*
+ * modifying_code is set to notify NMIs that they need to use
+ * memory barriers when entering or exiting. But we don't want
+ * to burden NMIs with unnecessary memory barriers when code
+ * modification is not being done (which is most of the time).
+ *
+ * A mutex is already held when ftrace_arch_code_modify_prepare
+ * and post_process are called. No locks need to be taken here.
+ *
+ * Stop machine will make sure currently running NMIs are done
+ * and new NMIs will see the updated variable before we need
+ * to worry about NMIs doing memory barriers.
+ */
+static int modifying_code __read_mostly;
+static DEFINE_PER_CPU(int, save_modifying_code);
+
 int ftrace_arch_code_modify_prepare(void)
 {
 	set_kernel_text_rw();
+	modifying_code = 1;
 	return 0;
 }
 
 int ftrace_arch_code_modify_post_process(void)
 {
+	modifying_code = 0;
 	set_kernel_text_ro();
 	return 0;
 }
@@ -149,6 +167,11 @@ static void ftrace_mod_code(void)
 
 void ftrace_nmi_enter(void)
 {
+	__get_cpu_var(save_modifying_code) = modifying_code;
+
+	if (!__get_cpu_var(save_modifying_code))
+		return;
+
 	if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
 		smp_rmb();
 		ftrace_mod_code();
@@ -160,6 +183,9 @@ void ftrace_nmi_enter(void)
 
 void ftrace_nmi_exit(void)
 {
+	if (!__get_cpu_var(save_modifying_code))
+		return;
+
 	/* Finish all executions before clearing nmi_running */
 	smp_mb();
 	atomic_dec(&nmi_running);
--
cgit v1.2.3
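
One subtlety the changelog implies but does not spell out: ftrace_nmi_exit()
keys off the snapshot taken at entry rather than re-reading modifying_code,
so that exit always takes the same path entry took. A sketch of the race the
per-CPU copy avoids (illustrative interleaving, not code from the patch):

  /*
   * If ftrace_nmi_exit() re-read modifying_code instead of the per-CPU
   * save_modifying_code snapshot:
   *
   *   NMI on CPU0                        CPU1 (code modifier)
   *   reads modifying_code == 0
   *   skips atomic_inc(&nmi_running)
   *                                      modifying_code = 1
   *   exit re-reads 1, takes slow path
   *   atomic_dec(&nmi_running)           <-- unbalanced; counter underflows
   *
   * Snapshotting once at entry makes enter/exit accounting self-consistent
   * no matter when the flag flips.
   */
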