diff options
135 files changed, 3008 insertions, 1525 deletions
diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst index 533415644c54..66ec972dfb78 100644 --- a/Documentation/trace/histogram.rst +++ b/Documentation/trace/histogram.rst @@ -1763,6 +1763,20 @@ using the same key and variable from yet another event:: # echo 'hist:key=pid:wakeupswitch_lat=$wakeup_lat+$switchtime_lat ...' >> event3/trigger +Expressions support the use of addition, subtraction, multiplication and +division operators (+-\*/). + +Note that division by zero always returns -1. + +Numeric constants can also be used directly in an expression:: + + # echo 'hist:keys=next_pid:timestamp_secs=common_timestamp/1000000 ...' >> event/trigger + +or assigned to a variable and referenced in a subsequent expression:: + + # echo 'hist:keys=next_pid:us_per_sec=1000000 ...' >> event/trigger + # echo 'hist:keys=next_pid:timestamp_secs=common_timestamp/$us_per_sec ...' >> event/trigger + 2.2.2 Synthetic Events ---------------------- diff --git a/Documentation/trace/kprobes.rst b/Documentation/trace/kprobes.rst index 998149ce2fd9..f318bceda1e6 100644 --- a/Documentation/trace/kprobes.rst +++ b/Documentation/trace/kprobes.rst @@ -784,6 +784,6 @@ References For additional information on Kprobes, refer to the following URLs: -- https://www.ibm.com/developerworks/library/l-kprobes/index.html +- https://lwn.net/Articles/132196/ - https://www.kernel.org/doc/ols/2006/ols2006v2-pages-109-124.pdf diff --git a/Documentation/trace/timerlat-tracer.rst b/Documentation/trace/timerlat-tracer.rst index c7cbb557aee7..64d1fe6e9b93 100644 --- a/Documentation/trace/timerlat-tracer.rst +++ b/Documentation/trace/timerlat-tracer.rst @@ -3,7 +3,7 @@ Timerlat tracer ############### The timerlat tracer aims to help the preemptive kernel developers to -find souces of wakeup latencies of real-time threads. Like cyclictest, +find sources of wakeup latencies of real-time threads. Like cyclictest, the tracer sets a periodic timer that wakes up a thread. The thread then computes a *wakeup latency* value as the difference between the *current time* and the *absolute time* that the timer was set to expire. The main @@ -50,14 +50,14 @@ The second is the *timer latency* observed by the thread. The ACTIVATION ID field serves to relate the *irq* execution to its respective *thread* execution. -The *irq*/*thread* splitting is important to clarify at which context +The *irq*/*thread* splitting is important to clarify in which context the unexpected high value is coming from. The *irq* context can be -delayed by hardware related actions, such as SMIs, NMIs, IRQs -or by a thread masking interrupts. Once the timer happens, the delay +delayed by hardware-related actions, such as SMIs, NMIs, IRQs, +or by thread masking interrupts. Once the timer happens, the delay can also be influenced by blocking caused by threads. For example, by -postponing the scheduler execution via preempt_disable(), by the -scheduler execution, or by masking interrupts. Threads can -also be delayed by the interference from other threads and IRQs. +postponing the scheduler execution via preempt_disable(), scheduler +execution, or masking interrupts. Threads can also be delayed by the +interference from other threads and IRQs. Tracer options --------------------- @@ -68,14 +68,14 @@ directory. The timerlat configs are: - cpus: CPUs at which a timerlat thread will execute. - timerlat_period_us: the period of the timerlat thread. - - osnoise/stop_tracing_us: stop the system tracing if a + - stop_tracing_us: stop the system tracing if a timer latency at the *irq* context higher than the configured value happens. Writing 0 disables this option. - stop_tracing_total_us: stop the system tracing if a - timer latency at the *thread* context higher than the configured + timer latency at the *thread* context is higher than the configured value happens. Writing 0 disables this option. - - print_stack: save the stack of the IRQ ocurrence, and print - it afte the *thread context* event". + - print_stack: save the stack of the IRQ occurrence, and print + it after the *thread context* event". timerlat and osnoise ---------------------------- @@ -95,7 +95,7 @@ For example:: timerlat/5-1035 [005] ....... 548.771104: #402268 context thread timer_latency 39960 ns In this case, the root cause of the timer latency does not point to a -single cause, but to multiple ones. Firstly, the timer IRQ was delayed +single cause but to multiple ones. Firstly, the timer IRQ was delayed for 13 us, which may point to a long IRQ disabled section (see IRQ stacktrace section). Then the timer interrupt that wakes up the timerlat thread took 7597 ns, and the qxl:21 device IRQ took 7139 ns. Finally, diff --git a/MAINTAINERS b/MAINTAINERS index 0b4f251e34e1..2c9a7cac8361 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10482,10 +10482,13 @@ M: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> M: "David S. Miller" <davem@davemloft.net> M: Masami Hiramatsu <mhiramat@kernel.org> S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git F: Documentation/trace/kprobes.rst F: include/asm-generic/kprobes.h F: include/linux/kprobes.h F: kernel/kprobes.c +F: lib/test_kprobes.c +F: samples/kprobes KS0108 LCD CONTROLLER DRIVER M: Miguel Ojeda <ojeda@kernel.org> @@ -19026,7 +19029,7 @@ TRACING M: Steven Rostedt <rostedt@goodmis.org> M: Ingo Molnar <mingo@redhat.com> S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core +T: git git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git F: Documentation/trace/ftrace.rst F: arch/*/*/*/ftrace.h F: arch/*/kernel/ftrace.c diff --git a/arch/Kconfig b/arch/Kconfig index dcd84e3ae7d4..26b8ed11639d 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -191,6 +191,14 @@ config HAVE_OPTPROBES config HAVE_KPROBES_ON_FTRACE bool +config ARCH_CORRECT_STACKTRACE_ON_KRETPROBE + bool + help + Since kretprobes modifies return address on the stack, the + stacktrace may see the kretprobe trampoline address instead + of correct one. If the architecture stacktrace code and + unwinder can adjust such entries, select this configuration. + config HAVE_FUNCTION_ERROR_INJECTION bool diff --git a/arch/arc/include/asm/kprobes.h b/arch/arc/include/asm/kprobes.h index 2134721dce44..de1566e32cb8 100644 --- a/arch/arc/include/asm/kprobes.h +++ b/arch/arc/include/asm/kprobes.h @@ -46,7 +46,7 @@ struct kprobe_ctlblk { }; int kprobe_fault_handler(struct pt_regs *regs, unsigned long cause); -void kretprobe_trampoline(void); +void __kretprobe_trampoline(void); void trap_is_kprobe(unsigned long address, struct pt_regs *regs); #else #define trap_is_kprobe(address, regs) diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h index 4c3c9be5bd16..cca8d6583e31 100644 --- a/arch/arc/include/asm/ptrace.h +++ b/arch/arc/include/asm/ptrace.h @@ -149,6 +149,11 @@ static inline long regs_return_value(struct pt_regs *regs) return (long)regs->r0; } +static inline void instruction_pointer_set(struct pt_regs *regs, + unsigned long val) +{ + instruction_pointer(regs) = val; +} #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PTRACE_H */ diff --git a/arch/arc/kernel/kprobes.c b/arch/arc/kernel/kprobes.c index 5f0415fc7328..e71d64119d71 100644 --- a/arch/arc/kernel/kprobes.c +++ b/arch/arc/kernel/kprobes.c @@ -363,8 +363,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, static void __used kretprobe_trampoline_holder(void) { - __asm__ __volatile__(".global kretprobe_trampoline\n" - "kretprobe_trampoline:\n" "nop\n"); + __asm__ __volatile__(".global __kretprobe_trampoline\n" + "__kretprobe_trampoline:\n" + "nop\n"); } void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, @@ -375,13 +376,13 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, ri->fp = NULL; /* Replace the return addr with trampoline addr */ - regs->blink = (unsigned long)&kretprobe_trampoline; + regs->blink = (unsigned long)&__kretprobe_trampoline; } static int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { - regs->ret = __kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL); + regs->ret = __kretprobe_trampoline_handler(regs, NULL); /* By returning a non zero value, we are telling the kprobe handler * that we don't want the post_handler to run @@ -390,7 +391,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, } static struct kprobe trampoline_p = { - .addr = (kprobe_opcode_t *) &kretprobe_trampoline, + .addr = (kprobe_opcode_t *) &__kretprobe_trampoline, .pre_handler = trampoline_probe_handler }; @@ -402,7 +403,7 @@ int __init arch_init_kprobes(void) int __kprobes arch_trampoline_kprobe(struct kprobe *p) { - if (p->addr == (kprobe_opcode_t *) &kretprobe_trampoline) + if (p->addr == (kprobe_opcode_t *) &__kretprobe_trampoline) return 1; return 0; diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 2c056c0e834e..5d3b030e78e3 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -3,6 +3,7 @@ config ARM bool default y select ARCH_32BIT_OFF_T + select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE if HAVE_KRETPROBES && FRAME_POINTER && !ARM_UNWIND select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE diff --git a/arch/arm/include/asm/stacktrace.h b/arch/arm/include/asm/stacktrace.h index 2d76a2e29f05..8f54f9ad8a9b 100644 --- a/arch/arm/include/asm/stacktrace.h +++ b/arch/arm/include/asm/stacktrace.h @@ -3,6 +3,7 @@ #define __ASM_STACKTRACE_H #include <asm/ptrace.h> +#include <linux/llist.h> struct stackframe { /* @@ -13,6 +14,10 @@ struct stackframe { unsigned long sp; unsigned long lr; unsigned long pc; +#ifdef CONFIG_KRETPROBES + struct llist_node *kr_cur; + struct task_struct *tsk; +#endif }; static __always_inline @@ -22,6 +27,10 @@ void arm_get_current_stackframe(struct pt_regs *regs, struct stackframe *frame) frame->sp = regs->ARM_sp; frame->lr = regs->ARM_lr; frame->pc = regs->ARM_pc; +#ifdef CONFIG_KRETPROBES + frame->kr_cur = NULL; + frame->tsk = current; +#endif } extern int unwind_frame(struct stackframe *frame); diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 3c83b5d29697..a006585e1c09 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -193,11 +193,6 @@ int ftrace_make_nop(struct module *mod, return ret; } - -int __init ftrace_dyn_arch_init(void) -{ - return 0; -} #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c index 7b42ac010fdf..00c11579406c 100644 --- a/arch/arm/kernel/return_address.c +++ b/arch/arm/kernel/return_address.c @@ -42,6 +42,10 @@ void *return_address(unsigned int level) frame.sp = current_stack_pointer; frame.lr = (unsigned long)__builtin_return_address(0); frame.pc = (unsigned long)return_address; +#ifdef CONFIG_KRETPROBES + frame.kr_cur = NULL; + frame.tsk = current; +#endif walk_stackframe(&frame, save_return_addr, &data); diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index 76ea4178a55c..75e905508f27 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/export.h> +#include <linux/kprobes.h> #include <linux/sched.h> #include <linux/sched/debug.h> #include <linux/stacktrace.h> @@ -54,8 +55,7 @@ int notrace unwind_frame(struct stackframe *frame) frame->sp = frame->fp; frame->fp = *(unsigned long *)(fp); - frame->pc = frame->lr; - frame->lr = *(unsigned long *)(fp + 4); + frame->pc = *(unsigned long *)(fp + 4); #else /* check current frame pointer is within bounds */ if (fp < low + 12 || fp > high - 4) @@ -66,6 +66,11 @@ int notrace unwind_frame(struct stackframe *frame) frame->sp = *(unsigned long *)(fp - 8); frame->pc = *(unsigned long *)(fp - 4); #endif +#ifdef CONFIG_KRETPROBES + if (is_kretprobe_trampoline(frame->pc)) + frame->pc = kretprobe_find_ret_addr(frame->tsk, + (void *)frame->fp, &frame->kr_cur); +#endif return 0; } @@ -157,6 +162,10 @@ static noinline void __save_stack_trace(struct task_struct *tsk, frame.lr = (unsigned long)__builtin_return_address(0); frame.pc = (unsigned long)__save_stack_trace; } +#ifdef CONFIG_KRETPROBES + frame.kr_cur = NULL; + frame.tsk = tsk; +#endif walk_stackframe(&frame, save_trace, &data); } @@ -174,6 +183,10 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) frame.sp = regs->ARM_sp; frame.lr = regs->ARM_lr; frame.pc = regs->ARM_pc; +#ifdef CONFIG_KRETPROBES + frame.kr_cur = NULL; + frame.tsk = current; +#endif walk_stackframe(&frame, save_trace, &data); } diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index 9d8634e2f12f..9090c3a74dcc 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -11,6 +11,8 @@ * Copyright (C) 2007 Marvell Ltd. */ +#define pr_fmt(fmt) "kprobes: " fmt + #include <linux/kernel.h> #include <linux/kprobes.h> #include <linux/module.h> @@ -278,7 +280,7 @@ void __kprobes kprobe_handler(struct pt_regs *regs) break; case KPROBE_REENTER: /* A nested probe was hit in FIQ, it is a BUG */ - pr_warn("Unrecoverable kprobe detected.\n"); + pr_warn("Failed to recover from reentered kprobes.\n"); dump_kprobe(p); fallthrough; default: @@ -366,19 +368,41 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, /* * When a retprobed function returns, trampoline_handler() is called, * calling the kretprobe's handler. We construct a struct pt_regs to - * give a view of registers r0-r11 to the user return-handler. This is - * not a complete pt_regs structure, but that should be plenty sufficient - * for kretprobe handlers which should normally be interested in r0 only - * anyway. + * give a view of registers r0-r11, sp, lr, and pc to the user + * return-handler. This is not a complete pt_regs structure, but that + * should be enough for stacktrace from the return handler with or + * without pt_regs. */ -void __naked __kprobes kretprobe_trampoline(void) +void __naked __kprobes __kretprobe_trampoline(void) { __asm__ __volatile__ ( +#ifdef CONFIG_FRAME_POINTER + "ldr lr, =__kretprobe_trampoline \n\t" + /* __kretprobe_trampoline makes a framepointer on pt_regs. */ +#ifdef CONFIG_CC_IS_CLANG + "stmdb sp, {sp, lr, pc} \n\t" + "sub sp, sp, #12 \n\t" + /* In clang case, pt_regs->ip = lr. */ + "stmdb sp!, {r0 - r11, lr} \n\t" + /* fp points regs->r11 (fp) */ + "add fp, sp, #44 \n\t" +#else /* !CONFIG_CC_IS_CLANG */ + /* In gcc case, pt_regs->ip = fp. */ + "stmdb sp, {fp, sp, lr, pc} \n\t" + "sub sp, sp, #16 \n\t" + "stmdb sp!, {r0 - r11} \n\t" + /* fp points regs->r15 (pc) */ + "add fp, sp, #60 \n\t" +#endif /* CONFIG_CC_IS_CLANG */ +#else /* !CONFIG_FRAME_POINTER */ + "sub sp, sp, #16 \n\t" "stmdb sp!, {r0 - r11} \n\t" +#endif /* CONFIG_FRAME_POINTER */ "mov r0, sp \n\t" "bl trampoline_handler \n\t" "mov lr, r0 \n\t" "ldmia sp!, {r0 - r11} \n\t" + "add sp, sp, #16 \n\t" #ifdef CONFIG_THUMB2_KERNEL "bx lr \n\t" #else @@ -387,11 +411,10 @@ void __naked __kprobes kretprobe_trampoline(void) : : : "memory"); } -/* Called from kretprobe_trampoline */ +/* Called from __kretprobe_trampoline */ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) { - return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline, - (void *)regs->ARM_fp); + return (void *)kretprobe_trampoline_handler(regs, (void *)regs->ARM_fp); } void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, @@ -401,7 +424,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, ri->fp = (void *)regs->ARM_fp; /* Replace the return addr with trampoline addr. */ - regs->ARM_lr = (unsigned long)&kretprobe_trampoline; + regs->ARM_lr = (unsigned long)&__kretprobe_trampoline; } int __kprobes arch_trampoline_kprobe(struct kprobe *p) diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index c78180172120..dbef34ed933f 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -347,10 +347,11 @@ void arch_unoptimize_kprobes(struct list_head *oplist, } int arch_within_optimized_kprobe(struct optimized_kprobe *op, - unsigned long addr) + kprobe_opcode_t *addr) { - return ((unsigned long)op->kp.addr <= addr && - (unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr); + return (op->kp.addr <= addr && + op->kp.addr + (RELATIVEJUMP_SIZE / sizeof(kprobe_opcode_t)) > addr); + } void arch_remove_optimized_kprobe(struct optimized_kprobe *op) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index dde7e9f28d0c..3a00dfb0711e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -11,6 +11,7 @@ config ARM64 select ACPI_PPTT if ACPI select ARCH_HAS_DEBUG_WX select ARCH_BINFMT_ELF_STATE + select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h index 5d38ff4a4806..05cd82eeca13 100644 --- a/arch/arm64/include/asm/kprobes.h +++ b/arch/arm64/include/asm/kprobes.h @@ -39,7 +39,7 @@ void arch_remove_kprobe(struct kprobe *); int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr); int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); -void kretprobe_trampoline(void); +void __kretprobe_trampoline(void); void __kprobes *trampoline_probe_handler(struct pt_regs *regs); #endif /* CONFIG_KPROBES */ diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 8aebc00c1718..a4e046ef4568 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -9,6 +9,7 @@ #include <linux/sched.h> #include <linux/sched/task_stack.h> #include <linux/types.h> +#include <linux/llist.h> #include <asm/memory.h> #include <asm/ptrace.h> @@ -59,6 +60,9 @@ struct stackframe { #ifdef CONFIG_FUNCTION_GRAPH_TRACER int graph; #endif +#ifdef CONFIG_KRETPROBES + struct llist_node *kr_cur; +#endif }; extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 7f467bd9db7a..fc62dfe73f93 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -236,11 +236,6 @@ void arch_ftrace_update_code(int command) command |= FTRACE_MAY_SLEEP; ftrace_modify_all_code(command); } - -int __init ftrace_dyn_arch_init(void) -{ - return 0; -} #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 6dbcc89f6662..d9dfa82c1f18 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -7,6 +7,9 @@ * Copyright (C) 2013 Linaro Limited. * Author: Sandeepa Prabhu <sandeepa.prabhu@linaro.org> */ + +#define pr_fmt(fmt) "kprobes: " fmt + #include <linux/extable.h> #include <linux/kasan.h> #include <linux/kernel.h> @@ -218,7 +221,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, break; case KPROBE_HIT_SS: case KPROBE_REENTER: - pr_warn("Unrecoverable kprobe detected.\n"); + pr_warn("Failed to recover from reentered kprobes.\n"); dump_kprobe(p); BUG(); break; @@ -398,18 +401,17 @@ int __init arch_populate_kprobe_blacklist(void) void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs) { - return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline, - (void *)kernel_stack_pointer(regs)); + return (void *)kretprobe_trampoline_handler(regs, (void *)regs->regs[29]); } void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { ri->ret_addr = (kprobe_opcode_t *)regs->regs[30]; - ri->fp = (void *)kernel_stack_pointer(regs); + ri->fp = (void *)regs->regs[29]; /* replace return addr (x30) with trampoline */ - regs->regs[30] = (long)&kretprobe_trampoline; + regs->regs[30] = (long)&__kretprobe_trampoline; } int __kprobes arch_trampoline_kprobe(struct kprobe *p) diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S index 288a84e253cc..9a6499bed58b 100644 --- a/arch/arm64/kernel/probes/kprobes_trampoline.S +++ b/arch/arm64/kernel/probes/kprobes_trampoline.S @@ -61,11 +61,14 @@ ldp x28, x29, [sp, #S_X28] .endm -SYM_CODE_START(kretprobe_trampoline) +SYM_CODE_START(__kretprobe_trampoline) sub sp, sp, #PT_REGS_SIZE save_all_base_regs + /* Setup a frame pointer. */ + add x29, sp, #S_FP + mov x0, sp bl trampoline_probe_handler /* @@ -74,9 +77,10 @@ SYM_CODE_START(kretprobe_trampoline) */ mov lr, x0 + /* The frame pointer (x29) is restored with other registers. */ restore_all_base_regs add sp, sp, #PT_REGS_SIZE ret -SYM_CODE_END(kretprobe_trampoline) +SYM_CODE_END(__kretprobe_trampoline) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 8982a2b78acf..c30624fff6ac 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -41,6 +41,9 @@ void start_backtrace(struct stackframe *frame, unsigned long fp, #ifdef CONFIG_FUNCTION_GRAPH_TRACER frame->graph = 0; #endif +#ifdef CONFIG_KRETPROBES + frame->kr_cur = NULL; +#endif /* * Prime the first unwind. @@ -129,6 +132,10 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) frame->pc = ret_stack->ret; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +#ifdef CONFIG_KRETPROBES + if (is_kretprobe_trampoline(frame->pc)) + frame->pc = kretprobe_find_ret_addr(tsk, (void *)frame->fp, &frame->kr_cur); +#endif frame->pc = ptrauth_strip_insn_pac(frame->pc); diff --git a/arch/csky/include/asm/kprobes.h b/arch/csky/include/asm/kprobes.h index b647bbde4d6d..55267cbf5204 100644 --- a/arch/csky/include/asm/kprobes.h +++ b/arch/csky/include/asm/kprobes.h @@ -41,7 +41,7 @@ void arch_remove_kprobe(struct kprobe *p); int kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr); int kprobe_breakpoint_handler(struct pt_regs *regs); int kprobe_single_step_handler(struct pt_regs *regs); -void kretprobe_trampoline(void); +void __kretprobe_trampoline(void); void __kprobes *trampoline_probe_handler(struct pt_regs *regs); #endif /* CONFIG_KPROBES */ diff --git a/arch/csky/kernel/ftrace.c b/arch/csky/kernel/ftrace.c index b4a7ec1517ff..50bfcf129078 100644 --- a/arch/csky/kernel/ftrace.c +++ b/arch/csky/kernel/ftrace.c @@ -133,11 +133,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) (unsigned long)func, true, true); return ret; } - -int __init ftrace_dyn_arch_init(void) -{ - return 0; -} #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c index ef2bb9bd9605..834cffcfbce3 100644 --- a/arch/csky/kernel/probes/ftrace.c +++ b/arch/csky/kernel/probes/ftrace.c @@ -2,13 +2,6 @@ #include <linux/kprobes.h> -int arch_check_ftrace_location(struct kprobe *p) -{ - if (ftrace_location((unsigned long)p->addr)) - p->flags |= KPROBE_FLAG_FTRACE; - return 0; -} - /* Ftrace callback handler for kprobes -- called under preepmt disabled */ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs) @@ -24,7 +17,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (!p) { p = get_kprobe((kprobe_opcode_t *)(ip - MCOUNT_INSN_SIZE)); @@ -64,7 +56,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/csky/kernel/probes/kprobes.c b/arch/csky/kernel/probes/kprobes.c index 8fffa34d4e1c..42920f25e73c 100644 --- a/arch/csky/kernel/probes/kprobes.c +++ b/arch/csky/kernel/probes/kprobes.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0+ +#define pr_fmt(fmt) "kprobes: " fmt + #include <linux/kprobes.h> #include <linux/extable.h> #include <linux/slab.h> @@ -77,10 +79,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) { unsigned long probe_addr = (unsigned long)p->addr; - if (probe_addr & 0x1) { - pr_warn("Address not aligned.\n"); - return -EINVAL; - } + if (probe_addr & 0x1) + return -EILSEQ; /* copy instruction */ p->opcode = le32_to_cpu(*p->addr); @@ -225,7 +225,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, break; case KPROBE_HIT_SS: case KPROBE_REENTER: - pr_warn("Unrecoverable kprobe detected.\n"); + pr_warn("Failed to recover from reentered kprobes.\n"); dump_kprobe(p); BUG(); break; @@ -386,7 +386,7 @@ int __init arch_populate_kprobe_blacklist(void) void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs) { - return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL); + return (void *)kretprobe_trampoline_handler(regs, NULL); } void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, @@ -394,7 +394,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, { ri->ret_addr = (kprobe_opcode_t *)regs->lr; ri->fp = NULL; - regs->lr = (unsigned long) &kretprobe_trampoline; + regs->lr = (unsigned long) &__kretprobe_trampoline; } int __kprobes arch_trampoline_kprobe(struct kprobe *p) diff --git a/arch/csky/kernel/probes/kprobes_trampoline.S b/arch/csky/kernel/probes/kprobes_trampoline.S index b1fe3af24f03..ba48ad04a847 100644 --- a/arch/csky/kernel/probes/kprobes_trampoline.S +++ b/arch/csky/kernel/probes/kprobes_trampoline.S @@ -4,7 +4,7 @@ #include <abi/entry.h> -ENTRY(kretprobe_trampoline) +ENTRY(__kretprobe_trampoline) SAVE_REGS_FTRACE mov a0, sp /* pt_regs */ @@ -16,4 +16,4 @@ ENTRY(kretprobe_trampoline) RESTORE_REGS_FTRACE rts -ENDPROC(kretprobe_trampoline) +ENDPROC(__kretprobe_trampoline) diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h index 08179135905c..8a2d0f72b324 100644 --- a/arch/ia64/include/asm/ptrace.h +++ b/arch/ia64/include/asm/ptrace.h @@ -51,6 +51,11 @@ * the canonical representation by adding to instruction pointer. */ # define instruction_pointer(regs) ((regs)->cr_iip + ia64_psr(regs)->ri) +# define instruction_pointer_set(regs, val) \ +({ \ + ia64_psr(regs)->ri = (val & 0xf); \ + regs->cr_iip = (val & ~0xfULL); \ +}) static inline unsigned long user_stack_pointer(struct pt_regs *regs) { diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c index b2ab2d58fb30..d6360fd404ab 100644 --- a/arch/ia64/kernel/ftrace.c +++ b/arch/ia64/kernel/ftrace.c @@ -194,9 +194,3 @@ int ftrace_update_ftrace_func(ftrace_func_t func) flush_icache_range(addr, addr + 16); return 0; } - -/* run from kstop_machine */ -int __init ftrace_dyn_arch_init(void) -{ - return 0; -} diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 441ed04b1037..1a7bab1c5d7c 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -392,13 +392,13 @@ static void __kprobes set_current_kprobe(struct kprobe *p, __this_cpu_write(current_kprobe, p); } -static void kretprobe_trampoline(void) +void __kretprobe_trampoline(void) { } int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { - regs->cr_iip = __kretprobe_trampoline_handler(regs, kretprobe_trampoline, NULL); + regs->cr_iip = __kretprobe_trampoline_handler(regs, NULL); /* * By returning a non-zero value, we are telling * kprobe_handler() that we don't want the post_handler @@ -414,7 +414,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, ri->fp = NULL; /* Replace the return addr with trampoline addr */ - regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip; + regs->b0 = (unsigned long)dereference_function_descriptor(__kretprobe_trampoline); } /* Check the instruction in the slot is break */ @@ -890,11 +890,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, return ret; } -unsigned long arch_deref_entry_point(void *entry) -{ - return ((struct fnptr *)entry)->ip; -} - static struct kprobe trampoline_p = { .pre_handler = trampoline_probe_handler }; @@ -902,14 +897,14 @@ static struct kprobe trampoline_p = { int __init arch_init_kprobes(void) { trampoline_p.addr = - (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip; + dereference_function_descriptor(__kretprobe_trampoline); return register_kprobe(&trampoline_p); } int __kprobes arch_trampoline_kprobe(struct kprobe *p) { if (p->addr == - (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip) + dereference_function_descriptor(__kretprobe_trampoline)) return 1; return 0; diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c index 224eea40e1ee..188749d62709 100644 --- a/arch/microblaze/kernel/ftrace.c +++ b/arch/microblaze/kernel/ftrace.c @@ -163,11 +163,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return ret; } -int __init ftrace_dyn_arch_init(void) -{ - return 0; -} - int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c index 75bff0f77319..6c7f3b143fdc 100644 --- a/arch/mips/kernel/kprobes.c +++ b/arch/mips/kernel/kprobes.c @@ -11,6 +11,8 @@ * Copyright (C) IBM Corporation, 2002, 2004 */ +#define pr_fmt(fmt) "kprobes: " fmt + #include <linux/kprobes.h> #include <linux/preempt.h> #include <linux/uaccess.h> @@ -80,8 +82,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) insn = p->addr[0]; if (insn_has_ll_or_sc(insn)) { - pr_notice("Kprobes for ll and sc instructions are not" - "supported\n"); + pr_notice("Kprobes for ll and sc instructions are not supported\n"); ret = -EINVAL; goto out; } @@ -219,7 +220,7 @@ static int evaluate_branch_instruction(struct kprobe *p, struct pt_regs *regs, return 0; unaligned: - pr_notice("%s: unaligned epc - sending SIGBUS.\n", current->comm); + pr_notice("Failed to emulate branch instruction because of unaligned epc - sending SIGBUS to %s.\n", current->comm); force_sig(SIGBUS); return -EFAULT; @@ -238,10 +239,8 @@ static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs, regs->cp0_epc = (unsigned long)p->addr; else if (insn_has_delayslot(p->opcode)) { ret = evaluate_branch_instruction(p, regs, kcb); - if (ret < 0) { - pr_notice("Kprobes: Error in evaluating branch\n"); + if (ret < 0) return; - } } regs->cp0_epc = (unsigned long)&p->ainsn.insn[0]; } @@ -461,14 +460,14 @@ static void __used kretprobe_trampoline_holder(void) /* Keep the assembler from reordering and placing JR here. */ ".set noreorder\n\t" "nop\n\t" - ".global kretprobe_trampoline\n" - "kretprobe_trampoline:\n\t" + ".global __kretprobe_trampoline\n" + "__kretprobe_trampoline:\n\t" "nop\n\t" ".set pop" : : : "memory"); } -void kretprobe_trampoline(void); +void __kretprobe_trampoline(void); void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) @@ -477,7 +476,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, ri->fp = NULL; /* Replace the return addr with trampoline addr */ - regs->regs[31] = (unsigned long)kretprobe_trampoline; + regs->regs[31] = (unsigned long)__kretprobe_trampoline; } /* @@ -486,8 +485,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, static int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { - instruction_pointer(regs) = __kretprobe_trampoline_handler(regs, - kretprobe_trampoline, NULL); + instruction_pointer(regs) = __kretprobe_trampoline_handler(regs, NULL); /* * By returning a non-zero value, we are telling * kprobe_handler() that we don't want the post_handler @@ -498,14 +496,14 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, int __kprobes arch_trampoline_kprobe(struct kprobe *p) { - if (p->addr == (kprobe_opcode_t *)kretprobe_trampoline) + if (p->addr == (kprobe_opcode_t *)__kretprobe_trampoline) return 1; return 0; } static struct kprobe trampoline_p = { - .addr = (kprobe_opcode_t *)kretprobe_trampoline, + .addr = (kprobe_opcode_t *)__kretprobe_trampoline, .pre_handler = trampoline_probe_handler }; diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index d55b73b18149..711bc8cd186d 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -84,11 +84,6 @@ void _ftrace_caller(unsigned long parent_ip) /* restore all state needed by the compiler epilogue */ } -int __init ftrace_dyn_arch_init(void) -{ - return 0; -} - static unsigned long gen_sethi_insn(unsigned long addr) { unsigned long opcode = 0x46000000; diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 2a1f826b3def..4d392e4ed358 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -93,12 +93,6 @@ int ftrace_disable_ftrace_graph_caller(void) #endif #ifdef CONFIG_DYNAMIC_FTRACE - -int __init ftrace_dyn_arch_init(void) -{ - return 0; -} - int ftrace_update_ftrace_func(ftrace_func_t func) { ftrace_func = func; @@ -217,7 +211,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -246,7 +239,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, } __this_cpu_write(current_kprobe, NULL); out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/parisc/kernel/kprobes.c b/arch/parisc/kernel/kprobes.c index 6d21a515eea5..e2bdb5a5f93e 100644 --- a/arch/parisc/kernel/kprobes.c +++ b/arch/parisc/kernel/kprobes.c @@ -175,7 +175,7 @@ int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs) return 1; } -static inline void kretprobe_trampoline(void) +void __kretprobe_trampoline(void) { asm volatile("nop"); asm volatile("nop"); @@ -193,7 +193,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, { unsigned long orig_ret_address; - orig_ret_address = __kretprobe_trampoline_handler(regs, trampoline_p.addr, NULL); + orig_ret_address = __kretprobe_trampoline_handler(regs, NULL); instruction_pointer_set(regs, orig_ret_address); return 1; @@ -217,6 +217,6 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p) int __init arch_init_kprobes(void) { trampoline_p.addr = (kprobe_opcode_t *) - dereference_function_descriptor(kretprobe_trampoline); + dereference_function_descriptor(__kretprobe_trampoline); return register_kprobe(&trampoline_p); } diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index 4fc0e15e23a5..bab364152b29 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -51,7 +51,7 @@ extern kprobe_opcode_t optprobe_template_end[]; #define flush_insn_slot(p) do { } while (0) #define kretprobe_blacklist_size 0 -void kretprobe_trampoline(void); +void __kretprobe_trampoline(void); extern void arch_remove_kprobe(struct kprobe *p); /* Architecture specific copy of original instruction */ diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index 7154d58338cc..072ebe7f290b 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -26,7 +26,6 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, return; regs = ftrace_get_regs(fregs); - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)nip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -61,7 +60,6 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 7a7cd6bda53e..86d77ff056a6 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -237,7 +237,7 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) ri->fp = NULL; /* Replace the return addr with trampoline addr */ - regs->link = (unsigned long)kretprobe_trampoline; + regs->link = (unsigned long)__kretprobe_trampoline; } NOKPROBE_SYMBOL(arch_prepare_kretprobe); @@ -403,12 +403,12 @@ NOKPROBE_SYMBOL(kprobe_handler); * - When the probed function returns, this probe * causes the handlers to fire */ -asm(".global kretprobe_trampoline\n" - ".type kretprobe_trampoline, @function\n" - "kretprobe_trampoline:\n" +asm(".global __kretprobe_trampoline\n" + ".type __kretprobe_trampoline, @function\n" + "__kretprobe_trampoline:\n" "nop\n" "blr\n" - ".size kretprobe_trampoline, .-kretprobe_trampoline\n"); + ".size __kretprobe_trampoline, .-__kretprobe_trampoline\n"); /* * Called when the probe at kretprobe trampoline is hit @@ -417,7 +417,7 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { unsigned long orig_ret_address; - orig_ret_address = __kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL); + orig_ret_address = __kretprobe_trampoline_handler(regs, NULL); /* * We get here through one of two paths: * 1. by taking a trap -> kprobe_handler() -> here @@ -427,7 +427,7 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * as it is used to determine the return address from the trap. * For (2), since nip is not honoured with optprobes, we instead setup * the link register properly so that the subsequent 'blr' in - * kretprobe_trampoline jumps back to the right instruction. + * __kretprobe_trampoline jumps back to the right instruction. * * For nip, we should set the address to the previous instruction since * we end up emulating it in kprobe_handler(), which increments the nip @@ -542,19 +542,8 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) } NOKPROBE_SYMBOL(kprobe_fault_handler); -unsigned long arch_deref_entry_point(void *entry) -{ -#ifdef PPC64_ELF_ABI_v1 - if (!kernel_text_address((unsigned long)entry)) - return ppc_global_function_entry(entry); - else -#endif - return (unsigned long)entry; -} -NOKPROBE_SYMBOL(arch_deref_entry_point); - static struct kprobe trampoline_p = { - .addr = (kprobe_opcode_t *) &kretprobe_trampoline, + .addr = (kprobe_opcode_t *) &__kretprobe_trampoline, .pre_handler = trampoline_probe_handler }; @@ -565,7 +554,7 @@ int __init arch_init_kprobes(void) int arch_trampoline_kprobe(struct kprobe *p) { - if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline) + if (p->addr == (kprobe_opcode_t *)&__kretprobe_trampoline) return 1; return 0; diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index c79899abcec8..ce1903064031 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -56,7 +56,7 @@ static unsigned long can_optimize(struct kprobe *p) * has a 'nop' instruction, which can be emulated. * So further checks can be skipped. */ - if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline) + if (p->addr == (kprobe_opcode_t *)&__kretprobe_trampoline) return addr + sizeof(kprobe_opcode_t); /* @@ -301,8 +301,8 @@ void arch_unoptimize_kprobes(struct list_head *oplist, struct list_head *done_li } } -int arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr) +int arch_within_optimized_kprobe(struct optimized_kprobe *op, kprobe_opcode_t *addr) { - return ((unsigned long)op->kp.addr <= addr && - (unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr); + return (op->kp.addr <= addr && + op->kp.addr + (RELATIVEJUMP_SIZE / sizeof(kprobe_opcode_t)) > addr); } diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 9e4a4a7af380..a2443d61728e 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -155,7 +155,7 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum * Mark stacktraces with kretprobed functions on them * as unreliable. */ - if (ip == (unsigned long)kretprobe_trampoline) + if (ip == (unsigned long)__kretprobe_trampoline) return -EINVAL; #endif diff --git a/arch/riscv/include/asm/kprobes.h b/arch/riscv/include/asm/kprobes.h index 9ea9b5ec3113..217ef89f22b9 100644 --- a/arch/riscv/include/asm/kprobes.h +++ b/arch/riscv/include/asm/kprobes.h @@ -40,7 +40,7 @@ void arch_remove_kprobe(struct kprobe *p); int kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr); bool kprobe_breakpoint_handler(struct pt_regs *regs); bool kprobe_single_step_handler(struct pt_regs *regs); -void kretprobe_trampoline(void); +void __kretprobe_trampoline(void); void __kprobes *trampoline_probe_handler(struct pt_regs *regs); #endif /* CONFIG_KPROBES */ diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 7f1e5203de88..4716f4cdc038 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -154,11 +154,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ret; } - -int __init ftrace_dyn_arch_init(void) -{ - return 0; -} #endif #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c index aab85a82f419..7142ec42e889 100644 --- a/arch/riscv/kernel/probes/ftrace.c +++ b/arch/riscv/kernel/probes/ftrace.c @@ -15,7 +15,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -52,7 +51,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index 00088dc6da4b..e6e950b7cf32 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0+ +#define pr_fmt(fmt) "kprobes: " fmt + #include <linux/kprobes.h> #include <linux/extable.h> #include <linux/slab.h> @@ -50,11 +52,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) { unsigned long probe_addr = (unsigned long)p->addr; - if (probe_addr & 0x1) { - pr_warn("Address not aligned.\n"); - - return -EINVAL; - } + if (probe_addr & 0x1) + return -EILSEQ; /* copy instruction */ p->opcode = *p->addr; @@ -191,7 +190,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, break; case KPROBE_HIT_SS: case KPROBE_REENTER: - pr_warn("Unrecoverable kprobe detected.\n"); + pr_warn("Failed to recover from reentered kprobes.\n"); dump_kprobe(p); BUG(); break; @@ -348,7 +347,7 @@ int __init arch_populate_kprobe_blacklist(void) void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs) { - return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL); + return (void *)kretprobe_trampoline_handler(regs, NULL); } void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, @@ -356,7 +355,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, { ri->ret_addr = (kprobe_opcode_t *)regs->ra; ri->fp = NULL; - regs->ra = (unsigned long) &kretprobe_trampoline; + regs->ra = (unsigned long) &__kretprobe_trampoline; } int __kprobes arch_trampoline_kprobe(struct kprobe *p) diff --git a/arch/riscv/kernel/probes/kprobes_trampoline.S b/arch/riscv/kernel/probes/kprobes_trampoline.S index 6e85d021e2a2..7bdb09ded39b 100644 --- a/arch/riscv/kernel/probes/kprobes_trampoline.S +++ b/arch/riscv/kernel/probes/kprobes_trampoline.S @@ -75,7 +75,7 @@ REG_L x31, PT_T6(sp) .endm -ENTRY(kretprobe_trampoline) +ENTRY(__kretprobe_trampoline) addi sp, sp, -(PT_SIZE_ON_STACK) save_all_base_regs @@ -90,4 +90,4 @@ ENTRY(kretprobe_trampoline) addi sp, sp, PT_SIZE_ON_STACK ret -ENDPROC(kretprobe_trampoline) +ENDPROC(__kretprobe_trampoline) diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 09cdb632a490..5eb722c984e4 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -70,7 +70,7 @@ struct kprobe_ctlblk { }; void arch_remove_kprobe(struct kprobe *p); -void kretprobe_trampoline(void); +void __kretprobe_trampoline(void); int kprobe_fault_handler(struct pt_regs *regs, int trapnr); int kprobe_exceptions_notify(struct notifier_block *self, diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 1d94ffdf347b..5165bf344f95 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -262,11 +262,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return 0; } -int __init ftrace_dyn_arch_init(void) -{ - return 0; -} - void arch_ftrace_update_code(int command) { if (ftrace_shared_hotpatch_trampoline(NULL)) diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 52d056a5f89f..c505c0ee5f47 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -7,6 +7,8 @@ * s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com> */ +#define pr_fmt(fmt) "kprobes: " fmt + #include <linux/moduleloader.h> #include <linux/kprobes.h> #include <linux/ptrace.h> @@ -240,7 +242,7 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) ri->fp = NULL; /* Replace the return addr with trampoline addr */ - regs->gprs[14] = (unsigned long) &kretprobe_trampoline; + regs->gprs[14] = (unsigned long) &__kretprobe_trampoline; } NOKPROBE_SYMBOL(arch_prepare_kretprobe); @@ -259,7 +261,7 @@ static void kprobe_reenter_check(struct kprobe_ctlblk *kcb, struct kprobe *p) * is a BUG. The code path resides in the .kprobes.text * section and is executed with interrupts disabled. */ - pr_err("Invalid kprobe detected.\n"); + pr_err("Failed to recover from reentered kprobes.\n"); dump_kprobe(p); BUG(); } @@ -332,8 +334,8 @@ NOKPROBE_SYMBOL(kprobe_handler); */ static void __used kretprobe_trampoline_holder(void) { - asm volatile(".global kretprobe_trampoline\n" - "kretprobe_trampoline: bcr 0,0\n"); + asm volatile(".global __kretprobe_trampoline\n" + "__kretprobe_trampoline: bcr 0,0\n"); } /* @@ -341,7 +343,7 @@ static void __used kretprobe_trampoline_holder(void) */ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { - regs->psw.addr = __kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL); + regs->psw.addr = __kretprobe_trampoline_handler(regs, NULL); /* * By returning a non-zero value, we are telling * kprobe_handler() that we don't want the post_handler @@ -507,7 +509,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, NOKPROBE_SYMBOL(kprobe_exceptions_notify); static struct kprobe trampoline = { - .addr = (kprobe_opcode_t *) &kretprobe_trampoline, + .addr = (kprobe_opcode_t *) &__kretprobe_trampoline, .pre_handler = trampoline_probe_handler }; @@ -518,6 +520,6 @@ int __init arch_init_kprobes(void) int arch_trampoline_kprobe(struct kprobe *p) { - return p->addr == (kprobe_opcode_t *) &kretprobe_trampoline; + return p->addr == (kprobe_opcode_t *) &__kretprobe_trampoline; } NOKPROBE_SYMBOL(arch_trampoline_kprobe); diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 101477b3e263..b7bb1981e9ee 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -46,7 +46,7 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, * Mark stacktraces with kretprobed functions on them * as unreliable. */ - if (state.ip == (unsigned long)kretprobe_trampoline) + if (state.ip == (unsigned long)__kretprobe_trampoline) return -EINVAL; #endif diff --git a/arch/sh/boot/compressed/misc.c b/arch/sh/boot/compressed/misc.c index a03b6680a9d9..ca05c99a3d5b 100644 --- a/arch/sh/boot/compressed/misc.c +++ b/arch/sh/boot/compressed/misc.c @@ -115,6 +115,9 @@ void __stack_chk_fail(void) void ftrace_stub(void) { } +void arch_ftrace_ops_list_func(void) +{ +} #define stackalign 4 diff --git a/arch/sh/include/asm/kprobes.h b/arch/sh/include/asm/kprobes.h index 6171682f7798..eeba83e0a7d2 100644 --- a/arch/sh/include/asm/kprobes.h +++ b/arch/sh/include/asm/kprobes.h @@ -26,7 +26,7 @@ typedef insn_size_t kprobe_opcode_t; struct kprobe; void arch_remove_kprobe(struct kprobe *); -void kretprobe_trampoline(void); +void __kretprobe_trampoline(void); /* Architecture specific copy of original instruction*/ struct arch_specific_insn { diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 295c43315bbe..930001bb8c6a 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -252,11 +252,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return ftrace_modify_code(rec->ip, old, new); } - -int __init ftrace_dyn_arch_init(void) -{ - return 0; -} #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c index 1c7f358ef0be..aed1ea8e2c2f 100644 --- a/arch/sh/kernel/kprobes.c +++ b/arch/sh/kernel/kprobes.c @@ -207,7 +207,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, ri->fp = NULL; /* Replace the return addr with trampoline addr */ - regs->pr = (unsigned long)kretprobe_trampoline; + regs->pr = (unsigned long)__kretprobe_trampoline; } static int __kprobes kprobe_handler(struct pt_regs *regs) @@ -293,17 +293,17 @@ no_kprobe: */ static void __used kretprobe_trampoline_holder(void) { - asm volatile (".globl kretprobe_trampoline\n" - "kretprobe_trampoline:\n\t" + asm volatile (".globl __kretprobe_trampoline\n" + "__kretprobe_trampoline:\n\t" "nop\n"); } /* - * Called when we hit the probe point at kretprobe_trampoline + * Called when we hit the probe point at __kretprobe_trampoline */ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { - regs->pc = __kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL); + regs->pc = __kretprobe_trampoline_handler(regs, NULL); return 1; } @@ -442,7 +442,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, } static struct kprobe trampoline_p = { - .addr = (kprobe_opcode_t *)&kretprobe_trampoline, + .addr = (kprobe_opcode_t *)&__kretprobe_trampoline, .pre_handler = trampoline_probe_handler }; diff --git a/arch/sparc/include/asm/kprobes.h b/arch/sparc/include/asm/kprobes.h index bfcaa6326c20..06c2bc767ef7 100644 --- a/arch/sparc/include/asm/kprobes.h +++ b/arch/sparc/include/asm/kprobes.h @@ -24,7 +24,7 @@ do { flushi(&(p)->ainsn.insn[0]); \ flushi(&(p)->ainsn.insn[1]); \ } while (0) -void kretprobe_trampoline(void); +void __kretprobe_trampoline(void); /* Architecture specific copy of original instruction*/ struct arch_specific_insn { diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c index 684b84ce397f..eaead3da8e03 100644 --- a/arch/sparc/kernel/ftrace.c +++ b/arch/sparc/kernel/ftrace.c @@ -82,11 +82,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) new = ftrace_call_replace(ip, (unsigned long)func); return ftrace_modify_code(ip, old, new); } - -int __init ftrace_dyn_arch_init(void) -{ - return 0; -} #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c index 4c05a4ee6a0e..535c7b35cb59 100644 --- a/arch/sparc/kernel/kprobes.c +++ b/arch/sparc/kernel/kprobes.c @@ -440,7 +440,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, /* Replace the return addr with trampoline addr */ regs->u_regs[UREG_RETPC] = - ((unsigned long)kretprobe_trampoline) - 8; + ((unsigned long)__kretprobe_trampoline) - 8; } /* @@ -451,7 +451,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, { unsigned long orig_ret_address = 0; - orig_ret_address = __kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL); + orig_ret_address = __kretprobe_trampoline_handler(regs, NULL); regs->tpc = orig_ret_address; regs->tnpc = orig_ret_address + 4; @@ -465,13 +465,13 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p, static void __used kretprobe_trampoline_holder(void) { - asm volatile(".global kretprobe_trampoline\n" - "kretprobe_trampoline:\n" + asm volatile(".global __kretprobe_trampoline\n" + "__kretprobe_trampoline:\n" "\tnop\n" "\tnop\n"); } static struct kprobe trampoline_p = { - .addr = (kprobe_opcode_t *) &kretprobe_trampoline, + .addr = (kprobe_opcode_t *) &__kretprobe_trampoline, .pre_handler = trampoline_probe_handler }; @@ -482,7 +482,7 @@ int __init arch_init_kprobes(void) int __kprobes arch_trampoline_kprobe(struct kprobe *p) { - if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline) + if (p->addr == (kprobe_opcode_t *)&__kretprobe_trampoline) return 1; return 0; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5ddfbbe5c51e..b1d4b481fcdd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -61,6 +61,7 @@ config X86 select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI select ARCH_32BIT_OFF_T if X86_32 select ARCH_CLOCKSOURCE_INIT + select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64 || (X86_32 && HIGHMEM) select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG @@ -198,7 +199,7 @@ config X86 select HAVE_FAST_GUP select HAVE_FENTRY if X86_64 || DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD - select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_GRAPH_TRACER if X86_32 || (X86_64 && DYNAMIC_FTRACE) select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 9f3130f40807..024d9797646e 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -57,6 +57,13 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs) #define ftrace_instruction_pointer_set(fregs, _ip) \ do { (fregs)->regs.ip = (_ip); } while (0) + +struct ftrace_ops; +#define ftrace_graph_func ftrace_graph_func +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); +#else +#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR #endif #ifdef CONFIG_DYNAMIC_FTRACE @@ -65,8 +72,6 @@ struct dyn_arch_ftrace { /* No extra data needed for x86 */ }; -#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR - #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index bd7f5886a789..71ea2eab43d5 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -49,7 +49,6 @@ extern __visible kprobe_opcode_t optprobe_template_end[]; extern const int kretprobe_blacklist_size; void arch_remove_kprobe(struct kprobe *p); -asmlinkage void kretprobe_trampoline(void); extern void arch_kprobe_override_function(struct pt_regs *regs); diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index 70fc159ebe69..2a1f8734416d 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -4,6 +4,7 @@ #include <linux/sched.h> #include <linux/ftrace.h> +#include <linux/kprobes.h> #include <asm/ptrace.h> #include <asm/stacktrace.h> @@ -15,6 +16,9 @@ struct unwind_state { unsigned long stack_mask; struct task_struct *task; int graph_idx; +#ifdef CONFIG_KRETPROBES + struct llist_node *kr_cur; +#endif bool error; #if defined(CONFIG_UNWINDER_ORC) bool signal, full_regs; @@ -99,6 +103,31 @@ void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {} #endif +static inline +unsigned long unwind_recover_kretprobe(struct unwind_state *state, + unsigned long addr, unsigned long *addr_p) +{ +#ifdef CONFIG_KRETPROBES + return is_kretprobe_trampoline(addr) ? + kretprobe_find_ret_addr(state->task, addr_p, &state->kr_cur) : + addr; +#else + return addr; +#endif +} + +/* Recover the return address modified by kretprobe and ftrace_graph. */ +static inline +unsigned long unwind_recover_ret_addr(struct unwind_state *state, + unsigned long addr, unsigned long *addr_p) +{ + unsigned long ret; + + ret = ftrace_graph_ret_addr(state->task, &state->graph_idx, + addr, addr_p); + return unwind_recover_kretprobe(state, ret, addr_p); +} + /* * This disables KASAN checking when reading a value from another task's stack, * since the other task could be running on another CPU and could have poisoned diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 8e574c0afef8..8b33674288ea 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -52,6 +52,11 @@ UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC .endm +#else + +#define UNWIND_HINT_FUNC \ + UNWIND_HINT(ORC_REG_SP, 8, UNWIND_HINT_TYPE_FUNC, 0) + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_UNWIND_HINTS_H */ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 1b3ce3b4a2a2..c39f906cdc4e 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -252,11 +252,6 @@ void arch_ftrace_update_code(int command) ftrace_modify_all_code(command); } -int __init ftrace_dyn_arch_init(void) -{ - return 0; -} - /* Currently only x86_64 supports dynamic trampolines */ #ifdef CONFIG_X86_64 @@ -527,7 +522,7 @@ static void *addr_from_call(void *ptr) return ptr + CALL_INSN_SIZE + call.disp; } -void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, +void prepare_ftrace_return(unsigned long ip, unsigned long *parent, unsigned long frame_pointer); /* @@ -541,7 +536,8 @@ static void *static_tramp_func(struct ftrace_ops *ops, struct dyn_ftrace *rec) void *ptr; if (ops && ops->trampoline) { -#ifdef CONFIG_FUNCTION_GRAPH_TRACER +#if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) && \ + defined(CONFIG_FUNCTION_GRAPH_TRACER) /* * We only know about function graph tracer setting as static * trampoline. @@ -589,8 +585,9 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops) #ifdef CONFIG_FUNCTION_GRAPH_TRACER #ifdef CONFIG_DYNAMIC_FTRACE -extern void ftrace_graph_call(void); +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS +extern void ftrace_graph_call(void); static const char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) { return text_gen_insn(JMP32_INSN_OPCODE, (void *)ip, (void *)addr); @@ -618,19 +615,28 @@ int ftrace_disable_ftrace_graph_caller(void) return ftrace_mod_jmp(ip, &ftrace_stub); } +#else /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ +int ftrace_enable_ftrace_graph_caller(void) +{ + return 0; +} +int ftrace_disable_ftrace_graph_caller(void) +{ + return 0; +} +#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */ #endif /* !CONFIG_DYNAMIC_FTRACE */ /* * Hook the return address and push it in the stack of return addrs * in current thread info. */ -void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, +void prepare_ftrace_return(unsigned long ip, unsigned long *parent, unsigned long frame_pointer) { unsigned long return_hooker = (unsigned long)&return_to_handler; - unsigned long old; - int faulted; + int bit; /* * When resuming from suspend-to-ram, this function can be indirectly @@ -650,37 +656,25 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; - /* - * Protect against fault, even if it shouldn't - * happen. This tool is too much intrusive to - * ignore such a protection. - */ - asm volatile( - "1: " _ASM_MOV " (%[parent]), %[old]\n" - "2: " _ASM_MOV " %[return_hooker], (%[parent])\n" - " movl $0, %[faulted]\n" - "3:\n" - - ".section .fixup, \"ax\"\n" - "4: movl $1, %[faulted]\n" - " jmp 3b\n" - ".previous\n" - - _ASM_EXTABLE(1b, 4b) - _ASM_EXTABLE(2b, 4b) - - : [old] "=&r" (old), [faulted] "=r" (faulted) - : [parent] "r" (parent), [return_hooker] "r" (return_hooker) - : "memory" - ); - - if (unlikely(faulted)) { - ftrace_graph_stop(); - WARN_ON(1); + bit = ftrace_test_recursion_trylock(ip, *parent); + if (bit < 0) return; - } - if (function_graph_enter(old, self_addr, frame_pointer, parent)) - *parent = old; + if (!function_graph_enter(*parent, ip, frame_pointer, parent)) + *parent = return_hooker; + + ftrace_test_recursion_unlock(bit); } + +#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) +{ + struct pt_regs *regs = &fregs->regs; + unsigned long *stack = (unsigned long *)kernel_stack_pointer(regs); + + prepare_ftrace_return(ip, (unsigned long *)stack, 0); +} +#endif + #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 7c273846c687..7a879901f103 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -174,11 +174,6 @@ SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL) SYM_FUNC_END(ftrace_caller); SYM_FUNC_START(ftrace_epilogue) -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) - jmp ftrace_stub -#endif - /* * This is weak to keep gas from relaxing the jumps. * It is also used to copy the retq for trampolines. @@ -251,7 +246,6 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) * If ORIG_RAX is anything but zero, make this a call to that. * See arch_ftrace_set_direct_caller(). */ - movq ORIG_RAX(%rsp), %rax testq %rax, %rax SYM_INNER_LABEL(ftrace_regs_caller_jmp, SYM_L_GLOBAL) jnz 1f @@ -289,15 +283,6 @@ SYM_FUNC_START(__fentry__) cmpq $ftrace_stub, ftrace_trace_function jnz trace -fgraph_trace: -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - cmpq $ftrace_stub, ftrace_graph_return - jnz ftrace_graph_caller - - cmpq $ftrace_graph_entry_stub, ftrace_graph_entry - jnz ftrace_graph_caller -#endif - SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBAL) retq @@ -315,25 +300,12 @@ trace: CALL_NOSPEC r8 restore_mcount_regs - jmp fgraph_trace + jmp ftrace_stub SYM_FUNC_END(__fentry__) EXPORT_SYMBOL(__fentry__) #endif /* CONFIG_DYNAMIC_FTRACE */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER -SYM_FUNC_START(ftrace_graph_caller) - /* Saves rbp into %rdx and fills first parameter */ - save_mcount_regs - - leaq MCOUNT_REG_SIZE+8(%rsp), %rsi - movq $0, %rdx /* No framepointers needed */ - call prepare_ftrace_return - - restore_mcount_regs - - retq -SYM_FUNC_END(ftrace_graph_caller) - SYM_FUNC_START(return_to_handler) subq $24, %rsp diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index b6e046e4b289..fce99e249d61 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -809,7 +809,7 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) ri->fp = sara; /* Replace the return addr with trampoline addr */ - *sara = (unsigned long) &kretprobe_trampoline; + *sara = (unsigned long) &__kretprobe_trampoline; } NOKPROBE_SYMBOL(arch_prepare_kretprobe); @@ -1019,52 +1019,91 @@ NOKPROBE_SYMBOL(kprobe_int3_handler); */ asm( ".text\n" - ".global kretprobe_trampoline\n" - ".type kretprobe_trampoline, @function\n" - "kretprobe_trampoline:\n" - /* We don't bother saving the ss register */ + ".global __kretprobe_trampoline\n" + ".type __kretprobe_trampoline, @function\n" + "__kretprobe_trampoline:\n" #ifdef CONFIG_X86_64 + /* Push a fake return address to tell the unwinder it's a kretprobe. */ + " pushq $__kretprobe_trampoline\n" + UNWIND_HINT_FUNC + /* Save the 'sp - 8', this will be fixed later. */ " pushq %rsp\n" " pushfq\n" SAVE_REGS_STRING " movq %rsp, %rdi\n" " call trampoline_handler\n" - /* Replace saved sp with true return address. */ - " movq %rax, 19*8(%rsp)\n" RESTORE_REGS_STRING + /* In trampoline_handler(), 'regs->flags' is copied to 'regs->sp'. */ + " addq $8, %rsp\n" " popfq\n" #else + /* Push a fake return address to tell the unwinder it's a kretprobe. */ + " pushl $__kretprobe_trampoline\n" + UNWIND_HINT_FUNC + /* Save the 'sp - 4', this will be fixed later. */ " pushl %esp\n" " pushfl\n" SAVE_REGS_STRING " movl %esp, %eax\n" " call trampoline_handler\n" - /* Replace saved sp with true return address. */ - " movl %eax, 15*4(%esp)\n" RESTORE_REGS_STRING + /* In trampoline_handler(), 'regs->flags' is copied to 'regs->sp'. */ + " addl $4, %esp\n" " popfl\n" #endif " ret\n" - ".size kretprobe_trampoline, .-kretprobe_trampoline\n" + ".size __kretprobe_trampoline, .-__kretprobe_trampoline\n" ); -NOKPROBE_SYMBOL(kretprobe_trampoline); -STACK_FRAME_NON_STANDARD(kretprobe_trampoline); +NOKPROBE_SYMBOL(__kretprobe_trampoline); +/* + * __kretprobe_trampoline() skips updating frame pointer. The frame pointer + * saved in trampoline_handler() points to the real caller function's + * frame pointer. Thus the __kretprobe_trampoline() doesn't have a + * standard stack frame with CONFIG_FRAME_POINTER=y. + * Let's mark it non-standard function. Anyway, FP unwinder can correctly + * unwind without the hint. + */ +STACK_FRAME_NON_STANDARD_FP(__kretprobe_trampoline); + +/* This is called from kretprobe_trampoline_handler(). */ +void arch_kretprobe_fixup_return(struct pt_regs *regs, + kprobe_opcode_t *correct_ret_addr) +{ + unsigned long *frame_pointer = ®s->sp + 1; + /* Replace fake return address with real one. */ + *frame_pointer = (unsigned long)correct_ret_addr; +} /* - * Called from kretprobe_trampoline + * Called from __kretprobe_trampoline */ -__used __visible void *trampoline_handler(struct pt_regs *regs) +__used __visible void trampoline_handler(struct pt_regs *regs) { + unsigned long *frame_pointer; + /* fixup registers */ regs->cs = __KERNEL_CS; #ifdef CONFIG_X86_32 regs->gs = 0; #endif - regs->ip = (unsigned long)&kretprobe_trampoline; + regs->ip = (unsigned long)&__kretprobe_trampoline; regs->orig_ax = ~0UL; + regs->sp += sizeof(long); + frame_pointer = ®s->sp + 1; - return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline, ®s->sp); + /* + * The return address at 'frame_pointer' is recovered by the + * arch_kretprobe_fixup_return() which called from the + * kretprobe_trampoline_handler(). + */ + kretprobe_trampoline_handler(regs, frame_pointer); + + /* + * Copy FLAGS to 'pt_regs::sp' so that __kretprobe_trapmoline() + * can do RET right after POPF. + */ + regs->sp = regs->flags; } NOKPROBE_SYMBOL(trampoline_handler); diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 596de2f6d3a5..dd2ec14adb77 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -25,7 +25,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - preempt_disable_notrace(); p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) goto out; @@ -59,7 +58,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, __this_cpu_write(current_kprobe, NULL); } out: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 71425ebba98a..b4a54a52aa59 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -367,10 +367,10 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op) /* Check the addr is within the optimized instructions. */ int arch_within_optimized_kprobe(struct optimized_kprobe *op, - unsigned long addr) + kprobe_opcode_t *addr) { - return ((unsigned long)op->kp.addr <= addr && - (unsigned long)op->kp.addr + op->optinsn.size > addr); + return (op->kp.addr <= addr && + op->kp.addr + op->optinsn.size > addr); } /* Free optimized instruction slot */ diff --git a/arch/x86/kernel/trace.c b/arch/x86/kernel/trace.c index 6b73b6f92ad3..8322e8352777 100644 --- a/arch/x86/kernel/trace.c +++ b/arch/x86/kernel/trace.c @@ -231,4 +231,4 @@ void osnoise_arch_unregister(void) unregister_trace_local_timer_exit(trace_intel_irq_exit, "local_timer"); unregister_trace_local_timer_entry(trace_intel_irq_entry, NULL); } -#endif /* CONFIG_OSNOISE_TRAECR && CONFIG_X86_LOCAL_APIC */ +#endif /* CONFIG_OSNOISE_TRACER && CONFIG_X86_LOCAL_APIC */ diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index d7c44b257f7f..8e1c50c86e5d 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -240,8 +240,7 @@ static bool update_stack_state(struct unwind_state *state, else { addr_p = unwind_get_return_address_ptr(state); addr = READ_ONCE_TASK_STACK(state->task, *addr_p); - state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, - addr, addr_p); + state->ip = unwind_recover_ret_addr(state, addr, addr_p); } /* Save the original stack pointer for unwind_dump(): */ diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c index c49f10ffd8cd..884d68a6e714 100644 --- a/arch/x86/kernel/unwind_guess.c +++ b/arch/x86/kernel/unwind_guess.c @@ -15,8 +15,7 @@ unsigned long unwind_get_return_address(struct unwind_state *state) addr = READ_ONCE_NOCHECK(*state->sp); - return ftrace_graph_ret_addr(state->task, &state->graph_idx, - addr, state->sp); + return unwind_recover_ret_addr(state, addr, state->sp); } EXPORT_SYMBOL_GPL(unwind_get_return_address); diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index a1202536fc57..e6f7592790af 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -534,9 +534,8 @@ bool unwind_next_frame(struct unwind_state *state) if (!deref_stack_reg(state, ip_p, &state->ip)) goto err; - state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, - state->ip, (void *)ip_p); - + state->ip = unwind_recover_ret_addr(state, state->ip, + (unsigned long *)ip_p); state->sp = sp; state->regs = NULL; state->prev_regs = NULL; @@ -549,7 +548,18 @@ bool unwind_next_frame(struct unwind_state *state) (void *)orig_ip); goto err; } - + /* + * There is a small chance to interrupt at the entry of + * __kretprobe_trampoline() where the ORC info doesn't exist. + * That point is right after the RET to __kretprobe_trampoline() + * which was modified return address. + * At that point, the @addr_p of the unwind_recover_kretprobe() + * (this has to point the address of the stack entry storing + * the modified return address) must be "SP - (a stack entry)" + * because SP is incremented by the RET. + */ + state->ip = unwind_recover_kretprobe(state, state->ip, + (unsigned long *)(state->sp - sizeof(long))); state->regs = (struct pt_regs *)sp; state->prev_regs = NULL; state->full_regs = true; @@ -562,6 +572,9 @@ bool unwind_next_frame(struct unwind_state *state) (void *)orig_ip); goto err; } + /* See UNWIND_HINT_TYPE_REGS case comment. */ + state->ip = unwind_recover_kretprobe(state, state->ip, + (unsigned long *)(state->sp - sizeof(long))); if (state->full_regs) state->prev_regs = state->regs; diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 1261e8b41edb..925a621b432e 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -432,7 +432,8 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent, if (unlikely(!inode)) return failed_creating(dentry); - inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; + /* Do not set bits for OTH */ + inode->i_mode = S_IFDIR | S_IRWXU | S_IRUSR| S_IRGRP | S_IXUSR | S_IXGRP; inode->i_op = ops; inode->i_fop = &simple_dir_operations; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f2984af2b85b..8771c435f34b 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -164,16 +164,22 @@ * Need to also make ftrace_stub_graph point to ftrace_stub * so that the same stub location may have different protocols * and not mess up with C verifiers. + * + * ftrace_ops_list_func will be defined as arch_ftrace_ops_list_func + * as some archs will have a different prototype for that function + * but ftrace_ops_list_func() will have a single prototype. */ #define MCOUNT_REC() . = ALIGN(8); \ __start_mcount_loc = .; \ KEEP(*(__mcount_loc)) \ KEEP(*(__patchable_function_entries)) \ __stop_mcount_loc = .; \ - ftrace_stub_graph = ftrace_stub; + ftrace_stub_graph = ftrace_stub; \ + ftrace_ops_list_func = arch_ftrace_ops_list_func; #else # ifdef CONFIG_FUNCTION_TRACER -# define MCOUNT_REC() ftrace_stub_graph = ftrace_stub; +# define MCOUNT_REC() ftrace_stub_graph = ftrace_stub; \ + ftrace_ops_list_func = arch_ftrace_ops_list_func; # else # define MCOUNT_REC() # endif diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h index 537e1b991f11..a4665c7ab07c 100644 --- a/include/linux/bootconfig.h +++ b/include/linux/bootconfig.h @@ -7,8 +7,18 @@ * Author: Masami Hiramatsu <mhiramat@kernel.org> */ +#ifdef __KERNEL__ #include <linux/kernel.h> #include <linux/types.h> +#else /* !__KERNEL__ */ +/* + * NOTE: This is only for tools/bootconfig, because tools/bootconfig will + * run the parser sanity test. + * This does NOT mean linux/bootconfig.h is available in the user space. + * However, if you change this file, please make sure the tools/bootconfig + * has no issue on building and running. + */ +#endif #define BOOTCONFIG_MAGIC "#BOOTCONFIG\n" #define BOOTCONFIG_MAGIC_LEN 12 @@ -25,10 +35,10 @@ * The checksum will be used with the BOOTCONFIG_MAGIC and the size for * embedding the bootconfig in the initrd image. */ -static inline __init u32 xbc_calc_checksum(void *data, u32 size) +static inline __init uint32_t xbc_calc_checksum(void *data, uint32_t size) { unsigned char *p = data; - u32 ret = 0; + uint32_t ret = 0; while (size--) ret += *p++; @@ -38,10 +48,10 @@ static inline __init u32 xbc_calc_checksum(void *data, u32 size) /* XBC tree node */ struct xbc_node { - u16 next; - u16 child; - u16 parent; - u16 data; + uint16_t next; + uint16_t child; + uint16_t parent; + uint16_t data; } __attribute__ ((__packed__)); #define XBC_KEY 0 @@ -271,13 +281,12 @@ static inline int __init xbc_node_compose_key(struct xbc_node *node, } /* XBC node initializer */ -int __init xbc_init(char *buf, const char **emsg, int *epos); +int __init xbc_init(const char *buf, size_t size, const char **emsg, int *epos); +/* XBC node and size information */ +int __init xbc_get_info(int *node_size, size_t *data_size); /* XBC cleanup data structures */ -void __init xbc_destroy_all(void); - -/* Debug dump functions */ -void __init xbc_debug_dump(void); +void __init xbc_exit(void); #endif diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 832e65f06754..9999e29187de 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -30,16 +30,26 @@ #define ARCH_SUPPORTS_FTRACE_OPS 0 #endif +#ifdef CONFIG_FUNCTION_TRACER +struct ftrace_ops; +struct ftrace_regs; /* * If the arch's mcount caller does not support all of ftrace's * features, then it must call an indirect function that * does. Or at least does enough to prevent any unwelcome side effects. + * + * Also define the function prototype that these architectures use + * to call the ftrace_ops_list_func(). */ #if !ARCH_SUPPORTS_FTRACE_OPS # define FTRACE_FORCE_LIST_FUNC 1 +void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); #else # define FTRACE_FORCE_LIST_FUNC 0 +void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); #endif +#endif /* CONFIG_FUNCTION_TRACER */ /* Main tracing buffer and events set up */ #ifdef CONFIG_TRACING @@ -88,8 +98,6 @@ extern int ftrace_enable_sysctl(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); -struct ftrace_ops; - #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS struct ftrace_regs { @@ -316,7 +324,12 @@ int ftrace_modify_direct_caller(struct ftrace_func_entry *entry, unsigned long old_addr, unsigned long new_addr); unsigned long ftrace_find_rec_direct(unsigned long ip); +int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); +int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); +int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr); + #else +struct ftrace_ops; # define ftrace_direct_func_count 0 static inline int register_ftrace_direct(unsigned long ip, unsigned long addr) { @@ -346,6 +359,18 @@ static inline unsigned long ftrace_find_rec_direct(unsigned long ip) { return 0; } +static inline int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +{ + return -ENODEV; +} +static inline int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +{ + return -ENODEV; +} +static inline int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +{ + return -ENODEV; +} #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS @@ -795,6 +820,15 @@ static inline bool is_ftrace_trampoline(unsigned long addr) } #endif /* CONFIG_DYNAMIC_FTRACE */ +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +#ifndef ftrace_graph_func +#define ftrace_graph_func ftrace_stub +#define FTRACE_OPS_GRAPH_STUB FTRACE_OPS_FL_STUB +#else +#define FTRACE_OPS_GRAPH_STUB 0 +#endif +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + /* totally disable ftrace - can not re-enable after this */ void ftrace_kill(void); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index e4f3bfe08757..e974caf39d3e 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -3,7 +3,6 @@ #define _LINUX_KPROBES_H /* * Kernel Probes (KProbes) - * include/linux/kprobes.h * * Copyright (C) IBM Corporation, 2002, 2004 * @@ -39,7 +38,7 @@ #define KPROBE_REENTER 0x00000004 #define KPROBE_HIT_SSDONE 0x00000008 -#else /* CONFIG_KPROBES */ +#else /* !CONFIG_KPROBES */ #include <asm-generic/kprobes.h> typedef int kprobe_opcode_t; struct arch_specific_insn { @@ -105,25 +104,25 @@ struct kprobe { #define KPROBE_FLAG_FTRACE 8 /* probe is using ftrace */ /* Has this kprobe gone ? */ -static inline int kprobe_gone(struct kprobe *p) +static inline bool kprobe_gone(struct kprobe *p) { return p->flags & KPROBE_FLAG_GONE; } /* Is this kprobe disabled ? */ -static inline int kprobe_disabled(struct kprobe *p) +static inline bool kprobe_disabled(struct kprobe *p) { return p->flags & (KPROBE_FLAG_DISABLED | KPROBE_FLAG_GONE); } /* Is this kprobe really running optimized path ? */ -static inline int kprobe_optimized(struct kprobe *p) +static inline bool kprobe_optimized(struct kprobe *p) { return p->flags & KPROBE_FLAG_OPTIMIZED; } /* Is this kprobe uses ftrace ? */ -static inline int kprobe_ftrace(struct kprobe *p) +static inline bool kprobe_ftrace(struct kprobe *p) { return p->flags & KPROBE_FLAG_FTRACE; } @@ -181,14 +180,6 @@ struct kprobe_blacklist_entry { DECLARE_PER_CPU(struct kprobe *, current_kprobe); DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); -/* - * For #ifdef avoidance: - */ -static inline int kprobes_built_in(void) -{ - return 1; -} - extern void kprobe_busy_begin(void); extern void kprobe_busy_end(void); @@ -197,15 +188,26 @@ extern void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs); extern int arch_trampoline_kprobe(struct kprobe *p); +void arch_kretprobe_fixup_return(struct pt_regs *regs, + kprobe_opcode_t *correct_ret_addr); + +void __kretprobe_trampoline(void); +/* + * Since some architecture uses structured function pointer, + * use dereference_function_descriptor() to get real function address. + */ +static nokprobe_inline void *kretprobe_trampoline_addr(void) +{ + return dereference_kernel_function_descriptor(__kretprobe_trampoline); +} + /* If the trampoline handler called from a kprobe, use this version */ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs, - void *trampoline_address, - void *frame_pointer); + void *frame_pointer); static nokprobe_inline unsigned long kretprobe_trampoline_handler(struct pt_regs *regs, - void *trampoline_address, - void *frame_pointer) + void *frame_pointer) { unsigned long ret; /* @@ -214,7 +216,7 @@ unsigned long kretprobe_trampoline_handler(struct pt_regs *regs, * be running at this point. */ kprobe_busy_begin(); - ret = __kretprobe_trampoline_handler(regs, trampoline_address, frame_pointer); + ret = __kretprobe_trampoline_handler(regs, frame_pointer); kprobe_busy_end(); return ret; @@ -228,7 +230,7 @@ static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance return READ_ONCE(ri->rph->rp); } -#else /* CONFIG_KRETPROBES */ +#else /* !CONFIG_KRETPROBES */ static inline void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) { @@ -239,11 +241,15 @@ static inline int arch_trampoline_kprobe(struct kprobe *p) } #endif /* CONFIG_KRETPROBES */ +/* Markers of '_kprobe_blacklist' section */ +extern unsigned long __start_kprobe_blacklist[]; +extern unsigned long __stop_kprobe_blacklist[]; + extern struct kretprobe_blackpoint kretprobe_blacklist[]; #ifdef CONFIG_KPROBES_SANITY_TEST extern int init_test_probes(void); -#else +#else /* !CONFIG_KPROBES_SANITY_TEST */ static inline int init_test_probes(void) { return 0; @@ -303,7 +309,7 @@ static inline bool is_kprobe_##__name##_slot(unsigned long addr) \ #define KPROBE_OPTINSN_PAGE_SYM "kprobe_optinsn_page" int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum, unsigned long *value, char *type, char *sym); -#else /* __ARCH_WANT_KPROBES_INSN_SLOT */ +#else /* !__ARCH_WANT_KPROBES_INSN_SLOT */ #define DEFINE_INSN_CACHE_OPS(__name) \ static inline bool is_kprobe_##__name##_slot(unsigned long addr) \ { \ @@ -334,7 +340,7 @@ extern void arch_unoptimize_kprobes(struct list_head *oplist, struct list_head *done_list); extern void arch_unoptimize_kprobe(struct optimized_kprobe *op); extern int arch_within_optimized_kprobe(struct optimized_kprobe *op, - unsigned long addr); + kprobe_opcode_t *addr); extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs); @@ -345,18 +351,22 @@ extern int sysctl_kprobes_optimization; extern int proc_kprobes_optimization_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos); -#endif +#endif /* CONFIG_SYSCTL */ extern void wait_for_kprobe_optimizer(void); -#else +#else /* !CONFIG_OPTPROBES */ static inline void wait_for_kprobe_optimizer(void) { } #endif /* CONFIG_OPTPROBES */ + #ifdef CONFIG_KPROBES_ON_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs); extern int arch_prepare_kprobe_ftrace(struct kprobe *p); -#endif - -int arch_check_ftrace_location(struct kprobe *p); +#else +static inline int arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + return -EINVAL; +} +#endif /* CONFIG_KPROBES_ON_FTRACE */ /* Get the kprobe at this addr (if any) - called with preemption disabled */ struct kprobe *get_kprobe(void *addr); @@ -364,7 +374,7 @@ struct kprobe *get_kprobe(void *addr); /* kprobe_running() will just return the current_kprobe on this CPU */ static inline struct kprobe *kprobe_running(void) { - return (__this_cpu_read(current_kprobe)); + return __this_cpu_read(current_kprobe); } static inline void reset_current_kprobe(void) @@ -382,7 +392,6 @@ int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); int register_kprobes(struct kprobe **kps, int num); void unregister_kprobes(struct kprobe **kps, int num); -unsigned long arch_deref_entry_point(void *); int register_kretprobe(struct kretprobe *rp); void unregister_kretprobe(struct kretprobe *rp); @@ -410,10 +419,6 @@ int arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value, char *type, char *sym); #else /* !CONFIG_KPROBES: */ -static inline int kprobes_built_in(void) -{ - return 0; -} static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { return 0; @@ -428,11 +433,11 @@ static inline struct kprobe *kprobe_running(void) } static inline int register_kprobe(struct kprobe *p) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline int register_kprobes(struct kprobe **kps, int num) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline void unregister_kprobe(struct kprobe *p) { @@ -442,11 +447,11 @@ static inline void unregister_kprobes(struct kprobe **kps, int num) } static inline int register_kretprobe(struct kretprobe *rp) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline int register_kretprobes(struct kretprobe **rps, int num) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline void unregister_kretprobe(struct kretprobe *rp) { @@ -462,11 +467,11 @@ static inline void kprobe_free_init_mem(void) } static inline int disable_kprobe(struct kprobe *kp) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline int enable_kprobe(struct kprobe *kp) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline bool within_kprobe_blacklist(unsigned long addr) @@ -479,6 +484,7 @@ static inline int kprobe_get_kallsym(unsigned int symnum, unsigned long *value, return -ERANGE; } #endif /* CONFIG_KPROBES */ + static inline int disable_kretprobe(struct kretprobe *rp) { return disable_kprobe(&rp->kp); @@ -493,19 +499,42 @@ static inline bool is_kprobe_insn_slot(unsigned long addr) { return false; } -#endif +#endif /* !CONFIG_KPROBES */ + #ifndef CONFIG_OPTPROBES static inline bool is_kprobe_optinsn_slot(unsigned long addr) { return false; } +#endif /* !CONFIG_OPTPROBES */ + +#ifdef CONFIG_KRETPROBES +static nokprobe_inline bool is_kretprobe_trampoline(unsigned long addr) +{ + return (void *)addr == kretprobe_trampoline_addr(); +} + +unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp, + struct llist_node **cur); +#else +static nokprobe_inline bool is_kretprobe_trampoline(unsigned long addr) +{ + return false; +} + +static nokprobe_inline +unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp, + struct llist_node **cur) +{ + return 0; +} #endif /* Returns true if kprobes handled the fault */ static nokprobe_inline bool kprobe_page_fault(struct pt_regs *regs, unsigned int trap) { - if (!kprobes_built_in()) + if (!IS_ENABLED(CONFIG_KPROBES)) return false; if (user_mode(regs)) return false; diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 7e72d975cb76..aca52db2f3f3 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -66,6 +66,17 @@ struct unwind_hint { static void __used __section(".discard.func_stack_frame_non_standard") \ *__func_stack_frame_non_standard_##func = func +/* + * STACK_FRAME_NON_STANDARD_FP() is a frame-pointer-specific function ignore + * for the case where a function is intentionally missing frame pointer setup, + * but otherwise needs objtool/ORC coverage when frame pointers are disabled. + */ +#ifdef CONFIG_FRAME_POINTER +#define STACK_FRAME_NON_STANDARD_FP(func) STACK_FRAME_NON_STANDARD(func) +#else +#define STACK_FRAME_NON_STANDARD_FP(func) +#endif + #else /* __ASSEMBLY__ */ /* @@ -127,6 +138,7 @@ struct unwind_hint { #define UNWIND_HINT(sp_reg, sp_offset, type, end) \ "\n\t" #define STACK_FRAME_NON_STANDARD(func) +#define STACK_FRAME_NON_STANDARD_FP(func) #else #define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 031898b38d06..b4381f255a5c 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -77,6 +77,27 @@ /* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */ #include <asm/preempt.h> +/** + * interrupt_context_level - return interrupt context level + * + * Returns the current interrupt context level. + * 0 - normal context + * 1 - softirq context + * 2 - hardirq context + * 3 - NMI context + */ +static __always_inline unsigned char interrupt_context_level(void) +{ + unsigned long pc = preempt_count(); + unsigned char level = 0; + + level += !!(pc & (NMI_MASK)); + level += !!(pc & (NMI_MASK | HARDIRQ_MASK)); + level += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)); + + return level; +} + #define nmi_count() (preempt_count() & NMI_MASK) #define hardirq_count() (preempt_count() & HARDIRQ_MASK) #ifdef CONFIG_PREEMPT_RT diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 3e475eeb5a99..50453b287615 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -671,7 +671,7 @@ struct trace_event_file { } \ early_initcall(trace_init_perf_perm_##name); -#define PERF_MAX_TRACE_SIZE 2048 +#define PERF_MAX_TRACE_SIZE 8192 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index fe95f0922526..c303f7a114e9 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -116,13 +116,9 @@ enum { static __always_inline int trace_get_context_bit(void) { - unsigned long pc = preempt_count(); + unsigned char bit = interrupt_context_level(); - if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) - return TRACE_CTX_NORMAL; - else - return pc & NMI_MASK ? TRACE_CTX_NMI : - pc & HARDIRQ_MASK ? TRACE_CTX_IRQ : TRACE_CTX_SOFTIRQ; + return TRACE_CTX_NORMAL - bit; } #ifdef CONFIG_FTRACE_RECORD_RECURSION @@ -139,6 +135,9 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip); # define do_ftrace_record_recursion(ip, pip) do { } while (0) #endif +/* + * Preemption is promised to be disabled when return bit >= 0. + */ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip, int start) { @@ -148,8 +147,12 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign bit = trace_get_context_bit() + start; if (unlikely(val & (1 << bit))) { /* - * It could be that preempt_count has not been updated during - * a switch between contexts. Allow for a single recursion. + * If an interrupt occurs during a trace, and another trace + * happens in that interrupt but before the preempt_count is + * updated to reflect the new interrupt context, then this + * will think a recursion occurred, and the event will be dropped. + * Let a single instance happen via the TRANSITION_BIT to + * not drop those events. */ bit = TRACE_CTX_TRANSITION + start; if (val & (1 << bit)) { @@ -162,11 +165,17 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign current->trace_recursion = val; barrier(); + preempt_disable_notrace(); + return bit; } +/* + * Preemption will be enabled (if it was previously enabled). + */ static __always_inline void trace_clear_recursion(int bit) { + preempt_enable_notrace(); barrier(); trace_recursion_clear(bit); } @@ -178,7 +187,7 @@ static __always_inline void trace_clear_recursion(int bit) * tracing recursed in the same context (normal vs interrupt), * * Returns: -1 if a recursion happened. - * >= 0 if no recursion + * >= 0 if no recursion. */ static __always_inline int ftrace_test_recursion_trylock(unsigned long ip, unsigned long parent_ip) diff --git a/init/main.c b/init/main.c index 4162d7f3179f..183f861707e5 100644 --- a/init/main.c +++ b/init/main.c @@ -409,7 +409,7 @@ static void __init setup_boot_config(void) const char *msg; int pos; u32 size, csum; - char *data, *copy, *err; + char *data, *err; int ret; /* Cut out the bootconfig data even if we have no bootconfig option */ @@ -442,16 +442,7 @@ static void __init setup_boot_config(void) return; } - copy = memblock_alloc(size + 1, SMP_CACHE_BYTES); - if (!copy) { - pr_err("Failed to allocate memory for bootconfig\n"); - return; - } - - memcpy(copy, data, size); - copy[size] = '\0'; - - ret = xbc_init(copy, &msg, &pos); + ret = xbc_init(data, size, &msg, &pos); if (ret < 0) { if (pos < 0) pr_err("Failed to init bootconfig: %s.\n", msg); @@ -459,6 +450,7 @@ static void __init setup_boot_config(void) pr_err("Failed to parse bootconfig: %s at %d.\n", msg, pos); } else { + xbc_get_info(&ret, NULL); pr_info("Load bootconfig: %d bytes %d nodes\n", size, ret); /* keys starting with "kernel." are passed via cmdline */ extra_command_line = xbc_make_cmdline("kernel"); @@ -470,7 +462,7 @@ static void __init setup_boot_config(void) static void __init exit_boot_config(void) { - xbc_destroy_all(); + xbc_exit(); } #else /* !CONFIG_BOOT_CONFIG */ diff --git a/kernel/Makefile b/kernel/Makefile index 3f6ab5d5041b..186c49582f45 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -85,7 +85,6 @@ obj-$(CONFIG_PID_NS) += pid_namespace.o obj-$(CONFIG_IKCONFIG) += configs.o obj-$(CONFIG_IKHEADERS) += kheaders.o obj-$(CONFIG_SMP) += stop_machine.o -obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o obj-$(CONFIG_AUDIT) += audit.o auditfilter.o obj-$(CONFIG_AUDITSYSCALL) += auditsc.o audit_watch.o audit_fsnotify.o audit_tree.o obj-$(CONFIG_GCOV_KERNEL) += gcov/ diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 228801e20788..082832738c8f 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -205,12 +205,7 @@ DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) static inline int get_recursion_context(int *recursion) { - unsigned int pc = preempt_count(); - unsigned char rctx = 0; - - rctx += !!(pc & (NMI_MASK)); - rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK)); - rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)); + unsigned char rctx = interrupt_context_level(); if (recursion[rctx]) return -1; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 9a38e7581a5c..e9db0c810554 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* * Kernel Probes (KProbes) - * kernel/kprobes.c * * Copyright (C) IBM Corporation, 2002, 2004 * @@ -18,6 +17,9 @@ * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi * <prasanna@in.ibm.com> added function-return probes. */ + +#define pr_fmt(fmt) "kprobes: " fmt + #include <linux/kprobes.h> #include <linux/hash.h> #include <linux/init.h> @@ -49,18 +51,18 @@ static int kprobes_initialized; /* kprobe_table can be accessed by - * - Normal hlist traversal and RCU add/del under kprobe_mutex is held. + * - Normal hlist traversal and RCU add/del under 'kprobe_mutex' is held. * Or * - RCU hlist traversal under disabling preempt (breakpoint handlers) */ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; -/* NOTE: change this value only with kprobe_mutex held */ +/* NOTE: change this value only with 'kprobe_mutex' held */ static bool kprobes_all_disarmed; -/* This protects kprobe_table and optimizing_list */ +/* This protects 'kprobe_table' and 'optimizing_list' */ static DEFINE_MUTEX(kprobe_mutex); -static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; +static DEFINE_PER_CPU(struct kprobe *, kprobe_instance); kprobe_opcode_t * __weak kprobe_lookup_name(const char *name, unsigned int __unused) @@ -68,12 +70,15 @@ kprobe_opcode_t * __weak kprobe_lookup_name(const char *name, return ((kprobe_opcode_t *)(kallsyms_lookup_name(name))); } -/* Blacklist -- list of struct kprobe_blacklist_entry */ +/* + * Blacklist -- list of 'struct kprobe_blacklist_entry' to store info where + * kprobes can not probe. + */ static LIST_HEAD(kprobe_blacklist); #ifdef __ARCH_WANT_KPROBES_INSN_SLOT /* - * kprobe->ainsn.insn points to the copy of the instruction to be + * 'kprobe::ainsn.insn' points to the copy of the instruction to be * single-stepped. x86_64, POWER4 and above have no-exec support and * stepping on the instruction on a vmalloced/kmalloced/data page * is a recipe for disaster @@ -104,6 +109,12 @@ enum kprobe_slot_state { void __weak *alloc_insn_page(void) { + /* + * Use module_alloc() so this page is within +/- 2GB of where the + * kernel image and loaded module images reside. This is required + * for most of the architectures. + * (e.g. x86-64 needs this to handle the %rip-relative fixups.) + */ return module_alloc(PAGE_SIZE); } @@ -139,6 +150,7 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c) list_for_each_entry_rcu(kip, &c->pages, list) { if (kip->nused < slots_per_page(c)) { int i; + for (i = 0; i < slots_per_page(c); i++) { if (kip->slot_used[i] == SLOT_CLEAN) { kip->slot_used[i] = SLOT_USED; @@ -164,11 +176,6 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c) if (!kip) goto out; - /* - * Use module_alloc so this page is within +/- 2GB of where the - * kernel image and loaded module images reside. This is required - * so x86_64 can correctly handle the %rip-relative fixups. - */ kip->insns = c->alloc(); if (!kip->insns) { kfree(kip); @@ -191,8 +198,8 @@ out: return slot; } -/* Return 1 if all garbages are collected, otherwise 0. */ -static int collect_one_slot(struct kprobe_insn_page *kip, int idx) +/* Return true if all garbages are collected, otherwise false. */ +static bool collect_one_slot(struct kprobe_insn_page *kip, int idx) { kip->slot_used[idx] = SLOT_CLEAN; kip->nused--; @@ -216,9 +223,9 @@ static int collect_one_slot(struct kprobe_insn_page *kip, int idx) kip->cache->free(kip->insns); kfree(kip); } - return 1; + return true; } - return 0; + return false; } static int collect_garbage_slots(struct kprobe_insn_cache *c) @@ -230,6 +237,7 @@ static int collect_garbage_slots(struct kprobe_insn_cache *c) list_for_each_entry_safe(kip, next, &c->pages, list) { int i; + if (kip->ngarbage == 0) continue; kip->ngarbage = 0; /* we will collect all garbages */ @@ -310,7 +318,7 @@ int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum, list_for_each_entry_rcu(kip, &c->pages, list) { if ((*symnum)--) continue; - strlcpy(sym, c->sym, KSYM_NAME_LEN); + strscpy(sym, c->sym, KSYM_NAME_LEN); *type = 't'; *value = (unsigned long)kip->insns; ret = 0; @@ -358,9 +366,9 @@ static inline void reset_kprobe_instance(void) /* * This routine is called either: - * - under the kprobe_mutex - during kprobe_[un]register() - * OR - * - with preemption disabled - from arch/xxx/kernel/kprobes.c + * - under the 'kprobe_mutex' - during kprobe_[un]register(). + * OR + * - with preemption disabled - from architecture specific code. */ struct kprobe *get_kprobe(void *addr) { @@ -380,22 +388,20 @@ NOKPROBE_SYMBOL(get_kprobe); static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs); -/* Return true if the kprobe is an aggregator */ -static inline int kprobe_aggrprobe(struct kprobe *p) +/* Return true if 'p' is an aggregator */ +static inline bool kprobe_aggrprobe(struct kprobe *p) { return p->pre_handler == aggr_pre_handler; } -/* Return true(!0) if the kprobe is unused */ -static inline int kprobe_unused(struct kprobe *p) +/* Return true if 'p' is unused */ +static inline bool kprobe_unused(struct kprobe *p) { return kprobe_aggrprobe(p) && kprobe_disabled(p) && list_empty(&p->list); } -/* - * Keep all fields in the kprobe consistent - */ +/* Keep all fields in the kprobe consistent. */ static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p) { memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t)); @@ -403,11 +409,11 @@ static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p) } #ifdef CONFIG_OPTPROBES -/* NOTE: change this value only with kprobe_mutex held */ +/* NOTE: This is protected by 'kprobe_mutex'. */ static bool kprobes_allow_optimization; /* - * Call all pre_handler on the list, but ignores its return value. + * Call all 'kprobe::pre_handler' on the list, but ignores its return value. * This must be called from arch-dep optimized caller. */ void opt_pre_handler(struct kprobe *p, struct pt_regs *regs) @@ -435,7 +441,7 @@ static void free_aggr_kprobe(struct kprobe *p) kfree(op); } -/* Return true(!0) if the kprobe is ready for optimization. */ +/* Return true if the kprobe is ready for optimization. */ static inline int kprobe_optready(struct kprobe *p) { struct optimized_kprobe *op; @@ -448,8 +454,8 @@ static inline int kprobe_optready(struct kprobe *p) return 0; } -/* Return true(!0) if the kprobe is disarmed. Note: p must be on hash list */ -static inline int kprobe_disarmed(struct kprobe *p) +/* Return true if the kprobe is disarmed. Note: p must be on hash list */ +static inline bool kprobe_disarmed(struct kprobe *p) { struct optimized_kprobe *op; @@ -462,32 +468,32 @@ static inline int kprobe_disarmed(struct kprobe *p) return kprobe_disabled(p) && list_empty(&op->list); } -/* Return true(!0) if the probe is queued on (un)optimizing lists */ -static int kprobe_queued(struct kprobe *p) +/* Return true if the probe is queued on (un)optimizing lists */ +static bool kprobe_queued(struct kprobe *p) { struct optimized_kprobe *op; if (kprobe_aggrprobe(p)) { op = container_of(p, struct optimized_kprobe, kp); if (!list_empty(&op->list)) - return 1; + return true; } - return 0; + return false; } /* * Return an optimized kprobe whose optimizing code replaces - * instructions including addr (exclude breakpoint). + * instructions including 'addr' (exclude breakpoint). */ -static struct kprobe *get_optimized_kprobe(unsigned long addr) +static struct kprobe *get_optimized_kprobe(kprobe_opcode_t *addr) { int i; struct kprobe *p = NULL; struct optimized_kprobe *op; /* Don't check i == 0, since that is a breakpoint case. */ - for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH; i++) - p = get_kprobe((void *)(addr - i)); + for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH / sizeof(kprobe_opcode_t); i++) + p = get_kprobe(addr - i); if (p && kprobe_optready(p)) { op = container_of(p, struct optimized_kprobe, kp); @@ -498,7 +504,7 @@ static struct kprobe *get_optimized_kprobe(unsigned long addr) return NULL; } -/* Optimization staging list, protected by kprobe_mutex */ +/* Optimization staging list, protected by 'kprobe_mutex' */ static LIST_HEAD(optimizing_list); static LIST_HEAD(unoptimizing_list); static LIST_HEAD(freeing_list); @@ -509,20 +515,20 @@ static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer); /* * Optimize (replace a breakpoint with a jump) kprobes listed on - * optimizing_list. + * 'optimizing_list'. */ static void do_optimize_kprobes(void) { lockdep_assert_held(&text_mutex); /* - * The optimization/unoptimization refers online_cpus via - * stop_machine() and cpu-hotplug modifies online_cpus. - * And same time, text_mutex will be held in cpu-hotplug and here. - * This combination can cause a deadlock (cpu-hotplug try to lock - * text_mutex but stop_machine can not be done because online_cpus - * has been changed) - * To avoid this deadlock, caller must have locked cpu hotplug - * for preventing cpu-hotplug outside of text_mutex locking. + * The optimization/unoptimization refers 'online_cpus' via + * stop_machine() and cpu-hotplug modifies the 'online_cpus'. + * And same time, 'text_mutex' will be held in cpu-hotplug and here. + * This combination can cause a deadlock (cpu-hotplug tries to lock + * 'text_mutex' but stop_machine() can not be done because + * the 'online_cpus' has been changed) + * To avoid this deadlock, caller must have locked cpu-hotplug + * for preventing cpu-hotplug outside of 'text_mutex' locking. */ lockdep_assert_cpus_held(); @@ -536,7 +542,7 @@ static void do_optimize_kprobes(void) /* * Unoptimize (replace a jump with a breakpoint and remove the breakpoint - * if need) kprobes listed on unoptimizing_list. + * if need) kprobes listed on 'unoptimizing_list'. */ static void do_unoptimize_kprobes(void) { @@ -551,7 +557,7 @@ static void do_unoptimize_kprobes(void) return; arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list); - /* Loop free_list for disarming */ + /* Loop on 'freeing_list' for disarming */ list_for_each_entry_safe(op, tmp, &freeing_list, list) { /* Switching from detour code to origin */ op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; @@ -562,7 +568,7 @@ static void do_unoptimize_kprobes(void) /* * Remove unused probes from hash list. After waiting * for synchronization, these probes are reclaimed. - * (reclaiming is done by do_free_cleaned_kprobes.) + * (reclaiming is done by do_free_cleaned_kprobes().) */ hlist_del_rcu(&op->kp.hlist); } else @@ -570,7 +576,7 @@ static void do_unoptimize_kprobes(void) } } -/* Reclaim all kprobes on the free_list */ +/* Reclaim all kprobes on the 'freeing_list' */ static void do_free_cleaned_kprobes(void) { struct optimized_kprobe *op, *tmp; @@ -642,9 +648,9 @@ void wait_for_kprobe_optimizer(void) while (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) { mutex_unlock(&kprobe_mutex); - /* this will also make optimizing_work execute immmediately */ + /* This will also make 'optimizing_work' execute immmediately */ flush_delayed_work(&optimizing_work); - /* @optimizing_work might not have been queued yet, relax */ + /* 'optimizing_work' might not have been queued yet, relax */ cpu_relax(); mutex_lock(&kprobe_mutex); @@ -675,7 +681,7 @@ static void optimize_kprobe(struct kprobe *p) (kprobe_disabled(p) || kprobes_all_disarmed)) return; - /* kprobes with post_handler can not be optimized */ + /* kprobes with 'post_handler' can not be optimized */ if (p->post_handler) return; @@ -695,7 +701,10 @@ static void optimize_kprobe(struct kprobe *p) } op->kp.flags |= KPROBE_FLAG_OPTIMIZED; - /* On unoptimizing/optimizing_list, op must have OPTIMIZED flag */ + /* + * On the 'unoptimizing_list' and 'optimizing_list', + * 'op' must have OPTIMIZED flag + */ if (WARN_ON_ONCE(!list_empty(&op->list))) return; @@ -765,7 +774,7 @@ static int reuse_unused_kprobe(struct kprobe *ap) WARN_ON_ONCE(list_empty(&op->list)); /* Enable the probe again */ ap->flags &= ~KPROBE_FLAG_DISABLED; - /* Optimize it again (remove from op->list) */ + /* Optimize it again. (remove from 'op->list') */ if (!kprobe_optready(ap)) return -EINVAL; @@ -815,7 +824,7 @@ static void prepare_optimized_kprobe(struct kprobe *p) __prepare_optimized_kprobe(op, p); } -/* Allocate new optimized_kprobe and try to prepare optimized instructions */ +/* Allocate new optimized_kprobe and try to prepare optimized instructions. */ static struct kprobe *alloc_aggr_kprobe(struct kprobe *p) { struct optimized_kprobe *op; @@ -834,19 +843,19 @@ static struct kprobe *alloc_aggr_kprobe(struct kprobe *p) static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p); /* - * Prepare an optimized_kprobe and optimize it - * NOTE: p must be a normal registered kprobe + * Prepare an optimized_kprobe and optimize it. + * NOTE: 'p' must be a normal registered kprobe. */ static void try_to_optimize_kprobe(struct kprobe *p) { struct kprobe *ap; struct optimized_kprobe *op; - /* Impossible to optimize ftrace-based kprobe */ + /* Impossible to optimize ftrace-based kprobe. */ if (kprobe_ftrace(p)) return; - /* For preparing optimization, jump_label_text_reserved() is called */ + /* For preparing optimization, jump_label_text_reserved() is called. */ cpus_read_lock(); jump_label_lock(); mutex_lock(&text_mutex); @@ -857,14 +866,14 @@ static void try_to_optimize_kprobe(struct kprobe *p) op = container_of(ap, struct optimized_kprobe, kp); if (!arch_prepared_optinsn(&op->optinsn)) { - /* If failed to setup optimizing, fallback to kprobe */ + /* If failed to setup optimizing, fallback to kprobe. */ arch_remove_optimized_kprobe(op); kfree(op); goto out; } init_aggr_kprobe(ap, p); - optimize_kprobe(ap); /* This just kicks optimizer thread */ + optimize_kprobe(ap); /* This just kicks optimizer thread. */ out: mutex_unlock(&text_mutex); @@ -879,7 +888,7 @@ static void optimize_all_kprobes(void) unsigned int i; mutex_lock(&kprobe_mutex); - /* If optimization is already allowed, just return */ + /* If optimization is already allowed, just return. */ if (kprobes_allow_optimization) goto out; @@ -892,7 +901,7 @@ static void optimize_all_kprobes(void) optimize_kprobe(p); } cpus_read_unlock(); - printk(KERN_INFO "Kprobes globally optimized\n"); + pr_info("kprobe jump-optimization is enabled. All kprobes are optimized if possible.\n"); out: mutex_unlock(&kprobe_mutex); } @@ -905,7 +914,7 @@ static void unoptimize_all_kprobes(void) unsigned int i; mutex_lock(&kprobe_mutex); - /* If optimization is already prohibited, just return */ + /* If optimization is already prohibited, just return. */ if (!kprobes_allow_optimization) { mutex_unlock(&kprobe_mutex); return; @@ -923,9 +932,9 @@ static void unoptimize_all_kprobes(void) cpus_read_unlock(); mutex_unlock(&kprobe_mutex); - /* Wait for unoptimizing completion */ + /* Wait for unoptimizing completion. */ wait_for_kprobe_optimizer(); - printk(KERN_INFO "Kprobes globally unoptimized\n"); + pr_info("kprobe jump-optimization is disabled. All kprobes are based on software breakpoint.\n"); } static DEFINE_MUTEX(kprobe_sysctl_mutex); @@ -950,13 +959,15 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write, } #endif /* CONFIG_SYSCTL */ -/* Put a breakpoint for a probe. Must be called with text_mutex locked */ +/* Put a breakpoint for a probe. */ static void __arm_kprobe(struct kprobe *p) { struct kprobe *_p; - /* Check collision with other optimized kprobes */ - _p = get_optimized_kprobe((unsigned long)p->addr); + lockdep_assert_held(&text_mutex); + + /* Find the overlapping optimized kprobes. */ + _p = get_optimized_kprobe(p->addr); if (unlikely(_p)) /* Fallback to unoptimized kprobe */ unoptimize_kprobe(_p, true); @@ -965,22 +976,29 @@ static void __arm_kprobe(struct kprobe *p) optimize_kprobe(p); /* Try to optimize (add kprobe to a list) */ } -/* Remove the breakpoint of a probe. Must be called with text_mutex locked */ +/* Remove the breakpoint of a probe. */ static void __disarm_kprobe(struct kprobe *p, bool reopt) { struct kprobe *_p; + lockdep_assert_held(&text_mutex); + /* Try to unoptimize */ unoptimize_kprobe(p, kprobes_all_disarmed); if (!kprobe_queued(p)) { arch_disarm_kprobe(p); - /* If another kprobe was blocked, optimize it. */ - _p = get_optimized_kprobe((unsigned long)p->addr); + /* If another kprobe was blocked, re-optimize it. */ + _p = get_optimized_kprobe(p->addr); if (unlikely(_p) && reopt) optimize_kprobe(_p); } - /* TODO: reoptimize others after unoptimized this probe */ + /* + * TODO: Since unoptimization and real disarming will be done by + * the worker thread, we can not check whether another probe are + * unoptimized because of this probe here. It should be re-optimized + * by the worker thread. + */ } #else /* !CONFIG_OPTPROBES */ @@ -1003,7 +1021,7 @@ static int reuse_unused_kprobe(struct kprobe *ap) * unregistered. * Thus there should be no chance to reuse unused kprobe. */ - printk(KERN_ERR "Error: There should be no unused kprobe here.\n"); + WARN_ON_ONCE(1); return -EINVAL; } @@ -1033,34 +1051,21 @@ static struct ftrace_ops kprobe_ipmodify_ops __read_mostly = { static int kprobe_ipmodify_enabled; static int kprobe_ftrace_enabled; -/* Must ensure p->addr is really on ftrace */ -static int prepare_kprobe(struct kprobe *p) -{ - if (!kprobe_ftrace(p)) - return arch_prepare_kprobe(p); - - return arch_prepare_kprobe_ftrace(p); -} - -/* Caller must lock kprobe_mutex */ static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops, int *cnt) { int ret = 0; + lockdep_assert_held(&kprobe_mutex); + ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 0, 0); - if (ret) { - pr_debug("Failed to arm kprobe-ftrace at %pS (%d)\n", - p->addr, ret); + if (WARN_ONCE(ret < 0, "Failed to arm kprobe-ftrace at %pS (error %d)\n", p->addr, ret)) return ret; - } if (*cnt == 0) { ret = register_ftrace_function(ops); - if (ret) { - pr_debug("Failed to init kprobe-ftrace (%d)\n", ret); + if (WARN(ret < 0, "Failed to register kprobe-ftrace (error %d)\n", ret)) goto err_ftrace; - } } (*cnt)++; @@ -1084,22 +1089,23 @@ static int arm_kprobe_ftrace(struct kprobe *p) ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled); } -/* Caller must lock kprobe_mutex */ static int __disarm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops, int *cnt) { int ret = 0; + lockdep_assert_held(&kprobe_mutex); + if (*cnt == 1) { ret = unregister_ftrace_function(ops); - if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (%d)\n", ret)) + if (WARN(ret < 0, "Failed to unregister kprobe-ftrace (error %d)\n", ret)) return ret; } (*cnt)--; ret = ftrace_set_filter_ip(ops, (unsigned long)p->addr, 1, 0); - WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (%d)\n", + WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (error %d)\n", p->addr, ret); return ret; } @@ -1113,11 +1119,6 @@ static int disarm_kprobe_ftrace(struct kprobe *p) ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled); } #else /* !CONFIG_KPROBES_ON_FTRACE */ -static inline int prepare_kprobe(struct kprobe *p) -{ - return arch_prepare_kprobe(p); -} - static inline int arm_kprobe_ftrace(struct kprobe *p) { return -ENODEV; @@ -1129,7 +1130,15 @@ static inline int disarm_kprobe_ftrace(struct kprobe *p) } #endif -/* Arm a kprobe with text_mutex */ +static int prepare_kprobe(struct kprobe *p) +{ + /* Must ensure p->addr is really on ftrace */ + if (kprobe_ftrace(p)) + return arch_prepare_kprobe_ftrace(p); + + return arch_prepare_kprobe(p); +} + static int arm_kprobe(struct kprobe *kp) { if (unlikely(kprobe_ftrace(kp))) @@ -1144,7 +1153,6 @@ static int arm_kprobe(struct kprobe *kp) return 0; } -/* Disarm a kprobe with text_mutex */ static int disarm_kprobe(struct kprobe *kp, bool reopt) { if (unlikely(kprobe_ftrace(kp))) @@ -1194,17 +1202,17 @@ static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs, } NOKPROBE_SYMBOL(aggr_post_handler); -/* Walks the list and increments nmissed count for multiprobe case */ +/* Walks the list and increments 'nmissed' if 'p' has child probes. */ void kprobes_inc_nmissed_count(struct kprobe *p) { struct kprobe *kp; + if (!kprobe_aggrprobe(p)) { p->nmissed++; } else { list_for_each_entry_rcu(kp, &p->list, list) kp->nmissed++; } - return; } NOKPROBE_SYMBOL(kprobes_inc_nmissed_count); @@ -1222,9 +1230,9 @@ static void recycle_rp_inst(struct kretprobe_instance *ri) { struct kretprobe *rp = get_kretprobe(ri); - if (likely(rp)) { + if (likely(rp)) freelist_add(&ri->freelist, &rp->freelist); - } else + else call_rcu(&ri->rcu, free_rp_inst_rcu); } NOKPROBE_SYMBOL(recycle_rp_inst); @@ -1251,9 +1259,9 @@ void kprobe_busy_end(void) /* * This function is called from delayed_put_task_struct() when a task is - * dead and cleaned up to recycle any function-return probe instances - * associated with this task. These left over instances represent probed - * functions that have been called but will never return. + * dead and cleaned up to recycle any kretprobe instances associated with + * this task. These left over instances represent probed functions that + * have been called but will never return. */ void kprobe_flush_task(struct task_struct *tk) { @@ -1299,7 +1307,7 @@ static inline void free_rp_inst(struct kretprobe *rp) } } -/* Add the new probe to ap->list */ +/* Add the new probe to 'ap->list'. */ static int add_new_kprobe(struct kprobe *ap, struct kprobe *p) { if (p->post_handler) @@ -1313,12 +1321,12 @@ static int add_new_kprobe(struct kprobe *ap, struct kprobe *p) } /* - * Fill in the required fields of the "manager kprobe". Replace the - * earlier kprobe in the hlist with the manager kprobe + * Fill in the required fields of the aggregator kprobe. Replace the + * earlier kprobe in the hlist with the aggregator kprobe. */ static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p) { - /* Copy p's insn slot to ap */ + /* Copy the insn slot of 'p' to 'ap'. */ copy_kprobe(p, ap); flush_insn_slot(ap); ap->addr = p->addr; @@ -1336,8 +1344,7 @@ static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p) } /* - * This is the second or subsequent kprobe at the address - handle - * the intricacies + * This registers the second or subsequent kprobe at the same address. */ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p) { @@ -1351,7 +1358,7 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p) mutex_lock(&text_mutex); if (!kprobe_aggrprobe(orig_p)) { - /* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */ + /* If 'orig_p' is not an 'aggr_kprobe', create new one. */ ap = alloc_aggr_kprobe(orig_p); if (!ap) { ret = -ENOMEM; @@ -1376,8 +1383,8 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p) if (ret) /* * Even if fail to allocate new slot, don't need to - * free aggr_probe. It will be used next time, or - * freed by unregister_kprobe. + * free the 'ap'. It will be used next time, or + * freed by unregister_kprobe(). */ goto out; @@ -1392,7 +1399,7 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p) | KPROBE_FLAG_DISABLED; } - /* Copy ap's insn slot to p */ + /* Copy the insn slot of 'p' to 'ap'. */ copy_kprobe(ap, p); ret = add_new_kprobe(ap, p); @@ -1418,7 +1425,7 @@ out: bool __weak arch_within_kprobe_blacklist(unsigned long addr) { - /* The __kprobes marked functions and entry code must not be probed */ + /* The '__kprobes' functions and entry code must not be probed. */ return addr >= (unsigned long)__kprobes_text_start && addr < (unsigned long)__kprobes_text_end; } @@ -1430,8 +1437,8 @@ static bool __within_kprobe_blacklist(unsigned long addr) if (arch_within_kprobe_blacklist(addr)) return true; /* - * If there exists a kprobe_blacklist, verify and - * fail any probe registration in the prohibited area + * If 'kprobe_blacklist' is defined, check the address and + * reject any probe registration in the prohibited area. */ list_for_each_entry(ent, &kprobe_blacklist, list) { if (addr >= ent->start_addr && addr < ent->end_addr) @@ -1461,7 +1468,7 @@ bool within_kprobe_blacklist(unsigned long addr) } /* - * If we have a symbol_name argument, look it up and add the offset field + * If 'symbol_name' is specified, look it up and add the 'offset' * to it. This way, we can specify a relative address to a symbol. * This returns encoded errors if it fails to look up symbol or invalid * combination of parameters. @@ -1491,7 +1498,10 @@ static kprobe_opcode_t *kprobe_addr(struct kprobe *p) return _kprobe_addr(p->addr, p->symbol_name, p->offset); } -/* Check passed kprobe is valid and return kprobe in kprobe_table. */ +/* + * Check the 'p' is valid and return the aggregator kprobe + * at the same address. + */ static struct kprobe *__get_valid_kprobe(struct kprobe *p) { struct kprobe *ap, *list_p; @@ -1529,7 +1539,7 @@ static inline int warn_kprobe_rereg(struct kprobe *p) return ret; } -int __weak arch_check_ftrace_location(struct kprobe *p) +static int check_ftrace_location(struct kprobe *p) { unsigned long ftrace_addr; @@ -1552,7 +1562,7 @@ static int check_kprobe_address_safe(struct kprobe *p, { int ret; - ret = arch_check_ftrace_location(p); + ret = check_ftrace_location(p); if (ret) return ret; jump_label_lock(); @@ -1568,7 +1578,7 @@ static int check_kprobe_address_safe(struct kprobe *p, goto out; } - /* Check if are we probing a module */ + /* Check if 'p' is probing a module. */ *probed_mod = __module_text_address((unsigned long) p->addr); if (*probed_mod) { /* @@ -1581,7 +1591,7 @@ static int check_kprobe_address_safe(struct kprobe *p, } /* - * If the module freed .init.text, we couldn't insert + * If the module freed '.init.text', we couldn't insert * kprobes in there. */ if (within_module_init((unsigned long)p->addr, *probed_mod) && @@ -1628,7 +1638,7 @@ int register_kprobe(struct kprobe *p) old_p = get_kprobe(p->addr); if (old_p) { - /* Since this may unoptimize old_p, locking text_mutex. */ + /* Since this may unoptimize 'old_p', locking 'text_mutex'. */ ret = register_aggr_kprobe(old_p, p); goto out; } @@ -1667,8 +1677,8 @@ out: } EXPORT_SYMBOL_GPL(register_kprobe); -/* Check if all probes on the aggrprobe are disabled */ -static int aggr_kprobe_disabled(struct kprobe *ap) +/* Check if all probes on the 'ap' are disabled. */ +static bool aggr_kprobe_disabled(struct kprobe *ap) { struct kprobe *kp; @@ -1677,20 +1687,21 @@ static int aggr_kprobe_disabled(struct kprobe *ap) list_for_each_entry(kp, &ap->list, list) if (!kprobe_disabled(kp)) /* - * There is an active probe on the list. - * We can't disable this ap. + * Since there is an active probe on the list, + * we can't disable this 'ap'. */ - return 0; + return false; - return 1; + return true; } -/* Disable one kprobe: Make sure called under kprobe_mutex is locked */ static struct kprobe *__disable_kprobe(struct kprobe *p) { struct kprobe *orig_p; int ret; + lockdep_assert_held(&kprobe_mutex); + /* Get an original kprobe for return */ orig_p = __get_valid_kprobe(p); if (unlikely(orig_p == NULL)) @@ -1704,7 +1715,7 @@ static struct kprobe *__disable_kprobe(struct kprobe *p) /* Try to disarm and disable this/parent probe */ if (p == orig_p || aggr_kprobe_disabled(orig_p)) { /* - * If kprobes_all_disarmed is set, orig_p + * If 'kprobes_all_disarmed' is set, 'orig_p' * should have already been disarmed, so * skip unneed disarming process. */ @@ -1850,53 +1861,105 @@ static struct notifier_block kprobe_exceptions_nb = { .priority = 0x7fffffff /* we need to be notified first */ }; -unsigned long __weak arch_deref_entry_point(void *entry) +#ifdef CONFIG_KRETPROBES + +/* This assumes the 'tsk' is the current task or the is not running. */ +static kprobe_opcode_t *__kretprobe_find_ret_addr(struct task_struct *tsk, + struct llist_node **cur) { - return (unsigned long)entry; + struct kretprobe_instance *ri = NULL; + struct llist_node *node = *cur; + + if (!node) + node = tsk->kretprobe_instances.first; + else + node = node->next; + + while (node) { + ri = container_of(node, struct kretprobe_instance, llist); + if (ri->ret_addr != kretprobe_trampoline_addr()) { + *cur = node; + return ri->ret_addr; + } + node = node->next; + } + return NULL; } +NOKPROBE_SYMBOL(__kretprobe_find_ret_addr); -#ifdef CONFIG_KRETPROBES +/** + * kretprobe_find_ret_addr -- Find correct return address modified by kretprobe + * @tsk: Target task + * @fp: A frame pointer + * @cur: a storage of the loop cursor llist_node pointer for next call + * + * Find the correct return address modified by a kretprobe on @tsk in unsigned + * long type. If it finds the return address, this returns that address value, + * or this returns 0. + * The @tsk must be 'current' or a task which is not running. @fp is a hint + * to get the currect return address - which is compared with the + * kretprobe_instance::fp field. The @cur is a loop cursor for searching the + * kretprobe return addresses on the @tsk. The '*@cur' should be NULL at the + * first call, but '@cur' itself must NOT NULL. + */ +unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp, + struct llist_node **cur) +{ + struct kretprobe_instance *ri = NULL; + kprobe_opcode_t *ret; + + if (WARN_ON_ONCE(!cur)) + return 0; + + do { + ret = __kretprobe_find_ret_addr(tsk, cur); + if (!ret) + break; + ri = container_of(*cur, struct kretprobe_instance, llist); + } while (ri->fp != fp); + + return (unsigned long)ret; +} +NOKPROBE_SYMBOL(kretprobe_find_ret_addr); + +void __weak arch_kretprobe_fixup_return(struct pt_regs *regs, + kprobe_opcode_t *correct_ret_addr) +{ + /* + * Do nothing by default. Please fill this to update the fake return + * address on the stack with the correct one on each arch if possible. + */ +} unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs, - void *trampoline_address, void *frame_pointer) { kprobe_opcode_t *correct_ret_addr = NULL; struct kretprobe_instance *ri = NULL; - struct llist_node *first, *node; + struct llist_node *first, *node = NULL; struct kretprobe *rp; - /* Find all nodes for this frame. */ - first = node = current->kretprobe_instances.first; - while (node) { - ri = container_of(node, struct kretprobe_instance, llist); - - BUG_ON(ri->fp != frame_pointer); - - if (ri->ret_addr != trampoline_address) { - correct_ret_addr = ri->ret_addr; - /* - * This is the real return address. Any other - * instances associated with this task are for - * other calls deeper on the call stack - */ - goto found; - } - - node = node->next; + /* Find correct address and all nodes for this frame. */ + correct_ret_addr = __kretprobe_find_ret_addr(current, &node); + if (!correct_ret_addr) { + pr_err("kretprobe: Return address not found, not execute handler. Maybe there is a bug in the kernel.\n"); + BUG_ON(1); } - pr_err("Oops! Kretprobe fails to find correct return address.\n"); - BUG_ON(1); -found: - /* Unlink all nodes for this frame. */ - current->kretprobe_instances.first = node->next; - node->next = NULL; + /* + * Set the return address as the instruction pointer, because if the + * user handler calls stack_trace_save_regs() with this 'regs', + * the stack trace will start from the instruction pointer. + */ + instruction_pointer_set(regs, (unsigned long)correct_ret_addr); - /* Run them.. */ + /* Run the user handler of the nodes. */ + first = current->kretprobe_instances.first; while (first) { ri = container_of(first, struct kretprobe_instance, llist); - first = first->next; + + if (WARN_ON_ONCE(ri->fp != frame_pointer)) + break; rp = get_kretprobe(ri); if (rp && rp->handler) { @@ -1907,6 +1970,23 @@ found: rp->handler(ri, regs); __this_cpu_write(current_kprobe, prev); } + if (first == node) + break; + + first = first->next; + } + + arch_kretprobe_fixup_return(regs, correct_ret_addr); + + /* Unlink all nodes for this frame. */ + first = current->kretprobe_instances.first; + current->kretprobe_instances.first = node->next; + node->next = NULL; + + /* Recycle free instances. */ + while (first) { + ri = container_of(first, struct kretprobe_instance, llist); + first = first->next; recycle_rp_inst(ri); } @@ -1991,7 +2071,7 @@ int register_kretprobe(struct kretprobe *rp) if (ret) return ret; - /* If only rp->kp.addr is specified, check reregistering kprobes */ + /* If only 'rp->kp.addr' is specified, check reregistering kprobes */ if (rp->kp.addr && warn_kprobe_rereg(&rp->kp)) return -EINVAL; @@ -2096,13 +2176,13 @@ EXPORT_SYMBOL_GPL(unregister_kretprobes); #else /* CONFIG_KRETPROBES */ int register_kretprobe(struct kretprobe *rp) { - return -ENOSYS; + return -EOPNOTSUPP; } EXPORT_SYMBOL_GPL(register_kretprobe); int register_kretprobes(struct kretprobe **rps, int num) { - return -ENOSYS; + return -EOPNOTSUPP; } EXPORT_SYMBOL_GPL(register_kretprobes); @@ -2151,7 +2231,7 @@ static void kill_kprobe(struct kprobe *p) /* * The module is going away. We should disarm the kprobe which * is using ftrace, because ftrace framework is still available at - * MODULE_STATE_GOING notification. + * 'MODULE_STATE_GOING' notification. */ if (kprobe_ftrace(p) && !kprobe_disabled(p) && !kprobes_all_disarmed) disarm_kprobe_ftrace(p); @@ -2214,8 +2294,7 @@ EXPORT_SYMBOL_GPL(enable_kprobe); /* Caller must NOT call this in usual path. This is only for critical case */ void dump_kprobe(struct kprobe *kp) { - pr_err("Dumping kprobe:\n"); - pr_err("Name: %s\nOffset: %x\nAddress: %pS\n", + pr_err("Dump kprobe:\n.symbol_name = %s, .offset = %x, .addr = %pS\n", kp->symbol_name, kp->offset, kp->addr); } NOKPROBE_SYMBOL(dump_kprobe); @@ -2317,7 +2396,7 @@ static int __init populate_kprobe_blacklist(unsigned long *start, int ret; for (iter = start; iter < end; iter++) { - entry = arch_deref_entry_point((void *)*iter); + entry = (unsigned long)dereference_symbol_descriptor((void *)*iter); ret = kprobe_add_ksym_blacklist(entry); if (ret == -EINVAL) continue; @@ -2325,13 +2404,13 @@ static int __init populate_kprobe_blacklist(unsigned long *start, return ret; } - /* Symbols in __kprobes_text are blacklisted */ + /* Symbols in '__kprobes_text' are blacklisted */ ret = kprobe_add_area_blacklist((unsigned long)__kprobes_text_start, (unsigned long)__kprobes_text_end); if (ret) return ret; - /* Symbols in noinstr section are blacklisted */ + /* Symbols in 'noinstr' section are blacklisted */ ret = kprobe_add_area_blacklist((unsigned long)__noinstr_text_start, (unsigned long)__noinstr_text_end); @@ -2403,9 +2482,9 @@ static int kprobes_module_callback(struct notifier_block *nb, return NOTIFY_DONE; /* - * When MODULE_STATE_GOING was notified, both of module .text and - * .init.text sections would be freed. When MODULE_STATE_LIVE was - * notified, only .init.text section would be freed. We need to + * When 'MODULE_STATE_GOING' was notified, both of module '.text' and + * '.init.text' sections would be freed. When 'MODULE_STATE_LIVE' was + * notified, only '.init.text' section would be freed. We need to * disable kprobes which have been inserted in the sections. */ mutex_lock(&kprobe_mutex); @@ -2422,9 +2501,9 @@ static int kprobes_module_callback(struct notifier_block *nb, * * Note, this will also move any optimized probes * that are pending to be removed from their - * corresponding lists to the freeing_list and + * corresponding lists to the 'freeing_list' and * will not be touched by the delayed - * kprobe_optimizer work handler. + * kprobe_optimizer() work handler. */ kill_kprobe(p); } @@ -2440,10 +2519,6 @@ static struct notifier_block kprobe_module_nb = { .priority = 0 }; -/* Markers of _kprobe_blacklist section */ -extern unsigned long __start_kprobe_blacklist[]; -extern unsigned long __stop_kprobe_blacklist[]; - void kprobe_free_init_mem(void) { void *start = (void *)(&__init_begin); @@ -2454,7 +2529,7 @@ void kprobe_free_init_mem(void) mutex_lock(&kprobe_mutex); - /* Kill all kprobes on initmem */ + /* Kill all kprobes on initmem because the target code has been freed. */ for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; hlist_for_each_entry(p, head, hlist) { @@ -2477,10 +2552,8 @@ static int __init init_kprobes(void) err = populate_kprobe_blacklist(__start_kprobe_blacklist, __stop_kprobe_blacklist); - if (err) { - pr_err("kprobes: failed to populate blacklist: %d\n", err); - pr_err("Please take care of using kprobes.\n"); - } + if (err) + pr_err("Failed to populate blacklist (error %d), kprobes not restricted, be careful using them!\n", err); if (kretprobe_blacklist_size) { /* lookup the function address from its name */ @@ -2488,7 +2561,7 @@ static int __init init_kprobes(void) kretprobe_blacklist[i].addr = kprobe_lookup_name(kretprobe_blacklist[i].name, 0); if (!kretprobe_blacklist[i].addr) - printk("kretprobe: lookup failed: %s\n", + pr_err("Failed to lookup symbol '%s' for kretprobe blacklist. Maybe the target function is removed or renamed.\n", kretprobe_blacklist[i].name); } } @@ -2497,7 +2570,7 @@ static int __init init_kprobes(void) kprobes_all_disarmed = false; #if defined(CONFIG_OPTPROBES) && defined(__ARCH_WANT_KPROBES_INSN_SLOT) - /* Init kprobe_optinsn_slots for allocation */ + /* Init 'kprobe_optinsn_slots' for allocation */ kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; #endif @@ -2508,9 +2581,6 @@ static int __init init_kprobes(void) err = register_module_notifier(&kprobe_module_nb); kprobes_initialized = (err == 0); - - if (!err) - init_test_probes(); return err; } early_initcall(init_kprobes); @@ -2631,7 +2701,7 @@ static int kprobe_blacklist_seq_show(struct seq_file *m, void *v) list_entry(v, struct kprobe_blacklist_entry, list); /* - * If /proc/kallsyms is not showing kernel address, we won't + * If '/proc/kallsyms' is not showing kernel address, we won't * show them here either. */ if (!kallsyms_show_value(m->file->f_cred)) @@ -2692,7 +2762,7 @@ static int arm_all_kprobes(void) } if (errors) - pr_warn("Kprobes globally enabled, but failed to arm %d out of %d probes\n", + pr_warn("Kprobes globally enabled, but failed to enable %d out of %d probes. Please check which kprobes are kept disabled via debugfs.\n", errors, total); else pr_info("Kprobes globally enabled\n"); @@ -2735,7 +2805,7 @@ static int disarm_all_kprobes(void) } if (errors) - pr_warn("Kprobes globally disabled, but failed to disarm %d out of %d probes\n", + pr_warn("Kprobes globally disabled, but failed to disable %d out of %d probes. Please check which kprobes are kept enabled via debugfs.\n", errors, total); else pr_info("Kprobes globally disabled\n"); @@ -2770,30 +2840,14 @@ static ssize_t read_enabled_file_bool(struct file *file, static ssize_t write_enabled_file_bool(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { - char buf[32]; - size_t buf_size; - int ret = 0; - - buf_size = min(count, (sizeof(buf)-1)); - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; + bool enable; + int ret; - buf[buf_size] = '\0'; - switch (buf[0]) { - case 'y': - case 'Y': - case '1': - ret = arm_all_kprobes(); - break; - case 'n': - case 'N': - case '0': - ret = disarm_all_kprobes(); - break; - default: - return -EINVAL; - } + ret = kstrtobool_from_user(user_buf, count, &enable); + if (ret) + return ret; + ret = enable ? arm_all_kprobes() : disarm_all_kprobes(); if (ret) return ret; @@ -2809,13 +2863,12 @@ static const struct file_operations fops_kp = { static int __init debugfs_kprobe_init(void) { struct dentry *dir; - unsigned int value = 1; dir = debugfs_create_dir("kprobes", NULL); debugfs_create_file("list", 0400, dir, NULL, &kprobes_fops); - debugfs_create_file("enabled", 0600, dir, &value, &fops_kp); + debugfs_create_file("enabled", 0600, dir, NULL, &fops_kp); debugfs_create_file("blacklist", 0400, dir, NULL, &kprobe_blacklist_fops); diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index e8029aea67f1..fe316c021d73 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -49,14 +49,15 @@ static void notrace klp_ftrace_handler(unsigned long ip, ops = container_of(fops, struct klp_ops, fops); + /* + * The ftrace_test_recursion_trylock() will disable preemption, + * which is required for the variant of synchronize_rcu() that is + * used to allow patching functions where RCU is not watching. + * See klp_synchronize_transition() for more details. + */ bit = ftrace_test_recursion_trylock(ip, parent_ip); if (WARN_ON_ONCE(bit < 0)) return; - /* - * A variant of synchronize_rcu() is used to allow patching functions - * where RCU is not watching, see klp_synchronize_transition(). - */ - preempt_disable_notrace(); func = list_first_or_null_rcu(&ops->func_stack, struct klp_func, stack_node); @@ -120,7 +121,6 @@ static void notrace klp_ftrace_handler(unsigned long ip, klp_arch_set_pc(fregs, (unsigned long)func->new_func); unlock: - preempt_enable_notrace(); ftrace_test_recursion_unlock(bit); } diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c deleted file mode 100644 index 76c997fdbc9d..000000000000 --- a/kernel/test_kprobes.c +++ /dev/null @@ -1,313 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * test_kprobes.c - simple sanity test for *probes - * - * Copyright IBM Corp. 2008 - */ - -#define pr_fmt(fmt) "Kprobe smoke test: " fmt - -#include <linux/kernel.h> -#include <linux/kprobes.h> -#include <linux/random.h> - -#define div_factor 3 - -static u32 rand1, preh_val, posth_val; -static int errors, handler_errors, num_tests; -static u32 (*target)(u32 value); -static u32 (*target2)(u32 value); - -static noinline u32 kprobe_target(u32 value) -{ - return (value / div_factor); -} - -static int kp_pre_handler(struct kprobe *p, struct pt_regs *regs) -{ - if (preemptible()) { - handler_errors++; - pr_err("pre-handler is preemptible\n"); - } - preh_val = (rand1 / div_factor); - return 0; -} - -static void kp_post_handler(struct kprobe *p, struct pt_regs *regs, - unsigned long flags) -{ - if (preemptible()) { - handler_errors++; - pr_err("post-handler is preemptible\n"); - } - if (preh_val != (rand1 / div_factor)) { - handler_errors++; - pr_err("incorrect value in post_handler\n"); - } - posth_val = preh_val + div_factor; -} - -static struct kprobe kp = { - .symbol_name = "kprobe_target", - .pre_handler = kp_pre_handler, - .post_handler = kp_post_handler -}; - -static int test_kprobe(void) -{ - int ret; - - ret = register_kprobe(&kp); - if (ret < 0) { - pr_err("register_kprobe returned %d\n", ret); - return ret; - } - - ret = target(rand1); - unregister_kprobe(&kp); - - if (preh_val == 0) { - pr_err("kprobe pre_handler not called\n"); - handler_errors++; - } - - if (posth_val == 0) { - pr_err("kprobe post_handler not called\n"); - handler_errors++; - } - - return 0; -} - -static noinline u32 kprobe_target2(u32 value) -{ - return (value / div_factor) + 1; -} - -static int kp_pre_handler2(struct kprobe *p, struct pt_regs *regs) -{ - preh_val = (rand1 / div_factor) + 1; - return 0; -} - -static void kp_post_handler2(struct kprobe *p, struct pt_regs *regs, - unsigned long flags) -{ - if (preh_val != (rand1 / div_factor) + 1) { - handler_errors++; - pr_err("incorrect value in post_handler2\n"); - } - posth_val = preh_val + div_factor; -} - -static struct kprobe kp2 = { - .symbol_name = "kprobe_target2", - .pre_handler = kp_pre_handler2, - .post_handler = kp_post_handler2 -}; - -static int test_kprobes(void) -{ - int ret; - struct kprobe *kps[2] = {&kp, &kp2}; - - /* addr and flags should be cleard for reusing kprobe. */ - kp.addr = NULL; - kp.flags = 0; - ret = register_kprobes(kps, 2); - if (ret < 0) { - pr_err("register_kprobes returned %d\n", ret); - return ret; - } - - preh_val = 0; - posth_val = 0; - ret = target(rand1); - - if (preh_val == 0) { - pr_err("kprobe pre_handler not called\n"); - handler_errors++; - } - - if (posth_val == 0) { - pr_err("kprobe post_handler not called\n"); - handler_errors++; - } - - preh_val = 0; - posth_val = 0; - ret = target2(rand1); - - if (preh_val == 0) { - pr_err("kprobe pre_handler2 not called\n"); - handler_errors++; - } - - if (posth_val == 0) { - pr_err("kprobe post_handler2 not called\n"); - handler_errors++; - } - - unregister_kprobes(kps, 2); - return 0; - -} - -#ifdef CONFIG_KRETPROBES -static u32 krph_val; - -static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs) -{ - if (preemptible()) { - handler_errors++; - pr_err("kretprobe entry handler is preemptible\n"); - } - krph_val = (rand1 / div_factor); - return 0; -} - -static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs) -{ - unsigned long ret = regs_return_value(regs); - - if (preemptible()) { - handler_errors++; - pr_err("kretprobe return handler is preemptible\n"); - } - if (ret != (rand1 / div_factor)) { - handler_errors++; - pr_err("incorrect value in kretprobe handler\n"); - } - if (krph_val == 0) { - handler_errors++; - pr_err("call to kretprobe entry handler failed\n"); - } - - krph_val = rand1; - return 0; -} - -static struct kretprobe rp = { - .handler = return_handler, - .entry_handler = entry_handler, - .kp.symbol_name = "kprobe_target" -}; - -static int test_kretprobe(void) -{ - int ret; - - ret = register_kretprobe(&rp); - if (ret < 0) { - pr_err("register_kretprobe returned %d\n", ret); - return ret; - } - - ret = target(rand1); - unregister_kretprobe(&rp); - if (krph_val != rand1) { - pr_err("kretprobe handler not called\n"); - handler_errors++; - } - - return 0; -} - -static int return_handler2(struct kretprobe_instance *ri, struct pt_regs *regs) -{ - unsigned long ret = regs_return_value(regs); - - if (ret != (rand1 / div_factor) + 1) { - handler_errors++; - pr_err("incorrect value in kretprobe handler2\n"); - } - if (krph_val == 0) { - handler_errors++; - pr_err("call to kretprobe entry handler failed\n"); - } - - krph_val = rand1; - return 0; -} - -static struct kretprobe rp2 = { - .handler = return_handler2, - .entry_handler = entry_handler, - .kp.symbol_name = "kprobe_target2" -}; - -static int test_kretprobes(void) -{ - int ret; - struct kretprobe *rps[2] = {&rp, &rp2}; - - /* addr and flags should be cleard for reusing kprobe. */ - rp.kp.addr = NULL; - rp.kp.flags = 0; - ret = register_kretprobes(rps, 2); - if (ret < 0) { - pr_err("register_kretprobe returned %d\n", ret); - return ret; - } - - krph_val = 0; - ret = target(rand1); - if (krph_val != rand1) { - pr_err("kretprobe handler not called\n"); - handler_errors++; - } - - krph_val = 0; - ret = target2(rand1); - if (krph_val != rand1) { - pr_err("kretprobe handler2 not called\n"); - handler_errors++; - } - unregister_kretprobes(rps, 2); - return 0; -} -#endif /* CONFIG_KRETPROBES */ - -int init_test_probes(void) -{ - int ret; - - target = kprobe_target; - target2 = kprobe_target2; - - do { - rand1 = prandom_u32(); - } while (rand1 <= div_factor); - - pr_info("started\n"); - num_tests++; - ret = test_kprobe(); - if (ret < 0) - errors++; - - num_tests++; - ret = test_kprobes(); - if (ret < 0) - errors++; - -#ifdef CONFIG_KRETPROBES - num_tests++; - ret = test_kretprobe(); - if (ret < 0) - errors++; - - num_tests++; - ret = test_kretprobes(); - if (ret < 0) - errors++; -#endif /* CONFIG_KRETPROBES */ - - if (errors) - pr_err("BUG: %d out of %d tests failed\n", errors, num_tests); - else if (handler_errors) - pr_err("BUG: %d error(s) running handlers\n", handler_errors); - else - pr_info("passed successfully\n"); - - return 0; -} diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 6de5d4d63165..bedc5caceec7 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -47,6 +47,7 @@ obj-$(CONFIG_TRACING) += trace_output.o obj-$(CONFIG_TRACING) += trace_seq.o obj-$(CONFIG_TRACING) += trace_stat.o obj-$(CONFIG_TRACING) += trace_printk.o +obj-$(CONFIG_TRACING) += pid_list.o obj-$(CONFIG_TRACING_MAP) += tracing_map.o obj-$(CONFIG_PREEMPTIRQ_DELAY_TEST) += preemptirq_delay_test.o obj-$(CONFIG_SYNTH_EVENT_GEN_TEST) += synth_event_gen_test.o diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index b8a0d1d564fb..22061d38fc00 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -115,6 +115,7 @@ int function_graph_enter(unsigned long ret, unsigned long func, { struct ftrace_graph_ent trace; +#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS /* * Skip graph tracing if the return location is served by direct trampoline, * since call sequence and return addresses are unpredictable anyway. @@ -124,6 +125,7 @@ int function_graph_enter(unsigned long ret, unsigned long func, if (ftrace_direct_func_count && ftrace_find_rec_direct(ret - MCOUNT_INSN_SIZE)) return -EBUSY; +#endif trace.func = func; trace.depth = ++current->curr_ret_depth; @@ -333,10 +335,10 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, #endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */ static struct ftrace_ops graph_ops = { - .func = ftrace_stub, + .func = ftrace_graph_func, .flags = FTRACE_OPS_FL_INITIALIZED | FTRACE_OPS_FL_PID | - FTRACE_OPS_FL_STUB, + FTRACE_OPS_GRAPH_STUB, #ifdef FTRACE_GRAPH_TRAMP_ADDR .trampoline = FTRACE_GRAPH_TRAMP_ADDR, /* trampoline_size is only needed for dynamically allocated tramps */ diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index feebf57c6458..f3ea4e20072f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -119,14 +119,9 @@ struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; struct ftrace_ops global_ops; -#if ARCH_SUPPORTS_FTRACE_OPS -static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct ftrace_regs *fregs); -#else -/* See comment below, where ftrace_ops_list_func is defined */ -static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip); -#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops) -#endif +/* Defined by vmlinux.lds.h see the commment above arch_ftrace_ops_list_func for details */ +void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs); static inline void ftrace_ops_init(struct ftrace_ops *ops) { @@ -581,7 +576,7 @@ static void ftrace_profile_reset(struct ftrace_profile_stat *stat) FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head)); } -int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) +static int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) { struct ftrace_profile_page *pg; int functions; @@ -988,8 +983,9 @@ static __init void ftrace_profile_tracefs(struct dentry *d_tracer) } } - entry = tracefs_create_file("function_profile_enabled", 0644, - d_tracer, NULL, &ftrace_profile_fops); + entry = tracefs_create_file("function_profile_enabled", + TRACE_MODE_WRITE, d_tracer, NULL, + &ftrace_profile_fops); if (!entry) pr_warn("Could not create tracefs 'function_profile_enabled' entry\n"); } @@ -2394,6 +2390,39 @@ unsigned long ftrace_find_rec_direct(unsigned long ip) return entry->direct; } +static struct ftrace_func_entry* +ftrace_add_rec_direct(unsigned long ip, unsigned long addr, + struct ftrace_hash **free_hash) +{ + struct ftrace_func_entry *entry; + + if (ftrace_hash_empty(direct_functions) || + direct_functions->count > 2 * (1 << direct_functions->size_bits)) { + struct ftrace_hash *new_hash; + int size = ftrace_hash_empty(direct_functions) ? 0 : + direct_functions->count + 1; + + if (size < 32) + size = 32; + + new_hash = dup_hash(direct_functions, size); + if (!new_hash) + return NULL; + + *free_hash = direct_functions; + direct_functions = new_hash; + } + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return NULL; + + entry->ip = ip; + entry->direct = addr; + __add_hash_entry(direct_functions, entry); + return entry; +} + static void call_direct_funcs(unsigned long ip, unsigned long pip, struct ftrace_ops *ops, struct ftrace_regs *fregs) { @@ -5110,39 +5139,16 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) } ret = -ENOMEM; - if (ftrace_hash_empty(direct_functions) || - direct_functions->count > 2 * (1 << direct_functions->size_bits)) { - struct ftrace_hash *new_hash; - int size = ftrace_hash_empty(direct_functions) ? 0 : - direct_functions->count + 1; - - if (size < 32) - size = 32; - - new_hash = dup_hash(direct_functions, size); - if (!new_hash) - goto out_unlock; - - free_hash = direct_functions; - direct_functions = new_hash; - } - - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - goto out_unlock; - direct = ftrace_find_direct_func(addr); if (!direct) { direct = ftrace_alloc_direct_func(addr); - if (!direct) { - kfree(entry); + if (!direct) goto out_unlock; - } } - entry->ip = ip; - entry->direct = addr; - __add_hash_entry(direct_functions, entry); + entry = ftrace_add_rec_direct(ip, addr, &free_hash); + if (!entry) + goto out_unlock; ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0); if (ret) @@ -5395,6 +5401,216 @@ int modify_ftrace_direct(unsigned long ip, return ret; } EXPORT_SYMBOL_GPL(modify_ftrace_direct); + +#define MULTI_FLAGS (FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_DIRECT | \ + FTRACE_OPS_FL_SAVE_REGS) + +static int check_direct_multi(struct ftrace_ops *ops) +{ + if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) + return -EINVAL; + if ((ops->flags & MULTI_FLAGS) != MULTI_FLAGS) + return -EINVAL; + return 0; +} + +static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long addr) +{ + struct ftrace_func_entry *entry, *del; + int size, i; + + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &hash->buckets[i], hlist) { + del = __ftrace_lookup_ip(direct_functions, entry->ip); + if (del && del->direct == addr) { + remove_hash_entry(direct_functions, del); + kfree(del); + } + } + } +} + +/** + * register_ftrace_direct_multi - Call a custom trampoline directly + * for multiple functions registered in @ops + * @ops: The address of the struct ftrace_ops object + * @addr: The address of the trampoline to call at @ops functions + * + * This is used to connect a direct calls to @addr from the nop locations + * of the functions registered in @ops (with by ftrace_set_filter_ip + * function). + * + * The location that it calls (@addr) must be able to handle a direct call, + * and save the parameters of the function being traced, and restore them + * (or inject new ones if needed), before returning. + * + * Returns: + * 0 on success + * -EINVAL - The @ops object was already registered with this call or + * when there are no functions in @ops object. + * -EBUSY - Another direct function is already attached (there can be only one) + * -ENODEV - @ip does not point to a ftrace nop location (or not supported) + * -ENOMEM - There was an allocation failure. + */ +int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +{ + struct ftrace_hash *hash, *free_hash = NULL; + struct ftrace_func_entry *entry, *new; + int err = -EBUSY, size, i; + + if (ops->func || ops->trampoline) + return -EINVAL; + if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) + return -EINVAL; + if (ops->flags & FTRACE_OPS_FL_ENABLED) + return -EINVAL; + + hash = ops->func_hash->filter_hash; + if (ftrace_hash_empty(hash)) + return -EINVAL; + + mutex_lock(&direct_mutex); + + /* Make sure requested entries are not already registered.. */ + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &hash->buckets[i], hlist) { + if (ftrace_find_rec_direct(entry->ip)) + goto out_unlock; + } + } + + /* ... and insert them to direct_functions hash. */ + err = -ENOMEM; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &hash->buckets[i], hlist) { + new = ftrace_add_rec_direct(entry->ip, addr, &free_hash); + if (!new) + goto out_remove; + entry->direct = addr; + } + } + + ops->func = call_direct_funcs; + ops->flags = MULTI_FLAGS; + ops->trampoline = FTRACE_REGS_ADDR; + + err = register_ftrace_function(ops); + + out_remove: + if (err) + remove_direct_functions_hash(hash, addr); + + out_unlock: + mutex_unlock(&direct_mutex); + + if (free_hash) { + synchronize_rcu_tasks(); + free_ftrace_hash(free_hash); + } + return err; +} +EXPORT_SYMBOL_GPL(register_ftrace_direct_multi); + +/** + * unregister_ftrace_direct_multi - Remove calls to custom trampoline + * previously registered by register_ftrace_direct_multi for @ops object. + * @ops: The address of the struct ftrace_ops object + * + * This is used to remove a direct calls to @addr from the nop locations + * of the functions registered in @ops (with by ftrace_set_filter_ip + * function). + * + * Returns: + * 0 on success + * -EINVAL - The @ops object was not properly registered. + */ +int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +{ + struct ftrace_hash *hash = ops->func_hash->filter_hash; + int err; + + if (check_direct_multi(ops)) + return -EINVAL; + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) + return -EINVAL; + + mutex_lock(&direct_mutex); + err = unregister_ftrace_function(ops); + remove_direct_functions_hash(hash, addr); + mutex_unlock(&direct_mutex); + return err; +} +EXPORT_SYMBOL_GPL(unregister_ftrace_direct_multi); + +/** + * modify_ftrace_direct_multi - Modify an existing direct 'multi' call + * to call something else + * @ops: The address of the struct ftrace_ops object + * @addr: The address of the new trampoline to call at @ops functions + * + * This is used to unregister currently registered direct caller and + * register new one @addr on functions registered in @ops object. + * + * Note there's window between ftrace_shutdown and ftrace_startup calls + * where there will be no callbacks called. + * + * Returns: zero on success. Non zero on error, which includes: + * -EINVAL - The @ops object was not properly registered. + */ +int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr) +{ + struct ftrace_hash *hash; + struct ftrace_func_entry *entry, *iter; + static struct ftrace_ops tmp_ops = { + .func = ftrace_stub, + .flags = FTRACE_OPS_FL_STUB, + }; + int i, size; + int err; + + if (check_direct_multi(ops)) + return -EINVAL; + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) + return -EINVAL; + + mutex_lock(&direct_mutex); + + /* Enable the tmp_ops to have the same functions as the direct ops */ + ftrace_ops_init(&tmp_ops); + tmp_ops.func_hash = ops->func_hash; + + err = register_ftrace_function(&tmp_ops); + if (err) + goto out_direct; + + /* + * Now the ftrace_ops_list_func() is called to do the direct callers. + * We can safely change the direct functions attached to each entry. + */ + mutex_lock(&ftrace_lock); + + hash = ops->func_hash->filter_hash; + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(iter, &hash->buckets[i], hlist) { + entry = __ftrace_lookup_ip(direct_functions, iter->ip); + if (!entry) + continue; + entry->direct = addr; + } + } + + /* Removing the tmp_ops will add the updated direct callers to the functions */ + unregister_ftrace_function(&tmp_ops); + + mutex_unlock(&ftrace_lock); + out_direct: + mutex_unlock(&direct_mutex); + return err; +} +EXPORT_SYMBOL_GPL(modify_ftrace_direct_multi); #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ /** @@ -6109,10 +6325,10 @@ void ftrace_create_filter_files(struct ftrace_ops *ops, struct dentry *parent) { - trace_create_file("set_ftrace_filter", 0644, parent, + trace_create_file("set_ftrace_filter", TRACE_MODE_WRITE, parent, ops, &ftrace_filter_fops); - trace_create_file("set_ftrace_notrace", 0644, parent, + trace_create_file("set_ftrace_notrace", TRACE_MODE_WRITE, parent, ops, &ftrace_notrace_fops); } @@ -6139,19 +6355,19 @@ void ftrace_destroy_filter_files(struct ftrace_ops *ops) static __init int ftrace_init_dyn_tracefs(struct dentry *d_tracer) { - trace_create_file("available_filter_functions", 0444, + trace_create_file("available_filter_functions", TRACE_MODE_READ, d_tracer, NULL, &ftrace_avail_fops); - trace_create_file("enabled_functions", 0444, + trace_create_file("enabled_functions", TRACE_MODE_READ, d_tracer, NULL, &ftrace_enabled_fops); ftrace_create_filter_files(&global_ops, d_tracer); #ifdef CONFIG_FUNCTION_GRAPH_TRACER - trace_create_file("set_graph_function", 0644, d_tracer, + trace_create_file("set_graph_function", TRACE_MODE_WRITE, d_tracer, NULL, &ftrace_graph_fops); - trace_create_file("set_graph_notrace", 0644, d_tracer, + trace_create_file("set_graph_notrace", TRACE_MODE_WRITE, d_tracer, NULL, &ftrace_graph_notrace_fops); #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ @@ -6846,6 +7062,11 @@ void __init ftrace_free_init_mem(void) ftrace_free_mem(NULL, start, end); } +int __init __weak ftrace_dyn_arch_init(void) +{ + return 0; +} + void __init ftrace_init(void) { extern unsigned long __start_mcount_loc[]; @@ -6977,16 +7198,15 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op; int bit; + /* + * The ftrace_test_and_set_recursion() will disable preemption, + * which is required since some of the ops may be dynamically + * allocated, they must be freed after a synchronize_rcu(). + */ bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START); if (bit < 0) return; - /* - * Some of the ops may be dynamically allocated, - * they must be freed after a synchronize_rcu(). - */ - preempt_disable_notrace(); - do_for_each_ftrace_op(op, ftrace_ops_list) { /* Stub functions don't need to be called nor tested */ if (op->flags & FTRACE_OPS_FL_STUB) @@ -7010,7 +7230,6 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, } } while_for_each_ftrace_op(op); out: - preempt_enable_notrace(); trace_clear_recursion(bit); } @@ -7026,21 +7245,23 @@ out: * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved. * An architecture can pass partial regs with ftrace_ops and still * set the ARCH_SUPPORTS_FTRACE_OPS. + * + * In vmlinux.lds.h, ftrace_ops_list_func() is defined to be + * arch_ftrace_ops_list_func. */ #if ARCH_SUPPORTS_FTRACE_OPS -static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct ftrace_regs *fregs) +void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) { __ftrace_ops_list_func(ip, parent_ip, NULL, fregs); } -NOKPROBE_SYMBOL(ftrace_ops_list_func); #else -static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) +void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) { __ftrace_ops_list_func(ip, parent_ip, NULL, NULL); } -NOKPROBE_SYMBOL(ftrace_ops_no_ops); #endif +NOKPROBE_SYMBOL(arch_ftrace_ops_list_func); /* * If there's only one function registered but it does not support @@ -7056,12 +7277,9 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - preempt_disable_notrace(); - if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) op->func(ip, parent_ip, op, fregs); - preempt_enable_notrace(); trace_clear_recursion(bit); } NOKPROBE_SYMBOL(ftrace_ops_assist_func); @@ -7184,10 +7402,10 @@ static void clear_ftrace_pids(struct trace_array *tr, int type) synchronize_rcu(); if ((type & TRACE_PIDS) && pid_list) - trace_free_pid_list(pid_list); + trace_pid_list_free(pid_list); if ((type & TRACE_NO_PIDS) && no_pid_list) - trace_free_pid_list(no_pid_list); + trace_pid_list_free(no_pid_list); } void ftrace_clear_pids(struct trace_array *tr) @@ -7428,7 +7646,7 @@ pid_write(struct file *filp, const char __user *ubuf, if (filtered_pids) { synchronize_rcu(); - trace_free_pid_list(filtered_pids); + trace_pid_list_free(filtered_pids); } else if (pid_list && !other_pids) { /* Register a probe to set whether to ignore the tracing of a task */ register_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr); @@ -7494,10 +7712,10 @@ static const struct file_operations ftrace_no_pid_fops = { void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer) { - trace_create_file("set_ftrace_pid", 0644, d_tracer, + trace_create_file("set_ftrace_pid", TRACE_MODE_WRITE, d_tracer, tr, &ftrace_pid_fops); - trace_create_file("set_ftrace_notrace_pid", 0644, d_tracer, - tr, &ftrace_no_pid_fops); + trace_create_file("set_ftrace_notrace_pid", TRACE_MODE_WRITE, + d_tracer, tr, &ftrace_no_pid_fops); } void __init ftrace_init_tracefs_toplevel(struct trace_array *tr, diff --git a/kernel/trace/pid_list.c b/kernel/trace/pid_list.c new file mode 100644 index 000000000000..a2ef1d18126a --- /dev/null +++ b/kernel/trace/pid_list.c @@ -0,0 +1,495 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 VMware Inc, Steven Rostedt <rostedt@goodmis.org> + */ +#include <linux/spinlock.h> +#include <linux/irq_work.h> +#include <linux/slab.h> +#include "trace.h" + +/* See pid_list.h for details */ + +static inline union lower_chunk *get_lower_chunk(struct trace_pid_list *pid_list) +{ + union lower_chunk *chunk; + + lockdep_assert_held(&pid_list->lock); + + if (!pid_list->lower_list) + return NULL; + + chunk = pid_list->lower_list; + pid_list->lower_list = chunk->next; + pid_list->free_lower_chunks--; + WARN_ON_ONCE(pid_list->free_lower_chunks < 0); + chunk->next = NULL; + /* + * If a refill needs to happen, it can not happen here + * as the scheduler run queue locks are held. + */ + if (pid_list->free_lower_chunks <= CHUNK_REALLOC) + irq_work_queue(&pid_list->refill_irqwork); + + return chunk; +} + +static inline union upper_chunk *get_upper_chunk(struct trace_pid_list *pid_list) +{ + union upper_chunk *chunk; + + lockdep_assert_held(&pid_list->lock); + + if (!pid_list->upper_list) + return NULL; + + chunk = pid_list->upper_list; + pid_list->upper_list = chunk->next; + pid_list->free_upper_chunks--; + WARN_ON_ONCE(pid_list->free_upper_chunks < 0); + chunk->next = NULL; + /* + * If a refill needs to happen, it can not happen here + * as the scheduler run queue locks are held. + */ + if (pid_list->free_upper_chunks <= CHUNK_REALLOC) + irq_work_queue(&pid_list->refill_irqwork); + + return chunk; +} + +static inline void put_lower_chunk(struct trace_pid_list *pid_list, + union lower_chunk *chunk) +{ + lockdep_assert_held(&pid_list->lock); + + chunk->next = pid_list->lower_list; + pid_list->lower_list = chunk; + pid_list->free_lower_chunks++; +} + +static inline void put_upper_chunk(struct trace_pid_list *pid_list, + union upper_chunk *chunk) +{ + lockdep_assert_held(&pid_list->lock); + + chunk->next = pid_list->upper_list; + pid_list->upper_list = chunk; + pid_list->free_upper_chunks++; +} + +static inline bool upper_empty(union upper_chunk *chunk) +{ + /* + * If chunk->data has no lower chunks, it will be the same + * as a zeroed bitmask. Use find_first_bit() to test it + * and if it doesn't find any bits set, then the array + * is empty. + */ + int bit = find_first_bit((unsigned long *)chunk->data, + sizeof(chunk->data) * 8); + return bit >= sizeof(chunk->data) * 8; +} + +static inline int pid_split(unsigned int pid, unsigned int *upper1, + unsigned int *upper2, unsigned int *lower) +{ + /* MAX_PID should cover all pids */ + BUILD_BUG_ON(MAX_PID < PID_MAX_LIMIT); + + /* In case a bad pid is passed in, then fail */ + if (unlikely(pid >= MAX_PID)) + return -1; + + *upper1 = (pid >> UPPER1_SHIFT) & UPPER_MASK; + *upper2 = (pid >> UPPER2_SHIFT) & UPPER_MASK; + *lower = pid & LOWER_MASK; + + return 0; +} + +static inline unsigned int pid_join(unsigned int upper1, + unsigned int upper2, unsigned int lower) +{ + return ((upper1 & UPPER_MASK) << UPPER1_SHIFT) | + ((upper2 & UPPER_MASK) << UPPER2_SHIFT) | + (lower & LOWER_MASK); +} + +/** + * trace_pid_list_is_set - test if the pid is set in the list + * @pid_list: The pid list to test + * @pid: The pid to to see if set in the list. + * + * Tests if @pid is is set in the @pid_list. This is usually called + * from the scheduler when a task is scheduled. Its pid is checked + * if it should be traced or not. + * + * Return true if the pid is in the list, false otherwise. + */ +bool trace_pid_list_is_set(struct trace_pid_list *pid_list, unsigned int pid) +{ + union upper_chunk *upper_chunk; + union lower_chunk *lower_chunk; + unsigned long flags; + unsigned int upper1; + unsigned int upper2; + unsigned int lower; + bool ret = false; + + if (!pid_list) + return false; + + if (pid_split(pid, &upper1, &upper2, &lower) < 0) + return false; + + raw_spin_lock_irqsave(&pid_list->lock, flags); + upper_chunk = pid_list->upper[upper1]; + if (upper_chunk) { + lower_chunk = upper_chunk->data[upper2]; + if (lower_chunk) + ret = test_bit(lower, lower_chunk->data); + } + raw_spin_unlock_irqrestore(&pid_list->lock, flags); + + return ret; +} + +/** + * trace_pid_list_set - add a pid to the list + * @pid_list: The pid list to add the @pid to. + * @pid: The pid to add. + * + * Adds @pid to @pid_list. This is usually done explicitly by a user + * adding a task to be traced, or indirectly by the fork function + * when children should be traced and a task's pid is in the list. + * + * Return 0 on success, negative otherwise. + */ +int trace_pid_list_set(struct trace_pid_list *pid_list, unsigned int pid) +{ + union upper_chunk *upper_chunk; + union lower_chunk *lower_chunk; + unsigned long flags; + unsigned int upper1; + unsigned int upper2; + unsigned int lower; + int ret; + + if (!pid_list) + return -ENODEV; + + if (pid_split(pid, &upper1, &upper2, &lower) < 0) + return -EINVAL; + + raw_spin_lock_irqsave(&pid_list->lock, flags); + upper_chunk = pid_list->upper[upper1]; + if (!upper_chunk) { + upper_chunk = get_upper_chunk(pid_list); + if (!upper_chunk) { + ret = -ENOMEM; + goto out; + } + pid_list->upper[upper1] = upper_chunk; + } + lower_chunk = upper_chunk->data[upper2]; + if (!lower_chunk) { + lower_chunk = get_lower_chunk(pid_list); + if (!lower_chunk) { + ret = -ENOMEM; + goto out; + } + upper_chunk->data[upper2] = lower_chunk; + } + set_bit(lower, lower_chunk->data); + ret = 0; + out: + raw_spin_unlock_irqrestore(&pid_list->lock, flags); + return ret; +} + +/** + * trace_pid_list_clear - remove a pid from the list + * @pid_list: The pid list to remove the @pid from. + * @pid: The pid to remove. + * + * Removes @pid from @pid_list. This is usually done explicitly by a user + * removing tasks from tracing, or indirectly by the exit function + * when a task that is set to be traced exits. + * + * Return 0 on success, negative otherwise. + */ +int trace_pid_list_clear(struct trace_pid_list *pid_list, unsigned int pid) +{ + union upper_chunk *upper_chunk; + union lower_chunk *lower_chunk; + unsigned long flags; + unsigned int upper1; + unsigned int upper2; + unsigned int lower; + + if (!pid_list) + return -ENODEV; + + if (pid_split(pid, &upper1, &upper2, &lower) < 0) + return -EINVAL; + + raw_spin_lock_irqsave(&pid_list->lock, flags); + upper_chunk = pid_list->upper[upper1]; + if (!upper_chunk) + goto out; + + lower_chunk = upper_chunk->data[upper2]; + if (!lower_chunk) + goto out; + + clear_bit(lower, lower_chunk->data); + + /* if there's no more bits set, add it to the free list */ + if (find_first_bit(lower_chunk->data, LOWER_MAX) >= LOWER_MAX) { + put_lower_chunk(pid_list, lower_chunk); + upper_chunk->data[upper2] = NULL; + if (upper_empty(upper_chunk)) { + put_upper_chunk(pid_list, upper_chunk); + pid_list->upper[upper1] = NULL; + } + } + out: + raw_spin_unlock_irqrestore(&pid_list->lock, flags); + return 0; +} + +/** + * trace_pid_list_next - return the next pid in the list + * @pid_list: The pid list to examine. + * @pid: The pid to start from + * @next: The pointer to place the pid that is set starting from @pid. + * + * Looks for the next consecutive pid that is in @pid_list starting + * at the pid specified by @pid. If one is set (including @pid), then + * that pid is placed into @next. + * + * Return 0 when a pid is found, -1 if there are no more pids included. + */ +int trace_pid_list_next(struct trace_pid_list *pid_list, unsigned int pid, + unsigned int *next) +{ + union upper_chunk *upper_chunk; + union lower_chunk *lower_chunk; + unsigned long flags; + unsigned int upper1; + unsigned int upper2; + unsigned int lower; + + if (!pid_list) + return -ENODEV; + + if (pid_split(pid, &upper1, &upper2, &lower) < 0) + return -EINVAL; + + raw_spin_lock_irqsave(&pid_list->lock, flags); + for (; upper1 <= UPPER_MASK; upper1++, upper2 = 0) { + upper_chunk = pid_list->upper[upper1]; + + if (!upper_chunk) + continue; + + for (; upper2 <= UPPER_MASK; upper2++, lower = 0) { + lower_chunk = upper_chunk->data[upper2]; + if (!lower_chunk) + continue; + + lower = find_next_bit(lower_chunk->data, LOWER_MAX, + lower); + if (lower < LOWER_MAX) + goto found; + } + } + + found: + raw_spin_unlock_irqrestore(&pid_list->lock, flags); + if (upper1 > UPPER_MASK) + return -1; + + *next = pid_join(upper1, upper2, lower); + return 0; +} + +/** + * trace_pid_list_first - return the first pid in the list + * @pid_list: The pid list to examine. + * @pid: The pointer to place the pid first found pid that is set. + * + * Looks for the first pid that is set in @pid_list, and places it + * into @pid if found. + * + * Return 0 when a pid is found, -1 if there are no pids set. + */ +int trace_pid_list_first(struct trace_pid_list *pid_list, unsigned int *pid) +{ + return trace_pid_list_next(pid_list, 0, pid); +} + +static void pid_list_refill_irq(struct irq_work *iwork) +{ + struct trace_pid_list *pid_list = container_of(iwork, struct trace_pid_list, + refill_irqwork); + union upper_chunk *upper = NULL; + union lower_chunk *lower = NULL; + union upper_chunk **upper_next = &upper; + union lower_chunk **lower_next = &lower; + int upper_count; + int lower_count; + int ucnt = 0; + int lcnt = 0; + + again: + raw_spin_lock(&pid_list->lock); + upper_count = CHUNK_ALLOC - pid_list->free_upper_chunks; + lower_count = CHUNK_ALLOC - pid_list->free_lower_chunks; + raw_spin_unlock(&pid_list->lock); + + if (upper_count <= 0 && lower_count <= 0) + return; + + while (upper_count-- > 0) { + union upper_chunk *chunk; + + chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); + if (!chunk) + break; + *upper_next = chunk; + upper_next = &chunk->next; + ucnt++; + } + + while (lower_count-- > 0) { + union lower_chunk *chunk; + + chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); + if (!chunk) + break; + *lower_next = chunk; + lower_next = &chunk->next; + lcnt++; + } + + raw_spin_lock(&pid_list->lock); + if (upper) { + *upper_next = pid_list->upper_list; + pid_list->upper_list = upper; + pid_list->free_upper_chunks += ucnt; + } + if (lower) { + *lower_next = pid_list->lower_list; + pid_list->lower_list = lower; + pid_list->free_lower_chunks += lcnt; + } + raw_spin_unlock(&pid_list->lock); + + /* + * On success of allocating all the chunks, both counters + * will be less than zero. If they are not, then an allocation + * failed, and we should not try again. + */ + if (upper_count >= 0 || lower_count >= 0) + return; + /* + * When the locks were released, free chunks could have + * been used and allocation needs to be done again. Might as + * well allocate it now. + */ + goto again; +} + +/** + * trace_pid_list_alloc - create a new pid_list + * + * Allocates a new pid_list to store pids into. + * + * Returns the pid_list on success, NULL otherwise. + */ +struct trace_pid_list *trace_pid_list_alloc(void) +{ + struct trace_pid_list *pid_list; + int i; + + /* According to linux/thread.h, pids can be no bigger that 30 bits */ + WARN_ON_ONCE(pid_max > (1 << 30)); + + pid_list = kzalloc(sizeof(*pid_list), GFP_KERNEL); + if (!pid_list) + return NULL; + + init_irq_work(&pid_list->refill_irqwork, pid_list_refill_irq); + + raw_spin_lock_init(&pid_list->lock); + + for (i = 0; i < CHUNK_ALLOC; i++) { + union upper_chunk *chunk; + + chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); + if (!chunk) + break; + chunk->next = pid_list->upper_list; + pid_list->upper_list = chunk; + pid_list->free_upper_chunks++; + } + + for (i = 0; i < CHUNK_ALLOC; i++) { + union lower_chunk *chunk; + + chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); + if (!chunk) + break; + chunk->next = pid_list->lower_list; + pid_list->lower_list = chunk; + pid_list->free_lower_chunks++; + } + + return pid_list; +} + +/** + * trace_pid_list_free - Frees an allocated pid_list. + * + * Frees the memory for a pid_list that was allocated. + */ +void trace_pid_list_free(struct trace_pid_list *pid_list) +{ + union upper_chunk *upper; + union lower_chunk *lower; + int i, j; + + if (!pid_list) + return; + + irq_work_sync(&pid_list->refill_irqwork); + + while (pid_list->lower_list) { + union lower_chunk *chunk; + + chunk = pid_list->lower_list; + pid_list->lower_list = pid_list->lower_list->next; + kfree(chunk); + } + + while (pid_list->upper_list) { + union upper_chunk *chunk; + + chunk = pid_list->upper_list; + pid_list->upper_list = pid_list->upper_list->next; + kfree(chunk); + } + + for (i = 0; i < UPPER1_SIZE; i++) { + upper = pid_list->upper[i]; + if (upper) { + for (j = 0; j < UPPER2_SIZE; j++) { + lower = upper->data[j]; + kfree(lower); + } + kfree(upper); + } + } + kfree(pid_list); +} diff --git a/kernel/trace/pid_list.h b/kernel/trace/pid_list.h new file mode 100644 index 000000000000..62e73f1ac85f --- /dev/null +++ b/kernel/trace/pid_list.h @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Do not include this file directly. */ + +#ifndef _TRACE_INTERNAL_PID_LIST_H +#define _TRACE_INTERNAL_PID_LIST_H + +/* + * In order to keep track of what pids to trace, a tree is created much + * like page tables are used. This creates a sparse bit map, where + * the tree is filled in when needed. A PID is at most 30 bits (see + * linux/thread.h), and is broken up into 3 sections based on the bit map + * of the bits. The 8 MSB is the "upper1" section. The next 8 MSB is the + * "upper2" section and the 14 LSB is the "lower" section. + * + * A trace_pid_list structure holds the "upper1" section, in an + * array of 256 pointers (1 or 2K in size) to "upper_chunk" unions, where + * each has an array of 256 pointers (1 or 2K in size) to the "lower_chunk" + * structures, where each has an array of size 2K bytes representing a bitmask + * of the 14 LSB of the PID (256 * 8 = 2048) + * + * When a trace_pid_list is allocated, it includes the 256 pointer array + * of the upper1 unions. Then a "cache" of upper and lower is allocated + * where these will be assigned as needed. + * + * When a bit is set in the pid_list bitmask, the pid to use has + * the 8 MSB masked, and this is used to index the array in the + * pid_list to find the next upper union. If the element is NULL, + * then one is retrieved from the upper_list cache. If none is + * available, then -ENOMEM is returned. + * + * The next 8 MSB is used to index into the "upper2" section. If this + * element is NULL, then it is retrieved from the lower_list cache. + * Again, if one is not available -ENOMEM is returned. + * + * Finally the 14 LSB of the PID is used to set the bit in the 16384 + * bitmask (made up of 2K bytes). + * + * When the second upper section or the lower section has their last + * bit cleared, they are added back to the free list to be reused + * when needed. + */ + +#define UPPER_BITS 8 +#define UPPER_MAX (1 << UPPER_BITS) +#define UPPER1_SIZE (1 << UPPER_BITS) +#define UPPER2_SIZE (1 << UPPER_BITS) + +#define LOWER_BITS 14 +#define LOWER_MAX (1 << LOWER_BITS) +#define LOWER_SIZE (LOWER_MAX / BITS_PER_LONG) + +#define UPPER1_SHIFT (LOWER_BITS + UPPER_BITS) +#define UPPER2_SHIFT LOWER_BITS +#define LOWER_MASK (LOWER_MAX - 1) + +#define UPPER_MASK (UPPER_MAX - 1) + +/* According to linux/thread.h pids can not be bigger than or equal to 1 << 30 */ +#define MAX_PID (1 << 30) + +/* Just keep 6 chunks of both upper and lower in the cache on alloc */ +#define CHUNK_ALLOC 6 + +/* Have 2 chunks free, trigger a refill of the cache */ +#define CHUNK_REALLOC 2 + +union lower_chunk { + union lower_chunk *next; + unsigned long data[LOWER_SIZE]; // 2K in size +}; + +union upper_chunk { + union upper_chunk *next; + union lower_chunk *data[UPPER2_SIZE]; // 1 or 2K in size +}; + +struct trace_pid_list { + raw_spinlock_t lock; + struct irq_work refill_irqwork; + union upper_chunk *upper[UPPER1_SIZE]; // 1 or 2K in size + union upper_chunk *upper_list; + union lower_chunk *lower_list; + int free_upper_chunks; + int free_lower_chunks; +}; + +#endif /* _TRACE_INTERNAL_PID_LIST_H */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c5a3fbf19617..f6520d0a4c8c 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3167,14 +3167,9 @@ static __always_inline int trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer) { unsigned int val = cpu_buffer->current_context; - unsigned long pc = preempt_count(); - int bit; + int bit = interrupt_context_level(); - if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) - bit = RB_CTX_NORMAL; - else - bit = pc & NMI_MASK ? RB_CTX_NMI : - pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ; + bit = RB_CTX_NORMAL - bit; if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) { /* diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bc677cd64224..c88bbfe75d1d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -512,12 +512,6 @@ int call_filter_check_discard(struct trace_event_call *call, void *rec, return 0; } -void trace_free_pid_list(struct trace_pid_list *pid_list) -{ - vfree(pid_list->pids); - kfree(pid_list); -} - /** * trace_find_filtered_pid - check if a pid exists in a filtered_pid list * @filtered_pids: The list of pids to check @@ -528,14 +522,7 @@ void trace_free_pid_list(struct trace_pid_list *pid_list) bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid) { - /* - * If pid_max changed after filtered_pids was created, we - * by default ignore all pids greater than the previous pid_max. - */ - if (search_pid >= filtered_pids->pid_max) - return false; - - return test_bit(search_pid, filtered_pids->pids); + return trace_pid_list_is_set(filtered_pids, search_pid); } /** @@ -592,15 +579,11 @@ void trace_filter_add_remove_task(struct trace_pid_list *pid_list, return; } - /* Sorry, but we don't support pid_max changing after setting */ - if (task->pid >= pid_list->pid_max) - return; - /* "self" is set for forks, and NULL for exits */ if (self) - set_bit(task->pid, pid_list->pids); + trace_pid_list_set(pid_list, task->pid); else - clear_bit(task->pid, pid_list->pids); + trace_pid_list_clear(pid_list, task->pid); } /** @@ -617,18 +600,19 @@ void trace_filter_add_remove_task(struct trace_pid_list *pid_list, */ void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos) { - unsigned long pid = (unsigned long)v; + long pid = (unsigned long)v; + unsigned int next; (*pos)++; /* pid already is +1 of the actual previous bit */ - pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid); + if (trace_pid_list_next(pid_list, pid, &next) < 0) + return NULL; - /* Return pid + 1 to allow zero to be represented */ - if (pid < pid_list->pid_max) - return (void *)(pid + 1); + pid = next; - return NULL; + /* Return pid + 1 to allow zero to be represented */ + return (void *)(pid + 1); } /** @@ -645,12 +629,14 @@ void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos) void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos) { unsigned long pid; + unsigned int first; loff_t l = 0; - pid = find_first_bit(pid_list->pids, pid_list->pid_max); - if (pid >= pid_list->pid_max) + if (trace_pid_list_first(pid_list, &first) < 0) return NULL; + pid = first; + /* Return pid + 1 so that zero can be the exit value */ for (pid++; pid && l < *pos; pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l)) @@ -686,7 +672,7 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, unsigned long val; int nr_pids = 0; ssize_t read = 0; - ssize_t ret = 0; + ssize_t ret; loff_t pos; pid_t pid; @@ -699,34 +685,23 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, * the user. If the operation fails, then the current list is * not modified. */ - pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL); + pid_list = trace_pid_list_alloc(); if (!pid_list) { trace_parser_put(&parser); return -ENOMEM; } - pid_list->pid_max = READ_ONCE(pid_max); - - /* Only truncating will shrink pid_max */ - if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max) - pid_list->pid_max = filtered_pids->pid_max; - - pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3); - if (!pid_list->pids) { - trace_parser_put(&parser); - kfree(pid_list); - return -ENOMEM; - } - if (filtered_pids) { /* copy the current bits to the new max */ - for_each_set_bit(pid, filtered_pids->pids, - filtered_pids->pid_max) { - set_bit(pid, pid_list->pids); + ret = trace_pid_list_first(filtered_pids, &pid); + while (!ret) { + trace_pid_list_set(pid_list, pid); + ret = trace_pid_list_next(filtered_pids, pid + 1, &pid); nr_pids++; } } + ret = 0; while (cnt > 0) { pos = 0; @@ -742,12 +717,13 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, ret = -EINVAL; if (kstrtoul(parser.buffer, 0, &val)) break; - if (val >= pid_list->pid_max) - break; pid = (pid_t)val; - set_bit(pid, pid_list->pids); + if (trace_pid_list_set(pid_list, pid) < 0) { + ret = -1; + break; + } nr_pids++; trace_parser_clear(&parser); @@ -756,13 +732,13 @@ int trace_pid_write(struct trace_pid_list *filtered_pids, trace_parser_put(&parser); if (ret < 0) { - trace_free_pid_list(pid_list); + trace_pid_list_free(pid_list); return ret; } if (!nr_pids) { /* Cleared the list of pids */ - trace_free_pid_list(pid_list); + trace_pid_list_free(pid_list); read = ret; pid_list = NULL; } @@ -1714,7 +1690,8 @@ static void trace_create_maxlat_file(struct trace_array *tr, { INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn); init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq); - tr->d_max_latency = trace_create_file("tracing_max_latency", 0644, + tr->d_max_latency = trace_create_file("tracing_max_latency", + TRACE_MODE_WRITE, d_tracer, &tr->max_latency, &tracing_max_lat_fops); } @@ -1748,8 +1725,8 @@ void latency_fsnotify(struct trace_array *tr) || defined(CONFIG_OSNOISE_TRACER) #define trace_create_maxlat_file(tr, d_tracer) \ - trace_create_file("tracing_max_latency", 0644, d_tracer, \ - &tr->max_latency, &tracing_max_lat_fops) + trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \ + d_tracer, &tr->max_latency, &tracing_max_lat_fops) #else #define trace_create_maxlat_file(tr, d_tracer) do { } while (0) @@ -6077,7 +6054,7 @@ trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start, static void trace_create_eval_file(struct dentry *d_tracer) { - trace_create_file("eval_map", 0444, d_tracer, + trace_create_file("eval_map", TRACE_MODE_READ, d_tracer, NULL, &tracing_eval_map_fops); } @@ -8590,27 +8567,27 @@ tracing_init_tracefs_percpu(struct trace_array *tr, long cpu) } /* per cpu trace_pipe */ - trace_create_cpu_file("trace_pipe", 0444, d_cpu, + trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu, tr, cpu, &tracing_pipe_fops); /* per cpu trace */ - trace_create_cpu_file("trace", 0644, d_cpu, + trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu, tr, cpu, &tracing_fops); - trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu, + trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu, tr, cpu, &tracing_buffers_fops); - trace_create_cpu_file("stats", 0444, d_cpu, + trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu, tr, cpu, &tracing_stats_fops); - trace_create_cpu_file("buffer_size_kb", 0444, d_cpu, + trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu, tr, cpu, &tracing_entries_fops); #ifdef CONFIG_TRACER_SNAPSHOT - trace_create_cpu_file("snapshot", 0644, d_cpu, + trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu, tr, cpu, &snapshot_fops); - trace_create_cpu_file("snapshot_raw", 0444, d_cpu, + trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu, tr, cpu, &snapshot_raw_fops); #endif } @@ -8816,8 +8793,8 @@ create_trace_option_file(struct trace_array *tr, topt->opt = opt; topt->tr = tr; - topt->entry = trace_create_file(opt->name, 0644, t_options, topt, - &trace_options_fops); + topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE, + t_options, topt, &trace_options_fops); } @@ -8892,7 +8869,7 @@ create_trace_option_core_file(struct trace_array *tr, if (!t_options) return NULL; - return trace_create_file(option, 0644, t_options, + return trace_create_file(option, TRACE_MODE_WRITE, t_options, (void *)&tr->trace_flags_index[index], &trace_options_core_fops); } @@ -9417,28 +9394,28 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) struct trace_event_file *file; int cpu; - trace_create_file("available_tracers", 0444, d_tracer, + trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer, tr, &show_traces_fops); - trace_create_file("current_tracer", 0644, d_tracer, + trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer, tr, &set_tracer_fops); - trace_create_file("tracing_cpumask", 0644, d_tracer, + trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer, tr, &tracing_cpumask_fops); - trace_create_file("trace_options", 0644, d_tracer, + trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer, tr, &tracing_iter_fops); - trace_create_file("trace", 0644, d_tracer, + trace_create_file("trace", TRACE_MODE_WRITE, d_tracer, tr, &tracing_fops); - trace_create_file("trace_pipe", 0444, d_tracer, + trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer, tr, &tracing_pipe_fops); - trace_create_file("buffer_size_kb", 0644, d_tracer, + trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer, tr, &tracing_entries_fops); - trace_create_file("buffer_total_size_kb", 0444, d_tracer, + trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer, tr, &tracing_total_entries_fops); trace_create_file("free_buffer", 0200, d_tracer, @@ -9449,25 +9426,25 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) file = __find_event_file(tr, "ftrace", "print"); if (file && file->dir) - trace_create_file("trigger", 0644, file->dir, file, - &event_trigger_fops); + trace_create_file("trigger", TRACE_MODE_WRITE, file->dir, + file, &event_trigger_fops); tr->trace_marker_file = file; trace_create_file("trace_marker_raw", 0220, d_tracer, tr, &tracing_mark_raw_fops); - trace_create_file("trace_clock", 0644, d_tracer, tr, + trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr, &trace_clock_fops); - trace_create_file("tracing_on", 0644, d_tracer, + trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer, tr, &rb_simple_fops); - trace_create_file("timestamp_mode", 0444, d_tracer, tr, + trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr, &trace_time_stamp_mode_fops); tr->buffer_percent = 50; - trace_create_file("buffer_percent", 0444, d_tracer, + trace_create_file("buffer_percent", TRACE_MODE_READ, d_tracer, tr, &buffer_percent_fops); create_trace_options_dir(tr); @@ -9478,11 +9455,11 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer) MEM_FAIL(1, "Could not allocate function filter files"); #ifdef CONFIG_TRACER_SNAPSHOT - trace_create_file("snapshot", 0644, d_tracer, + trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer, tr, &snapshot_fops); #endif - trace_create_file("error_log", 0644, d_tracer, + trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer, tr, &tracing_err_log_fops); for_each_tracing_cpu(cpu) @@ -9675,19 +9652,19 @@ static __init int tracer_init_tracefs(void) init_tracer_tracefs(&global_trace, NULL); ftrace_init_tracefs_toplevel(&global_trace, NULL); - trace_create_file("tracing_thresh", 0644, NULL, + trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL, &global_trace, &tracing_thresh_fops); - trace_create_file("README", 0444, NULL, + trace_create_file("README", TRACE_MODE_READ, NULL, NULL, &tracing_readme_fops); - trace_create_file("saved_cmdlines", 0444, NULL, + trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL, NULL, &tracing_saved_cmdlines_fops); - trace_create_file("saved_cmdlines_size", 0644, NULL, + trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL, NULL, &tracing_saved_cmdlines_size_fops); - trace_create_file("saved_tgids", 0444, NULL, + trace_create_file("saved_tgids", TRACE_MODE_READ, NULL, NULL, &tracing_saved_tgids_fops); trace_eval_init(); @@ -9699,7 +9676,7 @@ static __init int tracer_init_tracefs(void) #endif #ifdef CONFIG_DYNAMIC_FTRACE - trace_create_file("dyn_ftrace_total_info", 0444, NULL, + trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL, NULL, &tracing_dyn_info_fops); #endif diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index b7c0f8e160fb..6b60ab9475ed 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -22,11 +22,16 @@ #include <linux/ctype.h> #include <linux/once_lite.h> +#include "pid_list.h" + #ifdef CONFIG_FTRACE_SYSCALLS #include <asm/unistd.h> /* For NR_SYSCALLS */ #include <asm/syscall.h> /* some archs define it here */ #endif +#define TRACE_MODE_WRITE 0640 +#define TRACE_MODE_READ 0440 + enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -188,10 +193,14 @@ struct trace_options { struct trace_option_dentry *topts; }; -struct trace_pid_list { - int pid_max; - unsigned long *pids; -}; +struct trace_pid_list *trace_pid_list_alloc(void); +void trace_pid_list_free(struct trace_pid_list *pid_list); +bool trace_pid_list_is_set(struct trace_pid_list *pid_list, unsigned int pid); +int trace_pid_list_set(struct trace_pid_list *pid_list, unsigned int pid); +int trace_pid_list_clear(struct trace_pid_list *pid_list, unsigned int pid); +int trace_pid_list_first(struct trace_pid_list *pid_list, unsigned int *pid); +int trace_pid_list_next(struct trace_pid_list *pid_list, unsigned int pid, + unsigned int *next); enum { TRACE_PIDS = BIT(0), @@ -881,7 +890,7 @@ static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) * is set, and called by an interrupt handler, we still * want to trace it. */ - if (in_irq()) + if (in_hardirq()) trace_recursion_set(TRACE_IRQ_BIT); else trace_recursion_clear(TRACE_IRQ_BIT); diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 8d252f63cd78..0580287d7a0d 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -430,6 +430,8 @@ trace_boot_init_histograms(struct trace_event_file *file, /* All digit started node should be instances. */ if (trace_boot_compose_hist_cmd(node, buf, size) == 0) { tmp = kstrdup(buf, GFP_KERNEL); + if (!tmp) + return; if (trigger_process_regex(file, buf) < 0) pr_err("Failed to apply hist trigger: %s\n", tmp); kfree(tmp); @@ -439,6 +441,8 @@ trace_boot_init_histograms(struct trace_event_file *file, if (xbc_node_find_subkey(hnode, "keys")) { if (trace_boot_compose_hist_cmd(hnode, buf, size) == 0) { tmp = kstrdup(buf, GFP_KERNEL); + if (!tmp) + return; if (trigger_process_regex(file, buf) < 0) pr_err("Failed to apply hist trigger: %s\n", tmp); kfree(tmp); diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 1110112e55bd..e34e8182ee4b 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -262,7 +262,7 @@ static __init int init_dynamic_event(void) if (ret) return 0; - entry = tracefs_create_file("dynamic_events", 0644, NULL, + entry = tracefs_create_file("dynamic_events", TRACE_MODE_WRITE, NULL, NULL, &dynamic_events_ops); /* Event list interface */ diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 6aed10e2f7ce..a114549720d6 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -400,7 +400,8 @@ void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp) BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, - "perf buffer not large enough")) + "perf buffer not large enough, wanted %d, have %d", + size, PERF_MAX_TRACE_SIZE)) return NULL; *rctxp = rctx = perf_swevent_get_recursion_context(); @@ -441,13 +442,13 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip, if (!rcu_is_watching()) return; - if ((unsigned long)ops->private != smp_processor_id()) - return; - bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; + if ((unsigned long)ops->private != smp_processor_id()) + goto out; + event = container_of(ops, struct perf_event, ftrace_ops); /* diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 830b3b9940f4..4021b9a79f93 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -885,10 +885,10 @@ static void __ftrace_clear_event_pids(struct trace_array *tr, int type) tracepoint_synchronize_unregister(); if ((type & TRACE_PIDS) && pid_list) - trace_free_pid_list(pid_list); + trace_pid_list_free(pid_list); if ((type & TRACE_NO_PIDS) && no_pid_list) - trace_free_pid_list(no_pid_list); + trace_pid_list_free(no_pid_list); } static void ftrace_clear_event_pids(struct trace_array *tr, int type) @@ -1967,7 +1967,7 @@ event_pid_write(struct file *filp, const char __user *ubuf, if (filtered_pids) { tracepoint_synchronize_unregister(); - trace_free_pid_list(filtered_pids); + trace_pid_list_free(filtered_pids); } else if (pid_list && !other_pids) { register_pid_events(tr); } @@ -2312,7 +2312,8 @@ event_subsystem_dir(struct trace_array *tr, const char *name, /* the ftrace system is special, do not create enable or filter files */ if (strcmp(name, "ftrace") != 0) { - entry = tracefs_create_file("filter", 0644, dir->entry, dir, + entry = tracefs_create_file("filter", TRACE_MODE_WRITE, + dir->entry, dir, &ftrace_subsystem_filter_fops); if (!entry) { kfree(system->filter); @@ -2320,7 +2321,7 @@ event_subsystem_dir(struct trace_array *tr, const char *name, pr_warn("Could not create tracefs '%s/filter' entry\n", name); } - trace_create_file("enable", 0644, dir->entry, dir, + trace_create_file("enable", TRACE_MODE_WRITE, dir->entry, dir, &ftrace_system_enable_fops); } @@ -2402,12 +2403,12 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file) } if (call->class->reg && !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) - trace_create_file("enable", 0644, file->dir, file, + trace_create_file("enable", TRACE_MODE_WRITE, file->dir, file, &ftrace_enable_fops); #ifdef CONFIG_PERF_EVENTS if (call->event.type && call->class->reg) - trace_create_file("id", 0444, file->dir, + trace_create_file("id", TRACE_MODE_READ, file->dir, (void *)(long)call->event.type, &ftrace_event_id_fops); #endif @@ -2423,22 +2424,22 @@ event_create_dir(struct dentry *parent, struct trace_event_file *file) * triggers or filters. */ if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) { - trace_create_file("filter", 0644, file->dir, file, - &ftrace_event_filter_fops); + trace_create_file("filter", TRACE_MODE_WRITE, file->dir, + file, &ftrace_event_filter_fops); - trace_create_file("trigger", 0644, file->dir, file, - &event_trigger_fops); + trace_create_file("trigger", TRACE_MODE_WRITE, file->dir, + file, &event_trigger_fops); } #ifdef CONFIG_HIST_TRIGGERS - trace_create_file("hist", 0444, file->dir, file, + trace_create_file("hist", TRACE_MODE_READ, file->dir, file, &event_hist_fops); #endif #ifdef CONFIG_HIST_TRIGGERS_DEBUG - trace_create_file("hist_debug", 0444, file->dir, file, + trace_create_file("hist_debug", TRACE_MODE_READ, file->dir, file, &event_hist_debug_fops); #endif - trace_create_file("format", 0444, file->dir, call, + trace_create_file("format", TRACE_MODE_READ, file->dir, call, &ftrace_event_format_fops); #ifdef CONFIG_TRACE_EVENT_INJECT @@ -3433,7 +3434,7 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) struct dentry *d_events; struct dentry *entry; - entry = tracefs_create_file("set_event", 0644, parent, + entry = tracefs_create_file("set_event", TRACE_MODE_WRITE, parent, tr, &ftrace_set_event_fops); if (!entry) { pr_warn("Could not create tracefs 'set_event' entry\n"); @@ -3446,7 +3447,7 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) return -ENOMEM; } - entry = trace_create_file("enable", 0644, d_events, + entry = trace_create_file("enable", TRACE_MODE_WRITE, d_events, tr, &ftrace_tr_enable_fops); if (!entry) { pr_warn("Could not create tracefs 'enable' entry\n"); @@ -3455,24 +3456,25 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr) /* There are not as crucial, just warn if they are not created */ - entry = tracefs_create_file("set_event_pid", 0644, parent, + entry = tracefs_create_file("set_event_pid", TRACE_MODE_WRITE, parent, tr, &ftrace_set_event_pid_fops); if (!entry) pr_warn("Could not create tracefs 'set_event_pid' entry\n"); - entry = tracefs_create_file("set_event_notrace_pid", 0644, parent, - tr, &ftrace_set_event_notrace_pid_fops); + entry = tracefs_create_file("set_event_notrace_pid", + TRACE_MODE_WRITE, parent, tr, + &ftrace_set_event_notrace_pid_fops); if (!entry) pr_warn("Could not create tracefs 'set_event_notrace_pid' entry\n"); /* ring buffer internal formats */ - entry = trace_create_file("header_page", 0444, d_events, + entry = trace_create_file("header_page", TRACE_MODE_READ, d_events, ring_buffer_print_page_header, &ftrace_show_header_fops); if (!entry) pr_warn("Could not create tracefs 'header_page' entry\n"); - entry = trace_create_file("header_event", 0444, d_events, + entry = trace_create_file("header_event", TRACE_MODE_READ, d_events, ring_buffer_print_entry_header, &ftrace_show_header_fops); if (!entry) @@ -3689,8 +3691,8 @@ __init int event_trace_init(void) if (!tr) return -ENODEV; - entry = tracefs_create_file("available_events", 0444, NULL, - tr, &ftrace_avail_fops); + entry = tracefs_create_file("available_events", TRACE_MODE_READ, + NULL, tr, &ftrace_avail_fops); if (!entry) pr_warn("Could not create tracefs 'available_events' entry\n"); diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index f01e442716e2..61586f16a853 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -66,7 +66,9 @@ C(EMPTY_SORT_FIELD, "Empty sort field"), \ C(TOO_MANY_SORT_FIELDS, "Too many sort fields (Max = 2)"), \ C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), \ - C(INVALID_STR_OPERAND, "String type can not be an operand in expression"), + C(INVALID_STR_OPERAND, "String type can not be an operand in expression"), \ + C(EXPECT_NUMBER, "Expecting numeric literal"), \ + C(UNARY_MINUS_SUBEXPR, "Unary minus not supported in sub-expressions"), #undef C #define C(a, b) HIST_ERR_##a @@ -89,12 +91,15 @@ typedef u64 (*hist_field_fn_t) (struct hist_field *field, #define HIST_FIELD_OPERANDS_MAX 2 #define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX) #define HIST_ACTIONS_MAX 8 +#define HIST_CONST_DIGITS_MAX 21 enum field_op_id { FIELD_OP_NONE, FIELD_OP_PLUS, FIELD_OP_MINUS, FIELD_OP_UNARY_MINUS, + FIELD_OP_DIV, + FIELD_OP_MULT, }; /* @@ -152,6 +157,9 @@ struct hist_field { bool read_once; unsigned int var_str_idx; + + /* Numeric literals are represented as u64 */ + u64 constant; }; static u64 hist_field_none(struct hist_field *field, @@ -163,6 +171,15 @@ static u64 hist_field_none(struct hist_field *field, return 0; } +static u64 hist_field_const(struct hist_field *field, + struct tracing_map_elt *elt, + struct trace_buffer *buffer, + struct ring_buffer_event *rbe, + void *event) +{ + return field->constant; +} + static u64 hist_field_counter(struct hist_field *field, struct tracing_map_elt *elt, struct trace_buffer *buffer, @@ -271,6 +288,44 @@ static u64 hist_field_minus(struct hist_field *hist_field, return val1 - val2; } +static u64 hist_field_div(struct hist_field *hist_field, + struct tracing_map_elt *elt, + struct trace_buffer *buffer, + struct ring_buffer_event *rbe, + void *event) +{ + struct hist_field *operand1 = hist_field->operands[0]; + struct hist_field *operand2 = hist_field->operands[1]; + + u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event); + u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event); + + /* Return -1 for the undefined case */ + if (!val2) + return -1; + + /* Use shift if the divisor is a power of 2 */ + if (!(val2 & (val2 - 1))) + return val1 >> __ffs64(val2); + + return div64_u64(val1, val2); +} + +static u64 hist_field_mult(struct hist_field *hist_field, + struct tracing_map_elt *elt, + struct trace_buffer *buffer, + struct ring_buffer_event *rbe, + void *event) +{ + struct hist_field *operand1 = hist_field->operands[0]; + struct hist_field *operand2 = hist_field->operands[1]; + + u64 val1 = operand1->fn(operand1, elt, buffer, rbe, event); + u64 val2 = operand2->fn(operand2, elt, buffer, rbe, event); + + return val1 * val2; +} + static u64 hist_field_unary_minus(struct hist_field *hist_field, struct tracing_map_elt *elt, struct trace_buffer *buffer, @@ -341,6 +396,7 @@ enum hist_field_flags { HIST_FIELD_FL_CPU = 1 << 15, HIST_FIELD_FL_ALIAS = 1 << 16, HIST_FIELD_FL_BUCKET = 1 << 17, + HIST_FIELD_FL_CONST = 1 << 18, }; struct var_defs { @@ -1516,6 +1572,12 @@ static void expr_field_str(struct hist_field *field, char *expr) { if (field->flags & HIST_FIELD_FL_VAR_REF) strcat(expr, "$"); + else if (field->flags & HIST_FIELD_FL_CONST) { + char str[HIST_CONST_DIGITS_MAX]; + + snprintf(str, HIST_CONST_DIGITS_MAX, "%llu", field->constant); + strcat(expr, str); + } strcat(expr, hist_field_name(field, 0)); @@ -1571,6 +1633,12 @@ static char *expr_str(struct hist_field *field, unsigned int level) case FIELD_OP_PLUS: strcat(expr, "+"); break; + case FIELD_OP_DIV: + strcat(expr, "/"); + break; + case FIELD_OP_MULT: + strcat(expr, "*"); + break; default: kfree(expr); return NULL; @@ -1581,34 +1649,92 @@ static char *expr_str(struct hist_field *field, unsigned int level) return expr; } -static int contains_operator(char *str) +/* + * If field_op != FIELD_OP_NONE, *sep points to the root operator + * of the expression tree to be evaluated. + */ +static int contains_operator(char *str, char **sep) { enum field_op_id field_op = FIELD_OP_NONE; - char *op; + char *minus_op, *plus_op, *div_op, *mult_op; - op = strpbrk(str, "+-"); - if (!op) - return FIELD_OP_NONE; - switch (*op) { - case '-': + /* + * Report the last occurrence of the operators first, so that the + * expression is evaluated left to right. This is important since + * subtraction and division are not associative. + * + * e.g + * 64/8/4/2 is 1, i.e 64/8/4/2 = ((64/8)/4)/2 + * 14-7-5-2 is 0, i.e 14-7-5-2 = ((14-7)-5)-2 + */ + + /* + * First, find lower precedence addition and subtraction + * since the expression will be evaluated recursively. + */ + minus_op = strrchr(str, '-'); + if (minus_op) { /* - * Unfortunately, the modifier ".sym-offset" - * can confuse things. + * Unary minus is not supported in sub-expressions. If + * present, it is always the next root operator. */ - if (op - str >= 4 && !strncmp(op - 4, ".sym-offset", 11)) - return FIELD_OP_NONE; - - if (*str == '-') + if (minus_op == str) { field_op = FIELD_OP_UNARY_MINUS; - else - field_op = FIELD_OP_MINUS; - break; - case '+': - field_op = FIELD_OP_PLUS; - break; - default: - break; + goto out; + } + + field_op = FIELD_OP_MINUS; + } + + plus_op = strrchr(str, '+'); + if (plus_op || minus_op) { + /* + * For operators of the same precedence use to rightmost as the + * root, so that the expression is evaluated left to right. + */ + if (plus_op > minus_op) + field_op = FIELD_OP_PLUS; + goto out; + } + + /* + * Multiplication and division have higher precedence than addition and + * subtraction. + */ + div_op = strrchr(str, '/'); + if (div_op) + field_op = FIELD_OP_DIV; + + mult_op = strrchr(str, '*'); + /* + * For operators of the same precedence use to rightmost as the + * root, so that the expression is evaluated left to right. + */ + if (mult_op > div_op) + field_op = FIELD_OP_MULT; + +out: + if (sep) { + switch (field_op) { + case FIELD_OP_UNARY_MINUS: + case FIELD_OP_MINUS: + *sep = minus_op; + break; + case FIELD_OP_PLUS: + *sep = plus_op; + break; + case FIELD_OP_DIV: + *sep = div_op; + break; + case FIELD_OP_MULT: + *sep = mult_op; + break; + case FIELD_OP_NONE: + default: + *sep = NULL; + break; + } } return field_op; @@ -1689,6 +1815,15 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, goto out; } + if (flags & HIST_FIELD_FL_CONST) { + hist_field->fn = hist_field_const; + hist_field->size = sizeof(u64); + hist_field->type = kstrdup("u64", GFP_KERNEL); + if (!hist_field->type) + goto free; + goto out; + } + if (flags & HIST_FIELD_FL_STACKTRACE) { hist_field->fn = hist_field_none; goto out; @@ -1925,7 +2060,7 @@ static char *field_name_from_var(struct hist_trigger_data *hist_data, if (strcmp(var_name, name) == 0) { field = hist_data->attrs->var_defs.expr[i]; - if (contains_operator(field) || is_var_ref(field)) + if (contains_operator(field, NULL) || is_var_ref(field)) continue; return field; } @@ -2002,7 +2137,11 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, *flags |= HIST_FIELD_FL_HEX; else if (strcmp(modifier, "sym") == 0) *flags |= HIST_FIELD_FL_SYM; - else if (strcmp(modifier, "sym-offset") == 0) + /* + * 'sym-offset' occurrences in the trigger string are modified + * to 'symXoffset' to simplify arithmetic expression parsing. + */ + else if (strcmp(modifier, "symXoffset") == 0) *flags |= HIST_FIELD_FL_SYM_OFFSET; else if ((strcmp(modifier, "execname") == 0) && (strcmp(field_name, "common_pid") == 0)) @@ -2090,6 +2229,29 @@ static struct hist_field *create_alias(struct hist_trigger_data *hist_data, return alias; } +static struct hist_field *parse_const(struct hist_trigger_data *hist_data, + char *str, char *var_name, + unsigned long *flags) +{ + struct trace_array *tr = hist_data->event_file->tr; + struct hist_field *field = NULL; + u64 constant; + + if (kstrtoull(str, 0, &constant)) { + hist_err(tr, HIST_ERR_EXPECT_NUMBER, errpos(str)); + return NULL; + } + + *flags |= HIST_FIELD_FL_CONST; + field = create_hist_field(hist_data, NULL, *flags, var_name); + if (!field) + return NULL; + + field->constant = constant; + + return field; +} + static struct hist_field *parse_atom(struct hist_trigger_data *hist_data, struct trace_event_file *file, char *str, unsigned long *flags, char *var_name) @@ -2100,6 +2262,15 @@ static struct hist_field *parse_atom(struct hist_trigger_data *hist_data, unsigned long buckets = 0; int ret = 0; + if (isdigit(str[0])) { + hist_field = parse_const(hist_data, str, var_name, flags); + if (!hist_field) { + ret = -EINVAL; + goto out; + } + return hist_field; + } + s = strchr(str, '.'); if (s) { s = strchr(++s, '.'); @@ -2156,21 +2327,24 @@ static struct hist_field *parse_atom(struct hist_trigger_data *hist_data, static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, struct trace_event_file *file, char *str, unsigned long flags, - char *var_name, unsigned int level); + char *var_name, unsigned int *n_subexprs); static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, struct trace_event_file *file, char *str, unsigned long flags, - char *var_name, unsigned int level) + char *var_name, unsigned int *n_subexprs) { struct hist_field *operand1, *expr = NULL; unsigned long operand_flags; int ret = 0; char *s; + /* Unary minus operator, increment n_subexprs */ + ++*n_subexprs; + /* we support only -(xxx) i.e. explicit parens required */ - if (level > 3) { + if (*n_subexprs > 3) { hist_err(file->tr, HIST_ERR_TOO_MANY_SUBEXPR, errpos(str)); ret = -EINVAL; goto free; @@ -2187,8 +2361,16 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, } s = strrchr(str, ')'); - if (s) + if (s) { + /* unary minus not supported in sub-expressions */ + if (*(s+1) != '\0') { + hist_err(file->tr, HIST_ERR_UNARY_MINUS_SUBEXPR, + errpos(str)); + ret = -EINVAL; + goto free; + } *s = '\0'; + } else { ret = -EINVAL; /* no closing ')' */ goto free; @@ -2202,7 +2384,7 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, } operand_flags = 0; - operand1 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level); + operand1 = parse_expr(hist_data, file, str, operand_flags, NULL, n_subexprs); if (IS_ERR(operand1)) { ret = PTR_ERR(operand1); goto free; @@ -2233,9 +2415,15 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, return ERR_PTR(ret); } +/* + * If the operands are var refs, return pointers the + * variable(s) referenced in var1 and var2, else NULL. + */ static int check_expr_operands(struct trace_array *tr, struct hist_field *operand1, - struct hist_field *operand2) + struct hist_field *operand2, + struct hist_field **var1, + struct hist_field **var2) { unsigned long operand1_flags = operand1->flags; unsigned long operand2_flags = operand2->flags; @@ -2248,6 +2436,7 @@ static int check_expr_operands(struct trace_array *tr, if (!var) return -EINVAL; operand1_flags = var->flags; + *var1 = var; } if ((operand2_flags & HIST_FIELD_FL_VAR_REF) || @@ -2258,6 +2447,7 @@ static int check_expr_operands(struct trace_array *tr, if (!var) return -EINVAL; operand2_flags = var->flags; + *var2 = var; } if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) != @@ -2272,44 +2462,46 @@ static int check_expr_operands(struct trace_array *tr, static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, struct trace_event_file *file, char *str, unsigned long flags, - char *var_name, unsigned int level) + char *var_name, unsigned int *n_subexprs) { struct hist_field *operand1 = NULL, *operand2 = NULL, *expr = NULL; - unsigned long operand_flags; + struct hist_field *var1 = NULL, *var2 = NULL; + unsigned long operand_flags, operand2_flags; int field_op, ret = -EINVAL; char *sep, *operand1_str; + hist_field_fn_t op_fn; + bool combine_consts; - if (level > 3) { + if (*n_subexprs > 3) { hist_err(file->tr, HIST_ERR_TOO_MANY_SUBEXPR, errpos(str)); return ERR_PTR(-EINVAL); } - field_op = contains_operator(str); + field_op = contains_operator(str, &sep); if (field_op == FIELD_OP_NONE) return parse_atom(hist_data, file, str, &flags, var_name); if (field_op == FIELD_OP_UNARY_MINUS) - return parse_unary(hist_data, file, str, flags, var_name, ++level); + return parse_unary(hist_data, file, str, flags, var_name, n_subexprs); - switch (field_op) { - case FIELD_OP_MINUS: - sep = "-"; - break; - case FIELD_OP_PLUS: - sep = "+"; - break; - default: + /* Binary operator found, increment n_subexprs */ + ++*n_subexprs; + + /* Split the expression string at the root operator */ + if (!sep) goto free; - } + *sep = '\0'; + operand1_str = str; + str = sep+1; - operand1_str = strsep(&str, sep); if (!operand1_str || !str) goto free; operand_flags = 0; - operand1 = parse_atom(hist_data, file, operand1_str, - &operand_flags, NULL); + + /* LHS of string is an expression e.g. a+b in a+b+c */ + operand1 = parse_expr(hist_data, file, operand1_str, operand_flags, NULL, n_subexprs); if (IS_ERR(operand1)) { ret = PTR_ERR(operand1); operand1 = NULL; @@ -2321,9 +2513,9 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, goto free; } - /* rest of string could be another expression e.g. b+c in a+b+c */ + /* RHS of string is another expression e.g. c in a+b+c */ operand_flags = 0; - operand2 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level); + operand2 = parse_expr(hist_data, file, str, operand_flags, NULL, n_subexprs); if (IS_ERR(operand2)) { ret = PTR_ERR(operand2); operand2 = NULL; @@ -2335,11 +2527,38 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, goto free; } - ret = check_expr_operands(file->tr, operand1, operand2); + switch (field_op) { + case FIELD_OP_MINUS: + op_fn = hist_field_minus; + break; + case FIELD_OP_PLUS: + op_fn = hist_field_plus; + break; + case FIELD_OP_DIV: + op_fn = hist_field_div; + break; + case FIELD_OP_MULT: + op_fn = hist_field_mult; + break; + default: + ret = -EINVAL; + goto free; + } + + ret = check_expr_operands(file->tr, operand1, operand2, &var1, &var2); if (ret) goto free; - flags |= HIST_FIELD_FL_EXPR; + operand_flags = var1 ? var1->flags : operand1->flags; + operand2_flags = var2 ? var2->flags : operand2->flags; + + /* + * If both operands are constant, the expression can be + * collapsed to a single constant. + */ + combine_consts = operand_flags & operand2_flags & HIST_FIELD_FL_CONST; + + flags |= combine_consts ? HIST_FIELD_FL_CONST : HIST_FIELD_FL_EXPR; flags |= operand1->flags & (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); @@ -2356,31 +2575,43 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, expr->operands[0] = operand1; expr->operands[1] = operand2; - /* The operand sizes should be the same, so just pick one */ - expr->size = operand1->size; + if (combine_consts) { + if (var1) + expr->operands[0] = var1; + if (var2) + expr->operands[1] = var2; - expr->operator = field_op; - expr->name = expr_str(expr, 0); - expr->type = kstrdup_const(operand1->type, GFP_KERNEL); - if (!expr->type) { - ret = -ENOMEM; - goto free; - } + expr->constant = op_fn(expr, NULL, NULL, NULL, NULL); - switch (field_op) { - case FIELD_OP_MINUS: - expr->fn = hist_field_minus; - break; - case FIELD_OP_PLUS: - expr->fn = hist_field_plus; - break; - default: - ret = -EINVAL; - goto free; + expr->operands[0] = NULL; + expr->operands[1] = NULL; + + /* + * var refs won't be destroyed immediately + * See: destroy_hist_field() + */ + destroy_hist_field(operand2, 0); + destroy_hist_field(operand1, 0); + + expr->name = expr_str(expr, 0); + } else { + expr->fn = op_fn; + + /* The operand sizes should be the same, so just pick one */ + expr->size = operand1->size; + + expr->operator = field_op; + expr->type = kstrdup_const(operand1->type, GFP_KERNEL); + if (!expr->type) { + ret = -ENOMEM; + goto free; + } + + expr->name = expr_str(expr, 0); } return expr; - free: +free: destroy_hist_field(operand1, 0); destroy_hist_field(operand2, 0); destroy_hist_field(expr, 0); @@ -3751,9 +3982,9 @@ static int __create_val_field(struct hist_trigger_data *hist_data, unsigned long flags) { struct hist_field *hist_field; - int ret = 0; + int ret = 0, n_subexprs = 0; - hist_field = parse_expr(hist_data, file, field_str, flags, var_name, 0); + hist_field = parse_expr(hist_data, file, field_str, flags, var_name, &n_subexprs); if (IS_ERR(hist_field)) { ret = PTR_ERR(hist_field); goto out; @@ -3894,7 +4125,7 @@ static int create_key_field(struct hist_trigger_data *hist_data, struct hist_field *hist_field = NULL; unsigned long flags = 0; unsigned int key_size; - int ret = 0; + int ret = 0, n_subexprs = 0; if (WARN_ON(key_idx >= HIST_FIELDS_MAX)) return -EINVAL; @@ -3907,7 +4138,7 @@ static int create_key_field(struct hist_trigger_data *hist_data, hist_field = create_hist_field(hist_data, NULL, flags, NULL); } else { hist_field = parse_expr(hist_data, file, field_str, flags, - NULL, 0); + NULL, &n_subexprs); if (IS_ERR(hist_field)) { ret = PTR_ERR(hist_field); goto out; @@ -4706,7 +4937,6 @@ static void hist_trigger_stacktrace_print(struct seq_file *m, unsigned long *stacktrace_entries, unsigned int max_entries) { - char str[KSYM_SYMBOL_LEN]; unsigned int spaces = 8; unsigned int i; @@ -4715,8 +4945,7 @@ static void hist_trigger_stacktrace_print(struct seq_file *m, return; seq_printf(m, "%*c", 1 + spaces, ' '); - sprint_symbol(str, stacktrace_entries[i]); - seq_printf(m, "%s\n", str); + seq_printf(m, "%pS\n", (void*)stacktrace_entries[i]); } } @@ -4726,7 +4955,6 @@ static void hist_trigger_print_key(struct seq_file *m, struct tracing_map_elt *elt) { struct hist_field *key_field; - char str[KSYM_SYMBOL_LEN]; bool multiline = false; const char *field_name; unsigned int i; @@ -4747,14 +4975,12 @@ static void hist_trigger_print_key(struct seq_file *m, seq_printf(m, "%s: %llx", field_name, uval); } else if (key_field->flags & HIST_FIELD_FL_SYM) { uval = *(u64 *)(key + key_field->offset); - sprint_symbol_no_offset(str, uval); - seq_printf(m, "%s: [%llx] %-45s", field_name, - uval, str); + seq_printf(m, "%s: [%llx] %-45ps", field_name, + uval, (void *)(uintptr_t)uval); } else if (key_field->flags & HIST_FIELD_FL_SYM_OFFSET) { uval = *(u64 *)(key + key_field->offset); - sprint_symbol(str, uval); - seq_printf(m, "%s: [%llx] %-55s", field_name, - uval, str); + seq_printf(m, "%s: [%llx] %-55pS", field_name, + uval, (void *)(uintptr_t)uval); } else if (key_field->flags & HIST_FIELD_FL_EXECNAME) { struct hist_elt_data *elt_data = elt->private_data; char *comm; @@ -4950,6 +5176,8 @@ static void hist_field_debug_show_flags(struct seq_file *m, if (flags & HIST_FIELD_FL_ALIAS) seq_puts(m, " HIST_FIELD_FL_ALIAS\n"); + else if (flags & HIST_FIELD_FL_CONST) + seq_puts(m, " HIST_FIELD_FL_CONST\n"); } static int hist_field_debug_show(struct seq_file *m, @@ -4971,6 +5199,9 @@ static int hist_field_debug_show(struct seq_file *m, field->var.idx); } + if (field->flags & HIST_FIELD_FL_CONST) + seq_printf(m, " constant: %llu\n", field->constant); + if (field->flags & HIST_FIELD_FL_ALIAS) seq_printf(m, " var_ref_idx (into hist_data->var_refs[]): %u\n", field->var_ref_idx); @@ -5213,6 +5444,8 @@ static void hist_field_print(struct seq_file *m, struct hist_field *hist_field) if (hist_field->flags & HIST_FIELD_FL_CPU) seq_puts(m, "common_cpu"); + else if (hist_field->flags & HIST_FIELD_FL_CONST) + seq_printf(m, "%llu", hist_field->constant); else if (field_name) { if (hist_field->flags & HIST_FIELD_FL_VAR_REF || hist_field->flags & HIST_FIELD_FL_ALIAS) @@ -5795,7 +6028,7 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, struct synth_event *se; const char *se_name; bool remove = false; - char *trigger, *p; + char *trigger, *p, *start; int ret = 0; lockdep_assert_held(&event_mutex); @@ -5843,6 +6076,16 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, trigger = strstrip(trigger); } + /* + * To simplify arithmetic expression parsing, replace occurrences of + * '.sym-offset' modifier with '.symXoffset' + */ + start = strstr(trigger, ".sym-offset"); + while (start) { + *(start + 4) = 'X'; + start = strstr(start + 11, ".sym-offset"); + } + attrs = parse_hist_trigger_attrs(file->tr, trigger); if (IS_ERR(attrs)) return PTR_ERR(attrs); diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index d54094b7a9d7..22db3ce95e74 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -2227,8 +2227,8 @@ static __init int trace_events_synth_init(void) if (err) goto err; - entry = tracefs_create_file("synthetic_events", 0644, NULL, - NULL, &synth_events_fops); + entry = tracefs_create_file("synthetic_events", TRACE_MODE_WRITE, + NULL, NULL, &synth_events_fops); if (!entry) { err = -ENODEV; goto err; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 1f0e63f5d1f9..9f1bfbe105e8 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -186,7 +186,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, return; trace_ctx = tracing_gen_ctx(); - preempt_disable_notrace(); cpu = smp_processor_id(); data = per_cpu_ptr(tr->array_buffer.data, cpu); @@ -194,7 +193,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, trace_function(tr, ip, parent_ip, trace_ctx); ftrace_test_recursion_unlock(bit); - preempt_enable_notrace(); } #ifdef CONFIG_UNWINDER_ORC @@ -298,8 +296,6 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip, if (bit < 0) return; - preempt_disable_notrace(); - cpu = smp_processor_id(); data = per_cpu_ptr(tr->array_buffer.data, cpu); if (atomic_read(&data->disabled)) @@ -324,7 +320,6 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip, out: ftrace_test_recursion_unlock(bit); - preempt_enable_notrace(); } static void diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 0de6837722da..203204cadf92 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -120,7 +120,7 @@ static inline int ftrace_graph_ignore_irqs(void) if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT)) return 0; - return in_irq(); + return in_hardirq(); } int trace_graph_entry(struct ftrace_graph_ent *trace) @@ -1340,7 +1340,7 @@ static __init int init_graph_tracefs(void) if (ret) return 0; - trace_create_file("max_graph_depth", 0644, NULL, + trace_create_file("max_graph_depth", TRACE_MODE_WRITE, NULL, NULL, &graph_depth_fops); return 0; diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index 1b83d75eb103..56bb7b890578 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -79,8 +79,8 @@ struct hwlat_kthread_data { int nmi_cpu; }; -struct hwlat_kthread_data hwlat_single_cpu_data; -DEFINE_PER_CPU(struct hwlat_kthread_data, hwlat_per_cpu_data); +static struct hwlat_kthread_data hwlat_single_cpu_data; +static DEFINE_PER_CPU(struct hwlat_kthread_data, hwlat_per_cpu_data); /* Tells NMIs to call back to the hwlat tracer to record timestamps */ bool trace_hwlat_callback_enabled; @@ -782,21 +782,21 @@ static int init_tracefs(void) if (!top_dir) return -ENOMEM; - hwlat_sample_window = tracefs_create_file("window", 0640, + hwlat_sample_window = tracefs_create_file("window", TRACE_MODE_WRITE, top_dir, &hwlat_window, &trace_min_max_fops); if (!hwlat_sample_window) goto err; - hwlat_sample_width = tracefs_create_file("width", 0644, + hwlat_sample_width = tracefs_create_file("width", TRACE_MODE_WRITE, top_dir, &hwlat_width, &trace_min_max_fops); if (!hwlat_sample_width) goto err; - hwlat_thread_mode = trace_create_file("mode", 0644, + hwlat_thread_mode = trace_create_file("mode", TRACE_MODE_WRITE, top_dir, NULL, &thread_mode_fops); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 3a64ba4bbad6..33272a7b6912 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -97,7 +97,7 @@ static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk) { - return !!(kprobe_gone(&tk->rp.kp)); + return kprobe_gone(&tk->rp.kp); } static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk, @@ -1925,16 +1925,16 @@ static __init int init_kprobe_trace(void) if (ret) return 0; - entry = tracefs_create_file("kprobe_events", 0644, NULL, - NULL, &kprobe_events_ops); + entry = tracefs_create_file("kprobe_events", TRACE_MODE_WRITE, + NULL, NULL, &kprobe_events_ops); /* Event list interface */ if (!entry) pr_warn("Could not create tracefs 'kprobe_events' entry\n"); /* Profile interface */ - entry = tracefs_create_file("kprobe_profile", 0444, NULL, - NULL, &kprobe_profile_ops); + entry = tracefs_create_file("kprobe_profile", TRACE_MODE_READ, + NULL, NULL, &kprobe_profile_ops); if (!entry) pr_warn("Could not create tracefs 'kprobe_profile' entry\n"); diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index ce053619f289..d11b41784fac 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -294,19 +294,19 @@ static void print_osnoise_headers(struct seq_file *s) seq_puts(s, "# _-----=> irqs-off\n"); seq_puts(s, "# / _----=> need-resched\n"); seq_puts(s, "# | / _---=> hardirq/softirq\n"); - seq_puts(s, "# || / _--=> preempt-depth "); - seq_puts(s, " MAX\n"); - - seq_puts(s, "# || / "); + seq_puts(s, "# || / _--=> preempt-depth\n"); + seq_puts(s, "# ||| / _-=> migrate-disable "); + seq_puts(s, " MAX\n"); + seq_puts(s, "# |||| / delay "); seq_puts(s, " SINGLE Interference counters:\n"); - seq_puts(s, "# |||| RUNTIME "); + seq_puts(s, "# ||||| RUNTIME "); seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); - seq_puts(s, "# TASK-PID CPU# |||| TIMESTAMP IN US "); + seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP IN US "); seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); - seq_puts(s, "# | | | |||| | | "); + seq_puts(s, "# | | | ||||| | | "); seq_puts(s, " | | | | | | | |\n"); } #endif /* CONFIG_PREEMPT_RT */ @@ -378,11 +378,12 @@ static void print_timerlat_headers(struct seq_file *s) seq_puts(s, "# / _----=> need-resched\n"); seq_puts(s, "# | / _---=> hardirq/softirq\n"); seq_puts(s, "# || / _--=> preempt-depth\n"); - seq_puts(s, "# || /\n"); - seq_puts(s, "# |||| ACTIVATION\n"); - seq_puts(s, "# TASK-PID CPU# |||| TIMESTAMP ID "); - seq_puts(s, " CONTEXT LATENCY\n"); - seq_puts(s, "# | | | |||| | | "); + seq_puts(s, "# ||| / _-=> migrate-disable\n"); + seq_puts(s, "# |||| / delay\n"); + seq_puts(s, "# ||||| ACTIVATION\n"); + seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP ID "); + seq_puts(s, " CONTEXT LATENCY\n"); + seq_puts(s, "# | | | ||||| | | "); seq_puts(s, " | |\n"); } #endif /* CONFIG_PREEMPT_RT */ @@ -1856,38 +1857,38 @@ static int init_tracefs(void) if (!top_dir) return 0; - tmp = tracefs_create_file("period_us", 0640, top_dir, + tmp = tracefs_create_file("period_us", TRACE_MODE_WRITE, top_dir, &osnoise_period, &trace_min_max_fops); if (!tmp) goto err; - tmp = tracefs_create_file("runtime_us", 0644, top_dir, + tmp = tracefs_create_file("runtime_us", TRACE_MODE_WRITE, top_dir, &osnoise_runtime, &trace_min_max_fops); if (!tmp) goto err; - tmp = tracefs_create_file("stop_tracing_us", 0640, top_dir, + tmp = tracefs_create_file("stop_tracing_us", TRACE_MODE_WRITE, top_dir, &osnoise_stop_tracing_in, &trace_min_max_fops); if (!tmp) goto err; - tmp = tracefs_create_file("stop_tracing_total_us", 0640, top_dir, + tmp = tracefs_create_file("stop_tracing_total_us", TRACE_MODE_WRITE, top_dir, &osnoise_stop_tracing_total, &trace_min_max_fops); if (!tmp) goto err; - tmp = trace_create_file("cpus", 0644, top_dir, NULL, &cpus_fops); + tmp = trace_create_file("cpus", TRACE_MODE_WRITE, top_dir, NULL, &cpus_fops); if (!tmp) goto err; #ifdef CONFIG_TIMERLAT_TRACER #ifdef CONFIG_STACKTRACE - tmp = tracefs_create_file("print_stack", 0640, top_dir, + tmp = tracefs_create_file("print_stack", TRACE_MODE_WRITE, top_dir, &osnoise_print_stack, &trace_min_max_fops); if (!tmp) goto err; #endif - tmp = tracefs_create_file("timerlat_period_us", 0640, top_dir, + tmp = tracefs_create_file("timerlat_period_us", TRACE_MODE_WRITE, top_dir, &timerlat_period, &trace_min_max_fops); if (!tmp) goto err; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index c2ca40e8595b..3547e7176ff7 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -8,6 +8,7 @@ #include <linux/module.h> #include <linux/mutex.h> #include <linux/ftrace.h> +#include <linux/kprobes.h> #include <linux/sched/clock.h> #include <linux/sched/mm.h> @@ -346,22 +347,12 @@ int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...) } EXPORT_SYMBOL_GPL(trace_output_call); -#ifdef CONFIG_KRETPROBES -static inline const char *kretprobed(const char *name) +static inline const char *kretprobed(const char *name, unsigned long addr) { - static const char tramp_name[] = "kretprobe_trampoline"; - int size = sizeof(tramp_name); - - if (strncmp(tramp_name, name, size) == 0) + if (is_kretprobe_trampoline(addr)) return "[unknown/kretprobe'd]"; return name; } -#else -static inline const char *kretprobed(const char *name) -{ - return name; -} -#endif /* CONFIG_KRETPROBES */ void trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset) @@ -374,7 +365,7 @@ trace_seq_print_sym(struct trace_seq *s, unsigned long address, bool offset) sprint_symbol(str, address); else kallsyms_lookup(address, NULL, NULL, NULL, str); - name = kretprobed(str); + name = kretprobed(str, address); if (name && strlen(name)) { trace_seq_puts(s, name); diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c index 4b320fe7df70..29f6e95439b6 100644 --- a/kernel/trace/trace_printk.c +++ b/kernel/trace/trace_printk.c @@ -384,7 +384,7 @@ static __init int init_trace_printk_function_export(void) if (ret) return 0; - trace_create_file("printk_formats", 0444, NULL, + trace_create_file("printk_formats", TRACE_MODE_READ, NULL, NULL, &ftrace_formats_fops); return 0; diff --git a/kernel/trace/trace_recursion_record.c b/kernel/trace/trace_recursion_record.c index b2edac1fe156..4d4b78c8ca25 100644 --- a/kernel/trace/trace_recursion_record.c +++ b/kernel/trace/trace_recursion_record.c @@ -226,8 +226,8 @@ __init static int create_recursed_functions(void) { struct dentry *dentry; - dentry = trace_create_file("recursed_functions", 0644, NULL, NULL, - &recursed_functions_fops); + dentry = trace_create_file("recursed_functions", TRACE_MODE_WRITE, + NULL, NULL, &recursed_functions_fops); if (!dentry) pr_warn("WARNING: Failed to create recursed_functions\n"); return 0; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index adf7ef194005..afd937a46496 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -287,6 +287,40 @@ static int trace_selftest_ops(struct trace_array *tr, int cnt) if (trace_selftest_test_probe3_cnt != 4) goto out_free; + /* Remove trace function from probe 3 */ + func1_name = "!" __stringify(DYN_FTRACE_TEST_NAME); + len1 = strlen(func1_name); + + ftrace_set_filter(&test_probe3, func1_name, len1, 0); + + DYN_FTRACE_TEST_NAME(); + + print_counts(); + + if (trace_selftest_test_probe1_cnt != 3) + goto out_free; + if (trace_selftest_test_probe2_cnt != 2) + goto out_free; + if (trace_selftest_test_probe3_cnt != 4) + goto out_free; + if (cnt > 1) { + if (trace_selftest_test_global_cnt == 0) + goto out_free; + } + if (trace_selftest_test_dyn_cnt == 0) + goto out_free; + + DYN_FTRACE_TEST_NAME2(); + + print_counts(); + + if (trace_selftest_test_probe1_cnt != 3) + goto out_free; + if (trace_selftest_test_probe2_cnt != 3) + goto out_free; + if (trace_selftest_test_probe3_cnt != 5) + goto out_free; + ret = 0; out_free: unregister_ftrace_function(dyn_ops); @@ -750,6 +784,12 @@ static struct fgraph_ops fgraph_ops __initdata = { .retfunc = &trace_graph_return, }; +#if defined(CONFIG_DYNAMIC_FTRACE) && \ + defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) +#define TEST_DIRECT_TRAMP +noinline __noclone static void trace_direct_tramp(void) { } +#endif + /* * Pretty much the same than for the function tracer from which the selftest * has been borrowed. @@ -760,6 +800,7 @@ trace_selftest_startup_function_graph(struct tracer *trace, { int ret; unsigned long count; + char *func_name __maybe_unused; #ifdef CONFIG_DYNAMIC_FTRACE if (ftrace_filter_param) { @@ -808,8 +849,57 @@ trace_selftest_startup_function_graph(struct tracer *trace, goto out; } - /* Don't test dynamic tracing, the function tracer already did */ +#ifdef TEST_DIRECT_TRAMP + tracing_reset_online_cpus(&tr->array_buffer); + set_graph_array(tr); + + /* + * Some archs *cough*PowerPC*cough* add characters to the + * start of the function names. We simply put a '*' to + * accommodate them. + */ + func_name = "*" __stringify(DYN_FTRACE_TEST_NAME); + ftrace_set_global_filter(func_name, strlen(func_name), 1); + + /* + * Register direct function together with graph tracer + * and make sure we get graph trace. + */ + ret = register_ftrace_direct((unsigned long) DYN_FTRACE_TEST_NAME, + (unsigned long) trace_direct_tramp); + if (ret) + goto out; + + ret = register_ftrace_graph(&fgraph_ops); + if (ret) { + warn_failed_init_tracer(trace, ret); + goto out; + } + + DYN_FTRACE_TEST_NAME(); + + count = 0; + tracing_stop(); + /* check the trace buffer */ + ret = trace_test_buffer(&tr->array_buffer, &count); + + unregister_ftrace_graph(&fgraph_ops); + + ret = unregister_ftrace_direct((unsigned long) DYN_FTRACE_TEST_NAME, + (unsigned long) trace_direct_tramp); + if (ret) + goto out; + + tracing_start(); + + if (!ret && !count) { + ret = -1; + goto out; + } +#endif + + /* Don't test dynamic tracing, the function tracer already did */ out: /* Stop it if we failed */ if (ret) diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 63c285042051..5a48dba912ea 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -559,14 +559,14 @@ static __init int stack_trace_init(void) if (ret) return 0; - trace_create_file("stack_max_size", 0644, NULL, + trace_create_file("stack_max_size", TRACE_MODE_WRITE, NULL, &stack_trace_max_size, &stack_max_size_fops); - trace_create_file("stack_trace", 0444, NULL, + trace_create_file("stack_trace", TRACE_MODE_READ, NULL, NULL, &stack_trace_fops); #ifdef CONFIG_DYNAMIC_FTRACE - trace_create_file("stack_trace_filter", 0644, NULL, + trace_create_file("stack_trace_filter", TRACE_MODE_WRITE, NULL, &trace_ops, &stack_trace_filter_fops); #endif diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 8d141c3825a9..bb247beec447 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -297,9 +297,9 @@ static int init_stat_file(struct stat_session *session) if (!stat_dir && (ret = tracing_stat_init())) return ret; - session->file = tracefs_create_file(session->ts->name, 0644, - stat_dir, - session, &tracing_stat_fops); + session->file = tracefs_create_file(session->ts->name, TRACE_MODE_WRITE, + stat_dir, session, + &tracing_stat_fops); if (!session->file) return -ENOMEM; return 0; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 225ce569bf8f..0a5c0db3137e 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1655,10 +1655,10 @@ static __init int init_uprobe_trace(void) if (ret) return 0; - trace_create_file("uprobe_events", 0644, NULL, + trace_create_file("uprobe_events", TRACE_MODE_WRITE, NULL, NULL, &uprobe_events_ops); /* Profile interface */ - trace_create_file("uprobe_profile", 0444, NULL, + trace_create_file("uprobe_profile", TRACE_MODE_READ, NULL, NULL, &uprobe_profile_ops); return 0; } diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index d6bddb157ef2..39bb56d2dcbe 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -834,29 +834,35 @@ int tracing_map_init(struct tracing_map *map) return err; } -static int cmp_entries_dup(const struct tracing_map_sort_entry **a, - const struct tracing_map_sort_entry **b) +static int cmp_entries_dup(const void *A, const void *B) { + const struct tracing_map_sort_entry *a, *b; int ret = 0; - if (memcmp((*a)->key, (*b)->key, (*a)->elt->map->key_size)) + a = *(const struct tracing_map_sort_entry **)A; + b = *(const struct tracing_map_sort_entry **)B; + + if (memcmp(a->key, b->key, a->elt->map->key_size)) ret = 1; return ret; } -static int cmp_entries_sum(const struct tracing_map_sort_entry **a, - const struct tracing_map_sort_entry **b) +static int cmp_entries_sum(const void *A, const void *B) { const struct tracing_map_elt *elt_a, *elt_b; + const struct tracing_map_sort_entry *a, *b; struct tracing_map_sort_key *sort_key; struct tracing_map_field *field; tracing_map_cmp_fn_t cmp_fn; void *val_a, *val_b; int ret = 0; - elt_a = (*a)->elt; - elt_b = (*b)->elt; + a = *(const struct tracing_map_sort_entry **)A; + b = *(const struct tracing_map_sort_entry **)B; + + elt_a = a->elt; + elt_b = b->elt; sort_key = &elt_a->map->sort_key; @@ -873,18 +879,21 @@ static int cmp_entries_sum(const struct tracing_map_sort_entry **a, return ret; } -static int cmp_entries_key(const struct tracing_map_sort_entry **a, - const struct tracing_map_sort_entry **b) +static int cmp_entries_key(const void *A, const void *B) { const struct tracing_map_elt *elt_a, *elt_b; + const struct tracing_map_sort_entry *a, *b; struct tracing_map_sort_key *sort_key; struct tracing_map_field *field; tracing_map_cmp_fn_t cmp_fn; void *val_a, *val_b; int ret = 0; - elt_a = (*a)->elt; - elt_b = (*b)->elt; + a = *(const struct tracing_map_sort_entry **)A; + b = *(const struct tracing_map_sort_entry **)B; + + elt_a = a->elt; + elt_b = b->elt; sort_key = &elt_a->map->sort_key; @@ -989,10 +998,8 @@ static void sort_secondary(struct tracing_map *map, struct tracing_map_sort_key *primary_key, struct tracing_map_sort_key *secondary_key) { - int (*primary_fn)(const struct tracing_map_sort_entry **, - const struct tracing_map_sort_entry **); - int (*secondary_fn)(const struct tracing_map_sort_entry **, - const struct tracing_map_sort_entry **); + int (*primary_fn)(const void *, const void *); + int (*secondary_fn)(const void *, const void *); unsigned i, start = 0, n_sub = 1; if (is_key(map, primary_key->field_idx)) @@ -1061,8 +1068,7 @@ int tracing_map_sort_entries(struct tracing_map *map, unsigned int n_sort_keys, struct tracing_map_sort_entry ***sort_entries) { - int (*cmp_entries_fn)(const struct tracing_map_sort_entry **, - const struct tracing_map_sort_entry **); + int (*cmp_entries_fn)(const void *, const void *); struct tracing_map_sort_entry *sort_entry, **entries; int i, n_entries, ret; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1b99aead800c..6fdbf9613aec 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2080,9 +2080,10 @@ config TEST_DIV64 If unsure, say N. config KPROBES_SANITY_TEST - bool "Kprobes sanity tests" + tristate "Kprobes sanity tests" depends on DEBUG_KERNEL depends on KPROBES + depends on KUNIT help This option provides for testing basic kprobes functionality on boot. Samples of kprobe and kretprobe are inserted and diff --git a/lib/Makefile b/lib/Makefile index b0cb89451cd3..364c23f15578 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -100,6 +100,7 @@ obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o obj-$(CONFIG_TEST_LOCKUP) += test_lockup.o obj-$(CONFIG_TEST_HMM) += test_hmm.o obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o +obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o # # CFLAGS for compiling floating point code inside the kernel. x86/Makefile turns diff --git a/lib/bootconfig.c b/lib/bootconfig.c index 5ae248b29373..70e0d52ffd24 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -4,16 +4,24 @@ * Masami Hiramatsu <mhiramat@kernel.org> */ -#define pr_fmt(fmt) "bootconfig: " fmt - +#ifdef __KERNEL__ #include <linux/bootconfig.h> #include <linux/bug.h> #include <linux/ctype.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/memblock.h> -#include <linux/printk.h> #include <linux/string.h> +#else /* !__KERNEL__ */ +/* + * NOTE: This is only for tools/bootconfig, because tools/bootconfig will + * run the parser sanity test. + * This does NOT mean lib/bootconfig.c is available in the user space. + * However, if you change this file, please make sure the tools/bootconfig + * has no issue on building and running. + */ +#include <linux/bootconfig.h> +#endif /* * Extra Boot Config (XBC) is given as tree-structured ascii text of @@ -34,6 +42,50 @@ static int xbc_err_pos __initdata; static int open_brace[XBC_DEPTH_MAX] __initdata; static int brace_index __initdata; +#ifdef __KERNEL__ +static inline void * __init xbc_alloc_mem(size_t size) +{ + return memblock_alloc(size, SMP_CACHE_BYTES); +} + +static inline void __init xbc_free_mem(void *addr, size_t size) +{ + memblock_free_ptr(addr, size); +} + +#else /* !__KERNEL__ */ + +static inline void *xbc_alloc_mem(size_t size) +{ + return malloc(size); +} + +static inline void xbc_free_mem(void *addr, size_t size) +{ + free(addr); +} +#endif +/** + * xbc_get_info() - Get the information of loaded boot config + * @node_size: A pointer to store the number of nodes. + * @data_size: A pointer to store the size of bootconfig data. + * + * Get the number of used nodes in @node_size if it is not NULL, + * and the size of bootconfig data in @data_size if it is not NULL. + * Return 0 if the boot config is initialized, or return -ENODEV. + */ +int __init xbc_get_info(int *node_size, size_t *data_size) +{ + if (!xbc_data) + return -ENODEV; + + if (node_size) + *node_size = xbc_node_num; + if (data_size) + *data_size = xbc_data_size; + return 0; +} + static int __init xbc_parse_error(const char *msg, const char *p) { xbc_err_msg = msg; @@ -226,7 +278,7 @@ int __init xbc_node_compose_key_after(struct xbc_node *root, struct xbc_node *node, char *buf, size_t size) { - u16 keys[XBC_DEPTH_MAX]; + uint16_t keys[XBC_DEPTH_MAX]; int depth = 0, ret = 0, total = 0; if (!node || node == root) @@ -341,21 +393,21 @@ const char * __init xbc_node_find_next_key_value(struct xbc_node *root, /* XBC parse and tree build */ -static int __init xbc_init_node(struct xbc_node *node, char *data, u32 flag) +static int __init xbc_init_node(struct xbc_node *node, char *data, uint32_t flag) { unsigned long offset = data - xbc_data; if (WARN_ON(offset >= XBC_DATA_MAX)) return -EINVAL; - node->data = (u16)offset | flag; + node->data = (uint16_t)offset | flag; node->child = 0; node->next = 0; return 0; } -static struct xbc_node * __init xbc_add_node(char *data, u32 flag) +static struct xbc_node * __init xbc_add_node(char *data, uint32_t flag) { struct xbc_node *node; @@ -385,7 +437,7 @@ static inline __init struct xbc_node *xbc_last_child(struct xbc_node *node) return node; } -static struct xbc_node * __init __xbc_add_sibling(char *data, u32 flag, bool head) +static struct xbc_node * __init __xbc_add_sibling(char *data, uint32_t flag, bool head) { struct xbc_node *sib, *node = xbc_add_node(data, flag); @@ -412,17 +464,17 @@ static struct xbc_node * __init __xbc_add_sibling(char *data, u32 flag, bool hea return node; } -static inline struct xbc_node * __init xbc_add_sibling(char *data, u32 flag) +static inline struct xbc_node * __init xbc_add_sibling(char *data, uint32_t flag) { return __xbc_add_sibling(data, flag, false); } -static inline struct xbc_node * __init xbc_add_head_sibling(char *data, u32 flag) +static inline struct xbc_node * __init xbc_add_head_sibling(char *data, uint32_t flag) { return __xbc_add_sibling(data, flag, true); } -static inline __init struct xbc_node *xbc_add_child(char *data, u32 flag) +static inline __init struct xbc_node *xbc_add_child(char *data, uint32_t flag) { struct xbc_node *node = xbc_add_sibling(data, flag); @@ -780,41 +832,94 @@ static int __init xbc_verify_tree(void) return 0; } +/* Need to setup xbc_data and xbc_nodes before call this. */ +static int __init xbc_parse_tree(void) +{ + char *p, *q; + int ret = 0, c; + + last_parent = NULL; + p = xbc_data; + do { + q = strpbrk(p, "{}=+;:\n#"); + if (!q) { + p = skip_spaces(p); + if (*p != '\0') + ret = xbc_parse_error("No delimiter", p); + break; + } + + c = *q; + *q++ = '\0'; + switch (c) { + case ':': + case '+': + if (*q++ != '=') { + ret = xbc_parse_error(c == '+' ? + "Wrong '+' operator" : + "Wrong ':' operator", + q - 2); + break; + } + fallthrough; + case '=': + ret = xbc_parse_kv(&p, q, c); + break; + case '{': + ret = xbc_open_brace(&p, q); + break; + case '#': + q = skip_comment(q); + fallthrough; + case ';': + case '\n': + ret = xbc_parse_key(&p, q); + break; + case '}': + ret = xbc_close_brace(&p, q); + break; + } + } while (!ret); + + return ret; +} + /** - * xbc_destroy_all() - Clean up all parsed bootconfig + * xbc_exit() - Clean up all parsed bootconfig * * This clears all data structures of parsed bootconfig on memory. * If you need to reuse xbc_init() with new boot config, you can * use this. */ -void __init xbc_destroy_all(void) +void __init xbc_exit(void) { + xbc_free_mem(xbc_data, xbc_data_size); xbc_data = NULL; xbc_data_size = 0; xbc_node_num = 0; - memblock_free_ptr(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX); + xbc_free_mem(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX); xbc_nodes = NULL; brace_index = 0; } /** * xbc_init() - Parse given XBC file and build XBC internal tree - * @buf: boot config text + * @data: The boot config text original data + * @size: The size of @data * @emsg: A pointer of const char * to store the error message * @epos: A pointer of int to store the error position * - * This parses the boot config text in @buf. @buf must be a - * null terminated string and smaller than XBC_DATA_MAX. + * This parses the boot config text in @data. @size must be smaller + * than XBC_DATA_MAX. * Return the number of stored nodes (>0) if succeeded, or -errno * if there is any error. * In error cases, @emsg will be updated with an error message and * @epos will be updated with the error position which is the byte offset * of @buf. If the error is not a parser error, @epos will be -1. */ -int __init xbc_init(char *buf, const char **emsg, int *epos) +int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos) { - char *p, *q; - int ret, c; + int ret; if (epos) *epos = -1; @@ -824,69 +929,33 @@ int __init xbc_init(char *buf, const char **emsg, int *epos) *emsg = "Bootconfig is already initialized"; return -EBUSY; } - - ret = strlen(buf); - if (ret > XBC_DATA_MAX - 1 || ret == 0) { + if (size > XBC_DATA_MAX || size == 0) { if (emsg) - *emsg = ret ? "Config data is too big" : + *emsg = size ? "Config data is too big" : "Config data is empty"; return -ERANGE; } - xbc_nodes = memblock_alloc(sizeof(struct xbc_node) * XBC_NODE_MAX, - SMP_CACHE_BYTES); + xbc_data = xbc_alloc_mem(size + 1); + if (!xbc_data) { + if (emsg) + *emsg = "Failed to allocate bootconfig data"; + return -ENOMEM; + } + memcpy(xbc_data, data, size); + xbc_data[size] = '\0'; + xbc_data_size = size + 1; + + xbc_nodes = xbc_alloc_mem(sizeof(struct xbc_node) * XBC_NODE_MAX); if (!xbc_nodes) { if (emsg) *emsg = "Failed to allocate bootconfig nodes"; + xbc_exit(); return -ENOMEM; } memset(xbc_nodes, 0, sizeof(struct xbc_node) * XBC_NODE_MAX); - xbc_data = buf; - xbc_data_size = ret + 1; - last_parent = NULL; - - p = buf; - do { - q = strpbrk(p, "{}=+;:\n#"); - if (!q) { - p = skip_spaces(p); - if (*p != '\0') - ret = xbc_parse_error("No delimiter", p); - break; - } - - c = *q; - *q++ = '\0'; - switch (c) { - case ':': - case '+': - if (*q++ != '=') { - ret = xbc_parse_error(c == '+' ? - "Wrong '+' operator" : - "Wrong ':' operator", - q - 2); - break; - } - fallthrough; - case '=': - ret = xbc_parse_kv(&p, q, c); - break; - case '{': - ret = xbc_open_brace(&p, q); - break; - case '#': - q = skip_comment(q); - fallthrough; - case ';': - case '\n': - ret = xbc_parse_key(&p, q); - break; - case '}': - ret = xbc_close_brace(&p, q); - break; - } - } while (!ret); + ret = xbc_parse_tree(); if (!ret) ret = xbc_verify_tree(); @@ -895,27 +964,9 @@ int __init xbc_init(char *buf, const char **emsg, int *epos) *epos = xbc_err_pos; if (emsg) *emsg = xbc_err_msg; - xbc_destroy_all(); + xbc_exit(); } else ret = xbc_node_num; return ret; } - -/** - * xbc_debug_dump() - Dump current XBC node list - * - * Dump the current XBC node list on printk buffer for debug. - */ -void __init xbc_debug_dump(void) -{ - int i; - - for (i = 0; i < xbc_node_num; i++) { - pr_debug("[%d] %s (%s) .next=%d, .child=%d .parent=%d\n", i, - xbc_node_get_data(xbc_nodes + i), - xbc_node_is_value(xbc_nodes + i) ? "value" : "key", - xbc_nodes[i].next, xbc_nodes[i].child, - xbc_nodes[i].parent); - } -} diff --git a/lib/error-inject.c b/lib/error-inject.c index c73651b15b76..2ff5ef689d72 100644 --- a/lib/error-inject.c +++ b/lib/error-inject.c @@ -8,6 +8,7 @@ #include <linux/mutex.h> #include <linux/list.h> #include <linux/slab.h> +#include <asm/sections.h> /* Whitelist of symbols that can be overridden for error injection. */ static LIST_HEAD(error_injection_list); @@ -64,7 +65,7 @@ static void populate_error_injection_list(struct error_injection_entry *start, mutex_lock(&ei_mutex); for (iter = start; iter < end; iter++) { - entry = arch_deref_entry_point((void *)iter->addr); + entry = (unsigned long)dereference_symbol_descriptor((void *)iter->addr); if (!kernel_text_address(entry) || !kallsyms_lookup_size_offset(entry, &size, &offset)) { diff --git a/lib/test_kprobes.c b/lib/test_kprobes.c new file mode 100644 index 000000000000..a5edc2ebc947 --- /dev/null +++ b/lib/test_kprobes.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * test_kprobes.c - simple sanity test for *probes + * + * Copyright IBM Corp. 2008 + */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/random.h> +#include <kunit/test.h> + +#define div_factor 3 + +static u32 rand1, preh_val, posth_val; +static u32 (*target)(u32 value); +static u32 (*target2)(u32 value); +static struct kunit *current_test; + +static unsigned long (*internal_target)(void); +static unsigned long (*stacktrace_target)(void); +static unsigned long (*stacktrace_driver)(void); +static unsigned long target_return_address[2]; + +static noinline u32 kprobe_target(u32 value) +{ + return (value / div_factor); +} + +static int kp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + KUNIT_EXPECT_FALSE(current_test, preemptible()); + preh_val = (rand1 / div_factor); + return 0; +} + +static void kp_post_handler(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ + KUNIT_EXPECT_FALSE(current_test, preemptible()); + KUNIT_EXPECT_EQ(current_test, preh_val, (rand1 / div_factor)); + posth_val = preh_val + div_factor; +} + +static struct kprobe kp = { + .symbol_name = "kprobe_target", + .pre_handler = kp_pre_handler, + .post_handler = kp_post_handler +}; + +static void test_kprobe(struct kunit *test) +{ + current_test = test; + KUNIT_EXPECT_EQ(test, 0, register_kprobe(&kp)); + target(rand1); + unregister_kprobe(&kp); + KUNIT_EXPECT_NE(test, 0, preh_val); + KUNIT_EXPECT_NE(test, 0, posth_val); +} + +static noinline u32 kprobe_target2(u32 value) +{ + return (value / div_factor) + 1; +} + +static noinline unsigned long kprobe_stacktrace_internal_target(void) +{ + if (!target_return_address[0]) + target_return_address[0] = (unsigned long)__builtin_return_address(0); + return target_return_address[0]; +} + +static noinline unsigned long kprobe_stacktrace_target(void) +{ + if (!target_return_address[1]) + target_return_address[1] = (unsigned long)__builtin_return_address(0); + + if (internal_target) + internal_target(); + + return target_return_address[1]; +} + +static noinline unsigned long kprobe_stacktrace_driver(void) +{ + if (stacktrace_target) + stacktrace_target(); + + /* This is for preventing inlining the function */ + return (unsigned long)__builtin_return_address(0); +} + +static int kp_pre_handler2(struct kprobe *p, struct pt_regs *regs) +{ + preh_val = (rand1 / div_factor) + 1; + return 0; +} + +static void kp_post_handler2(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ + KUNIT_EXPECT_EQ(current_test, preh_val, (rand1 / div_factor) + 1); + posth_val = preh_val + div_factor; +} + +static struct kprobe kp2 = { + .symbol_name = "kprobe_target2", + .pre_handler = kp_pre_handler2, + .post_handler = kp_post_handler2 +}; + +static void test_kprobes(struct kunit *test) +{ + struct kprobe *kps[2] = {&kp, &kp2}; + + current_test = test; + + /* addr and flags should be cleard for reusing kprobe. */ + kp.addr = NULL; + kp.flags = 0; + + KUNIT_EXPECT_EQ(test, 0, register_kprobes(kps, 2)); + preh_val = 0; + posth_val = 0; + target(rand1); + + KUNIT_EXPECT_NE(test, 0, preh_val); + KUNIT_EXPECT_NE(test, 0, posth_val); + + preh_val = 0; + posth_val = 0; + target2(rand1); + + KUNIT_EXPECT_NE(test, 0, preh_val); + KUNIT_EXPECT_NE(test, 0, posth_val); + unregister_kprobes(kps, 2); +} + +#ifdef CONFIG_KRETPROBES +static u32 krph_val; + +static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + KUNIT_EXPECT_FALSE(current_test, preemptible()); + krph_val = (rand1 / div_factor); + return 0; +} + +static int return_handler(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + unsigned long ret = regs_return_value(regs); + + KUNIT_EXPECT_FALSE(current_test, preemptible()); + KUNIT_EXPECT_EQ(current_test, ret, rand1 / div_factor); + KUNIT_EXPECT_NE(current_test, krph_val, 0); + krph_val = rand1; + return 0; +} + +static struct kretprobe rp = { + .handler = return_handler, + .entry_handler = entry_handler, + .kp.symbol_name = "kprobe_target" +}; + +static void test_kretprobe(struct kunit *test) +{ + current_test = test; + KUNIT_EXPECT_EQ(test, 0, register_kretprobe(&rp)); + target(rand1); + unregister_kretprobe(&rp); + KUNIT_EXPECT_EQ(test, krph_val, rand1); +} + +static int return_handler2(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + unsigned long ret = regs_return_value(regs); + + KUNIT_EXPECT_EQ(current_test, ret, (rand1 / div_factor) + 1); + KUNIT_EXPECT_NE(current_test, krph_val, 0); + krph_val = rand1; + return 0; +} + +static struct kretprobe rp2 = { + .handler = return_handler2, + .entry_handler = entry_handler, + .kp.symbol_name = "kprobe_target2" +}; + +static void test_kretprobes(struct kunit *test) +{ + struct kretprobe *rps[2] = {&rp, &rp2}; + + current_test = test; + /* addr and flags should be cleard for reusing kprobe. */ + rp.kp.addr = NULL; + rp.kp.flags = 0; + KUNIT_EXPECT_EQ(test, 0, register_kretprobes(rps, 2)); + + krph_val = 0; + target(rand1); + KUNIT_EXPECT_EQ(test, krph_val, rand1); + + krph_val = 0; + target2(rand1); + KUNIT_EXPECT_EQ(test, krph_val, rand1); + unregister_kretprobes(rps, 2); +} + +#ifdef CONFIG_ARCH_CORRECT_STACKTRACE_ON_KRETPROBE +#define STACK_BUF_SIZE 16 +static unsigned long stack_buf[STACK_BUF_SIZE]; + +static int stacktrace_return_handler(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + unsigned long retval = regs_return_value(regs); + int i, ret; + + KUNIT_EXPECT_FALSE(current_test, preemptible()); + KUNIT_EXPECT_EQ(current_test, retval, target_return_address[1]); + + /* + * Test stacktrace inside the kretprobe handler, this will involves + * kretprobe trampoline, but must include correct return address + * of the target function. + */ + ret = stack_trace_save(stack_buf, STACK_BUF_SIZE, 0); + KUNIT_EXPECT_NE(current_test, ret, 0); + + for (i = 0; i < ret; i++) { + if (stack_buf[i] == target_return_address[1]) + break; + } + KUNIT_EXPECT_NE(current_test, i, ret); + +#if !IS_MODULE(CONFIG_KPROBES_SANITY_TEST) + /* + * Test stacktrace from pt_regs at the return address. Thus the stack + * trace must start from the target return address. + */ + ret = stack_trace_save_regs(regs, stack_buf, STACK_BUF_SIZE, 0); + KUNIT_EXPECT_NE(current_test, ret, 0); + KUNIT_EXPECT_EQ(current_test, stack_buf[0], target_return_address[1]); +#endif + + return 0; +} + +static struct kretprobe rp3 = { + .handler = stacktrace_return_handler, + .kp.symbol_name = "kprobe_stacktrace_target" +}; + +static void test_stacktrace_on_kretprobe(struct kunit *test) +{ + unsigned long myretaddr = (unsigned long)__builtin_return_address(0); + + current_test = test; + rp3.kp.addr = NULL; + rp3.kp.flags = 0; + + /* + * Run the stacktrace_driver() to record correct return address in + * stacktrace_target() and ensure stacktrace_driver() call is not + * inlined by checking the return address of stacktrace_driver() + * and the return address of this function is different. + */ + KUNIT_ASSERT_NE(test, myretaddr, stacktrace_driver()); + + KUNIT_ASSERT_EQ(test, 0, register_kretprobe(&rp3)); + KUNIT_ASSERT_NE(test, myretaddr, stacktrace_driver()); + unregister_kretprobe(&rp3); +} + +static int stacktrace_internal_return_handler(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + unsigned long retval = regs_return_value(regs); + int i, ret; + + KUNIT_EXPECT_FALSE(current_test, preemptible()); + KUNIT_EXPECT_EQ(current_test, retval, target_return_address[0]); + + /* + * Test stacktrace inside the kretprobe handler for nested case. + * The unwinder will find the kretprobe_trampoline address on the + * return address, and kretprobe must solve that. + */ + ret = stack_trace_save(stack_buf, STACK_BUF_SIZE, 0); + KUNIT_EXPECT_NE(current_test, ret, 0); + + for (i = 0; i < ret - 1; i++) { + if (stack_buf[i] == target_return_address[0]) { + KUNIT_EXPECT_EQ(current_test, stack_buf[i + 1], target_return_address[1]); + break; + } + } + KUNIT_EXPECT_NE(current_test, i, ret); + +#if !IS_MODULE(CONFIG_KPROBES_SANITY_TEST) + /* Ditto for the regs version. */ + ret = stack_trace_save_regs(regs, stack_buf, STACK_BUF_SIZE, 0); + KUNIT_EXPECT_NE(current_test, ret, 0); + KUNIT_EXPECT_EQ(current_test, stack_buf[0], target_return_address[0]); + KUNIT_EXPECT_EQ(current_test, stack_buf[1], target_return_address[1]); +#endif + + return 0; +} + +static struct kretprobe rp4 = { + .handler = stacktrace_internal_return_handler, + .kp.symbol_name = "kprobe_stacktrace_internal_target" +}; + +static void test_stacktrace_on_nested_kretprobe(struct kunit *test) +{ + unsigned long myretaddr = (unsigned long)__builtin_return_address(0); + struct kretprobe *rps[2] = {&rp3, &rp4}; + + current_test = test; + rp3.kp.addr = NULL; + rp3.kp.flags = 0; + + //KUNIT_ASSERT_NE(test, myretaddr, stacktrace_driver()); + + KUNIT_ASSERT_EQ(test, 0, register_kretprobes(rps, 2)); + KUNIT_ASSERT_NE(test, myretaddr, stacktrace_driver()); + unregister_kretprobes(rps, 2); +} +#endif /* CONFIG_ARCH_CORRECT_STACKTRACE_ON_KRETPROBE */ + +#endif /* CONFIG_KRETPROBES */ + +static int kprobes_test_init(struct kunit *test) +{ + target = kprobe_target; + target2 = kprobe_target2; + stacktrace_target = kprobe_stacktrace_target; + internal_target = kprobe_stacktrace_internal_target; + stacktrace_driver = kprobe_stacktrace_driver; + + do { + rand1 = prandom_u32(); + } while (rand1 <= div_factor); + return 0; +} + +static struct kunit_case kprobes_testcases[] = { + KUNIT_CASE(test_kprobe), + KUNIT_CASE(test_kprobes), +#ifdef CONFIG_KRETPROBES + KUNIT_CASE(test_kretprobe), + KUNIT_CASE(test_kretprobes), +#ifdef CONFIG_ARCH_CORRECT_STACKTRACE_ON_KRETPROBE + KUNIT_CASE(test_stacktrace_on_kretprobe), + KUNIT_CASE(test_stacktrace_on_nested_kretprobe), +#endif +#endif + {} +}; + +static struct kunit_suite kprobes_test_suite = { + .name = "kprobes_test", + .init = kprobes_test_init, + .test_cases = kprobes_testcases, +}; + +kunit_test_suites(&kprobes_test_suite); + +MODULE_LICENSE("GPL"); diff --git a/samples/ftrace/Makefile b/samples/ftrace/Makefile index 4ce896e10b2e..ab1d1c05c288 100644 --- a/samples/ftrace/Makefile +++ b/samples/ftrace/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct.o obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-too.o obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-modify.o +obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-multi.o CFLAGS_sample-trace-array.o := -I$(src) obj-$(CONFIG_SAMPLE_TRACE_ARRAY) += sample-trace-array.o diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c new file mode 100644 index 000000000000..2a5b1fb7ac14 --- /dev/null +++ b/samples/ftrace/ftrace-direct-multi.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/module.h> + +#include <linux/mm.h> /* for handle_mm_fault() */ +#include <linux/ftrace.h> +#include <linux/sched/stat.h> + +void my_direct_func(unsigned long ip) +{ + trace_printk("ip %lx\n", ip); +} + +extern void my_tramp(void *); + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:" +" pushq %rbp\n" +" movq %rsp, %rbp\n" +" pushq %rdi\n" +" movq 8(%rbp), %rdi\n" +" call my_direct_func\n" +" popq %rdi\n" +" leave\n" +" ret\n" +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +static struct ftrace_ops direct; + +static int __init ftrace_direct_multi_init(void) +{ + ftrace_set_filter_ip(&direct, (unsigned long) wake_up_process, 0, 0); + ftrace_set_filter_ip(&direct, (unsigned long) schedule, 0, 0); + + return register_ftrace_direct_multi(&direct, (unsigned long) my_tramp); +} + +static void __exit ftrace_direct_multi_exit(void) +{ + unregister_ftrace_direct_multi(&direct, (unsigned long) my_tramp); +} + +module_init(ftrace_direct_multi_init); +module_exit(ftrace_direct_multi_exit); + +MODULE_AUTHOR("Jiri Olsa"); +MODULE_DESCRIPTION("Example use case of using register_ftrace_direct_multi()"); +MODULE_LICENSE("GPL"); diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c index 5dc1bf3baa98..228321ecb161 100644 --- a/samples/kprobes/kretprobe_example.c +++ b/samples/kprobes/kretprobe_example.c @@ -86,7 +86,7 @@ static int __init kretprobe_init(void) ret = register_kretprobe(&my_kretprobe); if (ret < 0) { pr_err("register_kretprobe failed, returned %d\n", ret); - return -1; + return ret; } pr_info("Planted return probe at %s: %p\n", my_kretprobe.kp.symbol_name, my_kretprobe.kp.addr); diff --git a/tools/bootconfig/Makefile b/tools/bootconfig/Makefile index da5975775337..566c3e0ee561 100644 --- a/tools/bootconfig/Makefile +++ b/tools/bootconfig/Makefile @@ -15,9 +15,9 @@ CFLAGS = -Wall -g -I$(CURDIR)/include ALL_TARGETS := bootconfig ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS)) -all: $(ALL_PROGRAMS) +all: $(ALL_PROGRAMS) test -$(OUTPUT)bootconfig: main.c $(LIBSRC) +$(OUTPUT)bootconfig: main.c include/linux/bootconfig.h $(LIBSRC) $(CC) $(filter %.c,$^) $(CFLAGS) -o $@ test: $(ALL_PROGRAMS) test-bootconfig.sh diff --git a/tools/bootconfig/include/linux/bootconfig.h b/tools/bootconfig/include/linux/bootconfig.h index de7f30f99af3..6784296a0692 100644 --- a/tools/bootconfig/include/linux/bootconfig.h +++ b/tools/bootconfig/include/linux/bootconfig.h @@ -2,10 +2,53 @@ #ifndef _BOOTCONFIG_LINUX_BOOTCONFIG_H #define _BOOTCONFIG_LINUX_BOOTCONFIG_H -#include "../../../../include/linux/bootconfig.h" +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdbool.h> +#include <ctype.h> +#include <errno.h> +#include <string.h> + #ifndef fallthrough # define fallthrough #endif +#define WARN_ON(cond) \ + ((cond) ? printf("Internal warning(%s:%d, %s): %s\n", \ + __FILE__, __LINE__, __func__, #cond) : 0) + +#define unlikely(cond) (cond) + +/* Copied from lib/string.c */ +static inline char *skip_spaces(const char *str) +{ + while (isspace(*str)) + ++str; + return (char *)str; +} + +static inline char *strim(char *s) +{ + size_t size; + char *end; + + size = strlen(s); + if (!size) + return s; + + end = s + size - 1; + while (end >= s && isspace(*end)) + end--; + *(end + 1) = '\0'; + + return skip_spaces(s); +} + +#define __init +#define __initdata + +#include "../../../../include/linux/bootconfig.h" + #endif diff --git a/tools/bootconfig/include/linux/bug.h b/tools/bootconfig/include/linux/bug.h deleted file mode 100644 index 7b65a389c0dd..000000000000 --- a/tools/bootconfig/include/linux/bug.h +++ /dev/null @@ -1,12 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _SKC_LINUX_BUG_H -#define _SKC_LINUX_BUG_H - -#include <stdio.h> -#include <stdlib.h> - -#define WARN_ON(cond) \ - ((cond) ? printf("Internal warning(%s:%d, %s): %s\n", \ - __FILE__, __LINE__, __func__, #cond) : 0) - -#endif diff --git a/tools/bootconfig/include/linux/ctype.h b/tools/bootconfig/include/linux/ctype.h deleted file mode 100644 index c56ecc136448..000000000000 --- a/tools/bootconfig/include/linux/ctype.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _SKC_LINUX_CTYPE_H -#define _SKC_LINUX_CTYPE_H - -#include <ctype.h> - -#endif diff --git a/tools/bootconfig/include/linux/errno.h b/tools/bootconfig/include/linux/errno.h deleted file mode 100644 index 5d9f91ec2fda..000000000000 --- a/tools/bootconfig/include/linux/errno.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _SKC_LINUX_ERRNO_H -#define _SKC_LINUX_ERRNO_H - -#include <asm/errno.h> - -#endif diff --git a/tools/bootconfig/include/linux/kernel.h b/tools/bootconfig/include/linux/kernel.h deleted file mode 100644 index 2d93320aa374..000000000000 --- a/tools/bootconfig/include/linux/kernel.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _SKC_LINUX_KERNEL_H -#define _SKC_LINUX_KERNEL_H - -#include <stdlib.h> -#include <stdbool.h> - -#include <linux/printk.h> - -typedef unsigned short u16; -typedef unsigned int u32; - -#define unlikely(cond) (cond) - -#define __init -#define __initdata - -#endif diff --git a/tools/bootconfig/include/linux/memblock.h b/tools/bootconfig/include/linux/memblock.h deleted file mode 100644 index f2e506f7d57f..000000000000 --- a/tools/bootconfig/include/linux/memblock.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _XBC_LINUX_MEMBLOCK_H -#define _XBC_LINUX_MEMBLOCK_H - -#include <stdlib.h> - -#define SMP_CACHE_BYTES 0 -#define memblock_alloc(size, align) malloc(size) -#define memblock_free_ptr(paddr, size) free(paddr) - -#endif diff --git a/tools/bootconfig/include/linux/printk.h b/tools/bootconfig/include/linux/printk.h deleted file mode 100644 index 036e667596eb..000000000000 --- a/tools/bootconfig/include/linux/printk.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _SKC_LINUX_PRINTK_H -#define _SKC_LINUX_PRINTK_H - -#include <stdio.h> - -#define printk(fmt, ...) printf(fmt, ##__VA_ARGS__) - -#define pr_err printk -#define pr_warn printk -#define pr_info printk -#define pr_debug printk - -#endif diff --git a/tools/bootconfig/include/linux/string.h b/tools/bootconfig/include/linux/string.h deleted file mode 100644 index 8267af75153a..000000000000 --- a/tools/bootconfig/include/linux/string.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _SKC_LINUX_STRING_H -#define _SKC_LINUX_STRING_H - -#include <string.h> - -/* Copied from lib/string.c */ -static inline char *skip_spaces(const char *str) -{ - while (isspace(*str)) - ++str; - return (char *)str; -} - -static inline char *strim(char *s) -{ - size_t size; - char *end; - - size = strlen(s); - if (!size) - return s; - - end = s + size - 1; - while (end >= s && isspace(*end)) - end--; - *(end + 1) = '\0'; - - return skip_spaces(s); -} - -#endif diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index fd67496a947f..156b62a163c5 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -12,9 +12,10 @@ #include <errno.h> #include <endian.h> -#include <linux/kernel.h> #include <linux/bootconfig.h> +#define pr_err(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) + static int xbc_show_value(struct xbc_node *node, bool semicolon) { const char *val, *eol; @@ -176,7 +177,7 @@ static int load_xbc_from_initrd(int fd, char **buf) { struct stat stat; int ret; - u32 size = 0, csum = 0, rcsum; + uint32_t size = 0, csum = 0, rcsum; char magic[BOOTCONFIG_MAGIC_LEN]; const char *msg; @@ -200,11 +201,11 @@ static int load_xbc_from_initrd(int fd, char **buf) if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0) return pr_errno("Failed to lseek for size", -errno); - if (read(fd, &size, sizeof(u32)) < 0) + if (read(fd, &size, sizeof(uint32_t)) < 0) return pr_errno("Failed to read size", -errno); size = le32toh(size); - if (read(fd, &csum, sizeof(u32)) < 0) + if (read(fd, &csum, sizeof(uint32_t)) < 0) return pr_errno("Failed to read checksum", -errno); csum = le32toh(csum); @@ -229,7 +230,7 @@ static int load_xbc_from_initrd(int fd, char **buf) return -EINVAL; } - ret = xbc_init(*buf, &msg, NULL); + ret = xbc_init(*buf, size, &msg, NULL); /* Wrong data */ if (ret < 0) { pr_err("parse error: %s.\n", msg); @@ -269,7 +270,7 @@ static int init_xbc_with_error(char *buf, int len) if (!copy) return -ENOMEM; - ret = xbc_init(buf, &msg, &pos); + ret = xbc_init(buf, len, &msg, &pos); if (ret < 0) show_xbc_error(copy, msg, pos); free(copy); @@ -362,7 +363,7 @@ static int apply_xbc(const char *path, const char *xbc_path) size_t total_size; struct stat stat; const char *msg; - u32 size, csum; + uint32_t size, csum; int pos, pad; int ret, fd; @@ -376,13 +377,13 @@ static int apply_xbc(const char *path, const char *xbc_path) /* Backup the bootconfig data */ data = calloc(size + BOOTCONFIG_ALIGN + - sizeof(u32) + sizeof(u32) + BOOTCONFIG_MAGIC_LEN, 1); + sizeof(uint32_t) + sizeof(uint32_t) + BOOTCONFIG_MAGIC_LEN, 1); if (!data) return -ENOMEM; memcpy(data, buf, size); /* Check the data format */ - ret = xbc_init(buf, &msg, &pos); + ret = xbc_init(buf, size, &msg, &pos); if (ret < 0) { show_xbc_error(data, msg, pos); free(data); @@ -391,12 +392,13 @@ static int apply_xbc(const char *path, const char *xbc_path) return ret; } printf("Apply %s to %s\n", xbc_path, path); + xbc_get_info(&ret, NULL); printf("\tNumber of nodes: %d\n", ret); printf("\tSize: %u bytes\n", (unsigned int)size); printf("\tChecksum: %d\n", (unsigned int)csum); /* TODO: Check the options by schema */ - xbc_destroy_all(); + xbc_exit(); free(buf); /* Remove old boot config if exists */ @@ -423,17 +425,17 @@ static int apply_xbc(const char *path, const char *xbc_path) } /* To align up the total size to BOOTCONFIG_ALIGN, get padding size */ - total_size = stat.st_size + size + sizeof(u32) * 2 + BOOTCONFIG_MAGIC_LEN; + total_size = stat.st_size + size + sizeof(uint32_t) * 2 + BOOTCONFIG_MAGIC_LEN; pad = ((total_size + BOOTCONFIG_ALIGN - 1) & (~BOOTCONFIG_ALIGN_MASK)) - total_size; size += pad; /* Add a footer */ p = data + size; - *(u32 *)p = htole32(size); - p += sizeof(u32); + *(uint32_t *)p = htole32(size); + p += sizeof(uint32_t); - *(u32 *)p = htole32(csum); - p += sizeof(u32); + *(uint32_t *)p = htole32(csum); + p += sizeof(uint32_t); memcpy(p, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); p += BOOTCONFIG_MAGIC_LEN; diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index 7e72d975cb76..aca52db2f3f3 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -66,6 +66,17 @@ struct unwind_hint { static void __used __section(".discard.func_stack_frame_non_standard") \ *__func_stack_frame_non_standard_##func = func +/* + * STACK_FRAME_NON_STANDARD_FP() is a frame-pointer-specific function ignore + * for the case where a function is intentionally missing frame pointer setup, + * but otherwise needs objtool/ORC coverage when frame pointers are disabled. + */ +#ifdef CONFIG_FRAME_POINTER +#define STACK_FRAME_NON_STANDARD_FP(func) STACK_FRAME_NON_STANDARD(func) +#else +#define STACK_FRAME_NON_STANDARD_FP(func) +#endif + #else /* __ASSEMBLY__ */ /* @@ -127,6 +138,7 @@ struct unwind_hint { #define UNWIND_HINT(sp_reg, sp_offset, type, end) \ "\n\t" #define STACK_FRAME_NON_STANDARD(func) +#define STACK_FRAME_NON_STANDARD_FP(func) #else #define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 diff --git a/tools/objtool/check.c b/tools/objtool/check.c index fb3f251ea021..add39902166d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -3229,7 +3229,7 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec) } while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) { - if (insn->hint && !insn->visited) { + if (insn->hint && !insn->visited && !insn->ignore) { ret = validate_branch(file, insn->func, insn, state); if (ret && backtrace) BT_FUNC("<=== (hint)", insn); diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index 8ec1922e974e..c3311c8c4089 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -428,7 +428,7 @@ for t in $TEST_CASES; do exit 1 fi done -(cd $TRACING_DIR; initialize_ftrace) # for cleanup +(cd $TRACING_DIR; finish_ftrace) # for cleanup prlog "" prlog "# of passed: " `echo $PASSED_CASES | wc -w` diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 000fd05e84b1..5f6cbec847fc 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -124,10 +124,22 @@ initialize_ftrace() { # Reset ftrace to initial-state [ -f uprobe_events ] && echo > uprobe_events [ -f synthetic_events ] && echo > synthetic_events [ -f snapshot ] && echo 0 > snapshot + +# Stop tracing while reading the trace file by default, to prevent +# the test results while checking it and to avoid taking a long time +# to check the result. + [ -f options/pause-on-trace ] && echo 1 > options/pause-on-trace + clear_trace enable_tracing } +finish_ftrace() { + initialize_ftrace +# And recover it to default. + [ -f options/pause-on-trace ] && echo 0 > options/pause-on-trace +} + check_requires() { # Check required files and tracers for i in "$@" ; do r=${i%:README} diff --git a/tools/tracing/latency/latency-collector.c b/tools/tracing/latency/latency-collector.c index 3a2e6bb781a8..59a7f2346eab 100644 --- a/tools/tracing/latency/latency-collector.c +++ b/tools/tracing/latency/latency-collector.c @@ -1538,7 +1538,7 @@ static void tracing_loop(void) mutex_lock(&print_mtx); check_signals(); write_or_die(fd_stdout, queue_full_warning, - sizeof(queue_full_warning)); + strlen(queue_full_warning)); mutex_unlock(&print_mtx); } modified--; |