From aebfd12521d9c7d0b502cf6d06314cfbcdccfe3b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:29 +0100 Subject: x86/ibt,ftrace: Search for __fentry__ location Currently a lot of ftrace code assumes __fentry__ is at sym+0. However with Intel IBT enabled the first instruction of a function will most likely be ENDBR. Change ftrace_location() to not only return the __fentry__ location when called for the __fentry__ location, but also when called for the sym+0 location. Then audit/update all callsites of this function to consistently use these new semantics. Suggested-by: Steven Rostedt Signed-off-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154318.227581603@infradead.org --- kernel/bpf/trampoline.c | 20 ++++---------------- kernel/kprobes.c | 8 ++------ kernel/trace/ftrace.c | 48 ++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 46 insertions(+), 30 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 5e7edf913060..455f1b4f312e 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -117,18 +117,6 @@ static void bpf_trampoline_module_put(struct bpf_trampoline *tr) tr->mod = NULL; } -static int is_ftrace_location(void *ip) -{ - long addr; - - addr = ftrace_location((long)ip); - if (!addr) - return 0; - if (WARN_ON_ONCE(addr != (long)ip)) - return -EFAULT; - return 1; -} - static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr) { void *ip = tr->func.addr; @@ -160,12 +148,12 @@ static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_ad static int register_fentry(struct bpf_trampoline *tr, void *new_addr) { void *ip = tr->func.addr; + unsigned long faddr; int ret; - ret = is_ftrace_location(ip); - if (ret < 0) - return ret; - tr->func.ftrace_managed = ret; + faddr = ftrace_location((unsigned long)ip); + if (faddr) + tr->func.ftrace_managed = true; if (bpf_trampoline_module_get(tr)) return -ENOENT; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 94cab8c9ce56..6d1e11cda4f1 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1562,14 +1562,10 @@ static inline int warn_kprobe_rereg(struct kprobe *p) static int check_ftrace_location(struct kprobe *p) { - unsigned long ftrace_addr; + unsigned long addr = (unsigned long)p->addr; - ftrace_addr = ftrace_location((unsigned long)p->addr); - if (ftrace_addr) { + if (ftrace_location(addr) == addr) { #ifdef CONFIG_KPROBES_ON_FTRACE - /* Given address is not on the instruction boundary */ - if ((unsigned long)p->addr != ftrace_addr) - return -EILSEQ; p->flags |= KPROBE_FLAG_FTRACE; #else /* !CONFIG_KPROBES_ON_FTRACE */ return -EINVAL; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6105b7036482..2ae6fb9a5b12 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1568,17 +1568,34 @@ unsigned long ftrace_location_range(unsigned long start, unsigned long end) } /** - * ftrace_location - return true if the ip giving is a traced location + * ftrace_location - return the ftrace location * @ip: the instruction pointer to check * - * Returns rec->ip if @ip given is a pointer to a ftrace location. - * That is, the instruction that is either a NOP or call to - * the function tracer. It checks the ftrace internal tables to - * determine if the address belongs or not. + * If @ip matches the ftrace location, return @ip. + * If @ip matches sym+0, return sym's ftrace location. + * Otherwise, return 0. 
*/ unsigned long ftrace_location(unsigned long ip) { - return ftrace_location_range(ip, ip); + struct dyn_ftrace *rec; + unsigned long offset; + unsigned long size; + + rec = lookup_rec(ip, ip); + if (!rec) { + if (!kallsyms_lookup_size_offset(ip, &size, &offset)) + goto out; + + /* map sym+0 to __fentry__ */ + if (!offset) + rec = lookup_rec(ip, ip + size - 1); + } + + if (rec) + return rec->ip; + +out: + return 0; } /** @@ -4962,7 +4979,8 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) { struct ftrace_func_entry *entry; - if (!ftrace_location(ip)) + ip = ftrace_location(ip); + if (!ip) return -EINVAL; if (remove) { @@ -5110,11 +5128,16 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) struct ftrace_func_entry *entry; struct ftrace_hash *free_hash = NULL; struct dyn_ftrace *rec; - int ret = -EBUSY; + int ret = -ENODEV; mutex_lock(&direct_mutex); + ip = ftrace_location(ip); + if (!ip) + goto out_unlock; + /* See if there's a direct function at @ip already */ + ret = -EBUSY; if (ftrace_find_rec_direct(ip)) goto out_unlock; @@ -5222,6 +5245,10 @@ int unregister_ftrace_direct(unsigned long ip, unsigned long addr) mutex_lock(&direct_mutex); + ip = ftrace_location(ip); + if (!ip) + goto out_unlock; + entry = find_direct_entry(&ip, NULL); if (!entry) goto out_unlock; @@ -5354,6 +5381,11 @@ int modify_ftrace_direct(unsigned long ip, mutex_lock(&direct_mutex); mutex_lock(&ftrace_lock); + + ip = ftrace_location(ip); + if (!ip) + goto out_unlock; + entry = find_direct_entry(&ip, &rec); if (!entry) goto out_unlock; -- cgit v1.2.3 From d15cb3dab1e4f00e29599a4f5e1f6678a530d270 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:30 +0100 Subject: x86/livepatch: Validate __fentry__ location Currently livepatch assumes __fentry__ lives at func+0, which is most likely untrue with IBT on. Instead make it use ftrace_location() by default which both validates and finds the actual ip if there is any in the same symbol. Suggested-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154318.285971256@infradead.org --- arch/powerpc/include/asm/livepatch.h | 10 ---------- kernel/livepatch/patch.c | 19 ++----------------- 2 files changed, 2 insertions(+), 27 deletions(-) (limited to 'kernel') diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h index 4fe018cc207b..7b9dcd51af32 100644 --- a/arch/powerpc/include/asm/livepatch.h +++ b/arch/powerpc/include/asm/livepatch.h @@ -19,16 +19,6 @@ static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip) regs_set_return_ip(regs, ip); } -#define klp_get_ftrace_location klp_get_ftrace_location -static inline unsigned long klp_get_ftrace_location(unsigned long faddr) -{ - /* - * Live patch works only with -mprofile-kernel on PPC. In this case, - * the ftrace location is always within the first 16 bytes. - */ - return ftrace_location_range(faddr, faddr + 16); -} - static inline void klp_init_thread_info(struct task_struct *p) { /* + 1 to account for STACK_END_MAGIC */ diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index fe316c021d73..c172bf92b576 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -124,19 +124,6 @@ unlock: ftrace_test_recursion_unlock(bit); } -/* - * Convert a function address into the appropriate ftrace location. 
- * - * Usually this is just the address of the function, but on some architectures - * it's more complicated so allow them to provide a custom behaviour. - */ -#ifndef klp_get_ftrace_location -static unsigned long klp_get_ftrace_location(unsigned long faddr) -{ - return faddr; -} -#endif - static void klp_unpatch_func(struct klp_func *func) { struct klp_ops *ops; @@ -153,8 +140,7 @@ static void klp_unpatch_func(struct klp_func *func) if (list_is_singular(&ops->func_stack)) { unsigned long ftrace_loc; - ftrace_loc = - klp_get_ftrace_location((unsigned long)func->old_func); + ftrace_loc = ftrace_location((unsigned long)func->old_func); if (WARN_ON(!ftrace_loc)) return; @@ -186,8 +172,7 @@ static int klp_patch_func(struct klp_func *func) if (!ops) { unsigned long ftrace_loc; - ftrace_loc = - klp_get_ftrace_location((unsigned long)func->old_func); + ftrace_loc = ftrace_location((unsigned long)func->old_func); if (!ftrace_loc) { pr_err("failed to find location for function '%s'\n", func->old_name); -- cgit v1.2.3 From cc66bb91457827f62e2b6cb2518666820f0a6c48 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:32 +0100 Subject: x86/ibt,kprobes: Cure sym+0 equals fentry woes In order to allow kprobes to skip the ENDBR instructions at sym+0 for X86_KERNEL_IBT builds, change _kprobe_addr() to take an architecture callback to inspect the function at hand and modify the offset if needed. This streamlines the existing interface to cover more cases and require less hooks. Once PowerPC gets fully converted there will only be the one arch hook. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154318.405947704@infradead.org --- arch/powerpc/kernel/kprobes.c | 34 +++++++++++++--------- arch/x86/kernel/kprobes/core.c | 17 +++++++++++ include/linux/kprobes.h | 3 +- kernel/kprobes.c | 66 +++++++++++++++++++++++++++++++++--------- 4 files changed, 92 insertions(+), 28 deletions(-) (limited to 'kernel') diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 9a492fdec1df..7dae0b01abfb 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -105,6 +105,27 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) return addr; } +static bool arch_kprobe_on_func_entry(unsigned long offset) +{ +#ifdef PPC64_ELF_ABI_v2 +#ifdef CONFIG_KPROBES_ON_FTRACE + return offset <= 16; +#else + return offset <= 8; +#endif +#else + return !offset; +#endif +} + +/* XXX try and fold the magic of kprobe_lookup_name() in this */ +kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, + bool *on_func_entry) +{ + *on_func_entry = arch_kprobe_on_func_entry(offset); + return (kprobe_opcode_t *)(addr + offset); +} + void *alloc_insn_page(void) { void *page; @@ -218,19 +239,6 @@ static nokprobe_inline void set_current_kprobe(struct kprobe *p, struct pt_regs kcb->kprobe_saved_msr = regs->msr; } -bool arch_kprobe_on_func_entry(unsigned long offset) -{ -#ifdef PPC64_ELF_ABI_v2 -#ifdef CONFIG_KPROBES_ON_FTRACE - return offset <= 16; -#else - return offset <= 8; -#endif -#else - return !offset; -#endif -} - void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { ri->ret_addr = (kprobe_opcode_t *)regs->link; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 4d8086a1627e..9ea0e3e79896 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -52,6 +52,7 @@ 
#include #include #include +#include #include "common.h" @@ -294,6 +295,22 @@ static int can_probe(unsigned long paddr) return (addr == paddr); } +/* If x86 supports IBT (ENDBR) it must be skipped. */ +kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, + bool *on_func_entry) +{ + if (is_endbr(*(u32 *)addr)) { + *on_func_entry = !offset || offset == 4; + if (*on_func_entry) + offset = 4; + + } else { + *on_func_entry = !offset; + } + + return (kprobe_opcode_t *)(addr + offset); +} + /* * Copy an instruction with recovering modified instruction by kprobes * and adjust the displacement if the instruction uses the %rip-relative diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 19b884353b15..9c28f7a0ef42 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -265,7 +265,6 @@ extern int arch_init_kprobes(void); extern void kprobes_inc_nmissed_count(struct kprobe *p); extern bool arch_within_kprobe_blacklist(unsigned long addr); extern int arch_populate_kprobe_blacklist(void); -extern bool arch_kprobe_on_func_entry(unsigned long offset); extern int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); extern bool within_kprobe_blacklist(unsigned long addr); @@ -384,6 +383,8 @@ static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void) } kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset); +kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, bool *on_func_entry); + int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); int register_kprobes(struct kprobe **kps, int num); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 6d1e11cda4f1..185badc780b7 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1488,25 +1488,69 @@ bool within_kprobe_blacklist(unsigned long addr) return false; } +/* + * arch_adjust_kprobe_addr - adjust the address + * @addr: symbol base address + * @offset: offset within the symbol + * @on_func_entry: was this @addr+@offset on the function entry + * + * Typically returns @addr + @offset, except for special cases where the + * function might be prefixed by a CFI landing pad, in that case any offset + * inside the landing pad is mapped to the first 'real' instruction of the + * symbol. + * + * Specifically, for things like IBT/BTI, skip the resp. ENDBR/BTI.C + * instruction at +0. + */ +kprobe_opcode_t *__weak arch_adjust_kprobe_addr(unsigned long addr, + unsigned long offset, + bool *on_func_entry) +{ + *on_func_entry = !offset; + return (kprobe_opcode_t *)(addr + offset); +} + /* * If 'symbol_name' is specified, look it up and add the 'offset' * to it. This way, we can specify a relative address to a symbol. * This returns encoded errors if it fails to look up symbol or invalid * combination of parameters. */ -static kprobe_opcode_t *_kprobe_addr(kprobe_opcode_t *addr, - const char *symbol_name, unsigned int offset) +static kprobe_opcode_t * +_kprobe_addr(kprobe_opcode_t *addr, const char *symbol_name, + unsigned long offset, bool *on_func_entry) { if ((symbol_name && addr) || (!symbol_name && !addr)) goto invalid; if (symbol_name) { + /* + * Input: @sym + @offset + * Output: @addr + @offset + * + * NOTE: kprobe_lookup_name() does *NOT* fold the offset + * argument into it's output! 
+ */ addr = kprobe_lookup_name(symbol_name, offset); if (!addr) return ERR_PTR(-ENOENT); } - addr = (kprobe_opcode_t *)(((char *)addr) + offset); + /* + * So here we have @addr + @offset, displace it into a new + * @addr' + @offset' where @addr' is the symbol start address. + */ + addr = (void *)addr + offset; + if (!kallsyms_lookup_size_offset((unsigned long)addr, NULL, &offset)) + return ERR_PTR(-ENOENT); + addr = (void *)addr - offset; + + /* + * Then ask the architecture to re-combine them, taking care of + * magical function entry details while telling us if this was indeed + * at the start of the function. + */ + addr = arch_adjust_kprobe_addr((unsigned long)addr, offset, on_func_entry); if (addr) return addr; @@ -1516,7 +1560,8 @@ invalid: static kprobe_opcode_t *kprobe_addr(struct kprobe *p) { - return _kprobe_addr(p->addr, p->symbol_name, p->offset); + bool on_func_entry; + return _kprobe_addr(p->addr, p->symbol_name, p->offset, &on_func_entry); } /* @@ -2043,11 +2088,6 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) } NOKPROBE_SYMBOL(pre_handler_kretprobe); -bool __weak arch_kprobe_on_func_entry(unsigned long offset) -{ - return !offset; -} - /** * kprobe_on_func_entry() -- check whether given address is function entry * @addr: Target address @@ -2063,15 +2103,13 @@ bool __weak arch_kprobe_on_func_entry(unsigned long offset) */ int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset) { - kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset); + bool on_func_entry; + kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset, &on_func_entry); if (IS_ERR(kp_addr)) return PTR_ERR(kp_addr); - if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset)) - return -ENOENT; - - if (!arch_kprobe_on_func_entry(offset)) + if (!on_func_entry) return -EINVAL; return 0; -- cgit v1.2.3 From eae654f1c21216daa9fbb92591c0d9f5ae46cfc5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Mar 2022 16:30:48 +0100 Subject: exit: Mark do_group_exit() __noreturn vmlinux.o: warning: objtool: get_signal()+0x108: unreachable instruction 0000 000000000007f930 : ... 0103 7fa33: e8 00 00 00 00 call 7fa38 7fa34: R_X86_64_PLT32 do_group_exit-0x4 0108 7fa38: 41 8b 45 74 mov 0x74(%r13),%eax Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20220308154319.351270711@infradead.org --- include/linux/sched/task.h | 2 +- kernel/exit.c | 2 +- tools/objtool/check.c | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index e84e54d1b490..719c9a6cac8d 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -79,7 +79,7 @@ static inline void exit_thread(struct task_struct *tsk) { } #endif -extern void do_group_exit(int); +extern __noreturn void do_group_exit(int); extern void exit_files(struct task_struct *); extern void exit_itimers(struct signal_struct *); diff --git a/kernel/exit.c b/kernel/exit.c index b00a25bb4ab9..b71f9df9074e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -906,7 +906,7 @@ SYSCALL_DEFINE1(exit, int, error_code) * Take down every thread in the group. This is called by fatal signals * as well as by sys_exit_group (below). 
*/ -void +void __noreturn do_group_exit(int exit_code) { struct signal_struct *sig = current->signal; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index c3ddcecdab57..9896562350a8 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -181,6 +181,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "kunit_try_catch_throw", "xen_start_kernel", "cpu_bringup_and_idle", + "do_group_exit", "stop_this_cpu", }; -- cgit v1.2.3 From 105cd68596392cfe15056a891b0723609dcad247 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Mar 2022 17:58:35 +0100 Subject: x86: Mark __invalid_creds() __noreturn vmlinux.o: warning: objtool: ksys_unshare()+0x36c: unreachable instruction 0000 0000000000067040 : ... 0364 673a4: 4c 89 ef mov %r13,%rdi 0367 673a7: e8 00 00 00 00 call 673ac 673a8: R_X86_64_PLT32 __invalid_creds-0x4 036c 673ac: e9 28 ff ff ff jmp 672d9 0371 673b1: 41 bc f4 ff ff ff mov $0xfffffff4,%r12d 0377 673b7: e9 80 fd ff ff jmp 6713c Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/Yi9gOW9f1GGwwUD6@hirez.programming.kicks-ass.net --- include/linux/cred.h | 2 +- kernel/cred.c | 2 +- tools/objtool/check.c | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/include/linux/cred.h b/include/linux/cred.h index fcbc6885cc09..9ed9232af934 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -176,7 +176,7 @@ extern int set_cred_ucounts(struct cred *); * check for validity of credentials */ #ifdef CONFIG_DEBUG_CREDENTIALS -extern void __invalid_creds(const struct cred *, const char *, unsigned); +extern void __noreturn __invalid_creds(const struct cred *, const char *, unsigned); extern void __validate_process_creds(struct task_struct *, const char *, unsigned); diff --git a/kernel/cred.c b/kernel/cred.c index 933155c96922..e10c15f51c1f 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -870,7 +870,7 @@ static void dump_invalid_creds(const struct cred *cred, const char *label, /* * report use of invalid credentials */ -void __invalid_creds(const struct cred *cred, const char *file, unsigned line) +void __noreturn __invalid_creds(const struct cred *cred, const char *file, unsigned line) { printk(KERN_ERR "CRED: Invalid credentials\n"); printk(KERN_ERR "CRED: At %s:%u\n", file, line); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 9896562350a8..0c857e74c852 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -183,6 +183,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "cpu_bringup_and_idle", "do_group_exit", "stop_this_cpu", + "__invalid_creds", }; if (!func) -- cgit v1.2.3
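
The core of the first patch above is the new ftrace_location() lookup: a sym+0 query is now mapped onto the __fentry__ site inside that symbol, while any other non-matching offset still yields 0. The standalone C sketch below only models that behaviour; lookup_rec() and kallsyms_lookup_size_offset() here are hypothetical one-entry stand-ins for the kernel's internal tables, and the addresses assume a symbol whose ENDBR occupies sym+0 and whose __fentry__ call sits at sym+4 on an IBT build.

/* Standalone, illustrative model of the patched ftrace_location() semantics. */
#include <stdio.h>
#include <stdbool.h>

struct dyn_ftrace { unsigned long ip; };

#define SYM_START  0x1000UL   /* sym+0: ENDBR on an IBT build          */
#define SYM_SIZE   0x40UL     /* pretend symbol size from kallsyms     */
#define FENTRY_IP  0x1004UL   /* __fentry__ call site, just past ENDBR */

static struct dyn_ftrace fentry_rec = { .ip = FENTRY_IP };

/* Stand-in for lookup_rec(): return the record whose ip lies in [start, end]. */
static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end)
{
	if (fentry_rec.ip >= start && fentry_rec.ip <= end)
		return &fentry_rec;
	return NULL;
}

/* Stand-in for kallsyms: report size/offset only inside our one symbol. */
static bool kallsyms_lookup_size_offset(unsigned long addr,
					unsigned long *size, unsigned long *offset)
{
	if (addr < SYM_START || addr >= SYM_START + SYM_SIZE)
		return false;
	*size = SYM_SIZE;
	*offset = addr - SYM_START;
	return true;
}

/* Mirrors the patched ftrace_location(): map sym+0 to the __fentry__ site. */
static unsigned long ftrace_location(unsigned long ip)
{
	struct dyn_ftrace *rec;
	unsigned long size, offset;

	rec = lookup_rec(ip, ip);
	if (!rec) {
		if (!kallsyms_lookup_size_offset(ip, &size, &offset))
			return 0;
		/* map sym+0 to __fentry__ by searching the whole symbol */
		if (!offset)
			rec = lookup_rec(ip, ip + size - 1);
	}
	return rec ? rec->ip : 0;
}

int main(void)
{
	printf("sym+0      -> %#lx\n", ftrace_location(SYM_START));     /* 0x1004 */
	printf("__fentry__ -> %#lx\n", ftrace_location(FENTRY_IP));     /* 0x1004 */
	printf("sym+8      -> %#lx\n", ftrace_location(SYM_START + 8)); /* 0      */
	return 0;
}

With these semantics, callers such as register_fentry(), check_ftrace_location() and the ftrace_direct API can pass either sym+0 or the real __fentry__ address and still resolve to the same managed location, which is what allows the ENDBR at +0 to remain transparent to them.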