From 79cd2a11224eab86d6673fe8a11d2046ae9d2757 Mon Sep 17 00:00:00 2001
From: Petr Pavlu
Date: Tue, 11 Jul 2023 11:19:51 +0200
Subject: x86/retpoline,kprobes: Fix position of thunk sections with CONFIG_LTO_CLANG

The linker script arch/x86/kernel/vmlinux.lds.S matches the thunk
sections ".text.__x86.*" from arch/x86/lib/retpoline.S as follows:

  .text {
    [...]
    TEXT_TEXT
    [...]
    __indirect_thunk_start = .;
    *(.text.__x86.*)
    __indirect_thunk_end = .;
    [...]
  }

Macro TEXT_TEXT references TEXT_MAIN, which normally expands to only
".text". However, with CONFIG_LTO_CLANG, TEXT_MAIN becomes
".text .text.[0-9a-zA-Z_]*", which wrongly also matches the thunk
sections. The output layout then differs from what is expected. For
instance, the currently defined range
[__indirect_thunk_start, __indirect_thunk_end] becomes empty.

Prevent the problem by using ".." as the first separator, for example
".text..__x86.indirect_thunk". This pattern is used by other explicit
section names which start with one of the standard prefixes, such as
".text" or ".data", and which need to be individually selected in the
linker script.

[ nathan: Fix conflicts with SRSO and fold in a fix for an issue
  brought up by Andrew Cooper in post-review:
  https://lore.kernel.org/20230803230323.1478869-1-andrew.cooper3@citrix.com ]

Fixes: dc5723b02e52 ("kbuild: add support for Clang LTO")
Signed-off-by: Petr Pavlu
Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Nathan Chancellor
Signed-off-by: Borislav Petkov (AMD)
Link: https://lore.kernel.org/r/20230711091952.27944-2-petr.pavlu@suse.com
---
 tools/objtool/check.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'tools')

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 8936a05f0e5a..e2ee10ce7703 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -389,7 +389,7 @@ static int decode_instructions(struct objtool_file *file)
 		if (!strcmp(sec->name, ".noinstr.text") ||
 		    !strcmp(sec->name, ".entry.text") ||
 		    !strcmp(sec->name, ".cpuidle.text") ||
-		    !strncmp(sec->name, ".text.__x86.", 12))
+		    !strncmp(sec->name, ".text..__x86.", 13))
 			sec->noinstr = true;

 		/*
--
cgit v1.2.3


From 833fd800bf56b74d39d71d3f5936dffb3e0409c6 Mon Sep 17 00:00:00 2001
From: Petr Pavlu
Date: Tue, 11 Jul 2023 11:19:52 +0200
Subject: x86/retpoline,kprobes: Skip optprobe check for indirect jumps with retpolines and IBT

The kprobes optimization check can_optimize() calls
insn_is_indirect_jump() to detect indirect jump instructions in a
target function. If any is found, creating an optprobe is disallowed
in the function because the jump could be from a jump table and could
potentially land in the middle of the target optprobe.

With retpolines, insn_is_indirect_jump() additionally looks for calls
to indirect thunks which the compiler potentially used to replace
original jumps. This extra check is, however, unnecessary because jump
tables are disabled when the kernel is built with retpolines. The same
is currently the case with IBT.

Based on this observation, remove the logic to look for calls to
indirect thunks and skip the check for indirect jumps altogether if
the kernel is built with retpolines or IBT. Subsequently, remove the
symbols __indirect_thunk_start and __indirect_thunk_end which are no
longer needed.

Dropping this logic indirectly fixes a problem where the range
[__indirect_thunk_start, __indirect_thunk_end] wrongly also included
the return thunk. As a result, machines which used the return thunk as
a mitigation and didn't have it patched by any alternative ended up
unable to use optprobes in any regular function.
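For context, the jump tables in question come from ordinary dense
switch statements. The following is an illustrative sketch, not kernel
code; whether a compiler actually emits a table for it depends on the
compiler and its flags:

  /*
   * With enough dense cases, a compiler may lower this switch to a
   * jump table, i.e. an indirect "jmp *table(,%rax,8)" whose targets
   * can land between instructions that an optprobe would overwrite.
   */
  int classify(int c)
  {
  	switch (c) {
  	case 0: return 10;
  	case 1: return 11;
  	case 2: return 12;
  	case 3: return 13;
  	case 4: return 14;
  	case 5: return 15;
  	default: return -1;
  	}
  }

As the message above notes, builds with retpolines or IBT disable jump
tables, which is what makes the conservative indirect-jump scan
unnecessary there.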
Fixes: 0b53c374b9ef ("x86/retpoline: Use -mfunction-return")
Suggested-by: Peter Zijlstra (Intel)
Suggested-by: Masami Hiramatsu (Google)
Signed-off-by: Petr Pavlu
Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Borislav Petkov (AMD)
Acked-by: Masami Hiramatsu (Google)
Link: https://lore.kernel.org/r/20230711091952.27944-3-petr.pavlu@suse.com
---
 arch/x86/include/asm/nospec-branch.h |  3 ---
 arch/x86/kernel/kprobes/opt.c        | 40 +++++++++++++++---------------------
 arch/x86/kernel/vmlinux.lds.S        |  2 --
 tools/perf/util/thread-stack.c       |  4 +---
 4 files changed, 17 insertions(+), 32 deletions(-)
(limited to 'tools')

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 3faf044569a5..e50db53a4cb9 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -478,9 +478,6 @@ enum ssb_mitigation {
 	SPEC_STORE_BYPASS_SECCOMP,
 };

-extern char __indirect_thunk_start[];
-extern char __indirect_thunk_end[];
-
 static __always_inline
 void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
 {
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 57b0037d0a99..517821b48391 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -226,7 +226,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
 }

 /* Check whether insn is indirect jump */
-static int __insn_is_indirect_jump(struct insn *insn)
+static int insn_is_indirect_jump(struct insn *insn)
 {
 	return ((insn->opcode.bytes[0] == 0xff &&
 		(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
@@ -260,26 +260,6 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
 	return (start <= target && target <= start + len);
 }

-static int insn_is_indirect_jump(struct insn *insn)
-{
-	int ret = __insn_is_indirect_jump(insn);
-
-#ifdef CONFIG_RETPOLINE
-	/*
-	 * Jump to x86_indirect_thunk_* is treated as an indirect jump.
-	 * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with
-	 * older gcc may use indirect jump. So we add this check instead of
-	 * replace indirect-jump check.
-	 */
-	if (!ret)
-		ret = insn_jump_into_range(insn,
-				(unsigned long)__indirect_thunk_start,
-				(unsigned long)__indirect_thunk_end
-				- (unsigned long)__indirect_thunk_start);
-#endif
-	return ret;
-}
-
 /* Decode whole function to ensure any instructions don't jump into target */
 static int can_optimize(unsigned long paddr)
 {
@@ -334,9 +314,21 @@ static int can_optimize(unsigned long paddr)
 		/* Recover address */
 		insn.kaddr = (void *)addr;
 		insn.next_byte = (void *)(addr + insn.length);
-		/* Check any instructions don't jump into target */
-		if (insn_is_indirect_jump(&insn) ||
-		    insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
+		/*
+		 * Check that no instruction jumps into the target, either
+		 * indirectly or directly.
+		 *
+		 * The indirect case is present to handle code with jump
+		 * tables. When the kernel uses retpolines, the check should
+		 * in theory additionally look for jumps to indirect thunks.
+		 * However, a kernel built with retpolines or IBT has jump
+		 * tables disabled, so the check can be skipped altogether.
+		 */
+		if (!IS_ENABLED(CONFIG_RETPOLINE) &&
+		    !IS_ENABLED(CONFIG_X86_KERNEL_IBT) &&
+		    insn_is_indirect_jump(&insn))
+			return 0;
+		if (insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
 					 DISP32_SIZE))
 			return 0;
 		addr += insn.length;
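A side note on the IS_ENABLED() form used above: because the options
are compile-time constants, the whole condition folds away when either
option is set, so the indirect-jump test costs nothing in such builds.
A minimal, self-contained illustration of the pattern (plain C
stand-ins, not the kernel's kconfig.h):

  #include <stdbool.h>

  #define CONFIG_RETPOLINE 1		/* pretend .config values */
  #define CONFIG_X86_KERNEL_IBT 0
  #define IS_ENABLED(option) (option)	/* simplified stand-in */

  static bool insn_is_indirect_jump_stub(void)
  {
  	return false;		/* placeholder for the real decoder check */
  }

  static int can_optimize_fragment(void)
  {
  	/*
  	 * With CONFIG_RETPOLINE=1, !IS_ENABLED(CONFIG_RETPOLINE) is the
  	 * constant 0, so the compiler drops the whole branch, including
  	 * the function call, with no #ifdef needed.
  	 */
  	if (!IS_ENABLED(CONFIG_RETPOLINE) &&
  	    !IS_ENABLED(CONFIG_X86_KERNEL_IBT) &&
  	    insn_is_indirect_jump_stub())
  		return 0;
  	return 1;
  }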
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index dfb8783cb4c7..8e2a306e45b5 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -133,10 +133,8 @@ SECTIONS
 		KPROBES_TEXT
 		SOFTIRQENTRY_TEXT
 #ifdef CONFIG_RETPOLINE
-		__indirect_thunk_start = .;
 		*(.text..__x86.indirect_thunk)
 		*(.text..__x86.return_thunk)
-		__indirect_thunk_end = .;
 #endif
 		STATIC_CALL_TEXT

diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 374d142e7390..c6a0a27b12c2 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -1038,9 +1038,7 @@ static int thread_stack__trace_end(struct thread_stack *ts,

 static bool is_x86_retpoline(const char *name)
 {
-	const char *p = strstr(name, "__x86_indirect_thunk_");
-
-	return p == name || !strcmp(name, "__indirect_thunk_start");
+	return strstr(name, "__x86_indirect_thunk_") == name;
 }

 /*
--
cgit v1.2.3


From 4ae68b26c3ab5a82aa271e6e9fc9b1a06e1d6b40 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 14 Aug 2023 13:44:29 +0200
Subject: objtool/x86: Fix SRSO mess

Objtool --rethunk does two things:

 - it collects all (tail) calls of __x86_return_thunk and places them
   into .return_sites. These are typically compiler generated, but the
   RET macro emits the same.

 - it fudges the validation of the __x86_return_thunk symbol; because
   this symbol is inside another instruction, it can't actually find
   the instruction pointed to by the symbol offset and gets upset.

Because these two things pertained to the same symbol, there was no
pressing need to separate them. However, alas, along came SRSO and
more crazy things to deal with appeared.

The SRSO patch itself added the following symbol names to identify as
rethunk:

  'srso_untrain_ret', 'srso_safe_ret' and '__ret'

Where '__ret' is the old retbleed return thunk, 'srso_safe_ret' is a
new similarly embedded return thunk, and 'srso_untrain_ret' is
completely unrelated to anything the above does (and was only included
because of that INT3 vs UD2 issue fixed previously).

Clear things up by adding a second category for the embedded
instruction thing.
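For orientation, the .return_sites section only stores offsets; the
kernel consumes it when patching itself at boot. The sketch below
illustrates the idea with hypothetical patch_jmp()/patch_ret()
helpers; the real consumer is apply_returns() in
arch/x86/kernel/alternative.c:

  /* Offsets recorded by objtool, delimited by the linker script. */
  extern s32 __return_sites[], __return_sites_end[];

  static void rewrite_return_sites(bool rethunk)
  {
  	s32 *p;

  	for (p = __return_sites; p < __return_sites_end; p++) {
  		/* Each entry is an offset relative to its own location. */
  		void *site = (void *)((unsigned long)p + *p);

  		if (rethunk)
  			patch_jmp(site, x86_return_thunk); /* hypothetical helper */
  		else
  			patch_ret(site);		   /* hypothetical helper */
  	}
  }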
Fixes: fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation")
Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Borislav Petkov (AMD)
Link: https://lore.kernel.org/r/20230814121148.704502245@infradead.org
---
 tools/objtool/arch/x86/decode.c      | 11 +++++++----
 tools/objtool/check.c                | 24 ++++++++++++++++++++++--
 tools/objtool/include/objtool/arch.h |  1 +
 tools/objtool/include/objtool/elf.h  |  1 +
 4 files changed, 31 insertions(+), 6 deletions(-)
(limited to 'tools')

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 2d51fa8da9e8..cba8a7be040e 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -824,8 +824,11 @@ bool arch_is_retpoline(struct symbol *sym)

 bool arch_is_rethunk(struct symbol *sym)
 {
-	return !strcmp(sym->name, "__x86_return_thunk") ||
-	       !strcmp(sym->name, "srso_untrain_ret") ||
-	       !strcmp(sym->name, "srso_safe_ret") ||
-	       !strcmp(sym->name, "__ret");
+	return !strcmp(sym->name, "__x86_return_thunk");
+}
+
+bool arch_is_embedded_insn(struct symbol *sym)
+{
+	return !strcmp(sym->name, "__ret") ||
+	       !strcmp(sym->name, "srso_safe_ret");
 }
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index e2ee10ce7703..191656ee9fbc 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -455,7 +455,7 @@ static int decode_instructions(struct objtool_file *file)
 				return -1;
 			}

-			if (func->return_thunk || func->alias != func)
+			if (func->embedded_insn || func->alias != func)
 				continue;

 			if (!find_insn(file, sec, func->offset)) {
@@ -1288,16 +1288,33 @@ static int add_ignore_alternatives(struct objtool_file *file)
 	return 0;
 }

+/*
+ * Symbols that replace INSN_CALL_DYNAMIC; every (tail) call to such a
+ * symbol will be added to the .retpoline_sites section.
+ */
 __weak bool arch_is_retpoline(struct symbol *sym)
 {
 	return false;
 }

+/*
+ * Symbols that replace INSN_RETURN; every (tail) call to such a symbol
+ * will be added to the .return_sites section.
+ */
 __weak bool arch_is_rethunk(struct symbol *sym)
 {
 	return false;
 }

+/*
+ * Symbols that are embedded inside other instructions, because sometimes
+ * crazy code exists. These are mostly ignored for validation purposes.
+ */
+__weak bool arch_is_embedded_insn(struct symbol *sym)
+{
+	return false;
+}
+
 static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
 {
 	struct reloc *reloc;
@@ -1583,7 +1600,7 @@ static int add_jump_destinations(struct objtool_file *file)
 			 * middle of another instruction. Objtool only
 			 * knows about the outer instruction.
 			 */
-			if (sym && sym->return_thunk) {
+			if (sym && sym->embedded_insn) {
 				add_return_call(file, insn, false);
 				continue;
 			}
@@ -2502,6 +2519,9 @@ static int classify_symbols(struct objtool_file *file)
 		if (arch_is_rethunk(func))
 			func->return_thunk = true;

+		if (arch_is_embedded_insn(func))
+			func->embedded_insn = true;
+
 		if (arch_ftrace_match(func->name))
 			func->fentry = true;

diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 2b6d2ce4f9a5..0b303eba660e 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -90,6 +90,7 @@ int arch_decode_hint_reg(u8 sp_reg, int *base);

 bool arch_is_retpoline(struct symbol *sym);
 bool arch_is_rethunk(struct symbol *sym);
+bool arch_is_embedded_insn(struct symbol *sym);

 int arch_rewrite_retpolines(struct objtool_file *file);

diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index c532d70864dc..9f71e988eca4 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -66,6 +66,7 @@ struct symbol {
 	u8 fentry         : 1;
 	u8 profiling_func : 1;
 	u8 warned         : 1;
+	u8 embedded_insn  : 1;
 	struct list_head pv_target;
 	struct reloc *relocs;
 };
--
cgit v1.2.3


From d43490d0ab824023e11d0b57d0aeec17a6e0ca13 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 14 Aug 2023 13:44:31 +0200
Subject: x86/cpu: Clean up SRSO return thunk mess

Use the existing configurable return thunk. There is absolutely no
justification for having created this __x86_return_thunk alternative.

To clarify, the whole thing looks like:

Zen3/4 does:

  srso_alias_untrain_ret:
	  nop2
	  lfence
	  jmp srso_alias_return_thunk
	  int3

  srso_alias_safe_ret: // aliases srso_alias_untrain_ret just so
	  add $8, %rsp
	  ret
	  int3

  srso_alias_return_thunk:
	  call srso_alias_safe_ret
	  ud2

While Zen1/2 does:

  srso_untrain_ret:
	  movabs $foo, %rax
	  lfence
	  call srso_safe_ret (jmp srso_return_thunk ?)
	  int3

  srso_safe_ret: // embedded in movabs instruction
	  add $8,%rsp
	  ret
	  int3

  srso_return_thunk:
	  call srso_safe_ret
	  ud2

While retbleed does:

  zen_untrain_ret:
	  test $0xcc, %bl
	  lfence
	  jmp zen_return_thunk
	  int3

  zen_return_thunk: // embedded in the test instruction
	  ret
	  int3

Where Zen1/2 flush the BTB entry using the instruction decoder trick
(test, movabs) and Zen3/4 use BTB aliasing. SRSO adds a return
sequence (srso_safe_ret()) which forces the function return
instruction to speculate into a trap (UD2). This RET will then
mispredict and execution will continue at the return site read from
the top of the stack.

Pick one of three options at boot (every function can only ever return
once).

[ bp: Fixup commit message uarch details and add them in a comment in
  the code too. Add a comment about the srso_select_mitigation()
  dependency on retbleed_select_mitigation(). Add moar ifdeffery for
  32-bit builds. Add a dummy srso_untrain_ret_alias() definition for
  32-bit alternatives needing the symbol. ]
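"Pick one of three options at boot" concretely means pointing the
single configurable thunk at one of the three implementations. The
helper below is a hypothetical condensation of the bugs.c hunks that
follow; in the patch the assignments live in
retbleed_select_mitigation() and srso_select_mitigation():

  extern void retbleed_return_thunk(void);
  extern void srso_return_thunk(void);
  extern void srso_alias_return_thunk(void);
  extern void (*x86_return_thunk)(void);

  /* Hypothetical condensation of the selection logic below. */
  static void __init select_return_thunk(bool retbleed_unret,
  					 bool srso_safe_ret,
  					 unsigned int family)
  {
  	if (retbleed_unret && IS_ENABLED(CONFIG_RETHUNK))
  		x86_return_thunk = retbleed_return_thunk;   /* Zen1/2 retbleed */

  	if (srso_safe_ret) {
  		if (family == 0x19)
  			x86_return_thunk = srso_alias_return_thunk; /* Zen3/4 */
  		else
  			x86_return_thunk = srso_return_thunk;	    /* Zen1/2 */
  	}
  }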
Fixes: fb3bd914b3ec ("x86/srso: Add a Speculative RAS Overflow mitigation")
Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Borislav Petkov (AMD)
Link: https://lore.kernel.org/r/20230814121148.842775684@infradead.org
---
 arch/x86/include/asm/nospec-branch.h |  5 ++++
 arch/x86/kernel/cpu/bugs.c           | 15 ++++++++--
 arch/x86/kernel/vmlinux.lds.S        |  2 +-
 arch/x86/lib/retpoline.S             | 58 ++++++++++++++++++++++++++----------
 tools/objtool/arch/x86/decode.c      |  2 +-
 5 files changed, 62 insertions(+), 20 deletions(-)
(limited to 'tools')

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index b3625cceb2f0..5ed78ad9f274 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -347,9 +347,14 @@ extern void __x86_return_thunk(void);
 static inline void __x86_return_thunk(void) {}
 #endif

+extern void zen_return_thunk(void);
+extern void srso_return_thunk(void);
+extern void srso_alias_return_thunk(void);
+
 extern void zen_untrain_ret(void);
 extern void srso_untrain_ret(void);
 extern void srso_untrain_ret_alias(void);
+
 extern void entry_ibpb(void);

 extern void (*x86_return_thunk)(void);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 3bc0d149eacd..56cf250c13f6 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -167,6 +167,11 @@ void __init cpu_select_mitigations(void)
 	md_clear_select_mitigation();
 	srbds_select_mitigation();
 	l1d_flush_select_mitigation();
+
+	/*
+	 * srso_select_mitigation() depends on and must run after
+	 * retbleed_select_mitigation().
+	 */
 	srso_select_mitigation();
 	gds_select_mitigation();
 }
@@ -1037,6 +1042,9 @@ do_cmd_auto:
 		setup_force_cpu_cap(X86_FEATURE_RETHUNK);
 		setup_force_cpu_cap(X86_FEATURE_UNRET);

+		if (IS_ENABLED(CONFIG_RETHUNK))
+			x86_return_thunk = zen_return_thunk;
+
 		if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
 		    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
 			pr_err(RETBLEED_UNTRAIN_MSG);
@@ -2453,10 +2461,13 @@ static void __init srso_select_mitigation(void)
 			 */
 			setup_force_cpu_cap(X86_FEATURE_RETHUNK);

-			if (boot_cpu_data.x86 == 0x19)
+			if (boot_cpu_data.x86 == 0x19) {
 				setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS);
-			else
+				x86_return_thunk = srso_alias_return_thunk;
+			} else {
 				setup_force_cpu_cap(X86_FEATURE_SRSO);
+				x86_return_thunk = srso_return_thunk;
+			}
 			srso_mitigation = SRSO_MITIGATION_SAFE_RET;
 		} else {
 			pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 8e2a306e45b5..d3b02d67321b 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -521,7 +521,7 @@ INIT_PER_CPU(irq_stack_backing_store);
 #endif

 #ifdef CONFIG_RETHUNK
-. = ASSERT((__ret & 0x3f) == 0, "__ret not cacheline-aligned");
+. = ASSERT((zen_return_thunk & 0x3f) == 0, "zen_return_thunk not cacheline-aligned");
 . = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
 #endif

diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index a478eb5bbf62..7df8582fb64e 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -151,22 +151,27 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
 	.section .text..__x86.rethunk_untrain

 SYM_START(srso_untrain_ret_alias, SYM_L_GLOBAL, SYM_A_NONE)
+	UNWIND_HINT_FUNC
 	ANNOTATE_NOENDBR
 	ASM_NOP2
 	lfence
-	jmp __x86_return_thunk
+	jmp srso_alias_return_thunk
 SYM_FUNC_END(srso_untrain_ret_alias)
 __EXPORT_THUNK(srso_untrain_ret_alias)

 	.section .text..__x86.rethunk_safe
+#else
+/* dummy definition for alternatives */
+SYM_START(srso_untrain_ret_alias, SYM_L_GLOBAL, SYM_A_NONE)
+	ANNOTATE_UNRET_SAFE
+	ret
+	int3
+SYM_FUNC_END(srso_untrain_ret_alias)
 #endif

-/* Needs a definition for the __x86_return_thunk alternative below. */
 SYM_START(srso_safe_ret_alias, SYM_L_GLOBAL, SYM_A_NONE)
-#ifdef CONFIG_CPU_SRSO
 	lea 8(%_ASM_SP), %_ASM_SP
 	UNWIND_HINT_FUNC
-#endif
 	ANNOTATE_UNRET_SAFE
 	ret
 	int3
@@ -174,9 +179,16 @@ SYM_FUNC_END(srso_safe_ret_alias)

 	.section .text..__x86.return_thunk

+SYM_CODE_START(srso_alias_return_thunk)
+	UNWIND_HINT_FUNC
+	ANNOTATE_NOENDBR
+	call srso_safe_ret_alias
+	ud2
+SYM_CODE_END(srso_alias_return_thunk)
+
 /*
  * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
- * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
+ * 1) The RET at zen_return_thunk must be on a 64 byte boundary, for
  *    alignment within the BTB.
  * 2) The instruction at zen_untrain_ret must contain, and not
  *    end with, the 0xc3 byte of the RET.
 * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
 *    from re-poisioning the BTB prediction.
 */
 	.align 64
-	.skip 64 - (__ret - zen_untrain_ret), 0xcc
+	.skip 64 - (zen_return_thunk - zen_untrain_ret), 0xcc
 SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
 	ANNOTATE_NOENDBR
 	/*
@@ -192,16 +204,16 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
 	 * As executed from zen_untrain_ret, this is:
 	 *
 	 *   TEST $0xcc, %bl
 	 *   LFENCE
-	 *   JMP __x86_return_thunk
+	 *   JMP zen_return_thunk
 	 *
 	 * Executing the TEST instruction has a side effect of evicting any BTB
 	 * prediction (potentially attacker controlled) attached to the RET, as
-	 * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
+	 * zen_return_thunk + 1 isn't an instruction boundary at the moment.
 	 */
 	.byte	0xf6

 	/*
-	 * As executed from __x86_return_thunk, this is a plain RET.
+	 * As executed from zen_return_thunk, this is a plain RET.
 	 *
 	 * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
 	 *
@@ -213,13 +225,13 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
 	 * With SMT enabled and STIBP active, a sibling thread cannot poison
 	 * RET's prediction to a type of its choice, but can evict the
 	 * prediction due to competitive sharing. If the prediction is
-	 * evicted, __x86_return_thunk will suffer Straight Line Speculation
+	 * evicted, zen_return_thunk will suffer Straight Line Speculation
 	 * which will be contained safely by the INT3.
 	 */
-SYM_INNER_LABEL(__ret, SYM_L_GLOBAL)
+SYM_INNER_LABEL(zen_return_thunk, SYM_L_GLOBAL)
 	ret
 	int3
-SYM_CODE_END(__ret)
+SYM_CODE_END(zen_return_thunk)

 	/*
 	 * Ensure the TEST decoding / BTB invalidation is complete.
@@ -230,7 +242,7 @@ SYM_CODE_END(zen_return_thunk)
 	 * Jump back and execute the RET in the middle of the TEST instruction.
 	 * INT3 is for SLS protection.
 	 */
-	jmp __ret
+	jmp zen_return_thunk
 	int3
 SYM_FUNC_END(zen_untrain_ret)
 __EXPORT_THUNK(zen_untrain_ret)

 /*
  * SRSO untraining sequence for Zen1/2, similar to zen_untrain_ret()
  * above. On kernel entry, srso_untrain_ret() is executed which is a
  *
  *   movabs $0xccccc30824648d48,%rax
@@ -251,11 +263,18 @@ SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
 	ANNOTATE_NOENDBR
 	.byte 0x48, 0xb8

+/*
+ * This forces the function return instruction to speculate into a trap
+ * (UD2 in srso_return_thunk() below). This RET will then mispredict
+ * and execution will continue at the return site read from the top of
+ * the stack.
+ */
SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
 	lea 8(%_ASM_SP), %_ASM_SP
 	ret
 	int3
 	int3
+	/* end of movabs */
 	lfence
 	call srso_safe_ret
 	ud2
@@ -263,12 +282,19 @@ SYM_CODE_END(srso_safe_ret)
 SYM_FUNC_END(srso_untrain_ret)
 __EXPORT_THUNK(srso_untrain_ret)

-SYM_CODE_START(__x86_return_thunk)
+SYM_CODE_START(srso_return_thunk)
 	UNWIND_HINT_FUNC
 	ANNOTATE_NOENDBR
-	ALTERNATIVE_2 "jmp __ret", "call srso_safe_ret", X86_FEATURE_SRSO, \
-		      "call srso_safe_ret_alias", X86_FEATURE_SRSO_ALIAS
+	call srso_safe_ret
 	ud2
+SYM_CODE_END(srso_return_thunk)
+
+SYM_CODE_START(__x86_return_thunk)
+	UNWIND_HINT_FUNC
+	ANNOTATE_NOENDBR
+	ANNOTATE_UNRET_SAFE
+	ret
+	int3
 SYM_CODE_END(__x86_return_thunk)
 EXPORT_SYMBOL(__x86_return_thunk)
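To see why srso_safe_ret qualifies as an "embedded" instruction, it
helps to spell out the bytes of the movabs shown above; this is an
annotated illustration derived from that listing, not code that is
built anywhere:

  /*
   * The ten bytes of "movabs $0xccccc30824648d48, %rax". Executed from
   * srso_untrain_ret (offset 0) they are one harmless MOV; entered at
   * srso_safe_ret (offset 2, inside the immediate) they decode as:
   *
   *   48 8d 64 24 08   lea  0x8(%rsp), %rsp
   *   c3               ret
   *   cc cc            int3; int3
   */
  static const unsigned char srso_untrain_bytes[] = {
  	0x48, 0xb8,			/* REX.W + movabs opcode */
  	0x48, 0x8d, 0x64, 0x24, 0x08,	/* imm64, low bytes */
  	0xc3, 0xcc, 0xcc,		/* imm64, high bytes */
  };

This is exactly the situation objtool's embedded_insn category exists
for: the symbol offset points into the middle of another instruction.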
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index cba8a7be040e..c55f3bb59b31 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -829,6 +829,6 @@ bool arch_is_rethunk(struct symbol *sym)

 bool arch_is_embedded_insn(struct symbol *sym)
 {
-	return !strcmp(sym->name, "__ret") ||
+	return !strcmp(sym->name, "zen_return_thunk") ||
 	       !strcmp(sym->name, "srso_safe_ret");
 }
--
cgit v1.2.3


From d025b7bac07a6e90b6b98b487f88854ad9247c39 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Mon, 14 Aug 2023 13:44:32 +0200
Subject: x86/cpu: Rename original retbleed methods

Rename the original retbleed return thunk and untrain_ret to
retbleed_return_thunk() and retbleed_untrain_ret().

No functional changes.

Suggested-by: Josh Poimboeuf
Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Borislav Petkov (AMD)
Link: https://lore.kernel.org/r/20230814121148.909378169@infradead.org
---
 arch/x86/include/asm/nospec-branch.h |  8 ++++----
 arch/x86/kernel/cpu/bugs.c           |  2 +-
 arch/x86/kernel/vmlinux.lds.S        |  2 +-
 arch/x86/lib/retpoline.S             | 30 +++++++++++++++---------------
 tools/objtool/arch/x86/decode.c      |  2 +-
 tools/objtool/check.c                |  2 +-
 6 files changed, 23 insertions(+), 23 deletions(-)
(limited to 'tools')

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 5ed78ad9f274..8a0d4c5f4cb7 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -272,7 +272,7 @@
 .endm

 #ifdef CONFIG_CPU_UNRET_ENTRY
-#define CALL_ZEN_UNTRAIN_RET	"call zen_untrain_ret"
+#define CALL_ZEN_UNTRAIN_RET	"call retbleed_untrain_ret"
 #else
 #define CALL_ZEN_UNTRAIN_RET	""
 #endif
@@ -282,7 +282,7 @@
  * return thunk isn't mapped into the userspace tables (then again, AMD
  * typically has NO_MELTDOWN).
  *
- * While zen_untrain_ret() doesn't clobber anything but requires stack,
+ * While retbleed_untrain_ret() doesn't clobber anything but requires stack,
  * entry_ibpb() will clobber AX, CX, DX.
 *
 * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
@@ -347,11 +347,11 @@ extern void __x86_return_thunk(void);
 static inline void __x86_return_thunk(void) {}
 #endif

-extern void zen_return_thunk(void);
+extern void retbleed_return_thunk(void);
 extern void srso_return_thunk(void);
 extern void srso_alias_return_thunk(void);

-extern void zen_untrain_ret(void);
+extern void retbleed_untrain_ret(void);
 extern void srso_untrain_ret(void);
 extern void srso_untrain_ret_alias(void);

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 56cf250c13f6..bbbbda981bab 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1043,7 +1043,7 @@ do_cmd_auto:
 		setup_force_cpu_cap(X86_FEATURE_UNRET);

 		if (IS_ENABLED(CONFIG_RETHUNK))
-			x86_return_thunk = zen_return_thunk;
+			x86_return_thunk = retbleed_return_thunk;

 		if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
 		    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index d3b02d67321b..7c0e2b4fc344 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -521,7 +521,7 @@ INIT_PER_CPU(irq_stack_backing_store);
 #endif

 #ifdef CONFIG_RETHUNK
-. = ASSERT((zen_return_thunk & 0x3f) == 0, "zen_return_thunk not cacheline-aligned");
+. = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
 . = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
 #endif

diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 7df8582fb64e..adabd07eab99 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -188,32 +188,32 @@ SYM_CODE_END(srso_alias_return_thunk)

 /*
  * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
- * 1) The RET at zen_return_thunk must be on a 64 byte boundary, for
+ * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for
  *    alignment within the BTB.
- * 2) The instruction at zen_untrain_ret must contain, and not
+ * 2) The instruction at retbleed_untrain_ret must contain, and not
  *    end with, the 0xc3 byte of the RET.
  * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
  *    from re-poisioning the BTB prediction.
  */
 	.align 64
-	.skip 64 - (zen_return_thunk - zen_untrain_ret), 0xcc
-SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+	.skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc
+SYM_START(retbleed_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
 	ANNOTATE_NOENDBR
 	/*
-	 * As executed from zen_untrain_ret, this is:
+	 * As executed from retbleed_untrain_ret, this is:
 	 *
 	 *   TEST $0xcc, %bl
 	 *   LFENCE
-	 *   JMP zen_return_thunk
+	 *   JMP retbleed_return_thunk
 	 *
 	 * Executing the TEST instruction has a side effect of evicting any BTB
 	 * prediction (potentially attacker controlled) attached to the RET, as
-	 * zen_return_thunk + 1 isn't an instruction boundary at the moment.
+	 * retbleed_return_thunk + 1 isn't an instruction boundary at the moment.
 	 */
 	.byte	0xf6

 	/*
-	 * As executed from zen_return_thunk, this is a plain RET.
+	 * As executed from retbleed_return_thunk, this is a plain RET.
 	 *
 	 * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
 	 *
@@ -225,13 +225,13 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
 	 * With SMT enabled and STIBP active, a sibling thread cannot poison
 	 * RET's prediction to a type of its choice, but can evict the
 	 * prediction due to competitive sharing. If the prediction is
-	 * evicted, zen_return_thunk will suffer Straight Line Speculation
+	 * evicted, retbleed_return_thunk will suffer Straight Line Speculation
 	 * which will be contained safely by the INT3.
 	 */
-SYM_INNER_LABEL(zen_return_thunk, SYM_L_GLOBAL)
+SYM_INNER_LABEL(retbleed_return_thunk, SYM_L_GLOBAL)
 	ret
 	int3
-SYM_CODE_END(zen_return_thunk)
+SYM_CODE_END(retbleed_return_thunk)

 	/*
 	 * Ensure the TEST decoding / BTB invalidation is complete.
@@ -242,13 +242,13 @@ SYM_CODE_END(zen_return_thunk)
 	 * Jump back and execute the RET in the middle of the TEST instruction.
 	 * INT3 is for SLS protection.
 	 */
-	jmp zen_return_thunk
+	jmp retbleed_return_thunk
 	int3
-SYM_FUNC_END(zen_untrain_ret)
-__EXPORT_THUNK(zen_untrain_ret)
+SYM_FUNC_END(retbleed_untrain_ret)
+__EXPORT_THUNK(retbleed_untrain_ret)

 /*
- * SRSO untraining sequence for Zen1/2, similar to zen_untrain_ret()
+ * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
  * above. On kernel entry, srso_untrain_ret() is executed which is a
  *
  *   movabs $0xccccc30824648d48,%rax
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index c55f3bb59b31..c0f25d00181e 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -829,6 +829,6 @@ bool arch_is_rethunk(struct symbol *sym)

 bool arch_is_embedded_insn(struct symbol *sym)
 {
-	return !strcmp(sym->name, "zen_return_thunk") ||
+	return !strcmp(sym->name, "retbleed_return_thunk") ||
 	       !strcmp(sym->name, "srso_safe_ret");
 }
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 191656ee9fbc..7a9aaf400873 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1593,7 +1593,7 @@ static int add_jump_destinations(struct objtool_file *file)
 			struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);

 			/*
-			 * This is a special case for zen_untrain_ret().
+			 * This is a special case for retbleed_untrain_ret().
 			 * It jumps to __x86_return_thunk(), but objtool
 			 * can't find the thunk's starting RET
 			 * instruction, because the RET is also in the
--
cgit v1.2.3


From dbf46008775516f7f25c95b7760041c286299783 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Wed, 16 Aug 2023 13:59:21 +0200
Subject: objtool/x86: Fixup frame-pointer vs rethunk

For stack validation of a frame-pointer build, objtool validates that
every CALL instruction is preceded by a frame setup.

The new SRSO return thunks violate this with their RSB stuffing
trickery.

Extend the __fentry__ exception to also cover the embedded_insn case
used for this.

This cures:

  vmlinux.o: warning: objtool: srso_untrain_ret+0xd: call without frame pointer save/setup
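The rule being relaxed here is the usual frame-pointer discipline: a
CALL is only valid once the frame has been set up, unless the
destination is exempt. A simplified model of the check follows, with
illustrative types rather than objtool's real data structures:

  struct sym_info { bool fentry, embedded_insn; };

  /*
   * Returns true when a CALL to dest without a completed frame setup
   * should trigger "call without frame pointer save/setup".
   */
  static bool call_needs_frame_warning(const struct sym_info *dest,
  				       bool frame_setup_done)
  {
  	/* __fentry__ and embedded-insn thunks are exempt ("special"). */
  	if (dest && (dest->fentry || dest->embedded_insn))
  		return false;

  	return !frame_setup_done;
  }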
Fixes: 4ae68b26c3ab ("objtool/x86: Fix SRSO mess")
Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Borislav Petkov (AMD)
Acked-by: Josh Poimboeuf
Link: https://lore.kernel.org/r/20230816115921.GH980931@hirez.programming.kicks-ass.net
---
 tools/objtool/check.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)
(limited to 'tools')

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 7a9aaf400873..1384090530db 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -2650,12 +2650,17 @@ static int decode_sections(struct objtool_file *file)
 	return 0;
 }

-static bool is_fentry_call(struct instruction *insn)
+static bool is_special_call(struct instruction *insn)
 {
-	if (insn->type == INSN_CALL &&
-	    insn_call_dest(insn) &&
-	    insn_call_dest(insn)->fentry)
-		return true;
+	if (insn->type == INSN_CALL) {
+		struct symbol *dest = insn_call_dest(insn);
+
+		if (!dest)
+			return false;
+
+		if (dest->fentry || dest->embedded_insn)
+			return true;
+	}

 	return false;
 }
@@ -3656,7 +3661,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 			if (ret)
 				return ret;

-		if (opts.stackval && func && !is_fentry_call(insn) &&
+		if (opts.stackval && func && !is_special_call(insn) &&
 		    !has_valid_stack_frame(&state)) {
 			WARN_INSN(insn, "call without frame pointer save/setup");
 			return 1;
--
cgit v1.2.3