diff options
95 files changed, 1454 insertions, 326 deletions
diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h index 6f10de6af6e3..fd65931a739f 100644 --- a/arch/powerpc/include/asm/livepatch.h +++ b/arch/powerpc/include/asm/livepatch.h @@ -17,17 +17,6 @@ static inline void klp_arch_set_pc(struct ftrace_regs *fregs, unsigned long ip) ftrace_instruction_pointer_set(fregs, ip); } -#define klp_get_ftrace_location klp_get_ftrace_location -static inline unsigned long klp_get_ftrace_location(unsigned long faddr) -{ - /* - * Live patch works on PPC32 and only with -mprofile-kernel on PPC64. In - * both cases, the ftrace location is always within the first 16 bytes. - */ - return ftrace_location_range(faddr, faddr + 16); -} -#endif /* CONFIG_LIVEPATCH */ - #ifdef CONFIG_LIVEPATCH_64 static inline void klp_init_thread_info(struct task_struct *p) { diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 9a492fdec1df..7dae0b01abfb 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -105,6 +105,27 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) return addr; } +static bool arch_kprobe_on_func_entry(unsigned long offset) +{ +#ifdef PPC64_ELF_ABI_v2 +#ifdef CONFIG_KPROBES_ON_FTRACE + return offset <= 16; +#else + return offset <= 8; +#endif +#else + return !offset; +#endif +} + +/* XXX try and fold the magic of kprobe_lookup_name() in this */ +kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, + bool *on_func_entry) +{ + *on_func_entry = arch_kprobe_on_func_entry(offset); + return (kprobe_opcode_t *)(addr + offset); +} + void *alloc_insn_page(void) { void *page; @@ -218,19 +239,6 @@ static nokprobe_inline void set_current_kprobe(struct kprobe *p, struct pt_regs kcb->kprobe_saved_msr = regs->msr; } -bool arch_kprobe_on_func_entry(unsigned long offset) -{ -#ifdef PPC64_ELF_ABI_v2 -#ifdef CONFIG_KPROBES_ON_FTRACE - return offset <= 16; -#else - return offset <= 8; -#endif -#else - return !offset; -#endif -} - void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { ri->ret_addr = (kprobe_opcode_t *)regs->link; diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index abceeabe29b9..0760e24f2eba 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -424,6 +424,10 @@ void __init check_bugs(void) os_check_bugs(); } +void apply_ibt_endbr(s32 *start, s32 *end) +{ +} + void apply_retpolines(s32 *start, s32 *end) { } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 10e4c332e15d..7340d9f01b62 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1842,6 +1842,36 @@ config X86_UMIP specific cases in protected and virtual-8086 modes. Emulated results are dummy. +config CC_HAS_IBT + # GCC >= 9 and binutils >= 2.29 + # Retpoline check to work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93654 + # Clang/LLVM >= 14 + # https://github.com/llvm/llvm-project/commit/e0b89df2e0f0130881bf6c39bf31d7f6aac00e0f + # https://github.com/llvm/llvm-project/commit/dfcf69770bc522b9e411c66454934a37c1f35332 + def_bool ((CC_IS_GCC && $(cc-option, -fcf-protection=branch -mindirect-branch-register)) || \ + (CC_IS_CLANG && CLANG_VERSION >= 140000)) && \ + $(as-instr,endbr64) + +config X86_KERNEL_IBT + prompt "Indirect Branch Tracking" + bool + depends on X86_64 && CC_HAS_IBT && STACK_VALIDATION + # https://github.com/llvm/llvm-project/commit/9d7001eba9c4cb311e03cd8cdc231f9e579f2d0f + depends on !LD_IS_LLD || LLD_VERSION >= 140000 + help + Build the kernel with support for Indirect Branch Tracking, a + hardware support course-grain forward-edge Control Flow Integrity + protection. It enforces that all indirect calls must land on + an ENDBR instruction, as such, the compiler will instrument the + code with them to make this happen. + + In addition to building the kernel with IBT, seal all functions that + are not indirect call targets, avoiding them ever becomming one. + + This requires LTO like objtool runs and will slow down the build. It + does significantly reduce the number of ENDBR instructions in the + kernel image. + config X86_INTEL_MEMORY_PROTECTION_KEYS prompt "Memory Protection Keys" def_bool y @@ -2815,19 +2845,20 @@ config IA32_AOUT help Support old a.out binaries in the 32bit emulation. -config X86_X32 +config X86_X32_ABI bool "x32 ABI for 64-bit mode" depends on X86_64 + # llvm-objcopy does not convert x86_64 .note.gnu.property or + # compressed debug sections to x86_x32 properly: + # https://github.com/ClangBuiltLinux/linux/issues/514 + # https://github.com/ClangBuiltLinux/linux/issues/1141 + depends on $(success,$(OBJCOPY) --version | head -n1 | grep -qv llvm) help Include code to run binaries for the x32 native 32-bit ABI for 64-bit processors. An x32 process gets access to the full 64-bit register file and wide data path while leaving pointers at 32 bits for smaller memory footprint. - You will need a recent binutils (2.22 or later) with - elf32_x86_64 support enabled to compile a kernel with this - option set. - config COMPAT_32 def_bool y depends on IA32_EMULATION || X86_32 @@ -2836,7 +2867,7 @@ config COMPAT_32 config COMPAT def_bool y - depends on IA32_EMULATION || X86_X32 + depends on IA32_EMULATION || X86_X32_ABI if COMPAT config COMPAT_FOR_U64_ALIGNMENT diff --git a/arch/x86/Makefile b/arch/x86/Makefile index e84cdd409b64..63d50f65b828 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -36,7 +36,7 @@ endif # How to compile the 16-bit code. Note we always compile for -march=i386; # that way we can complain to the user if the CPU is insufficient. -REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING \ +REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING -D__DISABLE_EXPORTS \ -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none) @@ -62,8 +62,20 @@ export BITS # KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -# Intel CET isn't enabled in the kernel +ifeq ($(CONFIG_X86_KERNEL_IBT),y) +# +# Kernel IBT has S_CET.NOTRACK_EN=0, as such the compilers must not generate +# NOTRACK prefixes. Current generation compilers unconditionally employ NOTRACK +# for jump-tables, as such, disable jump-tables for now. +# +# (jump-tables are implicitly disabled by RETPOLINE) +# +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104816 +# +KBUILD_CFLAGS += $(call cc-option,-fcf-protection=branch -fno-jump-tables) +else KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) +endif ifeq ($(CONFIG_X86_32),y) BITS := 32 @@ -140,22 +152,6 @@ else KBUILD_CFLAGS += -mcmodel=kernel endif -ifdef CONFIG_X86_X32 - x32_ld_ok := $(call try-run,\ - /bin/echo -e '1: .quad 1b' | \ - $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" - && \ - $(OBJCOPY) -O elf32-x86-64 "$$TMP" "$$TMP.o" && \ - $(LD) -m elf32_x86_64 "$$TMP.o" -o "$$TMP",y,n) - ifeq ($(x32_ld_ok),y) - CONFIG_X86_X32_ABI := y - KBUILD_AFLAGS += -DCONFIG_X86_X32_ABI - KBUILD_CFLAGS += -DCONFIG_X86_X32_ABI - else - $(warning CONFIG_X86_X32 enabled but no binutils support) - endif -endif -export CONFIG_X86_X32_ABI - # # If the function graph tracer is used with mcount instead of fentry, # '-maccumulate-outgoing-args' is needed to prevent a GCC bug diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index 80c0d22fc42c..ec35915f0901 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -195,6 +195,7 @@ crc_array: .altmacro LABEL crc_ %i .noaltmacro + ENDBR crc32q -i*8(block_0), crc_init crc32q -i*8(block_1), crc1 crc32q -i*8(block_2), crc2 @@ -204,6 +205,7 @@ LABEL crc_ %i .altmacro LABEL crc_ %i .noaltmacro + ENDBR crc32q -i*8(block_0), crc_init crc32q -i*8(block_1), crc1 # SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet @@ -237,6 +239,7 @@ LABEL crc_ %i ################################################################ LABEL crc_ 0 + ENDBR mov tmp, len cmp $128*24, tmp jae full_block diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 466df3e50276..4faac48ebec5 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -86,6 +86,7 @@ SYM_CODE_START(entry_SYSCALL_64) UNWIND_HINT_EMPTY + ENDBR swapgs /* tss.sp2 is scratch space. */ @@ -94,6 +95,7 @@ SYM_CODE_START(entry_SYSCALL_64) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR /* Construct struct pt_regs on stack */ pushq $__USER_DS /* pt_regs->ss */ @@ -276,6 +278,7 @@ SYM_FUNC_END(__switch_to_asm) .pushsection .text, "ax" SYM_CODE_START(ret_from_fork) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR // copy_thread movq %rax, %rdi call schedule_tail /* rdi: 'prev' task parameter */ @@ -350,6 +353,7 @@ SYM_CODE_END(ret_from_fork) .macro idtentry vector asmsym cfunc has_error_code:req SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 + ENDBR ASM_CLAC .if \has_error_code == 0 @@ -417,6 +421,7 @@ SYM_CODE_END(\asmsym) .macro idtentry_mce_db vector asmsym cfunc SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS + ENDBR ASM_CLAC pushq $-1 /* ORIG_RAX: no syscall to restart */ @@ -472,6 +477,7 @@ SYM_CODE_END(\asmsym) .macro idtentry_vc vector asmsym cfunc SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS + ENDBR ASM_CLAC /* @@ -533,6 +539,7 @@ SYM_CODE_END(\asmsym) .macro idtentry_df vector asmsym cfunc SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS offset=8 + ENDBR ASM_CLAC /* paranoid_entry returns GS information for paranoid_exit in EBX. */ @@ -544,6 +551,9 @@ SYM_CODE_START(\asmsym) movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ call \cfunc + /* For some configurations \cfunc ends up being a noreturn. */ + REACHABLE + jmp paranoid_exit _ASM_NOKPROBE(\asmsym) @@ -564,6 +574,7 @@ __irqentry_text_start: .align 16 .globl __irqentry_text_end __irqentry_text_end: + ANNOTATE_NOENDBR SYM_CODE_START_LOCAL(common_interrupt_return) SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) @@ -608,8 +619,8 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) /* Restore RDI. */ popq %rdi - SWAPGS - INTERRUPT_RETURN + swapgs + jmp .Lnative_iret SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL) @@ -626,9 +637,14 @@ SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL) * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization * when returning from IPI handler. */ - INTERRUPT_RETURN +#ifdef CONFIG_XEN_PV +SYM_INNER_LABEL(early_xen_iret_patch, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + .byte 0xe9 + .long .Lnative_iret - (. + 4) +#endif -SYM_INNER_LABEL_ALIGN(native_iret, SYM_L_GLOBAL) +.Lnative_iret: UNWIND_HINT_IRET_REGS /* * Are we returning to a stack segment from the LDT? Note: in @@ -640,6 +656,7 @@ SYM_INNER_LABEL_ALIGN(native_iret, SYM_L_GLOBAL) #endif SYM_INNER_LABEL(native_irq_return_iret, SYM_L_GLOBAL) + ANNOTATE_NOENDBR // exc_double_fault /* * This may fault. Non-paranoid faults on return to userspace are * handled by fixup_bad_iret. These include #SS, #GP, and #NP. @@ -734,6 +751,7 @@ SYM_FUNC_START(asm_load_gs_index) FRAME_BEGIN swapgs .Lgs_change: + ANNOTATE_NOENDBR // error_entry movl %edi, %gs 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE swapgs @@ -804,6 +822,7 @@ SYM_CODE_END(exc_xen_hypervisor_callback) */ SYM_CODE_START(xen_failsafe_callback) UNWIND_HINT_EMPTY + ENDBR movl %ds, %ecx cmpw %cx, 0x10(%rsp) jne 1f @@ -1063,6 +1082,7 @@ SYM_CODE_END(error_return) */ SYM_CODE_START(asm_exc_nmi) UNWIND_HINT_IRET_REGS + ENDBR /* * We allow breakpoints in NMIs. If a breakpoint occurs, then @@ -1310,6 +1330,7 @@ first_nmi: #endif repeat_nmi: + ANNOTATE_NOENDBR // this code /* * If there was a nested NMI, the first NMI's iret will return * here. But NMIs are still enabled and we can take another @@ -1338,6 +1359,7 @@ repeat_nmi: .endr subq $(5*8), %rsp end_repeat_nmi: + ANNOTATE_NOENDBR // this code /* * Everything below this point can be preempted by a nested NMI. @@ -1421,6 +1443,7 @@ SYM_CODE_END(asm_exc_nmi) */ SYM_CODE_START(ignore_sysret) UNWIND_HINT_EMPTY + ENDBR mov $-ENOSYS, %eax sysretl SYM_CODE_END(ignore_sysret) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 0051cf5c792d..4fdb007cddbd 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -48,6 +48,7 @@ */ SYM_CODE_START(entry_SYSENTER_compat) UNWIND_HINT_EMPTY + ENDBR /* Interrupts are off on entry. */ SWAPGS @@ -147,6 +148,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) popfq jmp .Lsysenter_flags_fixed SYM_INNER_LABEL(__end_entry_SYSENTER_compat, SYM_L_GLOBAL) + ANNOTATE_NOENDBR // is_sysenter_singlestep SYM_CODE_END(entry_SYSENTER_compat) /* @@ -198,6 +200,7 @@ SYM_CODE_END(entry_SYSENTER_compat) */ SYM_CODE_START(entry_SYSCALL_compat) UNWIND_HINT_EMPTY + ENDBR /* Interrupts are off on entry. */ swapgs @@ -211,6 +214,7 @@ SYM_CODE_START(entry_SYSCALL_compat) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR /* Construct struct pt_regs on stack */ pushq $__USER32_DS /* pt_regs->ss */ @@ -340,6 +344,7 @@ SYM_CODE_END(entry_SYSCALL_compat) */ SYM_CODE_START(entry_INT80_compat) UNWIND_HINT_EMPTY + ENDBR /* * Interrupts are off on entry. */ diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile index 5b3efed0e4e8..7f3886eeb2ff 100644 --- a/arch/x86/entry/syscalls/Makefile +++ b/arch/x86/entry/syscalls/Makefile @@ -67,7 +67,7 @@ uapisyshdr-y += unistd_32.h unistd_64.h unistd_x32.h syshdr-y += syscalls_32.h syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h unistd_64_x32.h syshdr-$(CONFIG_X86_64) += syscalls_64.h -syshdr-$(CONFIG_X86_X32) += syscalls_x32.h +syshdr-$(CONFIG_X86_X32_ABI) += syscalls_x32.h syshdr-$(CONFIG_XEN) += xen-hypercalls.h uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y)) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 58eee6402832..9b10c8c76087 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -76,6 +76,7 @@ extern int alternatives_patched; extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); extern void apply_retpolines(s32 *start, s32 *end); +extern void apply_ibt_endbr(s32 *start, s32 *end); struct module; diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index bab883c0b6fe..4d20a293c6fd 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -4,6 +4,7 @@ #include <linux/stringify.h> #include <linux/instrumentation.h> +#include <linux/objtool.h> /* * Despite that some emulators terminate on UD2, we use it for WARN(). diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 33d41e350c79..86e5e4e26fcb 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -7,6 +7,7 @@ #include <linux/topology.h> #include <linux/nodemask.h> #include <linux/percpu.h> +#include <asm/ibt.h> #ifdef CONFIG_SMP @@ -72,4 +73,7 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c); #else static inline void init_ia32_feat_ctl(struct cpuinfo_x86 *c) {} #endif + +extern __noendbr void cet_disable(void); + #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 3edf05e98e58..73e643ae94b6 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -388,6 +388,7 @@ #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ +#define X86_FEATURE_IBT (18*32+20) /* Indirect Branch Tracking */ #define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ #define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ #define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 03cb12775043..98938a68251c 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -7,6 +7,7 @@ #include <asm/tlb.h> #include <asm/nospec-branch.h> #include <asm/mmu_context.h> +#include <asm/ibt.h> #include <linux/build_bug.h> #include <linux/kernel.h> #include <linux/pgtable.h> @@ -120,8 +121,12 @@ extern asmlinkage u64 __efi_call(void *fp, ...); efi_enter_mm(); \ }) -#define arch_efi_call_virt(p, f, args...) \ - efi_call((void *)p->f, args) \ +#define arch_efi_call_virt(p, f, args...) ({ \ + u64 ret, ibt = ibt_save(); \ + ret = efi_call((void *)p->f, args); \ + ibt_restore(ibt); \ + ret; \ +}) #define arch_efi_call_virt_teardown() \ ({ \ diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h new file mode 100644 index 000000000000..689880eca9ba --- /dev/null +++ b/arch/x86/include/asm/ibt.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IBT_H +#define _ASM_X86_IBT_H + +#include <linux/types.h> + +/* + * The rules for enabling IBT are: + * + * - CC_HAS_IBT: the toolchain supports it + * - X86_KERNEL_IBT: it is selected in Kconfig + * - !__DISABLE_EXPORTS: this is regular kernel code + * + * Esp. that latter one is a bit non-obvious, but some code like compressed, + * purgatory, realmode etc.. is built with custom CFLAGS that do not include + * -fcf-protection=branch and things will go *bang*. + * + * When all the above are satisfied, HAS_KERNEL_IBT will be 1, otherwise 0. + */ +#if defined(CONFIG_X86_KERNEL_IBT) && !defined(__DISABLE_EXPORTS) + +#define HAS_KERNEL_IBT 1 + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_X86_64 +#define ASM_ENDBR "endbr64\n\t" +#else +#define ASM_ENDBR "endbr32\n\t" +#endif + +#define __noendbr __attribute__((nocf_check)) + +static inline __attribute_const__ u32 gen_endbr(void) +{ + u32 endbr; + + /* + * Generate ENDBR64 in a way that is sure to not result in + * an ENDBR64 instruction as immediate. + */ + asm ( "mov $~0xfa1e0ff3, %[endbr]\n\t" + "not %[endbr]\n\t" + : [endbr] "=&r" (endbr) ); + + return endbr; +} + +static inline __attribute_const__ u32 gen_endbr_poison(void) +{ + /* + * 4 byte NOP that isn't NOP4 (in fact it is OSP NOP3), such that it + * will be unique to (former) ENDBR sites. + */ + return 0x001f0f66; /* osp nopl (%rax) */ +} + +static inline bool is_endbr(u32 val) +{ + if (val == gen_endbr_poison()) + return true; + + val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */ + return val == gen_endbr(); +} + +extern __noendbr u64 ibt_save(void); +extern __noendbr void ibt_restore(u64 save); + +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_X86_64 +#define ENDBR endbr64 +#else +#define ENDBR endbr32 +#endif + +#endif /* __ASSEMBLY__ */ + +#else /* !IBT */ + +#define HAS_KERNEL_IBT 0 + +#ifndef __ASSEMBLY__ + +#define ASM_ENDBR + +#define __noendbr + +static inline bool is_endbr(u32 val) { return false; } + +static inline u64 ibt_save(void) { return 0; } +static inline void ibt_restore(u64 save) { } + +#else /* __ASSEMBLY__ */ + +#define ENDBR + +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_X86_KERNEL_IBT */ + +#define ENDBR_INSN_SIZE (4*HAS_KERNEL_IBT) + +#endif /* _ASM_X86_IBT_H */ diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 1345088e9902..7924f27f5c8b 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -5,6 +5,8 @@ /* Interrupts/Exceptions */ #include <asm/trapnr.h> +#define IDT_ALIGN (8 * (1 + HAS_KERNEL_IBT)) + #ifndef __ASSEMBLY__ #include <linux/entry-common.h> #include <linux/hardirq.h> @@ -480,7 +482,7 @@ __visible noinstr void func(struct pt_regs *regs, \ /* * ASM code to emit the common vector entry stubs where each stub is - * packed into 8 bytes. + * packed into IDT_ALIGN bytes. * * Note, that the 'pushq imm8' is emitted via '.byte 0x6a, vector' because * GCC treats the local vector variable as unsigned int and would expand @@ -492,33 +494,33 @@ __visible noinstr void func(struct pt_regs *regs, \ * point is to mask off the bits above bit 7 because the push is sign * extending. */ - .align 8 + .align IDT_ALIGN SYM_CODE_START(irq_entries_start) vector=FIRST_EXTERNAL_VECTOR .rept NR_EXTERNAL_VECTORS UNWIND_HINT_IRET_REGS 0 : + ENDBR .byte 0x6a, vector jmp asm_common_interrupt - nop - /* Ensure that the above is 8 bytes max */ - . = 0b + 8 + /* Ensure that the above is IDT_ALIGN bytes max */ + .fill 0b + IDT_ALIGN - ., 1, 0xcc vector = vector+1 .endr SYM_CODE_END(irq_entries_start) #ifdef CONFIG_X86_LOCAL_APIC - .align 8 + .align IDT_ALIGN SYM_CODE_START(spurious_entries_start) vector=FIRST_SYSTEM_VECTOR .rept NR_SYSTEM_VECTORS UNWIND_HINT_IRET_REGS 0 : + ENDBR .byte 0x6a, vector jmp asm_spurious_interrupt - nop - /* Ensure that the above is 8 bytes max */ - . = 0b + 8 + /* Ensure that the above is IDT_ALIGN bytes max */ + .fill 0b + IDT_ALIGN - ., 1, 0xcc vector = vector+1 .endr SYM_CODE_END(spurious_entries_start) @@ -615,6 +617,11 @@ DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault); DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_DF, xenpv_exc_double_fault); #endif +/* #CP */ +#ifdef CONFIG_X86_KERNEL_IBT +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_CP, exc_control_protection); +#endif + /* #VC */ #ifdef CONFIG_AMD_MEM_ENCRYPT DECLARE_IDTENTRY_VC(X86_TRAP_VC, exc_vmm_communication); diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h index ae9d40f6c706..63f818aedf77 100644 --- a/arch/x86/include/asm/irq_stack.h +++ b/arch/x86/include/asm/irq_stack.h @@ -3,6 +3,7 @@ #define _ASM_X86_IRQ_STACK_H #include <linux/ptrace.h> +#include <linux/objtool.h> #include <asm/processor.h> @@ -99,7 +100,8 @@ } #define ASM_CALL_ARG0 \ - "call %P[__func] \n" + "call %P[__func] \n" \ + ASM_REACHABLE #define ASM_CALL_ARG1 \ "movq %[arg1], %%rdi \n" \ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 87761396e8cc..111104d1c2cd 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -141,13 +141,8 @@ static __always_inline void arch_local_irq_restore(unsigned long flags) #ifdef CONFIG_X86_64 #ifdef CONFIG_XEN_PV #define SWAPGS ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV -#define INTERRUPT_RETURN \ - ANNOTATE_RETPOLINE_SAFE; \ - ALTERNATIVE_TERNARY("jmp *paravirt_iret(%rip);", \ - X86_FEATURE_XENPV, "jmp xen_iret;", "jmp native_iret;") #else #define SWAPGS swapgs -#define INTERRUPT_RETURN jmp native_iret #endif #endif #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 030907922bd0..85865f1645bd 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -3,6 +3,7 @@ #define _ASM_X86_LINKAGE_H #include <linux/stringify.h> +#include <asm/ibt.h> #undef notrace #define notrace __attribute__((no_instrument_function)) @@ -34,5 +35,35 @@ #endif /* __ASSEMBLY__ */ +/* SYM_FUNC_START -- use for global functions */ +#define SYM_FUNC_START(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ + ENDBR + +/* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */ +#define SYM_FUNC_START_NOALIGN(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) \ + ENDBR + +/* SYM_FUNC_START_LOCAL -- use for local functions */ +#define SYM_FUNC_START_LOCAL(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \ + ENDBR + +/* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */ +#define SYM_FUNC_START_LOCAL_NOALIGN(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) \ + ENDBR + +/* SYM_FUNC_START_WEAK -- use for weak functions */ +#define SYM_FUNC_START_WEAK(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) \ + ENDBR + +/* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */ +#define SYM_FUNC_START_WEAK_NOALIGN(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ + ENDBR + #endif /* _ASM_X86_LINKAGE_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 9f1741ac4769..0eb90d21049e 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -362,11 +362,29 @@ #define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c #define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d - #define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 #define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 #define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 +/* Control-flow Enforcement Technology MSRs */ +#define MSR_IA32_U_CET 0x000006a0 /* user mode cet */ +#define MSR_IA32_S_CET 0x000006a2 /* kernel mode cet */ +#define CET_SHSTK_EN BIT_ULL(0) +#define CET_WRSS_EN BIT_ULL(1) +#define CET_ENDBR_EN BIT_ULL(2) +#define CET_LEG_IW_EN BIT_ULL(3) +#define CET_NO_TRACK_EN BIT_ULL(4) +#define CET_SUPPRESS_DISABLE BIT_ULL(5) +#define CET_RESERVED (BIT_ULL(6) | BIT_ULL(7) | BIT_ULL(8) | BIT_ULL(9)) +#define CET_SUPPRESS BIT_ULL(10) +#define CET_WAIT_ENDBR BIT_ULL(11) + +#define MSR_IA32_PL0_SSP 0x000006a4 /* ring-0 shadow stack pointer */ +#define MSR_IA32_PL1_SSP 0x000006a5 /* ring-1 shadow stack pointer */ +#define MSR_IA32_PL2_SSP 0x000006a6 /* ring-2 shadow stack pointer */ +#define MSR_IA32_PL3_SSP 0x000006a7 /* ring-3 shadow stack pointer */ +#define MSR_IA32_INT_SSP_TAB 0x000006a8 /* exception shadow stack table */ + /* Hardware P state interface */ #define MSR_PPERF 0x0000064e #define MSR_PERF_LIMIT_REASONS 0x0000064f diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 0d76502cc6f5..964442b99245 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -666,6 +666,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); ".globl " PV_THUNK_NAME(func) ";" \ ".type " PV_THUNK_NAME(func) ", @function;" \ PV_THUNK_NAME(func) ":" \ + ASM_ENDBR \ FRAME_BEGIN \ PV_SAVE_ALL_CALLER_REGS \ "call " #func ";" \ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index e1591467668e..89df6c6617f5 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -272,7 +272,6 @@ struct paravirt_patch_template { extern struct pv_info pv_info; extern struct paravirt_patch_template pv_ops; -extern void (*paravirt_iret)(void); #define PARAVIRT_PATCH(x) \ (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a87e7c33d5ac..91d0f93a00c7 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -837,7 +837,7 @@ bool xen_set_default_idle(void); #define xen_set_default_idle 0 #endif -void stop_this_cpu(void *dummy); +void __noreturn stop_this_cpu(void *dummy); void microcode_check(void); enum l1tf_mitigations { diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 1474cf96251d..892fd8c3a6f7 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -2,6 +2,8 @@ #ifndef __ASM_QSPINLOCK_PARAVIRT_H #define __ASM_QSPINLOCK_PARAVIRT_H +#include <asm/ibt.h> + /* * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit * registers. For i386, however, only 1 32-bit register needs to be saved @@ -39,6 +41,7 @@ asm (".pushsection .text;" ".type " PV_UNLOCK ", @function;" ".align 4,0x90;" PV_UNLOCK ": " + ASM_ENDBR FRAME_BEGIN "push %rdx;" "mov $0x1,%eax;" diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index b228c9d44ee7..656ed6531d03 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -4,6 +4,7 @@ #include <linux/const.h> #include <asm/alternative.h> +#include <asm/ibt.h> /* * Constructor for a conventional segment GDT (or LDT) entry. @@ -275,7 +276,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) * vector has no error code (two bytes), a 'push $vector_number' (two * bytes), and a jump to the common entry code (up to five bytes). */ -#define EARLY_IDT_HANDLER_SIZE 9 +#define EARLY_IDT_HANDLER_SIZE (9 + ENDBR_INSN_SIZE) /* * xen_early_idt_handler_array is for Xen pv guests: for each entry in @@ -283,7 +284,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) * pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to * max 8 bytes. */ -#define XEN_EARLY_IDT_HANDLER_SIZE 8 +#define XEN_EARLY_IDT_HANDLER_SIZE (8 + ENDBR_INSN_SIZE) #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index a12458a7a8d4..896e48d45828 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -8,6 +8,7 @@ #include <linux/linkage.h> #include <asm/page_types.h> +#include <asm/ibt.h> #ifdef __i386__ @@ -119,7 +120,7 @@ void *extend_brk(size_t size, size_t align); * executable.) */ #define RESERVE_BRK(name,sz) \ - static void __section(".discard.text") __used notrace \ + static void __section(".discard.text") __noendbr __used notrace \ __brk_reservation_fn_##name##__(void) { \ asm volatile ( \ ".pushsection .brk_reservation,\"aw\",@nobits;" \ diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index 6a2827d0681f..59358d1bf880 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -159,7 +159,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #endif /* CONFIG_IA32_EMULATION */ -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI /* * For the x32 ABI, we need to create a stub for compat_sys_*() which is aware * of the x86-64-style parameter ordering of x32 syscalls. The syscalls common @@ -177,12 +177,12 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #define __X32_COMPAT_SYS_NI(name) \ __SYS_NI(x64, compat_sys_##name) -#else /* CONFIG_X86_X32 */ +#else /* CONFIG_X86_X32_ABI */ #define __X32_COMPAT_SYS_STUB0(name) #define __X32_COMPAT_SYS_STUBx(x, name, ...) #define __X32_COMPAT_COND_SYSCALL(name) #define __X32_COMPAT_SYS_NI(name) -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ #ifdef CONFIG_COMPAT diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 4cc18ba1b75e..d20ab0921480 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -97,24 +97,40 @@ union text_poke_insn { }; static __always_inline -void *text_gen_insn(u8 opcode, const void *addr, const void *dest) +void __text_gen_insn(void *buf, u8 opcode, const void *addr, const void *dest, int size) { - static union text_poke_insn insn; /* per instance */ - int size = text_opcode_size(opcode); + union text_poke_insn *insn = buf; + + BUG_ON(size < text_opcode_size(opcode)); + + /* + * Hide the addresses to avoid the compiler folding in constants when + * referencing code, these can mess up annotations like + * ANNOTATE_NOENDBR. + */ + OPTIMIZER_HIDE_VAR(insn); + OPTIMIZER_HIDE_VAR(addr); + OPTIMIZER_HIDE_VAR(dest); - insn.opcode = opcode; + insn->opcode = opcode; if (size > 1) { - insn.disp = (long)dest - (long)(addr + size); + insn->disp = (long)dest - (long)(addr + size); if (size == 2) { /* - * Ensure that for JMP9 the displacement + * Ensure that for JMP8 the displacement * actually fits the signed byte. */ - BUG_ON((insn.disp >> 31) != (insn.disp >> 7)); + BUG_ON((insn->disp >> 31) != (insn->disp >> 7)); } } +} +static __always_inline +void *text_gen_insn(u8 opcode, const void *addr, const void *dest) +{ + static union text_poke_insn insn; /* per instance */ + __text_gen_insn(&insn, opcode, addr, dest, text_opcode_size(opcode)); return &insn.text; } diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 6221be7cafc3..35317c5c551d 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -18,6 +18,8 @@ void __init trap_init(void); asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs); #endif +extern bool ibt_selftest(void); + #ifdef CONFIG_X86_F00F_BUG /* For handling the FOOF bug */ void handle_invalid_op(struct pt_regs *regs); diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 98aa103eb4ab..2963a2f5dbc4 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -37,7 +37,7 @@ struct vdso_image { extern const struct vdso_image vdso_image_64; #endif -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI extern const struct vdso_image vdso_image_x32; #endif diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index bcba3c643e63..c47cc7f2feeb 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h @@ -130,6 +130,8 @@ #define X86_CR4_SMAP _BITUL(X86_CR4_SMAP_BIT) #define X86_CR4_PKE_BIT 22 /* enable Protection Keys support */ #define X86_CR4_PKE _BITUL(X86_CR4_PKE_BIT) +#define X86_CR4_CET_BIT 23 /* enable Control-flow Enforcement Technology */ +#define X86_CR4_CET _BITUL(X86_CR4_CET_BIT) /* * x86-64 Task Priority Register, CR8 diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index b4e576600969..d374cb3cf024 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -115,6 +115,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) } extern s32 __retpoline_sites[], __retpoline_sites_end[]; +extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[]; extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; void text_poke_early(void *addr, const void *opcode, size_t len); @@ -512,6 +513,42 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } #endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */ +#ifdef CONFIG_X86_KERNEL_IBT + +/* + * Generated by: objtool --ibt + */ +void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + u32 endbr, poison = gen_endbr_poison(); + void *addr = (void *)s + *s; + + if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr))) + continue; + + if (WARN_ON_ONCE(!is_endbr(endbr))) + continue; + + DPRINTK("ENDBR at: %pS (%px)", addr, addr); + + /* + * When we have IBT, the lack of ENDBR will trigger #CP + */ + DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr); + DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr); + text_poke_early(addr, &poison, 4); + } +} + +#else + +void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) { } + +#endif /* CONFIG_X86_KERNEL_IBT */ + #ifdef CONFIG_SMP static void alternatives_smp_lock(const s32 *start, const s32 *end, u8 *text, u8 *text_end) @@ -713,34 +750,39 @@ asm ( " .pushsection .init.text, \"ax\", @progbits\n" " .type int3_magic, @function\n" "int3_magic:\n" + ANNOTATE_NOENDBR " movl $1, (%" _ASM_ARG1 ")\n" ASM_RET " .size int3_magic, .-int3_magic\n" " .popsection\n" ); -extern __initdata unsigned long int3_selftest_ip; /* defined in asm below */ +extern void int3_selftest_ip(void); /* defined in asm below */ static int __init int3_exception_notify(struct notifier_block *self, unsigned long val, void *data) { + unsigned long selftest = (unsigned long)&int3_selftest_ip; struct die_args *args = data; struct pt_regs *regs = args->regs; + OPTIMIZER_HIDE_VAR(selftest); + if (!regs || user_mode(regs)) return NOTIFY_DONE; if (val != DIE_INT3) return NOTIFY_DONE; - if (regs->ip - INT3_INSN_SIZE != int3_selftest_ip) + if (regs->ip - INT3_INSN_SIZE != selftest) return NOTIFY_DONE; int3_emulate_call(regs, (unsigned long)&int3_magic); return NOTIFY_STOP; } -static void __init int3_selftest(void) +/* Must be noinline to ensure uniqueness of int3_selftest_ip. */ +static noinline void __init int3_selftest(void) { static __initdata struct notifier_block int3_exception_nb = { .notifier_call = int3_exception_notify, @@ -753,18 +795,12 @@ static void __init int3_selftest(void) /* * Basically: int3_magic(&val); but really complicated :-) * - * Stick the address of the INT3 instruction into int3_selftest_ip, - * then trigger the INT3, padded with NOPs to match a CALL instruction - * length. + * INT3 padded with NOP to CALL_INSN_SIZE. The int3_exception_nb + * notifier above will emulate CALL for us. */ - asm volatile ("1: int3; nop; nop; nop; nop\n\t" - ".pushsection .init.data,\"aw\"\n\t" - ".align " __ASM_SEL(4, 8) "\n\t" - ".type int3_selftest_ip, @object\n\t" - ".size int3_selftest_ip, " __ASM_SEL(4, 8) "\n\t" - "int3_selftest_ip:\n\t" - __ASM_SEL(.long, .quad) " 1b\n\t" - ".popsection\n\t" + asm volatile ("int3_selftest_ip:\n\t" + ANNOTATE_NOENDBR + " int3; nop; nop; nop; nop\n\t" : ASM_CALL_CONSTRAINT : __ASM_SEL_RAW(a, D) (&val) : "memory"); @@ -831,6 +867,8 @@ void __init alternative_instructions(void) */ apply_alternatives(__alt_instructions, __alt_instructions_end); + apply_ibt_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); + #ifdef CONFIG_SMP /* Patch to UP if other cpus not imminent. */ if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) { diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 241dda687eb9..60e330cdbd17 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -232,6 +232,7 @@ #include <asm/paravirt.h> #include <asm/reboot.h> #include <asm/nospec-branch.h> +#include <asm/ibt.h> #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) extern int (*console_blank_hook)(int); @@ -598,6 +599,7 @@ static long __apm_bios_call(void *_call) struct desc_struct save_desc_40; struct desc_struct *gdt; struct apm_bios_call *call = _call; + u64 ibt; cpu = get_cpu(); BUG_ON(cpu != 0); @@ -607,11 +609,13 @@ static long __apm_bios_call(void *_call) apm_irq_save(flags); firmware_restrict_branch_speculation_start(); + ibt = ibt_save(); APM_DO_SAVE_SEGS; apm_bios_call_asm(call->func, call->ebx, call->ecx, &call->eax, &call->ebx, &call->ecx, &call->edx, &call->esi); APM_DO_RESTORE_SEGS; + ibt_restore(ibt); firmware_restrict_branch_speculation_end(); apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; @@ -676,6 +680,7 @@ static long __apm_bios_call_simple(void *_call) struct desc_struct save_desc_40; struct desc_struct *gdt; struct apm_bios_call *call = _call; + u64 ibt; cpu = get_cpu(); BUG_ON(cpu != 0); @@ -685,10 +690,12 @@ static long __apm_bios_call_simple(void *_call) apm_irq_save(flags); firmware_restrict_branch_speculation_start(); + ibt = ibt_save(); APM_DO_SAVE_SEGS; error = apm_bios_call_simple_asm(call->func, call->ebx, call->ecx, &call->eax); APM_DO_RESTORE_SEGS; + ibt_restore(ibt); firmware_restrict_branch_speculation_end(); apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 64deb7727d00..ed4417500700 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -59,6 +59,7 @@ #include <asm/cpu_device_id.h> #include <asm/uv/uv.h> #include <asm/sigframe.h> +#include <asm/traps.h> #include "cpu.h" @@ -438,7 +439,8 @@ out: /* These bits should not change their value after CPU init is finished. */ static const unsigned long cr4_pinned_mask = - X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE; + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | + X86_CR4_FSGSBASE | X86_CR4_CET; static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); static unsigned long cr4_pinned_bits __ro_after_init; @@ -592,6 +594,58 @@ static __init int setup_disable_pku(char *arg) __setup("nopku", setup_disable_pku); #endif /* CONFIG_X86_64 */ +#ifdef CONFIG_X86_KERNEL_IBT + +__noendbr u64 ibt_save(void) +{ + u64 msr = 0; + + if (cpu_feature_enabled(X86_FEATURE_IBT)) { + rdmsrl(MSR_IA32_S_CET, msr); + wrmsrl(MSR_IA32_S_CET, msr & ~CET_ENDBR_EN); + } + + return msr; +} + +__noendbr void ibt_restore(u64 save) +{ + u64 msr; + + if (cpu_feature_enabled(X86_FEATURE_IBT)) { + rdmsrl(MSR_IA32_S_CET, msr); + msr &= ~CET_ENDBR_EN; + msr |= (save & CET_ENDBR_EN); + wrmsrl(MSR_IA32_S_CET, msr); + } +} + +#endif + +static __always_inline void setup_cet(struct cpuinfo_x86 *c) +{ + u64 msr = CET_ENDBR_EN; + + if (!HAS_KERNEL_IBT || + !cpu_feature_enabled(X86_FEATURE_IBT)) + return; + + wrmsrl(MSR_IA32_S_CET, msr); + cr4_set_bits(X86_CR4_CET); + + if (!ibt_selftest()) { + pr_err("IBT selftest: Failed!\n"); + setup_clear_cpu_cap(X86_FEATURE_IBT); + return; + } +} + +__noendbr void cet_disable(void) +{ + if (cpu_feature_enabled(X86_FEATURE_IBT)) + wrmsrl(MSR_IA32_S_CET, 0); +} + /* * Some CPU features depend on higher CPUID levels, which may not always * be available due to CPUID level capping or broken virtualization @@ -1709,6 +1763,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) x86_init_rdrand(c); setup_pku(c); + setup_cet(c); /* * Clear/Set all flags overridden by options, need do it @@ -1777,6 +1832,8 @@ void enable_sep_cpu(void) void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); + if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT)) + pr_info("CET detected: Indirect Branch Tracking enabled\n"); #ifdef CONFIG_X86_32 sysenter_setup(); enable_sep_cpu(); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 7cc540e6de0c..1e31c7d21597 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -316,12 +316,12 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) unsigned long offset; unsigned long npages; unsigned long size; - unsigned long retq; unsigned long *ptr; void *trampoline; void *ip; /* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */ unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 }; + unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE }; union ftrace_op_code_union op_ptr; int ret; @@ -359,12 +359,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) goto fail; ip = trampoline + size; - - /* The trampoline ends with ret(q) */ - retq = (unsigned long)ftrace_stub; - ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); - if (WARN_ON(ret < 0)) - goto fail; + memcpy(ip, retq, RET_SIZE); /* No need to test direct calls on created trampolines */ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 11ac028e30e4..4ec13608d3c6 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -145,6 +145,7 @@ SYM_FUNC_START(ftrace_caller) movq %rcx, RSP(%rsp) SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL) + ANNOTATE_NOENDBR /* Load the ftrace_ops into the 3rd parameter */ movq function_trace_op(%rip), %rdx @@ -155,6 +156,7 @@ SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL) movq $0, CS(%rsp) SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) + ANNOTATE_NOENDBR call ftrace_stub /* Handlers can change the RIP */ @@ -169,6 +171,7 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) * layout here. */ SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR jmp ftrace_epilogue SYM_FUNC_END(ftrace_caller); @@ -176,10 +179,10 @@ SYM_FUNC_END(ftrace_caller); SYM_FUNC_START(ftrace_epilogue) /* * This is weak to keep gas from relaxing the jumps. - * It is also used to copy the RET for trampolines. */ SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK) UNWIND_HINT_FUNC + ENDBR RET SYM_FUNC_END(ftrace_epilogue) @@ -192,6 +195,7 @@ SYM_FUNC_START(ftrace_regs_caller) /* save_mcount_regs fills in first two parameters */ SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL) + ANNOTATE_NOENDBR /* Load the ftrace_ops into the 3rd parameter */ movq function_trace_op(%rip), %rdx @@ -221,6 +225,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL) leaq (%rsp), %rcx SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) + ANNOTATE_NOENDBR call ftrace_stub /* Copy flags back to SS, to restore them */ @@ -248,6 +253,7 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) */ testq %rax, %rax SYM_INNER_LABEL(ftrace_regs_caller_jmp, SYM_L_GLOBAL) + ANNOTATE_NOENDBR jnz 1f restore_mcount_regs @@ -261,6 +267,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_jmp, SYM_L_GLOBAL) * to the return. */ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR jmp ftrace_epilogue /* Swap the flags with orig_rax */ @@ -284,6 +291,7 @@ SYM_FUNC_START(__fentry__) jnz trace SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBAL) + ENDBR RET trace: @@ -307,7 +315,7 @@ EXPORT_SYMBOL(__fentry__) #ifdef CONFIG_FUNCTION_GRAPH_TRACER SYM_FUNC_START(return_to_handler) - subq $24, %rsp + subq $16, %rsp /* Save the return values */ movq %rax, (%rsp) @@ -319,7 +327,19 @@ SYM_FUNC_START(return_to_handler) movq %rax, %rdi movq 8(%rsp), %rdx movq (%rsp), %rax - addq $24, %rsp - JMP_NOSPEC rdi + + addq $16, %rsp + /* + * Jump back to the old return address. This cannot be JMP_NOSPEC rdi + * since IBT would demand that contain ENDBR, which simply isn't so for + * return addresses. Use a retpoline here to keep the RSB balanced. + */ + ANNOTATE_INTRA_FUNCTION_CALL + call .Ldo_rop + int3 +.Ldo_rop: + mov %rdi, (%rsp) + UNWIND_HINT_FUNC + RET SYM_FUNC_END(return_to_handler) #endif diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 9c63fc5988cd..b8e3019547a5 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -99,6 +99,7 @@ SYM_CODE_END(startup_64) SYM_CODE_START(secondary_startup_64) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR /* * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, * and someone has loaded a mapped page table. @@ -127,6 +128,7 @@ SYM_CODE_START(secondary_startup_64) */ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR /* * Retrieve the modifier (SME encryption mask if SME is active) to be @@ -192,6 +194,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) jmp *%rax 1: UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR // above /* * We must switch to a new descriptor in kernel space for the GDT @@ -299,6 +302,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) pushq %rax # target address in negative space lretq .Lafter_lret: + ANNOTATE_NOENDBR SYM_CODE_END(secondary_startup_64) #include "verify_cpu.S" @@ -328,6 +332,7 @@ SYM_CODE_END(start_cpu0) */ SYM_CODE_START_NOALIGN(vc_boot_ghcb) UNWIND_HINT_IRET_REGS offset=8 + ENDBR /* Build pt_regs */ PUSH_AND_CLEAR_REGS @@ -345,7 +350,6 @@ SYM_CODE_START_NOALIGN(vc_boot_ghcb) /* Remove Error Code */ addq $8, %rsp - /* Pure iret required here - don't use INTERRUPT_RETURN */ iretq SYM_CODE_END(vc_boot_ghcb) #endif @@ -372,9 +376,11 @@ SYM_CODE_START(early_idt_handler_array) .rept NUM_EXCEPTION_VECTORS .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0 UNWIND_HINT_IRET_REGS + ENDBR pushq $0 # Dummy error code, to make stack frame uniform .else UNWIND_HINT_IRET_REGS offset=8 + ENDBR .endif pushq $i # 72(%rsp) Vector number jmp early_idt_handler_common @@ -382,10 +388,11 @@ SYM_CODE_START(early_idt_handler_array) i = i + 1 .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc .endr - UNWIND_HINT_IRET_REGS offset=16 SYM_CODE_END(early_idt_handler_array) + ANNOTATE_NOENDBR // early_idt_handler_array[NUM_EXCEPTION_VECTORS] SYM_CODE_START_LOCAL(early_idt_handler_common) + UNWIND_HINT_IRET_REGS offset=16 /* * The stack is the hardware frame, an error code or zero, and the * vector number. @@ -426,11 +433,14 @@ SYM_CODE_END(early_idt_handler_common) * early_idt_handler_array can't be used because it returns via the * paravirtualized INTERRUPT_RETURN and pv-ops don't work that early. * + * XXX it does, fix this. + * * This handler will end up in the .init.text section and not be * available to boot secondary CPUs. */ SYM_CODE_START_NOALIGN(vc_no_ghcb) UNWIND_HINT_IRET_REGS offset=8 + ENDBR /* Build pt_regs */ PUSH_AND_CLEAR_REGS diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index df0fa695bb09..608eb63bf044 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -10,6 +10,7 @@ #include <asm/proto.h> #include <asm/desc.h> #include <asm/hw_irq.h> +#include <asm/idtentry.h> #define DPL0 0x0 #define DPL3 0x3 @@ -103,6 +104,10 @@ static const __initconst struct idt_data def_idts[] = { ISTG(X86_TRAP_MC, asm_exc_machine_check, IST_INDEX_MCE), #endif +#ifdef CONFIG_X86_KERNEL_IBT + INTG(X86_TRAP_CP, asm_exc_control_protection), +#endif + #ifdef CONFIG_AMD_MEM_ENCRYPT ISTG(X86_TRAP_VC, asm_exc_vmm_communication, IST_INDEX_VC), #endif @@ -272,7 +277,7 @@ void __init idt_setup_apic_and_irq_gates(void) idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true); for_each_clear_bit_from(i, system_vectors, FIRST_SYSTEM_VECTOR) { - entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); + entry = irq_entries_start + IDT_ALIGN * (i - FIRST_EXTERNAL_VECTOR); set_intr_gate(i, entry); } @@ -283,7 +288,7 @@ void __init idt_setup_apic_and_irq_gates(void) * system_vectors bitmap. Otherwise they show up in * /proc/interrupts. */ - entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR); + entry = spurious_entries_start + IDT_ALIGN * (i - FIRST_SYSTEM_VECTOR); set_intr_gate(i, entry); } #endif diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 6290712cb36d..8ef933c03afa 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -52,6 +52,7 @@ #include <asm/insn.h> #include <asm/debugreg.h> #include <asm/set_memory.h> +#include <asm/ibt.h> #include "common.h" @@ -193,17 +194,10 @@ static unsigned long __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) { struct kprobe *kp; - unsigned long faddr; + bool faddr; kp = get_kprobe((void *)addr); - faddr = ftrace_location(addr); - /* - * Addresses inside the ftrace location are refused by - * arch_check_ftrace_location(). Something went terribly wrong - * if such an address is checked here. - */ - if (WARN_ON(faddr && faddr != addr)) - return 0UL; + faddr = ftrace_location(addr) == addr; /* * Use the current code if it is not modified by Kprobe * and it cannot be modified by ftrace. @@ -301,6 +295,22 @@ static int can_probe(unsigned long paddr) return (addr == paddr); } +/* If x86 supports IBT (ENDBR) it must be skipped. */ +kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, + bool *on_func_entry) +{ + if (is_endbr(*(u32 *)addr)) { + *on_func_entry = !offset || offset == 4; + if (*on_func_entry) + offset = 4; + + } else { + *on_func_entry = !offset; + } + + return (kprobe_opcode_t *)(addr + offset); +} + /* * Copy an instruction with recovering modified instruction by kprobes * and adjust the displacement if the instruction uses the %rip-relative @@ -1023,6 +1033,7 @@ asm( ".type __kretprobe_trampoline, @function\n" "__kretprobe_trampoline:\n" #ifdef CONFIG_X86_64 + ANNOTATE_NOENDBR /* Push a fake return address to tell the unwinder it's a kretprobe. */ " pushq $__kretprobe_trampoline\n" UNWIND_HINT_FUNC diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d77481ecb0d5..79e0b8d63ffa 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -1029,10 +1029,11 @@ asm( ".global __raw_callee_save___kvm_vcpu_is_preempted;" ".type __raw_callee_save___kvm_vcpu_is_preempted, @function;" "__raw_callee_save___kvm_vcpu_is_preempted:" +ASM_ENDBR "movq __per_cpu_offset(,%rdi,8), %rax;" "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);" "setne %al;" -"ret;" +ASM_RET ".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;" ".popsection"); diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index f5da4a18070a..566bb8e17149 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -27,6 +27,7 @@ #include <asm/kexec-bzimage64.h> #include <asm/setup.h> #include <asm/set_memory.h> +#include <asm/cpu.h> #ifdef CONFIG_ACPI /* @@ -310,6 +311,7 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); hw_breakpoint_disable(); + cet_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC @@ -325,7 +327,7 @@ void machine_kexec(struct kimage *image) } control_page = page_address(image->control_code_page) + PAGE_SIZE; - memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); + __memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 504ea65987e8..b98ffcf4d250 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -253,7 +253,7 @@ int module_finalize(const Elf_Ehdr *hdr, { const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, *para = NULL, *orc = NULL, *orc_ip = NULL, - *retpolines = NULL; + *retpolines = NULL, *ibt_endbr = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { @@ -271,6 +271,8 @@ int module_finalize(const Elf_Ehdr *hdr, orc_ip = s; if (!strcmp(".retpoline_sites", secstrings + s->sh_name)) retpolines = s; + if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name)) + ibt_endbr = s; } /* @@ -290,6 +292,10 @@ int module_finalize(const Elf_Ehdr *hdr, void *aseg = (void *)alt->sh_addr; apply_alternatives(aseg, aseg + alt->sh_size); } + if (ibt_endbr) { + void *iseg = (void *)ibt_endbr->sh_addr; + apply_ibt_endbr(iseg, iseg + ibt_endbr->sh_size); + } if (locks && text) { void *lseg = (void *)locks->sh_addr; void *tseg = (void *)text->sh_addr; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 4420499f7bb4..7ca2d46c08cc 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -41,6 +41,7 @@ extern void _paravirt_nop(void); asm (".pushsection .entry.text, \"ax\"\n" ".global _paravirt_nop\n" "_paravirt_nop:\n\t" + ASM_ENDBR ASM_RET ".size _paravirt_nop, . - _paravirt_nop\n\t" ".type _paravirt_nop, @function\n\t" @@ -50,6 +51,7 @@ asm (".pushsection .entry.text, \"ax\"\n" asm (".pushsection .entry.text, \"ax\"\n" ".global paravirt_ret0\n" "paravirt_ret0:\n\t" + ASM_ENDBR "xor %" _ASM_AX ", %" _ASM_AX ";\n\t" ASM_RET ".size paravirt_ret0, . - paravirt_ret0\n\t" @@ -69,29 +71,12 @@ noinstr void paravirt_BUG(void) BUG(); } -struct branch { - unsigned char opcode; - u32 delta; -} __attribute__((packed)); - static unsigned paravirt_patch_call(void *insn_buff, const void *target, unsigned long addr, unsigned len) { - const int call_len = 5; - struct branch *b = insn_buff; - unsigned long delta = (unsigned long)target - (addr+call_len); - - if (len < call_len) { - pr_warn("paravirt: Failed to patch indirect CALL at %ps\n", (void *)addr); - /* Kernel might not be viable if patching fails, bail out: */ - BUG_ON(1); - } - - b->opcode = 0xe8; /* call */ - b->delta = delta; - BUILD_BUG_ON(sizeof(*b) != call_len); - - return call_len; + __text_gen_insn(insn_buff, CALL_INSN_OPCODE, + (void *)addr, target, CALL_INSN_SIZE); + return CALL_INSN_SIZE; } #ifdef CONFIG_PARAVIRT_XXL @@ -149,8 +134,6 @@ void paravirt_set_sched_clock(u64 (*func)(void)) } /* These are in entry.S */ -extern void native_iret(void); - static struct resource reserve_ioports = { .start = 0, .end = IO_SPACE_LIMIT, @@ -414,8 +397,6 @@ struct paravirt_patch_template pv_ops = { #ifdef CONFIG_PARAVIRT_XXL NOKPROBE_SYMBOL(native_load_idt); - -void (*paravirt_iret)(void) = native_iret; #endif EXPORT_SYMBOL(pv_ops); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e131d71b3cae..b370767f5b19 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -747,7 +747,7 @@ bool xen_set_default_idle(void) } #endif -void stop_this_cpu(void *dummy) +void __noreturn stop_this_cpu(void *dummy) { local_irq_disable(); /* diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3402edec236c..e459253649be 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -681,7 +681,7 @@ void set_personality_64bit(void) static void __set_personality_x32(void) { -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI if (current->mm) current->mm->context.flags = 0; diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 399f075ccdc4..c1d8626c53b6 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -42,6 +42,7 @@ .code64 SYM_CODE_START_NOALIGN(relocate_kernel) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR /* * %rdi indirection_page * %rsi page_list @@ -115,6 +116,14 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) pushq %rdx /* + * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP + * below. + */ + movq %cr4, %rax + andq $~(X86_CR4_CET), %rax + movq %rax, %cr4 + + /* * Set cr0 to a known state: * - Paging enabled * - Alignment check disabled @@ -215,6 +224,7 @@ SYM_CODE_END(identity_mapped) SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR // RET target, above movq RSP(%r8), %rsp movq CR4(%r8), %rax movq %rax, %cr4 diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 2e37862e3a8c..1563fb995005 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -210,6 +210,81 @@ DEFINE_IDTENTRY(exc_overflow) do_error_trap(regs, 0, "overflow", X86_TRAP_OF, SIGSEGV, 0, NULL); } +#ifdef CONFIG_X86_KERNEL_IBT + +static __ro_after_init bool ibt_fatal = true; + +extern void ibt_selftest_ip(void); /* code label defined in asm below */ + +enum cp_error_code { + CP_EC = (1 << 15) - 1, + + CP_RET = 1, + CP_IRET = 2, + CP_ENDBR = 3, + CP_RSTRORSSP = 4, + CP_SETSSBSY = 5, + + CP_ENCL = 1 << 15, +}; + +DEFINE_IDTENTRY_ERRORCODE(exc_control_protection) +{ + if (!cpu_feature_enabled(X86_FEATURE_IBT)) { + pr_err("Unexpected #CP\n"); + BUG(); + } + + if (WARN_ON_ONCE(user_mode(regs) || (error_code & CP_EC) != CP_ENDBR)) + return; + + if (unlikely(regs->ip == (unsigned long)&ibt_selftest_ip)) { + regs->ax = 0; + return; + } + + pr_err("Missing ENDBR: %pS\n", (void *)instruction_pointer(regs)); + if (!ibt_fatal) { + printk(KERN_DEFAULT CUT_HERE); + __warn(__FILE__, __LINE__, (void *)regs->ip, TAINT_WARN, regs, NULL); + return; + } + BUG(); +} + +/* Must be noinline to ensure uniqueness of ibt_selftest_ip. */ +noinline bool ibt_selftest(void) +{ + unsigned long ret; + + asm (" lea ibt_selftest_ip(%%rip), %%rax\n\t" + ANNOTATE_RETPOLINE_SAFE + " jmp *%%rax\n\t" + "ibt_selftest_ip:\n\t" + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + " nop\n\t" + + : "=a" (ret) : : "memory"); + + return !ret; +} + +static int __init ibt_setup(char *str) +{ + if (!strcmp(str, "off")) + setup_clear_cpu_cap(X86_FEATURE_IBT); + + if (!strcmp(str, "warn")) + ibt_fatal = false; + + return 1; +} + +__setup("ibt=", ibt_setup); + +#endif /* CONFIG_X86_KERNEL_IBT */ + #ifdef CONFIG_X86_F00F_BUG void handle_invalid_op(struct pt_regs *regs) #else diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 27f830345b6f..7fda7f27e762 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -285,6 +285,15 @@ SECTIONS } #endif +#ifdef CONFIG_X86_KERNEL_IBT + . = ALIGN(8); + .ibt_endbr_seal : AT(ADDR(.ibt_endbr_seal) - LOAD_OFFSET) { + __ibt_endbr_seal = .; + *(.ibt_endbr_seal) + __ibt_endbr_seal_end = .; + } +#endif + /* * struct alt_inst entries. From the header (alternative.h): * "Alternative instructions for different CPU types or capabilities" diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 7ba4ec77feeb..f9c00c89829b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -24,6 +24,7 @@ #include <linux/stringify.h> #include <asm/debugreg.h> #include <asm/nospec-branch.h> +#include <asm/ibt.h> #include "x86.h" #include "tss.h" @@ -189,7 +190,7 @@ #define X16(x...) X8(x), X8(x) #define NR_FASTOP (ilog2(sizeof(ulong)) + 1) -#define FASTOP_SIZE 8 +#define FASTOP_SIZE (8 * (1 + HAS_KERNEL_IBT)) struct opcode { u64 flags; @@ -311,7 +312,8 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); #define __FOP_FUNC(name) \ ".align " __stringify(FASTOP_SIZE) " \n\t" \ ".type " name ", @function \n\t" \ - name ":\n\t" + name ":\n\t" \ + ASM_ENDBR #define FOP_FUNC(name) \ __FOP_FUNC(#name) @@ -433,21 +435,23 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); /* * Depending on .config the SETcc functions look like: * + * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT] * SETcc %al [3 bytes] * RET [1 byte] * INT3 [1 byte; CONFIG_SLS] * - * Which gives possible sizes 4 or 5. When rounded up to the - * next power-of-two alignment they become 4 or 8. + * Which gives possible sizes 4, 5, 8 or 9. When rounded up to the + * next power-of-two alignment they become 4, 8 or 16 resp. */ -#define SETCC_LENGTH (4 + IS_ENABLED(CONFIG_SLS)) -#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS)) +#define SETCC_LENGTH (ENDBR_INSN_SIZE + 4 + IS_ENABLED(CONFIG_SLS)) +#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS) << HAS_KERNEL_IBT) static_assert(SETCC_LENGTH <= SETCC_ALIGN); #define FOP_SETCC(op) \ ".align " __stringify(SETCC_ALIGN) " \n\t" \ ".type " #op ", @function \n\t" \ #op ": \n\t" \ + ASM_ENDBR \ #op " %al \n\t" \ __FOP_RET(#op) diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c index 520897061ee0..1e3de0769b81 100644 --- a/arch/x86/lib/error-inject.c +++ b/arch/x86/lib/error-inject.c @@ -3,6 +3,7 @@ #include <linux/linkage.h> #include <linux/error-injection.h> #include <linux/kprobes.h> +#include <linux/objtool.h> asmlinkage void just_return_func(void); @@ -11,6 +12,7 @@ asm( ".type just_return_func, @function\n" ".globl just_return_func\n" "just_return_func:\n" + ANNOTATE_NOENDBR ASM_RET ".size just_return_func, .-just_return_func\n" ); diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index afbdda539b80..5f87bab4fb8d 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -55,6 +55,7 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) .align RETPOLINE_THUNK_SIZE SYM_CODE_START(__x86_indirect_thunk_array) + ANNOTATE_NOENDBR // apply_retpolines #define GEN(reg) THUNK reg #include <asm/GEN-for-each-reg.h> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 6efbb87f65ed..8fe35ed11fd6 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -46,6 +46,12 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) #define EMIT4_off32(b1, b2, b3, b4, off) \ do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) +#ifdef CONFIG_X86_KERNEL_IBT +#define EMIT_ENDBR() EMIT(gen_endbr(), 4) +#else +#define EMIT_ENDBR() +#endif + static bool is_imm8(int value) { return value <= 127 && value >= -128; @@ -241,7 +247,7 @@ struct jit_context { /* Number of bytes emit_patch() needs to generate instructions */ #define X86_PATCH_SIZE 5 /* Number of bytes that will be skipped on tailcall */ -#define X86_TAIL_CALL_OFFSET 11 +#define X86_TAIL_CALL_OFFSET (11 + ENDBR_INSN_SIZE) static void push_callee_regs(u8 **pprog, bool *callee_regs_used) { @@ -286,6 +292,7 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf, /* BPF trampoline can be made to work without these nops, * but let's waste 5 bytes for now and optimize later */ + EMIT_ENDBR(); memcpy(prog, x86_nops[5], X86_PATCH_SIZE); prog += X86_PATCH_SIZE; if (!ebpf_from_cbpf) { @@ -296,6 +303,10 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf, } EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ + + /* X86_TAIL_CALL_OFFSET is here */ + EMIT_ENDBR(); + /* sub rsp, rounded_stack_depth */ if (stack_depth) EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8)); @@ -380,6 +391,13 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, /* BPF poking in modules is not supported */ return -EINVAL; + /* + * See emit_prologue(), for IBT builds the trampoline hook is preceded + * with an ENDBR instruction. + */ + if (is_endbr(*(u32 *)ip)) + ip += ENDBR_INSN_SIZE; + return __bpf_arch_text_poke(ip, t, old_addr, new_addr); } @@ -2013,14 +2031,18 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i ip_off = stack_size; - if (flags & BPF_TRAMP_F_SKIP_FRAME) + if (flags & BPF_TRAMP_F_SKIP_FRAME) { /* skip patched call instruction and point orig_call to actual * body of the kernel function. */ + if (is_endbr(*(u32 *)orig_call)) + orig_call += ENDBR_INSN_SIZE; orig_call += X86_PATCH_SIZE; + } prog = image; + EMIT_ENDBR(); EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */ diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile index 84b09c230cbd..a50245157685 100644 --- a/arch/x86/platform/efi/Makefile +++ b/arch/x86/platform/efi/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y KASAN_SANITIZE := n GCOV_PROFILE := n diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S index 25799d768624..854dd81804b7 100644 --- a/arch/x86/platform/efi/efi_thunk_64.S +++ b/arch/x86/platform/efi/efi_thunk_64.S @@ -20,12 +20,14 @@ */ #include <linux/linkage.h> +#include <linux/objtool.h> #include <asm/page_types.h> #include <asm/segment.h> .text .code64 -SYM_CODE_START(__efi64_thunk) +SYM_FUNC_START(__efi64_thunk) +STACK_FRAME_NON_STANDARD __efi64_thunk push %rbp push %rbx @@ -79,7 +81,7 @@ SYM_CODE_START(__efi64_thunk) 2: pushl $__KERNEL_CS pushl %ebp lret -SYM_CODE_END(__efi64_thunk) +SYM_FUNC_END(__efi64_thunk) .bss .balign 8 diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index d47c3d176ae4..5038edb79ad5 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -624,6 +624,9 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_coprocessor_error, false ), TRAP_ENTRY(exc_alignment_check, false ), TRAP_ENTRY(exc_simd_coprocessor_error, false ), +#ifdef CONFIG_X86_KERNEL_IBT + TRAP_ENTRY(exc_control_protection, false ), +#endif }; static bool __ref get_trap_addr(void **addr, unsigned int ist) @@ -1177,6 +1180,8 @@ static void __init xen_domu_set_legacy_features(void) x86_platform.legacy.rtc = 0; } +extern void early_xen_iret_patch(void); + /* First C function to be called on Xen boot */ asmlinkage __visible void __init xen_start_kernel(void) { @@ -1187,6 +1192,10 @@ asmlinkage __visible void __init xen_start_kernel(void) if (!xen_start_info) return; + __text_gen_insn(&early_xen_iret_patch, + JMP32_INSN_OPCODE, &early_xen_iret_patch, &xen_iret, + JMP32_INSN_SIZE); + xen_domain_type = XEN_PV_DOMAIN; xen_start_flags = xen_start_info->flags; @@ -1195,7 +1204,6 @@ asmlinkage __visible void __init xen_start_kernel(void) /* Install Xen paravirt ops */ pv_info = xen_info; pv_ops.cpu = xen_cpu_ops.cpu; - paravirt_iret = xen_iret; xen_init_irq_ops(); /* diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index e730e6200e64..caa9bc2fa100 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -122,6 +122,7 @@ SYM_FUNC_END(xen_read_cr2_direct); .macro xen_pv_trap name SYM_CODE_START(xen_\name) UNWIND_HINT_EMPTY + ENDBR pop %rcx pop %r11 jmp \name @@ -147,6 +148,9 @@ xen_pv_trap asm_exc_page_fault xen_pv_trap asm_exc_spurious_interrupt_bug xen_pv_trap asm_exc_coprocessor_error xen_pv_trap asm_exc_alignment_check +#ifdef CONFIG_X86_KERNEL_IBT +xen_pv_trap asm_exc_control_protection +#endif #ifdef CONFIG_X86_MCE xen_pv_trap asm_xenpv_exc_machine_check #endif /* CONFIG_X86_MCE */ @@ -162,6 +166,7 @@ SYM_CODE_START(xen_early_idt_handler_array) i = 0 .rept NUM_EXCEPTION_VECTORS UNWIND_HINT_EMPTY + ENDBR pop %rcx pop %r11 jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE @@ -189,6 +194,7 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 */ SYM_CODE_START(xen_iret) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR pushq $0 jmp hypercall_iret SYM_CODE_END(xen_iret) @@ -230,6 +236,7 @@ SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode) /* Normal 64-bit system call target */ SYM_CODE_START(xen_syscall_target) UNWIND_HINT_EMPTY + ENDBR popq %rcx popq %r11 @@ -249,6 +256,7 @@ SYM_CODE_END(xen_syscall_target) /* 32-bit compat syscall target */ SYM_CODE_START(xen_syscall32_target) UNWIND_HINT_EMPTY + ENDBR popq %rcx popq %r11 @@ -266,6 +274,7 @@ SYM_CODE_END(xen_syscall32_target) /* 32-bit compat sysenter target */ SYM_CODE_START(xen_sysenter_target) UNWIND_HINT_EMPTY + ENDBR /* * NB: Xen is polite and clears TF from EFLAGS for us. This means * that we don't need to guard against single step exceptions here. @@ -289,6 +298,7 @@ SYM_CODE_END(xen_sysenter_target) SYM_CODE_START(xen_syscall32_target) SYM_CODE_START(xen_sysenter_target) UNWIND_HINT_EMPTY + ENDBR lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax pushq $0 diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 11d286529fe5..ac17196e2518 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -25,8 +25,12 @@ SYM_CODE_START(hypercall_page) .rept (PAGE_SIZE / 32) UNWIND_HINT_FUNC - .skip 31, 0x90 - RET + ANNOTATE_NOENDBR + ret + /* + * Xen will write the hypercall page, and sort out ENDBR. + */ + .skip 31, 0xcc .endr #define HYPERCALL(n) \ @@ -74,6 +78,7 @@ SYM_CODE_END(startup_xen) .pushsection .text SYM_CODE_START(asm_cpu_bringup_and_idle) UNWIND_HINT_EMPTY + ENDBR call cpu_bringup_and_idle SYM_CODE_END(asm_cpu_bringup_and_idle) diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c index df58966bc874..33cde4bbccdc 100644 --- a/fs/fuse/ioctl.c +++ b/fs/fuse/ioctl.c @@ -170,7 +170,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, #else if (flags & FUSE_IOCTL_COMPAT) { inarg.flags |= FUSE_IOCTL_32BIT; -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI if (in_x32_syscall()) inarg.flags |= FUSE_IOCTL_COMPAT_X32; #endif diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 004ed2a251e8..ca25ed89b706 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -217,7 +217,7 @@ xfs_compat_ioc_fsbulkstat( inumbers_fmt_pf inumbers_func = xfs_fsinumbers_fmt_compat; bulkstat_one_fmt_pf bs_one_func = xfs_fsbulkstat_one_fmt_compat; -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI if (in_x32_syscall()) { /* * ... but on x32 the input xfs_fsop_bulkreq has pointers diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 2a10db2f0bc5..69138e9db787 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -394,6 +394,7 @@ KEEP(*(__jump_table)) \ __stop___jump_table = .; +#ifdef CONFIG_HAVE_STATIC_CALL_INLINE #define STATIC_CALL_DATA \ . = ALIGN(8); \ __start_static_call_sites = .; \ @@ -402,6 +403,9 @@ __start_static_call_tramp_key = .; \ KEEP(*(.static_call_tramp_key)) \ __stop_static_call_tramp_key = .; +#else +#define STATIC_CALL_DATA +#endif /* * Allow architectures to handle ro_after_init data on their diff --git a/include/linux/cfi.h b/include/linux/cfi.h index 879744aaa6e0..c6dfc1ed0626 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -34,8 +34,17 @@ static inline void cfi_module_remove(struct module *mod, unsigned long base_addr #else /* !CONFIG_CFI_CLANG */ -#define __CFI_ADDRESSABLE(fn, __attr) +#ifdef CONFIG_X86_KERNEL_IBT + +#define __CFI_ADDRESSABLE(fn, __attr) \ + const void *__cfi_jt_ ## fn __visible __attr = (void *)&fn + +#endif /* CONFIG_X86_KERNEL_IBT */ #endif /* CONFIG_CFI_CLANG */ +#ifndef __CFI_ADDRESSABLE +#define __CFI_ADDRESSABLE(fn, __attr) +#endif + #endif /* _LINUX_CFI_H */ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 0f7fd205ab7e..219aa5ddbc73 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -125,18 +125,11 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, }) #define annotate_unreachable() __annotate_unreachable(__COUNTER__) -#define ASM_REACHABLE \ - "998:\n\t" \ - ".pushsection .discard.reachable\n\t" \ - ".long 998b - .\n\t" \ - ".popsection\n\t" - /* Annotate a C jump table to allow objtool to follow the code flow */ #define __annotate_jump_table __section(".rodata..c_jump_table") #else #define annotate_unreachable() -# define ASM_REACHABLE #define __annotate_jump_table #endif diff --git a/include/linux/cred.h b/include/linux/cred.h index fcbc6885cc09..9ed9232af934 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -176,7 +176,7 @@ extern int set_cred_ucounts(struct cred *); * check for validity of credentials */ #ifdef CONFIG_DEBUG_CREDENTIALS -extern void __invalid_creds(const struct cred *, const char *, unsigned); +extern void __noreturn __invalid_creds(const struct cred *, const char *, unsigned); extern void __validate_process_creds(struct task_struct *, const char *, unsigned); diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 5f1859836deb..312ff997c743 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -265,7 +265,6 @@ extern int arch_init_kprobes(void); extern void kprobes_inc_nmissed_count(struct kprobe *p); extern bool arch_within_kprobe_blacklist(unsigned long addr); extern int arch_populate_kprobe_blacklist(void); -extern bool arch_kprobe_on_func_entry(unsigned long offset); extern int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); extern bool within_kprobe_blacklist(unsigned long addr); @@ -384,6 +383,8 @@ static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void) } kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset); +kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, bool *on_func_entry); + int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); int register_kprobes(struct kprobe **kps, int num); diff --git a/include/linux/objtool.h b/include/linux/objtool.h index aca52db2f3f3..586d35720f13 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -77,6 +77,18 @@ struct unwind_hint { #define STACK_FRAME_NON_STANDARD_FP(func) #endif +#define ANNOTATE_NOENDBR \ + "986: \n\t" \ + ".pushsection .discard.noendbr\n\t" \ + _ASM_PTR " 986b\n\t" \ + ".popsection\n\t" + +#define ASM_REACHABLE \ + "998:\n\t" \ + ".pushsection .discard.reachable\n\t" \ + ".long 998b - .\n\t" \ + ".popsection\n\t" + #else /* __ASSEMBLY__ */ /* @@ -129,6 +141,20 @@ struct unwind_hint { .popsection .endm +.macro ANNOTATE_NOENDBR +.Lhere_\@: + .pushsection .discard.noendbr + .quad .Lhere_\@ + .popsection +.endm + +.macro REACHABLE +.Lhere_\@: + .pushsection .discard.reachable + .long .Lhere_\@ - . + .popsection +.endm + #endif /* __ASSEMBLY__ */ #else /* !CONFIG_STACK_VALIDATION */ @@ -139,12 +165,18 @@ struct unwind_hint { "\n\t" #define STACK_FRAME_NON_STANDARD(func) #define STACK_FRAME_NON_STANDARD_FP(func) +#define ANNOTATE_NOENDBR +#define ASM_REACHABLE #else #define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm +.macro ANNOTATE_NOENDBR +.endm +.macro REACHABLE +.endm #endif #endif /* CONFIG_STACK_VALIDATION */ diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index e84e54d1b490..719c9a6cac8d 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -79,7 +79,7 @@ static inline void exit_thread(struct task_struct *tsk) { } #endif -extern void do_group_exit(int); +extern __noreturn void do_group_exit(int); extern void exit_files(struct task_struct *); extern void exit_itimers(struct signal_struct *); diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 0b41fa993825..ada97751ae1b 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -117,18 +117,6 @@ static void bpf_trampoline_module_put(struct bpf_trampoline *tr) tr->mod = NULL; } -static int is_ftrace_location(void *ip) -{ - long addr; - - addr = ftrace_location((long)ip); - if (!addr) - return 0; - if (WARN_ON_ONCE(addr != (long)ip)) - return -EFAULT; - return 1; -} - static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr) { void *ip = tr->func.addr; @@ -160,12 +148,12 @@ static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_ad static int register_fentry(struct bpf_trampoline *tr, void *new_addr) { void *ip = tr->func.addr; + unsigned long faddr; int ret; - ret = is_ftrace_location(ip); - if (ret < 0) - return ret; - tr->func.ftrace_managed = ret; + faddr = ftrace_location((unsigned long)ip); + if (faddr) + tr->func.ftrace_managed = true; if (bpf_trampoline_module_get(tr)) return -ENOENT; diff --git a/kernel/cred.c b/kernel/cred.c index 933155c96922..e10c15f51c1f 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -870,7 +870,7 @@ static void dump_invalid_creds(const struct cred *cred, const char *label, /* * report use of invalid credentials */ -void __invalid_creds(const struct cred *cred, const char *file, unsigned line) +void __noreturn __invalid_creds(const struct cred *cred, const char *file, unsigned line) { printk(KERN_ERR "CRED: Invalid credentials\n"); printk(KERN_ERR "CRED: At %s:%u\n", file, line); diff --git a/kernel/exit.c b/kernel/exit.c index 9b1862488bf2..c8ce55541a25 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -895,7 +895,7 @@ SYSCALL_DEFINE1(exit, int, error_code) * Take down every thread in the group. This is called by fatal signals * as well as by sys_exit_group (below). */ -void +void __noreturn do_group_exit(int exit_code) { struct signal_struct *sig = current->signal; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 94cab8c9ce56..185badc780b7 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1489,24 +1489,68 @@ bool within_kprobe_blacklist(unsigned long addr) } /* + * arch_adjust_kprobe_addr - adjust the address + * @addr: symbol base address + * @offset: offset within the symbol + * @on_func_entry: was this @addr+@offset on the function entry + * + * Typically returns @addr + @offset, except for special cases where the + * function might be prefixed by a CFI landing pad, in that case any offset + * inside the landing pad is mapped to the first 'real' instruction of the + * symbol. + * + * Specifically, for things like IBT/BTI, skip the resp. ENDBR/BTI.C + * instruction at +0. + */ +kprobe_opcode_t *__weak arch_adjust_kprobe_addr(unsigned long addr, + unsigned long offset, + bool *on_func_entry) +{ + *on_func_entry = !offset; + return (kprobe_opcode_t *)(addr + offset); +} + +/* * If 'symbol_name' is specified, look it up and add the 'offset' * to it. This way, we can specify a relative address to a symbol. * This returns encoded errors if it fails to look up symbol or invalid * combination of parameters. */ -static kprobe_opcode_t *_kprobe_addr(kprobe_opcode_t *addr, - const char *symbol_name, unsigned int offset) +static kprobe_opcode_t * +_kprobe_addr(kprobe_opcode_t *addr, const char *symbol_name, + unsigned long offset, bool *on_func_entry) { if ((symbol_name && addr) || (!symbol_name && !addr)) goto invalid; if (symbol_name) { + /* + * Input: @sym + @offset + * Output: @addr + @offset + * + * NOTE: kprobe_lookup_name() does *NOT* fold the offset + * argument into it's output! + */ addr = kprobe_lookup_name(symbol_name, offset); if (!addr) return ERR_PTR(-ENOENT); } - addr = (kprobe_opcode_t *)(((char *)addr) + offset); + /* + * So here we have @addr + @offset, displace it into a new + * @addr' + @offset' where @addr' is the symbol start address. + */ + addr = (void *)addr + offset; + if (!kallsyms_lookup_size_offset((unsigned long)addr, NULL, &offset)) + return ERR_PTR(-ENOENT); + addr = (void *)addr - offset; + + /* + * Then ask the architecture to re-combine them, taking care of + * magical function entry details while telling us if this was indeed + * at the start of the function. + */ + addr = arch_adjust_kprobe_addr((unsigned long)addr, offset, on_func_entry); if (addr) return addr; @@ -1516,7 +1560,8 @@ invalid: static kprobe_opcode_t *kprobe_addr(struct kprobe *p) { - return _kprobe_addr(p->addr, p->symbol_name, p->offset); + bool on_func_entry; + return _kprobe_addr(p->addr, p->symbol_name, p->offset, &on_func_entry); } /* @@ -1562,14 +1607,10 @@ static inline int warn_kprobe_rereg(struct kprobe *p) static int check_ftrace_location(struct kprobe *p) { - unsigned long ftrace_addr; + unsigned long addr = (unsigned long)p->addr; - ftrace_addr = ftrace_location((unsigned long)p->addr); - if (ftrace_addr) { + if (ftrace_location(addr) == addr) { #ifdef CONFIG_KPROBES_ON_FTRACE - /* Given address is not on the instruction boundary */ - if ((unsigned long)p->addr != ftrace_addr) - return -EILSEQ; p->flags |= KPROBE_FLAG_FTRACE; #else /* !CONFIG_KPROBES_ON_FTRACE */ return -EINVAL; @@ -2047,11 +2088,6 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) } NOKPROBE_SYMBOL(pre_handler_kretprobe); -bool __weak arch_kprobe_on_func_entry(unsigned long offset) -{ - return !offset; -} - /** * kprobe_on_func_entry() -- check whether given address is function entry * @addr: Target address @@ -2067,15 +2103,13 @@ bool __weak arch_kprobe_on_func_entry(unsigned long offset) */ int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset) { - kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset); + bool on_func_entry; + kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset, &on_func_entry); if (IS_ERR(kp_addr)) return PTR_ERR(kp_addr); - if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset)) - return -ENOENT; - - if (!arch_kprobe_on_func_entry(offset)) + if (!on_func_entry) return -EINVAL; return 0; diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index fe316c021d73..c172bf92b576 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -124,19 +124,6 @@ unlock: ftrace_test_recursion_unlock(bit); } -/* - * Convert a function address into the appropriate ftrace location. - * - * Usually this is just the address of the function, but on some architectures - * it's more complicated so allow them to provide a custom behaviour. - */ -#ifndef klp_get_ftrace_location -static unsigned long klp_get_ftrace_location(unsigned long faddr) -{ - return faddr; -} -#endif - static void klp_unpatch_func(struct klp_func *func) { struct klp_ops *ops; @@ -153,8 +140,7 @@ static void klp_unpatch_func(struct klp_func *func) if (list_is_singular(&ops->func_stack)) { unsigned long ftrace_loc; - ftrace_loc = - klp_get_ftrace_location((unsigned long)func->old_func); + ftrace_loc = ftrace_location((unsigned long)func->old_func); if (WARN_ON(!ftrace_loc)) return; @@ -186,8 +172,7 @@ static int klp_patch_func(struct klp_func *func) if (!ops) { unsigned long ftrace_loc; - ftrace_loc = - klp_get_ftrace_location((unsigned long)func->old_func); + ftrace_loc = ftrace_location((unsigned long)func->old_func); if (!ftrace_loc) { pr_err("failed to find location for function '%s'\n", func->old_name); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 2e114659f7f8..4f1d2f5e7263 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1568,17 +1568,34 @@ unsigned long ftrace_location_range(unsigned long start, unsigned long end) } /** - * ftrace_location - return true if the ip giving is a traced location + * ftrace_location - return the ftrace location * @ip: the instruction pointer to check * - * Returns rec->ip if @ip given is a pointer to a ftrace location. - * That is, the instruction that is either a NOP or call to - * the function tracer. It checks the ftrace internal tables to - * determine if the address belongs or not. + * If @ip matches the ftrace location, return @ip. + * If @ip matches sym+0, return sym's ftrace location. + * Otherwise, return 0. */ unsigned long ftrace_location(unsigned long ip) { - return ftrace_location_range(ip, ip); + struct dyn_ftrace *rec; + unsigned long offset; + unsigned long size; + + rec = lookup_rec(ip, ip); + if (!rec) { + if (!kallsyms_lookup_size_offset(ip, &size, &offset)) + goto out; + + /* map sym+0 to __fentry__ */ + if (!offset) + rec = lookup_rec(ip, ip + size - 1); + } + + if (rec) + return rec->ip; + +out: + return 0; } /** @@ -4962,7 +4979,8 @@ __ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) { struct ftrace_func_entry *entry; - if (!ftrace_location(ip)) + ip = ftrace_location(ip); + if (!ip) return -EINVAL; if (remove) { @@ -5131,11 +5149,16 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) struct ftrace_func_entry *entry; struct ftrace_hash *free_hash = NULL; struct dyn_ftrace *rec; - int ret = -EBUSY; + int ret = -ENODEV; mutex_lock(&direct_mutex); + ip = ftrace_location(ip); + if (!ip) + goto out_unlock; + /* See if there's a direct function at @ip already */ + ret = -EBUSY; if (ftrace_find_rec_direct(ip)) goto out_unlock; @@ -5243,6 +5266,10 @@ int unregister_ftrace_direct(unsigned long ip, unsigned long addr) mutex_lock(&direct_mutex); + ip = ftrace_location(ip); + if (!ip) + goto out_unlock; + entry = find_direct_entry(&ip, NULL); if (!entry) goto out_unlock; @@ -5375,6 +5402,11 @@ int modify_ftrace_direct(unsigned long ip, mutex_lock(&direct_mutex); mutex_lock(&ftrace_lock); + + ip = ftrace_location(ip); + if (!ip) + goto out_unlock; + entry = find_direct_entry(&ip, &rec); if (!entry) goto out_unlock; diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index 2c7c31893551..39146fa83e20 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -24,20 +24,25 @@ static unsigned long my_ip = (unsigned long)schedule; #ifdef CONFIG_X86_64 +#include <asm/ibt.h> + asm ( " .pushsection .text, \"ax\", @progbits\n" " .type my_tramp1, @function\n" " .globl my_tramp1\n" " my_tramp1:" + ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" " call my_direct_func1\n" " leave\n" " .size my_tramp1, .-my_tramp1\n" ASM_RET + " .type my_tramp2, @function\n" " .globl my_tramp2\n" " my_tramp2:" + ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" " call my_direct_func2\n" diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c index 6f43a39decd0..65aa94d96f4e 100644 --- a/samples/ftrace/ftrace-direct-multi-modify.c +++ b/samples/ftrace/ftrace-direct-multi-modify.c @@ -22,11 +22,14 @@ extern void my_tramp2(void *); #ifdef CONFIG_X86_64 +#include <asm/ibt.h> + asm ( " .pushsection .text, \"ax\", @progbits\n" " .type my_tramp1, @function\n" " .globl my_tramp1\n" " my_tramp1:" + ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" " pushq %rdi\n" @@ -34,12 +37,13 @@ asm ( " call my_direct_func1\n" " popq %rdi\n" " leave\n" -" ret\n" + ASM_RET " .size my_tramp1, .-my_tramp1\n" + " .type my_tramp2, @function\n" -"\n" " .globl my_tramp2\n" " my_tramp2:" + ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" " pushq %rdi\n" @@ -47,7 +51,7 @@ asm ( " call my_direct_func2\n" " popq %rdi\n" " leave\n" -" ret\n" + ASM_RET " .size my_tramp2, .-my_tramp2\n" " .popsection\n" ); diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c index 2fafc9afcbf0..41ded7c615c7 100644 --- a/samples/ftrace/ftrace-direct-multi.c +++ b/samples/ftrace/ftrace-direct-multi.c @@ -17,11 +17,14 @@ extern void my_tramp(void *); #ifdef CONFIG_X86_64 +#include <asm/ibt.h> + asm ( " .pushsection .text, \"ax\", @progbits\n" " .type my_tramp, @function\n" " .globl my_tramp\n" " my_tramp:" + ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" " pushq %rdi\n" @@ -29,7 +32,7 @@ asm ( " call my_direct_func\n" " popq %rdi\n" " leave\n" -" ret\n" + ASM_RET " .size my_tramp, .-my_tramp\n" " .popsection\n" ); diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index c93fb0e95173..6690468c5cc2 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -19,11 +19,14 @@ extern void my_tramp(void *); #ifdef CONFIG_X86_64 +#include <asm/ibt.h> + asm ( " .pushsection .text, \"ax\", @progbits\n" " .type my_tramp, @function\n" " .globl my_tramp\n" " my_tramp:" + ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" " pushq %rdi\n" diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c index 8b551e503a48..e8f1e440b9b8 100644 --- a/samples/ftrace/ftrace-direct.c +++ b/samples/ftrace/ftrace-direct.c @@ -16,11 +16,14 @@ extern void my_tramp(void *); #ifdef CONFIG_X86_64 +#include <asm/ibt.h> + asm ( " .pushsection .text, \"ax\", @progbits\n" " .type my_tramp, @function\n" " .globl my_tramp\n" " my_tramp:" + ASM_ENDBR " pushq %rbp\n" " movq %rsp, %rbp\n" " pushq %rdi\n" diff --git a/scripts/Makefile.build b/scripts/Makefile.build index a4b89b757287..2173a6729f30 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -88,8 +88,8 @@ endif targets-for-modules := $(patsubst %.o, %.mod, $(filter %.o, $(obj-m))) -ifdef CONFIG_LTO_CLANG -targets-for-modules += $(patsubst %.o, %.lto.o, $(filter %.o, $(obj-m))) +ifneq ($(CONFIG_LTO_CLANG)$(CONFIG_X86_KERNEL_IBT),) +targets-for-modules += $(patsubst %.o, %.prelink.o, $(filter %.o, $(obj-m))) endif ifdef need-modorder @@ -230,6 +230,7 @@ objtool := $(objtree)/tools/objtool/objtool objtool_args = \ $(if $(CONFIG_UNWINDER_ORC),orc generate,check) \ $(if $(part-of-module), --module) \ + $(if $(CONFIG_X86_KERNEL_IBT), --lto --ibt) \ $(if $(CONFIG_FRAME_POINTER),, --no-fp) \ $(if $(CONFIG_GCOV_KERNEL)$(CONFIG_LTO_CLANG), --no-unreachable)\ $(if $(CONFIG_RETPOLINE), --retpoline) \ @@ -242,7 +243,7 @@ cmd_gen_objtooldep = $(if $(objtool-enabled), { echo ; echo '$@: $$(wildcard $(o endif # CONFIG_STACK_VALIDATION -ifdef CONFIG_LTO_CLANG +ifneq ($(CONFIG_LTO_CLANG)$(CONFIG_X86_KERNEL_IBT),) # Skip objtool for LLVM bitcode $(obj)/%.o: objtool-enabled := @@ -288,24 +289,24 @@ $(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE $(call if_changed_rule,cc_o_c) $(call cmd,force_checksrc) -ifdef CONFIG_LTO_CLANG +ifneq ($(CONFIG_LTO_CLANG)$(CONFIG_X86_KERNEL_IBT),) # Module .o files may contain LLVM bitcode, compile them into native code # before ELF processing -quiet_cmd_cc_lto_link_modules = LTO [M] $@ -cmd_cc_lto_link_modules = \ +quiet_cmd_cc_prelink_modules = LD [M] $@ + cmd_cc_prelink_modules = \ $(LD) $(ld_flags) -r -o $@ \ - $(shell [ -s $(@:.lto.o=.o.symversions) ] && \ - echo -T $(@:.lto.o=.o.symversions)) \ + $(shell [ -s $(@:.prelink.o=.o.symversions) ] && \ + echo -T $(@:.prelink.o=.o.symversions)) \ --whole-archive $(filter-out FORCE,$^) \ $(cmd_objtool) # objtool was skipped for LLVM bitcode, run it now that we have compiled # modules into native code -$(obj)/%.lto.o: objtool-enabled = y -$(obj)/%.lto.o: part-of-module := y +$(obj)/%.prelink.o: objtool-enabled = y +$(obj)/%.prelink.o: part-of-module := y -$(obj)/%.lto.o: $(obj)/%.o FORCE - $(call if_changed,cc_lto_link_modules) +$(obj)/%.prelink.o: $(obj)/%.o FORCE + $(call if_changed,cc_prelink_modules) endif cmd_mod = { \ @@ -469,7 +470,7 @@ $(obj)/lib.a: $(lib-y) FORCE # Do not replace $(filter %.o,^) with $(real-prereqs). When a single object # module is turned into a multi object module, $^ will contain header file # dependencies recorded in the .*.cmd file. -ifdef CONFIG_LTO_CLANG +ifneq ($(CONFIG_LTO_CLANG)$(CONFIG_X86_KERNEL_IBT),) quiet_cmd_link_multi-m = AR [M] $@ cmd_link_multi-m = \ $(cmd_update_lto_symversions); \ diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 9d5320a47ef8..c593475d4a93 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -225,11 +225,11 @@ dtc_cpp_flags = -Wp,-MMD,$(depfile).pre.tmp -nostdinc \ $(addprefix -I,$(DTC_INCLUDE)) \ -undef -D__DTS__ -ifeq ($(CONFIG_LTO_CLANG),y) +ifneq ($(CONFIG_LTO_CLANG)$(CONFIG_X86_KERNEL_IBT),) # With CONFIG_LTO_CLANG, .o files in modules might be LLVM bitcode, so we # need to run LTO to compile them into native code (.lto.o) before further # processing. -mod-prelink-ext := .lto +mod-prelink-ext := .prelink endif # Useful for describing the dependency of composite objects diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 666f7bbc13eb..f704034ebbe6 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -108,14 +108,20 @@ objtool_link() local objtoolcmd; local objtoolopt; - if is_enabled CONFIG_LTO_CLANG && is_enabled CONFIG_STACK_VALIDATION; then + if is_enabled CONFIG_STACK_VALIDATION && \ + ( is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT ); then + # Don't perform vmlinux validation unless explicitly requested, # but run objtool on vmlinux.o now that we have an object file. if is_enabled CONFIG_UNWINDER_ORC; then objtoolcmd="orc generate" fi - objtoolopt="${objtoolopt} --duplicate" + objtoolopt="${objtoolopt} --lto" + + if is_enabled CONFIG_X86_KERNEL_IBT; then + objtoolopt="${objtoolopt} --ibt" + fi if is_enabled CONFIG_FTRACE_MCOUNT_USE_OBJTOOL; then objtoolopt="${objtoolopt} --mcount" @@ -168,7 +174,7 @@ vmlinux_link() # skip output file argument shift - if is_enabled CONFIG_LTO_CLANG; then + if is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT; then # Use vmlinux.o instead of performing the slow LTO link again. objs=vmlinux.o libs= diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 8a7937452991..d10f93aac1c8 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1996,9 +1996,9 @@ static char *remove_dot(char *s) if (m && (s[n + m] == '.' || s[n + m] == 0)) s[n] = 0; - /* strip trailing .lto */ - if (strends(s, ".lto")) - s[strlen(s) - 4] = '\0'; + /* strip trailing .prelink */ + if (strends(s, ".prelink")) + s[strlen(s) - 8] = '\0'; } return s; } @@ -2022,9 +2022,9 @@ static void read_symbols(const char *modname) /* strip trailing .o */ tmp = NOFAIL(strdup(modname)); tmp[strlen(tmp) - 2] = '\0'; - /* strip trailing .lto */ - if (strends(tmp, ".lto")) - tmp[strlen(tmp) - 4] = '\0'; + /* strip trailing .prelink */ + if (strends(tmp, ".prelink")) + tmp[strlen(tmp) - 8] = '\0'; mod = new_module(tmp); free(tmp); } diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c index edff063e088d..d8a86d1a99d6 100644 --- a/sound/core/control_compat.c +++ b/sound/core/control_compat.c @@ -150,7 +150,7 @@ struct snd_ctl_elem_value32 { unsigned char reserved[128]; }; -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI /* x32 has a different alignment for 64bit values from ia32 */ struct snd_ctl_elem_value_x32 { struct snd_ctl_elem_id id; @@ -162,7 +162,7 @@ struct snd_ctl_elem_value_x32 { } value; unsigned char reserved[128]; }; -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ /* get the value type and count of the control */ static int get_ctl_type(struct snd_card *card, struct snd_ctl_elem_id *id, @@ -347,7 +347,7 @@ static int snd_ctl_elem_write_user_compat(struct snd_ctl_file *file, return ctl_elem_write_user(file, data32, &data32->value); } -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI static int snd_ctl_elem_read_user_x32(struct snd_card *card, struct snd_ctl_elem_value_x32 __user *data32) { @@ -359,7 +359,7 @@ static int snd_ctl_elem_write_user_x32(struct snd_ctl_file *file, { return ctl_elem_write_user(file, data32, &data32->value); } -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ /* add or replace a user control */ static int snd_ctl_elem_add_compat(struct snd_ctl_file *file, @@ -418,10 +418,10 @@ enum { SNDRV_CTL_IOCTL_ELEM_WRITE32 = _IOWR('U', 0x13, struct snd_ctl_elem_value32), SNDRV_CTL_IOCTL_ELEM_ADD32 = _IOWR('U', 0x17, struct snd_ctl_elem_info32), SNDRV_CTL_IOCTL_ELEM_REPLACE32 = _IOWR('U', 0x18, struct snd_ctl_elem_info32), -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI SNDRV_CTL_IOCTL_ELEM_READ_X32 = _IOWR('U', 0x12, struct snd_ctl_elem_value_x32), SNDRV_CTL_IOCTL_ELEM_WRITE_X32 = _IOWR('U', 0x13, struct snd_ctl_elem_value_x32), -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ }; static inline long snd_ctl_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -460,12 +460,12 @@ static inline long snd_ctl_ioctl_compat(struct file *file, unsigned int cmd, uns return snd_ctl_elem_add_compat(ctl, argp, 0); case SNDRV_CTL_IOCTL_ELEM_REPLACE32: return snd_ctl_elem_add_compat(ctl, argp, 1); -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI case SNDRV_CTL_IOCTL_ELEM_READ_X32: return snd_ctl_elem_read_user_x32(ctl->card, argp); case SNDRV_CTL_IOCTL_ELEM_WRITE_X32: return snd_ctl_elem_write_user_x32(ctl, argp); -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ } down_read(&snd_ioctl_rwsem); diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c index e4e176854ce7..917c5b4f19d7 100644 --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -147,13 +147,13 @@ static int snd_pcm_ioctl_channel_info_compat(struct snd_pcm_substream *substream return err; } -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI /* X32 ABI has the same struct as x86-64 for snd_pcm_channel_info */ static int snd_pcm_channel_info_user(struct snd_pcm_substream *substream, struct snd_pcm_channel_info __user *src); #define snd_pcm_ioctl_channel_info_x32(s, p) \ snd_pcm_channel_info_user(s, p) -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ struct compat_snd_pcm_status64 { snd_pcm_state_t state; @@ -375,7 +375,7 @@ static int snd_pcm_ioctl_xfern_compat(struct snd_pcm_substream *substream, return err; } -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI /* X32 ABI has 64bit timespec and 64bit alignment */ struct snd_pcm_mmap_status_x32 { snd_pcm_state_t state; @@ -468,7 +468,7 @@ static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream, return 0; } -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ #ifdef __BIG_ENDIAN typedef char __pad_before_u32[4]; @@ -560,10 +560,10 @@ enum { SNDRV_PCM_IOCTL_READN_FRAMES32 = _IOR('A', 0x53, struct snd_xfern32), SNDRV_PCM_IOCTL_STATUS_COMPAT64 = _IOR('A', 0x20, struct compat_snd_pcm_status64), SNDRV_PCM_IOCTL_STATUS_EXT_COMPAT64 = _IOWR('A', 0x24, struct compat_snd_pcm_status64), -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI SNDRV_PCM_IOCTL_CHANNEL_INFO_X32 = _IOR('A', 0x32, struct snd_pcm_channel_info), SNDRV_PCM_IOCTL_SYNC_PTR_X32 = _IOWR('A', 0x23, struct snd_pcm_sync_ptr_x32), -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ }; static long snd_pcm_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -607,10 +607,10 @@ static long snd_pcm_ioctl_compat(struct file *file, unsigned int cmd, unsigned l case __SNDRV_PCM_IOCTL_SYNC_PTR32: return snd_pcm_common_ioctl(file, substream, cmd, argp); case __SNDRV_PCM_IOCTL_SYNC_PTR64: -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI if (in_x32_syscall()) return snd_pcm_ioctl_sync_ptr_x32(substream, argp); -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ return snd_pcm_ioctl_sync_ptr_buggy(substream, argp); case SNDRV_PCM_IOCTL_HW_REFINE32: return snd_pcm_ioctl_hw_params_compat(substream, 1, argp); @@ -642,10 +642,10 @@ static long snd_pcm_ioctl_compat(struct file *file, unsigned int cmd, unsigned l return snd_pcm_status_user_compat64(substream, argp, false); case SNDRV_PCM_IOCTL_STATUS_EXT_COMPAT64: return snd_pcm_status_user_compat64(substream, argp, true); -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI case SNDRV_PCM_IOCTL_CHANNEL_INFO_X32: return snd_pcm_ioctl_channel_info_x32(substream, argp); -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ } return -ENOIOCTLCMD; diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index aca52db2f3f3..586d35720f13 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -77,6 +77,18 @@ struct unwind_hint { #define STACK_FRAME_NON_STANDARD_FP(func) #endif +#define ANNOTATE_NOENDBR \ + "986: \n\t" \ + ".pushsection .discard.noendbr\n\t" \ + _ASM_PTR " 986b\n\t" \ + ".popsection\n\t" + +#define ASM_REACHABLE \ + "998:\n\t" \ + ".pushsection .discard.reachable\n\t" \ + ".long 998b - .\n\t" \ + ".popsection\n\t" + #else /* __ASSEMBLY__ */ /* @@ -129,6 +141,20 @@ struct unwind_hint { .popsection .endm +.macro ANNOTATE_NOENDBR +.Lhere_\@: + .pushsection .discard.noendbr + .quad .Lhere_\@ + .popsection +.endm + +.macro REACHABLE +.Lhere_\@: + .pushsection .discard.reachable + .long .Lhere_\@ - . + .popsection +.endm + #endif /* __ASSEMBLY__ */ #else /* !CONFIG_STACK_VALIDATION */ @@ -139,12 +165,18 @@ struct unwind_hint { "\n\t" #define STACK_FRAME_NON_STANDARD(func) #define STACK_FRAME_NON_STANDARD_FP(func) +#define ANNOTATE_NOENDBR +#define ASM_REACHABLE #else #define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm +.macro ANNOTATE_NOENDBR +.endm +.macro REACHABLE +.endm #endif #endif /* CONFIG_STACK_VALIDATION */ diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 479e769ca324..943cb41cddf7 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -103,6 +103,18 @@ unsigned long arch_jump_destination(struct instruction *insn) #define rm_is_mem(reg) (mod_is_mem() && !is_RIP() && rm_is(reg)) #define rm_is_reg(reg) (mod_is_reg() && modrm_rm == (reg)) +static bool has_notrack_prefix(struct insn *insn) +{ + int i; + + for (i = 0; i < insn->prefixes.nbytes; i++) { + if (insn->prefixes.bytes[i] == 0x3e) + return true; + } + + return false; +} + int arch_decode_instruction(struct objtool_file *file, const struct section *sec, unsigned long offset, unsigned int maxlen, unsigned int *len, enum insn_type *type, @@ -112,7 +124,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec const struct elf *elf = file->elf; struct insn insn; int x86_64, ret; - unsigned char op1, op2, op3, + unsigned char op1, op2, op3, prefix, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0, sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0; @@ -137,6 +149,8 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec if (insn.vex_prefix.nbytes) return 0; + prefix = insn.prefixes.bytes[0]; + op1 = insn.opcode.bytes[0]; op2 = insn.opcode.bytes[1]; op3 = insn.opcode.bytes[2]; @@ -492,6 +506,12 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec /* nopl/nopw */ *type = INSN_NOP; + } else if (op2 == 0x1e) { + + if (prefix == 0xf3 && (modrm == 0xfa || modrm == 0xfb)) + *type = INSN_ENDBR; + + } else if (op2 == 0x38 && op3 == 0xf8) { if (insn.prefixes.nbytes == 1 && insn.prefixes.bytes[0] == 0xf2) { @@ -636,20 +656,24 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec break; case 0xff: - if (modrm_reg == 2 || modrm_reg == 3) + if (modrm_reg == 2 || modrm_reg == 3) { *type = INSN_CALL_DYNAMIC; + if (has_notrack_prefix(&insn)) + WARN("notrack prefix found at %s:0x%lx", sec->name, offset); - else if (modrm_reg == 4) + } else if (modrm_reg == 4) { *type = INSN_JUMP_DYNAMIC; + if (has_notrack_prefix(&insn)) + WARN("notrack prefix found at %s:0x%lx", sec->name, offset); - else if (modrm_reg == 5) + } else if (modrm_reg == 5) { /* jmpf */ *type = INSN_CONTEXT_SWITCH; - else if (modrm_reg == 6) { + } else if (modrm_reg == 6) { /* push from mem */ ADD_OP(op) { diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 38070f26105b..fc6975ab8b06 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -20,7 +20,8 @@ #include <objtool/objtool.h> bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, - validate_dup, vmlinux, mcount, noinstr, backup, sls; + lto, vmlinux, mcount, noinstr, backup, sls, dryrun, + ibt; static const char * const check_usage[] = { "objtool check [<options>] file.o", @@ -40,12 +41,14 @@ const struct option check_options[] = { OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"), OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"), OPT_BOOLEAN('s', "stats", &stats, "print statistics"), - OPT_BOOLEAN('d', "duplicate", &validate_dup, "duplicate validation for vmlinux.o"), + OPT_BOOLEAN(0, "lto", <o, "whole-archive like runs"), OPT_BOOLEAN('n', "noinstr", &noinstr, "noinstr validation for vmlinux.o"), OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"), OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"), OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"), OPT_BOOLEAN('S', "sls", &sls, "validate straight-line-speculation"), + OPT_BOOLEAN(0, "dry-run", &dryrun, "don't write the modifications"), + OPT_BOOLEAN(0, "ibt", &ibt, "validate ENDBR placement"), OPT_END(), }; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 7c33ec67c4a9..6de5085e3e5a 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -181,6 +181,9 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "kunit_try_catch_throw", "xen_start_kernel", "cpu_bringup_and_idle", + "do_group_exit", + "stop_this_cpu", + "__invalid_creds", }; if (!func) @@ -380,6 +383,7 @@ static int decode_instructions(struct objtool_file *file) memset(insn, 0, sizeof(*insn)); INIT_LIST_HEAD(&insn->alts); INIT_LIST_HEAD(&insn->stack_ops); + INIT_LIST_HEAD(&insn->call_node); insn->sec = sec; insn->offset = offset; @@ -392,6 +396,14 @@ static int decode_instructions(struct objtool_file *file) if (ret) goto err; + /* + * By default, "ud2" is a dead end unless otherwise + * annotated, because GCC 7 inserts it for certain + * divide-by-zero cases. + */ + if (insn->type == INSN_BUG) + insn->dead_end = true; + hash_add(file->insn_hash, &insn->hash, sec_offset_hash(sec, insn->offset)); list_add_tail(&insn->list, &file->insn_list); nr_insns++; @@ -407,8 +419,17 @@ static int decode_instructions(struct objtool_file *file) return -1; } - sym_for_each_insn(file, func, insn) + sym_for_each_insn(file, func, insn) { insn->func = func; + if (insn->type == INSN_ENDBR && list_empty(&insn->call_node)) { + if (insn->offset == insn->func->offset) { + list_add_tail(&insn->call_node, &file->endbr_list); + file->nr_endbr++; + } else { + file->nr_endbr_int++; + } + } + } } } @@ -521,14 +542,6 @@ static int add_dead_ends(struct objtool_file *file) struct instruction *insn; /* - * By default, "ud2" is a dead end unless otherwise annotated, because - * GCC 7 inserts it for certain divide-by-zero cases. - */ - for_each_insn(file, insn) - if (insn->type == INSN_BUG) - insn->dead_end = true; - - /* * Check for manually annotated dead ends. */ sec = find_section_by_name(file->elf, ".rela.discard.unreachable"); @@ -731,6 +744,58 @@ static int create_retpoline_sites_sections(struct objtool_file *file) return 0; } +static int create_ibt_endbr_seal_sections(struct objtool_file *file) +{ + struct instruction *insn; + struct section *sec; + int idx; + + sec = find_section_by_name(file->elf, ".ibt_endbr_seal"); + if (sec) { + WARN("file already has .ibt_endbr_seal, skipping"); + return 0; + } + + idx = 0; + list_for_each_entry(insn, &file->endbr_list, call_node) + idx++; + + if (stats) { + printf("ibt: ENDBR at function start: %d\n", file->nr_endbr); + printf("ibt: ENDBR inside functions: %d\n", file->nr_endbr_int); + printf("ibt: superfluous ENDBR: %d\n", idx); + } + + if (!idx) + return 0; + + sec = elf_create_section(file->elf, ".ibt_endbr_seal", 0, + sizeof(int), idx); + if (!sec) { + WARN("elf_create_section: .ibt_endbr_seal"); + return -1; + } + + idx = 0; + list_for_each_entry(insn, &file->endbr_list, call_node) { + + int *site = (int *)sec->data->d_buf + idx; + *site = 0; + + if (elf_add_reloc_to_insn(file->elf, sec, + idx * sizeof(int), + R_X86_64_PC32, + insn->sec, insn->offset)) { + WARN("elf_add_reloc_to_insn: .ibt_endbr_seal"); + return -1; + } + + idx++; + } + + return 0; +} + static int create_mcount_loc_sections(struct objtool_file *file) { struct section *sec; @@ -1111,6 +1176,9 @@ static void annotate_call_site(struct objtool_file *file, list_add_tail(&insn->call_node, &file->mcount_loc_list); return; } + + if (!sibling && dead_end_function(file, sym)) + insn->dead_end = true; } static void add_call_dest(struct objtool_file *file, struct instruction *insn, @@ -1165,6 +1233,19 @@ static void add_retpoline_call(struct objtool_file *file, struct instruction *in annotate_call_site(file, insn, false); } + +static bool same_function(struct instruction *insn1, struct instruction *insn2) +{ + return insn1->func->pfunc == insn2->func->pfunc; +} + +static bool is_first_func_insn(struct instruction *insn) +{ + return insn->offset == insn->func->offset || + (insn->type == INSN_ENDBR && + insn->offset == insn->func->offset + insn->len); +} + /* * Find the destination instructions for all jumps. */ @@ -1245,8 +1326,8 @@ static int add_jump_destinations(struct objtool_file *file) insn->func->cfunc = insn->jump_dest->func; insn->jump_dest->func->pfunc = insn->func; - } else if (insn->jump_dest->func->pfunc != insn->func->pfunc && - insn->jump_dest->offset == insn->jump_dest->func->offset) { + } else if (!same_function(insn, insn->jump_dest) && + is_first_func_insn(insn->jump_dest)) { /* internal sibling call (without reloc) */ add_call_dest(file, insn, insn->jump_dest->func, true); } @@ -1836,6 +1917,16 @@ static int read_unwind_hints(struct objtool_file *file) insn->hint = true; + if (ibt && hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { + struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset); + + if (sym && sym->bind == STB_GLOBAL && + insn->type != INSN_ENDBR && !insn->noendbr) { + WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR", + insn->sec, insn->offset); + } + } + if (hint->type == UNWIND_HINT_TYPE_FUNC) { insn->cfi = &func_cfi; continue; @@ -1860,6 +1951,32 @@ static int read_unwind_hints(struct objtool_file *file) return 0; } +static int read_noendbr_hints(struct objtool_file *file) +{ + struct section *sec; + struct instruction *insn; + struct reloc *reloc; + + sec = find_section_by_name(file->elf, ".rela.discard.noendbr"); + if (!sec) + return 0; + + list_for_each_entry(reloc, &sec->reloc_list, list) { + insn = find_insn(file, reloc->sym->sec, reloc->sym->offset + reloc->addend); + if (!insn) { + WARN("bad .discard.noendbr entry"); + return -1; + } + + if (insn->type == INSN_ENDBR) + WARN_FUNC("ANNOTATE_NOENDBR on ENDBR", insn->sec, insn->offset); + + insn->noendbr = 1; + } + + return 0; +} + static int read_retpoline_hints(struct objtool_file *file) { struct section *sec; @@ -2086,10 +2203,6 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - ret = add_dead_ends(file); - if (ret) - return ret; - add_ignores(file); add_uaccess_safe(file); @@ -2098,6 +2211,13 @@ static int decode_sections(struct objtool_file *file) return ret; /* + * Must be before read_unwind_hints() since that needs insn->noendbr. + */ + ret = read_noendbr_hints(file); + if (ret) + return ret; + + /* * Must be before add_{jump_call}_destination. */ ret = classify_symbols(file); @@ -2128,6 +2248,14 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + /* + * Must be after add_call_destinations() such that it can override + * dead_end_function() marks. + */ + ret = add_dead_ends(file); + if (ret) + return ret; + ret = add_jump_table_alts(file); if (ret) return ret; @@ -3026,6 +3154,115 @@ static struct instruction *next_insn_to_validate(struct objtool_file *file, return next_insn_same_sec(file, insn); } +static struct instruction * +validate_ibt_reloc(struct objtool_file *file, struct reloc *reloc) +{ + struct instruction *dest; + struct section *sec; + unsigned long off; + + sec = reloc->sym->sec; + off = reloc->sym->offset; + + if ((reloc->sec->base->sh.sh_flags & SHF_EXECINSTR) && + (reloc->type == R_X86_64_PC32 || reloc->type == R_X86_64_PLT32)) + off += arch_dest_reloc_offset(reloc->addend); + else + off += reloc->addend; + + dest = find_insn(file, sec, off); + if (!dest) + return NULL; + + if (dest->type == INSN_ENDBR) { + if (!list_empty(&dest->call_node)) + list_del_init(&dest->call_node); + + return NULL; + } + + if (reloc->sym->static_call_tramp) + return NULL; + + return dest; +} + +static void warn_noendbr(const char *msg, struct section *sec, unsigned long offset, + struct instruction *dest) +{ + WARN_FUNC("%srelocation to !ENDBR: %s+0x%lx", sec, offset, msg, + dest->func ? dest->func->name : dest->sec->name, + dest->func ? dest->offset - dest->func->offset : dest->offset); +} + +static void validate_ibt_dest(struct objtool_file *file, struct instruction *insn, + struct instruction *dest) +{ + if (dest->func && dest->func == insn->func) { + /* + * Anything from->to self is either _THIS_IP_ or IRET-to-self. + * + * There is no sane way to annotate _THIS_IP_ since the compiler treats the + * relocation as a constant and is happy to fold in offsets, skewing any + * annotation we do, leading to vast amounts of false-positives. + * + * There's also compiler generated _THIS_IP_ through KCOV and + * such which we have no hope of annotating. + * + * As such, blanket accept self-references without issue. + */ + return; + } + + if (dest->noendbr) + return; + + warn_noendbr("", insn->sec, insn->offset, dest); +} + +static void validate_ibt_insn(struct objtool_file *file, struct instruction *insn) +{ + struct instruction *dest; + struct reloc *reloc; + + switch (insn->type) { + case INSN_CALL: + case INSN_CALL_DYNAMIC: + case INSN_JUMP_CONDITIONAL: + case INSN_JUMP_UNCONDITIONAL: + case INSN_JUMP_DYNAMIC: + case INSN_JUMP_DYNAMIC_CONDITIONAL: + case INSN_RETURN: + /* + * We're looking for code references setting up indirect code + * flow. As such, ignore direct code flow and the actual + * dynamic branches. + */ + return; + + case INSN_NOP: + /* + * handle_group_alt() will create INSN_NOP instruction that + * don't belong to any section, ignore all NOP since they won't + * carry a (useful) relocation anyway. + */ + return; + + default: + break; + } + + for (reloc = insn_reloc(file, insn); + reloc; + reloc = find_reloc_by_dest_range(file->elf, insn->sec, + reloc->offset + 1, + (insn->offset + insn->len) - (reloc->offset + 1))) { + dest = validate_ibt_reloc(file, reloc); + if (dest) + validate_ibt_dest(file, insn, dest); + } +} + /* * Follow the branch starting at the given instruction, and recursively follow * any other branches (jumps). Meanwhile, track the frame pointer state at @@ -3115,9 +3352,8 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, switch (insn->type) { case INSN_RETURN: - if (next_insn && next_insn->type == INSN_TRAP) { - next_insn->ignore = true; - } else if (sls && !insn->retpoline_safe) { + if (sls && !insn->retpoline_safe && + next_insn && next_insn->type != INSN_TRAP) { WARN_FUNC("missing int3 after ret", insn->sec, insn->offset); } @@ -3136,7 +3372,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, return 1; } - if (dead_end_function(file, insn->call_dest)) + if (insn->dead_end) return 0; break; @@ -3164,9 +3400,8 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_JUMP_DYNAMIC: - if (next_insn && next_insn->type == INSN_TRAP) { - next_insn->ignore = true; - } else if (sls && !insn->retpoline_safe) { + if (sls && !insn->retpoline_safe && + next_insn && next_insn->type != INSN_TRAP) { WARN_FUNC("missing int3 after indirect jump", insn->sec, insn->offset); } @@ -3237,6 +3472,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; } + if (ibt) + validate_ibt_insn(file, insn); + if (insn->dead_end) return 0; @@ -3337,7 +3575,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio int i; struct instruction *prev_insn; - if (insn->ignore || insn->type == INSN_NOP) + if (insn->ignore || insn->type == INSN_NOP || insn->type == INSN_TRAP) return true; /* @@ -3348,6 +3586,49 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio !strcmp(insn->sec->name, ".altinstr_aux")) return true; + /* + * Whole archive runs might encounder dead code from weak symbols. + * This is where the linker will have dropped the weak symbol in + * favour of a regular symbol, but leaves the code in place. + * + * In this case we'll find a piece of code (whole function) that is not + * covered by a !section symbol. Ignore them. + */ + if (!insn->func && lto) { + int size = find_symbol_hole_containing(insn->sec, insn->offset); + unsigned long end = insn->offset + size; + + if (!size) /* not a hole */ + return false; + + if (size < 0) /* hole until the end */ + return true; + + sec_for_each_insn_continue(file, insn) { + /* + * If we reach a visited instruction at or before the + * end of the hole, ignore the unreachable. + */ + if (insn->visited) + return true; + + if (insn->offset >= end) + break; + + /* + * If this hole jumps to a .cold function, mark it ignore too. + */ + if (insn->jump_dest && insn->jump_dest->func && + strstr(insn->jump_dest->func->name, ".cold")) { + struct instruction *dest = insn->jump_dest; + func_for_each_insn(file, dest->func, dest) + dest->ignore = true; + } + } + + return false; + } + if (!insn->func) return false; @@ -3479,6 +3760,53 @@ static int validate_functions(struct objtool_file *file) return warnings; } +static int validate_ibt(struct objtool_file *file) +{ + struct section *sec; + struct reloc *reloc; + + for_each_sec(file, sec) { + bool is_data; + + /* already done in validate_branch() */ + if (sec->sh.sh_flags & SHF_EXECINSTR) + continue; + + if (!sec->reloc) + continue; + + if (!strncmp(sec->name, ".orc", 4)) + continue; + + if (!strncmp(sec->name, ".discard", 8)) + continue; + + if (!strncmp(sec->name, ".debug", 6)) + continue; + + if (!strcmp(sec->name, "_error_injection_whitelist")) + continue; + + if (!strcmp(sec->name, "_kprobe_blacklist")) + continue; + + is_data = strstr(sec->name, ".data") || strstr(sec->name, ".rodata"); + + list_for_each_entry(reloc, &sec->reloc->reloc_list, list) { + struct instruction *dest; + + dest = validate_ibt_reloc(file, reloc); + if (is_data && dest && !dest->noendbr) { + warn_noendbr("data ", reloc->sym->sec, + reloc->sym->offset + reloc->addend, + dest); + } + } + } + + return 0; +} + static int validate_reachable_instructions(struct objtool_file *file) { struct instruction *insn; @@ -3501,6 +3829,16 @@ int check(struct objtool_file *file) { int ret, warnings = 0; + if (lto && !(vmlinux || module)) { + fprintf(stderr, "--lto requires: --vmlinux or --module\n"); + return 1; + } + + if (ibt && !lto) { + fprintf(stderr, "--ibt requires: --lto\n"); + return 1; + } + arch_initial_func_cfi_state(&initial_func_cfi); init_cfi_state(&init_cfi); init_cfi_state(&func_cfi); @@ -3521,7 +3859,7 @@ int check(struct objtool_file *file) if (list_empty(&file->insn_list)) goto out; - if (vmlinux && !validate_dup) { + if (vmlinux && !lto) { ret = validate_vmlinux_functions(file); if (ret < 0) goto out; @@ -3547,6 +3885,13 @@ int check(struct objtool_file *file) goto out; warnings += ret; + if (ibt) { + ret = validate_ibt(file); + if (ret < 0) + goto out; + warnings += ret; + } + if (!warnings) { ret = validate_reachable_instructions(file); if (ret < 0) @@ -3573,6 +3918,13 @@ int check(struct objtool_file *file) warnings += ret; } + if (ibt) { + ret = create_ibt_endbr_seal_sections(file); + if (ret < 0) + goto out; + warnings += ret; + } + if (stats) { printf("nr_insns_visited: %ld\n", nr_insns_visited); printf("nr_cfi: %ld\n", nr_cfi); diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 4b384c907027..d7b99a737496 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -83,6 +83,31 @@ static int symbol_by_offset(const void *key, const struct rb_node *node) return 0; } +struct symbol_hole { + unsigned long key; + const struct symbol *sym; +}; + +/* + * Find !section symbol where @offset is after it. + */ +static int symbol_hole_by_offset(const void *key, const struct rb_node *node) +{ + const struct symbol *s = rb_entry(node, struct symbol, node); + struct symbol_hole *sh = (void *)key; + + if (sh->key < s->offset) + return -1; + + if (sh->key >= s->offset + s->len) { + if (s->type != STT_SECTION) + sh->sym = s; + return 1; + } + + return 0; +} + struct section *find_section_by_name(const struct elf *elf, const char *name) { struct section *sec; @@ -162,6 +187,41 @@ struct symbol *find_symbol_containing(const struct section *sec, unsigned long o return NULL; } +/* + * Returns size of hole starting at @offset. + */ +int find_symbol_hole_containing(const struct section *sec, unsigned long offset) +{ + struct symbol_hole hole = { + .key = offset, + .sym = NULL, + }; + struct rb_node *n; + struct symbol *s; + + /* + * Find the rightmost symbol for which @offset is after it. + */ + n = rb_find(&hole, &sec->symbol_tree, symbol_hole_by_offset); + + /* found a symbol that contains @offset */ + if (n) + return 0; /* not a hole */ + + /* didn't find a symbol for which @offset is after it */ + if (!hole.sym) + return 0; /* not a hole */ + + /* @offset >= sym->offset + sym->len, find symbol after it */ + n = rb_next(&hole.sym->node); + if (!n) + return -1; /* until end of address space */ + + /* hole until start of next symbol */ + s = rb_entry(n, struct symbol, node); + return s->offset - offset; +} + struct symbol *find_func_containing(struct section *sec, unsigned long offset) { struct rb_node *node; @@ -1019,6 +1079,9 @@ int elf_write(struct elf *elf) struct section *sec; Elf_Scn *s; + if (dryrun) + return 0; + /* Update changed relocation sections and section headers: */ list_for_each_entry(sec, &elf->sections, list) { if (sec->changed) { diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 76bae3078286..9b19cc304195 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -27,6 +27,7 @@ enum insn_type { INSN_STD, INSN_CLD, INSN_TRAP, + INSN_ENDBR, INSN_OTHER, }; diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h index 89ba869ed08f..c39dbfaef6dc 100644 --- a/tools/objtool/include/objtool/builtin.h +++ b/tools/objtool/include/objtool/builtin.h @@ -9,7 +9,8 @@ extern const struct option check_options[]; extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, - validate_dup, vmlinux, mcount, noinstr, backup, sls; + lto, vmlinux, mcount, noinstr, backup, sls, dryrun, + ibt; extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]); diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index 6cfff078897f..f10d7374f388 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -45,11 +45,18 @@ struct instruction { unsigned int len; enum insn_type type; unsigned long immediate; - bool dead_end, ignore, ignore_alts; - bool hint; - bool retpoline_safe; + + u8 dead_end : 1, + ignore : 1, + ignore_alts : 1, + hint : 1, + retpoline_safe : 1, + noendbr : 1; + /* 2 bit hole */ s8 instr; u8 visited; + /* u8 hole */ + struct alt_group *alt_group; struct symbol *call_dest; struct instruction *jump_dest; diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index d22336781401..22ba7e2b816e 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -152,6 +152,7 @@ struct symbol *find_func_by_offset(struct section *sec, unsigned long offset); struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); struct symbol *find_symbol_by_name(const struct elf *elf, const char *name); struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset); +int find_symbol_hole_containing(const struct section *sec, unsigned long offset); struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, unsigned long offset); struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec, unsigned long offset, unsigned int len); diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h index f99fbc6078d5..7a5c13a78f87 100644 --- a/tools/objtool/include/objtool/objtool.h +++ b/tools/objtool/include/objtool/objtool.h @@ -26,8 +26,12 @@ struct objtool_file { struct list_head retpoline_call_list; struct list_head static_call_list; struct list_head mcount_loc_list; + struct list_head endbr_list; bool ignore_unreachables, c_file, hints, rodata; + unsigned int nr_endbr; + unsigned int nr_endbr_int; + unsigned long jl_short, jl_long; unsigned long jl_nop_short, jl_nop_long; diff --git a/tools/objtool/include/objtool/warn.h b/tools/objtool/include/objtool/warn.h index d99c4675e4a5..802cfda0a6f6 100644 --- a/tools/objtool/include/objtool/warn.h +++ b/tools/objtool/include/objtool/warn.h @@ -22,6 +22,8 @@ static inline char *offstr(struct section *sec, unsigned long offset) unsigned long name_off; func = find_func_containing(sec, offset); + if (!func) + func = find_symbol_containing(sec, offset); if (func) { name = func->name; name_off = offset - func->offset; diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index bdf699f6552b..b09946f4e1d6 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -128,6 +128,7 @@ struct objtool_file *objtool_open_read(const char *_objname) INIT_LIST_HEAD(&file.retpoline_call_list); INIT_LIST_HEAD(&file.static_call_list); INIT_LIST_HEAD(&file.mcount_loc_list); + INIT_LIST_HEAD(&file.endbr_list); file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment"); file.ignore_unreachables = no_unreachable; file.hints = false; |