Diffstat (limited to 'arch/x86')
54 files changed, 678 insertions, 145 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 10e4c332e15d..7340d9f01b62 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1842,6 +1842,36 @@ config X86_UMIP
 	  specific cases in protected and virtual-8086 modes. Emulated
 	  results are dummy.
 
+config CC_HAS_IBT
+	# GCC >= 9 and binutils >= 2.29
+	# Retpoline check to work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93654
+	# Clang/LLVM >= 14
+	# https://github.com/llvm/llvm-project/commit/e0b89df2e0f0130881bf6c39bf31d7f6aac00e0f
+	# https://github.com/llvm/llvm-project/commit/dfcf69770bc522b9e411c66454934a37c1f35332
+	def_bool ((CC_IS_GCC && $(cc-option, -fcf-protection=branch -mindirect-branch-register)) || \
+		  (CC_IS_CLANG && CLANG_VERSION >= 140000)) && \
+		  $(as-instr,endbr64)
+
+config X86_KERNEL_IBT
+	prompt "Indirect Branch Tracking"
+	bool
+	depends on X86_64 && CC_HAS_IBT && STACK_VALIDATION
+	# https://github.com/llvm/llvm-project/commit/9d7001eba9c4cb311e03cd8cdc231f9e579f2d0f
+	depends on !LD_IS_LLD || LLD_VERSION >= 140000
+	help
+	  Build the kernel with support for Indirect Branch Tracking, a
+	  hardware-supported, coarse-grain, forward-edge Control Flow Integrity
+	  protection. It enforces that all indirect calls must land on
+	  an ENDBR instruction; to make this happen, the compiler instruments
+	  the code with ENDBR instructions.
+
+	  In addition to building the kernel with IBT, seal all functions that
+	  are not indirect call targets, avoiding them ever becoming one.
+
+	  This requires LTO-like objtool runs and will slow down the build. It
+	  does significantly reduce the number of ENDBR instructions in the
+	  kernel image.
+
 config X86_INTEL_MEMORY_PROTECTION_KEYS
 	prompt "Memory Protection Keys"
 	def_bool y
@@ -2815,19 +2845,20 @@ config IA32_AOUT
 	help
 	  Support old a.out binaries in the 32bit emulation.
 
-config X86_X32
+config X86_X32_ABI
 	bool "x32 ABI for 64-bit mode"
 	depends on X86_64
+	# llvm-objcopy does not convert x86_64 .note.gnu.property or
+	# compressed debug sections to x86_x32 properly:
+	# https://github.com/ClangBuiltLinux/linux/issues/514
+	# https://github.com/ClangBuiltLinux/linux/issues/1141
+	depends on $(success,$(OBJCOPY) --version | head -n1 | grep -qv llvm)
 	help
 	  Include code to run binaries for the x32 native 32-bit ABI
 	  for 64-bit processors. An x32 process gets access to the
 	  full 64-bit register file and wide data path while leaving
 	  pointers at 32 bits for smaller memory footprint.
 
-	  You will need a recent binutils (2.22 or later) with
-	  elf32_x86_64 support enabled to compile a kernel with this
-	  option set.
-
 config COMPAT_32
 	def_bool y
 	depends on IA32_EMULATION || X86_32
@@ -2836,7 +2867,7 @@ config COMPAT_32
 
 config COMPAT
 	def_bool y
-	depends on IA32_EMULATION || X86_X32
+	depends on IA32_EMULATION || X86_X32_ABI
 
 if COMPAT
 config COMPAT_FOR_U64_ALIGNMENT
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index e84cdd409b64..63d50f65b828 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -36,7 +36,7 @@ endif
 
 # How to compile the 16-bit code. Note we always compile for -march=i386;
 # that way we can complain to the user if the CPU is insufficient.
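The CC_HAS_IBT and X86_KERNEL_IBT options above hinge on building with -fcf-protection=branch. As a rough user-space illustration (not part of the patch; assumes gcc or clang with that flag), every indirect-call target in the snippet below gets an endbr64 at its entry, and on IBT-capable hardware an indirect call that lands anywhere else raises a control-protection fault (#CP):

    /* build with: gcc -O2 -fcf-protection=branch demo.c */
    #include <stdio.h>

    static void handler(void)
    {
            /* entry starts with endbr64 when branch protection is on */
            puts("reached via an indirect call");
    }

    int main(void)
    {
            void (*fp)(void) = handler;     /* forward edge tracked by IBT */

            fp();                           /* must land on an ENDBR */
            return 0;
    }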
-REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING \ +REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING -D__DISABLE_EXPORTS \ -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none) @@ -62,8 +62,20 @@ export BITS # KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -# Intel CET isn't enabled in the kernel +ifeq ($(CONFIG_X86_KERNEL_IBT),y) +# +# Kernel IBT has S_CET.NOTRACK_EN=0, as such the compilers must not generate +# NOTRACK prefixes. Current generation compilers unconditionally employ NOTRACK +# for jump-tables, as such, disable jump-tables for now. +# +# (jump-tables are implicitly disabled by RETPOLINE) +# +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104816 +# +KBUILD_CFLAGS += $(call cc-option,-fcf-protection=branch -fno-jump-tables) +else KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) +endif ifeq ($(CONFIG_X86_32),y) BITS := 32 @@ -140,22 +152,6 @@ else KBUILD_CFLAGS += -mcmodel=kernel endif -ifdef CONFIG_X86_X32 - x32_ld_ok := $(call try-run,\ - /bin/echo -e '1: .quad 1b' | \ - $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" - && \ - $(OBJCOPY) -O elf32-x86-64 "$$TMP" "$$TMP.o" && \ - $(LD) -m elf32_x86_64 "$$TMP.o" -o "$$TMP",y,n) - ifeq ($(x32_ld_ok),y) - CONFIG_X86_X32_ABI := y - KBUILD_AFLAGS += -DCONFIG_X86_X32_ABI - KBUILD_CFLAGS += -DCONFIG_X86_X32_ABI - else - $(warning CONFIG_X86_X32 enabled but no binutils support) - endif -endif -export CONFIG_X86_X32_ABI - # # If the function graph tracer is used with mcount instead of fentry, # '-maccumulate-outgoing-args' is needed to prevent a GCC bug diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index 80c0d22fc42c..ec35915f0901 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -195,6 +195,7 @@ crc_array: .altmacro LABEL crc_ %i .noaltmacro + ENDBR crc32q -i*8(block_0), crc_init crc32q -i*8(block_1), crc1 crc32q -i*8(block_2), crc2 @@ -204,6 +205,7 @@ LABEL crc_ %i .altmacro LABEL crc_ %i .noaltmacro + ENDBR crc32q -i*8(block_0), crc_init crc32q -i*8(block_1), crc1 # SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet @@ -237,6 +239,7 @@ LABEL crc_ %i ################################################################ LABEL crc_ 0 + ENDBR mov tmp, len cmp $128*24, tmp jae full_block diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 466df3e50276..4faac48ebec5 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -86,6 +86,7 @@ SYM_CODE_START(entry_SYSCALL_64) UNWIND_HINT_EMPTY + ENDBR swapgs /* tss.sp2 is scratch space. 
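The -fno-jump-tables in the Makefile hunk above exists because current compilers lower jump tables to NOTRACK-prefixed indirect jumps, and kernel IBT runs with S_CET.NOTRACK_EN=0, so such a jump would fault. A hedged sketch of the kind of C that is affected: a dense switch like the one below may otherwise be compiled into an indirect "jmp *table(,%reg,8)":

    /* with -fno-jump-tables this compiles to a compare/branch cascade
     * instead of an indirect jump through a table of case labels */
    int classify(int c)
    {
            switch (c) {
            case 0:  return 10;
            case 1:  return 11;
            case 2:  return 12;
            case 3:  return 13;
            case 4:  return 14;
            case 5:  return 15;
            default: return -1;
            }
    }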
*/ @@ -94,6 +95,7 @@ SYM_CODE_START(entry_SYSCALL_64) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR /* Construct struct pt_regs on stack */ pushq $__USER_DS /* pt_regs->ss */ @@ -276,6 +278,7 @@ SYM_FUNC_END(__switch_to_asm) .pushsection .text, "ax" SYM_CODE_START(ret_from_fork) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR // copy_thread movq %rax, %rdi call schedule_tail /* rdi: 'prev' task parameter */ @@ -350,6 +353,7 @@ SYM_CODE_END(ret_from_fork) .macro idtentry vector asmsym cfunc has_error_code:req SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS offset=\has_error_code*8 + ENDBR ASM_CLAC .if \has_error_code == 0 @@ -417,6 +421,7 @@ SYM_CODE_END(\asmsym) .macro idtentry_mce_db vector asmsym cfunc SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS + ENDBR ASM_CLAC pushq $-1 /* ORIG_RAX: no syscall to restart */ @@ -472,6 +477,7 @@ SYM_CODE_END(\asmsym) .macro idtentry_vc vector asmsym cfunc SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS + ENDBR ASM_CLAC /* @@ -533,6 +539,7 @@ SYM_CODE_END(\asmsym) .macro idtentry_df vector asmsym cfunc SYM_CODE_START(\asmsym) UNWIND_HINT_IRET_REGS offset=8 + ENDBR ASM_CLAC /* paranoid_entry returns GS information for paranoid_exit in EBX. */ @@ -544,6 +551,9 @@ SYM_CODE_START(\asmsym) movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ call \cfunc + /* For some configurations \cfunc ends up being a noreturn. */ + REACHABLE + jmp paranoid_exit _ASM_NOKPROBE(\asmsym) @@ -564,6 +574,7 @@ __irqentry_text_start: .align 16 .globl __irqentry_text_end __irqentry_text_end: + ANNOTATE_NOENDBR SYM_CODE_START_LOCAL(common_interrupt_return) SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) @@ -608,8 +619,8 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) /* Restore RDI. */ popq %rdi - SWAPGS - INTERRUPT_RETURN + swapgs + jmp .Lnative_iret SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL) @@ -626,9 +637,14 @@ SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL) * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization * when returning from IPI handler. */ - INTERRUPT_RETURN +#ifdef CONFIG_XEN_PV +SYM_INNER_LABEL(early_xen_iret_patch, SYM_L_GLOBAL) + ANNOTATE_NOENDBR + .byte 0xe9 + .long .Lnative_iret - (. + 4) +#endif -SYM_INNER_LABEL_ALIGN(native_iret, SYM_L_GLOBAL) +.Lnative_iret: UNWIND_HINT_IRET_REGS /* * Are we returning to a stack segment from the LDT? Note: in @@ -640,6 +656,7 @@ SYM_INNER_LABEL_ALIGN(native_iret, SYM_L_GLOBAL) #endif SYM_INNER_LABEL(native_irq_return_iret, SYM_L_GLOBAL) + ANNOTATE_NOENDBR // exc_double_fault /* * This may fault. Non-paranoid faults on return to userspace are * handled by fixup_bad_iret. These include #SS, #GP, and #NP. @@ -734,6 +751,7 @@ SYM_FUNC_START(asm_load_gs_index) FRAME_BEGIN swapgs .Lgs_change: + ANNOTATE_NOENDBR // error_entry movl %edi, %gs 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE swapgs @@ -804,6 +822,7 @@ SYM_CODE_END(exc_xen_hypervisor_callback) */ SYM_CODE_START(xen_failsafe_callback) UNWIND_HINT_EMPTY + ENDBR movl %ds, %ecx cmpw %cx, 0x10(%rsp) jne 1f @@ -1063,6 +1082,7 @@ SYM_CODE_END(error_return) */ SYM_CODE_START(asm_exc_nmi) UNWIND_HINT_IRET_REGS + ENDBR /* * We allow breakpoints in NMIs. If a breakpoint occurs, then @@ -1310,6 +1330,7 @@ first_nmi: #endif repeat_nmi: + ANNOTATE_NOENDBR // this code /* * If there was a nested NMI, the first NMI's iret will return * here. 
But NMIs are still enabled and we can take another @@ -1338,6 +1359,7 @@ repeat_nmi: .endr subq $(5*8), %rsp end_repeat_nmi: + ANNOTATE_NOENDBR // this code /* * Everything below this point can be preempted by a nested NMI. @@ -1421,6 +1443,7 @@ SYM_CODE_END(asm_exc_nmi) */ SYM_CODE_START(ignore_sysret) UNWIND_HINT_EMPTY + ENDBR mov $-ENOSYS, %eax sysretl SYM_CODE_END(ignore_sysret) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 0051cf5c792d..4fdb007cddbd 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -48,6 +48,7 @@ */ SYM_CODE_START(entry_SYSENTER_compat) UNWIND_HINT_EMPTY + ENDBR /* Interrupts are off on entry. */ SWAPGS @@ -147,6 +148,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) popfq jmp .Lsysenter_flags_fixed SYM_INNER_LABEL(__end_entry_SYSENTER_compat, SYM_L_GLOBAL) + ANNOTATE_NOENDBR // is_sysenter_singlestep SYM_CODE_END(entry_SYSENTER_compat) /* @@ -198,6 +200,7 @@ SYM_CODE_END(entry_SYSENTER_compat) */ SYM_CODE_START(entry_SYSCALL_compat) UNWIND_HINT_EMPTY + ENDBR /* Interrupts are off on entry. */ swapgs @@ -211,6 +214,7 @@ SYM_CODE_START(entry_SYSCALL_compat) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR /* Construct struct pt_regs on stack */ pushq $__USER32_DS /* pt_regs->ss */ @@ -340,6 +344,7 @@ SYM_CODE_END(entry_SYSCALL_compat) */ SYM_CODE_START(entry_INT80_compat) UNWIND_HINT_EMPTY + ENDBR /* * Interrupts are off on entry. */ diff --git a/arch/x86/entry/syscalls/Makefile b/arch/x86/entry/syscalls/Makefile index 5b3efed0e4e8..7f3886eeb2ff 100644 --- a/arch/x86/entry/syscalls/Makefile +++ b/arch/x86/entry/syscalls/Makefile @@ -67,7 +67,7 @@ uapisyshdr-y += unistd_32.h unistd_64.h unistd_x32.h syshdr-y += syscalls_32.h syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h unistd_64_x32.h syshdr-$(CONFIG_X86_64) += syscalls_64.h -syshdr-$(CONFIG_X86_X32) += syscalls_x32.h +syshdr-$(CONFIG_X86_X32_ABI) += syscalls_x32.h syshdr-$(CONFIG_XEN) += xen-hypercalls.h uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y)) diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 58eee6402832..9b10c8c76087 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -76,6 +76,7 @@ extern int alternatives_patched; extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); extern void apply_retpolines(s32 *start, s32 *end); +extern void apply_ibt_endbr(s32 *start, s32 *end); struct module; diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index bab883c0b6fe..4d20a293c6fd 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -4,6 +4,7 @@ #include <linux/stringify.h> #include <linux/instrumentation.h> +#include <linux/objtool.h> /* * Despite that some emulators terminate on UD2, we use it for WARN(). 
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 33d41e350c79..86e5e4e26fcb 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -7,6 +7,7 @@ #include <linux/topology.h> #include <linux/nodemask.h> #include <linux/percpu.h> +#include <asm/ibt.h> #ifdef CONFIG_SMP @@ -72,4 +73,7 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c); #else static inline void init_ia32_feat_ctl(struct cpuinfo_x86 *c) {} #endif + +extern __noendbr void cet_disable(void); + #endif /* _ASM_X86_CPU_H */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 3edf05e98e58..73e643ae94b6 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -388,6 +388,7 @@ #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ +#define X86_FEATURE_IBT (18*32+20) /* Indirect Branch Tracking */ #define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ #define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ #define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 03cb12775043..98938a68251c 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -7,6 +7,7 @@ #include <asm/tlb.h> #include <asm/nospec-branch.h> #include <asm/mmu_context.h> +#include <asm/ibt.h> #include <linux/build_bug.h> #include <linux/kernel.h> #include <linux/pgtable.h> @@ -120,8 +121,12 @@ extern asmlinkage u64 __efi_call(void *fp, ...); efi_enter_mm(); \ }) -#define arch_efi_call_virt(p, f, args...) \ - efi_call((void *)p->f, args) \ +#define arch_efi_call_virt(p, f, args...) ({ \ + u64 ret, ibt = ibt_save(); \ + ret = efi_call((void *)p->f, args); \ + ibt_restore(ibt); \ + ret; \ +}) #define arch_efi_call_virt_teardown() \ ({ \ diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h new file mode 100644 index 000000000000..689880eca9ba --- /dev/null +++ b/arch/x86/include/asm/ibt.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IBT_H +#define _ASM_X86_IBT_H + +#include <linux/types.h> + +/* + * The rules for enabling IBT are: + * + * - CC_HAS_IBT: the toolchain supports it + * - X86_KERNEL_IBT: it is selected in Kconfig + * - !__DISABLE_EXPORTS: this is regular kernel code + * + * Esp. that latter one is a bit non-obvious, but some code like compressed, + * purgatory, realmode etc.. is built with custom CFLAGS that do not include + * -fcf-protection=branch and things will go *bang*. + * + * When all the above are satisfied, HAS_KERNEL_IBT will be 1, otherwise 0. + */ +#if defined(CONFIG_X86_KERNEL_IBT) && !defined(__DISABLE_EXPORTS) + +#define HAS_KERNEL_IBT 1 + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_X86_64 +#define ASM_ENDBR "endbr64\n\t" +#else +#define ASM_ENDBR "endbr32\n\t" +#endif + +#define __noendbr __attribute__((nocf_check)) + +static inline __attribute_const__ u32 gen_endbr(void) +{ + u32 endbr; + + /* + * Generate ENDBR64 in a way that is sure to not result in + * an ENDBR64 instruction as immediate. + */ + asm ( "mov $~0xfa1e0ff3, %[endbr]\n\t" + "not %[endbr]\n\t" + : [endbr] "=&r" (endbr) ); + + return endbr; +} + +static inline __attribute_const__ u32 gen_endbr_poison(void) +{ + /* + * 4 byte NOP that isn't NOP4 (in fact it is OSP NOP3), such that it + * will be unique to (former) ENDBR sites. 
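gen_endbr() above builds the ENDBR64 opcode (bytes f3 0f 1e fa, i.e. the u32 0xfa1e0ff3) from its complement with a mov/not pair, so the constant itself never appears in the instruction stream as something an indirect branch could land on. A small user-space sketch of the same check the kernel's is_endbr() performs (x86-64, binary built with -fcf-protection=branch; the kernel version additionally accepts ENDBR32 and the poison pattern):

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    static int starts_with_endbr64(const void *func)
    {
            uint32_t insn;

            memcpy(&insn, func, sizeof(insn));      /* first 4 code bytes */
            return insn == 0xfa1e0ff3u;             /* f3 0f 1e fa == endbr64 */
    }

    int main(void)
    {
            printf("main starts with endbr64: %d\n",
                   starts_with_endbr64((const void *)main));
            return 0;
    }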
+ */ + return 0x001f0f66; /* osp nopl (%rax) */ +} + +static inline bool is_endbr(u32 val) +{ + if (val == gen_endbr_poison()) + return true; + + val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */ + return val == gen_endbr(); +} + +extern __noendbr u64 ibt_save(void); +extern __noendbr void ibt_restore(u64 save); + +#else /* __ASSEMBLY__ */ + +#ifdef CONFIG_X86_64 +#define ENDBR endbr64 +#else +#define ENDBR endbr32 +#endif + +#endif /* __ASSEMBLY__ */ + +#else /* !IBT */ + +#define HAS_KERNEL_IBT 0 + +#ifndef __ASSEMBLY__ + +#define ASM_ENDBR + +#define __noendbr + +static inline bool is_endbr(u32 val) { return false; } + +static inline u64 ibt_save(void) { return 0; } +static inline void ibt_restore(u64 save) { } + +#else /* __ASSEMBLY__ */ + +#define ENDBR + +#endif /* __ASSEMBLY__ */ + +#endif /* CONFIG_X86_KERNEL_IBT */ + +#define ENDBR_INSN_SIZE (4*HAS_KERNEL_IBT) + +#endif /* _ASM_X86_IBT_H */ diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 1345088e9902..7924f27f5c8b 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -5,6 +5,8 @@ /* Interrupts/Exceptions */ #include <asm/trapnr.h> +#define IDT_ALIGN (8 * (1 + HAS_KERNEL_IBT)) + #ifndef __ASSEMBLY__ #include <linux/entry-common.h> #include <linux/hardirq.h> @@ -480,7 +482,7 @@ __visible noinstr void func(struct pt_regs *regs, \ /* * ASM code to emit the common vector entry stubs where each stub is - * packed into 8 bytes. + * packed into IDT_ALIGN bytes. * * Note, that the 'pushq imm8' is emitted via '.byte 0x6a, vector' because * GCC treats the local vector variable as unsigned int and would expand @@ -492,33 +494,33 @@ __visible noinstr void func(struct pt_regs *regs, \ * point is to mask off the bits above bit 7 because the push is sign * extending. */ - .align 8 + .align IDT_ALIGN SYM_CODE_START(irq_entries_start) vector=FIRST_EXTERNAL_VECTOR .rept NR_EXTERNAL_VECTORS UNWIND_HINT_IRET_REGS 0 : + ENDBR .byte 0x6a, vector jmp asm_common_interrupt - nop - /* Ensure that the above is 8 bytes max */ - . = 0b + 8 + /* Ensure that the above is IDT_ALIGN bytes max */ + .fill 0b + IDT_ALIGN - ., 1, 0xcc vector = vector+1 .endr SYM_CODE_END(irq_entries_start) #ifdef CONFIG_X86_LOCAL_APIC - .align 8 + .align IDT_ALIGN SYM_CODE_START(spurious_entries_start) vector=FIRST_SYSTEM_VECTOR .rept NR_SYSTEM_VECTORS UNWIND_HINT_IRET_REGS 0 : + ENDBR .byte 0x6a, vector jmp asm_spurious_interrupt - nop - /* Ensure that the above is 8 bytes max */ - . 
= 0b + 8 + /* Ensure that the above is IDT_ALIGN bytes max */ + .fill 0b + IDT_ALIGN - ., 1, 0xcc vector = vector+1 .endr SYM_CODE_END(spurious_entries_start) @@ -615,6 +617,11 @@ DECLARE_IDTENTRY_DF(X86_TRAP_DF, exc_double_fault); DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_DF, xenpv_exc_double_fault); #endif +/* #CP */ +#ifdef CONFIG_X86_KERNEL_IBT +DECLARE_IDTENTRY_ERRORCODE(X86_TRAP_CP, exc_control_protection); +#endif + /* #VC */ #ifdef CONFIG_AMD_MEM_ENCRYPT DECLARE_IDTENTRY_VC(X86_TRAP_VC, exc_vmm_communication); diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h index ae9d40f6c706..63f818aedf77 100644 --- a/arch/x86/include/asm/irq_stack.h +++ b/arch/x86/include/asm/irq_stack.h @@ -3,6 +3,7 @@ #define _ASM_X86_IRQ_STACK_H #include <linux/ptrace.h> +#include <linux/objtool.h> #include <asm/processor.h> @@ -99,7 +100,8 @@ } #define ASM_CALL_ARG0 \ - "call %P[__func] \n" + "call %P[__func] \n" \ + ASM_REACHABLE #define ASM_CALL_ARG1 \ "movq %[arg1], %%rdi \n" \ diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 87761396e8cc..111104d1c2cd 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -141,13 +141,8 @@ static __always_inline void arch_local_irq_restore(unsigned long flags) #ifdef CONFIG_X86_64 #ifdef CONFIG_XEN_PV #define SWAPGS ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV -#define INTERRUPT_RETURN \ - ANNOTATE_RETPOLINE_SAFE; \ - ALTERNATIVE_TERNARY("jmp *paravirt_iret(%rip);", \ - X86_FEATURE_XENPV, "jmp xen_iret;", "jmp native_iret;") #else #define SWAPGS swapgs -#define INTERRUPT_RETURN jmp native_iret #endif #endif #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 030907922bd0..85865f1645bd 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -3,6 +3,7 @@ #define _ASM_X86_LINKAGE_H #include <linux/stringify.h> +#include <asm/ibt.h> #undef notrace #define notrace __attribute__((no_instrument_function)) @@ -34,5 +35,35 @@ #endif /* __ASSEMBLY__ */ +/* SYM_FUNC_START -- use for global functions */ +#define SYM_FUNC_START(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ + ENDBR + +/* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */ +#define SYM_FUNC_START_NOALIGN(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) \ + ENDBR + +/* SYM_FUNC_START_LOCAL -- use for local functions */ +#define SYM_FUNC_START_LOCAL(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \ + ENDBR + +/* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */ +#define SYM_FUNC_START_LOCAL_NOALIGN(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) \ + ENDBR + +/* SYM_FUNC_START_WEAK -- use for weak functions */ +#define SYM_FUNC_START_WEAK(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) \ + ENDBR + +/* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */ +#define SYM_FUNC_START_WEAK_NOALIGN(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ + ENDBR + #endif /* _ASM_X86_LINKAGE_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 9f1741ac4769..0eb90d21049e 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -362,11 +362,29 @@ #define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c #define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d - #define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 #define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 #define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 +/* Control-flow Enforcement 
Technology MSRs */ +#define MSR_IA32_U_CET 0x000006a0 /* user mode cet */ +#define MSR_IA32_S_CET 0x000006a2 /* kernel mode cet */ +#define CET_SHSTK_EN BIT_ULL(0) +#define CET_WRSS_EN BIT_ULL(1) +#define CET_ENDBR_EN BIT_ULL(2) +#define CET_LEG_IW_EN BIT_ULL(3) +#define CET_NO_TRACK_EN BIT_ULL(4) +#define CET_SUPPRESS_DISABLE BIT_ULL(5) +#define CET_RESERVED (BIT_ULL(6) | BIT_ULL(7) | BIT_ULL(8) | BIT_ULL(9)) +#define CET_SUPPRESS BIT_ULL(10) +#define CET_WAIT_ENDBR BIT_ULL(11) + +#define MSR_IA32_PL0_SSP 0x000006a4 /* ring-0 shadow stack pointer */ +#define MSR_IA32_PL1_SSP 0x000006a5 /* ring-1 shadow stack pointer */ +#define MSR_IA32_PL2_SSP 0x000006a6 /* ring-2 shadow stack pointer */ +#define MSR_IA32_PL3_SSP 0x000006a7 /* ring-3 shadow stack pointer */ +#define MSR_IA32_INT_SSP_TAB 0x000006a8 /* exception shadow stack table */ + /* Hardware P state interface */ #define MSR_PPERF 0x0000064e #define MSR_PERF_LIMIT_REASONS 0x0000064f diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 0d76502cc6f5..964442b99245 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -666,6 +666,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); ".globl " PV_THUNK_NAME(func) ";" \ ".type " PV_THUNK_NAME(func) ", @function;" \ PV_THUNK_NAME(func) ":" \ + ASM_ENDBR \ FRAME_BEGIN \ PV_SAVE_ALL_CALLER_REGS \ "call " #func ";" \ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index e1591467668e..89df6c6617f5 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -272,7 +272,6 @@ struct paravirt_patch_template { extern struct pv_info pv_info; extern struct paravirt_patch_template pv_ops; -extern void (*paravirt_iret)(void); #define PARAVIRT_PATCH(x) \ (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index a87e7c33d5ac..91d0f93a00c7 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -837,7 +837,7 @@ bool xen_set_default_idle(void); #define xen_set_default_idle 0 #endif -void stop_this_cpu(void *dummy); +void __noreturn stop_this_cpu(void *dummy); void microcode_check(void); enum l1tf_mitigations { diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 1474cf96251d..892fd8c3a6f7 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -2,6 +2,8 @@ #ifndef __ASM_QSPINLOCK_PARAVIRT_H #define __ASM_QSPINLOCK_PARAVIRT_H +#include <asm/ibt.h> + /* * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit * registers. For i386, however, only 1 32-bit register needs to be saved @@ -39,6 +41,7 @@ asm (".pushsection .text;" ".type " PV_UNLOCK ", @function;" ".align 4,0x90;" PV_UNLOCK ": " + ASM_ENDBR FRAME_BEGIN "push %rdx;" "mov $0x1,%eax;" diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index b228c9d44ee7..656ed6531d03 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -4,6 +4,7 @@ #include <linux/const.h> #include <asm/alternative.h> +#include <asm/ibt.h> /* * Constructor for a conventional segment GDT (or LDT) entry. @@ -275,7 +276,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) * vector has no error code (two bytes), a 'push $vector_number' (two * bytes), and a jump to the common entry code (up to five bytes). 
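Several of the hunks above and below (the paravirt callee-save thunk, the qspinlock PV unlock slowpath, later kvm_vcpu_is_preempted and the paravirt stubs) add ASM_ENDBR by hand: functions written as top-level asm() are not instrumented by the compiler, yet they are reached through indirect calls, so they need their own landing pad. A minimal user-space sketch of that pattern (my_ret0 is an illustrative name; the kernel uses ASM_ENDBR so the instruction compiles away when IBT is off):

    /* assumes x86-64 gcc/clang and an assembler that knows endbr64 */
    asm(".pushsection .text, \"ax\"\n"
        ".global my_ret0\n"
        ".type my_ret0, @function\n"
        "my_ret0:\n\t"
        "endbr64\n\t"                   /* hand-written landing pad */
        "xor %eax, %eax\n\t"
        "ret\n\t"
        ".size my_ret0, . - my_ret0\n"
        ".popsection");

    extern int my_ret0(void);           /* safe to call indirectly under IBT */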
*/ -#define EARLY_IDT_HANDLER_SIZE 9 +#define EARLY_IDT_HANDLER_SIZE (9 + ENDBR_INSN_SIZE) /* * xen_early_idt_handler_array is for Xen pv guests: for each entry in @@ -283,7 +284,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node) * pop %rcx; pop %r11; jmp early_idt_handler_array[i]; summing up to * max 8 bytes. */ -#define XEN_EARLY_IDT_HANDLER_SIZE 8 +#define XEN_EARLY_IDT_HANDLER_SIZE (8 + ENDBR_INSN_SIZE) #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index a12458a7a8d4..896e48d45828 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -8,6 +8,7 @@ #include <linux/linkage.h> #include <asm/page_types.h> +#include <asm/ibt.h> #ifdef __i386__ @@ -119,7 +120,7 @@ void *extend_brk(size_t size, size_t align); * executable.) */ #define RESERVE_BRK(name,sz) \ - static void __section(".discard.text") __used notrace \ + static void __section(".discard.text") __noendbr __used notrace \ __brk_reservation_fn_##name##__(void) { \ asm volatile ( \ ".pushsection .brk_reservation,\"aw\",@nobits;" \ diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index 6a2827d0681f..59358d1bf880 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -159,7 +159,7 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #endif /* CONFIG_IA32_EMULATION */ -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI /* * For the x32 ABI, we need to create a stub for compat_sys_*() which is aware * of the x86-64-style parameter ordering of x32 syscalls. The syscalls common @@ -177,12 +177,12 @@ extern long __ia32_sys_ni_syscall(const struct pt_regs *regs); #define __X32_COMPAT_SYS_NI(name) \ __SYS_NI(x64, compat_sys_##name) -#else /* CONFIG_X86_X32 */ +#else /* CONFIG_X86_X32_ABI */ #define __X32_COMPAT_SYS_STUB0(name) #define __X32_COMPAT_SYS_STUBx(x, name, ...) #define __X32_COMPAT_COND_SYSCALL(name) #define __X32_COMPAT_SYS_NI(name) -#endif /* CONFIG_X86_X32 */ +#endif /* CONFIG_X86_X32_ABI */ #ifdef CONFIG_COMPAT diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 4cc18ba1b75e..d20ab0921480 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -97,24 +97,40 @@ union text_poke_insn { }; static __always_inline -void *text_gen_insn(u8 opcode, const void *addr, const void *dest) +void __text_gen_insn(void *buf, u8 opcode, const void *addr, const void *dest, int size) { - static union text_poke_insn insn; /* per instance */ - int size = text_opcode_size(opcode); + union text_poke_insn *insn = buf; + + BUG_ON(size < text_opcode_size(opcode)); + + /* + * Hide the addresses to avoid the compiler folding in constants when + * referencing code, these can mess up annotations like + * ANNOTATE_NOENDBR. + */ + OPTIMIZER_HIDE_VAR(insn); + OPTIMIZER_HIDE_VAR(addr); + OPTIMIZER_HIDE_VAR(dest); - insn.opcode = opcode; + insn->opcode = opcode; if (size > 1) { - insn.disp = (long)dest - (long)(addr + size); + insn->disp = (long)dest - (long)(addr + size); if (size == 2) { /* - * Ensure that for JMP9 the displacement + * Ensure that for JMP8 the displacement * actually fits the signed byte. 
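__text_gen_insn() in the hunk above writes a one-byte opcode followed by a displacement that is relative to the end of the instruction. A standalone sketch of that arithmetic (hypothetical helpers, mirroring the kernel logic including the JMP8 signed-byte check):

    #include <stdint.h>
    #include <string.h>

    /* emit "opcode + rel32" (0xe8 CALL / 0xe9 JMP32) into buf[0..4] */
    static void gen_rel32(uint8_t *buf, uint8_t opcode,
                          const void *addr, const void *dest)
    {
            int32_t disp = (int32_t)((intptr_t)dest - ((intptr_t)addr + 5));

            buf[0] = opcode;
            memcpy(&buf[1], &disp, sizeof(disp));   /* little-endian rel32 */
    }

    /* the 2-byte JMP8 (0xeb) only has a signed 8-bit displacement; this is
     * the condition behind the BUG_ON((disp >> 31) != (disp >> 7)) above */
    static int fits_jmp8(const void *addr, const void *dest)
    {
            int32_t disp = (int32_t)((intptr_t)dest - ((intptr_t)addr + 2));

            return (disp >> 31) == (disp >> 7);
    }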
*/ - BUG_ON((insn.disp >> 31) != (insn.disp >> 7)); + BUG_ON((insn->disp >> 31) != (insn->disp >> 7)); } } +} +static __always_inline +void *text_gen_insn(u8 opcode, const void *addr, const void *dest) +{ + static union text_poke_insn insn; /* per instance */ + __text_gen_insn(&insn, opcode, addr, dest, text_opcode_size(opcode)); return &insn.text; } diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 6221be7cafc3..35317c5c551d 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -18,6 +18,8 @@ void __init trap_init(void); asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *eregs); #endif +extern bool ibt_selftest(void); + #ifdef CONFIG_X86_F00F_BUG /* For handling the FOOF bug */ void handle_invalid_op(struct pt_regs *regs); diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index 98aa103eb4ab..2963a2f5dbc4 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -37,7 +37,7 @@ struct vdso_image { extern const struct vdso_image vdso_image_64; #endif -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI extern const struct vdso_image vdso_image_x32; #endif diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h index bcba3c643e63..c47cc7f2feeb 100644 --- a/arch/x86/include/uapi/asm/processor-flags.h +++ b/arch/x86/include/uapi/asm/processor-flags.h @@ -130,6 +130,8 @@ #define X86_CR4_SMAP _BITUL(X86_CR4_SMAP_BIT) #define X86_CR4_PKE_BIT 22 /* enable Protection Keys support */ #define X86_CR4_PKE _BITUL(X86_CR4_PKE_BIT) +#define X86_CR4_CET_BIT 23 /* enable Control-flow Enforcement Technology */ +#define X86_CR4_CET _BITUL(X86_CR4_CET_BIT) /* * x86-64 Task Priority Register, CR8 diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index b4e576600969..d374cb3cf024 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -115,6 +115,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) } extern s32 __retpoline_sites[], __retpoline_sites_end[]; +extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[]; extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; void text_poke_early(void *addr, const void *opcode, size_t len); @@ -512,6 +513,42 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } #endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */ +#ifdef CONFIG_X86_KERNEL_IBT + +/* + * Generated by: objtool --ibt + */ +void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + u32 endbr, poison = gen_endbr_poison(); + void *addr = (void *)s + *s; + + if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr))) + continue; + + if (WARN_ON_ONCE(!is_endbr(endbr))) + continue; + + DPRINTK("ENDBR at: %pS (%px)", addr, addr); + + /* + * When we have IBT, the lack of ENDBR will trigger #CP + */ + DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr); + DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr); + text_poke_early(addr, &poison, 4); + } +} + +#else + +void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) { } + +#endif /* CONFIG_X86_KERNEL_IBT */ + #ifdef CONFIG_SMP static void alternatives_smp_lock(const s32 *start, const s32 *end, u8 *text, u8 *text_end) @@ -713,34 +750,39 @@ asm ( " .pushsection .init.text, \"ax\", @progbits\n" " .type int3_magic, @function\n" "int3_magic:\n" + ANNOTATE_NOENDBR " movl $1, (%" _ASM_ARG1 
")\n" ASM_RET " .size int3_magic, .-int3_magic\n" " .popsection\n" ); -extern __initdata unsigned long int3_selftest_ip; /* defined in asm below */ +extern void int3_selftest_ip(void); /* defined in asm below */ static int __init int3_exception_notify(struct notifier_block *self, unsigned long val, void *data) { + unsigned long selftest = (unsigned long)&int3_selftest_ip; struct die_args *args = data; struct pt_regs *regs = args->regs; + OPTIMIZER_HIDE_VAR(selftest); + if (!regs || user_mode(regs)) return NOTIFY_DONE; if (val != DIE_INT3) return NOTIFY_DONE; - if (regs->ip - INT3_INSN_SIZE != int3_selftest_ip) + if (regs->ip - INT3_INSN_SIZE != selftest) return NOTIFY_DONE; int3_emulate_call(regs, (unsigned long)&int3_magic); return NOTIFY_STOP; } -static void __init int3_selftest(void) +/* Must be noinline to ensure uniqueness of int3_selftest_ip. */ +static noinline void __init int3_selftest(void) { static __initdata struct notifier_block int3_exception_nb = { .notifier_call = int3_exception_notify, @@ -753,18 +795,12 @@ static void __init int3_selftest(void) /* * Basically: int3_magic(&val); but really complicated :-) * - * Stick the address of the INT3 instruction into int3_selftest_ip, - * then trigger the INT3, padded with NOPs to match a CALL instruction - * length. + * INT3 padded with NOP to CALL_INSN_SIZE. The int3_exception_nb + * notifier above will emulate CALL for us. */ - asm volatile ("1: int3; nop; nop; nop; nop\n\t" - ".pushsection .init.data,\"aw\"\n\t" - ".align " __ASM_SEL(4, 8) "\n\t" - ".type int3_selftest_ip, @object\n\t" - ".size int3_selftest_ip, " __ASM_SEL(4, 8) "\n\t" - "int3_selftest_ip:\n\t" - __ASM_SEL(.long, .quad) " 1b\n\t" - ".popsection\n\t" + asm volatile ("int3_selftest_ip:\n\t" + ANNOTATE_NOENDBR + " int3; nop; nop; nop; nop\n\t" : ASM_CALL_CONSTRAINT : __ASM_SEL_RAW(a, D) (&val) : "memory"); @@ -831,6 +867,8 @@ void __init alternative_instructions(void) */ apply_alternatives(__alt_instructions, __alt_instructions_end); + apply_ibt_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); + #ifdef CONFIG_SMP /* Patch to UP if other cpus not imminent. 
*/ if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) { diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 241dda687eb9..60e330cdbd17 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -232,6 +232,7 @@ #include <asm/paravirt.h> #include <asm/reboot.h> #include <asm/nospec-branch.h> +#include <asm/ibt.h> #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) extern int (*console_blank_hook)(int); @@ -598,6 +599,7 @@ static long __apm_bios_call(void *_call) struct desc_struct save_desc_40; struct desc_struct *gdt; struct apm_bios_call *call = _call; + u64 ibt; cpu = get_cpu(); BUG_ON(cpu != 0); @@ -607,11 +609,13 @@ static long __apm_bios_call(void *_call) apm_irq_save(flags); firmware_restrict_branch_speculation_start(); + ibt = ibt_save(); APM_DO_SAVE_SEGS; apm_bios_call_asm(call->func, call->ebx, call->ecx, &call->eax, &call->ebx, &call->ecx, &call->edx, &call->esi); APM_DO_RESTORE_SEGS; + ibt_restore(ibt); firmware_restrict_branch_speculation_end(); apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; @@ -676,6 +680,7 @@ static long __apm_bios_call_simple(void *_call) struct desc_struct save_desc_40; struct desc_struct *gdt; struct apm_bios_call *call = _call; + u64 ibt; cpu = get_cpu(); BUG_ON(cpu != 0); @@ -685,10 +690,12 @@ static long __apm_bios_call_simple(void *_call) apm_irq_save(flags); firmware_restrict_branch_speculation_start(); + ibt = ibt_save(); APM_DO_SAVE_SEGS; error = apm_bios_call_simple_asm(call->func, call->ebx, call->ecx, &call->eax); APM_DO_RESTORE_SEGS; + ibt_restore(ibt); firmware_restrict_branch_speculation_end(); apm_irq_restore(flags); gdt[0x40 / 8] = save_desc_40; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 64deb7727d00..ed4417500700 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -59,6 +59,7 @@ #include <asm/cpu_device_id.h> #include <asm/uv/uv.h> #include <asm/sigframe.h> +#include <asm/traps.h> #include "cpu.h" @@ -438,7 +439,8 @@ out: /* These bits should not change their value after CPU init is finished. 
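The APM changes above (like the earlier arch_efi_call_virt() change) wrap firmware calls in ibt_save()/ibt_restore(): ENDBR enforcement is temporarily dropped from MSR_IA32_S_CET because BIOS/EFI code was not built with landing pads, then the previous state is put back. A kernel-context sketch of the calling pattern (fw_entry is an illustrative callback, not a real interface):

    static long call_legacy_firmware(long (*fw_entry)(void))
    {
            u64 ibt;
            long ret;

            ibt = ibt_save();       /* clears CET_ENDBR_EN when IBT is enabled */
            ret = fw_entry();       /* firmware code has no ENDBR instructions */
            ibt_restore(ibt);       /* restore the previous ENDBR_EN state */

            return ret;
    }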
*/ static const unsigned long cr4_pinned_mask = - X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE; + X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | + X86_CR4_FSGSBASE | X86_CR4_CET; static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); static unsigned long cr4_pinned_bits __ro_after_init; @@ -592,6 +594,58 @@ static __init int setup_disable_pku(char *arg) __setup("nopku", setup_disable_pku); #endif /* CONFIG_X86_64 */ +#ifdef CONFIG_X86_KERNEL_IBT + +__noendbr u64 ibt_save(void) +{ + u64 msr = 0; + + if (cpu_feature_enabled(X86_FEATURE_IBT)) { + rdmsrl(MSR_IA32_S_CET, msr); + wrmsrl(MSR_IA32_S_CET, msr & ~CET_ENDBR_EN); + } + + return msr; +} + +__noendbr void ibt_restore(u64 save) +{ + u64 msr; + + if (cpu_feature_enabled(X86_FEATURE_IBT)) { + rdmsrl(MSR_IA32_S_CET, msr); + msr &= ~CET_ENDBR_EN; + msr |= (save & CET_ENDBR_EN); + wrmsrl(MSR_IA32_S_CET, msr); + } +} + +#endif + +static __always_inline void setup_cet(struct cpuinfo_x86 *c) +{ + u64 msr = CET_ENDBR_EN; + + if (!HAS_KERNEL_IBT || + !cpu_feature_enabled(X86_FEATURE_IBT)) + return; + + wrmsrl(MSR_IA32_S_CET, msr); + cr4_set_bits(X86_CR4_CET); + + if (!ibt_selftest()) { + pr_err("IBT selftest: Failed!\n"); + setup_clear_cpu_cap(X86_FEATURE_IBT); + return; + } +} + +__noendbr void cet_disable(void) +{ + if (cpu_feature_enabled(X86_FEATURE_IBT)) + wrmsrl(MSR_IA32_S_CET, 0); +} + /* * Some CPU features depend on higher CPUID levels, which may not always * be available due to CPUID level capping or broken virtualization @@ -1709,6 +1763,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) x86_init_rdrand(c); setup_pku(c); + setup_cet(c); /* * Clear/Set all flags overridden by options, need do it @@ -1777,6 +1832,8 @@ void enable_sep_cpu(void) void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); + if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT)) + pr_info("CET detected: Indirect Branch Tracking enabled\n"); #ifdef CONFIG_X86_32 sysenter_setup(); enable_sep_cpu(); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 7cc540e6de0c..1e31c7d21597 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -316,12 +316,12 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) unsigned long offset; unsigned long npages; unsigned long size; - unsigned long retq; unsigned long *ptr; void *trampoline; void *ip; /* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */ unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 }; + unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE }; union ftrace_op_code_union op_ptr; int ret; @@ -359,12 +359,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) goto fail; ip = trampoline + size; - - /* The trampoline ends with ret(q) */ - retq = (unsigned long)ftrace_stub; - ret = copy_from_kernel_nofault(ip, (void *)retq, RET_SIZE); - if (WARN_ON(ret < 0)) - goto fail; + memcpy(ip, retq, RET_SIZE); /* No need to test direct calls on created trampolines */ if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 11ac028e30e4..4ec13608d3c6 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -145,6 +145,7 @@ SYM_FUNC_START(ftrace_caller) movq %rcx, RSP(%rsp) SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL) + ANNOTATE_NOENDBR /* Load the ftrace_ops into the 3rd parameter */ movq function_trace_op(%rip), %rdx @@ -155,6 +156,7 @@ SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL) movq $0, CS(%rsp) SYM_INNER_LABEL(ftrace_call, 
SYM_L_GLOBAL) + ANNOTATE_NOENDBR call ftrace_stub /* Handlers can change the RIP */ @@ -169,6 +171,7 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) * layout here. */ SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR jmp ftrace_epilogue SYM_FUNC_END(ftrace_caller); @@ -176,10 +179,10 @@ SYM_FUNC_END(ftrace_caller); SYM_FUNC_START(ftrace_epilogue) /* * This is weak to keep gas from relaxing the jumps. - * It is also used to copy the RET for trampolines. */ SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK) UNWIND_HINT_FUNC + ENDBR RET SYM_FUNC_END(ftrace_epilogue) @@ -192,6 +195,7 @@ SYM_FUNC_START(ftrace_regs_caller) /* save_mcount_regs fills in first two parameters */ SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL) + ANNOTATE_NOENDBR /* Load the ftrace_ops into the 3rd parameter */ movq function_trace_op(%rip), %rdx @@ -221,6 +225,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL) leaq (%rsp), %rcx SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) + ANNOTATE_NOENDBR call ftrace_stub /* Copy flags back to SS, to restore them */ @@ -248,6 +253,7 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) */ testq %rax, %rax SYM_INNER_LABEL(ftrace_regs_caller_jmp, SYM_L_GLOBAL) + ANNOTATE_NOENDBR jnz 1f restore_mcount_regs @@ -261,6 +267,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_jmp, SYM_L_GLOBAL) * to the return. */ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) + ANNOTATE_NOENDBR jmp ftrace_epilogue /* Swap the flags with orig_rax */ @@ -284,6 +291,7 @@ SYM_FUNC_START(__fentry__) jnz trace SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBAL) + ENDBR RET trace: @@ -307,7 +315,7 @@ EXPORT_SYMBOL(__fentry__) #ifdef CONFIG_FUNCTION_GRAPH_TRACER SYM_FUNC_START(return_to_handler) - subq $24, %rsp + subq $16, %rsp /* Save the return values */ movq %rax, (%rsp) @@ -319,7 +327,19 @@ SYM_FUNC_START(return_to_handler) movq %rax, %rdi movq 8(%rsp), %rdx movq (%rsp), %rax - addq $24, %rsp - JMP_NOSPEC rdi + + addq $16, %rsp + /* + * Jump back to the old return address. This cannot be JMP_NOSPEC rdi + * since IBT would demand that contain ENDBR, which simply isn't so for + * return addresses. Use a retpoline here to keep the RSB balanced. + */ + ANNOTATE_INTRA_FUNCTION_CALL + call .Ldo_rop + int3 +.Ldo_rop: + mov %rdi, (%rsp) + UNWIND_HINT_FUNC + RET SYM_FUNC_END(return_to_handler) #endif diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 9c63fc5988cd..b8e3019547a5 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -99,6 +99,7 @@ SYM_CODE_END(startup_64) SYM_CODE_START(secondary_startup_64) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR /* * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, * and someone has loaded a mapped page table. 
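The return_to_handler rework above avoids "JMP_NOSPEC rdi" because, under IBT, an indirect jmp must land on an ENDBR, while the original return address being jumped to is ordinary code in the middle of a caller. Going back through a RET sidesteps the check (RET targets are not IBT-tracked), and the patch uses an intra-function CALL so the return stays paired for the return-stack predictor. A rough user-space illustration of just the "return instead of indirect jump" point (x86-64 gcc/clang inline asm; sketch only, not something to use in real code):

    /* transfer to 'target' via RET: push it as a fake return address and
     * return to it, so 'target' does not need to start with an ENDBR */
    static void jmp_via_ret(void *target)
    {
            asm volatile("push %0\n\t"
                         "ret"
                         : : "r" (target) : "memory");
            __builtin_unreachable();
    }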
@@ -127,6 +128,7 @@ SYM_CODE_START(secondary_startup_64) */ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR /* * Retrieve the modifier (SME encryption mask if SME is active) to be @@ -192,6 +194,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) jmp *%rax 1: UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR // above /* * We must switch to a new descriptor in kernel space for the GDT @@ -299,6 +302,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) pushq %rax # target address in negative space lretq .Lafter_lret: + ANNOTATE_NOENDBR SYM_CODE_END(secondary_startup_64) #include "verify_cpu.S" @@ -328,6 +332,7 @@ SYM_CODE_END(start_cpu0) */ SYM_CODE_START_NOALIGN(vc_boot_ghcb) UNWIND_HINT_IRET_REGS offset=8 + ENDBR /* Build pt_regs */ PUSH_AND_CLEAR_REGS @@ -345,7 +350,6 @@ SYM_CODE_START_NOALIGN(vc_boot_ghcb) /* Remove Error Code */ addq $8, %rsp - /* Pure iret required here - don't use INTERRUPT_RETURN */ iretq SYM_CODE_END(vc_boot_ghcb) #endif @@ -372,9 +376,11 @@ SYM_CODE_START(early_idt_handler_array) .rept NUM_EXCEPTION_VECTORS .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0 UNWIND_HINT_IRET_REGS + ENDBR pushq $0 # Dummy error code, to make stack frame uniform .else UNWIND_HINT_IRET_REGS offset=8 + ENDBR .endif pushq $i # 72(%rsp) Vector number jmp early_idt_handler_common @@ -382,10 +388,11 @@ SYM_CODE_START(early_idt_handler_array) i = i + 1 .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc .endr - UNWIND_HINT_IRET_REGS offset=16 SYM_CODE_END(early_idt_handler_array) + ANNOTATE_NOENDBR // early_idt_handler_array[NUM_EXCEPTION_VECTORS] SYM_CODE_START_LOCAL(early_idt_handler_common) + UNWIND_HINT_IRET_REGS offset=16 /* * The stack is the hardware frame, an error code or zero, and the * vector number. @@ -426,11 +433,14 @@ SYM_CODE_END(early_idt_handler_common) * early_idt_handler_array can't be used because it returns via the * paravirtualized INTERRUPT_RETURN and pv-ops don't work that early. * + * XXX it does, fix this. + * * This handler will end up in the .init.text section and not be * available to boot secondary CPUs. */ SYM_CODE_START_NOALIGN(vc_no_ghcb) UNWIND_HINT_IRET_REGS offset=8 + ENDBR /* Build pt_regs */ PUSH_AND_CLEAR_REGS diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index df0fa695bb09..608eb63bf044 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -10,6 +10,7 @@ #include <asm/proto.h> #include <asm/desc.h> #include <asm/hw_irq.h> +#include <asm/idtentry.h> #define DPL0 0x0 #define DPL3 0x3 @@ -103,6 +104,10 @@ static const __initconst struct idt_data def_idts[] = { ISTG(X86_TRAP_MC, asm_exc_machine_check, IST_INDEX_MCE), #endif +#ifdef CONFIG_X86_KERNEL_IBT + INTG(X86_TRAP_CP, asm_exc_control_protection), +#endif + #ifdef CONFIG_AMD_MEM_ENCRYPT ISTG(X86_TRAP_VC, asm_exc_vmm_communication, IST_INDEX_VC), #endif @@ -272,7 +277,7 @@ void __init idt_setup_apic_and_irq_gates(void) idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true); for_each_clear_bit_from(i, system_vectors, FIRST_SYSTEM_VECTOR) { - entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); + entry = irq_entries_start + IDT_ALIGN * (i - FIRST_EXTERNAL_VECTOR); set_intr_gate(i, entry); } @@ -283,7 +288,7 @@ void __init idt_setup_apic_and_irq_gates(void) * system_vectors bitmap. Otherwise they show up in * /proc/interrupts. 
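Because every vector stub now opens with a 4-byte ENDBR, the stubs in irq_entries_start and spurious_entries_start no longer fit in 8 bytes; they are padded to IDT_ALIGN (8 without IBT, 16 with IBT) and idt.c computes the gate addresses with that stride, as in the hunk above. The arithmetic, as a tiny sketch (names are the kernel's, treated here as plain parameters):

    /* address of the stub for 'vector', given the packing stride */
    static void *vector_stub(void *entries_start, unsigned int idt_align,
                             unsigned int vector, unsigned int first_vector)
    {
            return (char *)entries_start + idt_align * (vector - first_vector);
    }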
*/ - entry = spurious_entries_start + 8 * (i - FIRST_SYSTEM_VECTOR); + entry = spurious_entries_start + IDT_ALIGN * (i - FIRST_SYSTEM_VECTOR); set_intr_gate(i, entry); } #endif diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 6290712cb36d..8ef933c03afa 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -52,6 +52,7 @@ #include <asm/insn.h> #include <asm/debugreg.h> #include <asm/set_memory.h> +#include <asm/ibt.h> #include "common.h" @@ -193,17 +194,10 @@ static unsigned long __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) { struct kprobe *kp; - unsigned long faddr; + bool faddr; kp = get_kprobe((void *)addr); - faddr = ftrace_location(addr); - /* - * Addresses inside the ftrace location are refused by - * arch_check_ftrace_location(). Something went terribly wrong - * if such an address is checked here. - */ - if (WARN_ON(faddr && faddr != addr)) - return 0UL; + faddr = ftrace_location(addr) == addr; /* * Use the current code if it is not modified by Kprobe * and it cannot be modified by ftrace. @@ -301,6 +295,22 @@ static int can_probe(unsigned long paddr) return (addr == paddr); } +/* If x86 supports IBT (ENDBR) it must be skipped. */ +kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, + bool *on_func_entry) +{ + if (is_endbr(*(u32 *)addr)) { + *on_func_entry = !offset || offset == 4; + if (*on_func_entry) + offset = 4; + + } else { + *on_func_entry = !offset; + } + + return (kprobe_opcode_t *)(addr + offset); +} + /* * Copy an instruction with recovering modified instruction by kprobes * and adjust the displacement if the instruction uses the %rip-relative @@ -1023,6 +1033,7 @@ asm( ".type __kretprobe_trampoline, @function\n" "__kretprobe_trampoline:\n" #ifdef CONFIG_X86_64 + ANNOTATE_NOENDBR /* Push a fake return address to tell the unwinder it's a kretprobe. 
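arch_adjust_kprobe_addr() above keeps kprobes working on IBT kernels: a probe requested at a function's entry (offset 0) is moved past the 4-byte ENDBR, and offset 4 is still treated as "on the function entry". The effective-address rule, as a pure-function sketch (illustrative helper, not the kernel interface):

    /* starts_with_endbr reflects is_endbr() on the first 4 bytes of func */
    static unsigned long probe_address(unsigned long func, unsigned long offset,
                                       int starts_with_endbr)
    {
            if (starts_with_endbr && (offset == 0 || offset == 4))
                    offset = 4;             /* land just past the ENDBR */

            return func + offset;
    }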
*/ " pushq $__kretprobe_trampoline\n" UNWIND_HINT_FUNC diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d77481ecb0d5..79e0b8d63ffa 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -1029,10 +1029,11 @@ asm( ".global __raw_callee_save___kvm_vcpu_is_preempted;" ".type __raw_callee_save___kvm_vcpu_is_preempted, @function;" "__raw_callee_save___kvm_vcpu_is_preempted:" +ASM_ENDBR "movq __per_cpu_offset(,%rdi,8), %rax;" "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);" "setne %al;" -"ret;" +ASM_RET ".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;" ".popsection"); diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index f5da4a18070a..566bb8e17149 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -27,6 +27,7 @@ #include <asm/kexec-bzimage64.h> #include <asm/setup.h> #include <asm/set_memory.h> +#include <asm/cpu.h> #ifdef CONFIG_ACPI /* @@ -310,6 +311,7 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); hw_breakpoint_disable(); + cet_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC @@ -325,7 +327,7 @@ void machine_kexec(struct kimage *image) } control_page = page_address(image->control_code_page) + PAGE_SIZE; - memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); + __memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 504ea65987e8..b98ffcf4d250 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -253,7 +253,7 @@ int module_finalize(const Elf_Ehdr *hdr, { const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, *para = NULL, *orc = NULL, *orc_ip = NULL, - *retpolines = NULL; + *retpolines = NULL, *ibt_endbr = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { @@ -271,6 +271,8 @@ int module_finalize(const Elf_Ehdr *hdr, orc_ip = s; if (!strcmp(".retpoline_sites", secstrings + s->sh_name)) retpolines = s; + if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name)) + ibt_endbr = s; } /* @@ -290,6 +292,10 @@ int module_finalize(const Elf_Ehdr *hdr, void *aseg = (void *)alt->sh_addr; apply_alternatives(aseg, aseg + alt->sh_size); } + if (ibt_endbr) { + void *iseg = (void *)ibt_endbr->sh_addr; + apply_ibt_endbr(iseg, iseg + ibt_endbr->sh_size); + } if (locks && text) { void *lseg = (void *)locks->sh_addr; void *tseg = (void *)text->sh_addr; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 4420499f7bb4..7ca2d46c08cc 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -41,6 +41,7 @@ extern void _paravirt_nop(void); asm (".pushsection .entry.text, \"ax\"\n" ".global _paravirt_nop\n" "_paravirt_nop:\n\t" + ASM_ENDBR ASM_RET ".size _paravirt_nop, . - _paravirt_nop\n\t" ".type _paravirt_nop, @function\n\t" @@ -50,6 +51,7 @@ asm (".pushsection .entry.text, \"ax\"\n" asm (".pushsection .entry.text, \"ax\"\n" ".global paravirt_ret0\n" "paravirt_ret0:\n\t" + ASM_ENDBR "xor %" _ASM_AX ", %" _ASM_AX ";\n\t" ASM_RET ".size paravirt_ret0, . 
- paravirt_ret0\n\t" @@ -69,29 +71,12 @@ noinstr void paravirt_BUG(void) BUG(); } -struct branch { - unsigned char opcode; - u32 delta; -} __attribute__((packed)); - static unsigned paravirt_patch_call(void *insn_buff, const void *target, unsigned long addr, unsigned len) { - const int call_len = 5; - struct branch *b = insn_buff; - unsigned long delta = (unsigned long)target - (addr+call_len); - - if (len < call_len) { - pr_warn("paravirt: Failed to patch indirect CALL at %ps\n", (void *)addr); - /* Kernel might not be viable if patching fails, bail out: */ - BUG_ON(1); - } - - b->opcode = 0xe8; /* call */ - b->delta = delta; - BUILD_BUG_ON(sizeof(*b) != call_len); - - return call_len; + __text_gen_insn(insn_buff, CALL_INSN_OPCODE, + (void *)addr, target, CALL_INSN_SIZE); + return CALL_INSN_SIZE; } #ifdef CONFIG_PARAVIRT_XXL @@ -149,8 +134,6 @@ void paravirt_set_sched_clock(u64 (*func)(void)) } /* These are in entry.S */ -extern void native_iret(void); - static struct resource reserve_ioports = { .start = 0, .end = IO_SPACE_LIMIT, @@ -414,8 +397,6 @@ struct paravirt_patch_template pv_ops = { #ifdef CONFIG_PARAVIRT_XXL NOKPROBE_SYMBOL(native_load_idt); - -void (*paravirt_iret)(void) = native_iret; #endif EXPORT_SYMBOL(pv_ops); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e131d71b3cae..b370767f5b19 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -747,7 +747,7 @@ bool xen_set_default_idle(void) } #endif -void stop_this_cpu(void *dummy) +void __noreturn stop_this_cpu(void *dummy) { local_irq_disable(); /* diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3402edec236c..e459253649be 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -681,7 +681,7 @@ void set_personality_64bit(void) static void __set_personality_x32(void) { -#ifdef CONFIG_X86_X32 +#ifdef CONFIG_X86_X32_ABI if (current->mm) current->mm->context.flags = 0; diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 399f075ccdc4..c1d8626c53b6 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -42,6 +42,7 @@ .code64 SYM_CODE_START_NOALIGN(relocate_kernel) UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR /* * %rdi indirection_page * %rsi page_list @@ -115,6 +116,14 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) pushq %rdx /* + * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP + * below. 
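Missing landing pads are reported through the new #CP handler added in the traps.c hunk that follows; its error code identifies which CET check fired, and kernel IBT only expects the ENDBR variant. A sketch that decodes the code using the cp_error_code values defined there (cp_reason is an illustrative helper):

    static const char *cp_reason(unsigned long error_code)
    {
            switch (error_code & ((1UL << 15) - 1)) {       /* CP_EC mask */
            case 1:  return "near RET";                     /* CP_RET */
            case 2:  return "far RET / IRET";               /* CP_IRET */
            case 3:  return "missing ENDBRANCH";            /* CP_ENDBR */
            case 4:  return "RSTORSSP";                     /* CP_RSTRORSSP */
            case 5:  return "SETSSBSY";                     /* CP_SETSSBSY */
            default: return "unknown";
            }
    }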
+	 */
+	movq %cr4, %rax
+	andq $~(X86_CR4_CET), %rax
+	movq %rax, %cr4
+
+	/*
 	 * Set cr0 to a known state:
 	 * - Paging enabled
 	 * - Alignment check disabled
@@ -215,6 +224,7 @@ SYM_CODE_END(identity_mapped)
 
 SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
 	UNWIND_HINT_EMPTY
+	ANNOTATE_NOENDBR // RET target, above
 	movq RSP(%r8), %rsp
 	movq CR4(%r8), %rax
 	movq %rax, %cr4
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 2e37862e3a8c..1563fb995005 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -210,6 +210,81 @@ DEFINE_IDTENTRY(exc_overflow)
 	do_error_trap(regs, 0, "overflow", X86_TRAP_OF, SIGSEGV, 0, NULL);
 }
 
+#ifdef CONFIG_X86_KERNEL_IBT
+
+static __ro_after_init bool ibt_fatal = true;
+
+extern void ibt_selftest_ip(void); /* code label defined in asm below */
+
+enum cp_error_code {
+	CP_EC = (1 << 15) - 1,
+
+	CP_RET = 1,
+	CP_IRET = 2,
+	CP_ENDBR = 3,
+	CP_RSTRORSSP = 4,
+	CP_SETSSBSY = 5,
+
+	CP_ENCL = 1 << 15,
+};
+
+DEFINE_IDTENTRY_ERRORCODE(exc_control_protection)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_IBT)) {
+		pr_err("Unexpected #CP\n");
+		BUG();
+	}
+
+	if (WARN_ON_ONCE(user_mode(regs) || (error_code & CP_EC) != CP_ENDBR))
+		return;
+
+	if (unlikely(regs->ip == (unsigned long)&ibt_selftest_ip)) {
+		regs->ax = 0;
+		return;
+	}
+
+	pr_err("Missing ENDBR: %pS\n", (void *)instruction_pointer(regs));
+	if (!ibt_fatal) {
+		printk(KERN_DEFAULT CUT_HERE);
+		__warn(__FILE__, __LINE__, (void *)regs->ip, TAINT_WARN, regs, NULL);
+		return;
+	}
+	BUG();
+}
+
+/* Must be noinline to ensure uniqueness of ibt_selftest_ip. */
+noinline bool ibt_selftest(void)
+{
+	unsigned long ret;
+
+	asm ("	lea ibt_selftest_ip(%%rip), %%rax\n\t"
+	     ANNOTATE_RETPOLINE_SAFE
+	     "	jmp *%%rax\n\t"
+	     "ibt_selftest_ip:\n\t"
+	     UNWIND_HINT_FUNC
+	     ANNOTATE_NOENDBR
+	     "	nop\n\t"
+
+	     : "=a" (ret) : : "memory");
+
+	return !ret;
+}
+
+static int __init ibt_setup(char *str)
+{
+	if (!strcmp(str, "off"))
+		setup_clear_cpu_cap(X86_FEATURE_IBT);
+
+	if (!strcmp(str, "warn"))
+		ibt_fatal = false;
+
+	return 1;
+}
+
+__setup("ibt=", ibt_setup);
+
+#endif /* CONFIG_X86_KERNEL_IBT */
+
 #ifdef CONFIG_X86_F00F_BUG
 void handle_invalid_op(struct pt_regs *regs)
 #else
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 27f830345b6f..7fda7f27e762 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -285,6 +285,15 @@ SECTIONS
 	}
 #endif
 
+#ifdef CONFIG_X86_KERNEL_IBT
+	. = ALIGN(8);
+	.ibt_endbr_seal : AT(ADDR(.ibt_endbr_seal) - LOAD_OFFSET) {
+		__ibt_endbr_seal = .;
+		*(.ibt_endbr_seal)
+		__ibt_endbr_seal_end = .;
+	}
+#endif
+
 	/*
 	 * struct alt_inst entries. From the header (alternative.h):
 	 * "Alternative instructions for different CPU types or capabilities"
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 7ba4ec77feeb..f9c00c89829b 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -24,6 +24,7 @@
 #include <linux/stringify.h>
 #include <asm/debugreg.h>
 #include <asm/nospec-branch.h>
+#include <asm/ibt.h>
 
 #include "x86.h"
 #include "tss.h"
@@ -189,7 +190,7 @@
 #define X16(x...) X8(x), X8(x)
 
 #define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
-#define FASTOP_SIZE 8
+#define FASTOP_SIZE (8 * (1 + HAS_KERNEL_IBT))
 
 struct opcode {
 	u64 flags;
@@ -311,7 +312,8 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
 #define __FOP_FUNC(name) \
 	".align " __stringify(FASTOP_SIZE) " \n\t" \
 	".type " name ", @function \n\t" \
-	name ":\n\t"
+	name ":\n\t" \
+	ASM_ENDBR
 
 #define FOP_FUNC(name) \
 	__FOP_FUNC(#name)
@@ -433,21 +435,23 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
 
 /*
  * Depending on .config the SETcc functions look like:
  *
+ * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT]
  * SETcc %al [3 bytes]
  * RET [1 byte]
  * INT3 [1 byte; CONFIG_SLS]
  *
- * Which gives possible sizes 4 or 5. When rounded up to the
- * next power-of-two alignment they become 4 or 8.
+ * Which gives possible sizes 4, 5, 8 or 9. When rounded up to the
+ * next power-of-two alignment they become 4, 8 or 16 resp.
 */
-#define SETCC_LENGTH (4 + IS_ENABLED(CONFIG_SLS))
-#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS))
+#define SETCC_LENGTH (ENDBR_INSN_SIZE + 4 + IS_ENABLED(CONFIG_SLS))
+#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS) << HAS_KERNEL_IBT)
 static_assert(SETCC_LENGTH <= SETCC_ALIGN);
 
 #define FOP_SETCC(op) \
 	".align " __stringify(SETCC_ALIGN) " \n\t" \
 	".type " #op ", @function \n\t" \
 	#op ": \n\t" \
+	ASM_ENDBR \
 	#op " %al \n\t" \
 	__FOP_RET(#op)
diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c
index 520897061ee0..1e3de0769b81 100644
--- a/arch/x86/lib/error-inject.c
+++ b/arch/x86/lib/error-inject.c
@@ -3,6 +3,7 @@
 #include <linux/linkage.h>
 #include <linux/error-injection.h>
 #include <linux/kprobes.h>
+#include <linux/objtool.h>
 
 asmlinkage void just_return_func(void);
 
@@ -11,6 +12,7 @@ asm(
 	".type just_return_func, @function\n"
 	".globl just_return_func\n"
 	"just_return_func:\n"
+	ANNOTATE_NOENDBR
 	ASM_RET
 	".size just_return_func, .-just_return_func\n"
 );
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index afbdda539b80..5f87bab4fb8d 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -55,6 +55,7 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
 
 	.align RETPOLINE_THUNK_SIZE
 SYM_CODE_START(__x86_indirect_thunk_array)
+	ANNOTATE_NOENDBR // apply_retpolines
 
 #define GEN(reg) THUNK reg
 #include <asm/GEN-for-each-reg.h>
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 6efbb87f65ed..8fe35ed11fd6 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -46,6 +46,12 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 #define EMIT4_off32(b1, b2, b3, b4, off) \
 	do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
 
+#ifdef CONFIG_X86_KERNEL_IBT
+#define EMIT_ENDBR() EMIT(gen_endbr(), 4)
+#else
+#define EMIT_ENDBR()
+#endif
+
 static bool is_imm8(int value)
 {
 	return value <= 127 && value >= -128;
@@ -241,7 +247,7 @@ struct jit_context {
 /* Number of bytes emit_patch() needs to generate instructions */
 #define X86_PATCH_SIZE 5
 /* Number of bytes that will be skipped on tailcall */
-#define X86_TAIL_CALL_OFFSET 11
+#define X86_TAIL_CALL_OFFSET (11 + ENDBR_INSN_SIZE)
 
 static void push_callee_regs(u8 **pprog, bool *callee_regs_used)
 {
@@ -286,6 +292,7 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
 	/* BPF trampoline can be made to work without these nops,
 	 * but let's waste 5 bytes for now and optimize later
 	 */
+	EMIT_ENDBR();
 	memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
 	prog += X86_PATCH_SIZE;
 	if (!ebpf_from_cbpf) {
@@ -296,6 +303,10 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
 	}
 	EMIT1(0x55); /* push rbp */
 	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+
+	/* X86_TAIL_CALL_OFFSET is here */
+	EMIT_ENDBR();
+
 	/* sub rsp, rounded_stack_depth */
 	if (stack_depth)
 		EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
@@ -380,6 +391,13 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
 		/* BPF poking in modules is not supported */
 		return -EINVAL;
 
+	/*
+	 * See emit_prologue(), for IBT builds the trampoline hook is preceded
+	 * with an ENDBR instruction.
+	 */
+	if (is_endbr(*(u32 *)ip))
+		ip += ENDBR_INSN_SIZE;
+
 	return __bpf_arch_text_poke(ip, t, old_addr, new_addr);
 }
 
@@ -2013,14 +2031,18 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 
 	ip_off = stack_size;
 
-	if (flags & BPF_TRAMP_F_SKIP_FRAME)
+	if (flags & BPF_TRAMP_F_SKIP_FRAME) {
 		/* skip patched call instruction and point orig_call to actual
 		 * body of the kernel function.
 		 */
+		if (is_endbr(*(u32 *)orig_call))
+			orig_call += ENDBR_INSN_SIZE;
 		orig_call += X86_PATCH_SIZE;
+	}
 
 	prog = image;
 
+	EMIT_ENDBR();
 	EMIT1(0x55); /* push rbp */
 	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
 	EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
index 84b09c230cbd..a50245157685 100644
--- a/arch/x86/platform/efi/Makefile
+++ b/arch/x86/platform/efi/Makefile
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-OBJECT_FILES_NON_STANDARD_efi_thunk_$(BITS).o := y
 
 KASAN_SANITIZE := n
 GCOV_PROFILE := n
diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S
index 25799d768624..854dd81804b7 100644
--- a/arch/x86/platform/efi/efi_thunk_64.S
+++ b/arch/x86/platform/efi/efi_thunk_64.S
@@ -20,12 +20,14 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/objtool.h>
 #include <asm/page_types.h>
 #include <asm/segment.h>
 
 	.text
 	.code64
-SYM_CODE_START(__efi64_thunk)
+SYM_FUNC_START(__efi64_thunk)
+STACK_FRAME_NON_STANDARD __efi64_thunk
 	push %rbp
 	push %rbx
 
@@ -79,7 +81,7 @@ SYM_CODE_START(__efi64_thunk)
 2:	pushl $__KERNEL_CS
 	pushl %ebp
 	lret
-SYM_CODE_END(__efi64_thunk)
+SYM_FUNC_END(__efi64_thunk)
 
 	.bss
 	.balign 8
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index d47c3d176ae4..5038edb79ad5 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -624,6 +624,9 @@ static struct trap_array_entry trap_array[] = {
 	TRAP_ENTRY(exc_coprocessor_error, false ),
 	TRAP_ENTRY(exc_alignment_check, false ),
 	TRAP_ENTRY(exc_simd_coprocessor_error, false ),
+#ifdef CONFIG_X86_KERNEL_IBT
+	TRAP_ENTRY(exc_control_protection, false ),
+#endif
 };
 
 static bool __ref get_trap_addr(void **addr, unsigned int ist)
@@ -1177,6 +1180,8 @@ static void __init xen_domu_set_legacy_features(void)
 	x86_platform.legacy.rtc = 0;
 }
 
+extern void early_xen_iret_patch(void);
+
 /* First C function to be called on Xen boot */
 asmlinkage __visible void __init xen_start_kernel(void)
 {
@@ -1187,6 +1192,10 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	if (!xen_start_info)
 		return;
 
+	__text_gen_insn(&early_xen_iret_patch,
+			JMP32_INSN_OPCODE, &early_xen_iret_patch, &xen_iret,
+			JMP32_INSN_SIZE);
+
 	xen_domain_type = XEN_PV_DOMAIN;
 	xen_start_flags = xen_start_info->flags;
 
@@ -1195,7 +1204,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 	/* Install Xen paravirt ops */
 	pv_info = xen_info;
 	pv_ops.cpu = xen_cpu_ops.cpu;
-	paravirt_iret = xen_iret;
 	xen_init_irq_ops();
 
 	/*
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index e730e6200e64..caa9bc2fa100 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -122,6 +122,7 @@ SYM_FUNC_END(xen_read_cr2_direct);
 .macro xen_pv_trap name
 SYM_CODE_START(xen_\name)
 	UNWIND_HINT_EMPTY
+	ENDBR
 	pop %rcx
 	pop %r11
 	jmp \name
@@ -147,6 +148,9 @@ xen_pv_trap asm_exc_page_fault
 xen_pv_trap asm_exc_spurious_interrupt_bug
 xen_pv_trap asm_exc_coprocessor_error
 xen_pv_trap asm_exc_alignment_check
+#ifdef CONFIG_X86_KERNEL_IBT
+xen_pv_trap asm_exc_control_protection
+#endif
 #ifdef CONFIG_X86_MCE
 xen_pv_trap asm_xenpv_exc_machine_check
 #endif /* CONFIG_X86_MCE */
@@ -162,6 +166,7 @@ SYM_CODE_START(xen_early_idt_handler_array)
 	i = 0
 	.rept NUM_EXCEPTION_VECTORS
 	UNWIND_HINT_EMPTY
+	ENDBR
 	pop %rcx
 	pop %r11
 	jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE
@@ -189,6 +194,7 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
 */
 SYM_CODE_START(xen_iret)
 	UNWIND_HINT_EMPTY
+	ANNOTATE_NOENDBR
 	pushq $0
 	jmp hypercall_iret
 SYM_CODE_END(xen_iret)
@@ -230,6 +236,7 @@ SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode)
 /* Normal 64-bit system call target */
 SYM_CODE_START(xen_syscall_target)
 	UNWIND_HINT_EMPTY
+	ENDBR
 	popq %rcx
 	popq %r11
 
@@ -249,6 +256,7 @@ SYM_CODE_END(xen_syscall_target)
 /* 32-bit compat syscall target */
 SYM_CODE_START(xen_syscall32_target)
 	UNWIND_HINT_EMPTY
+	ENDBR
 	popq %rcx
 	popq %r11
 
@@ -266,6 +274,7 @@ SYM_CODE_END(xen_syscall32_target)
 /* 32-bit compat sysenter target */
 SYM_CODE_START(xen_sysenter_target)
 	UNWIND_HINT_EMPTY
+	ENDBR
 	/*
 	 * NB: Xen is polite and clears TF from EFLAGS for us. This means
 	 * that we don't need to guard against single step exceptions here.
@@ -289,6 +298,7 @@ SYM_CODE_END(xen_sysenter_target)
 SYM_CODE_START(xen_syscall32_target)
 SYM_CODE_START(xen_sysenter_target)
 	UNWIND_HINT_EMPTY
+	ENDBR
 	lea 16(%rsp), %rsp /* strip %rcx, %r11 */
 	mov $-ENOSYS, %rax
 	pushq $0
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 11d286529fe5..ac17196e2518 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -25,8 +25,12 @@ SYM_CODE_START(hypercall_page)
 	.rept (PAGE_SIZE / 32)
 		UNWIND_HINT_FUNC
-		.skip 31, 0x90
-		RET
+		ANNOTATE_NOENDBR
+		ret
+		/*
+		 * Xen will write the hypercall page, and sort out ENDBR.
+		 */
+		.skip 31, 0xcc
 	.endr
 
 #define HYPERCALL(n) \
@@ -74,6 +78,7 @@ SYM_CODE_END(startup_xen)
 .pushsection .text
 SYM_CODE_START(asm_cpu_bringup_and_idle)
 	UNWIND_HINT_EMPTY
+	ENDBR
 
 	call cpu_bringup_and_idle
 SYM_CODE_END(asm_cpu_bringup_and_idle)
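
Note on the SETcc stub sizing in the kvm/emulate.c hunk above: each stub packs an optional ENDBR, the 3-byte SETcc, a RET and an optional INT3 into a power-of-two slot. The following is an illustrative, standalone sketch of that arithmetic only; the SKETCH_* names are made up here, while the kernel itself uses ENDBR_INSN_SIZE, IS_ENABLED(CONFIG_SLS) and HAS_KERNEL_IBT.

/* Illustrative sketch, not part of the patch. */
#include <assert.h>

#define SKETCH_ENDBR_SIZE 4	/* stands in for ENDBR_INSN_SIZE; 0 without CONFIG_X86_KERNEL_IBT */
#define SKETCH_SLS        1	/* stands in for IS_ENABLED(CONFIG_SLS) */
#define SKETCH_HAS_IBT    1	/* stands in for HAS_KERNEL_IBT */

#define SKETCH_SETCC_LENGTH (SKETCH_ENDBR_SIZE + 4 + SKETCH_SLS)	/* 4 + 4 + 1 = 9 bytes */
#define SKETCH_SETCC_ALIGN  (4 << SKETCH_SLS << SKETCH_HAS_IBT)	/* 4 << 1 << 1 = 16 */

/* Every stub must fit inside the slot its alignment reserves for it. */
static_assert(SKETCH_SETCC_LENGTH <= SKETCH_SETCC_ALIGN,
	      "SETcc stub overflows its aligned slot");

With both SLS and IBT enabled the stub is 9 bytes in a 16-byte slot; with neither, it is 4 bytes in a 4-byte slot, matching the "4, 5, 8 or 9" / "4, 8 or 16" comment in the hunk.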
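
Note on the bpf_jit_comp.c hunks above: bpf_arch_text_poke() and arch_prepare_bpf_trampoline() both step over a leading ENDBR (emitted by EMIT_ENDBR() in the prologue) before touching the 5-byte patch site. Below is a minimal user-space sketch of that check, not the kernel implementation: 0xfa1e0ff3 is simply the endbr64 encoding f3 0f 1e fa read as a little-endian 32-bit value, whereas the in-tree is_endbr()/gen_endbr() helpers compute the value so the raw opcode bytes never appear as an immediate in the kernel text.

/* Illustrative sketch, not part of the patch. */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define SKETCH_ENDBR_INSN_SIZE 4

static bool sketch_is_endbr(uint32_t val)
{
	return val == 0xfa1e0ff3;	/* endbr64: f3 0f 1e fa, read little-endian */
}

/* Return the first poke-able byte of a function that may start with ENDBR. */
static void *sketch_skip_endbr(void *ip)
{
	uint32_t insn;

	memcpy(&insn, ip, sizeof(insn));	/* text is not guaranteed 4-byte aligned */
	if (sketch_is_endbr(insn))
		ip = (uint8_t *)ip + SKETCH_ENDBR_INSN_SIZE;

	return ip;
}

Skipping the landing pad keeps the ENDBR intact for indirect callers while the nop5/call patch site that follows it remains the byte range that gets rewritten.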