diff options
-rw-r--r-- | Documentation/admin-guide/kernel-parameters.txt | 7 | ||||
-rw-r--r-- | arch/arm64/Kconfig | 5 | ||||
-rw-r--r-- | arch/arm64/include/asm/exception.h | 1 | ||||
-rw-r--r-- | arch/arm64/include/asm/mte.h | 5 | ||||
-rw-r--r-- | arch/arm64/include/asm/uaccess.h | 22 | ||||
-rw-r--r-- | arch/arm64/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/arm64/kernel/compat_alignment.c | 387 | ||||
-rw-r--r-- | arch/arm64/kernel/cpufeature.c | 3 | ||||
-rw-r--r-- | arch/arm64/kernel/irq.c | 14 | ||||
-rw-r--r-- | arch/arm64/kernel/mte.c | 51 | ||||
-rw-r--r-- | arch/arm64/kernel/probes/kprobes.c | 27 | ||||
-rw-r--r-- | arch/arm64/kernel/proton-pack.c | 10 | ||||
-rw-r--r-- | arch/arm64/kernel/reloc_test_core.c | 4 | ||||
-rw-r--r-- | arch/arm64/kernel/suspend.c | 2 | ||||
-rw-r--r-- | arch/arm64/mm/dma-mapping.c | 2 | ||||
-rw-r--r-- | arch/arm64/mm/fault.c | 3 | ||||
-rw-r--r-- | arch/arm64/mm/mmu.c | 21 | ||||
-rw-r--r-- | arch/arm64/mm/pageattr.c | 8 | ||||
-rw-r--r-- | arch/arm64/mm/proc.S | 46 |
19 files changed, 539 insertions, 80 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 426fa892d311..dc3a939da2c0 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3207,6 +3207,7 @@ spectre_v2_user=off [X86] spec_store_bypass_disable=off [X86,PPC] ssbd=force-off [ARM64] + nospectre_bhb [ARM64] l1tf=off [X86] mds=off [X86] tsx_async_abort=off [X86] @@ -3613,7 +3614,7 @@ nohugeiomap [KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings. - nohugevmalloc [PPC] Disable kernel huge vmalloc mappings. + nohugevmalloc [KNL,X86,PPC,ARM64] Disable kernel huge vmalloc mappings. nosmt [KNL,S390] Disable symmetric multithreading (SMT). Equivalent to smt=1. @@ -3631,6 +3632,10 @@ vulnerability. System may allow data leaks with this option. + nospectre_bhb [ARM64] Disable all mitigations for Spectre-BHB (branch + history injection) vulnerability. System may allow data leaks + with this option. + nospec_store_bypass_disable [HW] Disable all mitigations for the Speculative Store Bypass vulnerability diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 526ab76cd233..1675310f1791 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -149,6 +149,7 @@ config ARM64 select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_BITREVERSE select HAVE_ARCH_COMPILER_H + select HAVE_ARCH_HUGE_VMALLOC select HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL_RELATIVE @@ -230,6 +231,7 @@ config ARM64 select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD select TRACE_IRQFLAGS_SUPPORT select TRACE_IRQFLAGS_NMI_SUPPORT + select HAVE_SOFTIRQ_ON_OWN_STACK help ARM 64-bit (AArch64) Linux support. @@ -1575,6 +1577,9 @@ config THUMB2_COMPAT_VDSO Compile the compat vDSO with '-mthumb -fomit-frame-pointer' if y, otherwise with '-marm'. +config COMPAT_ALIGNMENT_FIXUPS + bool "Fix up misaligned multi-word loads and stores in user space" + menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" depends on SYSCTL diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 0278a58abe69..19713d0f013b 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -71,6 +71,7 @@ void do_sysinstr(unsigned long esr, struct pt_regs *regs); void do_sp_pc_abort(unsigned long addr, unsigned long esr, struct pt_regs *regs); void bad_el0_sync(struct pt_regs *regs, int reason, unsigned long esr); void do_cp15instr(unsigned long esr, struct pt_regs *regs); +int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs); void do_el0_svc(struct pt_regs *regs); void do_el0_svc_compat(struct pt_regs *regs); void do_el0_fpac(struct pt_regs *regs, unsigned long esr); diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index aa523591a44e..760c62f8e22f 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -42,7 +42,9 @@ void mte_sync_tags(pte_t old_pte, pte_t pte); void mte_copy_page_tags(void *kto, const void *kfrom); void mte_thread_init_user(void); void mte_thread_switch(struct task_struct *next); +void mte_cpu_setup(void); void mte_suspend_enter(void); +void mte_suspend_exit(void); long set_mte_ctrl(struct task_struct *task, unsigned long arg); long get_mte_ctrl(struct task_struct *task); int mte_ptrace_copy_tags(struct task_struct *child, long request, @@ -72,6 +74,9 @@ static inline void mte_thread_switch(struct task_struct *next) static inline void mte_suspend_enter(void) { } +static inline void mte_suspend_exit(void) +{ +} static inline long set_mte_ctrl(struct task_struct *task, unsigned long arg) { return 0; diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 2fc9f0861769..5c7b2f9d5913 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -203,9 +203,11 @@ static inline void uaccess_enable_privileged(void) } /* - * Sanitise a uaccess pointer such that it becomes NULL if above the maximum - * user address. In case the pointer is tagged (has the top byte set), untag - * the pointer before checking. + * Sanitize a uaccess pointer such that it cannot reach any kernel address. + * + * Clearing bit 55 ensures the pointer cannot address any portion of the TTBR1 + * address range (i.e. any kernel address), and either the pointer falls within + * the TTBR0 address range or must cause a fault. */ #define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr) static inline void __user *__uaccess_mask_ptr(const void __user *ptr) @@ -213,14 +215,12 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) void __user *safe_ptr; asm volatile( - " bics xzr, %3, %2\n" - " csel %0, %1, xzr, eq\n" - : "=&r" (safe_ptr) - : "r" (ptr), "r" (TASK_SIZE_MAX - 1), - "r" (untagged_addr(ptr)) - : "cc"); - - csdb(); + " bic %0, %1, %2\n" + : "=r" (safe_ptr) + : "r" (ptr), + "i" (BIT(55)) + ); + return safe_ptr; } diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 1add7b01efa7..38a0b0291edb 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -45,6 +45,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE obj-$(CONFIG_COMPAT) += sys32.o signal32.o \ sys_compat.o obj-$(CONFIG_COMPAT) += sigreturn32.o +obj-$(CONFIG_COMPAT_ALIGNMENT_FIXUPS) += compat_alignment.o obj-$(CONFIG_KUSER_HELPERS) += kuser32.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o obj-$(CONFIG_MODULES) += module.o diff --git a/arch/arm64/kernel/compat_alignment.c b/arch/arm64/kernel/compat_alignment.c new file mode 100644 index 000000000000..5edec2f49ec9 --- /dev/null +++ b/arch/arm64/kernel/compat_alignment.c @@ -0,0 +1,387 @@ +// SPDX-License-Identifier: GPL-2.0-only +// based on arch/arm/mm/alignment.c + +#include <linux/compiler.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/perf_event.h> +#include <linux/uaccess.h> + +#include <asm/exception.h> +#include <asm/ptrace.h> +#include <asm/traps.h> + +/* + * 32-bit misaligned trap handler (c) 1998 San Mehat (CCC) -July 1998 + * + * Speed optimisations and better fault handling by Russell King. + */ +#define CODING_BITS(i) (i & 0x0e000000) + +#define LDST_P_BIT(i) (i & (1 << 24)) /* Preindex */ +#define LDST_U_BIT(i) (i & (1 << 23)) /* Add offset */ +#define LDST_W_BIT(i) (i & (1 << 21)) /* Writeback */ +#define LDST_L_BIT(i) (i & (1 << 20)) /* Load */ + +#define LDST_P_EQ_U(i) ((((i) ^ ((i) >> 1)) & (1 << 23)) == 0) + +#define LDSTHD_I_BIT(i) (i & (1 << 22)) /* double/half-word immed */ + +#define RN_BITS(i) ((i >> 16) & 15) /* Rn */ +#define RD_BITS(i) ((i >> 12) & 15) /* Rd */ +#define RM_BITS(i) (i & 15) /* Rm */ + +#define REGMASK_BITS(i) (i & 0xffff) + +#define BAD_INSTR 0xdeadc0de + +/* Thumb-2 32 bit format per ARMv7 DDI0406A A6.3, either f800h,e800h,f800h */ +#define IS_T32(hi16) \ + (((hi16) & 0xe000) == 0xe000 && ((hi16) & 0x1800)) + +union offset_union { + unsigned long un; + signed long sn; +}; + +#define TYPE_ERROR 0 +#define TYPE_FAULT 1 +#define TYPE_LDST 2 +#define TYPE_DONE 3 + +static void +do_alignment_finish_ldst(unsigned long addr, u32 instr, struct pt_regs *regs, + union offset_union offset) +{ + if (!LDST_U_BIT(instr)) + offset.un = -offset.un; + + if (!LDST_P_BIT(instr)) + addr += offset.un; + + if (!LDST_P_BIT(instr) || LDST_W_BIT(instr)) + regs->regs[RN_BITS(instr)] = addr; +} + +static int +do_alignment_ldrdstrd(unsigned long addr, u32 instr, struct pt_regs *regs) +{ + unsigned int rd = RD_BITS(instr); + unsigned int rd2; + int load; + + if ((instr & 0xfe000000) == 0xe8000000) { + /* ARMv7 Thumb-2 32-bit LDRD/STRD */ + rd2 = (instr >> 8) & 0xf; + load = !!(LDST_L_BIT(instr)); + } else if (((rd & 1) == 1) || (rd == 14)) { + return TYPE_ERROR; + } else { + load = ((instr & 0xf0) == 0xd0); + rd2 = rd + 1; + } + + if (load) { + unsigned int val, val2; + + if (get_user(val, (u32 __user *)addr) || + get_user(val2, (u32 __user *)(addr + 4))) + return TYPE_FAULT; + regs->regs[rd] = val; + regs->regs[rd2] = val2; + } else { + if (put_user(regs->regs[rd], (u32 __user *)addr) || + put_user(regs->regs[rd2], (u32 __user *)(addr + 4))) + return TYPE_FAULT; + } + return TYPE_LDST; +} + +/* + * LDM/STM alignment handler. + * + * There are 4 variants of this instruction: + * + * B = rn pointer before instruction, A = rn pointer after instruction + * ------ increasing address -----> + * | | r0 | r1 | ... | rx | | + * PU = 01 B A + * PU = 11 B A + * PU = 00 A B + * PU = 10 A B + */ +static int +do_alignment_ldmstm(unsigned long addr, u32 instr, struct pt_regs *regs) +{ + unsigned int rd, rn, nr_regs, regbits; + unsigned long eaddr, newaddr; + unsigned int val; + + /* count the number of registers in the mask to be transferred */ + nr_regs = hweight16(REGMASK_BITS(instr)) * 4; + + rn = RN_BITS(instr); + newaddr = eaddr = regs->regs[rn]; + + if (!LDST_U_BIT(instr)) + nr_regs = -nr_regs; + newaddr += nr_regs; + if (!LDST_U_BIT(instr)) + eaddr = newaddr; + + if (LDST_P_EQ_U(instr)) /* U = P */ + eaddr += 4; + + for (regbits = REGMASK_BITS(instr), rd = 0; regbits; + regbits >>= 1, rd += 1) + if (regbits & 1) { + if (LDST_L_BIT(instr)) { + if (get_user(val, (u32 __user *)eaddr)) + return TYPE_FAULT; + if (rd < 15) + regs->regs[rd] = val; + else + regs->pc = val; + } else { + /* + * The PC register has a bias of +8 in ARM mode + * and +4 in Thumb mode. This means that a read + * of the value of PC should account for this. + * Since Thumb does not permit STM instructions + * to refer to PC, just add 8 here. + */ + val = (rd < 15) ? regs->regs[rd] : regs->pc + 8; + if (put_user(val, (u32 __user *)eaddr)) + return TYPE_FAULT; + } + eaddr += 4; + } + + if (LDST_W_BIT(instr)) + regs->regs[rn] = newaddr; + + return TYPE_DONE; +} + +/* + * Convert Thumb multi-word load/store instruction forms to equivalent ARM + * instructions so we can reuse ARM userland alignment fault fixups for Thumb. + * + * This implementation was initially based on the algorithm found in + * gdb/sim/arm/thumbemu.c. It is basically just a code reduction of same + * to convert only Thumb ld/st instruction forms to equivalent ARM forms. + * + * NOTES: + * 1. Comments below refer to ARM ARM DDI0100E Thumb Instruction sections. + * 2. If for some reason we're passed an non-ld/st Thumb instruction to + * decode, we return 0xdeadc0de. This should never happen under normal + * circumstances but if it does, we've got other problems to deal with + * elsewhere and we obviously can't fix those problems here. + */ + +static unsigned long thumb2arm(u16 tinstr) +{ + u32 L = (tinstr & (1<<11)) >> 11; + + switch ((tinstr & 0xf800) >> 11) { + /* 6.6.1 Format 1: */ + case 0xc000 >> 11: /* 7.1.51 STMIA */ + case 0xc800 >> 11: /* 7.1.25 LDMIA */ + { + u32 Rn = (tinstr & (7<<8)) >> 8; + u32 W = ((L<<Rn) & (tinstr&255)) ? 0 : 1<<21; + + return 0xe8800000 | W | (L<<20) | (Rn<<16) | + (tinstr&255); + } + + /* 6.6.1 Format 2: */ + case 0xb000 >> 11: /* 7.1.48 PUSH */ + case 0xb800 >> 11: /* 7.1.47 POP */ + if ((tinstr & (3 << 9)) == 0x0400) { + static const u32 subset[4] = { + 0xe92d0000, /* STMDB sp!,{registers} */ + 0xe92d4000, /* STMDB sp!,{registers,lr} */ + 0xe8bd0000, /* LDMIA sp!,{registers} */ + 0xe8bd8000 /* LDMIA sp!,{registers,pc} */ + }; + return subset[(L<<1) | ((tinstr & (1<<8)) >> 8)] | + (tinstr & 255); /* register_list */ + } + fallthrough; /* for illegal instruction case */ + + default: + return BAD_INSTR; + } +} + +/* + * Convert Thumb-2 32 bit LDM, STM, LDRD, STRD to equivalent instruction + * handlable by ARM alignment handler, also find the corresponding handler, + * so that we can reuse ARM userland alignment fault fixups for Thumb. + * + * @pinstr: original Thumb-2 instruction; returns new handlable instruction + * @regs: register context. + * @poffset: return offset from faulted addr for later writeback + * + * NOTES: + * 1. Comments below refer to ARMv7 DDI0406A Thumb Instruction sections. + * 2. Register name Rt from ARMv7 is same as Rd from ARMv6 (Rd is Rt) + */ +static void * +do_alignment_t32_to_handler(u32 *pinstr, struct pt_regs *regs, + union offset_union *poffset) +{ + u32 instr = *pinstr; + u16 tinst1 = (instr >> 16) & 0xffff; + u16 tinst2 = instr & 0xffff; + + switch (tinst1 & 0xffe0) { + /* A6.3.5 Load/Store multiple */ + case 0xe880: /* STM/STMIA/STMEA,LDM/LDMIA, PUSH/POP T2 */ + case 0xe8a0: /* ...above writeback version */ + case 0xe900: /* STMDB/STMFD, LDMDB/LDMEA */ + case 0xe920: /* ...above writeback version */ + /* no need offset decision since handler calculates it */ + return do_alignment_ldmstm; + + case 0xf840: /* POP/PUSH T3 (single register) */ + if (RN_BITS(instr) == 13 && (tinst2 & 0x09ff) == 0x0904) { + u32 L = !!(LDST_L_BIT(instr)); + const u32 subset[2] = { + 0xe92d0000, /* STMDB sp!,{registers} */ + 0xe8bd0000, /* LDMIA sp!,{registers} */ + }; + *pinstr = subset[L] | (1<<RD_BITS(instr)); + return do_alignment_ldmstm; + } + /* Else fall through for illegal instruction case */ + break; + + /* A6.3.6 Load/store double, STRD/LDRD(immed, lit, reg) */ + case 0xe860: + case 0xe960: + case 0xe8e0: + case 0xe9e0: + poffset->un = (tinst2 & 0xff) << 2; + fallthrough; + + case 0xe940: + case 0xe9c0: + return do_alignment_ldrdstrd; + + /* + * No need to handle load/store instructions up to word size + * since ARMv6 and later CPUs can perform unaligned accesses. + */ + default: + break; + } + return NULL; +} + +static int alignment_get_arm(struct pt_regs *regs, __le32 __user *ip, u32 *inst) +{ + __le32 instr = 0; + int fault; + + fault = get_user(instr, ip); + if (fault) + return fault; + + *inst = __le32_to_cpu(instr); + return 0; +} + +static int alignment_get_thumb(struct pt_regs *regs, __le16 __user *ip, u16 *inst) +{ + __le16 instr = 0; + int fault; + + fault = get_user(instr, ip); + if (fault) + return fault; + + *inst = __le16_to_cpu(instr); + return 0; +} + +int do_compat_alignment_fixup(unsigned long addr, struct pt_regs *regs) +{ + union offset_union offset; + unsigned long instrptr; + int (*handler)(unsigned long addr, u32 instr, struct pt_regs *regs); + unsigned int type; + u32 instr = 0; + u16 tinstr = 0; + int isize = 4; + int thumb2_32b = 0; + int fault; + + instrptr = instruction_pointer(regs); + + if (compat_thumb_mode(regs)) { + __le16 __user *ptr = (__le16 __user *)(instrptr & ~1); + + fault = alignment_get_thumb(regs, ptr, &tinstr); + if (!fault) { + if (IS_T32(tinstr)) { + /* Thumb-2 32-bit */ + u16 tinst2; + fault = alignment_get_thumb(regs, ptr + 1, &tinst2); + instr = ((u32)tinstr << 16) | tinst2; + thumb2_32b = 1; + } else { + isize = 2; + instr = thumb2arm(tinstr); + } + } + } else { + fault = alignment_get_arm(regs, (__le32 __user *)instrptr, &instr); + } + + if (fault) + return 1; + + switch (CODING_BITS(instr)) { + case 0x00000000: /* 3.13.4 load/store instruction extensions */ + if (LDSTHD_I_BIT(instr)) + offset.un = (instr & 0xf00) >> 4 | (instr & 15); + else + offset.un = regs->regs[RM_BITS(instr)]; + + if ((instr & 0x001000f0) == 0x000000d0 || /* LDRD */ + (instr & 0x001000f0) == 0x000000f0) /* STRD */ + handler = do_alignment_ldrdstrd; + else + return 1; + break; + + case 0x08000000: /* ldm or stm, or thumb-2 32bit instruction */ + if (thumb2_32b) { + offset.un = 0; + handler = do_alignment_t32_to_handler(&instr, regs, &offset); + } else { + offset.un = 0; + handler = do_alignment_ldmstm; + } + break; + + default: + return 1; + } + + type = handler(addr, instr, regs); + + if (type == TYPE_ERROR || type == TYPE_FAULT) + return 1; + + if (type == TYPE_LDST) + do_alignment_finish_ldst(addr, instr, regs, offset); + + perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->pc); + arm64_skip_faulting_instruction(regs, isize); + + return 0; +} diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 5d0527ba0804..a51edf3c0214 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2043,7 +2043,8 @@ static void bti_enable(const struct arm64_cpu_capabilities *__unused) static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) { sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0); - isb(); + + mte_cpu_setup(); /* * Clear the tags in the zero page. This needs to be done via the diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index bda49430c9ea..38dbd3828f13 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -21,7 +21,9 @@ #include <linux/seq_file.h> #include <linux/vmalloc.h> #include <asm/daifflags.h> +#include <asm/exception.h> #include <asm/vmap_stack.h> +#include <asm/softirq_stack.h> /* Only access this in an NMI enter/exit */ DEFINE_PER_CPU(struct nmi_ctx, nmi_contexts); @@ -71,6 +73,18 @@ static void init_irq_stacks(void) } #endif +#ifndef CONFIG_PREEMPT_RT +static void ____do_softirq(struct pt_regs *regs) +{ + __do_softirq(); +} + +void do_softirq_own_stack(void) +{ + call_on_irq_stack(NULL, ____do_softirq); +} +#endif + static void default_handle_irq(struct pt_regs *regs) { panic("IRQ taken without a root IRQ handler\n"); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index b2b730233274..aca88470fb69 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -285,6 +285,49 @@ void mte_thread_switch(struct task_struct *next) mte_check_tfsr_el1(); } +void mte_cpu_setup(void) +{ + u64 rgsr; + + /* + * CnP must be enabled only after the MAIR_EL1 register has been set + * up. Inconsistent MAIR_EL1 between CPUs sharing the same TLB may + * lead to the wrong memory type being used for a brief window during + * CPU power-up. + * + * CnP is not a boot feature so MTE gets enabled before CnP, but let's + * make sure that is the case. + */ + BUG_ON(read_sysreg(ttbr0_el1) & TTBR_CNP_BIT); + BUG_ON(read_sysreg(ttbr1_el1) & TTBR_CNP_BIT); + + /* Normal Tagged memory type at the corresponding MAIR index */ + sysreg_clear_set(mair_el1, + MAIR_ATTRIDX(MAIR_ATTR_MASK, MT_NORMAL_TAGGED), + MAIR_ATTRIDX(MAIR_ATTR_NORMAL_TAGGED, + MT_NORMAL_TAGGED)); + + write_sysreg_s(KERNEL_GCR_EL1, SYS_GCR_EL1); + + /* + * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then + * RGSR_EL1.SEED must be non-zero for IRG to produce + * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we + * must initialize it. + */ + rgsr = (read_sysreg(CNTVCT_EL0) & SYS_RGSR_EL1_SEED_MASK) << + SYS_RGSR_EL1_SEED_SHIFT; + if (rgsr == 0) + rgsr = 1 << SYS_RGSR_EL1_SEED_SHIFT; + write_sysreg_s(rgsr, SYS_RGSR_EL1); + + /* clear any pending tag check faults in TFSR*_EL1 */ + write_sysreg_s(0, SYS_TFSR_EL1); + write_sysreg_s(0, SYS_TFSRE0_EL1); + + local_flush_tlb_all(); +} + void mte_suspend_enter(void) { if (!system_supports_mte()) @@ -301,6 +344,14 @@ void mte_suspend_enter(void) mte_check_tfsr_el1(); } +void mte_suspend_exit(void) +{ + if (!system_supports_mte()) + return; + + mte_cpu_setup(); +} + long set_mte_ctrl(struct task_struct *task, unsigned long arg) { u64 mte_ctrl = (~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) & diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index d1d182320245..c9e4d0720285 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -44,13 +44,28 @@ post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *); static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { kprobe_opcode_t *addr = p->ainsn.api.insn; - void *addrs[] = {addr, addr + 1}; - u32 insns[] = {p->opcode, BRK64_OPCODE_KPROBES_SS}; - /* prepare insn slot */ - aarch64_insn_patch_text(addrs, insns, 2); - - flush_icache_range((uintptr_t)addr, (uintptr_t)(addr + MAX_INSN_SIZE)); + /* + * Prepare insn slot, Mark Rutland points out it depends on a coupe of + * subtleties: + * + * - That the I-cache maintenance for these instructions is complete + * *before* the kprobe BRK is written (and aarch64_insn_patch_text_nosync() + * ensures this, but just omits causing a Context-Synchronization-Event + * on all CPUS). + * + * - That the kprobe BRK results in an exception (and consequently a + * Context-Synchronoization-Event), which ensures that the CPU will + * fetch thesingle-step slot instructions *after* this, ensuring that + * the new instructions are used + * + * It supposes to place ISB after patching to guarantee I-cache maintenance + * is observed on all CPUS, however, single-step slot is installed in + * the BRK exception handler, so it is unnecessary to generate + * Contex-Synchronization-Event via ISB again. + */ + aarch64_insn_patch_text_nosync(addr, p->opcode); + aarch64_insn_patch_text_nosync(addr + 1, BRK64_OPCODE_KPROBES_SS); /* * Needs restoring of return address after stepping xol. diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index ea659a382e24..a8ea1637b137 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -988,6 +988,14 @@ static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) isb(); } +static bool __read_mostly __nospectre_bhb; +static int __init parse_spectre_bhb_param(char *str) +{ + __nospectre_bhb = true; + return 0; +} +early_param("nospectre_bhb", parse_spectre_bhb_param); + void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) { bp_hardening_cb_t cpu_cb; @@ -1001,7 +1009,7 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) /* No point mitigating Spectre-BHB alone. */ } else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) { pr_info_once("spectre-bhb mitigation disabled by compile time option\n"); - } else if (cpu_mitigations_off()) { + } else if (cpu_mitigations_off() || __nospectre_bhb) { pr_info_once("spectre-bhb mitigation disabled by command line option\n"); } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) { state = SPECTRE_MITIGATED; diff --git a/arch/arm64/kernel/reloc_test_core.c b/arch/arm64/kernel/reloc_test_core.c index e87a2b7f20f6..99f2ffe9fc05 100644 --- a/arch/arm64/kernel/reloc_test_core.c +++ b/arch/arm64/kernel/reloc_test_core.c @@ -48,7 +48,7 @@ static struct { { "R_AARCH64_PREL16", relative_data16, (u64)&sym64_rel }, }; -static int reloc_test_init(void) +static int __init reloc_test_init(void) { int i; @@ -67,7 +67,7 @@ static int reloc_test_init(void) return 0; } -static void reloc_test_exit(void) +static void __exit reloc_test_exit(void) { } diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 9135fe0f3df5..8b02d310838f 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -43,6 +43,8 @@ void notrace __cpu_suspend_exit(void) { unsigned int cpu = smp_processor_id(); + mte_suspend_exit(); + /* * We are resuming from reset with the idmap active in TTBR0_EL1. * We must uninstall the idmap and restore the expected MMU diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 599cf81f5685..83a512a6ff0d 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -36,7 +36,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size) { unsigned long start = (unsigned long)page_address(page); - dcache_clean_inval_poc(start, start + size); + dcache_clean_poc(start, start + size); } #ifdef CONFIG_IOMMU_DMA diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c33f1fad2745..5b391490e045 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -691,6 +691,9 @@ static int __kprobes do_translation_fault(unsigned long far, static int do_alignment_fault(unsigned long far, unsigned long esr, struct pt_regs *regs) { + if (IS_ENABLED(CONFIG_COMPAT_ALIGNMENT_FIXUPS) && + compat_user_mode(regs)) + return do_compat_alignment_fixup(far, regs); do_bad_area(far, esr, regs); return 0; } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 5810eddfb48e..01f42c1185eb 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -535,7 +535,7 @@ static void __init map_mem(pgd_t *pgdp) */ BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end)); - if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE)) + if (can_set_direct_map()) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* @@ -1180,14 +1180,6 @@ static void free_empty_tables(unsigned long addr, unsigned long end, } #endif -#if !ARM64_KERNEL_USES_PMD_MAPS -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, - struct vmem_altmap *altmap) -{ - WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END)); - return vmemmap_populate_basepages(start, end, node, altmap); -} -#else /* !ARM64_KERNEL_USES_PMD_MAPS */ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, struct vmem_altmap *altmap) { @@ -1199,6 +1191,10 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, pmd_t *pmdp; WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END)); + + if (!ARM64_KERNEL_USES_PMD_MAPS) + return vmemmap_populate_basepages(start, end, node, altmap); + do { next = pmd_addr_end(addr, end); @@ -1232,7 +1228,6 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, return 0; } -#endif /* !ARM64_KERNEL_USES_PMD_MAPS */ #ifdef CONFIG_MEMORY_HOTPLUG void vmemmap_free(unsigned long start, unsigned long end, @@ -1547,11 +1542,7 @@ int arch_add_memory(int nid, u64 start, u64 size, VM_BUG_ON(!mhp_range_allowed(start, size, true)); - /* - * KFENCE requires linear map to be mapped at page granularity, so that - * it is possible to protect/unprotect single pages in the KFENCE pool. - */ - if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE)) + if (can_set_direct_map()) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 64e985eaa52d..d107c3d434e2 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -21,7 +21,13 @@ bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED bool can_set_direct_map(void) { - return rodata_full || debug_pagealloc_enabled(); + /* + * rodata_full, DEBUG_PAGEALLOC and KFENCE require linear map to be + * mapped at page granularity, so that it is possible to + * protect/unprotect single pages. + */ + return rodata_full || debug_pagealloc_enabled() || + IS_ENABLED(CONFIG_KFENCE); } static int change_page_range(pte_t *ptep, unsigned long addr, void *data) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 5f7784ee6044..f38bccdd374a 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -48,17 +48,19 @@ #ifdef CONFIG_KASAN_HW_TAGS #define TCR_MTE_FLAGS TCR_TCMA1 | TCR_TBI1 | TCR_TBID1 -#else +#elif defined(CONFIG_ARM64_MTE) /* * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on * TBI being enabled at EL1. */ #define TCR_MTE_FLAGS TCR_TBI1 | TCR_TBID1 +#else +#define TCR_MTE_FLAGS 0 #endif /* * Default MAIR_EL1. MT_NORMAL_TAGGED is initially mapped as Normal memory and - * changed during __cpu_setup to Normal Tagged if the system supports MTE. + * changed during mte_cpu_setup to Normal Tagged if the system supports MTE. */ #define MAIR_EL1_SET \ (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \ @@ -426,46 +428,8 @@ SYM_FUNC_START(__cpu_setup) mov_q mair, MAIR_EL1_SET mov_q tcr, TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \ TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \ - TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS - -#ifdef CONFIG_ARM64_MTE - /* - * Update MAIR_EL1, GCR_EL1 and TFSR*_EL1 if MTE is supported - * (ID_AA64PFR1_EL1[11:8] > 1). - */ - mrs x10, ID_AA64PFR1_EL1 - ubfx x10, x10, #ID_AA64PFR1_EL1_MTE_SHIFT, #4 - cmp x10, #ID_AA64PFR1_EL1_MTE_MTE2 - b.lt 1f - - /* Normal Tagged memory type at the corresponding MAIR index */ - mov x10, #MAIR_ATTR_NORMAL_TAGGED - bfi mair, x10, #(8 * MT_NORMAL_TAGGED), #8 + TCR_TBI0 | TCR_A1 | TCR_KASAN_SW_FLAGS | TCR_MTE_FLAGS - mov x10, #KERNEL_GCR_EL1 - msr_s SYS_GCR_EL1, x10 - - /* - * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then - * RGSR_EL1.SEED must be non-zero for IRG to produce - * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we - * must initialize it. - */ - mrs x10, CNTVCT_EL0 - ands x10, x10, #SYS_RGSR_EL1_SEED_MASK - csinc x10, x10, xzr, ne - lsl x10, x10, #SYS_RGSR_EL1_SEED_SHIFT - msr_s SYS_RGSR_EL1, x10 - - /* clear any pending tag check faults in TFSR*_EL1 */ - msr_s SYS_TFSR_EL1, xzr - msr_s SYS_TFSRE0_EL1, xzr - - /* set the TCR_EL1 bits */ - mov_q x10, TCR_MTE_FLAGS - orr tcr, tcr, x10 -1: -#endif tcr_clear_errata_bits tcr, x9, x5 #ifdef CONFIG_ARM64_VA_BITS_52 |