diff options
Diffstat (limited to 'arch')
495 files changed, 7401 insertions, 2261 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 56b6ccc0e32d..ba4e966484ab 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1028,6 +1028,15 @@ config HAVE_STATIC_CALL_INLINE bool depends on HAVE_STATIC_CALL +config ARCH_WANT_LD_ORPHAN_WARN + bool + help + An arch should select this symbol once all linker sections are explicitly + included, size-asserted, or discarded in the linker scripts. This is + important because we never want expected sections to be placed heuristically + by the linker, since the locations of such sections can change between linker + versions. + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 7462a7911002..4c7b0414a3ff 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -57,7 +57,7 @@ EXPORT_SYMBOL(pm_power_off); void arch_cpu_idle(void) { wtint(0); - local_irq_enable(); + raw_local_irq_enable(); } void arch_cpu_idle_dead(void) diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index c6606f4d20d6..fb98440c0bd4 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -243,10 +243,8 @@ static inline int constant_fls(unsigned int x) x <<= 2; r -= 2; } - if (!(x & 0x80000000u)) { - x <<= 1; + if (!(x & 0x80000000u)) r -= 1; - } return r; } diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index f1ed17edb085..163641726a2b 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -134,8 +134,10 @@ #ifdef CONFIG_ARC_HAS_PAE40 #define PTE_BITS_NON_RWX_IN_PD1 (0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE) +#define MAX_POSSIBLE_PHYSMEM_BITS 40 #else #define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE) +#define MAX_POSSIBLE_PHYSMEM_BITS 32 #endif /************************************************************************** diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 17fd1ed700cc..9152782444b5 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -67,7 +67,22 @@ sr r5, [ARC_REG_LPB_CTRL] 1: #endif /* CONFIG_ARC_LPB_DISABLE */ -#endif + + /* On HSDK, CCMs need to remapped super early */ +#ifdef CONFIG_ARC_SOC_HSDK + mov r6, 0x60000000 + lr r5, [ARC_REG_ICCM_BUILD] + breq r5, 0, 1f + sr r6, [ARC_REG_AUX_ICCM] +1: + lr r5, [ARC_REG_DCCM_BUILD] + breq r5, 0, 2f + sr r6, [ARC_REG_AUX_DCCM] +2: +#endif /* CONFIG_ARC_SOC_HSDK */ + +#endif /* CONFIG_ISA_ARCV2 */ + ; Config DSP_CTRL properly, so kernel may use integer multiply, ; multiply-accumulate, and divide operations DSP_EARLY_INIT diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index feba91c9d969..f73da203b170 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -38,15 +38,27 @@ #ifdef CONFIG_ARC_DW2_UNWIND -static void seed_unwind_frame_info(struct task_struct *tsk, - struct pt_regs *regs, - struct unwind_frame_info *frame_info) +static int +seed_unwind_frame_info(struct task_struct *tsk, struct pt_regs *regs, + struct unwind_frame_info *frame_info) { - /* - * synchronous unwinding (e.g. dump_stack) - * - uses current values of SP and friends - */ - if (tsk == NULL && regs == NULL) { + if (regs) { + /* + * Asynchronous unwinding of intr/exception + * - Just uses the pt_regs passed + */ + frame_info->task = tsk; + + frame_info->regs.r27 = regs->fp; + frame_info->regs.r28 = regs->sp; + frame_info->regs.r31 = regs->blink; + frame_info->regs.r63 = regs->ret; + frame_info->call_frame = 0; + } else if (tsk == NULL || tsk == current) { + /* + * synchronous unwinding (e.g. dump_stack) + * - uses current values of SP and friends + */ unsigned long fp, sp, blink, ret; frame_info->task = current; @@ -63,13 +75,17 @@ static void seed_unwind_frame_info(struct task_struct *tsk, frame_info->regs.r31 = blink; frame_info->regs.r63 = ret; frame_info->call_frame = 0; - } else if (regs == NULL) { + } else { /* - * Asynchronous unwinding of sleeping task - * - Gets SP etc from task's pt_regs (saved bottom of kernel - * mode stack of task) + * Asynchronous unwinding of a likely sleeping task + * - first ensure it is actually sleeping + * - if so, it will be in __switch_to, kernel mode SP of task + * is safe-kept and BLINK at a well known location in there */ + if (tsk->state == TASK_RUNNING) + return -1; + frame_info->task = tsk; frame_info->regs.r27 = TSK_K_FP(tsk); @@ -90,19 +106,8 @@ static void seed_unwind_frame_info(struct task_struct *tsk, frame_info->regs.r28 += 60; frame_info->call_frame = 0; - } else { - /* - * Asynchronous unwinding of intr/exception - * - Just uses the pt_regs passed - */ - frame_info->task = tsk; - - frame_info->regs.r27 = regs->fp; - frame_info->regs.r28 = regs->sp; - frame_info->regs.r31 = regs->blink; - frame_info->regs.r63 = regs->ret; - frame_info->call_frame = 0; } + return 0; } #endif @@ -112,11 +117,12 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, int (*consumer_fn) (unsigned int, void *), void *arg) { #ifdef CONFIG_ARC_DW2_UNWIND - int ret = 0; + int ret = 0, cnt = 0; unsigned int address; struct unwind_frame_info frame_info; - seed_unwind_frame_info(tsk, regs, &frame_info); + if (seed_unwind_frame_info(tsk, regs, &frame_info)) + return 0; while (1) { address = UNW_PC(&frame_info); @@ -132,6 +138,11 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs, break; frame_info.regs.r63 = frame_info.regs.r31; + + if (cnt++ > 128) { + printk("unwinder looping too long, aborting !\n"); + return 0; + } } return address; /* return the last address it saw */ diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index c340acd989a0..9bb3c24f3677 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -30,14 +30,14 @@ * -Changes related to MMU v2 (Rel 4.8) * * Vineetg: Aug 29th 2008 - * -In TLB Flush operations (Metal Fix MMU) there is a explict command to + * -In TLB Flush operations (Metal Fix MMU) there is a explicit command to * flush Micro-TLBS. If TLB Index Reg is invalid prior to TLBIVUTLB cmd, * it fails. Thus need to load it with ANY valid value before invoking * TLBIVUTLB cmd * * Vineetg: Aug 21th 2008: * -Reduced the duration of IRQ lockouts in TLB Flush routines - * -Multiple copies of TLB erase code seperated into a "single" function + * -Multiple copies of TLB erase code separated into a "single" function * -In TLB Flush routines, interrupt disabling moved UP to retrieve ASID * in interrupt-safe region. * @@ -66,7 +66,7 @@ * * Although J-TLB is 2 way set assoc, ARC700 caches J-TLB into uTLBS which has * much higher associativity. u-D-TLB is 8 ways, u-I-TLB is 4 ways. - * Given this, the thrasing problem should never happen because once the 3 + * Given this, the thrashing problem should never happen because once the 3 * J-TLB entries are created (even though 3rd will knock out one of the prev * two), the u-D-TLB and u-I-TLB will have what is required to accomplish memcpy * @@ -127,7 +127,7 @@ static void utlb_invalidate(void) * There was however an obscure hardware bug, where uTLB flush would * fail when a prior probe for J-TLB (both totally unrelated) would * return lkup err - because the entry didn't exist in MMU. - * The Workround was to set Index reg with some valid value, prior to + * The Workaround was to set Index reg with some valid value, prior to * flush. This was fixed in MMU v3 */ unsigned int idx; @@ -272,7 +272,7 @@ noinline void local_flush_tlb_all(void) } /* - * Flush the entrie MM for userland. The fastest way is to move to Next ASID + * Flush the entire MM for userland. The fastest way is to move to Next ASID */ noinline void local_flush_tlb_mm(struct mm_struct *mm) { @@ -303,7 +303,7 @@ noinline void local_flush_tlb_mm(struct mm_struct *mm) * Difference between this and Kernel Range Flush is * -Here the fastest way (if range is too large) is to move to next ASID * without doing any explicit Shootdown - * -In case of kernel Flush, entry has to be shot down explictly + * -In case of kernel Flush, entry has to be shot down explicitly */ void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) @@ -620,7 +620,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, * Super Page size is configurable in hardware (4K to 16M), but fixed once * RTL builds. * - * The exact THP size a Linx configuration will support is a function of: + * The exact THP size a Linux configuration will support is a function of: * - MMU page size (typical 8K, RTL fixed) * - software page walker address split between PGD:PTE:PFN (typical * 11:8:13, but can be changed with 1 line) @@ -698,7 +698,7 @@ void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, #endif -/* Read the Cache Build Confuration Registers, Decode them and save into +/* Read the Cache Build Configuration Registers, Decode them and save into * the cpuinfo structure for later use. * No Validation is done here, simply read/convert the BCRs */ @@ -803,13 +803,13 @@ void arc_mmu_init(void) pr_info("%s", arc_mmu_mumbojumbo(0, str, sizeof(str))); /* - * Can't be done in processor.h due to header include depenedencies + * Can't be done in processor.h due to header include dependencies */ BUILD_BUG_ON(!IS_ALIGNED((CONFIG_ARC_KVADDR_SIZE << 20), PMD_SIZE)); /* * stack top size sanity check, - * Can't be done in processor.h due to header include depenedencies + * Can't be done in processor.h due to header include dependencies */ BUILD_BUG_ON(!IS_ALIGNED(STACK_TOP, PMD_SIZE)); @@ -881,7 +881,7 @@ void arc_mmu_init(void) * the duplicate one. * -Knob to be verbose abt it.(TODO: hook them up to debugfs) */ -volatile int dup_pd_silent; /* Be slient abt it or complain (default) */ +volatile int dup_pd_silent; /* Be silent abt it or complain (default) */ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, struct pt_regs *regs) @@ -948,7 +948,7 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, /*********************************************************************** * Diagnostic Routines - * -Called from Low Level TLB Hanlders if things don;t look good + * -Called from Low Level TLB Handlers if things don;t look good **********************************************************************/ #ifdef CONFIG_ARC_DBG_TLB_PARANOIA diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c index 0b63fc095b99..b3ea1fa11f87 100644 --- a/arch/arc/plat-hsdk/platform.c +++ b/arch/arc/plat-hsdk/platform.c @@ -17,22 +17,6 @@ int arc_hsdk_axi_dmac_coherent __section(".data") = 0; #define ARC_CCM_UNUSED_ADDR 0x60000000 -static void __init hsdk_init_per_cpu(unsigned int cpu) -{ - /* - * By default ICCM is mapped to 0x7z while this area is used for - * kernel virtual mappings, so move it to currently unused area. - */ - if (cpuinfo_arc700[cpu].iccm.sz) - write_aux_reg(ARC_REG_AUX_ICCM, ARC_CCM_UNUSED_ADDR); - - /* - * By default DCCM is mapped to 0x8z while this area is used by kernel, - * so move it to currently unused area. - */ - if (cpuinfo_arc700[cpu].dccm.sz) - write_aux_reg(ARC_REG_AUX_DCCM, ARC_CCM_UNUSED_ADDR); -} #define ARC_PERIPHERAL_BASE 0xf0000000 #define CREG_BASE (ARC_PERIPHERAL_BASE + 0x1000) @@ -339,5 +323,4 @@ static const char *hsdk_compat[] __initconst = { MACHINE_START(SIMULATION, "hsdk") .dt_compat = hsdk_compat, .init_early = hsdk_init_early, - .init_per_cpu = hsdk_init_per_cpu, MACHINE_END diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index fe2f17eb2b50..002e0cf025f5 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -35,6 +35,7 @@ config ARM select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU select ARCH_WANT_IPC_PARSE_VERSION + select ARCH_WANT_LD_ORPHAN_WARN select BINFMT_FLAT_ARGVP_ENVP_ON_STACK select BUILDTIME_TABLE_SORT if MMU select CLONE_BACKWARDS diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 4d76eab2b22d..e15f76ca2887 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -16,10 +16,6 @@ LDFLAGS_vmlinux += --be8 KBUILD_LDFLAGS_MODULE += --be8 endif -# We never want expected sections to be placed heuristically by the -# linker. All sections should be explicitly named in the linker script. -LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn) - GZFLAGS :=-9 #KBUILD_CFLAGS +=-pipe diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 47f001ca5499..e1567418a2b1 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -129,7 +129,9 @@ LDFLAGS_vmlinux += --no-undefined # Delete all temporary local symbols LDFLAGS_vmlinux += -X # Report orphan sections -LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn) +ifdef CONFIG_LD_ORPHAN_WARN +LDFLAGS_vmlinux += --orphan-handling=warn +endif # Next argument is a linker script LDFLAGS_vmlinux += -T diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 2e04ec5b5446..caa27322a0ab 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -1472,6 +1472,9 @@ ENTRY(efi_enter_kernel) @ issued from HYP mode take us to the correct handler code. We @ will disable the MMU before jumping to the kernel proper. @ + ARM( bic r1, r1, #(1 << 30) ) @ clear HSCTLR.TE + THUMB( orr r1, r1, #(1 << 30) ) @ set HSCTLR.TE + mcr p15, 4, r1, c1, c0, 0 adr r0, __hyp_reentry_vectors mcr p15, 4, r0, c12, c0, 0 @ set HYP vector base (HVBAR) isb diff --git a/arch/arm/boot/dts/am437x-l4.dtsi b/arch/arm/boot/dts/am437x-l4.dtsi index c220dc3c4e0f..243e35f7a56c 100644 --- a/arch/arm/boot/dts/am437x-l4.dtsi +++ b/arch/arm/boot/dts/am437x-l4.dtsi @@ -521,7 +521,7 @@ ranges = <0x0 0x100000 0x8000>; mac_sw: switch@0 { - compatible = "ti,am4372-cpsw","ti,cpsw-switch"; + compatible = "ti,am4372-cpsw-switch", "ti,cpsw-switch"; reg = <0x0 0x4000>; ranges = <0 0 0x4000>; clocks = <&cpsw_125mhz_gclk>, <&dpll_clksel_mac_clk>; diff --git a/arch/arm/boot/dts/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed-g6.dtsi index b58220a49cbd..74367ee96f20 100644 --- a/arch/arm/boot/dts/aspeed-g6.dtsi +++ b/arch/arm/boot/dts/aspeed-g6.dtsi @@ -69,6 +69,12 @@ always-on; }; + edac: sdram@1e6e0000 { + compatible = "aspeed,ast2600-sdram-edac", "syscon"; + reg = <0x1e6e0000 0x174>; + interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>; + }; + ahb { compatible = "simple-bus"; #address-cells = <1>; diff --git a/arch/arm/boot/dts/dra76x.dtsi b/arch/arm/boot/dts/dra76x.dtsi index b69c7d40f5d8..2f326151116b 100644 --- a/arch/arm/boot/dts/dra76x.dtsi +++ b/arch/arm/boot/dts/dra76x.dtsi @@ -32,8 +32,8 @@ interrupts = <GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 68 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "int0", "int1"; - clocks = <&mcan_clk>, <&l3_iclk_div>; - clock-names = "cclk", "hclk"; + clocks = <&l3_iclk_div>, <&mcan_clk>; + clock-names = "hclk", "cclk"; bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>; }; }; diff --git a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi index ab291cec650a..2983e91bc7dd 100644 --- a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi +++ b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi @@ -122,7 +122,6 @@ }; &clock { - clocks = <&clock CLK_XUSBXTI>; assigned-clocks = <&clock CLK_FOUT_EPLL>; assigned-clock-rates = <45158401>; }; diff --git a/arch/arm/boot/dts/imx50-evk.dts b/arch/arm/boot/dts/imx50-evk.dts index 878e89c20190..4ea5c23f181b 100644 --- a/arch/arm/boot/dts/imx50-evk.dts +++ b/arch/arm/boot/dts/imx50-evk.dts @@ -59,7 +59,7 @@ MX50_PAD_CSPI_MISO__CSPI_MISO 0x00 MX50_PAD_CSPI_MOSI__CSPI_MOSI 0x00 MX50_PAD_CSPI_SS0__GPIO4_11 0xc4 - MX50_PAD_ECSPI1_MOSI__CSPI_SS1 0xf4 + MX50_PAD_ECSPI1_MOSI__GPIO4_13 0x84 >; }; diff --git a/arch/arm/boot/dts/imx6q-prti6q.dts b/arch/arm/boot/dts/imx6q-prti6q.dts index d112b50f8c5d..b4605edfd2ab 100644 --- a/arch/arm/boot/dts/imx6q-prti6q.dts +++ b/arch/arm/boot/dts/imx6q-prti6q.dts @@ -213,8 +213,8 @@ #size-cells = <0>; /* Microchip KSZ9031RNX PHY */ - rgmii_phy: ethernet-phy@4 { - reg = <4>; + rgmii_phy: ethernet-phy@0 { + reg = <0>; interrupts-extended = <&gpio1 28 IRQ_TYPE_LEVEL_LOW>; reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>; reset-assert-us = <10000>; diff --git a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi index 265f5f3dbff6..24f793ca2886 100644 --- a/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi +++ b/arch/arm/boot/dts/imx6qdl-kontron-samx6i.dtsi @@ -551,7 +551,7 @@ pinctrl_i2c3: i2c3grp { fsl,pins = < - MX6QDL_PAD_GPIO_3__I2C3_SCL 0x4001b8b1 + MX6QDL_PAD_GPIO_5__I2C3_SCL 0x4001b8b1 MX6QDL_PAD_GPIO_16__I2C3_SDA 0x4001b8b1 >; }; diff --git a/arch/arm/boot/dts/imx6qdl-udoo.dtsi b/arch/arm/boot/dts/imx6qdl-udoo.dtsi index 828dd20cd27d..d07d8f83456d 100644 --- a/arch/arm/boot/dts/imx6qdl-udoo.dtsi +++ b/arch/arm/boot/dts/imx6qdl-udoo.dtsi @@ -98,7 +98,7 @@ &fec { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_enet>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi b/arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi index 93909796885a..b9b698f72b26 100644 --- a/arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi +++ b/arch/arm/boot/dts/imx6qdl-wandboard-revd1.dtsi @@ -166,7 +166,6 @@ MX6QDL_PAD_RGMII_RD2__RGMII_RD2 0x1b030 MX6QDL_PAD_RGMII_RD3__RGMII_RD3 0x1b030 MX6QDL_PAD_RGMII_RX_CTL__RGMII_RX_CTL 0x1b030 - MX6QDL_PAD_GPIO_6__ENET_IRQ 0x000b1 >; }; diff --git a/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts b/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts index f1a41152e9dd..342304f5653a 100644 --- a/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts +++ b/arch/arm/boot/dts/mmp2-olpc-xo-1-75.dts @@ -223,16 +223,15 @@ }; &ssp3 { - /delete-property/ #address-cells; - /delete-property/ #size-cells; + #address-cells = <0>; spi-slave; status = "okay"; - ready-gpio = <&gpio 125 GPIO_ACTIVE_HIGH>; + ready-gpios = <&gpio 125 GPIO_ACTIVE_HIGH>; slave { compatible = "olpc,xo1.75-ec"; spi-cpha; - cmd-gpio = <&gpio 155 GPIO_ACTIVE_HIGH>; + cmd-gpios = <&gpio 155 GPIO_ACTIVE_HIGH>; }; }; diff --git a/arch/arm/boot/dts/mmp3.dtsi b/arch/arm/boot/dts/mmp3.dtsi index cc4efd0efabd..4ae630d37d09 100644 --- a/arch/arm/boot/dts/mmp3.dtsi +++ b/arch/arm/boot/dts/mmp3.dtsi @@ -296,6 +296,7 @@ interrupts = <GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>; clocks = <&soc_clocks MMP2_CLK_CCIC0>; clock-names = "axi"; + power-domains = <&soc_clocks MMP3_POWER_DOMAIN_CAMERA>; #clock-cells = <0>; clock-output-names = "mclk"; status = "disabled"; @@ -307,6 +308,7 @@ interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>; clocks = <&soc_clocks MMP2_CLK_CCIC1>; clock-names = "axi"; + power-domains = <&soc_clocks MMP3_POWER_DOMAIN_CAMERA>; #clock-cells = <0>; clock-output-names = "mclk"; status = "disabled"; diff --git a/arch/arm/boot/dts/stm32mp157c-ed1.dts b/arch/arm/boot/dts/stm32mp157c-ed1.dts index ca109dc18238..2e77ccec3fc1 100644 --- a/arch/arm/boot/dts/stm32mp157c-ed1.dts +++ b/arch/arm/boot/dts/stm32mp157c-ed1.dts @@ -89,6 +89,14 @@ states = <1800000 0x1>, <2900000 0x0>; }; + + vin: vin { + compatible = "regulator-fixed"; + regulator-name = "vin"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-always-on; + }; }; &adc { @@ -150,11 +158,18 @@ regulators { compatible = "st,stpmic1-regulators"; + buck1-supply = <&vin>; + buck2-supply = <&vin>; + buck3-supply = <&vin>; + buck4-supply = <&vin>; ldo1-supply = <&v3v3>; ldo2-supply = <&v3v3>; ldo3-supply = <&vdd_ddr>; + ldo4-supply = <&vin>; ldo5-supply = <&v3v3>; ldo6-supply = <&v3v3>; + vref_ddr-supply = <&vin>; + boost-supply = <&vin>; pwr_sw1-supply = <&bst_out>; pwr_sw2-supply = <&bst_out>; diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi index 5dff24e39af8..8456f172d4b1 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-pdk2.dtsi @@ -46,6 +46,16 @@ linux,code = <KEY_A>; gpios = <&gpiof 3 GPIO_ACTIVE_LOW>; }; + + /* + * The EXTi IRQ line 0 is shared with PMIC, + * so mark this as polled GPIO key. + */ + button-2 { + label = "TA3-GPIO-C"; + linux,code = <KEY_C>; + gpios = <&gpiog 0 GPIO_ACTIVE_LOW>; + }; }; gpio-keys { @@ -59,13 +69,6 @@ wakeup-source; }; - button-2 { - label = "TA3-GPIO-C"; - linux,code = <KEY_C>; - gpios = <&gpioi 11 GPIO_ACTIVE_LOW>; - wakeup-source; - }; - button-3 { label = "TA4-GPIO-D"; linux,code = <KEY_D>; @@ -79,7 +82,7 @@ led-0 { label = "green:led5"; - gpios = <&gpiog 2 GPIO_ACTIVE_HIGH>; + gpios = <&gpioc 6 GPIO_ACTIVE_HIGH>; default-state = "off"; }; diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi index b4b52cf634af..f796a6150313 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcom-som.dtsi @@ -68,6 +68,7 @@ gpio = <&gpiog 3 GPIO_ACTIVE_LOW>; regulator-always-on; regulator-boot-on; + vin-supply = <&vdd>; }; }; @@ -202,6 +203,7 @@ vdda: ldo1 { regulator-name = "vdda"; + regulator-always-on; regulator-min-microvolt = <2900000>; regulator-max-microvolt = <2900000>; interrupts = <IT_CURLIM_LDO1 0>; diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi index 04fbb324a541..803eb8bc9c85 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi @@ -21,6 +21,10 @@ }; }; +&dts { + status = "okay"; +}; + &i2c4 { pinctrl-names = "default"; pinctrl-0 = <&i2c4_pins_a>; diff --git a/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi b/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi index a5307745719a..93398cfae97e 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dkx.dtsi @@ -80,6 +80,14 @@ dais = <&sai2a_port &sai2b_port &i2s2_port>; status = "okay"; }; + + vin: vin { + compatible = "regulator-fixed"; + regulator-name = "vin"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-always-on; + }; }; &adc { @@ -240,9 +248,18 @@ regulators { compatible = "st,stpmic1-regulators"; + buck1-supply = <&vin>; + buck2-supply = <&vin>; + buck3-supply = <&vin>; + buck4-supply = <&vin>; ldo1-supply = <&v3v3>; + ldo2-supply = <&vin>; ldo3-supply = <&vdd_ddr>; + ldo4-supply = <&vin>; + ldo5-supply = <&vin>; ldo6-supply = <&v3v3>; + vref_ddr-supply = <&vin>; + boost-supply = <&vin>; pwr_sw1-supply = <&bst_out>; pwr_sw2-supply = <&bst_out>; diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index 0f95a6ef8543..1c5a666c54b5 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -143,7 +143,7 @@ trips { cpu_alert0: cpu-alert0 { /* milliCelsius */ - temperature = <850000>; + temperature = <85000>; hysteresis = <2000>; type = "passive"; }; diff --git a/arch/arm/boot/dts/sun6i-a31-hummingbird.dts b/arch/arm/boot/dts/sun6i-a31-hummingbird.dts index 049e6ab3cf56..73de34ae37fd 100644 --- a/arch/arm/boot/dts/sun6i-a31-hummingbird.dts +++ b/arch/arm/boot/dts/sun6i-a31-hummingbird.dts @@ -154,7 +154,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts b/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts index 32d5d45a35c0..8945dbb114a2 100644 --- a/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts +++ b/arch/arm/boot/dts/sun7i-a20-bananapi-m1-plus.dts @@ -130,7 +130,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_gmac_3v3>; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun7i-a20-bananapi.dts b/arch/arm/boot/dts/sun7i-a20-bananapi.dts index bb3987e101c2..0b3d9ae75650 100644 --- a/arch/arm/boot/dts/sun7i-a20-bananapi.dts +++ b/arch/arm/boot/dts/sun7i-a20-bananapi.dts @@ -132,7 +132,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_gmac_3v3>; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun7i-a20-cubietruck.dts b/arch/arm/boot/dts/sun7i-a20-cubietruck.dts index 8c8dee6ea461..9109ca0919ad 100644 --- a/arch/arm/boot/dts/sun7i-a20-cubietruck.dts +++ b/arch/arm/boot/dts/sun7i-a20-cubietruck.dts @@ -151,7 +151,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts b/arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts index fce2f7fcd084..bf38c66c1815 100644 --- a/arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts +++ b/arch/arm/boot/dts/sun7i-a20-pcduino3-nano.dts @@ -1,5 +1,5 @@ /* - * Copyright 2015 Adam Sampson <ats@offog.org> + * Copyright 2015-2020 Adam Sampson <ats@offog.org> * * This file is dual-licensed: you can use it either under the terms * of the GPL or the X11 license, at your option. Note that this dual @@ -115,7 +115,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts index 9d34eabba121..431f70234d36 100644 --- a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts +++ b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts @@ -131,7 +131,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_sw>; phy-handle = <&rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; allwinner,rx-delay-ps = <700>; allwinner,tx-delay-ps = <700>; status = "okay"; diff --git a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts index d9be511f054f..d8326a5c681d 100644 --- a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts +++ b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts @@ -183,7 +183,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_dldo4>; phy-handle = <&rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts index 71fb73208939..babf4cf1b2f6 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc-plus.dts @@ -53,11 +53,6 @@ }; }; -&emac { - /* LEDs changed to active high on the plus */ - /delete-property/ allwinner,leds-active-low; -}; - &mmc1 { vmmc-supply = <®_vcc3v3>; bus-width = <4>; diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts index 6dbf7b2e0c13..b6ca45d18e51 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-plus2e.dts @@ -67,7 +67,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts b/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts index 2fc62ef0cb3e..a6a1087a0c9b 100644 --- a/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts +++ b/arch/arm/boot/dts/sun8i-r40-bananapi-m2-ultra.dts @@ -129,7 +129,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_dc1sw>; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun8i-s3-pinecube.dts b/arch/arm/boot/dts/sun8i-s3-pinecube.dts index 9bab6b7f4014..4aa0ee897a0a 100644 --- a/arch/arm/boot/dts/sun8i-s3-pinecube.dts +++ b/arch/arm/boot/dts/sun8i-s3-pinecube.dts @@ -10,7 +10,7 @@ / { model = "PineCube IP Camera"; - compatible = "pine64,pinecube", "allwinner,sun8i-s3"; + compatible = "pine64,pinecube", "sochip,s3", "allwinner,sun8i-v3"; aliases { serial0 = &uart2; diff --git a/arch/arm/boot/dts/sun8i-v3s.dtsi b/arch/arm/boot/dts/sun8i-v3s.dtsi index 0c7341676921..89abd4cc7e23 100644 --- a/arch/arm/boot/dts/sun8i-v3s.dtsi +++ b/arch/arm/boot/dts/sun8i-v3s.dtsi @@ -539,7 +539,7 @@ gic: interrupt-controller@1c81000 { compatible = "arm,gic-400"; reg = <0x01c81000 0x1000>, - <0x01c82000 0x1000>, + <0x01c82000 0x2000>, <0x01c84000 0x2000>, <0x01c86000 0x2000>; interrupt-controller; diff --git a/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts b/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts index 15c22b06fc4b..47954551f573 100644 --- a/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts +++ b/arch/arm/boot/dts/sun8i-v40-bananapi-m2-berry.dts @@ -120,7 +120,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_dc1sw>; status = "okay"; }; @@ -198,16 +198,16 @@ }; ®_dc1sw { - regulator-min-microvolt = <3000000>; - regulator-max-microvolt = <3000000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-name = "vcc-gmac-phy"; }; ®_dcdc1 { regulator-always-on; - regulator-min-microvolt = <3000000>; - regulator-max-microvolt = <3000000>; - regulator-name = "vcc-3v0"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-name = "vcc-3v3"; }; ®_dcdc2 { diff --git a/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts b/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts index d3b337b043a1..484b93df20cb 100644 --- a/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts +++ b/arch/arm/boot/dts/sun9i-a80-cubieboard4.dts @@ -129,7 +129,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_cldo1>; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun9i-a80-optimus.dts b/arch/arm/boot/dts/sun9i-a80-optimus.dts index bbc6335e5631..5c3580d712e4 100644 --- a/arch/arm/boot/dts/sun9i-a80-optimus.dts +++ b/arch/arm/boot/dts/sun9i-a80-optimus.dts @@ -124,7 +124,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-supply = <®_cldo1>; status = "okay"; }; diff --git a/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi b/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi index 39263e74fbb5..8e5cb3b3fd68 100644 --- a/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi +++ b/arch/arm/boot/dts/sunxi-bananapi-m2-plus.dtsi @@ -126,7 +126,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts index e500911ce0a5..6f1e0f0d4f0a 100644 --- a/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts +++ b/arch/arm/boot/dts/vf610-zii-dev-rev-b.dts @@ -406,6 +406,9 @@ }; }; +&mdio1 { + clock-frequency = <5000000>; +}; &iomuxc { pinctrl_gpio_e6185_eeprom_sel: pinctrl-gpio-e6185-eeprom-spi0 { diff --git a/arch/arm/configs/imx_v4_v5_defconfig b/arch/arm/configs/imx_v4_v5_defconfig index aeb1209e0804..bb70acc6b526 100644 --- a/arch/arm/configs/imx_v4_v5_defconfig +++ b/arch/arm/configs/imx_v4_v5_defconfig @@ -93,6 +93,7 @@ CONFIG_SPI=y CONFIG_SPI_IMX=y CONFIG_SPI_SPIDEV=y CONFIG_GPIO_SYSFS=y +CONFIG_GPIO_MXC=y CONFIG_W1=y CONFIG_W1_MASTER_MXC=y CONFIG_W1_SLAVE_THERM=y diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 0fa79bd00219..221f5c340c86 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -217,6 +217,7 @@ CONFIG_GPIO_PCA953X=y CONFIG_GPIO_PCF857X=y CONFIG_GPIO_STMPE=y CONFIG_GPIO_74X164=y +CONFIG_GPIO_MXC=y CONFIG_POWER_RESET=y CONFIG_POWER_RESET_SYSCON=y CONFIG_POWER_RESET_SYSCON_POWEROFF=y diff --git a/arch/arm/configs/multi_v5_defconfig b/arch/arm/configs/multi_v5_defconfig index 70b709a669d2..e00be9faa23b 100644 --- a/arch/arm/configs/multi_v5_defconfig +++ b/arch/arm/configs/multi_v5_defconfig @@ -166,6 +166,7 @@ CONFIG_SPI_IMX=y CONFIG_SPI_ORION=y CONFIG_GPIO_ASPEED=m CONFIG_GPIO_ASPEED_SGPIO=y +CONFIG_GPIO_MXC=y CONFIG_POWER_RESET=y CONFIG_POWER_RESET_GPIO=y CONFIG_POWER_RESET_QNAP=y diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index e731cdf7c88c..a611b0c1e540 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -465,6 +465,7 @@ CONFIG_GPIO_PALMAS=y CONFIG_GPIO_TPS6586X=y CONFIG_GPIO_TPS65910=y CONFIG_GPIO_TWL4030=y +CONFIG_GPIO_MXC=y CONFIG_POWER_AVS=y CONFIG_ROCKCHIP_IODOMAIN=y CONFIG_POWER_RESET_AS3722=y diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 34793aabdb65..58df9fd79a76 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -81,7 +81,6 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_BINFMT_MISC=y CONFIG_CMA=y CONFIG_ZSMALLOC=m -CONFIG_ZSMALLOC_PGTABLE_MAPPING=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S index 4d1707388d94..312428d83eed 100644 --- a/arch/arm/crypto/aes-ce-core.S +++ b/arch/arm/crypto/aes-ce-core.S @@ -386,20 +386,32 @@ ENTRY(ce_aes_ctr_encrypt) .Lctrloop4x: subs r4, r4, #4 bmi .Lctr1x - add r6, r6, #1 + + /* + * NOTE: the sequence below has been carefully tweaked to avoid + * a silicon erratum that exists in Cortex-A57 (#1742098) and + * Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs + * may produce an incorrect result if they take their input from a + * register of which a single 32-bit lane has been updated the last + * time it was modified. To work around this, the lanes of registers + * q0-q3 below are not manipulated individually, and the different + * counter values are prepared by successive manipulations of q7. + */ + add ip, r6, #1 vmov q0, q7 + rev ip, ip + add lr, r6, #2 + vmov s31, ip @ set lane 3 of q1 via q7 + add ip, r6, #3 + rev lr, lr vmov q1, q7 - rev ip, r6 - add r6, r6, #1 + vmov s31, lr @ set lane 3 of q2 via q7 + rev ip, ip vmov q2, q7 - vmov s7, ip - rev ip, r6 - add r6, r6, #1 + vmov s31, ip @ set lane 3 of q3 via q7 + add r6, r6, #4 vmov q3, q7 - vmov s11, ip - rev ip, r6 - add r6, r6, #1 - vmov s15, ip + vld1.8 {q4-q5}, [r1]! vld1.8 {q6}, [r1]! vld1.8 {q15}, [r1]! diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c index bda8bf17631e..f70af1d0514b 100644 --- a/arch/arm/crypto/aes-neonbs-glue.c +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -19,7 +19,7 @@ MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS_CRYPTO("ecb(aes)"); -MODULE_ALIAS_CRYPTO("cbc(aes)"); +MODULE_ALIAS_CRYPTO("cbc(aes)-all"); MODULE_ALIAS_CRYPTO("ctr(aes)"); MODULE_ALIAS_CRYPTO("xts(aes)"); @@ -191,7 +191,8 @@ static int cbc_init(struct crypto_skcipher *tfm) struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); unsigned int reqsize; - ctx->enc_tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); + ctx->enc_tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->enc_tfm)) return PTR_ERR(ctx->enc_tfm); @@ -441,7 +442,8 @@ static struct skcipher_alg aes_algs[] = { { .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx), .base.cra_module = THIS_MODULE, - .base.cra_flags = CRYPTO_ALG_INTERNAL, + .base.cra_flags = CRYPTO_ALG_INTERNAL | + CRYPTO_ALG_NEED_FALLBACK, .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c index 59da6c0b63b6..7b5cf8430c6d 100644 --- a/arch/arm/crypto/chacha-glue.c +++ b/arch/arm/crypto/chacha-glue.c @@ -23,7 +23,7 @@ asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, int nrounds); asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, - int nrounds); + int nrounds, unsigned int nbytes); asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds); asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); @@ -42,24 +42,24 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, { u8 buf[CHACHA_BLOCK_SIZE]; - while (bytes >= CHACHA_BLOCK_SIZE * 4) { - chacha_4block_xor_neon(state, dst, src, nrounds); - bytes -= CHACHA_BLOCK_SIZE * 4; - src += CHACHA_BLOCK_SIZE * 4; - dst += CHACHA_BLOCK_SIZE * 4; - state[12] += 4; - } - while (bytes >= CHACHA_BLOCK_SIZE) { - chacha_block_xor_neon(state, dst, src, nrounds); - bytes -= CHACHA_BLOCK_SIZE; - src += CHACHA_BLOCK_SIZE; - dst += CHACHA_BLOCK_SIZE; - state[12]++; + while (bytes > CHACHA_BLOCK_SIZE) { + unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U); + + chacha_4block_xor_neon(state, dst, src, nrounds, l); + bytes -= l; + src += l; + dst += l; + state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); } if (bytes) { - memcpy(buf, src, bytes); - chacha_block_xor_neon(state, buf, buf, nrounds); - memcpy(dst, buf, bytes); + const u8 *s = src; + u8 *d = dst; + + if (bytes != CHACHA_BLOCK_SIZE) + s = d = memcpy(buf, src, bytes); + chacha_block_xor_neon(state, d, s, nrounds); + if (d != dst) + memcpy(dst, buf, bytes); } } diff --git a/arch/arm/crypto/chacha-neon-core.S b/arch/arm/crypto/chacha-neon-core.S index eb22926d4912..13d12f672656 100644 --- a/arch/arm/crypto/chacha-neon-core.S +++ b/arch/arm/crypto/chacha-neon-core.S @@ -47,6 +47,7 @@ */ #include <linux/linkage.h> +#include <asm/cache.h> .text .fpu neon @@ -205,7 +206,7 @@ ENDPROC(hchacha_block_neon) .align 5 ENTRY(chacha_4block_xor_neon) - push {r4-r5} + push {r4, lr} mov r4, sp // preserve the stack pointer sub ip, sp, #0x20 // allocate a 32 byte buffer bic ip, ip, #0x1f // aligned to 32 bytes @@ -229,10 +230,10 @@ ENTRY(chacha_4block_xor_neon) vld1.32 {q0-q1}, [r0] vld1.32 {q2-q3}, [ip] - adr r5, .Lctrinc + adr lr, .Lctrinc vdup.32 q15, d7[1] vdup.32 q14, d7[0] - vld1.32 {q4}, [r5, :128] + vld1.32 {q4}, [lr, :128] vdup.32 q13, d6[1] vdup.32 q12, d6[0] vdup.32 q11, d5[1] @@ -455,7 +456,7 @@ ENTRY(chacha_4block_xor_neon) // Re-interleave the words in the first two rows of each block (x0..7). // Also add the counter values 0-3 to x12[0-3]. - vld1.32 {q8}, [r5, :128] // load counter values 0-3 + vld1.32 {q8}, [lr, :128] // load counter values 0-3 vzip.32 q0, q1 // => (0 1 0 1) (0 1 0 1) vzip.32 q2, q3 // => (2 3 2 3) (2 3 2 3) vzip.32 q4, q5 // => (4 5 4 5) (4 5 4 5) @@ -493,6 +494,8 @@ ENTRY(chacha_4block_xor_neon) // Re-interleave the words in the last two rows of each block (x8..15). vld1.32 {q8-q9}, [sp, :256] + mov sp, r4 // restore original stack pointer + ldr r4, [r4, #8] // load number of bytes vzip.32 q12, q13 // => (12 13 12 13) (12 13 12 13) vzip.32 q14, q15 // => (14 15 14 15) (14 15 14 15) vzip.32 q8, q9 // => (8 9 8 9) (8 9 8 9) @@ -520,41 +523,121 @@ ENTRY(chacha_4block_xor_neon) // XOR the rest of the data with the keystream vld1.8 {q0-q1}, [r2]! + subs r4, r4, #96 veor q0, q0, q8 veor q1, q1, q12 + ble .Lle96 vst1.8 {q0-q1}, [r1]! vld1.8 {q0-q1}, [r2]! + subs r4, r4, #32 veor q0, q0, q2 veor q1, q1, q6 + ble .Lle128 vst1.8 {q0-q1}, [r1]! vld1.8 {q0-q1}, [r2]! + subs r4, r4, #32 veor q0, q0, q10 veor q1, q1, q14 + ble .Lle160 vst1.8 {q0-q1}, [r1]! vld1.8 {q0-q1}, [r2]! + subs r4, r4, #32 veor q0, q0, q4 veor q1, q1, q5 + ble .Lle192 vst1.8 {q0-q1}, [r1]! vld1.8 {q0-q1}, [r2]! + subs r4, r4, #32 veor q0, q0, q9 veor q1, q1, q13 + ble .Lle224 vst1.8 {q0-q1}, [r1]! vld1.8 {q0-q1}, [r2]! + subs r4, r4, #32 veor q0, q0, q3 veor q1, q1, q7 + blt .Llt256 +.Lout: vst1.8 {q0-q1}, [r1]! vld1.8 {q0-q1}, [r2] - mov sp, r4 // restore original stack pointer veor q0, q0, q11 veor q1, q1, q15 vst1.8 {q0-q1}, [r1] - pop {r4-r5} - bx lr + pop {r4, pc} + +.Lle192: + vmov q4, q9 + vmov q5, q13 + +.Lle160: + // nothing to do + +.Lfinalblock: + // Process the final block if processing less than 4 full blocks. + // Entered with 32 bytes of ChaCha cipher stream in q4-q5, and the + // previous 32 byte output block that still needs to be written at + // [r1] in q0-q1. + beq .Lfullblock + +.Lpartialblock: + adr lr, .Lpermute + 32 + add r2, r2, r4 + add lr, lr, r4 + add r4, r4, r1 + + vld1.8 {q2-q3}, [lr] + vld1.8 {q6-q7}, [r2] + + add r4, r4, #32 + + vtbl.8 d4, {q4-q5}, d4 + vtbl.8 d5, {q4-q5}, d5 + vtbl.8 d6, {q4-q5}, d6 + vtbl.8 d7, {q4-q5}, d7 + + veor q6, q6, q2 + veor q7, q7, q3 + + vst1.8 {q6-q7}, [r4] // overlapping stores + vst1.8 {q0-q1}, [r1] + pop {r4, pc} + +.Lfullblock: + vmov q11, q4 + vmov q15, q5 + b .Lout +.Lle96: + vmov q4, q2 + vmov q5, q6 + b .Lfinalblock +.Lle128: + vmov q4, q10 + vmov q5, q14 + b .Lfinalblock +.Lle224: + vmov q4, q3 + vmov q5, q7 + b .Lfinalblock +.Llt256: + vmov q4, q11 + vmov q5, q15 + b .Lpartialblock ENDPROC(chacha_4block_xor_neon) + + .align L1_CACHE_SHIFT +.Lpermute: + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f + .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 + .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f + .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 + .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c index e79b1fb4b4dc..de9100c67b37 100644 --- a/arch/arm/crypto/sha1-ce-glue.c +++ b/arch/arm/crypto/sha1-ce-glue.c @@ -7,7 +7,7 @@ #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <crypto/sha1_base.h> #include <linux/cpufeature.h> #include <linux/crypto.h> diff --git a/arch/arm/crypto/sha1.h b/arch/arm/crypto/sha1.h index 758db3e9ff0a..b1b7e21da2c3 100644 --- a/arch/arm/crypto/sha1.h +++ b/arch/arm/crypto/sha1.h @@ -3,7 +3,7 @@ #define ASM_ARM_CRYPTO_SHA1_H #include <linux/crypto.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> extern int sha1_update_arm(struct shash_desc *desc, const u8 *data, unsigned int len); diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c index 4e954b3f7ecd..6c2b849e459d 100644 --- a/arch/arm/crypto/sha1_glue.c +++ b/arch/arm/crypto/sha1_glue.c @@ -15,7 +15,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <crypto/sha1_base.h> #include <asm/byteorder.h> diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c index 0071e5e4411a..cfe36ae0f3f5 100644 --- a/arch/arm/crypto/sha1_neon_glue.c +++ b/arch/arm/crypto/sha1_neon_glue.c @@ -19,7 +19,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <crypto/sha1_base.h> #include <asm/neon.h> #include <asm/simd.h> diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c index 87f0b62386c6..c62ce89dd3e0 100644 --- a/arch/arm/crypto/sha2-ce-glue.c +++ b/arch/arm/crypto/sha2-ce-glue.c @@ -7,7 +7,7 @@ #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha256_base.h> #include <linux/cpufeature.h> #include <linux/crypto.h> diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c index b8a4f79020cf..433ee4ddce6c 100644 --- a/arch/arm/crypto/sha256_glue.c +++ b/arch/arm/crypto/sha256_glue.c @@ -17,7 +17,7 @@ #include <linux/mm.h> #include <linux/types.h> #include <linux/string.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha256_base.h> #include <asm/simd.h> #include <asm/neon.h> diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c index 79820b9e2541..701706262ef3 100644 --- a/arch/arm/crypto/sha256_neon_glue.c +++ b/arch/arm/crypto/sha256_neon_glue.c @@ -13,7 +13,7 @@ #include <crypto/internal/simd.h> #include <linux/types.h> #include <linux/string.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha256_base.h> #include <asm/byteorder.h> #include <asm/simd.h> diff --git a/arch/arm/crypto/sha512-glue.c b/arch/arm/crypto/sha512-glue.c index 8775aa42bbbe..0635a65aa488 100644 --- a/arch/arm/crypto/sha512-glue.c +++ b/arch/arm/crypto/sha512-glue.c @@ -6,7 +6,7 @@ */ #include <crypto/internal/hash.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha512_base.h> #include <linux/crypto.h> #include <linux/module.h> diff --git a/arch/arm/crypto/sha512-neon-glue.c b/arch/arm/crypto/sha512-neon-glue.c index 96cb94403540..c879ad32db51 100644 --- a/arch/arm/crypto/sha512-neon-glue.c +++ b/arch/arm/crypto/sha512-neon-glue.c @@ -7,7 +7,7 @@ #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha512_base.h> #include <linux/crypto.h> #include <linux/module.h> diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h index 213607a1f45c..e26a278d301a 100644 --- a/arch/arm/include/asm/kprobes.h +++ b/arch/arm/include/asm/kprobes.h @@ -44,20 +44,20 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); /* optinsn template addresses */ -extern __visible kprobe_opcode_t optprobe_template_entry; -extern __visible kprobe_opcode_t optprobe_template_val; -extern __visible kprobe_opcode_t optprobe_template_call; -extern __visible kprobe_opcode_t optprobe_template_end; -extern __visible kprobe_opcode_t optprobe_template_sub_sp; -extern __visible kprobe_opcode_t optprobe_template_add_sp; -extern __visible kprobe_opcode_t optprobe_template_restore_begin; -extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn; -extern __visible kprobe_opcode_t optprobe_template_restore_end; +extern __visible kprobe_opcode_t optprobe_template_entry[]; +extern __visible kprobe_opcode_t optprobe_template_val[]; +extern __visible kprobe_opcode_t optprobe_template_call[]; +extern __visible kprobe_opcode_t optprobe_template_end[]; +extern __visible kprobe_opcode_t optprobe_template_sub_sp[]; +extern __visible kprobe_opcode_t optprobe_template_add_sp[]; +extern __visible kprobe_opcode_t optprobe_template_restore_begin[]; +extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn[]; +extern __visible kprobe_opcode_t optprobe_template_restore_end[]; #define MAX_OPTIMIZED_LENGTH 4 #define MAX_OPTINSN_SIZE \ - ((unsigned long)&optprobe_template_end - \ - (unsigned long)&optprobe_template_entry) + ((unsigned long)optprobe_template_end - \ + (unsigned long)optprobe_template_entry) #define RELATIVEJUMP_SIZE 4 struct arch_optimized_insn { diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 3502c2f746ca..baf7d0204eb5 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -75,6 +75,8 @@ #define PTE_HWTABLE_OFF (PTE_HWTABLE_PTRS * sizeof(pte_t)) #define PTE_HWTABLE_SIZE (PTRS_PER_PTE * sizeof(u32)) +#define MAX_POSSIBLE_PHYSMEM_BITS 32 + /* * PMD_SHIFT determines the size of the area a second-level page table can map * PGDIR_SHIFT determines what a third-level page table entry can map diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index fbb6693c3352..2b85d175e999 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -25,6 +25,8 @@ #define PTE_HWTABLE_OFF (0) #define PTE_HWTABLE_SIZE (PTRS_PER_PTE * sizeof(u64)) +#define MAX_POSSIBLE_PHYSMEM_BITS 40 + /* * PGDIR_SHIFT determines the size a top-level page table entry can map. */ diff --git a/arch/arm/kernel/perf_regs.c b/arch/arm/kernel/perf_regs.c index 05fe92aa7d98..0529f90395c9 100644 --- a/arch/arm/kernel/perf_regs.c +++ b/arch/arm/kernel/perf_regs.c @@ -32,8 +32,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 8e6ace03e960..9f199b1e8383 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -71,7 +71,7 @@ void arch_cpu_idle(void) arm_pm_idle(); else cpu_do_idle(); - local_irq_enable(); + raw_local_irq_enable(); } void arch_cpu_idle_prepare(void) diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 5f4922e858d0..f7f4620d59c3 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -41,6 +41,10 @@ SECTIONS #ifndef CONFIG_SMP_ON_UP *(.alt.smp.init) #endif +#ifndef CONFIG_ARM_UNWIND + *(.ARM.exidx) *(.ARM.exidx.*) + *(.ARM.extab) *(.ARM.extab.*) +#endif } . = PAGE_OFFSET + TEXT_OFFSET; diff --git a/arch/arm/mach-imx/anatop.c b/arch/arm/mach-imx/anatop.c index d841bed8664d..7bb47eb3fc07 100644 --- a/arch/arm/mach-imx/anatop.c +++ b/arch/arm/mach-imx/anatop.c @@ -136,7 +136,7 @@ void __init imx_init_revision_from_anatop(void) src_np = of_find_compatible_node(NULL, NULL, "fsl,imx6ul-src"); - src_base = of_iomap(np, 0); + src_base = of_iomap(src_np, 0); of_node_put(src_np); WARN_ON(!src_base); sbmr2 = readl_relaxed(src_base + SRC_SBMR2); diff --git a/arch/arm/mach-keystone/memory.h b/arch/arm/mach-keystone/memory.h index 9147565d0581..1b9ed1271e05 100644 --- a/arch/arm/mach-keystone/memory.h +++ b/arch/arm/mach-keystone/memory.h @@ -6,9 +6,6 @@ #ifndef __MEMORY_H #define __MEMORY_H -#define MAX_PHYSMEM_BITS 36 -#define SECTION_SIZE_BITS 34 - #define KEYSTONE_LOW_PHYS_START 0x80000000ULL #define KEYSTONE_LOW_PHYS_SIZE 0x80000000ULL /* 2G */ #define KEYSTONE_LOW_PHYS_END (KEYSTONE_LOW_PHYS_START + \ diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S index 2d962fe48821..a3a64bf97250 100644 --- a/arch/arm/mach-mvebu/coherency_ll.S +++ b/arch/arm/mach-mvebu/coherency_ll.S @@ -35,13 +35,8 @@ ENTRY(ll_get_coherency_base) /* * MMU is disabled, use the physical address of the coherency - * base address. However, if the coherency fabric isn't mapped - * (i.e its virtual address is zero), it means coherency is - * not enabled, so we return 0. + * base address, (or 0x0 if the coherency fabric is not mapped) */ - ldr r1, =coherency_base - cmp r1, #0 - beq 2f adr r1, 3f ldr r3, [r1] ldr r1, [r1, r3] diff --git a/arch/arm/mach-omap1/board-osk.c b/arch/arm/mach-omap1/board-osk.c index 144b9caa935c..a720259099ed 100644 --- a/arch/arm/mach-omap1/board-osk.c +++ b/arch/arm/mach-omap1/board-osk.c @@ -288,7 +288,7 @@ static struct gpiod_lookup_table osk_usb_gpio_table = { .dev_id = "ohci", .table = { /* Power GPIO on the I2C-attached TPS65010 */ - GPIO_LOOKUP("i2c-tps65010", 1, "power", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("tps65010", 0, "power", GPIO_ACTIVE_HIGH), GPIO_LOOKUP(OMAP_GPIO_LABEL, 9, "overcurrent", GPIO_ACTIVE_HIGH), }, diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index 3ee7bdff86b2..3f62a0c9450d 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -7,7 +7,6 @@ config ARCH_OMAP2 depends on ARCH_MULTI_V6 select ARCH_OMAP2PLUS select CPU_V6 - select PM_GENERIC_DOMAINS if PM select SOC_HAS_OMAP2_SDRC config ARCH_OMAP3 @@ -106,6 +105,8 @@ config ARCH_OMAP2PLUS select OMAP_DM_TIMER select OMAP_GPMC select PINCTRL + select PM_GENERIC_DOMAINS if PM + select PM_GENERIC_DOMAINS_OF if PM select RESET_CONTROLLER select SOC_BUS select TI_SYSC diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c index a92d277f81a0..c8d317fafe2e 100644 --- a/arch/arm/mach-omap2/cpuidle44xx.c +++ b/arch/arm/mach-omap2/cpuidle44xx.c @@ -175,8 +175,11 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, if (mpuss_can_lose_context) { error = cpu_cluster_pm_enter(); if (error) { - omap_set_pwrdm_state(mpu_pd, PWRDM_POWER_ON); - goto cpu_cluster_pm_out; + index = 0; + cx = state_ptr + index; + pwrdm_set_logic_retst(mpu_pd, cx->mpu_logic_state); + omap_set_pwrdm_state(mpu_pd, cx->mpu_state); + mpuss_can_lose_context = 0; } } } @@ -184,7 +187,6 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev, omap4_enter_lowpower(dev->cpu, cx->cpu_state); cpu_done[dev->cpu] = true; -cpu_cluster_pm_out: /* Wakeup CPU1 only if it is not offlined */ if (dev->cpu == 0 && cpumask_test_cpu(1, cpu_online_mask)) { diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c index 06da2747a90b..19635721013d 100644 --- a/arch/arm/mach-sunxi/sunxi.c +++ b/arch/arm/mach-sunxi/sunxi.c @@ -66,6 +66,7 @@ static const char * const sun8i_board_dt_compat[] = { "allwinner,sun8i-h2-plus", "allwinner,sun8i-h3", "allwinner,sun8i-r40", + "allwinner,sun8i-v3", "allwinner,sun8i-v3s", NULL, }; diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index d57112a276f5..c23dbf8bebee 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -354,8 +354,8 @@ static void __init free_highpages(void) /* set highmem page free */ for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &range_start, &range_end, NULL) { - unsigned long start = PHYS_PFN(range_start); - unsigned long end = PHYS_PFN(range_end); + unsigned long start = PFN_UP(range_start); + unsigned long end = PFN_DOWN(range_end); /* Ignore complete lowmem entries */ if (end <= max_low) diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index 7a449df0b359..c78180172120 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -85,21 +85,21 @@ asm ( "optprobe_template_end:\n"); #define TMPL_VAL_IDX \ - ((unsigned long *)&optprobe_template_val - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_val - (unsigned long *)optprobe_template_entry) #define TMPL_CALL_IDX \ - ((unsigned long *)&optprobe_template_call - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_call - (unsigned long *)optprobe_template_entry) #define TMPL_END_IDX \ - ((unsigned long *)&optprobe_template_end - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_end - (unsigned long *)optprobe_template_entry) #define TMPL_ADD_SP \ - ((unsigned long *)&optprobe_template_add_sp - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_add_sp - (unsigned long *)optprobe_template_entry) #define TMPL_SUB_SP \ - ((unsigned long *)&optprobe_template_sub_sp - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_sub_sp - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_BEGIN \ - ((unsigned long *)&optprobe_template_restore_begin - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_begin - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_ORIGN_INSN \ - ((unsigned long *)&optprobe_template_restore_orig_insn - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_orig_insn - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_END \ - ((unsigned long *)&optprobe_template_restore_end - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_end - (unsigned long *)optprobe_template_entry) /* * ARM can always optimize an instruction when using ARM ISA, except @@ -234,7 +234,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *or } /* Copy arch-dep-instance from template. */ - memcpy(code, (unsigned long *)&optprobe_template_entry, + memcpy(code, (unsigned long *)optprobe_template_entry, TMPL_END_IDX * sizeof(kprobe_opcode_t)); /* Adjust buffer according to instruction. */ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f858c352f72a..a6b5b7ef40ae 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -81,6 +81,7 @@ config ARM64 select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36) + select ARCH_WANT_LD_ORPHAN_WARN select ARCH_HAS_UBSAN_SANITIZE_ALL select ARM_AMBA select ARM_ARCH_TIMER @@ -636,6 +637,26 @@ config ARM64_ERRATUM_1542419 If unsure, say Y. +config ARM64_ERRATUM_1508412 + bool "Cortex-A77: 1508412: workaround deadlock on sequence of NC/Device load and store exclusive or PAR read" + default y + help + This option adds a workaround for Arm Cortex-A77 erratum 1508412. + + Affected Cortex-A77 cores (r0p0, r1p0) could deadlock on a sequence + of a store-exclusive or read of PAR_EL1 and a load with device or + non-cacheable memory attributes. The workaround depends on a firmware + counterpart. + + KVM guests must also have the workaround implemented or they can + deadlock the system. + + Work around the issue by inserting DMB SY barriers around PAR_EL1 + register reads and warning KVM users. The DMB barrier is sufficient + to prevent a speculative PAR_EL1 read. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y @@ -982,7 +1003,7 @@ config NUMA config NODES_SHIFT int "Maximum NUMA Nodes (as a power of 2)" range 1 10 - default "2" + default "4" depends on NEED_MULTIPLE_NODES help Specify the maximum number of NUMA Nodes available on the target diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 6f2494dd6d60..5c4ac1c9f4e0 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -54,6 +54,7 @@ config ARCH_BCM_IPROC config ARCH_BERLIN bool "Marvell Berlin SoC Family" select DW_APB_ICTL + select DW_APB_TIMER_OF select GPIOLIB select PINCTRL help diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 5789c2d18d43..6a87d592bd00 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -28,10 +28,6 @@ LDFLAGS_vmlinux += --fix-cortex-a53-843419 endif endif -# We never want expected sections to be placed heuristically by the -# linker. All sections should be explicitly named in the linker script. -LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn) - ifeq ($(CONFIG_ARM64_USE_LSE_ATOMICS), y) ifneq ($(CONFIG_ARM64_LSE_ATOMICS), y) $(warning LSE atomics not supported by binutils) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts index 3ea5182ca489..e5e840b9fbb4 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-bananapi-m64.dts @@ -105,7 +105,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_dc1sw>; status = "okay"; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts index d894ec5fa8a1..70e31743f0ba 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts @@ -120,7 +120,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_gmac_3v3>; status = "okay"; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts index b26181cf9095..b54099b654c8 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pine64-plus.dts @@ -13,7 +13,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-txid"; phy-handle = <&ext_rgmii_phy>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts index 3ab0f0347bc9..0494bfaf2ffa 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinetab.dts @@ -122,9 +122,6 @@ status = "okay"; port { - #address-cells = <1>; - #size-cells = <0>; - csi_ep: endpoint { remote-endpoint = <&ov5640_ep>; bus-width = <8>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts index 9ebb9e07fae3..d4069749d721 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine-baseboard.dts @@ -79,7 +79,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_dc1sw>; status = "okay"; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts index df1b9263ad0e..6e30a564c87f 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-libretech-all-h5-cc.dts @@ -36,7 +36,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; /delete-property/ allwinner,leds-active-low; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo-plus2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo-plus2.dts index 4f9ba53ffaae..9d93fe153689 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo-plus2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo-plus2.dts @@ -96,7 +96,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts index 7d7aad18f078..8bf2db9dcbda 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-pc2.dts @@ -123,7 +123,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts index cb44bfa5981f..33ab44072e6d 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-prime.dts @@ -124,7 +124,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts index 3f7ceeb1a767..7c9dbde645b5 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts @@ -97,7 +97,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&ext_rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_aldo2>; status = "okay"; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-one-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-one-plus.dts index fceb298bfd53..29a081e72a9b 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-one-plus.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-orangepi-one-plus.dts @@ -27,7 +27,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&ext_rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_gmac_3v3>; allwinner,rx-delay-ps = <200>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts index af85b2074867..961732c52aa0 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts @@ -100,7 +100,7 @@ &emac { pinctrl-names = "default"; pinctrl-0 = <&ext_rgmii_pins>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; phy-handle = <&ext_rgmii_phy>; phy-supply = <®_gmac_3v3>; allwinner,rx-delay-ps = <200>; diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts index feadd21bc0dc..46e558ab7729 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts @@ -159,7 +159,7 @@ flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00a"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts index c07966740e14..f9b4a39683cf 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts @@ -192,7 +192,7 @@ flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00a"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm64/boot/dts/amlogic/meson-axg-s400.dts b/arch/arm64/boot/dts/amlogic/meson-axg-s400.dts index cb1360ae1211..7740f97c240f 100644 --- a/arch/arm64/boot/dts/amlogic/meson-axg-s400.dts +++ b/arch/arm64/boot/dts/amlogic/meson-axg-s400.dts @@ -584,3 +584,9 @@ pinctrl-0 = <&uart_ao_a_pins>; pinctrl-names = "default"; }; + +&usb { + status = "okay"; + dr_mode = "otg"; + vbus-supply = <&usb_pwr>; +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi index b9efc8469265..724ee179b316 100644 --- a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi @@ -171,6 +171,46 @@ #size-cells = <2>; ranges; + usb: usb@ffe09080 { + compatible = "amlogic,meson-axg-usb-ctrl"; + reg = <0x0 0xffe09080 0x0 0x20>; + interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>; + #address-cells = <2>; + #size-cells = <2>; + ranges; + + clocks = <&clkc CLKID_USB>, <&clkc CLKID_USB1_DDR_BRIDGE>; + clock-names = "usb_ctrl", "ddr"; + resets = <&reset RESET_USB_OTG>; + + dr_mode = "otg"; + + phys = <&usb2_phy1>; + phy-names = "usb2-phy1"; + + dwc2: usb@ff400000 { + compatible = "amlogic,meson-g12a-usb", "snps,dwc2"; + reg = <0x0 0xff400000 0x0 0x40000>; + interrupts = <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clkc CLKID_USB1>; + clock-names = "otg"; + phys = <&usb2_phy1>; + dr_mode = "peripheral"; + g-rx-fifo-size = <192>; + g-np-tx-fifo-size = <128>; + g-tx-fifo-size = <128 128 16 16 16>; + }; + + dwc3: usb@ff500000 { + compatible = "snps,dwc3"; + reg = <0x0 0xff500000 0x0 0x100000>; + interrupts = <GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>; + dr_mode = "host"; + maximum-speed = "high-speed"; + snps,dis_u2_susphy_quirk; + }; + }; + ethmac: ethernet@ff3f0000 { compatible = "amlogic,meson-axg-dwmac", "snps,dwmac-3.70a", @@ -187,6 +227,8 @@ "timing-adjustment"; rx-fifo-depth = <4096>; tx-fifo-depth = <2048>; + resets = <&reset RESET_ETHERNET>; + reset-names = "stmmaceth"; status = "disabled"; }; @@ -1734,6 +1776,16 @@ clock-names = "core", "clkin0", "clkin1"; resets = <&reset RESET_SD_EMMC_C>; }; + + usb2_phy1: phy@9020 { + compatible = "amlogic,meson-gxl-usb2-phy"; + #phy-cells = <0>; + reg = <0x0 0x9020 0x0 0x20>; + clocks = <&clkc CLKID_USB>; + clock-names = "phy"; + resets = <&reset RESET_USB_OTG>; + reset-names = "phy"; + }; }; sram: sram@fffc0000 { diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi index 1e83ec5b8c91..8514fe6a275a 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi @@ -209,7 +209,7 @@ }; ethmac: ethernet@ff3f0000 { - compatible = "amlogic,meson-axg-dwmac", + compatible = "amlogic,meson-g12a-dwmac", "snps,dwmac-3.70a", "snps,dwmac"; reg = <0x0 0xff3f0000 0x0 0x10000>, @@ -224,6 +224,8 @@ "timing-adjustment"; rx-fifo-depth = <4096>; tx-fifo-depth = <2048>; + resets = <&reset RESET_ETHERNET>; + reset-names = "stmmaceth"; status = "disabled"; mdio0: mdio { @@ -282,6 +284,8 @@ hwrng: rng@218 { compatible = "amlogic,meson-rng"; reg = <0x0 0x218 0x0 0x4>; + clocks = <&clkc CLKID_RNG0>; + clock-names = "core"; }; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2-plus.dts b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2-plus.dts index 5de2815ba99d..ce1198ad34e4 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2-plus.dts +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2-plus.dts @@ -19,7 +19,7 @@ regulator-min-microvolt = <680000>; regulator-max-microvolt = <1040000>; - pwms = <&pwm_AO_cd 1 1500 0>; + pwms = <&pwm_ab 0 1500 0>; }; &vddcpu_b { diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b.dtsi index 9b8548e5f6e5..ee8fcae9f9f0 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b.dtsi @@ -135,3 +135,7 @@ }; }; }; + +&mali { + dma-coherent; +}; diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 0edd137151f8..726b91d3a905 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -13,6 +13,7 @@ #include <dt-bindings/interrupt-controller/irq.h> #include <dt-bindings/interrupt-controller/arm-gic.h> #include <dt-bindings/power/meson-gxbb-power.h> +#include <dt-bindings/reset/amlogic,meson-gxbb-reset.h> #include <dt-bindings/thermal/thermal.h> / { @@ -575,6 +576,8 @@ interrupt-names = "macirq"; rx-fifo-depth = <4096>; tx-fifo-depth = <2048>; + resets = <&reset RESET_ETHERNET>; + reset-names = "stmmaceth"; power-domains = <&pwrc PWRC_GXBB_ETHERNET_MEM_ID>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi index 55259f973b5a..aef8f2b00778 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray-usb.dtsi @@ -5,20 +5,20 @@ usb { compatible = "simple-bus"; dma-ranges; - #address-cells = <1>; - #size-cells = <1>; - ranges = <0x0 0x0 0x68500000 0x00400000>; + #address-cells = <2>; + #size-cells = <2>; + ranges = <0x0 0x0 0x0 0x68500000 0x0 0x00400000>; usbphy0: usb-phy@0 { compatible = "brcm,sr-usb-combo-phy"; - reg = <0x00000000 0x100>; + reg = <0x0 0x00000000 0x0 0x100>; #phy-cells = <1>; status = "disabled"; }; xhci0: usb@1000 { compatible = "generic-xhci"; - reg = <0x00001000 0x1000>; + reg = <0x0 0x00001000 0x0 0x1000>; interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>; phys = <&usbphy0 1>, <&usbphy0 0>; phy-names = "phy0", "phy1"; @@ -28,7 +28,7 @@ bdc0: usb@2000 { compatible = "brcm,bdc-v0.16"; - reg = <0x00002000 0x1000>; + reg = <0x0 0x00002000 0x0 0x1000>; interrupts = <GIC_SPI 259 IRQ_TYPE_LEVEL_HIGH>; phys = <&usbphy0 0>, <&usbphy0 1>; phy-names = "phy0", "phy1"; @@ -38,21 +38,21 @@ usbphy1: usb-phy@10000 { compatible = "brcm,sr-usb-combo-phy"; - reg = <0x00010000 0x100>; + reg = <0x0 0x00010000 0x0 0x100>; #phy-cells = <1>; status = "disabled"; }; usbphy2: usb-phy@20000 { compatible = "brcm,sr-usb-hs-phy"; - reg = <0x00020000 0x100>; + reg = <0x0 0x00020000 0x0 0x100>; #phy-cells = <0>; status = "disabled"; }; xhci1: usb@11000 { compatible = "generic-xhci"; - reg = <0x00011000 0x1000>; + reg = <0x0 0x00011000 0x0 0x1000>; interrupts = <GIC_SPI 263 IRQ_TYPE_LEVEL_HIGH>; phys = <&usbphy1 1>, <&usbphy2>, <&usbphy1 0>; phy-names = "phy0", "phy1", "phy2"; @@ -62,7 +62,7 @@ bdc1: usb@21000 { compatible = "brcm,bdc-v0.16"; - reg = <0x00021000 0x1000>; + reg = <0x0 0x00021000 0x0 0x1000>; interrupts = <GIC_SPI 266 IRQ_TYPE_LEVEL_HIGH>; phys = <&usbphy2>; phy-names = "phy0"; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts index f46eb47cfa4d..8161dd237971 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts @@ -75,6 +75,7 @@ &enetc_port0 { phy-handle = <&phy0>; phy-connection-type = "sgmii"; + managed = "in-band-status"; status = "okay"; mdio { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index 73e4f9466887..7a6fb7e1fb82 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -1012,6 +1012,7 @@ compatible = "fsl,ls1028a-rcpm", "fsl,qoriq-rcpm-2.1+"; reg = <0x0 0x1e34040 0x0 0x1c>; #fsl,rcpm-wakeup-cells = <7>; + little-endian; }; ftm_alarm0: timer@2800000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi index ff5805206a28..692d8f4a206d 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi @@ -805,6 +805,7 @@ compatible = "fsl,ls1088a-rcpm", "fsl,qoriq-rcpm-2.1+"; reg = <0x0 0x1e34040 0x0 0x18>; #fsl,rcpm-wakeup-cells = <6>; + little-endian; }; ftm_alarm0: timer@2800000 { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi index bf72918fe545..e7abb74bd816 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi @@ -892,6 +892,7 @@ compatible = "fsl,ls208xa-rcpm", "fsl,qoriq-rcpm-2.1+"; reg = <0x0 0x1e34040 0x0 0x18>; #fsl,rcpm-wakeup-cells = <6>; + little-endian; }; ftm_alarm0: timer@2800000 { diff --git a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi index 6de86a4f0ec4..b88c3c99b007 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-beacon-som.dtsi @@ -72,6 +72,7 @@ pmic@4b { compatible = "rohm,bd71847"; reg = <0x4b>; + pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio1>; interrupts = <3 IRQ_TYPE_LEVEL_LOW>; @@ -210,6 +211,7 @@ host-wakeup-gpios = <&gpio2 8 GPIO_ACTIVE_HIGH>; device-wakeup-gpios = <&gpio2 7 GPIO_ACTIVE_HIGH>; clocks = <&osc_32k>; + max-speed = <4000000>; clock-names = "extclk"; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi index f305a530ff6f..521eb3a5a12e 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-evk.dtsi @@ -121,6 +121,7 @@ pmic@4b { compatible = "rohm,bd71847"; reg = <0x4b>; + pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio1>; interrupts = <3 IRQ_TYPE_LEVEL_LOW>; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi index 4107fe914d08..49082529764f 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-var-som.dtsi @@ -135,13 +135,10 @@ pmic@4b { compatible = "rohm,bd71847"; reg = <0x4b>; + pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio2>; - /* - * The interrupt is not correct. It should be level low, - * however with internal pull up this causes IRQ storm. - */ - interrupts = <8 IRQ_TYPE_EDGE_RISING>; + interrupts = <8 IRQ_TYPE_LEVEL_LOW>; rohm,reset-snvs-powered; #clock-cells = <0>; @@ -398,7 +395,7 @@ pinctrl_pmic: pmicirqgrp { fsl,pins = < - MX8MM_IOMUXC_SD1_DATA6_GPIO2_IO8 0x41 + MX8MM_IOMUXC_SD1_DATA6_GPIO2_IO8 0x141 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index b83f400def8b..05ee062548e4 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -129,7 +129,7 @@ opp-1600000000 { opp-hz = /bits/ 64 <1600000000>; - opp-microvolt = <900000>; + opp-microvolt = <950000>; opp-supported-hw = <0xc>, <0x7>; clock-latency-ns = <150000>; opp-suspend; diff --git a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts index 46e76cf32b2f..7dfee715a2c4 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-ddr4-evk.dts @@ -53,6 +53,7 @@ pmic@4b { compatible = "rohm,bd71847"; reg = <0x4b>; + pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio1>; interrupts = <3 IRQ_TYPE_LEVEL_LOW>; diff --git a/arch/arm64/boot/dts/freescale/imx8mn-evk.dts b/arch/arm64/boot/dts/freescale/imx8mn-evk.dts index 707d8486b4d8..8311b95dee49 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mn-evk.dts @@ -18,6 +18,7 @@ pmic: pmic@25 { compatible = "nxp,pca9450b"; reg = <0x25>; + pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio1>; interrupts = <3 IRQ_TYPE_LEVEL_LOW>; diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi index a2d0190921e4..7f356edf9f91 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi @@ -116,13 +116,10 @@ pmic@4b { compatible = "rohm,bd71847"; reg = <0x4b>; + pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pmic>; interrupt-parent = <&gpio2>; - /* - * The interrupt is not correct. It should be level low, - * however with internal pull up this causes IRQ storm. - */ - interrupts = <8 IRQ_TYPE_EDGE_RISING>; + interrupts = <8 IRQ_TYPE_LEVEL_LOW>; rohm,reset-snvs-powered; regulators { @@ -388,7 +385,7 @@ pinctrl_pmic: pmicirqgrp { fsl,pins = < - MX8MN_IOMUXC_SD1_DATA6_GPIO2_IO8 0x101 + MX8MN_IOMUXC_SD1_DATA6_GPIO2_IO8 0x141 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index 746faf1cf2fb..16c7202885d7 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -790,28 +790,6 @@ #index-cells = <1>; reg = <0x32e40200 0x200>; }; - - usbotg2: usb@32e50000 { - compatible = "fsl,imx8mn-usb", "fsl,imx7d-usb"; - reg = <0x32e50000 0x200>; - interrupts = <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clk IMX8MN_CLK_USB1_CTRL_ROOT>; - clock-names = "usb1_ctrl_root_clk"; - assigned-clocks = <&clk IMX8MN_CLK_USB_BUS>, - <&clk IMX8MN_CLK_USB_CORE_REF>; - assigned-clock-parents = <&clk IMX8MN_SYS_PLL2_500M>, - <&clk IMX8MN_SYS_PLL1_100M>; - fsl,usbphy = <&usbphynop2>; - fsl,usbmisc = <&usbmisc2 0>; - status = "disabled"; - }; - - usbmisc2: usbmisc@32e50200 { - compatible = "fsl,imx8mn-usbmisc", "fsl,imx7d-usbmisc"; - #index-cells = <1>; - reg = <0x32e50200 0x200>; - }; - }; dma_apbh: dma-controller@33000000 { @@ -876,12 +854,4 @@ assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_100M>; clock-names = "main_clk"; }; - - usbphynop2: usbphynop2 { - compatible = "usb-nop-xceiv"; - clocks = <&clk IMX8MN_CLK_USB_PHY_REF>; - assigned-clocks = <&clk IMX8MN_CLK_USB_PHY_REF>; - assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_100M>; - clock-names = "main_clk"; - }; }; diff --git a/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi b/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi index 8bc6caa9167d..4338db14c5da 100644 --- a/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi +++ b/arch/arm64/boot/dts/freescale/qoriq-fman3-0.dtsi @@ -19,6 +19,7 @@ fman0: fman@1a00000 { clock-names = "fmanclk"; fsl,qman-channel-range = <0x800 0x10>; ptimer-handle = <&ptp_timer0>; + dma-coherent; muram@0 { compatible = "fsl,fman-muram"; diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts b/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts index 96c50d48289d..a7a83f29f00b 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts +++ b/arch/arm64/boot/dts/intel/socfpga_agilex_socdk.dts @@ -110,7 +110,7 @@ flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "mt25qu02g"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7-emmc.dts b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7-emmc.dts index 03733fd92732..215d2f702623 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7-emmc.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7-emmc.dts @@ -20,17 +20,23 @@ compatible = "globalscale,espressobin-v7-emmc", "globalscale,espressobin-v7", "globalscale,espressobin", "marvell,armada3720", "marvell,armada3710"; + + aliases { + /* ethernet1 is wan port */ + ethernet1 = &switch0port3; + ethernet3 = &switch0port1; + }; }; &switch0 { ports { - port@1 { + switch0port1: port@1 { reg = <1>; label = "lan1"; phy-handle = <&switch0phy0>; }; - port@3 { + switch0port3: port@3 { reg = <3>; label = "wan"; phy-handle = <&switch0phy2>; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7.dts b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7.dts index 8570c5f47d7d..b6f4af8ebafb 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin-v7.dts @@ -19,17 +19,23 @@ model = "Globalscale Marvell ESPRESSOBin Board V7"; compatible = "globalscale,espressobin-v7", "globalscale,espressobin", "marvell,armada3720", "marvell,armada3710"; + + aliases { + /* ethernet1 is wan port */ + ethernet1 = &switch0port3; + ethernet3 = &switch0port1; + }; }; &switch0 { ports { - port@1 { + switch0port1: port@1 { reg = <1>; label = "lan1"; phy-handle = <&switch0phy0>; }; - port@3 { + switch0port3: port@3 { reg = <3>; label = "wan"; phy-handle = <&switch0phy2>; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dtsi b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dtsi index b97218c72727..0775c16e0ec8 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-3720-espressobin.dtsi @@ -13,6 +13,10 @@ / { aliases { ethernet0 = ð0; + /* for dsa slave device */ + ethernet1 = &switch0port1; + ethernet2 = &switch0port2; + ethernet3 = &switch0port3; serial0 = &uart0; serial1 = &uart1; }; @@ -120,7 +124,7 @@ #address-cells = <1>; #size-cells = <0>; - port@0 { + switch0port0: port@0 { reg = <0>; label = "cpu"; ethernet = <ð0>; @@ -131,19 +135,19 @@ }; }; - port@1 { + switch0port1: port@1 { reg = <1>; label = "wan"; phy-handle = <&switch0phy0>; }; - port@2 { + switch0port2: port@2 { reg = <2>; label = "lan0"; phy-handle = <&switch0phy1>; }; - port@3 { + switch0port3: port@3 { reg = <3>; label = "lan1"; phy-handle = <&switch0phy2>; diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts index 381a84912ba8..c28d51cc5797 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts +++ b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts @@ -10,18 +10,6 @@ model = "NVIDIA Jetson TX2 Developer Kit"; compatible = "nvidia,p2771-0000", "nvidia,tegra186"; - aconnect { - status = "okay"; - - dma-controller@2930000 { - status = "okay"; - }; - - interrupt-controller@2a40000 { - status = "okay"; - }; - }; - i2c@3160000 { power-monitor@42 { compatible = "ti,ina3221"; diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi index a2893be80507..0dc8304a2edd 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194-p3668-0000.dtsi @@ -54,7 +54,7 @@ status = "okay"; }; - serial@c280000 { + serial@3100000 { status = "okay"; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index e9c90f0f44ff..93438d2b9469 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -1161,7 +1161,7 @@ hsp_aon: hsp@c150000 { compatible = "nvidia,tegra194-hsp", "nvidia,tegra186-hsp"; - reg = <0x0c150000 0xa0000>; + reg = <0x0c150000 0x90000>; interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>, diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi index e18e1a9a3011..a9caaf7c0d67 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi @@ -1663,16 +1663,6 @@ vin-supply = <&vdd_5v0_sys>; }; - vdd_usb_vbus_otg: regulator@11 { - compatible = "regulator-fixed"; - regulator-name = "USB_VBUS_EN0"; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; - gpio = <&gpio TEGRA_GPIO(CC, 4) GPIO_ACTIVE_HIGH>; - enable-active-high; - vin-supply = <&vdd_5v0_sys>; - }; - vdd_hdmi: regulator@10 { compatible = "regulator-fixed"; regulator-name = "VDD_HDMI_5V0"; @@ -1712,4 +1702,14 @@ enable-active-high; vin-supply = <&vdd_3v3_sys>; }; + + vdd_usb_vbus_otg: regulator@14 { + compatible = "regulator-fixed"; + regulator-name = "USB_VBUS_EN0"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + gpio = <&gpio TEGRA_GPIO(CC, 4) GPIO_ACTIVE_HIGH>; + enable-active-high; + vin-supply = <&vdd_5v0_sys>; + }; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts b/arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts index f6e6a24829af..b5d9a5526272 100644 --- a/arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts +++ b/arch/arm64/boot/dts/nvidia/tegra234-sim-vdk.dts @@ -8,7 +8,7 @@ compatible = "nvidia,tegra234-vdk", "nvidia,tegra234"; aliases { - sdhci3 = "/cbb@0/sdhci@3460000"; + mmc3 = "/bus@0/mmc@3460000"; serial0 = &uarta; }; @@ -17,12 +17,12 @@ stdout-path = "serial0:115200n8"; }; - cbb@0 { + bus@0 { serial@3100000 { status = "okay"; }; - sdhci@3460000 { + mmc@3460000 { status = "okay"; bus-width = <8>; non-removable; diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi index a94dac76bf3f..59e0cbfa2214 100644 --- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi @@ -179,22 +179,22 @@ }; soc: soc { - #address-cells = <1>; - #size-cells = <1>; - ranges = <0 0 0 0xffffffff>; + #address-cells = <2>; + #size-cells = <2>; + ranges = <0 0 0 0 0x0 0xffffffff>; dma-ranges; compatible = "simple-bus"; prng: qrng@e1000 { compatible = "qcom,prng-ee"; - reg = <0xe3000 0x1000>; + reg = <0x0 0xe3000 0x0 0x1000>; clocks = <&gcc GCC_PRNG_AHB_CLK>; clock-names = "core"; }; cryptobam: dma@704000 { compatible = "qcom,bam-v1.7.0"; - reg = <0x00704000 0x20000>; + reg = <0x0 0x00704000 0x0 0x20000>; interrupts = <GIC_SPI 207 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gcc GCC_CRYPTO_AHB_CLK>; clock-names = "bam_clk"; @@ -206,7 +206,7 @@ crypto: crypto@73a000 { compatible = "qcom,crypto-v5.1"; - reg = <0x0073a000 0x6000>; + reg = <0x0 0x0073a000 0x0 0x6000>; clocks = <&gcc GCC_CRYPTO_AHB_CLK>, <&gcc GCC_CRYPTO_AXI_CLK>, <&gcc GCC_CRYPTO_CLK>; @@ -217,7 +217,7 @@ tlmm: pinctrl@1000000 { compatible = "qcom,ipq6018-pinctrl"; - reg = <0x01000000 0x300000>; + reg = <0x0 0x01000000 0x0 0x300000>; interrupts = <GIC_SPI 208 IRQ_TYPE_LEVEL_HIGH>; gpio-controller; #gpio-cells = <2>; @@ -235,7 +235,7 @@ gcc: gcc@1800000 { compatible = "qcom,gcc-ipq6018"; - reg = <0x01800000 0x80000>; + reg = <0x0 0x01800000 0x0 0x80000>; clocks = <&xo>, <&sleep_clk>; clock-names = "xo", "sleep_clk"; #clock-cells = <1>; @@ -244,17 +244,17 @@ tcsr_mutex_regs: syscon@1905000 { compatible = "syscon"; - reg = <0x01905000 0x8000>; + reg = <0x0 0x01905000 0x0 0x8000>; }; tcsr_q6: syscon@1945000 { compatible = "syscon"; - reg = <0x01945000 0xe000>; + reg = <0x0 0x01945000 0x0 0xe000>; }; blsp_dma: dma@7884000 { compatible = "qcom,bam-v1.7.0"; - reg = <0x07884000 0x2b000>; + reg = <0x0 0x07884000 0x0 0x2b000>; interrupts = <GIC_SPI 238 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gcc GCC_BLSP1_AHB_CLK>; clock-names = "bam_clk"; @@ -264,7 +264,7 @@ blsp1_uart3: serial@78b1000 { compatible = "qcom,msm-uartdm-v1.4", "qcom,msm-uartdm"; - reg = <0x078b1000 0x200>; + reg = <0x0 0x078b1000 0x0 0x200>; interrupts = <GIC_SPI 306 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gcc GCC_BLSP1_UART3_APPS_CLK>, <&gcc GCC_BLSP1_AHB_CLK>; @@ -276,7 +276,7 @@ compatible = "qcom,spi-qup-v2.2.1"; #address-cells = <1>; #size-cells = <0>; - reg = <0x078b5000 0x600>; + reg = <0x0 0x078b5000 0x0 0x600>; interrupts = <GIC_SPI 95 IRQ_TYPE_LEVEL_HIGH>; spi-max-frequency = <50000000>; clocks = <&gcc GCC_BLSP1_QUP1_SPI_APPS_CLK>, @@ -291,7 +291,7 @@ compatible = "qcom,spi-qup-v2.2.1"; #address-cells = <1>; #size-cells = <0>; - reg = <0x078b6000 0x600>; + reg = <0x0 0x078b6000 0x0 0x600>; interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>; spi-max-frequency = <50000000>; clocks = <&gcc GCC_BLSP1_QUP2_SPI_APPS_CLK>, @@ -306,7 +306,7 @@ compatible = "qcom,i2c-qup-v2.2.1"; #address-cells = <1>; #size-cells = <0>; - reg = <0x078b6000 0x600>; + reg = <0x0 0x078b6000 0x0 0x600>; interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gcc GCC_BLSP1_AHB_CLK>, <&gcc GCC_BLSP1_QUP2_I2C_APPS_CLK>; @@ -321,7 +321,7 @@ compatible = "qcom,i2c-qup-v2.2.1"; #address-cells = <1>; #size-cells = <0>; - reg = <0x078b7000 0x600>; + reg = <0x0 0x078b7000 0x0 0x600>; interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gcc GCC_BLSP1_AHB_CLK>, <&gcc GCC_BLSP1_QUP3_I2C_APPS_CLK>; @@ -336,24 +336,24 @@ compatible = "qcom,msm-qgic2"; interrupt-controller; #interrupt-cells = <0x3>; - reg = <0x0b000000 0x1000>, /*GICD*/ - <0x0b002000 0x1000>, /*GICC*/ - <0x0b001000 0x1000>, /*GICH*/ - <0x0b004000 0x1000>; /*GICV*/ + reg = <0x0 0x0b000000 0x0 0x1000>, /*GICD*/ + <0x0 0x0b002000 0x0 0x1000>, /*GICC*/ + <0x0 0x0b001000 0x0 0x1000>, /*GICH*/ + <0x0 0x0b004000 0x0 0x1000>; /*GICV*/ interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>; }; watchdog@b017000 { compatible = "qcom,kpss-wdt"; interrupts = <GIC_SPI 3 IRQ_TYPE_EDGE_RISING>; - reg = <0x0b017000 0x40>; + reg = <0x0 0x0b017000 0x0 0x40>; clocks = <&sleep_clk>; timeout-sec = <10>; }; apcs_glb: mailbox@b111000 { compatible = "qcom,ipq6018-apcs-apps-global"; - reg = <0x0b111000 0x1000>; + reg = <0x0 0x0b111000 0x0 0x1000>; #clock-cells = <1>; clocks = <&a53pll>, <&xo>; clock-names = "pll", "xo"; @@ -362,7 +362,7 @@ a53pll: clock@b116000 { compatible = "qcom,ipq6018-a53pll"; - reg = <0x0b116000 0x40>; + reg = <0x0 0x0b116000 0x0 0x40>; #clock-cells = <0>; clocks = <&xo>; clock-names = "xo"; @@ -377,68 +377,68 @@ }; timer@b120000 { - #address-cells = <1>; - #size-cells = <1>; + #address-cells = <2>; + #size-cells = <2>; ranges; compatible = "arm,armv7-timer-mem"; - reg = <0x0b120000 0x1000>; + reg = <0x0 0x0b120000 0x0 0x1000>; clock-frequency = <19200000>; frame@b120000 { frame-number = <0>; interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0b121000 0x1000>, - <0x0b122000 0x1000>; + reg = <0x0 0x0b121000 0x0 0x1000>, + <0x0 0x0b122000 0x0 0x1000>; }; frame@b123000 { frame-number = <1>; interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>; - reg = <0xb123000 0x1000>; + reg = <0x0 0xb123000 0x0 0x1000>; status = "disabled"; }; frame@b124000 { frame-number = <2>; interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0b124000 0x1000>; + reg = <0x0 0x0b124000 0x0 0x1000>; status = "disabled"; }; frame@b125000 { frame-number = <3>; interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0b125000 0x1000>; + reg = <0x0 0x0b125000 0x0 0x1000>; status = "disabled"; }; frame@b126000 { frame-number = <4>; interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0b126000 0x1000>; + reg = <0x0 0x0b126000 0x0 0x1000>; status = "disabled"; }; frame@b127000 { frame-number = <5>; interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0b127000 0x1000>; + reg = <0x0 0x0b127000 0x0 0x1000>; status = "disabled"; }; frame@b128000 { frame-number = <6>; interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0b128000 0x1000>; + reg = <0x0 0x0b128000 0x0 0x1000>; status = "disabled"; }; }; q6v5_wcss: remoteproc@cd00000 { compatible = "qcom,ipq8074-wcss-pil"; - reg = <0x0cd00000 0x4040>, - <0x004ab000 0x20>; + reg = <0x0 0x0cd00000 0x0 0x4040>, + <0x0 0x004ab000 0x0 0x20>; reg-names = "qdsp6", "rmb"; interrupts-extended = <&intc GIC_SPI 325 IRQ_TYPE_EDGE_RISING>, diff --git a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi index 9cbf963aa068..c29643442e91 100644 --- a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi @@ -28,6 +28,12 @@ clock-frequency = <0>; }; + audio_clk_b: audio_clk_b { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <0>; + }; + audio_clk_c: audio_clk_c { compatible = "fixed-clock"; #clock-cells = <0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts index 35bd6b904b9c..337681038519 100644 --- a/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts +++ b/arch/arm64/boot/dts/rockchip/rk3326-odroid-go2.dts @@ -243,7 +243,6 @@ interrupts = <RK_PB2 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; pinctrl-0 = <&pmic_int>; - rockchip,system-power-controller; wakeup-source; #clock-cells = <1>; clock-output-names = "rk808-clkout1", "xin32k"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts b/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts index be7a31d81632..2ee07d15a6e3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-nanopi-r2s.dts @@ -20,7 +20,7 @@ gmac_clk: gmac-clock { compatible = "fixed-clock"; clock-frequency = <125000000>; - clock-output-names = "gmac_clk"; + clock-output-names = "gmac_clkin"; #clock-cells = <0>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi index e7a459fa4322..20309076dbac 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-roc-pc.dtsi @@ -74,14 +74,14 @@ label = "red:diy"; gpios = <&gpio0 RK_PB5 GPIO_ACTIVE_HIGH>; default-state = "off"; - linux,default-trigger = "mmc1"; + linux,default-trigger = "mmc2"; }; yellow_led: led-2 { label = "yellow:yellow-led"; gpios = <&gpio0 RK_PA2 GPIO_ACTIVE_HIGH>; default-state = "off"; - linux,default-trigger = "mmc0"; + linux,default-trigger = "mmc1"; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index ada724b12f01..7a9a7aca86c6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -29,6 +29,9 @@ i2c6 = &i2c6; i2c7 = &i2c7; i2c8 = &i2c8; + mmc0 = &sdio0; + mmc1 = &sdmmc; + mmc2 = &sdhci; serial0 = &uart0; serial1 = &uart1; serial2 = &uart2; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 17a2df6a263e..5e7d86cf5dfa 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -500,6 +500,7 @@ CONFIG_GPIO_ALTERA=m CONFIG_GPIO_DWAPB=y CONFIG_GPIO_MB86S7X=y CONFIG_GPIO_MPC8XXX=y +CONFIG_GPIO_MXC=y CONFIG_GPIO_PL061=y CONFIG_GPIO_RCAR=y CONFIG_GPIO_UNIPHIER=y @@ -1081,6 +1082,7 @@ CONFIG_CRYPTO_DEV_CCREE=m CONFIG_CRYPTO_DEV_HISI_SEC2=m CONFIG_CRYPTO_DEV_HISI_ZIP=m CONFIG_CRYPTO_DEV_HISI_HPRE=m +CONFIG_CRYPTO_DEV_HISI_TRNG=m CONFIG_CMA_SIZE_MBYTES=32 CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 395bbf64b2ab..34b8a89197be 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -10,7 +10,7 @@ #include <asm/simd.h> #include <crypto/aes.h> #include <crypto/ctr.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> #include <crypto/internal/skcipher.h> diff --git a/arch/arm64/crypto/chacha-neon-core.S b/arch/arm64/crypto/chacha-neon-core.S index e90386a7db8e..b70ac76f2610 100644 --- a/arch/arm64/crypto/chacha-neon-core.S +++ b/arch/arm64/crypto/chacha-neon-core.S @@ -195,7 +195,6 @@ SYM_FUNC_START(chacha_4block_xor_neon) adr_l x10, .Lpermute and x5, x4, #63 add x10, x10, x5 - add x11, x10, #64 // // This function encrypts four consecutive ChaCha blocks by loading @@ -645,11 +644,11 @@ CPU_BE( rev a15, a15 ) zip2 v31.4s, v14.4s, v15.4s eor a15, a15, w9 - mov x3, #64 + add x3, x2, x4 + sub x3, x3, #128 // start of last block + subs x5, x4, #128 - add x6, x5, x2 - csel x3, x3, xzr, ge - csel x2, x2, x6, ge + csel x2, x2, x3, ge // interleave 64-bit words in state n, n+2 zip1 v0.2d, v16.2d, v18.2d @@ -658,13 +657,10 @@ CPU_BE( rev a15, a15 ) zip1 v8.2d, v17.2d, v19.2d zip2 v12.2d, v17.2d, v19.2d stp a2, a3, [x1, #-56] - ld1 {v16.16b-v19.16b}, [x2], x3 subs x6, x4, #192 - ccmp x3, xzr, #4, lt - add x7, x6, x2 - csel x3, x3, xzr, eq - csel x2, x2, x7, eq + ld1 {v16.16b-v19.16b}, [x2], #64 + csel x2, x2, x3, ge zip1 v1.2d, v20.2d, v22.2d zip2 v5.2d, v20.2d, v22.2d @@ -672,13 +668,10 @@ CPU_BE( rev a15, a15 ) zip1 v9.2d, v21.2d, v23.2d zip2 v13.2d, v21.2d, v23.2d stp a6, a7, [x1, #-40] - ld1 {v20.16b-v23.16b}, [x2], x3 subs x7, x4, #256 - ccmp x3, xzr, #4, lt - add x8, x7, x2 - csel x3, x3, xzr, eq - csel x2, x2, x8, eq + ld1 {v20.16b-v23.16b}, [x2], #64 + csel x2, x2, x3, ge zip1 v2.2d, v24.2d, v26.2d zip2 v6.2d, v24.2d, v26.2d @@ -686,12 +679,10 @@ CPU_BE( rev a15, a15 ) zip1 v10.2d, v25.2d, v27.2d zip2 v14.2d, v25.2d, v27.2d stp a10, a11, [x1, #-24] - ld1 {v24.16b-v27.16b}, [x2], x3 subs x8, x4, #320 - ccmp x3, xzr, #4, lt - add x9, x8, x2 - csel x2, x2, x9, eq + ld1 {v24.16b-v27.16b}, [x2], #64 + csel x2, x2, x3, ge zip1 v3.2d, v28.2d, v30.2d zip2 v7.2d, v28.2d, v30.2d @@ -699,151 +690,105 @@ CPU_BE( rev a15, a15 ) zip1 v11.2d, v29.2d, v31.2d zip2 v15.2d, v29.2d, v31.2d stp a14, a15, [x1, #-8] + + tbnz x5, #63, .Lt128 ld1 {v28.16b-v31.16b}, [x2] // xor with corresponding input, write to output - tbnz x5, #63, 0f eor v16.16b, v16.16b, v0.16b eor v17.16b, v17.16b, v1.16b eor v18.16b, v18.16b, v2.16b eor v19.16b, v19.16b, v3.16b - st1 {v16.16b-v19.16b}, [x1], #64 - cbz x5, .Lout - tbnz x6, #63, 1f + tbnz x6, #63, .Lt192 + eor v20.16b, v20.16b, v4.16b eor v21.16b, v21.16b, v5.16b eor v22.16b, v22.16b, v6.16b eor v23.16b, v23.16b, v7.16b - st1 {v20.16b-v23.16b}, [x1], #64 - cbz x6, .Lout - tbnz x7, #63, 2f + st1 {v16.16b-v19.16b}, [x1], #64 + tbnz x7, #63, .Lt256 + eor v24.16b, v24.16b, v8.16b eor v25.16b, v25.16b, v9.16b eor v26.16b, v26.16b, v10.16b eor v27.16b, v27.16b, v11.16b - st1 {v24.16b-v27.16b}, [x1], #64 - cbz x7, .Lout - tbnz x8, #63, 3f + st1 {v20.16b-v23.16b}, [x1], #64 + tbnz x8, #63, .Lt320 + eor v28.16b, v28.16b, v12.16b eor v29.16b, v29.16b, v13.16b eor v30.16b, v30.16b, v14.16b eor v31.16b, v31.16b, v15.16b + + st1 {v24.16b-v27.16b}, [x1], #64 st1 {v28.16b-v31.16b}, [x1] .Lout: frame_pop ret - // fewer than 128 bytes of in/output -0: ld1 {v8.16b}, [x10] - ld1 {v9.16b}, [x11] - movi v10.16b, #16 - sub x2, x1, #64 - add x1, x1, x5 - ld1 {v16.16b-v19.16b}, [x2] - tbl v4.16b, {v0.16b-v3.16b}, v8.16b - tbx v20.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v5.16b, {v0.16b-v3.16b}, v8.16b - tbx v21.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v6.16b, {v0.16b-v3.16b}, v8.16b - tbx v22.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v7.16b, {v0.16b-v3.16b}, v8.16b - tbx v23.16b, {v16.16b-v19.16b}, v9.16b - - eor v20.16b, v20.16b, v4.16b - eor v21.16b, v21.16b, v5.16b - eor v22.16b, v22.16b, v6.16b - eor v23.16b, v23.16b, v7.16b - st1 {v20.16b-v23.16b}, [x1] - b .Lout - // fewer than 192 bytes of in/output -1: ld1 {v8.16b}, [x10] - ld1 {v9.16b}, [x11] - movi v10.16b, #16 - add x1, x1, x6 - tbl v0.16b, {v4.16b-v7.16b}, v8.16b - tbx v20.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v1.16b, {v4.16b-v7.16b}, v8.16b - tbx v21.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v2.16b, {v4.16b-v7.16b}, v8.16b - tbx v22.16b, {v16.16b-v19.16b}, v9.16b - add v8.16b, v8.16b, v10.16b - add v9.16b, v9.16b, v10.16b - tbl v3.16b, {v4.16b-v7.16b}, v8.16b - tbx v23.16b, {v16.16b-v19.16b}, v9.16b - - eor v20.16b, v20.16b, v0.16b - eor v21.16b, v21.16b, v1.16b - eor v22.16b, v22.16b, v2.16b - eor v23.16b, v23.16b, v3.16b - st1 {v20.16b-v23.16b}, [x1] +.Lt192: cbz x5, 1f // exactly 128 bytes? + ld1 {v28.16b-v31.16b}, [x10] + add x5, x5, x1 + tbl v28.16b, {v4.16b-v7.16b}, v28.16b + tbl v29.16b, {v4.16b-v7.16b}, v29.16b + tbl v30.16b, {v4.16b-v7.16b}, v30.16b + tbl v31.16b, {v4.16b-v7.16b}, v31.16b + +0: eor v20.16b, v20.16b, v28.16b + eor v21.16b, v21.16b, v29.16b + eor v22.16b, v22.16b, v30.16b + eor v23.16b, v23.16b, v31.16b + st1 {v20.16b-v23.16b}, [x5] // overlapping stores +1: st1 {v16.16b-v19.16b}, [x1] b .Lout + // fewer than 128 bytes of in/output +.Lt128: ld1 {v28.16b-v31.16b}, [x10] + add x5, x5, x1 + sub x1, x1, #64 + tbl v28.16b, {v0.16b-v3.16b}, v28.16b + tbl v29.16b, {v0.16b-v3.16b}, v29.16b + tbl v30.16b, {v0.16b-v3.16b}, v30.16b + tbl v31.16b, {v0.16b-v3.16b}, v31.16b + ld1 {v16.16b-v19.16b}, [x1] // reload first output block + b 0b + // fewer than 256 bytes of in/output -2: ld1 {v4.16b}, [x10] - ld1 {v5.16b}, [x11] - movi v6.16b, #16 - add x1, x1, x7 +.Lt256: cbz x6, 2f // exactly 192 bytes? + ld1 {v4.16b-v7.16b}, [x10] + add x6, x6, x1 tbl v0.16b, {v8.16b-v11.16b}, v4.16b - tbx v24.16b, {v20.16b-v23.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v1.16b, {v8.16b-v11.16b}, v4.16b - tbx v25.16b, {v20.16b-v23.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v2.16b, {v8.16b-v11.16b}, v4.16b - tbx v26.16b, {v20.16b-v23.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v3.16b, {v8.16b-v11.16b}, v4.16b - tbx v27.16b, {v20.16b-v23.16b}, v5.16b - - eor v24.16b, v24.16b, v0.16b - eor v25.16b, v25.16b, v1.16b - eor v26.16b, v26.16b, v2.16b - eor v27.16b, v27.16b, v3.16b - st1 {v24.16b-v27.16b}, [x1] + tbl v1.16b, {v8.16b-v11.16b}, v5.16b + tbl v2.16b, {v8.16b-v11.16b}, v6.16b + tbl v3.16b, {v8.16b-v11.16b}, v7.16b + + eor v28.16b, v28.16b, v0.16b + eor v29.16b, v29.16b, v1.16b + eor v30.16b, v30.16b, v2.16b + eor v31.16b, v31.16b, v3.16b + st1 {v28.16b-v31.16b}, [x6] // overlapping stores +2: st1 {v20.16b-v23.16b}, [x1] b .Lout // fewer than 320 bytes of in/output -3: ld1 {v4.16b}, [x10] - ld1 {v5.16b}, [x11] - movi v6.16b, #16 - add x1, x1, x8 +.Lt320: cbz x7, 3f // exactly 256 bytes? + ld1 {v4.16b-v7.16b}, [x10] + add x7, x7, x1 tbl v0.16b, {v12.16b-v15.16b}, v4.16b - tbx v28.16b, {v24.16b-v27.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v1.16b, {v12.16b-v15.16b}, v4.16b - tbx v29.16b, {v24.16b-v27.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v2.16b, {v12.16b-v15.16b}, v4.16b - tbx v30.16b, {v24.16b-v27.16b}, v5.16b - add v4.16b, v4.16b, v6.16b - add v5.16b, v5.16b, v6.16b - tbl v3.16b, {v12.16b-v15.16b}, v4.16b - tbx v31.16b, {v24.16b-v27.16b}, v5.16b + tbl v1.16b, {v12.16b-v15.16b}, v5.16b + tbl v2.16b, {v12.16b-v15.16b}, v6.16b + tbl v3.16b, {v12.16b-v15.16b}, v7.16b eor v28.16b, v28.16b, v0.16b eor v29.16b, v29.16b, v1.16b eor v30.16b, v30.16b, v2.16b eor v31.16b, v31.16b, v3.16b - st1 {v28.16b-v31.16b}, [x1] + st1 {v28.16b-v31.16b}, [x7] // overlapping stores +3: st1 {v24.16b-v27.16b}, [x1] b .Lout SYM_FUNC_END(chacha_4block_xor_neon) @@ -851,7 +796,7 @@ SYM_FUNC_END(chacha_4block_xor_neon) .align L1_CACHE_SHIFT .Lpermute: .set .Li, 0 - .rept 192 + .rept 128 .byte (.Li - 64) .set .Li, .Li + 1 .endr diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S index 6b958dcdf136..7868330dd54e 100644 --- a/arch/arm64/crypto/ghash-ce-core.S +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -544,7 +544,22 @@ CPU_LE( rev w8, w8 ) ext XL.16b, XL.16b, XL.16b, #8 rev64 XL.16b, XL.16b eor XL.16b, XL.16b, KS0.16b + + .if \enc == 1 st1 {XL.16b}, [x10] // store tag + .else + ldp x11, x12, [sp, #40] // load tag pointer and authsize + adr_l x17, .Lpermute_table + ld1 {KS0.16b}, [x11] // load supplied tag + add x17, x17, x12 + ld1 {KS1.16b}, [x17] // load permute vector + + cmeq XL.16b, XL.16b, KS0.16b // compare tags + mvn XL.16b, XL.16b // -1 for fail, 0 for pass + tbl XL.16b, {XL.16b}, KS1.16b // keep authsize bytes only + sminv b0, XL.16b // signed minimum across XL + smov w0, v0.b[0] // return b0 + .endif 4: ldp x29, x30, [sp], #32 ret diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 8536008e3e35..720cd3a58da3 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -55,10 +55,10 @@ asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src, asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[], u64 const h[][2], u64 dg[], u8 ctr[], u32 const rk[], int rounds, u8 tag[]); - -asmlinkage void pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[], - u64 const h[][2], u64 dg[], u8 ctr[], - u32 const rk[], int rounds, u8 tag[]); +asmlinkage int pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[], + u64 const h[][2], u64 dg[], u8 ctr[], + u32 const rk[], int rounds, const u8 l[], + const u8 tag[], u64 authsize); static int ghash_init(struct shash_desc *desc) { @@ -168,7 +168,7 @@ static int ghash_final(struct shash_desc *desc, u8 *dst) put_unaligned_be64(ctx->digest[1], dst); put_unaligned_be64(ctx->digest[0], dst + 8); - *ctx = (struct ghash_desc_ctx){}; + memzero_explicit(ctx, sizeof(*ctx)); return 0; } @@ -458,6 +458,7 @@ static int gcm_decrypt(struct aead_request *req) unsigned int authsize = crypto_aead_authsize(aead); int nrounds = num_rounds(&ctx->aes_key); struct skcipher_walk walk; + u8 otag[AES_BLOCK_SIZE]; u8 buf[AES_BLOCK_SIZE]; u8 iv[AES_BLOCK_SIZE]; u64 dg[2] = {}; @@ -474,9 +475,15 @@ static int gcm_decrypt(struct aead_request *req) memcpy(iv, req->iv, GCM_IV_SIZE); put_unaligned_be32(2, iv + GCM_IV_SIZE); + scatterwalk_map_and_copy(otag, req->src, + req->assoclen + req->cryptlen - authsize, + authsize, 0); + err = skcipher_walk_aead_decrypt(&walk, req, false); if (likely(crypto_simd_usable())) { + int ret; + do { const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; @@ -493,9 +500,10 @@ static int gcm_decrypt(struct aead_request *req) } kernel_neon_begin(); - pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h, - dg, iv, ctx->aes_key.key_enc, nrounds, - tag); + ret = pmull_gcm_decrypt(nbytes, dst, src, + ctx->ghash_key.h, + dg, iv, ctx->aes_key.key_enc, + nrounds, tag, otag, authsize); kernel_neon_end(); if (unlikely(!nbytes)) @@ -507,6 +515,11 @@ static int gcm_decrypt(struct aead_request *req) err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } while (walk.nbytes); + + if (err) + return err; + if (ret) + return -EBADMSG; } else { while (walk.nbytes >= AES_BLOCK_SIZE) { int blocks = walk.nbytes / AES_BLOCK_SIZE; @@ -548,23 +561,20 @@ static int gcm_decrypt(struct aead_request *req) err = skcipher_walk_done(&walk, 0); } + if (err) + return err; + put_unaligned_be64(dg[1], tag); put_unaligned_be64(dg[0], tag + 8); put_unaligned_be32(1, iv + GCM_IV_SIZE); aes_encrypt(&ctx->aes_key, iv, iv); crypto_xor(tag, iv, AES_BLOCK_SIZE); - } - - if (err) - return err; - /* compare calculated auth tag with the stored one */ - scatterwalk_map_and_copy(buf, req->src, - req->assoclen + req->cryptlen - authsize, - authsize, 0); - - if (crypto_memneq(tag, buf, authsize)) - return -EBADMSG; + if (crypto_memneq(tag, otag, authsize)) { + memzero_explicit(tag, AES_BLOCK_SIZE); + return -EBADMSG; + } + } return 0; } diff --git a/arch/arm64/crypto/poly1305-armv8.pl b/arch/arm64/crypto/poly1305-armv8.pl index 6e5576d19af8..cbc980fb02e3 100644 --- a/arch/arm64/crypto/poly1305-armv8.pl +++ b/arch/arm64/crypto/poly1305-armv8.pl @@ -840,7 +840,6 @@ poly1305_blocks_neon: ldp d14,d15,[sp,#64] addp $ACC2,$ACC2,$ACC2 ldr x30,[sp,#8] - .inst 0xd50323bf // autiasp //////////////////////////////////////////////////////////////// // lazy reduction, but without narrowing @@ -882,6 +881,7 @@ poly1305_blocks_neon: str x4,[$ctx,#8] // set is_base2_26 ldr x29,[sp],#80 + .inst 0xd50323bf // autiasp ret .size poly1305_blocks_neon,.-poly1305_blocks_neon diff --git a/arch/arm64/crypto/poly1305-core.S_shipped b/arch/arm64/crypto/poly1305-core.S_shipped index 8d1c4e420ccd..fb2822abf63a 100644 --- a/arch/arm64/crypto/poly1305-core.S_shipped +++ b/arch/arm64/crypto/poly1305-core.S_shipped @@ -779,7 +779,6 @@ poly1305_blocks_neon: ldp d14,d15,[sp,#64] addp v21.2d,v21.2d,v21.2d ldr x30,[sp,#8] - .inst 0xd50323bf // autiasp //////////////////////////////////////////////////////////////// // lazy reduction, but without narrowing @@ -821,6 +820,7 @@ poly1305_blocks_neon: str x4,[x0,#8] // set is_base2_26 ldr x29,[sp],#80 + .inst 0xd50323bf // autiasp ret .size poly1305_blocks_neon,.-poly1305_blocks_neon diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c index f33ada70c4ed..683de671741a 100644 --- a/arch/arm64/crypto/poly1305-glue.c +++ b/arch/arm64/crypto/poly1305-glue.c @@ -177,7 +177,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) } poly1305_emit(&dctx->h, dst, dctx->s); - *dctx = (struct poly1305_desc_ctx){}; + memzero_explicit(dctx, sizeof(*dctx)); } EXPORT_SYMBOL(poly1305_final_arch); diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index c63b99211db3..c93121bcfdeb 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -10,7 +10,7 @@ #include <asm/unaligned.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <crypto/sha1_base.h> #include <linux/cpufeature.h> #include <linux/crypto.h> diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index 5e956d7582a5..31ba3da5e61b 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -10,7 +10,7 @@ #include <asm/unaligned.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha256_base.h> #include <linux/cpufeature.h> #include <linux/crypto.h> diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c index 77bc6e72abae..9462f6088b3f 100644 --- a/arch/arm64/crypto/sha256-glue.c +++ b/arch/arm64/crypto/sha256-glue.c @@ -10,7 +10,7 @@ #include <asm/simd.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha256_base.h> #include <linux/types.h> #include <linux/string.h> diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c index 9a4bbfc45f40..e5a2936f0886 100644 --- a/arch/arm64/crypto/sha3-ce-glue.c +++ b/arch/arm64/crypto/sha3-ce-glue.c @@ -94,7 +94,7 @@ static int sha3_final(struct shash_desc *desc, u8 *out) if (digest_size & 4) put_unaligned_le32(sctx->st[i], (__le32 *)digest); - *sctx = (struct sha3_state){}; + memzero_explicit(sctx, sizeof(*sctx)); return 0; } diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c index dc890a719f54..faa83f6cf376 100644 --- a/arch/arm64/crypto/sha512-ce-glue.c +++ b/arch/arm64/crypto/sha512-ce-glue.c @@ -14,7 +14,7 @@ #include <asm/unaligned.h> #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha512_base.h> #include <linux/cpufeature.h> #include <linux/crypto.h> diff --git a/arch/arm64/crypto/sha512-glue.c b/arch/arm64/crypto/sha512-glue.c index 370ccb29602f..2acff1c7df5d 100644 --- a/arch/arm64/crypto/sha512-glue.c +++ b/arch/arm64/crypto/sha512-glue.c @@ -8,7 +8,7 @@ #include <crypto/internal/hash.h> #include <linux/types.h> #include <linux/string.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha512_base.h> #include <asm/neon.h> diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h index e3d47b52161d..ec7720dbe2c8 100644 --- a/arch/arm64/include/asm/brk-imm.h +++ b/arch/arm64/include/asm/brk-imm.h @@ -10,6 +10,7 @@ * #imm16 values used for BRK instruction generation * 0x004: for installing kprobes * 0x005: for installing uprobes + * 0x006: for kprobe software single-step * Allowed values for kgdb are 0x400 - 0x7ff * 0x100: for triggering a fault on purpose (reserved) * 0x400: for dynamic BRK instruction @@ -19,6 +20,7 @@ */ #define KPROBES_BRK_IMM 0x004 #define UPROBES_BRK_IMM 0x005 +#define KPROBES_BRK_SS_IMM 0x006 #define FAULT_BRK_IMM 0x100 #define KGDB_DYN_DBG_BRK_IMM 0x400 #define KGDB_COMPILED_DBG_BRK_IMM 0x401 diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 0ac3e06a2118..63d43b5f82f6 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -24,6 +24,7 @@ #define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) #define ICACHE_POLICY_VPIPT 0 +#define ICACHE_POLICY_RESERVED 1 #define ICACHE_POLICY_VIPT 2 #define ICACHE_POLICY_PIPT 3 diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 42868dbd29fd..e7d98997c09c 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -65,7 +65,8 @@ #define ARM64_HAS_ARMv8_4_TTL 55 #define ARM64_HAS_TLB_RANGE 56 #define ARM64_MTE 57 +#define ARM64_WORKAROUND_1508412 58 -#define ARM64_NCAPS 58 +#define ARM64_NCAPS 59 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index f7e7144af174..da250e4741bd 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -268,6 +268,8 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; /* * CPU feature detected at boot time based on feature of one or more CPUs. * All possible conflicts for a late CPU are ignored. + * NOTE: this means that a late CPU with the feature will *not* cause the + * capability to be advertised by cpus_have_*cap()! */ #define ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE \ (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ @@ -375,6 +377,23 @@ cpucap_multi_entry_cap_matches(const struct arm64_cpu_capabilities *entry, return false; } +static __always_inline bool is_vhe_hyp_code(void) +{ + /* Only defined for code run in VHE hyp context */ + return __is_defined(__KVM_VHE_HYPERVISOR__); +} + +static __always_inline bool is_nvhe_hyp_code(void) +{ + /* Only defined for code run in NVHE hyp context */ + return __is_defined(__KVM_NVHE_HYPERVISOR__); +} + +static __always_inline bool is_hyp_code(void) +{ + return is_vhe_hyp_code() || is_nvhe_hyp_code(); +} + extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; extern struct static_key_false arm64_const_caps_ready; @@ -428,35 +447,40 @@ static __always_inline bool __cpus_have_const_cap(int num) } /* - * Test for a capability, possibly with a runtime check. + * Test for a capability without a runtime check. * - * Before capabilities are finalized, this behaves as cpus_have_cap(). + * Before capabilities are finalized, this will BUG(). * After capabilities are finalized, this is patched to avoid a runtime check. * * @num must be a compile-time constant. */ -static __always_inline bool cpus_have_const_cap(int num) +static __always_inline bool cpus_have_final_cap(int num) { if (system_capabilities_finalized()) return __cpus_have_const_cap(num); else - return cpus_have_cap(num); + BUG(); } /* - * Test for a capability without a runtime check. + * Test for a capability, possibly with a runtime check for non-hyp code. * - * Before capabilities are finalized, this will BUG(). + * For hyp code, this behaves the same as cpus_have_final_cap(). + * + * For non-hyp code: + * Before capabilities are finalized, this behaves as cpus_have_cap(). * After capabilities are finalized, this is patched to avoid a runtime check. * * @num must be a compile-time constant. */ -static __always_inline bool cpus_have_final_cap(int num) +static __always_inline bool cpus_have_const_cap(int num) { - if (system_capabilities_finalized()) + if (is_hyp_code()) + return cpus_have_final_cap(num); + else if (system_capabilities_finalized()) return __cpus_have_const_cap(num); else - BUG(); + return cpus_have_cap(num); } static inline void cpus_set_cap(unsigned int num) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 7219cddeba66..ef5b040dee44 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -71,6 +71,7 @@ #define ARM_CPU_PART_CORTEX_A55 0xD05 #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C +#define ARM_CPU_PART_CORTEX_A77 0xD0D #define APM_CPU_PART_POTENZA 0x000 @@ -85,6 +86,8 @@ #define QCOM_CPU_PART_FALKOR_V1 0x800 #define QCOM_CPU_PART_FALKOR 0xC00 #define QCOM_CPU_PART_KRYO 0x200 +#define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800 +#define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801 #define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803 #define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804 #define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805 @@ -105,6 +108,7 @@ #define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) +#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) @@ -114,6 +118,8 @@ #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) +#define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD) +#define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER) #define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER) #define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD) #define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index ec213b4a1650..1c26d7baa67f 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -128,6 +128,9 @@ static inline void local_daif_inherit(struct pt_regs *regs) { unsigned long flags = regs->pstate & DAIF_MASK; + if (interrupts_enabled(regs)) + trace_hardirqs_on(); + /* * We can't use local_daif_restore(regs->pstate) here as * system_has_prio_mask_debugging() won't restore the I bit if it can diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 0b298f48f5bf..657c921fd784 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -53,6 +53,7 @@ /* kprobes BRK opcodes with ESR encoding */ #define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (KPROBES_BRK_IMM << 5)) +#define BRK64_OPCODE_KPROBES_SS (AARCH64_BREAK_MON | (KPROBES_BRK_SS_IMM << 5)) /* uprobes BRK opcodes with ESR encoding */ #define BRK64_OPCODE_UPROBES (AARCH64_BREAK_MON | (UPROBES_BRK_IMM << 5)) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 22c81f1edda2..85a3e49f92f4 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -104,6 +104,7 @@ /* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */ #define ESR_ELx_FSC (0x3F) #define ESR_ELx_FSC_TYPE (0x3C) +#define ESR_ELx_FSC_LEVEL (0x03) #define ESR_ELx_FSC_EXTABT (0x10) #define ESR_ELx_FSC_SERROR (0x11) #define ESR_ELx_FSC_ACCESS (0x08) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 99b9383cd036..0756191f44f6 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -31,7 +31,12 @@ static inline u32 disr_to_esr(u64 disr) return esr; } +asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs); +asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs); asmlinkage void enter_from_user_mode(void); +asmlinkage void exit_to_user_mode(void); +void arm64_enter_nmi(struct pt_regs *regs); +void arm64_exit_nmi(struct pt_regs *regs); void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs); void do_bti(struct pt_regs *regs); diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h index 97e511d645a2..8699ce30f587 100644 --- a/arch/arm64/include/asm/kprobes.h +++ b/arch/arm64/include/asm/kprobes.h @@ -16,7 +16,7 @@ #include <linux/percpu.h> #define __ARCH_WANT_KPROBES_INSN_SLOT -#define MAX_INSN_SIZE 1 +#define MAX_INSN_SIZE 2 #define flush_insn_slot(p) do { } while (0) #define kretprobe_blacklist_size 0 diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 5ef2669ccd6c..00bc6f1234ba 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -350,6 +350,11 @@ static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vc return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE; } +static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL; +} + static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu) { switch (kvm_vcpu_trap_get_fault(vcpu)) { diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0aecbab6a7fb..0cd9f0f75c13 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -118,6 +118,8 @@ struct kvm_arch { */ unsigned long *pmu_filter; unsigned int pmuver; + + u8 pfr0_csv2; }; struct kvm_vcpu_fault_info { @@ -239,6 +241,7 @@ enum vcpu_sysreg { #define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) #define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) #define cp14_DBGDCCINT (MDCCINT_EL1 * 2) +#define cp14_DBGVCR (DBGVCR32_EL2 * 2) #define NR_COPRO_REGS (NR_SYS_REGS * 2) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 4ff12a7adcfd..5628289b9d5e 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -115,8 +115,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) #define pte_valid_not_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) -#define pte_valid_young(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) #define pte_valid_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) @@ -124,9 +122,12 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; * Could the pte be present in the TLB? We must check mm_tlb_flush_pending * so that we don't erroneously return false for pages that have been * remapped as PROT_NONE but are yet to be flushed from the TLB. + * Note that we can't make any assumptions based on the state of the access + * flag, since ptep_clear_flush_young() elides a DSB when invalidating the + * TLB. */ #define pte_accessible(mm, pte) \ - (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid_young(pte)) + (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte)) /* * p??_access_permitted() is true for valid user mappings (subject to the @@ -164,13 +165,6 @@ static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot) return pmd; } -static inline pte_t pte_wrprotect(pte_t pte) -{ - pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); - pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); - return pte; -} - static inline pte_t pte_mkwrite(pte_t pte) { pte = set_pte_bit(pte, __pgprot(PTE_WRITE)); @@ -196,6 +190,20 @@ static inline pte_t pte_mkdirty(pte_t pte) return pte; } +static inline pte_t pte_wrprotect(pte_t pte) +{ + /* + * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY + * clear), set the PTE_DIRTY bit. + */ + if (pte_hw_dirty(pte)) + pte = pte_mkdirty(pte); + + pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); + pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); + return pte; +} + static inline pte_t pte_mkold(pte_t pte) { return clear_pte_bit(pte, __pgprot(PTE_AF)); @@ -845,12 +853,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres pte = READ_ONCE(*ptep); do { old_pte = pte; - /* - * If hardware-dirty (PTE_WRITE/DBM bit set and PTE_RDONLY - * clear), set the PTE_DIRTY bit. - */ - if (pte_hw_dirty(pte)) - pte = pte_mkdirty(pte); pte = pte_wrprotect(pte); pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), pte_val(old_pte), pte_val(pte)); diff --git a/arch/arm64/include/asm/probes.h b/arch/arm64/include/asm/probes.h index 4266262101fe..006946745352 100644 --- a/arch/arm64/include/asm/probes.h +++ b/arch/arm64/include/asm/probes.h @@ -7,6 +7,8 @@ #ifndef _ARM_PROBES_H #define _ARM_PROBES_H +#include <asm/insn.h> + typedef u32 probe_opcode_t; typedef void (probes_handler_t) (u32 opcode, long addr, struct pt_regs *); diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 997cf8c8cd52..28c85b87b8cd 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -193,6 +193,10 @@ struct pt_regs { /* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */ u64 pmr_save; u64 stackframe[2]; + + /* Only valid for some EL1 exceptions. */ + u64 lockdep_hardirqs; + u64 exit_rcu; }; static inline bool in_syscall(struct pt_regs const *regs) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d52c1b3ce589..801861d05426 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -372,6 +372,8 @@ #define SYS_CONTEXTIDR_EL1 sys_reg(3, 0, 13, 0, 1) #define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4) +#define SYS_SCXTNUM_EL1 sys_reg(3, 0, 13, 0, 7) + #define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0) #define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0) @@ -404,6 +406,8 @@ #define SYS_TPIDR_EL0 sys_reg(3, 3, 13, 0, 2) #define SYS_TPIDRRO_EL0 sys_reg(3, 3, 13, 0, 3) +#define SYS_SCXTNUM_EL0 sys_reg(3, 3, 13, 0, 7) + /* Definitions for system register interface to AMU for ARMv8.4 onwards */ #define SYS_AM_EL0(crm, op2) sys_reg(3, 3, 13, (crm), (op2)) #define SYS_AMCR_EL0 SYS_AM_EL0(2, 0) @@ -983,7 +987,7 @@ #define SYS_TFSR_EL1_TF0_SHIFT 0 #define SYS_TFSR_EL1_TF1_SHIFT 1 #define SYS_TFSR_EL1_TF0 (UL(1) << SYS_TFSR_EL1_TF0_SHIFT) -#define SYS_TFSR_EL1_TF1 (UK(2) << SYS_TFSR_EL1_TF1_SHIFT) +#define SYS_TFSR_EL1_TF1 (UL(1) << SYS_TFSR_EL1_TF1_SHIFT) /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */ #define SYS_MPIDR_SAFE_VAL (BIT(31)) @@ -1007,6 +1011,7 @@ #include <linux/build_bug.h> #include <linux/types.h> +#include <asm/alternative.h> #define __DEFINE_MRS_MSR_S_REGNUM \ " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \ @@ -1095,6 +1100,14 @@ write_sysreg_s(__scs_new, sysreg); \ } while (0) +#define read_sysreg_par() ({ \ + u64 par; \ + asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \ + par = read_sysreg(par_el1); \ + asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); \ + par; \ +}) + #endif #endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 09977acc007d..6069be50baf9 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -86,13 +86,12 @@ static inline bool is_kernel_in_hyp_mode(void) static __always_inline bool has_vhe(void) { /* - * The following macros are defined for code specic to VHE/nVHE. - * If has_vhe() is inlined into those compilation units, it can - * be determined statically. Otherwise fall back to caps. + * Code only run in VHE/NVHE hyp context can assume VHE is present or + * absent. Otherwise fall back to caps. */ - if (__is_defined(__KVM_VHE_HYPERVISOR__)) + if (is_vhe_hyp_code()) return true; - else if (__is_defined(__KVM_NVHE_HYPERVISOR__)) + else if (is_nvhe_hyp_code()) return false; else return cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 24d75af344b1..cafaf0da05b7 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -299,6 +299,8 @@ static const struct midr_range erratum_845719_list[] = { MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), /* Brahma-B53 r0p[0] */ MIDR_REV(MIDR_BRAHMA_B53, 0, 0), + /* Kryo2XX Silver rAp4 */ + MIDR_REV(MIDR_QCOM_KRYO_2XX_SILVER, 0xa, 0x4), {}, }; #endif @@ -523,6 +525,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .cpu_enable = cpu_enable_trap_ctr_access, }, #endif +#ifdef CONFIG_ARM64_ERRATUM_1508412 + { + /* we depend on the firmware portion for correctness */ + .desc = "ARM erratum 1508412 (kernel portion)", + .capability = ARM64_WORKAROUND_1508412, + ERRATA_MIDR_RANGE(MIDR_CORTEX_A77, + 0, 0, + 1, 0), + }, +#endif { } }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index dcc165b3fc04..6f36c4f62f69 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1337,6 +1337,8 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_GOLD), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), { /* sentinel */ } diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 6a7bb3729d60..77605aec25fe 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -34,10 +34,10 @@ DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); static struct cpuinfo_arm64 boot_cpu_data; static const char *icache_policy_str[] = { - [0 ... ICACHE_POLICY_PIPT] = "RESERVED/UNKNOWN", + [ICACHE_POLICY_VPIPT] = "VPIPT", + [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN", [ICACHE_POLICY_VIPT] = "VIPT", [ICACHE_POLICY_PIPT] = "PIPT", - [ICACHE_POLICY_VPIPT] = "VPIPT", }; unsigned long __icache_flags; @@ -334,10 +334,11 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) case ICACHE_POLICY_VPIPT: set_bit(ICACHEF_VPIPT, &__icache_flags); break; - default: + case ICACHE_POLICY_RESERVED: case ICACHE_POLICY_VIPT: /* Assume aliasing */ set_bit(ICACHEF_ALIASING, &__icache_flags); + break; } pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu); diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S index df67c0f2a077..a71844fb923e 100644 --- a/arch/arm64/kernel/efi-header.S +++ b/arch/arm64/kernel/efi-header.S @@ -147,6 +147,6 @@ efi_debug_entry: * correctly at this alignment, we must ensure that .text is * placed at a 4k boundary in the Image to begin with. */ - .align 12 + .balign SEGMENT_ALIGN efi_header_end: .endm diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 43d4c329775f..70e0a7591245 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -17,40 +17,164 @@ #include <asm/mmu.h> #include <asm/sysreg.h> -static void notrace el1_abort(struct pt_regs *regs, unsigned long esr) +/* + * This is intended to match the logic in irqentry_enter(), handling the kernel + * mode transitions only. + */ +static void noinstr enter_from_kernel_mode(struct pt_regs *regs) +{ + regs->exit_rcu = false; + + if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) { + lockdep_hardirqs_off(CALLER_ADDR0); + rcu_irq_enter(); + trace_hardirqs_off_finish(); + + regs->exit_rcu = true; + return; + } + + lockdep_hardirqs_off(CALLER_ADDR0); + rcu_irq_enter_check_tick(); + trace_hardirqs_off_finish(); +} + +/* + * This is intended to match the logic in irqentry_exit(), handling the kernel + * mode transitions only, and with preemption handled elsewhere. + */ +static void noinstr exit_to_kernel_mode(struct pt_regs *regs) +{ + lockdep_assert_irqs_disabled(); + + if (interrupts_enabled(regs)) { + if (regs->exit_rcu) { + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + rcu_irq_exit(); + lockdep_hardirqs_on(CALLER_ADDR0); + return; + } + + trace_hardirqs_on(); + } else { + if (regs->exit_rcu) + rcu_irq_exit(); + } +} + +void noinstr arm64_enter_nmi(struct pt_regs *regs) +{ + regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); + + __nmi_enter(); + lockdep_hardirqs_off(CALLER_ADDR0); + lockdep_hardirq_enter(); + rcu_nmi_enter(); + + trace_hardirqs_off_finish(); + ftrace_nmi_enter(); +} + +void noinstr arm64_exit_nmi(struct pt_regs *regs) +{ + bool restore = regs->lockdep_hardirqs; + + ftrace_nmi_exit(); + if (restore) { + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + } + + rcu_nmi_exit(); + lockdep_hardirq_exit(); + if (restore) + lockdep_hardirqs_on(CALLER_ADDR0); + __nmi_exit(); +} + +asmlinkage void noinstr enter_el1_irq_or_nmi(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) + arm64_enter_nmi(regs); + else + enter_from_kernel_mode(regs); +} + +asmlinkage void noinstr exit_el1_irq_or_nmi(struct pt_regs *regs) +{ + if (IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && !interrupts_enabled(regs)) + arm64_exit_nmi(regs); + else + exit_to_kernel_mode(regs); +} + +static void noinstr el1_abort(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); + enter_from_kernel_mode(regs); local_daif_inherit(regs); far = untagged_addr(far); do_mem_abort(far, esr, regs); + local_daif_mask(); + exit_to_kernel_mode(regs); } -NOKPROBE_SYMBOL(el1_abort); -static void notrace el1_pc(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_pc(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); + enter_from_kernel_mode(regs); local_daif_inherit(regs); do_sp_pc_abort(far, esr, regs); + local_daif_mask(); + exit_to_kernel_mode(regs); } -NOKPROBE_SYMBOL(el1_pc); -static void notrace el1_undef(struct pt_regs *regs) +static void noinstr el1_undef(struct pt_regs *regs) { + enter_from_kernel_mode(regs); local_daif_inherit(regs); do_undefinstr(regs); + local_daif_mask(); + exit_to_kernel_mode(regs); } -NOKPROBE_SYMBOL(el1_undef); -static void notrace el1_inv(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_inv(struct pt_regs *regs, unsigned long esr) { + enter_from_kernel_mode(regs); local_daif_inherit(regs); bad_mode(regs, 0, esr); + local_daif_mask(); + exit_to_kernel_mode(regs); } -NOKPROBE_SYMBOL(el1_inv); -static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr) +static void noinstr arm64_enter_el1_dbg(struct pt_regs *regs) +{ + regs->lockdep_hardirqs = lockdep_hardirqs_enabled(); + + lockdep_hardirqs_off(CALLER_ADDR0); + rcu_nmi_enter(); + + trace_hardirqs_off_finish(); +} + +static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs) +{ + bool restore = regs->lockdep_hardirqs; + + if (restore) { + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + } + + rcu_nmi_exit(); + if (restore) + lockdep_hardirqs_on(CALLER_ADDR0); +} + +static void noinstr el1_dbg(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -62,18 +186,21 @@ static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + arm64_enter_el1_dbg(regs); do_debug_exception(far, esr, regs); + arm64_exit_el1_dbg(regs); } -NOKPROBE_SYMBOL(el1_dbg); -static void notrace el1_fpac(struct pt_regs *regs, unsigned long esr) +static void noinstr el1_fpac(struct pt_regs *regs, unsigned long esr) { + enter_from_kernel_mode(regs); local_daif_inherit(regs); do_ptrauth_fault(regs, esr); + local_daif_mask(); + exit_to_kernel_mode(regs); } -NOKPROBE_SYMBOL(el1_fpac); -asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) +asmlinkage void noinstr el1_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -106,20 +233,34 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) el1_inv(regs, esr); } } -NOKPROBE_SYMBOL(el1_sync_handler); -static void notrace el0_da(struct pt_regs *regs, unsigned long esr) +asmlinkage void noinstr enter_from_user_mode(void) +{ + lockdep_hardirqs_off(CALLER_ADDR0); + CT_WARN_ON(ct_state() != CONTEXT_USER); + user_exit_irqoff(); + trace_hardirqs_off_finish(); +} + +asmlinkage void noinstr exit_to_user_mode(void) +{ + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + user_enter_irqoff(); + lockdep_hardirqs_on(CALLER_ADDR0); +} + +static void noinstr el0_da(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); far = untagged_addr(far); do_mem_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el0_da); -static void notrace el0_ia(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_ia(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); @@ -131,90 +272,80 @@ static void notrace el0_ia(struct pt_regs *regs, unsigned long esr) if (!is_ttbr0_addr(far)) arm64_apply_bp_hardening(); - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_mem_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el0_ia); -static void notrace el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_fpsimd_acc(esr, regs); } -NOKPROBE_SYMBOL(el0_fpsimd_acc); -static void notrace el0_sve_acc(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sve_acc(esr, regs); } -NOKPROBE_SYMBOL(el0_sve_acc); -static void notrace el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_fpsimd_exc(esr, regs); } -NOKPROBE_SYMBOL(el0_fpsimd_exc); -static void notrace el0_sys(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_sys(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sysinstr(esr, regs); } -NOKPROBE_SYMBOL(el0_sys); -static void notrace el0_pc(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_pc(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); if (!is_ttbr0_addr(instruction_pointer(regs))) arm64_apply_bp_hardening(); - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(far, esr, regs); } -NOKPROBE_SYMBOL(el0_pc); -static void notrace el0_sp(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_sp(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(regs->sp, esr, regs); } -NOKPROBE_SYMBOL(el0_sp); -static void notrace el0_undef(struct pt_regs *regs) +static void noinstr el0_undef(struct pt_regs *regs) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_undefinstr(regs); } -NOKPROBE_SYMBOL(el0_undef); -static void notrace el0_bti(struct pt_regs *regs) +static void noinstr el0_bti(struct pt_regs *regs) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_bti(regs); } -NOKPROBE_SYMBOL(el0_bti); -static void notrace el0_inv(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_inv(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); bad_el0_sync(regs, 0, esr); } -NOKPROBE_SYMBOL(el0_inv); -static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_dbg(struct pt_regs *regs, unsigned long esr) { /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */ unsigned long far = read_sysreg(far_el1); @@ -222,30 +353,28 @@ static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr) if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - user_exit_irqoff(); + enter_from_user_mode(); do_debug_exception(far, esr, regs); local_daif_restore(DAIF_PROCCTX_NOIRQ); } -NOKPROBE_SYMBOL(el0_dbg); -static void notrace el0_svc(struct pt_regs *regs) +static void noinstr el0_svc(struct pt_regs *regs) { if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + enter_from_user_mode(); do_el0_svc(regs); } -NOKPROBE_SYMBOL(el0_svc); -static void notrace el0_fpac(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_ptrauth_fault(regs, esr); } -NOKPROBE_SYMBOL(el0_fpac); -asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) +asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -297,27 +426,25 @@ asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) el0_inv(regs, esr); } } -NOKPROBE_SYMBOL(el0_sync_handler); #ifdef CONFIG_COMPAT -static void notrace el0_cp15(struct pt_regs *regs, unsigned long esr) +static void noinstr el0_cp15(struct pt_regs *regs, unsigned long esr) { - user_exit_irqoff(); + enter_from_user_mode(); local_daif_restore(DAIF_PROCCTX); do_cp15instr(esr, regs); } -NOKPROBE_SYMBOL(el0_cp15); -static void notrace el0_svc_compat(struct pt_regs *regs) +static void noinstr el0_svc_compat(struct pt_regs *regs) { if (system_uses_irq_prio_masking()) gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + enter_from_user_mode(); do_el0_svc_compat(regs); } -NOKPROBE_SYMBOL(el0_svc_compat); -asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs) +asmlinkage void noinstr el0_sync_compat_handler(struct pt_regs *regs) { unsigned long esr = read_sysreg(esr_el1); @@ -360,5 +487,4 @@ asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs) el0_inv(regs, esr); } } -NOKPROBE_SYMBOL(el0_sync_compat_handler); #endif /* CONFIG_COMPAT */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index f30007dff35f..d72c818b019c 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -30,18 +30,18 @@ #include <asm/unistd.h> /* - * Context tracking subsystem. Used to instrument transitions - * between user and kernel mode. + * Context tracking and irqflag tracing need to instrument transitions between + * user and kernel mode. */ - .macro ct_user_exit_irqoff -#ifdef CONFIG_CONTEXT_TRACKING + .macro user_exit_irqoff +#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS) bl enter_from_user_mode #endif .endm - .macro ct_user_enter -#ifdef CONFIG_CONTEXT_TRACKING - bl context_tracking_user_enter + .macro user_enter_irqoff +#if defined(CONFIG_CONTEXT_TRACKING) || defined(CONFIG_TRACE_IRQFLAGS) + bl exit_to_user_mode #endif .endm @@ -298,9 +298,6 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING alternative_else_nop_endif ldp x21, x22, [sp, #S_PC] // load ELR, SPSR - .if \el == 0 - ct_user_enter - .endif #ifdef CONFIG_ARM64_SW_TTBR0_PAN alternative_if_not ARM64_HAS_PAN @@ -365,6 +362,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 br x30 #endif .else + /* Ensure any device/NC reads complete */ + alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412 + eret .endif sb @@ -634,16 +634,8 @@ SYM_CODE_START_LOCAL_NOALIGN(el1_irq) gic_prio_irq_setup pmr=x20, tmp=x1 enable_da_f -#ifdef CONFIG_ARM64_PSEUDO_NMI - test_irqs_unmasked res=x0, pmr=x20 - cbz x0, 1f - bl asm_nmi_enter -1: -#endif - -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif + mov x0, sp + bl enter_el1_irq_or_nmi irq_handler @@ -662,26 +654,8 @@ alternative_else_nop_endif 1: #endif -#ifdef CONFIG_ARM64_PSEUDO_NMI - /* - * When using IRQ priority masking, we can get spurious interrupts while - * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a - * section with interrupts disabled. Skip tracing in those cases. - */ - test_irqs_unmasked res=x0, pmr=x20 - cbz x0, 1f - bl asm_nmi_exit -1: -#endif - -#ifdef CONFIG_TRACE_IRQFLAGS -#ifdef CONFIG_ARM64_PSEUDO_NMI - test_irqs_unmasked res=x0, pmr=x20 - cbnz x0, 1f -#endif - bl trace_hardirqs_on -1: -#endif + mov x0, sp + bl exit_el1_irq_or_nmi kernel_exit 1 SYM_CODE_END(el1_irq) @@ -723,21 +697,14 @@ SYM_CODE_START_LOCAL_NOALIGN(el0_irq) kernel_entry 0 el0_irq_naked: gic_prio_irq_setup pmr=x20, tmp=x0 - ct_user_exit_irqoff + user_exit_irqoff enable_da_f -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - tbz x22, #55, 1f bl do_el0_irq_bp_hardening 1: irq_handler -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on -#endif b ret_to_user SYM_CODE_END(el0_irq) @@ -756,7 +723,7 @@ SYM_CODE_START_LOCAL(el0_error) el0_error_naked: mrs x25, esr_el1 gic_prio_kentry_setup tmp=x2 - ct_user_exit_irqoff + user_exit_irqoff enable_dbg mov x0, sp mov x1, x25 @@ -771,13 +738,17 @@ SYM_CODE_END(el0_error) SYM_CODE_START_LOCAL(ret_to_user) disable_daif gic_prio_kentry_setup tmp=x3 - ldr x1, [tsk, #TSK_TI_FLAGS] - and x2, x1, #_TIF_WORK_MASK +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off +#endif + ldr x19, [tsk, #TSK_TI_FLAGS] + and x2, x19, #_TIF_WORK_MASK cbnz x2, work_pending finish_ret_to_user: + user_enter_irqoff /* Ignore asynchronous tag check faults in the uaccess routines */ clear_mte_async_tcf - enable_step_tsk x1, x2 + enable_step_tsk x19, x2 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK bl stackleak_erase #endif @@ -788,11 +759,9 @@ finish_ret_to_user: */ work_pending: mov x0, sp // 'regs' + mov x1, x19 bl do_notify_resume -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on // enabled while in userspace -#endif - ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step + ldr x19, [tsk, #TSK_TI_FLAGS] // re-check for single-step b finish_ret_to_user SYM_CODE_END(ret_to_user) diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 61684a500914..c615b285ff5b 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -87,7 +87,6 @@ KVM_NVHE_ALIAS(__icache_flags); /* Kernel symbols needed for cpus_have_final/const_caps checks. */ KVM_NVHE_ALIAS(arm64_const_caps_ready); KVM_NVHE_ALIAS(cpu_hwcap_keys); -KVM_NVHE_ALIAS(cpu_hwcaps); /* Static keys which are set if a vGIC trap should be handled in hyp. */ KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 9cf2fb87584a..60456a62da11 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -67,18 +67,3 @@ void __init init_IRQ(void) local_daif_restore(DAIF_PROCCTX_NOIRQ); } } - -/* - * Stubs to make nmi_enter/exit() code callable from ASM - */ -asmlinkage void notrace asm_nmi_enter(void) -{ - nmi_enter(); -} -NOKPROBE_SYMBOL(asm_nmi_enter); - -asmlinkage void notrace asm_nmi_exit(void) -{ - nmi_exit(); -} -NOKPROBE_SYMBOL(asm_nmi_exit); diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index af9987c154ca..9ec34690e255 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -43,7 +43,7 @@ static void *image_load(struct kimage *image, u64 flags, value; bool be_image, be_kernel; struct kexec_buf kbuf; - unsigned long text_offset; + unsigned long text_offset, kernel_segment_number; struct kexec_segment *kernel_segment; int ret; @@ -88,11 +88,37 @@ static void *image_load(struct kimage *image, /* Adjust kernel segment with TEXT_OFFSET */ kbuf.memsz += text_offset; - ret = kexec_add_buffer(&kbuf); - if (ret) + kernel_segment_number = image->nr_segments; + + /* + * The location of the kernel segment may make it impossible to satisfy + * the other segment requirements, so we try repeatedly to find a + * location that will work. + */ + while ((ret = kexec_add_buffer(&kbuf)) == 0) { + /* Try to load additional data */ + kernel_segment = &image->segment[kernel_segment_number]; + ret = load_other_segments(image, kernel_segment->mem, + kernel_segment->memsz, initrd, + initrd_len, cmdline); + if (!ret) + break; + + /* + * We couldn't find space for the other segments; erase the + * kernel segment and try the next available hole. + */ + image->nr_segments -= 1; + kbuf.buf_min = kernel_segment->mem + kernel_segment->memsz; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + } + + if (ret) { + pr_err("Could not find any suitable kernel location!"); return ERR_PTR(ret); + } - kernel_segment = &image->segment[image->nr_segments - 1]; + kernel_segment = &image->segment[kernel_segment_number]; kernel_segment->mem += text_offset; kernel_segment->memsz -= text_offset; image->start = kernel_segment->mem; @@ -101,12 +127,7 @@ static void *image_load(struct kimage *image, kernel_segment->mem, kbuf.bufsz, kernel_segment->memsz); - /* Load additional data */ - ret = load_other_segments(image, - kernel_segment->mem, kernel_segment->memsz, - initrd, initrd_len, cmdline); - - return ERR_PTR(ret); + return NULL; } #ifdef CONFIG_KEXEC_IMAGE_VERIFY_SIG diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 5b0e67b93cdc..03210f644790 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -240,6 +240,11 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) return ret; } +/* + * Tries to add the initrd and DTB to the image. If it is not possible to find + * valid locations, this function will undo changes to the image and return non + * zero. + */ int load_other_segments(struct kimage *image, unsigned long kernel_load_addr, unsigned long kernel_size, @@ -248,7 +253,8 @@ int load_other_segments(struct kimage *image, { struct kexec_buf kbuf; void *headers, *dtb = NULL; - unsigned long headers_sz, initrd_load_addr = 0, dtb_len; + unsigned long headers_sz, initrd_load_addr = 0, dtb_len, + orig_segments = image->nr_segments; int ret = 0; kbuf.image = image; @@ -334,6 +340,7 @@ int load_other_segments(struct kimage *image, return 0; out_err: + image->nr_segments = orig_segments; vfree(dtb); return ret; } diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 94e8718e7229..f6f58e6265df 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -73,8 +73,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index deba738142ed..f11a1a1f7026 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -36,25 +36,16 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); static void __kprobes post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *); -static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode) -{ - void *addrs[1]; - u32 insns[1]; - - addrs[0] = addr; - insns[0] = opcode; - - return aarch64_insn_patch_text(addrs, insns, 1); -} - static void __kprobes arch_prepare_ss_slot(struct kprobe *p) { + kprobe_opcode_t *addr = p->ainsn.api.insn; + void *addrs[] = {addr, addr + 1}; + u32 insns[] = {p->opcode, BRK64_OPCODE_KPROBES_SS}; + /* prepare insn slot */ - patch_text(p->ainsn.api.insn, p->opcode); + aarch64_insn_patch_text(addrs, insns, 2); - flush_icache_range((uintptr_t) (p->ainsn.api.insn), - (uintptr_t) (p->ainsn.api.insn) + - MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + flush_icache_range((uintptr_t)addr, (uintptr_t)(addr + MAX_INSN_SIZE)); /* * Needs restoring of return address after stepping xol. @@ -128,13 +119,18 @@ void *alloc_insn_page(void) /* arm kprobe: install breakpoint in text */ void __kprobes arch_arm_kprobe(struct kprobe *p) { - patch_text(p->addr, BRK64_OPCODE_KPROBES); + void *addr = p->addr; + u32 insn = BRK64_OPCODE_KPROBES; + + aarch64_insn_patch_text(&addr, &insn, 1); } /* disarm kprobe: remove breakpoint from text */ void __kprobes arch_disarm_kprobe(struct kprobe *p) { - patch_text(p->addr, p->opcode); + void *addr = p->addr; + + aarch64_insn_patch_text(&addr, &p->opcode, 1); } void __kprobes arch_remove_kprobe(struct kprobe *p) @@ -163,20 +159,15 @@ static void __kprobes set_current_kprobe(struct kprobe *p) } /* - * Interrupts need to be disabled before single-step mode is set, and not - * reenabled until after single-step mode ends. - * Without disabling interrupt on local CPU, there is a chance of - * interrupt occurrence in the period of exception return and start of - * out-of-line single-step, that result in wrongly single stepping - * into the interrupt handler. + * Mask all of DAIF while executing the instruction out-of-line, to keep things + * simple and avoid nesting exceptions. Interrupts do have to be disabled since + * the kprobe state is per-CPU and doesn't get migrated. */ static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb, struct pt_regs *regs) { kcb->saved_irqflag = regs->pstate & DAIF_MASK; - regs->pstate |= PSR_I_BIT; - /* Unmask PSTATE.D for enabling software step exceptions. */ - regs->pstate &= ~PSR_D_BIT; + regs->pstate |= DAIF_MASK; } static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb, @@ -219,10 +210,7 @@ static void __kprobes setup_singlestep(struct kprobe *p, slot = (unsigned long)p->ainsn.api.insn; set_ss_context(kcb, slot); /* mark pending ss */ - - /* IRQs and single stepping do not mix well. */ kprobes_save_local_irqflag(kcb, regs); - kernel_enable_single_step(regs); instruction_pointer_set(regs, slot); } else { /* insn simulation */ @@ -273,12 +261,8 @@ post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs) } /* call post handler */ kcb->kprobe_status = KPROBE_HIT_SSDONE; - if (cur->post_handler) { - /* post_handler can hit breakpoint and single step - * again, so we enable D-flag for recursive exception. - */ + if (cur->post_handler) cur->post_handler(cur, regs, 0); - } reset_current_kprobe(); } @@ -302,8 +286,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr) if (!instruction_pointer(regs)) BUG(); - kernel_disable_single_step(); - if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); else @@ -365,10 +347,6 @@ static void __kprobes kprobe_handler(struct pt_regs *regs) * pre-handler and it returned non-zero, it will * modify the execution path and no need to single * stepping. Let's just reset current kprobe and exit. - * - * pre_handler can hit a breakpoint and can step thru - * before return, keep PSTATE D-flag enabled until - * pre_handler return back. */ if (!p->pre_handler || !p->pre_handler(p, regs)) { setup_singlestep(p, regs, kcb, 0); @@ -399,7 +377,7 @@ kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr) } static int __kprobes -kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) +kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned int esr) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); int retval; @@ -409,16 +387,15 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr) if (retval == DBG_HOOK_HANDLED) { kprobes_restore_local_irqflag(kcb, regs); - kernel_disable_single_step(); - post_kprobe_handler(kcb, regs); } return retval; } -static struct step_hook kprobes_step_hook = { - .fn = kprobe_single_step_handler, +static struct break_hook kprobes_break_ss_hook = { + .imm = KPROBES_BRK_SS_IMM, + .fn = kprobe_breakpoint_ss_handler, }; static int __kprobes @@ -486,7 +463,7 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p) int __init arch_init_kprobes(void) { register_kernel_break_hook(&kprobes_break_hook); - register_kernel_step_hook(&kprobes_step_hook); + register_kernel_break_hook(&kprobes_break_ss_hook); return 0; } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 4784011cecac..ed919f633ed8 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -72,13 +72,13 @@ EXPORT_SYMBOL_GPL(pm_power_off); void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); -static void __cpu_do_idle(void) +static void noinstr __cpu_do_idle(void) { dsb(sy); wfi(); } -static void __cpu_do_idle_irqprio(void) +static void noinstr __cpu_do_idle_irqprio(void) { unsigned long pmr; unsigned long daif_bits; @@ -108,7 +108,7 @@ static void __cpu_do_idle_irqprio(void) * ensure that interrupts are not masked at the PMR (because the core will * not wake up if we block the wake up signal in the interrupt controller). */ -void cpu_do_idle(void) +void noinstr cpu_do_idle(void) { if (system_uses_irq_prio_masking()) __cpu_do_idle_irqprio(); @@ -119,14 +119,14 @@ void cpu_do_idle(void) /* * This is our default idle handler. */ -void arch_cpu_idle(void) +void noinstr arch_cpu_idle(void) { /* * This should do all the clock switching and wait for interrupt * tricks */ cpu_do_idle(); - local_irq_enable(); + raw_local_irq_enable(); } #ifdef CONFIG_HOTPLUG_CPU @@ -522,14 +522,13 @@ static void erratum_1418040_thread_switch(struct task_struct *prev, bool prev32, next32; u64 val; - if (!(IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) && - cpus_have_const_cap(ARM64_WORKAROUND_1418040))) + if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040)) return; prev32 = is_compat_thread(task_thread_info(prev)); next32 = is_compat_thread(task_thread_info(next)); - if (prev32 == next32) + if (prev32 == next32 || !this_cpu_has_cap(ARM64_WORKAROUND_1418040)) return; val = read_sysreg(cntkctl_el1); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 25f3c80b5ffe..f6e4e3737405 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -118,6 +118,7 @@ static enum mitigation_state spectre_v2_get_cpu_hw_mitigation_state(void) MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), { /* sentinel */ } @@ -135,8 +136,6 @@ static enum mitigation_state spectre_v2_get_cpu_hw_mitigation_state(void) return SPECTRE_VULNERABLE; } -#define SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED (1) - static enum mitigation_state spectre_v2_get_cpu_fw_mitigation_state(void) { int ret; diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 43ae4e0c968f..62d2bda7adb8 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -66,7 +66,6 @@ static int cpu_psci_cpu_disable(unsigned int cpu) static void cpu_psci_cpu_die(unsigned int cpu) { - int ret; /* * There are no known implementations of PSCI actually using the * power state field, pass a sensible default for now. @@ -74,9 +73,7 @@ static void cpu_psci_cpu_die(unsigned int cpu) u32 state = PSCI_POWER_STATE_TYPE_POWER_DOWN << PSCI_0_2_POWER_STATE_TYPE_SHIFT; - ret = psci_ops.cpu_off(state); - - pr_crit("unable to power off CPU%u (%d)\n", cpu, ret); + psci_ops.cpu_off(state); } static int cpu_psci_cpu_kill(unsigned int cpu) diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 7689f2031c0c..793c46d6a447 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -10,6 +10,7 @@ #include <linux/uaccess.h> #include <asm/alternative.h> +#include <asm/exception.h> #include <asm/kprobes.h> #include <asm/mmu.h> #include <asm/ptrace.h> @@ -223,16 +224,16 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs, } -asmlinkage __kprobes notrace unsigned long +asmlinkage noinstr unsigned long __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg) { unsigned long ret; - nmi_enter(); + arm64_enter_nmi(regs); ret = _sdei_handler(regs, arg); - nmi_exit(); + arm64_exit_nmi(regs); return ret; } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 82e75fc2c903..18e9727d3f64 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -222,6 +222,7 @@ asmlinkage notrace void secondary_start_kernel(void) if (system_uses_irq_prio_masking()) init_gic_priority_masking(); + rcu_cpu_starting(cpu); preempt_disable(); trace_hardirqs_off(); @@ -412,6 +413,7 @@ void cpu_die_early(void) /* Mark this CPU absent */ set_cpu_present(cpu, 0); + rcu_report_dead(cpu); if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) { update_cpu_boot_status(CPU_KILL_ME); diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index e4c0dadf0d92..f8f758e4a306 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -121,7 +121,6 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, cortex_a76_erratum_1463225_svc_handler(); local_daif_restore(DAIF_PROCCTX); - user_exit(); if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) { /* diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8af4e0e85736..2059d8f43f55 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -34,6 +34,7 @@ #include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/esr.h> +#include <asm/exception.h> #include <asm/extable.h> #include <asm/insn.h> #include <asm/kprobes.h> @@ -753,8 +754,10 @@ const char *esr_get_class_string(u32 esr) * bad_mode handles the impossible case in the exception vector. This is always * fatal. */ -asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) +asmlinkage void notrace bad_mode(struct pt_regs *regs, int reason, unsigned int esr) { + arm64_enter_nmi(regs); + console_verbose(); pr_crit("Bad mode in %s handler detected on CPU%d, code 0x%08x -- %s\n", @@ -786,7 +789,7 @@ void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack) __aligned(16); -asmlinkage void handle_bad_stack(struct pt_regs *regs) +asmlinkage void noinstr handle_bad_stack(struct pt_regs *regs) { unsigned long tsk_stk = (unsigned long)current->stack; unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr); @@ -794,6 +797,8 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs) unsigned int esr = read_sysreg(esr_el1); unsigned long far = read_sysreg(far_el1); + arm64_enter_nmi(regs); + console_verbose(); pr_emerg("Insufficient stack space to handle exception!"); @@ -865,23 +870,16 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr) } } -asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr) +asmlinkage void noinstr do_serror(struct pt_regs *regs, unsigned int esr) { - nmi_enter(); + arm64_enter_nmi(regs); /* non-RAS errors are not containable */ if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr)) arm64_serror_panic(regs, esr); - nmi_exit(); -} - -asmlinkage void enter_from_user_mode(void) -{ - CT_WARN_ON(ct_state() != CONTEXT_USER); - user_exit_irqoff(); + arm64_exit_nmi(regs); } -NOKPROBE_SYMBOL(enter_from_user_mode); /* GENERIC_BUG traps */ diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 7f96a1a9f68c..79280c53b9a6 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -22,16 +22,21 @@ endif CC_COMPAT ?= $(CC) CC_COMPAT += $(CC_COMPAT_CLANG_FLAGS) + +ifneq ($(LLVM),) +LD_COMPAT ?= $(LD) +else +LD_COMPAT ?= $(CROSS_COMPILE_COMPAT)ld +endif else CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc +LD_COMPAT ?= $(CROSS_COMPILE_COMPAT)ld endif cc32-option = $(call try-run,\ $(CC_COMPAT) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) cc32-disable-warning = $(call try-run,\ $(CC_COMPAT) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) -cc32-ldoption = $(call try-run,\ - $(CC_COMPAT) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2)) cc32-as-instr = $(call try-run,\ printf "%b\n" "$(1)" | $(CC_COMPAT) $(VDSO_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3)) @@ -122,14 +127,10 @@ dmbinstr := $(call cc32-as-instr,dmb ishld,-DCONFIG_AS_DMB_ISHLD=1) VDSO_CFLAGS += $(dmbinstr) VDSO_AFLAGS += $(dmbinstr) -VDSO_LDFLAGS := $(VDSO_CPPFLAGS) # From arm vDSO Makefile -VDSO_LDFLAGS += -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 -VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 -VDSO_LDFLAGS += -nostdlib -shared -mfloat-abi=soft -VDSO_LDFLAGS += -Wl,--hash-style=sysv -VDSO_LDFLAGS += -Wl,--build-id=sha1 -VDSO_LDFLAGS += $(call cc32-ldoption,-fuse-ld=bfd) +VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1 +VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096 +VDSO_LDFLAGS += -nostdlib -shared --hash-style=sysv --build-id=sha1 # Borrow vdsomunge.c from the arm vDSO @@ -189,8 +190,8 @@ quiet_cmd_vdsold_and_vdso_check = LD32 $@ cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check) quiet_cmd_vdsold = LD32 $@ - cmd_vdsold = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \ - -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@ + cmd_vdsold = $(LD_COMPAT) $(VDSO_LDFLAGS) \ + -T $(filter %.lds,$^) $(filter %.o,$^) -o $@ quiet_cmd_vdsocc = CC32 $@ cmd_vdsocc = $(CC_COMPAT) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $< quiet_cmd_vdsocc_gettimeofday = CC32 $@ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 6d78c041fdf6..1bda604f4c70 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -278,7 +278,7 @@ SECTIONS * explicitly check instead of blindly discarding. */ .plt : { - *(.plt) *(.plt.*) *(.iplt) *(.igot) + *(.plt) *(.plt.*) *(.iplt) *(.igot .igot.plt) } ASSERT(SIZEOF(.plt) == 0, "Unexpected run-time procedure linkages detected!") diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index f56122eedffc..c0ffb019ca8b 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -102,6 +102,20 @@ static int kvm_arm_default_max_vcpus(void) return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS; } +static void set_default_csv2(struct kvm *kvm) +{ + /* + * The default is to expose CSV2 == 1 if the HW isn't affected. + * Although this is a per-CPU feature, we make it global because + * asymmetric systems are just a nuisance. + * + * Userspace can override this as long as it doesn't promise + * the impossible. + */ + if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED) + kvm->arch.pfr0_csv2 = 1; +} + /** * kvm_arch_init_vm - initializes a VM data structure * @kvm: pointer to the KVM struct @@ -127,6 +141,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) /* The maximum number of VCPUs is limited by the host's GIC model */ kvm->arch.max_vcpus = kvm_arm_default_max_vcpus(); + set_default_csv2(kvm); + return ret; out_free_stage2_pgd: kvm_free_stage2_pgd(&kvm->arch.mmu); @@ -808,6 +824,25 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) preempt_enable(); + /* + * The ARMv8 architecture doesn't give the hypervisor + * a mechanism to prevent a guest from dropping to AArch32 EL0 + * if implemented by the CPU. If we spot the guest in such + * state and that we decided it wasn't supposed to do so (like + * with the asymmetric AArch32 case), return to userspace with + * a fatal error. + */ + if (!system_supports_32bit_el0() && vcpu_mode_is_32bit(vcpu)) { + /* + * As we have caught the guest red-handed, decide that + * it isn't fit for purpose anymore by making the vcpu + * invalid. The VMM can try and fix it by issuing a + * KVM_ARM_VCPU_INIT if it really wants to. + */ + vcpu->arch.target = -1; + ret = ARM_EXCEPTION_IL; + } + ret = handle_exit(vcpu, ret); } @@ -1719,7 +1754,8 @@ int kvm_arch_init(void *opaque) return -ENODEV; } - if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE)) + if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) || + cpus_have_final_cap(ARM64_WORKAROUND_1508412)) kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \ "Only trusted guests should be used on this system.\n"); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 313a8fa3c721..1f875a8f20c4 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -140,9 +140,9 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) * We do need to save/restore PAR_EL1 though, as we haven't * saved the guest context yet, and we may return early... */ - par = read_sysreg(par_el1); + par = read_sysreg_par(); if (!__kvm_at("s1e1r", far)) - tmp = read_sysreg(par_el1); + tmp = read_sysreg_par(); else tmp = SYS_PAR_EL1_F; /* back to the guest */ write_sysreg(par, par_el1); @@ -421,7 +421,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && handle_tx2_tvm(vcpu)) - return true; + goto guest; /* * We trap the first access to the FP/SIMD to save the host context @@ -431,13 +431,13 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) * Similarly for trapped SVE accesses. */ if (__hyp_handle_fpsimd(vcpu)) - return true; + goto guest; if (__hyp_handle_ptrauth(vcpu)) - return true; + goto guest; if (!__populate_fault_info(vcpu)) - return true; + goto guest; if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { bool valid; @@ -452,7 +452,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) int ret = __vgic_v2_perform_cpuif_access(vcpu); if (ret == 1) - return true; + goto guest; /* Promote an illegal access to an SError.*/ if (ret == -1) @@ -468,12 +468,17 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) int ret = __vgic_v3_perform_cpuif_access(vcpu); if (ret == 1) - return true; + goto guest; } exit: /* Return to the host kernel and handle the exit */ return false; + +guest: + /* Re-enter the guest */ + asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); + return true; } static inline void __kvm_unexpected_el2_exception(void) diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index 7a986030145f..cce43bfe158f 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -43,7 +43,7 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR); ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR); ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL); - ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg(par_el1); + ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par(); ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1); diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index ff9a0f547b9f..ed27f06a31ba 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -17,8 +17,6 @@ SYM_FUNC_START(__host_exit) get_host_ctxt x0, x1 - ALTERNATIVE(nop, SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) - /* Store the host regs x2 and x3 */ stp x2, x3, [x0, #CPU_XREG_OFFSET(2)] diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index 47224dc62c51..b11a9d7db677 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -57,16 +57,25 @@ __do_hyp_init: cmp x0, #HVC_STUB_HCALL_NR b.lo __kvm_handle_stub_hvc - /* Set tpidr_el2 for use by HYP to free a register */ - msr tpidr_el2, x2 - - mov x2, #KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) - cmp x0, x2 - b.eq 1f + // We only actively check bits [24:31], and everything + // else has to be zero, which we check at build time. +#if (KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) & 0xFFFFFFFF00FFFFFF) +#error Unexpected __KVM_HOST_SMCCC_FUNC___kvm_hyp_init value +#endif + + ror x0, x0, #24 + eor x0, x0, #((KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) >> 24) & 0xF) + ror x0, x0, #4 + eor x0, x0, #((KVM_HOST_SMCCC_FUNC(__kvm_hyp_init) >> 28) & 0xF) + cbz x0, 1f mov x0, #SMCCC_RET_NOT_SUPPORTED eret -1: phys_to_ttbr x0, x1 +1: + /* Set tpidr_el2 for use by HYP to free a register */ + msr tpidr_el2, x2 + + phys_to_ttbr x0, x1 alternative_if ARM64_HAS_CNP orr x0, x0, #TTBR_CNP_BIT alternative_else_nop_endif diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S index bb2d986ff696..a797abace13f 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S @@ -13,6 +13,11 @@ SECTIONS { HYP_SECTION(.text) + /* + * .hyp..data..percpu needs to be page aligned to maintain the same + * alignment for when linking into vmlinux. + */ + . = ALIGN(PAGE_SIZE); HYP_SECTION_NAME(.data..percpu) : { PERCPU_INPUT(L1_CACHE_BYTES) } diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index a457a0306e03..8ae8160bc93a 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -250,7 +250,7 @@ void __noreturn hyp_panic(void) { u64 spsr = read_sysreg_el2(SYS_SPSR); u64 elr = read_sysreg_el2(SYS_ELR); - u64 par = read_sysreg(par_el1); + u64 par = read_sysreg_par(); bool restore_host = true; struct kvm_cpu_context *host_ctxt; struct kvm_vcpu *vcpu; diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 39ca71ab8866..fbde89a2c6e8 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -128,7 +128,6 @@ void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - mmu = kern_hyp_va(mmu); __tlb_switch_to_guest(mmu, &cxt); __tlbi(vmalle1); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 0cdf6e461cbd..bdf8e55ed308 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -470,6 +470,15 @@ static bool stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, if (!kvm_block_mapping_supported(addr, end, phys, level)) return false; + /* + * If the PTE was already valid, drop the refcount on the table + * early, as it will be bumped-up again in stage2_map_walk_leaf(). + * This ensures that the refcount stays constant across a valid to + * valid PTE update. + */ + if (kvm_pte_valid(*ptep)) + put_page(virt_to_page(ptep)); + if (kvm_set_valid_leaf_pte(ptep, phys, data->attr, level)) goto out; @@ -493,7 +502,13 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level, return 0; kvm_set_invalid_pte(ptep); - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, 0); + + /* + * Invalidate the whole stage-2, as we may have numerous leaf + * entries below us which would otherwise need invalidating + * individually. + */ + kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu); data->anchor = ptep; return 0; } @@ -635,7 +650,7 @@ static void stage2_flush_dcache(void *addr, u64 size) static bool stage2_pte_cacheable(kvm_pte_t pte) { - u64 memattr = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR, pte); + u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR; return memattr == PAGE_S2_MEMATTR(NORMAL); } @@ -846,7 +861,7 @@ int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm) u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; - pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL | __GFP_ZERO); + pgt->pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT | __GFP_ZERO); if (!pgt->pgd) return -ENOMEM; diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index fe69de16dadc..62546e20b251 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -215,7 +215,7 @@ void __noreturn hyp_panic(void) { u64 spsr = read_sysreg_el2(SYS_SPSR); u64 elr = read_sysreg_el2(SYS_ELR); - u64 par = read_sysreg(par_el1); + u64 par = read_sysreg_par(); __hyp_call_panic(spsr, elr, par); unreachable(); diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 9824025ccc5c..25ea4ecb6449 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -31,7 +31,7 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) val = SMCCC_RET_SUCCESS; break; case SPECTRE_UNAFFECTED: - val = SMCCC_RET_NOT_REQUIRED; + val = SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED; break; } break; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 19aacc7d64de..75814a02d189 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -754,10 +754,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, gfn_t gfn; kvm_pfn_t pfn; bool logging_active = memslot_is_logging(memslot); - unsigned long vma_pagesize; + unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu); + unsigned long vma_pagesize, fault_granule; enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; struct kvm_pgtable *pgt; + fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level); write_fault = kvm_is_write_fault(vcpu); exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); VM_BUG_ON(write_fault && exec_fault); @@ -787,14 +789,28 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vma_shift = PAGE_SHIFT; } - if (vma_shift == PUD_SHIFT && - !fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE)) - vma_shift = PMD_SHIFT; - - if (vma_shift == PMD_SHIFT && - !fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) { - force_pte = true; + switch (vma_shift) { +#ifndef __PAGETABLE_PMD_FOLDED + case PUD_SHIFT: + if (fault_supports_stage2_huge_mapping(memslot, hva, PUD_SIZE)) + break; + fallthrough; +#endif + case CONT_PMD_SHIFT: + vma_shift = PMD_SHIFT; + fallthrough; + case PMD_SHIFT: + if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) + break; + fallthrough; + case CONT_PTE_SHIFT: vma_shift = PAGE_SHIFT; + force_pte = true; + fallthrough; + case PAGE_SHIFT: + break; + default: + WARN_ONCE(1, "Unknown vma_shift %d", vma_shift); } vma_pagesize = 1UL << vma_shift; @@ -839,6 +855,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (kvm_is_device_pfn(pfn)) { device = true; + force_pte = true; } else if (logging_active && !write_fault) { /* * Only actually map the page as writable if this was a write @@ -881,7 +898,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, else if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) prot |= KVM_PGTABLE_PROT_X; - if (fault_status == FSC_PERM && !(logging_active && writable)) { + /* + * Under the premise of getting a FSC_PERM fault, we just need to relax + * permissions only if vma_pagesize equals fault_granule. Otherwise, + * kvm_pgtable_stage2_map() should be called to change block size. + */ + if (fault_status == FSC_PERM && vma_pagesize == fault_granule) { ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot); } else { ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize, diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d9117bc56237..c1fac9836af1 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -95,7 +95,7 @@ static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; - case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break; + case PAR_EL1: *val = read_sysreg_par(); break; case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; @@ -1038,8 +1038,8 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { SYS_DESC(SYS_PMEVTYPERn_EL0(n)), \ access_pmu_evtyper, reset_unknown, (PMEVTYPER0_EL0 + n), } -static bool access_amu(struct kvm_vcpu *vcpu, struct sys_reg_params *p, - const struct sys_reg_desc *r) +static bool undef_access(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) { kvm_inject_undefined(vcpu); @@ -1047,33 +1047,25 @@ static bool access_amu(struct kvm_vcpu *vcpu, struct sys_reg_params *p, } /* Macro to expand the AMU counter and type registers*/ -#define AMU_AMEVCNTR0_EL0(n) { SYS_DESC(SYS_AMEVCNTR0_EL0(n)), access_amu } -#define AMU_AMEVTYPER0_EL0(n) { SYS_DESC(SYS_AMEVTYPER0_EL0(n)), access_amu } -#define AMU_AMEVCNTR1_EL0(n) { SYS_DESC(SYS_AMEVCNTR1_EL0(n)), access_amu } -#define AMU_AMEVTYPER1_EL0(n) { SYS_DESC(SYS_AMEVTYPER1_EL0(n)), access_amu } - -static bool trap_ptrauth(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) -{ - /* - * If we land here, that is because we didn't fixup the access on exit - * by allowing the PtrAuth sysregs. The only way this happens is when - * the guest does not have PtrAuth support enabled. - */ - kvm_inject_undefined(vcpu); - - return false; -} +#define AMU_AMEVCNTR0_EL0(n) { SYS_DESC(SYS_AMEVCNTR0_EL0(n)), undef_access } +#define AMU_AMEVTYPER0_EL0(n) { SYS_DESC(SYS_AMEVTYPER0_EL0(n)), undef_access } +#define AMU_AMEVCNTR1_EL0(n) { SYS_DESC(SYS_AMEVCNTR1_EL0(n)), undef_access } +#define AMU_AMEVTYPER1_EL0(n) { SYS_DESC(SYS_AMEVTYPER1_EL0(n)), undef_access } static unsigned int ptrauth_visibility(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd) { - return vcpu_has_ptrauth(vcpu) ? 0 : REG_HIDDEN_USER | REG_HIDDEN_GUEST; + return vcpu_has_ptrauth(vcpu) ? 0 : REG_HIDDEN; } +/* + * If we land here on a PtrAuth access, that is because we didn't + * fixup the access on exit by allowing the PtrAuth sysregs. The only + * way this happens is when the guest does not have PtrAuth support + * enabled. + */ #define __PTRAUTH_KEY(k) \ - { SYS_DESC(SYS_## k), trap_ptrauth, reset_unknown, k, \ + { SYS_DESC(SYS_## k), undef_access, reset_unknown, k, \ .visibility = ptrauth_visibility} #define PTRAUTH_KEY(k) \ @@ -1128,9 +1120,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, if (!vcpu_has_sve(vcpu)) val &= ~(0xfUL << ID_AA64PFR0_SVE_SHIFT); val &= ~(0xfUL << ID_AA64PFR0_AMU_SHIFT); - if (!(val & (0xfUL << ID_AA64PFR0_CSV2_SHIFT)) && - arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED) - val |= (1UL << ID_AA64PFR0_CSV2_SHIFT); + val &= ~(0xfUL << ID_AA64PFR0_CSV2_SHIFT); + val |= ((u64)vcpu->kvm->arch.pfr0_csv2 << ID_AA64PFR0_CSV2_SHIFT); } else if (id == SYS_ID_AA64PFR1_EL1) { val &= ~(0xfUL << ID_AA64PFR1_MTE_SHIFT); } else if (id == SYS_ID_AA64ISAR1_EL1 && !vcpu_has_ptrauth(vcpu)) { @@ -1153,6 +1144,22 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, return val; } +static unsigned int id_visibility(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) +{ + u32 id = sys_reg((u32)r->Op0, (u32)r->Op1, + (u32)r->CRn, (u32)r->CRm, (u32)r->Op2); + + switch (id) { + case SYS_ID_AA64ZFR0_EL1: + if (!vcpu_has_sve(vcpu)) + return REG_RAZ; + break; + } + + return 0; +} + /* cpufeature ID register access trap handlers */ static bool __access_id_reg(struct kvm_vcpu *vcpu, @@ -1171,7 +1178,9 @@ static bool access_id_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - return __access_id_reg(vcpu, p, r, false); + bool raz = sysreg_visible_as_raz(vcpu, r); + + return __access_id_reg(vcpu, p, r, raz); } static bool access_raz_id_reg(struct kvm_vcpu *vcpu, @@ -1192,71 +1201,40 @@ static unsigned int sve_visibility(const struct kvm_vcpu *vcpu, if (vcpu_has_sve(vcpu)) return 0; - return REG_HIDDEN_USER | REG_HIDDEN_GUEST; -} - -/* Visibility overrides for SVE-specific ID registers */ -static unsigned int sve_id_visibility(const struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) -{ - if (vcpu_has_sve(vcpu)) - return 0; - - return REG_HIDDEN_USER; + return REG_HIDDEN; } -/* Generate the emulated ID_AA64ZFR0_EL1 value exposed to the guest */ -static u64 guest_id_aa64zfr0_el1(const struct kvm_vcpu *vcpu) -{ - if (!vcpu_has_sve(vcpu)) - return 0; - - return read_sanitised_ftr_reg(SYS_ID_AA64ZFR0_EL1); -} - -static bool access_id_aa64zfr0_el1(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) -{ - if (p->is_write) - return write_to_read_only(vcpu, p, rd); - - p->regval = guest_id_aa64zfr0_el1(vcpu); - return true; -} - -static int get_id_aa64zfr0_el1(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) -{ - u64 val; - - if (WARN_ON(!vcpu_has_sve(vcpu))) - return -ENOENT; - - val = guest_id_aa64zfr0_el1(vcpu); - return reg_to_user(uaddr, &val, reg->id); -} - -static int set_id_aa64zfr0_el1(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) +static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) { const u64 id = sys_reg_to_index(rd); int err; u64 val; - - if (WARN_ON(!vcpu_has_sve(vcpu))) - return -ENOENT; + u8 csv2; err = reg_from_user(&val, uaddr, id); if (err) return err; - /* This is what we mean by invariant: you can't change it. */ - if (val != guest_id_aa64zfr0_el1(vcpu)) + /* + * Allow AA64PFR0_EL1.CSV2 to be set from userspace as long as + * it doesn't promise more than what is actually provided (the + * guest could otherwise be covered in ectoplasmic residue). + */ + csv2 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_CSV2_SHIFT); + if (csv2 > 1 || + (csv2 && arm64_get_spectre_v2_state() != SPECTRE_UNAFFECTED)) return -EINVAL; + /* We can only differ with CSV2, and anything else is an error */ + val ^= read_id_reg(vcpu, rd, false); + val &= ~(0xFUL << ID_AA64PFR0_CSV2_SHIFT); + if (val) + return -EINVAL; + + vcpu->kvm->arch.pfr0_csv2 = csv2; + return 0; } @@ -1299,13 +1277,17 @@ static int __set_id_reg(const struct kvm_vcpu *vcpu, static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - return __get_id_reg(vcpu, rd, uaddr, false); + bool raz = sysreg_visible_as_raz(vcpu, rd); + + return __get_id_reg(vcpu, rd, uaddr, raz); } static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - return __set_id_reg(vcpu, rd, uaddr, false); + bool raz = sysreg_visible_as_raz(vcpu, rd); + + return __set_id_reg(vcpu, rd, uaddr, raz); } static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, @@ -1384,19 +1366,13 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return true; } -static bool access_mte_regs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - kvm_inject_undefined(vcpu); - return false; -} - /* sys_reg_desc initialiser for known cpufeature ID registers */ #define ID_SANITISED(name) { \ SYS_DESC(SYS_##name), \ .access = access_id_reg, \ .get_user = get_id_reg, \ .set_user = set_id_reg, \ + .visibility = id_visibility, \ } /* @@ -1514,11 +1490,12 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* AArch64 ID registers */ /* CRm=4 */ - ID_SANITISED(ID_AA64PFR0_EL1), + { SYS_DESC(SYS_ID_AA64PFR0_EL1), .access = access_id_reg, + .get_user = get_id_reg, .set_user = set_id_aa64pfr0_el1, }, ID_SANITISED(ID_AA64PFR1_EL1), ID_UNALLOCATED(4,2), ID_UNALLOCATED(4,3), - { SYS_DESC(SYS_ID_AA64ZFR0_EL1), access_id_aa64zfr0_el1, .get_user = get_id_aa64zfr0_el1, .set_user = set_id_aa64zfr0_el1, .visibility = sve_id_visibility }, + ID_SANITISED(ID_AA64ZFR0_EL1), ID_UNALLOCATED(4,5), ID_UNALLOCATED(4,6), ID_UNALLOCATED(4,7), @@ -1557,8 +1534,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 }, { SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 }, - { SYS_DESC(SYS_RGSR_EL1), access_mte_regs }, - { SYS_DESC(SYS_GCR_EL1), access_mte_regs }, + { SYS_DESC(SYS_RGSR_EL1), undef_access }, + { SYS_DESC(SYS_GCR_EL1), undef_access }, { SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility }, { SYS_DESC(SYS_TTBR0_EL1), access_vm_reg, reset_unknown, TTBR0_EL1 }, @@ -1584,8 +1561,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi }, { SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi }, - { SYS_DESC(SYS_TFSR_EL1), access_mte_regs }, - { SYS_DESC(SYS_TFSRE0_EL1), access_mte_regs }, + { SYS_DESC(SYS_TFSR_EL1), undef_access }, + { SYS_DESC(SYS_TFSRE0_EL1), undef_access }, { SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 }, { SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 }, @@ -1621,6 +1598,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 }, { SYS_DESC(SYS_TPIDR_EL1), NULL, reset_unknown, TPIDR_EL1 }, + { SYS_DESC(SYS_SCXTNUM_EL1), undef_access }, + { SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0}, { SYS_DESC(SYS_CCSIDR_EL1), access_ccsidr }, @@ -1649,14 +1628,16 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 }, { SYS_DESC(SYS_TPIDRRO_EL0), NULL, reset_unknown, TPIDRRO_EL0 }, - { SYS_DESC(SYS_AMCR_EL0), access_amu }, - { SYS_DESC(SYS_AMCFGR_EL0), access_amu }, - { SYS_DESC(SYS_AMCGCR_EL0), access_amu }, - { SYS_DESC(SYS_AMUSERENR_EL0), access_amu }, - { SYS_DESC(SYS_AMCNTENCLR0_EL0), access_amu }, - { SYS_DESC(SYS_AMCNTENSET0_EL0), access_amu }, - { SYS_DESC(SYS_AMCNTENCLR1_EL0), access_amu }, - { SYS_DESC(SYS_AMCNTENSET1_EL0), access_amu }, + { SYS_DESC(SYS_SCXTNUM_EL0), undef_access }, + + { SYS_DESC(SYS_AMCR_EL0), undef_access }, + { SYS_DESC(SYS_AMCFGR_EL0), undef_access }, + { SYS_DESC(SYS_AMCGCR_EL0), undef_access }, + { SYS_DESC(SYS_AMUSERENR_EL0), undef_access }, + { SYS_DESC(SYS_AMCNTENCLR0_EL0), undef_access }, + { SYS_DESC(SYS_AMCNTENSET0_EL0), undef_access }, + { SYS_DESC(SYS_AMCNTENCLR1_EL0), undef_access }, + { SYS_DESC(SYS_AMCNTENSET1_EL0), undef_access }, AMU_AMEVCNTR0_EL0(0), AMU_AMEVCNTR0_EL0(1), AMU_AMEVCNTR0_EL0(2), @@ -1897,9 +1878,9 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(1), /* DBGDCCINT */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32, NULL, cp14_DBGDCCINT }, /* DBGDSCRext */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32, NULL, cp14_DBGDSCRext }, DBG_BCR_BVR_WCR_WVR(2), /* DBGDTR[RT]Xint */ { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi }, @@ -1914,7 +1895,7 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(6), /* DBGVCR */ - { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32 }, + { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32, NULL, cp14_DBGVCR }, DBG_BCR_BVR_WCR_WVR(7), DBG_BCR_BVR_WCR_WVR(8), DBG_BCR_BVR_WCR_WVR(9), @@ -2185,7 +2166,7 @@ static void perform_access(struct kvm_vcpu *vcpu, trace_kvm_sys_access(*vcpu_pc(vcpu), params, r); /* Check for regs disabled by runtime config */ - if (sysreg_hidden_from_guest(vcpu, r)) { + if (sysreg_hidden(vcpu, r)) { kvm_inject_undefined(vcpu); return; } @@ -2684,7 +2665,7 @@ int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg return get_invariant_sys_reg(reg->id, uaddr); /* Check for regs disabled by runtime config */ - if (sysreg_hidden_from_user(vcpu, r)) + if (sysreg_hidden(vcpu, r)) return -ENOENT; if (r->get_user) @@ -2709,7 +2690,7 @@ int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg return set_invariant_sys_reg(reg->id, uaddr); /* Check for regs disabled by runtime config */ - if (sysreg_hidden_from_user(vcpu, r)) + if (sysreg_hidden(vcpu, r)) return -ENOENT; if (r->set_user) @@ -2780,7 +2761,7 @@ static int walk_one_sys_reg(const struct kvm_vcpu *vcpu, if (!(rd->reg || rd->get_user)) return 0; - if (sysreg_hidden_from_user(vcpu, rd)) + if (sysreg_hidden(vcpu, rd)) return 0; if (!copy_reg_to_user(rd, uind)) diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 5a6fc30f5989..0f95964339b1 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -59,8 +59,8 @@ struct sys_reg_desc { const struct sys_reg_desc *rd); }; -#define REG_HIDDEN_USER (1 << 0) /* hidden from userspace ioctls */ -#define REG_HIDDEN_GUEST (1 << 1) /* hidden from guest */ +#define REG_HIDDEN (1 << 0) /* hidden from userspace and guest */ +#define REG_RAZ (1 << 1) /* RAZ from userspace and guest */ static __printf(2, 3) inline void print_sys_reg_msg(const struct sys_reg_params *p, @@ -111,22 +111,22 @@ static inline void reset_val(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r __vcpu_sys_reg(vcpu, r->reg) = r->val; } -static inline bool sysreg_hidden_from_guest(const struct kvm_vcpu *vcpu, - const struct sys_reg_desc *r) +static inline bool sysreg_hidden(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) { if (likely(!r->visibility)) return false; - return r->visibility(vcpu, r) & REG_HIDDEN_GUEST; + return r->visibility(vcpu, r) & REG_HIDDEN; } -static inline bool sysreg_hidden_from_user(const struct kvm_vcpu *vcpu, - const struct sys_reg_desc *r) +static inline bool sysreg_visible_as_raz(const struct kvm_vcpu *vcpu, + const struct sys_reg_desc *r) { if (likely(!r->visibility)) return false; - return r->visibility(vcpu, r) & REG_HIDDEN_USER; + return r->visibility(vcpu, r) & REG_RAZ; } static inline int cmp_sys_reg(const struct sys_reg_desc *i1, diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 52d6f24f65dc..15a6c98ee92f 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -273,6 +273,23 @@ static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu, return extract_bytes(value, addr & 7, len); } +static unsigned long vgic_uaccess_read_v3r_typer(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu); + int target_vcpu_id = vcpu->vcpu_id; + u64 value; + + value = (u64)(mpidr & GENMASK(23, 0)) << 32; + value |= ((target_vcpu_id & 0xffff) << 8); + + if (vgic_has_its(vcpu->kvm)) + value |= GICR_TYPER_PLPIS; + + /* reporting of the Last bit is not supported for userspace */ + return extract_bytes(value, addr & 7, len); +} + static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { @@ -593,8 +610,9 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = { REGISTER_DESC_WITH_LENGTH(GICR_IIDR, vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4, VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_TYPER, - vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8, + REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_TYPER, + vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, + vgic_uaccess_read_v3r_typer, vgic_mmio_uaccess_write_wi, 8, VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_WAKER, vgic_mmio_read_raz, vgic_mmio_write_wi, 4, diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index e0bf83d556f2..dc8d2a216a6e 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -56,9 +56,8 @@ stp \reg1, \reg2, [\ptr], \val .endm - .weak memcpy SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_PI(memcpy) +SYM_FUNC_START_WEAK_PI(memcpy) #include "copy_template.S" ret SYM_FUNC_END_PI(memcpy) diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S index 02cda2e33bde..1035dce4bdaf 100644 --- a/arch/arm64/lib/memmove.S +++ b/arch/arm64/lib/memmove.S @@ -45,9 +45,8 @@ C_h .req x12 D_l .req x13 D_h .req x14 - .weak memmove SYM_FUNC_START_ALIAS(__memmove) -SYM_FUNC_START_PI(memmove) +SYM_FUNC_START_WEAK_PI(memmove) cmp dstin, src b.lo __memcpy add tmp1, src, count diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index 77c3c7ba0084..a9c1c9a01ea9 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -42,9 +42,8 @@ dst .req x8 tmp3w .req w9 tmp3 .req x9 - .weak memset SYM_FUNC_START_ALIAS(__memset) -SYM_FUNC_START_PI(memset) +SYM_FUNC_START_WEAK_PI(memset) mov dst, dstin /* Preserve return value. */ and A_lw, val, #255 orr A_lw, A_lw, A_lw, lsl #8 diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 94c99c1c19e3..795d224f184f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -262,7 +262,7 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr, local_irq_save(flags); asm volatile("at s1e1r, %0" :: "r" (addr)); isb(); - par = read_sysreg(par_el1); + par = read_sysreg_par(); local_irq_restore(flags); /* @@ -789,25 +789,6 @@ void __init hook_debug_fault_code(int nr, */ static void debug_exception_enter(struct pt_regs *regs) { - /* - * Tell lockdep we disabled irqs in entry.S. Do nothing if they were - * already disabled to preserve the last enabled/disabled addresses. - */ - if (interrupts_enabled(regs)) - trace_hardirqs_off(); - - if (user_mode(regs)) { - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - } else { - /* - * We might have interrupted pretty much anything. In - * fact, if we're a debug exception, we can even interrupt - * NMI processing. We don't want this code makes in_nmi() - * to return true, but we need to notify RCU. - */ - rcu_nmi_enter(); - } - preempt_disable(); /* This code is a bit fragile. Test it. */ @@ -818,12 +799,6 @@ NOKPROBE_SYMBOL(debug_exception_enter); static void debug_exception_exit(struct pt_regs *regs) { preempt_enable_no_resched(); - - if (!user_mode(regs)) - rcu_nmi_exit(); - - if (interrupts_enabled(regs)) - trace_hardirqs_on(); } NOKPROBE_SYMBOL(debug_exception_exit); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 1c0f3e02f731..ca692a815731 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1444,11 +1444,28 @@ static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size) free_empty_tables(start, end, PAGE_OFFSET, PAGE_END); } +static bool inside_linear_region(u64 start, u64 size) +{ + /* + * Linear mapping region is the range [PAGE_OFFSET..(PAGE_END - 1)] + * accommodating both its ends but excluding PAGE_END. Max physical + * range which can be mapped inside this linear mapping range, must + * also be derived from its end points. + */ + return start >= __pa(_PAGE_OFFSET(vabits_actual)) && + (start + size - 1) <= __pa(PAGE_END - 1); +} + int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params) { int ret, flags = 0; + if (!inside_linear_region(start, size)) { + pr_err("[%llx %llx] is outside linear mapping region\n", start, start + size); + return -EINVAL; + } + if (rodata_full || debug_pagealloc_enabled()) flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; diff --git a/arch/csky/kernel/perf_regs.c b/arch/csky/kernel/perf_regs.c index eb32838b8210..09b7f88a2d6a 100644 --- a/arch/csky/kernel/perf_regs.c +++ b/arch/csky/kernel/perf_regs.c @@ -32,8 +32,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c index f730869e21ee..69af6bc87e64 100644 --- a/arch/csky/kernel/process.c +++ b/arch/csky/kernel/process.c @@ -102,6 +102,6 @@ void arch_cpu_idle(void) #ifdef CONFIG_CPU_PM_STOP asm volatile("stop\n"); #endif - local_irq_enable(); + raw_local_irq_enable(); } #endif diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index aea0a40b77a9..bc1364db58fe 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -57,7 +57,7 @@ asmlinkage void ret_from_kernel_thread(void); */ void arch_cpu_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); __asm__("sleep"); } diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index 5a0a95d93ddb..67767c5ed98c 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -44,7 +44,7 @@ void arch_cpu_idle(void) { __vmwait(); /* interrupts wake us up, but irqs are still disabled */ - local_irq_enable(); + raw_local_irq_enable(); } /* diff --git a/arch/ia64/include/asm/sparsemem.h b/arch/ia64/include/asm/sparsemem.h index 336d0570e1fa..dd8c166ffd7b 100644 --- a/arch/ia64/include/asm/sparsemem.h +++ b/arch/ia64/include/asm/sparsemem.h @@ -18,4 +18,10 @@ #endif #endif /* CONFIG_SPARSEMEM */ + +#ifdef CONFIG_MEMORY_HOTPLUG +int memory_add_physaddr_to_nid(u64 addr); +#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid +#endif + #endif /* _ASM_IA64_SPARSEMEM_H */ diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 6b61a703bcf5..c9ff8796b509 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -239,7 +239,7 @@ void arch_cpu_idle(void) if (mark_idle) (*mark_idle)(1); - safe_halt(); + raw_safe_halt(); if (mark_idle) (*mark_idle)(0); diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index a9e46e525cd0..f99860771ff4 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -149,5 +149,5 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpregs) void arch_cpu_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); } diff --git a/arch/mips/alchemy/common/clock.c b/arch/mips/alchemy/common/clock.c index a95a894aceaf..f0c830337104 100644 --- a/arch/mips/alchemy/common/clock.c +++ b/arch/mips/alchemy/common/clock.c @@ -152,6 +152,7 @@ static struct clk __init *alchemy_clk_setup_cpu(const char *parent_name, { struct clk_init_data id; struct clk_hw *h; + struct clk *clk; h = kzalloc(sizeof(*h), GFP_KERNEL); if (!h) @@ -164,7 +165,13 @@ static struct clk __init *alchemy_clk_setup_cpu(const char *parent_name, id.ops = &alchemy_clkops_cpu; h->init = &id; - return clk_register(NULL, h); + clk = clk_register(NULL, h); + if (IS_ERR(clk)) { + pr_err("failed to register clock\n"); + kfree(h); + } + + return clk; } /* AUXPLLs ************************************************************/ diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.h b/arch/mips/cavium-octeon/crypto/octeon-crypto.h index 7315cc307397..cb68f9e284bb 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-crypto.h +++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.h @@ -41,7 +41,7 @@ do { \ */ #define read_octeon_64bit_hash_dword(index) \ ({ \ - u64 __value; \ + __be64 __value; \ \ __asm__ __volatile__ ( \ "dmfc2 %[rt],0x0048+" STR(index) \ diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c index 8c8ea139653e..5ee4ade99b99 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-md5.c +++ b/arch/mips/cavium-octeon/crypto/octeon-md5.c @@ -68,10 +68,11 @@ static int octeon_md5_init(struct shash_desc *desc) { struct md5_state *mctx = shash_desc_ctx(desc); - mctx->hash[0] = cpu_to_le32(MD5_H0); - mctx->hash[1] = cpu_to_le32(MD5_H1); - mctx->hash[2] = cpu_to_le32(MD5_H2); - mctx->hash[3] = cpu_to_le32(MD5_H3); + mctx->hash[0] = MD5_H0; + mctx->hash[1] = MD5_H1; + mctx->hash[2] = MD5_H2; + mctx->hash[3] = MD5_H3; + cpu_to_le32_array(mctx->hash, 4); mctx->byte_count = 0; return 0; @@ -139,8 +140,9 @@ static int octeon_md5_final(struct shash_desc *desc, u8 *out) } memset(p, 0, padding); - mctx->block[14] = cpu_to_le32(mctx->byte_count << 3); - mctx->block[15] = cpu_to_le32(mctx->byte_count >> 29); + mctx->block[14] = mctx->byte_count << 3; + mctx->block[15] = mctx->byte_count >> 29; + cpu_to_le32_array(mctx->block + 14, 2); octeon_md5_transform(mctx->block); octeon_md5_read_hash(mctx); diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha1.c b/arch/mips/cavium-octeon/crypto/octeon-sha1.c index 75e79b47abfe..30f1d75208a5 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-sha1.c +++ b/arch/mips/cavium-octeon/crypto/octeon-sha1.c @@ -14,7 +14,7 @@ */ #include <linux/mm.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <linux/init.h> #include <linux/types.h> #include <linux/module.h> diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha256.c b/arch/mips/cavium-octeon/crypto/octeon-sha256.c index a682ce76716a..36cb92895d72 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-sha256.c +++ b/arch/mips/cavium-octeon/crypto/octeon-sha256.c @@ -15,7 +15,7 @@ */ #include <linux/mm.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <linux/init.h> #include <linux/types.h> #include <linux/module.h> diff --git a/arch/mips/cavium-octeon/crypto/octeon-sha512.c b/arch/mips/cavium-octeon/crypto/octeon-sha512.c index 50722a0cfb53..359f039820d8 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-sha512.c +++ b/arch/mips/cavium-octeon/crypto/octeon-sha512.c @@ -14,7 +14,7 @@ */ #include <linux/mm.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <linux/init.h> #include <linux/types.h> #include <linux/module.h> diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index a950fc1ddb4d..6c0532d7b211 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -154,6 +154,7 @@ static inline void pmd_clear(pmd_t *pmdp) #if defined(CONFIG_XPA) +#define MAX_POSSIBLE_PHYSMEM_BITS 40 #define pte_pfn(x) (((unsigned long)((x).pte_high >> _PFN_SHIFT)) | (unsigned long)((x).pte_low << _PAGE_PRESENT_SHIFT)) static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) @@ -169,6 +170,7 @@ pfn_pte(unsigned long pfn, pgprot_t prot) #elif defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) +#define MAX_POSSIBLE_PHYSMEM_BITS 36 #define pte_pfn(x) ((unsigned long)((x).pte_high >> 6)) static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) @@ -183,6 +185,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) #else +#define MAX_POSSIBLE_PHYSMEM_BITS 32 #ifdef CONFIG_CPU_VR41XX #define pte_pfn(x) ((unsigned long)((x).pte >> (PAGE_SHIFT + 2))) #define pfn_pte(pfn, prot) __pte(((pfn) << (PAGE_SHIFT + 2)) | pgprot_val(prot)) diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index 5bc3b04693c7..18e69ebf5691 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -33,19 +33,19 @@ static void __cpuidle r3081_wait(void) { unsigned long cfg = read_c0_conf(); write_c0_conf(cfg | R30XX_CONF_HALT); - local_irq_enable(); + raw_local_irq_enable(); } static void __cpuidle r39xx_wait(void) { if (!need_resched()) write_c0_conf(read_c0_conf() | TX39_CONF_HALT); - local_irq_enable(); + raw_local_irq_enable(); } void __cpuidle r4k_wait(void) { - local_irq_enable(); + raw_local_irq_enable(); __r4k_wait(); } @@ -64,7 +64,7 @@ void __cpuidle r4k_wait_irqoff(void) " .set arch=r4000 \n" " wait \n" " .set pop \n"); - local_irq_enable(); + raw_local_irq_enable(); } /* @@ -84,7 +84,7 @@ static void __cpuidle rm7k_wait_irqoff(void) " wait \n" " mtc0 $1, $12 # stalls until W stage \n" " .set pop \n"); - local_irq_enable(); + raw_local_irq_enable(); } /* @@ -257,7 +257,7 @@ void arch_cpu_idle(void) if (cpu_wait) cpu_wait(); else - local_irq_enable(); + raw_local_irq_enable(); } #ifdef CONFIG_CPU_IDLE diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 0d4253208bde..ca579deef939 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -262,8 +262,8 @@ static void __init bootmem_init(void) static void __init bootmem_init(void) { phys_addr_t ramstart, ramend; - phys_addr_t start, end; - u64 i; + unsigned long start, end; + int i; ramstart = memblock_start_of_DRAM(); ramend = memblock_end_of_DRAM(); @@ -300,7 +300,7 @@ static void __init bootmem_init(void) min_low_pfn = ARCH_PFN_OFFSET; max_pfn = PFN_DOWN(ramend); - for_each_mem_range(i, &start, &end) { + for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) { /* * Skip highmem here so we get an accurate max_low_pfn if low * memory stops short of high memory. diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 38e2894d5fa3..1b939abbe4ca 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -438,6 +438,7 @@ int has_transparent_hugepage(void) } return mask == PM_HUGE_MASK; } +EXPORT_SYMBOL(has_transparent_hugepage); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index 4ffe857e6ada..50b4eb19a6cc 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL(pm_power_off); void arch_cpu_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); } /* diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 0ff391f00334..3c98728cce24 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -79,7 +79,7 @@ void machine_power_off(void) */ void arch_cpu_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); if (mfspr(SPR_UPR) & SPR_UPR_PMP) mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME); } diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index f196d96e2f9f..a92a23d6acd9 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -169,7 +169,7 @@ void __cpuidle arch_cpu_idle_dead(void) void __cpuidle arch_cpu_idle(void) { - local_irq_enable(); + raw_local_irq_enable(); /* nop on real hardware, qemu will idle sleep. */ asm volatile("or %%r10,%%r10,%%r10\n":::); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e9f13fe08492..5181872f9452 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -152,6 +152,7 @@ config PPC select ARCH_USE_QUEUED_SPINLOCKS if PPC_QUEUED_SPINLOCKS select ARCH_WANT_IPC_PARSE_VERSION select ARCH_WANT_IRQS_OFF_ACTIVATE_MM + select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WEAK_RELEASE_ACQUIRE select BINFMT_ELF select BUILDTIME_TABLE_SORT diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index a4d56f0a41d9..5c8c06215dd4 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -123,7 +123,6 @@ endif LDFLAGS_vmlinux-y := -Bstatic LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y) -LDFLAGS_vmlinux += $(call ld-option,--orphan-handling=warn) ifdef CONFIG_PPC64 ifeq ($(call cc-option-yn,-mcmodel=medium),y) @@ -248,7 +247,6 @@ KBUILD_CFLAGS += $(call cc-option,-mno-string) cpu-as-$(CONFIG_40x) += -Wa,-m405 cpu-as-$(CONFIG_44x) += -Wa,-m440 cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec) -cpu-as-$(CONFIG_E200) += -Wa,-me200 cpu-as-$(CONFIG_E500) += -Wa,-me500 # When using '-many -mpower4' gas will first try and find a matching power4 diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c index cb57be4ada61..b1e577cbf00c 100644 --- a/arch/powerpc/crypto/sha1-spe-glue.c +++ b/arch/powerpc/crypto/sha1-spe-glue.c @@ -12,7 +12,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <asm/byteorder.h> #include <asm/switch_to.h> #include <linux/hardirq.h> diff --git a/arch/powerpc/crypto/sha1.c b/arch/powerpc/crypto/sha1.c index b40dc50a6908..7a55d790cdb1 100644 --- a/arch/powerpc/crypto/sha1.c +++ b/arch/powerpc/crypto/sha1.c @@ -17,7 +17,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <asm/byteorder.h> void powerpc_sha_transform(u32 *state, const u8 *src); diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c index ceb0b6c980b3..a6e650a97d8f 100644 --- a/arch/powerpc/crypto/sha256-spe-glue.c +++ b/arch/powerpc/crypto/sha256-spe-glue.c @@ -13,7 +13,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <asm/byteorder.h> #include <asm/switch_to.h> #include <linux/hardirq.h> @@ -177,7 +177,7 @@ static int ppc_spe_sha256_final(struct shash_desc *desc, u8 *out) static int ppc_spe_sha224_final(struct shash_desc *desc, u8 *out) { - u32 D[SHA256_DIGEST_SIZE >> 2]; + __be32 D[SHA256_DIGEST_SIZE >> 2]; __be32 *dst = (__be32 *)out; ppc_spe_sha256_final(desc, (u8 *)D); diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 36443cda8dcf..1376be95e975 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -36,8 +36,10 @@ static inline bool pte_user(pte_t pte) */ #ifdef CONFIG_PTE_64BIT #define PTE_RPN_MASK (~((1ULL << PTE_RPN_SHIFT) - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 36 #else #define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 32 #endif /* diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h index 3ee1ec60be84..a39e2d193fdc 100644 --- a/arch/powerpc/include/asm/book3s/64/kup-radix.h +++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h @@ -27,6 +27,7 @@ #endif .endm +#ifdef CONFIG_PPC_KUAP .macro kuap_check_amr gpr1, gpr2 #ifdef CONFIG_PPC_KUAP_DEBUG BEGIN_MMU_FTR_SECTION_NESTED(67) @@ -38,6 +39,7 @@ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67) #endif .endm +#endif .macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr #ifdef CONFIG_PPC_KUAP @@ -61,6 +63,10 @@ #else /* !__ASSEMBLY__ */ +#include <linux/jump_label.h> + +DECLARE_STATIC_KEY_FALSE(uaccess_flush_key); + #ifdef CONFIG_PPC_KUAP #include <asm/mmu.h> @@ -103,8 +109,16 @@ static inline void kuap_check_amr(void) static inline unsigned long get_kuap(void) { + /* + * We return AMR_KUAP_BLOCKED when we don't support KUAP because + * prevent_user_access_return needs to return AMR_KUAP_BLOCKED to + * cause restore_user_access to do a flush. + * + * This has no effect in terms of actually blocking things on hash, + * so it doesn't break anything. + */ if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP)) - return 0; + return AMR_KUAP_BLOCKED; return mfspr(SPRN_AMR); } @@ -123,6 +137,29 @@ static inline void set_kuap(unsigned long value) isync(); } +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ + return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && + (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), + "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); +} +#else /* CONFIG_PPC_KUAP */ +static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) { } + +static inline unsigned long kuap_get_and_check_amr(void) +{ + return 0UL; +} + +static inline unsigned long get_kuap(void) +{ + return AMR_KUAP_BLOCKED; +} + +static inline void set_kuap(unsigned long value) { } +#endif /* !CONFIG_PPC_KUAP */ + static __always_inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { @@ -142,6 +179,8 @@ static inline void prevent_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { set_kuap(AMR_KUAP_BLOCKED); + if (static_branch_unlikely(&uaccess_flush_key)) + do_uaccess_flush(); } static inline unsigned long prevent_user_access_return(void) @@ -149,6 +188,8 @@ static inline unsigned long prevent_user_access_return(void) unsigned long flags = get_kuap(); set_kuap(AMR_KUAP_BLOCKED); + if (static_branch_unlikely(&uaccess_flush_key)) + do_uaccess_flush(); return flags; } @@ -156,30 +197,9 @@ static inline unsigned long prevent_user_access_return(void) static inline void restore_user_access(unsigned long flags) { set_kuap(flags); + if (static_branch_unlikely(&uaccess_flush_key) && flags == AMR_KUAP_BLOCKED) + do_uaccess_flush(); } - -static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) -{ - return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) && - (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)), - "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read"); -} -#else /* CONFIG_PPC_KUAP */ -static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) -{ -} - -static inline void kuap_check_amr(void) -{ -} - -static inline unsigned long kuap_get_and_check_amr(void) -{ - return 0; -} -#endif /* CONFIG_PPC_KUAP */ - #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */ diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index e0b52940e43c..750918451dd2 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -242,6 +242,18 @@ extern void radix_init_pseries(void); static inline void radix_init_pseries(void) { }; #endif +#ifdef CONFIG_HOTPLUG_CPU +#define arch_clear_mm_cpumask_cpu(cpu, mm) \ + do { \ + if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { \ + atomic_dec(&(mm)->context.active_cpus); \ + cpumask_clear_cpu(cpu, mm_cpumask(mm)); \ + } \ + } while (0) + +void cleanup_cpu_mmu_context(void); +#endif + static inline int get_user_context(mm_context_t *ctx, unsigned long ea) { int index = ea >> MAX_EA_BITS_PER_CONTEXT; diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index ebe95aa04d53..1d32b174ab6a 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -57,11 +57,18 @@ nop; \ nop +#define ENTRY_FLUSH_SLOT \ + ENTRY_FLUSH_FIXUP_SECTION; \ + nop; \ + nop; \ + nop; + /* * r10 must be free to use, r13 must be paca */ #define INTERRUPT_TO_KERNEL \ - STF_ENTRY_BARRIER_SLOT + STF_ENTRY_BARRIER_SLOT; \ + ENTRY_FLUSH_SLOT /* * Macros for annotating the expected destination of (h)rfid @@ -137,6 +144,9 @@ RFSCV; \ b rfscv_flush_fallback +#else /* __ASSEMBLY__ */ +/* Prototype for function defined in exceptions-64s.S */ +void do_uaccess_flush(void); #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_EXCEPTION_H */ diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index b0af97add751..fbd406cd6916 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -205,6 +205,22 @@ label##3: \ FTR_ENTRY_OFFSET 955b-956b; \ .popsection; +#define UACCESS_FLUSH_FIXUP_SECTION \ +959: \ + .pushsection __uaccess_flush_fixup,"a"; \ + .align 2; \ +960: \ + FTR_ENTRY_OFFSET 959b-960b; \ + .popsection; + +#define ENTRY_FLUSH_FIXUP_SECTION \ +957: \ + .pushsection __entry_flush_fixup,"a"; \ + .align 2; \ +958: \ + FTR_ENTRY_OFFSET 957b-958b; \ + .popsection; + #define RFI_FLUSH_FIXUP_SECTION \ 951: \ .pushsection __rfi_flush_fixup,"a"; \ @@ -237,8 +253,11 @@ label##3: \ #include <linux/types.h> extern long stf_barrier_fallback; +extern long entry_flush_fallback; extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; +extern long __start___uaccess_flush_fixup, __stop___uaccess_flush_fixup; +extern long __start___entry_flush_fixup, __stop___entry_flush_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup; extern long __start__btb_flush_fixup, __stop__btb_flush_fixup; diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 1d0f7d838b2e..0d93331d0fab 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -14,7 +14,7 @@ #define KUAP_CURRENT_WRITE 8 #define KUAP_CURRENT (KUAP_CURRENT_READ | KUAP_CURRENT_WRITE) -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64 #include <asm/book3s/64/kup-radix.h> #endif #ifdef CONFIG_PPC_8xx @@ -35,6 +35,9 @@ .macro kuap_check current, gpr .endm +.macro kuap_check_amr gpr1, gpr2 +.endm + #endif #else /* !__ASSEMBLY__ */ @@ -53,17 +56,28 @@ static inline void setup_kuep(bool disabled) { } void setup_kuap(bool disabled); #else static inline void setup_kuap(bool disabled) { } + +static inline bool +bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +{ + return false; +} + +static inline void kuap_check_amr(void) { } + +/* + * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush + * the L1D cache after user accesses. Only include the empty stubs for other + * platforms. + */ +#ifndef CONFIG_PPC_BOOK3S_64 static inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { } static inline void prevent_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) { } static inline unsigned long prevent_user_access_return(void) { return 0UL; } static inline void restore_user_access(unsigned long flags) { } -static inline bool -bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) -{ - return false; -} +#endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* CONFIG_PPC_KUAP */ static inline void allow_read_from_user(const void __user *from, unsigned long size) diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h index 91c69ff53a8a..6cda76b57c5d 100644 --- a/arch/powerpc/include/asm/mmzone.h +++ b/arch/powerpc/include/asm/mmzone.h @@ -46,5 +46,10 @@ u64 memory_hotplug_max(void); #define __HAVE_ARCH_RESERVED_KERNEL_PAGES #endif +#ifdef CONFIG_MEMORY_HOTPLUG +extern int create_section_mapping(unsigned long start, unsigned long end, + int nid, pgprot_t prot); +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_MMZONE_H_ */ diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 85ed2390fb99..567cdc557402 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -63,7 +63,7 @@ static inline void restore_user_access(unsigned long flags) static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { - return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000), + return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xff000000), "Bug: fault blocked by AP register !"); } diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h index 1d9ac0f9c794..0bd1b144eb76 100644 --- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h @@ -33,19 +33,18 @@ * respectively NA for All or X for Supervisor and no access for User. * Then we use the APG to say whether accesses are according to Page rules or * "all Supervisor" rules (Access to all) - * Therefore, we define 2 APG groups. lsb is _PMD_USER - * 0 => Kernel => 01 (all accesses performed according to page definition) - * 1 => User => 00 (all accesses performed as supervisor iaw page definition) - * 2-15 => Not Used - */ -#define MI_APG_INIT 0x40000000 - -/* - * 0 => Kernel => 01 (all accesses performed according to page definition) - * 1 => User => 10 (all accesses performed according to swaped page definition) - * 2-15 => Not Used - */ -#define MI_APG_KUEP 0x60000000 + * _PAGE_ACCESSED is also managed via APG. When _PAGE_ACCESSED is not set, say + * "all User" rules, that will lead to NA for all. + * Therefore, we define 4 APG groups. lsb is _PAGE_ACCESSED + * 0 => Kernel => 11 (all accesses performed according as user iaw page definition) + * 1 => Kernel+Accessed => 01 (all accesses performed according to page definition) + * 2 => User => 11 (all accesses performed according as user iaw page definition) + * 3 => User+Accessed => 00 (all accesses performed as supervisor iaw page definition) for INIT + * => 10 (all accesses performed according to swaped page definition) for KUEP + * 4-15 => Not Used + */ +#define MI_APG_INIT 0xdc000000 +#define MI_APG_KUEP 0xde000000 /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MI_RPN is written, bits in @@ -106,25 +105,9 @@ #define MD_Ks 0x80000000 /* Should not be set */ #define MD_Kp 0x40000000 /* Should always be set */ -/* - * All pages' PP data bits are set to either 000 or 011 or 001, which means - * respectively RW for Supervisor and no access for User, or RO for - * Supervisor and no access for user and NA for ALL. - * Then we use the APG to say whether accesses are according to Page rules or - * "all Supervisor" rules (Access to all) - * Therefore, we define 2 APG groups. lsb is _PMD_USER - * 0 => Kernel => 01 (all accesses performed according to page definition) - * 1 => User => 00 (all accesses performed as supervisor iaw page definition) - * 2-15 => Not Used - */ -#define MD_APG_INIT 0x40000000 - -/* - * 0 => No user => 01 (all accesses performed according to page definition) - * 1 => User => 10 (all accesses performed according to swaped page definition) - * 2-15 => Not Used - */ -#define MD_APG_KUAP 0x60000000 +/* See explanation above at the definition of MI_APG_INIT */ +#define MD_APG_INIT 0xdc000000 +#define MD_APG_KUAP 0xde000000 /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MD_RPN is written, bits in diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index ee2243ba96cf..96522f7f0618 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -153,8 +153,10 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); */ #if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) #define PTE_RPN_MASK (~((1ULL << PTE_RPN_SHIFT) - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 36 #else #define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 32 #endif /* diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h index 66f403a7da44..1581204467e1 100644 --- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h @@ -39,9 +39,9 @@ * into the TLB. */ #define _PAGE_GUARDED 0x0010 /* Copied to L1 G entry in DTLB */ -#define _PAGE_SPECIAL 0x0020 /* SW entry */ +#define _PAGE_ACCESSED 0x0020 /* Copied to L1 APG 1 entry in I/DTLB */ #define _PAGE_EXEC 0x0040 /* Copied to PP (bit 21) in ITLB */ -#define _PAGE_ACCESSED 0x0080 /* software: page referenced */ +#define _PAGE_SPECIAL 0x0080 /* SW entry */ #define _PAGE_NA 0x0200 /* Supervisor NA, User no access */ #define _PAGE_RO 0x0600 /* Supervisor RO, User no access */ @@ -59,11 +59,12 @@ #define _PMD_PRESENT 0x0001 #define _PMD_PRESENT_MASK _PMD_PRESENT -#define _PMD_BAD 0x0fd0 +#define _PMD_BAD 0x0f90 #define _PMD_PAGE_MASK 0x000c #define _PMD_PAGE_8M 0x000c #define _PMD_PAGE_512K 0x0004 -#define _PMD_USER 0x0020 /* APG 1 */ +#define _PMD_ACCESSED 0x0020 /* APG 1 */ +#define _PMD_USER 0x0040 /* APG 2 */ #define _PTE_NONE_MASK 0 diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index fbb8fa32150f..b774a4477d5f 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -86,12 +86,19 @@ static inline bool security_ftr_enabled(u64 feature) // Software required to flush link stack on context switch #define SEC_FTR_FLUSH_LINK_STACK 0x0000000000001000ull +// The L1-D cache should be flushed when entering the kernel +#define SEC_FTR_L1D_FLUSH_ENTRY 0x0000000000004000ull + +// The L1-D cache should be flushed after user accesses from the kernel +#define SEC_FTR_L1D_FLUSH_UACCESS 0x0000000000008000ull // Features enabled by default #define SEC_FTR_DEFAULT \ (SEC_FTR_L1D_FLUSH_HV | \ SEC_FTR_L1D_FLUSH_PR | \ SEC_FTR_BNDS_CHK_SPEC_BAR | \ + SEC_FTR_L1D_FLUSH_ENTRY | \ + SEC_FTR_L1D_FLUSH_UACCESS | \ SEC_FTR_FAVOUR_SECURITY) #endif /* _ASM_POWERPC_SECURITY_FEATURES_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 9efbddee2bca..a466749703f1 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -52,12 +52,16 @@ enum l1d_flush_type { }; void setup_rfi_flush(enum l1d_flush_type, bool enable); +void setup_entry_flush(bool enable); +void setup_uaccess_flush(bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); #ifdef CONFIG_PPC_BARRIER_NOSPEC void setup_barrier_nospec(void); #else static inline void setup_barrier_nospec(void) { }; #endif +void do_uaccess_flush_fixups(enum l1d_flush_type types); +void do_entry_flush_fixups(enum l1d_flush_type types); void do_barrier_nospec_fixups(bool enable); extern bool barrier_nospec_enabled; diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index 1e6fa371cc38..d072866842e4 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -13,9 +13,9 @@ #endif /* CONFIG_SPARSEMEM */ #ifdef CONFIG_MEMORY_HOTPLUG -extern int create_section_mapping(unsigned long start, unsigned long end, - int nid, pgprot_t prot); extern int remove_section_mapping(unsigned long start, unsigned long end); +extern int memory_add_physaddr_to_nid(u64 start); +#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid #ifdef CONFIG_NUMA extern int hot_add_scn_to_nid(unsigned long scn_addr); @@ -26,6 +26,5 @@ static inline int hot_add_scn_to_nid(unsigned long scn_addr) } #endif /* CONFIG_NUMA */ #endif /* CONFIG_MEMORY_HOTPLUG */ - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_SPARSEMEM_H */ diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 8728590f514a..3beeb030cd78 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -6,6 +6,7 @@ struct device; struct device_node; +struct drmem_lmb; #ifdef CONFIG_NUMA @@ -61,6 +62,9 @@ static inline int early_cpu_to_node(int cpu) */ return (nid < 0) ? 0 : nid; } + +int of_drconf_to_nid_single(struct drmem_lmb *lmb); + #else static inline int early_cpu_to_node(int cpu) { return 0; } @@ -84,10 +88,12 @@ static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc) return 0; } -#endif /* CONFIG_NUMA */ +static inline int of_drconf_to_nid_single(struct drmem_lmb *lmb) +{ + return first_online_node; +} -struct drmem_lmb; -int of_drconf_to_nid_single(struct drmem_lmb *lmb); +#endif /* CONFIG_NUMA */ #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR) extern int find_and_online_cpu_nid(int cpu); diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index ef5bbb705c08..501c9a79038c 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -178,7 +178,7 @@ do { \ * are no aliasing issues. */ #define __put_user_asm_goto(x, addr, label, op) \ - asm volatile goto( \ + asm_volatile_goto( \ "1: " op "%U1%X1 %0,%1 # put_user\n" \ EX_TABLE(1b, %l2) \ : \ @@ -191,7 +191,7 @@ do { \ __put_user_asm_goto(x, ptr, label, "std") #else /* __powerpc64__ */ #define __put_user_asm2_goto(x, addr, label) \ - asm volatile goto( \ + asm_volatile_goto( \ "1: stw%X1 %0, %1\n" \ "2: stw%X1 %L0, %L1\n" \ EX_TABLE(1b, %l2) \ diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c index 6b50bf15d8c1..bf3270426d82 100644 --- a/arch/powerpc/kernel/eeh_cache.c +++ b/arch/powerpc/kernel/eeh_cache.c @@ -264,8 +264,9 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v) { struct pci_io_addr_range *piar; struct rb_node *n; + unsigned long flags; - spin_lock(&pci_io_addr_cache_root.piar_lock); + spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); for (n = rb_first(&pci_io_addr_cache_root.rb_root); n; n = rb_next(n)) { piar = rb_entry(n, struct pci_io_addr_range, rb_node); @@ -273,7 +274,7 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v) (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", &piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev)); } - spin_unlock(&pci_io_addr_cache_root.piar_lock); + spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); return 0; } diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f7d748b88705..4d01f09ecf80 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1000,8 +1000,6 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake) * Vectors for the FWNMI option. Share common code. */ TRAMP_REAL_BEGIN(system_reset_fwnmi) - /* XXX: fwnmi guest could run a nested/PR guest, so why no test? */ - __IKVM_REAL(system_reset)=0 GEN_INT_ENTRY system_reset, virt=0 #endif /* CONFIG_PPC_PSERIES */ @@ -1412,6 +1410,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) * If none is found, do a Linux page fault. Linux page faults can happen in * kernel mode due to user copy operations of course. * + * KVM: The KVM HDSI handler may perform a load with MSR[DR]=1 in guest + * MMU context, which may cause a DSI in the host, which must go to the + * KVM handler. MSR[IR] is not enabled, so the real-mode handler will + * always be used regardless of AIL setting. + * * - Radix MMU * The hardware loads from the Linux page table directly, so a fault goes * immediately to Linux page fault. @@ -1422,10 +1425,8 @@ INT_DEFINE_BEGIN(data_access) IVEC=0x300 IDAR=1 IDSISR=1 -#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_SKIP=1 IKVM_REAL=1 -#endif INT_DEFINE_END(data_access) EXC_REAL_BEGIN(data_access, 0x300, 0x80) @@ -1464,6 +1465,8 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) * ppc64_bolted_size (first segment). The kernel handler must avoid stomping * on user-handler data structures. * + * KVM: Same as 0x300, DSLB must test for KVM guest. + * * A dedicated save area EXSLB is used (XXX: but it actually need not be * these days, we could use EXGEN). */ @@ -1472,10 +1475,8 @@ INT_DEFINE_BEGIN(data_access_slb) IAREA=PACA_EXSLB IRECONCILE=0 IDAR=1 -#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE IKVM_SKIP=1 IKVM_REAL=1 -#endif INT_DEFINE_END(data_access_slb) EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80) @@ -2951,15 +2952,8 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback) .endr blr -TRAMP_REAL_BEGIN(rfi_flush_fallback) - SET_SCRATCH0(r13); - GET_PACA(r13); - std r1,PACA_EXRFI+EX_R12(r13) - ld r1,PACAKSAVE(r13) - std r9,PACA_EXRFI+EX_R9(r13) - std r10,PACA_EXRFI+EX_R10(r13) - std r11,PACA_EXRFI+EX_R11(r13) - mfctr r9 +/* Clobbers r10, r11, ctr */ +.macro L1D_DISPLACEMENT_FLUSH ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) ld r11,PACA_L1D_FLUSH_SIZE(r13) srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ @@ -2970,7 +2964,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback) sync /* - * The load adresses are at staggered offsets within cachelines, + * The load addresses are at staggered offsets within cachelines, * which suits some pipelines better (on others it should not * hurt). */ @@ -2985,7 +2979,30 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback) ld r11,(0x80 + 8)*7(r10) addi r10,r10,0x80*8 bdnz 1b +.endm +TRAMP_REAL_BEGIN(entry_flush_fallback) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + mfctr r9 + L1D_DISPLACEMENT_FLUSH + mtctr r9 + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ld r11,PACA_EXRFI+EX_R11(r13) + blr + +TRAMP_REAL_BEGIN(rfi_flush_fallback) + SET_SCRATCH0(r13); + GET_PACA(r13); + std r1,PACA_EXRFI+EX_R12(r13) + ld r1,PACAKSAVE(r13) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + std r11,PACA_EXRFI+EX_R11(r13) + mfctr r9 + L1D_DISPLACEMENT_FLUSH mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) @@ -3003,32 +3020,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) mfctr r9 - ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) - ld r11,PACA_L1D_FLUSH_SIZE(r13) - srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ - mtctr r11 - DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ - - /* order ld/st prior to dcbt stop all streams with flushing */ - sync - - /* - * The load adresses are at staggered offsets within cachelines, - * which suits some pipelines better (on others it should not - * hurt). - */ -1: - ld r11,(0x80 + 8)*0(r10) - ld r11,(0x80 + 8)*1(r10) - ld r11,(0x80 + 8)*2(r10) - ld r11,(0x80 + 8)*3(r10) - ld r11,(0x80 + 8)*4(r10) - ld r11,(0x80 + 8)*5(r10) - ld r11,(0x80 + 8)*6(r10) - ld r11,(0x80 + 8)*7(r10) - addi r10,r10,0x80*8 - bdnz 1b - + L1D_DISPLACEMENT_FLUSH mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) @@ -3079,8 +3071,21 @@ TRAMP_REAL_BEGIN(rfscv_flush_fallback) RFSCV USE_TEXT_SECTION() - MASKED_INTERRUPT - MASKED_INTERRUPT hsrr=1 + +_GLOBAL(do_uaccess_flush) + UACCESS_FLUSH_FIXUP_SECTION + nop + nop + nop + blr + L1D_DISPLACEMENT_FLUSH + blr +_ASM_NOKPROBE_SYMBOL(do_uaccess_flush) +EXPORT_SYMBOL(do_uaccess_flush) + + +MASKED_INTERRUPT +MASKED_INTERRUPT hsrr=1 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER kvmppc_skip_interrupt: diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 44c9018aed1b..a1ae00689e0f 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -284,11 +284,7 @@ _ENTRY(saved_ksp_limit) rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */ lwz r11, 0(r11) /* Get Linux PTE */ -#ifdef CONFIG_SWAP li r9, _PAGE_PRESENT | _PAGE_ACCESSED -#else - li r9, _PAGE_PRESENT -#endif andc. r9, r9, r11 /* Check permission */ bne 5f @@ -369,11 +365,7 @@ _ENTRY(saved_ksp_limit) rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */ lwz r11, 0(r11) /* Get Linux PTE */ -#ifdef CONFIG_SWAP li r9, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#else - li r9, _PAGE_PRESENT | _PAGE_EXEC -#endif andc. r9, r9, r11 /* Check permission */ bne 5f diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 9f359d3fba74..ee0bfebc375f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -202,9 +202,7 @@ SystemCall: InstructionTLBMiss: mtspr SPRN_SPRG_SCRATCH0, r10 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS) mtspr SPRN_SPRG_SCRATCH1, r11 -#endif /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -224,25 +222,13 @@ InstructionTLBMiss: 3: mtcr r11 #endif -#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT) lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ mtspr SPRN_MD_TWC, r11 -#else - lwz r10, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ - mtspr SPRN_MI_TWC, r10 /* Set segment attributes */ - mtspr SPRN_MD_TWC, r10 -#endif mfspr r10, SPRN_MD_TWC lwz r10, 0(r10) /* Get the pte */ -#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT) + rlwimi r11, r10, 0, _PAGE_GUARDED | _PAGE_ACCESSED rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K mtspr SPRN_MI_TWC, r11 -#endif -#ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT - and r11, r11, r10 - rlwimi r10, r11, 0, _PAGE_PRESENT -#endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 20 and 23 must be clear. * Software indicator bits 22, 24, 25, 26, and 27 must be @@ -256,9 +242,7 @@ InstructionTLBMiss: /* Restore registers */ 0: mfspr r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS) mfspr r11, SPRN_SPRG_SCRATCH1 -#endif rfi patch_site 0b, patch__itlbmiss_exit_1 @@ -268,9 +252,7 @@ InstructionTLBMiss: addi r10, r10, 1 stw r10, (itlb_miss_counter - PAGE_OFFSET)@l(0) mfspr r10, SPRN_SPRG_SCRATCH0 -#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) mfspr r11, SPRN_SPRG_SCRATCH1 -#endif rfi #endif @@ -297,30 +279,16 @@ DataStoreTLBMiss: mfspr r10, SPRN_MD_TWC lwz r10, 0(r10) /* Get the pte */ - /* Insert the Guarded flag into the TWC from the Linux PTE. + /* Insert Guarded and Accessed flags into the TWC from the Linux PTE. * It is bit 27 of both the Linux PTE and the TWC (at least * I got that right :-). It will be better when we can put * this into the Linux pgd/pmd and load it in the operation * above. */ - rlwimi r11, r10, 0, _PAGE_GUARDED + rlwimi r11, r10, 0, _PAGE_GUARDED | _PAGE_ACCESSED rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K mtspr SPRN_MD_TWC, r11 - /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set. - * We also need to know if the insn is a load/store, so: - * Clear _PAGE_PRESENT and load that which will - * trap into DTLB Error with store bit set accordinly. - */ - /* PRESENT=0x1, ACCESSED=0x20 - * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5)); - * r10 = (r10 & ~PRESENT) | r11; - */ -#ifdef CONFIG_SWAP - rlwinm r11, r10, 32-5, _PAGE_PRESENT - and r11, r11, r10 - rlwimi r10, r11, 0, _PAGE_PRESENT -#endif /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 24, 25, 26, and 27 must be * set. All other Linux PTE bits control the behavior @@ -711,7 +679,7 @@ initial_mmu: li r9, 4 /* up to 4 pages of 8M */ mtctr r9 lis r9, KERNELBASE@h /* Create vaddr for TLB */ - li r10, MI_PS8MEG | MI_SVALID /* Set 8M byte page */ + li r10, MI_PS8MEG | _PMD_ACCESSED | MI_SVALID li r11, MI_BOOTINIT /* Create RPN for address 0 */ 1: mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */ @@ -775,7 +743,7 @@ _GLOBAL(mmu_pin_tlb) #ifdef CONFIG_PIN_TLB_TEXT LOAD_REG_IMMEDIATE(r5, 28 << 8) LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) - LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG) + LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED) LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) LOAD_REG_ADDR(r9, _sinittext) li r0, 4 @@ -797,7 +765,7 @@ _GLOBAL(mmu_pin_tlb) LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM) #ifdef CONFIG_PIN_TLB_DATA LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) - LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG) + LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED) #ifdef CONFIG_PIN_TLB_IMMR li r0, 3 #else @@ -834,7 +802,7 @@ _GLOBAL(mmu_pin_tlb) #endif #ifdef CONFIG_PIN_TLB_IMMR LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID) - LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED) + LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED | _PMD_ACCESSED) mfspr r8, SPRN_IMMR rlwinm r8, r8, 0, 0xfff80000 ori r8, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \ diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 5eb9eedac920..a0dda2a1f2df 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -156,6 +156,7 @@ __after_mmu_off: bl initial_bats bl load_segment_registers BEGIN_MMU_FTR_SECTION + bl reloc_offset bl early_hash_table END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) #if defined(CONFIG_BOOTX_TEXT) @@ -457,11 +458,7 @@ InstructionTLBMiss: cmplw 0,r1,r3 #endif mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC -#else - li r1,_PAGE_PRESENT | _PAGE_EXEC -#endif #if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC) bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ @@ -523,11 +520,7 @@ DataLoadTLBMiss: lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP li r1, _PAGE_PRESENT | _PAGE_ACCESSED -#else - li r1, _PAGE_PRESENT -#endif bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ @@ -603,11 +596,7 @@ DataStoreTLBMiss: lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SPRG_PGDIR -#ifdef CONFIG_SWAP li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED -#else - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT -#endif bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ @@ -932,7 +921,7 @@ early_hash_table: ori r6, r6, 3 /* 256kB table */ mtspr SPRN_SDR1, r6 lis r6, early_hash@h - lis r3, Hash@ha + addis r3, r3, Hash@ha stw r6, Hash@l(r3) blr diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index ae0e2632393d..1f835539fda4 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -52,9 +52,9 @@ void arch_cpu_idle(void) * interrupts enabled, some don't. */ if (irqs_disabled()) - local_irq_enable(); + raw_local_irq_enable(); } else { - local_irq_enable(); + raw_local_irq_enable(); /* * Go into low thread priority and possibly * low power mode. diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index bb9cab3641d7..74fd47f46fa5 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -945,7 +945,13 @@ early_initcall(disable_hardlockup_detector); static enum l1d_flush_type enabled_flush_types; static void *l1d_flush_fallback_area; static bool no_rfi_flush; +static bool no_entry_flush; +static bool no_uaccess_flush; bool rfi_flush; +bool entry_flush; +bool uaccess_flush; +DEFINE_STATIC_KEY_FALSE(uaccess_flush_key); +EXPORT_SYMBOL(uaccess_flush_key); static int __init handle_no_rfi_flush(char *p) { @@ -955,6 +961,22 @@ static int __init handle_no_rfi_flush(char *p) } early_param("no_rfi_flush", handle_no_rfi_flush); +static int __init handle_no_entry_flush(char *p) +{ + pr_info("entry-flush: disabled on command line."); + no_entry_flush = true; + return 0; +} +early_param("no_entry_flush", handle_no_entry_flush); + +static int __init handle_no_uaccess_flush(char *p) +{ + pr_info("uaccess-flush: disabled on command line."); + no_uaccess_flush = true; + return 0; +} +early_param("no_uaccess_flush", handle_no_uaccess_flush); + /* * The RFI flush is not KPTI, but because users will see doco that says to use * nopti we hijack that option here to also disable the RFI flush. @@ -986,6 +1008,32 @@ void rfi_flush_enable(bool enable) rfi_flush = enable; } +void entry_flush_enable(bool enable) +{ + if (enable) { + do_entry_flush_fixups(enabled_flush_types); + on_each_cpu(do_nothing, NULL, 1); + } else { + do_entry_flush_fixups(L1D_FLUSH_NONE); + } + + entry_flush = enable; +} + +void uaccess_flush_enable(bool enable) +{ + if (enable) { + do_uaccess_flush_fixups(enabled_flush_types); + static_branch_enable(&uaccess_flush_key); + on_each_cpu(do_nothing, NULL, 1); + } else { + static_branch_disable(&uaccess_flush_key); + do_uaccess_flush_fixups(L1D_FLUSH_NONE); + } + + uaccess_flush = enable; +} + static void __ref init_fallback_flush(void) { u64 l1d_size, limit; @@ -1044,10 +1092,28 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable) enabled_flush_types = types; - if (!no_rfi_flush && !cpu_mitigations_off()) + if (!cpu_mitigations_off() && !no_rfi_flush) rfi_flush_enable(enable); } +void setup_entry_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_entry_flush) + entry_flush_enable(enable); +} + +void setup_uaccess_flush(bool enable) +{ + if (cpu_mitigations_off()) + return; + + if (!no_uaccess_flush) + uaccess_flush_enable(enable); +} + #ifdef CONFIG_DEBUG_FS static int rfi_flush_set(void *data, u64 val) { @@ -1075,9 +1141,63 @@ static int rfi_flush_get(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); +static int entry_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != entry_flush) + entry_flush_enable(enable); + + return 0; +} + +static int entry_flush_get(void *data, u64 *val) +{ + *val = entry_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n"); + +static int uaccess_flush_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != uaccess_flush) + uaccess_flush_enable(enable); + + return 0; +} + +static int uaccess_flush_get(void *data, u64 *val) +{ + *val = uaccess_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n"); + static __init int rfi_flush_debugfs_init(void) { debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); + debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush); + debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush); return 0; } device_initcall(rfi_flush_debugfs_init); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 3c6b9822f978..8c2857cbd960 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1393,13 +1393,14 @@ static void add_cpu_to_masks(int cpu) /* Activate a secondary processor. */ void start_secondary(void *unused) { - unsigned int cpu = smp_processor_id(); + unsigned int cpu = raw_smp_processor_id(); mmgrab(&init_mm); current->active_mm = &init_mm; smp_store_cpu_info(cpu); set_dec(tb_ticks_per_jiffy); + rcu_cpu_starting(cpu); preempt_disable(); cpu_callin_map[cpu] = 1; diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index 8e50818aa50b..310bcd768cd5 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -2,7 +2,7 @@ #include <linux/err.h> #include <asm/asm-prototypes.h> -#include <asm/book3s/64/kup-radix.h> +#include <asm/kup.h> #include <asm/cputime.h> #include <asm/hw_irq.h> #include <asm/kprobes.h> diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index e0548b4950de..6db90cdf11da 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -132,6 +132,20 @@ SECTIONS } . = ALIGN(8); + __uaccess_flush_fixup : AT(ADDR(__uaccess_flush_fixup) - LOAD_OFFSET) { + __start___uaccess_flush_fixup = .; + *(__uaccess_flush_fixup) + __stop___uaccess_flush_fixup = .; + } + + . = ALIGN(8); + __entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) { + __start___entry_flush_fixup = .; + *(__entry_flush_fixup) + __stop___entry_flush_fixup = .; + } + + . = ALIGN(8); __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) { __start___stf_exit_barrier_fixup = .; *(__stf_exit_barrier_fixup) diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 85215e79db42..a0ebc29f30b2 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1214,12 +1214,9 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu) { /* We have a block of xive->nr_servers VPs. We just need to check - * raw vCPU ids are below the expected limit for this guest's - * core stride ; kvmppc_pack_vcpu_id() will pack them down to an - * index that can be safely used to compute a VP id that belongs - * to the VP block. + * packed vCPU ids are below that. */ - return cpu < xive->nr_servers * xive->kvm->arch.emul_smt_mode; + return kvmppc_pack_vcpu_id(xive->kvm, cpu) < xive->nr_servers; } int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp) diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index d0c2db0e07fa..a59a94f02733 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -251,6 +251,13 @@ static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf) } state = &sb->irq_state[src]; + + /* Some sanity checking */ + if (!state->valid) { + pr_devel("%s: source %lx invalid !\n", __func__, irq); + return VM_FAULT_SIGBUS; + } + kvmppc_xive_select_irq(state, &hw_num, &xd); arch_spin_lock(&sb->lock); diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 4c0a7ee9fa00..321c12a9ef6b 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -234,6 +234,110 @@ void do_stf_barrier_fixups(enum stf_barrier_type types) do_stf_exit_barrier_fixups(types); } +void do_uaccess_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[4], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___uaccess_flush_fixup); + end = PTRRELOC(&__stop___uaccess_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + instrs[3] = 0x4e800020; /* blr */ + + i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[3] = 0x60000000; /* nop */ + /* fallthrough to fallback flush */ + } + + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + + patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + patch_instruction((struct ppc_inst *)(dest + 3), ppc_inst(instrs[3])); + } + + printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); +} + +void do_entry_flush_fixups(enum l1d_flush_type types) +{ + unsigned int instrs[3], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___entry_flush_fixup); + end = PTRRELOC(&__stop___entry_flush_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + + i = 0; + if (types == L1D_FLUSH_FALLBACK) { + instrs[i++] = 0x7d4802a6; /* mflr r10 */ + instrs[i++] = 0x60000000; /* branch patched below */ + instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + } + + if (types & L1D_FLUSH_ORI) { + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/ + } + + if (types & L1D_FLUSH_MTTRIG) + instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */ + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0])); + + if (types == L1D_FLUSH_FALLBACK) + patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&entry_flush_fallback, + BRANCH_SET_LINK); + else + patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1])); + + patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2])); + } + + printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); +} + void do_rfi_flush_fixups(enum l1d_flush_type types) { unsigned int instrs[3], *dest; diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 5e147986400d..55b4a8bd408a 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -5,7 +5,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) -obj-y := fault.o mem.o pgtable.o mmap.o \ +obj-y := fault.o mem.o pgtable.o mmap.o maccess.o \ init_$(BITS).o pgtable_$(BITS).o \ pgtable-frag.o ioremap.o ioremap_$(BITS).o \ init-common.o mmu_context.o drmem.o diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c index 0203cdf48c54..52e170bd95ae 100644 --- a/arch/powerpc/mm/book3s64/hash_native.c +++ b/arch/powerpc/mm/book3s64/hash_native.c @@ -68,7 +68,7 @@ static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned in rs = ((unsigned long)pid << PPC_BITLSHIFT(31)); asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) - : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "r"(r) + : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r) : "memory"); } @@ -92,16 +92,15 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) asm volatile("ptesync": : :"memory"); /* - * Flush the first set of the TLB, and any caching of partition table - * entries. Then flush the remaining sets of the TLB. Hash mode uses - * partition scoped TLB translations. + * Flush the partition table cache if this is HV mode. */ - tlbiel_hash_set_isa300(0, is, 0, 2, 0); - for (set = 1; set < num_sets; set++) - tlbiel_hash_set_isa300(set, is, 0, 0, 0); + if (early_cpu_has_feature(CPU_FTR_HVMODE)) + tlbiel_hash_set_isa300(0, is, 0, 2, 0); /* - * Now invalidate the process table cache. + * Now invalidate the process table cache. UPRT=0 HPT modes (what + * current hardware implements) do not use the process table, but + * add the flushes anyway. * * From ISA v3.0B p. 1078: * The following forms are invalid. @@ -110,6 +109,14 @@ static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is) */ tlbiel_hash_set_isa300(0, is, 0, 2, 1); + /* + * Then flush the sets of the TLB proper. Hash mode uses + * partition scoped TLB translations, which may be flushed + * in !HV mode. + */ + for (set = 0; set < num_sets; set++) + tlbiel_hash_set_isa300(set, is, 0, 0, 0); + ppc_after_tlbiel_barrier(); asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory"); diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c index 1c54821de7bf..0c8557220ae2 100644 --- a/arch/powerpc/mm/book3s64/mmu_context.c +++ b/arch/powerpc/mm/book3s64/mmu_context.c @@ -17,6 +17,7 @@ #include <linux/export.h> #include <linux/gfp.h> #include <linux/slab.h> +#include <linux/cpu.h> #include <asm/mmu_context.h> #include <asm/pgalloc.h> @@ -307,3 +308,22 @@ void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) isync(); } #endif + +/** + * cleanup_cpu_mmu_context - Clean up MMU details for this CPU (newly offlined) + * + * This clears the CPU from mm_cpumask for all processes, and then flushes the + * local TLB to ensure TLB coherency in case the CPU is onlined again. + * + * KVM guest translations are not necessarily flushed here. If KVM started + * using mm_cpumask or the Linux APIs which do, this would have to be resolved. + */ +#ifdef CONFIG_HOTPLUG_CPU +void cleanup_cpu_mmu_context(void) +{ + int cpu = smp_processor_id(); + + clear_tasks_mm_cpumask(cpu); + tlbiel_all(); +} +#endif diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c new file mode 100644 index 000000000000..fa9a7a718fc6 --- /dev/null +++ b/arch/powerpc/mm/maccess.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/uaccess.h> +#include <linux/kernel.h> + +bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) +{ + return is_kernel_addr((unsigned long)unsafe_src); +} diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 01ec2a252f09..3fc325bebe4d 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -50,6 +50,7 @@ #include <asm/rtas.h> #include <asm/kasan.h> #include <asm/svm.h> +#include <asm/mmzone.h> #include <mm/mmu_decl.h> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 63f61d8b55e5..f2bf98bdcea2 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -742,8 +742,7 @@ static int __init parse_numa_properties(void) of_node_put(cpu); } - if (likely(nid > 0)) - node_set_online(nid); + node_set_online(nid); } get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells); diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 9ed4fcccf8a9..7b25548ec42b 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1336,7 +1336,7 @@ static void dump_trace_imc_data(struct perf_event *event) /* If this is a valid record, create the sample */ struct perf_output_handle handle; - if (perf_output_begin(&handle, event, header.size)) + if (perf_output_begin(&handle, &data, event, header.size)) return; perf_output_sample(&handle, &header, &data, event); diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index 8e53f2fc3fe0..6f681b105eec 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -144,8 +144,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) : diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 74ebe664b016..adae2a6712e1 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -911,6 +911,8 @@ static int smp_core99_cpu_disable(void) mpic_cpu_set_priority(0xf); + cleanup_cpu_mmu_context(); + return 0; } diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 9acaa0f131b9..4426a109ec2f 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -98,7 +98,7 @@ static void init_fw_feat_flags(struct device_node *np) security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); } -static void pnv_setup_rfi_flush(void) +static void pnv_setup_security_mitigations(void) { struct device_node *np, *fw_features; enum l1d_flush_type type; @@ -122,12 +122,31 @@ static void pnv_setup_rfi_flush(void) type = L1D_FLUSH_ORI; } + /* + * If we are non-Power9 bare metal, we don't need to flush on kernel + * entry or after user access: they fix a P9 specific vulnerability. + */ + if (!pvr_version_is(PVR_POWER9)) { + security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); + } + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \ security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV)); setup_rfi_flush(type, enable); setup_count_cache_flush(); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); + setup_entry_flush(enable); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); + setup_uaccess_flush(enable); + + setup_stf_barrier(); } static void __init pnv_check_guarded_cores(void) @@ -156,8 +175,7 @@ static void __init pnv_setup_arch(void) { set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); - pnv_setup_rfi_flush(); - setup_stf_barrier(); + pnv_setup_security_mitigations(); /* Initialize SMP */ pnv_smp_init(); @@ -193,11 +211,16 @@ static void __init pnv_init(void) add_preferred_console("hvc", 0, NULL); if (!radix_enabled()) { + size_t size = sizeof(struct slb_entry) * mmu_slb_size; int i; /* Allocate per cpu area to save old slb contents during MCE */ - for_each_possible_cpu(i) - paca_ptrs[i]->mce_faulty_slbs = memblock_alloc_node(mmu_slb_size, __alignof__(*paca_ptrs[i]->mce_faulty_slbs), cpu_to_node(i)); + for_each_possible_cpu(i) { + paca_ptrs[i]->mce_faulty_slbs = + memblock_alloc_node(size, + __alignof__(struct slb_entry), + cpu_to_node(i)); + } } } diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 54c4ba45c7ce..cbb67813cd5d 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -143,6 +143,9 @@ static int pnv_smp_cpu_disable(void) xive_smp_disable_cpu(); else xics_migrate_irqs_away(); + + cleanup_cpu_mmu_context(); + return 0; } diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index f2837e33bf5d..a02012f1b04a 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -90,6 +90,9 @@ static int pseries_cpu_disable(void) xive_smp_disable_cpu(); else xics_migrate_irqs_away(); + + cleanup_cpu_mmu_context(); + return 0; } diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index d6f4162478a5..2f73cb5bf12d 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -349,8 +349,8 @@ void post_mobility_fixup(void) cpus_read_unlock(); - /* Possibly switch to a new RFI flush type */ - pseries_setup_rfi_flush(); + /* Possibly switch to a new L1 flush type */ + pseries_setup_security_mitigations(); /* Reinitialise system information for hv-24x7 */ read_24x7_sys_info(); diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 133f6adcb39c..b3ac2455faad 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -458,7 +458,8 @@ again: return hwirq; } - virq = irq_create_mapping(NULL, hwirq); + virq = irq_create_mapping_affinity(NULL, hwirq, + entry->affinity); if (!virq) { pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq); diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 13fa370a87e4..593840847cd3 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -111,7 +111,7 @@ static inline unsigned long cmo_get_page_size(void) int dlpar_workqueue_init(void); -void pseries_setup_rfi_flush(void); +void pseries_setup_security_mitigations(void); void pseries_lpar_read_hblkrm_characteristics(void); #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 633c45ec406d..090c13f6c881 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -542,7 +542,7 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); } -void pseries_setup_rfi_flush(void) +void pseries_setup_security_mitigations(void) { struct h_cpu_char_result result; enum l1d_flush_type types; @@ -579,6 +579,16 @@ void pseries_setup_rfi_flush(void) setup_rfi_flush(types, enable); setup_count_cache_flush(); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY); + setup_entry_flush(enable); + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS); + setup_uaccess_flush(enable); + + setup_stf_barrier(); } #ifdef CONFIG_PCI_IOV @@ -768,8 +778,7 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); - pseries_setup_rfi_flush(); - setup_stf_barrier(); + pseries_setup_security_mitigations(); pseries_lpar_read_hblkrm_characteristics(); /* By default, only probe PCI (can be overridden by rtas_pci) */ diff --git a/arch/riscv/include/asm/pgtable-32.h b/arch/riscv/include/asm/pgtable-32.h index b0ab66e5fdb1..5b2e79e5bfa5 100644 --- a/arch/riscv/include/asm/pgtable-32.h +++ b/arch/riscv/include/asm/pgtable-32.h @@ -14,4 +14,6 @@ #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 34 + #endif /* _ASM_RISCV_PGTABLE_32_H */ diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h index ab104905d4db..81de51e6aa32 100644 --- a/arch/riscv/include/asm/timex.h +++ b/arch/riscv/include/asm/timex.h @@ -60,6 +60,8 @@ static inline u32 get_cycles_hi(void) } #define get_cycles_hi get_cycles_hi +#endif /* !CONFIG_RISCV_M_MODE */ + #ifdef CONFIG_64BIT static inline u64 get_cycles64(void) { @@ -79,8 +81,6 @@ static inline u64 get_cycles64(void) } #endif /* CONFIG_64BIT */ -#endif /* !CONFIG_RISCV_M_MODE */ - #define ARCH_HAS_READ_CURRENT_TIMER static inline int read_current_timer(unsigned long *timer_val) { diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index c47e6b35c551..824b2c9da75b 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -476,7 +476,7 @@ do { \ do { \ long __kr_err; \ \ - __put_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \ + __put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \ if (unlikely(__kr_err)) \ goto err_label; \ } while (0) diff --git a/arch/riscv/include/asm/vdso/processor.h b/arch/riscv/include/asm/vdso/processor.h index 82a5693b1861..134388cbaaa1 100644 --- a/arch/riscv/include/asm/vdso/processor.h +++ b/arch/riscv/include/asm/vdso/processor.h @@ -4,6 +4,8 @@ #ifndef __ASSEMBLY__ +#include <asm/barrier.h> + static inline void cpu_relax(void) { #ifdef __riscv_muldiv diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 99e12faa5498..765b62434f30 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2013 Linaro Limited * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 11e2a4fe66e0..7e849797c9c3 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -35,6 +35,10 @@ ENTRY(_start) .word 0 #endif .balign 8 +#ifdef CONFIG_RISCV_M_MODE + /* Image load offset (0MB) from start of RAM for M-mode */ + .dword 0 +#else #if __riscv_xlen == 64 /* Image load offset(2MB) from start of RAM */ .dword 0x200000 @@ -42,6 +46,7 @@ ENTRY(_start) /* Image load offset(4MB) from start of RAM */ .dword 0x400000 #endif +#endif /* Effective size of kernel image */ .dword _end - _start .dword __HEAD_FLAGS diff --git a/arch/riscv/kernel/perf_regs.c b/arch/riscv/kernel/perf_regs.c index 04a38fbeb9c7..fd304a248de6 100644 --- a/arch/riscv/kernel/perf_regs.c +++ b/arch/riscv/kernel/perf_regs.c @@ -36,8 +36,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 19225ec65db6..dd5f985b1f40 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -36,7 +36,7 @@ extern asmlinkage void ret_from_kernel_thread(void); void arch_cpu_idle(void) { wait_for_interrupt(); - local_irq_enable(); + raw_local_irq_enable(); } void show_regs(struct pt_regs *regs) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index c424cc6dd833..117f3212a8e4 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -75,6 +75,7 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; early_ioremap_setup(); + jump_label_init(); parse_early_param(); efi_init(); diff --git a/arch/riscv/kernel/vdso/.gitignore b/arch/riscv/kernel/vdso/.gitignore index 11ebee9e4c1d..3a19def868ec 100644 --- a/arch/riscv/kernel/vdso/.gitignore +++ b/arch/riscv/kernel/vdso/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only vdso.lds *.tmp +vdso-syms.S diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 7d6a94d45ec9..0cfd6da784f8 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -43,19 +43,14 @@ $(obj)/vdso.o: $(obj)/vdso.so SYSCFLAGS_vdso.so.dbg = $(c_flags) $(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE $(call if_changed,vdsold) +SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ + -Wl,--build-id=sha1 -Wl,--hash-style=both # We also create a special relocatable object that should mirror the symbol # table and layout of the linked DSO. With ld --just-symbols we can then # refer to these symbols in the kernel code rather than hand-coded addresses. - -SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ - -Wl,--build-id=sha1 -Wl,--hash-style=both -$(obj)/vdso-dummy.o: $(src)/vdso.lds $(obj)/rt_sigreturn.o FORCE - $(call if_changed,vdsold) - -LDFLAGS_vdso-syms.o := -r --just-symbols -$(obj)/vdso-syms.o: $(obj)/vdso-dummy.o FORCE - $(call if_changed,ld) +$(obj)/vdso-syms.S: $(obj)/vdso.so FORCE + $(call if_changed,so2s) # strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -73,6 +68,11 @@ quiet_cmd_vdsold = VDSOLD $@ $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \ rm $@.tmp +# Extracts symbol offsets from the VDSO, converting them into an assembly file +# that contains the same symbols at the same offsets. +quiet_cmd_so2s = SO2S $@ + cmd_so2s = $(NM) -D $< | $(srctree)/$(src)/so2s.sh > $@ + # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ diff --git a/arch/riscv/kernel/vdso/so2s.sh b/arch/riscv/kernel/vdso/so2s.sh new file mode 100755 index 000000000000..e64cb6d9440e --- /dev/null +++ b/arch/riscv/kernel/vdso/so2s.sh @@ -0,0 +1,6 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0+ +# Copyright 2020 Palmer Dabbelt <palmerdabbelt@google.com> + +sed 's!\([0-9a-f]*\) T \([a-z0-9_]*\)\(@@LINUX_4.15\)*!.global \2\n.set \2,0x\1!' \ +| grep '^\.' diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 1359e21c0c62..3c8b9e433c67 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -86,6 +86,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a pmd_t *pmd, *pmd_k; pte_t *pte_k; int index; + unsigned long pfn; /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) @@ -100,7 +101,8 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a * of a task switch. */ index = pgd_index(addr); - pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index; + pfn = csr_read(CSR_SATP) & SATP_PPN; + pgd = (pgd_t *)pfn_to_virt(pfn) + index; pgd_k = init_mm.pgd + index; if (!pgd_present(*pgd_k)) { diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index ea933b789a88..8e577f14f120 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -154,9 +154,8 @@ disable: void __init setup_bootmem(void) { - phys_addr_t mem_size = 0; - phys_addr_t total_mem = 0; - phys_addr_t mem_start, start, end = 0; + phys_addr_t mem_start = 0; + phys_addr_t start, end = 0; phys_addr_t vmlinux_end = __pa_symbol(&_end); phys_addr_t vmlinux_start = __pa_symbol(&_start); u64 i; @@ -164,21 +163,18 @@ void __init setup_bootmem(void) /* Find the memory region containing the kernel */ for_each_mem_range(i, &start, &end) { phys_addr_t size = end - start; - if (!total_mem) + if (!mem_start) mem_start = start; if (start <= vmlinux_start && vmlinux_end <= end) BUG_ON(size == 0); - total_mem = total_mem + size; } /* - * Remove memblock from the end of usable area to the - * end of region + * The maximal physical memory size is -PAGE_OFFSET. + * Make sure that any memory beyond mem_start + (-PAGE_OFFSET) is removed + * as it is unusable by kernel. */ - mem_size = min(total_mem, (phys_addr_t)-PAGE_OFFSET); - if (mem_start + mem_size < end) - memblock_remove(mem_start + mem_size, - end - mem_start - mem_size); + memblock_enforce_memory_limit(mem_start - PAGE_OFFSET); /* Reserve from the start of the kernel to the end of the kernel */ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); @@ -297,6 +293,7 @@ pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; #define NUM_EARLY_PMDS (1UL + MAX_EARLY_MAPPING_SIZE / PGDIR_SIZE) #endif pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE); +pmd_t early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); static pmd_t *__init get_pmd_virt_early(phys_addr_t pa) { @@ -494,6 +491,18 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) load_pa + (va - PAGE_OFFSET), map_size, PAGE_KERNEL_EXEC); +#ifndef __PAGETABLE_PMD_FOLDED + /* Setup early PMD for DTB */ + create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, + (uintptr_t)early_dtb_pmd, PGDIR_SIZE, PAGE_TABLE); + /* Create two consecutive PMD mappings for FDT early scan */ + pa = dtb_pa & ~(PMD_SIZE - 1); + create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA, + pa, PMD_SIZE, PAGE_KERNEL); + create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE, + pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL); + dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1)); +#else /* Create two consecutive PGD mappings for FDT early scan */ pa = dtb_pa & ~(PGDIR_SIZE - 1); create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, @@ -501,6 +510,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE, pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL); dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1)); +#endif dtb_early_pa = dtb_pa; /* diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 0784bf3caf43..fe6f529ac82c 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -1,3 +1,4 @@ +CONFIG_UAPI_HEADER_TEST=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_WATCH_QUEUE=y @@ -93,9 +94,10 @@ CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y CONFIG_CMA_DEBUG=y CONFIG_CMA_DEBUGFS=y +CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y -CONFIG_ZSMALLOC=m +CONFIG_ZSMALLOC=y CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y @@ -378,7 +380,6 @@ CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m -# CONFIG_NET_DROP_MONITOR is not set CONFIG_PCI=y # CONFIG_PCIEASPM is not set CONFIG_PCI_DEBUG=y @@ -386,7 +387,7 @@ CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_DEVTMPFS=y CONFIG_CONNECTOR=y -CONFIG_ZRAM=m +CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_DRBD=m @@ -689,6 +690,7 @@ CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_SM2=m CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_CHACHA20POLY1305=m @@ -709,7 +711,6 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SM3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -753,6 +754,7 @@ CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y +CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_CORDIC=m CONFIG_CRC32_SELFTEST=y CONFIG_CRC4=m @@ -829,6 +831,7 @@ CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m CONFIG_FAULT_INJECTION=y CONFIG_FAILSLAB=y CONFIG_FAIL_PAGE_ALLOC=y +CONFIG_FAULT_INJECTION_USERCOPY=y CONFIG_FAIL_MAKE_REQUEST=y CONFIG_FAIL_IO_TIMEOUT=y CONFIG_FAIL_FUTEX=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 905bc8c4cfaf..17d5df2c1eff 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -87,9 +87,10 @@ CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y +CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y CONFIG_ZSWAP=y -CONFIG_ZSMALLOC=m +CONFIG_ZSMALLOC=y CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y @@ -371,7 +372,6 @@ CONFIG_NETLINK_DIAG=m CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y CONFIG_NET_PKTGEN=m -# CONFIG_NET_DROP_MONITOR is not set CONFIG_PCI=y # CONFIG_PCIEASPM is not set CONFIG_HOTPLUG_PCI=y @@ -379,7 +379,7 @@ CONFIG_HOTPLUG_PCI_S390=y CONFIG_UEVENT_HELPER=y CONFIG_DEVTMPFS=y CONFIG_CONNECTOR=y -CONFIG_ZRAM=m +CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_CRYPTOLOOP=m CONFIG_BLK_DEV_DRBD=m @@ -680,6 +680,7 @@ CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_DH=m CONFIG_CRYPTO_ECDH=m CONFIG_CRYPTO_ECRDSA=m +CONFIG_CRYPTO_SM2=m CONFIG_CRYPTO_CURVE25519=m CONFIG_CRYPTO_GCM=y CONFIG_CRYPTO_CHACHA20POLY1305=m @@ -701,7 +702,6 @@ CONFIG_CRYPTO_RMD160=m CONFIG_CRYPTO_RMD256=m CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA3=m -CONFIG_CRYPTO_SM3=m CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES_TI=m @@ -745,6 +745,7 @@ CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y +CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m CONFIG_CRC4=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 8f67c55625f9..a302630341ef 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -17,11 +17,11 @@ CONFIG_HZ_100=y # CONFIG_CHSC_SCH is not set # CONFIG_SCM_BUS is not set CONFIG_CRASH_DUMP=y -# CONFIG_SECCOMP is not set # CONFIG_PFAULT is not set # CONFIG_S390_HYPFS_FS is not set # CONFIG_VIRTUALIZATION is not set # CONFIG_S390_GUEST is not set +# CONFIG_SECCOMP is not set CONFIG_PARTITION_ADVANCED=y CONFIG_IBM_PARTITION=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index ada2f98c27b7..65ea12fc87a1 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -11,7 +11,8 @@ #define _CRYPTO_ARCH_S390_SHA_H #include <linux/crypto.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> +#include <crypto/sha2.h> #include <crypto/sha3.h> /* must be big enough for the largest SHA variant */ diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index 698b1e6d3c14..a3fabf310a38 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -22,7 +22,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/cpufeature.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <asm/cpacf.h> #include "sha.h" diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index b52c87e44939..24983f175676 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -12,7 +12,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/cpufeature.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <asm/cpacf.h> #include "sha.h" diff --git a/arch/s390/crypto/sha3_256_s390.c b/arch/s390/crypto/sha3_256_s390.c index 460cbbbaa44a..30ac49b635bf 100644 --- a/arch/s390/crypto/sha3_256_s390.c +++ b/arch/s390/crypto/sha3_256_s390.c @@ -12,7 +12,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/cpufeature.h> -#include <crypto/sha.h> #include <crypto/sha3.h> #include <asm/cpacf.h> diff --git a/arch/s390/crypto/sha3_512_s390.c b/arch/s390/crypto/sha3_512_s390.c index 72cf460a53e5..e70d50f7620f 100644 --- a/arch/s390/crypto/sha3_512_s390.c +++ b/arch/s390/crypto/sha3_512_s390.c @@ -11,7 +11,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/cpufeature.h> -#include <crypto/sha.h> #include <crypto/sha3.h> #include <asm/cpacf.h> diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index ad29db085a18..29a6bd404c59 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -8,7 +8,7 @@ * Author(s): Jan Glauber (jang@de.ibm.com) */ #include <crypto/internal/hash.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/kernel.h> diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 6b8d8c69b1a1..b5dbae78969b 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -692,16 +692,6 @@ static inline int pud_large(pud_t pud) return !!(pud_val(pud) & _REGION3_ENTRY_LARGE); } -static inline unsigned long pud_pfn(pud_t pud) -{ - unsigned long origin_mask; - - origin_mask = _REGION_ENTRY_ORIGIN; - if (pud_large(pud)) - origin_mask = _REGION3_ENTRY_ORIGIN_LARGE; - return (pud_val(pud) & origin_mask) >> PAGE_SHIFT; -} - #define pmd_leaf pmd_large static inline int pmd_large(pmd_t pmd) { @@ -747,16 +737,6 @@ static inline int pmd_none(pmd_t pmd) return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY; } -static inline unsigned long pmd_pfn(pmd_t pmd) -{ - unsigned long origin_mask; - - origin_mask = _SEGMENT_ENTRY_ORIGIN; - if (pmd_large(pmd)) - origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE; - return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT; -} - #define pmd_write pmd_write static inline int pmd_write(pmd_t pmd) { @@ -1238,11 +1218,39 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot) #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) -#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN) -#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN) #define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN) #define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN) +static inline unsigned long pmd_deref(pmd_t pmd) +{ + unsigned long origin_mask; + + origin_mask = _SEGMENT_ENTRY_ORIGIN; + if (pmd_large(pmd)) + origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE; + return pmd_val(pmd) & origin_mask; +} + +static inline unsigned long pmd_pfn(pmd_t pmd) +{ + return pmd_deref(pmd) >> PAGE_SHIFT; +} + +static inline unsigned long pud_deref(pud_t pud) +{ + unsigned long origin_mask; + + origin_mask = _REGION_ENTRY_ORIGIN; + if (pud_large(pud)) + origin_mask = _REGION3_ENTRY_ORIGIN_LARGE; + return pud_val(pud) & origin_mask; +} + +static inline unsigned long pud_pfn(pud_t pud) +{ + return pud_deref(pud) >> PAGE_SHIFT; +} + /* * The pgd_offset function *always* adds the index for the top-level * region/segment table. This is done to get a sequence like the diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h index a996d3990a02..0c2151451ba5 100644 --- a/arch/s390/include/asm/sections.h +++ b/arch/s390/include/asm/sections.h @@ -26,14 +26,14 @@ static inline int arch_is_kernel_initmem_freed(unsigned long addr) * final .boot.data section, which should be identical in the decompressor and * the decompressed kernel (that is checked during the build). */ -#define __bootdata(var) __section(".boot.data.var") var +#define __bootdata(var) __section(".boot.data." #var) var /* * .boot.preserved.data is similar to .boot.data, but it is not part of the * .init section and thus will be preserved for later use in the decompressed * kernel. */ -#define __bootdata_preserved(var) __section(".boot.preserved.data.var") var +#define __bootdata_preserved(var) __section(".boot.preserved.data." #var) var extern unsigned long __sdma, __edma; extern unsigned long __stext_dma, __etext_dma; diff --git a/arch/s390/include/asm/vdso/vdso.h b/arch/s390/include/asm/vdso/vdso.h deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/arch/s390/include/asm/vdso/vdso.h +++ /dev/null diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index ece58f2217cb..483051e10db3 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -53,22 +53,14 @@ int main(void) /* stack_frame offsets */ OFFSET(__SF_BACKCHAIN, stack_frame, back_chain); OFFSET(__SF_GPRS, stack_frame, gprs); - OFFSET(__SF_EMPTY, stack_frame, empty1); - OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[0]); - OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[1]); - OFFSET(__SF_SIE_REASON, stack_frame, empty1[2]); - OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[3]); + OFFSET(__SF_EMPTY, stack_frame, empty1[0]); + OFFSET(__SF_SIE_CONTROL, stack_frame, empty1[1]); + OFFSET(__SF_SIE_SAVEAREA, stack_frame, empty1[2]); + OFFSET(__SF_SIE_REASON, stack_frame, empty1[3]); + OFFSET(__SF_SIE_FLAGS, stack_frame, empty1[4]); BLANK(); OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val); BLANK(); - /* constants used by the vdso */ - DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME); - DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC); - DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); - DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE); - DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID); - DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC); - BLANK(); /* idle data offsets */ OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter); OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 86235919c2d1..92beb1444644 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -422,6 +422,7 @@ ENTRY(system_call) #endif LOCKDEP_SYS_EXIT .Lsysc_tif: + DISABLE_INTS TSTMSK __PT_FLAGS(%r11),_PIF_WORK jnz .Lsysc_work TSTMSK __TI_flags(%r12),_TIF_WORK @@ -444,6 +445,7 @@ ENTRY(system_call) # One of the work bits is on. Find out which one. # .Lsysc_work: + ENABLE_INTS TSTMSK __TI_flags(%r12),_TIF_NEED_RESCHED jo .Lsysc_reschedule TSTMSK __PT_FLAGS(%r11),_PIF_SYSCALL_RESTART @@ -761,12 +763,7 @@ ENTRY(io_int_handler) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ jo .Lio_restore -#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS) - tmhh %r8,0x300 - jz 1f TRACE_IRQS_OFF -1: -#endif xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) .Lio_loop: lgr %r2,%r11 # pass pointer to pt_regs @@ -789,12 +786,7 @@ ENTRY(io_int_handler) TSTMSK __LC_CPU_FLAGS,_CIF_WORK jnz .Lio_work .Lio_restore: -#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS) - tm __PT_PSW(%r11),3 - jno 0f TRACE_IRQS_ON -0: -#endif mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) tm __PT_PSW+1(%r11),0x01 # returning to user ? jno .Lio_exit_kernel @@ -974,12 +966,7 @@ ENTRY(ext_int_handler) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) TSTMSK __LC_CPU_FLAGS,_CIF_IGNORE_IRQ jo .Lio_restore -#if IS_ENABLED(CONFIG_TRACE_IRQFLAGS) - tmhh %r8,0x300 - jz 1f TRACE_IRQS_OFF -1: -#endif xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) lgr %r2,%r11 # pass pointer to pt_regs lghi %r3,EXT_INTERRUPT @@ -1066,6 +1053,7 @@ EXPORT_SYMBOL(save_fpu_regs) * %r4 */ load_fpu_regs: + stnsm __SF_EMPTY(%r15),0xfc lg %r4,__LC_CURRENT aghi %r4,__TASK_thread TSTMSK __LC_CPU_FLAGS,_CIF_FPU @@ -1097,6 +1085,7 @@ load_fpu_regs: .Lload_fpu_regs_done: ni __LC_CPU_FLAGS+7,255-_CIF_FPU .Lload_fpu_regs_exit: + ssm __SF_EMPTY(%r15) BR_EX %r14 .Lload_fpu_regs_end: ENDPROC(load_fpu_regs) diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index f7f1e64e0d98..2b85096964f8 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -33,10 +33,10 @@ void enabled_wait(void) PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; clear_cpu_flag(CIF_NOHZ_DELAY); - local_irq_save(flags); + raw_local_irq_save(flags); /* Call the assembler magic in entry.S */ psw_idle(idle, psw_mask); - local_irq_restore(flags); + raw_local_irq_restore(flags); /* Account time spent with enabled wait psw loaded as idle time. */ raw_write_seqcount_begin(&idle->seqcount); @@ -123,7 +123,7 @@ void arch_cpu_idle_enter(void) void arch_cpu_idle(void) { enabled_wait(); - local_irq_enable(); + raw_local_irq_enable(); } void arch_cpu_idle_exit(void) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 4f9e4626df55..19cd7b961c45 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -672,7 +672,7 @@ static void cpumsf_output_event_pid(struct perf_event *event, rcu_read_lock(); perf_prepare_sample(&header, data, event, regs); - if (perf_output_begin(&handle, event, header.size)) + if (perf_output_begin(&handle, data, event, header.size)) goto out; /* Update the process ID (see also kernel/events/core.c) */ @@ -2228,4 +2228,4 @@ out: } arch_initcall(init_cpum_sampling_pmu); -core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640); +core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0644); diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c index 4352a504f235..6e9e5d5e927e 100644 --- a/arch/s390/kernel/perf_regs.c +++ b/arch/s390/kernel/perf_regs.c @@ -53,8 +53,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { /* * Use the regs from the first interruption and let diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index ebfe86d097f0..390d97daa2b3 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -855,13 +855,14 @@ void __init smp_detect_cpus(void) static void smp_init_secondary(void) { - int cpu = smp_processor_id(); + int cpu = raw_smp_processor_id(); S390_lowcore.last_update_clock = get_tod_clock(); restore_access_regs(S390_lowcore.access_regs_save_area); set_cpu_flag(CIF_ASCE_PRIMARY); set_cpu_flag(CIF_ASCE_SECONDARY); cpu_init(); + rcu_cpu_starting(cpu); preempt_disable(); init_cpu_timer(); vtime_init(); diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 14bd9d58edc9..883bfed9f5c2 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -129,8 +129,15 @@ int uv_destroy_page(unsigned long paddr) .paddr = paddr }; - if (uv_call(0, (u64)&uvcb)) + if (uv_call(0, (u64)&uvcb)) { + /* + * Older firmware uses 107/d as an indication of a non secure + * page. Let us emulate the newer variant (no-op). + */ + if (uvcb.header.rc == 0x107 && uvcb.header.rrc == 0xd) + return 0; return -EINVAL; + } return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6b74b92c1a58..425d3d75320b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2312,7 +2312,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) struct kvm_s390_pv_unp unp = {}; r = -EINVAL; - if (!kvm_s390_pv_is_protected(kvm)) + if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm)) break; r = -EFAULT; @@ -3564,7 +3564,6 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->pp = 0; vcpu->arch.sie_block->fpf &= ~FPF_BPBC; vcpu->arch.sie_block->todpr = 0; - vcpu->arch.sie_block->cpnc = 0; } } @@ -3582,7 +3581,6 @@ static void kvm_arch_vcpu_ioctl_clear_reset(struct kvm_vcpu *vcpu) regs->etoken = 0; regs->etoken_extension = 0; - regs->diag318 = 0; } int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index eb99e2f95ebe..f5847f9dec7c 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -208,7 +208,6 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc) return -EIO; } kvm->arch.gmap->guest_handle = uvcb.guest_handle; - atomic_set(&kvm->mm->context.is_protected, 1); return 0; } @@ -228,6 +227,8 @@ int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc, *rrc = uvcb.header.rrc; KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x", *rc, *rrc); + if (!cc) + atomic_set(&kvm->mm->context.is_protected, 1); return cc ? -EINVAL : 0; } diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index daca7bad66de..8c0c68e7770e 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL(__delay); static void __udelay_disabled(unsigned long long usecs) { - unsigned long cr0, cr0_new, psw_mask, flags; + unsigned long cr0, cr0_new, psw_mask; struct s390_idle_data idle; u64 end; @@ -45,9 +45,8 @@ static void __udelay_disabled(unsigned long long usecs) psw_mask = __extract_psw() | PSW_MASK_EXT | PSW_MASK_WAIT; set_clock_comparator(end); set_cpu_flag(CIF_IGNORE_IRQ); - local_irq_save(flags); psw_idle(&idle, psw_mask); - local_irq_restore(flags); + trace_hardirqs_off(); clear_cpu_flag(CIF_IGNORE_IRQ); set_clock_comparator(S390_lowcore.clock_comparator); __ctl_load(cr0, 0, 0); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index cfb0017f33a7..64795d034926 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2690,6 +2690,8 @@ static const struct mm_walk_ops reset_acc_walk_ops = { #include <linux/sched/mm.h> void s390_reset_acc(struct mm_struct *mm) { + if (!mm_is_protected(mm)) + return; /* * we might be called during * reset: we walk the pages and clear diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index d33f21545dfd..9a6bae503fe6 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -101,6 +101,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) if (ret) break; + /* the PCI function will be scanned once function 0 appears */ + if (!zdev->zbus->bus) + break; + pdev = pci_scan_single_device(zdev->zbus->bus, zdev->devfn); if (!pdev) break; diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 743f257cf2cb..75217fb63d7b 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -103,9 +103,10 @@ static int zpci_set_irq_affinity(struct irq_data *data, const struct cpumask *de { struct msi_desc *entry = irq_get_msi_desc(data->irq); struct msi_msg msg = entry->msg; + int cpu_addr = smp_cpu_get_cpu_address(cpumask_first(dest)); msg.address_lo &= 0xff0000ff; - msg.address_lo |= (cpumask_first(dest) << 8); + msg.address_lo |= (cpu_addr << 8); pci_write_msi_msg(data->irq, &msg); return IRQ_SET_MASK_OK; @@ -238,6 +239,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) unsigned long bit; struct msi_desc *msi; struct msi_msg msg; + int cpu_addr; int rc, irq; zdev->aisb = -1UL; @@ -287,9 +289,15 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) handle_percpu_irq); msg.data = hwirq - bit; if (irq_delivery == DIRECTED) { + if (msi->affinity) + cpu = cpumask_first(&msi->affinity->mask); + else + cpu = 0; + cpu_addr = smp_cpu_get_cpu_address(cpu); + msg.address_lo = zdev->msi_addr & 0xff0000ff; - msg.address_lo |= msi->affinity ? - (cpumask_first(&msi->affinity->mask) << 8) : 0; + msg.address_lo |= (cpu_addr << 8); + for_each_possible_cpu(cpu) { airq_iv_set_data(zpci_ibv[cpu], hwirq, irq); } diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c index 0a423bcf6746..030efda05dbe 100644 --- a/arch/s390/purgatory/purgatory.c +++ b/arch/s390/purgatory/purgatory.c @@ -9,7 +9,7 @@ #include <linux/kexec.h> #include <linux/string.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <asm/purgatory.h> int verify_sha256_digest(void) diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index 0dc0f52f9bb8..f59814983bd5 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -22,7 +22,7 @@ static void (*sh_idle)(void); void default_idle(void) { set_bl_bit(); - local_irq_enable(); + raw_local_irq_enable(); /* Isn't this racy ? */ cpu_sleep(); clear_bl_bit(); diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glue.c index 4e9323229e71..82efb7f81c28 100644 --- a/arch/sparc/crypto/crc32c_glue.c +++ b/arch/sparc/crypto/crc32c_glue.c @@ -35,7 +35,7 @@ static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key, if (keylen != sizeof(u32)) return -EINVAL; - *(__le32 *)mctx = le32_to_cpup((__le32 *)key); + *mctx = le32_to_cpup((__le32 *)key); return 0; } diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c index 111283fe837e..511db98d590a 100644 --- a/arch/sparc/crypto/md5_glue.c +++ b/arch/sparc/crypto/md5_glue.c @@ -33,10 +33,11 @@ static int md5_sparc64_init(struct shash_desc *desc) { struct md5_state *mctx = shash_desc_ctx(desc); - mctx->hash[0] = cpu_to_le32(MD5_H0); - mctx->hash[1] = cpu_to_le32(MD5_H1); - mctx->hash[2] = cpu_to_le32(MD5_H2); - mctx->hash[3] = cpu_to_le32(MD5_H3); + mctx->hash[0] = MD5_H0; + mctx->hash[1] = MD5_H1; + mctx->hash[2] = MD5_H2; + mctx->hash[3] = MD5_H3; + le32_to_cpu_array(mctx->hash, 4); mctx->byte_count = 0; return 0; diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c index dc017782be52..86a654cce5ab 100644 --- a/arch/sparc/crypto/sha1_glue.c +++ b/arch/sparc/crypto/sha1_glue.c @@ -16,7 +16,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <asm/pstate.h> #include <asm/elf.h> diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c index ca2547df9652..60ec524cf9ca 100644 --- a/arch/sparc/crypto/sha256_glue.c +++ b/arch/sparc/crypto/sha256_glue.c @@ -16,7 +16,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <asm/pstate.h> #include <asm/elf.h> diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c index 3b2ca732ff7a..273ce21918c1 100644 --- a/arch/sparc/crypto/sha512_glue.c +++ b/arch/sparc/crypto/sha512_glue.c @@ -15,7 +15,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <asm/pstate.h> #include <asm/elf.h> diff --git a/arch/sparc/kernel/leon_pmc.c b/arch/sparc/kernel/leon_pmc.c index 065e2d4b7290..396f46bca52e 100644 --- a/arch/sparc/kernel/leon_pmc.c +++ b/arch/sparc/kernel/leon_pmc.c @@ -50,7 +50,7 @@ static void pmc_leon_idle_fixup(void) register unsigned int address = (unsigned int)leon3_irqctrl_regs; /* Interrupts need to be enabled to not hang the CPU */ - local_irq_enable(); + raw_local_irq_enable(); __asm__ __volatile__ ( "wr %%g0, %%asr19\n" @@ -66,7 +66,7 @@ static void pmc_leon_idle_fixup(void) static void pmc_leon_idle(void) { /* Interrupts need to be enabled to not hang the CPU */ - local_irq_enable(); + raw_local_irq_enable(); /* For systems without power-down, this will be no-op */ __asm__ __volatile__ ("wr %g0, %asr19\n\t"); diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index adfcaeab3ddc..a02363735915 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -74,7 +74,7 @@ void arch_cpu_idle(void) { if (sparc_idle) (*sparc_idle)(); - local_irq_enable(); + raw_local_irq_enable(); } /* XXX cli/sti -> local_irq_xxx here, check this works once SMP is fixed. */ diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index a75093b993f9..6f8c7822fc06 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -62,11 +62,11 @@ void arch_cpu_idle(void) { if (tlb_type != hypervisor) { touch_nmi_watchdog(); - local_irq_enable(); + raw_local_irq_enable(); } else { unsigned long pstate; - local_irq_enable(); + raw_local_irq_enable(); /* The sun4v sleeping code requires that we have PSTATE.IE cleared over * the cpu sleep hypervisor call. diff --git a/arch/sparc/lib/csum_copy.S b/arch/sparc/lib/csum_copy.S index 0c0268e77155..d839956407a7 100644 --- a/arch/sparc/lib/csum_copy.S +++ b/arch/sparc/lib/csum_copy.S @@ -71,7 +71,7 @@ FUNC_NAME: /* %o0=src, %o1=dst, %o2=len */ LOAD(prefetch, %o0 + 0x000, #n_reads) xor %o0, %o1, %g1 - mov 1, %o3 + mov -1, %o3 clr %o4 andcc %g1, 0x3, %g0 bne,pn %icc, 95f diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h index 5393e13e07e0..2bbf28cf3aa9 100644 --- a/arch/um/include/asm/pgalloc.h +++ b/arch/um/include/asm/pgalloc.h @@ -33,7 +33,13 @@ do { \ } while (0) #ifdef CONFIG_3_LEVEL_PGTABLES -#define __pmd_free_tlb(tlb,x, address) tlb_remove_page((tlb),virt_to_page(x)) + +#define __pmd_free_tlb(tlb, pmd, address) \ +do { \ + pgtable_pmd_page_dtor(virt_to_page(pmd)); \ + tlb_remove_page((tlb),virt_to_page(pmd)); \ +} while (0) \ + #endif #endif diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 3bed09538dd9..9505a7e87396 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -217,7 +217,7 @@ void arch_cpu_idle(void) { cpu_tasks[current_thread_info()->cpu].pid = os_getpid(); um_idle_sleep(); - local_irq_enable(); + raw_local_irq_enable(); } int __cant_sleep(void) { diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c index 95c355181dcd..bfb70c456b30 100644 --- a/arch/um/kernel/skas/clone.c +++ b/arch/um/kernel/skas/clone.c @@ -21,7 +21,7 @@ * on some systems. */ -void __section(".__syscall_stub") +void __attribute__ ((__section__ (".__syscall_stub"))) stub_clone_handler(void) { struct stub_data *data = (struct stub_data *) STUB_DATA; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f6946b81f74a..52e36adb5112 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -100,6 +100,7 @@ config X86 select ARCH_WANT_DEFAULT_BPF_JIT if X86_64 select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANT_HUGE_PMD_SHARE + select ARCH_WANT_LD_ORPHAN_WARN select ARCH_WANTS_THP_SWAP if X86_64 select BUILDTIME_TABLE_SORT select CLKEVT_I8253 @@ -1930,6 +1931,23 @@ config X86_INTEL_TSX_MODE_AUTO side channel attacks- equals the tsx=auto command line parameter. endchoice +config X86_SGX + bool "Software Guard eXtensions (SGX)" + depends on X86_64 && CPU_SUP_INTEL + depends on CRYPTO=y + depends on CRYPTO_SHA256=y + select SRCU + select MMU_NOTIFIER + help + Intel(R) Software Guard eXtensions (SGX) is a set of CPU instructions + that can be used by applications to set aside private regions of code + and data, referred to as enclaves. An enclave's private memory can + only be accessed by code running within the enclave. Accesses from + outside the enclave, including other enclaves, are disallowed by + hardware. + + If unsure, say N. + config EFI bool "EFI runtime service support" depends on ACPI diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 27b5e2bc6a01..80b57e7f4947 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -62,9 +62,6 @@ config EARLY_PRINTK_USB_XDBC You should normally say N here, unless you want to debug early crashes or need a very simple printk logging facility. -config COPY_MC_TEST - def_bool n - config EFI_PGT_DUMP bool "Dump the EFI pagetable" depends on EFI diff --git a/arch/x86/Makefile b/arch/x86/Makefile index b8910661447f..7116da3980be 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -202,9 +202,6 @@ ifdef CONFIG_X86_64 LDFLAGS_vmlinux += -z max-page-size=0x200000 endif -# We never want expected sections to be placed heuristically by the -# linker. All sections should be explicitly named in the linker script. -LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn) archscripts: scripts_basic $(Q)$(MAKE) $(build)=arch/x86/tools relocs diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index ee249088cbfe..40b8fd375d52 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -61,7 +61,9 @@ KBUILD_LDFLAGS += $(call ld-option,--no-ld-generated-unwind-info) # Compressed kernel should be built as PIE since it may be loaded at any # address by the bootloader. LDFLAGS_vmlinux := -pie $(call ld-option, --no-dynamic-linker) -LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn) +ifdef CONFIG_LD_ORPHAN_WARN +LDFLAGS_vmlinux += --orphan-handling=warn +endif LDFLAGS_vmlinux += -T hostprogs := mkpiggy diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 017de6cc87dc..e94874f4bbc1 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -241,12 +241,12 @@ SYM_FUNC_START(startup_32) leal rva(startup_64)(%ebp), %eax #ifdef CONFIG_EFI_MIXED movl rva(efi32_boot_args)(%ebp), %edi - cmp $0, %edi + testl %edi, %edi jz 1f leal rva(efi64_stub_entry)(%ebp), %eax movl rva(efi32_boot_args+4)(%ebp), %esi movl rva(efi32_boot_args+8)(%ebp), %edx // saved bootparams pointer - cmpl $0, %edx + testl %edx, %edx jnz 1f /* * efi_pe_entry uses MS calling convention, which requires 32 bytes of @@ -592,7 +592,7 @@ SYM_CODE_START(trampoline_32bit_src) movl %eax, %cr0 /* Check what paging mode we want to be in after the trampoline */ - cmpl $0, %edx + testl %edx, %edx jz 1f /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */ @@ -622,7 +622,7 @@ SYM_CODE_START(trampoline_32bit_src) /* Enable PAE and LA57 (if required) paging modes */ movl $X86_CR4_PAE, %eax - cmpl $0, %edx + testl %edx, %edx jz 1f orl $X86_CR4_LA57, %eax 1: diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c index a5e5db6ada3c..f7213d0943b8 100644 --- a/arch/x86/boot/compressed/ident_map_64.c +++ b/arch/x86/boot/compressed/ident_map_64.c @@ -164,16 +164,7 @@ void initialize_identity_maps(void *rmode) add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE); /* Load the new page-table. */ - write_cr3(top_level_pgt); -} - -/* - * This switches the page tables to the new level4 that has been built - * via calls to add_identity_map() above. If booted via startup_32(), - * this is effectively a no-op. - */ -void finalize_identity_maps(void) -{ + sev_verify_cbit(top_level_pgt); write_cr3(top_level_pgt); } diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S index dd07e7b41b11..aa561795efd1 100644 --- a/arch/x86/boot/compressed/mem_encrypt.S +++ b/arch/x86/boot/compressed/mem_encrypt.S @@ -68,6 +68,9 @@ SYM_FUNC_START(get_sev_encryption_bit) SYM_FUNC_END(get_sev_encryption_bit) .code64 + +#include "../../kernel/sev_verify_cbit.S" + SYM_FUNC_START(set_sev_encryption_mask) #ifdef CONFIG_AMD_MEM_ENCRYPT push %rbp @@ -81,6 +84,19 @@ SYM_FUNC_START(set_sev_encryption_mask) bts %rax, sme_me_mask(%rip) /* Create the encryption mask */ + /* + * Read MSR_AMD64_SEV again and store it to sev_status. Can't do this in + * get_sev_encryption_bit() because this function is 32-bit code and + * shared between 64-bit and 32-bit boot path. + */ + movl $MSR_AMD64_SEV, %ecx /* Read the SEV MSR */ + rdmsr + + /* Store MSR value in sev_status */ + shlq $32, %rdx + orq %rdx, %rax + movq %rax, sev_status(%rip) + .Lno_sev_mask: movq %rbp, %rsp /* Restore original stack pointer */ @@ -96,5 +112,7 @@ SYM_FUNC_END(set_sev_encryption_mask) #ifdef CONFIG_AMD_MEM_ENCRYPT .balign 8 -SYM_DATA(sme_me_mask, .quad 0) +SYM_DATA(sme_me_mask, .quad 0) +SYM_DATA(sev_status, .quad 0) +SYM_DATA(sev_check_data, .quad 0) #endif diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 6d31f1b4c4d1..d9a631c5973c 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -159,4 +159,6 @@ void boot_page_fault(void); void boot_stage1_vc(void); void boot_stage2_vc(void); +unsigned long sev_verify_cbit(unsigned long cr3); + #endif /* BOOT_COMPRESSED_MISC_H */ diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c index 954cb2702e23..27826c265aab 100644 --- a/arch/x86/boot/compressed/sev-es.c +++ b/arch/x86/boot/compressed/sev-es.c @@ -32,13 +32,12 @@ struct ghcb *boot_ghcb; */ static bool insn_has_rep_prefix(struct insn *insn) { + insn_byte_t p; int i; insn_get_prefixes(insn); - for (i = 0; i < insn->prefixes.nbytes; i++) { - insn_byte_t p = insn->prefixes.bytes[i]; - + for_each_insn_prefix(insn, i, p) { if (p == 0xf2 || p == 0xf3) return true; } diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c deleted file mode 100644 index 7b7dc05fa1a4..000000000000 --- a/arch/x86/crypto/aes_glue.c +++ /dev/null @@ -1 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 1852b19a73a0..d1436c37008b 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -318,7 +318,7 @@ _initial_blocks_\@: # Main loop - Encrypt/Decrypt remaining blocks - cmp $0, %r13 + test %r13, %r13 je _zero_cipher_left_\@ sub $64, %r13 je _four_cipher_left_\@ @@ -437,7 +437,7 @@ _multiple_of_16_bytes_\@: mov PBlockLen(%arg2), %r12 - cmp $0, %r12 + test %r12, %r12 je _partial_done\@ GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 @@ -474,7 +474,7 @@ _T_8_\@: add $8, %r10 sub $8, %r11 psrldq $8, %xmm0 - cmp $0, %r11 + test %r11, %r11 je _return_T_done_\@ _T_4_\@: movd %xmm0, %eax @@ -482,7 +482,7 @@ _T_4_\@: add $4, %r10 sub $4, %r11 psrldq $4, %xmm0 - cmp $0, %r11 + test %r11, %r11 je _return_T_done_\@ _T_123_\@: movd %xmm0, %eax @@ -619,7 +619,7 @@ _get_AAD_blocks\@: /* read the last <16B of AAD */ _get_AAD_rest\@: - cmp $0, %r11 + test %r11, %r11 je _get_AAD_done\@ READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7 @@ -640,7 +640,7 @@ _get_AAD_done\@: .macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ AAD_HASH operation mov PBlockLen(%arg2), %r13 - cmp $0, %r13 + test %r13, %r13 je _partial_block_done_\@ # Leave Macro if no partial blocks # Read in input data without over reading cmp $16, \PLAIN_CYPH_LEN @@ -692,7 +692,7 @@ _no_extra_mask_1_\@: pshufb %xmm2, %xmm3 pxor %xmm3, \AAD_HASH - cmp $0, %r10 + test %r10, %r10 jl _partial_incomplete_1_\@ # GHASH computation for the last <16 Byte block @@ -727,7 +727,7 @@ _no_extra_mask_2_\@: pshufb %xmm2, %xmm9 pxor %xmm9, \AAD_HASH - cmp $0, %r10 + test %r10, %r10 jl _partial_incomplete_2_\@ # GHASH computation for the last <16 Byte block @@ -747,7 +747,7 @@ _encode_done_\@: pshufb %xmm2, %xmm9 .endif # output encrypted Bytes - cmp $0, %r10 + test %r10, %r10 jl _partial_fill_\@ mov %r13, %r12 mov $16, %r13 @@ -2720,7 +2720,7 @@ SYM_FUNC_END(aesni_ctr_enc) */ SYM_FUNC_START(aesni_xts_crypt8) FRAME_BEGIN - cmpb $0, %cl + testb %cl, %cl movl $0, %ecx movl $240, %r10d leaq _aesni_enc4, %r11 diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index 5fee47956f3b..2cf8e94d986a 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -369,7 +369,7 @@ _initial_num_blocks_is_0\@: _initial_blocks_encrypted\@: - cmp $0, %r13 + test %r13, %r13 je _zero_cipher_left\@ sub $128, %r13 @@ -528,7 +528,7 @@ _multiple_of_16_bytes\@: vmovdqu HashKey(arg2), %xmm13 mov PBlockLen(arg2), %r12 - cmp $0, %r12 + test %r12, %r12 je _partial_done\@ #GHASH computation for the last <16 Byte block @@ -573,7 +573,7 @@ _T_8\@: add $8, %r10 sub $8, %r11 vpsrldq $8, %xmm9, %xmm9 - cmp $0, %r11 + test %r11, %r11 je _return_T_done\@ _T_4\@: vmovd %xmm9, %eax @@ -581,7 +581,7 @@ _T_4\@: add $4, %r10 sub $4, %r11 vpsrldq $4, %xmm9, %xmm9 - cmp $0, %r11 + test %r11, %r11 je _return_T_done\@ _T_123\@: vmovd %xmm9, %eax @@ -625,7 +625,7 @@ _get_AAD_blocks\@: cmp $16, %r11 jge _get_AAD_blocks\@ vmovdqu \T8, \T7 - cmp $0, %r11 + test %r11, %r11 je _get_AAD_done\@ vpxor \T7, \T7, \T7 @@ -644,7 +644,7 @@ _get_AAD_rest8\@: vpxor \T1, \T7, \T7 jmp _get_AAD_rest8\@ _get_AAD_rest4\@: - cmp $0, %r11 + test %r11, %r11 jle _get_AAD_rest0\@ mov (%r10), %eax movq %rax, \T1 @@ -749,7 +749,7 @@ _done_read_partial_block_\@: .macro PARTIAL_BLOCK GHASH_MUL CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \ AAD_HASH ENC_DEC mov PBlockLen(arg2), %r13 - cmp $0, %r13 + test %r13, %r13 je _partial_block_done_\@ # Leave Macro if no partial blocks # Read in input data without over reading cmp $16, \PLAIN_CYPH_LEN @@ -801,7 +801,7 @@ _no_extra_mask_1_\@: vpshufb %xmm2, %xmm3, %xmm3 vpxor %xmm3, \AAD_HASH, \AAD_HASH - cmp $0, %r10 + test %r10, %r10 jl _partial_incomplete_1_\@ # GHASH computation for the last <16 Byte block @@ -836,7 +836,7 @@ _no_extra_mask_2_\@: vpshufb %xmm2, %xmm9, %xmm9 vpxor %xmm9, \AAD_HASH, \AAD_HASH - cmp $0, %r10 + test %r10, %r10 jl _partial_incomplete_2_\@ # GHASH computation for the last <16 Byte block @@ -856,7 +856,7 @@ _encode_done_\@: vpshufb %xmm2, %xmm9, %xmm9 .endif # output encrypted Bytes - cmp $0, %r10 + test %r10, %r10 jl _partial_fill_\@ mov %r13, %r12 mov $16, %r13 diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl index 7d568012cc15..71fae5a09e56 100644 --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl @@ -251,7 +251,7 @@ $code.=<<___; mov %rax,8($ctx) mov %rax,16($ctx) - cmp \$0,$inp + test $inp,$inp je .Lno_key ___ $code.=<<___ if (!$kernel); diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c index e508dbd91813..646da46e8d10 100644 --- a/arch/x86/crypto/poly1305_glue.c +++ b/arch/x86/crypto/poly1305_glue.c @@ -158,6 +158,7 @@ static unsigned int crypto_poly1305_setdctxkey(struct poly1305_desc_ctx *dctx, dctx->s[1] = get_unaligned_le32(&inp[4]); dctx->s[2] = get_unaligned_le32(&inp[8]); dctx->s[3] = get_unaligned_le32(&inp[12]); + acc += POLY1305_BLOCK_SIZE; dctx->sset = true; } } @@ -209,7 +210,7 @@ void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) } poly1305_simd_emit(&dctx->h, dst, dctx->s); - *dctx = (struct poly1305_desc_ctx){}; + memzero_explicit(dctx, sizeof(*dctx)); } EXPORT_SYMBOL(poly1305_final_arch); diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 18200135603f..44340a1139e0 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -22,7 +22,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha1.h> #include <crypto/sha1_base.h> #include <asm/simd.h> diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index dd06249229e1..3a5f6be7dbba 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -35,7 +35,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha256_base.h> #include <linux/string.h> #include <asm/simd.h> diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index 63470fd6ae32..684d58c8bc4f 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S @@ -278,7 +278,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE # "blocks" is the message length in SHA512 blocks ######################################################################## SYM_FUNC_START(sha512_transform_avx) - cmp $0, msglen + test msglen, msglen je nowork # Allocate Stack Space diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S index 7946a1bee85b..50812af0b083 100644 --- a/arch/x86/crypto/sha512-ssse3-asm.S +++ b/arch/x86/crypto/sha512-ssse3-asm.S @@ -280,7 +280,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE ######################################################################## SYM_FUNC_START(sha512_transform_ssse3) - cmp $0, msglen + test msglen, msglen je nowork # Allocate Stack Space diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index b0b05c93409e..30e70f4fe2f7 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -34,7 +34,7 @@ #include <linux/mm.h> #include <linux/string.h> #include <linux/types.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <crypto/sha512_base.h> #include <asm/simd.h> diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 1f47e24fb65c..379819244b91 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -364,10 +364,10 @@ 440 common process_madvise sys_process_madvise # -# x32-specific system call numbers start at 512 to avoid cache impact -# for native 64-bit operation. The __x32_compat_sys stubs are created -# on-the-fly for compat_sys_*() compatibility system calls if X86_X32 -# is defined. +# Due to a historical design error, certain syscalls are numbered differently +# in x32 as compared to native x86_64. These syscalls have numbers 512-547. +# Do not add new syscalls to this range. Numbers 548 and above are available +# for non-x32 use. # 512 x32 rt_sigaction compat_sys_rt_sigaction 513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn @@ -405,3 +405,5 @@ 545 x32 execveat compat_sys_execveat 546 x32 preadv2 compat_sys_preadv64v2 547 x32 pwritev2 compat_sys_pwritev64v2 +# This is the end of the legacy x32 range. Numbers 548 and above are +# not special and are not to be used for x32-specific syscalls. diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 21243747965d..02e3e42f380b 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -27,9 +27,10 @@ VDSO32-$(CONFIG_IA32_EMULATION) := y vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o vobjs32-y += vdso32/vclock_gettime.o +vobjs-$(CONFIG_X86_SGX) += vsgx.o # files to link into kernel -obj-y += vma.o +obj-y += vma.o extable.o KASAN_SANITIZE_vma.o := y UBSAN_SANITIZE_vma.o := y KCSAN_SANITIZE_vma.o := y @@ -98,6 +99,7 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS CFLAGS_REMOVE_vclock_gettime.o = -pg CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg CFLAGS_REMOVE_vgetcpu.o = -pg +CFLAGS_REMOVE_vsgx.o = -pg # # X32 processes use x32 vDSO to access 64bit kernel data. @@ -128,8 +130,8 @@ $(obj)/%-x32.o: $(obj)/%.o FORCE targets += vdsox32.lds $(vobjx32s-y) -$(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg FORCE +$(obj)/%.so: OBJCOPYFLAGS := -S --remove-section __ex_table +$(obj)/%.so: $(obj)/%.so.dbg $(call if_changed,objcopy) $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE diff --git a/arch/x86/entry/vdso/extable.c b/arch/x86/entry/vdso/extable.c new file mode 100644 index 000000000000..afcf5b65beef --- /dev/null +++ b/arch/x86/entry/vdso/extable.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/err.h> +#include <linux/mm.h> +#include <asm/current.h> +#include <asm/traps.h> +#include <asm/vdso.h> + +struct vdso_exception_table_entry { + int insn, fixup; +}; + +bool fixup_vdso_exception(struct pt_regs *regs, int trapnr, + unsigned long error_code, unsigned long fault_addr) +{ + const struct vdso_image *image = current->mm->context.vdso_image; + const struct vdso_exception_table_entry *extable; + unsigned int nr_entries, i; + unsigned long base; + + /* + * Do not attempt to fixup #DB or #BP. It's impossible to identify + * whether or not a #DB/#BP originated from within an SGX enclave and + * SGX enclaves are currently the only use case for vDSO fixup. + */ + if (trapnr == X86_TRAP_DB || trapnr == X86_TRAP_BP) + return false; + + if (!current->mm->context.vdso) + return false; + + base = (unsigned long)current->mm->context.vdso + image->extable_base; + nr_entries = image->extable_len / (sizeof(*extable)); + extable = image->extable; + + for (i = 0; i < nr_entries; i++) { + if (regs->ip == base + extable[i].insn) { + regs->ip = base + extable[i].fixup; + regs->di = trapnr; + regs->si = error_code; + regs->dx = fault_addr; + return true; + } + } + + return false; +} diff --git a/arch/x86/entry/vdso/extable.h b/arch/x86/entry/vdso/extable.h new file mode 100644 index 000000000000..b56f6b012941 --- /dev/null +++ b/arch/x86/entry/vdso/extable.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __VDSO_EXTABLE_H +#define __VDSO_EXTABLE_H + +/* + * Inject exception fixup for vDSO code. Unlike normal exception fixup, + * vDSO uses a dedicated handler the addresses are relative to the overall + * exception table, not each individual entry. + */ +#ifdef __ASSEMBLY__ +#define _ASM_VDSO_EXTABLE_HANDLE(from, to) \ + ASM_VDSO_EXTABLE_HANDLE from to + +.macro ASM_VDSO_EXTABLE_HANDLE from:req to:req + .pushsection __ex_table, "a" + .long (\from) - __ex_table + .long (\to) - __ex_table + .popsection +.endm +#else +#define _ASM_VDSO_EXTABLE_HANDLE(from, to) \ + ".pushsection __ex_table, \"a\"\n" \ + ".long (" #from ") - __ex_table\n" \ + ".long (" #to ") - __ex_table\n" \ + ".popsection\n" +#endif + +#endif /* __VDSO_EXTABLE_H */ diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vdso-layout.lds.S index 4d152933547d..dc8da7695859 100644 --- a/arch/x86/entry/vdso/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/vdso-layout.lds.S @@ -75,11 +75,18 @@ SECTIONS * stuff that isn't used at runtime in between. */ - .text : { *(.text*) } :text =0x90909090, + .text : { + *(.text*) + *(.fixup) + } :text =0x90909090, + + .altinstructions : { *(.altinstructions) } :text .altinstr_replacement : { *(.altinstr_replacement) } :text + __ex_table : { *(__ex_table) } :text + /DISCARD/ : { *(.discard) *(.discard.*) diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S index 36b644e16272..4bf48462fca7 100644 --- a/arch/x86/entry/vdso/vdso.lds.S +++ b/arch/x86/entry/vdso/vdso.lds.S @@ -27,6 +27,7 @@ VERSION { __vdso_time; clock_getres; __vdso_clock_getres; + __vdso_sgx_enter_enclave; local: *; }; } diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h index 6f46e11ce539..1c7cfac7e64a 100644 --- a/arch/x86/entry/vdso/vdso2c.h +++ b/arch/x86/entry/vdso/vdso2c.h @@ -5,6 +5,41 @@ * are built for 32-bit userspace. */ +static void BITSFUNC(copy)(FILE *outfile, const unsigned char *data, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (i % 10 == 0) + fprintf(outfile, "\n\t"); + fprintf(outfile, "0x%02X, ", (int)(data)[i]); + } +} + + +/* + * Extract a section from the input data into a standalone blob. Used to + * capture kernel-only data that needs to persist indefinitely, e.g. the + * exception fixup tables, but only in the kernel, i.e. the section can + * be stripped from the final vDSO image. + */ +static void BITSFUNC(extract)(const unsigned char *data, size_t data_len, + FILE *outfile, ELF(Shdr) *sec, const char *name) +{ + unsigned long offset; + size_t len; + + offset = (unsigned long)GET_LE(&sec->sh_offset); + len = (size_t)GET_LE(&sec->sh_size); + + if (offset + len > data_len) + fail("section to extract overruns input data"); + + fprintf(outfile, "static const unsigned char %s[%lu] = {", name, len); + BITSFUNC(copy)(outfile, data + offset, len); + fprintf(outfile, "\n};\n\n"); +} + static void BITSFUNC(go)(void *raw_addr, size_t raw_len, void *stripped_addr, size_t stripped_len, FILE *outfile, const char *image_name) @@ -15,7 +50,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr; unsigned long i, syms_nr; ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr, - *alt_sec = NULL; + *alt_sec = NULL, *extable_sec = NULL; ELF(Dyn) *dyn = 0, *dyn_end = 0; const char *secstrings; INT_BITS syms[NSYMS] = {}; @@ -77,6 +112,8 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, if (!strcmp(secstrings + GET_LE(&sh->sh_name), ".altinstructions")) alt_sec = sh; + if (!strcmp(secstrings + GET_LE(&sh->sh_name), "__ex_table")) + extable_sec = sh; } if (!symtab_hdr) @@ -155,6 +192,9 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, (int)((unsigned char *)stripped_addr)[i]); } fprintf(outfile, "\n};\n\n"); + if (extable_sec) + BITSFUNC(extract)(raw_addr, raw_len, outfile, + extable_sec, "extable"); fprintf(outfile, "const struct vdso_image %s = {\n", image_name); fprintf(outfile, "\t.data = raw_data,\n"); @@ -165,6 +205,14 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, fprintf(outfile, "\t.alt_len = %lu,\n", (unsigned long)GET_LE(&alt_sec->sh_size)); } + if (extable_sec) { + fprintf(outfile, "\t.extable_base = %lu,\n", + (unsigned long)GET_LE(&extable_sec->sh_offset)); + fprintf(outfile, "\t.extable_len = %lu,\n", + (unsigned long)GET_LE(&extable_sec->sh_size)); + fprintf(outfile, "\t.extable = extable,\n"); + } + for (i = 0; i < NSYMS; i++) { if (required_syms[i].export && syms[i]) fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n", diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 9185cb1d13b9..50e5d3a2e70a 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -413,10 +413,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) #ifdef CONFIG_COMPAT int compat_arch_setup_additional_pages(struct linux_binprm *bprm, - int uses_interp) + int uses_interp, bool x32) { #ifdef CONFIG_X86_X32_ABI - if (test_thread_flag(TIF_X32)) { + if (x32) { if (!vdso64_enabled) return 0; return map_vdso_randomized(&vdso_image_x32); diff --git a/arch/x86/entry/vdso/vsgx.S b/arch/x86/entry/vdso/vsgx.S new file mode 100644 index 000000000000..86a0e94f68df --- /dev/null +++ b/arch/x86/entry/vdso/vsgx.S @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/linkage.h> +#include <asm/export.h> +#include <asm/errno.h> +#include <asm/enclu.h> + +#include "extable.h" + +/* Relative to %rbp. */ +#define SGX_ENCLAVE_OFFSET_OF_RUN 16 + +/* The offsets relative to struct sgx_enclave_run. */ +#define SGX_ENCLAVE_RUN_TCS 0 +#define SGX_ENCLAVE_RUN_LEAF 8 +#define SGX_ENCLAVE_RUN_EXCEPTION_VECTOR 12 +#define SGX_ENCLAVE_RUN_EXCEPTION_ERROR_CODE 14 +#define SGX_ENCLAVE_RUN_EXCEPTION_ADDR 16 +#define SGX_ENCLAVE_RUN_USER_HANDLER 24 +#define SGX_ENCLAVE_RUN_USER_DATA 32 /* not used */ +#define SGX_ENCLAVE_RUN_RESERVED_START 40 +#define SGX_ENCLAVE_RUN_RESERVED_END 256 + +.code64 +.section .text, "ax" + +SYM_FUNC_START(__vdso_sgx_enter_enclave) + /* Prolog */ + .cfi_startproc + push %rbp + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %rbp, 0 + mov %rsp, %rbp + .cfi_def_cfa_register %rbp + push %rbx + .cfi_rel_offset %rbx, -8 + + mov %ecx, %eax +.Lenter_enclave: + /* EENTER <= function <= ERESUME */ + cmp $EENTER, %eax + jb .Linvalid_input + cmp $ERESUME, %eax + ja .Linvalid_input + + mov SGX_ENCLAVE_OFFSET_OF_RUN(%rbp), %rcx + + /* Validate that the reserved area contains only zeros. */ + mov $SGX_ENCLAVE_RUN_RESERVED_START, %rbx +1: + cmpq $0, (%rcx, %rbx) + jne .Linvalid_input + add $8, %rbx + cmpq $SGX_ENCLAVE_RUN_RESERVED_END, %rbx + jne 1b + + /* Load TCS and AEP */ + mov SGX_ENCLAVE_RUN_TCS(%rcx), %rbx + lea .Lasync_exit_pointer(%rip), %rcx + + /* Single ENCLU serving as both EENTER and AEP (ERESUME) */ +.Lasync_exit_pointer: +.Lenclu_eenter_eresume: + enclu + + /* EEXIT jumps here unless the enclave is doing something fancy. */ + mov SGX_ENCLAVE_OFFSET_OF_RUN(%rbp), %rbx + + /* Set exit_reason. */ + movl $EEXIT, SGX_ENCLAVE_RUN_LEAF(%rbx) + + /* Invoke userspace's exit handler if one was provided. */ +.Lhandle_exit: + cmpq $0, SGX_ENCLAVE_RUN_USER_HANDLER(%rbx) + jne .Linvoke_userspace_handler + + /* Success, in the sense that ENCLU was attempted. */ + xor %eax, %eax + +.Lout: + pop %rbx + leave + .cfi_def_cfa %rsp, 8 + ret + + /* The out-of-line code runs with the pre-leave stack frame. */ + .cfi_def_cfa %rbp, 16 + +.Linvalid_input: + mov $(-EINVAL), %eax + jmp .Lout + +.Lhandle_exception: + mov SGX_ENCLAVE_OFFSET_OF_RUN(%rbp), %rbx + + /* Set the exception info. */ + mov %eax, (SGX_ENCLAVE_RUN_LEAF)(%rbx) + mov %di, (SGX_ENCLAVE_RUN_EXCEPTION_VECTOR)(%rbx) + mov %si, (SGX_ENCLAVE_RUN_EXCEPTION_ERROR_CODE)(%rbx) + mov %rdx, (SGX_ENCLAVE_RUN_EXCEPTION_ADDR)(%rbx) + jmp .Lhandle_exit + +.Linvoke_userspace_handler: + /* Pass the untrusted RSP (at exit) to the callback via %rcx. */ + mov %rsp, %rcx + + /* Save struct sgx_enclave_exception %rbx is about to be clobbered. */ + mov %rbx, %rax + + /* Save the untrusted RSP offset in %rbx (non-volatile register). */ + mov %rsp, %rbx + and $0xf, %rbx + + /* + * Align stack per x86_64 ABI. Note, %rsp needs to be 16-byte aligned + * _after_ pushing the parameters on the stack, hence the bonus push. + */ + and $-0x10, %rsp + push %rax + + /* Push struct sgx_enclave_exception as a param to the callback. */ + push %rax + + /* Clear RFLAGS.DF per x86_64 ABI */ + cld + + /* + * Load the callback pointer to %rax and lfence for LVI (load value + * injection) protection before making the call. + */ + mov SGX_ENCLAVE_RUN_USER_HANDLER(%rax), %rax + lfence + call *%rax + + /* Undo the post-exit %rsp adjustment. */ + lea 0x10(%rsp, %rbx), %rsp + + /* + * If the return from callback is zero or negative, return immediately, + * else re-execute ENCLU with the postive return value interpreted as + * the requested ENCLU function. + */ + cmp $0, %eax + jle .Lout + jmp .Lenter_enclave + + .cfi_endproc + +_ASM_VDSO_EXTABLE_HANDLE(.Lenclu_eenter_eresume, .Lhandle_exception) + +SYM_FUNC_END(__vdso_sgx_enter_enclave) diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 44c33103a955..1b40b9297083 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -316,7 +316,7 @@ static struct vm_area_struct gate_vma __ro_after_init = { struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { #ifdef CONFIG_COMPAT - if (!mm || mm->context.ia32_compat) + if (!mm || !(mm->context.flags & MM_CONTEXT_HAS_VSYSCALL)) return NULL; #endif if (vsyscall_mode == NONE) diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 39eb276d0277..2c1791c4a518 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -538,7 +538,7 @@ static void amd_pmu_cpu_starting(int cpu) if (!x86_pmu.amd_nb_constraints) return; - nb_id = amd_get_nb_id(cpu); + nb_id = topology_die_id(cpu); WARN_ON_ONCE(nb_id == BAD_APICID); for_each_online_cpu(i) { diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index a88c94d65693..77b963e5e70a 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2602,7 +2602,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent struct stack_frame_ia32 frame; const struct stack_frame_ia32 __user *fp; - if (!test_thread_flag(TIF_IA32)) + if (user_64bit_mode(regs)) return 0; cs_base = get_segment_base(regs->cs); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index f1926e9f2143..af457f8cb29d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2630,7 +2630,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) u64 pebs_enabled = cpuc->pebs_enabled; handled++; - x86_pmu.drain_pebs(regs); + x86_pmu.drain_pebs(regs, &data); status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; /* @@ -4987,6 +4987,12 @@ __init int intel_pmu_init(void) x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ + if (version >= 5) { + x86_pmu.intel_cap.anythread_deprecated = edx.split.anythread_deprecated; + if (x86_pmu.intel_cap.anythread_deprecated) + pr_cont(" AnyThread deprecated, "); + } + /* * Install the hw-cache-events table: */ @@ -5512,6 +5518,10 @@ __init int intel_pmu_init(void) x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; + /* AnyThread may be deprecated on arch perfmon v5 or later */ + if (x86_pmu.intel_cap.anythread_deprecated) + x86_pmu.format_attrs = intel_arch_formats_attr; + if (x86_pmu.event_constraints) { /* * event on fixed counter2 (REF_CYCLES) only works on this diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 442e1ed4acd4..4eb7ee5fed72 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -107,14 +107,14 @@ MODULE_LICENSE("GPL"); #define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __cstate_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ +static ssize_t __cstate_##_var##_show(struct device *dev, \ + struct device_attribute *attr, \ char *page) \ { \ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ return sprintf(page, _format "\n"); \ } \ -static struct kobj_attribute format_attr_##_var = \ +static struct device_attribute format_attr_##_var = \ __ATTR(_name, 0444, __cstate_##_var##_show, NULL) static ssize_t cstate_get_attr_cpumask(struct device *dev, diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 404315df1e16..a5c6ff5f1b4c 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -642,8 +642,8 @@ int intel_pmu_drain_bts_buffer(void) rcu_read_lock(); perf_prepare_sample(&header, &data, event, ®s); - if (perf_output_begin(&handle, event, header.size * - (top - base - skip))) + if (perf_output_begin(&handle, &data, event, + header.size * (top - base - skip))) goto unlock; for (at = base; at < top; at++) { @@ -670,7 +670,9 @@ unlock: static inline void intel_pmu_drain_pebs_buffer(void) { - x86_pmu.drain_pebs(NULL); + struct perf_sample_data data; + + x86_pmu.drain_pebs(NULL, &data); } /* @@ -1259,7 +1261,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) old_to = to; #ifdef CONFIG_X86_64 - is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32); + is_64bit = kernel_ip(to) || any_64bit_mode(regs); #endif insn_init(&insn, kaddr, size, is_64bit); insn_get_length(&insn); @@ -1719,23 +1721,24 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count) return 0; } -static void __intel_pmu_pebs_event(struct perf_event *event, - struct pt_regs *iregs, - void *base, void *top, - int bit, int count, - void (*setup_sample)(struct perf_event *, - struct pt_regs *, - void *, - struct perf_sample_data *, - struct pt_regs *)) +static __always_inline void +__intel_pmu_pebs_event(struct perf_event *event, + struct pt_regs *iregs, + struct perf_sample_data *data, + void *base, void *top, + int bit, int count, + void (*setup_sample)(struct perf_event *, + struct pt_regs *, + void *, + struct perf_sample_data *, + struct pt_regs *)) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - struct perf_sample_data data; struct x86_perf_regs perf_regs; struct pt_regs *regs = &perf_regs.regs; void *at = get_next_pebs_record_by_bit(base, top, bit); - struct pt_regs dummy_iregs; + static struct pt_regs dummy_iregs; if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { /* @@ -1752,14 +1755,14 @@ static void __intel_pmu_pebs_event(struct perf_event *event, iregs = &dummy_iregs; while (count > 1) { - setup_sample(event, iregs, at, &data, regs); - perf_event_output(event, &data, regs); + setup_sample(event, iregs, at, data, regs); + perf_event_output(event, data, regs); at += cpuc->pebs_record_size; at = get_next_pebs_record_by_bit(at, top, bit); count--; } - setup_sample(event, iregs, at, &data, regs); + setup_sample(event, iregs, at, data, regs); if (iregs == &dummy_iregs) { /* * The PEBS records may be drained in the non-overflow context, @@ -1767,18 +1770,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event, * last record the same as other PEBS records, and doesn't * invoke the generic overflow handler. */ - perf_event_output(event, &data, regs); + perf_event_output(event, data, regs); } else { /* * All but the last records are processed. * The last one is left to be able to call the overflow handler. */ - if (perf_event_overflow(event, &data, regs)) + if (perf_event_overflow(event, data, regs)) x86_pmu_stop(event, 0); } } -static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) +static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; @@ -1812,7 +1815,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) return; } - __intel_pmu_pebs_event(event, iregs, at, top, 0, n, + __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n, setup_pebs_fixed_sample_data); } @@ -1835,7 +1838,7 @@ static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int } } -static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; @@ -1913,7 +1916,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) * that caused the PEBS record. It's called collision. * If collision happened, the record will be dropped. */ - if (p->status != (1ULL << bit)) { + if (pebs_status != (1ULL << bit)) { for_each_set_bit(i, (unsigned long *)&pebs_status, size) error[i]++; continue; @@ -1937,19 +1940,19 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) if (error[bit]) { perf_log_lost_samples(event, error[bit]); - if (perf_event_account_interrupt(event)) + if (iregs && perf_event_account_interrupt(event)) x86_pmu_stop(event, 0); } if (counts[bit]) { - __intel_pmu_pebs_event(event, iregs, base, + __intel_pmu_pebs_event(event, iregs, data, base, top, bit, counts[bit], setup_pebs_fixed_sample_data); } } } -static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs) +static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data) { short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1997,7 +2000,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs) if (WARN_ON_ONCE(!event->attr.precise_ip)) continue; - __intel_pmu_pebs_event(event, iregs, base, + __intel_pmu_pebs_event(event, iregs, data, base, top, bit, counts[bit], setup_pebs_adaptive_sample_data); } diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 8961653c5dd2..1aadb253d296 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1221,7 +1221,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort) * on 64-bit systems running 32-bit apps */ #ifdef CONFIG_X86_64 - is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32); + is64 = kernel_ip((unsigned long)addr) || any_64bit_mode(current_pt_regs()); #endif insn_init(&insn, addr, bytes_read, is64); insn_get_opcode(&insn); diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 86d012b3e0b4..80d52cbe2fde 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -94,8 +94,8 @@ end: return map; } -ssize_t uncore_event_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +ssize_t uncore_event_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct uncore_event_desc *event = container_of(attr, struct uncore_event_desc, attr); diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 83d2a7d490e0..9efea154349d 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -157,7 +157,7 @@ struct intel_uncore_box { #define UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS 2 struct uncore_event_desc { - struct kobj_attribute attr; + struct device_attribute attr; const char *config; }; @@ -179,8 +179,8 @@ struct pci2phy_map { struct pci2phy_map *__find_pci2phy_map(int segment); int uncore_pcibus_to_physid(struct pci_bus *bus); -ssize_t uncore_event_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf); +ssize_t uncore_event_show(struct device *dev, + struct device_attribute *attr, char *buf); static inline struct intel_uncore_pmu *dev_to_uncore_pmu(struct device *dev) { @@ -201,14 +201,14 @@ extern int __uncore_max_dies; } #define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __uncore_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ +static ssize_t __uncore_##_var##_show(struct device *dev, \ + struct device_attribute *attr, \ char *page) \ { \ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ return sprintf(page, _format "\n"); \ } \ -static struct kobj_attribute format_attr_##_var = \ +static struct device_attribute format_attr_##_var = \ __ATTR(_name, 0444, __uncore_##_var##_show, NULL) static inline bool uncore_pmc_fixed(int idx) diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 39e632ed6ca9..bbd1120ae161 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -475,7 +475,7 @@ enum perf_snb_uncore_imc_freerunning_types { static struct freerunning_counters snb_uncore_imc_freerunning[] = { [SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_READS_BASE, 0x0, 0x0, 1, 32 }, - [SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE, + [SNB_PCI_UNCORE_IMC_DATA_WRITES] = { SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE, 0x0, 0x0, 1, 32 }, [SNB_PCI_UNCORE_IMC_GT_REQUESTS] = { SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE, 0x0, 0x0, 1, 32 }, diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index ee2b9b9fc2a5..6a8edfe59b09 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -585,6 +585,7 @@ union perf_capabilities { u64 pebs_baseline:1; u64 perf_metrics:1; u64 pebs_output_pt_available:1; + u64 anythread_deprecated:1; }; u64 capabilities; }; @@ -727,7 +728,7 @@ struct x86_pmu { int pebs_record_size; int pebs_buffer_size; int max_pebs_events; - void (*drain_pebs)(struct pt_regs *regs); + void (*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data); struct event_constraint *pebs_constraints; void (*pebs_aliases)(struct perf_event *event); unsigned long large_pebs_flags; diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 7c0120e2e957..7dbbeaacd995 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -93,18 +93,6 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { * any other bit is reserved */ #define RAPL_EVENT_MASK 0xFFULL - -#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format) \ -static ssize_t __rapl_##_var##_show(struct kobject *kobj, \ - struct kobj_attribute *attr, \ - char *page) \ -{ \ - BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ - return sprintf(page, _format "\n"); \ -} \ -static struct kobj_attribute format_attr_##_var = \ - __ATTR(_name, 0444, __rapl_##_var##_show, NULL) - #define RAPL_CNTR_WIDTH 32 #define RAPL_EVENT_ATTR_STR(_name, v, str) \ @@ -441,7 +429,7 @@ static struct attribute_group rapl_pmu_events_group = { .attrs = attrs_empty, }; -DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7"); +PMU_FORMAT_ATTR(event, "config:0-7"); static struct attribute *rapl_formats_attr[] = { &format_attr_event.attr, NULL, diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 40e0e322161d..284e73661a18 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -273,11 +273,15 @@ void __init hv_apic_init(void) pr_info("Hyper-V: Using enlightened APIC (%s mode)", x2apic_enabled() ? "x2apic" : "xapic"); /* - * With x2apic, architectural x2apic MSRs are equivalent to the - * respective synthetic MSRs, so there's no need to override - * the apic accessors. The only exception is - * hv_apic_eoi_write, because it benefits from lazy EOI when - * available, but it works for both xapic and x2apic modes. + * When in x2apic mode, don't use the Hyper-V specific APIC + * accessors since the field layout in the ICR register is + * different in x2apic mode. Furthermore, the architectural + * x2apic MSRs function just as well as the Hyper-V + * synthetic APIC MSRs, so there's no benefit in having + * separate Hyper-V accessors for x2apic mode. The only + * exception is hv_apic_eoi_write, because it benefits from + * lazy EOI when available, but the same accessor works for + * both xapic and x2apic because the field layout is the same. */ apic_set_eoi_write(hv_apic_eoi_write); if (!x2apic_enabled()) { diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 81cf22398cd1..5e3d9b7fd5fb 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -347,7 +347,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig, */ unsafe_put_user(*((u64 *)&code), (u64 __user *)frame->retcode, Efault); unsafe_put_sigcontext32(&frame->uc.uc_mcontext, fp, regs, set, Efault); - unsafe_put_user(*(__u64 *)set, (__u64 *)&frame->uc.uc_sigmask, Efault); + unsafe_put_user(*(__u64 *)set, (__u64 __user *)&frame->uc.uc_sigmask, Efault); user_access_end(); if (__copy_siginfo_to_user32(&frame->info, &ksig->info)) diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 6d2df1ee427b..65064d9f7fa6 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -159,6 +159,8 @@ static inline u64 x86_default_get_root_pointer(void) extern int x86_acpi_numa_init(void); #endif /* CONFIG_ACPI_NUMA */ +struct cper_ia_proc_ctx; + #ifdef CONFIG_ACPI_APEI static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) { @@ -177,6 +179,15 @@ static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr) */ return PAGE_KERNEL_NOENC; } + +int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info, + u64 lapic_id); +#else +static inline int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info, + u64 lapic_id) +{ + return -EINVAL; +} #endif #define ACPI_TABLE_UPGRADE_MAX_PHYS (max_low_pfn_mapped << PAGE_SHIFT) diff --git a/arch/x86/include/asm/cacheinfo.h b/arch/x86/include/asm/cacheinfo.h index 86b63c7feab7..86b2e0dcc4bf 100644 --- a/arch/x86/include/asm/cacheinfo.h +++ b/arch/x86/include/asm/cacheinfo.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_CACHEINFO_H #define _ASM_X86_CACHEINFO_H -void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id); -void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id); +void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu); +void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu); #endif /* _ASM_X86_CACHEINFO_H */ diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 0e327a01f50f..f145e3326c6d 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -177,14 +177,13 @@ typedef struct user_regs_struct compat_elf_gregset_t; static inline void __user *arch_compat_alloc_user_space(long len) { - compat_uptr_t sp; - - if (test_thread_flag(TIF_IA32)) { - sp = task_pt_regs(current)->sp; - } else { - /* -128 for the x32 ABI redzone */ - sp = task_pt_regs(current)->sp - 128; - } + compat_uptr_t sp = task_pt_regs(current)->sp; + + /* + * -128 for the x32 ABI redzone. For IA32, it is not strictly + * necessary, but not harmful. + */ + sp -= 128; return (void __user *)round_down(sp - len, 16); } diff --git a/arch/x86/include/asm/copy_mc_test.h b/arch/x86/include/asm/copy_mc_test.h deleted file mode 100644 index e4991ba96726..000000000000 --- a/arch/x86/include/asm/copy_mc_test.h +++ /dev/null @@ -1,75 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _COPY_MC_TEST_H_ -#define _COPY_MC_TEST_H_ - -#ifndef __ASSEMBLY__ -#ifdef CONFIG_COPY_MC_TEST -extern unsigned long copy_mc_test_src; -extern unsigned long copy_mc_test_dst; - -static inline void copy_mc_inject_src(void *addr) -{ - if (addr) - copy_mc_test_src = (unsigned long) addr; - else - copy_mc_test_src = ~0UL; -} - -static inline void copy_mc_inject_dst(void *addr) -{ - if (addr) - copy_mc_test_dst = (unsigned long) addr; - else - copy_mc_test_dst = ~0UL; -} -#else /* CONFIG_COPY_MC_TEST */ -static inline void copy_mc_inject_src(void *addr) -{ -} - -static inline void copy_mc_inject_dst(void *addr) -{ -} -#endif /* CONFIG_COPY_MC_TEST */ - -#else /* __ASSEMBLY__ */ -#include <asm/export.h> - -#ifdef CONFIG_COPY_MC_TEST -.macro COPY_MC_TEST_CTL - .pushsection .data - .align 8 - .globl copy_mc_test_src - copy_mc_test_src: - .quad 0 - EXPORT_SYMBOL_GPL(copy_mc_test_src) - .globl copy_mc_test_dst - copy_mc_test_dst: - .quad 0 - EXPORT_SYMBOL_GPL(copy_mc_test_dst) - .popsection -.endm - -.macro COPY_MC_TEST_SRC reg count target - leaq \count(\reg), %r9 - cmp copy_mc_test_src, %r9 - ja \target -.endm - -.macro COPY_MC_TEST_DST reg count target - leaq \count(\reg), %r9 - cmp copy_mc_test_dst, %r9 - ja \target -.endm -#else -.macro COPY_MC_TEST_CTL -.endm - -.macro COPY_MC_TEST_SRC reg count target -.endm - -.macro COPY_MC_TEST_DST reg count target -.endm -#endif /* CONFIG_COPY_MC_TEST */ -#endif /* __ASSEMBLY__ */ -#endif /* _COPY_MC_TEST_H_ */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index dad350d42ecf..f5ef2d5b9231 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -241,6 +241,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ #define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */ +#define X86_FEATURE_SGX ( 9*32+ 2) /* Software Guard Extensions */ #define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ #define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ #define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ @@ -356,6 +357,7 @@ #define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ #define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ #define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */ +#define X86_FEATURE_SGX_LC (16*32+30) /* Software Guard Extensions Launch Control */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 5861d34f9771..7947cb1782da 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -62,6 +62,12 @@ # define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) #endif +#ifdef CONFIG_X86_SGX +# define DISABLE_SGX 0 +#else +# define DISABLE_SGX (1 << (X86_FEATURE_SGX & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -74,7 +80,7 @@ #define DISABLED_MASK6 0 #define DISABLED_MASK7 (DISABLE_PTI) #define DISABLED_MASK8 0 -#define DISABLED_MASK9 (DISABLE_SMAP) +#define DISABLED_MASK9 (DISABLE_SMAP|DISABLE_SGX) #define DISABLED_MASK10 0 #define DISABLED_MASK11 0 #define DISABLED_MASK12 0 diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index b9a5d488f1a5..44a9b9940535 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -186,8 +186,9 @@ static inline void elf_common_init(struct thread_struct *t, #define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ elf_common_init(¤t->thread, regs, __USER_DS) -void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp); -#define compat_start_thread compat_start_thread +void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp, bool x32); +#define COMPAT_START_THREAD(ex, regs, new_ip, new_sp) \ + compat_start_thread(regs, new_ip, new_sp, ex->e_machine == EM_X86_64) void set_personality_ia32(bool); #define COMPAT_SET_PERSONALITY(ex) \ @@ -361,7 +362,7 @@ do { \ #define AT_SYSINFO 32 #define COMPAT_ARCH_DLINFO \ -if (test_thread_flag(TIF_X32)) \ +if (exec->e_machine == EM_X86_64) \ ARCH_DLINFO_X32; \ else \ ARCH_DLINFO_IA32 @@ -382,8 +383,10 @@ struct linux_binprm; extern int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp); extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm, - int uses_interp); -#define compat_arch_setup_additional_pages compat_arch_setup_additional_pages + int uses_interp, bool x32); +#define COMPAT_ARCH_SETUP_ADDITIONAL_PAGES(bprm, ex, interpreter) \ + compat_arch_setup_additional_pages(bprm, interpreter, \ + (ex->e_machine == EM_X86_64)) /* Do not change the values. See get_align_mask() */ enum align_flags { diff --git a/arch/x86/include/asm/enclu.h b/arch/x86/include/asm/enclu.h new file mode 100644 index 000000000000..b1314e41a744 --- /dev/null +++ b/arch/x86/include/asm/enclu.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_ENCLU_H +#define _ASM_X86_ENCLU_H + +#define EENTER 0x02 +#define ERESUME 0x03 +#define EEXIT 0x04 + +#endif /* _ASM_X86_ENCLU_H */ diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 5c1ae3eff9d4..a8c3d284fa46 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -201,6 +201,21 @@ static inline int insn_offset_immediate(struct insn *insn) return insn_offset_displacement(insn) + insn->displacement.nbytes; } +/** + * for_each_insn_prefix() -- Iterate prefixes in the instruction + * @insn: Pointer to struct insn. + * @idx: Index storage. + * @prefix: Prefix byte. + * + * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix + * and the index is stored in @idx (note that this @idx is just for a cursor, + * do not change it.) + * Since prefixes.nbytes can be bigger than 4 if some prefixes + * are repeated, it cannot be used for looping over the prefixes. + */ +#define for_each_insn_prefix(insn, idx, prefix) \ + for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++) + #define POP_SS_OPCODE 0x1f #define MOV_SREG_OPCODE 0x8e diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h index bd7f02480ca1..438ccd4f3cc4 100644 --- a/arch/x86/include/asm/inst.h +++ b/arch/x86/include/asm/inst.h @@ -143,21 +143,6 @@ .macro MODRM mod opd1 opd2 .byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3) .endm - -.macro RDPID opd - REG_TYPE rdpid_opd_type \opd - .if rdpid_opd_type == REG_TYPE_R64 - R64_NUM rdpid_opd \opd - .else - R32_NUM rdpid_opd \opd - .endif - .byte 0xf3 - .if rdpid_opd > 7 - PFX_REX rdpid_opd 0 - .endif - .byte 0x0f, 0xc7 - MODRM 0xc0 rdpid_opd 0x7 -.endm #endif #endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d44858b69353..7e5f33a0d0e2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -639,6 +639,7 @@ struct kvm_vcpu_arch { int cpuid_nent; struct kvm_cpuid_entry2 *cpuid_entries; + unsigned long cr3_lm_rsvd_bits; int maxphyaddr; int max_tdp_level; @@ -1655,6 +1656,7 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); +int kvm_cpu_has_extint(struct kvm_vcpu *v); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index a0f147893a04..56cdeaac76a0 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -177,7 +177,8 @@ enum mce_notifier_prios { MCE_PRIO_EXTLOG, MCE_PRIO_UC, MCE_PRIO_EARLY, - MCE_PRIO_CEC + MCE_PRIO_CEC, + MCE_PRIO_HIGHEST = MCE_PRIO_CEC }; struct notifier_block; @@ -198,16 +199,22 @@ static inline void enable_copy_mc_fragile(void) } #endif +struct cper_ia_proc_ctx; + #ifdef CONFIG_X86_MCE int mcheck_init(void); void mcheck_cpu_init(struct cpuinfo_x86 *c); void mcheck_cpu_clear(struct cpuinfo_x86 *c); void mcheck_vendor_init_severity(void); +int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, + u64 lapic_id); #else static inline int mcheck_init(void) { return 0; } static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {} static inline void mcheck_vendor_init_severity(void) {} +static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, + u64 lapic_id) { return -EINVAL; } #endif #ifdef CONFIG_X86_ANCIENT_MCE diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 9257667d13c5..5d7494631ea9 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -6,6 +6,12 @@ #include <linux/rwsem.h> #include <linux/mutex.h> #include <linux/atomic.h> +#include <linux/bits.h> + +/* Uprobes on this MM assume 32-bit code */ +#define MM_CONTEXT_UPROBE_IA32 BIT(0) +/* vsyscall page is accessible on this MM */ +#define MM_CONTEXT_HAS_VSYSCALL BIT(1) /* * x86 has arch-specific MMU state beyond what lives in mm_struct. @@ -33,8 +39,7 @@ typedef struct { #endif #ifdef CONFIG_X86_64 - /* True if mm supports a task running in 32 bit compatibility mode. */ - unsigned short ia32_compat; + unsigned short flags; #endif struct mutex lock; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index d98016b83755..054a79157323 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -177,7 +177,7 @@ static inline void arch_exit_mmap(struct mm_struct *mm) static inline bool is_64bit_mm(struct mm_struct *mm) { return !IS_ENABLED(CONFIG_IA32_EMULATION) || - !(mm->context.ia32_compat == TIF_IA32); + !(mm->context.flags & MM_CONTEXT_UPROBE_IA32); } #else static inline bool is_64bit_mm(struct mm_struct *mm) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 972a34d93505..6a5085a5f9d8 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -139,6 +139,7 @@ #define MSR_IA32_MCG_CAP 0x00000179 #define MSR_IA32_MCG_STATUS 0x0000017a #define MSR_IA32_MCG_CTL 0x0000017b +#define MSR_ERROR_CONTROL 0x0000017f #define MSR_IA32_MCG_EXT_CTL 0x000004d0 #define MSR_OFFCORE_RSP_0 0x000001a6 @@ -609,6 +610,8 @@ #define FEAT_CTL_LOCKED BIT(0) #define FEAT_CTL_VMX_ENABLED_INSIDE_SMX BIT(1) #define FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX BIT(2) +#define FEAT_CTL_SGX_LC_ENABLED BIT(17) +#define FEAT_CTL_SGX_ENABLED BIT(18) #define FEAT_CTL_LMCE_ENABLED BIT(20) #define MSR_IA32_TSC_ADJUST 0x0000003b @@ -628,6 +631,12 @@ #define MSR_IA32_UCODE_WRITE 0x00000079 #define MSR_IA32_UCODE_REV 0x0000008b +/* Intel SGX Launch Enclave Public Key Hash MSRs */ +#define MSR_IA32_SGXLEPUBKEYHASH0 0x0000008C +#define MSR_IA32_SGXLEPUBKEYHASH1 0x0000008D +#define MSR_IA32_SGXLEPUBKEYHASH2 0x0000008E +#define MSR_IA32_SGXLEPUBKEYHASH3 0x0000008F + #define MSR_IA32_SMM_MONITOR_CTL 0x0000009b #define MSR_IA32_SMBASE 0x0000009e diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index e039a933aca3..29dd27b5a339 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -88,8 +88,6 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx, static inline void __sti_mwait(unsigned long eax, unsigned long ecx) { - trace_hardirqs_on(); - mds_idle_clear_cpu_buffers(); /* "mwait %eax, %ecx;" */ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index d25cc6830e89..f8dce11d2bc1 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -812,17 +812,6 @@ extern void default_banner(void); #endif /* CONFIG_PARAVIRT_XXL */ #endif /* CONFIG_X86_64 */ -#ifdef CONFIG_PARAVIRT_XXL - -#define GET_CR2_INTO_AX \ - PARA_SITE(PARA_PATCH(PV_MMU_read_cr2), \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_ops+PV_MMU_read_cr2); \ - ) - -#endif /* CONFIG_PARAVIRT_XXL */ - - #endif /* __ASSEMBLY__ */ #else /* CONFIG_PARAVIRT */ # define default_banner x86_init_noop diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 6960cd6d1f23..b9a7fd0a27e2 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -137,7 +137,9 @@ union cpuid10_edx { struct { unsigned int num_counters_fixed:5; unsigned int bit_width_fixed:8; - unsigned int reserved:19; + unsigned int reserved1:2; + unsigned int anythread_deprecated:1; + unsigned int reserved2:16; } split; unsigned int full; }; diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 816b31c68550..394757ee030a 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -155,6 +155,7 @@ enum page_cache_mode { #define _PAGE_ENC (_AT(pteval_t, sme_me_mask)) #define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT) +#define _PAGE_LARGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT_LARGE) #define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC)) #define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP)) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 82a08b585818..c20a52b5534b 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -813,10 +813,8 @@ extern int set_tsc_mode(unsigned int val); DECLARE_PER_CPU(u64, msr_misc_features_shadow); #ifdef CONFIG_CPU_SUP_AMD -extern u16 amd_get_nb_id(int cpu); extern u32 amd_get_nodes_per_socket(void); #else -static inline u16 amd_get_nb_id(int cpu) { return 0; } static inline u32 amd_get_nodes_per_socket(void) { return 0; } #endif diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index 6bfc878f6771..6a9ccc1b2be5 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h @@ -28,4 +28,14 @@ #endif #endif /* CONFIG_SPARSEMEM */ + +#ifndef __ASSEMBLY__ +#ifdef CONFIG_NUMA_KEEP_MEMINFO +extern int phys_to_target_node(phys_addr_t start); +#define phys_to_target_node phys_to_target_node +extern int memory_add_physaddr_to_nid(u64 start); +#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid +#endif +#endif /* __ASSEMBLY__ */ + #endif /* _ASM_X86_SPARSEMEM_H */ diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 49600643faba..f248eb2ac2d4 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -88,9 +88,6 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs) return (unsigned long *)task->thread.sp; } -void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, - unsigned long *stack, const char *log_lvl); - /* The form of the top of the frame on the stack */ struct stack_frame { struct stack_frame *next_frame; diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h index 0fd4a9dfb29c..ab7382f92aff 100644 --- a/arch/x86/include/asm/sync_core.h +++ b/arch/x86/include/asm/sync_core.h @@ -98,12 +98,13 @@ static inline void sync_core_before_usermode(void) /* With PTI, we unconditionally serialize before running user code. */ if (static_cpu_has(X86_FEATURE_PTI)) return; + /* - * Return from interrupt and NMI is done through iret, which is core - * serializing. + * Even if we're in an interrupt, we might reschedule before returning, + * in which case we could switch to a different thread in the same mm + * and return using SYSRET or SYSEXIT. Instead of trying to keep + * track of our need to sync the core, just sync right away. */ - if (in_irq() || in_nmi()) - return; sync_core(); } diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 44733a4bfc42..a12b9644193b 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -91,7 +91,6 @@ struct thread_info { #define TIF_NEED_FPU_LOAD 14 /* load FPU on return to userspace */ #define TIF_NOCPUID 15 /* CPUID is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ -#define TIF_IA32 17 /* IA32 compatibility process */ #define TIF_SLD 18 /* Restore split lock detection on context switch */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ @@ -101,7 +100,6 @@ struct thread_info { #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ #define TIF_ADDR32 29 /* 32-bit address space on 64 bits */ -#define TIF_X32 30 /* 32-bit native x86-64 binary */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) @@ -120,7 +118,6 @@ struct thread_info { #define _TIF_NEED_FPU_LOAD (1 << TIF_NEED_FPU_LOAD) #define _TIF_NOCPUID (1 << TIF_NOCPUID) #define _TIF_NOTSC (1 << TIF_NOTSC) -#define _TIF_IA32 (1 << TIF_IA32) #define _TIF_SLD (1 << TIF_SLD) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) @@ -129,7 +126,6 @@ struct thread_info { #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) #define _TIF_ADDR32 (1 << TIF_ADDR32) -#define _TIF_X32 (1 << TIF_X32) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW_BASE \ diff --git a/arch/x86/include/asm/trap_pf.h b/arch/x86/include/asm/trap_pf.h index 305bc1214aef..10b1de500ab1 100644 --- a/arch/x86/include/asm/trap_pf.h +++ b/arch/x86/include/asm/trap_pf.h @@ -11,6 +11,7 @@ * bit 3 == 1: use of reserved bit detected * bit 4 == 1: fault was an instruction fetch * bit 5 == 1: protection keys block access + * bit 15 == 1: SGX MMU page-fault */ enum x86_pf_error_code { X86_PF_PROT = 1 << 0, @@ -19,6 +20,7 @@ enum x86_pf_error_code { X86_PF_RSVD = 1 << 3, X86_PF_INSTR = 1 << 4, X86_PF_PK = 1 << 5, + X86_PF_SGX = 1 << 15, }; #endif /* _ASM_X86_TRAP_PF_H */ diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index 08b3d810dfba..1b6455f881f9 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h @@ -28,6 +28,20 @@ enum uv_bios_cmd { UV_BIOS_SET_LEGACY_VGA_TARGET }; +#define UV_BIOS_EXTRA 0x10000 +#define UV_BIOS_GET_PCI_TOPOLOGY 0x10001 +#define UV_BIOS_GET_GEOINFO 0x10003 + +#define UV_BIOS_EXTRA_OP_MEM_COPYIN 0x1000 +#define UV_BIOS_EXTRA_OP_MEM_COPYOUT 0x2000 +#define UV_BIOS_EXTRA_OP_MASK 0x0fff +#define UV_BIOS_EXTRA_GET_HEAPSIZE 1 +#define UV_BIOS_EXTRA_INSTALL_HEAP 2 +#define UV_BIOS_EXTRA_MASTER_NASID 3 +#define UV_BIOS_EXTRA_OBJECT_COUNT (10|UV_BIOS_EXTRA_OP_MEM_COPYOUT) +#define UV_BIOS_EXTRA_ENUM_OBJECTS (12|UV_BIOS_EXTRA_OP_MEM_COPYOUT) +#define UV_BIOS_EXTRA_ENUM_PORTS (13|UV_BIOS_EXTRA_OP_MEM_COPYOUT) + /* * Status values returned from a BIOS call. */ @@ -109,6 +123,32 @@ struct uv_systab { } entry[1]; /* additional entries follow */ }; extern struct uv_systab *uv_systab; + +#define UV_BIOS_MAXSTRING 128 +struct uv_bios_hub_info { + unsigned int id; + union { + struct { + unsigned long long this_part:1; + unsigned long long is_shared:1; + unsigned long long is_disabled:1; + } fields; + struct { + unsigned long long flags; + unsigned long long reserved; + } b; + } f; + char name[UV_BIOS_MAXSTRING]; + char location[UV_BIOS_MAXSTRING]; + unsigned int ports; +}; + +struct uv_bios_port_info { + unsigned int port; + unsigned int conn_id; + unsigned int conn_port; +}; + /* (... end of definitions from UV BIOS ...) */ enum { @@ -142,6 +182,15 @@ extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect); extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *); extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus); +extern s64 uv_bios_get_master_nasid(u64 sz, u64 *nasid); +extern s64 uv_bios_get_heapsize(u64 nasid, u64 sz, u64 *heap_sz); +extern s64 uv_bios_install_heap(u64 nasid, u64 sz, u64 *heap); +extern s64 uv_bios_obj_count(u64 nasid, u64 sz, u64 *objcnt); +extern s64 uv_bios_enum_objs(u64 nasid, u64 sz, u64 *objbuf); +extern s64 uv_bios_enum_ports(u64 nasid, u64 obj_id, u64 sz, u64 *portbuf); +extern s64 uv_bios_get_geoinfo(u64 nasid, u64 sz, u64 *geo); +extern s64 uv_bios_get_pci_topology(u64 sz, u64 *buf); + extern int uv_bios_init(void); extern unsigned long get_uv_systab_phys(bool msg); @@ -151,6 +200,8 @@ extern long sn_partition_id; extern long sn_coherency_id; extern long sn_region_size; extern long system_serial_number; +extern ssize_t uv_get_archtype(char *buf, int len); +extern int uv_get_hubless_system(void); extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 172d3e4a9e4b..648eb23fe7f0 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -2,14 +2,8 @@ #ifndef _ASM_X86_UV_UV_H #define _ASM_X86_UV_UV_H -#include <asm/tlbflush.h> - enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC}; -struct cpumask; -struct mm_struct; -struct flush_tlb_info; - #ifdef CONFIG_X86_UV #include <linux/efi.h> @@ -44,10 +38,6 @@ static inline int is_uv_system(void) { return 0; } static inline int is_uv_hubbed(int uv) { return 0; } static inline void uv_cpu_init(void) { } static inline void uv_system_init(void) { } -static inline const struct cpumask * -uv_flush_tlb_others(const struct cpumask *cpumask, - const struct flush_tlb_info *info) -{ return cpumask; } #endif /* X86_UV */ diff --git a/arch/x86/include/asm/uv/uv_geo.h b/arch/x86/include/asm/uv/uv_geo.h new file mode 100644 index 000000000000..f241451035fb --- /dev/null +++ b/arch/x86/include/asm/uv/uv_geo.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2020 Hewlett Packard Enterprise Development LP. All rights reserved. + */ + +#ifndef _ASM_UV_GEO_H +#define _ASM_UV_GEO_H + +/* Type declaractions */ + +/* Size of a geoid_s structure (must be before decl. of geoid_u) */ +#define GEOID_SIZE 8 + +/* Fields common to all substructures */ +struct geo_common_s { + unsigned char type; /* What type of h/w is named by this geoid_s */ + unsigned char blade; + unsigned char slot; /* slot is IRU */ + unsigned char upos; + unsigned char rack; +}; + +/* Additional fields for particular types of hardware */ +struct geo_node_s { + struct geo_common_s common; /* No additional fields needed */ +}; + +struct geo_rtr_s { + struct geo_common_s common; /* No additional fields needed */ +}; + +struct geo_iocntl_s { + struct geo_common_s common; /* No additional fields needed */ +}; + +struct geo_pcicard_s { + struct geo_iocntl_s common; + char bus; /* Bus/widget number */ + char slot; /* PCI slot number */ +}; + +/* Subcomponents of a node */ +struct geo_cpu_s { + struct geo_node_s node; + unsigned char socket:4, /* Which CPU on the node */ + thread:4; + unsigned char core; +}; + +struct geo_mem_s { + struct geo_node_s node; + char membus; /* The memory bus on the node */ + char memslot; /* The memory slot on the bus */ +}; + +union geoid_u { + struct geo_common_s common; + struct geo_node_s node; + struct geo_iocntl_s iocntl; + struct geo_pcicard_s pcicard; + struct geo_rtr_s rtr; + struct geo_cpu_s cpu; + struct geo_mem_s mem; + char padsize[GEOID_SIZE]; +}; + +/* Defined constants */ + +#define GEO_MAX_LEN 48 + +#define GEO_TYPE_INVALID 0 +#define GEO_TYPE_MODULE 1 +#define GEO_TYPE_NODE 2 +#define GEO_TYPE_RTR 3 +#define GEO_TYPE_IOCNTL 4 +#define GEO_TYPE_IOCARD 5 +#define GEO_TYPE_CPU 6 +#define GEO_TYPE_MEM 7 +#define GEO_TYPE_MAX (GEO_TYPE_MEM+1) + +static inline int geo_rack(union geoid_u g) +{ + return (g.common.type == GEO_TYPE_INVALID) ? + -1 : g.common.rack; +} + +static inline int geo_slot(union geoid_u g) +{ + return (g.common.type == GEO_TYPE_INVALID) ? + -1 : g.common.upos; +} + +static inline int geo_blade(union geoid_u g) +{ + return (g.common.type == GEO_TYPE_INVALID) ? + -1 : g.common.blade * 2 + g.common.slot; +} + +#endif /* _ASM_UV_GEO_H */ diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index bbcdc7b8f963..b5d23470f56b 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -15,6 +15,8 @@ struct vdso_image { unsigned long size; /* Always a multiple of PAGE_SIZE */ unsigned long alt, alt_len; + unsigned long extable_base, extable_len; + const void *extable; long sym_vvar_start; /* Negative offset to the vvar area */ @@ -45,6 +47,9 @@ extern void __init init_vdso_image(const struct vdso_image *image); extern int map_vdso_once(const struct vdso_image *image, unsigned long addr); +extern bool fixup_vdso_exception(struct pt_regs *regs, int trapnr, + unsigned long error_code, + unsigned long fault_addr); #endif /* __ASSEMBLER__ */ #endif /* _ASM_X86_VDSO_H */ diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 812e9b4c1114..950afebfba88 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -32,6 +32,7 @@ #define KVM_FEATURE_POLL_CONTROL 12 #define KVM_FEATURE_PV_SCHED_YIELD 13 #define KVM_FEATURE_ASYNC_PF_INT 14 +#define KVM_FEATURE_MSI_EXT_DEST_ID 15 #define KVM_HINTS_REALTIME 0 diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h new file mode 100644 index 000000000000..9034f3007c4e --- /dev/null +++ b/arch/x86/include/uapi/asm/sgx.h @@ -0,0 +1,168 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright(c) 2016-20 Intel Corporation. + */ +#ifndef _UAPI_ASM_X86_SGX_H +#define _UAPI_ASM_X86_SGX_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +/** + * enum sgx_page_flags - page control flags + * %SGX_PAGE_MEASURE: Measure the page contents with a sequence of + * ENCLS[EEXTEND] operations. + */ +enum sgx_page_flags { + SGX_PAGE_MEASURE = 0x01, +}; + +#define SGX_MAGIC 0xA4 + +#define SGX_IOC_ENCLAVE_CREATE \ + _IOW(SGX_MAGIC, 0x00, struct sgx_enclave_create) +#define SGX_IOC_ENCLAVE_ADD_PAGES \ + _IOWR(SGX_MAGIC, 0x01, struct sgx_enclave_add_pages) +#define SGX_IOC_ENCLAVE_INIT \ + _IOW(SGX_MAGIC, 0x02, struct sgx_enclave_init) +#define SGX_IOC_ENCLAVE_PROVISION \ + _IOW(SGX_MAGIC, 0x03, struct sgx_enclave_provision) + +/** + * struct sgx_enclave_create - parameter structure for the + * %SGX_IOC_ENCLAVE_CREATE ioctl + * @src: address for the SECS page data + */ +struct sgx_enclave_create { + __u64 src; +}; + +/** + * struct sgx_enclave_add_pages - parameter structure for the + * %SGX_IOC_ENCLAVE_ADD_PAGE ioctl + * @src: start address for the page data + * @offset: starting page offset + * @length: length of the data (multiple of the page size) + * @secinfo: address for the SECINFO data + * @flags: page control flags + * @count: number of bytes added (multiple of the page size) + */ +struct sgx_enclave_add_pages { + __u64 src; + __u64 offset; + __u64 length; + __u64 secinfo; + __u64 flags; + __u64 count; +}; + +/** + * struct sgx_enclave_init - parameter structure for the + * %SGX_IOC_ENCLAVE_INIT ioctl + * @sigstruct: address for the SIGSTRUCT data + */ +struct sgx_enclave_init { + __u64 sigstruct; +}; + +/** + * struct sgx_enclave_provision - parameter structure for the + * %SGX_IOC_ENCLAVE_PROVISION ioctl + * @fd: file handle of /dev/sgx_provision + */ +struct sgx_enclave_provision { + __u64 fd; +}; + +struct sgx_enclave_run; + +/** + * typedef sgx_enclave_user_handler_t - Exit handler function accepted by + * __vdso_sgx_enter_enclave() + * @run: The run instance given by the caller + * + * The register parameters contain the snapshot of their values at enclave + * exit. An invalid ENCLU function number will cause -EINVAL to be returned + * to the caller. + * + * Return: + * - <= 0: The given value is returned back to the caller. + * - > 0: ENCLU function to invoke, either EENTER or ERESUME. + */ +typedef int (*sgx_enclave_user_handler_t)(long rdi, long rsi, long rdx, + long rsp, long r8, long r9, + struct sgx_enclave_run *run); + +/** + * struct sgx_enclave_run - the execution context of __vdso_sgx_enter_enclave() + * @tcs: TCS used to enter the enclave + * @function: The last seen ENCLU function (EENTER, ERESUME or EEXIT) + * @exception_vector: The interrupt vector of the exception + * @exception_error_code: The exception error code pulled out of the stack + * @exception_addr: The address that triggered the exception + * @user_handler: User provided callback run on exception + * @user_data: Data passed to the user handler + * @reserved Reserved for future extensions + * + * If @user_handler is provided, the handler will be invoked on all return paths + * of the normal flow. The user handler may transfer control, e.g. via a + * longjmp() call or a C++ exception, without returning to + * __vdso_sgx_enter_enclave(). + */ +struct sgx_enclave_run { + __u64 tcs; + __u32 function; + __u16 exception_vector; + __u16 exception_error_code; + __u64 exception_addr; + __u64 user_handler; + __u64 user_data; + __u8 reserved[216]; +}; + +/** + * typedef vdso_sgx_enter_enclave_t - Prototype for __vdso_sgx_enter_enclave(), + * a vDSO function to enter an SGX enclave. + * @rdi: Pass-through value for RDI + * @rsi: Pass-through value for RSI + * @rdx: Pass-through value for RDX + * @function: ENCLU function, must be EENTER or ERESUME + * @r8: Pass-through value for R8 + * @r9: Pass-through value for R9 + * @run: struct sgx_enclave_run, must be non-NULL + * + * NOTE: __vdso_sgx_enter_enclave() does not ensure full compliance with the + * x86-64 ABI, e.g. doesn't handle XSAVE state. Except for non-volatile + * general purpose registers, EFLAGS.DF, and RSP alignment, preserving/setting + * state in accordance with the x86-64 ABI is the responsibility of the enclave + * and its runtime, i.e. __vdso_sgx_enter_enclave() cannot be called from C + * code without careful consideration by both the enclave and its runtime. + * + * All general purpose registers except RAX, RBX and RCX are passed as-is to the + * enclave. RAX, RBX and RCX are consumed by EENTER and ERESUME and are loaded + * with @function, asynchronous exit pointer, and @run.tcs respectively. + * + * RBP and the stack are used to anchor __vdso_sgx_enter_enclave() to the + * pre-enclave state, e.g. to retrieve @run.exception and @run.user_handler + * after an enclave exit. All other registers are available for use by the + * enclave and its runtime, e.g. an enclave can push additional data onto the + * stack (and modify RSP) to pass information to the optional user handler (see + * below). + * + * Most exceptions reported on ENCLU, including those that occur within the + * enclave, are fixed up and reported synchronously instead of being delivered + * via a standard signal. Debug Exceptions (#DB) and Breakpoints (#BP) are + * never fixed up and are always delivered via standard signals. On synchrously + * reported exceptions, -EFAULT is returned and details about the exception are + * recorded in @run.exception, the optional sgx_enclave_exception struct. + * + * Return: + * - 0: ENCLU function was successfully executed. + * - -EINVAL: Invalid ENCL number (neither EENTER nor ERESUME). + */ +typedef int (*vdso_sgx_enter_enclave_t)(unsigned long rdi, unsigned long rsi, + unsigned long rdx, unsigned int function, + unsigned long r8, unsigned long r9, + struct sgx_enclave_run *run); + +#endif /* _UAPI_ASM_X86_SGX_H */ diff --git a/arch/x86/kernel/acpi/apei.c b/arch/x86/kernel/acpi/apei.c index c22fb55abcfd..0916f00a992e 100644 --- a/arch/x86/kernel/acpi/apei.c +++ b/arch/x86/kernel/acpi/apei.c @@ -43,3 +43,8 @@ void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) apei_mce_report_mem_error(sev, mem_err); #endif } + +int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id) +{ + return apei_smca_report_x86_error(ctx_info, lapic_id); +} diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 4adbe65afe23..8d778e46725d 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -807,6 +807,15 @@ static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm) temp_mm_state_t temp_state; lockdep_assert_irqs_disabled(); + + /* + * Make sure not to be in TLB lazy mode, as otherwise we'll end up + * with a stale address space WITHOUT being in lazy mode after + * restoring the previous mm. + */ + if (this_cpu_read(cpu_tlbstate.is_lazy)) + leave_mm(smp_processor_id()); + temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm); switch_mm_irqs_off(NULL, mm, current); @@ -1365,7 +1374,7 @@ void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const voi * @addr: address to patch * @opcode: opcode of new instruction * @len: length to copy - * @handler: address to jump to when the temporary breakpoint is hit + * @emulate: instruction to be emulated * * Update a single instruction with the vector in the stack, avoiding * dynamically allocated memory. This function should be used when it is diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 18f6b7c4bd79..b4396952c9a6 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -384,7 +384,7 @@ struct resource *amd_get_mmconfig_range(struct resource *res) int amd_get_subcaches(int cpu) { - struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link; + struct pci_dev *link = node_to_amd_nb(topology_die_id(cpu))->link; unsigned int mask; if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) @@ -398,7 +398,7 @@ int amd_get_subcaches(int cpu) int amd_set_subcaches(int cpu, unsigned long mask) { static unsigned int reset, ban; - struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu)); + struct amd_northbridge *nb = node_to_amd_nb(topology_die_id(cpu)); unsigned int reg; int cuid; diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 1eac53632786..758bbf25ef74 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -273,20 +273,24 @@ static int assign_irq_vector_any_locked(struct irq_data *irqd) const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); int node = irq_data_get_node(irqd); - if (node == NUMA_NO_NODE) - goto all; - /* Try the intersection of @affmsk and node mask */ - cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk); - if (!assign_vector_locked(irqd, vector_searchmask)) - return 0; - /* Try the node mask */ - if (!assign_vector_locked(irqd, cpumask_of_node(node))) - return 0; -all: + if (node != NUMA_NO_NODE) { + /* Try the intersection of @affmsk and node mask */ + cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk); + if (!assign_vector_locked(irqd, vector_searchmask)) + return 0; + } + /* Try the full affinity mask */ cpumask_and(vector_searchmask, affmsk, cpu_online_mask); if (!assign_vector_locked(irqd, vector_searchmask)) return 0; + + if (node != NUMA_NO_NODE) { + /* Try the node mask */ + if (!assign_vector_locked(irqd, cpumask_of_node(node))) + return 0; + } + /* Try the full online mask */ return assign_vector_locked(irqd, cpu_online_mask); } diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 714233cee0b5..4bde125a5bf1 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -33,7 +33,7 @@ static union uvh_apicid uvh_apicid; static int uv_node_id; /* Unpack AT/OEM/TABLE ID's to be NULL terminated strings */ -static u8 uv_archtype[UV_AT_SIZE]; +static u8 uv_archtype[UV_AT_SIZE + 1]; static u8 oem_id[ACPI_OEM_ID_SIZE + 1]; static u8 oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1]; @@ -161,7 +161,7 @@ static int __init early_set_hub_type(void) /* UV4/4A only have a revision difference */ case UV4_HUB_PART_NUMBER: uv_min_hub_revision_id = node_id.s.revision - + UV4_HUB_REVISION_BASE; + + UV4_HUB_REVISION_BASE - 1; uv_hub_type_set(UV4); if (uv_min_hub_revision_id == UV4A_HUB_REVISION_BASE) uv_hub_type_set(UV4|UV4A); @@ -290,6 +290,9 @@ static void __init uv_stringify(int len, char *to, char *from) { /* Relies on 'to' being NULL chars so result will be NULL terminated */ strncpy(to, from, len-1); + + /* Trim trailing spaces */ + (void)strim(to); } /* Find UV arch type entry in UVsystab */ @@ -317,7 +320,7 @@ static int __init decode_arch_type(unsigned long ptr) if (n > 0 && n < sizeof(uv_ate->archtype)) { pr_info("UV: UVarchtype received from BIOS\n"); - uv_stringify(UV_AT_SIZE, uv_archtype, uv_ate->archtype); + uv_stringify(sizeof(uv_archtype), uv_archtype, uv_ate->archtype); return 1; } return 0; @@ -366,7 +369,7 @@ static int __init early_get_arch_type(void) return ret; } -static int __init uv_set_system_type(char *_oem_id) +static int __init uv_set_system_type(char *_oem_id, char *_oem_table_id) { /* Save OEM_ID passed from ACPI MADT */ uv_stringify(sizeof(oem_id), oem_id, _oem_id); @@ -375,7 +378,7 @@ static int __init uv_set_system_type(char *_oem_id) if (!early_get_arch_type()) /* If not use OEM ID for UVarchtype */ - uv_stringify(UV_AT_SIZE, uv_archtype, _oem_id); + uv_stringify(sizeof(uv_archtype), uv_archtype, oem_id); /* Check if not hubbed */ if (strncmp(uv_archtype, "SGI", 3) != 0) { @@ -386,13 +389,23 @@ static int __init uv_set_system_type(char *_oem_id) /* (Not hubless), not a UV */ return 0; + /* Is UV hubless system */ + uv_hubless_system = 0x01; + + /* UV5 Hubless */ + if (strncmp(uv_archtype, "NSGI5", 5) == 0) + uv_hubless_system |= 0x20; + /* UV4 Hubless: CH */ - if (strncmp(uv_archtype, "NSGI4", 5) == 0) - uv_hubless_system = 0x11; + else if (strncmp(uv_archtype, "NSGI4", 5) == 0) + uv_hubless_system |= 0x10; /* UV3 Hubless: UV300/MC990X w/o hub */ else - uv_hubless_system = 0x9; + uv_hubless_system |= 0x8; + + /* Copy APIC type */ + uv_stringify(sizeof(oem_table_id), oem_table_id, _oem_table_id); pr_info("UV: OEM IDs %s/%s, SystemType %d, HUBLESS ID %x\n", oem_id, oem_table_id, uv_system_type, uv_hubless_system); @@ -456,7 +469,7 @@ static int __init uv_acpi_madt_oem_check(char *_oem_id, char *_oem_table_id) uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0; /* If not UV, return. */ - if (likely(uv_set_system_type(_oem_id) == 0)) + if (uv_set_system_type(_oem_id, _oem_table_id) == 0) return 0; /* Save and Decode OEM Table ID */ @@ -489,6 +502,18 @@ enum uv_system_type get_uv_system_type(void) return uv_system_type; } +int uv_get_hubless_system(void) +{ + return uv_hubless_system; +} +EXPORT_SYMBOL_GPL(uv_get_hubless_system); + +ssize_t uv_get_archtype(char *buf, int len) +{ + return scnprintf(buf, len, "%s/%s", uv_archtype, oem_table_id); +} +EXPORT_SYMBOL_GPL(uv_get_archtype); + int is_uv_system(void) { return uv_system_type != UV_NONE; @@ -1590,21 +1615,30 @@ static void check_efi_reboot(void) reboot_type = BOOT_ACPI; } -/* Setup user proc fs files */ +/* + * User proc fs file handling now deprecated. + * Recommend using /sys/firmware/sgi_uv/... instead. + */ static int __maybe_unused proc_hubbed_show(struct seq_file *file, void *data) { + pr_notice_once("%s: using deprecated /proc/sgi_uv/hubbed, use /sys/firmware/sgi_uv/hub_type\n", + current->comm); seq_printf(file, "0x%x\n", uv_hubbed_system); return 0; } static int __maybe_unused proc_hubless_show(struct seq_file *file, void *data) { + pr_notice_once("%s: using deprecated /proc/sgi_uv/hubless, use /sys/firmware/sgi_uv/hubless\n", + current->comm); seq_printf(file, "0x%x\n", uv_hubless_system); return 0; } static int __maybe_unused proc_archtype_show(struct seq_file *file, void *data) { + pr_notice_once("%s: using deprecated /proc/sgi_uv/archtype, use /sys/firmware/sgi_uv/archtype\n", + current->comm); seq_printf(file, "%s/%s\n", uv_archtype, oem_table_id); return 0; } diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 70b7154f4bdd..60b9f42ce3c1 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -66,7 +66,6 @@ static void __used common(void) OFFSET(PV_IRQ_irq_disable, paravirt_patch_template, irq.irq_disable); OFFSET(PV_IRQ_irq_enable, paravirt_patch_template, irq.irq_enable); OFFSET(PV_CPU_iret, paravirt_patch_template, cpu.iret); - OFFSET(PV_MMU_read_cr2, paravirt_patch_template, mmu.read_cr2); #endif #ifdef CONFIG_XEN diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 93792b457b81..637b499450d1 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_X86_MCE) += mce/ obj-$(CONFIG_MTRR) += mtrr/ obj-$(CONFIG_MICROCODE) += microcode/ obj-$(CONFIG_X86_CPU_RESCTRL) += resctrl/ +obj-$(CONFIG_X86_SGX) += sgx/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 6062ce586b95..f8ca66f3d861 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -23,7 +23,6 @@ #ifdef CONFIG_X86_64 # include <asm/mmconfig.h> -# include <asm/set_memory.h> #endif #include "cpu.h" @@ -330,7 +329,6 @@ static void legacy_fixup_core_id(struct cpuinfo_x86 *c) */ static void amd_get_topology(struct cpuinfo_x86 *c) { - u8 node_id; int cpu = smp_processor_id(); /* get information required for multi-node processors */ @@ -340,7 +338,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c) cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - node_id = ecx & 0xff; + c->cpu_die_id = ecx & 0xff; if (c->x86 == 0x15) c->cu_id = ebx & 0xff; @@ -360,15 +358,15 @@ static void amd_get_topology(struct cpuinfo_x86 *c) if (!err) c->x86_coreid_bits = get_count_order(c->x86_max_cores); - cacheinfo_amd_init_llc_id(c, cpu, node_id); + cacheinfo_amd_init_llc_id(c, cpu); } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; rdmsrl(MSR_FAM10H_NODE_ID, value); - node_id = value & 7; + c->cpu_die_id = value & 7; - per_cpu(cpu_llc_id, cpu) = node_id; + per_cpu(cpu_llc_id, cpu) = c->cpu_die_id; } else return; @@ -393,7 +391,7 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) /* Convert the initial APIC ID into the socket ID */ c->phys_proc_id = c->initial_apicid >> bits; /* use socket ID also for last level cache */ - per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; + per_cpu(cpu_llc_id, cpu) = c->cpu_die_id = c->phys_proc_id; } static void amd_detect_ppin(struct cpuinfo_x86 *c) @@ -425,12 +423,6 @@ clear_ppin: clear_cpu_cap(c, X86_FEATURE_AMD_PPIN); } -u16 amd_get_nb_id(int cpu) -{ - return per_cpu(cpu_llc_id, cpu); -} -EXPORT_SYMBOL_GPL(amd_get_nb_id); - u32 amd_get_nodes_per_socket(void) { return nodes_per_socket; @@ -516,26 +508,6 @@ static void early_init_amd_mc(struct cpuinfo_x86 *c) static void bsp_init_amd(struct cpuinfo_x86 *c) { - -#ifdef CONFIG_X86_64 - if (c->x86 >= 0xf) { - unsigned long long tseg; - - /* - * Split up direct mapping around the TSEG SMM area. - * Don't do it for gbpages because there seems very little - * benefit in doing so. - */ - if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { - unsigned long pfn = tseg >> PAGE_SHIFT; - - pr_debug("tseg: %010llx\n", tseg); - if (pfn_range_is_mapped(pfn, pfn + 1)) - set_memory_4k((unsigned long)__va(tseg), 1); - } - } -#endif - if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { if (c->x86 > 0x10 || diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d3f0db463f96..d41b70fe4918 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -739,11 +739,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); + spectre_v2_user_ibpb = mode; switch (cmd) { case SPECTRE_V2_USER_CMD_FORCE: case SPECTRE_V2_USER_CMD_PRCTL_IBPB: case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: static_branch_enable(&switch_mm_always_ibpb); + spectre_v2_user_ibpb = SPECTRE_V2_USER_STRICT; break; case SPECTRE_V2_USER_CMD_PRCTL: case SPECTRE_V2_USER_CMD_AUTO: @@ -757,8 +759,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", static_key_enabled(&switch_mm_always_ibpb) ? "always-on" : "conditional"); - - spectre_v2_user_ibpb = mode; } /* @@ -1254,6 +1254,14 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) return 0; } +static bool is_spec_ib_user_controlled(void) +{ + return spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || + spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || + spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP; +} + static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) { switch (ctrl) { @@ -1261,16 +1269,26 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return 0; + /* - * Indirect branch speculation is always disabled in strict - * mode. It can neither be enabled if it was force-disabled - * by a previous prctl call. + * With strict mode for both IBPB and STIBP, the instruction + * code paths avoid checking this task flag and instead, + * unconditionally run the instruction. However, STIBP and IBPB + * are independent and either can be set to conditionally + * enabled regardless of the mode of the other. + * + * If either is set to conditional, allow the task flag to be + * updated, unless it was force-disabled by a previous prctl + * call. Currently, this is possible on an AMD CPU which has the + * feature X86_FEATURE_AMD_STIBP_ALWAYS_ON. In this case, if the + * kernel is booted with 'spectre_v2_user=seccomp', then + * spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP and + * spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED. */ - if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED || + if (!is_spec_ib_user_controlled() || task_spec_ib_force_disable(task)) return -EPERM; + task_clear_spec_ib_disable(task); task_update_spec_tif(task); break; @@ -1283,10 +1301,10 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return -EPERM; - if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) + + if (!is_spec_ib_user_controlled()) return 0; + task_set_spec_ib_disable(task); if (ctrl == PR_SPEC_FORCE_DISABLE) task_set_spec_ib_force_disable(task); @@ -1351,20 +1369,17 @@ static int ib_prctl_get(struct task_struct *task) if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) return PR_SPEC_ENABLE; - else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || - spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) - return PR_SPEC_DISABLE; - else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || - spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || - spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || - spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) { + else if (is_spec_ib_user_controlled()) { if (task_spec_ib_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; if (task_spec_ib_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_DISABLE; return PR_SPEC_PRCTL | PR_SPEC_ENABLE; - } else + } else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) + return PR_SPEC_DISABLE; + else return PR_SPEC_NOT_AFFECTED; } diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 57074cf3ad7c..3ca9be482a9e 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -580,7 +580,7 @@ static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index) if (index < 3) return; - node = amd_get_nb_id(smp_processor_id()); + node = topology_die_id(smp_processor_id()); this_leaf->nb = node_to_amd_nb(node); if (this_leaf->nb && !this_leaf->nb->l3_cache.indices) amd_calc_l3_indices(this_leaf->nb); @@ -646,7 +646,7 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c) return i; } -void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) +void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu) { /* * We may have multiple LLCs if L3 caches exist, so check if we @@ -657,7 +657,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) if (c->x86 < 0x17) { /* LLC is at the node level. */ - per_cpu(cpu_llc_id, cpu) = node_id; + per_cpu(cpu_llc_id, cpu) = c->cpu_die_id; } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) { /* * LLC is at the core complex level. @@ -684,7 +684,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) } } -void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) +void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu) { /* * We may have multiple LLCs if L3 caches exist, so check if we diff --git a/arch/x86/kernel/cpu/feat_ctl.c b/arch/x86/kernel/cpu/feat_ctl.c index 29a3bedabd06..3b1b01f2b248 100644 --- a/arch/x86/kernel/cpu/feat_ctl.c +++ b/arch/x86/kernel/cpu/feat_ctl.c @@ -93,16 +93,41 @@ static void init_vmx_capabilities(struct cpuinfo_x86 *c) } #endif /* CONFIG_X86_VMX_FEATURE_NAMES */ +static void clear_sgx_caps(void) +{ + setup_clear_cpu_cap(X86_FEATURE_SGX); + setup_clear_cpu_cap(X86_FEATURE_SGX_LC); +} + +static int __init nosgx(char *str) +{ + clear_sgx_caps(); + + return 0; +} + +early_param("nosgx", nosgx); + void init_ia32_feat_ctl(struct cpuinfo_x86 *c) { bool tboot = tboot_enabled(); + bool enable_sgx; u64 msr; if (rdmsrl_safe(MSR_IA32_FEAT_CTL, &msr)) { clear_cpu_cap(c, X86_FEATURE_VMX); + clear_sgx_caps(); return; } + /* + * Enable SGX if and only if the kernel supports SGX and Launch Control + * is supported, i.e. disable SGX if the LE hash MSRs can't be written. + */ + enable_sgx = cpu_has(c, X86_FEATURE_SGX) && + cpu_has(c, X86_FEATURE_SGX_LC) && + IS_ENABLED(CONFIG_X86_SGX); + if (msr & FEAT_CTL_LOCKED) goto update_caps; @@ -124,13 +149,16 @@ void init_ia32_feat_ctl(struct cpuinfo_x86 *c) msr |= FEAT_CTL_VMX_ENABLED_INSIDE_SMX; } + if (enable_sgx) + msr |= FEAT_CTL_SGX_ENABLED | FEAT_CTL_SGX_LC_ENABLED; + wrmsrl(MSR_IA32_FEAT_CTL, msr); update_caps: set_cpu_cap(c, X86_FEATURE_MSR_IA32_FEAT_CTL); if (!cpu_has(c, X86_FEATURE_VMX)) - return; + goto update_sgx; if ( (tboot && !(msr & FEAT_CTL_VMX_ENABLED_INSIDE_SMX)) || (!tboot && !(msr & FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX))) { @@ -143,4 +171,12 @@ update_caps: init_vmx_capabilities(c); #endif } + +update_sgx: + if (!(msr & FEAT_CTL_SGX_ENABLED) || + !(msr & FEAT_CTL_SGX_LC_ENABLED) || !enable_sgx) { + if (enable_sgx) + pr_err_once("SGX disabled by BIOS\n"); + clear_sgx_caps(); + } } diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index ac6c30e5801d..ae59115d18f9 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -14,9 +14,6 @@ #include <asm/cacheinfo.h> #include <asm/spec-ctrl.h> #include <asm/delay.h> -#ifdef CONFIG_X86_64 -# include <asm/set_memory.h> -#endif #include "cpu.h" @@ -65,7 +62,6 @@ static void hygon_get_topology_early(struct cpuinfo_x86 *c) */ static void hygon_get_topology(struct cpuinfo_x86 *c) { - u8 node_id; int cpu = smp_processor_id(); /* get information required for multi-node processors */ @@ -75,7 +71,7 @@ static void hygon_get_topology(struct cpuinfo_x86 *c) cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - node_id = ecx & 0xff; + c->cpu_die_id = ecx & 0xff; c->cpu_core_id = ebx & 0xff; @@ -93,14 +89,14 @@ static void hygon_get_topology(struct cpuinfo_x86 *c) /* Socket ID is ApicId[6] for these processors. */ c->phys_proc_id = c->apicid >> APICID_SOCKET_ID_BIT; - cacheinfo_hygon_init_llc_id(c, cpu, node_id); + cacheinfo_hygon_init_llc_id(c, cpu); } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; rdmsrl(MSR_FAM10H_NODE_ID, value); - node_id = value & 7; + c->cpu_die_id = value & 7; - per_cpu(cpu_llc_id, cpu) = node_id; + per_cpu(cpu_llc_id, cpu) = c->cpu_die_id; } else return; @@ -123,7 +119,7 @@ static void hygon_detect_cmp(struct cpuinfo_x86 *c) /* Convert the initial APIC ID into the socket ID */ c->phys_proc_id = c->initial_apicid >> bits; /* use socket ID also for last level cache */ - per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; + per_cpu(cpu_llc_id, cpu) = c->cpu_die_id = c->phys_proc_id; } static void srat_detect_node(struct cpuinfo_x86 *c) @@ -204,23 +200,6 @@ static void early_init_hygon_mc(struct cpuinfo_x86 *c) static void bsp_init_hygon(struct cpuinfo_x86 *c) { -#ifdef CONFIG_X86_64 - unsigned long long tseg; - - /* - * Split up direct mapping around the TSEG SMM area. - * Don't do it for gbpages because there seems very little - * benefit in doing so. - */ - if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { - unsigned long pfn = tseg >> PAGE_SHIFT; - - pr_debug("tseg: %010llx\n", tseg); - if (pfn_range_is_mapped(pfn, pfn + 1)) - set_memory_4k((unsigned long)__va(tseg), 1); - } -#endif - if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { u64 val; diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 0c6b02dd744c..e486f96b3cb3 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -1341,7 +1341,7 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu, return -ENODEV; if (is_shared_bank(bank)) { - nb = node_to_amd_nb(amd_get_nb_id(cpu)); + nb = node_to_amd_nb(topology_die_id(cpu)); /* threshold descriptor already initialized on this node? */ if (nb && nb->bank4) { @@ -1445,7 +1445,7 @@ static void threshold_remove_bank(struct threshold_bank *bank) * The last CPU on this node using the shared bank is going * away, remove that bank now. */ - nb = node_to_amd_nb(amd_get_nb_id(smp_processor_id())); + nb = node_to_amd_nb(topology_die_id(smp_processor_id())); nb->bank4 = NULL; } diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c index af8d37962586..b58b85380ddb 100644 --- a/arch/x86/kernel/cpu/mce/apei.c +++ b/arch/x86/kernel/cpu/mce/apei.c @@ -51,6 +51,67 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) } EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); +int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id) +{ + const u64 *i_mce = ((const u64 *) (ctx_info + 1)); + unsigned int cpu; + struct mce m; + + if (!boot_cpu_has(X86_FEATURE_SMCA)) + return -EINVAL; + + /* + * The starting address of the register array extracted from BERT must + * match with the first expected register in the register layout of + * SMCA address space. This address corresponds to banks's MCA_STATUS + * register. + * + * Match any MCi_STATUS register by turning off bank numbers. + */ + if ((ctx_info->msr_addr & MSR_AMD64_SMCA_MC0_STATUS) != + MSR_AMD64_SMCA_MC0_STATUS) + return -EINVAL; + + /* + * The register array size must be large enough to include all the + * SMCA registers which need to be extracted. + * + * The number of registers in the register array is determined by + * Register Array Size/8 as defined in UEFI spec v2.8, sec N.2.4.2.2. + * The register layout is fixed and currently the raw data in the + * register array includes 6 SMCA registers which the kernel can + * extract. + */ + if (ctx_info->reg_arr_size < 48) + return -EINVAL; + + mce_setup(&m); + + m.extcpu = -1; + m.socketid = -1; + + for_each_possible_cpu(cpu) { + if (cpu_data(cpu).initial_apicid == lapic_id) { + m.extcpu = cpu; + m.socketid = cpu_data(m.extcpu).phys_proc_id; + break; + } + } + + m.apicid = lapic_id; + m.bank = (ctx_info->msr_addr >> 4) & 0xFF; + m.status = *i_mce; + m.addr = *(i_mce + 1); + m.misc = *(i_mce + 2); + /* Skipping MCA_CONFIG */ + m.ipid = *(i_mce + 4); + m.synd = *(i_mce + 5); + + mce_log(&m); + + return 0; +} + #define CPER_CREATOR_MCE \ GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ 0x64, 0x90, 0xb8, 0x9d) diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 4102b866e7c0..6af6a3c0698f 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -162,7 +162,8 @@ EXPORT_SYMBOL_GPL(mce_log); void mce_register_decode_chain(struct notifier_block *nb) { - if (WARN_ON(nb->priority > MCE_PRIO_MCELOG && nb->priority < MCE_PRIO_EDAC)) + if (WARN_ON(nb->priority < MCE_PRIO_LOWEST || + nb->priority > MCE_PRIO_HIGHEST)) return; blocking_notifier_chain_register(&x86_mce_decoder_chain, nb); @@ -1265,14 +1266,14 @@ static void kill_me_maybe(struct callback_head *cb) } } -static void queue_task_work(struct mce *m, int kill_it) +static void queue_task_work(struct mce *m, int kill_current_task) { current->mce_addr = m->addr; current->mce_kflags = m->kflags; current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV); current->mce_whole_page = whole_page(m); - if (kill_it) + if (kill_current_task) current->mce_kill_me.func = kill_me_now; else current->mce_kill_me.func = kill_me_maybe; @@ -1320,10 +1321,10 @@ noinstr void do_machine_check(struct pt_regs *regs) int no_way_out = 0; /* - * If kill_it gets set, there might be a way to recover from this + * If kill_current_task is not set, there might be a way to recover from this * error. */ - int kill_it = 0; + int kill_current_task = 0; /* * MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES @@ -1350,8 +1351,7 @@ noinstr void do_machine_check(struct pt_regs *regs) * severity is MCE_AR_SEVERITY we have other options. */ if (!(m.mcgstatus & MCG_STATUS_RIPV)) - kill_it = 1; - + kill_current_task = (cfg->tolerant == 3) ? 0 : 1; /* * Check if this MCE is signaled to only this logical processor, * on Intel, Zhaoxin only. @@ -1368,7 +1368,7 @@ noinstr void do_machine_check(struct pt_regs *regs) * to see it will clear it. */ if (lmce) { - if (no_way_out) + if (no_way_out && cfg->tolerant < 3) mce_panic("Fatal local machine check", &m, msg); } else { order = mce_start(&no_way_out); @@ -1384,8 +1384,13 @@ noinstr void do_machine_check(struct pt_regs *regs) * When there's any problem use only local no_way_out state. */ if (!lmce) { - if (mce_end(order) < 0) - no_way_out = worst >= MCE_PANIC_SEVERITY; + if (mce_end(order) < 0) { + if (!no_way_out) + no_way_out = worst >= MCE_PANIC_SEVERITY; + + if (no_way_out && cfg->tolerant < 3) + mce_panic("Fatal machine check on current CPU", &m, msg); + } } else { /* * If there was a fatal machine check we should have @@ -1401,19 +1406,7 @@ noinstr void do_machine_check(struct pt_regs *regs) } } - /* - * If tolerant is at an insane level we drop requests to kill - * processes and continue even when there is no way out. - */ - if (cfg->tolerant == 3) - kill_it = 0; - else if (no_way_out) - mce_panic("Fatal machine check on current CPU", &m, msg); - - if (worst > 0) - irq_work_queue(&mce_irq_work); - - if (worst != MCE_AR_SEVERITY && !kill_it) + if (worst != MCE_AR_SEVERITY && !kill_current_task) goto out; /* Fault was in user mode and we need to take some action */ @@ -1421,7 +1414,7 @@ noinstr void do_machine_check(struct pt_regs *regs) /* If this triggers there is no way to recover. Die hard. */ BUG_ON(!on_thread_stack() || !user_mode(regs)); - queue_task_work(&m, kill_it); + queue_task_work(&m, kill_current_task); } else { /* @@ -1439,7 +1432,7 @@ noinstr void do_machine_check(struct pt_regs *regs) } if (m.kflags & MCE_IN_KERNEL_COPYIN) - queue_task_work(&m, kill_it); + queue_task_work(&m, kill_current_task); } out: mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); @@ -1581,7 +1574,7 @@ static void __mcheck_cpu_mce_banks_init(void) * __mcheck_cpu_init_clear_banks() does the final bank setup. */ b->ctl = -1ULL; - b->init = 1; + b->init = true; } } @@ -1762,7 +1755,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) */ if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0) - mce_banks[0].init = 0; + mce_banks[0].init = false; /* * All newer Intel systems support MCE broadcasting. Enable @@ -1811,11 +1804,9 @@ static int __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) case X86_VENDOR_INTEL: intel_p5_mcheck_init(c); return 1; - break; case X86_VENDOR_CENTAUR: winchip_mcheck_init(c); return 1; - break; default: return 0; } diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 3a44346f2276..7b360731fc2d 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -522,8 +522,8 @@ static void do_inject(void) if (boot_cpu_has(X86_FEATURE_AMD_DCM) && b == 4 && boot_cpu_data.x86 < 0x17) { - toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu)); - cpu = get_nbc_for_node(amd_get_nb_id(cpu)); + toggle_nb_mca_mst_cpu(topology_die_id(cpu)); + cpu = get_nbc_for_node(topology_die_id(cpu)); } get_online_cpus(); diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index abe9fe0fb851..c2476fe0682e 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -509,12 +509,33 @@ static void intel_ppin_init(struct cpuinfo_x86 *c) } } +/* + * Enable additional error logs from the integrated + * memory controller on processors that support this. + */ +static void intel_imc_init(struct cpuinfo_x86 *c) +{ + u64 error_control; + + switch (c->x86_model) { + case INTEL_FAM6_SANDYBRIDGE_X: + case INTEL_FAM6_IVYBRIDGE_X: + case INTEL_FAM6_HASWELL_X: + if (rdmsrl_safe(MSR_ERROR_CONTROL, &error_control)) + return; + error_control |= 2; + wrmsrl_safe(MSR_ERROR_CONTROL, error_control); + break; + } +} + void mce_intel_feature_init(struct cpuinfo_x86 *c) { intel_init_thermal(c); intel_init_cmci(); intel_init_lmce(); intel_ppin_init(c); + intel_imc_init(c); } void mce_intel_feature_clear(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 3f6b137ef4e6..3d4a48336084 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -215,7 +215,6 @@ static unsigned int __verify_patch_size(u8 family, u32 sh_psize, size_t buf_size default: WARN(1, "%s: WTF family: 0x%x\n", __func__, family); return 0; - break; } if (sh_psize > min_t(u32, buf_size, max_size)) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 6a99535d7f37..7e8e07bddd5f 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -100,53 +100,6 @@ static int has_newer_microcode(void *mc, unsigned int csig, int cpf, int new_rev return find_matching_signature(mc, csig, cpf); } -/* - * Given CPU signature and a microcode patch, this function finds if the - * microcode patch has matching family and model with the CPU. - * - * %true - if there's a match - * %false - otherwise - */ -static bool microcode_matches(struct microcode_header_intel *mc_header, - unsigned long sig) -{ - unsigned long total_size = get_totalsize(mc_header); - unsigned long data_size = get_datasize(mc_header); - struct extended_sigtable *ext_header; - unsigned int fam_ucode, model_ucode; - struct extended_signature *ext_sig; - unsigned int fam, model; - int ext_sigcount, i; - - fam = x86_family(sig); - model = x86_model(sig); - - fam_ucode = x86_family(mc_header->sig); - model_ucode = x86_model(mc_header->sig); - - if (fam == fam_ucode && model == model_ucode) - return true; - - /* Look for ext. headers: */ - if (total_size <= data_size + MC_HEADER_SIZE) - return false; - - ext_header = (void *) mc_header + data_size + MC_HEADER_SIZE; - ext_sig = (void *)ext_header + EXT_HEADER_SIZE; - ext_sigcount = ext_header->count; - - for (i = 0; i < ext_sigcount; i++) { - fam_ucode = x86_family(ext_sig->sig); - model_ucode = x86_model(ext_sig->sig); - - if (fam == fam_ucode && model == model_ucode) - return true; - - ext_sig++; - } - return false; -} - static struct ucode_patch *memdup_patch(void *data, unsigned int size) { struct ucode_patch *p; @@ -164,7 +117,7 @@ static struct ucode_patch *memdup_patch(void *data, unsigned int size) return p; } -static void save_microcode_patch(void *data, unsigned int size) +static void save_microcode_patch(struct ucode_cpu_info *uci, void *data, unsigned int size) { struct microcode_header_intel *mc_hdr, *mc_saved_hdr; struct ucode_patch *iter, *tmp, *p = NULL; @@ -210,6 +163,9 @@ static void save_microcode_patch(void *data, unsigned int size) if (!p) return; + if (!find_matching_signature(p->data, uci->cpu_sig.sig, uci->cpu_sig.pf)) + return; + /* * Save for early loading. On 32-bit, that needs to be a physical * address as the APs are running from physical addresses, before @@ -344,13 +300,14 @@ scan_microcode(void *data, size_t size, struct ucode_cpu_info *uci, bool save) size -= mc_size; - if (!microcode_matches(mc_header, uci->cpu_sig.sig)) { + if (!find_matching_signature(data, uci->cpu_sig.sig, + uci->cpu_sig.pf)) { data += mc_size; continue; } if (save) { - save_microcode_patch(data, mc_size); + save_microcode_patch(uci, data, mc_size); goto next; } @@ -483,14 +440,14 @@ static void show_saved_mc(void) * Save this microcode patch. It will be loaded early when a CPU is * hot-added or resumes. */ -static void save_mc_for_early(u8 *mc, unsigned int size) +static void save_mc_for_early(struct ucode_cpu_info *uci, u8 *mc, unsigned int size) { /* Synchronization during CPU hotplug. */ static DEFINE_MUTEX(x86_cpu_microcode_mutex); mutex_lock(&x86_cpu_microcode_mutex); - save_microcode_patch(mc, size); + save_microcode_patch(uci, mc, size); show_saved_mc(); mutex_unlock(&x86_cpu_microcode_mutex); @@ -935,7 +892,7 @@ static enum ucode_state generic_load_microcode(int cpu, struct iov_iter *iter) * permanent memory. So it will be loaded early when a CPU is hot added * or resumes. */ - save_mc_for_early(new_mc, new_mc_size); + save_mc_for_early(uci, new_mc, new_mc_size); pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index 6a80f36b5d59..08a30c8e9431 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -813,7 +813,8 @@ void mtrr_ap_init(void) } /** - * Save current fixed-range MTRR state of the first cpu in cpu_online_mask. + * mtrr_save_state - Save current fixed-range MTRR state of the first + * cpu in cpu_online_mask. */ void mtrr_save_state(void) { diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index e5f4ee8f4c3b..698bb26aeb6e 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -570,6 +570,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) if (d) { cpumask_set_cpu(cpu, &d->cpu_mask); + if (r->cache.arch_has_per_cpu_cfg) + rdt_domain_reconfigure_cdp(r); return; } @@ -893,6 +895,10 @@ static __init void __check_quirks_intel(void) set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat"); else set_rdt_options("!l3cat"); + fallthrough; + case INTEL_FAM6_BROADWELL_X: + intel_rdt_mbm_apply_quirk(); + break; } } @@ -923,6 +929,7 @@ static __init void rdt_init_res_defs_intel(void) r->rid == RDT_RESOURCE_L2CODE) { r->cache.arch_has_sparse_bitmaps = false; r->cache.arch_has_empty_bitmaps = false; + r->cache.arch_has_per_cpu_cfg = false; } else if (r->rid == RDT_RESOURCE_MBA) { r->msr_base = MSR_IA32_MBA_THRTL_BASE; r->msr_update = mba_wrmsr_intel; @@ -943,6 +950,7 @@ static __init void rdt_init_res_defs_amd(void) r->rid == RDT_RESOURCE_L2CODE) { r->cache.arch_has_sparse_bitmaps = true; r->cache.arch_has_empty_bitmaps = true; + r->cache.arch_has_per_cpu_cfg = true; } else if (r->rid == RDT_RESOURCE_MBA) { r->msr_base = MSR_IA32_MBA_BW_BASE; r->msr_update = mba_wrmsr_amd; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 80fa997fae60..ee71c47844cb 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -264,7 +264,7 @@ void __exit rdtgroup_exit(void); struct rftype { char *name; umode_t mode; - struct kernfs_ops *kf_ops; + const struct kernfs_ops *kf_ops; unsigned long flags; unsigned long fflags; @@ -360,6 +360,8 @@ struct msr_param { * executing entities * @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid. * @arch_has_empty_bitmaps: True if the '0' bitmap is valid. + * @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache + * level has CPU scope. */ struct rdt_cache { unsigned int cbm_len; @@ -369,6 +371,7 @@ struct rdt_cache { unsigned int shareable_bits; bool arch_has_sparse_bitmaps; bool arch_has_empty_bitmaps; + bool arch_has_per_cpu_cfg; }; /** @@ -616,6 +619,7 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms); void mbm_handle_overflow(struct work_struct *work); +void __init intel_rdt_mbm_apply_quirk(void); bool is_mba_sc(struct rdt_resource *r); void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm); u32 delay_bw_map(unsigned long bw, struct rdt_resource *r); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 54dffe574e67..7ac31210e452 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -64,6 +64,69 @@ unsigned int rdt_mon_features; */ unsigned int resctrl_cqm_threshold; +#define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5)) + +/* + * The correction factor table is documented in Documentation/x86/resctrl.rst. + * If rmid > rmid threshold, MBM total and local values should be multiplied + * by the correction factor. + * + * The original table is modified for better code: + * + * 1. The threshold 0 is changed to rmid count - 1 so don't do correction + * for the case. + * 2. MBM total and local correction table indexed by core counter which is + * equal to (x86_cache_max_rmid + 1) / 8 - 1 and is from 0 up to 27. + * 3. The correction factor is normalized to 2^20 (1048576) so it's faster + * to calculate corrected value by shifting: + * corrected_value = (original_value * correction_factor) >> 20 + */ +static const struct mbm_correction_factor_table { + u32 rmidthreshold; + u64 cf; +} mbm_cf_table[] __initdata = { + {7, CF(1.000000)}, + {15, CF(1.000000)}, + {15, CF(0.969650)}, + {31, CF(1.000000)}, + {31, CF(1.066667)}, + {31, CF(0.969650)}, + {47, CF(1.142857)}, + {63, CF(1.000000)}, + {63, CF(1.185115)}, + {63, CF(1.066553)}, + {79, CF(1.454545)}, + {95, CF(1.000000)}, + {95, CF(1.230769)}, + {95, CF(1.142857)}, + {95, CF(1.066667)}, + {127, CF(1.000000)}, + {127, CF(1.254863)}, + {127, CF(1.185255)}, + {151, CF(1.000000)}, + {127, CF(1.066667)}, + {167, CF(1.000000)}, + {159, CF(1.454334)}, + {183, CF(1.000000)}, + {127, CF(0.969744)}, + {191, CF(1.280246)}, + {191, CF(1.230921)}, + {215, CF(1.000000)}, + {191, CF(1.143118)}, +}; + +static u32 mbm_cf_rmidthreshold __read_mostly = UINT_MAX; +static u64 mbm_cf __read_mostly; + +static inline u64 get_corrected_mbm_count(u32 rmid, unsigned long val) +{ + /* Correct MBM value. */ + if (rmid > mbm_cf_rmidthreshold) + val = (val * mbm_cf) >> 20; + + return val; +} + static inline struct rmid_entry *__rmid_entry(u32 rmid) { struct rmid_entry *entry; @@ -260,7 +323,8 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr) m->chunks += chunks; m->prev_msr = tval; - rr->val += m->chunks; + rr->val += get_corrected_mbm_count(rmid, m->chunks); + return 0; } @@ -279,8 +343,7 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr) return; chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width); - m->chunks += chunks; - cur_bw = (chunks * r->mon_scale) >> 20; + cur_bw = (get_corrected_mbm_count(rmid, chunks) * r->mon_scale) >> 20; if (m->delta_comp) m->delta_bw = abs(cur_bw - m->prev_bw); @@ -450,15 +513,14 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid) } if (is_mbm_local_enabled()) { rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID; + __mon_event_count(rmid, &rr); /* * Call the MBA software controller only for the * control groups and when user has enabled * the software controller explicitly. */ - if (!is_mba_sc(NULL)) - __mon_event_count(rmid, &rr); - else + if (is_mba_sc(NULL)) mbm_bw_count(rmid, &rr); } } @@ -644,3 +706,17 @@ int rdt_get_mon_l3_config(struct rdt_resource *r) return 0; } + +void __init intel_rdt_mbm_apply_quirk(void) +{ + int cf_index; + + cf_index = (boot_cpu_data.x86_cache_max_rmid + 1) / 8 - 1; + if (cf_index >= ARRAY_SIZE(mbm_cf_table)) { + pr_info("No MBM correction factor available\n"); + return; + } + + mbm_cf_rmidthreshold = mbm_cf_table[cf_index].rmidthreshold; + mbm_cf = mbm_cf_table[cf_index].cf; +} diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index af323e2e3100..29ffb95b25ff 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -240,13 +240,13 @@ static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf, return -EINVAL; } -static struct kernfs_ops rdtgroup_kf_single_ops = { +static const struct kernfs_ops rdtgroup_kf_single_ops = { .atomic_write_len = PAGE_SIZE, .write = rdtgroup_file_write, .seq_show = rdtgroup_seqfile_show, }; -static struct kernfs_ops kf_mondata_ops = { +static const struct kernfs_ops kf_mondata_ops = { .atomic_write_len = PAGE_SIZE, .seq_show = rdtgroup_mondata_show, }; @@ -507,6 +507,24 @@ unlock: return ret ?: nbytes; } +/** + * rdtgroup_remove - the helper to remove resource group safely + * @rdtgrp: resource group to remove + * + * On resource group creation via a mkdir, an extra kernfs_node reference is + * taken to ensure that the rdtgroup structure remains accessible for the + * rdtgroup_kn_unlock() calls where it is removed. + * + * Drop the extra reference here, then free the rdtgroup structure. + * + * Return: void + */ +static void rdtgroup_remove(struct rdtgroup *rdtgrp) +{ + kernfs_put(rdtgrp->kn); + kfree(rdtgrp); +} + struct task_move_callback { struct callback_head work; struct rdtgroup *rdtgrp; @@ -529,7 +547,7 @@ static void move_myself(struct callback_head *head) (rdtgrp->flags & RDT_DELETED)) { current->closid = 0; current->rmid = 0; - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } if (unlikely(current->flags & PF_EXITING)) @@ -1769,7 +1787,6 @@ static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name, if (IS_ERR(kn_subdir)) return PTR_ERR(kn_subdir); - kernfs_get(kn_subdir); ret = rdtgroup_kn_set_ugid(kn_subdir); if (ret) return ret; @@ -1792,7 +1809,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL); if (IS_ERR(kn_info)) return PTR_ERR(kn_info); - kernfs_get(kn_info); ret = rdtgroup_add_files(kn_info, RF_TOP_INFO); if (ret) @@ -1813,12 +1829,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) goto out_destroy; } - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that @rdtgrp->kn is always accessible. - */ - kernfs_get(kn_info); - ret = rdtgroup_kn_set_ugid(kn_info); if (ret) goto out_destroy; @@ -1847,12 +1857,6 @@ mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp, if (dest_kn) *dest_kn = kn; - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that @rdtgrp->kn is always accessible. - */ - kernfs_get(kn); - ret = rdtgroup_kn_set_ugid(kn); if (ret) goto out_destroy; @@ -1905,8 +1909,13 @@ static int set_cache_qos_cfg(int level, bool enable) r_l = &rdt_resources_all[level]; list_for_each_entry(d, &r_l->domains, list) { - /* Pick one CPU from each domain instance to update MSR */ - cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); + if (r_l->cache.arch_has_per_cpu_cfg) + /* Pick all the CPUs in the domain instance */ + for_each_cpu(cpu, &d->cpu_mask) + cpumask_set_cpu(cpu, cpu_mask); + else + /* Pick one CPU from each domain instance to update MSR */ + cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); } cpu = get_cpu(); /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */ @@ -2079,8 +2088,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn) rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) rdtgroup_pseudo_lock_remove(rdtgrp); kernfs_unbreak_active_protection(kn); - kernfs_put(rdtgrp->kn); - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } else { kernfs_unbreak_active_protection(kn); } @@ -2139,13 +2147,11 @@ static int rdt_get_tree(struct fs_context *fc) &kn_mongrp); if (ret < 0) goto out_info; - kernfs_get(kn_mongrp); ret = mkdir_mondata_all(rdtgroup_default.kn, &rdtgroup_default, &kn_mondata); if (ret < 0) goto out_mongrp; - kernfs_get(kn_mondata); rdtgroup_default.mon.mon_data_kn = kn_mondata; } @@ -2357,7 +2363,7 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) if (atomic_read(&sentry->waitcount) != 0) sentry->flags = RDT_DELETED; else - kfree(sentry); + rdtgroup_remove(sentry); } } @@ -2399,7 +2405,7 @@ static void rmdir_all_sub(void) if (atomic_read(&rdtgrp->waitcount) != 0) rdtgrp->flags = RDT_DELETED; else - kfree(rdtgrp); + rdtgroup_remove(rdtgrp); } /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ update_closid_rmid(cpu_online_mask, &rdtgroup_default); @@ -2499,11 +2505,6 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, if (IS_ERR(kn)) return PTR_ERR(kn); - /* - * This extra ref will be put in kernfs_remove() and guarantees - * that kn is always accessible. - */ - kernfs_get(kn); ret = rdtgroup_kn_set_ugid(kn); if (ret) goto out_destroy; @@ -2838,8 +2839,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, /* * kernfs_remove() will drop the reference count on "kn" which * will free it. But we still need it to stick around for the - * rdtgroup_kn_unlock(kn} call below. Take one extra reference - * here, which will be dropped inside rdtgroup_kn_unlock(). + * rdtgroup_kn_unlock(kn) call. Take one extra reference here, + * which will be dropped by kernfs_put() in rdtgroup_remove(). */ kernfs_get(kn); @@ -2880,6 +2881,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, out_idfree: free_rmid(rdtgrp->mon.rmid); out_destroy: + kernfs_put(rdtgrp->kn); kernfs_remove(rdtgrp->kn); out_free_rgrp: kfree(rdtgrp); @@ -2892,7 +2894,7 @@ static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp) { kernfs_remove(rgrp->kn); free_rmid(rgrp->mon.rmid); - kfree(rgrp); + rdtgroup_remove(rgrp); } /* @@ -3021,8 +3023,7 @@ static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name, return -EPERM; } -static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp, - cpumask_var_t tmpmask) +static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) { struct rdtgroup *prdtgrp = rdtgrp->mon.parent; int cpu; @@ -3049,33 +3050,21 @@ static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp, WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list)); list_del(&rdtgrp->mon.crdtgrp_list); - /* - * one extra hold on this, will drop when we kfree(rdtgrp) - * in rdtgroup_kn_unlock() - */ - kernfs_get(kn); kernfs_remove(rdtgrp->kn); return 0; } -static int rdtgroup_ctrl_remove(struct kernfs_node *kn, - struct rdtgroup *rdtgrp) +static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp) { rdtgrp->flags = RDT_DELETED; list_del(&rdtgrp->rdtgroup_list); - /* - * one extra hold on this, will drop when we kfree(rdtgrp) - * in rdtgroup_kn_unlock() - */ - kernfs_get(kn); kernfs_remove(rdtgrp->kn); return 0; } -static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp, - cpumask_var_t tmpmask) +static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask) { int cpu; @@ -3102,7 +3091,7 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp, closid_free(rdtgrp->closid); free_rmid(rdtgrp->mon.rmid); - rdtgroup_ctrl_remove(kn, rdtgrp); + rdtgroup_ctrl_remove(rdtgrp); /* * Free all the child monitor group rmids. @@ -3139,13 +3128,13 @@ static int rdtgroup_rmdir(struct kernfs_node *kn) rdtgrp != &rdtgroup_default) { if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP || rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) { - ret = rdtgroup_ctrl_remove(kn, rdtgrp); + ret = rdtgroup_ctrl_remove(rdtgrp); } else { - ret = rdtgroup_rmdir_ctrl(kn, rdtgrp, tmpmask); + ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask); } } else if (rdtgrp->type == RDTMON_GROUP && is_mon_groups(parent_kn, kn->name)) { - ret = rdtgroup_rmdir_mon(kn, rdtgrp, tmpmask); + ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask); } else { ret = -EPERM; } diff --git a/arch/x86/kernel/cpu/sgx/Makefile b/arch/x86/kernel/cpu/sgx/Makefile new file mode 100644 index 000000000000..91d3dc784a29 --- /dev/null +++ b/arch/x86/kernel/cpu/sgx/Makefile @@ -0,0 +1,5 @@ +obj-y += \ + driver.o \ + encl.o \ + ioctl.o \ + main.o diff --git a/arch/x86/kernel/cpu/sgx/arch.h b/arch/x86/kernel/cpu/sgx/arch.h new file mode 100644 index 000000000000..dd7602c44c72 --- /dev/null +++ b/arch/x86/kernel/cpu/sgx/arch.h @@ -0,0 +1,338 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/** + * Copyright(c) 2016-20 Intel Corporation. + * + * Contains data structures defined by the SGX architecture. Data structures + * defined by the Linux software stack should not be placed here. + */ +#ifndef _ASM_X86_SGX_ARCH_H +#define _ASM_X86_SGX_ARCH_H + +#include <linux/bits.h> +#include <linux/types.h> + +/* The SGX specific CPUID function. */ +#define SGX_CPUID 0x12 +/* EPC enumeration. */ +#define SGX_CPUID_EPC 2 +/* An invalid EPC section, i.e. the end marker. */ +#define SGX_CPUID_EPC_INVALID 0x0 +/* A valid EPC section. */ +#define SGX_CPUID_EPC_SECTION 0x1 +/* The bitmask for the EPC section type. */ +#define SGX_CPUID_EPC_MASK GENMASK(3, 0) + +/** + * enum sgx_return_code - The return code type for ENCLS, ENCLU and ENCLV + * %SGX_NOT_TRACKED: Previous ETRACK's shootdown sequence has not + * been completed yet. + * %SGX_INVALID_EINITTOKEN: EINITTOKEN is invalid and enclave signer's + * public key does not match IA32_SGXLEPUBKEYHASH. + * %SGX_UNMASKED_EVENT: An unmasked event, e.g. INTR, was received + */ +enum sgx_return_code { + SGX_NOT_TRACKED = 11, + SGX_INVALID_EINITTOKEN = 16, + SGX_UNMASKED_EVENT = 128, +}; + +/* The modulus size for 3072-bit RSA keys. */ +#define SGX_MODULUS_SIZE 384 + +/** + * enum sgx_miscselect - additional information to an SSA frame + * %SGX_MISC_EXINFO: Report #PF or #GP to the SSA frame. + * + * Save State Area (SSA) is a stack inside the enclave used to store processor + * state when an exception or interrupt occurs. This enum defines additional + * information stored to an SSA frame. + */ +enum sgx_miscselect { + SGX_MISC_EXINFO = BIT(0), +}; + +#define SGX_MISC_RESERVED_MASK GENMASK_ULL(63, 1) + +#define SGX_SSA_GPRS_SIZE 184 +#define SGX_SSA_MISC_EXINFO_SIZE 16 + +/** + * enum sgx_attributes - the attributes field in &struct sgx_secs + * %SGX_ATTR_INIT: Enclave can be entered (is initialized). + * %SGX_ATTR_DEBUG: Allow ENCLS(EDBGRD) and ENCLS(EDBGWR). + * %SGX_ATTR_MODE64BIT: Tell that this a 64-bit enclave. + * %SGX_ATTR_PROVISIONKEY: Allow to use provisioning keys for remote + * attestation. + * %SGX_ATTR_KSS: Allow to use key separation and sharing (KSS). + * %SGX_ATTR_EINITTOKENKEY: Allow to use token signing key that is used to + * sign cryptographic tokens that can be passed to + * EINIT as an authorization to run an enclave. + */ +enum sgx_attribute { + SGX_ATTR_INIT = BIT(0), + SGX_ATTR_DEBUG = BIT(1), + SGX_ATTR_MODE64BIT = BIT(2), + SGX_ATTR_PROVISIONKEY = BIT(4), + SGX_ATTR_EINITTOKENKEY = BIT(5), + SGX_ATTR_KSS = BIT(7), +}; + +#define SGX_ATTR_RESERVED_MASK (BIT_ULL(3) | BIT_ULL(6) | GENMASK_ULL(63, 8)) + +/** + * struct sgx_secs - SGX Enclave Control Structure (SECS) + * @size: size of the address space + * @base: base address of the address space + * @ssa_frame_size: size of an SSA frame + * @miscselect: additional information stored to an SSA frame + * @attributes: attributes for enclave + * @xfrm: XSave-Feature Request Mask (subset of XCR0) + * @mrenclave: SHA256-hash of the enclave contents + * @mrsigner: SHA256-hash of the public key used to sign the SIGSTRUCT + * @config_id: a user-defined value that is used in key derivation + * @isv_prod_id: a user-defined value that is used in key derivation + * @isv_svn: a user-defined value that is used in key derivation + * @config_svn: a user-defined value that is used in key derivation + * + * SGX Enclave Control Structure (SECS) is a special enclave page that is not + * visible in the address space. In fact, this structure defines the address + * range and other global attributes for the enclave and it is the first EPC + * page created for any enclave. It is moved from a temporary buffer to an EPC + * by the means of ENCLS[ECREATE] function. + */ +struct sgx_secs { + u64 size; + u64 base; + u32 ssa_frame_size; + u32 miscselect; + u8 reserved1[24]; + u64 attributes; + u64 xfrm; + u32 mrenclave[8]; + u8 reserved2[32]; + u32 mrsigner[8]; + u8 reserved3[32]; + u32 config_id[16]; + u16 isv_prod_id; + u16 isv_svn; + u16 config_svn; + u8 reserved4[3834]; +} __packed; + +/** + * enum sgx_tcs_flags - execution flags for TCS + * %SGX_TCS_DBGOPTIN: If enabled allows single-stepping and breakpoints + * inside an enclave. It is cleared by EADD but can + * be set later with EDBGWR. + */ +enum sgx_tcs_flags { + SGX_TCS_DBGOPTIN = 0x01, +}; + +#define SGX_TCS_RESERVED_MASK GENMASK_ULL(63, 1) +#define SGX_TCS_RESERVED_SIZE 4024 + +/** + * struct sgx_tcs - Thread Control Structure (TCS) + * @state: used to mark an entered TCS + * @flags: execution flags (cleared by EADD) + * @ssa_offset: SSA stack offset relative to the enclave base + * @ssa_index: the current SSA frame index (cleard by EADD) + * @nr_ssa_frames: the number of frame in the SSA stack + * @entry_offset: entry point offset relative to the enclave base + * @exit_addr: address outside the enclave to exit on an exception or + * interrupt + * @fs_offset: offset relative to the enclave base to become FS + * segment inside the enclave + * @gs_offset: offset relative to the enclave base to become GS + * segment inside the enclave + * @fs_limit: size to become a new FS-limit (only 32-bit enclaves) + * @gs_limit: size to become a new GS-limit (only 32-bit enclaves) + * + * Thread Control Structure (TCS) is an enclave page visible in its address + * space that defines an entry point inside the enclave. A thread enters inside + * an enclave by supplying address of TCS to ENCLU(EENTER). A TCS can be entered + * by only one thread at a time. + */ +struct sgx_tcs { + u64 state; + u64 flags; + u64 ssa_offset; + u32 ssa_index; + u32 nr_ssa_frames; + u64 entry_offset; + u64 exit_addr; + u64 fs_offset; + u64 gs_offset; + u32 fs_limit; + u32 gs_limit; + u8 reserved[SGX_TCS_RESERVED_SIZE]; +} __packed; + +/** + * struct sgx_pageinfo - an enclave page descriptor + * @addr: address of the enclave page + * @contents: pointer to the page contents + * @metadata: pointer either to a SECINFO or PCMD instance + * @secs: address of the SECS page + */ +struct sgx_pageinfo { + u64 addr; + u64 contents; + u64 metadata; + u64 secs; +} __packed __aligned(32); + + +/** + * enum sgx_page_type - bits in the SECINFO flags defining the page type + * %SGX_PAGE_TYPE_SECS: a SECS page + * %SGX_PAGE_TYPE_TCS: a TCS page + * %SGX_PAGE_TYPE_REG: a regular page + * %SGX_PAGE_TYPE_VA: a VA page + * %SGX_PAGE_TYPE_TRIM: a page in trimmed state + */ +enum sgx_page_type { + SGX_PAGE_TYPE_SECS, + SGX_PAGE_TYPE_TCS, + SGX_PAGE_TYPE_REG, + SGX_PAGE_TYPE_VA, + SGX_PAGE_TYPE_TRIM, +}; + +#define SGX_NR_PAGE_TYPES 5 +#define SGX_PAGE_TYPE_MASK GENMASK(7, 0) + +/** + * enum sgx_secinfo_flags - the flags field in &struct sgx_secinfo + * %SGX_SECINFO_R: allow read + * %SGX_SECINFO_W: allow write + * %SGX_SECINFO_X: allow execution + * %SGX_SECINFO_SECS: a SECS page + * %SGX_SECINFO_TCS: a TCS page + * %SGX_SECINFO_REG: a regular page + * %SGX_SECINFO_VA: a VA page + * %SGX_SECINFO_TRIM: a page in trimmed state + */ +enum sgx_secinfo_flags { + SGX_SECINFO_R = BIT(0), + SGX_SECINFO_W = BIT(1), + SGX_SECINFO_X = BIT(2), + SGX_SECINFO_SECS = (SGX_PAGE_TYPE_SECS << 8), + SGX_SECINFO_TCS = (SGX_PAGE_TYPE_TCS << 8), + SGX_SECINFO_REG = (SGX_PAGE_TYPE_REG << 8), + SGX_SECINFO_VA = (SGX_PAGE_TYPE_VA << 8), + SGX_SECINFO_TRIM = (SGX_PAGE_TYPE_TRIM << 8), +}; + +#define SGX_SECINFO_PERMISSION_MASK GENMASK_ULL(2, 0) +#define SGX_SECINFO_PAGE_TYPE_MASK (SGX_PAGE_TYPE_MASK << 8) +#define SGX_SECINFO_RESERVED_MASK ~(SGX_SECINFO_PERMISSION_MASK | \ + SGX_SECINFO_PAGE_TYPE_MASK) + +/** + * struct sgx_secinfo - describes attributes of an EPC page + * @flags: permissions and type + * + * Used together with ENCLS leaves that add or modify an EPC page to an + * enclave to define page permissions and type. + */ +struct sgx_secinfo { + u64 flags; + u8 reserved[56]; +} __packed __aligned(64); + +#define SGX_PCMD_RESERVED_SIZE 40 + +/** + * struct sgx_pcmd - Paging Crypto Metadata (PCMD) + * @enclave_id: enclave identifier + * @mac: MAC over PCMD, page contents and isvsvn + * + * PCMD is stored for every swapped page to the regular memory. When ELDU loads + * the page back it recalculates the MAC by using a isvsvn number stored in a + * VA page. Together these two structures bring integrity and rollback + * protection. + */ +struct sgx_pcmd { + struct sgx_secinfo secinfo; + u64 enclave_id; + u8 reserved[SGX_PCMD_RESERVED_SIZE]; + u8 mac[16]; +} __packed __aligned(128); + +#define SGX_SIGSTRUCT_RESERVED1_SIZE 84 +#define SGX_SIGSTRUCT_RESERVED2_SIZE 20 +#define SGX_SIGSTRUCT_RESERVED3_SIZE 32 +#define SGX_SIGSTRUCT_RESERVED4_SIZE 12 + +/** + * struct sgx_sigstruct_header - defines author of the enclave + * @header1: constant byte string + * @vendor: must be either 0x0000 or 0x8086 + * @date: YYYYMMDD in BCD + * @header2: costant byte string + * @swdefined: software defined value + */ +struct sgx_sigstruct_header { + u64 header1[2]; + u32 vendor; + u32 date; + u64 header2[2]; + u32 swdefined; + u8 reserved1[84]; +} __packed; + +/** + * struct sgx_sigstruct_body - defines contents of the enclave + * @miscselect: additional information stored to an SSA frame + * @misc_mask: required miscselect in SECS + * @attributes: attributes for enclave + * @xfrm: XSave-Feature Request Mask (subset of XCR0) + * @attributes_mask: required attributes in SECS + * @xfrm_mask: required XFRM in SECS + * @mrenclave: SHA256-hash of the enclave contents + * @isvprodid: a user-defined value that is used in key derivation + * @isvsvn: a user-defined value that is used in key derivation + */ +struct sgx_sigstruct_body { + u32 miscselect; + u32 misc_mask; + u8 reserved2[20]; + u64 attributes; + u64 xfrm; + u64 attributes_mask; + u64 xfrm_mask; + u8 mrenclave[32]; + u8 reserved3[32]; + u16 isvprodid; + u16 isvsvn; +} __packed; + +/** + * struct sgx_sigstruct - an enclave signature + * @header: defines author of the enclave + * @modulus: the modulus of the public key + * @exponent: the exponent of the public key + * @signature: the signature calculated over the fields except modulus, + * @body: defines contents of the enclave + * @q1: a value used in RSA signature verification + * @q2: a value used in RSA signature verification + * + * Header and body are the parts that are actual signed. The remaining fields + * define the signature of the enclave. + */ +struct sgx_sigstruct { + struct sgx_sigstruct_header header; + u8 modulus[SGX_MODULUS_SIZE]; + u32 exponent; + u8 signature[SGX_MODULUS_SIZE]; + struct sgx_sigstruct_body body; + u8 reserved4[12]; + u8 q1[SGX_MODULUS_SIZE]; + u8 q2[SGX_MODULUS_SIZE]; +} __packed; + +#define SGX_LAUNCH_TOKEN_SIZE 304 + +#endif /* _ASM_X86_SGX_ARCH_H */ diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c new file mode 100644 index 000000000000..f2eac41bb4ff --- /dev/null +++ b/arch/x86/kernel/cpu/sgx/driver.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2016-20 Intel Corporation. */ + +#include <linux/acpi.h> +#include <linux/miscdevice.h> +#include <linux/mman.h> +#include <linux/security.h> +#include <linux/suspend.h> +#include <asm/traps.h> +#include "driver.h" +#include "encl.h" + +u64 sgx_attributes_reserved_mask; +u64 sgx_xfrm_reserved_mask = ~0x3; +u32 sgx_misc_reserved_mask; + +static int sgx_open(struct inode *inode, struct file *file) +{ + struct sgx_encl *encl; + int ret; + + encl = kzalloc(sizeof(*encl), GFP_KERNEL); + if (!encl) + return -ENOMEM; + + kref_init(&encl->refcount); + xa_init(&encl->page_array); + mutex_init(&encl->lock); + INIT_LIST_HEAD(&encl->va_pages); + INIT_LIST_HEAD(&encl->mm_list); + spin_lock_init(&encl->mm_lock); + + ret = init_srcu_struct(&encl->srcu); + if (ret) { + kfree(encl); + return ret; + } + + file->private_data = encl; + + return 0; +} + +static int sgx_release(struct inode *inode, struct file *file) +{ + struct sgx_encl *encl = file->private_data; + struct sgx_encl_mm *encl_mm; + + /* + * Drain the remaining mm_list entries. At this point the list contains + * entries for processes, which have closed the enclave file but have + * not exited yet. The processes, which have exited, are gone from the + * list by sgx_mmu_notifier_release(). + */ + for ( ; ; ) { + spin_lock(&encl->mm_lock); + + if (list_empty(&encl->mm_list)) { + encl_mm = NULL; + } else { + encl_mm = list_first_entry(&encl->mm_list, + struct sgx_encl_mm, list); + list_del_rcu(&encl_mm->list); + } + + spin_unlock(&encl->mm_lock); + + /* The enclave is no longer mapped by any mm. */ + if (!encl_mm) + break; + + synchronize_srcu(&encl->srcu); + mmu_notifier_unregister(&encl_mm->mmu_notifier, encl_mm->mm); + kfree(encl_mm); + } + + kref_put(&encl->refcount, sgx_encl_release); + return 0; +} + +static int sgx_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct sgx_encl *encl = file->private_data; + int ret; + + ret = sgx_encl_may_map(encl, vma->vm_start, vma->vm_end, vma->vm_flags); + if (ret) + return ret; + + ret = sgx_encl_mm_add(encl, vma->vm_mm); + if (ret) + return ret; + + vma->vm_ops = &sgx_vm_ops; + vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO; + vma->vm_private_data = encl; + + return 0; +} + +static unsigned long sgx_get_unmapped_area(struct file *file, + unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags) +{ + if ((flags & MAP_TYPE) == MAP_PRIVATE) + return -EINVAL; + + if (flags & MAP_FIXED) + return addr; + + return current->mm->get_unmapped_area(file, addr, len, pgoff, flags); +} + +#ifdef CONFIG_COMPAT +static long sgx_compat_ioctl(struct file *filep, unsigned int cmd, + unsigned long arg) +{ + return sgx_ioctl(filep, cmd, arg); +} +#endif + +static const struct file_operations sgx_encl_fops = { + .owner = THIS_MODULE, + .open = sgx_open, + .release = sgx_release, + .unlocked_ioctl = sgx_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = sgx_compat_ioctl, +#endif + .mmap = sgx_mmap, + .get_unmapped_area = sgx_get_unmapped_area, +}; + +const struct file_operations sgx_provision_fops = { + .owner = THIS_MODULE, +}; + +static struct miscdevice sgx_dev_enclave = { + .minor = MISC_DYNAMIC_MINOR, + .name = "sgx_enclave", + .nodename = "sgx_enclave", + .fops = &sgx_encl_fops, +}; + +static struct miscdevice sgx_dev_provision = { + .minor = MISC_DYNAMIC_MINOR, + .name = "sgx_provision", + .nodename = "sgx_provision", + .fops = &sgx_provision_fops, +}; + +int __init sgx_drv_init(void) +{ + unsigned int eax, ebx, ecx, edx; + u64 attr_mask; + u64 xfrm_mask; + int ret; + + if (!cpu_feature_enabled(X86_FEATURE_SGX_LC)) + return -ENODEV; + + cpuid_count(SGX_CPUID, 0, &eax, &ebx, &ecx, &edx); + + if (!(eax & 1)) { + pr_err("SGX disabled: SGX1 instruction support not available.\n"); + return -ENODEV; + } + + sgx_misc_reserved_mask = ~ebx | SGX_MISC_RESERVED_MASK; + + cpuid_count(SGX_CPUID, 1, &eax, &ebx, &ecx, &edx); + + attr_mask = (((u64)ebx) << 32) + (u64)eax; + sgx_attributes_reserved_mask = ~attr_mask | SGX_ATTR_RESERVED_MASK; + + if (cpu_feature_enabled(X86_FEATURE_OSXSAVE)) { + xfrm_mask = (((u64)edx) << 32) + (u64)ecx; + sgx_xfrm_reserved_mask = ~xfrm_mask; + } + + ret = misc_register(&sgx_dev_enclave); + if (ret) + return ret; + + ret = misc_register(&sgx_dev_provision); + if (ret) { + misc_deregister(&sgx_dev_enclave); + return ret; + } + + return 0; +} diff --git a/arch/x86/kernel/cpu/sgx/driver.h b/arch/x86/kernel/cpu/sgx/driver.h new file mode 100644 index 000000000000..4eddb4d571ef --- /dev/null +++ b/arch/x86/kernel/cpu/sgx/driver.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ARCH_SGX_DRIVER_H__ +#define __ARCH_SGX_DRIVER_H__ + +#include <crypto/hash.h> +#include <linux/kref.h> +#include <linux/mmu_notifier.h> +#include <linux/radix-tree.h> +#include <linux/rwsem.h> +#include <linux/sched.h> +#include <linux/workqueue.h> +#include <uapi/asm/sgx.h> +#include "sgx.h" + +#define SGX_EINIT_SPIN_COUNT 20 +#define SGX_EINIT_SLEEP_COUNT 50 +#define SGX_EINIT_SLEEP_TIME 20 + +extern u64 sgx_attributes_reserved_mask; +extern u64 sgx_xfrm_reserved_mask; +extern u32 sgx_misc_reserved_mask; + +extern const struct file_operations sgx_provision_fops; + +long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); + +int sgx_drv_init(void); + +#endif /* __ARCH_X86_SGX_DRIVER_H__ */ diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c new file mode 100644 index 000000000000..ee50a5010277 --- /dev/null +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -0,0 +1,740 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2016-20 Intel Corporation. */ + +#include <linux/lockdep.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/shmem_fs.h> +#include <linux/suspend.h> +#include <linux/sched/mm.h> +#include "arch.h" +#include "encl.h" +#include "encls.h" +#include "sgx.h" + +/* + * ELDU: Load an EPC page as unblocked. For more info, see "OS Management of EPC + * Pages" in the SDM. + */ +static int __sgx_encl_eldu(struct sgx_encl_page *encl_page, + struct sgx_epc_page *epc_page, + struct sgx_epc_page *secs_page) +{ + unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK; + struct sgx_encl *encl = encl_page->encl; + struct sgx_pageinfo pginfo; + struct sgx_backing b; + pgoff_t page_index; + int ret; + + if (secs_page) + page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base); + else + page_index = PFN_DOWN(encl->size); + + ret = sgx_encl_get_backing(encl, page_index, &b); + if (ret) + return ret; + + pginfo.addr = encl_page->desc & PAGE_MASK; + pginfo.contents = (unsigned long)kmap_atomic(b.contents); + pginfo.metadata = (unsigned long)kmap_atomic(b.pcmd) + + b.pcmd_offset; + + if (secs_page) + pginfo.secs = (u64)sgx_get_epc_virt_addr(secs_page); + else + pginfo.secs = 0; + + ret = __eldu(&pginfo, sgx_get_epc_virt_addr(epc_page), + sgx_get_epc_virt_addr(encl_page->va_page->epc_page) + va_offset); + if (ret) { + if (encls_failed(ret)) + ENCLS_WARN(ret, "ELDU"); + + ret = -EFAULT; + } + + kunmap_atomic((void *)(unsigned long)(pginfo.metadata - b.pcmd_offset)); + kunmap_atomic((void *)(unsigned long)pginfo.contents); + + sgx_encl_put_backing(&b, false); + + return ret; +} + +static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page, + struct sgx_epc_page *secs_page) +{ + + unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK; + struct sgx_encl *encl = encl_page->encl; + struct sgx_epc_page *epc_page; + int ret; + + epc_page = sgx_alloc_epc_page(encl_page, false); + if (IS_ERR(epc_page)) + return epc_page; + + ret = __sgx_encl_eldu(encl_page, epc_page, secs_page); + if (ret) { + sgx_free_epc_page(epc_page); + return ERR_PTR(ret); + } + + sgx_free_va_slot(encl_page->va_page, va_offset); + list_move(&encl_page->va_page->list, &encl->va_pages); + encl_page->desc &= ~SGX_ENCL_PAGE_VA_OFFSET_MASK; + encl_page->epc_page = epc_page; + + return epc_page; +} + +static struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl, + unsigned long addr, + unsigned long vm_flags) +{ + unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC); + struct sgx_epc_page *epc_page; + struct sgx_encl_page *entry; + + entry = xa_load(&encl->page_array, PFN_DOWN(addr)); + if (!entry) + return ERR_PTR(-EFAULT); + + /* + * Verify that the faulted page has equal or higher build time + * permissions than the VMA permissions (i.e. the subset of {VM_READ, + * VM_WRITE, VM_EXECUTE} in vma->vm_flags). + */ + if ((entry->vm_max_prot_bits & vm_prot_bits) != vm_prot_bits) + return ERR_PTR(-EFAULT); + + /* Entry successfully located. */ + if (entry->epc_page) { + if (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED) + return ERR_PTR(-EBUSY); + + return entry; + } + + if (!(encl->secs.epc_page)) { + epc_page = sgx_encl_eldu(&encl->secs, NULL); + if (IS_ERR(epc_page)) + return ERR_CAST(epc_page); + } + + epc_page = sgx_encl_eldu(entry, encl->secs.epc_page); + if (IS_ERR(epc_page)) + return ERR_CAST(epc_page); + + encl->secs_child_cnt++; + sgx_mark_page_reclaimable(entry->epc_page); + + return entry; +} + +static vm_fault_t sgx_vma_fault(struct vm_fault *vmf) +{ + unsigned long addr = (unsigned long)vmf->address; + struct vm_area_struct *vma = vmf->vma; + struct sgx_encl_page *entry; + unsigned long phys_addr; + struct sgx_encl *encl; + unsigned long pfn; + vm_fault_t ret; + + encl = vma->vm_private_data; + + /* + * It's very unlikely but possible that allocating memory for the + * mm_list entry of a forked process failed in sgx_vma_open(). When + * this happens, vm_private_data is set to NULL. + */ + if (unlikely(!encl)) + return VM_FAULT_SIGBUS; + + mutex_lock(&encl->lock); + + entry = sgx_encl_load_page(encl, addr, vma->vm_flags); + if (IS_ERR(entry)) { + mutex_unlock(&encl->lock); + + if (PTR_ERR(entry) == -EBUSY) + return VM_FAULT_NOPAGE; + + return VM_FAULT_SIGBUS; + } + + phys_addr = sgx_get_epc_phys_addr(entry->epc_page); + + /* Check if another thread got here first to insert the PTE. */ + if (!follow_pfn(vma, addr, &pfn)) { + mutex_unlock(&encl->lock); + + return VM_FAULT_NOPAGE; + } + + ret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys_addr)); + if (ret != VM_FAULT_NOPAGE) { + mutex_unlock(&encl->lock); + + return VM_FAULT_SIGBUS; + } + + sgx_encl_test_and_clear_young(vma->vm_mm, entry); + mutex_unlock(&encl->lock); + + return VM_FAULT_NOPAGE; +} + +static void sgx_vma_open(struct vm_area_struct *vma) +{ + struct sgx_encl *encl = vma->vm_private_data; + + /* + * It's possible but unlikely that vm_private_data is NULL. This can + * happen in a grandchild of a process, when sgx_encl_mm_add() had + * failed to allocate memory in this callback. + */ + if (unlikely(!encl)) + return; + + if (sgx_encl_mm_add(encl, vma->vm_mm)) + vma->vm_private_data = NULL; +} + + +/** + * sgx_encl_may_map() - Check if a requested VMA mapping is allowed + * @encl: an enclave pointer + * @start: lower bound of the address range, inclusive + * @end: upper bound of the address range, exclusive + * @vm_flags: VMA flags + * + * Iterate through the enclave pages contained within [@start, @end) to verify + * that the permissions requested by a subset of {VM_READ, VM_WRITE, VM_EXEC} + * do not contain any permissions that are not contained in the build time + * permissions of any of the enclave pages within the given address range. + * + * An enclave creator must declare the strongest permissions that will be + * needed for each enclave page. This ensures that mappings have the identical + * or weaker permissions than the earlier declared permissions. + * + * Return: 0 on success, -EACCES otherwise + */ +int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start, + unsigned long end, unsigned long vm_flags) +{ + unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC); + struct sgx_encl_page *page; + unsigned long count = 0; + int ret = 0; + + XA_STATE(xas, &encl->page_array, PFN_DOWN(start)); + + /* + * Disallow READ_IMPLIES_EXEC tasks as their VMA permissions might + * conflict with the enclave page permissions. + */ + if (current->personality & READ_IMPLIES_EXEC) + return -EACCES; + + mutex_lock(&encl->lock); + xas_lock(&xas); + xas_for_each(&xas, page, PFN_DOWN(end - 1)) { + if (~page->vm_max_prot_bits & vm_prot_bits) { + ret = -EACCES; + break; + } + + /* Reschedule on every XA_CHECK_SCHED iteration. */ + if (!(++count % XA_CHECK_SCHED)) { + xas_pause(&xas); + xas_unlock(&xas); + mutex_unlock(&encl->lock); + + cond_resched(); + + mutex_lock(&encl->lock); + xas_lock(&xas); + } + } + xas_unlock(&xas); + mutex_unlock(&encl->lock); + + return ret; +} + +static int sgx_vma_mprotect(struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned long newflags) +{ + return sgx_encl_may_map(vma->vm_private_data, start, end, newflags); +} + +static int sgx_encl_debug_read(struct sgx_encl *encl, struct sgx_encl_page *page, + unsigned long addr, void *data) +{ + unsigned long offset = addr & ~PAGE_MASK; + int ret; + + + ret = __edbgrd(sgx_get_epc_virt_addr(page->epc_page) + offset, data); + if (ret) + return -EIO; + + return 0; +} + +static int sgx_encl_debug_write(struct sgx_encl *encl, struct sgx_encl_page *page, + unsigned long addr, void *data) +{ + unsigned long offset = addr & ~PAGE_MASK; + int ret; + + ret = __edbgwr(sgx_get_epc_virt_addr(page->epc_page) + offset, data); + if (ret) + return -EIO; + + return 0; +} + +/* + * Load an enclave page to EPC if required, and take encl->lock. + */ +static struct sgx_encl_page *sgx_encl_reserve_page(struct sgx_encl *encl, + unsigned long addr, + unsigned long vm_flags) +{ + struct sgx_encl_page *entry; + + for ( ; ; ) { + mutex_lock(&encl->lock); + + entry = sgx_encl_load_page(encl, addr, vm_flags); + if (PTR_ERR(entry) != -EBUSY) + break; + + mutex_unlock(&encl->lock); + } + + if (IS_ERR(entry)) + mutex_unlock(&encl->lock); + + return entry; +} + +static int sgx_vma_access(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write) +{ + struct sgx_encl *encl = vma->vm_private_data; + struct sgx_encl_page *entry = NULL; + char data[sizeof(unsigned long)]; + unsigned long align; + int offset; + int cnt; + int ret = 0; + int i; + + /* + * If process was forked, VMA is still there but vm_private_data is set + * to NULL. + */ + if (!encl) + return -EFAULT; + + if (!test_bit(SGX_ENCL_DEBUG, &encl->flags)) + return -EFAULT; + + for (i = 0; i < len; i += cnt) { + entry = sgx_encl_reserve_page(encl, (addr + i) & PAGE_MASK, + vma->vm_flags); + if (IS_ERR(entry)) { + ret = PTR_ERR(entry); + break; + } + + align = ALIGN_DOWN(addr + i, sizeof(unsigned long)); + offset = (addr + i) & (sizeof(unsigned long) - 1); + cnt = sizeof(unsigned long) - offset; + cnt = min(cnt, len - i); + + ret = sgx_encl_debug_read(encl, entry, align, data); + if (ret) + goto out; + + if (write) { + memcpy(data + offset, buf + i, cnt); + ret = sgx_encl_debug_write(encl, entry, align, data); + if (ret) + goto out; + } else { + memcpy(buf + i, data + offset, cnt); + } + +out: + mutex_unlock(&encl->lock); + + if (ret) + break; + } + + return ret < 0 ? ret : i; +} + +const struct vm_operations_struct sgx_vm_ops = { + .fault = sgx_vma_fault, + .mprotect = sgx_vma_mprotect, + .open = sgx_vma_open, + .access = sgx_vma_access, +}; + +/** + * sgx_encl_release - Destroy an enclave instance + * @kref: address of a kref inside &sgx_encl + * + * Used together with kref_put(). Frees all the resources associated with the + * enclave and the instance itself. + */ +void sgx_encl_release(struct kref *ref) +{ + struct sgx_encl *encl = container_of(ref, struct sgx_encl, refcount); + struct sgx_va_page *va_page; + struct sgx_encl_page *entry; + unsigned long index; + + xa_for_each(&encl->page_array, index, entry) { + if (entry->epc_page) { + /* + * The page and its radix tree entry cannot be freed + * if the page is being held by the reclaimer. + */ + if (sgx_unmark_page_reclaimable(entry->epc_page)) + continue; + + sgx_free_epc_page(entry->epc_page); + encl->secs_child_cnt--; + entry->epc_page = NULL; + } + + kfree(entry); + } + + xa_destroy(&encl->page_array); + + if (!encl->secs_child_cnt && encl->secs.epc_page) { + sgx_free_epc_page(encl->secs.epc_page); + encl->secs.epc_page = NULL; + } + + while (!list_empty(&encl->va_pages)) { + va_page = list_first_entry(&encl->va_pages, struct sgx_va_page, + list); + list_del(&va_page->list); + sgx_free_epc_page(va_page->epc_page); + kfree(va_page); + } + + if (encl->backing) + fput(encl->backing); + + cleanup_srcu_struct(&encl->srcu); + + WARN_ON_ONCE(!list_empty(&encl->mm_list)); + + /* Detect EPC page leak's. */ + WARN_ON_ONCE(encl->secs_child_cnt); + WARN_ON_ONCE(encl->secs.epc_page); + + kfree(encl); +} + +/* + * 'mm' is exiting and no longer needs mmu notifications. + */ +static void sgx_mmu_notifier_release(struct mmu_notifier *mn, + struct mm_struct *mm) +{ + struct sgx_encl_mm *encl_mm = container_of(mn, struct sgx_encl_mm, mmu_notifier); + struct sgx_encl_mm *tmp = NULL; + + /* + * The enclave itself can remove encl_mm. Note, objects can't be moved + * off an RCU protected list, but deletion is ok. + */ + spin_lock(&encl_mm->encl->mm_lock); + list_for_each_entry(tmp, &encl_mm->encl->mm_list, list) { + if (tmp == encl_mm) { + list_del_rcu(&encl_mm->list); + break; + } + } + spin_unlock(&encl_mm->encl->mm_lock); + + if (tmp == encl_mm) { + synchronize_srcu(&encl_mm->encl->srcu); + mmu_notifier_put(mn); + } +} + +static void sgx_mmu_notifier_free(struct mmu_notifier *mn) +{ + struct sgx_encl_mm *encl_mm = container_of(mn, struct sgx_encl_mm, mmu_notifier); + + kfree(encl_mm); +} + +static const struct mmu_notifier_ops sgx_mmu_notifier_ops = { + .release = sgx_mmu_notifier_release, + .free_notifier = sgx_mmu_notifier_free, +}; + +static struct sgx_encl_mm *sgx_encl_find_mm(struct sgx_encl *encl, + struct mm_struct *mm) +{ + struct sgx_encl_mm *encl_mm = NULL; + struct sgx_encl_mm *tmp; + int idx; + + idx = srcu_read_lock(&encl->srcu); + + list_for_each_entry_rcu(tmp, &encl->mm_list, list) { + if (tmp->mm == mm) { + encl_mm = tmp; + break; + } + } + + srcu_read_unlock(&encl->srcu, idx); + + return encl_mm; +} + +int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm) +{ + struct sgx_encl_mm *encl_mm; + int ret; + + /* + * Even though a single enclave may be mapped into an mm more than once, + * each 'mm' only appears once on encl->mm_list. This is guaranteed by + * holding the mm's mmap lock for write before an mm can be added or + * remove to an encl->mm_list. + */ + mmap_assert_write_locked(mm); + + /* + * It's possible that an entry already exists in the mm_list, because it + * is removed only on VFS release or process exit. + */ + if (sgx_encl_find_mm(encl, mm)) + return 0; + + encl_mm = kzalloc(sizeof(*encl_mm), GFP_KERNEL); + if (!encl_mm) + return -ENOMEM; + + encl_mm->encl = encl; + encl_mm->mm = mm; + encl_mm->mmu_notifier.ops = &sgx_mmu_notifier_ops; + + ret = __mmu_notifier_register(&encl_mm->mmu_notifier, mm); + if (ret) { + kfree(encl_mm); + return ret; + } + + spin_lock(&encl->mm_lock); + list_add_rcu(&encl_mm->list, &encl->mm_list); + /* Pairs with smp_rmb() in sgx_reclaimer_block(). */ + smp_wmb(); + encl->mm_list_version++; + spin_unlock(&encl->mm_lock); + + return 0; +} + +static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl, + pgoff_t index) +{ + struct inode *inode = encl->backing->f_path.dentry->d_inode; + struct address_space *mapping = inode->i_mapping; + gfp_t gfpmask = mapping_gfp_mask(mapping); + + return shmem_read_mapping_page_gfp(mapping, index, gfpmask); +} + +/** + * sgx_encl_get_backing() - Pin the backing storage + * @encl: an enclave pointer + * @page_index: enclave page index + * @backing: data for accessing backing storage for the page + * + * Pin the backing storage pages for storing the encrypted contents and Paging + * Crypto MetaData (PCMD) of an enclave page. + * + * Return: + * 0 on success, + * -errno otherwise. + */ +int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, + struct sgx_backing *backing) +{ + pgoff_t pcmd_index = PFN_DOWN(encl->size) + 1 + (page_index >> 5); + struct page *contents; + struct page *pcmd; + + contents = sgx_encl_get_backing_page(encl, page_index); + if (IS_ERR(contents)) + return PTR_ERR(contents); + + pcmd = sgx_encl_get_backing_page(encl, pcmd_index); + if (IS_ERR(pcmd)) { + put_page(contents); + return PTR_ERR(pcmd); + } + + backing->page_index = page_index; + backing->contents = contents; + backing->pcmd = pcmd; + backing->pcmd_offset = + (page_index & (PAGE_SIZE / sizeof(struct sgx_pcmd) - 1)) * + sizeof(struct sgx_pcmd); + + return 0; +} + +/** + * sgx_encl_put_backing() - Unpin the backing storage + * @backing: data for accessing backing storage for the page + * @do_write: mark pages dirty + */ +void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write) +{ + if (do_write) { + set_page_dirty(backing->pcmd); + set_page_dirty(backing->contents); + } + + put_page(backing->pcmd); + put_page(backing->contents); +} + +static int sgx_encl_test_and_clear_young_cb(pte_t *ptep, unsigned long addr, + void *data) +{ + pte_t pte; + int ret; + + ret = pte_young(*ptep); + if (ret) { + pte = pte_mkold(*ptep); + set_pte_at((struct mm_struct *)data, addr, ptep, pte); + } + + return ret; +} + +/** + * sgx_encl_test_and_clear_young() - Test and reset the accessed bit + * @mm: mm_struct that is checked + * @page: enclave page to be tested for recent access + * + * Checks the Access (A) bit from the PTE corresponding to the enclave page and + * clears it. + * + * Return: 1 if the page has been recently accessed and 0 if not. + */ +int sgx_encl_test_and_clear_young(struct mm_struct *mm, + struct sgx_encl_page *page) +{ + unsigned long addr = page->desc & PAGE_MASK; + struct sgx_encl *encl = page->encl; + struct vm_area_struct *vma; + int ret; + + ret = sgx_encl_find(mm, addr, &vma); + if (ret) + return 0; + + if (encl != vma->vm_private_data) + return 0; + + ret = apply_to_page_range(vma->vm_mm, addr, PAGE_SIZE, + sgx_encl_test_and_clear_young_cb, vma->vm_mm); + if (ret < 0) + return 0; + + return ret; +} + +/** + * sgx_alloc_va_page() - Allocate a Version Array (VA) page + * + * Allocate a free EPC page and convert it to a Version Array (VA) page. + * + * Return: + * a VA page, + * -errno otherwise + */ +struct sgx_epc_page *sgx_alloc_va_page(void) +{ + struct sgx_epc_page *epc_page; + int ret; + + epc_page = sgx_alloc_epc_page(NULL, true); + if (IS_ERR(epc_page)) + return ERR_CAST(epc_page); + + ret = __epa(sgx_get_epc_virt_addr(epc_page)); + if (ret) { + WARN_ONCE(1, "EPA returned %d (0x%x)", ret, ret); + sgx_free_epc_page(epc_page); + return ERR_PTR(-EFAULT); + } + + return epc_page; +} + +/** + * sgx_alloc_va_slot - allocate a VA slot + * @va_page: a &struct sgx_va_page instance + * + * Allocates a slot from a &struct sgx_va_page instance. + * + * Return: offset of the slot inside the VA page + */ +unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page) +{ + int slot = find_first_zero_bit(va_page->slots, SGX_VA_SLOT_COUNT); + + if (slot < SGX_VA_SLOT_COUNT) + set_bit(slot, va_page->slots); + + return slot << 3; +} + +/** + * sgx_free_va_slot - free a VA slot + * @va_page: a &struct sgx_va_page instance + * @offset: offset of the slot inside the VA page + * + * Frees a slot from a &struct sgx_va_page instance. + */ +void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset) +{ + clear_bit(offset >> 3, va_page->slots); +} + +/** + * sgx_va_page_full - is the VA page full? + * @va_page: a &struct sgx_va_page instance + * + * Return: true if all slots have been taken + */ +bool sgx_va_page_full(struct sgx_va_page *va_page) +{ + int slot = find_first_zero_bit(va_page->slots, SGX_VA_SLOT_COUNT); + + return slot == SGX_VA_SLOT_COUNT; +} diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h new file mode 100644 index 000000000000..d8d30ccbef4c --- /dev/null +++ b/arch/x86/kernel/cpu/sgx/encl.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/** + * Copyright(c) 2016-20 Intel Corporation. + * + * Contains the software defined data structures for enclaves. + */ +#ifndef _X86_ENCL_H +#define _X86_ENCL_H + +#include <linux/cpumask.h> +#include <linux/kref.h> +#include <linux/list.h> +#include <linux/mm_types.h> +#include <linux/mmu_notifier.h> +#include <linux/mutex.h> +#include <linux/notifier.h> +#include <linux/srcu.h> +#include <linux/workqueue.h> +#include <linux/xarray.h> +#include "sgx.h" + +/* 'desc' bits holding the offset in the VA (version array) page. */ +#define SGX_ENCL_PAGE_VA_OFFSET_MASK GENMASK_ULL(11, 3) + +/* 'desc' bit marking that the page is being reclaimed. */ +#define SGX_ENCL_PAGE_BEING_RECLAIMED BIT(3) + +struct sgx_encl_page { + unsigned long desc; + unsigned long vm_max_prot_bits; + struct sgx_epc_page *epc_page; + struct sgx_encl *encl; + struct sgx_va_page *va_page; +}; + +enum sgx_encl_flags { + SGX_ENCL_IOCTL = BIT(0), + SGX_ENCL_DEBUG = BIT(1), + SGX_ENCL_CREATED = BIT(2), + SGX_ENCL_INITIALIZED = BIT(3), +}; + +struct sgx_encl_mm { + struct sgx_encl *encl; + struct mm_struct *mm; + struct list_head list; + struct mmu_notifier mmu_notifier; +}; + +struct sgx_encl { + unsigned long base; + unsigned long size; + unsigned long flags; + unsigned int page_cnt; + unsigned int secs_child_cnt; + struct mutex lock; + struct xarray page_array; + struct sgx_encl_page secs; + unsigned long attributes; + unsigned long attributes_mask; + + cpumask_t cpumask; + struct file *backing; + struct kref refcount; + struct list_head va_pages; + unsigned long mm_list_version; + struct list_head mm_list; + spinlock_t mm_lock; + struct srcu_struct srcu; +}; + +#define SGX_VA_SLOT_COUNT 512 + +struct sgx_va_page { + struct sgx_epc_page *epc_page; + DECLARE_BITMAP(slots, SGX_VA_SLOT_COUNT); + struct list_head list; +}; + +struct sgx_backing { + pgoff_t page_index; + struct page *contents; + struct page *pcmd; + unsigned long pcmd_offset; +}; + +extern const struct vm_operations_struct sgx_vm_ops; + +static inline int sgx_encl_find(struct mm_struct *mm, unsigned long addr, + struct vm_area_struct **vma) +{ + struct vm_area_struct *result; + + result = find_vma(mm, addr); + if (!result || result->vm_ops != &sgx_vm_ops || addr < result->vm_start) + return -EINVAL; + + *vma = result; + + return 0; +} + +int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start, + unsigned long end, unsigned long vm_flags); + +void sgx_encl_release(struct kref *ref); +int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm); +int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, + struct sgx_backing *backing); +void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write); +int sgx_encl_test_and_clear_young(struct mm_struct *mm, + struct sgx_encl_page *page); + +struct sgx_epc_page *sgx_alloc_va_page(void); +unsigned int sgx_alloc_va_slot(struct sgx_va_page *va_page); +void sgx_free_va_slot(struct sgx_va_page *va_page, unsigned int offset); +bool sgx_va_page_full(struct sgx_va_page *va_page); + +#endif /* _X86_ENCL_H */ diff --git a/arch/x86/kernel/cpu/sgx/encls.h b/arch/x86/kernel/cpu/sgx/encls.h new file mode 100644 index 000000000000..443188fe7e70 --- /dev/null +++ b/arch/x86/kernel/cpu/sgx/encls.h @@ -0,0 +1,231 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _X86_ENCLS_H +#define _X86_ENCLS_H + +#include <linux/bitops.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/rwsem.h> +#include <linux/types.h> +#include <asm/asm.h> +#include <asm/traps.h> +#include "sgx.h" + +enum sgx_encls_function { + ECREATE = 0x00, + EADD = 0x01, + EINIT = 0x02, + EREMOVE = 0x03, + EDGBRD = 0x04, + EDGBWR = 0x05, + EEXTEND = 0x06, + ELDU = 0x08, + EBLOCK = 0x09, + EPA = 0x0A, + EWB = 0x0B, + ETRACK = 0x0C, +}; + +/** + * ENCLS_FAULT_FLAG - flag signifying an ENCLS return code is a trapnr + * + * ENCLS has its own (positive value) error codes and also generates + * ENCLS specific #GP and #PF faults. And the ENCLS values get munged + * with system error codes as everything percolates back up the stack. + * Unfortunately (for us), we need to precisely identify each unique + * error code, e.g. the action taken if EWB fails varies based on the + * type of fault and on the exact SGX error code, i.e. we can't simply + * convert all faults to -EFAULT. + * + * To make all three error types coexist, we set bit 30 to identify an + * ENCLS fault. Bit 31 (technically bits N:31) is used to differentiate + * between positive (faults and SGX error codes) and negative (system + * error codes) values. + */ +#define ENCLS_FAULT_FLAG 0x40000000 + +/* Retrieve the encoded trapnr from the specified return code. */ +#define ENCLS_TRAPNR(r) ((r) & ~ENCLS_FAULT_FLAG) + +/* Issue a WARN() about an ENCLS function. */ +#define ENCLS_WARN(r, name) { \ + do { \ + int _r = (r); \ + WARN_ONCE(_r, "%s returned %d (0x%x)\n", (name), _r, _r); \ + } while (0); \ +} + +/** + * encls_failed() - Check if an ENCLS function failed + * @ret: the return value of an ENCLS function call + * + * Check if an ENCLS function failed. This happens when the function causes a + * fault that is not caused by an EPCM conflict or when the function returns a + * non-zero value. + */ +static inline bool encls_failed(int ret) +{ + if (ret & ENCLS_FAULT_FLAG) + return ENCLS_TRAPNR(ret) != X86_TRAP_PF; + + return !!ret; +} + +/** + * __encls_ret_N - encode an ENCLS function that returns an error code in EAX + * @rax: function number + * @inputs: asm inputs for the function + * + * Emit assembly for an ENCLS function that returns an error code, e.g. EREMOVE. + * And because SGX isn't complex enough as it is, function that return an error + * code also modify flags. + * + * Return: + * 0 on success, + * SGX error code on failure + */ +#define __encls_ret_N(rax, inputs...) \ + ({ \ + int ret; \ + asm volatile( \ + "1: .byte 0x0f, 0x01, 0xcf;\n\t" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: orl $"__stringify(ENCLS_FAULT_FLAG)",%%eax\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE_FAULT(1b, 3b) \ + : "=a"(ret) \ + : "a"(rax), inputs \ + : "memory", "cc"); \ + ret; \ + }) + +#define __encls_ret_1(rax, rcx) \ + ({ \ + __encls_ret_N(rax, "c"(rcx)); \ + }) + +#define __encls_ret_2(rax, rbx, rcx) \ + ({ \ + __encls_ret_N(rax, "b"(rbx), "c"(rcx)); \ + }) + +#define __encls_ret_3(rax, rbx, rcx, rdx) \ + ({ \ + __encls_ret_N(rax, "b"(rbx), "c"(rcx), "d"(rdx)); \ + }) + +/** + * __encls_N - encode an ENCLS function that doesn't return an error code + * @rax: function number + * @rbx_out: optional output variable + * @inputs: asm inputs for the function + * + * Emit assembly for an ENCLS function that does not return an error code, e.g. + * ECREATE. Leaves without error codes either succeed or fault. @rbx_out is an + * optional parameter for use by EDGBRD, which returns the requested value in + * RBX. + * + * Return: + * 0 on success, + * trapnr with ENCLS_FAULT_FLAG set on fault + */ +#define __encls_N(rax, rbx_out, inputs...) \ + ({ \ + int ret; \ + asm volatile( \ + "1: .byte 0x0f, 0x01, 0xcf;\n\t" \ + " xor %%eax,%%eax;\n" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: orl $"__stringify(ENCLS_FAULT_FLAG)",%%eax\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE_FAULT(1b, 3b) \ + : "=a"(ret), "=b"(rbx_out) \ + : "a"(rax), inputs \ + : "memory"); \ + ret; \ + }) + +#define __encls_2(rax, rbx, rcx) \ + ({ \ + unsigned long ign_rbx_out; \ + __encls_N(rax, ign_rbx_out, "b"(rbx), "c"(rcx)); \ + }) + +#define __encls_1_1(rax, data, rcx) \ + ({ \ + unsigned long rbx_out; \ + int ret = __encls_N(rax, rbx_out, "c"(rcx)); \ + if (!ret) \ + data = rbx_out; \ + ret; \ + }) + +static inline int __ecreate(struct sgx_pageinfo *pginfo, void *secs) +{ + return __encls_2(ECREATE, pginfo, secs); +} + +static inline int __eextend(void *secs, void *addr) +{ + return __encls_2(EEXTEND, secs, addr); +} + +static inline int __eadd(struct sgx_pageinfo *pginfo, void *addr) +{ + return __encls_2(EADD, pginfo, addr); +} + +static inline int __einit(void *sigstruct, void *token, void *secs) +{ + return __encls_ret_3(EINIT, sigstruct, secs, token); +} + +static inline int __eremove(void *addr) +{ + return __encls_ret_1(EREMOVE, addr); +} + +static inline int __edbgwr(void *addr, unsigned long *data) +{ + return __encls_2(EDGBWR, *data, addr); +} + +static inline int __edbgrd(void *addr, unsigned long *data) +{ + return __encls_1_1(EDGBRD, *data, addr); +} + +static inline int __etrack(void *addr) +{ + return __encls_ret_1(ETRACK, addr); +} + +static inline int __eldu(struct sgx_pageinfo *pginfo, void *addr, + void *va) +{ + return __encls_ret_3(ELDU, pginfo, addr, va); +} + +static inline int __eblock(void *addr) +{ + return __encls_ret_1(EBLOCK, addr); +} + +static inline int __epa(void *addr) +{ + unsigned long rbx = SGX_PAGE_TYPE_VA; + + return __encls_2(EPA, rbx, addr); +} + +static inline int __ewb(struct sgx_pageinfo *pginfo, void *addr, + void *va) +{ + return __encls_ret_3(EWB, pginfo, addr, va); +} + +#endif /* _X86_ENCLS_H */ diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c new file mode 100644 index 000000000000..90a5caf76939 --- /dev/null +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -0,0 +1,716 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2016-20 Intel Corporation. */ + +#include <asm/mman.h> +#include <linux/mman.h> +#include <linux/delay.h> +#include <linux/file.h> +#include <linux/hashtable.h> +#include <linux/highmem.h> +#include <linux/ratelimit.h> +#include <linux/sched/signal.h> +#include <linux/shmem_fs.h> +#include <linux/slab.h> +#include <linux/suspend.h> +#include "driver.h" +#include "encl.h" +#include "encls.h" + +static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl) +{ + struct sgx_va_page *va_page = NULL; + void *err; + + BUILD_BUG_ON(SGX_VA_SLOT_COUNT != + (SGX_ENCL_PAGE_VA_OFFSET_MASK >> 3) + 1); + + if (!(encl->page_cnt % SGX_VA_SLOT_COUNT)) { + va_page = kzalloc(sizeof(*va_page), GFP_KERNEL); + if (!va_page) + return ERR_PTR(-ENOMEM); + + va_page->epc_page = sgx_alloc_va_page(); + if (IS_ERR(va_page->epc_page)) { + err = ERR_CAST(va_page->epc_page); + kfree(va_page); + return err; + } + + WARN_ON_ONCE(encl->page_cnt % SGX_VA_SLOT_COUNT); + } + encl->page_cnt++; + return va_page; +} + +static void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page) +{ + encl->page_cnt--; + + if (va_page) { + sgx_free_epc_page(va_page->epc_page); + list_del(&va_page->list); + kfree(va_page); + } +} + +static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs) +{ + struct sgx_epc_page *secs_epc; + struct sgx_va_page *va_page; + struct sgx_pageinfo pginfo; + struct sgx_secinfo secinfo; + unsigned long encl_size; + struct file *backing; + long ret; + + va_page = sgx_encl_grow(encl); + if (IS_ERR(va_page)) + return PTR_ERR(va_page); + else if (va_page) + list_add(&va_page->list, &encl->va_pages); + /* else the tail page of the VA page list had free slots. */ + + /* The extra page goes to SECS. */ + encl_size = secs->size + PAGE_SIZE; + + backing = shmem_file_setup("SGX backing", encl_size + (encl_size >> 5), + VM_NORESERVE); + if (IS_ERR(backing)) { + ret = PTR_ERR(backing); + goto err_out_shrink; + } + + encl->backing = backing; + + secs_epc = sgx_alloc_epc_page(&encl->secs, true); + if (IS_ERR(secs_epc)) { + ret = PTR_ERR(secs_epc); + goto err_out_backing; + } + + encl->secs.epc_page = secs_epc; + + pginfo.addr = 0; + pginfo.contents = (unsigned long)secs; + pginfo.metadata = (unsigned long)&secinfo; + pginfo.secs = 0; + memset(&secinfo, 0, sizeof(secinfo)); + + ret = __ecreate((void *)&pginfo, sgx_get_epc_virt_addr(secs_epc)); + if (ret) { + ret = -EIO; + goto err_out; + } + + if (secs->attributes & SGX_ATTR_DEBUG) + set_bit(SGX_ENCL_DEBUG, &encl->flags); + + encl->secs.encl = encl; + encl->base = secs->base; + encl->size = secs->size; + encl->attributes = secs->attributes; + encl->attributes_mask = SGX_ATTR_DEBUG | SGX_ATTR_MODE64BIT | SGX_ATTR_KSS; + + /* Set only after completion, as encl->lock has not been taken. */ + set_bit(SGX_ENCL_CREATED, &encl->flags); + + return 0; + +err_out: + sgx_free_epc_page(encl->secs.epc_page); + encl->secs.epc_page = NULL; + +err_out_backing: + fput(encl->backing); + encl->backing = NULL; + +err_out_shrink: + sgx_encl_shrink(encl, va_page); + + return ret; +} + +/** + * sgx_ioc_enclave_create() - handler for %SGX_IOC_ENCLAVE_CREATE + * @encl: An enclave pointer. + * @arg: The ioctl argument. + * + * Allocate kernel data structures for the enclave and invoke ECREATE. + * + * Return: + * - 0: Success. + * - -EIO: ECREATE failed. + * - -errno: POSIX error. + */ +static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg) +{ + struct sgx_enclave_create create_arg; + void *secs; + int ret; + + if (test_bit(SGX_ENCL_CREATED, &encl->flags)) + return -EINVAL; + + if (copy_from_user(&create_arg, arg, sizeof(create_arg))) + return -EFAULT; + + secs = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!secs) + return -ENOMEM; + + if (copy_from_user(secs, (void __user *)create_arg.src, PAGE_SIZE)) + ret = -EFAULT; + else + ret = sgx_encl_create(encl, secs); + + kfree(secs); + return ret; +} + +static struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl, + unsigned long offset, + u64 secinfo_flags) +{ + struct sgx_encl_page *encl_page; + unsigned long prot; + + encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL); + if (!encl_page) + return ERR_PTR(-ENOMEM); + + encl_page->desc = encl->base + offset; + encl_page->encl = encl; + + prot = _calc_vm_trans(secinfo_flags, SGX_SECINFO_R, PROT_READ) | + _calc_vm_trans(secinfo_flags, SGX_SECINFO_W, PROT_WRITE) | + _calc_vm_trans(secinfo_flags, SGX_SECINFO_X, PROT_EXEC); + + /* + * TCS pages must always RW set for CPU access while the SECINFO + * permissions are *always* zero - the CPU ignores the user provided + * values and silently overwrites them with zero permissions. + */ + if ((secinfo_flags & SGX_SECINFO_PAGE_TYPE_MASK) == SGX_SECINFO_TCS) + prot |= PROT_READ | PROT_WRITE; + + /* Calculate maximum of the VM flags for the page. */ + encl_page->vm_max_prot_bits = calc_vm_prot_bits(prot, 0); + + return encl_page; +} + +static int sgx_validate_secinfo(struct sgx_secinfo *secinfo) +{ + u64 perm = secinfo->flags & SGX_SECINFO_PERMISSION_MASK; + u64 pt = secinfo->flags & SGX_SECINFO_PAGE_TYPE_MASK; + + if (pt != SGX_SECINFO_REG && pt != SGX_SECINFO_TCS) + return -EINVAL; + + if ((perm & SGX_SECINFO_W) && !(perm & SGX_SECINFO_R)) + return -EINVAL; + + /* + * CPU will silently overwrite the permissions as zero, which means + * that we need to validate it ourselves. + */ + if (pt == SGX_SECINFO_TCS && perm) + return -EINVAL; + + if (secinfo->flags & SGX_SECINFO_RESERVED_MASK) + return -EINVAL; + + if (memchr_inv(secinfo->reserved, 0, sizeof(secinfo->reserved))) + return -EINVAL; + + return 0; +} + +static int __sgx_encl_add_page(struct sgx_encl *encl, + struct sgx_encl_page *encl_page, + struct sgx_epc_page *epc_page, + struct sgx_secinfo *secinfo, unsigned long src) +{ + struct sgx_pageinfo pginfo; + struct vm_area_struct *vma; + struct page *src_page; + int ret; + + /* Deny noexec. */ + vma = find_vma(current->mm, src); + if (!vma) + return -EFAULT; + + if (!(vma->vm_flags & VM_MAYEXEC)) + return -EACCES; + + ret = get_user_pages(src, 1, 0, &src_page, NULL); + if (ret < 1) + return -EFAULT; + + pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page); + pginfo.addr = encl_page->desc & PAGE_MASK; + pginfo.metadata = (unsigned long)secinfo; + pginfo.contents = (unsigned long)kmap_atomic(src_page); + + ret = __eadd(&pginfo, sgx_get_epc_virt_addr(epc_page)); + + kunmap_atomic((void *)pginfo.contents); + put_page(src_page); + + return ret ? -EIO : 0; +} + +/* + * If the caller requires measurement of the page as a proof for the content, + * use EEXTEND to add a measurement for 256 bytes of the page. Repeat this + * operation until the entire page is measured." + */ +static int __sgx_encl_extend(struct sgx_encl *encl, + struct sgx_epc_page *epc_page) +{ + unsigned long offset; + int ret; + + for (offset = 0; offset < PAGE_SIZE; offset += SGX_EEXTEND_BLOCK_SIZE) { + ret = __eextend(sgx_get_epc_virt_addr(encl->secs.epc_page), + sgx_get_epc_virt_addr(epc_page) + offset); + if (ret) { + if (encls_failed(ret)) + ENCLS_WARN(ret, "EEXTEND"); + + return -EIO; + } + } + + return 0; +} + +static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src, + unsigned long offset, struct sgx_secinfo *secinfo, + unsigned long flags) +{ + struct sgx_encl_page *encl_page; + struct sgx_epc_page *epc_page; + struct sgx_va_page *va_page; + int ret; + + encl_page = sgx_encl_page_alloc(encl, offset, secinfo->flags); + if (IS_ERR(encl_page)) + return PTR_ERR(encl_page); + + epc_page = sgx_alloc_epc_page(encl_page, true); + if (IS_ERR(epc_page)) { + kfree(encl_page); + return PTR_ERR(epc_page); + } + + va_page = sgx_encl_grow(encl); + if (IS_ERR(va_page)) { + ret = PTR_ERR(va_page); + goto err_out_free; + } + + mmap_read_lock(current->mm); + mutex_lock(&encl->lock); + + /* + * Adding to encl->va_pages must be done under encl->lock. Ditto for + * deleting (via sgx_encl_shrink()) in the error path. + */ + if (va_page) + list_add(&va_page->list, &encl->va_pages); + + /* + * Insert prior to EADD in case of OOM. EADD modifies MRENCLAVE, i.e. + * can't be gracefully unwound, while failure on EADD/EXTEND is limited + * to userspace errors (or kernel/hardware bugs). + */ + ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc), + encl_page, GFP_KERNEL); + if (ret) + goto err_out_unlock; + + ret = __sgx_encl_add_page(encl, encl_page, epc_page, secinfo, + src); + if (ret) + goto err_out; + + /* + * Complete the "add" before doing the "extend" so that the "add" + * isn't in a half-baked state in the extremely unlikely scenario + * the enclave will be destroyed in response to EEXTEND failure. + */ + encl_page->encl = encl; + encl_page->epc_page = epc_page; + encl->secs_child_cnt++; + + if (flags & SGX_PAGE_MEASURE) { + ret = __sgx_encl_extend(encl, epc_page); + if (ret) + goto err_out; + } + + sgx_mark_page_reclaimable(encl_page->epc_page); + mutex_unlock(&encl->lock); + mmap_read_unlock(current->mm); + return ret; + +err_out: + xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc)); + +err_out_unlock: + sgx_encl_shrink(encl, va_page); + mutex_unlock(&encl->lock); + mmap_read_unlock(current->mm); + +err_out_free: + sgx_free_epc_page(epc_page); + kfree(encl_page); + + return ret; +} + +/** + * sgx_ioc_enclave_add_pages() - The handler for %SGX_IOC_ENCLAVE_ADD_PAGES + * @encl: an enclave pointer + * @arg: a user pointer to a struct sgx_enclave_add_pages instance + * + * Add one or more pages to an uninitialized enclave, and optionally extend the + * measurement with the contents of the page. The SECINFO and measurement mask + * are applied to all pages. + * + * A SECINFO for a TCS is required to always contain zero permissions because + * CPU silently zeros them. Allowing anything else would cause a mismatch in + * the measurement. + * + * mmap()'s protection bits are capped by the page permissions. For each page + * address, the maximum protection bits are computed with the following + * heuristics: + * + * 1. A regular page: PROT_R, PROT_W and PROT_X match the SECINFO permissions. + * 2. A TCS page: PROT_R | PROT_W. + * + * mmap() is not allowed to surpass the minimum of the maximum protection bits + * within the given address range. + * + * The function deinitializes kernel data structures for enclave and returns + * -EIO in any of the following conditions: + * + * - Enclave Page Cache (EPC), the physical memory holding enclaves, has + * been invalidated. This will cause EADD and EEXTEND to fail. + * - If the source address is corrupted somehow when executing EADD. + * + * Return: + * - 0: Success. + * - -EACCES: The source page is located in a noexec partition. + * - -ENOMEM: Out of EPC pages. + * - -EINTR: The call was interrupted before data was processed. + * - -EIO: Either EADD or EEXTEND failed because invalid source address + * or power cycle. + * - -errno: POSIX error. + */ +static long sgx_ioc_enclave_add_pages(struct sgx_encl *encl, void __user *arg) +{ + struct sgx_enclave_add_pages add_arg; + struct sgx_secinfo secinfo; + unsigned long c; + int ret; + + if (!test_bit(SGX_ENCL_CREATED, &encl->flags) || + test_bit(SGX_ENCL_INITIALIZED, &encl->flags)) + return -EINVAL; + + if (copy_from_user(&add_arg, arg, sizeof(add_arg))) + return -EFAULT; + + if (!IS_ALIGNED(add_arg.offset, PAGE_SIZE) || + !IS_ALIGNED(add_arg.src, PAGE_SIZE)) + return -EINVAL; + + if (!add_arg.length || add_arg.length & (PAGE_SIZE - 1)) + return -EINVAL; + + if (add_arg.offset + add_arg.length - PAGE_SIZE >= encl->size) + return -EINVAL; + + if (copy_from_user(&secinfo, (void __user *)add_arg.secinfo, + sizeof(secinfo))) + return -EFAULT; + + if (sgx_validate_secinfo(&secinfo)) + return -EINVAL; + + for (c = 0 ; c < add_arg.length; c += PAGE_SIZE) { + if (signal_pending(current)) { + if (!c) + ret = -ERESTARTSYS; + + break; + } + + if (need_resched()) + cond_resched(); + + ret = sgx_encl_add_page(encl, add_arg.src + c, add_arg.offset + c, + &secinfo, add_arg.flags); + if (ret) + break; + } + + add_arg.count = c; + + if (copy_to_user(arg, &add_arg, sizeof(add_arg))) + return -EFAULT; + + return ret; +} + +static int __sgx_get_key_hash(struct crypto_shash *tfm, const void *modulus, + void *hash) +{ + SHASH_DESC_ON_STACK(shash, tfm); + + shash->tfm = tfm; + + return crypto_shash_digest(shash, modulus, SGX_MODULUS_SIZE, hash); +} + +static int sgx_get_key_hash(const void *modulus, void *hash) +{ + struct crypto_shash *tfm; + int ret; + + tfm = crypto_alloc_shash("sha256", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + ret = __sgx_get_key_hash(tfm, modulus, hash); + + crypto_free_shash(tfm); + return ret; +} + +static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct, + void *token) +{ + u64 mrsigner[4]; + int i, j, k; + void *addr; + int ret; + + /* + * Deny initializing enclaves with attributes (namely provisioning) + * that have not been explicitly allowed. + */ + if (encl->attributes & ~encl->attributes_mask) + return -EACCES; + + /* + * Attributes should not be enforced *only* against what's available on + * platform (done in sgx_encl_create) but checked and enforced against + * the mask for enforcement in sigstruct. For example an enclave could + * opt to sign with AVX bit in xfrm, but still be loadable on a platform + * without it if the sigstruct->body.attributes_mask does not turn that + * bit on. + */ + if (sigstruct->body.attributes & sigstruct->body.attributes_mask & + sgx_attributes_reserved_mask) + return -EINVAL; + + if (sigstruct->body.miscselect & sigstruct->body.misc_mask & + sgx_misc_reserved_mask) + return -EINVAL; + + if (sigstruct->body.xfrm & sigstruct->body.xfrm_mask & + sgx_xfrm_reserved_mask) + return -EINVAL; + + ret = sgx_get_key_hash(sigstruct->modulus, mrsigner); + if (ret) + return ret; + + mutex_lock(&encl->lock); + + /* + * ENCLS[EINIT] is interruptible because it has such a high latency, + * e.g. 50k+ cycles on success. If an IRQ/NMI/SMI becomes pending, + * EINIT may fail with SGX_UNMASKED_EVENT so that the event can be + * serviced. + */ + for (i = 0; i < SGX_EINIT_SLEEP_COUNT; i++) { + for (j = 0; j < SGX_EINIT_SPIN_COUNT; j++) { + addr = sgx_get_epc_virt_addr(encl->secs.epc_page); + + preempt_disable(); + + for (k = 0; k < 4; k++) + wrmsrl(MSR_IA32_SGXLEPUBKEYHASH0 + k, mrsigner[k]); + + ret = __einit(sigstruct, token, addr); + + preempt_enable(); + + if (ret == SGX_UNMASKED_EVENT) + continue; + else + break; + } + + if (ret != SGX_UNMASKED_EVENT) + break; + + msleep_interruptible(SGX_EINIT_SLEEP_TIME); + + if (signal_pending(current)) { + ret = -ERESTARTSYS; + goto err_out; + } + } + + if (ret & ENCLS_FAULT_FLAG) { + if (encls_failed(ret)) + ENCLS_WARN(ret, "EINIT"); + + ret = -EIO; + } else if (ret) { + pr_debug("EINIT returned %d\n", ret); + ret = -EPERM; + } else { + set_bit(SGX_ENCL_INITIALIZED, &encl->flags); + } + +err_out: + mutex_unlock(&encl->lock); + return ret; +} + +/** + * sgx_ioc_enclave_init() - handler for %SGX_IOC_ENCLAVE_INIT + * @encl: an enclave pointer + * @arg: userspace pointer to a struct sgx_enclave_init instance + * + * Flush any outstanding enqueued EADD operations and perform EINIT. The + * Launch Enclave Public Key Hash MSRs are rewritten as necessary to match + * the enclave's MRSIGNER, which is caculated from the provided sigstruct. + * + * Return: + * - 0: Success. + * - -EPERM: Invalid SIGSTRUCT. + * - -EIO: EINIT failed because of a power cycle. + * - -errno: POSIX error. + */ +static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg) +{ + struct sgx_sigstruct *sigstruct; + struct sgx_enclave_init init_arg; + struct page *initp_page; + void *token; + int ret; + + if (!test_bit(SGX_ENCL_CREATED, &encl->flags) || + test_bit(SGX_ENCL_INITIALIZED, &encl->flags)) + return -EINVAL; + + if (copy_from_user(&init_arg, arg, sizeof(init_arg))) + return -EFAULT; + + initp_page = alloc_page(GFP_KERNEL); + if (!initp_page) + return -ENOMEM; + + sigstruct = kmap(initp_page); + token = (void *)((unsigned long)sigstruct + PAGE_SIZE / 2); + memset(token, 0, SGX_LAUNCH_TOKEN_SIZE); + + if (copy_from_user(sigstruct, (void __user *)init_arg.sigstruct, + sizeof(*sigstruct))) { + ret = -EFAULT; + goto out; + } + + /* + * A legacy field used with Intel signed enclaves. These used to mean + * regular and architectural enclaves. The CPU only accepts these values + * but they do not have any other meaning. + * + * Thus, reject any other values. + */ + if (sigstruct->header.vendor != 0x0000 && + sigstruct->header.vendor != 0x8086) { + ret = -EINVAL; + goto out; + } + + ret = sgx_encl_init(encl, sigstruct, token); + +out: + kunmap(initp_page); + __free_page(initp_page); + return ret; +} + +/** + * sgx_ioc_enclave_provision() - handler for %SGX_IOC_ENCLAVE_PROVISION + * @encl: an enclave pointer + * @arg: userspace pointer to a struct sgx_enclave_provision instance + * + * Allow ATTRIBUTE.PROVISION_KEY for an enclave by providing a file handle to + * /dev/sgx_provision. + * + * Return: + * - 0: Success. + * - -errno: Otherwise. + */ +static long sgx_ioc_enclave_provision(struct sgx_encl *encl, void __user *arg) +{ + struct sgx_enclave_provision params; + struct file *file; + + if (copy_from_user(¶ms, arg, sizeof(params))) + return -EFAULT; + + file = fget(params.fd); + if (!file) + return -EINVAL; + + if (file->f_op != &sgx_provision_fops) { + fput(file); + return -EINVAL; + } + + encl->attributes_mask |= SGX_ATTR_PROVISIONKEY; + + fput(file); + return 0; +} + +long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) +{ + struct sgx_encl *encl = filep->private_data; + int ret; + + if (test_and_set_bit(SGX_ENCL_IOCTL, &encl->flags)) + return -EBUSY; + + switch (cmd) { + case SGX_IOC_ENCLAVE_CREATE: + ret = sgx_ioc_enclave_create(encl, (void __user *)arg); + break; + case SGX_IOC_ENCLAVE_ADD_PAGES: + ret = sgx_ioc_enclave_add_pages(encl, (void __user *)arg); + break; + case SGX_IOC_ENCLAVE_INIT: + ret = sgx_ioc_enclave_init(encl, (void __user *)arg); + break; + case SGX_IOC_ENCLAVE_PROVISION: + ret = sgx_ioc_enclave_provision(encl, (void __user *)arg); + break; + default: + ret = -ENOIOCTLCMD; + break; + } + + clear_bit(SGX_ENCL_IOCTL, &encl->flags); + return ret; +} diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c new file mode 100644 index 000000000000..c519fc5f6948 --- /dev/null +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -0,0 +1,733 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2016-20 Intel Corporation. */ + +#include <linux/freezer.h> +#include <linux/highmem.h> +#include <linux/kthread.h> +#include <linux/pagemap.h> +#include <linux/ratelimit.h> +#include <linux/sched/mm.h> +#include <linux/sched/signal.h> +#include <linux/slab.h> +#include "driver.h" +#include "encl.h" +#include "encls.h" + +struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS]; +static int sgx_nr_epc_sections; +static struct task_struct *ksgxd_tsk; +static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq); + +/* + * These variables are part of the state of the reclaimer, and must be accessed + * with sgx_reclaimer_lock acquired. + */ +static LIST_HEAD(sgx_active_page_list); + +static DEFINE_SPINLOCK(sgx_reclaimer_lock); + +/* + * Reset dirty EPC pages to uninitialized state. Laundry can be left with SECS + * pages whose child pages blocked EREMOVE. + */ +static void sgx_sanitize_section(struct sgx_epc_section *section) +{ + struct sgx_epc_page *page; + LIST_HEAD(dirty); + int ret; + + /* init_laundry_list is thread-local, no need for a lock: */ + while (!list_empty(§ion->init_laundry_list)) { + if (kthread_should_stop()) + return; + + /* needed for access to ->page_list: */ + spin_lock(§ion->lock); + + page = list_first_entry(§ion->init_laundry_list, + struct sgx_epc_page, list); + + ret = __eremove(sgx_get_epc_virt_addr(page)); + if (!ret) + list_move(&page->list, §ion->page_list); + else + list_move_tail(&page->list, &dirty); + + spin_unlock(§ion->lock); + + cond_resched(); + } + + list_splice(&dirty, §ion->init_laundry_list); +} + +static bool sgx_reclaimer_age(struct sgx_epc_page *epc_page) +{ + struct sgx_encl_page *page = epc_page->owner; + struct sgx_encl *encl = page->encl; + struct sgx_encl_mm *encl_mm; + bool ret = true; + int idx; + + idx = srcu_read_lock(&encl->srcu); + + list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) { + if (!mmget_not_zero(encl_mm->mm)) + continue; + + mmap_read_lock(encl_mm->mm); + ret = !sgx_encl_test_and_clear_young(encl_mm->mm, page); + mmap_read_unlock(encl_mm->mm); + + mmput_async(encl_mm->mm); + + if (!ret) + break; + } + + srcu_read_unlock(&encl->srcu, idx); + + if (!ret) + return false; + + return true; +} + +static void sgx_reclaimer_block(struct sgx_epc_page *epc_page) +{ + struct sgx_encl_page *page = epc_page->owner; + unsigned long addr = page->desc & PAGE_MASK; + struct sgx_encl *encl = page->encl; + unsigned long mm_list_version; + struct sgx_encl_mm *encl_mm; + struct vm_area_struct *vma; + int idx, ret; + + do { + mm_list_version = encl->mm_list_version; + + /* Pairs with smp_rmb() in sgx_encl_mm_add(). */ + smp_rmb(); + + idx = srcu_read_lock(&encl->srcu); + + list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) { + if (!mmget_not_zero(encl_mm->mm)) + continue; + + mmap_read_lock(encl_mm->mm); + + ret = sgx_encl_find(encl_mm->mm, addr, &vma); + if (!ret && encl == vma->vm_private_data) + zap_vma_ptes(vma, addr, PAGE_SIZE); + + mmap_read_unlock(encl_mm->mm); + + mmput_async(encl_mm->mm); + } + + srcu_read_unlock(&encl->srcu, idx); + } while (unlikely(encl->mm_list_version != mm_list_version)); + + mutex_lock(&encl->lock); + + ret = __eblock(sgx_get_epc_virt_addr(epc_page)); + if (encls_failed(ret)) + ENCLS_WARN(ret, "EBLOCK"); + + mutex_unlock(&encl->lock); +} + +static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot, + struct sgx_backing *backing) +{ + struct sgx_pageinfo pginfo; + int ret; + + pginfo.addr = 0; + pginfo.secs = 0; + + pginfo.contents = (unsigned long)kmap_atomic(backing->contents); + pginfo.metadata = (unsigned long)kmap_atomic(backing->pcmd) + + backing->pcmd_offset; + + ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot); + + kunmap_atomic((void *)(unsigned long)(pginfo.metadata - + backing->pcmd_offset)); + kunmap_atomic((void *)(unsigned long)pginfo.contents); + + return ret; +} + +static void sgx_ipi_cb(void *info) +{ +} + +static const cpumask_t *sgx_encl_ewb_cpumask(struct sgx_encl *encl) +{ + cpumask_t *cpumask = &encl->cpumask; + struct sgx_encl_mm *encl_mm; + int idx; + + /* + * Can race with sgx_encl_mm_add(), but ETRACK has already been + * executed, which means that the CPUs running in the new mm will enter + * into the enclave with a fresh epoch. + */ + cpumask_clear(cpumask); + + idx = srcu_read_lock(&encl->srcu); + + list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) { + if (!mmget_not_zero(encl_mm->mm)) + continue; + + cpumask_or(cpumask, cpumask, mm_cpumask(encl_mm->mm)); + + mmput_async(encl_mm->mm); + } + + srcu_read_unlock(&encl->srcu, idx); + + return cpumask; +} + +/* + * Swap page to the regular memory transformed to the blocked state by using + * EBLOCK, which means that it can no loger be referenced (no new TLB entries). + * + * The first trial just tries to write the page assuming that some other thread + * has reset the count for threads inside the enlave by using ETRACK, and + * previous thread count has been zeroed out. The second trial calls ETRACK + * before EWB. If that fails we kick all the HW threads out, and then do EWB, + * which should be guaranteed the succeed. + */ +static void sgx_encl_ewb(struct sgx_epc_page *epc_page, + struct sgx_backing *backing) +{ + struct sgx_encl_page *encl_page = epc_page->owner; + struct sgx_encl *encl = encl_page->encl; + struct sgx_va_page *va_page; + unsigned int va_offset; + void *va_slot; + int ret; + + encl_page->desc &= ~SGX_ENCL_PAGE_BEING_RECLAIMED; + + va_page = list_first_entry(&encl->va_pages, struct sgx_va_page, + list); + va_offset = sgx_alloc_va_slot(va_page); + va_slot = sgx_get_epc_virt_addr(va_page->epc_page) + va_offset; + if (sgx_va_page_full(va_page)) + list_move_tail(&va_page->list, &encl->va_pages); + + ret = __sgx_encl_ewb(epc_page, va_slot, backing); + if (ret == SGX_NOT_TRACKED) { + ret = __etrack(sgx_get_epc_virt_addr(encl->secs.epc_page)); + if (ret) { + if (encls_failed(ret)) + ENCLS_WARN(ret, "ETRACK"); + } + + ret = __sgx_encl_ewb(epc_page, va_slot, backing); + if (ret == SGX_NOT_TRACKED) { + /* + * Slow path, send IPIs to kick cpus out of the + * enclave. Note, it's imperative that the cpu + * mask is generated *after* ETRACK, else we'll + * miss cpus that entered the enclave between + * generating the mask and incrementing epoch. + */ + on_each_cpu_mask(sgx_encl_ewb_cpumask(encl), + sgx_ipi_cb, NULL, 1); + ret = __sgx_encl_ewb(epc_page, va_slot, backing); + } + } + + if (ret) { + if (encls_failed(ret)) + ENCLS_WARN(ret, "EWB"); + + sgx_free_va_slot(va_page, va_offset); + } else { + encl_page->desc |= va_offset; + encl_page->va_page = va_page; + } +} + +static void sgx_reclaimer_write(struct sgx_epc_page *epc_page, + struct sgx_backing *backing) +{ + struct sgx_encl_page *encl_page = epc_page->owner; + struct sgx_encl *encl = encl_page->encl; + struct sgx_backing secs_backing; + int ret; + + mutex_lock(&encl->lock); + + sgx_encl_ewb(epc_page, backing); + encl_page->epc_page = NULL; + encl->secs_child_cnt--; + + if (!encl->secs_child_cnt && test_bit(SGX_ENCL_INITIALIZED, &encl->flags)) { + ret = sgx_encl_get_backing(encl, PFN_DOWN(encl->size), + &secs_backing); + if (ret) + goto out; + + sgx_encl_ewb(encl->secs.epc_page, &secs_backing); + + sgx_free_epc_page(encl->secs.epc_page); + encl->secs.epc_page = NULL; + + sgx_encl_put_backing(&secs_backing, true); + } + +out: + mutex_unlock(&encl->lock); +} + +/* + * Take a fixed number of pages from the head of the active page pool and + * reclaim them to the enclave's private shmem files. Skip the pages, which have + * been accessed since the last scan. Move those pages to the tail of active + * page pool so that the pages get scanned in LRU like fashion. + * + * Batch process a chunk of pages (at the moment 16) in order to degrade amount + * of IPI's and ETRACK's potentially required. sgx_encl_ewb() does degrade a bit + * among the HW threads with three stage EWB pipeline (EWB, ETRACK + EWB and IPI + * + EWB) but not sufficiently. Reclaiming one page at a time would also be + * problematic as it would increase the lock contention too much, which would + * halt forward progress. + */ +static void sgx_reclaim_pages(void) +{ + struct sgx_epc_page *chunk[SGX_NR_TO_SCAN]; + struct sgx_backing backing[SGX_NR_TO_SCAN]; + struct sgx_epc_section *section; + struct sgx_encl_page *encl_page; + struct sgx_epc_page *epc_page; + pgoff_t page_index; + int cnt = 0; + int ret; + int i; + + spin_lock(&sgx_reclaimer_lock); + for (i = 0; i < SGX_NR_TO_SCAN; i++) { + if (list_empty(&sgx_active_page_list)) + break; + + epc_page = list_first_entry(&sgx_active_page_list, + struct sgx_epc_page, list); + list_del_init(&epc_page->list); + encl_page = epc_page->owner; + + if (kref_get_unless_zero(&encl_page->encl->refcount) != 0) + chunk[cnt++] = epc_page; + else + /* The owner is freeing the page. No need to add the + * page back to the list of reclaimable pages. + */ + epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED; + } + spin_unlock(&sgx_reclaimer_lock); + + for (i = 0; i < cnt; i++) { + epc_page = chunk[i]; + encl_page = epc_page->owner; + + if (!sgx_reclaimer_age(epc_page)) + goto skip; + + page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base); + ret = sgx_encl_get_backing(encl_page->encl, page_index, &backing[i]); + if (ret) + goto skip; + + mutex_lock(&encl_page->encl->lock); + encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED; + mutex_unlock(&encl_page->encl->lock); + continue; + +skip: + spin_lock(&sgx_reclaimer_lock); + list_add_tail(&epc_page->list, &sgx_active_page_list); + spin_unlock(&sgx_reclaimer_lock); + + kref_put(&encl_page->encl->refcount, sgx_encl_release); + + chunk[i] = NULL; + } + + for (i = 0; i < cnt; i++) { + epc_page = chunk[i]; + if (epc_page) + sgx_reclaimer_block(epc_page); + } + + for (i = 0; i < cnt; i++) { + epc_page = chunk[i]; + if (!epc_page) + continue; + + encl_page = epc_page->owner; + sgx_reclaimer_write(epc_page, &backing[i]); + sgx_encl_put_backing(&backing[i], true); + + kref_put(&encl_page->encl->refcount, sgx_encl_release); + epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED; + + section = &sgx_epc_sections[epc_page->section]; + spin_lock(§ion->lock); + list_add_tail(&epc_page->list, §ion->page_list); + section->free_cnt++; + spin_unlock(§ion->lock); + } +} + +static unsigned long sgx_nr_free_pages(void) +{ + unsigned long cnt = 0; + int i; + + for (i = 0; i < sgx_nr_epc_sections; i++) + cnt += sgx_epc_sections[i].free_cnt; + + return cnt; +} + +static bool sgx_should_reclaim(unsigned long watermark) +{ + return sgx_nr_free_pages() < watermark && + !list_empty(&sgx_active_page_list); +} + +static int ksgxd(void *p) +{ + int i; + + set_freezable(); + + /* + * Sanitize pages in order to recover from kexec(). The 2nd pass is + * required for SECS pages, whose child pages blocked EREMOVE. + */ + for (i = 0; i < sgx_nr_epc_sections; i++) + sgx_sanitize_section(&sgx_epc_sections[i]); + + for (i = 0; i < sgx_nr_epc_sections; i++) { + sgx_sanitize_section(&sgx_epc_sections[i]); + + /* Should never happen. */ + if (!list_empty(&sgx_epc_sections[i].init_laundry_list)) + WARN(1, "EPC section %d has unsanitized pages.\n", i); + } + + while (!kthread_should_stop()) { + if (try_to_freeze()) + continue; + + wait_event_freezable(ksgxd_waitq, + kthread_should_stop() || + sgx_should_reclaim(SGX_NR_HIGH_PAGES)); + + if (sgx_should_reclaim(SGX_NR_HIGH_PAGES)) + sgx_reclaim_pages(); + + cond_resched(); + } + + return 0; +} + +static bool __init sgx_page_reclaimer_init(void) +{ + struct task_struct *tsk; + + tsk = kthread_run(ksgxd, NULL, "ksgxd"); + if (IS_ERR(tsk)) + return false; + + ksgxd_tsk = tsk; + + return true; +} + +static struct sgx_epc_page *__sgx_alloc_epc_page_from_section(struct sgx_epc_section *section) +{ + struct sgx_epc_page *page; + + spin_lock(§ion->lock); + + if (list_empty(§ion->page_list)) { + spin_unlock(§ion->lock); + return NULL; + } + + page = list_first_entry(§ion->page_list, struct sgx_epc_page, list); + list_del_init(&page->list); + section->free_cnt--; + + spin_unlock(§ion->lock); + return page; +} + +/** + * __sgx_alloc_epc_page() - Allocate an EPC page + * + * Iterate through EPC sections and borrow a free EPC page to the caller. When a + * page is no longer needed it must be released with sgx_free_epc_page(). + * + * Return: + * an EPC page, + * -errno on error + */ +struct sgx_epc_page *__sgx_alloc_epc_page(void) +{ + struct sgx_epc_section *section; + struct sgx_epc_page *page; + int i; + + for (i = 0; i < sgx_nr_epc_sections; i++) { + section = &sgx_epc_sections[i]; + + page = __sgx_alloc_epc_page_from_section(section); + if (page) + return page; + } + + return ERR_PTR(-ENOMEM); +} + +/** + * sgx_mark_page_reclaimable() - Mark a page as reclaimable + * @page: EPC page + * + * Mark a page as reclaimable and add it to the active page list. Pages + * are automatically removed from the active list when freed. + */ +void sgx_mark_page_reclaimable(struct sgx_epc_page *page) +{ + spin_lock(&sgx_reclaimer_lock); + page->flags |= SGX_EPC_PAGE_RECLAIMER_TRACKED; + list_add_tail(&page->list, &sgx_active_page_list); + spin_unlock(&sgx_reclaimer_lock); +} + +/** + * sgx_unmark_page_reclaimable() - Remove a page from the reclaim list + * @page: EPC page + * + * Clear the reclaimable flag and remove the page from the active page list. + * + * Return: + * 0 on success, + * -EBUSY if the page is in the process of being reclaimed + */ +int sgx_unmark_page_reclaimable(struct sgx_epc_page *page) +{ + spin_lock(&sgx_reclaimer_lock); + if (page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) { + /* The page is being reclaimed. */ + if (list_empty(&page->list)) { + spin_unlock(&sgx_reclaimer_lock); + return -EBUSY; + } + + list_del(&page->list); + page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED; + } + spin_unlock(&sgx_reclaimer_lock); + + return 0; +} + +/** + * sgx_alloc_epc_page() - Allocate an EPC page + * @owner: the owner of the EPC page + * @reclaim: reclaim pages if necessary + * + * Iterate through EPC sections and borrow a free EPC page to the caller. When a + * page is no longer needed it must be released with sgx_free_epc_page(). If + * @reclaim is set to true, directly reclaim pages when we are out of pages. No + * mm's can be locked when @reclaim is set to true. + * + * Finally, wake up ksgxd when the number of pages goes below the watermark + * before returning back to the caller. + * + * Return: + * an EPC page, + * -errno on error + */ +struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim) +{ + struct sgx_epc_page *page; + + for ( ; ; ) { + page = __sgx_alloc_epc_page(); + if (!IS_ERR(page)) { + page->owner = owner; + break; + } + + if (list_empty(&sgx_active_page_list)) + return ERR_PTR(-ENOMEM); + + if (!reclaim) { + page = ERR_PTR(-EBUSY); + break; + } + + if (signal_pending(current)) { + page = ERR_PTR(-ERESTARTSYS); + break; + } + + sgx_reclaim_pages(); + cond_resched(); + } + + if (sgx_should_reclaim(SGX_NR_LOW_PAGES)) + wake_up(&ksgxd_waitq); + + return page; +} + +/** + * sgx_free_epc_page() - Free an EPC page + * @page: an EPC page + * + * Call EREMOVE for an EPC page and insert it back to the list of free pages. + */ +void sgx_free_epc_page(struct sgx_epc_page *page) +{ + struct sgx_epc_section *section = &sgx_epc_sections[page->section]; + int ret; + + WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED); + + ret = __eremove(sgx_get_epc_virt_addr(page)); + if (WARN_ONCE(ret, "EREMOVE returned %d (0x%x)", ret, ret)) + return; + + spin_lock(§ion->lock); + list_add_tail(&page->list, §ion->page_list); + section->free_cnt++; + spin_unlock(§ion->lock); +} + +static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size, + unsigned long index, + struct sgx_epc_section *section) +{ + unsigned long nr_pages = size >> PAGE_SHIFT; + unsigned long i; + + section->virt_addr = memremap(phys_addr, size, MEMREMAP_WB); + if (!section->virt_addr) + return false; + + section->pages = vmalloc(nr_pages * sizeof(struct sgx_epc_page)); + if (!section->pages) { + memunmap(section->virt_addr); + return false; + } + + section->phys_addr = phys_addr; + spin_lock_init(§ion->lock); + INIT_LIST_HEAD(§ion->page_list); + INIT_LIST_HEAD(§ion->init_laundry_list); + + for (i = 0; i < nr_pages; i++) { + section->pages[i].section = index; + section->pages[i].flags = 0; + section->pages[i].owner = NULL; + list_add_tail(§ion->pages[i].list, §ion->init_laundry_list); + } + + section->free_cnt = nr_pages; + return true; +} + +/** + * A section metric is concatenated in a way that @low bits 12-31 define the + * bits 12-31 of the metric and @high bits 0-19 define the bits 32-51 of the + * metric. + */ +static inline u64 __init sgx_calc_section_metric(u64 low, u64 high) +{ + return (low & GENMASK_ULL(31, 12)) + + ((high & GENMASK_ULL(19, 0)) << 32); +} + +static bool __init sgx_page_cache_init(void) +{ + u32 eax, ebx, ecx, edx, type; + u64 pa, size; + int i; + + for (i = 0; i < ARRAY_SIZE(sgx_epc_sections); i++) { + cpuid_count(SGX_CPUID, i + SGX_CPUID_EPC, &eax, &ebx, &ecx, &edx); + + type = eax & SGX_CPUID_EPC_MASK; + if (type == SGX_CPUID_EPC_INVALID) + break; + + if (type != SGX_CPUID_EPC_SECTION) { + pr_err_once("Unknown EPC section type: %u\n", type); + break; + } + + pa = sgx_calc_section_metric(eax, ebx); + size = sgx_calc_section_metric(ecx, edx); + + pr_info("EPC section 0x%llx-0x%llx\n", pa, pa + size - 1); + + if (!sgx_setup_epc_section(pa, size, i, &sgx_epc_sections[i])) { + pr_err("No free memory for an EPC section\n"); + break; + } + + sgx_nr_epc_sections++; + } + + if (!sgx_nr_epc_sections) { + pr_err("There are zero EPC sections.\n"); + return false; + } + + return true; +} + +static void __init sgx_init(void) +{ + int ret; + int i; + + if (!cpu_feature_enabled(X86_FEATURE_SGX)) + return; + + if (!sgx_page_cache_init()) + return; + + if (!sgx_page_reclaimer_init()) + goto err_page_cache; + + ret = sgx_drv_init(); + if (ret) + goto err_kthread; + + return; + +err_kthread: + kthread_stop(ksgxd_tsk); + +err_page_cache: + for (i = 0; i < sgx_nr_epc_sections; i++) { + vfree(sgx_epc_sections[i].pages); + memunmap(sgx_epc_sections[i].virt_addr); + } +} + +device_initcall(sgx_init); diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h new file mode 100644 index 000000000000..5fa42d143feb --- /dev/null +++ b/arch/x86/kernel/cpu/sgx/sgx.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _X86_SGX_H +#define _X86_SGX_H + +#include <linux/bitops.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/rwsem.h> +#include <linux/types.h> +#include <asm/asm.h> +#include "arch.h" + +#undef pr_fmt +#define pr_fmt(fmt) "sgx: " fmt + +#define SGX_MAX_EPC_SECTIONS 8 +#define SGX_EEXTEND_BLOCK_SIZE 256 +#define SGX_NR_TO_SCAN 16 +#define SGX_NR_LOW_PAGES 32 +#define SGX_NR_HIGH_PAGES 64 + +/* Pages, which are being tracked by the page reclaimer. */ +#define SGX_EPC_PAGE_RECLAIMER_TRACKED BIT(0) + +struct sgx_epc_page { + unsigned int section; + unsigned int flags; + struct sgx_encl_page *owner; + struct list_head list; +}; + +/* + * The firmware can define multiple chunks of EPC to the different areas of the + * physical memory e.g. for memory areas of the each node. This structure is + * used to store EPC pages for one EPC section and virtual memory area where + * the pages have been mapped. + * + * 'lock' must be held before accessing 'page_list' or 'free_cnt'. + */ +struct sgx_epc_section { + unsigned long phys_addr; + void *virt_addr; + struct sgx_epc_page *pages; + + spinlock_t lock; + struct list_head page_list; + unsigned long free_cnt; + + /* + * Pages which need EREMOVE run on them before they can be + * used. Only safe to be accessed in ksgxd and init code. + * Not protected by locks. + */ + struct list_head init_laundry_list; +}; + +extern struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS]; + +static inline unsigned long sgx_get_epc_phys_addr(struct sgx_epc_page *page) +{ + struct sgx_epc_section *section = &sgx_epc_sections[page->section]; + unsigned long index; + + index = ((unsigned long)page - (unsigned long)section->pages) / sizeof(*page); + + return section->phys_addr + index * PAGE_SIZE; +} + +static inline void *sgx_get_epc_virt_addr(struct sgx_epc_page *page) +{ + struct sgx_epc_section *section = &sgx_epc_sections[page->section]; + unsigned long index; + + index = ((unsigned long)page - (unsigned long)section->pages) / sizeof(*page); + + return section->virt_addr + index * PAGE_SIZE; +} + +struct sgx_epc_page *__sgx_alloc_epc_page(void); +void sgx_free_epc_page(struct sgx_epc_page *page); + +void sgx_mark_page_reclaimable(struct sgx_epc_page *page); +int sgx_unmark_page_reclaimable(struct sgx_epc_page *page); +struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim); + +#endif /* _X86_SGX_H */ diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index d3a0791bc052..1068002c8532 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -96,6 +96,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c) unsigned int ht_mask_width, core_plus_mask_width, die_plus_mask_width; unsigned int core_select_mask, core_level_siblings; unsigned int die_select_mask, die_level_siblings; + bool die_level_present = false; int leaf; leaf = detect_extended_topology_leaf(c); @@ -126,6 +127,7 @@ int detect_extended_topology(struct cpuinfo_x86 *c) die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); } if (LEAFB_SUBTYPE(ecx) == DIE_TYPE) { + die_level_present = true; die_level_siblings = LEVEL_MAX_SIBLINGS(ebx); die_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax); } @@ -139,8 +141,12 @@ int detect_extended_topology(struct cpuinfo_x86 *c) c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width) & core_select_mask; - c->cpu_die_id = apic->phys_pkg_id(c->initial_apicid, - core_plus_mask_width) & die_select_mask; + + if (die_level_present) { + c->cpu_die_id = apic->phys_pkg_id(c->initial_apicid, + core_plus_mask_width) & die_select_mask; + } + c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, die_plus_mask_width); /* diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 25c06b67e7e0..299c20f0a38b 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -78,6 +78,9 @@ static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src, if (!user_mode(regs)) return copy_from_kernel_nofault(buf, (u8 *)src, nbytes); + /* The user space code from other tasks cannot be accessed. */ + if (regs != task_pt_regs(current)) + return -EPERM; /* * Make sure userspace isn't trying to trick us into dumping kernel * memory by pointing the userspace instruction pointer at it. @@ -85,6 +88,12 @@ static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src, if (__chk_range_not_ok(src, nbytes, TASK_SIZE_MAX)) return -EINVAL; + /* + * Even if named copy_from_user_nmi() this can be invoked from + * other contexts and will not try to resolve a pagefault, which is + * the correct thing to do here as this code can be called from any + * context. + */ return copy_from_user_nmi(buf, (void __user *)src, nbytes); } @@ -115,13 +124,19 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl) u8 opcodes[OPCODE_BUFSIZE]; unsigned long prologue = regs->ip - PROLOGUE_SIZE; - if (copy_code(regs, opcodes, prologue, sizeof(opcodes))) { - printk("%sCode: Unable to access opcode bytes at RIP 0x%lx.\n", - loglvl, prologue); - } else { + switch (copy_code(regs, opcodes, prologue, sizeof(opcodes))) { + case 0: printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %" __stringify(EPILOGUE_SIZE) "ph\n", loglvl, opcodes, opcodes[PROLOGUE_SIZE], opcodes + PROLOGUE_SIZE + 1); + break; + case -EPERM: + /* No access to the user space stack of other tasks. Ignore. */ + break; + default: + printk("%sCode: Unable to access opcode bytes at RIP 0x%lx.\n", + loglvl, prologue); + break; } } @@ -168,7 +183,7 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs, } } -void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, +static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, const char *log_lvl) { struct unwind_state state; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 05e117137b45..5e9beb77cafd 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -37,7 +37,6 @@ #include <asm/kasan.h> #include <asm/fixmap.h> #include <asm/realmode.h> -#include <asm/desc.h> #include <asm/extable.h> #include <asm/trapnr.h> #include <asm/sev-es.h> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index d41fa5bb77fe..04bddaaba8e2 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -26,15 +26,6 @@ #include <asm/nospec-branch.h> #include <asm/fixmap.h> -#ifdef CONFIG_PARAVIRT_XXL -#include <asm/asm-offsets.h> -#include <asm/paravirt.h> -#define GET_CR2_INTO(reg) GET_CR2_INTO_AX ; _ASM_MOV %_ASM_AX, reg -#else -#define INTERRUPT_RETURN iretq -#define GET_CR2_INTO(reg) _ASM_MOV %cr2, reg -#endif - /* * We are not able to switch in one step to the final KERNEL ADDRESS SPACE * because we need identity-mapped pages. @@ -161,6 +152,21 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) /* Setup early boot stage 4-/5-level pagetables. */ addq phys_base(%rip), %rax + + /* + * For SEV guests: Verify that the C-bit is correct. A malicious + * hypervisor could lie about the C-bit position to perform a ROP + * attack on the guest by writing to the unencrypted stack and wait for + * the next RET instruction. + * %rsi carries pointer to realmode data and is callee-clobbered. Save + * and restore it. + */ + pushq %rsi + movq %rax, %rdi + call sev_verify_cbit + popq %rsi + + /* Switch to new page-table */ movq %rax, %cr3 /* Ensure I am executing from virtual addresses */ @@ -279,6 +285,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) SYM_CODE_END(secondary_startup_64) #include "verify_cpu.S" +#include "sev_verify_cbit.S" #ifdef CONFIG_HOTPLUG_CPU /* diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 57c2ecf43134..ce831f9448e7 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -200,8 +200,7 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params, params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch; /* Copying screen_info will do? */ - memcpy(¶ms->screen_info, &boot_params.screen_info, - sizeof(struct screen_info)); + memcpy(¶ms->screen_info, &screen_info, sizeof(struct screen_info)); /* Fill in memsize later */ params->screen_info.ext_mem_k = 0; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 041f0b50bc27..08eb23074f92 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -272,6 +272,19 @@ static int insn_is_indirect_jump(struct insn *insn) return ret; } +static bool is_padding_int3(unsigned long addr, unsigned long eaddr) +{ + unsigned char ops; + + for (; addr < eaddr; addr++) { + if (get_kernel_nofault(ops, (void *)addr) < 0 || + ops != INT3_INSN_OPCODE) + return false; + } + + return true; +} + /* Decode whole function to ensure any instructions don't jump into target */ static int can_optimize(unsigned long paddr) { @@ -310,9 +323,14 @@ static int can_optimize(unsigned long paddr) return 0; kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); insn_get_length(&insn); - /* Another subsystem puts a breakpoint */ + /* + * In the case of detecting unknown breakpoint, this could be + * a padding INT3 between functions. Let's check that all the + * rest of the bytes are also INT3. + */ if (insn.opcode.bytes[0] == INT3_INSN_OPCODE) - return 0; + return is_padding_int3(addr, paddr - offset + size) ? 1 : 0; + /* Recover address */ insn.kaddr = (void *)addr; insn.next_byte = (void *)(addr + insn.length); diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index c0d409810658..8a67d1fa8dc5 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -99,11 +99,9 @@ static int filter_write(u32 reg) if (!__ratelimit(&fw_rs)) return 0; - if (reg == MSR_IA32_ENERGY_PERF_BIAS) - return 0; - - pr_err("Write to unrecognized MSR 0x%x by %s (pid: %d). Please report to x86@kernel.org.\n", - reg, current->comm, current->pid); + pr_warn("Write to unrecognized MSR 0x%x by %s (pid: %d).\n", + reg, current->comm, current->pid); + pr_warn("See https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/about for details.\n"); return 0; } diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index bb7e1132290b..624703af80a1 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -101,8 +101,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); @@ -123,18 +122,26 @@ int perf_reg_validate(u64 mask) u64 perf_reg_abi(struct task_struct *task) { - if (test_tsk_thread_flag(task, TIF_IA32)) + if (!user_64bit_mode(task_pt_regs(task))) return PERF_SAMPLE_REGS_ABI_32; else return PERF_SAMPLE_REGS_ABI_64; } +static DEFINE_PER_CPU(struct pt_regs, nmi_user_regs); + void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { + struct pt_regs *regs_user_copy = this_cpu_ptr(&nmi_user_regs); struct pt_regs *user_regs = task_pt_regs(current); + if (!in_nmi()) { + regs_user->regs = user_regs; + regs_user->abi = perf_reg_abi(current); + return; + } + /* * If we're in an NMI that interrupted task_pt_regs setup, then * we can't sample user regs at all. This check isn't really diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ba4593a913fa..145a7ac0c19a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -685,7 +685,7 @@ void arch_cpu_idle(void) */ void __cpuidle default_idle(void) { - safe_halt(); + raw_safe_halt(); } #if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE) EXPORT_SYMBOL(default_idle); @@ -736,6 +736,8 @@ void stop_this_cpu(void *dummy) /* * AMD Erratum 400 aware idle routine. We handle it the same way as C3 power * states (local apic timer and TSC stop). + * + * XXX this function is completely buggered vs RCU and tracing. */ static void amd_e400_idle(void) { @@ -757,9 +759,9 @@ static void amd_e400_idle(void) * The switch back from broadcast mode needs to be called with * interrupts disabled. */ - local_irq_disable(); + raw_local_irq_disable(); tick_broadcast_exit(); - local_irq_enable(); + raw_local_irq_enable(); } /* @@ -801,9 +803,9 @@ static __cpuidle void mwait_idle(void) if (!need_resched()) __sti_mwait(0, 0); else - local_irq_enable(); + raw_local_irq_enable(); } else { - local_irq_enable(); + raw_local_irq_enable(); } __current_clr_polling(); } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index df342bedea88..ad582f9ac5a6 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -511,11 +511,10 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) EXPORT_SYMBOL_GPL(start_thread); #ifdef CONFIG_COMPAT -void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp) +void compat_start_thread(struct pt_regs *regs, u32 new_ip, u32 new_sp, bool x32) { start_thread_common(regs, new_ip, new_sp, - test_thread_flag(TIF_X32) - ? __USER_CS : __USER32_CS, + x32 ? __USER_CS : __USER32_CS, __USER_DS, __USER_DS); } #endif @@ -641,16 +640,12 @@ void set_personality_64bit(void) /* inherit personality from parent */ /* Make sure to be in 64bit mode */ - clear_thread_flag(TIF_IA32); clear_thread_flag(TIF_ADDR32); - clear_thread_flag(TIF_X32); /* Pretend that this comes from a 64bit execve */ task_pt_regs(current)->orig_ax = __NR_execve; current_thread_info()->status &= ~TS_COMPAT; - - /* Ensure the corresponding mm is not marked. */ if (current->mm) - current->mm->context.ia32_compat = 0; + current->mm->context.flags = MM_CONTEXT_HAS_VSYSCALL; /* TBD: overwrites user setup. Should have two bits. But 64bit processes have always behaved this way, @@ -662,10 +657,9 @@ void set_personality_64bit(void) static void __set_personality_x32(void) { #ifdef CONFIG_X86_X32 - clear_thread_flag(TIF_IA32); - set_thread_flag(TIF_X32); if (current->mm) - current->mm->context.ia32_compat = TIF_X32; + current->mm->context.flags = 0; + current->personality &= ~READ_IMPLIES_EXEC; /* * in_32bit_syscall() uses the presence of the x32 syscall bit @@ -683,10 +677,14 @@ static void __set_personality_x32(void) static void __set_personality_ia32(void) { #ifdef CONFIG_IA32_EMULATION - set_thread_flag(TIF_IA32); - clear_thread_flag(TIF_X32); - if (current->mm) - current->mm->context.ia32_compat = TIF_IA32; + if (current->mm) { + /* + * uprobes applied to this MM need to know this and + * cannot use user_64bit_mode() at that time. + */ + current->mm->context.flags = MM_CONTEXT_UPROBE_IA32; + } + current->personality |= force_personality32; /* Prepare the first "return" to user space */ task_pt_regs(current)->orig_ax = __NR_ia32_execve; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 84f581c91db4..a23130c86bdd 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -119,11 +119,6 @@ EXPORT_SYMBOL(boot_cpu_data); unsigned int def_to_bigsmp; -/* For MCA, but anyone else can use it if they want */ -unsigned int machine_id; -unsigned int machine_submodel_id; -unsigned int BIOS_revision; - struct apm_info apm_info; EXPORT_SYMBOL(apm_info); diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c index 5f83ccaab877..7d04b356d44d 100644 --- a/arch/x86/kernel/sev-es-shared.c +++ b/arch/x86/kernel/sev-es-shared.c @@ -178,6 +178,32 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) goto fail; regs->dx = val >> 32; + /* + * This is a VC handler and the #VC is only raised when SEV-ES is + * active, which means SEV must be active too. Do sanity checks on the + * CPUID results to make sure the hypervisor does not trick the kernel + * into the no-sev path. This could map sensitive data unencrypted and + * make it accessible to the hypervisor. + * + * In particular, check for: + * - Hypervisor CPUID bit + * - Availability of CPUID leaf 0x8000001f + * - SEV CPUID bit. + * + * The hypervisor might still report the wrong C-bit position, but this + * can't be checked here. + */ + + if ((fn == 1 && !(regs->cx & BIT(31)))) + /* Hypervisor bit */ + goto fail; + else if (fn == 0x80000000 && (regs->ax < 0x8000001f)) + /* SEV leaf check */ + goto fail; + else if ((fn == 0x8000001f && !(regs->ax & BIT(1)))) + /* SEV bit */ + goto fail; + /* Skip over the CPUID two-byte opcode */ regs->ip += 2; diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index 4a96726fbaf8..0bd1a0fc587e 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -374,8 +374,8 @@ fault: return ES_EXCEPTION; } -static bool vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, - unsigned long vaddr, phys_addr_t *paddr) +static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, + unsigned long vaddr, phys_addr_t *paddr) { unsigned long va = (unsigned long)vaddr; unsigned int level; @@ -394,15 +394,19 @@ static bool vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt, if (user_mode(ctxt->regs)) ctxt->fi.error_code |= X86_PF_USER; - return false; + return ES_EXCEPTION; } + if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC)) + /* Emulated MMIO to/from encrypted memory not supported */ + return ES_UNSUPPORTED; + pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; pa |= va & ~page_level_mask(level); *paddr = pa; - return true; + return ES_OK; } /* Include code shared with pre-decompression boot stage */ @@ -731,6 +735,7 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt, { u64 exit_code, exit_info_1, exit_info_2; unsigned long ghcb_pa = __pa(ghcb); + enum es_result res; phys_addr_t paddr; void __user *ref; @@ -740,11 +745,12 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt, exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE; - if (!vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr)) { - if (!read) + res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr); + if (res != ES_OK) { + if (res == ES_EXCEPTION && !read) ctxt->fi.error_code |= X86_PF_WRITE; - return ES_EXCEPTION; + return res; } exit_info_1 = paddr; diff --git a/arch/x86/kernel/sev_verify_cbit.S b/arch/x86/kernel/sev_verify_cbit.S new file mode 100644 index 000000000000..ee04941a6546 --- /dev/null +++ b/arch/x86/kernel/sev_verify_cbit.S @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * sev_verify_cbit.S - Code for verification of the C-bit position reported + * by the Hypervisor when running with SEV enabled. + * + * Copyright (c) 2020 Joerg Roedel (jroedel@suse.de) + * + * sev_verify_cbit() is called before switching to a new long-mode page-table + * at boot. + * + * Verify that the C-bit position is correct by writing a random value to + * an encrypted memory location while on the current page-table. Then it + * switches to the new page-table to verify the memory content is still the + * same. After that it switches back to the current page-table and when the + * check succeeded it returns. If the check failed the code invalidates the + * stack pointer and goes into a hlt loop. The stack-pointer is invalidated to + * make sure no interrupt or exception can get the CPU out of the hlt loop. + * + * New page-table pointer is expected in %rdi (first parameter) + * + */ +SYM_FUNC_START(sev_verify_cbit) +#ifdef CONFIG_AMD_MEM_ENCRYPT + /* First check if a C-bit was detected */ + movq sme_me_mask(%rip), %rsi + testq %rsi, %rsi + jz 3f + + /* sme_me_mask != 0 could mean SME or SEV - Check also for SEV */ + movq sev_status(%rip), %rsi + testq %rsi, %rsi + jz 3f + + /* Save CR4 in %rsi */ + movq %cr4, %rsi + + /* Disable Global Pages */ + movq %rsi, %rdx + andq $(~X86_CR4_PGE), %rdx + movq %rdx, %cr4 + + /* + * Verified that running under SEV - now get a random value using + * RDRAND. This instruction is mandatory when running as an SEV guest. + * + * Don't bail out of the loop if RDRAND returns errors. It is better to + * prevent forward progress than to work with a non-random value here. + */ +1: rdrand %rdx + jnc 1b + + /* Store value to memory and keep it in %rdx */ + movq %rdx, sev_check_data(%rip) + + /* Backup current %cr3 value to restore it later */ + movq %cr3, %rcx + + /* Switch to new %cr3 - This might unmap the stack */ + movq %rdi, %cr3 + + /* + * Compare value in %rdx with memory location. If C-bit is incorrect + * this would read the encrypted data and make the check fail. + */ + cmpq %rdx, sev_check_data(%rip) + + /* Restore old %cr3 */ + movq %rcx, %cr3 + + /* Restore previous CR4 */ + movq %rsi, %cr4 + + /* Check CMPQ result */ + je 3f + + /* + * The check failed, prevent any forward progress to prevent ROP + * attacks, invalidate the stack and go into a hlt loop. + */ + xorq %rsp, %rsp + subq $0x1000, %rsp +2: hlt + jmp 2b +3: +#endif + /* Return page-table pointer */ + movq %rdi, %rax + ret +SYM_FUNC_END(sev_verify_cbit) diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index 992fb1415c0f..ae64f98ec2ab 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -514,16 +514,10 @@ int tboot_force_iommu(void) if (!tboot_enabled()) return 0; - if (intel_iommu_tboot_noforce) - return 1; - - if (no_iommu || swiotlb || dmar_disabled) + if (no_iommu || dmar_disabled) pr_warn("Forcing Intel-IOMMU to enabled\n"); dmar_disabled = 0; -#ifdef CONFIG_SWIOTLB - swiotlb = 0; -#endif no_iommu = 0; return 1; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 3c70fb34028b..7798d862983f 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -60,6 +60,7 @@ #include <asm/umip.h> #include <asm/insn.h> #include <asm/insn-eval.h> +#include <asm/vdso.h> #ifdef CONFIG_X86_64 #include <asm/x86_init.h> @@ -117,6 +118,9 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str, tsk->thread.error_code = error_code; tsk->thread.trap_nr = trapnr; die(str, regs, error_code); + } else { + if (fixup_vdso_exception(regs, trapnr, error_code, 0)) + return 0; } /* @@ -550,6 +554,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_GP; + if (fixup_vdso_exception(regs, X86_TRAP_GP, error_code, 0)) + return; + show_signal(tsk, SIGSEGV, "", desc, regs, error_code); force_sig(SIGSEGV); goto exit; @@ -793,19 +800,6 @@ static __always_inline unsigned long debug_read_clear_dr6(void) set_debugreg(DR6_RESERVED, 6); dr6 ^= DR6_RESERVED; /* Flip to positive polarity */ - /* - * Clear the virtual DR6 value, ptrace routines will set bits here for - * things we want signals for. - */ - current->thread.virtual_dr6 = 0; - - /* - * The SDM says "The processor clears the BTF flag when it - * generates a debug exception." Clear TIF_BLOCKSTEP to keep - * TIF_BLOCKSTEP in sync with the hardware BTF flag. - */ - clear_thread_flag(TIF_BLOCKSTEP); - return dr6; } @@ -873,6 +867,20 @@ static __always_inline void exc_debug_kernel(struct pt_regs *regs, */ WARN_ON_ONCE(user_mode(regs)); + if (test_thread_flag(TIF_BLOCKSTEP)) { + /* + * The SDM says "The processor clears the BTF flag when it + * generates a debug exception." but PTRACE_BLOCKSTEP requested + * it for userspace, but we just took a kernel #DB, so re-set + * BTF. + */ + unsigned long debugctl; + + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + debugctl |= DEBUGCTLMSR_BTF; + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + } + /* * Catch SYSENTER with TF set and clear DR_STEP. If this hit a * watchpoint at the same time then that will still be handled. @@ -936,6 +944,22 @@ static __always_inline void exc_debug_user(struct pt_regs *regs, instrumentation_begin(); /* + * Start the virtual/ptrace DR6 value with just the DR_STEP mask + * of the real DR6. ptrace_triggered() will set the DR_TRAPn bits. + * + * Userspace expects DR_STEP to be visible in ptrace_get_debugreg(6) + * even if it is not the result of PTRACE_SINGLESTEP. + */ + current->thread.virtual_dr6 = (dr6 & DR_STEP); + + /* + * The SDM says "The processor clears the BTF flag when it + * generates a debug exception." Clear TIF_BLOCKSTEP to keep + * TIF_BLOCKSTEP in sync with the hardware BTF flag. + */ + clear_thread_flag(TIF_BLOCKSTEP); + + /* * If dr6 has no reason to give us about the origin of this trap, * then it's very likely the result of an icebp/int01 trap. * User wants a sigtrap for that. @@ -1031,6 +1055,9 @@ static void math_error(struct pt_regs *regs, int trapnr) if (!si_code) goto exit; + if (fixup_vdso_exception(regs, trapnr, 0, 0)) + return; + force_sig_fault(SIGFPE, si_code, (void __user *)uprobe_get_trap_addr(regs)); exit: diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 6a339ce328e0..73f800100066 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -321,19 +321,12 @@ EXPORT_SYMBOL_GPL(unwind_get_return_address); unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) { - struct task_struct *task = state->task; - if (unwind_done(state)) return NULL; if (state->regs) return &state->regs->ip; - if (task != current && state->sp == task->thread.sp) { - struct inactive_task_frame *frame = (void *)task->thread.sp; - return &frame->ret_addr; - } - if (state->sp) return (unsigned long *)state->sp - 1; @@ -663,7 +656,7 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, } else { struct inactive_task_frame *frame = (void *)task->thread.sp; - state->sp = task->thread.sp; + state->sp = task->thread.sp + sizeof(*frame); state->bp = READ_ONCE_NOCHECK(frame->bp); state->ip = READ_ONCE_NOCHECK(frame->ret_addr); state->signal = (void *)state->ip == ret_from_fork; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 3fdaa042823d..138bdb1fd136 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -255,12 +255,13 @@ static volatile u32 good_2byte_insns[256 / 32] = { static bool is_prefix_bad(struct insn *insn) { + insn_byte_t p; int i; - for (i = 0; i < insn->prefixes.nbytes; i++) { + for_each_insn_prefix(insn, i, p) { insn_attr_t attr; - attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]); + attr = inat_get_opcode_attribute(p); switch (attr) { case INAT_MAKE_PREFIX(INAT_PFX_ES): case INAT_MAKE_PREFIX(INAT_PFX_CS): @@ -715,6 +716,7 @@ static const struct uprobe_xol_ops push_xol_ops = { static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) { u8 opc1 = OPCODE1(insn); + insn_byte_t p; int i; switch (opc1) { @@ -746,8 +748,8 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix. * No one uses these insns, reject any branch insns with such prefix. */ - for (i = 0; i < insn->prefixes.nbytes; i++) { - if (insn->prefixes.bytes[i] == 0x66) + for_each_insn_prefix(insn, i, p) { + if (p == 0x66) return -ENOTSUPP; } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 06a278b3701d..83637a2ff605 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -90,6 +90,20 @@ static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent) return 0; } +void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0); + + /* + * save the feature bitmap to avoid cpuid lookup for every PV + * operation + */ + if (best) + vcpu->arch.pv_cpuid.features = best->eax; +} + void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; @@ -124,13 +138,6 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); - /* - * save the feature bitmap to avoid cpuid lookup for every PV - * operation - */ - if (best) - vcpu->arch.pv_cpuid.features = best->eax; - if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) { best = kvm_find_cpuid_entry(vcpu, 0x1, 0); if (best) @@ -162,6 +169,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vcpu->arch.guest_supported_xcr0 = (best->eax | ((u64)best->edx << 32)) & supported_xcr0; + kvm_update_pv_runtime(vcpu); + vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); kvm_mmu_reset_context(vcpu); @@ -169,6 +178,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vcpu->arch.cr4_guest_rsvd_bits = __cr4_reserved_bits(guest_cpuid_has, vcpu); + vcpu->arch.cr3_lm_rsvd_bits = rsvd_bits(cpuid_maxphyaddr(vcpu), 63); + /* Invoke the vendor callback only after the above state is updated. */ kvm_x86_ops.vcpu_after_set_cpuid(vcpu); } @@ -672,7 +683,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS); edx.split.bit_width_fixed = cap.bit_width_fixed; - edx.split.reserved = 0; + edx.split.anythread_deprecated = 1; + edx.split.reserved1 = 0; + edx.split.reserved2 = 0; entry->eax = eax.full; entry->ebx = cap.events_mask; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index bf8577947ed2..f7a6e8f83783 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -11,6 +11,7 @@ extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly; void kvm_set_cpu_caps(void); void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu); +void kvm_update_pv_runtime(struct kvm_vcpu *vcpu); struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, u32 function, u32 index); int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0d917eb70319..56cae1ff9e3f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4046,6 +4046,12 @@ static int em_clflush(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_clflushopt(struct x86_emulate_ctxt *ctxt) +{ + /* emulating clflushopt regardless of cpuid */ + return X86EMUL_CONTINUE; +} + static int em_movsxd(struct x86_emulate_ctxt *ctxt) { ctxt->dst.val = (s32) ctxt->src.val; @@ -4585,7 +4591,7 @@ static const struct opcode group11[] = { }; static const struct gprefix pfx_0f_ae_7 = { - I(SrcMem | ByteOp, em_clflush), N, N, N, + I(SrcMem | ByteOp, em_clflush), I(SrcMem | ByteOp, em_clflushopt), N, N, }; static const struct group_dual group15 = { { diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 99d118ffc67d..814698e5b152 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -40,29 +40,10 @@ static int pending_userspace_extint(struct kvm_vcpu *v) * check if there is pending interrupt from * non-APIC source without intack. */ -static int kvm_cpu_has_extint(struct kvm_vcpu *v) -{ - u8 accept = kvm_apic_accept_pic_intr(v); - - if (accept) { - if (irqchip_split(v->kvm)) - return pending_userspace_extint(v); - else - return v->kvm->arch.vpic->output; - } else - return 0; -} - -/* - * check if there is injectable interrupt: - * when virtual interrupt delivery enabled, - * interrupt from apic will handled by hardware, - * we don't need to check it here. - */ -int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) +int kvm_cpu_has_extint(struct kvm_vcpu *v) { /* - * FIXME: interrupt.injected represents an interrupt that it's + * FIXME: interrupt.injected represents an interrupt whose * side-effects have already been applied (e.g. bit from IRR * already moved to ISR). Therefore, it is incorrect to rely * on interrupt.injected to know if there is a pending @@ -75,6 +56,23 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) if (!lapic_in_kernel(v)) return v->arch.interrupt.injected; + if (!kvm_apic_accept_pic_intr(v)) + return 0; + + if (irqchip_split(v->kvm)) + return pending_userspace_extint(v); + else + return v->kvm->arch.vpic->output; +} + +/* + * check if there is injectable interrupt: + * when virtual interrupt delivery enabled, + * interrupt from apic will handled by hardware, + * we don't need to check it here. + */ +int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) +{ if (kvm_cpu_has_extint(v)) return 1; @@ -91,20 +89,6 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_injectable_intr); */ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) { - /* - * FIXME: interrupt.injected represents an interrupt that it's - * side-effects have already been applied (e.g. bit from IRR - * already moved to ISR). Therefore, it is incorrect to rely - * on interrupt.injected to know if there is a pending - * interrupt in the user-mode LAPIC. - * This leads to nVMX/nSVM not be able to distinguish - * if it should exit from L2 to L1 on EXTERNAL_INTERRUPT on - * pending interrupt or should re-inject an injected - * interrupt. - */ - if (!lapic_in_kernel(v)) - return v->arch.interrupt.injected; - if (kvm_cpu_has_extint(v)) return 1; @@ -118,16 +102,21 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); */ static int kvm_cpu_get_extint(struct kvm_vcpu *v) { - if (kvm_cpu_has_extint(v)) { - if (irqchip_split(v->kvm)) { - int vector = v->arch.pending_external_vector; - - v->arch.pending_external_vector = -1; - return vector; - } else - return kvm_pic_read_irq(v->kvm); /* PIC */ - } else + if (!kvm_cpu_has_extint(v)) { + WARN_ON(!lapic_in_kernel(v)); return -1; + } + + if (!lapic_in_kernel(v)) + return v->arch.interrupt.nr; + + if (irqchip_split(v->kvm)) { + int vector = v->arch.pending_external_vector; + + v->arch.pending_external_vector = -1; + return vector; + } else + return kvm_pic_read_irq(v->kvm); /* PIC */ } /* @@ -135,13 +124,7 @@ static int kvm_cpu_get_extint(struct kvm_vcpu *v) */ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) { - int vector; - - if (!lapic_in_kernel(v)) - return v->arch.interrupt.nr; - - vector = kvm_cpu_get_extint(v); - + int vector = kvm_cpu_get_extint(v); if (vector != -1) return vector; /* PIC */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 105e7859d1f2..86c33d53c90a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2465,7 +2465,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) struct kvm_lapic *apic = vcpu->arch.apic; u32 ppr; - if (!kvm_apic_hw_enabled(apic)) + if (!kvm_apic_present(vcpu)) return -1; __apic_update_ppr(apic, &ppr); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 17587f496ec7..7a6ae9e90bd7 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -225,7 +225,7 @@ static gfn_t get_mmio_spte_gfn(u64 spte) { u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask; - gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len) + gpa |= (spte >> SHADOW_NONPRESENT_OR_RSVD_MASK_LEN) & shadow_nonpresent_or_rsvd_mask; return gpa >> PAGE_SHIFT; @@ -591,15 +591,15 @@ static u64 mmu_spte_get_lockless(u64 *sptep) static u64 restore_acc_track_spte(u64 spte) { u64 new_spte = spte; - u64 saved_bits = (spte >> shadow_acc_track_saved_bits_shift) - & shadow_acc_track_saved_bits_mask; + u64 saved_bits = (spte >> SHADOW_ACC_TRACK_SAVED_BITS_SHIFT) + & SHADOW_ACC_TRACK_SAVED_BITS_MASK; WARN_ON_ONCE(spte_ad_enabled(spte)); WARN_ON_ONCE(!is_access_track_spte(spte)); new_spte &= ~shadow_acc_track_mask; - new_spte &= ~(shadow_acc_track_saved_bits_mask << - shadow_acc_track_saved_bits_shift); + new_spte &= ~(SHADOW_ACC_TRACK_SAVED_BITS_MASK << + SHADOW_ACC_TRACK_SAVED_BITS_SHIFT); new_spte |= saved_bits; return new_spte; @@ -856,12 +856,14 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, } else { rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte); desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); - while (desc->sptes[PTE_LIST_EXT-1] && desc->more) { - desc = desc->more; + while (desc->sptes[PTE_LIST_EXT-1]) { count += PTE_LIST_EXT; - } - if (desc->sptes[PTE_LIST_EXT-1]) { - desc->more = mmu_alloc_pte_list_desc(vcpu); + + if (!desc->more) { + desc->more = mmu_alloc_pte_list_desc(vcpu); + desc = desc->more; + break; + } desc = desc->more; } for (i = 0; desc->sptes[i]; ++i) @@ -3515,7 +3517,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) { u64 sptes[PT64_ROOT_MAX_LEVEL]; struct rsvd_bits_validate *rsvd_check; - int root = vcpu->arch.mmu->root_level; + int root = vcpu->arch.mmu->shadow_root_level; int leaf; int level; bool reserved = false; diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index d9c5665a55e9..c51ad544f25b 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -40,8 +40,8 @@ static u64 generation_mmio_spte_mask(u64 gen) WARN_ON(gen & ~MMIO_SPTE_GEN_MASK); BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK); - mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK; - mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK; + mask = (gen << MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_SPTE_GEN_LOW_MASK; + mask |= (gen << MMIO_SPTE_GEN_HIGH_SHIFT) & MMIO_SPTE_GEN_HIGH_MASK; return mask; } @@ -55,7 +55,7 @@ u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access) mask |= shadow_mmio_value | access; mask |= gpa | shadow_nonpresent_or_rsvd_mask; mask |= (gpa & shadow_nonpresent_or_rsvd_mask) - << shadow_nonpresent_or_rsvd_mask_len; + << SHADOW_NONPRESENT_OR_RSVD_MASK_LEN; return mask; } @@ -231,12 +231,12 @@ u64 mark_spte_for_access_track(u64 spte) !spte_can_locklessly_be_made_writable(spte), "kvm: Writable SPTE is not locklessly dirty-trackable\n"); - WARN_ONCE(spte & (shadow_acc_track_saved_bits_mask << - shadow_acc_track_saved_bits_shift), + WARN_ONCE(spte & (SHADOW_ACC_TRACK_SAVED_BITS_MASK << + SHADOW_ACC_TRACK_SAVED_BITS_SHIFT), "kvm: Access Tracking saved bit locations are not zero\n"); - spte |= (spte & shadow_acc_track_saved_bits_mask) << - shadow_acc_track_saved_bits_shift; + spte |= (spte & SHADOW_ACC_TRACK_SAVED_BITS_MASK) << + SHADOW_ACC_TRACK_SAVED_BITS_SHIFT; spte &= ~shadow_acc_track_mask; return spte; @@ -245,7 +245,7 @@ u64 mark_spte_for_access_track(u64 spte) void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask) { BUG_ON((u64)(unsigned)access_mask != access_mask); - WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask << shadow_nonpresent_or_rsvd_mask_len)); + WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask << SHADOW_NONPRESENT_OR_RSVD_MASK_LEN)); WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask); shadow_mmio_value = mmio_value | SPTE_MMIO_MASK; shadow_mmio_access_mask = access_mask; @@ -306,9 +306,9 @@ void kvm_mmu_reset_all_pte_masks(void) low_phys_bits = boot_cpu_data.x86_phys_bits; if (boot_cpu_has_bug(X86_BUG_L1TF) && !WARN_ON_ONCE(boot_cpu_data.x86_cache_bits >= - 52 - shadow_nonpresent_or_rsvd_mask_len)) { + 52 - SHADOW_NONPRESENT_OR_RSVD_MASK_LEN)) { low_phys_bits = boot_cpu_data.x86_cache_bits - - shadow_nonpresent_or_rsvd_mask_len; + - SHADOW_NONPRESENT_OR_RSVD_MASK_LEN; shadow_nonpresent_or_rsvd_mask = rsvd_bits(low_phys_bits, boot_cpu_data.x86_cache_bits - 1); } diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 4ecf40e0b8fe..2b3a30bd38b0 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -56,11 +56,11 @@ #define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1)) /* - * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of + * Due to limited space in PTEs, the MMIO generation is a 18 bit subset of * the memslots generation and is derived as follows: * * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11 - * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61 + * Bits 9-17 of the MMIO generation are propagated to spte bits 54-62 * * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in * the MMIO generation number, as doing so would require stealing a bit from @@ -69,18 +69,29 @@ * requires a full MMU zap). The flag is instead explicitly queried when * checking for MMIO spte cache hits. */ -#define MMIO_SPTE_GEN_MASK GENMASK_ULL(17, 0) #define MMIO_SPTE_GEN_LOW_START 3 #define MMIO_SPTE_GEN_LOW_END 11 -#define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \ - MMIO_SPTE_GEN_LOW_START) #define MMIO_SPTE_GEN_HIGH_START PT64_SECOND_AVAIL_BITS_SHIFT #define MMIO_SPTE_GEN_HIGH_END 62 + +#define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \ + MMIO_SPTE_GEN_LOW_START) #define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \ MMIO_SPTE_GEN_HIGH_START) +#define MMIO_SPTE_GEN_LOW_BITS (MMIO_SPTE_GEN_LOW_END - MMIO_SPTE_GEN_LOW_START + 1) +#define MMIO_SPTE_GEN_HIGH_BITS (MMIO_SPTE_GEN_HIGH_END - MMIO_SPTE_GEN_HIGH_START + 1) + +/* remember to adjust the comment above as well if you change these */ +static_assert(MMIO_SPTE_GEN_LOW_BITS == 9 && MMIO_SPTE_GEN_HIGH_BITS == 9); + +#define MMIO_SPTE_GEN_LOW_SHIFT (MMIO_SPTE_GEN_LOW_START - 0) +#define MMIO_SPTE_GEN_HIGH_SHIFT (MMIO_SPTE_GEN_HIGH_START - MMIO_SPTE_GEN_LOW_BITS) + +#define MMIO_SPTE_GEN_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_BITS + MMIO_SPTE_GEN_HIGH_BITS - 1, 0) + extern u64 __read_mostly shadow_nx_mask; extern u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ extern u64 __read_mostly shadow_user_mask; @@ -105,19 +116,19 @@ extern u64 __read_mostly shadow_acc_track_mask; extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask; /* + * The number of high-order 1 bits to use in the mask above. + */ +#define SHADOW_NONPRESENT_OR_RSVD_MASK_LEN 5 + +/* * The mask/shift to use for saving the original R/X bits when marking the PTE * as not-present for access tracking purposes. We do not save the W bit as the * PTEs being access tracked also need to be dirty tracked, so the W bit will be * restored only when a write is attempted to the page. */ -static const u64 shadow_acc_track_saved_bits_mask = PT64_EPT_READABLE_MASK | - PT64_EPT_EXECUTABLE_MASK; -static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT; - -/* - * The number of high-order 1 bits to use in the mask above. - */ -static const u64 shadow_nonpresent_or_rsvd_mask_len = 5; +#define SHADOW_ACC_TRACK_SAVED_BITS_MASK (PT64_EPT_READABLE_MASK | \ + PT64_EPT_EXECUTABLE_MASK) +#define SHADOW_ACC_TRACK_SAVED_BITS_SHIFT PT64_SECOND_AVAIL_BITS_SHIFT /* * In some cases, we need to preserve the GFN of a non-present or reserved @@ -228,8 +239,8 @@ static inline u64 get_mmio_spte_generation(u64 spte) { u64 gen; - gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START; - gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START; + gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_SHIFT; + gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_SHIFT; return gen; } diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 27e381c9da6c..84c8f06bec26 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -49,7 +49,14 @@ bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa) { struct kvm_mmu_page *sp; + if (!kvm->arch.tdp_mmu_enabled) + return false; + if (WARN_ON(!VALID_PAGE(hpa))) + return false; + sp = to_shadow_page(hpa); + if (WARN_ON(!sp)) + return false; return sp->tdp_mmu_page && sp->root_count; } @@ -59,7 +66,7 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root) { - gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT); + gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT); lockdep_assert_held(&kvm->mmu_lock); @@ -449,7 +456,7 @@ bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end) void kvm_tdp_mmu_zap_all(struct kvm *kvm) { - gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT); + gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT); bool flush; flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn); diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c0b14106258a..566f4d18185b 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -642,8 +642,8 @@ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr, * Its safe to read more than we are asked, caller should ensure that * destination has enough space. */ - src_paddr = round_down(src_paddr, 16); offset = src_paddr & 15; + src_paddr = round_down(src_paddr, 16); sz = round_up(sz + offset, 16); return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 2f32fd09e259..da7eb4aaf44f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -530,12 +530,12 @@ static int svm_hardware_enable(void) static void svm_cpu_uninit(int cpu) { - struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id()); + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); if (!sd) return; - per_cpu(svm_data, raw_smp_processor_id()) = NULL; + per_cpu(svm_data, cpu) = NULL; kfree(sd->sev_vmcbs); __free_page(sd->save_area); kfree(sd); @@ -1309,8 +1309,10 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) svm->avic_is_running = true; svm->msrpm = svm_vcpu_alloc_msrpm(); - if (!svm->msrpm) + if (!svm->msrpm) { + err = -ENOMEM; goto error_free_vmcb_page; + } svm_vcpu_init_msrpm(vcpu, svm->msrpm); @@ -3741,6 +3743,7 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + struct kvm_cpuid_entry2 *best; vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && boot_cpu_has(X86_FEATURE_XSAVE) && @@ -3753,6 +3756,13 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) /* Check again if INVPCID interception if required */ svm_check_invpcid(svm); + /* For sev guests, the memory encryption bit is not reserved in CR3. */ + if (sev_guest(vcpu->kvm)) { + best = kvm_find_cpuid_entry(vcpu, 0x8000001F, 0); + if (best) + vcpu->arch.cr3_lm_rsvd_bits &= ~(1UL << (best->ebx & 0x3f)); + } + if (!kvm_vcpu_apicv_active(vcpu)) return; diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index e5325bd0f304..f3199bb02f22 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -297,14 +297,13 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = { }; const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1); -void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) +__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) { vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL; vmcs_conf->cpu_based_2nd_exec_ctrl &= ~EVMCS1_UNSUPPORTED_2NDEXEC; vmcs_conf->vmexit_ctrl &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL; vmcs_conf->vmentry_ctrl &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL; - } #endif diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index e5f7a7ebf27d..bd41d9462355 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -185,7 +185,7 @@ static inline void evmcs_load(u64 phys_addr) vp_ap->enlighten_vmentry = 1; } -void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf); +__init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf); #else /* !IS_ENABLED(CONFIG_HYPERV) */ static inline void evmcs_write64(unsigned long field, u64 value) {} static inline void evmcs_write32(unsigned long field, u32 value) {} @@ -194,7 +194,6 @@ static inline u64 evmcs_read64(unsigned long field) { return 0; } static inline u32 evmcs_read32(unsigned long field) { return 0; } static inline u16 evmcs_read16(unsigned long field) { return 0; } static inline void evmcs_load(u64 phys_addr) {} -static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {} static inline void evmcs_touch_msr_bitmap(void) {} #endif /* IS_ENABLED(CONFIG_HYPERV) */ diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index d14c94d0aff1..47b8357b9751 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2560,8 +2560,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, vmcs_conf->vmexit_ctrl = _vmexit_control; vmcs_conf->vmentry_ctrl = _vmentry_control; - if (static_branch_unlikely(&enable_evmcs)) +#if IS_ENABLED(CONFIG_HYPERV) + if (enlightened_vmcs) evmcs_sanitize_exec_ctrls(vmcs_conf); +#endif return 0; } @@ -6834,7 +6836,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) static int vmx_create_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx; - unsigned long *msr_bitmap; int i, cpu, err; BUILD_BUG_ON(offsetof(struct vcpu_vmx, vcpu) != 0); @@ -6894,7 +6895,6 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS); bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS); - msr_bitmap = vmx->vmcs01.msr_bitmap; vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R); vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW); vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 397f599b20e5..e545a8a613b1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -255,24 +255,23 @@ static struct kmem_cache *x86_emulator_cache; /* * When called, it means the previous get/set msr reached an invalid msr. - * Return 0 if we want to ignore/silent this failed msr access, or 1 if we want - * to fail the caller. + * Return true if we want to ignore/silent this failed msr access. */ -static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr, - u64 data, bool write) +static bool kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr, + u64 data, bool write) { const char *op = write ? "wrmsr" : "rdmsr"; if (ignore_msrs) { if (report_ignored_msrs) - vcpu_unimpl(vcpu, "ignored %s: 0x%x data 0x%llx\n", - op, msr, data); + kvm_pr_unimpl("ignored %s: 0x%x data 0x%llx\n", + op, msr, data); /* Mask the error */ - return 0; + return true; } else { - vcpu_debug_ratelimited(vcpu, "unhandled %s: 0x%x data 0x%llx\n", - op, msr, data); - return -ENOENT; + kvm_debug_ratelimited("unhandled %s: 0x%x data 0x%llx\n", + op, msr, data); + return false; } } @@ -1042,7 +1041,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) } if (is_long_mode(vcpu) && - (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63))) + (cr3 & vcpu->arch.cr3_lm_rsvd_bits)) return 1; else if (is_pae_paging(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) @@ -1416,7 +1415,8 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) if (r == KVM_MSR_RET_INVALID) { /* Unconditionally clear the output for simplicity */ *data = 0; - r = kvm_msr_ignored_check(vcpu, index, 0, false); + if (kvm_msr_ignored_check(vcpu, index, 0, false)) + r = 0; } if (r) @@ -1540,7 +1540,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, struct msr_data msr; if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_WRITE)) - return -EPERM; + return KVM_MSR_RET_FILTERED; switch (index) { case MSR_FS_BASE: @@ -1581,7 +1581,8 @@ static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu, int ret = __kvm_set_msr(vcpu, index, data, host_initiated); if (ret == KVM_MSR_RET_INVALID) - ret = kvm_msr_ignored_check(vcpu, index, data, true); + if (kvm_msr_ignored_check(vcpu, index, data, true)) + ret = 0; return ret; } @@ -1599,7 +1600,7 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, int ret; if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) - return -EPERM; + return KVM_MSR_RET_FILTERED; msr.index = index; msr.host_initiated = host_initiated; @@ -1618,7 +1619,8 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, if (ret == KVM_MSR_RET_INVALID) { /* Unconditionally clear *data for simplicity */ *data = 0; - ret = kvm_msr_ignored_check(vcpu, index, 0, false); + if (kvm_msr_ignored_check(vcpu, index, 0, false)) + ret = 0; } return ret; @@ -1662,9 +1664,9 @@ static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu) static u64 kvm_msr_reason(int r) { switch (r) { - case -ENOENT: + case KVM_MSR_RET_INVALID: return KVM_MSR_EXIT_REASON_UNKNOWN; - case -EPERM: + case KVM_MSR_RET_FILTERED: return KVM_MSR_EXIT_REASON_FILTER; default: return KVM_MSR_EXIT_REASON_INVAL; @@ -1965,7 +1967,7 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time, struct kvm_arch *ka = &vcpu->kvm->arch; if (vcpu->vcpu_id == 0 && !host_initiated) { - if (ka->boot_vcpu_runs_old_kvmclock && old_msr) + if (ka->boot_vcpu_runs_old_kvmclock != old_msr) kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); ka->boot_vcpu_runs_old_kvmclock = old_msr; @@ -3063,9 +3065,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* Values other than LBR and BTF are vendor-specific, thus reserved and should throw a #GP */ return 1; - } - vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", - __func__, data); + } else if (report_ignored_msrs) + vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", + __func__, data); break; case 0x200 ... 0x2ff: return kvm_mtrr_set_msr(vcpu, msr, data); @@ -3463,29 +3465,63 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.efer; break; case MSR_KVM_WALL_CLOCK: + if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE)) + return 1; + + msr_info->data = vcpu->kvm->arch.wall_clock; + break; case MSR_KVM_WALL_CLOCK_NEW: + if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2)) + return 1; + msr_info->data = vcpu->kvm->arch.wall_clock; break; case MSR_KVM_SYSTEM_TIME: + if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE)) + return 1; + + msr_info->data = vcpu->arch.time; + break; case MSR_KVM_SYSTEM_TIME_NEW: + if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2)) + return 1; + msr_info->data = vcpu->arch.time; break; case MSR_KVM_ASYNC_PF_EN: + if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF)) + return 1; + msr_info->data = vcpu->arch.apf.msr_en_val; break; case MSR_KVM_ASYNC_PF_INT: + if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT)) + return 1; + msr_info->data = vcpu->arch.apf.msr_int_val; break; case MSR_KVM_ASYNC_PF_ACK: + if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF)) + return 1; + msr_info->data = 0; break; case MSR_KVM_STEAL_TIME: + if (!guest_pv_has(vcpu, KVM_FEATURE_STEAL_TIME)) + return 1; + msr_info->data = vcpu->arch.st.msr_val; break; case MSR_KVM_PV_EOI_EN: + if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI)) + return 1; + msr_info->data = vcpu->arch.pv_eoi.msr_val; break; case MSR_KVM_POLL_CONTROL: + if (!guest_pv_has(vcpu, KVM_FEATURE_POLL_CONTROL)) + return 1; + msr_info->data = vcpu->arch.msr_kvm_poll_control; break; case MSR_IA32_P5_MC_ADDR: @@ -4015,21 +4051,23 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu, static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu) { + /* + * We can accept userspace's request for interrupt injection + * as long as we have a place to store the interrupt number. + * The actual injection will happen when the CPU is able to + * deliver the interrupt. + */ + if (kvm_cpu_has_extint(vcpu)) + return false; + + /* Acknowledging ExtINT does not happen if LINT0 is masked. */ return (!lapic_in_kernel(vcpu) || kvm_apic_accept_pic_intr(vcpu)); } -/* - * if userspace requested an interrupt window, check that the - * interrupt window is open. - * - * No need to exit to userspace if we already have an interrupt queued. - */ static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu) { return kvm_arch_interrupt_allowed(vcpu) && - !kvm_cpu_has_interrupt(vcpu) && - !kvm_event_needs_reinjection(vcpu) && kvm_cpu_accept_dm_intr(vcpu); } @@ -4575,6 +4613,8 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, case KVM_CAP_ENFORCE_PV_FEATURE_CPUID: vcpu->arch.pv_cpuid.enforce = cap->args[0]; + if (vcpu->arch.pv_cpuid.enforce) + kvm_update_pv_runtime(vcpu); return 0; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 3900ab0c6004..e7ca622a468f 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -376,7 +376,13 @@ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva); bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type); -#define KVM_MSR_RET_INVALID 2 +/* + * Internal error codes that are used to indicate that MSR emulation encountered + * an error that should result in #GP in the guest, unless userspace + * handles it. + */ +#define KVM_MSR_RET_INVALID 2 /* in-kernel MSR emulation #GP condition */ +#define KVM_MSR_RET_FILTERED 3 /* #GP due to userspace MSR filter */ #define __cr4_reserved_bits(__cpu_has, __c) \ ({ \ diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c index c13e8c9ee926..80efd45a7761 100644 --- a/arch/x86/lib/copy_mc.c +++ b/arch/x86/lib/copy_mc.c @@ -10,10 +10,6 @@ #include <asm/mce.h> #ifdef CONFIG_X86_MCE -/* - * See COPY_MC_TEST for self-test of the copy_mc_fragile() - * implementation. - */ static DEFINE_STATIC_KEY_FALSE(copy_mc_fragile_key); void enable_copy_mc_fragile(void) diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S index 892d8915f609..e5f77e293034 100644 --- a/arch/x86/lib/copy_mc_64.S +++ b/arch/x86/lib/copy_mc_64.S @@ -2,14 +2,11 @@ /* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */ #include <linux/linkage.h> -#include <asm/copy_mc_test.h> -#include <asm/export.h> #include <asm/asm.h> #ifndef CONFIG_UML #ifdef CONFIG_X86_MCE -COPY_MC_TEST_CTL /* * copy_mc_fragile - copy memory with indication if an exception / fault happened @@ -38,8 +35,6 @@ SYM_FUNC_START(copy_mc_fragile) subl %ecx, %edx .L_read_leading_bytes: movb (%rsi), %al - COPY_MC_TEST_SRC %rsi 1 .E_leading_bytes - COPY_MC_TEST_DST %rdi 1 .E_leading_bytes .L_write_leading_bytes: movb %al, (%rdi) incq %rsi @@ -55,8 +50,6 @@ SYM_FUNC_START(copy_mc_fragile) .L_read_words: movq (%rsi), %r8 - COPY_MC_TEST_SRC %rsi 8 .E_read_words - COPY_MC_TEST_DST %rdi 8 .E_write_words .L_write_words: movq %r8, (%rdi) addq $8, %rsi @@ -73,8 +66,6 @@ SYM_FUNC_START(copy_mc_fragile) movl %edx, %ecx .L_read_trailing_bytes: movb (%rsi), %al - COPY_MC_TEST_SRC %rsi 1 .E_trailing_bytes - COPY_MC_TEST_DST %rdi 1 .E_trailing_bytes .L_write_trailing_bytes: movb %al, (%rdi) incq %rsi @@ -88,7 +79,6 @@ SYM_FUNC_START(copy_mc_fragile) .L_done: ret SYM_FUNC_END(copy_mc_fragile) -EXPORT_SYMBOL_GPL(copy_mc_fragile) .section .fixup, "ax" /* diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 58f7fb95c7f4..4229950a5d78 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -63,13 +63,12 @@ static bool is_string_insn(struct insn *insn) */ bool insn_has_rep_prefix(struct insn *insn) { + insn_byte_t p; int i; insn_get_prefixes(insn); - for (i = 0; i < insn->prefixes.nbytes; i++) { - insn_byte_t p = insn->prefixes.bytes[i]; - + for_each_insn_prefix(insn, i, p) { if (p == 0xf2 || p == 0xf3) return true; } @@ -95,14 +94,15 @@ static int get_seg_reg_override_idx(struct insn *insn) { int idx = INAT_SEG_REG_DEFAULT; int num_overrides = 0, i; + insn_byte_t p; insn_get_prefixes(insn); /* Look for any segment override prefixes. */ - for (i = 0; i < insn->prefixes.nbytes; i++) { + for_each_insn_prefix(insn, i, p) { insn_attr_t attr; - attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]); + attr = inat_get_opcode_attribute(p); switch (attr) { case INAT_MAKE_PREFIX(INAT_PFX_CS): idx = INAT_SEG_REG_CS; diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 037faac46b0c..1e299ac73c86 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -16,8 +16,6 @@ * to a jmp to memcpy_erms which does the REP; MOVSB mem copy. */ -.weak memcpy - /* * memcpy - Copy a memory block. * @@ -30,7 +28,7 @@ * rax original destination */ SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_LOCAL(memcpy) +SYM_FUNC_START_WEAK(memcpy) ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ "jmp memcpy_erms", X86_FEATURE_ERMS diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 7ff00ea64e4f..41902fe8b859 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -24,9 +24,7 @@ * Output: * rax: dest */ -.weak memmove - -SYM_FUNC_START_ALIAS(memmove) +SYM_FUNC_START_WEAK(memmove) SYM_FUNC_START(__memmove) mov %rdi, %rax diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 9ff15ee404a4..0bfd26e4ca9e 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -6,8 +6,6 @@ #include <asm/alternative-asm.h> #include <asm/export.h> -.weak memset - /* * ISO C memset - set a memory block to a byte value. This function uses fast * string to get better performance than the original function. The code is @@ -19,7 +17,7 @@ * * rax original destination */ -SYM_FUNC_START_ALIAS(memset) +SYM_FUNC_START_WEAK(memset) SYM_FUNC_START(__memset) /* * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index 3f435d7fca5e..c3e8a62ca561 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -9,9 +9,23 @@ #include <asm/tlbflush.h> -/* - * We rely on the nested NMI work to allow atomic faults from the NMI path; the - * nested NMI paths are careful to preserve CR2. +/** + * copy_from_user_nmi - NMI safe copy from user + * @to: Pointer to the destination buffer + * @from: Pointer to a user space address of the current task + * @n: Number of bytes to copy + * + * Returns: The number of not copied bytes. 0 is success, i.e. all bytes copied + * + * Contrary to other copy_from_user() variants this function can be called + * from NMI context. Despite the name it is not restricted to be called + * from NMI context. It is safe to be called from any other context as + * well. It disables pagefaults across the copy which means a fault will + * abort the copy. + * + * For NMI context invocations this relies on the nested NMI work to allow + * atomic faults from the NMI path; the nested NMI paths are careful to + * preserve CR2. */ unsigned long copy_from_user_nmi(void *to, const void __user *from, unsigned long n) @@ -27,7 +41,7 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) /* * Even though this function is typically called from NMI/IRQ context * disable pagefaults so that its behaviour is consistent even when - * called form other contexts. + * called from other contexts. */ pagefault_disable(); ret = __copy_from_user_inatomic(to, from, n); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 82bf37a5c9ec..f1f1b5a0956a 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -30,6 +30,7 @@ #include <asm/cpu_entry_area.h> /* exception stack */ #include <asm/pgtable_areas.h> /* VMALLOC_START, ... */ #include <asm/kvm_para.h> /* kvm_handle_async_pf */ +#include <asm/vdso.h> /* fixup_vdso_exception() */ #define CREATE_TRACE_POINTS #include <asm/trace/exceptions.h> @@ -602,11 +603,9 @@ pgtable_bad(struct pt_regs *regs, unsigned long error_code, oops_end(flags, regs, sig); } -static void set_signal_archinfo(unsigned long address, - unsigned long error_code) +static void sanitize_error_code(unsigned long address, + unsigned long *error_code) { - struct task_struct *tsk = current; - /* * To avoid leaking information about the kernel page * table layout, pretend that user-mode accesses to @@ -617,7 +616,13 @@ static void set_signal_archinfo(unsigned long address, * information and does not appear to cause any problems. */ if (address >= TASK_SIZE_MAX) - error_code |= X86_PF_PROT; + *error_code |= X86_PF_PROT; +} + +static void set_signal_archinfo(unsigned long address, + unsigned long error_code) +{ + struct task_struct *tsk = current; tsk->thread.trap_nr = X86_TRAP_PF; tsk->thread.error_code = error_code | X86_PF_USER; @@ -658,6 +663,8 @@ no_context(struct pt_regs *regs, unsigned long error_code, * faulting through the emulate_vsyscall() logic. */ if (current->thread.sig_on_uaccess_err && signal) { + sanitize_error_code(address, &error_code); + set_signal_archinfo(address, error_code); /* XXX: hwpoison faults will set the wrong code. */ @@ -806,13 +813,10 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, if (is_errata100(regs, address)) return; - /* - * To avoid leaking information about the kernel page table - * layout, pretend that user-mode accesses to kernel addresses - * are always protection faults. - */ - if (address >= TASK_SIZE_MAX) - error_code |= X86_PF_PROT; + sanitize_error_code(address, &error_code); + + if (fixup_vdso_exception(regs, X86_TRAP_PF, error_code, address)) + return; if (likely(show_unhandled_signals)) show_signal_msg(regs, error_code, address, tsk); @@ -931,6 +935,11 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, if (is_prefetch(regs, error_code, address)) return; + sanitize_error_code(address, &error_code); + + if (fixup_vdso_exception(regs, X86_TRAP_PF, error_code, address)) + return; + set_signal_archinfo(address, error_code); #ifdef CONFIG_MEMORY_FAILURE @@ -1102,6 +1111,18 @@ access_error(unsigned long error_code, struct vm_area_struct *vma) return 1; /* + * SGX hardware blocked the access. This usually happens + * when the enclave memory contents have been destroyed, like + * after a suspend/resume cycle. In any case, the kernel can't + * fix the cause of the fault. Handle the fault as an access + * error even in cases where no actual access violation + * occurred. This allows userspace to rebuild the enclave in + * response to the signal. + */ + if (unlikely(error_code & X86_PF_SGX)) + return 1; + + /* * Make sure to check the VMA so that we do not perform * faults just to hit a X86_PF_PK as soon as we fill in a * page. diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index fe7a12599d8e..968d7005f4a7 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -62,6 +62,7 @@ static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page, unsigned long addr, unsigned long end) { unsigned long next; + int result; for (; addr < end; addr = next) { p4d_t *p4d = p4d_page + p4d_index(addr); @@ -73,13 +74,20 @@ static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page, if (p4d_present(*p4d)) { pud = pud_offset(p4d, 0); - ident_pud_init(info, pud, addr, next); + result = ident_pud_init(info, pud, addr, next); + if (result) + return result; + continue; } pud = (pud_t *)info->alloc_pgt_page(info->context); if (!pud) return -ENOMEM; - ident_pud_init(info, pud, addr, next); + + result = ident_pud_init(info, pud, addr, next); + if (result) + return result; + set_p4d(p4d, __p4d(__pa(pud) | info->kernpg_flag)); } diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index c7a47603537f..e26f5c5c6565 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -596,7 +596,7 @@ static unsigned long __init get_new_step_size(unsigned long step_size) static void __init memory_map_top_down(unsigned long map_start, unsigned long map_end) { - unsigned long real_end, start, last_start; + unsigned long real_end, last_start; unsigned long step_size; unsigned long addr; unsigned long mapped_ram_size = 0; @@ -609,7 +609,7 @@ static void __init memory_map_top_down(unsigned long map_start, step_size = PMD_SIZE; max_pfn_mapped = 0; /* will get exact value next */ min_pfn_mapped = real_end >> PAGE_SHIFT; - last_start = start = real_end; + last_start = real_end; /* * We start from the top (end of memory) and go to the bottom. @@ -618,6 +618,8 @@ static void __init memory_map_top_down(unsigned long map_start, * for page table. */ while (last_start > map_start) { + unsigned long start; + if (last_start > step_size) { start = round_down(last_start - 1, step_size); if (start < map_start) diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index efbb3de472df..bc0833713be9 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -39,6 +39,7 @@ */ u64 sme_me_mask __section(".data") = 0; u64 sev_status __section(".data") = 0; +u64 sev_check_data __section(".data") = 0; EXPORT_SYMBOL(sme_me_mask); DEFINE_STATIC_KEY_FALSE(sev_enable_key); EXPORT_SYMBOL_GPL(sev_enable_key); diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 733b983f3a89..6c5eb6f3f14f 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -45,8 +45,8 @@ #define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) #define PMD_FLAGS_DEC PMD_FLAGS_LARGE -#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ - (_PAGE_PAT | _PAGE_PWT)) +#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_LARGE_CACHE_MASK) | \ + (_PAGE_PAT_LARGE | _PAGE_PWT)) #define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 44148691d78b..5eb4dc2b97da 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -938,6 +938,7 @@ int phys_to_target_node(phys_addr_t start) return meminfo_to_nid(&numa_reserved_meminfo, start); } +EXPORT_SYMBOL_GPL(phys_to_target_node); int memory_add_physaddr_to_nid(u64 start) { @@ -947,4 +948,5 @@ int memory_add_physaddr_to_nid(u64 start) nid = numa_meminfo.blk[0].nid; return nid; } +EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 11666ba19b62..569ac1d57f55 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -474,8 +474,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, /* * The membarrier system call requires a full memory barrier and * core serialization before returning to user-space, after - * storing to rq->curr. Writing to CR3 provides that full - * memory barrier and core serializing instruction. + * storing to rq->curr, when changing mm. This is because + * membarrier() sends IPIs to all CPUs that are in the target mm + * to make them issue memory barriers. However, if another CPU + * switches to/from the target mm concurrently with + * membarrier(), it can cause that CPU not to receive an IPI + * when it really should issue a memory barrier. Writing to CR3 + * provides that full memory barrier and core serializing + * instruction. */ if (real_prev == next) { VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c index a2488b6e27d6..1d8391fcca68 100644 --- a/arch/x86/oprofile/backtrace.c +++ b/arch/x86/oprofile/backtrace.c @@ -49,7 +49,7 @@ x86_backtrace_32(struct pt_regs * const regs, unsigned int depth) struct stack_frame_ia32 *head; /* User process is IA32 */ - if (!current || !test_thread_flag(TIF_IA32)) + if (!current || user_64bit_mode(regs)) return 0; head = (struct stack_frame_ia32 *) regs->bp; diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index fa855bbaebaf..f2f4a5d50b27 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -366,9 +366,9 @@ static int __init pcibios_assign_resources(void) return 0; } -/** - * called in fs_initcall (one below subsys_initcall), - * give a chance for motherboard reserve resources +/* + * This is an fs_initcall (one below subsys_initcall) in order to reserve + * resources properly. */ fs_initcall(pcibios_assign_resources); diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 6fa42e9c4e6f..234998f196d4 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -425,7 +425,7 @@ static acpi_status find_mboard_resource(acpi_handle handle, u32 lvl, return AE_OK; } -static bool is_acpi_reserved(u64 start, u64 end, unsigned not_used) +static bool is_acpi_reserved(u64 start, u64 end, enum e820_type not_used) { struct resource mcfg_res; @@ -442,7 +442,7 @@ static bool is_acpi_reserved(u64 start, u64 end, unsigned not_used) return mcfg_res.flags; } -typedef bool (*check_reserved_t)(u64 start, u64 end, unsigned type); +typedef bool (*check_reserved_t)(u64 start, u64 end, enum e820_type type); static bool __ref is_mmconf_reserved(check_reserved_t is_reserved, struct pci_mmcfg_region *cfg, diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 8f5759df7776..e1e8d4e3a213 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -78,28 +78,30 @@ int __init efi_alloc_page_tables(void) gfp_mask = GFP_KERNEL | __GFP_ZERO; efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER); if (!efi_pgd) - return -ENOMEM; + goto fail; pgd = efi_pgd + pgd_index(EFI_VA_END); p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END); - if (!p4d) { - free_page((unsigned long)efi_pgd); - return -ENOMEM; - } + if (!p4d) + goto free_pgd; pud = pud_alloc(&init_mm, p4d, EFI_VA_END); - if (!pud) { - if (pgtable_l5_enabled()) - free_page((unsigned long) pgd_page_vaddr(*pgd)); - free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); - return -ENOMEM; - } + if (!pud) + goto free_p4d; efi_mm.pgd = efi_pgd; mm_init_cpumask(&efi_mm); init_new_context(NULL, &efi_mm); return 0; + +free_p4d: + if (pgtable_l5_enabled()) + free_page((unsigned long)pgd_page_vaddr(*pgd)); +free_pgd: + free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); +fail: + return -ENOMEM; } /* diff --git a/arch/x86/platform/uv/Makefile b/arch/x86/platform/uv/Makefile index 224ff0504890..1441dda8edf7 100644 --- a/arch/x86/platform/uv/Makefile +++ b/arch/x86/platform/uv/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_X86_UV) += bios_uv.o uv_irq.o uv_sysfs.o uv_time.o uv_nmi.o +obj-$(CONFIG_X86_UV) += bios_uv.o uv_irq.o uv_time.o uv_nmi.o diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index 54511eaccf4d..bf31af3d32d6 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -72,6 +72,7 @@ static s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, long sn_partition_id; EXPORT_SYMBOL_GPL(sn_partition_id); long sn_coherency_id; +EXPORT_SYMBOL_GPL(sn_coherency_id); long sn_region_size; EXPORT_SYMBOL_GPL(sn_region_size); long system_serial_number; @@ -171,6 +172,60 @@ int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus) (u64)decode, (u64)domain, (u64)bus, 0, 0); } +extern s64 uv_bios_get_master_nasid(u64 size, u64 *master_nasid) +{ + return uv_bios_call(UV_BIOS_EXTRA, 0, UV_BIOS_EXTRA_MASTER_NASID, 0, + size, (u64)master_nasid); +} +EXPORT_SYMBOL_GPL(uv_bios_get_master_nasid); + +extern s64 uv_bios_get_heapsize(u64 nasid, u64 size, u64 *heap_size) +{ + return uv_bios_call(UV_BIOS_EXTRA, nasid, UV_BIOS_EXTRA_GET_HEAPSIZE, + 0, size, (u64)heap_size); +} +EXPORT_SYMBOL_GPL(uv_bios_get_heapsize); + +extern s64 uv_bios_install_heap(u64 nasid, u64 heap_size, u64 *bios_heap) +{ + return uv_bios_call(UV_BIOS_EXTRA, nasid, UV_BIOS_EXTRA_INSTALL_HEAP, + 0, heap_size, (u64)bios_heap); +} +EXPORT_SYMBOL_GPL(uv_bios_install_heap); + +extern s64 uv_bios_obj_count(u64 nasid, u64 size, u64 *objcnt) +{ + return uv_bios_call(UV_BIOS_EXTRA, nasid, UV_BIOS_EXTRA_OBJECT_COUNT, + 0, size, (u64)objcnt); +} +EXPORT_SYMBOL_GPL(uv_bios_obj_count); + +extern s64 uv_bios_enum_objs(u64 nasid, u64 size, u64 *objbuf) +{ + return uv_bios_call(UV_BIOS_EXTRA, nasid, UV_BIOS_EXTRA_ENUM_OBJECTS, + 0, size, (u64)objbuf); +} +EXPORT_SYMBOL_GPL(uv_bios_enum_objs); + +extern s64 uv_bios_enum_ports(u64 nasid, u64 obj_id, u64 size, u64 *portbuf) +{ + return uv_bios_call(UV_BIOS_EXTRA, nasid, UV_BIOS_EXTRA_ENUM_PORTS, + obj_id, size, (u64)portbuf); +} +EXPORT_SYMBOL_GPL(uv_bios_enum_ports); + +extern s64 uv_bios_get_geoinfo(u64 nasid, u64 size, u64 *buf) +{ + return uv_bios_call(UV_BIOS_GET_GEOINFO, nasid, (u64)buf, size, 0, 0); +} +EXPORT_SYMBOL_GPL(uv_bios_get_geoinfo); + +extern s64 uv_bios_get_pci_topology(u64 size, u64 *buf) +{ + return uv_bios_call(UV_BIOS_GET_PCI_TOPOLOGY, (u64)buf, size, 0, 0, 0); +} +EXPORT_SYMBOL_GPL(uv_bios_get_pci_topology); + unsigned long get_uv_systab_phys(bool msg) { if ((uv_systab_phys == EFI_INVALID_TABLE_ADDR) || diff --git a/arch/x86/platform/uv/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c deleted file mode 100644 index 266773e2fb37..000000000000 --- a/arch/x86/platform/uv/uv_sysfs.c +++ /dev/null @@ -1,63 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV. - * - * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. - * Copyright (c) Russ Anderson - */ - -#include <linux/device.h> -#include <asm/uv/bios.h> -#include <asm/uv/uv.h> - -struct kobject *sgi_uv_kobj; - -static ssize_t partition_id_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id); -} - -static ssize_t coherence_id_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%ld\n", sn_coherency_id); -} - -static struct kobj_attribute partition_id_attr = - __ATTR(partition_id, S_IRUGO, partition_id_show, NULL); - -static struct kobj_attribute coherence_id_attr = - __ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL); - - -static int __init sgi_uv_sysfs_init(void) -{ - unsigned long ret; - - if (!is_uv_system()) - return -ENODEV; - - if (!sgi_uv_kobj) - sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj); - if (!sgi_uv_kobj) { - printk(KERN_WARNING "kobject_create_and_add sgi_uv failed\n"); - return -EINVAL; - } - - ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr); - if (ret) { - printk(KERN_WARNING "sysfs_create_file partition_id failed\n"); - return ret; - } - - ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr); - if (ret) { - printk(KERN_WARNING "sysfs_create_file coherence_id failed\n"); - return ret; - } - - return 0; -} - -device_initcall(sgi_uv_sysfs_init); diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 7b37a412f829..f03b64d9cb51 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -9,7 +9,7 @@ */ #include <linux/bug.h> -#include <crypto/sha.h> +#include <crypto/sha2.h> #include <asm/purgatory.h> #include "../boot/string.h" diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c index fdcd58af707a..27361cbb7ca9 100644 --- a/arch/x86/um/stub_segv.c +++ b/arch/x86/um/stub_segv.c @@ -8,7 +8,7 @@ #include <sysdep/mcontext.h> #include <sys/ucontext.h> -void __section(".__syscall_stub") +void __attribute__ ((__section__ (".__syscall_stub"))) stub_segv_handler(int sig, siginfo_t *info, void *p) { ucontext_t *uc = p; diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 799f4eba0a62..043c73dfd2c9 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -93,10 +93,20 @@ void xen_init_lock_cpu(int cpu) void xen_uninit_lock_cpu(int cpu) { + int irq; + if (!xen_pvspin) return; - unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL); + /* + * When booting the kernel with 'mitigations=auto,nosmt', the secondary + * CPUs are not activated, and lock_kicker_irq is not initialized. + */ + irq = per_cpu(lock_kicker_irq, cpu); + if (irq == -1) + return; + + unbind_from_irqhandler(irq, NULL); per_cpu(lock_kicker_irq, cpu) = -1; kfree(per_cpu(irq_name, cpu)); per_cpu(irq_name, cpu) = NULL; diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index fa054a1772e1..4dc04e6c01d7 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -69,7 +69,7 @@ */ #define VMALLOC_START (XCHAL_KSEG_CACHED_VADDR - 0x10000000) #define VMALLOC_END (VMALLOC_START + 0x07FEFFFF) -#define TLBTEMP_BASE_1 (VMALLOC_END + 1) +#define TLBTEMP_BASE_1 (VMALLOC_START + 0x08000000) #define TLBTEMP_BASE_2 (TLBTEMP_BASE_1 + DCACHE_WAY_SIZE) #if 2 * DCACHE_WAY_SIZE > ICACHE_WAY_SIZE #define TLBTEMP_SIZE (2 * DCACHE_WAY_SIZE) diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index b9758119feca..5c9fb8005aa8 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -302,7 +302,7 @@ strncpy_from_user(char *dst, const char __user *src, long count) return -EFAULT; } #else -long strncpy_from_user(char *dst, const char *src, long count); +long strncpy_from_user(char *dst, const char __user *src, long count); #endif /* diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c index 5835406b3cec..085b8c77b9d9 100644 --- a/arch/xtensa/mm/cache.c +++ b/arch/xtensa/mm/cache.c @@ -70,8 +70,10 @@ static inline void kmap_invalidate_coherent(struct page *page, kvaddr = TLBTEMP_BASE_1 + (page_to_phys(page) & DCACHE_ALIAS_MASK); + preempt_disable(); __invalidate_dcache_page_alias(kvaddr, page_to_phys(page)); + preempt_enable(); } } } @@ -156,6 +158,7 @@ void flush_dcache_page(struct page *page) if (!alias && !mapping) return; + preempt_disable(); virt = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK); __flush_invalidate_dcache_page_alias(virt, phys); @@ -166,6 +169,7 @@ void flush_dcache_page(struct page *page) if (mapping) __invalidate_icache_page_alias(virt, phys); + preempt_enable(); } /* There shouldn't be an entry in the cache for this page anymore. */ @@ -199,8 +203,10 @@ void local_flush_cache_page(struct vm_area_struct *vma, unsigned long address, unsigned long phys = page_to_phys(pfn_to_page(pfn)); unsigned long virt = TLBTEMP_BASE_1 + (address & DCACHE_ALIAS_MASK); + preempt_disable(); __flush_invalidate_dcache_page_alias(virt, phys); __invalidate_icache_page_alias(virt, phys); + preempt_enable(); } EXPORT_SYMBOL(local_flush_cache_page); @@ -227,11 +233,13 @@ update_mmu_cache(struct vm_area_struct * vma, unsigned long addr, pte_t *ptep) unsigned long phys = page_to_phys(page); unsigned long tmp; + preempt_disable(); tmp = TLBTEMP_BASE_1 + (phys & DCACHE_ALIAS_MASK); __flush_invalidate_dcache_page_alias(tmp, phys); tmp = TLBTEMP_BASE_1 + (addr & DCACHE_ALIAS_MASK); __flush_invalidate_dcache_page_alias(tmp, phys); __invalidate_icache_page_alias(tmp, phys); + preempt_enable(); clear_bit(PG_arch_1, &page->flags); } @@ -265,7 +273,9 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, if (alias) { unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); + preempt_disable(); __flush_invalidate_dcache_page_alias(t, phys); + preempt_enable(); } /* Copy data */ @@ -280,9 +290,11 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, if (alias) { unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); + preempt_disable(); __flush_invalidate_dcache_range((unsigned long) dst, len); if ((vma->vm_flags & VM_EXEC) != 0) __invalidate_icache_page_alias(t, phys); + preempt_enable(); } else if ((vma->vm_flags & VM_EXEC) != 0) { __flush_dcache_range((unsigned long)dst,len); @@ -304,7 +316,9 @@ extern void copy_from_user_page(struct vm_area_struct *vma, struct page *page, if (alias) { unsigned long t = TLBTEMP_BASE_1 + (vaddr & DCACHE_ALIAS_MASK); + preempt_disable(); __flush_invalidate_dcache_page_alias(t, phys); + preempt_enable(); } memcpy(dst, src, len); diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index c6fc83efee0c..8731b7ad9308 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -89,8 +89,8 @@ static void __init free_highpages(void) /* set highmem page free */ for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &range_start, &range_end, NULL) { - unsigned long start = PHYS_PFN(range_start); - unsigned long end = PHYS_PFN(range_end); + unsigned long start = PFN_UP(range_start); + unsigned long end = PFN_DOWN(range_end); /* Ignore complete lowmem entries */ if (end <= max_low) |