diff options
267 files changed, 4323 insertions, 3256 deletions
@@ -82,7 +82,10 @@ Dengcheng Zhu <dzhu@wavecomp.com> <dengcheng.zhu@gmail.com> Dengcheng Zhu <dzhu@wavecomp.com> <dengcheng.zhu@imgtec.com> Dengcheng Zhu <dzhu@wavecomp.com> <dengcheng.zhu@mips.com> <dev.kurt@vandijck-laurijssen.be> <kurt.van.dijck@eia.be> -Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> +Dmitry Baryshkov <dbaryshkov@gmail.com> +Dmitry Baryshkov <dbaryshkov@gmail.com> <[dbaryshkov@gmail.com]> +Dmitry Baryshkov <dbaryshkov@gmail.com> <dmitry_baryshkov@mentor.com> +Dmitry Baryshkov <dbaryshkov@gmail.com> <dmitry_eremin@mentor.com> Dmitry Safonov <0x7f454c46@gmail.com> <dima@arista.com> Dmitry Safonov <0x7f454c46@gmail.com> <d.safonov@partner.samsung.com> Dmitry Safonov <0x7f454c46@gmail.com> <dsafonov@virtuozzo.com> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 526d65d8573a..ee9f13776388 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2259,6 +2259,16 @@ for all guests. Default is 1 (enabled) if in 64-bit or 32-bit PAE mode. + kvm-arm.mode= + [KVM,ARM] Select one of KVM/arm64's modes of operation. + + protected: nVHE-based mode with support for guests whose + state is kept private from the host. + Not valid if the kernel is running in EL2. + + Defaults to VHE/nVHE based on hardware support and + the value of CONFIG_ARM64_VHE. + kvm-arm.vgic_v3_group0_trap= [KVM,ARM] Trap guest accesses to GICv3 group-0 system registers diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst index cf03b3290800..75df7fb30a7b 100644 --- a/Documentation/arm64/memory.rst +++ b/Documentation/arm64/memory.rst @@ -100,7 +100,7 @@ hypervisor maps kernel pages in EL2 at a fixed (and potentially random) offset from the linear mapping. See the kern_hyp_va macro and kvm_update_va_mask function for more details. 
MMIO devices such as GICv2 gets mapped next to the HYP idmap page, as do vectors when -ARM64_HARDEN_EL2_VECTORS is selected for particular CPUs. +ARM64_SPECTRE_V3A is enabled for particular CPUs. When using KVM with the Virtualization Host Extensions, no additional mappings are created, since the host kernel runs directly in EL2. diff --git a/Documentation/devicetree/bindings/clock/imx5-clock.yaml b/Documentation/devicetree/bindings/clock/imx5-clock.yaml index 4d9e7c73dce9..90775c2669b8 100644 --- a/Documentation/devicetree/bindings/clock/imx5-clock.yaml +++ b/Documentation/devicetree/bindings/clock/imx5-clock.yaml @@ -57,7 +57,7 @@ examples: }; can@53fc8000 { - compatible = "fsl,imx53-flexcan", "fsl,p1010-flexcan"; + compatible = "fsl,imx53-flexcan", "fsl,imx25-flexcan"; reg = <0x53fc8000 0x4000>; interrupts = <82>; clocks = <&clks IMX5_CLK_CAN1_IPG_GATE>, <&clks IMX5_CLK_CAN1_SERIAL_GATE>; diff --git a/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml b/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml index 43df15ba8fa4..13875eab2ed6 100644 --- a/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml +++ b/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml @@ -20,8 +20,6 @@ properties: - fsl,imx8qm-flexcan - fsl,imx8mp-flexcan - fsl,imx6q-flexcan - - fsl,imx53-flexcan - - fsl,imx35-flexcan - fsl,imx28-flexcan - fsl,imx25-flexcan - fsl,p1010-flexcan @@ -30,6 +28,11 @@ properties: - fsl,lx2160ar1-flexcan - items: - enum: + - fsl,imx53-flexcan + - fsl,imx35-flexcan + - const: fsl,imx25-flexcan + - items: + - enum: - fsl,imx7d-flexcan - fsl,imx6ul-flexcan - fsl,imx6sx-flexcan @@ -81,11 +84,12 @@ properties: req_bit is the bit offset of CAN stop request. $ref: /schemas/types.yaml#/definitions/phandle-array items: - - description: The 'gpr' is the phandle to general purpose register node. - - description: The 'req_gpr' is the gpr register offset of CAN stop request. 
- maximum: 0xff - - description: The 'req_bit' is the bit offset of CAN stop request. - maximum: 0x1f + items: + - description: The 'gpr' is the phandle to general purpose register node. + - description: The 'req_gpr' is the gpr register offset of CAN stop request. + maximum: 0xff + - description: The 'req_bit' is the bit offset of CAN stop request. + maximum: 0x1f fsl,clk-source: description: | diff --git a/Documentation/virt/kvm/arm/pvtime.rst b/Documentation/virt/kvm/arm/pvtime.rst index 687b60d76ca9..392521af7c90 100644 --- a/Documentation/virt/kvm/arm/pvtime.rst +++ b/Documentation/virt/kvm/arm/pvtime.rst @@ -19,8 +19,8 @@ Two new SMCCC compatible hypercalls are defined: These are only available in the SMC64/HVC64 calling convention as paravirtualized time is not available to 32 bit Arm guests. The existence of -the PV_FEATURES hypercall should be probed using the SMCCC 1.1 ARCH_FEATURES -mechanism before calling it. +the PV_TIME_FEATURES hypercall should be probed using the SMCCC 1.1 +ARCH_FEATURES mechanism before calling it. 
PV_TIME_FEATURES ============= ======== ========== diff --git a/MAINTAINERS b/MAINTAINERS index 4a34b25ecc1f..e451dcce054f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18173,6 +18173,14 @@ L: linux-usb@vger.kernel.org S: Supported F: drivers/usb/class/usblp.c +USB RAW GADGET DRIVER +R: Andrey Konovalov <andreyknvl@gmail.com> +L: linux-usb@vger.kernel.org +S: Maintained +F: Documentation/usb/raw-gadget.rst +F: drivers/usb/gadget/legacy/raw_gadget.c +F: include/uapi/linux/usb/raw_gadget.h + USB QMI WWAN NETWORK DRIVER M: Bjørn Mork <bjorn@mork.no> L: netdev@vger.kernel.org @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 10 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Kleptomaniac Octopus # *DOCUMENTATION* diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h index 213607a1f45c..e26a278d301a 100644 --- a/arch/arm/include/asm/kprobes.h +++ b/arch/arm/include/asm/kprobes.h @@ -44,20 +44,20 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); /* optinsn template addresses */ -extern __visible kprobe_opcode_t optprobe_template_entry; -extern __visible kprobe_opcode_t optprobe_template_val; -extern __visible kprobe_opcode_t optprobe_template_call; -extern __visible kprobe_opcode_t optprobe_template_end; -extern __visible kprobe_opcode_t optprobe_template_sub_sp; -extern __visible kprobe_opcode_t optprobe_template_add_sp; -extern __visible kprobe_opcode_t optprobe_template_restore_begin; -extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn; -extern __visible kprobe_opcode_t optprobe_template_restore_end; +extern __visible kprobe_opcode_t optprobe_template_entry[]; +extern __visible kprobe_opcode_t optprobe_template_val[]; +extern __visible kprobe_opcode_t optprobe_template_call[]; +extern __visible kprobe_opcode_t optprobe_template_end[]; +extern __visible kprobe_opcode_t optprobe_template_sub_sp[]; +extern __visible kprobe_opcode_t optprobe_template_add_sp[]; +extern __visible 
kprobe_opcode_t optprobe_template_restore_begin[]; +extern __visible kprobe_opcode_t optprobe_template_restore_orig_insn[]; +extern __visible kprobe_opcode_t optprobe_template_restore_end[]; #define MAX_OPTIMIZED_LENGTH 4 #define MAX_OPTINSN_SIZE \ - ((unsigned long)&optprobe_template_end - \ - (unsigned long)&optprobe_template_entry) + ((unsigned long)optprobe_template_end - \ + (unsigned long)optprobe_template_entry) #define RELATIVEJUMP_SIZE 4 struct arch_optimized_insn { diff --git a/arch/arm/kernel/perf_regs.c b/arch/arm/kernel/perf_regs.c index 05fe92aa7d98..0529f90395c9 100644 --- a/arch/arm/kernel/perf_regs.c +++ b/arch/arm/kernel/perf_regs.c @@ -32,8 +32,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index 7a449df0b359..c78180172120 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -85,21 +85,21 @@ asm ( "optprobe_template_end:\n"); #define TMPL_VAL_IDX \ - ((unsigned long *)&optprobe_template_val - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_val - (unsigned long *)optprobe_template_entry) #define TMPL_CALL_IDX \ - ((unsigned long *)&optprobe_template_call - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_call - (unsigned long *)optprobe_template_entry) #define TMPL_END_IDX \ - ((unsigned long *)&optprobe_template_end - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_end - (unsigned long *)optprobe_template_entry) #define TMPL_ADD_SP \ - ((unsigned long *)&optprobe_template_add_sp - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_add_sp - (unsigned long *)optprobe_template_entry) 
#define TMPL_SUB_SP \ - ((unsigned long *)&optprobe_template_sub_sp - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_sub_sp - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_BEGIN \ - ((unsigned long *)&optprobe_template_restore_begin - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_begin - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_ORIGN_INSN \ - ((unsigned long *)&optprobe_template_restore_orig_insn - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_orig_insn - (unsigned long *)optprobe_template_entry) #define TMPL_RESTORE_END \ - ((unsigned long *)&optprobe_template_restore_end - (unsigned long *)&optprobe_template_entry) + ((unsigned long *)optprobe_template_restore_end - (unsigned long *)optprobe_template_entry) /* * ARM can always optimize an instruction when using ARM ISA, except @@ -234,7 +234,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *or } /* Copy arch-dep-instance from template. */ - memcpy(code, (unsigned long *)&optprobe_template_entry, + memcpy(code, (unsigned long *)optprobe_template_entry, TMPL_END_IDX * sizeof(kprobe_opcode_t)); /* Adjust buffer according to instruction. */ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1515f6f153a0..8d13b9135634 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -195,7 +195,6 @@ config ARM64 select PCI_SYSCALL if PCI select POWER_RESET select POWER_SUPPLY - select SET_FS select SPARSE_IRQ select SWIOTLB select SYSCTL_EXCEPTION_TRACE @@ -1388,6 +1387,9 @@ config ARM64_PAN The feature is detected at runtime, and will remain as a 'nop' instruction if the cpu does not implement the feature. 
+config AS_HAS_LDAPR + def_bool $(as-instr,.arch_extension rcpc) + config ARM64_LSE_ATOMICS bool default ARM64_USE_LSE_ATOMICS @@ -1425,27 +1427,6 @@ endmenu menu "ARMv8.2 architectural features" -config ARM64_UAO - bool "Enable support for User Access Override (UAO)" - default y - help - User Access Override (UAO; part of the ARMv8.2 Extensions) - causes the 'unprivileged' variant of the load/store instructions to - be overridden to be privileged. - - This option changes get_user() and friends to use the 'unprivileged' - variant of the load/store instructions. This ensures that user-space - really did have access to the supplied memory. When addr_limit is - set to kernel memory the UAO bit will be set, allowing privileged - access to kernel memory. - - Choosing this option will cause copy_to_user() et al to use user-space - memory permissions. - - The feature is detected at runtime, the kernel will use the - regular load/store instructions if the cpu does not implement the - feature. - config ARM64_PMEM bool "Enable support for persistent memory" select ARCH_HAS_PMEM_API diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h new file mode 100644 index 000000000000..5df500dcc627 --- /dev/null +++ b/arch/arm64/include/asm/alternative-macros.h @@ -0,0 +1,217 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_ALTERNATIVE_MACROS_H +#define __ASM_ALTERNATIVE_MACROS_H + +#include <asm/cpucaps.h> + +#define ARM64_CB_PATCH ARM64_NCAPS + +/* A64 instructions are always 32 bits. 
*/ +#define AARCH64_INSN_SIZE 4 + +#ifndef __ASSEMBLY__ + +#include <linux/stringify.h> + +#define ALTINSTR_ENTRY(feature) \ + " .word 661b - .\n" /* label */ \ + " .word 663f - .\n" /* new instruction */ \ + " .hword " __stringify(feature) "\n" /* feature bit */ \ + " .byte 662b-661b\n" /* source len */ \ + " .byte 664f-663f\n" /* replacement len */ + +#define ALTINSTR_ENTRY_CB(feature, cb) \ + " .word 661b - .\n" /* label */ \ + " .word " __stringify(cb) "- .\n" /* callback */ \ + " .hword " __stringify(feature) "\n" /* feature bit */ \ + " .byte 662b-661b\n" /* source len */ \ + " .byte 664f-663f\n" /* replacement len */ + +/* + * alternative assembly primitive: + * + * If any of these .org directive fail, it means that insn1 and insn2 + * don't have the same length. This used to be written as + * + * .if ((664b-663b) != (662b-661b)) + * .error "Alternatives instruction length mismatch" + * .endif + * + * but most assemblers die if insn1 or insn2 have a .inst. This should + * be fixed in a binutils release posterior to 2.25.51.0.2 (anything + * containing commit 4e4d08cf7399b606 or c1baaddf8861). + * + * Alternatives with callbacks do not generate replacement instructions. + */ +#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ + ".if "__stringify(cfg_enabled)" == 1\n" \ + "661:\n\t" \ + oldinstr "\n" \ + "662:\n" \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feature) \ + ".popsection\n" \ + ".subsection 1\n" \ + "663:\n\t" \ + newinstr "\n" \ + "664:\n\t" \ + ".org . - (664b-663b) + (662b-661b)\n\t" \ + ".org . 
- (662b-661b) + (664b-663b)\n\t" \ + ".previous\n" \ + ".endif\n" + +#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \ + ".if "__stringify(cfg_enabled)" == 1\n" \ + "661:\n\t" \ + oldinstr "\n" \ + "662:\n" \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY_CB(feature, cb) \ + ".popsection\n" \ + "663:\n\t" \ + "664:\n\t" \ + ".endif\n" + +#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \ + __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) + +#define ALTERNATIVE_CB(oldinstr, cb) \ + __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb) +#else + +#include <asm/assembler.h> + +.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len + .word \orig_offset - . + .word \alt_offset - . + .hword \feature + .byte \orig_len + .byte \alt_len +.endm + +.macro alternative_insn insn1, insn2, cap, enable = 1 + .if \enable +661: \insn1 +662: .pushsection .altinstructions, "a" + altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f + .popsection + .subsection 1 +663: \insn2 +664: .previous + .org . - (664b-663b) + (662b-661b) + .org . - (662b-661b) + (664b-663b) + .endif +.endm + +/* + * Alternative sequences + * + * The code for the case where the capability is not present will be + * assembled and linked as normal. There are no restrictions on this + * code. + * + * The code for the case where the capability is present will be + * assembled into a special section to be used for dynamic patching. + * Code for that case must: + * + * 1. Be exactly the same length (in bytes) as the default code + * sequence. + * + * 2. Not contain a branch target that is used outside of the + * alternative sequence it is defined in (branches into an + * alternative sequence are not fixed up). + */ + +/* + * Begin an alternative code sequence. 
+ */ +.macro alternative_if_not cap + .set .Lasm_alt_mode, 0 + .pushsection .altinstructions, "a" + altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f + .popsection +661: +.endm + +.macro alternative_if cap + .set .Lasm_alt_mode, 1 + .pushsection .altinstructions, "a" + altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f + .popsection + .subsection 1 + .align 2 /* So GAS knows label 661 is suitably aligned */ +661: +.endm + +.macro alternative_cb cb + .set .Lasm_alt_mode, 0 + .pushsection .altinstructions, "a" + altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0 + .popsection +661: +.endm + +/* + * Provide the other half of the alternative code sequence. + */ +.macro alternative_else +662: + .if .Lasm_alt_mode==0 + .subsection 1 + .else + .previous + .endif +663: +.endm + +/* + * Complete an alternative code sequence. + */ +.macro alternative_endif +664: + .if .Lasm_alt_mode==0 + .previous + .endif + .org . - (664b-663b) + (662b-661b) + .org . - (662b-661b) + (664b-663b) +.endm + +/* + * Callback-based alternative epilogue + */ +.macro alternative_cb_end +662: +.endm + +/* + * Provides a trivial alternative or default sequence consisting solely + * of NOPs. The number of NOPs is chosen automatically to match the + * previous case. + */ +.macro alternative_else_nop_endif +alternative_else + nops (662b-661b) / AARCH64_INSN_SIZE +alternative_endif +.endm + +#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ + alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) + +.macro user_alt, label, oldinstr, newinstr, cond +9999: alternative_insn "\oldinstr", "\newinstr", \cond + _asm_extable 9999b, \label +.endm + +#endif /* __ASSEMBLY__ */ + +/* + * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature)); + * + * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO)); + * N.B. If CONFIG_FOO is specified, but not selected, the whole block + * will be omitted, including oldinstr. + */ +#define ALTERNATIVE(oldinstr, newinstr, ...) 
\ + _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1) + +#endif /* __ASM_ALTERNATIVE_MACROS_H */ diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 619db9b4c9d5..a38b92e11811 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -2,17 +2,13 @@ #ifndef __ASM_ALTERNATIVE_H #define __ASM_ALTERNATIVE_H -#include <asm/cpucaps.h> -#include <asm/insn.h> - -#define ARM64_CB_PATCH ARM64_NCAPS +#include <asm/alternative-macros.h> #ifndef __ASSEMBLY__ #include <linux/init.h> #include <linux/types.h> #include <linux/stddef.h> -#include <linux/stringify.h> struct alt_instr { s32 orig_offset; /* offset to original instruction */ @@ -35,264 +31,5 @@ void apply_alternatives_module(void *start, size_t length); static inline void apply_alternatives_module(void *start, size_t length) { } #endif -#define ALTINSTR_ENTRY(feature) \ - " .word 661b - .\n" /* label */ \ - " .word 663f - .\n" /* new instruction */ \ - " .hword " __stringify(feature) "\n" /* feature bit */ \ - " .byte 662b-661b\n" /* source len */ \ - " .byte 664f-663f\n" /* replacement len */ - -#define ALTINSTR_ENTRY_CB(feature, cb) \ - " .word 661b - .\n" /* label */ \ - " .word " __stringify(cb) "- .\n" /* callback */ \ - " .hword " __stringify(feature) "\n" /* feature bit */ \ - " .byte 662b-661b\n" /* source len */ \ - " .byte 664f-663f\n" /* replacement len */ - -/* - * alternative assembly primitive: - * - * If any of these .org directive fail, it means that insn1 and insn2 - * don't have the same length. This used to be written as - * - * .if ((664b-663b) != (662b-661b)) - * .error "Alternatives instruction length mismatch" - * .endif - * - * but most assemblers die if insn1 or insn2 have a .inst. This should - * be fixed in a binutils release posterior to 2.25.51.0.2 (anything - * containing commit 4e4d08cf7399b606 or c1baaddf8861). - * - * Alternatives with callbacks do not generate replacement instructions. 
- */ -#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ - ".if "__stringify(cfg_enabled)" == 1\n" \ - "661:\n\t" \ - oldinstr "\n" \ - "662:\n" \ - ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature) \ - ".popsection\n" \ - ".subsection 1\n" \ - "663:\n\t" \ - newinstr "\n" \ - "664:\n\t" \ - ".org . - (664b-663b) + (662b-661b)\n\t" \ - ".org . - (662b-661b) + (664b-663b)\n\t" \ - ".previous\n" \ - ".endif\n" - -#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \ - ".if "__stringify(cfg_enabled)" == 1\n" \ - "661:\n\t" \ - oldinstr "\n" \ - "662:\n" \ - ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY_CB(feature, cb) \ - ".popsection\n" \ - "663:\n\t" \ - "664:\n\t" \ - ".endif\n" - -#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \ - __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) - -#define ALTERNATIVE_CB(oldinstr, cb) \ - __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb) -#else - -#include <asm/assembler.h> - -.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len - .word \orig_offset - . - .word \alt_offset - . - .hword \feature - .byte \orig_len - .byte \alt_len -.endm - -.macro alternative_insn insn1, insn2, cap, enable = 1 - .if \enable -661: \insn1 -662: .pushsection .altinstructions, "a" - altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f - .popsection - .subsection 1 -663: \insn2 -664: .previous - .org . - (664b-663b) + (662b-661b) - .org . - (662b-661b) + (664b-663b) - .endif -.endm - -/* - * Alternative sequences - * - * The code for the case where the capability is not present will be - * assembled and linked as normal. There are no restrictions on this - * code. - * - * The code for the case where the capability is present will be - * assembled into a special section to be used for dynamic patching. - * Code for that case must: - * - * 1. Be exactly the same length (in bytes) as the default code - * sequence. - * - * 2. 
Not contain a branch target that is used outside of the - * alternative sequence it is defined in (branches into an - * alternative sequence are not fixed up). - */ - -/* - * Begin an alternative code sequence. - */ -.macro alternative_if_not cap - .set .Lasm_alt_mode, 0 - .pushsection .altinstructions, "a" - altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f - .popsection -661: -.endm - -.macro alternative_if cap - .set .Lasm_alt_mode, 1 - .pushsection .altinstructions, "a" - altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f - .popsection - .subsection 1 - .align 2 /* So GAS knows label 661 is suitably aligned */ -661: -.endm - -.macro alternative_cb cb - .set .Lasm_alt_mode, 0 - .pushsection .altinstructions, "a" - altinstruction_entry 661f, \cb, ARM64_CB_PATCH, 662f-661f, 0 - .popsection -661: -.endm - -/* - * Provide the other half of the alternative code sequence. - */ -.macro alternative_else -662: - .if .Lasm_alt_mode==0 - .subsection 1 - .else - .previous - .endif -663: -.endm - -/* - * Complete an alternative code sequence. - */ -.macro alternative_endif -664: - .if .Lasm_alt_mode==0 - .previous - .endif - .org . - (664b-663b) + (662b-661b) - .org . - (662b-661b) + (664b-663b) -.endm - -/* - * Callback-based alternative epilogue - */ -.macro alternative_cb_end -662: -.endm - -/* - * Provides a trivial alternative or default sequence consisting solely - * of NOPs. The number of NOPs is chosen automatically to match the - * previous case. - */ -.macro alternative_else_nop_endif -alternative_else - nops (662b-661b) / AARCH64_INSN_SIZE -alternative_endif -.endm - -#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ - alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) - -.macro user_alt, label, oldinstr, newinstr, cond -9999: alternative_insn "\oldinstr", "\newinstr", \cond - _asm_extable 9999b, \label -.endm - -/* - * Generate the assembly for UAO alternatives with exception table entries. 
- * This is complicated as there is no post-increment or pair versions of the - * unprivileged instructions, and USER() only works for single instructions. - */ -#ifdef CONFIG_ARM64_UAO - .macro uao_ldp l, reg1, reg2, addr, post_inc - alternative_if_not ARM64_HAS_UAO -8888: ldp \reg1, \reg2, [\addr], \post_inc; -8889: nop; - nop; - alternative_else - ldtr \reg1, [\addr]; - ldtr \reg2, [\addr, #8]; - add \addr, \addr, \post_inc; - alternative_endif - - _asm_extable 8888b,\l; - _asm_extable 8889b,\l; - .endm - - .macro uao_stp l, reg1, reg2, addr, post_inc - alternative_if_not ARM64_HAS_UAO -8888: stp \reg1, \reg2, [\addr], \post_inc; -8889: nop; - nop; - alternative_else - sttr \reg1, [\addr]; - sttr \reg2, [\addr, #8]; - add \addr, \addr, \post_inc; - alternative_endif - - _asm_extable 8888b,\l; - _asm_extable 8889b,\l; - .endm - - .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc - alternative_if_not ARM64_HAS_UAO -8888: \inst \reg, [\addr], \post_inc; - nop; - alternative_else - \alt_inst \reg, [\addr]; - add \addr, \addr, \post_inc; - alternative_endif - - _asm_extable 8888b,\l; - .endm -#else - .macro uao_ldp l, reg1, reg2, addr, post_inc - USER(\l, ldp \reg1, \reg2, [\addr], \post_inc) - .endm - .macro uao_stp l, reg1, reg2, addr, post_inc - USER(\l, stp \reg1, \reg2, [\addr], \post_inc) - .endm - .macro uao_user_alternative l, inst, alt_inst, reg, addr, post_inc - USER(\l, \inst \reg, [\addr], \post_inc) - .endm -#endif - -#endif /* __ASSEMBLY__ */ - -/* - * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature)); - * - * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO)); - * N.B. If CONFIG_FOO is specified, but not selected, the whole block - * will be omitted, including oldinstr. - */ -#define ALTERNATIVE(oldinstr, newinstr, ...) 
\ - _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1) - +#endif /* __ASSEMBLY__ */ #endif /* __ASM_ALTERNATIVE_H */ diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index f68a0e64482a..0fa95d1b311f 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -2,7 +2,7 @@ #ifndef __ASM_ASM_UACCESS_H #define __ASM_ASM_UACCESS_H -#include <asm/alternative.h> +#include <asm/alternative-macros.h> #include <asm/kernel-pgtable.h> #include <asm/mmu.h> #include <asm/sysreg.h> @@ -58,4 +58,33 @@ alternative_else_nop_endif .endm #endif +/* + * Generate the assembly for LDTR/STTR with exception table entries. + * This is complicated as there is no post-increment or pair versions of the + * unprivileged instructions, and USER() only works for single instructions. + */ + .macro user_ldp l, reg1, reg2, addr, post_inc +8888: ldtr \reg1, [\addr]; +8889: ldtr \reg2, [\addr, #8]; + add \addr, \addr, \post_inc; + + _asm_extable 8888b,\l; + _asm_extable 8889b,\l; + .endm + + .macro user_stp l, reg1, reg2, addr, post_inc +8888: sttr \reg1, [\addr]; +8889: sttr \reg2, [\addr, #8]; + add \addr, \addr, \post_inc; + + _asm_extable 8888b,\l; + _asm_extable 8889b,\l; + .endm + + .macro user_ldst l, inst, reg, addr, post_inc +8888: \inst \reg, [\addr]; + add \addr, \addr, \post_inc; + + _asm_extable 8888b,\l; + .endm #endif diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index e7d98997c09c..b77d997b173b 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -16,12 +16,10 @@ #define ARM64_WORKAROUND_CAVIUM_23154 6 #define ARM64_WORKAROUND_834220 7 #define ARM64_HAS_NO_HW_PREFETCH 8 -#define ARM64_HAS_UAO 9 -#define ARM64_ALT_PAN_NOT_UAO 10 #define ARM64_HAS_VIRT_HOST_EXTN 11 #define ARM64_WORKAROUND_CAVIUM_27456 12 #define ARM64_HAS_32BIT_EL0 13 -#define ARM64_HARDEN_EL2_VECTORS 14 +#define ARM64_SPECTRE_V3A 14 #define ARM64_HAS_CNP 15 #define 
ARM64_HAS_NO_FPSIMD 16 #define ARM64_WORKAROUND_REPEAT_TLBI 17 @@ -66,7 +64,9 @@ #define ARM64_HAS_TLB_RANGE 56 #define ARM64_MTE 57 #define ARM64_WORKAROUND_1508412 58 +#define ARM64_HAS_LDAPR 59 +#define ARM64_KVM_PROTECTED_MODE 60 -#define ARM64_NCAPS 59 +#define ARM64_NCAPS 61 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 97244d4feca9..16063c813dcd 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -268,6 +268,8 @@ extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; /* * CPU feature detected at boot time based on feature of one or more CPUs. * All possible conflicts for a late CPU are ignored. + * NOTE: this means that a late CPU with the feature will *not* cause the + * capability to be advertised by cpus_have_*cap()! */ #define ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE \ (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ @@ -667,10 +669,16 @@ static __always_inline bool system_supports_fpsimd(void) return !cpus_have_const_cap(ARM64_HAS_NO_FPSIMD); } +static inline bool system_uses_hw_pan(void) +{ + return IS_ENABLED(CONFIG_ARM64_PAN) && + cpus_have_const_cap(ARM64_HAS_PAN); +} + static inline bool system_uses_ttbr0_pan(void) { return IS_ENABLED(CONFIG_ARM64_SW_TTBR0_PAN) && - !cpus_have_const_cap(ARM64_HAS_PAN); + !system_uses_hw_pan(); } static __always_inline bool system_supports_sve(void) @@ -697,6 +705,11 @@ static inline bool system_supports_generic_auth(void) cpus_have_const_cap(ARM64_HAS_GENERIC_AUTH); } +static inline bool system_has_full_ptr_auth(void) +{ + return system_supports_address_auth() && system_supports_generic_auth(); +} + static __always_inline bool system_uses_irq_prio_masking(void) { return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && @@ -762,6 +775,13 @@ static inline bool cpu_has_hw_af(void) ID_AA64MMFR1_HADBS_SHIFT); } +static inline bool cpu_has_pan(void) +{ + u64 mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); + return 
cpuid_feature_extract_unsigned_field(mmfr1, + ID_AA64MMFR1_PAN_SHIFT); +} + #ifdef CONFIG_ARM64_AMU_EXTN /* Check whether the cpu supports the Activity Monitors Unit (AMU) */ extern bool cpu_has_amu_feat(int cpu); diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 9e2e9a63c7b6..ef5b040dee44 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,6 +86,8 @@ #define QCOM_CPU_PART_FALKOR_V1 0x800 #define QCOM_CPU_PART_FALKOR 0xC00 #define QCOM_CPU_PART_KRYO 0x200 +#define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800 +#define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801 #define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803 #define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804 #define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805 @@ -116,6 +118,8 @@ #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) +#define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD) +#define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER) #define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER) #define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD) #define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h new file mode 100644 index 000000000000..a7f5a1bbc8ac --- /dev/null +++ b/arch/arm64/include/asm/el2_setup.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#ifndef __ARM_KVM_INIT_H__ +#define __ARM_KVM_INIT_H__ + +#ifndef __ASSEMBLY__ +#error Assembly-only header +#endif + +#include <asm/kvm_arm.h> 
+#include <asm/ptrace.h> +#include <asm/sysreg.h> +#include <linux/irqchip/arm-gic-v3.h> + +.macro __init_el2_sctlr + mov_q x0, INIT_SCTLR_EL2_MMU_OFF + msr sctlr_el2, x0 + isb +.endm + +/* + * Allow Non-secure EL1 and EL0 to access physical timer and counter. + * This is not necessary for VHE, since the host kernel runs in EL2, + * and EL0 accesses are configured in the later stage of boot process. + * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout + * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined + * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1 + * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in + * EL2. + */ +.macro __init_el2_timers mode +.ifeqs "\mode", "nvhe" + mrs x0, cnthctl_el2 + orr x0, x0, #3 // Enable EL1 physical timers + msr cnthctl_el2, x0 +.endif + msr cntvoff_el2, xzr // Clear virtual offset +.endm + +.macro __init_el2_debug mode + mrs x1, id_aa64dfr0_el1 + sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4 + cmp x0, #1 + b.lt 1f // Skip if no PMU present + mrs x0, pmcr_el0 // Disable debug access traps + ubfx x0, x0, #11, #5 // to EL2 and allow access to +1: + csel x2, xzr, x0, lt // all PMU counters from EL1 + + /* Statistical profiling */ + ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 + cbz x0, 3f // Skip if SPE not present + +.ifeqs "\mode", "nvhe" + mrs_s x0, SYS_PMBIDR_EL1 // If SPE available at EL2, + and x0, x0, #(1 << SYS_PMBIDR_EL1_P_SHIFT) + cbnz x0, 2f // then permit sampling of physical + mov x0, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \ + 1 << SYS_PMSCR_EL2_PA_SHIFT) + msr_s SYS_PMSCR_EL2, x0 // addresses and physical counter +2: + mov x0, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) + orr x2, x2, x0 // If we don't have VHE, then + // use EL1&0 translation. 
+.else + orr x2, x2, #MDCR_EL2_TPMS // For VHE, use EL2 translation + // and disable access from EL1 +.endif + +3: + msr mdcr_el2, x2 // Configure debug traps +.endm + +/* LORegions */ +.macro __init_el2_lor + mrs x1, id_aa64mmfr1_el1 + ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4 + cbz x0, 1f + msr_s SYS_LORC_EL1, xzr +1: +.endm + +/* Stage-2 translation */ +.macro __init_el2_stage2 + msr vttbr_el2, xzr +.endm + +/* GICv3 system register access */ +.macro __init_el2_gicv3 + mrs x0, id_aa64pfr0_el1 + ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4 + cbz x0, 1f + + mrs_s x0, SYS_ICC_SRE_EL2 + orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 + orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 + msr_s SYS_ICC_SRE_EL2, x0 + isb // Make sure SRE is now set + mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, + tbz x0, #0, 1f // and check that it sticks + msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults +1: +.endm + +.macro __init_el2_hstr + msr hstr_el2, xzr // Disable CP15 traps to EL2 +.endm + +/* Virtual CPU ID registers */ +.macro __init_el2_nvhe_idregs + mrs x0, midr_el1 + mrs x1, mpidr_el1 + msr vpidr_el2, x0 + msr vmpidr_el2, x1 +.endm + +/* Coprocessor traps */ +.macro __init_el2_nvhe_cptr + mov x0, #0x33ff + msr cptr_el2, x0 // Disable copro. traps to EL2 +.endm + +/* SVE register access */ +.macro __init_el2_nvhe_sve + mrs x1, id_aa64pfr0_el1 + ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4 + cbz x1, 1f + + bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps + msr cptr_el2, x0 // Disable copro. traps to EL2 + isb + mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector + msr_s SYS_ZCR_EL2, x1 // length for EL1. +1: +.endm + +.macro __init_el2_nvhe_prepare_eret + mov x0, #INIT_PSTATE_EL1 + msr spsr_el2, x0 +.endm + +/** + * Initialize EL2 registers to sane values. This should be called early on all + * cores that were booted in EL2. + * + * Regs: x0, x1 and x2 are clobbered. 
+ */ +.macro init_el2_state mode +.ifnes "\mode", "vhe" +.ifnes "\mode", "nvhe" +.error "Invalid 'mode' argument" +.endif +.endif + + __init_el2_sctlr + __init_el2_timers \mode + __init_el2_debug \mode + __init_el2_lor + __init_el2_stage2 + __init_el2_gicv3 + __init_el2_hstr + + /* + * When VHE is not in use, early init of EL2 needs to be done here. + * When VHE _is_ in use, EL1 will not be used in the host and + * requires no configuration, and all non-hyp-specific EL2 setup + * will be done via the _EL1 system register aliases in __cpu_setup. + */ +.ifeqs "\mode", "nvhe" + __init_el2_nvhe_idregs + __init_el2_nvhe_cptr + __init_el2_nvhe_sve + __init_el2_nvhe_prepare_eret +.endif +.endm + +#endif /* __ARM_KVM_INIT_H__ */ diff --git a/arch/arm64/include/asm/exec.h b/arch/arm64/include/asm/exec.h index 1aae6f9962fc..9a1c22ce664b 100644 --- a/arch/arm64/include/asm/exec.h +++ b/arch/arm64/include/asm/exec.h @@ -10,6 +10,5 @@ #include <linux/sched.h> extern unsigned long arch_align_stack(unsigned long sp); -void uao_thread_switch(struct task_struct *next); #endif /* __ASM_EXEC_H */ diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 97f6a63810ec..8e41faa37c69 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -16,7 +16,7 @@ do { \ unsigned int loops = FUTEX_MAX_LOOPS; \ \ - uaccess_enable(); \ + uaccess_enable_privileged(); \ asm volatile( \ " prfm pstl1strm, %2\n" \ "1: ldxr %w1, %2\n" \ @@ -39,7 +39,7 @@ do { \ "+r" (loops) \ : "r" (oparg), "Ir" (-EFAULT), "Ir" (-EAGAIN) \ : "memory"); \ - uaccess_disable(); \ + uaccess_disable_privileged(); \ } while (0) static inline int @@ -95,7 +95,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr, return -EFAULT; uaddr = __uaccess_mask_ptr(_uaddr); - uaccess_enable(); + uaccess_enable_privileged(); asm volatile("// futex_atomic_cmpxchg_inatomic\n" " prfm pstl1strm, %2\n" "1: ldxr %w1, %2\n" @@ -118,7 +118,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, 
u32 __user *_uaddr, : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp), "+r" (loops) : "r" (oldval), "r" (newval), "Ir" (-EFAULT), "Ir" (-EAGAIN) : "memory"); - uaccess_disable(); + uaccess_disable_privileged(); if (!ret) *uval = val; diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 4b39293d0f72..4ebb9c054ccc 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -10,8 +10,7 @@ #include <linux/build_bug.h> #include <linux/types.h> -/* A64 instructions are always 32 bits. */ -#define AARCH64_INSN_SIZE 4 +#include <asm/alternative.h> #ifndef __ASSEMBLY__ /* diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 64ce29378467..4e90c2debf70 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -80,6 +80,7 @@ HCR_FMO | HCR_IMO | HCR_PTW ) #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) #define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) +#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) /* TCR_EL2 Registers bits */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 54387ccd1ab2..7ccf770c53d9 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -34,8 +34,6 @@ */ #define KVM_VECTOR_PREAMBLE (2 * AARCH64_INSN_SIZE) -#define __SMCCC_WORKAROUND_1_SMC_SZ 36 - #define KVM_HOST_SMCCC_ID(id) \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ ARM_SMCCC_SMC_64, \ @@ -150,6 +148,14 @@ extern void *__vhe_undefined_symbol; #endif +struct kvm_nvhe_init_params { + unsigned long mair_el2; + unsigned long tcr_el2; + unsigned long tpidr_el2; + unsigned long stack_hyp_va; + phys_addr_t pgd_pa; +}; + /* Translate a kernel address @ptr into its equivalent linear mapping */ #define kvm_ksym_ref(ptr) \ ({ \ @@ -165,17 +171,14 @@ struct kvm_vcpu; struct kvm_s2_mmu; DECLARE_KVM_NVHE_SYM(__kvm_hyp_init); 
-DECLARE_KVM_NVHE_SYM(__kvm_hyp_host_vector); DECLARE_KVM_HYP_SYM(__kvm_hyp_vector); #define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init) -#define __kvm_hyp_host_vector CHOOSE_NVHE_SYM(__kvm_hyp_host_vector) #define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector) extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; DECLARE_KVM_NVHE_SYM(__per_cpu_start); DECLARE_KVM_NVHE_SYM(__per_cpu_end); -extern atomic_t arm64_el2_vector_last_slot; DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs); #define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs) @@ -189,8 +192,6 @@ extern void __kvm_timer_set_cntvoff(u64 cntvoff); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); -extern void __kvm_enable_ssbs(void); - extern u64 __vgic_v3_get_ich_vtr_el2(void); extern u64 __vgic_v3_read_vmcr(void); extern void __vgic_v3_write_vmcr(u32 vmcr); @@ -198,8 +199,6 @@ extern void __vgic_v3_init_lrs(void); extern u32 __kvm_get_mdcr_el2(void); -extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; - /* * Obtain the PC-relative address of a kernel symbol * s: symbol diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h deleted file mode 100644 index d6bb40122fdb..000000000000 --- a/arch/arm64/include/asm/kvm_coproc.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * Derived from arch/arm/include/asm/kvm_coproc.h - * Copyright (C) 2012 Rusty Russell IBM Corporation - */ - -#ifndef __ARM64_KVM_COPROC_H__ -#define __ARM64_KVM_COPROC_H__ - -#include <linux/kvm_host.h> - -void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); - -struct kvm_sys_reg_table { - const struct sys_reg_desc *table; - size_t num; -}; - -int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu); -int kvm_handle_cp14_32(struct kvm_vcpu *vcpu); -int kvm_handle_cp14_64(struct kvm_vcpu *vcpu); -int kvm_handle_cp15_32(struct kvm_vcpu *vcpu); -int 
kvm_handle_cp15_64(struct kvm_vcpu *vcpu); -int kvm_handle_sys_reg(struct kvm_vcpu *vcpu); - -#define kvm_coproc_table_init kvm_sys_reg_table_init -void kvm_sys_reg_table_init(void); - -struct kvm_one_reg; -int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); -int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); -int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); -unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu); - -#endif /* __ARM64_KVM_COPROC_H__ */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 5ef2669ccd6c..c8f550a53516 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -21,20 +21,25 @@ #include <asm/cputype.h> #include <asm/virt.h> -unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num); -unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu); -void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v); +#define CURRENT_EL_SP_EL0_VECTOR 0x0 +#define CURRENT_EL_SP_ELx_VECTOR 0x200 +#define LOWER_EL_AArch64_VECTOR 0x400 +#define LOWER_EL_AArch32_VECTOR 0x600 + +enum exception_type { + except_type_sync = 0, + except_type_irq = 0x80, + except_type_fiq = 0x100, + except_type_serror = 0x180, +}; bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); -void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr); +void kvm_skip_instr32(struct kvm_vcpu *vcpu); void kvm_inject_undefined(struct kvm_vcpu *vcpu); void kvm_inject_vabt(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); -void kvm_inject_undef32(struct kvm_vcpu *vcpu); -void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr); -void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr); static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { @@ -168,30 +173,6 @@ 
static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, vcpu_gp_regs(vcpu)->regs[reg_num] = val; } -static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu) -{ - if (vcpu_mode_is_32bit(vcpu)) - return vcpu_read_spsr32(vcpu); - - if (vcpu->arch.sysregs_loaded_on_cpu) - return read_sysreg_el1(SYS_SPSR); - else - return __vcpu_sys_reg(vcpu, SPSR_EL1); -} - -static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v) -{ - if (vcpu_mode_is_32bit(vcpu)) { - vcpu_write_spsr32(vcpu, v); - return; - } - - if (vcpu->arch.sysregs_loaded_on_cpu) - write_sysreg_el1(v, SYS_SPSR); - else - __vcpu_sys_reg(vcpu, SPSR_EL1) = v; -} - /* * The layout of SPSR for an AArch32 state is different when observed from an * AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32 @@ -472,32 +453,9 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, return data; /* Leave LE untouched */ } -static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) -{ - if (vcpu_mode_is_32bit(vcpu)) { - kvm_skip_instr32(vcpu, is_wide_instr); - } else { - *vcpu_pc(vcpu) += 4; - *vcpu_cpsr(vcpu) &= ~PSR_BTYPE_MASK; - } - - /* advance the singlestep state machine */ - *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; -} - -/* - * Skip an instruction which has been emulated at hyp while most guest sysregs - * are live. 
- */ -static __always_inline void __kvm_skip_instr(struct kvm_vcpu *vcpu) +static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu) { - *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); - vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR); - - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); - - write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR); - write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); + vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC; } #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0cd9f0f75c13..11beda85ee7e 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -50,6 +50,16 @@ #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) +/* + * Mode of operation configurable with kvm-arm.mode early param. + * See Documentation/admin-guide/kernel-parameters.txt for more information. + */ +enum kvm_mode { + KVM_MODE_DEFAULT, + KVM_MODE_PROTECTED, +}; +enum kvm_mode kvm_get_mode(void); + DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); extern unsigned int kvm_sve_max_vl; @@ -58,8 +68,6 @@ int kvm_arm_init_sve(void); int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); -int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext); -void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start); struct kvm_vmid { /* The VMID generation used for the virt. memory system */ @@ -89,6 +97,9 @@ struct kvm_s2_mmu { struct kvm *kvm; }; +struct kvm_arch_memory_slot { +}; + struct kvm_arch { struct kvm_s2_mmu mmu; @@ -120,6 +131,7 @@ struct kvm_arch { unsigned int pmuver; u8 pfr0_csv2; + u8 pfr0_csv3; }; struct kvm_vcpu_fault_info { @@ -203,48 +215,6 @@ enum vcpu_sysreg { NR_SYS_REGS /* Nothing after this line! 
*/ }; -/* 32bit mapping */ -#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ -#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */ -#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */ -#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */ -#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */ -#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */ -#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */ -#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ -#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ -#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. */ -#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ -#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ -#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ -#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */ -#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */ -#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */ -#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */ -#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */ -#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */ -#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ -#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ -#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ -#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */ -#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */ -#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ -#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */ -#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ -#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ -#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ - 
-#define cp14_DBGDSCRext (MDSCR_EL1 * 2) -#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2) -#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2) -#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1) -#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) -#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) -#define cp14_DBGDCCINT (MDCCINT_EL1 * 2) -#define cp14_DBGVCR (DBGVCR32_EL2 * 2) - -#define NR_COPRO_REGS (NR_SYS_REGS * 2) - struct kvm_cpu_context { struct user_pt_regs regs; /* sp = sp_el0 */ @@ -255,10 +225,7 @@ struct kvm_cpu_context { struct user_fpsimd_state fp_regs; - union { - u64 sys_regs[NR_SYS_REGS]; - u32 copro[NR_COPRO_REGS]; - }; + u64 sys_regs[NR_SYS_REGS]; struct kvm_vcpu *__hyp_running_vcpu; }; @@ -409,8 +376,33 @@ struct kvm_vcpu_arch { #define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */ #define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */ #define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */ +#define KVM_ARM64_PENDING_EXCEPTION (1 << 8) /* Exception pending */ +#define KVM_ARM64_EXCEPT_MASK (7 << 9) /* Target EL/MODE */ -#define vcpu_has_sve(vcpu) (system_supports_sve() && \ +/* + * When KVM_ARM64_PENDING_EXCEPTION is set, KVM_ARM64_EXCEPT_MASK can + * take the following values: + * + * For AArch32 EL1: + */ +#define KVM_ARM64_EXCEPT_AA32_UND (0 << 9) +#define KVM_ARM64_EXCEPT_AA32_IABT (1 << 9) +#define KVM_ARM64_EXCEPT_AA32_DABT (2 << 9) +/* For AArch64: */ +#define KVM_ARM64_EXCEPT_AA64_ELx_SYNC (0 << 9) +#define KVM_ARM64_EXCEPT_AA64_ELx_IRQ (1 << 9) +#define KVM_ARM64_EXCEPT_AA64_ELx_FIQ (2 << 9) +#define KVM_ARM64_EXCEPT_AA64_ELx_SERR (3 << 9) +#define KVM_ARM64_EXCEPT_AA64_EL1 (0 << 11) +#define KVM_ARM64_EXCEPT_AA64_EL2 (1 << 11) + +/* + * Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be + * set together with an exception... 
+ */ +#define KVM_ARM64_INCREMENT_PC (1 << 9) /* Increment PC */ + +#define vcpu_has_sve(vcpu) (system_supports_sve() && \ ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE)) #ifdef CONFIG_ARM64_PTR_AUTH @@ -440,14 +432,96 @@ struct kvm_vcpu_arch { u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); -/* - * CP14 and CP15 live in the same array, as they are backed by the - * same system registers. - */ -#define CPx_BIAS IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) +static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) +{ + /* + * *** VHE ONLY *** + * + * System registers listed in the switch are not saved on every + * exit from the guest but are only saved on vcpu_put. + * + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but + * should never be listed below, because the guest cannot modify its + * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's + * thread when emulating cross-VCPU communication. 
+ */ + if (!has_vhe()) + return false; + + switch (reg) { + case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break; + case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; + case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; + case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; + case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; + case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; + case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; + case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; + case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; + case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; + case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; + case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; + case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; + case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; + case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; + case PAR_EL1: *val = read_sysreg_par(); break; + case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; + case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; + default: return false; + } + + return true; +} -#define vcpu_cp14(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS]) -#define vcpu_cp15(v,r) ((v)->arch.ctxt.copro[(r) ^ CPx_BIAS]) +static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) +{ + /* + * *** VHE ONLY *** + * + * System registers listed in the switch are not restored on every + * entry to the guest but are only restored on vcpu_load. 
+ * + * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but + * should never be listed below, because the MPIDR should only be set + * once, before running the VCPU, and never changed later. + */ + if (!has_vhe()) + return false; + + switch (reg) { + case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break; + case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; + case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; + case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; + case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; + case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; + case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; + case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; + case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; + case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; + case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; + case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; + case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; + case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; + case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; + case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; + case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; + default: return false; + } + + return true; +} struct kvm_vm_stat { ulong remote_tlb_flush; @@ -473,6 +547,12 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg 
*reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); + +unsigned long kvm_arm_num_sys_reg_descs(struct kvm_vcpu *vcpu); +int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); +int kvm_arm_sys_reg_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); +int kvm_arm_sys_reg_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); + int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events); @@ -535,6 +615,17 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); int handle_exit(struct kvm_vcpu *vcpu, int exception_index); void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index); +int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu); +int kvm_handle_cp14_32(struct kvm_vcpu *vcpu); +int kvm_handle_cp14_64(struct kvm_vcpu *vcpu); +int kvm_handle_cp15_32(struct kvm_vcpu *vcpu); +int kvm_handle_cp15_64(struct kvm_vcpu *vcpu); +int kvm_handle_sys_reg(struct kvm_vcpu *vcpu); + +void kvm_reset_sys_regs(struct kvm_vcpu *vcpu); + +void kvm_sys_reg_table_init(void); + /* MMIO helpers */ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); @@ -654,4 +745,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_arm_vcpu_sve_finalized(vcpu) \ ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED) +#define kvm_vcpu_has_pmu(vcpu) \ + (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 6b664de5ec1f..c0450828378b 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -14,6 +14,7 @@ DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); DECLARE_PER_CPU(unsigned long, kvm_hyp_vector); +DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); #define read_sysreg_elx(r,nvh,vh) \ ({ \ @@ -92,10 +93,11 @@ void 
deactivate_traps_vhe_put(void); u64 __guest_enter(struct kvm_vcpu *vcpu); +bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt); + void __noreturn hyp_panic(void); #ifdef __KVM_NVHE_HYPERVISOR__ void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par); #endif #endif /* __ARM64_KVM_HYP_H__ */ - diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 331394306cce..e52d82aeadca 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -72,6 +72,52 @@ alternative_cb kvm_update_va_mask alternative_cb_end .endm +/* + * Convert a kernel image address to a PA + * reg: kernel address to be converted in place + * tmp: temporary register + * + * The actual code generation takes place in kvm_get_kimage_voffset, and + * the instructions below are only there to reserve the space and + * perform the register allocation (kvm_get_kimage_voffset uses the + * specific registers encoded in the instructions). + */ +.macro kimg_pa reg, tmp +alternative_cb kvm_get_kimage_voffset + movz \tmp, #0 + movk \tmp, #0, lsl #16 + movk \tmp, #0, lsl #32 + movk \tmp, #0, lsl #48 +alternative_cb_end + + /* reg = __pa(reg) */ + sub \reg, \reg, \tmp +.endm + +/* + * Convert a kernel image address to a hyp VA + * reg: kernel address to be converted in place + * tmp: temporary register + * + * The actual code generation takes place in kvm_get_kimage_voffset, and + * the instructions below are only there to reserve the space and + * perform the register allocation (kvm_update_kimg_phys_offset uses the + * specific registers encoded in the instructions). 
+ */ +.macro kimg_hyp_va reg, tmp +alternative_cb kvm_update_kimg_phys_offset + movz \tmp, #0 + movk \tmp, #0, lsl #16 + movk \tmp, #0, lsl #32 + movk \tmp, #0, lsl #48 +alternative_cb_end + + sub \reg, \reg, \tmp + mov_q \tmp, PAGE_OFFSET + orr \reg, \reg, \tmp + kern_hyp_va \reg +.endm + #else #include <linux/pgtable.h> @@ -98,6 +144,24 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) #define kern_hyp_va(v) ((typeof(v))(__kern_hyp_va((unsigned long)(v)))) +static __always_inline unsigned long __kimg_hyp_va(unsigned long v) +{ + unsigned long offset; + + asm volatile(ALTERNATIVE_CB("movz %0, #0\n" + "movk %0, #0, lsl #16\n" + "movk %0, #0, lsl #32\n" + "movk %0, #0, lsl #48\n", + kvm_update_kimg_phys_offset) + : "=r" (offset)); + + return __kern_hyp_va((v - offset) | PAGE_OFFSET); +} + +#define kimg_fn_hyp_va(v) ((typeof(*v))(__kimg_hyp_va((unsigned long)(v)))) + +#define kimg_fn_ptr(x) (typeof(x) **)(x) + /* * We currently support using a VM-specified IPA size. For backward * compatibility, the default IPA size is fixed to 40bits. @@ -208,52 +272,6 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, return ret; } -/* - * EL2 vectors can be mapped and rerouted in a number of ways, - * depending on the kernel configuration and CPU present: - * - * - If the CPU is affected by Spectre-v2, the hardening sequence is - * placed in one of the vector slots, which is executed before jumping - * to the real vectors. - * - * - If the CPU also has the ARM64_HARDEN_EL2_VECTORS cap, the slot - * containing the hardening sequence is mapped next to the idmap page, - * and executed before jumping to the real vectors. - * - * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an - * empty slot is selected, mapped next to the idmap page, and - * executed before jumping to the real vectors. - * - * Note that ARM64_HARDEN_EL2_VECTORS is somewhat incompatible with - * VHE, as we don't have hypervisor-specific mappings. 
If the system - * is VHE and yet selects this capability, it will be ignored. - */ -extern void *__kvm_bp_vect_base; -extern int __kvm_harden_el2_vector_slot; - -static inline void *kvm_get_hyp_vector(void) -{ - struct bp_hardening_data *data = arm64_get_bp_hardening_data(); - void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); - int slot = -1; - - if (cpus_have_const_cap(ARM64_SPECTRE_V2) && data->fn) { - vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); - slot = data->hyp_vectors_slot; - } - - if (this_cpu_has_cap(ARM64_HARDEN_EL2_VECTORS) && !has_vhe()) { - vect = __kvm_bp_vect_base; - if (slot == -1) - slot = __kvm_harden_el2_vector_slot; - } - - if (slot != -1) - vect += slot * SZ_2K; - - return vect; -} - #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index b2e91c187e2a..75beffe2ee8a 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -12,9 +12,6 @@ #define USER_ASID_FLAG (UL(1) << USER_ASID_BIT) #define TTBR_ASID_MASK (UL(0xffff) << 48) -#define BP_HARDEN_EL2_SLOTS 4 -#define __BP_HARDEN_HYP_VECS_SZ (BP_HARDEN_EL2_SLOTS * SZ_2K) - #ifndef __ASSEMBLY__ #include <linux/refcount.h> @@ -41,32 +38,6 @@ static inline bool arm64_kernel_unmapped_at_el0(void) return cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0); } -typedef void (*bp_hardening_cb_t)(void); - -struct bp_hardening_data { - int hyp_vectors_slot; - bp_hardening_cb_t fn; -}; - -DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); - -static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) -{ - return this_cpu_ptr(&bp_hardening_data); -} - -static inline void arm64_apply_bp_hardening(void) -{ - struct bp_hardening_data *d; - - if (!cpus_have_const_cap(ARM64_SPECTRE_V2)) - return; - - d = arm64_get_bp_hardening_data(); - if (d->fn) - d->fn(); -} - extern void arm64_memblock_init(void); 
extern void paging_init(void); extern void bootmem_init(void); diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 1599e17379d8..8f1661603b78 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -239,6 +239,12 @@ PERCPU_RET_OP(add, add, ldadd) #define this_cpu_cmpxchg_8(pcp, o, n) \ _pcp_protect_return(cmpxchg_relaxed, pcp, o, n) +#ifdef __KVM_NVHE_HYPERVISOR__ +extern unsigned long __hyp_per_cpu_offset(unsigned int cpu); +#define __per_cpu_offset +#define per_cpu_offset(cpu) __hyp_per_cpu_offset((cpu)) +#endif + #include <asm-generic/percpu.h> /* Redefine macros for nVHE hyp under DEBUG_PREEMPT to avoid its dependencies. */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index fce8cbecd6bc..724249f37af5 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -8,9 +8,6 @@ #ifndef __ASM_PROCESSOR_H #define __ASM_PROCESSOR_H -#define KERNEL_DS UL(-1) -#define USER_DS ((UL(1) << VA_BITS) - 1) - /* * On arm64 systems, unaligned accesses by the CPU are cheap, and so there is * no point in shifting all network buffers by 2 bytes just to make some IP @@ -48,6 +45,7 @@ #define DEFAULT_MAP_WINDOW_64 (UL(1) << VA_BITS_MIN) #define TASK_SIZE_64 (UL(1) << vabits_actual) +#define TASK_SIZE_MAX (UL(1) << VA_BITS) #ifdef CONFIG_COMPAT #if defined(CONFIG_ARM64_64K_PAGES) && defined(CONFIG_KUSER_HELPERS) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 997cf8c8cd52..2bb53bc3c326 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -16,6 +16,11 @@ #define CurrentEL_EL1 (1 << 2) #define CurrentEL_EL2 (2 << 2) +#define INIT_PSTATE_EL1 \ + (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL1h) +#define INIT_PSTATE_EL2 \ + (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT | PSR_MODE_EL2h) + /* * PMR values used to mask/unmask interrupts. 
* @@ -188,8 +193,7 @@ struct pt_regs { s32 syscallno; u32 unused2; #endif - - u64 orig_addr_limit; + u64 sdei_ttbr1; /* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */ u64 pmr_save; u64 stackframe[2]; diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h new file mode 100644 index 000000000000..1bce62fa908a --- /dev/null +++ b/arch/arm64/include/asm/rwonce.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Google LLC. + */ +#ifndef __ASM_RWONCE_H +#define __ASM_RWONCE_H + +#ifdef CONFIG_LTO + +#include <linux/compiler_types.h> +#include <asm/alternative-macros.h> + +#ifndef BUILD_VDSO + +#ifdef CONFIG_AS_HAS_LDAPR +#define __LOAD_RCPC(sfx, regs...) \ + ALTERNATIVE( \ + "ldar" #sfx "\t" #regs, \ + ".arch_extension rcpc\n" \ + "ldapr" #sfx "\t" #regs, \ + ARM64_HAS_LDAPR) +#else +#define __LOAD_RCPC(sfx, regs...) "ldar" #sfx "\t" #regs +#endif /* CONFIG_AS_HAS_LDAPR */ + +/* + * When building with LTO, there is an increased risk of the compiler + * converting an address dependency headed by a READ_ONCE() invocation + * into a control dependency and consequently allowing for harmful + * reordering by the CPU. + * + * Ensure that such transformations are harmless by overriding the generic + * READ_ONCE() definition with one that provides RCpc acquire semantics + * when building with LTO. 
+ */ +#define __READ_ONCE(x) \ +({ \ + typeof(&(x)) __x = &(x); \ + int atomic = 1; \ + union { __unqual_scalar_typeof(*__x) __val; char __c[1]; } __u; \ + switch (sizeof(x)) { \ + case 1: \ + asm volatile(__LOAD_RCPC(b, %w0, %1) \ + : "=r" (*(__u8 *)__u.__c) \ + : "Q" (*__x) : "memory"); \ + break; \ + case 2: \ + asm volatile(__LOAD_RCPC(h, %w0, %1) \ + : "=r" (*(__u16 *)__u.__c) \ + : "Q" (*__x) : "memory"); \ + break; \ + case 4: \ + asm volatile(__LOAD_RCPC(, %w0, %1) \ + : "=r" (*(__u32 *)__u.__c) \ + : "Q" (*__x) : "memory"); \ + break; \ + case 8: \ + asm volatile(__LOAD_RCPC(, %0, %1) \ + : "=r" (*(__u64 *)__u.__c) \ + : "Q" (*__x) : "memory"); \ + break; \ + default: \ + atomic = 0; \ + } \ + atomic ? (typeof(*__x))__u.__val : (*(volatile typeof(__x))__x);\ +}) + +#endif /* !BUILD_VDSO */ +#endif /* CONFIG_LTO */ + +#include <asm-generic/rwonce.h> + +#endif /* __ASM_RWONCE_H */ diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index 3994169985ef..8ff579361731 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -11,6 +11,7 @@ extern char __alt_instructions[], __alt_instructions_end[]; extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; extern char __hyp_text_start[], __hyp_text_end[]; +extern char __hyp_data_ro_after_init_start[], __hyp_data_ro_after_init_end[]; extern char __idmap_text_start[], __idmap_text_end[]; extern char __initdata_begin[], __initdata_end[]; extern char __inittext_begin[], __inittext_end[]; diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 2e7f529ec5a6..bcb01ca15325 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -46,9 +46,9 @@ DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); * Logical CPU mapping. 
*/ extern u64 __cpu_logical_map[NR_CPUS]; -extern u64 cpu_logical_map(int cpu); +extern u64 cpu_logical_map(unsigned int cpu); -static inline void set_cpu_logical_map(int cpu, u64 hwid) +static inline void set_cpu_logical_map(unsigned int cpu, u64 hwid) { __cpu_logical_map[cpu] = hwid; } diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index fcdfbce302bd..f62ca39da6c5 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -9,7 +9,15 @@ #ifndef __ASM_SPECTRE_H #define __ASM_SPECTRE_H +#define BP_HARDEN_EL2_SLOTS 4 +#define __BP_HARDEN_HYP_VECS_SZ ((BP_HARDEN_EL2_SLOTS - 1) * SZ_2K) + +#ifndef __ASSEMBLY__ + +#include <linux/percpu.h> + #include <asm/cpufeature.h> +#include <asm/virt.h> /* Watch out, ordering is important here. */ enum mitigation_state { @@ -20,13 +28,70 @@ enum mitigation_state { struct task_struct; +/* + * Note: the order of this enum corresponds to __bp_harden_hyp_vecs and + * we rely on having the direct vectors first. + */ +enum arm64_hyp_spectre_vector { + /* + * Take exceptions directly to __kvm_hyp_vector. This must be + * 0 so that it is used by default when mitigations are not needed. + */ + HYP_VECTOR_DIRECT, + + /* + * Bounce via a slot in the hypervisor text mapping of + * __bp_harden_hyp_vecs, which contains an SMC call. + */ + HYP_VECTOR_SPECTRE_DIRECT, + + /* + * Bounce via a slot in a special mapping of __bp_harden_hyp_vecs + * next to the idmap page. + */ + HYP_VECTOR_INDIRECT, + + /* + * Bounce via a slot in a special mapping of __bp_harden_hyp_vecs + * next to the idmap page, which contains an SMC call.
+ */ + HYP_VECTOR_SPECTRE_INDIRECT, +}; + +typedef void (*bp_hardening_cb_t)(void); + +struct bp_hardening_data { + enum arm64_hyp_spectre_vector slot; + bp_hardening_cb_t fn; +}; + +DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); + +static inline void arm64_apply_bp_hardening(void) +{ + struct bp_hardening_data *d; + + if (!cpus_have_const_cap(ARM64_SPECTRE_V2)) + return; + + d = this_cpu_ptr(&bp_hardening_data); + if (d->fn) + d->fn(); +} + enum mitigation_state arm64_get_spectre_v2_state(void); bool has_spectre_v2(const struct arm64_cpu_capabilities *cap, int scope); void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused); +bool has_spectre_v3a(const struct arm64_cpu_capabilities *cap, int scope); +void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused); + enum mitigation_state arm64_get_spectre_v4_state(void); bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope); void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused); void spectre_v4_enable_task_mitigation(struct task_struct *tsk); +enum mitigation_state arm64_get_meltdown_state(void); + +#endif /* __ASSEMBLY__ */ #endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e2ef4c2edf06..82521cdbfc1c 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -98,6 +98,10 @@ #define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift)) #define SET_PSTATE_TCO(x) __emit_inst(0xd500401f | PSTATE_TCO | ((!!x) << PSTATE_Imm_shift)) +#define set_pstate_pan(x) asm volatile(SET_PSTATE_PAN(x)) +#define set_pstate_uao(x) asm volatile(SET_PSTATE_UAO(x)) +#define set_pstate_ssbs(x) asm volatile(SET_PSTATE_SSBS(x)) + #define __SYS_BARRIER_INSN(CRm, op2, Rt) \ __emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f)) @@ -465,6 +469,7 @@ #define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 
14, 15, 7) +#define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0) #define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0) #define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) @@ -582,6 +587,9 @@ #define ENDIAN_SET_EL2 0 #endif +#define INIT_SCTLR_EL2_MMU_OFF \ + (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) + /* SCTLR_EL1 specific flags. */ #define SCTLR_EL1_ATA0 (BIT(42)) @@ -615,12 +623,15 @@ #define ENDIAN_SET_EL1 0 #endif -#define SCTLR_EL1_SET (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA |\ - SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I |\ - SCTLR_EL1_DZE | SCTLR_EL1_UCT |\ - SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\ - SCTLR_ELx_ITFSB| SCTLR_ELx_ATA | SCTLR_EL1_ATA0 |\ - ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1) +#define INIT_SCTLR_EL1_MMU_OFF \ + (ENDIAN_SET_EL1 | SCTLR_EL1_RES1) + +#define INIT_SCTLR_EL1_MMU_ON \ + (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_EL1_SA0 | \ + SCTLR_EL1_SED | SCTLR_ELx_I | SCTLR_EL1_DZE | SCTLR_EL1_UCT | \ + SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \ + SCTLR_ELx_ATA | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI | \ + SCTLR_EL1_RES1) /* MAIR_ELx memory attributes (used by Linux) */ #define MAIR_ATTR_DEVICE_nGnRnE UL(0x00) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 1fbab854a51b..015beafe58f5 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -18,14 +18,11 @@ struct task_struct; #include <asm/stack_pointer.h> #include <asm/types.h> -typedef unsigned long mm_segment_t; - /* * low level task data that entry.S needs immediate access to. 
*/ struct thread_info { unsigned long flags; /* low level flags */ - mm_segment_t addr_limit; /* address limit */ #ifdef CONFIG_ARM64_SW_TTBR0_PAN u64 ttbr0; /* saved TTBR0_EL1 */ #endif @@ -66,8 +63,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ #define TIF_UPROBE 4 /* uprobe breakpoint or singlestep */ -#define TIF_FSCHECK 5 /* Check FS is USER_DS on return */ -#define TIF_MTE_ASYNC_FAULT 6 /* MTE Asynchronous Tag Check Fault */ +#define TIF_MTE_ASYNC_FAULT 5 /* MTE Asynchronous Tag Check Fault */ #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ @@ -93,7 +89,6 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_UPROBE (1 << TIF_UPROBE) -#define _TIF_FSCHECK (1 << TIF_FSCHECK) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_SVE (1 << TIF_SVE) @@ -101,7 +96,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ - _TIF_UPROBE | _TIF_FSCHECK | _TIF_MTE_ASYNC_FAULT) + _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ @@ -119,7 +114,6 @@ void arch_release_task_struct(struct task_struct *tsk); { \ .flags = _TIF_FOREIGN_FPSTATE, \ .preempt_count = INIT_PREEMPT_COUNT, \ - .addr_limit = KERNEL_DS, \ INIT_SCS \ } diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 991dd5f031e4..769cad7b4910 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -24,44 +24,18 @@ #include 
<asm/memory.h> #include <asm/extable.h> -#define get_fs() (current_thread_info()->addr_limit) - -static inline void set_fs(mm_segment_t fs) -{ - current_thread_info()->addr_limit = fs; - - /* - * Prevent a mispredicted conditional call to set_fs from forwarding - * the wrong address limit to access_ok under speculation. - */ - spec_bar(); - - /* On user-mode return, check fs is correct */ - set_thread_flag(TIF_FSCHECK); - - /* - * Enable/disable UAO so that copy_to_user() etc can access - * kernel memory with the unprivileged instructions. - */ - if (IS_ENABLED(CONFIG_ARM64_UAO) && fs == KERNEL_DS) - asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO)); - else - asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO, - CONFIG_ARM64_UAO)); -} - -#define uaccess_kernel() (get_fs() == KERNEL_DS) +#define HAVE_GET_KERNEL_NOFAULT /* * Test whether a block of memory is a valid user space address. * Returns 1 if the range is valid, 0 otherwise. * * This is equivalent to the following test: - * (u65)addr + (u65)size <= (u65)current->addr_limit + 1 + * (u65)addr + (u65)size <= (u65)TASK_SIZE_MAX */ static inline unsigned long __range_ok(const void __user *addr, unsigned long size) { - unsigned long ret, limit = current_thread_info()->addr_limit; + unsigned long ret, limit = TASK_SIZE_MAX - 1; /* * Asynchronous I/O running in a kernel thread does not have the @@ -94,7 +68,6 @@ static inline unsigned long __range_ok(const void __user *addr, unsigned long si } #define access_ok(addr, size) __range_ok(addr, size) -#define user_addr_max get_fs #define _ASM_EXTABLE(from, to) \ " .pushsection __ex_table, \"a\"\n" \ @@ -186,47 +159,26 @@ static inline void __uaccess_enable_hw_pan(void) CONFIG_ARM64_PAN)); } -#define __uaccess_disable(alt) \ -do { \ - if (!uaccess_ttbr0_disable()) \ - asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), alt, \ - CONFIG_ARM64_PAN)); \ -} while (0) - -#define __uaccess_enable(alt) \ -do { \ - if (!uaccess_ttbr0_enable()) \ - asm(ALTERNATIVE("nop", 
SET_PSTATE_PAN(0), alt, \ - CONFIG_ARM64_PAN)); \ -} while (0) - -static inline void uaccess_disable(void) +static inline void uaccess_disable_privileged(void) { - __uaccess_disable(ARM64_HAS_PAN); -} + if (uaccess_ttbr0_disable()) + return; -static inline void uaccess_enable(void) -{ - __uaccess_enable(ARM64_HAS_PAN); + __uaccess_enable_hw_pan(); } -/* - * These functions are no-ops when UAO is present. - */ -static inline void uaccess_disable_not_uao(void) +static inline void uaccess_enable_privileged(void) { - __uaccess_disable(ARM64_ALT_PAN_NOT_UAO); -} + if (uaccess_ttbr0_enable()) + return; -static inline void uaccess_enable_not_uao(void) -{ - __uaccess_enable(ARM64_ALT_PAN_NOT_UAO); + __uaccess_disable_hw_pan(); } /* - * Sanitise a uaccess pointer such that it becomes NULL if above the - * current addr_limit. In case the pointer is tagged (has the top byte set), - * untag the pointer before checking. + * Sanitise a uaccess pointer such that it becomes NULL if above the maximum + * user address. In case the pointer is tagged (has the top byte set), untag + * the pointer before checking. */ #define uaccess_mask_ptr(ptr) (__typeof__(ptr))__uaccess_mask_ptr(ptr) static inline void __user *__uaccess_mask_ptr(const void __user *ptr) @@ -237,7 +189,7 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) " bics xzr, %3, %2\n" " csel %0, %1, xzr, eq\n" : "=&r" (safe_ptr) - : "r" (ptr), "r" (current_thread_info()->addr_limit), + : "r" (ptr), "r" (TASK_SIZE_MAX - 1), "r" (untagged_addr(ptr)) : "cc"); @@ -253,10 +205,9 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) * The "__xxx_error" versions set the third argument to -EFAULT if an error * occurs, and leave it unchanged on success. 
*/ -#define __get_user_asm(instr, alt_instr, reg, x, addr, err, feature) \ +#define __get_mem_asm(load, reg, x, addr, err) \ asm volatile( \ - "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \ - alt_instr " " reg "1, [%2]\n", feature) \ + "1: " load " " reg "1, [%2]\n" \ "2:\n" \ " .section .fixup, \"ax\"\n" \ " .align 2\n" \ @@ -268,35 +219,36 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) : "+r" (err), "=&r" (x) \ : "r" (addr), "i" (-EFAULT)) -#define __raw_get_user(x, ptr, err) \ +#define __raw_get_mem(ldr, x, ptr, err) \ do { \ unsigned long __gu_val; \ - __chk_user_ptr(ptr); \ - uaccess_enable_not_uao(); \ switch (sizeof(*(ptr))) { \ case 1: \ - __get_user_asm("ldrb", "ldtrb", "%w", __gu_val, (ptr), \ - (err), ARM64_HAS_UAO); \ + __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err)); \ break; \ case 2: \ - __get_user_asm("ldrh", "ldtrh", "%w", __gu_val, (ptr), \ - (err), ARM64_HAS_UAO); \ + __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err)); \ break; \ case 4: \ - __get_user_asm("ldr", "ldtr", "%w", __gu_val, (ptr), \ - (err), ARM64_HAS_UAO); \ + __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err)); \ break; \ case 8: \ - __get_user_asm("ldr", "ldtr", "%x", __gu_val, (ptr), \ - (err), ARM64_HAS_UAO); \ + __get_mem_asm(ldr, "%x", __gu_val, (ptr), (err)); \ break; \ default: \ BUILD_BUG(); \ } \ - uaccess_disable_not_uao(); \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) +#define __raw_get_user(x, ptr, err) \ +do { \ + __chk_user_ptr(ptr); \ + uaccess_ttbr0_enable(); \ + __raw_get_mem("ldtr", x, ptr, err); \ + uaccess_ttbr0_disable(); \ +} while (0) + #define __get_user_error(x, ptr, err) \ do { \ __typeof__(*(ptr)) __user *__p = (ptr); \ @@ -318,10 +270,19 @@ do { \ #define get_user __get_user -#define __put_user_asm(instr, alt_instr, reg, x, addr, err, feature) \ +#define __get_kernel_nofault(dst, src, type, err_label) \ +do { \ + int __gkn_err = 0; \ + \ + __raw_get_mem("ldr", *((type *)(dst)), \ + (__force type *)(src), 
__gkn_err); \ + if (unlikely(__gkn_err)) \ + goto err_label; \ +} while (0) + +#define __put_mem_asm(store, reg, x, addr, err) \ asm volatile( \ - "1:"ALTERNATIVE(instr " " reg "1, [%2]\n", \ - alt_instr " " reg "1, [%2]\n", feature) \ + "1: " store " " reg "1, [%2]\n" \ "2:\n" \ " .section .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -332,32 +293,33 @@ do { \ : "+r" (err) \ : "r" (x), "r" (addr), "i" (-EFAULT)) -#define __raw_put_user(x, ptr, err) \ +#define __raw_put_mem(str, x, ptr, err) \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ - __chk_user_ptr(ptr); \ - uaccess_enable_not_uao(); \ switch (sizeof(*(ptr))) { \ case 1: \ - __put_user_asm("strb", "sttrb", "%w", __pu_val, (ptr), \ - (err), ARM64_HAS_UAO); \ + __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err)); \ break; \ case 2: \ - __put_user_asm("strh", "sttrh", "%w", __pu_val, (ptr), \ - (err), ARM64_HAS_UAO); \ + __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err)); \ break; \ case 4: \ - __put_user_asm("str", "sttr", "%w", __pu_val, (ptr), \ - (err), ARM64_HAS_UAO); \ + __put_mem_asm(str, "%w", __pu_val, (ptr), (err)); \ break; \ case 8: \ - __put_user_asm("str", "sttr", "%x", __pu_val, (ptr), \ - (err), ARM64_HAS_UAO); \ + __put_mem_asm(str, "%x", __pu_val, (ptr), (err)); \ break; \ default: \ BUILD_BUG(); \ } \ - uaccess_disable_not_uao(); \ +} while (0) + +#define __raw_put_user(x, ptr, err) \ +do { \ + __chk_user_ptr(ptr); \ + uaccess_ttbr0_enable(); \ + __raw_put_mem("sttr", x, ptr, err); \ + uaccess_ttbr0_disable(); \ } while (0) #define __put_user_error(x, ptr, err) \ @@ -381,14 +343,24 @@ do { \ #define put_user __put_user +#define __put_kernel_nofault(dst, src, type, err_label) \ +do { \ + int __pkn_err = 0; \ + \ + __raw_put_mem("str", *((type *)(src)), \ + (__force type *)(dst), __pkn_err); \ + if (unlikely(__pkn_err)) \ + goto err_label; \ +} while(0) + extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); #define raw_copy_from_user(to, from, n) 
\ ({ \ unsigned long __acfu_ret; \ - uaccess_enable_not_uao(); \ + uaccess_ttbr0_enable(); \ __acfu_ret = __arch_copy_from_user((to), \ __uaccess_mask_ptr(from), (n)); \ - uaccess_disable_not_uao(); \ + uaccess_ttbr0_disable(); \ __acfu_ret; \ }) @@ -396,10 +368,10 @@ extern unsigned long __must_check __arch_copy_to_user(void __user *to, const voi #define raw_copy_to_user(to, from, n) \ ({ \ unsigned long __actu_ret; \ - uaccess_enable_not_uao(); \ + uaccess_ttbr0_enable(); \ __actu_ret = __arch_copy_to_user(__uaccess_mask_ptr(to), \ (from), (n)); \ - uaccess_disable_not_uao(); \ + uaccess_ttbr0_disable(); \ __actu_ret; \ }) @@ -407,10 +379,10 @@ extern unsigned long __must_check __arch_copy_in_user(void __user *to, const voi #define raw_copy_in_user(to, from, n) \ ({ \ unsigned long __aciu_ret; \ - uaccess_enable_not_uao(); \ + uaccess_ttbr0_enable(); \ __aciu_ret = __arch_copy_in_user(__uaccess_mask_ptr(to), \ __uaccess_mask_ptr(from), (n)); \ - uaccess_disable_not_uao(); \ + uaccess_ttbr0_disable(); \ __aciu_ret; \ }) @@ -421,9 +393,9 @@ extern unsigned long __must_check __arch_clear_user(void __user *to, unsigned lo static inline unsigned long __must_check __clear_user(void __user *to, unsigned long n) { if (access_ok(to, n)) { - uaccess_enable_not_uao(); + uaccess_ttbr0_enable(); n = __arch_clear_user(__uaccess_mask_ptr(to), n); - uaccess_disable_not_uao(); + uaccess_ttbr0_disable(); } return n; } diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 6069be50baf9..ee6a48df89d9 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -65,9 +65,19 @@ extern u32 __boot_cpu_mode[2]; void __hyp_set_vectors(phys_addr_t phys_vector_base); void __hyp_reset_vectors(void); +DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); + /* Reports the availability of HYP mode */ static inline bool is_hyp_mode_available(void) { + /* + * If KVM protected mode is initialized, all CPUs must have been booted + * in EL2. 
Avoid checking __boot_cpu_mode as CPUs now come up in EL1. + */ + if (IS_ENABLED(CONFIG_KVM) && + static_branch_likely(&kvm_protected_mode_initialized)) + return true; + return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 && __boot_cpu_mode[1] == BOOT_CPU_MODE_EL2); } @@ -75,6 +85,14 @@ static inline bool is_hyp_mode_available(void) /* Check if the bootloader has booted CPUs in different modes */ static inline bool is_hyp_mode_mismatched(void) { + /* + * If KVM protected mode is initialized, all CPUs must have been booted + * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1. + */ + if (IS_ENABLED(CONFIG_KVM) && + static_branch_likely(&kvm_protected_mode_initialized)) + return false; + return __boot_cpu_mode[0] != __boot_cpu_mode[1]; } @@ -97,6 +115,14 @@ static __always_inline bool has_vhe(void) return cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN); } +static __always_inline bool is_protected_kvm_enabled(void) +{ + if (is_vhe_hyp_code()) + return false; + else + return cpus_have_final_cap(ARM64_KVM_PROTECTED_MODE); +} + #endif /* __ASSEMBLY__ */ #endif /* ! __ASM__VIRT_H */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 1c17c3a24411..24223adae150 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -156,9 +156,6 @@ struct kvm_sync_regs { __u64 device_irq_level; }; -struct kvm_arch_memory_slot { -}; - /* * PMU filter structure. Describe a range of events with a particular * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. 
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 73039949b5ce..a57cffb752e8 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -21,7 +21,8 @@ #define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) #define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) -static int all_alternatives_applied; +/* Volatile, as we may be patching the guts of READ_ONCE() */ +static volatile int all_alternatives_applied; static DECLARE_BITMAP(applied_alternatives, ARM64_NCAPS); @@ -205,7 +206,7 @@ static int __apply_alternatives_multi_stop(void *unused) /* We always have a CPU 0 at this point (__init) */ if (smp_processor_id()) { - while (!READ_ONCE(all_alternatives_applied)) + while (!all_alternatives_applied) cpu_relax(); isb(); } else { @@ -217,7 +218,7 @@ static int __apply_alternatives_multi_stop(void *unused) BUG_ON(all_alternatives_applied); __apply_alternatives(&region, false, remaining_capabilities); /* Barriers provided by the cache flushing */ - WRITE_ONCE(all_alternatives_applied, 1); + all_alternatives_applied = 1; } return 0; diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 7364de008bab..0e86e8b9cedd 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -277,7 +277,7 @@ static void __init register_insn_emulation_sysctl(void) #define __user_swpX_asm(data, addr, res, temp, temp2, B) \ do { \ - uaccess_enable(); \ + uaccess_enable_privileged(); \ __asm__ __volatile__( \ " mov %w3, %w7\n" \ "0: ldxr"B" %w2, [%4]\n" \ @@ -302,7 +302,7 @@ do { \ "i" (-EFAULT), \ "i" (__SWP_LL_SC_LOOPS) \ : "memory"); \ - uaccess_disable(); \ + uaccess_disable_privileged(); \ } while (0) #define __user_swp_asm(data, addr, res, temp, temp2) \ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 7d32fc959b1a..5e82488f1b82 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -30,7 +30,6 @@ int
main(void) BLANK(); DEFINE(TSK_TI_FLAGS, offsetof(struct task_struct, thread_info.flags)); DEFINE(TSK_TI_PREEMPT, offsetof(struct task_struct, thread_info.preempt_count)); - DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit)); #ifdef CONFIG_ARM64_SW_TTBR0_PAN DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); #endif @@ -70,7 +69,7 @@ int main(void) DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate)); DEFINE(S_PC, offsetof(struct pt_regs, pc)); DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); - DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit)); + DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1)); DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save)); DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); @@ -110,6 +109,11 @@ int main(void) DEFINE(CPU_APGAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APGAKEYLO_EL1])); DEFINE(HOST_CONTEXT_VCPU, offsetof(struct kvm_cpu_context, __hyp_running_vcpu)); DEFINE(HOST_DATA_CONTEXT, offsetof(struct kvm_host_data, host_ctxt)); + DEFINE(NVHE_INIT_MAIR_EL2, offsetof(struct kvm_nvhe_init_params, mair_el2)); + DEFINE(NVHE_INIT_TCR_EL2, offsetof(struct kvm_nvhe_init_params, tcr_el2)); + DEFINE(NVHE_INIT_TPIDR_EL2, offsetof(struct kvm_nvhe_init_params, tpidr_el2)); + DEFINE(NVHE_INIT_STACK_HYP_VA, offsetof(struct kvm_nvhe_init_params, stack_hyp_va)); + DEFINE(NVHE_INIT_PGD_PA, offsetof(struct kvm_nvhe_init_params, pgd_pa)); #endif #ifdef CONFIG_CPU_PM DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp)); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 61314fd70f13..a63428301f42 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -196,16 +196,6 @@ has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry, return is_midr_in_range(midr, &range) && has_dic; } -#ifdef CONFIG_RANDOMIZE_BASE - -static const struct 
midr_range ca57_a72[] = { - MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), - MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), - {}, -}; - -#endif - #ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009 @@ -299,6 +289,8 @@ static const struct midr_range erratum_845719_list[] = { MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), /* Brahma-B53 r0p[0] */ MIDR_REV(MIDR_BRAHMA_B53, 0, 0), + /* Kryo2XX Silver rAp4 */ + MIDR_REV(MIDR_QCOM_KRYO_2XX_SILVER, 0xa, 0x4), {}, }; #endif @@ -459,9 +451,12 @@ const struct arm64_cpu_capabilities arm64_errata[] = { }, #ifdef CONFIG_RANDOMIZE_BASE { - .desc = "EL2 vector hardening", - .capability = ARM64_HARDEN_EL2_VECTORS, - ERRATA_MIDR_RANGE_LIST(ca57_a72), + /* Must come after the Spectre-v2 entry */ + .desc = "Spectre-v3a", + .capability = ARM64_SPECTRE_V3A, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = has_spectre_v3a, + .cpu_enable = spectre_v3a_enable_mitigation, }, #endif { diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index dcc165b3fc04..d96f4554282d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -74,6 +74,7 @@ #include <asm/cpufeature.h> #include <asm/cpu_ops.h> #include <asm/fpsimd.h> +#include <asm/kvm_host.h> #include <asm/mmu_context.h> #include <asm/mte.h> #include <asm/processor.h> @@ -153,10 +154,6 @@ EXPORT_SYMBOL(cpu_hwcap_keys); .width = 0, \ } -/* meta feature for alternatives */ -static bool __maybe_unused -cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused); - static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap); static bool __system_matches_cap(unsigned int n); @@ -1337,6 +1334,8 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_GOLD), + 
MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), { /* sentinel */ } @@ -1598,7 +1597,7 @@ static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) WARN_ON_ONCE(in_interrupt()); sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); - asm(SET_PSTATE_PAN(1)); + set_pstate_pan(1); } #endif /* CONFIG_ARM64_PAN */ @@ -1707,6 +1706,21 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) } #endif /* CONFIG_ARM64_MTE */ +#ifdef CONFIG_KVM +static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused) +{ + if (kvm_get_mode() != KVM_MODE_PROTECTED) + return false; + + if (is_kernel_in_hyp_mode()) { + pr_warn("Protected KVM not available with VHE\n"); + return false; + } + + return true; +} +#endif /* CONFIG_KVM */ + /* Internal helper functions to match cpu capability type */ static bool cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) @@ -1768,28 +1782,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, .matches = has_no_hw_prefetch, }, -#ifdef CONFIG_ARM64_UAO - { - .desc = "User Access Override", - .capability = ARM64_HAS_UAO, - .type = ARM64_CPUCAP_SYSTEM_FEATURE, - .matches = has_cpuid_feature, - .sys_reg = SYS_ID_AA64MMFR2_EL1, - .field_pos = ID_AA64MMFR2_UAO_SHIFT, - .min_field_value = 1, - /* - * We rely on stop_machine() calling uao_thread_switch() to set - * UAO immediately after patching. 
- */ - }, -#endif /* CONFIG_ARM64_UAO */ -#ifdef CONFIG_ARM64_PAN - { - .capability = ARM64_ALT_PAN_NOT_UAO, - .type = ARM64_CPUCAP_SYSTEM_FEATURE, - .matches = cpufeature_pan_not_uao, - }, -#endif /* CONFIG_ARM64_PAN */ #ifdef CONFIG_ARM64_VHE { .desc = "Virtualization Host Extensions", @@ -1820,6 +1812,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64PFR0_EL1_SHIFT, .min_field_value = ID_AA64PFR0_EL1_32BIT_64BIT, }, + { + .desc = "Protected KVM", + .capability = ARM64_KVM_PROTECTED_MODE, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = is_kvm_protected_mode, + }, #endif { .desc = "Kernel page table isolation (KPTI)", @@ -2136,6 +2134,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_mte, }, #endif /* CONFIG_ARM64_MTE */ + { + .desc = "RCpc load-acquire (LDAPR)", + .capability = ARM64_HAS_LDAPR, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .sys_reg = SYS_ID_AA64ISAR1_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64ISAR1_LRCPC_SHIFT, + .matches = has_cpuid_feature, + .min_field_value = 1, + }, {}, }; @@ -2650,7 +2658,7 @@ bool this_cpu_has_cap(unsigned int n) * - The SYSTEM_FEATURE cpu_hwcaps may not have been set. * In all other cases cpus_have_{const_}cap() should be used. 
*/ -static bool __system_matches_cap(unsigned int n) +static bool __maybe_unused __system_matches_cap(unsigned int n) { if (n < ARM64_NCAPS) { const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[n]; @@ -2730,12 +2738,6 @@ void __init setup_cpu_features(void) ARCH_DMA_MINALIGN); } -static bool __maybe_unused -cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused) -{ - return (__system_matches_cap(ARM64_HAS_PAN) && !__system_matches_cap(ARM64_HAS_UAO)); -} - static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap) { cpu_replace_ttbr1(lm_alias(swapper_pg_dir)); @@ -2844,14 +2846,28 @@ static int __init enable_mrs_emulation(void) core_initcall(enable_mrs_emulation); +enum mitigation_state arm64_get_meltdown_state(void) +{ + if (__meltdown_safe) + return SPECTRE_UNAFFECTED; + + if (arm64_kernel_unmapped_at_el0()) + return SPECTRE_MITIGATED; + + return SPECTRE_VULNERABLE; +} + ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { - if (__meltdown_safe) + switch (arm64_get_meltdown_state()) { + case SPECTRE_UNAFFECTED: return sprintf(buf, "Not affected\n"); - if (arm64_kernel_unmapped_at_el0()) + case SPECTRE_MITIGATED: return sprintf(buf, "Mitigation: PTI\n"); - return sprintf(buf, "Vulnerable\n"); + default: + return sprintf(buf, "Vulnerable\n"); + } } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index b295fb912b12..bdd3b57b12f5 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -216,12 +216,6 @@ alternative_else_nop_endif .else add x21, sp, #S_FRAME_SIZE get_current_task tsk - /* Save the task's original addr_limit and set USER_DS */ - ldr x20, [tsk, #TSK_TI_ADDR_LIMIT] - str x20, [sp, #S_ORIG_ADDR_LIMIT] - mov x20, #USER_DS - str x20, [tsk, #TSK_TI_ADDR_LIMIT] - /* No need to reset PSTATE.UAO, hardware's already set it to 0 for us */ .endif /* \el == 0 */ mrs x22, elr_el1 mrs x23, spsr_el1 @@ -279,12 +273,6 @@ 
alternative_else_nop_endif .macro kernel_exit, el .if \el != 0 disable_daif - - /* Restore the task's original addr_limit. */ - ldr x20, [sp, #S_ORIG_ADDR_LIMIT] - str x20, [tsk, #TSK_TI_ADDR_LIMIT] - - /* No need to restore UAO, it will be restored from SPSR_EL1 */ .endif /* Restore pmr */ @@ -999,10 +987,9 @@ SYM_CODE_START(__sdei_asm_entry_trampoline) mov x4, xzr /* - * Use reg->interrupted_regs.addr_limit to remember whether to unmap - * the kernel on exit. + * Remember whether to unmap the kernel on exit. */ -1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)] +1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)] #ifdef CONFIG_RANDOMIZE_BASE adr x4, tramp_vectors + PAGE_SIZE @@ -1023,7 +1010,7 @@ NOKPROBE(__sdei_asm_entry_trampoline) * x4: struct sdei_registered_event argument from registration time. */ SYM_CODE_START(__sdei_asm_exit_trampoline) - ldr x4, [x4, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)] + ldr x4, [x4, #(SDEI_EVENT_INTREGS + S_SDEI_TTBR1)] cbnz x4, 1f tramp_unmap_kernel tmp=x4 diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index d8d9caf02834..957683029438 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -11,7 +11,6 @@ #include <linux/linkage.h> #include <linux/init.h> -#include <linux/irqchip/arm-gic-v3.h> #include <linux/pgtable.h> #include <asm/asm_pointer_auth.h> @@ -21,6 +20,7 @@ #include <asm/asm-offsets.h> #include <asm/cache.h> #include <asm/cputype.h> +#include <asm/el2_setup.h> #include <asm/elf.h> #include <asm/image.h> #include <asm/kernel-pgtable.h> @@ -104,7 +104,7 @@ pe_header: */ SYM_CODE_START(primary_entry) bl preserve_boot_args - bl el2_setup // Drop to EL1, w0=cpu_boot_mode + bl init_kernel_el // w0=cpu_boot_mode adrp x23, __PHYS_OFFSET and x23, x23, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0 bl set_cpu_boot_mode_flag @@ -482,174 +482,86 @@ EXPORT_SYMBOL(kimage_vaddr) .section ".idmap.text","awx" /* - * If we're fortunate enough to boot at EL2, ensure that the world is - 
* sane before dropping to EL1. + * Starting from EL2 or EL1, configure the CPU to execute at the highest + * reachable EL supported by the kernel in a chosen default state. If dropping + * from EL2 to EL1, configure EL2 before configuring EL1. + * + * Since we cannot always rely on ERET synchronizing writes to sysregs (e.g. if + * SCTLR_ELx.EOS is clear), we place an ISB prior to ERET. * * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if * booted in EL1 or EL2 respectively. */ -SYM_FUNC_START(el2_setup) - msr SPsel, #1 // We want to use SP_EL{1,2} +SYM_FUNC_START(init_kernel_el) mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 - b.eq 1f - mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1) + b.eq init_el2 + +SYM_INNER_LABEL(init_el1, SYM_L_LOCAL) + mov_q x0, INIT_SCTLR_EL1_MMU_OFF msr sctlr_el1, x0 - mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 isb - ret - -1: mov_q x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) - msr sctlr_el2, x0 + mov_q x0, INIT_PSTATE_EL1 + msr spsr_el1, x0 + msr elr_el1, lr + mov w0, #BOOT_CPU_MODE_EL1 + eret +SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) #ifdef CONFIG_ARM64_VHE /* - * Check for VHE being present. For the rest of the EL2 setup, - * x2 being non-zero indicates that we do have VHE, and that the - * kernel is intended to run at EL2. + * Check for VHE being present. x2 being non-zero indicates that we + * do have VHE, and that the kernel is intended to run at EL2. */ mrs x2, id_aa64mmfr1_el1 ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 #else mov x2, xzr #endif + cbz x2, init_el2_nvhe - /* Hyp configuration. */ - mov_q x0, HCR_HOST_NVHE_FLAGS - cbz x2, set_hcr + /* + * When VHE _is_ in use, EL1 will not be used in the host and + * requires no configuration, and all non-hyp-specific EL2 setup + * will be done via the _EL1 system register aliases in __cpu_setup. + */ mov_q x0, HCR_HOST_VHE_FLAGS -set_hcr: msr hcr_el2, x0 isb - /* - * Allow Non-secure EL1 and EL0 to access physical timer and counter. 
- * This is not necessary for VHE, since the host kernel runs in EL2, - * and EL0 accesses are configured in the later stage of boot process. - * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout - * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined - * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1 - * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in - * EL2. - */ - cbnz x2, 1f - mrs x0, cnthctl_el2 - orr x0, x0, #3 // Enable EL1 physical timers - msr cnthctl_el2, x0 -1: - msr cntvoff_el2, xzr // Clear virtual offset - -#ifdef CONFIG_ARM_GIC_V3 - /* GICv3 system register access */ - mrs x0, id_aa64pfr0_el1 - ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4 - cbz x0, 3f - - mrs_s x0, SYS_ICC_SRE_EL2 - orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 - orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 - msr_s SYS_ICC_SRE_EL2, x0 - isb // Make sure SRE is now set - mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, - tbz x0, #0, 3f // and check that it sticks - msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults - -3: -#endif - - /* Populate ID registers. */ - mrs x0, midr_el1 - mrs x1, mpidr_el1 - msr vpidr_el2, x0 - msr vmpidr_el2, x1 - -#ifdef CONFIG_COMPAT - msr hstr_el2, xzr // Disable CP15 traps to EL2 -#endif - - /* EL2 debug */ - mrs x1, id_aa64dfr0_el1 - sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4 - cmp x0, #1 - b.lt 4f // Skip if no PMU present - mrs x0, pmcr_el0 // Disable debug access traps - ubfx x0, x0, #11, #5 // to EL2 and allow access to -4: - csel x3, xzr, x0, lt // all PMU counters from EL1 - - /* Statistical profiling */ - ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 - cbz x0, 7f // Skip if SPE not present - cbnz x2, 6f // VHE? 
- mrs_s x4, SYS_PMBIDR_EL1 // If SPE available at EL2, - and x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT) - cbnz x4, 5f // then permit sampling of physical - mov x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \ - 1 << SYS_PMSCR_EL2_PA_SHIFT) - msr_s SYS_PMSCR_EL2, x4 // addresses and physical counter -5: - mov x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) - orr x3, x3, x1 // If we don't have VHE, then - b 7f // use EL1&0 translation. -6: // For VHE, use EL2 translation - orr x3, x3, #MDCR_EL2_TPMS // and disable access from EL1 -7: - msr mdcr_el2, x3 // Configure debug traps + init_el2_state vhe - /* LORegions */ - mrs x1, id_aa64mmfr1_el1 - ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4 - cbz x0, 1f - msr_s SYS_LORC_EL1, xzr -1: - - /* Stage-2 translation */ - msr vttbr_el2, xzr - - cbz x2, install_el2_stub - - mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 isb - ret -SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL) + mov_q x0, INIT_PSTATE_EL2 + msr spsr_el2, x0 + msr elr_el2, lr + mov w0, #BOOT_CPU_MODE_EL2 + eret + +SYM_INNER_LABEL(init_el2_nvhe, SYM_L_LOCAL) /* * When VHE is not in use, early init of EL2 and EL1 needs to be * done here. - * When VHE _is_ in use, EL1 will not be used in the host and - * requires no configuration, and all non-hyp-specific EL2 setup - * will be done via the _EL1 system register aliases in __cpu_setup. */ - mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1) + mov_q x0, INIT_SCTLR_EL1_MMU_OFF msr sctlr_el1, x0 - /* Coprocessor traps. */ - mov x0, #0x33ff - msr cptr_el2, x0 // Disable copro. traps to EL2 - - /* SVE register access */ - mrs x1, id_aa64pfr0_el1 - ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4 - cbz x1, 7f - - bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps - msr cptr_el2, x0 // Disable copro. traps to EL2 + mov_q x0, HCR_HOST_NVHE_FLAGS + msr hcr_el2, x0 isb - mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector - msr_s SYS_ZCR_EL2, x1 // length for EL1. 
+ + init_el2_state nvhe /* Hypervisor stub */ -7: adr_l x0, __hyp_stub_vectors + adr_l x0, __hyp_stub_vectors msr vbar_el2, x0 + isb - /* spsr */ - mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ - PSR_MODE_EL1h) - msr spsr_el2, x0 msr elr_el2, lr - mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 + mov w0, #BOOT_CPU_MODE_EL2 eret -SYM_FUNC_END(el2_setup) +SYM_FUNC_END(init_kernel_el) /* * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed @@ -699,7 +611,7 @@ SYM_DATA_END(__early_cpu_boot_status) * cores are held until we're ready for them to initialise. */ SYM_FUNC_START(secondary_holding_pen) - bl el2_setup // Drop to EL1, w0=cpu_boot_mode + bl init_kernel_el // w0=cpu_boot_mode bl set_cpu_boot_mode_flag mrs x0, mpidr_el1 mov_q x1, MPIDR_HWID_BITMASK @@ -717,7 +629,7 @@ SYM_FUNC_END(secondary_holding_pen) * be used where CPUs are brought online dynamically by the kernel. */ SYM_FUNC_START(secondary_entry) - bl el2_setup // Drop to EL1 + bl init_kernel_el // w0=cpu_boot_mode bl set_cpu_boot_mode_flag b secondary_startup SYM_FUNC_END(secondary_entry) diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index c615b285ff5b..39289d75118d 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -64,13 +64,12 @@ __efistub__ctype = _ctype; /* Alternative callbacks for init-time patching of nVHE hyp code. */ KVM_NVHE_ALIAS(kvm_patch_vector_branch); KVM_NVHE_ALIAS(kvm_update_va_mask); +KVM_NVHE_ALIAS(kvm_update_kimg_phys_offset); +KVM_NVHE_ALIAS(kvm_get_kimage_voffset); /* Global kernel state accessed by nVHE hyp code. */ KVM_NVHE_ALIAS(kvm_vgic_global_state); -/* Kernel constant needed to compute idmap addresses. */ -KVM_NVHE_ALIAS(kimage_voffset); - /* Kernel symbols used to call panic() from nVHE hyp code (via ERET). */ KVM_NVHE_ALIAS(__hyp_panic_string); KVM_NVHE_ALIAS(panic); @@ -78,9 +77,6 @@ KVM_NVHE_ALIAS(panic); /* Vectors installed by hyp-init on reset HVC. 
*/ KVM_NVHE_ALIAS(__hyp_stub_vectors); -/* IDMAP TCR_EL1.T0SZ as computed by the EL1 init code */ -KVM_NVHE_ALIAS(idmap_t0sz); - /* Kernel symbol used by icache_is_vpipt(). */ KVM_NVHE_ALIAS(__icache_flags); @@ -103,6 +99,9 @@ KVM_NVHE_ALIAS(gic_nonsecure_priorities); KVM_NVHE_ALIAS(__start___kvm_ex_table); KVM_NVHE_ALIAS(__stop___kvm_ex_table); +/* Array containing bases of nVHE per-CPU memory regions. */ +KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base); + #endif /* CONFIG_KVM */ #endif /* __ARM64_KERNEL_IMAGE_VARS_H */ diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c index 66adee8b5fc8..9ec34690e255 100644 --- a/arch/arm64/kernel/kexec_image.c +++ b/arch/arm64/kernel/kexec_image.c @@ -127,7 +127,7 @@ static void *image_load(struct kimage *image, kernel_segment->mem, kbuf.bufsz, kernel_segment->memsz); - return 0; + return NULL; } #ifdef CONFIG_KEXEC_IMAGE_VERIFY_SIG diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 94e8718e7229..f6f58e6265df 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -73,8 +73,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 4784011cecac..71005cb0f4e0 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -422,16 +422,15 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, if (clone_flags & CLONE_SETTLS) p->thread.uw.tp_value = tls; } else { + /* + * A kthread has no context to ERET to, so ensure any buggy + * ERET is treated as an illegal exception return. + * + * When a user task is created from a kthread, childregs will + * be initialized by start_thread() or start_compat_thread(). 
+ */ memset(childregs, 0, sizeof(struct pt_regs)); - childregs->pstate = PSR_MODE_EL1h; - if (IS_ENABLED(CONFIG_ARM64_UAO) && - cpus_have_const_cap(ARM64_HAS_UAO)) - childregs->pstate |= PSR_UAO_BIT; - - spectre_v4_enable_task_mitigation(p); - - if (system_uses_irq_prio_masking()) - childregs->pmr_save = GIC_PRIO_IRQON; + childregs->pstate = PSR_MODE_EL1h | PSR_IL_BIT; p->thread.cpu_context.x19 = stack_start; p->thread.cpu_context.x20 = stk_sz; @@ -461,17 +460,6 @@ static void tls_thread_switch(struct task_struct *next) write_sysreg(*task_user_tls(next), tpidr_el0); } -/* Restore the UAO state depending on next's addr_limit */ -void uao_thread_switch(struct task_struct *next) -{ - if (IS_ENABLED(CONFIG_ARM64_UAO)) { - if (task_thread_info(next)->addr_limit == KERNEL_DS) - asm(ALTERNATIVE("nop", SET_PSTATE_UAO(1), ARM64_HAS_UAO)); - else - asm(ALTERNATIVE("nop", SET_PSTATE_UAO(0), ARM64_HAS_UAO)); - } -} - /* * Force SSBS state on context-switch, since it may be lost after migrating * from a CPU which treats the bit as RES0 in a heterogeneous system. 
@@ -522,14 +510,13 @@ static void erratum_1418040_thread_switch(struct task_struct *prev, bool prev32, next32; u64 val; - if (!(IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) && - cpus_have_const_cap(ARM64_WORKAROUND_1418040))) + if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040)) return; prev32 = is_compat_thread(task_thread_info(prev)); next32 = is_compat_thread(task_thread_info(next)); - if (prev32 == next32) + if (prev32 == next32 || !this_cpu_has_cap(ARM64_WORKAROUND_1418040)) return; val = read_sysreg(cntkctl_el1); @@ -555,7 +542,6 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, hw_breakpoint_thread_switch(next); contextidr_thread_switch(next); entry_task_switch(next); - uao_thread_switch(next); ssbs_thread_switch(next); erratum_1418040_thread_switch(prev, next); diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index c18eb7d41274..902e4084c477 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Handle detection, reporting and mitigation of Spectre v1, v2 and v4, as + * Handle detection, reporting and mitigation of Spectre v1, v2, v3a and v4, as * detailed at: * * https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability @@ -24,8 +24,10 @@ #include <linux/prctl.h> #include <linux/sched/task_stack.h> +#include <asm/insn.h> #include <asm/spectre.h> #include <asm/traps.h> +#include <asm/virt.h> /* * We try to ensure that the mitigation state can never change as the result of @@ -118,6 +120,7 @@ static enum mitigation_state spectre_v2_get_cpu_hw_mitigation_state(void) MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), + MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), { /* sentinel */ } @@ -169,72 +172,26 @@ bool has_spectre_v2(const struct 
arm64_cpu_capabilities *entry, int scope) return true; } -DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); - enum mitigation_state arm64_get_spectre_v2_state(void) { return spectre_v2_state; } -#ifdef CONFIG_KVM -#include <asm/cacheflush.h> -#include <asm/kvm_asm.h> - -atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); - -static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, - const char *hyp_vecs_end) -{ - void *dst = lm_alias(__bp_harden_hyp_vecs + slot * SZ_2K); - int i; - - for (i = 0; i < SZ_2K; i += 0x80) - memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start); - - __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); -} +DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); static void install_bp_hardening_cb(bp_hardening_cb_t fn) { - static DEFINE_RAW_SPINLOCK(bp_lock); - int cpu, slot = -1; - const char *hyp_vecs_start = __smccc_workaround_1_smc; - const char *hyp_vecs_end = __smccc_workaround_1_smc + - __SMCCC_WORKAROUND_1_SMC_SZ; + __this_cpu_write(bp_hardening_data.fn, fn); /* * Vinz Clortho takes the hyp_vecs start/end "keys" at * the door when we're a guest. Skip the hyp-vectors work. 
*/ - if (!is_hyp_mode_available()) { - __this_cpu_write(bp_hardening_data.fn, fn); + if (!is_hyp_mode_available()) return; - } - - raw_spin_lock(&bp_lock); - for_each_possible_cpu(cpu) { - if (per_cpu(bp_hardening_data.fn, cpu) == fn) { - slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu); - break; - } - } - - if (slot == -1) { - slot = atomic_inc_return(&arm64_el2_vector_last_slot); - BUG_ON(slot >= BP_HARDEN_EL2_SLOTS); - __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); - } - __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); - __this_cpu_write(bp_hardening_data.fn, fn); - raw_spin_unlock(&bp_lock); -} -#else -static void install_bp_hardening_cb(bp_hardening_cb_t fn) -{ - __this_cpu_write(bp_hardening_data.fn, fn); + __this_cpu_write(bp_hardening_data.slot, HYP_VECTOR_SPECTRE_DIRECT); } -#endif /* CONFIG_KVM */ static void call_smc_arch_workaround_1(void) { @@ -316,6 +273,33 @@ void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused) } /* + * Spectre-v3a. + * + * Phew, there's not an awful lot to do here! We just instruct EL2 to use + * an indirect trampoline for the hyp vectors so that guests can't read + * VBAR_EL2 to defeat randomisation of the hypervisor VA layout. + */ +bool has_spectre_v3a(const struct arm64_cpu_capabilities *entry, int scope) +{ + static const struct midr_range spectre_v3a_unsafe_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + {}, + }; + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + return is_midr_in_range_list(read_cpuid_id(), spectre_v3a_unsafe_list); +} + +void spectre_v3a_enable_mitigation(const struct arm64_cpu_capabilities *__unused) +{ + struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); + + if (this_cpu_has_cap(ARM64_SPECTRE_V3A)) + data->slot += HYP_VECTOR_INDIRECT; +} + +/* * Spectre v4. * * If you thought Spectre v2 was nasty, wait until you see this mess. 
A CPU is @@ -537,12 +521,12 @@ static enum mitigation_state spectre_v4_enable_hw_mitigation(void) if (spectre_v4_mitigations_off()) { sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_DSSBS); - asm volatile(SET_PSTATE_SSBS(1)); + set_pstate_ssbs(1); return SPECTRE_VULNERABLE; } /* SCTLR_EL1.DSSBS was initialised to 0 during boot */ - asm volatile(SET_PSTATE_SSBS(0)); + set_pstate_ssbs(0); return SPECTRE_MITIGATED; } diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 43ae4e0c968f..62d2bda7adb8 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -66,7 +66,6 @@ static int cpu_psci_cpu_disable(unsigned int cpu) static void cpu_psci_cpu_die(unsigned int cpu) { - int ret; /* * There are no known implementations of PSCI actually using the * power state field, pass a sensible default for now. @@ -74,9 +73,7 @@ static void cpu_psci_cpu_die(unsigned int cpu) u32 state = PSCI_POWER_STATE_TYPE_POWER_DOWN << PSCI_0_2_POWER_STATE_TYPE_SHIFT; - ret = psci_ops.cpu_off(state); - - pr_crit("unable to power off CPU%u (%d)\n", cpu, ret); + psci_ops.cpu_off(state); } static int cpu_psci_cpu_kill(unsigned int cpu) diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 7689f2031c0c..e04b3e90c003 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -178,12 +178,6 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs, sdei_api_event_context(i, &regs->regs[i]); } - /* - * We didn't take an exception to get here, set PAN. UAO will be cleared - * by sdei_event_handler()s force_uaccess_begin() call.
- */ - __uaccess_enable_hw_pan(); - err = sdei_event_handler(regs, arg); if (err) return SDEI_EV_FAILED; @@ -222,12 +216,39 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs, return vbar + 0x480; } +static void __kprobes notrace __sdei_pstate_entry(void) +{ + /* + * The original SDEI spec (ARM DEN 0054A) can be read ambiguously as to + * whether PSTATE bits are inherited unchanged or generated from + * scratch, and the TF-A implementation always clears PAN and always + * clears UAO. There are no other known implementations. + * + * Subsequent revisions (ARM DEN 0054B) follow the usual rules for how + * PSTATE is modified upon architectural exceptions, and so PAN is + * either inherited or set per SCTLR_ELx.SPAN, and UAO is always + * cleared. + * + * We must explicitly reset PAN to the expected state, including + * clearing it when the host isn't using it, in case a VM had it set. + */ + if (system_uses_hw_pan()) + set_pstate_pan(1); + else if (cpu_has_pan()) + set_pstate_pan(0); +} asmlinkage __kprobes notrace unsigned long __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg) { unsigned long ret; + /* + * We didn't take an exception to get here, so the HW hasn't + * set/cleared bits in PSTATE that we may rely on. Initialize PAN. + */ + __sdei_pstate_entry(); + nmi_enter(); ret = _sdei_handler(regs, arg); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 133257ffd859..2f2973bc67c7 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -276,7 +276,7 @@ arch_initcall(reserve_memblock_reserved_regions); u64 __cpu_logical_map[NR_CPUS] = { [0 ... 
NR_CPUS-1] = INVALID_HWID }; -u64 cpu_logical_map(int cpu) +u64 cpu_logical_map(unsigned int cpu) { return __cpu_logical_map[cpu]; } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index a8184cad8890..af5c6c6638f7 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -922,9 +922,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, trace_hardirqs_off(); do { - /* Check valid user FS if needed */ - addr_limit_user_check(); - if (thread_flags & _TIF_NEED_RESCHED) { /* Unmask Debug and SError for the next task */ local_daif_restore(DAIF_PROCCTX_NOIRQ); diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index ba40d57757d6..4be7f7eed875 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -99,7 +99,7 @@ SYM_FUNC_END(__cpu_suspend_enter) .pushsection ".idmap.text", "awx" SYM_CODE_START(cpu_resume) - bl el2_setup // if in EL2 drop to EL1 cleanly + bl init_kernel_el bl __cpu_setup /* enable the MMU early - so we can access sleep_save_stash by va */ adrp x1, swapper_pg_dir diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 09c96f57818c..18e9727d3f64 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -413,6 +413,7 @@ void cpu_die_early(void) /* Mark this CPU absent */ set_cpu_present(cpu, 0); + rcu_report_dead(cpu); if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) { update_cpu_boot_status(CPU_KILL_ME); diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 96cd347c7a46..a67b37a7a47e 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -58,7 +58,6 @@ void notrace __cpu_suspend_exit(void) * features that might not have been set correctly. 
*/ __uaccess_enable_hw_pan(); - uao_thread_switch(current); /* * Restore HW breakpoint registers to sane values diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index d65f52264aba..a8f8e409e2bf 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -28,7 +28,7 @@ ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ $(btildflags-y) -T ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 -ccflags-y += -DDISABLE_BRANCH_PROFILING +ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS) KASAN_SANITIZE := n diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 79280c53b9a6..a1e0f91e6cea 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -48,7 +48,7 @@ cc32-as-instr = $(call try-run,\ # As a result we set our own flags here. # KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile -VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include) +VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include) VDSO_CPPFLAGS += $(LINUXINCLUDE) # Common C and assembly flags diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 1bda604f4c70..43af13968dfd 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -30,6 +30,13 @@ jiffies = jiffies_64; *(__kvm_ex_table) \ __stop___kvm_ex_table = .; +#define HYPERVISOR_DATA_SECTIONS \ + HYP_SECTION_NAME(.data..ro_after_init) : { \ + __hyp_data_ro_after_init_start = .; \ + *(HYP_SECTION_NAME(.data..ro_after_init)) \ + __hyp_data_ro_after_init_end = .; \ + } + #define HYPERVISOR_PERCPU_SECTION \ . 
= ALIGN(PAGE_SIZE); \ HYP_SECTION_NAME(.data..percpu) : { \ @@ -37,6 +44,7 @@ jiffies = jiffies_64; } #else /* CONFIG_KVM */ #define HYPERVISOR_EXTABLE +#define HYPERVISOR_DATA_SECTIONS #define HYPERVISOR_PERCPU_SECTION #endif @@ -201,7 +209,7 @@ SECTIONS INIT_CALLS CON_INITCALL INIT_RAM_FS - *(.init.rodata.* .init.bss) /* from the EFI stub */ + *(.init.altinstructions .init.rodata.* .init.bss) /* from the EFI stub */ } .exit.data : { EXIT_DATA @@ -234,6 +242,8 @@ SECTIONS _sdata = .; RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN) + HYPERVISOR_DATA_SECTIONS + /* * Data written with the MMU off but read with the MMU on requires * cache lines to be invalidated, discarding up to a Cache Writeback diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 1504c81fbf5d..60fd181df624 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -13,10 +13,10 @@ obj-$(CONFIG_KVM) += hyp/ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ $(KVM)/vfio.o $(KVM)/irqchip.o \ arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \ - inject_fault.o regmap.o va_layout.o handle_exit.o \ + inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o \ vgic-sys-reg-v3.o fpsimd.o pmu.o \ - aarch32.o arch_timer.o \ + arch_timer.o \ vgic/vgic.o vgic/vgic-init.o \ vgic/vgic-irqfd.o vgic/vgic-v2.o \ vgic/vgic-v3.o vgic/vgic-v4.o \ diff --git a/arch/arm64/kvm/aarch32.c b/arch/arm64/kvm/aarch32.c deleted file mode 100644 index 40a62a99fbf8..000000000000 --- a/arch/arm64/kvm/aarch32.c +++ /dev/null @@ -1,232 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * (not much of an) Emulation layer for 32bit guests. 
- * - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * based on arch/arm/kvm/emulate.c - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall <c.dall@virtualopensystems.com> - */ - -#include <linux/bits.h> -#include <linux/kvm_host.h> -#include <asm/kvm_emulate.h> -#include <asm/kvm_hyp.h> - -#define DFSR_FSC_EXTABT_LPAE 0x10 -#define DFSR_FSC_EXTABT_nLPAE 0x08 -#define DFSR_LPAE BIT(9) - -/* - * Table taken from ARMv8 ARM DDI0487B-B, table G1-10. - */ -static const u8 return_offsets[8][2] = { - [0] = { 0, 0 }, /* Reset, unused */ - [1] = { 4, 2 }, /* Undefined */ - [2] = { 0, 0 }, /* SVC, unused */ - [3] = { 4, 4 }, /* Prefetch abort */ - [4] = { 8, 8 }, /* Data abort */ - [5] = { 0, 0 }, /* HVC, unused */ - [6] = { 4, 4 }, /* IRQ, unused */ - [7] = { 4, 4 }, /* FIQ, unused */ -}; - -static bool pre_fault_synchronize(struct kvm_vcpu *vcpu) -{ - preempt_disable(); - if (vcpu->arch.sysregs_loaded_on_cpu) { - kvm_arch_vcpu_put(vcpu); - return true; - } - - preempt_enable(); - return false; -} - -static void post_fault_synchronize(struct kvm_vcpu *vcpu, bool loaded) -{ - if (loaded) { - kvm_arch_vcpu_load(vcpu, smp_processor_id()); - preempt_enable(); - } -} - -/* - * When an exception is taken, most CPSR fields are left unchanged in the - * handler. However, some are explicitly overridden (e.g. M[4:0]). - * - * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with - * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was - * obsoleted by the ARMv7 virtualization extensions and is RES0. - * - * For the SPSR layout seen from AArch32, see: - * - ARM DDI 0406C.d, page B1-1148 - * - ARM DDI 0487E.a, page G8-6264 - * - * For the SPSR_ELx layout for AArch32 seen from AArch64, see: - * - ARM DDI 0487E.a, page C5-426 - * - * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from - * MSB to LSB. 
- */ -static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode) -{ - u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); - unsigned long old, new; - - old = *vcpu_cpsr(vcpu); - new = 0; - - new |= (old & PSR_AA32_N_BIT); - new |= (old & PSR_AA32_Z_BIT); - new |= (old & PSR_AA32_C_BIT); - new |= (old & PSR_AA32_V_BIT); - new |= (old & PSR_AA32_Q_BIT); - - // CPSR.IT[7:0] are set to zero upon any exception - // See ARM DDI 0487E.a, section G1.12.3 - // See ARM DDI 0406C.d, section B1.8.3 - - new |= (old & PSR_AA32_DIT_BIT); - - // CPSR.SSBS is set to SCTLR.DSSBS upon any exception - // See ARM DDI 0487E.a, page G8-6244 - if (sctlr & BIT(31)) - new |= PSR_AA32_SSBS_BIT; - - // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0 - // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented - // See ARM DDI 0487E.a, page G8-6246 - new |= (old & PSR_AA32_PAN_BIT); - if (!(sctlr & BIT(23))) - new |= PSR_AA32_PAN_BIT; - - // SS does not exist in AArch32, so ignore - - // CPSR.IL is set to zero upon any exception - // See ARM DDI 0487E.a, page G1-5527 - - new |= (old & PSR_AA32_GE_MASK); - - // CPSR.IT[7:0] are set to zero upon any exception - // See prior comment above - - // CPSR.E is set to SCTLR.EE upon any exception - // See ARM DDI 0487E.a, page G8-6245 - // See ARM DDI 0406C.d, page B4-1701 - if (sctlr & BIT(25)) - new |= PSR_AA32_E_BIT; - - // CPSR.A is unchanged upon an exception to Undefined, Supervisor - // CPSR.A is set upon an exception to other modes - // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 - // See ARM DDI 0406C.d, page B1-1182 - new |= (old & PSR_AA32_A_BIT); - if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC) - new |= PSR_AA32_A_BIT; - - // CPSR.I is set upon any exception - // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 - // See ARM DDI 0406C.d, page B1-1182 - new |= PSR_AA32_I_BIT; - - // CPSR.F is set upon an exception to FIQ - // CPSR.F is unchanged upon an exception to other modes - // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 - // 
See ARM DDI 0406C.d, page B1-1182 - new |= (old & PSR_AA32_F_BIT); - if (mode == PSR_AA32_MODE_FIQ) - new |= PSR_AA32_F_BIT; - - // CPSR.T is set to SCTLR.TE upon any exception - // See ARM DDI 0487E.a, page G8-5514 - // See ARM DDI 0406C.d, page B1-1181 - if (sctlr & BIT(30)) - new |= PSR_AA32_T_BIT; - - new |= mode; - - return new; -} - -static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) -{ - unsigned long spsr = *vcpu_cpsr(vcpu); - bool is_thumb = (spsr & PSR_AA32_T_BIT); - u32 return_offset = return_offsets[vect_offset >> 2][is_thumb]; - u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); - - *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode); - - /* Note: These now point to the banked copies */ - vcpu_write_spsr(vcpu, host_spsr_to_spsr32(spsr)); - *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; - - /* Branch to exception vector */ - if (sctlr & (1 << 13)) - vect_offset += 0xffff0000; - else /* always have security exceptions */ - vect_offset += vcpu_cp15(vcpu, c12_VBAR); - - *vcpu_pc(vcpu) = vect_offset; -} - -void kvm_inject_undef32(struct kvm_vcpu *vcpu) -{ - bool loaded = pre_fault_synchronize(vcpu); - - prepare_fault32(vcpu, PSR_AA32_MODE_UND, 4); - post_fault_synchronize(vcpu, loaded); -} - -/* - * Modelled after TakeDataAbortException() and TakePrefetchAbortException - * pseudocode. 
- */ -static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, - unsigned long addr) -{ - u32 vect_offset; - u32 *far, *fsr; - bool is_lpae; - bool loaded; - - loaded = pre_fault_synchronize(vcpu); - - if (is_pabt) { - vect_offset = 12; - far = &vcpu_cp15(vcpu, c6_IFAR); - fsr = &vcpu_cp15(vcpu, c5_IFSR); - } else { /* !iabt */ - vect_offset = 16; - far = &vcpu_cp15(vcpu, c6_DFAR); - fsr = &vcpu_cp15(vcpu, c5_DFSR); - } - - prepare_fault32(vcpu, PSR_AA32_MODE_ABT, vect_offset); - - *far = addr; - - /* Give the guest an IMPLEMENTATION DEFINED exception */ - is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); - if (is_lpae) { - *fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE; - } else { - /* no need to shuffle FS[4] into DFSR[10] as its 0 */ - *fsr = DFSR_FSC_EXTABT_nLPAE; - } - - post_fault_synchronize(vcpu, loaded); -} - -void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr) -{ - inject_abt32(vcpu, false, addr); -} - -void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr) -{ - inject_abt32(vcpu, true, addr); -} diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index c0ffb019ca8b..6e637d2b4cfb 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -19,6 +19,7 @@ #include <linux/kvm_irqfd.h> #include <linux/irqbypass.h> #include <linux/sched/stat.h> +#include <linux/psci.h> #include <trace/events/kvm.h> #define CREATE_TRACE_POINTS @@ -35,7 +36,6 @@ #include <asm/kvm_asm.h> #include <asm/kvm_mmu.h> #include <asm/kvm_emulate.h> -#include <asm/kvm_coproc.h> #include <asm/sections.h> #include <kvm/arm_hypercalls.h> @@ -46,10 +46,14 @@ __asm__(".arch_extension virt"); #endif +static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT; +DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); + DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; +DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); /* The VMID used in the 
VTTBR */ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); @@ -61,6 +65,10 @@ static bool vgic_present; static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); +extern u64 kvm_nvhe_sym(__cpu_logical_map)[NR_CPUS]; +extern u32 kvm_nvhe_sym(kvm_host_psci_version); +extern struct psci_0_1_function_ids kvm_nvhe_sym(kvm_host_psci_0_1_function_ids); + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; @@ -102,7 +110,7 @@ static int kvm_arm_default_max_vcpus(void) return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS; } -static void set_default_csv2(struct kvm *kvm) +static void set_default_spectre(struct kvm *kvm) { /* * The default is to expose CSV2 == 1 if the HW isn't affected. @@ -114,6 +122,8 @@ static void set_default_csv2(struct kvm *kvm) */ if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED) kvm->arch.pfr0_csv2 = 1; + if (arm64_get_meltdown_state() == SPECTRE_UNAFFECTED) + kvm->arch.pfr0_csv3 = 1; } /** @@ -141,7 +151,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) /* The maximum number of VCPUs is limited by the host's GIC model */ kvm->arch.max_vcpus = kvm_arm_default_max_vcpus(); - set_default_csv2(kvm); + set_default_spectre(kvm); return ret; out_free_stage2_pgd: @@ -198,6 +208,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2: case KVM_CAP_ARM_NISV_TO_USER: case KVM_CAP_ARM_INJECT_EXT_DABT: + case KVM_CAP_SET_GUEST_DEBUG: + case KVM_CAP_VCPU_ATTRIBUTES: r = 1; break; case KVM_CAP_ARM_SET_DEVICE_ADDR: @@ -229,10 +241,35 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_STEAL_TIME: r = kvm_arm_pvtime_supported(); break; - default: - r = kvm_arch_vm_ioctl_check_extension(kvm, ext); + case KVM_CAP_ARM_EL1_32BIT: + r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1); + break; + case KVM_CAP_GUEST_DEBUG_HW_BPS: + r = get_num_brps(); + break; + case 
KVM_CAP_GUEST_DEBUG_HW_WPS: + r = get_num_wrps(); + break; + case KVM_CAP_ARM_PMU_V3: + r = kvm_arm_support_pmu_v3(); + break; + case KVM_CAP_ARM_INJECT_SERROR_ESR: + r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN); break; + case KVM_CAP_ARM_VM_IPA_SIZE: + r = get_kvm_ipa_limit(); + break; + case KVM_CAP_ARM_SVE: + r = system_supports_sve(); + break; + case KVM_CAP_ARM_PTRAUTH_ADDRESS: + case KVM_CAP_ARM_PTRAUTH_GENERIC: + r = system_has_full_ptr_auth(); + break; + default: + r = 0; } + return r; } @@ -1311,47 +1348,52 @@ static unsigned long nvhe_percpu_order(void) return size ? get_order(size) : 0; } -static int kvm_map_vectors(void) +/* A lookup table holding the hypervisor VA for each vector slot */ +static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS]; + +static int __kvm_vector_slot2idx(enum arm64_hyp_spectre_vector slot) { - /* - * SV2 = ARM64_SPECTRE_V2 - * HEL2 = ARM64_HARDEN_EL2_VECTORS - * - * !SV2 + !HEL2 -> use direct vectors - * SV2 + !HEL2 -> use hardened vectors in place - * !SV2 + HEL2 -> allocate one vector slot and use exec mapping - * SV2 + HEL2 -> use hardened vectors and use exec mapping - */ - if (cpus_have_const_cap(ARM64_SPECTRE_V2)) { - __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs); - __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base); - } + return slot - (slot != HYP_VECTOR_DIRECT); +} - if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) { - phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs); - unsigned long size = __BP_HARDEN_HYP_VECS_SZ; +static void kvm_init_vector_slot(void *base, enum arm64_hyp_spectre_vector slot) +{ + int idx = __kvm_vector_slot2idx(slot); - /* - * Always allocate a spare vector slot, as we don't - * know yet which CPUs have a BP hardening slot that - * we can reuse. 
- */ - __kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot); - BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS); - return create_hyp_exec_mappings(vect_pa, size, - &__kvm_bp_vect_base); + hyp_spectre_vector_selector[slot] = base + (idx * SZ_2K); +} + +static int kvm_init_vector_slots(void) +{ + int err; + void *base; + + base = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); + kvm_init_vector_slot(base, HYP_VECTOR_DIRECT); + + base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); + kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT); + + if (!cpus_have_const_cap(ARM64_SPECTRE_V3A)) + return 0; + + if (!has_vhe()) { + err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs), + __BP_HARDEN_HYP_VECS_SZ, &base); + if (err) + return err; } + kvm_init_vector_slot(base, HYP_VECTOR_INDIRECT); + kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_INDIRECT); return 0; } static void cpu_init_hyp_mode(void) { - phys_addr_t pgd_ptr; - unsigned long hyp_stack_ptr; - unsigned long vector_ptr; - unsigned long tpidr_el2; + struct kvm_nvhe_init_params *params = this_cpu_ptr_nvhe_sym(kvm_init_params); struct arm_smccc_res res; + unsigned long tcr; /* Switch from the HYP stub to our own HYP init vector */ __hyp_set_vectors(kvm_get_idmap_vector()); @@ -1361,13 +1403,38 @@ static void cpu_init_hyp_mode(void) * kernel's mapping to the linear mapping, and store it in tpidr_el2 * so that we can use adr_l to access per-cpu variables in EL2. */ - tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) - - (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start)); + params->tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) - + (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start)); + + params->mair_el2 = read_sysreg(mair_el1); + + /* + * The ID map may be configured to use an extended virtual address + * range. 
This is only the case if system RAM is out of range for the + * currently configured page size and VA_BITS, in which case we will + * also need the extended virtual range for the HYP ID map, or we won't + * be able to enable the EL2 MMU. + * + * However, at EL2, there is only one TTBR register, and we can't switch + * between translation tables *and* update TCR_EL2.T0SZ at the same + * time. Bottom line: we need to use the extended range with *both* our + * translation tables. + * + * So use the same T0SZ value we use for the ID map. + */ + tcr = (read_sysreg(tcr_el1) & TCR_EL2_MASK) | TCR_EL2_RES1; + tcr &= ~TCR_T0SZ_MASK; + tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET; + params->tcr_el2 = tcr; + + params->stack_hyp_va = kern_hyp_va(__this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE); + params->pgd_pa = kvm_mmu_get_httbr(); - pgd_ptr = kvm_mmu_get_httbr(); - hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE; - hyp_stack_ptr = kern_hyp_va(hyp_stack_ptr); - vector_ptr = (unsigned long)kern_hyp_va(kvm_ksym_ref(__kvm_hyp_host_vector)); + /* + * Flush the init params from the data cache because the struct will + * be read while the MMU is off. + */ + kvm_flush_dcache_to_poc(params, sizeof(*params)); /* * Call initialization code, and switch to the full blown HYP code. @@ -1376,8 +1443,7 @@ static void cpu_init_hyp_mode(void) * cpus_have_const_cap() wrapper. 
*/ BUG_ON(!system_capabilities_finalized()); - arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), - pgd_ptr, tpidr_el2, hyp_stack_ptr, vector_ptr, &res); + arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), virt_to_phys(params), &res); WARN_ON(res.a0 != SMCCC_RET_SUCCESS); /* @@ -1396,13 +1462,40 @@ static void cpu_hyp_reset(void) __hyp_reset_vectors(); } +/* + * EL2 vectors can be mapped and rerouted in a number of ways, + * depending on the kernel configuration and CPU present: + * + * - If the CPU is affected by Spectre-v2, the hardening sequence is + * placed in one of the vector slots, which is executed before jumping + * to the real vectors. + * + * - If the CPU also has the ARM64_SPECTRE_V3A cap, the slot + * containing the hardening sequence is mapped next to the idmap page, + * and executed before jumping to the real vectors. + * + * - If the CPU only has the ARM64_SPECTRE_V3A cap, then an + * empty slot is selected, mapped next to the idmap page, and + * executed before jumping to the real vectors. + * + * Note that ARM64_SPECTRE_V3A is somewhat incompatible with + * VHE, as we don't have hypervisor-specific mappings. If the system + * is VHE and yet selects this capability, it will be ignored. 
+ */ +static void cpu_set_hyp_vector(void) +{ + struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); + void *vector = hyp_spectre_vector_selector[data->slot]; + + *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector; +} + static void cpu_hyp_reinit(void) { kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt); cpu_hyp_reset(); - - *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)kvm_get_hyp_vector(); + cpu_set_hyp_vector(); if (is_kernel_in_hyp_mode()) kvm_timer_init_vhe(); @@ -1439,7 +1532,8 @@ static void _kvm_arch_hardware_disable(void *discard) void kvm_arch_hardware_disable(void) { - _kvm_arch_hardware_disable(NULL); + if (!is_protected_kvm_enabled()) + _kvm_arch_hardware_disable(NULL); } #ifdef CONFIG_CPU_PM @@ -1482,11 +1576,13 @@ static struct notifier_block hyp_init_cpu_pm_nb = { static void __init hyp_cpu_pm_init(void) { - cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); + if (!is_protected_kvm_enabled()) + cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); } static void __init hyp_cpu_pm_exit(void) { - cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb); + if (!is_protected_kvm_enabled()) + cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb); } #else static inline void hyp_cpu_pm_init(void) @@ -1497,6 +1593,36 @@ static inline void hyp_cpu_pm_exit(void) } #endif +static void init_cpu_logical_map(void) +{ + unsigned int cpu; + + /* + * Copy the MPIDR <-> logical CPU ID mapping to hyp. + * Only copy the set of online CPUs whose features have been checked + * against the finalized system capabilities. The hypervisor will not + * allow any other CPUs from the `possible` set to boot. + */ + for_each_online_cpu(cpu) + kvm_nvhe_sym(__cpu_logical_map)[cpu] = cpu_logical_map(cpu); +} + +static bool init_psci_relay(void) +{ + /* + * If PSCI has not been initialized, protected KVM cannot install + * itself on newly booted CPUs.
+ */ + if (!psci_ops.get_version) { + kvm_err("Cannot initialize protected mode without PSCI\n"); + return false; + } + + kvm_nvhe_sym(kvm_host_psci_version) = psci_ops.get_version(); + kvm_nvhe_sym(kvm_host_psci_0_1_function_ids) = get_psci_0_1_function_ids(); + return true; +} + static int init_common_resources(void) { return kvm_set_ipa_limit(); @@ -1541,10 +1667,11 @@ static int init_subsystems(void) goto out; kvm_perf_init(); - kvm_coproc_table_init(); + kvm_sys_reg_table_init(); out: - on_each_cpu(_kvm_arch_hardware_disable, NULL, 1); + if (err || !is_protected_kvm_enabled()) + on_each_cpu(_kvm_arch_hardware_disable, NULL, 1); return err; } @@ -1618,6 +1745,14 @@ static int init_hyp_mode(void) goto out_err; } + err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_ro_after_init_start), + kvm_ksym_ref(__hyp_data_ro_after_init_end), + PAGE_HYP_RO); + if (err) { + kvm_err("Cannot map .hyp.data..ro_after_init section\n"); + goto out_err; + } + err = create_hyp_mappings(kvm_ksym_ref(__start_rodata), kvm_ksym_ref(__end_rodata), PAGE_HYP_RO); if (err) { @@ -1632,12 +1767,6 @@ static int init_hyp_mode(void) goto out_err; } - err = kvm_map_vectors(); - if (err) { - kvm_err("Cannot map vectors\n"); - goto out_err; - } - /* * Map the Hyp stack pages */ @@ -1667,6 +1796,13 @@ static int init_hyp_mode(void) } } + if (is_protected_kvm_enabled()) { + init_cpu_logical_map(); + + if (!init_psci_relay()) + goto out_err; + } + return 0; out_err: @@ -1781,14 +1917,24 @@ int kvm_arch_init(void *opaque) goto out_err; } + err = kvm_init_vector_slots(); + if (err) { + kvm_err("Cannot initialise vector slots\n"); + goto out_err; + } + err = init_subsystems(); if (err) goto out_hyp; - if (in_hyp_mode) + if (is_protected_kvm_enabled()) { + static_branch_enable(&kvm_protected_mode_initialized); + kvm_info("Protected nVHE mode initialized successfully\n"); + } else if (in_hyp_mode) { kvm_info("VHE mode initialized successfully\n"); - else + } else { kvm_info("Hyp mode initialized 
successfully\n"); + } return 0; @@ -1806,6 +1952,25 @@ void kvm_arch_exit(void) kvm_perf_teardown(); } +static int __init early_kvm_mode_cfg(char *arg) +{ + if (!arg) + return -EINVAL; + + if (strcmp(arg, "protected") == 0) { + kvm_mode = KVM_MODE_PROTECTED; + return 0; + } + + return -EINVAL; +} +early_param("kvm-arm.mode", early_kvm_mode_cfg); + +enum kvm_mode kvm_get_mode(void) +{ + return kvm_mode; +} + static int arm_init(void) { int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index dfb5218137ca..9bbd30e62799 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -24,7 +24,6 @@ #include <asm/fpsimd.h> #include <asm/kvm.h> #include <asm/kvm_emulate.h> -#include <asm/kvm_coproc.h> #include <asm/sigcontext.h> #include "trace.h" @@ -252,10 +251,32 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) memcpy(addr, valp, KVM_REG_SIZE(reg->id)); if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) { - int i; + int i, nr_reg; + + switch (*vcpu_cpsr(vcpu)) { + /* + * Either we are dealing with user mode, and only the + * first 15 registers (+ PC) must be narrowed to 32bit. + * AArch32 r0-r14 conveniently map to AArch64 x0-x14. + */ + case PSR_AA32_MODE_USR: + case PSR_AA32_MODE_SYS: + nr_reg = 15; + break; + + /* + * Otherwise, this is a privileged mode, and *all* the + * registers must be narrowed to 32bit.
+ */ + default: + nr_reg = 31; + break; + } + + for (i = 0; i < nr_reg; i++) + vcpu_set_reg(vcpu, i, (u32)vcpu_get_reg(vcpu, i)); - for (i = 0; i < 16; i++) - *vcpu_reg32(vcpu, i) = (u32)*vcpu_reg32(vcpu, i); + *vcpu_pc(vcpu) = (u32)*vcpu_pc(vcpu); } out: return err; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 5d690d60ccad..cebe39f3b1b6 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -14,7 +14,6 @@ #include <asm/esr.h> #include <asm/exception.h> #include <asm/kvm_asm.h> -#include <asm/kvm_coproc.h> #include <asm/kvm_emulate.h> #include <asm/kvm_mmu.h> #include <asm/debug-monitors.h> @@ -61,7 +60,7 @@ static int handle_smc(struct kvm_vcpu *vcpu) * otherwise return to the same address... */ vcpu_set_reg(vcpu, 0, ~0UL); - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_incr_pc(vcpu); return 1; } @@ -100,7 +99,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) kvm_clear_request(KVM_REQ_UNHALT, vcpu); } - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_incr_pc(vcpu); return 1; } @@ -221,7 +220,7 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu) * that fail their condition code check" */ if (!kvm_condition_valid(vcpu)) { - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_incr_pc(vcpu); handled = 1; } else { exit_handle_fn exit_handler; @@ -241,23 +240,6 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index) { struct kvm_run *run = vcpu->run; - if (ARM_SERROR_PENDING(exception_index)) { - u8 esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); - - /* - * HVC/SMC already have an adjusted PC, which we need - * to correct in order to return to after having - * injected the SError. - */ - if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64 || - esr_ec == ESR_ELx_EC_SMC32 || esr_ec == ESR_ELx_EC_SMC64) { - u32 adj = kvm_vcpu_trap_il_is32bit(vcpu) ? 
4 : 2; - *vcpu_pc(vcpu) -= adj; - } - - return 1; - } - exception_index = ARM_EXCEPTION_CODE(exception_index); switch (exception_index) { diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 4a81eddabcd8..687598e41b21 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -10,4 +10,4 @@ subdir-ccflags-y := -I$(incdir) \ -DDISABLE_BRANCH_PROFILING \ $(DISABLE_STACKLEAK_PLUGIN) -obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o smccc_wa.o +obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c index ae56d8a4b382..f98cbe2626a1 100644 --- a/arch/arm64/kvm/hyp/aarch32.c +++ b/arch/arm64/kvm/hyp/aarch32.c @@ -123,13 +123,13 @@ static void kvm_adjust_itstate(struct kvm_vcpu *vcpu) * kvm_skip_instr - skip a trapped instruction and proceed to the next * @vcpu: The vcpu pointer */ -void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) +void kvm_skip_instr32(struct kvm_vcpu *vcpu) { u32 pc = *vcpu_pc(vcpu); bool is_thumb; is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_AA32_T_BIT); - if (is_thumb && !is_wide_instr) + if (is_thumb && !kvm_vcpu_trap_il_is32bit(vcpu)) pc += 2; else pc += 4; diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c new file mode 100644 index 000000000000..73629094f903 --- /dev/null +++ b/arch/arm64/kvm/hyp/exception.c @@ -0,0 +1,331 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Fault injection for both 32 and 64bit guests. + * + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * Based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + */ + +#include <hyp/adjust_pc.h> +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> + +#if !defined (__KVM_NVHE_HYPERVISOR__) && !defined (__KVM_VHE_HYPERVISOR__) +#error Hypervisor code only! 
+#endif + +static inline u64 __vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) +{ + u64 val; + + if (__vcpu_read_sys_reg_from_cpu(reg, &val)) + return val; + + return __vcpu_sys_reg(vcpu, reg); +} + +static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) +{ + if (__vcpu_write_sys_reg_to_cpu(val, reg)) + return; + + __vcpu_sys_reg(vcpu, reg) = val; +} + +static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val) +{ + write_sysreg_el1(val, SYS_SPSR); +} + +static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val) +{ + if (has_vhe()) + write_sysreg(val, spsr_abt); + else + vcpu->arch.ctxt.spsr_abt = val; +} + +static void __vcpu_write_spsr_und(struct kvm_vcpu *vcpu, u64 val) +{ + if (has_vhe()) + write_sysreg(val, spsr_und); + else + vcpu->arch.ctxt.spsr_und = val; +} + +/* + * This performs the exception entry at a given EL (@target_mode), stashing PC + * and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE. + * The EL passed to this function *must* be a non-secure, privileged mode with + * bit 0 being set (PSTATE.SP == 1). + * + * When an exception is taken, most PSTATE fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all + * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx + * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0. + * + * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429. + * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426. + * + * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from + * MSB to LSB. 
+ */ +static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, + enum exception_type type) +{ + unsigned long sctlr, vbar, old, new, mode; + u64 exc_offset; + + mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT); + + if (mode == target_mode) + exc_offset = CURRENT_EL_SP_ELx_VECTOR; + else if ((mode | PSR_MODE_THREAD_BIT) == target_mode) + exc_offset = CURRENT_EL_SP_EL0_VECTOR; + else if (!(mode & PSR_MODE32_BIT)) + exc_offset = LOWER_EL_AArch64_VECTOR; + else + exc_offset = LOWER_EL_AArch32_VECTOR; + + switch (target_mode) { + case PSR_MODE_EL1h: + vbar = __vcpu_read_sys_reg(vcpu, VBAR_EL1); + sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1); + __vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1); + break; + default: + /* Don't do that */ + BUG(); + } + + *vcpu_pc(vcpu) = vbar + exc_offset + type; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_N_BIT); + new |= (old & PSR_Z_BIT); + new |= (old & PSR_C_BIT); + new |= (old & PSR_V_BIT); + + // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) + + new |= (old & PSR_DIT_BIT); + + // PSTATE.UAO is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D5-2579. + + // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0 + // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page D5-2578. + new |= (old & PSR_PAN_BIT); + if (!(sctlr & SCTLR_EL1_SPAN)) + new |= PSR_PAN_BIT; + + // PSTATE.SS is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D2-2452. + + // PSTATE.IL is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, page D1-2306. + + // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64 + // See ARM DDI 0487E.a, page D13-3258 + if (sctlr & SCTLR_ELx_DSSBS) + new |= PSR_SSBS_BIT; + + // PSTATE.BTYPE is set to zero upon any exception to AArch64 + // See ARM DDI 0487E.a, pages D1-2293 to D1-2294. 
+ + new |= PSR_D_BIT; + new |= PSR_A_BIT; + new |= PSR_I_BIT; + new |= PSR_F_BIT; + + new |= target_mode; + + *vcpu_cpsr(vcpu) = new; + __vcpu_write_spsr(vcpu, old); +} + +/* + * When an exception is taken, most CPSR fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). + * + * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with + * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was + * obsoleted by the ARMv7 virtualization extensions and is RES0. + * + * For the SPSR layout seen from AArch32, see: + * - ARM DDI 0406C.d, page B1-1148 + * - ARM DDI 0487E.a, page G8-6264 + * + * For the SPSR_ELx layout for AArch32 seen from AArch64, see: + * - ARM DDI 0487E.a, page C5-426 + * + * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from + * MSB to LSB. + */ +static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode) +{ + u32 sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1); + unsigned long old, new; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_AA32_N_BIT); + new |= (old & PSR_AA32_Z_BIT); + new |= (old & PSR_AA32_C_BIT); + new |= (old & PSR_AA32_V_BIT); + new |= (old & PSR_AA32_Q_BIT); + + // CPSR.IT[7:0] are set to zero upon any exception + // See ARM DDI 0487E.a, section G1.12.3 + // See ARM DDI 0406C.d, section B1.8.3 + + new |= (old & PSR_AA32_DIT_BIT); + + // CPSR.SSBS is set to SCTLR.DSSBS upon any exception + // See ARM DDI 0487E.a, page G8-6244 + if (sctlr & BIT(31)) + new |= PSR_AA32_SSBS_BIT; + + // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0 + // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page G8-6246 + new |= (old & PSR_AA32_PAN_BIT); + if (!(sctlr & BIT(23))) + new |= PSR_AA32_PAN_BIT; + + // SS does not exist in AArch32, so ignore + + // CPSR.IL is set to zero upon any exception + // See ARM DDI 0487E.a, page G1-5527 + + new |= (old & PSR_AA32_GE_MASK); + + // CPSR.IT[7:0] are 
set to zero upon any exception + // See prior comment above + + // CPSR.E is set to SCTLR.EE upon any exception + // See ARM DDI 0487E.a, page G8-6245 + // See ARM DDI 0406C.d, page B4-1701 + if (sctlr & BIT(25)) + new |= PSR_AA32_E_BIT; + + // CPSR.A is unchanged upon an exception to Undefined, Supervisor + // CPSR.A is set upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_A_BIT); + if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC) + new |= PSR_AA32_A_BIT; + + // CPSR.I is set upon any exception + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= PSR_AA32_I_BIT; + + // CPSR.F is set upon an exception to FIQ + // CPSR.F is unchanged upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_F_BIT); + if (mode == PSR_AA32_MODE_FIQ) + new |= PSR_AA32_F_BIT; + + // CPSR.T is set to SCTLR.TE upon any exception + // See ARM DDI 0487E.a, page G8-5514 + // See ARM DDI 0406C.d, page B1-1181 + if (sctlr & BIT(30)) + new |= PSR_AA32_T_BIT; + + new |= mode; + + return new; +} + +/* + * Table taken from ARMv8 ARM DDI0487B-B, table G1-10. 
+ */ +static const u8 return_offsets[8][2] = { + [0] = { 0, 0 }, /* Reset, unused */ + [1] = { 4, 2 }, /* Undefined */ + [2] = { 0, 0 }, /* SVC, unused */ + [3] = { 4, 4 }, /* Prefetch abort */ + [4] = { 8, 8 }, /* Data abort */ + [5] = { 0, 0 }, /* HVC, unused */ + [6] = { 4, 4 }, /* IRQ, unused */ + [7] = { 4, 4 }, /* FIQ, unused */ +}; + +static void enter_exception32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) +{ + unsigned long spsr = *vcpu_cpsr(vcpu); + bool is_thumb = (spsr & PSR_AA32_T_BIT); + u32 sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1); + u32 return_address; + + *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode); + return_address = *vcpu_pc(vcpu); + return_address += return_offsets[vect_offset >> 2][is_thumb]; + + /* KVM only enters the ABT and UND modes, so only deal with those */ + switch(mode) { + case PSR_AA32_MODE_ABT: + __vcpu_write_spsr_abt(vcpu, host_spsr_to_spsr32(spsr)); + vcpu_gp_regs(vcpu)->compat_lr_abt = return_address; + break; + + case PSR_AA32_MODE_UND: + __vcpu_write_spsr_und(vcpu, host_spsr_to_spsr32(spsr)); + vcpu_gp_regs(vcpu)->compat_lr_und = return_address; + break; + } + + /* Branch to exception vector */ + if (sctlr & (1 << 13)) + vect_offset += 0xffff0000; + else /* always have security exceptions */ + vect_offset += __vcpu_read_sys_reg(vcpu, VBAR_EL1); + + *vcpu_pc(vcpu) = vect_offset; +} + +void kvm_inject_exception(struct kvm_vcpu *vcpu) +{ + if (vcpu_el1_is_32bit(vcpu)) { + switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) { + case KVM_ARM64_EXCEPT_AA32_UND: + enter_exception32(vcpu, PSR_AA32_MODE_UND, 4); + break; + case KVM_ARM64_EXCEPT_AA32_IABT: + enter_exception32(vcpu, PSR_AA32_MODE_ABT, 12); + break; + case KVM_ARM64_EXCEPT_AA32_DABT: + enter_exception32(vcpu, PSR_AA32_MODE_ABT, 16); + break; + default: + /* Err... 
*/ + break; + } + } else { + switch (vcpu->arch.flags & KVM_ARM64_EXCEPT_MASK) { + case (KVM_ARM64_EXCEPT_AA64_ELx_SYNC | + KVM_ARM64_EXCEPT_AA64_EL1): + enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); + break; + default: + /* + * Only EL1_SYNC makes sense so far, EL2_{SYNC,IRQ} + * will be implemented at some point. Everything + * else gets silently ignored. + */ + break; + } + } +} diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 0a5b36eb54b3..d179056e1af8 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -13,6 +13,7 @@ #include <asm/kvm_arm.h> #include <asm/kvm_asm.h> #include <asm/mmu.h> +#include <asm/spectre.h> .macro save_caller_saved_regs_vect /* x0 and x1 were saved in the vector entry */ @@ -187,52 +188,60 @@ SYM_CODE_START(__kvm_hyp_vector) valid_vect el1_error // Error 32-bit EL1 SYM_CODE_END(__kvm_hyp_vector) -.macro hyp_ventry - .align 7 +.macro spectrev2_smccc_wa1_smc + sub sp, sp, #(8 * 4) + stp x2, x3, [sp, #(8 * 0)] + stp x0, x1, [sp, #(8 * 2)] + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 + smc #0 + ldp x2, x3, [sp, #(8 * 0)] + add sp, sp, #(8 * 2) +.endm + +.macro hyp_ventry indirect, spectrev2 + .align 7 1: esb - .rept 26 - nop - .endr -/* - * The default sequence is to directly branch to the KVM vectors, - * using the computed offset. This applies for VHE as well as - * !ARM64_HARDEN_EL2_VECTORS. The first vector must always run the preamble. - * - * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced - * with: - * - * stp x0, x1, [sp, #-16]! - * movz x0, #(addr & 0xffff) - * movk x0, #((addr >> 16) & 0xffff), lsl #16 - * movk x0, #((addr >> 32) & 0xffff), lsl #32 - * br x0 - * - * Where: - * addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + KVM_VECTOR_PREAMBLE. - * See kvm_patch_vector_branch for details. - */ -alternative_cb kvm_patch_vector_branch + .if \spectrev2 != 0 + spectrev2_smccc_wa1_smc + .else stp x0, x1, [sp, #-16]! 
- b __kvm_hyp_vector + (1b - 0b + KVM_VECTOR_PREAMBLE) + .endif + .if \indirect != 0 + alternative_cb kvm_patch_vector_branch + /* + * For ARM64_SPECTRE_V3A configurations, these NOPs get replaced with: + * + * movz x0, #(addr & 0xffff) + * movk x0, #((addr >> 16) & 0xffff), lsl #16 + * movk x0, #((addr >> 32) & 0xffff), lsl #32 + * br x0 + * + * Where: + * addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + KVM_VECTOR_PREAMBLE. + * See kvm_patch_vector_branch for details. + */ nop nop nop -alternative_cb_end + nop + alternative_cb_end + .endif + b __kvm_hyp_vector + (1b - 0b + KVM_VECTOR_PREAMBLE) .endm -.macro generate_vectors +.macro generate_vectors indirect, spectrev2 0: .rept 16 - hyp_ventry + hyp_ventry \indirect, \spectrev2 .endr .org 0b + SZ_2K // Safety measure .endm .align 11 SYM_CODE_START(__bp_harden_hyp_vecs) - .rept BP_HARDEN_EL2_SLOTS - generate_vectors - .endr + generate_vectors indirect = 0, spectrev2 = 1 // HYP_VECTOR_SPECTRE_DIRECT + generate_vectors indirect = 1, spectrev2 = 0 // HYP_VECTOR_INDIRECT + generate_vectors indirect = 1, spectrev2 = 1 // HYP_VECTOR_SPECTRE_INDIRECT 1: .org __bp_harden_hyp_vecs + __BP_HARDEN_HYP_VECS_SZ .org 1b SYM_CODE_END(__bp_harden_hyp_vecs) diff --git a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h new file mode 100644 index 000000000000..b1f60923a8fe --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Guest PC manipulation helpers + * + * Copyright (C) 2012,2013 - ARM Ltd + * Copyright (C) 2020 - Google LLC + * Author: Marc Zyngier <maz@kernel.org> + */ + +#ifndef __ARM64_KVM_HYP_ADJUST_PC_H__ +#define __ARM64_KVM_HYP_ADJUST_PC_H__ + +#include <asm/kvm_emulate.h> +#include <asm/kvm_host.h> + +void kvm_inject_exception(struct kvm_vcpu *vcpu); + +static inline void kvm_skip_instr(struct kvm_vcpu *vcpu) +{ + if (vcpu_mode_is_32bit(vcpu)) { + kvm_skip_instr32(vcpu); + } else { + *vcpu_pc(vcpu) 
+= 4; + *vcpu_cpsr(vcpu) &= ~PSR_BTYPE_MASK; + } + + /* advance the singlestep state machine */ + *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; +} + +/* + * Skip an instruction which has been emulated at hyp while most guest sysregs + * are live. + */ +static inline void __kvm_skip_instr(struct kvm_vcpu *vcpu) +{ + *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); + vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR); + + kvm_skip_instr(vcpu); + + write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR); + write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); +} + +/* + * Adjust the guest PC on entry, depending on flags provided by EL1 + * for the purpose of emulation (MMIO, sysreg) or exception injection. + */ +static inline void __adjust_pc(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.flags & KVM_ARM64_PENDING_EXCEPTION) { + kvm_inject_exception(vcpu); + vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION | + KVM_ARM64_EXCEPT_MASK); + } else if (vcpu->arch.flags & KVM_ARM64_INCREMENT_PC) { + kvm_skip_instr(vcpu); + vcpu->arch.flags &= ~KVM_ARM64_INCREMENT_PC; + } +} + +#endif diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 1f875a8f20c4..84473574c2e7 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -7,6 +7,8 @@ #ifndef __ARM64_KVM_HYP_SWITCH_H__ #define __ARM64_KVM_HYP_SWITCH_H__ +#include <hyp/adjust_pc.h> + #include <linux/arm-smccc.h> #include <linux/kvm_host.h> #include <linux/types.h> @@ -409,6 +411,21 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); + if (ARM_SERROR_PENDING(*exit_code)) { + u8 esr_ec = kvm_vcpu_trap_get_class(vcpu); + + /* + * HVC already have an adjusted PC, which we need to + * correct in order to return to after having injected + * the SError. 
+ * + * SMC, on the other hand, is *trapped*, meaning its + * preferred return address is the SMC itself. + */ + if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64) + write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR); + } + /* * We're using the raw exception code in order to only process * the trap if no SError is pending. We will come back to the diff --git a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h new file mode 100644 index 000000000000..1e6d995968a1 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Trap handler helpers. + * + * Copyright (C) 2020 - Google LLC + * Author: Marc Zyngier <maz@kernel.org> + */ + +#ifndef __ARM64_KVM_NVHE_TRAP_HANDLER_H__ +#define __ARM64_KVM_NVHE_TRAP_HANDLER_H__ + +#include <asm/kvm_host.h> + +#define cpu_reg(ctxt, r) (ctxt)->regs.regs[r] +#define DECLARE_REG(type, name, ctxt, reg) \ + type name = (type)cpu_reg(ctxt, (reg)) + +#endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index ddde15fe85f2..1f1e351c5fe2 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -6,9 +6,10 @@ asflags-y := -D__KVM_NVHE_HYPERVISOR__ ccflags-y := -D__KVM_NVHE_HYPERVISOR__ -obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o hyp-main.o +obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ + hyp-main.o hyp-smp.o psci-relay.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ - ../fpsimd.o ../hyp-entry.o + ../fpsimd.o ../hyp-entry.o ../exception.o ## ## Build rules for compiling nVHE hyp code diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index ed27f06a31ba..a820dfdc9c25 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -13,8 +13,6 @@ .text 
SYM_FUNC_START(__host_exit) - stp x0, x1, [sp, #-16]! - get_host_ctxt x0, x1 /* Store the host regs x2 and x3 */ @@ -41,6 +39,7 @@ SYM_FUNC_START(__host_exit) bl handle_trap /* Restore host regs x0-x17 */ +__host_enter_restore_full: ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)] ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)] ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)] @@ -64,6 +63,14 @@ __host_enter_without_restoring: SYM_FUNC_END(__host_exit) /* + * void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); + */ +SYM_FUNC_START(__host_enter) + mov x29, x0 + b __host_enter_restore_full +SYM_FUNC_END(__host_enter) + +/* * void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par); */ SYM_FUNC_START(__hyp_do_panic) @@ -99,13 +106,15 @@ SYM_FUNC_END(__hyp_do_panic) mrs x0, esr_el2 lsr x0, x0, #ESR_ELx_EC_SHIFT cmp x0, #ESR_ELx_EC_HVC64 - ldp x0, x1, [sp], #16 b.ne __host_exit + ldp x0, x1, [sp] // Don't fixup the stack yet + /* Check for a stub HVC call */ cmp x0, #HVC_STUB_HCALL_NR b.hs __host_exit + add sp, sp, #16 /* * Compute the idmap address of __kvm_handle_stub_hvc and * jump there. Since we use kimage_voffset, do not use the @@ -115,10 +124,7 @@ SYM_FUNC_END(__hyp_do_panic) * Preserve x0-x4, which may contain stub parameters. */ ldr x5, =__kvm_handle_stub_hvc - ldr_l x6, kimage_voffset - - /* x5 = __pa(x5) */ - sub x5, x5, x6 + kimg_pa x5, x6 br x5 .L__vect_end\@: .if ((.L__vect_end\@ - .L__vect_start\@) > 0x80) @@ -183,3 +189,41 @@ SYM_CODE_START(__kvm_hyp_host_vector) invalid_host_el1_vect // FIQ 32-bit EL1 invalid_host_el1_vect // Error 32-bit EL1 SYM_CODE_END(__kvm_hyp_host_vector) + +/* + * Forward SMC with arguments in struct kvm_cpu_context, and + * store the result into the same struct. Assumes SMCCC 1.2 or older. + * + * x0: struct kvm_cpu_context* + */ +SYM_CODE_START(__kvm_hyp_host_forward_smc) + /* + * Use x18 to keep the pointer to the host context because + * x18 is callee-saved in SMCCC but not in AAPCS64. 
+ */ + mov x18, x0 + + ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)] + ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)] + ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)] + ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)] + ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)] + ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)] + ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)] + ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)] + ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)] + + smc #0 + + stp x0, x1, [x18, #CPU_XREG_OFFSET(0)] + stp x2, x3, [x18, #CPU_XREG_OFFSET(2)] + stp x4, x5, [x18, #CPU_XREG_OFFSET(4)] + stp x6, x7, [x18, #CPU_XREG_OFFSET(6)] + stp x8, x9, [x18, #CPU_XREG_OFFSET(8)] + stp x10, x11, [x18, #CPU_XREG_OFFSET(10)] + stp x12, x13, [x18, #CPU_XREG_OFFSET(12)] + stp x14, x15, [x18, #CPU_XREG_OFFSET(14)] + stp x16, x17, [x18, #CPU_XREG_OFFSET(16)] + + ret +SYM_CODE_END(__kvm_hyp_host_forward_smc) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-init.S b/arch/arm64/kvm/hyp/nvhe/hyp-init.S index b11a9d7db677..31b060a44045 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-init.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp-init.S @@ -9,6 +9,7 @@ #include <asm/alternative.h> #include <asm/assembler.h> +#include <asm/el2_setup.h> #include <asm/kvm_arm.h> #include <asm/kvm_asm.h> #include <asm/kvm_mmu.h> @@ -47,10 +48,7 @@ __invalid: /* * x0: SMCCC function ID - * x1: HYP pgd - * x2: per-CPU offset - * x3: HYP stack - * x4: HYP vectors + * x1: struct kvm_nvhe_init_params PA */ __do_hyp_init: /* Check for a stub HVC call */ @@ -71,48 +69,53 @@ __do_hyp_init: mov x0, #SMCCC_RET_NOT_SUPPORTED eret -1: - /* Set tpidr_el2 for use by HYP to free a register */ - msr tpidr_el2, x2 +1: mov x0, x1 + mov x4, lr + bl ___kvm_hyp_init + mov lr, x4 - phys_to_ttbr x0, x1 -alternative_if ARM64_HAS_CNP - orr x0, x0, #TTBR_CNP_BIT + /* Hello, World! */ + mov x0, #SMCCC_RET_SUCCESS + eret +SYM_CODE_END(__kvm_hyp_init) + +/* + * Initialize the hypervisor in EL2. + * + * Only uses x0..x3 so as to not clobber callee-saved SMCCC registers + * and leave x4 for the caller. 
+ * + * x0: struct kvm_nvhe_init_params PA + */ +SYM_CODE_START_LOCAL(___kvm_hyp_init) +alternative_if ARM64_KVM_PROTECTED_MODE + mov_q x1, HCR_HOST_NVHE_PROTECTED_FLAGS + msr hcr_el2, x1 alternative_else_nop_endif - msr ttbr0_el2, x0 - mrs x0, tcr_el1 - mov_q x1, TCR_EL2_MASK - and x0, x0, x1 - mov x1, #TCR_EL2_RES1 - orr x0, x0, x1 + ldr x1, [x0, #NVHE_INIT_TPIDR_EL2] + msr tpidr_el2, x1 - /* - * The ID map may be configured to use an extended virtual address - * range. This is only the case if system RAM is out of range for the - * currently configured page size and VA_BITS, in which case we will - * also need the extended virtual range for the HYP ID map, or we won't - * be able to enable the EL2 MMU. - * - * However, at EL2, there is only one TTBR register, and we can't switch - * between translation tables *and* update TCR_EL2.T0SZ at the same - * time. Bottom line: we need to use the extended range with *both* our - * translation tables. - * - * So use the same T0SZ value we use for the ID map. - */ - ldr_l x1, idmap_t0sz - bfi x0, x1, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH + ldr x1, [x0, #NVHE_INIT_STACK_HYP_VA] + mov sp, x1 + + ldr x1, [x0, #NVHE_INIT_MAIR_EL2] + msr mair_el2, x1 + + ldr x1, [x0, #NVHE_INIT_PGD_PA] + phys_to_ttbr x2, x1 +alternative_if ARM64_HAS_CNP + orr x2, x2, #TTBR_CNP_BIT +alternative_else_nop_endif + msr ttbr0_el2, x2 /* * Set the PS bits in TCR_EL2. */ - tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2 + ldr x1, [x0, #NVHE_INIT_TCR_EL2] + tcr_compute_pa_size x1, #TCR_EL2_PS_SHIFT, x2, x3 + msr tcr_el2, x1 - msr tcr_el2, x0 - - mrs x0, mair_el1 - msr mair_el2, x0 isb /* Invalidate the stale TLBs from Bootloader */ @@ -134,14 +137,70 @@ alternative_else_nop_endif msr sctlr_el2, x0 isb - /* Set the stack and new vectors */ - mov sp, x3 - msr vbar_el2, x4 + /* Set the host vector */ + ldr x0, =__kvm_hyp_host_vector + kimg_hyp_va x0, x1 + msr vbar_el2, x0 - /* Hello, World! 
*/ - mov x0, #SMCCC_RET_SUCCESS - eret -SYM_CODE_END(__kvm_hyp_init) + ret +SYM_CODE_END(___kvm_hyp_init) + +/* + * PSCI CPU_ON entry point + * + * x0: struct kvm_nvhe_init_params PA + */ +SYM_CODE_START(kvm_hyp_cpu_entry) + mov x1, #1 // is_cpu_on = true + b __kvm_hyp_init_cpu +SYM_CODE_END(kvm_hyp_cpu_entry) + +/* + * PSCI CPU_SUSPEND / SYSTEM_SUSPEND entry point + * + * x0: struct kvm_nvhe_init_params PA + */ +SYM_CODE_START(kvm_hyp_cpu_resume) + mov x1, #0 // is_cpu_on = false + b __kvm_hyp_init_cpu +SYM_CODE_END(kvm_hyp_cpu_resume) + +/* + * Common code for CPU entry points. Initializes EL2 state and + * installs the hypervisor before handing over to a C handler. + * + * x0: struct kvm_nvhe_init_params PA + * x1: bool is_cpu_on + */ +SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu) + mov x28, x0 // Stash arguments + mov x29, x1 + + /* Check that the core was booted in EL2. */ + mrs x0, CurrentEL + cmp x0, #CurrentEL_EL2 + b.eq 2f + + /* The core booted in EL1. KVM cannot be initialized on it. */ +1: wfe + wfi + b 1b + +2: msr SPsel, #1 // We want to use SP_EL{1,2} + + /* Initialize EL2 CPU state to sane values. */ + init_el2_state nvhe // Clobbers x0..x2 + + /* Enable MMU, set vectors and stack. */ + mov x0, x28 + bl ___kvm_hyp_init // Clobbers x0..x3 + + /* Leave idmap. 
*/ + mov x0, x29 + ldr x1, =kvm_host_psci_cpu_entry + kimg_hyp_va x1, x2 + br x1 +SYM_CODE_END(__kvm_hyp_init_cpu) SYM_CODE_START(__kvm_handle_stub_hvc) cmp x0, #HVC_SOFT_RESTART @@ -176,6 +235,11 @@ reset: msr sctlr_el2, x5 isb +alternative_if ARM64_KVM_PROTECTED_MODE + mov_q x5, HCR_HOST_NVHE_FLAGS + msr hcr_el2, x5 +alternative_else_nop_endif + /* Install stub vectors */ adr_l x5, __hyp_stub_vectors msr vbar_el2, x5 diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index e2eafe2c93af..bde658d51404 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -12,106 +12,183 @@ #include <asm/kvm_hyp.h> #include <asm/kvm_mmu.h> -#include <kvm/arm_hypercalls.h> +#include <nvhe/trap_handler.h> -static void handle_host_hcall(unsigned long func_id, - struct kvm_cpu_context *host_ctxt) +DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); + +void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt); + +static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) { - unsigned long ret = 0; + DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1); - switch (func_id) { - case KVM_HOST_SMCCC_FUNC(__kvm_vcpu_run): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)r1; + cpu_reg(host_ctxt, 1) = __kvm_vcpu_run(kern_hyp_va(vcpu)); +} - ret = __kvm_vcpu_run(kern_hyp_va(vcpu)); - break; - } - case KVM_HOST_SMCCC_FUNC(__kvm_flush_vm_context): - __kvm_flush_vm_context(); - break; - case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_vmid_ipa): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1; - phys_addr_t ipa = host_ctxt->regs.regs[2]; - int level = host_ctxt->regs.regs[3]; +static void handle___kvm_flush_vm_context(struct kvm_cpu_context *host_ctxt) +{ + __kvm_flush_vm_context(); +} - __kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level); - break; - } - case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_vmid): { - unsigned 
long r1 = host_ctxt->regs.regs[1]; - struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1; +static void handle___kvm_tlb_flush_vmid_ipa(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); + DECLARE_REG(phys_addr_t, ipa, host_ctxt, 2); + DECLARE_REG(int, level, host_ctxt, 3); - __kvm_tlb_flush_vmid(kern_hyp_va(mmu)); - break; - } - case KVM_HOST_SMCCC_FUNC(__kvm_tlb_flush_local_vmid): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct kvm_s2_mmu *mmu = (struct kvm_s2_mmu *)r1; + __kvm_tlb_flush_vmid_ipa(kern_hyp_va(mmu), ipa, level); +} - __kvm_tlb_flush_local_vmid(kern_hyp_va(mmu)); - break; - } - case KVM_HOST_SMCCC_FUNC(__kvm_timer_set_cntvoff): { - u64 cntvoff = host_ctxt->regs.regs[1]; +static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); - __kvm_timer_set_cntvoff(cntvoff); - break; - } - case KVM_HOST_SMCCC_FUNC(__kvm_enable_ssbs): - __kvm_enable_ssbs(); - break; - case KVM_HOST_SMCCC_FUNC(__vgic_v3_get_ich_vtr_el2): - ret = __vgic_v3_get_ich_vtr_el2(); - break; - case KVM_HOST_SMCCC_FUNC(__vgic_v3_read_vmcr): - ret = __vgic_v3_read_vmcr(); - break; - case KVM_HOST_SMCCC_FUNC(__vgic_v3_write_vmcr): { - u32 vmcr = host_ctxt->regs.regs[1]; + __kvm_tlb_flush_vmid(kern_hyp_va(mmu)); +} - __vgic_v3_write_vmcr(vmcr); - break; - } - case KVM_HOST_SMCCC_FUNC(__vgic_v3_init_lrs): - __vgic_v3_init_lrs(); - break; - case KVM_HOST_SMCCC_FUNC(__kvm_get_mdcr_el2): - ret = __kvm_get_mdcr_el2(); - break; - case KVM_HOST_SMCCC_FUNC(__vgic_v3_save_aprs): { - unsigned long r1 = host_ctxt->regs.regs[1]; - struct vgic_v3_cpu_if *cpu_if = (struct vgic_v3_cpu_if *)r1; +static void handle___kvm_tlb_flush_local_vmid(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); - __vgic_v3_save_aprs(kern_hyp_va(cpu_if)); - break; - } - case KVM_HOST_SMCCC_FUNC(__vgic_v3_restore_aprs): { - unsigned long r1 = host_ctxt->regs.regs[1]; - 
struct vgic_v3_cpu_if *cpu_if = (struct vgic_v3_cpu_if *)r1; + __kvm_tlb_flush_local_vmid(kern_hyp_va(mmu)); +} - __vgic_v3_restore_aprs(kern_hyp_va(cpu_if)); - break; - } - default: - /* Invalid host HVC. */ - host_ctxt->regs.regs[0] = SMCCC_RET_NOT_SUPPORTED; - return; - } +static void handle___kvm_timer_set_cntvoff(struct kvm_cpu_context *host_ctxt) +{ + __kvm_timer_set_cntvoff(cpu_reg(host_ctxt, 1)); +} + +static void handle___kvm_enable_ssbs(struct kvm_cpu_context *host_ctxt) +{ + u64 tmp; - host_ctxt->regs.regs[0] = SMCCC_RET_SUCCESS; - host_ctxt->regs.regs[1] = ret; + tmp = read_sysreg_el2(SYS_SCTLR); + tmp |= SCTLR_ELx_DSSBS; + write_sysreg_el2(tmp, SYS_SCTLR); +} + +static void handle___vgic_v3_get_ich_vtr_el2(struct kvm_cpu_context *host_ctxt) +{ + cpu_reg(host_ctxt, 1) = __vgic_v3_get_ich_vtr_el2(); +} + +static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt) +{ + cpu_reg(host_ctxt, 1) = __vgic_v3_read_vmcr(); +} + +static void handle___vgic_v3_write_vmcr(struct kvm_cpu_context *host_ctxt) +{ + __vgic_v3_write_vmcr(cpu_reg(host_ctxt, 1)); +} + +static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt) +{ + __vgic_v3_init_lrs(); +} + +static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt) +{ + cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2(); +} + +static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); + + __vgic_v3_save_aprs(kern_hyp_va(cpu_if)); +} + +static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); + + __vgic_v3_restore_aprs(kern_hyp_va(cpu_if)); +} + +typedef void (*hcall_t)(struct kvm_cpu_context *); + +#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = kimg_fn_ptr(handle_##x) + +static const hcall_t *host_hcall[] = { + HANDLE_FUNC(__kvm_vcpu_run), + HANDLE_FUNC(__kvm_flush_vm_context), + HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa), 
+ HANDLE_FUNC(__kvm_tlb_flush_vmid), + HANDLE_FUNC(__kvm_tlb_flush_local_vmid), + HANDLE_FUNC(__kvm_timer_set_cntvoff), + HANDLE_FUNC(__kvm_enable_ssbs), + HANDLE_FUNC(__vgic_v3_get_ich_vtr_el2), + HANDLE_FUNC(__vgic_v3_read_vmcr), + HANDLE_FUNC(__vgic_v3_write_vmcr), + HANDLE_FUNC(__vgic_v3_init_lrs), + HANDLE_FUNC(__kvm_get_mdcr_el2), + HANDLE_FUNC(__vgic_v3_save_aprs), + HANDLE_FUNC(__vgic_v3_restore_aprs), +}; + +static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(unsigned long, id, host_ctxt, 0); + const hcall_t *kfn; + hcall_t hfn; + + id -= KVM_HOST_SMCCC_ID(0); + + if (unlikely(id >= ARRAY_SIZE(host_hcall))) + goto inval; + + kfn = host_hcall[id]; + if (unlikely(!kfn)) + goto inval; + + cpu_reg(host_ctxt, 0) = SMCCC_RET_SUCCESS; + + hfn = kimg_fn_hyp_va(kfn); + hfn(host_ctxt); + + return; +inval: + cpu_reg(host_ctxt, 0) = SMCCC_RET_NOT_SUPPORTED; +} + +static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt) +{ + __kvm_hyp_host_forward_smc(host_ctxt); +} + +static void skip_host_instruction(void) +{ + write_sysreg_el2(read_sysreg_el2(SYS_ELR) + 4, SYS_ELR); +} + +static void handle_host_smc(struct kvm_cpu_context *host_ctxt) +{ + bool handled; + + handled = kvm_host_psci_handler(host_ctxt); + if (!handled) + default_host_smc_handler(host_ctxt); + + /* + * Unlike HVC, the return address of an SMC is the instruction's PC. + * Move the return address past the instruction. 
+ */ + skip_host_instruction(); } void handle_trap(struct kvm_cpu_context *host_ctxt) { u64 esr = read_sysreg_el2(SYS_ESR); - unsigned long func_id; - if (ESR_ELx_EC(esr) != ESR_ELx_EC_HVC64) + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_HVC64: + handle_host_hcall(host_ctxt); + break; + case ESR_ELx_EC_SMC64: + handle_host_smc(host_ctxt); + break; + default: hyp_panic(); - - func_id = host_ctxt->regs.regs[0]; - handle_host_hcall(func_id, host_ctxt); + } } diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c new file mode 100644 index 000000000000..cbab0c6246e2 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 - Google LLC + * Author: David Brazdil <dbrazdil@google.com> + */ + +#include <asm/kvm_asm.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> + +/* + * nVHE copy of data structures tracking available CPU cores. + * Only entries for CPUs that were online at KVM init are populated. + * Other CPUs should not be allowed to boot because their features were + * not checked against the finalized system capabilities. + */ +u64 __ro_after_init __cpu_logical_map[NR_CPUS] = { [0 ... 
NR_CPUS-1] = INVALID_HWID }; + +u64 cpu_logical_map(unsigned int cpu) +{ + if (cpu >= ARRAY_SIZE(__cpu_logical_map)) + hyp_panic(); + + return __cpu_logical_map[cpu]; +} + +unsigned long __hyp_per_cpu_offset(unsigned int cpu) +{ + unsigned long *cpu_base_array; + unsigned long this_cpu_base; + unsigned long elf_base; + + if (cpu >= ARRAY_SIZE(kvm_arm_hyp_percpu_base)) + hyp_panic(); + + cpu_base_array = (unsigned long *)hyp_symbol_addr(kvm_arm_hyp_percpu_base); + this_cpu_base = kern_hyp_va(cpu_base_array[cpu]); + elf_base = (unsigned long)hyp_symbol_addr(__per_cpu_start); + return this_cpu_base - elf_base; +} diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S index bb2d986ff696..5d76ff2ba63e 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S @@ -16,4 +16,5 @@ SECTIONS { HYP_SECTION_NAME(.data..percpu) : { PERCPU_INPUT(L1_CACHE_BYTES) } + HYP_SECTION(.data..ro_after_init) } diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c new file mode 100644 index 000000000000..08dc9de69314 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -0,0 +1,324 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 - Google LLC + * Author: David Brazdil <dbrazdil@google.com> + */ + +#include <asm/kvm_asm.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <kvm/arm_hypercalls.h> +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> +#include <linux/psci.h> +#include <kvm/arm_psci.h> +#include <uapi/linux/psci.h> + +#include <nvhe/trap_handler.h> + +void kvm_hyp_cpu_entry(unsigned long r0); +void kvm_hyp_cpu_resume(unsigned long r0); + +void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); + +/* Config options set by the host. 
*/ +__ro_after_init u32 kvm_host_psci_version; +__ro_after_init struct psci_0_1_function_ids kvm_host_psci_0_1_function_ids; +__ro_after_init s64 hyp_physvirt_offset; + +#define __hyp_pa(x) ((phys_addr_t)((x)) + hyp_physvirt_offset) + +#define INVALID_CPU_ID UINT_MAX + +struct psci_boot_args { + atomic_t lock; + unsigned long pc; + unsigned long r0; +}; + +#define PSCI_BOOT_ARGS_UNLOCKED 0 +#define PSCI_BOOT_ARGS_LOCKED 1 + +#define PSCI_BOOT_ARGS_INIT \ + ((struct psci_boot_args){ \ + .lock = ATOMIC_INIT(PSCI_BOOT_ARGS_UNLOCKED), \ + }) + +static DEFINE_PER_CPU(struct psci_boot_args, cpu_on_args) = PSCI_BOOT_ARGS_INIT; +static DEFINE_PER_CPU(struct psci_boot_args, suspend_args) = PSCI_BOOT_ARGS_INIT; + +static u64 get_psci_func_id(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(u64, func_id, host_ctxt, 0); + + return func_id; +} + +static bool is_psci_0_1_call(u64 func_id) +{ + return (func_id == kvm_host_psci_0_1_function_ids.cpu_suspend) || + (func_id == kvm_host_psci_0_1_function_ids.cpu_on) || + (func_id == kvm_host_psci_0_1_function_ids.cpu_off) || + (func_id == kvm_host_psci_0_1_function_ids.migrate); +} + +static bool is_psci_0_2_call(u64 func_id) +{ + /* SMCCC reserves IDs 0x00-1F with the given 32/64-bit base for PSCI. 
*/ + return (PSCI_0_2_FN(0) <= func_id && func_id <= PSCI_0_2_FN(31)) || + (PSCI_0_2_FN64(0) <= func_id && func_id <= PSCI_0_2_FN64(31)); +} + +static bool is_psci_call(u64 func_id) +{ + switch (kvm_host_psci_version) { + case PSCI_VERSION(0, 1): + return is_psci_0_1_call(func_id); + default: + return is_psci_0_2_call(func_id); + } +} + +static unsigned long psci_call(unsigned long fn, unsigned long arg0, + unsigned long arg1, unsigned long arg2) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(fn, arg0, arg1, arg2, &res); + return res.a0; +} + +static unsigned long psci_forward(struct kvm_cpu_context *host_ctxt) +{ + return psci_call(cpu_reg(host_ctxt, 0), cpu_reg(host_ctxt, 1), + cpu_reg(host_ctxt, 2), cpu_reg(host_ctxt, 3)); +} + +static __noreturn unsigned long psci_forward_noreturn(struct kvm_cpu_context *host_ctxt) +{ + psci_forward(host_ctxt); + hyp_panic(); /* unreachable */ +} + +static unsigned int find_cpu_id(u64 mpidr) +{ + unsigned int i; + + /* Reject invalid MPIDRs */ + if (mpidr & ~MPIDR_HWID_BITMASK) + return INVALID_CPU_ID; + + for (i = 0; i < NR_CPUS; i++) { + if (cpu_logical_map(i) == mpidr) + return i; + } + + return INVALID_CPU_ID; +} + +static __always_inline bool try_acquire_boot_args(struct psci_boot_args *args) +{ + return atomic_cmpxchg_acquire(&args->lock, + PSCI_BOOT_ARGS_UNLOCKED, + PSCI_BOOT_ARGS_LOCKED) == + PSCI_BOOT_ARGS_UNLOCKED; +} + +static __always_inline void release_boot_args(struct psci_boot_args *args) +{ + atomic_set_release(&args->lock, PSCI_BOOT_ARGS_UNLOCKED); +} + +static int psci_cpu_on(u64 func_id, struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(u64, mpidr, host_ctxt, 1); + DECLARE_REG(unsigned long, pc, host_ctxt, 2); + DECLARE_REG(unsigned long, r0, host_ctxt, 3); + + unsigned int cpu_id; + struct psci_boot_args *boot_args; + struct kvm_nvhe_init_params *init_params; + int ret; + + /* + * Find the logical CPU ID for the given MPIDR. 
The search set is + * the set of CPUs that were online at the point of KVM initialization. + * Booting other CPUs is rejected because their cpufeatures were not + * checked against the finalized capabilities. This could be relaxed + * by doing the feature checks in hyp. + */ + cpu_id = find_cpu_id(mpidr); + if (cpu_id == INVALID_CPU_ID) + return PSCI_RET_INVALID_PARAMS; + + boot_args = per_cpu_ptr(hyp_symbol_addr(cpu_on_args), cpu_id); + init_params = per_cpu_ptr(hyp_symbol_addr(kvm_init_params), cpu_id); + + /* Check if the target CPU is already being booted. */ + if (!try_acquire_boot_args(boot_args)) + return PSCI_RET_ALREADY_ON; + + boot_args->pc = pc; + boot_args->r0 = r0; + wmb(); + + ret = psci_call(func_id, mpidr, + __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_entry)), + __hyp_pa(init_params)); + + /* If successful, the lock will be released by the target CPU. */ + if (ret != PSCI_RET_SUCCESS) + release_boot_args(boot_args); + + return ret; +} + +static int psci_cpu_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(u64, power_state, host_ctxt, 1); + DECLARE_REG(unsigned long, pc, host_ctxt, 2); + DECLARE_REG(unsigned long, r0, host_ctxt, 3); + + struct psci_boot_args *boot_args; + struct kvm_nvhe_init_params *init_params; + + boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args)); + init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params)); + + /* + * No need to acquire a lock before writing to boot_args because a core + * can only suspend itself. Racy CPU_ON calls use a separate struct. + */ + boot_args->pc = pc; + boot_args->r0 = r0; + + /* + * Will either return if shallow sleep state, or wake up into the entry + * point if it is a deep sleep state. 
+ */ + return psci_call(func_id, power_state, + __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)), + __hyp_pa(init_params)); +} + +static int psci_system_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(unsigned long, pc, host_ctxt, 1); + DECLARE_REG(unsigned long, r0, host_ctxt, 2); + + struct psci_boot_args *boot_args; + struct kvm_nvhe_init_params *init_params; + + boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args)); + init_params = this_cpu_ptr(hyp_symbol_addr(kvm_init_params)); + + /* + * No need to acquire a lock before writing to boot_args because a core + * can only suspend itself. Racy CPU_ON calls use a separate struct. + */ + boot_args->pc = pc; + boot_args->r0 = r0; + + /* Will only return on error. */ + return psci_call(func_id, + __hyp_pa(hyp_symbol_addr(kvm_hyp_cpu_resume)), + __hyp_pa(init_params), 0); +} + +asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on) +{ + struct psci_boot_args *boot_args; + struct kvm_cpu_context *host_ctxt; + + host_ctxt = &this_cpu_ptr(hyp_symbol_addr(kvm_host_data))->host_ctxt; + + if (is_cpu_on) + boot_args = this_cpu_ptr(hyp_symbol_addr(cpu_on_args)); + else + boot_args = this_cpu_ptr(hyp_symbol_addr(suspend_args)); + + cpu_reg(host_ctxt, 0) = boot_args->r0; + write_sysreg_el2(boot_args->pc, SYS_ELR); + + if (is_cpu_on) + release_boot_args(boot_args); + + __host_enter(host_ctxt); +} + +static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) +{ + if ((func_id == kvm_host_psci_0_1_function_ids.cpu_off) || + (func_id == kvm_host_psci_0_1_function_ids.migrate)) + return psci_forward(host_ctxt); + else if (func_id == kvm_host_psci_0_1_function_ids.cpu_on) + return psci_cpu_on(func_id, host_ctxt); + else if (func_id == kvm_host_psci_0_1_function_ids.cpu_suspend) + return psci_cpu_suspend(func_id, host_ctxt); + else + return PSCI_RET_NOT_SUPPORTED; +} + +static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) +{ + switch 
(func_id) { + case PSCI_0_2_FN_PSCI_VERSION: + case PSCI_0_2_FN_CPU_OFF: + case PSCI_0_2_FN64_AFFINITY_INFO: + case PSCI_0_2_FN64_MIGRATE: + case PSCI_0_2_FN_MIGRATE_INFO_TYPE: + case PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU: + return psci_forward(host_ctxt); + case PSCI_0_2_FN_SYSTEM_OFF: + case PSCI_0_2_FN_SYSTEM_RESET: + psci_forward_noreturn(host_ctxt); + unreachable(); + case PSCI_0_2_FN64_CPU_SUSPEND: + return psci_cpu_suspend(func_id, host_ctxt); + case PSCI_0_2_FN64_CPU_ON: + return psci_cpu_on(func_id, host_ctxt); + default: + return PSCI_RET_NOT_SUPPORTED; + } +} + +static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) +{ + switch (func_id) { + case PSCI_1_0_FN_PSCI_FEATURES: + case PSCI_1_0_FN_SET_SUSPEND_MODE: + case PSCI_1_1_FN64_SYSTEM_RESET2: + return psci_forward(host_ctxt); + case PSCI_1_0_FN64_SYSTEM_SUSPEND: + return psci_system_suspend(func_id, host_ctxt); + default: + return psci_0_2_handler(func_id, host_ctxt); + } +} + +bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt) +{ + u64 func_id = get_psci_func_id(host_ctxt); + unsigned long ret; + + if (!is_psci_call(func_id)) + return false; + + switch (kvm_host_psci_version) { + case PSCI_VERSION(0, 1): + ret = psci_0_1_handler(func_id, host_ctxt); + break; + case PSCI_VERSION(0, 2): + ret = psci_0_2_handler(func_id, host_ctxt); + break; + default: + ret = psci_1_0_handler(func_id, host_ctxt); + break; + } + + cpu_reg(host_ctxt, 0) = ret; + cpu_reg(host_ctxt, 1) = 0; + cpu_reg(host_ctxt, 2) = 0; + cpu_reg(host_ctxt, 3) = 0; + return true; +} diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 8ae8160bc93a..f3d0e9eca56c 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -4,6 +4,7 @@ * Author: Marc Zyngier <marc.zyngier@arm.com> */ +#include <hyp/adjust_pc.h> #include <hyp/switch.h> #include <hyp/sysreg-sr.h> @@ -96,7 +97,10 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) mdcr_el2 |= 
MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; write_sysreg(mdcr_el2, mdcr_el2); - write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); + if (is_protected_kvm_enabled()) + write_sysreg(HCR_HOST_NVHE_PROTECTED_FLAGS, hcr_el2); + else + write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2); write_sysreg(CPTR_EL2_DEFAULT, cptr_el2); write_sysreg(__kvm_hyp_host_vector, vbar_el2); } @@ -189,6 +193,8 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg_save_state_nvhe(host_ctxt); + __adjust_pc(vcpu); + /* * We must restore the 32-bit state before the sysregs, thanks * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). diff --git a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c index 88a25fc8fcd3..29305022bc04 100644 --- a/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/sysreg-sr.c @@ -33,14 +33,3 @@ void __sysreg_restore_state_nvhe(struct kvm_cpu_context *ctxt) __sysreg_restore_user_state(ctxt); __sysreg_restore_el2_return_state(ctxt); } - -void __kvm_enable_ssbs(void) -{ - u64 tmp; - - asm volatile( - "mrs %0, sctlr_el2\n" - "orr %0, %0, %1\n" - "msr sctlr_el2, %0" - : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); -} diff --git a/arch/arm64/kvm/hyp/smccc_wa.S b/arch/arm64/kvm/hyp/smccc_wa.S deleted file mode 100644 index b0441dbdf68b..000000000000 --- a/arch/arm64/kvm/hyp/smccc_wa.S +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2015-2018 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - */ - -#include <linux/arm-smccc.h> -#include <linux/linkage.h> - -#include <asm/kvm_asm.h> -#include <asm/kvm_mmu.h> - - /* - * This is not executed directly and is instead copied into the vectors - * by install_bp_hardening_cb(). 
- */ - .data - .pushsection .rodata - .global __smccc_workaround_1_smc -SYM_DATA_START(__smccc_workaround_1_smc) - esb - sub sp, sp, #(8 * 4) - stp x2, x3, [sp, #(8 * 0)] - stp x0, x1, [sp, #(8 * 2)] - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 - smc #0 - ldp x2, x3, [sp, #(8 * 0)] - ldp x0, x1, [sp, #(8 * 2)] - add sp, sp, #(8 * 4) -1: .org __smccc_workaround_1_smc + __SMCCC_WORKAROUND_1_SMC_SZ - .org 1b -SYM_DATA_END(__smccc_workaround_1_smc) diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index bd1bab551d48..8f0585640241 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -4,6 +4,8 @@ * Author: Marc Zyngier <marc.zyngier@arm.com> */ +#include <hyp/adjust_pc.h> + #include <linux/compiler.h> #include <linux/irqchip/arm-gic.h> #include <linux/kvm_host.h> diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 452f4cacd674..80406f463c28 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -4,6 +4,8 @@ * Author: Marc Zyngier <marc.zyngier@arm.com> */ +#include <hyp/adjust_pc.h> + #include <linux/compiler.h> #include <linux/irqchip/arm-gic-v3.h> #include <linux/kvm_host.h> diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index 461e97c375cc..96bec0ecf9dd 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -8,4 +8,4 @@ ccflags-y := -D__KVM_VHE_HYPERVISOR__ obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ - ../fpsimd.o ../hyp-entry.o + ../fpsimd.o ../hyp-entry.o ../exception.o diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 62546e20b251..af8e940d0f03 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -4,6 +4,7 @@ * Author: Marc Zyngier <marc.zyngier@arm.com> */ +#include <hyp/adjust_pc.h> #include 
<hyp/switch.h> #include <linux/arm-smccc.h> @@ -133,6 +134,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) __load_guest_stage2(vcpu->arch.hw_mmu); __activate_traps(vcpu); + __adjust_pc(vcpu); + sysreg_restore_guest_state_vhe(guest_ctxt); __debug_switch_to_guest(vcpu); diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 34a96ab244fa..b47df73e98d7 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -14,119 +14,15 @@ #include <asm/kvm_emulate.h> #include <asm/esr.h> -#define CURRENT_EL_SP_EL0_VECTOR 0x0 -#define CURRENT_EL_SP_ELx_VECTOR 0x200 -#define LOWER_EL_AArch64_VECTOR 0x400 -#define LOWER_EL_AArch32_VECTOR 0x600 - -enum exception_type { - except_type_sync = 0, - except_type_irq = 0x80, - except_type_fiq = 0x100, - except_type_serror = 0x180, -}; - -/* - * This performs the exception entry at a given EL (@target_mode), stashing PC - * and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE. - * The EL passed to this function *must* be a non-secure, privileged mode with - * bit 0 being set (PSTATE.SP == 1). - * - * When an exception is taken, most PSTATE fields are left unchanged in the - * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all - * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx - * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0. - * - * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429. - * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426. - * - * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from - * MSB to LSB. 
- */ -static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, - enum exception_type type) -{ - unsigned long sctlr, vbar, old, new, mode; - u64 exc_offset; - - mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT); - - if (mode == target_mode) - exc_offset = CURRENT_EL_SP_ELx_VECTOR; - else if ((mode | PSR_MODE_THREAD_BIT) == target_mode) - exc_offset = CURRENT_EL_SP_EL0_VECTOR; - else if (!(mode & PSR_MODE32_BIT)) - exc_offset = LOWER_EL_AArch64_VECTOR; - else - exc_offset = LOWER_EL_AArch32_VECTOR; - - switch (target_mode) { - case PSR_MODE_EL1h: - vbar = vcpu_read_sys_reg(vcpu, VBAR_EL1); - sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); - vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1); - break; - default: - /* Don't do that */ - BUG(); - } - - *vcpu_pc(vcpu) = vbar + exc_offset + type; - - old = *vcpu_cpsr(vcpu); - new = 0; - - new |= (old & PSR_N_BIT); - new |= (old & PSR_Z_BIT); - new |= (old & PSR_C_BIT); - new |= (old & PSR_V_BIT); - - // TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests) - - new |= (old & PSR_DIT_BIT); - - // PSTATE.UAO is set to zero upon any exception to AArch64 - // See ARM DDI 0487E.a, page D5-2579. - - // PSTATE.PAN is unchanged unless SCTLR_ELx.SPAN == 0b0 - // SCTLR_ELx.SPAN is RES1 when ARMv8.1-PAN is not implemented - // See ARM DDI 0487E.a, page D5-2578. - new |= (old & PSR_PAN_BIT); - if (!(sctlr & SCTLR_EL1_SPAN)) - new |= PSR_PAN_BIT; - - // PSTATE.SS is set to zero upon any exception to AArch64 - // See ARM DDI 0487E.a, page D2-2452. - - // PSTATE.IL is set to zero upon any exception to AArch64 - // See ARM DDI 0487E.a, page D1-2306. - - // PSTATE.SSBS is set to SCTLR_ELx.DSSBS upon any exception to AArch64 - // See ARM DDI 0487E.a, page D13-3258 - if (sctlr & SCTLR_ELx_DSSBS) - new |= PSR_SSBS_BIT; - - // PSTATE.BTYPE is set to zero upon any exception to AArch64 - // See ARM DDI 0487E.a, pages D1-2293 to D1-2294. 
- - new |= PSR_D_BIT; - new |= PSR_A_BIT; - new |= PSR_I_BIT; - new |= PSR_F_BIT; - - new |= target_mode; - - *vcpu_cpsr(vcpu) = new; - vcpu_write_spsr(vcpu, old); -} - static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); bool is_aarch32 = vcpu_mode_is_32bit(vcpu); u32 esr = 0; - enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 | + KVM_ARM64_EXCEPT_AA64_ELx_SYNC | + KVM_ARM64_PENDING_EXCEPTION); vcpu_write_sys_reg(vcpu, addr, FAR_EL1); @@ -156,7 +52,9 @@ static void inject_undef64(struct kvm_vcpu *vcpu) { u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); - enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 | + KVM_ARM64_EXCEPT_AA64_ELx_SYNC | + KVM_ARM64_PENDING_EXCEPTION); /* * Build an unknown exception, depending on the instruction @@ -168,6 +66,53 @@ static void inject_undef64(struct kvm_vcpu *vcpu) vcpu_write_sys_reg(vcpu, esr, ESR_EL1); } +#define DFSR_FSC_EXTABT_LPAE 0x10 +#define DFSR_FSC_EXTABT_nLPAE 0x08 +#define DFSR_LPAE BIT(9) +#define TTBCR_EAE BIT(31) + +static void inject_undef32(struct kvm_vcpu *vcpu) +{ + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_UND | + KVM_ARM64_PENDING_EXCEPTION); +} + +/* + * Modelled after TakeDataAbortException() and TakePrefetchAbortException + * pseudocode. 
+ */ +static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, u32 addr) +{ + u64 far; + u32 fsr; + + /* Give the guest an IMPLEMENTATION DEFINED exception */ + if (vcpu_read_sys_reg(vcpu, TCR_EL1) & TTBCR_EAE) { + fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE; + } else { + /* no need to shuffle FS[4] into DFSR[10] as its 0 */ + fsr = DFSR_FSC_EXTABT_nLPAE; + } + + far = vcpu_read_sys_reg(vcpu, FAR_EL1); + + if (is_pabt) { + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_IABT | + KVM_ARM64_PENDING_EXCEPTION); + far &= GENMASK(31, 0); + far |= (u64)addr << 32; + vcpu_write_sys_reg(vcpu, fsr, IFSR32_EL2); + } else { /* !iabt */ + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA32_DABT | + KVM_ARM64_PENDING_EXCEPTION); + far &= GENMASK(63, 32); + far |= addr; + vcpu_write_sys_reg(vcpu, fsr, ESR_EL1); + } + + vcpu_write_sys_reg(vcpu, far, FAR_EL1); +} + /** * kvm_inject_dabt - inject a data abort into the guest * @vcpu: The VCPU to receive the data abort @@ -179,7 +124,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu) void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) { if (vcpu_el1_is_32bit(vcpu)) - kvm_inject_dabt32(vcpu, addr); + inject_abt32(vcpu, false, addr); else inject_abt64(vcpu, false, addr); } @@ -195,7 +140,7 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr) void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) { if (vcpu_el1_is_32bit(vcpu)) - kvm_inject_pabt32(vcpu, addr); + inject_abt32(vcpu, true, addr); else inject_abt64(vcpu, true, addr); } @@ -210,7 +155,7 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) void kvm_inject_undefined(struct kvm_vcpu *vcpu) { if (vcpu_el1_is_32bit(vcpu)) - kvm_inject_undef32(vcpu); + inject_undef32(vcpu); else inject_undef64(vcpu); } diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 6a2826f1bf5e..3e2d8ba11a02 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -115,7 +115,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) * The MMIO 
instruction is emulated and should not be re-executed * in the guest. */ - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_incr_pc(vcpu); return 0; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 1a01da9fdc99..1f41173e6149 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1016,7 +1016,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) * cautious, and skip the instruction. */ if (kvm_is_error_hva(hva) && kvm_vcpu_dabt_is_cm(vcpu)) { - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_incr_pc(vcpu); ret = 1; goto out_unlock; } diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 2ed5ef8f274b..398f6df1bbe4 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -384,7 +384,7 @@ static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) struct kvm_pmu *pmu = &vcpu->arch.pmu; bool overflow; - if (!kvm_arm_pmu_v3_ready(vcpu)) + if (!kvm_vcpu_has_pmu(vcpu)) return; overflow = !!kvm_pmu_overflow_status(vcpu); @@ -825,9 +825,12 @@ bool kvm_arm_support_pmu_v3(void) int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) { - if (!vcpu->arch.pmu.created) + if (!kvm_vcpu_has_pmu(vcpu)) return 0; + if (!vcpu->arch.pmu.created) + return -EINVAL; + /* * A valid interrupt configuration for the PMU is either to have a * properly configured interrupt number and using an in-kernel @@ -835,9 +838,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) */ if (irqchip_in_kernel(vcpu->kvm)) { int irq = vcpu->arch.pmu.irq_num; - if (!kvm_arm_pmu_irq_initialized(vcpu)) - return -EINVAL; - /* * If we are using an in-kernel vgic, at this point we know * the vgic will be initialized, so we can check the PMU irq @@ -851,7 +851,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) } kvm_pmu_vcpu_reset(vcpu); - vcpu->arch.pmu.ready = true; return 0; } @@ -913,8 +912,7 @@ static bool pmu_irq_is_valid(struct kvm *kvm, int irq) int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - if 
(!kvm_arm_support_pmu_v3() || - !test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) + if (!kvm_vcpu_has_pmu(vcpu)) return -ENODEV; if (vcpu->arch.pmu.created) @@ -1015,7 +1013,7 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) if (!irqchip_in_kernel(vcpu->kvm)) return -EINVAL; - if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) + if (!kvm_vcpu_has_pmu(vcpu)) return -ENODEV; if (!kvm_arm_pmu_irq_initialized(vcpu)) @@ -1035,8 +1033,7 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) case KVM_ARM_VCPU_PMU_V3_IRQ: case KVM_ARM_VCPU_PMU_V3_INIT: case KVM_ARM_VCPU_PMU_V3_FILTER: - if (kvm_arm_support_pmu_v3() && - test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) + if (kvm_vcpu_has_pmu(vcpu)) return 0; } diff --git a/arch/arm64/kvm/pvtime.c b/arch/arm64/kvm/pvtime.c index 920ac43077ad..78a09f7a6637 100644 --- a/arch/arm64/kvm/pvtime.c +++ b/arch/arm64/kvm/pvtime.c @@ -53,7 +53,6 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) struct pvclock_vcpu_stolen_time init_values = {}; struct kvm *kvm = vcpu->kvm; u64 base = vcpu->arch.steal.base; - int idx; if (base == GPA_INVALID) return base; @@ -63,10 +62,7 @@ gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) * the feature enabled. 
*/ vcpu->arch.steal.last_steal = current->sched_info.run_delay; - - idx = srcu_read_lock(&kvm->srcu); - kvm_write_guest(kvm, base, &init_values, sizeof(init_values)); - srcu_read_unlock(&kvm->srcu, idx); + kvm_write_guest_lock(kvm, base, &init_values, sizeof(init_values)); return base; } diff --git a/arch/arm64/kvm/regmap.c b/arch/arm64/kvm/regmap.c deleted file mode 100644 index accc1d5fba61..000000000000 --- a/arch/arm64/kvm/regmap.c +++ /dev/null @@ -1,224 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * Derived from arch/arm/kvm/emulate.c: - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall <c.dall@virtualopensystems.com> - */ - -#include <linux/mm.h> -#include <linux/kvm_host.h> -#include <asm/kvm_emulate.h> -#include <asm/ptrace.h> - -#define VCPU_NR_MODES 6 -#define REG_OFFSET(_reg) \ - (offsetof(struct user_pt_regs, _reg) / sizeof(unsigned long)) - -#define USR_REG_OFFSET(R) REG_OFFSET(compat_usr(R)) - -static const unsigned long vcpu_reg_offsets[VCPU_NR_MODES][16] = { - /* USR Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), - USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), - USR_REG_OFFSET(12), USR_REG_OFFSET(13), USR_REG_OFFSET(14), - REG_OFFSET(pc) - }, - - /* FIQ Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), - REG_OFFSET(compat_r8_fiq), /* r8 */ - REG_OFFSET(compat_r9_fiq), /* r9 */ - REG_OFFSET(compat_r10_fiq), /* r10 */ - REG_OFFSET(compat_r11_fiq), /* r11 */ - REG_OFFSET(compat_r12_fiq), /* r12 */ - REG_OFFSET(compat_sp_fiq), /* r13 */ - REG_OFFSET(compat_lr_fiq), /* r14 */ - REG_OFFSET(pc) - }, - - /* IRQ Registers */ - { - 
USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), - USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), - USR_REG_OFFSET(12), - REG_OFFSET(compat_sp_irq), /* r13 */ - REG_OFFSET(compat_lr_irq), /* r14 */ - REG_OFFSET(pc) - }, - - /* SVC Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), - USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), - USR_REG_OFFSET(12), - REG_OFFSET(compat_sp_svc), /* r13 */ - REG_OFFSET(compat_lr_svc), /* r14 */ - REG_OFFSET(pc) - }, - - /* ABT Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), - USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), - USR_REG_OFFSET(12), - REG_OFFSET(compat_sp_abt), /* r13 */ - REG_OFFSET(compat_lr_abt), /* r14 */ - REG_OFFSET(pc) - }, - - /* UND Registers */ - { - USR_REG_OFFSET(0), USR_REG_OFFSET(1), USR_REG_OFFSET(2), - USR_REG_OFFSET(3), USR_REG_OFFSET(4), USR_REG_OFFSET(5), - USR_REG_OFFSET(6), USR_REG_OFFSET(7), USR_REG_OFFSET(8), - USR_REG_OFFSET(9), USR_REG_OFFSET(10), USR_REG_OFFSET(11), - USR_REG_OFFSET(12), - REG_OFFSET(compat_sp_und), /* r13 */ - REG_OFFSET(compat_lr_und), /* r14 */ - REG_OFFSET(pc) - }, -}; - -/* - * Return a pointer to the register number valid in the current mode of - * the virtual CPU. - */ -unsigned long *vcpu_reg32(const struct kvm_vcpu *vcpu, u8 reg_num) -{ - unsigned long *reg_array = (unsigned long *)&vcpu->arch.ctxt.regs; - unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK; - - switch (mode) { - case PSR_AA32_MODE_USR ... PSR_AA32_MODE_SVC: - mode &= ~PSR_MODE32_BIT; /* 0 ... 
3 */ - break; - - case PSR_AA32_MODE_ABT: - mode = 4; - break; - - case PSR_AA32_MODE_UND: - mode = 5; - break; - - case PSR_AA32_MODE_SYS: - mode = 0; /* SYS maps to USR */ - break; - - default: - BUG(); - } - - return reg_array + vcpu_reg_offsets[mode][reg_num]; -} - -/* - * Return the SPSR for the current mode of the virtual CPU. - */ -static int vcpu_spsr32_mode(const struct kvm_vcpu *vcpu) -{ - unsigned long mode = *vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK; - switch (mode) { - case PSR_AA32_MODE_SVC: return KVM_SPSR_SVC; - case PSR_AA32_MODE_ABT: return KVM_SPSR_ABT; - case PSR_AA32_MODE_UND: return KVM_SPSR_UND; - case PSR_AA32_MODE_IRQ: return KVM_SPSR_IRQ; - case PSR_AA32_MODE_FIQ: return KVM_SPSR_FIQ; - default: BUG(); - } -} - -unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu) -{ - int spsr_idx = vcpu_spsr32_mode(vcpu); - - if (!vcpu->arch.sysregs_loaded_on_cpu) { - switch (spsr_idx) { - case KVM_SPSR_SVC: - return __vcpu_sys_reg(vcpu, SPSR_EL1); - case KVM_SPSR_ABT: - return vcpu->arch.ctxt.spsr_abt; - case KVM_SPSR_UND: - return vcpu->arch.ctxt.spsr_und; - case KVM_SPSR_IRQ: - return vcpu->arch.ctxt.spsr_irq; - case KVM_SPSR_FIQ: - return vcpu->arch.ctxt.spsr_fiq; - } - } - - switch (spsr_idx) { - case KVM_SPSR_SVC: - return read_sysreg_el1(SYS_SPSR); - case KVM_SPSR_ABT: - return read_sysreg(spsr_abt); - case KVM_SPSR_UND: - return read_sysreg(spsr_und); - case KVM_SPSR_IRQ: - return read_sysreg(spsr_irq); - case KVM_SPSR_FIQ: - return read_sysreg(spsr_fiq); - default: - BUG(); - } -} - -void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v) -{ - int spsr_idx = vcpu_spsr32_mode(vcpu); - - if (!vcpu->arch.sysregs_loaded_on_cpu) { - switch (spsr_idx) { - case KVM_SPSR_SVC: - __vcpu_sys_reg(vcpu, SPSR_EL1) = v; - break; - case KVM_SPSR_ABT: - vcpu->arch.ctxt.spsr_abt = v; - break; - case KVM_SPSR_UND: - vcpu->arch.ctxt.spsr_und = v; - break; - case KVM_SPSR_IRQ: - vcpu->arch.ctxt.spsr_irq = v; - break; - case KVM_SPSR_FIQ: - 
vcpu->arch.ctxt.spsr_fiq = v; - break; - } - - return; - } - - switch (spsr_idx) { - case KVM_SPSR_SVC: - write_sysreg_el1(v, SYS_SPSR); - break; - case KVM_SPSR_ABT: - write_sysreg(v, spsr_abt); - break; - case KVM_SPSR_UND: - write_sysreg(v, spsr_und); - break; - case KVM_SPSR_IRQ: - write_sysreg(v, spsr_irq); - break; - case KVM_SPSR_FIQ: - write_sysreg(v, spsr_fiq); - break; - } -} diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f32490229a4c..47f3f035f3ea 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -25,7 +25,6 @@ #include <asm/ptrace.h> #include <asm/kvm_arm.h> #include <asm/kvm_asm.h> -#include <asm/kvm_coproc.h> #include <asm/kvm_emulate.h> #include <asm/kvm_mmu.h> #include <asm/virt.h> @@ -42,58 +41,6 @@ static u32 kvm_ipa_limit; #define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \ PSR_AA32_I_BIT | PSR_AA32_F_BIT) -static bool system_has_full_ptr_auth(void) -{ - return system_supports_address_auth() && system_supports_generic_auth(); -} - -/** - * kvm_arch_vm_ioctl_check_extension - * - * We currently assume that the number of HW registers is uniform - * across all CPUs (see cpuinfo_sanity_check). 
- */ -int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) -{ - int r; - - switch (ext) { - case KVM_CAP_ARM_EL1_32BIT: - r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1); - break; - case KVM_CAP_GUEST_DEBUG_HW_BPS: - r = get_num_brps(); - break; - case KVM_CAP_GUEST_DEBUG_HW_WPS: - r = get_num_wrps(); - break; - case KVM_CAP_ARM_PMU_V3: - r = kvm_arm_support_pmu_v3(); - break; - case KVM_CAP_ARM_INJECT_SERROR_ESR: - r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN); - break; - case KVM_CAP_SET_GUEST_DEBUG: - case KVM_CAP_VCPU_ATTRIBUTES: - r = 1; - break; - case KVM_CAP_ARM_VM_IPA_SIZE: - r = kvm_ipa_limit; - break; - case KVM_CAP_ARM_SVE: - r = system_supports_sve(); - break; - case KVM_CAP_ARM_PTRAUTH_ADDRESS: - case KVM_CAP_ARM_PTRAUTH_GENERIC: - r = system_has_full_ptr_auth(); - break; - default: - r = 0; - } - - return r; -} - unsigned int kvm_sve_max_vl; int kvm_arm_init_sve(void) @@ -286,6 +233,10 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) pstate = VCPU_RESET_PSTATE_EL1; } + if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) { + ret = -EINVAL; + goto out; + } break; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c1fac9836af1..3313dedfa505 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -20,7 +20,6 @@ #include <asm/debug-monitors.h> #include <asm/esr.h> #include <asm/kvm_arm.h> -#include <asm/kvm_coproc.h> #include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h> #include <asm/kvm_mmu.h> @@ -64,87 +63,6 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu, return false; } -static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) -{ - /* - * System registers listed in the switch are not saved on every - * exit from the guest but are only saved on vcpu_put. 
- * - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the guest cannot modify its - * own MPIDR_EL1 and MPIDR_EL1 is accessed for VCPU A from VCPU B's - * thread when emulating cross-VCPU communication. - */ - switch (reg) { - case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break; - case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; - case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; - case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; - case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; - case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; - case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; - case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; - case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; - case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; - case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; - case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; - case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; - case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; - case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; - case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; - case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; - case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; - case ELR_EL1: *val = read_sysreg_s(SYS_ELR_EL12); break; - case PAR_EL1: *val = read_sysreg_par(); break; - case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; - case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; - case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; - default: return false; - } - - return true; -} - -static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) -{ - /* - * System registers listed in the switch are not restored on every - * entry to the guest but are only restored on vcpu_load. 
- * - * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the MPIDR should only be set - * once, before running the VCPU, and never changed later. - */ - switch (reg) { - case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break; - case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; - case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; - case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; - case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; - case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; - case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; - case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; - case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; - case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; - case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; - case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; - case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; - case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; - case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; - case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; - case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; - case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; - case ELR_EL1: write_sysreg_s(val, SYS_ELR_EL12); break; - case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; - case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; - case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; - case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; - default: return false; - } - - return true; -} - u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) { u64 val = 0x8badf00d8badf00d; @@ -169,7 +87,7 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) static u32 cache_levels; /* CSSELR values; used to index KVM_REG_ARM_DEMUX_ID_CCSIDR */ -#define CSSELR_MAX 12 +#define 
CSSELR_MAX 14 /* Which cache CCSIDR represents depends on CSSELR value. */ static u32 get_ccsidr(u32 csselr) @@ -209,6 +127,24 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, return true; } +static void get_access_mask(const struct sys_reg_desc *r, u64 *mask, u64 *shift) +{ + switch (r->aarch32_map) { + case AA32_LO: + *mask = GENMASK_ULL(31, 0); + *shift = 0; + break; + case AA32_HI: + *mask = GENMASK_ULL(63, 32); + *shift = 32; + break; + default: + *mask = GENMASK_ULL(63, 0); + *shift = 0; + break; + } +} + /* * Generic accessor for VM registers. Only called as long as HCR_TVM * is set. If the guest enables the MMU, we stop trapping the VM @@ -219,26 +155,21 @@ static bool access_vm_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { bool was_enabled = vcpu_has_cache_enabled(vcpu); - u64 val; - int reg = r->reg; + u64 val, mask, shift; BUG_ON(!p->is_write); - /* See the 32bit mapping in kvm_host.h */ - if (p->is_aarch32) - reg = r->reg / 2; + get_access_mask(r, &mask, &shift); - if (!p->is_aarch32 || !p->is_32bit) { - val = p->regval; + if (~mask) { + val = vcpu_read_sys_reg(vcpu, r->reg); + val &= ~mask; } else { - val = vcpu_read_sys_reg(vcpu, reg); - if (r->reg % 2) - val = (p->regval << 32) | (u64)lower_32_bits(val); - else - val = ((u64)upper_32_bits(val) << 32) | - lower_32_bits(p->regval); + val = 0; } - vcpu_write_sys_reg(vcpu, val, reg); + + val |= (p->regval & (mask >> shift)) << shift; + vcpu_write_sys_reg(vcpu, val, r->reg); kvm_toggle_cache(vcpu, was_enabled); return true; @@ -248,17 +179,13 @@ static bool access_actlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { + u64 mask, shift; + if (p->is_write) return ignore_write(vcpu, p); - p->regval = vcpu_read_sys_reg(vcpu, ACTLR_EL1); - - if (p->is_aarch32) { - if (r->Op2 & 2) - p->regval = upper_32_bits(p->regval); - else - p->regval = lower_32_bits(p->regval); - } + get_access_mask(r, &mask, &shift); + p->regval = (vcpu_read_sys_reg(vcpu, r->reg) & mask) >> 
shift; return true; } @@ -285,7 +212,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, * equivalent to ICC_SGI0R_EL1, as there is no "alternative" secure * group. */ - if (p->is_aarch32) { + if (p->Op0 == 0) { /* AArch32 */ switch (p->Op1) { default: /* Keep GCC quiet */ case 0: /* ICC_SGI1R */ @@ -296,7 +223,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, g1 = false; break; } - } else { + } else { /* AArch64 */ switch (p->Op2) { default: /* Keep GCC quiet */ case 5: /* ICC_SGI1R_EL1 */ @@ -438,26 +365,30 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, */ static void reg_to_dbg(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *rd, u64 *dbg_reg) { - u64 val = p->regval; + u64 mask, shift, val; - if (p->is_32bit) { - val &= 0xffffffffUL; - val |= ((*dbg_reg >> 32) << 32); - } + get_access_mask(rd, &mask, &shift); + val = *dbg_reg; + val &= ~mask; + val |= (p->regval & (mask >> shift)) << shift; *dbg_reg = val; + vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; } static void dbg_to_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *rd, u64 *dbg_reg) { - p->regval = *dbg_reg; - if (p->is_32bit) - p->regval &= 0xffffffffUL; + u64 mask, shift; + + get_access_mask(rd, &mask, &shift); + p->regval = (*dbg_reg & mask) >> shift; } static bool trap_bvr(struct kvm_vcpu *vcpu, @@ -467,9 +398,9 @@ static bool trap_bvr(struct kvm_vcpu *vcpu, u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; if (p->is_write) - reg_to_dbg(vcpu, p, dbg_reg); + reg_to_dbg(vcpu, p, rd, dbg_reg); else - dbg_to_reg(vcpu, p, dbg_reg); + dbg_to_reg(vcpu, p, rd, dbg_reg); trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); @@ -509,9 +440,9 @@ static bool trap_bcr(struct kvm_vcpu *vcpu, u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; if (p->is_write) - reg_to_dbg(vcpu, p, dbg_reg); + reg_to_dbg(vcpu, p, rd, dbg_reg); else - dbg_to_reg(vcpu, p, dbg_reg); + dbg_to_reg(vcpu, p, rd, dbg_reg); 
trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); @@ -552,9 +483,9 @@ static bool trap_wvr(struct kvm_vcpu *vcpu, u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; if (p->is_write) - reg_to_dbg(vcpu, p, dbg_reg); + reg_to_dbg(vcpu, p, rd, dbg_reg); else - dbg_to_reg(vcpu, p, dbg_reg); + dbg_to_reg(vcpu, p, rd, dbg_reg); trace_trap_reg(__func__, rd->reg, p->is_write, vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]); @@ -595,9 +526,9 @@ static bool trap_wcr(struct kvm_vcpu *vcpu, u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; if (p->is_write) - reg_to_dbg(vcpu, p, dbg_reg); + reg_to_dbg(vcpu, p, rd, dbg_reg); else - dbg_to_reg(vcpu, p, dbg_reg); + dbg_to_reg(vcpu, p, rd, dbg_reg); trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); @@ -678,8 +609,9 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) { u64 reg = __vcpu_sys_reg(vcpu, PMUSERENR_EL0); - bool enabled = (reg & flags) || vcpu_mode_priv(vcpu); + bool enabled = kvm_vcpu_has_pmu(vcpu); + enabled &= (reg & flags) || vcpu_mode_priv(vcpu); if (!enabled) kvm_inject_undefined(vcpu); @@ -711,9 +643,6 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 val; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (pmu_access_el0_disabled(vcpu)) return false; @@ -740,9 +669,6 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, static bool access_pmselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (pmu_access_event_counter_el0_disabled(vcpu)) return false; @@ -761,9 +687,6 @@ static bool access_pmceid(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 pmceid; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - BUG_ON(p->is_write); if (pmu_access_el0_disabled(vcpu)) @@ -794,10 +717,7 
@@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - u64 idx; - - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); + u64 idx = ~0UL; if (r->CRn == 9 && r->CRm == 13) { if (r->Op2 == 2) { @@ -813,8 +733,6 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, return false; idx = ARMV8_PMU_CYCLE_IDX; - } else { - return false; } } else if (r->CRn == 0 && r->CRm == 9) { /* PMCCNTR */ @@ -828,10 +746,11 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu, return false; idx = ((r->CRm & 3) << 3) | (r->Op2 & 7); - } else { - return false; } + /* Catch any decoding mistake */ + WARN_ON(idx == ~0UL); + if (!pmu_counter_idx_valid(vcpu, idx)) return false; @@ -852,9 +771,6 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 idx, reg; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (pmu_access_el0_disabled(vcpu)) return false; @@ -892,9 +808,6 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 val, mask; - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (pmu_access_el0_disabled(vcpu)) return false; @@ -923,13 +836,8 @@ static bool access_pminten(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 mask = kvm_pmu_valid_counter_mask(vcpu); - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - - if (!vcpu_mode_priv(vcpu)) { - kvm_inject_undefined(vcpu); + if (check_pmu_access_disabled(vcpu, 0)) return false; - } if (p->is_write) { u64 val = p->regval & mask; @@ -952,9 +860,6 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 mask = kvm_pmu_valid_counter_mask(vcpu); - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); - if (pmu_access_el0_disabled(vcpu)) return false; @@ -977,9 +882,6 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u64 mask; - if (!kvm_arm_pmu_v3_ready(vcpu)) - 
return trap_raz_wi(vcpu, p, r); - if (!p->is_write) return read_from_write_only(vcpu, p, r); @@ -994,8 +896,10 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p, static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) { - if (!kvm_arm_pmu_v3_ready(vcpu)) - return trap_raz_wi(vcpu, p, r); + if (!kvm_vcpu_has_pmu(vcpu)) { + kvm_inject_undefined(vcpu); + return false; + } if (p->is_write) { if (!vcpu_mode_priv(vcpu)) { @@ -1122,6 +1026,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, val &= ~(0xfUL << ID_AA64PFR0_AMU_SHIFT); val &= ~(0xfUL << ID_AA64PFR0_CSV2_SHIFT); val |= ((u64)vcpu->kvm->arch.pfr0_csv2 << ID_AA64PFR0_CSV2_SHIFT); + val &= ~(0xfUL << ID_AA64PFR0_CSV3_SHIFT); + val |= ((u64)vcpu->kvm->arch.pfr0_csv3 << ID_AA64PFR0_CSV3_SHIFT); } else if (id == SYS_ID_AA64PFR1_EL1) { val &= ~(0xfUL << ID_AA64PFR1_MTE_SHIFT); } else if (id == SYS_ID_AA64ISAR1_EL1 && !vcpu_has_ptrauth(vcpu)) { @@ -1130,10 +1036,15 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, (0xfUL << ID_AA64ISAR1_GPA_SHIFT) | (0xfUL << ID_AA64ISAR1_GPI_SHIFT)); } else if (id == SYS_ID_AA64DFR0_EL1) { + u64 cap = 0; + /* Limit guests to PMUv3 for ARMv8.1 */ + if (kvm_vcpu_has_pmu(vcpu)) + cap = ID_AA64DFR0_PMUVER_8_1; + val = cpuid_feature_cap_perfmon_field(val, ID_AA64DFR0_PMUVER_SHIFT, - ID_AA64DFR0_PMUVER_8_1); + cap); } else if (id == SYS_ID_DFR0_EL1) { /* Limit guests to PMUv3 for ARMv8.1 */ val = cpuid_feature_cap_perfmon_field(val, @@ -1209,9 +1120,9 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, void __user *uaddr) { const u64 id = sys_reg_to_index(rd); + u8 csv2, csv3; int err; u64 val; - u8 csv2; err = reg_from_user(&val, uaddr, id); if (err) @@ -1227,13 +1138,21 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, (csv2 && arm64_get_spectre_v2_state() != SPECTRE_UNAFFECTED)) return -EINVAL; - /* We can only differ with CSV2, and anything else is an 
error */ + /* Same thing for CSV3 */ + csv3 = cpuid_feature_extract_unsigned_field(val, ID_AA64PFR0_CSV3_SHIFT); + if (csv3 > 1 || + (csv3 && arm64_get_meltdown_state() != SPECTRE_UNAFFECTED)) + return -EINVAL; + + /* We can only differ with CSV[23], and anything else is an error */ val ^= read_id_reg(vcpu, rd, false); - val &= ~(0xFUL << ID_AA64PFR0_CSV2_SHIFT); + val &= ~((0xFUL << ID_AA64PFR0_CSV2_SHIFT) | + (0xFUL << ID_AA64PFR0_CSV3_SHIFT)); if (val) return -EINVAL; vcpu->kvm->arch.pfr0_csv2 = csv2; + vcpu->kvm->arch.pfr0_csv3 = csv3 ; return 0; } @@ -1327,10 +1246,6 @@ static bool access_csselr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { int reg = r->reg; - /* See the 32bit mapping in kvm_host.h */ - if (p->is_aarch32) - reg = r->reg / 2; - if (p->is_write) vcpu_write_sys_reg(vcpu, p->regval, reg); else @@ -1801,66 +1716,27 @@ static bool trap_dbgidr(struct kvm_vcpu *vcpu, } } -static bool trap_debug32(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *r) -{ - if (p->is_write) { - vcpu_cp14(vcpu, r->reg) = p->regval; - vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; - } else { - p->regval = vcpu_cp14(vcpu, r->reg); - } - - return true; -} - -/* AArch32 debug register mappings +/* + * AArch32 debug register mappings * * AArch32 DBGBVRn is mapped to DBGBVRn_EL1[31:0] * AArch32 DBGBXVRn is mapped to DBGBVRn_EL1[63:32] * - * All control registers and watchpoint value registers are mapped to - * the lower 32 bits of their AArch64 equivalents. We share the trap - * handlers with the above AArch64 code which checks what mode the - * system is in. + * None of the other registers share their location, so treat them as + * if they were 64bit. 
*/ - -static bool trap_xvr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) -{ - u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; - - if (p->is_write) { - u64 val = *dbg_reg; - - val &= 0xffffffffUL; - val |= p->regval << 32; - *dbg_reg = val; - - vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY; - } else { - p->regval = *dbg_reg >> 32; - } - - trace_trap_reg(__func__, rd->reg, p->is_write, *dbg_reg); - - return true; -} - -#define DBG_BCR_BVR_WCR_WVR(n) \ - /* DBGBVRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \ - /* DBGBCRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \ - /* DBGWVRn */ \ - { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \ - /* DBGWCRn */ \ +#define DBG_BCR_BVR_WCR_WVR(n) \ + /* DBGBVRn */ \ + { AA32(LO), Op1( 0), CRn( 0), CRm((n)), Op2( 4), trap_bvr, NULL, n }, \ + /* DBGBCRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 5), trap_bcr, NULL, n }, \ + /* DBGWVRn */ \ + { Op1( 0), CRn( 0), CRm((n)), Op2( 6), trap_wvr, NULL, n }, \ + /* DBGWCRn */ \ { Op1( 0), CRn( 0), CRm((n)), Op2( 7), trap_wcr, NULL, n } -#define DBGBXVR(n) \ - { Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_xvr, NULL, n } +#define DBGBXVR(n) \ + { AA32(HI), Op1( 0), CRn( 1), CRm((n)), Op2( 1), trap_bvr, NULL, n } /* * Trapped cp14 registers. 
We generally ignore most of the external @@ -1878,9 +1754,9 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 1), Op2( 0), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(1), /* DBGDCCINT */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug32, NULL, cp14_DBGDCCINT }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), trap_debug_regs, NULL, MDCCINT_EL1 }, /* DBGDSCRext */ - { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug32, NULL, cp14_DBGDSCRext }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 2), trap_debug_regs, NULL, MDSCR_EL1 }, DBG_BCR_BVR_WCR_WVR(2), /* DBGDTR[RT]Xint */ { Op1( 0), CRn( 0), CRm( 3), Op2( 0), trap_raz_wi }, @@ -1895,7 +1771,7 @@ static const struct sys_reg_desc cp14_regs[] = { { Op1( 0), CRn( 0), CRm( 6), Op2( 2), trap_raz_wi }, DBG_BCR_BVR_WCR_WVR(6), /* DBGVCR */ - { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug32, NULL, cp14_DBGVCR }, + { Op1( 0), CRn( 0), CRm( 7), Op2( 0), trap_debug_regs, NULL, DBGVCR32_EL2 }, DBG_BCR_BVR_WCR_WVR(7), DBG_BCR_BVR_WCR_WVR(8), DBG_BCR_BVR_WCR_WVR(9), @@ -1981,19 +1857,29 @@ static const struct sys_reg_desc cp14_64_regs[] = { */ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 0), CRm( 0), Op2( 1), access_ctr }, - { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR }, - { Op1( 0), CRn( 1), CRm( 0), Op2( 1), access_actlr }, - { Op1( 0), CRn( 1), CRm( 0), Op2( 3), access_actlr }, - { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, - { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 }, - { Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, c2_TTBCR }, - { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, c3_DACR }, - { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, c5_DFSR }, - { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, c5_IFSR }, - { Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, c5_ADFSR }, - { Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, c5_AIFSR }, - { Op1( 0), CRn( 6), CRm( 0), Op2( 
0), access_vm_reg, NULL, c6_DFAR }, - { Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, c6_IFAR }, + { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, SCTLR_EL1 }, + /* ACTLR */ + { AA32(LO), Op1( 0), CRn( 1), CRm( 0), Op2( 1), access_actlr, NULL, ACTLR_EL1 }, + /* ACTLR2 */ + { AA32(HI), Op1( 0), CRn( 1), CRm( 0), Op2( 3), access_actlr, NULL, ACTLR_EL1 }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 }, + { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, TTBR1_EL1 }, + /* TTBCR */ + { AA32(LO), Op1( 0), CRn( 2), CRm( 0), Op2( 2), access_vm_reg, NULL, TCR_EL1 }, + /* TTBCR2 */ + { AA32(HI), Op1( 0), CRn( 2), CRm( 0), Op2( 3), access_vm_reg, NULL, TCR_EL1 }, + { Op1( 0), CRn( 3), CRm( 0), Op2( 0), access_vm_reg, NULL, DACR32_EL2 }, + /* DFSR */ + { Op1( 0), CRn( 5), CRm( 0), Op2( 0), access_vm_reg, NULL, ESR_EL1 }, + { Op1( 0), CRn( 5), CRm( 0), Op2( 1), access_vm_reg, NULL, IFSR32_EL2 }, + /* ADFSR */ + { Op1( 0), CRn( 5), CRm( 1), Op2( 0), access_vm_reg, NULL, AFSR0_EL1 }, + /* AIFSR */ + { Op1( 0), CRn( 5), CRm( 1), Op2( 1), access_vm_reg, NULL, AFSR1_EL1 }, + /* DFAR */ + { AA32(LO), Op1( 0), CRn( 6), CRm( 0), Op2( 0), access_vm_reg, NULL, FAR_EL1 }, + /* IFAR */ + { AA32(HI), Op1( 0), CRn( 6), CRm( 0), Op2( 2), access_vm_reg, NULL, FAR_EL1 }, /* * DC{C,I,CI}SW operations: @@ -2019,15 +1905,19 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn( 9), CRm(14), Op2( 2), access_pminten }, { Op1( 0), CRn( 9), CRm(14), Op2( 3), access_pmovs }, - { Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, c10_PRRR }, - { Op1( 0), CRn(10), CRm( 2), Op2( 1), access_vm_reg, NULL, c10_NMRR }, - { Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, c10_AMAIR0 }, - { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 }, + /* PRRR/MAIR0 */ + { AA32(LO), Op1( 0), CRn(10), CRm( 2), Op2( 0), access_vm_reg, NULL, MAIR_EL1 }, + /* NMRR/MAIR1 */ + { AA32(HI), Op1( 0), CRn(10), CRm( 2), Op2( 1), 
access_vm_reg, NULL, MAIR_EL1 }, + /* AMAIR0 */ + { AA32(LO), Op1( 0), CRn(10), CRm( 3), Op2( 0), access_vm_reg, NULL, AMAIR_EL1 }, + /* AMAIR1 */ + { AA32(HI), Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, AMAIR_EL1 }, /* ICC_SRE */ { Op1( 0), CRn(12), CRm(12), Op2( 5), access_gic_sre }, - { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, + { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, CONTEXTIDR_EL1 }, /* Arch Tmers */ { SYS_DESC(SYS_AARCH32_CNTP_TVAL), access_arch_timer }, @@ -2102,14 +1992,14 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1(1), CRn( 0), CRm( 0), Op2(0), access_ccsidr }, { Op1(1), CRn( 0), CRm( 0), Op2(1), access_clidr }, - { Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, c0_CSSELR }, + { Op1(2), CRn( 0), CRm( 0), Op2(0), access_csselr, NULL, CSSELR_EL1 }, }; static const struct sys_reg_desc cp15_64_regs[] = { - { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 }, + { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR0_EL1 }, { Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr }, { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */ - { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 }, + { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, TTBR1_EL1 }, { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */ { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */ { SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer }, @@ -2180,7 +2070,7 @@ static void perform_access(struct kvm_vcpu *vcpu, /* Skip instruction if instructed so */ if (likely(r->access(vcpu, params, r))) - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_incr_pc(vcpu); } /* @@ -2253,8 +2143,6 @@ static int kvm_handle_cp_64(struct kvm_vcpu *vcpu, int Rt = kvm_vcpu_sys_get_rt(vcpu); int Rt2 = (esr >> 10) & 0x1f; - params.is_aarch32 = true; - params.is_32bit = false; params.CRm = (esr >> 1) & 0xf; 
params.is_write = ((esr & 1) == 0); @@ -2304,8 +2192,6 @@ static int kvm_handle_cp_32(struct kvm_vcpu *vcpu, u32 esr = kvm_vcpu_get_esr(vcpu); int Rt = kvm_vcpu_sys_get_rt(vcpu); - params.is_aarch32 = true; - params.is_32bit = true; params.CRm = (esr >> 1) & 0xf; params.regval = vcpu_get_reg(vcpu, Rt); params.is_write = ((esr & 1) == 0); @@ -2399,8 +2285,6 @@ int kvm_handle_sys_reg(struct kvm_vcpu *vcpu) trace_kvm_handle_sys_reg(esr); - params.is_aarch32 = false; - params.is_32bit = false; params.Op0 = (esr >> 20) & 3; params.Op1 = (esr >> 14) & 0x7; params.CRn = (esr >> 10) & 0xf; diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index 0f95964339b1..9d0621417c2a 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -19,14 +19,18 @@ struct sys_reg_params { u8 Op2; u64 regval; bool is_write; - bool is_aarch32; - bool is_32bit; /* Only valid if is_aarch32 is true */ }; struct sys_reg_desc { /* Sysreg string for debug */ const char *name; + enum { + AA32_ZEROHIGH, + AA32_LO, + AA32_HI, + } aarch32_map; + /* MRS/MSR instruction which accesses it. */ u8 Op0; u8 Op1; @@ -153,6 +157,7 @@ const struct sys_reg_desc *find_reg_by_id(u64 id, const struct sys_reg_desc table[], unsigned int num); +#define AA32(_x) .aarch32_map = AA32_##_x #define Op0(_x) .Op0 = _x #define Op1(_x) .Op1 = _x #define CRn(_x) .CRn = _x diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index e0404bcab019..d8cc51bd60bf 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -11,6 +11,7 @@ #include <asm/debug-monitors.h> #include <asm/insn.h> #include <asm/kvm_mmu.h> +#include <asm/memory.h> /* * The LSB of the HYP VA tag @@ -23,6 +24,30 @@ static u64 tag_val; static u64 va_mask; /* + * Compute HYP VA by using the same computation as kern_hyp_va(). + */ +static u64 __early_kern_hyp_va(u64 addr) +{ + addr &= va_mask; + addr |= tag_val << tag_lsb; + return addr; +} + +/* + * Store a hyp VA <-> PA offset into a hyp-owned variable. 
+ */ +static void init_hyp_physvirt_offset(void) +{ + extern s64 kvm_nvhe_sym(hyp_physvirt_offset); + u64 kern_va, hyp_va; + + /* Compute the offset from the hyp VA and PA of a random symbol. */ + kern_va = (u64)kvm_ksym_ref(__hyp_text_start); + hyp_va = __early_kern_hyp_va(kern_va); + CHOOSE_NVHE_SYM(hyp_physvirt_offset) = (s64)__pa(kern_va) - (s64)hyp_va; +} + +/* * We want to generate a hyp VA with the following format (with V == * vabits_actual): * @@ -53,6 +78,8 @@ __init void kvm_compute_layout(void) tag_val |= get_random_long() & GENMASK_ULL(vabits_actual - 2, tag_lsb); } tag_val >>= tag_lsb; + + init_hyp_physvirt_offset(); } static u32 compute_instruction(int n, u32 rd, u32 rn) @@ -131,28 +158,21 @@ void __init kvm_update_va_mask(struct alt_instr *alt, } } -void *__kvm_bp_vect_base; -int __kvm_harden_el2_vector_slot; - void kvm_patch_vector_branch(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst) { u64 addr; u32 insn; - BUG_ON(nr_inst != 5); + BUG_ON(nr_inst != 4); - if (has_vhe() || !cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) { - WARN_ON_ONCE(cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)); + if (!cpus_have_const_cap(ARM64_SPECTRE_V3A) || WARN_ON_ONCE(has_vhe())) return; - } /* * Compute HYP VA by using the same computation as kern_hyp_va() */ - addr = (uintptr_t)kvm_ksym_ref(__kvm_hyp_vector); - addr &= va_mask; - addr |= tag_val << tag_lsb; + addr = __early_kern_hyp_va((u64)kvm_ksym_ref(__kvm_hyp_vector)); /* Use PC[10:7] to branch to the same vector in KVM */ addr |= ((u64)origptr & GENMASK_ULL(10, 7)); @@ -163,15 +183,6 @@ void kvm_patch_vector_branch(struct alt_instr *alt, */ addr += KVM_VECTOR_PREAMBLE; - /* stp x0, x1, [sp, #-16]! 
*/ - insn = aarch64_insn_gen_load_store_pair(AARCH64_INSN_REG_0, - AARCH64_INSN_REG_1, - AARCH64_INSN_REG_SP, - -16, - AARCH64_INSN_VARIANT_64BIT, - AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX); - *updptr++ = cpu_to_le32(insn); - /* movz x0, #(addr & 0xffff) */ insn = aarch64_insn_gen_movewide(AARCH64_INSN_REG_0, (u16)addr, @@ -201,3 +212,58 @@ void kvm_patch_vector_branch(struct alt_instr *alt, AARCH64_INSN_BRANCH_NOLINK); *updptr++ = cpu_to_le32(insn); } + +static void generate_mov_q(u64 val, __le32 *origptr, __le32 *updptr, int nr_inst) +{ + u32 insn, oinsn, rd; + + BUG_ON(nr_inst != 4); + + /* Compute target register */ + oinsn = le32_to_cpu(*origptr); + rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, oinsn); + + /* movz rd, #(val & 0xffff) */ + insn = aarch64_insn_gen_movewide(rd, + (u16)val, + 0, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_ZERO); + *updptr++ = cpu_to_le32(insn); + + /* movk rd, #((val >> 16) & 0xffff), lsl #16 */ + insn = aarch64_insn_gen_movewide(rd, + (u16)(val >> 16), + 16, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_KEEP); + *updptr++ = cpu_to_le32(insn); + + /* movk rd, #((val >> 32) & 0xffff), lsl #32 */ + insn = aarch64_insn_gen_movewide(rd, + (u16)(val >> 32), + 32, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_KEEP); + *updptr++ = cpu_to_le32(insn); + + /* movk rd, #((val >> 48) & 0xffff), lsl #48 */ + insn = aarch64_insn_gen_movewide(rd, + (u16)(val >> 48), + 48, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_KEEP); + *updptr++ = cpu_to_le32(insn); +} + +void kvm_update_kimg_phys_offset(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + generate_mov_q(kimage_voffset + PHYS_OFFSET, origptr, updptr, nr_inst); +} + +void kvm_get_kimage_voffset(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + generate_mov_q(kimage_voffset, origptr, updptr, nr_inst); +} diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c index 
2f92bdcb1188..07d5271e9f05 100644 --- a/arch/arm64/kvm/vgic-sys-reg-v3.c +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c @@ -268,8 +268,6 @@ int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, params.regval = *reg; params.is_write = is_write; - params.is_aarch32 = false; - params.is_32bit = false; if (find_reg_by_id(sysreg, &params, gic_v3_icc_reg_descs, ARRAY_SIZE(gic_v3_icc_reg_descs))) @@ -288,8 +286,6 @@ int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, u64 id, if (is_write) params.regval = *reg; params.is_write = is_write; - params.is_aarch32 = false; - params.is_32bit = false; r = find_reg_by_id(sysreg, &params, gic_v3_icc_reg_descs, ARRAY_SIZE(gic_v3_icc_reg_descs)); diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index b5fa73c9fd35..66508b03094f 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -353,6 +353,18 @@ int vgic_v4_load(struct kvm_vcpu *vcpu) return err; } +void vgic_v4_commit(struct kvm_vcpu *vcpu) +{ + struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + + /* + * No need to wait for the vPE to be ready across a shallow guest + * exit, as only a vcpu_put will invalidate it.
+ */ + if (!vpe->ready) + its_commit_vpe(vpe); +} + static struct vgic_its *vgic_get_its(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *irq_entry) { diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index c3643b7f101b..1c597c9885fa 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -915,6 +915,9 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) if (can_access_vgic_from_kernel()) vgic_restore_state(vcpu); + + if (vgic_supports_direct_msis(vcpu->kvm)) + vgic_v4_commit(vcpu); } void kvm_vgic_load(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index 48a3a26eff66..af9afcbec92c 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -24,20 +24,20 @@ SYM_FUNC_START(__arch_clear_user) subs x1, x1, #8 b.mi 2f 1: -uao_user_alternative 9f, str, sttr, xzr, x0, 8 +user_ldst 9f, sttr, xzr, x0, 8 subs x1, x1, #8 b.pl 1b 2: adds x1, x1, #4 b.mi 3f -uao_user_alternative 9f, str, sttr, wzr, x0, 4 +user_ldst 9f, sttr, wzr, x0, 4 sub x1, x1, #4 3: adds x1, x1, #2 b.mi 4f -uao_user_alternative 9f, strh, sttrh, wzr, x0, 2 +user_ldst 9f, sttrh, wzr, x0, 2 sub x1, x1, #2 4: adds x1, x1, #1 b.mi 5f -uao_user_alternative 9f, strb, sttrb, wzr, x0, 0 +user_ldst 9f, sttrb, wzr, x0, 0 5: mov x0, #0 ret SYM_FUNC_END(__arch_clear_user) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 0f8a3a9e3795..95cd62d67371 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -21,7 +21,7 @@ */ .macro ldrb1 reg, ptr, val - uao_user_alternative 9998f, ldrb, ldtrb, \reg, \ptr, \val + user_ldst 9998f, ldtrb, \reg, \ptr, \val .endm .macro strb1 reg, ptr, val @@ -29,7 +29,7 @@ .endm .macro ldrh1 reg, ptr, val - uao_user_alternative 9998f, ldrh, ldtrh, \reg, \ptr, \val + user_ldst 9998f, ldtrh, \reg, \ptr, \val .endm .macro strh1 reg, ptr, val @@ -37,7 +37,7 @@ .endm .macro ldr1 reg, ptr, val - uao_user_alternative 9998f, ldr, 
ldtr, \reg, \ptr, \val + user_ldst 9998f, ldtr, \reg, \ptr, \val .endm .macro str1 reg, ptr, val @@ -45,7 +45,7 @@ .endm .macro ldp1 reg1, reg2, ptr, val - uao_ldp 9998f, \reg1, \reg2, \ptr, \val + user_ldp 9998f, \reg1, \reg2, \ptr, \val .endm .macro stp1 reg1, reg2, ptr, val diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 80e37ada0ee1..1f61cd0df062 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -22,35 +22,35 @@ * x0 - bytes not copied */ .macro ldrb1 reg, ptr, val - uao_user_alternative 9998f, ldrb, ldtrb, \reg, \ptr, \val + user_ldst 9998f, ldtrb, \reg, \ptr, \val .endm .macro strb1 reg, ptr, val - uao_user_alternative 9998f, strb, sttrb, \reg, \ptr, \val + user_ldst 9998f, sttrb, \reg, \ptr, \val .endm .macro ldrh1 reg, ptr, val - uao_user_alternative 9998f, ldrh, ldtrh, \reg, \ptr, \val + user_ldst 9998f, ldtrh, \reg, \ptr, \val .endm .macro strh1 reg, ptr, val - uao_user_alternative 9998f, strh, sttrh, \reg, \ptr, \val + user_ldst 9998f, sttrh, \reg, \ptr, \val .endm .macro ldr1 reg, ptr, val - uao_user_alternative 9998f, ldr, ldtr, \reg, \ptr, \val + user_ldst 9998f, ldtr, \reg, \ptr, \val .endm .macro str1 reg, ptr, val - uao_user_alternative 9998f, str, sttr, \reg, \ptr, \val + user_ldst 9998f, sttr, \reg, \ptr, \val .endm .macro ldp1 reg1, reg2, ptr, val - uao_ldp 9998f, \reg1, \reg2, \ptr, \val + user_ldp 9998f, \reg1, \reg2, \ptr, \val .endm .macro stp1 reg1, reg2, ptr, val - uao_stp 9998f, \reg1, \reg2, \ptr, \val + user_stp 9998f, \reg1, \reg2, \ptr, \val .endm end .req x5 diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 4ec59704b8f2..043da90f5dd7 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -24,7 +24,7 @@ .endm .macro strb1 reg, ptr, val - uao_user_alternative 9998f, strb, sttrb, \reg, \ptr, \val + user_ldst 9998f, sttrb, \reg, \ptr, \val .endm .macro ldrh1 reg, ptr, val @@ -32,7 +32,7 @@ .endm .macro strh1 reg, 
ptr, val - uao_user_alternative 9998f, strh, sttrh, \reg, \ptr, \val + user_ldst 9998f, sttrh, \reg, \ptr, \val .endm .macro ldr1 reg, ptr, val @@ -40,7 +40,7 @@ .endm .macro str1 reg, ptr, val - uao_user_alternative 9998f, str, sttr, \reg, \ptr, \val + user_ldst 9998f, sttr, \reg, \ptr, \val .endm .macro ldp1 reg1, reg2, ptr, val @@ -48,7 +48,7 @@ .endm .macro stp1 reg1, reg2, ptr, val - uao_stp 9998f, \reg1, \reg2, \ptr, \val + user_stp 9998f, \reg1, \reg2, \ptr, \val .endm end .req x5 diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S index 03ca6d8b8670..351537c12f36 100644 --- a/arch/arm64/lib/mte.S +++ b/arch/arm64/lib/mte.S @@ -4,7 +4,7 @@ */ #include <linux/linkage.h> -#include <asm/alternative.h> +#include <asm/asm-uaccess.h> #include <asm/assembler.h> #include <asm/mte.h> #include <asm/page.h> @@ -67,7 +67,7 @@ SYM_FUNC_START(mte_copy_tags_from_user) mov x3, x1 cbz x2, 2f 1: - uao_user_alternative 2f, ldrb, ldtrb, w4, x1, 0 + user_ldst 2f, ldtrb, w4, x1, 0 lsl x4, x4, #MTE_TAG_SHIFT stg x4, [x0], #MTE_GRANULE_SIZE add x1, x1, #1 @@ -94,7 +94,7 @@ SYM_FUNC_START(mte_copy_tags_to_user) 1: ldg x4, [x1] ubfx x4, x4, #MTE_TAG_SHIFT, #MTE_TAG_SIZE - uao_user_alternative 2f, strb, sttrb, w4, x0, 0 + user_ldst 2f, sttrb, w4, x0, 0 add x0, x0, #1 add x1, x1, #MTE_GRANULE_SIZE subs x2, x2, #1 diff --git a/arch/arm64/lib/uaccess_flushcache.c b/arch/arm64/lib/uaccess_flushcache.c index bfa30b75b2b8..c83bb5a4aad2 100644 --- a/arch/arm64/lib/uaccess_flushcache.c +++ b/arch/arm64/lib/uaccess_flushcache.c @@ -30,9 +30,7 @@ unsigned long __copy_user_flushcache(void *to, const void __user *from, { unsigned long rc; - uaccess_enable_not_uao(); - rc = __arch_copy_from_user(to, from, n); - uaccess_disable_not_uao(); + rc = raw_copy_from_user(to, from, n); /* See above */ __clean_dcache_area_pop(to, n - rc); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 1ee94002801f..8dc17e650c8e 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -479,11 
+479,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, } if (is_ttbr0_addr(addr) && is_el1_permission_fault(addr, esr, regs)) { - /* regs->orig_addr_limit may be 0 if we entered from EL0 */ - if (regs->orig_addr_limit == KERNEL_DS) - die_kernel_fault("access to user memory with fs=KERNEL_DS", - addr, esr, regs); - if (is_el1_instruction_abort(esr)) die_kernel_fault("execution of user memory", addr, esr, regs); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 1c0f3e02f731..ca692a815731 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1444,11 +1444,28 @@ static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size) free_empty_tables(start, end, PAGE_OFFSET, PAGE_END); } +static bool inside_linear_region(u64 start, u64 size) +{ + /* + * Linear mapping region is the range [PAGE_OFFSET..(PAGE_END - 1)] + * accommodating both its ends but excluding PAGE_END. Max physical + * range which can be mapped inside this linear mapping range, must + * also be derived from its end points. 
+ */ + return start >= __pa(_PAGE_OFFSET(vabits_actual)) && + (start + size - 1) <= __pa(PAGE_END - 1); +} + int arch_add_memory(int nid, u64 start, u64 size, struct mhp_params *params) { int ret, flags = 0; + if (!inside_linear_region(start, size)) { + pr_err("[%llx %llx] is outside linear mapping region\n", start, start + size); + return -EINVAL; + } + if (rodata_full || debug_pagealloc_enabled()) flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 23c326a06b2d..29f064e117d9 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -489,6 +489,6 @@ SYM_FUNC_START(__cpu_setup) /* * Prepare SCTLR */ - mov_q x0, SCTLR_EL1_SET + mov_q x0, INIT_SCTLR_EL1_MMU_ON ret // return to head.S SYM_FUNC_END(__cpu_setup) diff --git a/arch/csky/kernel/perf_regs.c b/arch/csky/kernel/perf_regs.c index eb32838b8210..09b7f88a2d6a 100644 --- a/arch/csky/kernel/perf_regs.c +++ b/arch/csky/kernel/perf_regs.c @@ -32,8 +32,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 9ed4fcccf8a9..7b25548ec42b 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -1336,7 +1336,7 @@ static void dump_trace_imc_data(struct perf_event *event) /* If this is a valid record, create the sample */ struct perf_output_handle handle; - if (perf_output_begin(&handle, event, header.size)) + if (perf_output_begin(&handle, &data, event, header.size)) return; perf_output_sample(&handle, &header, &data, event); diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index 8e53f2fc3fe0..6f681b105eec 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -144,8 +144,7 @@ u64 perf_reg_abi(struct 
task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = (regs_user->regs) ? perf_reg_abi(current) : diff --git a/arch/riscv/kernel/perf_regs.c b/arch/riscv/kernel/perf_regs.c index 04a38fbeb9c7..fd304a248de6 100644 --- a/arch/riscv/kernel/perf_regs.c +++ b/arch/riscv/kernel/perf_regs.c @@ -36,8 +36,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 4f9e4626df55..00255ae3979d 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -672,7 +672,7 @@ static void cpumsf_output_event_pid(struct perf_event *event, rcu_read_lock(); perf_prepare_sample(&header, data, event, regs); - if (perf_output_begin(&handle, event, header.size)) + if (perf_output_begin(&handle, data, event, header.size)) goto out; /* Update the process ID (see also kernel/events/core.c) */ diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c index 4352a504f235..6e9e5d5e927e 100644 --- a/arch/s390/kernel/perf_regs.c +++ b/arch/s390/kernel/perf_regs.c @@ -53,8 +53,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { /* * Use the regs from the first interruption and let diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h index 5393e13e07e0..2bbf28cf3aa9 100644 --- a/arch/um/include/asm/pgalloc.h +++ b/arch/um/include/asm/pgalloc.h @@ -33,7 +33,13 @@ do { \ } while (0) #ifdef CONFIG_3_LEVEL_PGTABLES -#define __pmd_free_tlb(tlb,x, address) 
tlb_remove_page((tlb),virt_to_page(x)) + +#define __pmd_free_tlb(tlb, pmd, address) \ +do { \ + pgtable_pmd_page_dtor(virt_to_page(pmd)); \ + tlb_remove_page((tlb),virt_to_page(pmd)); \ +} while (0) \ + #endif #endif diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index f1926e9f2143..af457f8cb29d 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2630,7 +2630,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) u64 pebs_enabled = cpuc->pebs_enabled; handled++; - x86_pmu.drain_pebs(regs); + x86_pmu.drain_pebs(regs, &data); status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; /* @@ -4987,6 +4987,12 @@ __init int intel_pmu_init(void) x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ + if (version >= 5) { + x86_pmu.intel_cap.anythread_deprecated = edx.split.anythread_deprecated; + if (x86_pmu.intel_cap.anythread_deprecated) + pr_cont(" AnyThread deprecated, "); + } + /* * Install the hw-cache-events table: */ @@ -5512,6 +5518,10 @@ __init int intel_pmu_init(void) x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; + /* AnyThread may be deprecated on arch perfmon v5 or later */ + if (x86_pmu.intel_cap.anythread_deprecated) + x86_pmu.format_attrs = intel_arch_formats_attr; + if (x86_pmu.event_constraints) { /* * event on fixed counter2 (REF_CYCLES) only works on this diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 404315df1e16..b47cc4226934 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -642,8 +642,8 @@ int intel_pmu_drain_bts_buffer(void) rcu_read_lock(); perf_prepare_sample(&header, &data, event, &regs); - if (perf_output_begin(&handle, event, header.size * - (top - base - skip))) + if (perf_output_begin(&handle, &data, event, + header.size * (top - base - skip))) goto unlock; for (at = base; at < top; at++) { @@ -670,7 +670,9 @@ unlock: static inline void
intel_pmu_drain_pebs_buffer(void) { - x86_pmu.drain_pebs(NULL); + struct perf_sample_data data; + + x86_pmu.drain_pebs(NULL, &data); } /* @@ -1719,23 +1721,24 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count) return 0; } -static void __intel_pmu_pebs_event(struct perf_event *event, - struct pt_regs *iregs, - void *base, void *top, - int bit, int count, - void (*setup_sample)(struct perf_event *, - struct pt_regs *, - void *, - struct perf_sample_data *, - struct pt_regs *)) +static __always_inline void +__intel_pmu_pebs_event(struct perf_event *event, + struct pt_regs *iregs, + struct perf_sample_data *data, + void *base, void *top, + int bit, int count, + void (*setup_sample)(struct perf_event *, + struct pt_regs *, + void *, + struct perf_sample_data *, + struct pt_regs *)) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - struct perf_sample_data data; struct x86_perf_regs perf_regs; struct pt_regs *regs = &perf_regs.regs; void *at = get_next_pebs_record_by_bit(base, top, bit); - struct pt_regs dummy_iregs; + static struct pt_regs dummy_iregs; if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { /* @@ -1752,14 +1755,14 @@ static void __intel_pmu_pebs_event(struct perf_event *event, iregs = &dummy_iregs; while (count > 1) { - setup_sample(event, iregs, at, &data, regs); - perf_event_output(event, &data, regs); + setup_sample(event, iregs, at, data, regs); + perf_event_output(event, data, regs); at += cpuc->pebs_record_size; at = get_next_pebs_record_by_bit(at, top, bit); count--; } - setup_sample(event, iregs, at, &data, regs); + setup_sample(event, iregs, at, data, regs); if (iregs == &dummy_iregs) { /* * The PEBS records may be drained in the non-overflow context, @@ -1767,18 +1770,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event, * last record the same as other PEBS records, and doesn't * invoke the generic overflow handler. 
*/ - perf_event_output(event, &data, regs); + perf_event_output(event, data, regs); } else { /* * All but the last records are processed. * The last one is left to be able to call the overflow handler. */ - if (perf_event_overflow(event, &data, regs)) + if (perf_event_overflow(event, data, regs)) x86_pmu_stop(event, 0); } } -static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) +static void intel_pmu_drain_pebs_core(struct pt_regs *iregs, struct perf_sample_data *data) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; @@ -1812,7 +1815,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) return; } - __intel_pmu_pebs_event(event, iregs, at, top, 0, n, + __intel_pmu_pebs_event(event, iregs, data, at, top, 0, n, setup_pebs_fixed_sample_data); } @@ -1835,7 +1838,7 @@ static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int } } -static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_data *data) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct debug_store *ds = cpuc->ds; @@ -1942,14 +1945,14 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) } if (counts[bit]) { - __intel_pmu_pebs_event(event, iregs, base, + __intel_pmu_pebs_event(event, iregs, data, base, top, bit, counts[bit], setup_pebs_fixed_sample_data); } } } -static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs) +static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data) { short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {}; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -1997,7 +2000,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs) if (WARN_ON_ONCE(!event->attr.precise_ip)) continue; - __intel_pmu_pebs_event(event, iregs, base, + __intel_pmu_pebs_event(event, iregs, data, base, top, bit, counts[bit], setup_pebs_adaptive_sample_data); 
} diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 39e632ed6ca9..bbd1120ae161 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -475,7 +475,7 @@ enum perf_snb_uncore_imc_freerunning_types { static struct freerunning_counters snb_uncore_imc_freerunning[] = { [SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_READS_BASE, 0x0, 0x0, 1, 32 }, - [SNB_PCI_UNCORE_IMC_DATA_READS] = { SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE, + [SNB_PCI_UNCORE_IMC_DATA_WRITES] = { SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE, 0x0, 0x0, 1, 32 }, [SNB_PCI_UNCORE_IMC_GT_REQUESTS] = { SNB_UNCORE_PCI_IMC_GT_REQUESTS_BASE, 0x0, 0x0, 1, 32 }, diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index ee2b9b9fc2a5..6a8edfe59b09 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -585,6 +585,7 @@ union perf_capabilities { u64 pebs_baseline:1; u64 perf_metrics:1; u64 pebs_output_pt_available:1; + u64 anythread_deprecated:1; }; u64 capabilities; }; @@ -727,7 +728,7 @@ struct x86_pmu { int pebs_record_size; int pebs_buffer_size; int max_pebs_events; - void (*drain_pebs)(struct pt_regs *regs); + void (*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data); struct event_constraint *pebs_constraints; void (*pebs_aliases)(struct perf_event *event); unsigned long large_pebs_flags; diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 6960cd6d1f23..b9a7fd0a27e2 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -137,7 +137,9 @@ union cpuid10_edx { struct { unsigned int num_counters_fixed:5; unsigned int bit_width_fixed:8; - unsigned int reserved:19; + unsigned int reserved1:2; + unsigned int anythread_deprecated:1; + unsigned int reserved2:16; } split; unsigned int full; }; diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 172d3e4a9e4b..648eb23fe7f0 100644 --- 
a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -2,14 +2,8 @@ #ifndef _ASM_X86_UV_UV_H #define _ASM_X86_UV_UV_H -#include <asm/tlbflush.h> - enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC}; -struct cpumask; -struct mm_struct; -struct flush_tlb_info; - #ifdef CONFIG_X86_UV #include <linux/efi.h> @@ -44,10 +38,6 @@ static inline int is_uv_system(void) { return 0; } static inline int is_uv_hubbed(int uv) { return 0; } static inline void uv_cpu_init(void) { } static inline void uv_system_init(void) { } -static inline const struct cpumask * -uv_flush_tlb_others(const struct cpumask *cpumask, - const struct flush_tlb_info *info) -{ return cpumask; } #endif /* X86_UV */ diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 3115caa7d7d0..1b98f8c12b96 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -33,7 +33,7 @@ static union uvh_apicid uvh_apicid; static int uv_node_id; /* Unpack AT/OEM/TABLE ID's to be NULL terminated strings */ -static u8 uv_archtype[UV_AT_SIZE]; +static u8 uv_archtype[UV_AT_SIZE + 1]; static u8 oem_id[ACPI_OEM_ID_SIZE + 1]; static u8 oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1]; @@ -320,7 +320,7 @@ static int __init decode_arch_type(unsigned long ptr) if (n > 0 && n < sizeof(uv_ate->archtype)) { pr_info("UV: UVarchtype received from BIOS\n"); - uv_stringify(UV_AT_SIZE, uv_archtype, uv_ate->archtype); + uv_stringify(sizeof(uv_archtype), uv_archtype, uv_ate->archtype); return 1; } return 0; @@ -378,7 +378,7 @@ static int __init uv_set_system_type(char *_oem_id, char *_oem_table_id) if (!early_get_arch_type()) /* If not use OEM ID for UVarchtype */ - uv_stringify(UV_AT_SIZE, uv_archtype, _oem_id); + uv_stringify(sizeof(uv_archtype), uv_archtype, oem_id); /* Check if not hubbed */ if (strncmp(uv_archtype, "SGI", 3) != 0) { diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index bb7e1132290b..f9e5352b3bef 100644 --- 
a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -101,8 +101,7 @@ u64 perf_reg_abi(struct task_struct *task) } void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); @@ -129,12 +128,20 @@ u64 perf_reg_abi(struct task_struct *task) return PERF_SAMPLE_REGS_ABI_64; } +static DEFINE_PER_CPU(struct pt_regs, nmi_user_regs); + void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { + struct pt_regs *regs_user_copy = this_cpu_ptr(&nmi_user_regs); struct pt_regs *user_regs = task_pt_regs(current); + if (!in_nmi()) { + regs_user->regs = user_regs; + regs_user->abi = perf_reg_abi(current); + return; + } + /* * If we're in an NMI that interrupted task_pt_regs setup, then * we can't sample user regs at all. This check isn't really diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index a22a3108b5f0..13036cf0b912 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -684,7 +684,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS); edx.split.bit_width_fixed = cap.bit_width_fixed; - edx.split.reserved = 0; + edx.split.anythread_deprecated = 1; + edx.split.reserved1 = 0; + edx.split.reserved2 = 0; entry->eax = eax.full; entry->ebx = cap.events_mask; diff --git a/block/genhd.c b/block/genhd.c index 0a273211fec2..9387f050c248 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -49,7 +49,7 @@ static void disk_release_events(struct gendisk *disk); * Set disk capacity and notify if the size is not currently * zero and will not be set to zero */ -void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, +bool set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, bool 
update_bdev) { sector_t capacity = get_capacity(disk); @@ -62,7 +62,10 @@ void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, char *envp[] = { "RESIZE=1", NULL }; kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); + return true; } + + return false; } EXPORT_SYMBOL_GPL(set_capacity_revalidate_and_notify); diff --git a/drivers/accessibility/speakup/main.c b/drivers/accessibility/speakup/main.c index be79b2135fac..48019660a096 100644 --- a/drivers/accessibility/speakup/main.c +++ b/drivers/accessibility/speakup/main.c @@ -357,7 +357,6 @@ static void speakup_cut(struct vc_data *vc) mark_cut_flag = 0; synth_printf("%s\n", spk_msg_get(MSG_CUT)); - speakup_clear_selection(); ret = speakup_set_selection(tty); switch (ret) { diff --git a/drivers/accessibility/speakup/selection.c b/drivers/accessibility/speakup/selection.c index 032f3264fba1..7df7afad5ab4 100644 --- a/drivers/accessibility/speakup/selection.c +++ b/drivers/accessibility/speakup/selection.c @@ -22,13 +22,6 @@ struct speakup_selection_work { struct tty_struct *tty; }; -void speakup_clear_selection(void) -{ - console_lock(); - clear_selection(); - console_unlock(); -} - static void __speakup_set_selection(struct work_struct *work) { struct speakup_selection_work *ssw = @@ -51,6 +44,10 @@ static void __speakup_set_selection(struct work_struct *work) goto unref; } + console_lock(); + clear_selection(); + console_unlock(); + set_selection_kernel(&sel, tty); unref: diff --git a/drivers/accessibility/speakup/speakup.h b/drivers/accessibility/speakup/speakup.h index 74fe49c2c511..33594f5a7983 100644 --- a/drivers/accessibility/speakup/speakup.h +++ b/drivers/accessibility/speakup/speakup.h @@ -70,7 +70,6 @@ void spk_do_flush(void); void speakup_start_ttys(void); void synth_buffer_add(u16 ch); void synth_buffer_clear(void); -void speakup_clear_selection(void); int speakup_set_selection(struct tty_struct *tty); void speakup_cancel_selection(void); int 
speakup_paste_selection(struct tty_struct *tty); diff --git a/drivers/accessibility/speakup/spk_ttyio.c b/drivers/accessibility/speakup/spk_ttyio.c index a831ff64f8ba..ecc39983e946 100644 --- a/drivers/accessibility/speakup/spk_ttyio.c +++ b/drivers/accessibility/speakup/spk_ttyio.c @@ -298,11 +298,13 @@ static unsigned char ttyio_in(int timeout) struct spk_ldisc_data *ldisc_data = speakup_tty->disc_data; char rv; - if (wait_for_completion_timeout(&ldisc_data->completion, + if (!timeout) { + if (!try_wait_for_completion(&ldisc_data->completion)) + return 0xff; + } else if (wait_for_completion_timeout(&ldisc_data->completion, usecs_to_jiffies(timeout)) == 0) { - if (timeout) - pr_warn("spk_ttyio: timeout (%d) while waiting for input\n", - timeout); + pr_warn("spk_ttyio: timeout (%d) while waiting for input\n", + timeout); return 0xff; } diff --git a/drivers/accessibility/speakup/spk_types.h b/drivers/accessibility/speakup/spk_types.h index 7398f1196e10..91fca3033a45 100644 --- a/drivers/accessibility/speakup/spk_types.h +++ b/drivers/accessibility/speakup/spk_types.h @@ -32,6 +32,10 @@ enum { E_NEW_DEFAULT, }; +/* + * Note: add new members at the end, speakupmap.h depends on the values of the + * enum starting from SPELL_DELAY (see inc_dec_var) + */ enum var_id_t { VERSION = 0, SYNTH, SILENT, SYNTH_DIRECT, KEYMAP, CHARS, @@ -42,9 +46,9 @@ enum var_id_t { SAY_CONTROL, SAY_WORD_CTL, NO_INTERRUPT, KEY_ECHO, SPELL_DELAY, PUNC_LEVEL, READING_PUNC, ATTRIB_BLEEP, BLEEPS, - RATE, PITCH, INFLECTION, VOL, TONE, PUNCT, VOICE, FREQUENCY, LANG, + RATE, PITCH, VOL, TONE, PUNCT, VOICE, FREQUENCY, LANG, DIRECT, PAUSE, - CAPS_START, CAPS_STOP, CHARTAB, + CAPS_START, CAPS_STOP, CHARTAB, INFLECTION, MAXVARS }; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index cb1191d6e945..a58084c2ed7c 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -255,7 +255,8 @@ static void loop_set_size(struct loop_device *lo, loff_t size) bd_set_nr_sectors(bdev, size); - 
set_capacity_revalidate_and_notify(lo->lo_disk, size, false); + if (!set_capacity_revalidate_and_notify(lo->lo_disk, size, false)) + kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); } static inline int diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index c4f9ccf5cc2a..aaae9220f3a0 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1518,6 +1518,7 @@ static void nbd_release(struct gendisk *disk, fmode_t mode) if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) && bdev->bd_openers == 0) nbd_disconnect_and_put(nbd); + bdput(bdev); nbd_config_put(nbd); nbd_put(nbd); diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index a2da8f768b94..1836cc56e357 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -435,12 +435,12 @@ static struct port_buffer *alloc_buf(struct virtio_device *vdev, size_t buf_size /* * Allocate DMA memory from ancestor. When a virtio * device is created by remoteproc, the DMA memory is - * associated with the grandparent device: - * vdev => rproc => platform-dev. + * associated with the parent device: + * virtioY => remoteprocX#vdevYbuffer. 
*/ - if (!vdev->dev.parent || !vdev->dev.parent->parent) + buf->dev = vdev->dev.parent; + if (!buf->dev) goto free_buf; - buf->dev = vdev->dev.parent->parent; /* Increase device refcnt to avoid freeing it */ get_device(buf->dev); diff --git a/drivers/clk/imx/clk-imx8mm.c b/drivers/clk/imx/clk-imx8mm.c index 0de0be0cf548..f358ad907299 100644 --- a/drivers/clk/imx/clk-imx8mm.c +++ b/drivers/clk/imx/clk-imx8mm.c @@ -443,9 +443,9 @@ static int imx8mm_clocks_probe(struct platform_device *pdev) hws[IMX8MM_CLK_A53_CORE] = imx_clk_hw_mux2("arm_a53_core", base + 0x9880, 24, 1, imx8mm_a53_core_sels, ARRAY_SIZE(imx8mm_a53_core_sels)); /* BUS */ - hws[IMX8MM_CLK_MAIN_AXI] = imx8m_clk_hw_composite_critical("main_axi", imx8mm_main_axi_sels, base + 0x8800); + hws[IMX8MM_CLK_MAIN_AXI] = imx8m_clk_hw_composite_bus_critical("main_axi", imx8mm_main_axi_sels, base + 0x8800); hws[IMX8MM_CLK_ENET_AXI] = imx8m_clk_hw_composite_bus("enet_axi", imx8mm_enet_axi_sels, base + 0x8880); - hws[IMX8MM_CLK_NAND_USDHC_BUS] = imx8m_clk_hw_composite_critical("nand_usdhc_bus", imx8mm_nand_usdhc_sels, base + 0x8900); + hws[IMX8MM_CLK_NAND_USDHC_BUS] = imx8m_clk_hw_composite_bus_critical("nand_usdhc_bus", imx8mm_nand_usdhc_sels, base + 0x8900); hws[IMX8MM_CLK_VPU_BUS] = imx8m_clk_hw_composite_bus("vpu_bus", imx8mm_vpu_bus_sels, base + 0x8980); hws[IMX8MM_CLK_DISP_AXI] = imx8m_clk_hw_composite_bus("disp_axi", imx8mm_disp_axi_sels, base + 0x8a00); hws[IMX8MM_CLK_DISP_APB] = imx8m_clk_hw_composite_bus("disp_apb", imx8mm_disp_apb_sels, base + 0x8a80); @@ -453,11 +453,11 @@ static int imx8mm_clocks_probe(struct platform_device *pdev) hws[IMX8MM_CLK_USB_BUS] = imx8m_clk_hw_composite_bus("usb_bus", imx8mm_usb_bus_sels, base + 0x8b80); hws[IMX8MM_CLK_GPU_AXI] = imx8m_clk_hw_composite_bus("gpu_axi", imx8mm_gpu_axi_sels, base + 0x8c00); hws[IMX8MM_CLK_GPU_AHB] = imx8m_clk_hw_composite_bus("gpu_ahb", imx8mm_gpu_ahb_sels, base + 0x8c80); - hws[IMX8MM_CLK_NOC] = imx8m_clk_hw_composite_critical("noc", 
imx8mm_noc_sels, base + 0x8d00); - hws[IMX8MM_CLK_NOC_APB] = imx8m_clk_hw_composite_critical("noc_apb", imx8mm_noc_apb_sels, base + 0x8d80); + hws[IMX8MM_CLK_NOC] = imx8m_clk_hw_composite_bus_critical("noc", imx8mm_noc_sels, base + 0x8d00); + hws[IMX8MM_CLK_NOC_APB] = imx8m_clk_hw_composite_bus_critical("noc_apb", imx8mm_noc_apb_sels, base + 0x8d80); /* AHB */ - hws[IMX8MM_CLK_AHB] = imx8m_clk_hw_composite_critical("ahb", imx8mm_ahb_sels, base + 0x9000); + hws[IMX8MM_CLK_AHB] = imx8m_clk_hw_composite_bus_critical("ahb", imx8mm_ahb_sels, base + 0x9000); hws[IMX8MM_CLK_AUDIO_AHB] = imx8m_clk_hw_composite_bus("audio_ahb", imx8mm_audio_ahb_sels, base + 0x9100); /* IPG */ diff --git a/drivers/clk/imx/clk-imx8mn.c b/drivers/clk/imx/clk-imx8mn.c index e984de543f0b..f3c5e6cf55dd 100644 --- a/drivers/clk/imx/clk-imx8mn.c +++ b/drivers/clk/imx/clk-imx8mn.c @@ -431,7 +431,7 @@ static int imx8mn_clocks_probe(struct platform_device *pdev) hws[IMX8MN_CLK_A53_CORE] = imx_clk_hw_mux2("arm_a53_core", base + 0x9880, 24, 1, imx8mn_a53_core_sels, ARRAY_SIZE(imx8mn_a53_core_sels)); /* BUS */ - hws[IMX8MN_CLK_MAIN_AXI] = imx8m_clk_hw_composite_critical("main_axi", imx8mn_main_axi_sels, base + 0x8800); + hws[IMX8MN_CLK_MAIN_AXI] = imx8m_clk_hw_composite_bus_critical("main_axi", imx8mn_main_axi_sels, base + 0x8800); hws[IMX8MN_CLK_ENET_AXI] = imx8m_clk_hw_composite_bus("enet_axi", imx8mn_enet_axi_sels, base + 0x8880); hws[IMX8MN_CLK_NAND_USDHC_BUS] = imx8m_clk_hw_composite_bus("nand_usdhc_bus", imx8mn_nand_usdhc_sels, base + 0x8900); hws[IMX8MN_CLK_DISP_AXI] = imx8m_clk_hw_composite_bus("disp_axi", imx8mn_disp_axi_sels, base + 0x8a00); @@ -439,9 +439,9 @@ static int imx8mn_clocks_probe(struct platform_device *pdev) hws[IMX8MN_CLK_USB_BUS] = imx8m_clk_hw_composite_bus("usb_bus", imx8mn_usb_bus_sels, base + 0x8b80); hws[IMX8MN_CLK_GPU_AXI] = imx8m_clk_hw_composite_bus("gpu_axi", imx8mn_gpu_axi_sels, base + 0x8c00); hws[IMX8MN_CLK_GPU_AHB] = imx8m_clk_hw_composite_bus("gpu_ahb", 
imx8mn_gpu_ahb_sels, base + 0x8c80); - hws[IMX8MN_CLK_NOC] = imx8m_clk_hw_composite_critical("noc", imx8mn_noc_sels, base + 0x8d00); + hws[IMX8MN_CLK_NOC] = imx8m_clk_hw_composite_bus_critical("noc", imx8mn_noc_sels, base + 0x8d00); - hws[IMX8MN_CLK_AHB] = imx8m_clk_hw_composite_critical("ahb", imx8mn_ahb_sels, base + 0x9000); + hws[IMX8MN_CLK_AHB] = imx8m_clk_hw_composite_bus_critical("ahb", imx8mn_ahb_sels, base + 0x9000); hws[IMX8MN_CLK_AUDIO_AHB] = imx8m_clk_hw_composite_bus("audio_ahb", imx8mn_audio_ahb_sels, base + 0x9100); hws[IMX8MN_CLK_IPG_ROOT] = imx_clk_hw_divider2("ipg_root", "ahb", base + 0x9080, 0, 1); hws[IMX8MN_CLK_IPG_AUDIO_ROOT] = imx_clk_hw_divider2("ipg_audio_root", "audio_ahb", base + 0x9180, 0, 1); diff --git a/drivers/clk/imx/clk-imx8mp.c b/drivers/clk/imx/clk-imx8mp.c index 12ce4770f702..48e212477f52 100644 --- a/drivers/clk/imx/clk-imx8mp.c +++ b/drivers/clk/imx/clk-imx8mp.c @@ -557,9 +557,9 @@ static int imx8mp_clocks_probe(struct platform_device *pdev) /* CORE SEL */ hws[IMX8MP_CLK_A53_CORE] = imx_clk_hw_mux2("arm_a53_core", ccm_base + 0x9880, 24, 1, imx8mp_a53_core_sels, ARRAY_SIZE(imx8mp_a53_core_sels)); - hws[IMX8MP_CLK_MAIN_AXI] = imx8m_clk_hw_composite_critical("main_axi", imx8mp_main_axi_sels, ccm_base + 0x8800); + hws[IMX8MP_CLK_MAIN_AXI] = imx8m_clk_hw_composite_bus_critical("main_axi", imx8mp_main_axi_sels, ccm_base + 0x8800); hws[IMX8MP_CLK_ENET_AXI] = imx8m_clk_hw_composite_bus("enet_axi", imx8mp_enet_axi_sels, ccm_base + 0x8880); - hws[IMX8MP_CLK_NAND_USDHC_BUS] = imx8m_clk_hw_composite_critical("nand_usdhc_bus", imx8mp_nand_usdhc_sels, ccm_base + 0x8900); + hws[IMX8MP_CLK_NAND_USDHC_BUS] = imx8m_clk_hw_composite_bus_critical("nand_usdhc_bus", imx8mp_nand_usdhc_sels, ccm_base + 0x8900); hws[IMX8MP_CLK_VPU_BUS] = imx8m_clk_hw_composite_bus("vpu_bus", imx8mp_vpu_bus_sels, ccm_base + 0x8980); hws[IMX8MP_CLK_MEDIA_AXI] = imx8m_clk_hw_composite_bus("media_axi", imx8mp_media_axi_sels, ccm_base + 0x8a00); hws[IMX8MP_CLK_MEDIA_APB] = 
imx8m_clk_hw_composite_bus("media_apb", imx8mp_media_apb_sels, ccm_base + 0x8a80); @@ -567,12 +567,12 @@ static int imx8mp_clocks_probe(struct platform_device *pdev) hws[IMX8MP_CLK_HDMI_AXI] = imx8m_clk_hw_composite_bus("hdmi_axi", imx8mp_media_axi_sels, ccm_base + 0x8b80); hws[IMX8MP_CLK_GPU_AXI] = imx8m_clk_hw_composite_bus("gpu_axi", imx8mp_gpu_axi_sels, ccm_base + 0x8c00); hws[IMX8MP_CLK_GPU_AHB] = imx8m_clk_hw_composite_bus("gpu_ahb", imx8mp_gpu_ahb_sels, ccm_base + 0x8c80); - hws[IMX8MP_CLK_NOC] = imx8m_clk_hw_composite_critical("noc", imx8mp_noc_sels, ccm_base + 0x8d00); - hws[IMX8MP_CLK_NOC_IO] = imx8m_clk_hw_composite_critical("noc_io", imx8mp_noc_io_sels, ccm_base + 0x8d80); + hws[IMX8MP_CLK_NOC] = imx8m_clk_hw_composite_bus_critical("noc", imx8mp_noc_sels, ccm_base + 0x8d00); + hws[IMX8MP_CLK_NOC_IO] = imx8m_clk_hw_composite_bus_critical("noc_io", imx8mp_noc_io_sels, ccm_base + 0x8d80); hws[IMX8MP_CLK_ML_AXI] = imx8m_clk_hw_composite_bus("ml_axi", imx8mp_ml_axi_sels, ccm_base + 0x8e00); hws[IMX8MP_CLK_ML_AHB] = imx8m_clk_hw_composite_bus("ml_ahb", imx8mp_ml_ahb_sels, ccm_base + 0x8e80); - hws[IMX8MP_CLK_AHB] = imx8m_clk_hw_composite_critical("ahb_root", imx8mp_ahb_sels, ccm_base + 0x9000); + hws[IMX8MP_CLK_AHB] = imx8m_clk_hw_composite_bus_critical("ahb_root", imx8mp_ahb_sels, ccm_base + 0x9000); hws[IMX8MP_CLK_AUDIO_AHB] = imx8m_clk_hw_composite_bus("audio_ahb", imx8mp_audio_ahb_sels, ccm_base + 0x9100); hws[IMX8MP_CLK_MIPI_DSI_ESC_RX] = imx8m_clk_hw_composite_bus("mipi_dsi_esc_rx", imx8mp_mipi_dsi_esc_rx_sels, ccm_base + 0x9200); diff --git a/drivers/clk/imx/clk-imx8mq.c b/drivers/clk/imx/clk-imx8mq.c index 8265d1d48af4..06292d4a98ff 100644 --- a/drivers/clk/imx/clk-imx8mq.c +++ b/drivers/clk/imx/clk-imx8mq.c @@ -431,7 +431,7 @@ static int imx8mq_clocks_probe(struct platform_device *pdev) hws[IMX8MQ_CLK_A53_CORE] = imx_clk_hw_mux2("arm_a53_core", base + 0x9880, 24, 1, imx8mq_a53_core_sels, ARRAY_SIZE(imx8mq_a53_core_sels)); /* BUS */ - 
hws[IMX8MQ_CLK_MAIN_AXI] = imx8m_clk_hw_composite_critical("main_axi", imx8mq_main_axi_sels, base + 0x8800); + hws[IMX8MQ_CLK_MAIN_AXI] = imx8m_clk_hw_composite_bus_critical("main_axi", imx8mq_main_axi_sels, base + 0x8800); hws[IMX8MQ_CLK_ENET_AXI] = imx8m_clk_hw_composite_bus("enet_axi", imx8mq_enet_axi_sels, base + 0x8880); hws[IMX8MQ_CLK_NAND_USDHC_BUS] = imx8m_clk_hw_composite_bus("nand_usdhc_bus", imx8mq_nand_usdhc_sels, base + 0x8900); hws[IMX8MQ_CLK_VPU_BUS] = imx8m_clk_hw_composite_bus("vpu_bus", imx8mq_vpu_bus_sels, base + 0x8980); @@ -441,12 +441,12 @@ static int imx8mq_clocks_probe(struct platform_device *pdev) hws[IMX8MQ_CLK_USB_BUS] = imx8m_clk_hw_composite_bus("usb_bus", imx8mq_usb_bus_sels, base + 0x8b80); hws[IMX8MQ_CLK_GPU_AXI] = imx8m_clk_hw_composite_bus("gpu_axi", imx8mq_gpu_axi_sels, base + 0x8c00); hws[IMX8MQ_CLK_GPU_AHB] = imx8m_clk_hw_composite_bus("gpu_ahb", imx8mq_gpu_ahb_sels, base + 0x8c80); - hws[IMX8MQ_CLK_NOC] = imx8m_clk_hw_composite_critical("noc", imx8mq_noc_sels, base + 0x8d00); - hws[IMX8MQ_CLK_NOC_APB] = imx8m_clk_hw_composite_critical("noc_apb", imx8mq_noc_apb_sels, base + 0x8d80); + hws[IMX8MQ_CLK_NOC] = imx8m_clk_hw_composite_bus_critical("noc", imx8mq_noc_sels, base + 0x8d00); + hws[IMX8MQ_CLK_NOC_APB] = imx8m_clk_hw_composite_bus_critical("noc_apb", imx8mq_noc_apb_sels, base + 0x8d80); /* AHB */ /* AHB clock is used by the AHB bus therefore marked as critical */ - hws[IMX8MQ_CLK_AHB] = imx8m_clk_hw_composite_critical("ahb", imx8mq_ahb_sels, base + 0x9000); + hws[IMX8MQ_CLK_AHB] = imx8m_clk_hw_composite_bus_critical("ahb", imx8mq_ahb_sels, base + 0x9000); hws[IMX8MQ_CLK_AUDIO_AHB] = imx8m_clk_hw_composite_bus("audio_ahb", imx8mq_audio_ahb_sels, base + 0x9100); /* IPG */ diff --git a/drivers/clk/imx/clk.h b/drivers/clk/imx/clk.h index 3b796b3da249..1d7be0c86538 100644 --- a/drivers/clk/imx/clk.h +++ b/drivers/clk/imx/clk.h @@ -549,6 +549,11 @@ struct clk_hw *imx8m_clk_hw_composite_flags(const char *name, IMX_COMPOSITE_BUS, \ 
CLK_SET_RATE_NO_REPARENT | CLK_OPS_PARENT_ENABLE) +#define imx8m_clk_hw_composite_bus_critical(name, parent_names, reg) \ + imx8m_clk_hw_composite_flags(name, parent_names, ARRAY_SIZE(parent_names), reg, \ + IMX_COMPOSITE_BUS, \ + CLK_SET_RATE_NO_REPARENT | CLK_OPS_PARENT_ENABLE | CLK_IS_CRITICAL) + #define imx8m_clk_hw_composite_core(name, parent_names, reg) \ imx8m_clk_hw_composite_flags(name, parent_names, \ ARRAY_SIZE(parent_names), reg, \ diff --git a/drivers/clk/meson/clk-regmap.h b/drivers/clk/meson/clk-regmap.h index c4a39604cffd..e365312da54e 100644 --- a/drivers/clk/meson/clk-regmap.h +++ b/drivers/clk/meson/clk-regmap.h @@ -26,7 +26,10 @@ struct clk_regmap { void *data; }; -#define to_clk_regmap(_hw) container_of(_hw, struct clk_regmap, hw) +static inline struct clk_regmap *to_clk_regmap(struct clk_hw *hw) +{ + return container_of(hw, struct clk_regmap, hw); +} /** * struct clk_regmap_gate_data - regmap backed gate specific data diff --git a/drivers/clk/qcom/clk-regmap.h b/drivers/clk/qcom/clk-regmap.h index 6cfc1bccb255..14ec659a3a77 100644 --- a/drivers/clk/qcom/clk-regmap.h +++ b/drivers/clk/qcom/clk-regmap.h @@ -24,7 +24,11 @@ struct clk_regmap { unsigned int enable_mask; bool enable_is_inverted; }; -#define to_clk_regmap(_hw) container_of(_hw, struct clk_regmap, hw) + +static inline struct clk_regmap *to_clk_regmap(struct clk_hw *hw) +{ + return container_of(hw, struct clk_regmap, hw); +} int clk_is_enabled_regmap(struct clk_hw *hw); int clk_enable_regmap(struct clk_hw *hw); diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 840754dcc6ca..a7e762c352f9 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -31,7 +31,6 @@ #include <linux/slab.h> #include <linux/smp.h> #include <linux/spinlock.h> -#include <linux/uaccess.h> /* * The call to use to reach the firmware. 
@@ -1092,26 +1091,13 @@ int sdei_event_handler(struct pt_regs *regs, struct sdei_registered_event *arg) { int err; - mm_segment_t orig_addr_limit; u32 event_num = arg->event_num; - /* - * Save restore 'fs'. - * The architecture's entry code save/restores 'fs' when taking an - * exception from the kernel. This ensures addr_limit isn't inherited - * if you interrupted something that allowed the uaccess routines to - * access kernel memory. - * Do the same here because this doesn't come via the same entry code. - */ - orig_addr_limit = force_uaccess_begin(); - err = arg->callback(event_num, regs, arg->callback_arg); if (err) pr_err_ratelimited("event %u on CPU %u failed with error: %d\n", event_num, smp_processor_id(), err); - force_uaccess_end(orig_addr_limit); - return err; } NOKPROBE_SYMBOL(sdei_event_handler); diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index 00af99b6f97c..f5fc429cae3f 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -58,15 +58,12 @@ typedef unsigned long (psci_fn)(unsigned long, unsigned long, unsigned long, unsigned long); static psci_fn *invoke_psci_fn; -enum psci_function { - PSCI_FN_CPU_SUSPEND, - PSCI_FN_CPU_ON, - PSCI_FN_CPU_OFF, - PSCI_FN_MIGRATE, - PSCI_FN_MAX, -}; +static struct psci_0_1_function_ids psci_0_1_function_ids; -static u32 psci_function_id[PSCI_FN_MAX]; +struct psci_0_1_function_ids get_psci_0_1_function_ids(void) +{ + return psci_0_1_function_ids; +} #define PSCI_0_2_POWER_STATE_MASK \ (PSCI_0_2_POWER_STATE_ID_MASK | \ @@ -146,7 +143,12 @@ static int psci_to_linux_errno(int errno) return -EINVAL; } -static u32 psci_get_version(void) +static u32 psci_0_1_get_version(void) +{ + return PSCI_VERSION(0, 1); +} + +static u32 psci_0_2_get_version(void) { return invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); } @@ -163,46 +165,80 @@ int psci_set_osi_mode(bool enable) return psci_to_linux_errno(err); } -static int psci_cpu_suspend(u32 state, unsigned long entry_point) 
+static int __psci_cpu_suspend(u32 fn, u32 state, unsigned long entry_point) { int err; - u32 fn; - fn = psci_function_id[PSCI_FN_CPU_SUSPEND]; err = invoke_psci_fn(fn, state, entry_point, 0); return psci_to_linux_errno(err); } -static int psci_cpu_off(u32 state) +static int psci_0_1_cpu_suspend(u32 state, unsigned long entry_point) +{ + return __psci_cpu_suspend(psci_0_1_function_ids.cpu_suspend, + state, entry_point); +} + +static int psci_0_2_cpu_suspend(u32 state, unsigned long entry_point) +{ + return __psci_cpu_suspend(PSCI_FN_NATIVE(0_2, CPU_SUSPEND), + state, entry_point); +} + +static int __psci_cpu_off(u32 fn, u32 state) { int err; - u32 fn; - fn = psci_function_id[PSCI_FN_CPU_OFF]; err = invoke_psci_fn(fn, state, 0, 0); return psci_to_linux_errno(err); } -static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point) +static int psci_0_1_cpu_off(u32 state) +{ + return __psci_cpu_off(psci_0_1_function_ids.cpu_off, state); +} + +static int psci_0_2_cpu_off(u32 state) +{ + return __psci_cpu_off(PSCI_0_2_FN_CPU_OFF, state); +} + +static int __psci_cpu_on(u32 fn, unsigned long cpuid, unsigned long entry_point) { int err; - u32 fn; - fn = psci_function_id[PSCI_FN_CPU_ON]; err = invoke_psci_fn(fn, cpuid, entry_point, 0); return psci_to_linux_errno(err); } -static int psci_migrate(unsigned long cpuid) +static int psci_0_1_cpu_on(unsigned long cpuid, unsigned long entry_point) +{ + return __psci_cpu_on(psci_0_1_function_ids.cpu_on, cpuid, entry_point); +} + +static int psci_0_2_cpu_on(unsigned long cpuid, unsigned long entry_point) +{ + return __psci_cpu_on(PSCI_FN_NATIVE(0_2, CPU_ON), cpuid, entry_point); +} + +static int __psci_migrate(u32 fn, unsigned long cpuid) { int err; - u32 fn; - fn = psci_function_id[PSCI_FN_MIGRATE]; err = invoke_psci_fn(fn, cpuid, 0, 0); return psci_to_linux_errno(err); } +static int psci_0_1_migrate(unsigned long cpuid) +{ + return __psci_migrate(psci_0_1_function_ids.migrate, cpuid); +} + +static int 
psci_0_2_migrate(unsigned long cpuid) +{ + return __psci_migrate(PSCI_FN_NATIVE(0_2, MIGRATE), cpuid); +} + static int psci_affinity_info(unsigned long target_affinity, unsigned long lowest_affinity_level) { @@ -347,7 +383,7 @@ static void __init psci_init_system_suspend(void) static void __init psci_init_cpu_suspend(void) { - int feature = psci_features(psci_function_id[PSCI_FN_CPU_SUSPEND]); + int feature = psci_features(PSCI_FN_NATIVE(0_2, CPU_SUSPEND)); if (feature != PSCI_RET_NOT_SUPPORTED) psci_cpu_suspend_feature = feature; @@ -421,24 +457,16 @@ static void __init psci_init_smccc(void) static void __init psci_0_2_set_functions(void) { pr_info("Using standard PSCI v0.2 function IDs\n"); - psci_ops.get_version = psci_get_version; - - psci_function_id[PSCI_FN_CPU_SUSPEND] = - PSCI_FN_NATIVE(0_2, CPU_SUSPEND); - psci_ops.cpu_suspend = psci_cpu_suspend; - - psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; - psci_ops.cpu_off = psci_cpu_off; - - psci_function_id[PSCI_FN_CPU_ON] = PSCI_FN_NATIVE(0_2, CPU_ON); - psci_ops.cpu_on = psci_cpu_on; - psci_function_id[PSCI_FN_MIGRATE] = PSCI_FN_NATIVE(0_2, MIGRATE); - psci_ops.migrate = psci_migrate; - - psci_ops.affinity_info = psci_affinity_info; - - psci_ops.migrate_info_type = psci_migrate_info_type; + psci_ops = (struct psci_operations){ + .get_version = psci_0_2_get_version, + .cpu_suspend = psci_0_2_cpu_suspend, + .cpu_off = psci_0_2_cpu_off, + .cpu_on = psci_0_2_cpu_on, + .migrate = psci_0_2_migrate, + .affinity_info = psci_affinity_info, + .migrate_info_type = psci_migrate_info_type, + }; arm_pm_restart = psci_sys_reset; @@ -450,7 +478,7 @@ static void __init psci_0_2_set_functions(void) */ static int __init psci_probe(void) { - u32 ver = psci_get_version(); + u32 ver = psci_0_2_get_version(); pr_info("PSCIv%d.%d detected in firmware.\n", PSCI_VERSION_MAJOR(ver), @@ -514,24 +542,26 @@ static int __init psci_0_1_init(struct device_node *np) pr_info("Using PSCI v0.1 Function IDs from DT\n"); + 
psci_ops.get_version = psci_0_1_get_version; + if (!of_property_read_u32(np, "cpu_suspend", &id)) { - psci_function_id[PSCI_FN_CPU_SUSPEND] = id; - psci_ops.cpu_suspend = psci_cpu_suspend; + psci_0_1_function_ids.cpu_suspend = id; + psci_ops.cpu_suspend = psci_0_1_cpu_suspend; } if (!of_property_read_u32(np, "cpu_off", &id)) { - psci_function_id[PSCI_FN_CPU_OFF] = id; - psci_ops.cpu_off = psci_cpu_off; + psci_0_1_function_ids.cpu_off = id; + psci_ops.cpu_off = psci_0_1_cpu_off; } if (!of_property_read_u32(np, "cpu_on", &id)) { - psci_function_id[PSCI_FN_CPU_ON] = id; - psci_ops.cpu_on = psci_cpu_on; + psci_0_1_function_ids.cpu_on = id; + psci_ops.cpu_on = psci_0_1_cpu_on; } if (!of_property_read_u32(np, "migrate", &id)) { - psci_function_id[PSCI_FN_MIGRATE] = id; - psci_ops.migrate = psci_migrate; + psci_0_1_function_ids.migrate = id; + psci_ops.migrate = psci_0_1_migrate; } return 0; diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c index 8d1ff2454e2e..efb8a66efc68 100644 --- a/drivers/firmware/xilinx/zynqmp.c +++ b/drivers/firmware/xilinx/zynqmp.c @@ -147,6 +147,9 @@ static int zynqmp_pm_feature(u32 api_id) return 0; /* Return value if feature is already checked */ + if (api_id > ARRAY_SIZE(zynqmp_pm_features)) + return PM_FEATURE_INVALID; + if (zynqmp_pm_features[api_id] != PM_FEATURE_UNCHECKED) return zynqmp_pm_features[api_id]; diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c index e44d5de2a120..b966f5e28ebf 100644 --- a/drivers/gpio/gpio-aspeed.c +++ b/drivers/gpio/gpio-aspeed.c @@ -1114,6 +1114,7 @@ static const struct aspeed_gpio_config ast2500_config = static const struct aspeed_bank_props ast2600_bank_props[] = { /* input output */ + {4, 0xffffffff, 0x00ffffff}, /* Q/R/S/T */ {5, 0xffffffff, 0xffffff00}, /* U/V/W/X */ {6, 0x0000ffff, 0x0000ffff}, /* Y/Z */ { }, diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c index a5b326754124..2a9046c0fb16 100644 --- a/drivers/gpio/gpio-dwapb.c +++ 
b/drivers/gpio/gpio-dwapb.c @@ -343,8 +343,8 @@ static int dwapb_irq_set_type(struct irq_data *d, u32 type) #ifdef CONFIG_PM_SLEEP static int dwapb_irq_set_wake(struct irq_data *d, unsigned int enable) { - struct irq_chip_generic *igc = irq_data_get_irq_chip_data(d); - struct dwapb_gpio *gpio = igc->private; + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct dwapb_gpio *gpio = to_dwapb_gpio(gc); struct dwapb_context *ctx = gpio->ports[0].ctx; irq_hw_number_t bit = irqd_to_hwirq(d); diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 6d59e3a43761..f7ceb2b11afc 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -1114,13 +1114,23 @@ static void omap_gpio_idle(struct gpio_bank *bank, bool may_lose_context) { struct device *dev = bank->chip.parent; void __iomem *base = bank->base; - u32 nowake; + u32 mask, nowake; bank->saved_datain = readl_relaxed(base + bank->regs->datain); if (!bank->enabled_non_wakeup_gpios) goto update_gpio_context_count; + /* Check for pending EDGE_FALLING, ignore EDGE_BOTH */ + mask = bank->enabled_non_wakeup_gpios & bank->context.fallingdetect; + mask &= ~bank->context.risingdetect; + bank->saved_datain |= mask; + + /* Check for pending EDGE_RISING, ignore EDGE_BOTH */ + mask = bank->enabled_non_wakeup_gpios & bank->context.risingdetect; + mask &= ~bank->context.fallingdetect; + bank->saved_datain &= ~mask; + if (!may_lose_context) goto update_gpio_context_count; diff --git a/drivers/gpio/gpio-pcie-idio-24.c b/drivers/gpio/gpio-pcie-idio-24.c index a68941d19ac6..2a07fd96707e 100644 --- a/drivers/gpio/gpio-pcie-idio-24.c +++ b/drivers/gpio/gpio-pcie-idio-24.c @@ -28,6 +28,47 @@ #include <linux/spinlock.h> #include <linux/types.h> +/* + * PLX PEX8311 PCI LCS_INTCSR Interrupt Control/Status + * + * Bit: Description + * 0: Enable Interrupt Sources (Bit 0) + * 1: Enable Interrupt Sources (Bit 1) + * 2: Generate Internal PCI Bus Internal SERR# Interrupt + * 3: Mailbox Interrupt Enable + * 4: Power 
Management Interrupt Enable + * 5: Power Management Interrupt + * 6: Slave Read Local Data Parity Check Error Enable + * 7: Slave Read Local Data Parity Check Error Status + * 8: Internal PCI Wire Interrupt Enable + * 9: PCI Express Doorbell Interrupt Enable + * 10: PCI Abort Interrupt Enable + * 11: Local Interrupt Input Enable + * 12: Retry Abort Enable + * 13: PCI Express Doorbell Interrupt Active + * 14: PCI Abort Interrupt Active + * 15: Local Interrupt Input Active + * 16: Local Interrupt Output Enable + * 17: Local Doorbell Interrupt Enable + * 18: DMA Channel 0 Interrupt Enable + * 19: DMA Channel 1 Interrupt Enable + * 20: Local Doorbell Interrupt Active + * 21: DMA Channel 0 Interrupt Active + * 22: DMA Channel 1 Interrupt Active + * 23: Built-In Self-Test (BIST) Interrupt Active + * 24: Direct Master was the Bus Master during a Master or Target Abort + * 25: DMA Channel 0 was the Bus Master during a Master or Target Abort + * 26: DMA Channel 1 was the Bus Master during a Master or Target Abort + * 27: Target Abort after internal 256 consecutive Master Retrys + * 28: PCI Bus wrote data to LCS_MBOX0 + * 29: PCI Bus wrote data to LCS_MBOX1 + * 30: PCI Bus wrote data to LCS_MBOX2 + * 31: PCI Bus wrote data to LCS_MBOX3 + */ +#define PLX_PEX8311_PCI_LCS_INTCSR 0x68 +#define INTCSR_INTERNAL_PCI_WIRE BIT(8) +#define INTCSR_LOCAL_INPUT BIT(11) + /** * struct idio_24_gpio_reg - GPIO device registers structure * @out0_7: Read: FET Outputs 0-7 @@ -92,6 +133,7 @@ struct idio_24_gpio_reg { struct idio_24_gpio { struct gpio_chip chip; raw_spinlock_t lock; + __u8 __iomem *plx; struct idio_24_gpio_reg __iomem *reg; unsigned long irq_mask; }; @@ -334,13 +376,13 @@ static void idio_24_irq_mask(struct irq_data *data) unsigned long flags; const unsigned long bit_offset = irqd_to_hwirq(data) - 24; unsigned char new_irq_mask; - const unsigned long bank_offset = bit_offset/8 * 8; + const unsigned long bank_offset = bit_offset / 8; unsigned char cos_enable_state; 
raw_spin_lock_irqsave(&idio24gpio->lock, flags); - idio24gpio->irq_mask &= BIT(bit_offset); - new_irq_mask = idio24gpio->irq_mask >> bank_offset; + idio24gpio->irq_mask &= ~BIT(bit_offset); + new_irq_mask = idio24gpio->irq_mask >> bank_offset * 8; if (!new_irq_mask) { cos_enable_state = ioread8(&idio24gpio->reg->cos_enable); @@ -363,12 +405,12 @@ static void idio_24_irq_unmask(struct irq_data *data) unsigned long flags; unsigned char prev_irq_mask; const unsigned long bit_offset = irqd_to_hwirq(data) - 24; - const unsigned long bank_offset = bit_offset/8 * 8; + const unsigned long bank_offset = bit_offset / 8; unsigned char cos_enable_state; raw_spin_lock_irqsave(&idio24gpio->lock, flags); - prev_irq_mask = idio24gpio->irq_mask >> bank_offset; + prev_irq_mask = idio24gpio->irq_mask >> bank_offset * 8; idio24gpio->irq_mask |= BIT(bit_offset); if (!prev_irq_mask) { @@ -455,6 +497,7 @@ static int idio_24_probe(struct pci_dev *pdev, const struct pci_device_id *id) struct device *const dev = &pdev->dev; struct idio_24_gpio *idio24gpio; int err; + const size_t pci_plx_bar_index = 1; const size_t pci_bar_index = 2; const char *const name = pci_name(pdev); struct gpio_irq_chip *girq; @@ -469,12 +512,13 @@ static int idio_24_probe(struct pci_dev *pdev, const struct pci_device_id *id) return err; } - err = pcim_iomap_regions(pdev, BIT(pci_bar_index), name); + err = pcim_iomap_regions(pdev, BIT(pci_plx_bar_index) | BIT(pci_bar_index), name); if (err) { dev_err(dev, "Unable to map PCI I/O addresses (%d)\n", err); return err; } + idio24gpio->plx = pcim_iomap_table(pdev)[pci_plx_bar_index]; idio24gpio->reg = pcim_iomap_table(pdev)[pci_bar_index]; idio24gpio->chip.label = name; @@ -504,6 +548,12 @@ static int idio_24_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* Software board reset */ iowrite8(0, &idio24gpio->reg->soft_reset); + /* + * enable PLX PEX8311 internal PCI wire interrupt and local interrupt + * input + */ + iowrite8((INTCSR_INTERNAL_PCI_WIRE | 
INTCSR_LOCAL_INPUT) >> 8, + idio24gpio->plx + PLX_PEX8311_PCI_LCS_INTCSR + 1); err = devm_gpiochip_add_data(dev, &idio24gpio->chip, idio24gpio); if (err) { diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c index c54dd08f2cbf..d5eb9ca11901 100644 --- a/drivers/gpio/gpio-sifive.c +++ b/drivers/gpio/gpio-sifive.c @@ -183,7 +183,7 @@ static int sifive_gpio_probe(struct platform_device *pdev) return PTR_ERR(chip->regs); ngpio = of_irq_count(node); - if (ngpio >= SIFIVE_GPIO_MAX) { + if (ngpio > SIFIVE_GPIO_MAX) { dev_err(dev, "Too many GPIO interrupts (max=%d)\n", SIFIVE_GPIO_MAX); return -ENXIO; diff --git a/drivers/gpio/gpiolib-cdev.h b/drivers/gpio/gpiolib-cdev.h index cb41dd757338..b42644cbffb8 100644 --- a/drivers/gpio/gpiolib-cdev.h +++ b/drivers/gpio/gpiolib-cdev.h @@ -7,22 +7,7 @@ struct gpio_device; -#ifdef CONFIG_GPIO_CDEV - int gpiolib_cdev_register(struct gpio_device *gdev, dev_t devt); void gpiolib_cdev_unregister(struct gpio_device *gdev); -#else - -static inline int gpiolib_cdev_register(struct gpio_device *gdev, dev_t devt) -{ - return 0; -} - -static inline void gpiolib_cdev_unregister(struct gpio_device *gdev) -{ -} - -#endif /* CONFIG_GPIO_CDEV */ - #endif /* GPIOLIB_CDEV_H */ diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 3cdf9effc13a..089ddcaa9bc6 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -480,11 +480,23 @@ static void gpiodevice_release(struct device *dev) kfree(gdev); } +#ifdef CONFIG_GPIO_CDEV +#define gcdev_register(gdev, devt) gpiolib_cdev_register((gdev), (devt)) +#define gcdev_unregister(gdev) gpiolib_cdev_unregister((gdev)) +#else +/* + * gpiolib_cdev_register() indirectly calls device_add(), which is still + * required even when cdev is not selected. 
+ */ +#define gcdev_register(gdev, devt) device_add(&(gdev)->dev) +#define gcdev_unregister(gdev) device_del(&(gdev)->dev) +#endif + static int gpiochip_setup_dev(struct gpio_device *gdev) { int ret; - ret = gpiolib_cdev_register(gdev, gpio_devt); + ret = gcdev_register(gdev, gpio_devt); if (ret) return ret; @@ -500,7 +512,7 @@ static int gpiochip_setup_dev(struct gpio_device *gdev) return 0; err_remove_device: - gpiolib_cdev_unregister(gdev); + gcdev_unregister(gdev); return ret; } @@ -825,7 +837,7 @@ void gpiochip_remove(struct gpio_chip *gc) * be removed, else it will be dangling until the last user is * gone. */ - gpiolib_cdev_unregister(gdev); + gcdev_unregister(gdev); put_device(&gdev->dev); } EXPORT_SYMBOL_GPL(gpiochip_remove); diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index d5715c1d177b..8eeba8096493 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -492,8 +492,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) - else if (amdgpu_device_has_dc_support(adev) && - !nv_is_headless_sku(adev->pdev)) + else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index dff5c15b4858..c4828bd3264b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -40,6 +40,7 @@ MODULE_FIRMWARE("amdgpu/renoir_asd.bin"); MODULE_FIRMWARE("amdgpu/renoir_ta.bin"); MODULE_FIRMWARE("amdgpu/green_sardine_asd.bin"); +MODULE_FIRMWARE("amdgpu/green_sardine_ta.bin"); /* address block */ #define smnMP1_FIRMWARE_FLAGS 0x3010024 diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c 
b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c index 49689f71f4f1..0effbb2bd74a 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn30/irq_service_dcn30.c @@ -306,8 +306,8 @@ irq_source_info_dcn30[DAL_IRQ_SOURCES_NUMBER] = { pflip_int_entry(1), pflip_int_entry(2), pflip_int_entry(3), - [DC_IRQ_SOURCE_PFLIP5] = dummy_irq_entry(), - [DC_IRQ_SOURCE_PFLIP6] = dummy_irq_entry(), + pflip_int_entry(4), + pflip_int_entry(5), [DC_IRQ_SOURCE_PFLIP_UNDERLAY0] = dummy_irq_entry(), gpio_pad_int_entry(0), gpio_pad_int_entry(1), diff --git a/drivers/gpu/drm/bridge/cadence/Kconfig b/drivers/gpu/drm/bridge/cadence/Kconfig index 511d67b16d14..ef8c230e0f62 100644 --- a/drivers/gpu/drm/bridge/cadence/Kconfig +++ b/drivers/gpu/drm/bridge/cadence/Kconfig @@ -13,7 +13,7 @@ config DRM_CDNS_MHDP8546 if DRM_CDNS_MHDP8546 config DRM_CDNS_MHDP8546_J721E - depends on ARCH_K3_J721E_SOC || COMPILE_TEST + depends on ARCH_K3 || COMPILE_TEST bool "J721E Cadence DPI/DP wrapper support" default y help diff --git a/drivers/gpu/drm/gma500/psb_irq.c b/drivers/gpu/drm/gma500/psb_irq.c index 15eb3770d817..361e3a0c5ab6 100644 --- a/drivers/gpu/drm/gma500/psb_irq.c +++ b/drivers/gpu/drm/gma500/psb_irq.c @@ -347,6 +347,7 @@ int psb_irq_postinstall(struct drm_device *dev) { struct drm_psb_private *dev_priv = dev->dev_private; unsigned long irqflags; + unsigned int i; spin_lock_irqsave(&dev_priv->irqmask_lock, irqflags); @@ -359,20 +360,12 @@ int psb_irq_postinstall(struct drm_device *dev) PSB_WVDC32(dev_priv->vdc_irq_mask, PSB_INT_ENABLE_R); PSB_WVDC32(0xFFFFFFFF, PSB_HWSTAM); - if (dev->vblank[0].enabled) - psb_enable_pipestat(dev_priv, 0, PIPE_VBLANK_INTERRUPT_ENABLE); - else - psb_disable_pipestat(dev_priv, 0, PIPE_VBLANK_INTERRUPT_ENABLE); - - if (dev->vblank[1].enabled) - psb_enable_pipestat(dev_priv, 1, PIPE_VBLANK_INTERRUPT_ENABLE); - else - psb_disable_pipestat(dev_priv, 1, PIPE_VBLANK_INTERRUPT_ENABLE); - - if 
(dev->vblank[2].enabled) - psb_enable_pipestat(dev_priv, 2, PIPE_VBLANK_INTERRUPT_ENABLE); - else - psb_disable_pipestat(dev_priv, 2, PIPE_VBLANK_INTERRUPT_ENABLE); + for (i = 0; i < dev->num_crtcs; ++i) { + if (dev->vblank[i].enabled) + psb_enable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE); + else + psb_disable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE); + } if (dev_priv->ops->hotplug_enable) dev_priv->ops->hotplug_enable(dev, true); @@ -385,6 +378,7 @@ void psb_irq_uninstall(struct drm_device *dev) { struct drm_psb_private *dev_priv = dev->dev_private; unsigned long irqflags; + unsigned int i; spin_lock_irqsave(&dev_priv->irqmask_lock, irqflags); @@ -393,14 +387,10 @@ void psb_irq_uninstall(struct drm_device *dev) PSB_WVDC32(0xFFFFFFFF, PSB_HWSTAM); - if (dev->vblank[0].enabled) - psb_disable_pipestat(dev_priv, 0, PIPE_VBLANK_INTERRUPT_ENABLE); - - if (dev->vblank[1].enabled) - psb_disable_pipestat(dev_priv, 1, PIPE_VBLANK_INTERRUPT_ENABLE); - - if (dev->vblank[2].enabled) - psb_disable_pipestat(dev_priv, 2, PIPE_VBLANK_INTERRUPT_ENABLE); + for (i = 0; i < dev->num_crtcs; ++i) { + if (dev->vblank[i].enabled) + psb_disable_pipestat(dev_priv, i, PIPE_VBLANK_INTERRUPT_ENABLE); + } dev_priv->vdc_irq_mask &= _PSB_IRQ_SGX_FLAG | _PSB_IRQ_MSVDX_FLAG | diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index b5c15557cc87..d6711caa7f39 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -56,6 +56,8 @@ struct drm_i915_gem_object_ops { void (*truncate)(struct drm_i915_gem_object *obj); void (*writeback)(struct drm_i915_gem_object *obj); + int (*pread)(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pread *arg); int (*pwrite)(struct drm_i915_gem_object *obj, const struct drm_i915_gem_pwrite *arg); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c index 
28147aab47b9..3a4dfe2ef1da 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_phys.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c @@ -134,6 +134,58 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, vaddr, dma); } +static int +phys_pwrite(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pwrite *args) +{ + void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset; + char __user *user_data = u64_to_user_ptr(args->data_ptr); + int err; + + err = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT); + if (err) + return err; + + /* + * We manually control the domain here and pretend that it + * remains coherent i.e. in the GTT domain, like shmem_pwrite. + */ + i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); + + if (copy_from_user(vaddr, user_data, args->size)) + return -EFAULT; + + drm_clflush_virt_range(vaddr, args->size); + intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); + + i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); + return 0; +} + +static int +phys_pread(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pread *args) +{ + void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset; + char __user *user_data = u64_to_user_ptr(args->data_ptr); + int err; + + err = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); + if (err) + return err; + + drm_clflush_virt_range(vaddr, args->size); + if (copy_to_user(user_data, vaddr, args->size)) + return -EFAULT; + + return 0; +} + static void phys_release(struct drm_i915_gem_object *obj) { fput(obj->base.filp); @@ -144,6 +196,9 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { .get_pages = i915_gem_object_get_pages_phys, .put_pages = i915_gem_object_put_pages_phys, + .pread = phys_pread, + .pwrite = phys_pwrite, + .release = phys_release, }; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 5bfb5f7ed02c..efdeb7b7b2a0 100644 --- 
a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -371,7 +371,8 @@ static void __setup_engine_capabilities(struct intel_engine_cs *engine) * instances. */ if ((INTEL_GEN(i915) >= 11 && - engine->gt->info.vdbox_sfc_access & engine->mask) || + (engine->gt->info.vdbox_sfc_access & + BIT(engine->instance))) || (INTEL_GEN(i915) >= 9 && engine->instance == 0)) engine->uabi_capabilities |= I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bb0c12975f38..58276694c848 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -180,30 +180,6 @@ try_again: } static int -i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, - struct drm_i915_gem_pwrite *args, - struct drm_file *file) -{ - void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset; - char __user *user_data = u64_to_user_ptr(args->data_ptr); - - /* - * We manually control the domain here and pretend that it - * remains coherent i.e. in the GTT domain, like shmem_pwrite. 
- */ - i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU); - - if (copy_from_user(vaddr, user_data, args->size)) - return -EFAULT; - - drm_clflush_virt_range(vaddr, args->size); - intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt); - - i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU); - return 0; -} - -static int i915_gem_create(struct drm_file *file, struct intel_memory_region *mr, u64 *size_p, @@ -527,6 +503,12 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, trace_i915_gem_object_pread(obj, args->offset, args->size); + ret = -ENODEV; + if (obj->ops->pread) + ret = obj->ops->pread(obj, args); + if (ret != -ENODEV) + goto out; + ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); @@ -866,8 +848,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, if (ret == -EFAULT || ret == -ENOSPC) { if (i915_gem_object_has_struct_page(obj)) ret = i915_gem_shmem_pwrite(obj, args); - else - ret = i915_gem_phys_pwrite(obj, args, file); } i915_gem_object_unpin_pages(obj); diff --git a/drivers/gpu/drm/mcde/mcde_drv.c b/drivers/gpu/drm/mcde/mcde_drv.c index c592957ed07f..92f8bd907193 100644 --- a/drivers/gpu/drm/mcde/mcde_drv.c +++ b/drivers/gpu/drm/mcde/mcde_drv.c @@ -413,7 +413,13 @@ static int mcde_probe(struct platform_device *pdev) match); if (ret) { dev_err(dev, "failed to add component master\n"); - goto clk_disable; + /* + * The EPOD regulator is already disabled at this point so some + * special errorpath code is needed + */ + clk_disable_unprepare(mcde->mcde_clk); + regulator_disable(mcde->vana); + return ret; } return 0; diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index b111fe24a06b..36d6b6093d16 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -455,7 +455,7 @@ nv50_outp_get_old_connector(struct nouveau_encoder *outp, * DAC *****************************************************************************/ static void 
-nv50_dac_disable(struct drm_encoder *encoder) +nv50_dac_disable(struct drm_encoder *encoder, struct drm_atomic_state *state) { struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); struct nv50_core *core = nv50_disp(encoder->dev)->core; @@ -467,7 +467,7 @@ nv50_dac_disable(struct drm_encoder *encoder) } static void -nv50_dac_enable(struct drm_encoder *encoder) +nv50_dac_enable(struct drm_encoder *encoder, struct drm_atomic_state *state) { struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc); @@ -525,8 +525,8 @@ nv50_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector) static const struct drm_encoder_helper_funcs nv50_dac_help = { .atomic_check = nv50_outp_atomic_check, - .enable = nv50_dac_enable, - .disable = nv50_dac_disable, + .atomic_enable = nv50_dac_enable, + .atomic_disable = nv50_dac_disable, .detect = nv50_dac_detect }; @@ -1055,7 +1055,7 @@ nv50_dp_bpc_to_depth(unsigned int bpc) } static void -nv50_msto_enable(struct drm_encoder *encoder) +nv50_msto_enable(struct drm_encoder *encoder, struct drm_atomic_state *state) { struct nv50_head *head = nv50_head(encoder->crtc); struct nv50_head_atom *armh = nv50_head_atom(head->base.base.state); @@ -1101,7 +1101,7 @@ nv50_msto_enable(struct drm_encoder *encoder) } static void -nv50_msto_disable(struct drm_encoder *encoder) +nv50_msto_disable(struct drm_encoder *encoder, struct drm_atomic_state *state) { struct nv50_msto *msto = nv50_msto(encoder); struct nv50_mstc *mstc = msto->mstc; @@ -1118,8 +1118,8 @@ nv50_msto_disable(struct drm_encoder *encoder) static const struct drm_encoder_helper_funcs nv50_msto_help = { - .disable = nv50_msto_disable, - .enable = nv50_msto_enable, + .atomic_disable = nv50_msto_disable, + .atomic_enable = nv50_msto_enable, .atomic_check = nv50_msto_atomic_check, }; @@ -1645,8 +1645,7 @@ nv50_sor_disable(struct drm_encoder *encoder, } static void -nv50_sor_enable(struct drm_encoder *encoder, 
- struct drm_atomic_state *state) +nv50_sor_enable(struct drm_encoder *encoder, struct drm_atomic_state *state) { struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc); @@ -1873,7 +1872,7 @@ nv50_pior_atomic_check(struct drm_encoder *encoder, } static void -nv50_pior_disable(struct drm_encoder *encoder) +nv50_pior_disable(struct drm_encoder *encoder, struct drm_atomic_state *state) { struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); struct nv50_core *core = nv50_disp(encoder->dev)->core; @@ -1885,7 +1884,7 @@ nv50_pior_disable(struct drm_encoder *encoder) } static void -nv50_pior_enable(struct drm_encoder *encoder) +nv50_pior_enable(struct drm_encoder *encoder, struct drm_atomic_state *state) { struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc); @@ -1921,14 +1920,14 @@ nv50_pior_enable(struct drm_encoder *encoder) } core->func->pior->ctrl(core, nv_encoder->or, ctrl, asyh); - nv_encoder->crtc = encoder->crtc; + nv_encoder->crtc = &nv_crtc->base; } static const struct drm_encoder_helper_funcs nv50_pior_help = { .atomic_check = nv50_pior_atomic_check, - .enable = nv50_pior_enable, - .disable = nv50_pior_disable, + .atomic_enable = nv50_pior_enable, + .atomic_disable = nv50_pior_disable, }; static void diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 2ee75646ad6f..56b335a55966 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -350,14 +350,13 @@ set_placement_list(struct nouveau_drm *drm, struct ttm_place *pl, unsigned *n, if (domain & NOUVEAU_GEM_DOMAIN_VRAM) { struct nvif_mmu *mmu = &drm->client.mmu; - const u8 type = mmu->type[drm->ttm.type_vram].type; pl[*n].mem_type = TTM_PL_VRAM; pl[*n].flags = flags & ~TTM_PL_FLAG_CACHED; /* Some BARs do not support being ioremapped WC */ if (drm->client.device.info.family >= 
NV_DEVICE_INFO_V0_TESLA && - type & NVIF_MEM_UNCACHED) + mmu->type[drm->ttm.type_vram].type & NVIF_MEM_UNCACHED) pl[*n].flags &= ~TTM_PL_FLAG_WC; (*n)++; diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 6f21f36719fc..8b4b3688c7ae 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -532,11 +532,13 @@ static void nouveau_connector_set_edid(struct nouveau_connector *nv_connector, struct edid *edid) { - struct edid *old_edid = nv_connector->edid; + if (nv_connector->edid != edid) { + struct edid *old_edid = nv_connector->edid; - drm_connector_update_edid_property(&nv_connector->base, edid); - kfree(old_edid); - nv_connector->edid = edid; + drm_connector_update_edid_property(&nv_connector->base, edid); + kfree(old_edid); + nv_connector->edid = edid; + } } static enum drm_connector_status @@ -669,8 +671,10 @@ nouveau_connector_detect_lvds(struct drm_connector *connector, bool force) /* Try retrieving EDID via DDC */ if (!drm->vbios.fp_no_ddc) { status = nouveau_connector_detect(connector, force); - if (status == connector_status_connected) + if (status == connector_status_connected) { + edid = nv_connector->edid; goto out; + } } /* On some laptops (Sony, i'm looking at you) there appears to diff --git a/drivers/hwmon/amd_energy.c b/drivers/hwmon/amd_energy.c index d06597303d5a..3197cda7bcd9 100644 --- a/drivers/hwmon/amd_energy.c +++ b/drivers/hwmon/amd_energy.c @@ -171,7 +171,7 @@ static umode_t amd_energy_is_visible(const void *_data, enum hwmon_sensor_types type, u32 attr, int channel) { - return 0444; + return 0440; } static int energy_accumulator(void *p) diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c index a18887990f4a..79b498f816fe 100644 --- a/drivers/hwmon/applesmc.c +++ b/drivers/hwmon/applesmc.c @@ -32,6 +32,7 @@ #include <linux/hwmon.h> #include <linux/workqueue.h> #include <linux/err.h> +#include <linux/bits.h> /* data port 
used by Apple SMC */ #define APPLESMC_DATA_PORT 0x300 @@ -42,10 +43,13 @@ #define APPLESMC_MAX_DATA_LENGTH 32 -/* wait up to 128 ms for a status change. */ -#define APPLESMC_MIN_WAIT 0x0010 -#define APPLESMC_RETRY_WAIT 0x0100 -#define APPLESMC_MAX_WAIT 0x20000 +/* Apple SMC status bits */ +#define SMC_STATUS_AWAITING_DATA BIT(0) /* SMC has data waiting to be read */ +#define SMC_STATUS_IB_CLOSED BIT(1) /* Will ignore any input */ +#define SMC_STATUS_BUSY BIT(2) /* Command in progress */ + +/* Initial wait is 8us */ +#define APPLESMC_MIN_WAIT 0x0008 #define APPLESMC_READ_CMD 0x10 #define APPLESMC_WRITE_CMD 0x11 @@ -151,65 +155,84 @@ static unsigned int key_at_index; static struct workqueue_struct *applesmc_led_wq; /* - * wait_read - Wait for a byte to appear on SMC port. Callers must - * hold applesmc_lock. + * Wait for specific status bits with a mask on the SMC. + * Used before all transactions. + * This does 10 fast loops of 8us then exponentially backs off for a + * minimum total wait of 262ms. Depending on usleep_range this could + * run out past 500ms. */ -static int wait_read(void) + +static int wait_status(u8 val, u8 mask) { - unsigned long end = jiffies + (APPLESMC_MAX_WAIT * HZ) / USEC_PER_SEC; u8 status; int us; + int i; - for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) { - usleep_range(us, us * 16); + us = APPLESMC_MIN_WAIT; + for (i = 0; i < 24 ; i++) { status = inb(APPLESMC_CMD_PORT); - /* read: wait for smc to settle */ - if (status & 0x01) + if ((status & mask) == val) return 0; - /* timeout: give up */ - if (time_after(jiffies, end)) - break; + usleep_range(us, us * 2); + if (i > 9) + us <<= 1; } - - pr_warn("wait_read() fail: 0x%02x\n", status); return -EIO; } -/* - * send_byte - Write to SMC port, retrying when necessary. Callers - * must hold applesmc_lock. - */ +/* send_byte - Write to SMC data port. Callers must hold applesmc_lock. 
*/ + static int send_byte(u8 cmd, u16 port) { - u8 status; - int us; - unsigned long end = jiffies + (APPLESMC_MAX_WAIT * HZ) / USEC_PER_SEC; + int status; + + status = wait_status(0, SMC_STATUS_IB_CLOSED); + if (status) + return status; + /* + * This needs to be a separate read looking for bit 0x04 + * after bit 0x02 falls. If consolidated with the wait above + * this extra read may not happen if status returns both + * simultaneously and this would appear to be required. + */ + status = wait_status(SMC_STATUS_BUSY, SMC_STATUS_BUSY); + if (status) + return status; outb(cmd, port); - for (us = APPLESMC_MIN_WAIT; us < APPLESMC_MAX_WAIT; us <<= 1) { - usleep_range(us, us * 16); - status = inb(APPLESMC_CMD_PORT); - /* write: wait for smc to settle */ - if (status & 0x02) - continue; - /* ready: cmd accepted, return */ - if (status & 0x04) - return 0; - /* timeout: give up */ - if (time_after(jiffies, end)) - break; - /* busy: long wait and resend */ - udelay(APPLESMC_RETRY_WAIT); - outb(cmd, port); - } - - pr_warn("send_byte(0x%02x, 0x%04x) fail: 0x%02x\n", cmd, port, status); - return -EIO; + return 0; } +/* send_command - Write a command to the SMC. Callers must hold applesmc_lock. */ + static int send_command(u8 cmd) { - return send_byte(cmd, APPLESMC_CMD_PORT); + int ret; + + ret = wait_status(0, SMC_STATUS_IB_CLOSED); + if (ret) + return ret; + outb(cmd, APPLESMC_CMD_PORT); + return 0; +} + +/* + * Based on logic from the Apple driver. This is issued before any interaction + * If busy is stuck high, issue a read command to reset the SMC state machine. + * If busy is stuck high after the command then the SMC is jammed. 
+ */ + +static int smc_sane(void) +{ + int ret; + + ret = wait_status(0, SMC_STATUS_BUSY); + if (!ret) + return ret; + ret = send_command(APPLESMC_READ_CMD); + if (ret) + return ret; + return wait_status(0, SMC_STATUS_BUSY); } static int send_argument(const char *key) @@ -226,6 +249,11 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) { u8 status, data = 0; int i; + int ret; + + ret = smc_sane(); + if (ret) + return ret; if (send_command(cmd) || send_argument(key)) { pr_warn("%.4s: read arg fail\n", key); @@ -239,7 +267,8 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) } for (i = 0; i < len; i++) { - if (wait_read()) { + if (wait_status(SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY, + SMC_STATUS_AWAITING_DATA | SMC_STATUS_BUSY)) { pr_warn("%.4s: read data[%d] fail\n", key, i); return -EIO; } @@ -250,19 +279,24 @@ static int read_smc(u8 cmd, const char *key, u8 *buffer, u8 len) for (i = 0; i < 16; i++) { udelay(APPLESMC_MIN_WAIT); status = inb(APPLESMC_CMD_PORT); - if (!(status & 0x01)) + if (!(status & SMC_STATUS_AWAITING_DATA)) break; data = inb(APPLESMC_DATA_PORT); } if (i) pr_warn("flushed %d bytes, last value is: %d\n", i, data); - return 0; + return wait_status(0, SMC_STATUS_BUSY); } static int write_smc(u8 cmd, const char *key, const u8 *buffer, u8 len) { int i; + int ret; + + ret = smc_sane(); + if (ret) + return ret; if (send_command(cmd) || send_argument(key)) { pr_warn("%s: write arg fail\n", key); @@ -281,7 +315,7 @@ static int write_smc(u8 cmd, const char *key, const u8 *buffer, u8 len) } } - return 0; + return wait_status(0, SMC_STATUS_BUSY); } static int read_register_count(unsigned int *count) diff --git a/drivers/hwmon/pmbus/max20730.c b/drivers/hwmon/pmbus/max20730.c index 57923d72490c..be83b98411c7 100644 --- a/drivers/hwmon/pmbus/max20730.c +++ b/drivers/hwmon/pmbus/max20730.c @@ -122,8 +122,8 @@ static ssize_t max20730_debugfs_read(struct file *file, char __user *buf, switch (idx) { case 
MAX20730_DEBUGFS_VOUT_MIN: ret = VOLT_FROM_REG(data->mfr_voutmin * 10000); - len = snprintf(tbuf, DEBUG_FS_DATA_MAX, "%d.%d\n", - ret / 10000, ret % 10000); + len = scnprintf(tbuf, DEBUG_FS_DATA_MAX, "%d.%d\n", + ret / 10000, ret % 10000); break; case MAX20730_DEBUGFS_FREQUENCY: val = (data->mfr_devset1 & MAX20730_MFR_DEVSET1_FSW_MASK) @@ -141,7 +141,7 @@ static ssize_t max20730_debugfs_read(struct file *file, char __user *buf, ret = 800; else ret = 900; - len = snprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); + len = scnprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); break; case MAX20730_DEBUGFS_PG_DELAY: val = (data->mfr_devset1 & MAX20730_MFR_DEVSET1_TSTAT_MASK) @@ -223,7 +223,7 @@ static ssize_t max20730_debugfs_read(struct file *file, char __user *buf, case MAX20730_DEBUGFS_OC_PROTECT_MODE: ret = (data->mfr_devset2 & MAX20730_MFR_DEVSET2_OCPM_MASK) >> MAX20730_MFR_DEVSET2_OCPM_BIT_POS; - len = snprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); + len = scnprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); break; case MAX20730_DEBUGFS_SS_TIMING: val = (data->mfr_devset2 & MAX20730_MFR_DEVSET2_SS_MASK) @@ -241,32 +241,32 @@ static ssize_t max20730_debugfs_read(struct file *file, char __user *buf, case MAX20730_DEBUGFS_IMAX: ret = (data->mfr_devset2 & MAX20730_MFR_DEVSET2_IMAX_MASK) >> MAX20730_MFR_DEVSET2_IMAX_BIT_POS; - len = snprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); + len = scnprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); break; case MAX20730_DEBUGFS_OPERATION: ret = i2c_smbus_read_byte_data(psu->client, PMBUS_OPERATION); if (ret < 0) return ret; - len = snprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); + len = scnprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); break; case MAX20730_DEBUGFS_ON_OFF_CONFIG: ret = i2c_smbus_read_byte_data(psu->client, PMBUS_ON_OFF_CONFIG); if (ret < 0) return ret; - len = snprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); + len = scnprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); break; case MAX20730_DEBUGFS_SMBALERT_MASK: ret = 
i2c_smbus_read_word_data(psu->client, PMBUS_SMB_ALERT_MASK); if (ret < 0) return ret; - len = snprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); + len = scnprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); break; case MAX20730_DEBUGFS_VOUT_MODE: ret = i2c_smbus_read_byte_data(psu->client, PMBUS_VOUT_MODE); if (ret < 0) return ret; - len = snprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); + len = scnprintf(tbuf, DEBUG_FS_DATA_MAX, "%d\n", ret); break; case MAX20730_DEBUGFS_VOUT_COMMAND: ret = i2c_smbus_read_word_data(psu->client, PMBUS_VOUT_COMMAND); @@ -274,8 +274,8 @@ static ssize_t max20730_debugfs_read(struct file *file, char __user *buf, return ret; ret = VOLT_FROM_REG(ret * 10000); - len = snprintf(tbuf, DEBUG_FS_DATA_MAX, - "%d.%d\n", ret / 10000, ret % 10000); + len = scnprintf(tbuf, DEBUG_FS_DATA_MAX, + "%d.%d\n", ret / 10000, ret % 10000); break; case MAX20730_DEBUGFS_VOUT_MAX: ret = i2c_smbus_read_word_data(psu->client, PMBUS_VOUT_MAX); @@ -283,8 +283,8 @@ static ssize_t max20730_debugfs_read(struct file *file, char __user *buf, return ret; ret = VOLT_FROM_REG(ret * 10000); - len = snprintf(tbuf, DEBUG_FS_DATA_MAX, - "%d.%d\n", ret / 10000, ret % 10000); + len = scnprintf(tbuf, DEBUG_FS_DATA_MAX, + "%d.%d\n", ret / 10000, ret % 10000); break; default: len = strlcpy(tbuf, "Invalid\n", DEBUG_FS_DATA_MAX); diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index 170a9f82ca61..b0e2820a2d57 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -941,12 +941,16 @@ static ssize_t pmbus_show_sensor(struct device *dev, struct i2c_client *client = to_i2c_client(dev->parent); struct pmbus_sensor *sensor = to_pmbus_sensor(devattr); struct pmbus_data *data = i2c_get_clientdata(client); + ssize_t ret; + mutex_lock(&data->update_lock); pmbus_update_sensor_data(client, sensor); if (sensor->data < 0) - return sensor->data; - - return snprintf(buf, PAGE_SIZE, "%lld\n", pmbus_reg2data(data, sensor)); + ret = 
sensor->data; + else + ret = snprintf(buf, PAGE_SIZE, "%lld\n", pmbus_reg2data(data, sensor)); + mutex_unlock(&data->update_lock); + return ret; } static ssize_t pmbus_set_sensor(struct device *dev, @@ -2012,8 +2016,11 @@ static ssize_t pmbus_show_samples(struct device *dev, int val; struct i2c_client *client = to_i2c_client(dev->parent); struct pmbus_samples_reg *reg = to_samples_reg(devattr); + struct pmbus_data *data = i2c_get_clientdata(client); + mutex_lock(&data->update_lock); val = _pmbus_read_word_data(client, reg->page, 0xff, reg->attr->reg); + mutex_unlock(&data->update_lock); if (val < 0) return val; diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c index bdba2143021a..1f63807c0399 100644 --- a/drivers/hwmon/pwm-fan.c +++ b/drivers/hwmon/pwm-fan.c @@ -54,16 +54,18 @@ static irqreturn_t pulse_handler(int irq, void *dev_id) static void sample_timer(struct timer_list *t) { struct pwm_fan_ctx *ctx = from_timer(ctx, t, rpm_timer); + unsigned int delta = ktime_ms_delta(ktime_get(), ctx->sample_start); int pulses; - u64 tmp; - pulses = atomic_read(&ctx->pulses); - atomic_sub(pulses, &ctx->pulses); - tmp = (u64)pulses * ktime_ms_delta(ktime_get(), ctx->sample_start) * 60; - do_div(tmp, ctx->pulses_per_revolution * 1000); - ctx->rpm = tmp; + if (delta) { + pulses = atomic_read(&ctx->pulses); + atomic_sub(pulses, &ctx->pulses); + ctx->rpm = (unsigned int)(pulses * 1000 * 60) / + (ctx->pulses_per_revolution * delta); + + ctx->sample_start = ktime_get(); + } - ctx->sample_start = ktime_get(); mod_timer(&ctx->rpm_timer, jiffies + HZ); } diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 404b40af31cb..b2e804473209 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -333,6 +333,11 @@ static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info) dmar_iommu_notify_scope_dev(info); } +static inline void vf_inherit_msi_domain(struct pci_dev *pdev) +{ + dev_set_msi_domain(&pdev->dev, 
dev_get_msi_domain(&pdev->physfn->dev)); +} + static int dmar_pci_bus_notifier(struct notifier_block *nb, unsigned long action, void *data) { @@ -342,8 +347,20 @@ static int dmar_pci_bus_notifier(struct notifier_block *nb, /* Only care about add/remove events for physical functions. * For VFs we actually do the lookup based on the corresponding * PF in device_to_iommu() anyway. */ - if (pdev->is_virtfn) + if (pdev->is_virtfn) { + /* + * Ensure that the VF device inherits the irq domain of the + * PF device. Ideally the device would inherit the domain + * from the bus, but DMAR can have multiple units per bus + * which makes this impossible. The VF 'bus' could inherit + * from the PF device, but that's yet another x86'sism to + * inflict on everybody else. + */ + if (action == BUS_NOTIFY_ADD_DEVICE) + vf_inherit_msi_domain(pdev); return NOTIFY_DONE; + } + if (action != BUS_NOTIFY_ADD_DEVICE && action != BUS_NOTIFY_REMOVED_DEVICE) return NOTIFY_DONE; diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 0fec31931e11..7db602434ac5 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3842,8 +3842,6 @@ static void its_vpe_schedule(struct its_vpe *vpe) val |= vpe->idai ? 
GICR_VPENDBASER_IDAI : 0; val |= GICR_VPENDBASER_Valid; gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); - - its_wait_vpt_parse_complete(); } static void its_vpe_deschedule(struct its_vpe *vpe) @@ -3891,6 +3889,10 @@ static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) its_vpe_deschedule(vpe); return 0; + case COMMIT_VPE: + its_wait_vpt_parse_complete(); + return 0; + case INVALL_VPE: its_vpe_invall(vpe); return 0; @@ -4052,8 +4054,6 @@ static void its_vpe_4_1_schedule(struct its_vpe *vpe, val |= FIELD_PREP(GICR_VPENDBASER_4_1_VPEID, vpe->vpe_id); gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); - - its_wait_vpt_parse_complete(); } static void its_vpe_4_1_deschedule(struct its_vpe *vpe, @@ -4128,6 +4128,10 @@ static int its_vpe_4_1_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) its_vpe_4_1_deschedule(vpe, info); return 0; + case COMMIT_VPE: + its_wait_vpt_parse_complete(); + return 0; + case INVALL_VPE: its_vpe_4_1_invall(vpe); return 0; diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c index 0c18714ae13e..5d1dc9915272 100644 --- a/drivers/irqchip/irq-gic-v4.c +++ b/drivers/irqchip/irq-gic-v4.c @@ -232,6 +232,8 @@ int its_make_vpe_non_resident(struct its_vpe *vpe, bool db) if (!ret) vpe->resident = false; + vpe->ready = false; + return ret; } @@ -258,6 +260,23 @@ int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en) return ret; } +int its_commit_vpe(struct its_vpe *vpe) +{ + struct its_cmd_info info = { + .cmd_type = COMMIT_VPE, + }; + int ret; + + WARN_ON(preemptible()); + + ret = its_send_vpe_cmd(vpe, &info); + if (!ret) + vpe->ready = true; + + return ret; +} + + int its_invall_vpe(struct its_vpe *vpe) { struct its_cmd_info info = { diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index 901e213daf40..ada570f35a41 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ 
b/drivers/misc/habanalabs/common/command_buffer.c @@ -142,11 +142,10 @@ static void cb_fini(struct hl_device *hdev, struct hl_cb *cb) { if (cb->is_internal) gen_pool_free(hdev->internal_cb_pool, - cb->kernel_address, cb->size); + (uintptr_t)cb->kernel_address, cb->size); else hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size, - (void *) (uintptr_t) cb->kernel_address, - cb->bus_address); + cb->kernel_address, cb->bus_address); kfree(cb); } @@ -230,7 +229,7 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size, return NULL; } - cb->kernel_address = (u64) (uintptr_t) p; + cb->kernel_address = p; cb->size = cb_size; return cb; @@ -509,7 +508,7 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) vma->vm_private_data = cb; - rc = hdev->asic_funcs->cb_mmap(hdev, vma, (void *) cb->kernel_address, + rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address, cb->bus_address, cb->size); if (rc) { spin_lock(&cb->lock); diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 80d4d7385ffe..6ed974d2def0 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -452,7 +452,7 @@ struct hl_cb { struct list_head pool_list; struct list_head va_block_list; u64 id; - u64 kernel_address; + void *kernel_address; dma_addr_t bus_address; u32 mmap_size; u32 size; @@ -515,7 +515,7 @@ struct hl_hw_queue { struct hl_hw_sob hw_sob[HL_RSVD_SOBS]; struct hl_cs_job **shadow_queue; enum hl_queue_type queue_type; - u64 kernel_address; + void *kernel_address; dma_addr_t bus_address; u32 pi; atomic_t ci; @@ -544,7 +544,7 @@ struct hl_hw_queue { */ struct hl_cq { struct hl_device *hdev; - u64 kernel_address; + void *kernel_address; dma_addr_t bus_address; u32 cq_idx; u32 hw_queue_id; @@ -562,7 +562,7 @@ struct hl_cq { */ struct hl_eq { struct hl_device *hdev; - u64 kernel_address; + void *kernel_address; dma_addr_t bus_address; u32 ci; }; @@ -757,7 +757,7 
@@ struct hl_asic_funcs { u32 (*get_dma_desc_list_size)(struct hl_device *hdev, struct sg_table *sgt); void (*add_end_of_cb_packets)(struct hl_device *hdev, - u64 kernel_address, u32 len, + void *kernel_address, u32 len, u64 cq_addr, u32 cq_val, u32 msix_num, bool eb); void (*update_eq_ci)(struct hl_device *hdev, u32 val); @@ -1382,13 +1382,13 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); for (;;) { \ /* Verify we read updates done by other cores or by device */ \ mb(); \ - (val) = *((u32 *) (uintptr_t) (addr)); \ + (val) = *((u32 *)(addr)); \ if (mem_written_by_device) \ (val) = le32_to_cpu(*(__le32 *) &(val)); \ if (cond) \ break; \ if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) { \ - (val) = *((u32 *) (uintptr_t) (addr)); \ + (val) = *((u32 *)(addr)); \ if (mem_written_by_device) \ (val) = le32_to_cpu(*(__le32 *) &(val)); \ break; \ diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c index 5e66c98fb0d3..250cf9cefc06 100644 --- a/drivers/misc/habanalabs/common/hw_queue.c +++ b/drivers/misc/habanalabs/common/hw_queue.c @@ -75,7 +75,7 @@ static void ext_and_hw_queue_submit_bd(struct hl_device *hdev, { struct hl_bd *bd; - bd = (struct hl_bd *) (uintptr_t) q->kernel_address; + bd = q->kernel_address; bd += hl_pi_2_offset(q->pi); bd->ctl = cpu_to_le32(ctl); bd->len = cpu_to_le32(len); @@ -335,8 +335,7 @@ static void int_queue_schedule_job(struct hl_cs_job *job) bd.len = cpu_to_le32(job->job_cb_size); bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb); - pi = (__le64 *) (uintptr_t) (q->kernel_address + - ((q->pi & (q->int_queue_len - 1)) * sizeof(bd))); + pi = q->kernel_address + (q->pi & (q->int_queue_len - 1)) * sizeof(bd); q->pi++; q->pi &= ((q->int_queue_len << 1) - 1); @@ -630,7 +629,7 @@ static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q, if (!p) return -ENOMEM; - q->kernel_address = (u64) (uintptr_t) p; + q->kernel_address = p; q->shadow_queue = 
kmalloc_array(HL_QUEUE_LENGTH, sizeof(*q->shadow_queue), @@ -653,11 +652,11 @@ free_queue: if (is_cpu_queue) hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, HL_QUEUE_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address); + q->kernel_address); else hdev->asic_funcs->asic_dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address, + q->kernel_address, q->bus_address); return rc; @@ -676,7 +675,7 @@ static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) return -EFAULT; } - q->kernel_address = (u64) (uintptr_t) p; + q->kernel_address = p; q->pi = 0; atomic_set(&q->ci, 0); @@ -704,7 +703,7 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q) if (!p) return -ENOMEM; - q->kernel_address = (u64) (uintptr_t) p; + q->kernel_address = p; /* Make sure read/write pointers are initialized to start of queue */ atomic_set(&q->ci, 0); @@ -839,11 +838,11 @@ static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q) if (q->queue_type == QUEUE_TYPE_CPU) hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, HL_QUEUE_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address); + q->kernel_address); else hdev->asic_funcs->asic_dma_free_coherent(hdev, HL_QUEUE_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address, + q->kernel_address, q->bus_address); } diff --git a/drivers/misc/habanalabs/common/irq.c b/drivers/misc/habanalabs/common/irq.c index d20e40a53d70..de53fb5f978a 100644 --- a/drivers/misc/habanalabs/common/irq.c +++ b/drivers/misc/habanalabs/common/irq.c @@ -90,7 +90,7 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg) return IRQ_HANDLED; } - cq_base = (struct hl_cq_entry *) (uintptr_t) cq->kernel_address; + cq_base = cq->kernel_address; while (1) { bool entry_ready = ((le32_to_cpu(cq_base[cq->ci].data) & @@ -152,7 +152,7 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg) struct hl_eq_entry *eq_base; struct hl_eqe_work *handle_eqe_work; - eq_base = (struct hl_eq_entry *) (uintptr_t) 
eq->kernel_address; + eq_base = eq->kernel_address; while (1) { bool entry_ready = @@ -221,7 +221,7 @@ int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id) return -ENOMEM; q->hdev = hdev; - q->kernel_address = (u64) (uintptr_t) p; + q->kernel_address = p; q->hw_queue_id = hw_queue_id; q->ci = 0; q->pi = 0; @@ -242,7 +242,8 @@ int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id) void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q) { hdev->asic_funcs->asic_dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address, q->bus_address); + q->kernel_address, + q->bus_address); } void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q) @@ -259,7 +260,7 @@ void hl_cq_reset(struct hl_device *hdev, struct hl_cq *q) * when the device is operational again */ - memset((void *) (uintptr_t) q->kernel_address, 0, HL_CQ_SIZE_IN_BYTES); + memset(q->kernel_address, 0, HL_CQ_SIZE_IN_BYTES); } /** @@ -282,7 +283,7 @@ int hl_eq_init(struct hl_device *hdev, struct hl_eq *q) return -ENOMEM; q->hdev = hdev; - q->kernel_address = (u64) (uintptr_t) p; + q->kernel_address = p; q->ci = 0; return 0; @@ -302,7 +303,7 @@ void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q) hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, HL_EQ_SIZE_IN_BYTES, - (void *) (uintptr_t) q->kernel_address); + q->kernel_address); } void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q) @@ -316,5 +317,5 @@ void hl_eq_reset(struct hl_device *hdev, struct hl_eq *q) * when the device is operational again */ - memset((void *) (uintptr_t) q->kernel_address, 0, HL_EQ_SIZE_IN_BYTES); + memset(q->kernel_address, 0, HL_EQ_SIZE_IN_BYTES); } diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 5f65a1691551..2519a34e25b7 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -680,8 +680,7 @@ static int _gaudi_init_tpc_mem(struct hl_device *hdev, if (!cb) return 
-EFAULT; - init_tpc_mem_pkt = (struct packet_lin_dma *) (uintptr_t) - cb->kernel_address; + init_tpc_mem_pkt = cb->kernel_address; cb_size = sizeof(*init_tpc_mem_pkt); memset(init_tpc_mem_pkt, 0, cb_size); @@ -3811,8 +3810,7 @@ static int gaudi_validate_cb(struct hl_device *hdev, u16 pkt_size; struct gaudi_packet *user_pkt; - user_pkt = (struct gaudi_packet *) (uintptr_t) - (parser->user_cb->kernel_address + cb_parsed_length); + user_pkt = parser->user_cb->kernel_address + cb_parsed_length; pkt_id = (enum packet_id) ( (le64_to_cpu(user_pkt->header) & @@ -4035,11 +4033,9 @@ static int gaudi_patch_cb(struct hl_device *hdev, u32 new_pkt_size = 0; struct gaudi_packet *user_pkt, *kernel_pkt; - user_pkt = (struct gaudi_packet *) (uintptr_t) - (parser->user_cb->kernel_address + cb_parsed_length); - kernel_pkt = (struct gaudi_packet *) (uintptr_t) - (parser->patched_cb->kernel_address + - cb_patched_cur_length); + user_pkt = parser->user_cb->kernel_address + cb_parsed_length; + kernel_pkt = parser->patched_cb->kernel_address + + cb_patched_cur_length; pkt_id = (enum packet_id) ( (le64_to_cpu(user_pkt->header) & @@ -4155,8 +4151,8 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev, * The check that parser->user_cb_size <= parser->user_cb->size was done * in validate_queue_index(). 
*/ - memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address, - (void *) (uintptr_t) parser->user_cb->kernel_address, + memcpy(parser->patched_cb->kernel_address, + parser->user_cb->kernel_address, parser->user_cb_size); patched_cb_size = parser->patched_cb_size; @@ -4290,7 +4286,7 @@ static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) } static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, - u64 kernel_address, u32 len, + void *kernel_address, u32 len, u64 cq_addr, u32 cq_val, u32 msi_vec, bool eb) { @@ -4298,8 +4294,7 @@ static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, struct packet_msg_prot *cq_pkt; u32 tmp; - cq_pkt = (struct packet_msg_prot *) (uintptr_t) - (kernel_address + len - (sizeof(struct packet_msg_prot) * 2)); + cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2); tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); @@ -4342,7 +4337,7 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, if (!cb) return -EFAULT; - lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address; + lin_dma_pkt = cb->kernel_address; memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt)); cb_size = sizeof(*lin_dma_pkt); @@ -4747,7 +4742,7 @@ static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val) (addr - gaudi->hbm_bar_cur_addr)); } -static void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid) +void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid) { /* mask to zero the MMBP and ASID bits */ WREG32_AND(reg, ~0x7FF); @@ -4915,9 +4910,6 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid); gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid); - gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); - gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); - hdev->asic_funcs->set_clock_gating(hdev); 
mutex_unlock(&gaudi->clk_gate_mutex); @@ -4954,8 +4946,8 @@ static int gaudi_send_job_on_qman0(struct hl_device *hdev, cb = job->patched_cb; - fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address + - job->job_cb_size - sizeof(struct packet_msg_prot)); + fence_pkt = cb->kernel_address + + job->job_cb_size - sizeof(struct packet_msg_prot); tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); @@ -6386,7 +6378,7 @@ static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id) struct packet_msg_short *pkt; u32 value, ctl; - pkt = (struct packet_msg_short *) (uintptr_t) cb->kernel_address; + pkt = cb->kernel_address; memset(pkt, 0, sizeof(*pkt)); /* Inc by 1, Mode ADD */ @@ -6478,7 +6470,7 @@ static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id, u16 sob_val, u16 mon_id, u32 q_idx) { struct hl_cb *cb = (struct hl_cb *) data; - void *buf = (void *) (uintptr_t) cb->kernel_address; + void *buf = cb->kernel_address; u64 monitor_base, fence_addr = 0; u32 size = 0; u16 msg_addr_offset; diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h index 83ad2b0a3a61..8eb598db81b2 100644 --- a/drivers/misc/habanalabs/gaudi/gaudiP.h +++ b/drivers/misc/habanalabs/gaudi/gaudiP.h @@ -271,5 +271,6 @@ void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq); int gaudi_debug_coresight(struct hl_device *hdev, void *data); void gaudi_halt_coresight(struct hl_device *hdev); int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); +void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid); #endif /* GAUDIP_H_ */ diff --git a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c index 881531d4d9da..3d2b0f0f4650 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c +++ b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c @@ -623,6 +623,11 @@ static 
int gaudi_config_etr(struct hl_device *hdev, return -EINVAL; } + gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, + hdev->compute_ctx->asid); + gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, + hdev->compute_ctx->asid); + msb = upper_32_bits(input->buffer_address) >> 8; msb &= PSOC_GLOBAL_CONF_TRACE_ADDR_MSB_MASK; WREG32(mmPSOC_GLOBAL_CONF_TRACE_ADDR, msb); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 5db52064ed9e..235d47b2420f 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -2882,8 +2882,8 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job) cb = job->patched_cb; - fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address + - job->job_cb_size - sizeof(struct packet_msg_prot)); + fence_pkt = cb->kernel_address + + job->job_cb_size - sizeof(struct packet_msg_prot); tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) | (1 << GOYA_PKT_CTL_EB_SHIFT) | @@ -3475,8 +3475,7 @@ static int goya_validate_cb(struct hl_device *hdev, u16 pkt_size; struct goya_packet *user_pkt; - user_pkt = (struct goya_packet *) (uintptr_t) - (parser->user_cb->kernel_address + cb_parsed_length); + user_pkt = parser->user_cb->kernel_address + cb_parsed_length; pkt_id = (enum packet_id) ( (le64_to_cpu(user_pkt->header) & @@ -3713,11 +3712,9 @@ static int goya_patch_cb(struct hl_device *hdev, u32 new_pkt_size = 0; struct goya_packet *user_pkt, *kernel_pkt; - user_pkt = (struct goya_packet *) (uintptr_t) - (parser->user_cb->kernel_address + cb_parsed_length); - kernel_pkt = (struct goya_packet *) (uintptr_t) - (parser->patched_cb->kernel_address + - cb_patched_cur_length); + user_pkt = parser->user_cb->kernel_address + cb_parsed_length; + kernel_pkt = parser->patched_cb->kernel_address + + cb_patched_cur_length; pkt_id = (enum packet_id) ( (le64_to_cpu(user_pkt->header) & @@ -3841,8 +3838,8 @@ static int goya_parse_cb_mmu(struct hl_device 
*hdev, * The check that parser->user_cb_size <= parser->user_cb->size was done * in validate_queue_index(). */ - memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address, - (void *) (uintptr_t) parser->user_cb->kernel_address, + memcpy(parser->patched_cb->kernel_address, + parser->user_cb->kernel_address, parser->user_cb_size); patched_cb_size = parser->patched_cb_size; @@ -3974,15 +3971,14 @@ int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) return goya_parse_cb_no_mmu(hdev, parser); } -void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address, +void goya_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address, u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec, bool eb) { struct packet_msg_prot *cq_pkt; u32 tmp; - cq_pkt = (struct packet_msg_prot *) (uintptr_t) - (kernel_address + len - (sizeof(struct packet_msg_prot) * 2)); + cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2); tmp = (PACKET_MSG_PROT << GOYA_PKT_CTL_OPCODE_SHIFT) | (1 << GOYA_PKT_CTL_EB_SHIFT) | @@ -4746,7 +4742,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, if (!cb) return -ENOMEM; - lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address; + lin_dma_pkt = cb->kernel_address; do { memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt)); diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index 09b4006d4dc3..def86c75e035 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -217,7 +217,7 @@ int goya_resume(struct hl_device *hdev); void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry); void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size); -void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address, +void goya_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address, u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec, bool eb); int 
goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser); diff --git a/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h b/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h index f395721060bd..46aed13f16b1 100644 --- a/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h +++ b/drivers/misc/habanalabs/include/gaudi/gaudi_masks.h @@ -421,7 +421,6 @@ enum axi_id { #define QM_ARB_ERR_MSG_EN_MASK (\ QM_ARB_ERR_MSG_EN_CHOISE_OVF_MASK |\ - QM_ARB_ERR_MSG_EN_CHOISE_WDT_MASK |\ QM_ARB_ERR_MSG_EN_AXI_LBW_ERR_MASK) #define PCIE_AUX_FLR_CTRL_HW_CTRL_MASK 0x1 diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h index 64143d4ec758..9e08a9843bba 100644 --- a/drivers/misc/mei/client.h +++ b/drivers/misc/mei/client.h @@ -182,11 +182,11 @@ static inline u8 mei_cl_me_id(const struct mei_cl *cl) * * @cl: host client * - * Return: mtu + * Return: mtu or 0 if client is not connected */ static inline size_t mei_cl_mtu(const struct mei_cl *cl) { - return cl->me_cl->props.max_msg_length; + return cl->me_cl ? cl->me_cl->props.max_msg_length : 0; } /** diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 414314151d0a..acb9c81a4e45 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -572,17 +572,6 @@ static void renesas_sdhi_reset(struct tmio_mmc_host *host) TMIO_MASK_INIT_RCAR2); } -/* - * This is a temporary workaround! This driver used 'hw_reset' wrongly and the - * fix for that showed a regression. So, we mimic the old behaviour until the - * proper solution is found. 
- */ -static void renesas_sdhi_hw_reset(struct mmc_host *mmc) -{ - struct tmio_mmc_host *host = mmc_priv(mmc); - renesas_sdhi_reset(host); -} - #define SH_MOBILE_SDHI_MIN_TAP_ROW 3 static int renesas_sdhi_select_tuning(struct tmio_mmc_host *host) @@ -1020,8 +1009,6 @@ int renesas_sdhi_probe(struct platform_device *pdev, if (of_data && of_data->scc_offset) { priv->scc_ctl = host->ctl + of_data->scc_offset; host->reset = renesas_sdhi_reset; - host->ops.hw_reset = renesas_sdhi_hw_reset; - host->mmc->caps |= MMC_CAP_HW_RESET; } } @@ -1160,6 +1147,7 @@ int renesas_sdhi_remove(struct platform_device *pdev) tmio_mmc_host_remove(host); renesas_sdhi_clk_disable(host); + tmio_mmc_host_free(host); return 0; } diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index bb094459196a..ab5ab969f711 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -1324,6 +1324,8 @@ static struct soc_device_attribute soc_fixup_sdhc_clkdivs[] = { static struct soc_device_attribute soc_unreliable_pulse_detection[] = { { .family = "QorIQ LX2160A", .revision = "1.0", }, + { .family = "QorIQ LX2160A", .revision = "2.0", }, + { .family = "QorIQ LS1028A", .revision = "1.0", }, { }, }; diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 2fce0518632d..cb4149fd12e0 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -175,6 +175,8 @@ static void tmio_mmc_reset(struct tmio_mmc_host *host) if (host->reset) host->reset(host); + tmio_mmc_abort_dma(host); + if (host->pdata->flags & TMIO_MMC_SDIO_IRQ) { sd_ctrl_write16(host, CTL_SDIO_IRQ_MASK, host->sdio_irq_mask); sd_ctrl_write16(host, CTL_TRANSACTION_CTL, 0x0001); @@ -223,8 +225,6 @@ static void tmio_mmc_reset_work(struct work_struct *work) /* Ready for new calls */ host->mrq = NULL; - - tmio_mmc_abort_dma(host); mmc_request_done(host->mmc, mrq); } @@ -927,6 +927,9 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, 
struct mmc_ios *ios) switch (ios->power_mode) { case MMC_POWER_OFF: tmio_mmc_power_off(host); + /* Downgrade ensures a sane state for tuning HW (e.g. SCC) */ + if (host->mmc->ops->hs400_downgrade) + host->mmc->ops->hs400_downgrade(host->mmc); host->set_clock(host, 0); break; case MMC_POWER_UP: diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 40ca71b29bb9..9b01afcb7777 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2060,8 +2060,6 @@ static void nvme_update_disk_info(struct gendisk *disk, if (id->nsattr & NVME_NS_ATTR_RO) set_disk_ro(disk, true); - else - set_disk_ro(disk, false); } static inline bool nvme_first_scan(struct gendisk *disk) diff --git a/drivers/of/address.c b/drivers/of/address.c index eb9ab4f1e80b..1c3257a2d4e3 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -1034,11 +1034,13 @@ out: */ bool of_dma_is_coherent(struct device_node *np) { - struct device_node *node = of_node_get(np); + struct device_node *node; if (IS_ENABLED(CONFIG_OF_DMA_DEFAULT_COHERENT)) return true; + node = of_node_get(np); + while (node) { if (of_property_read_bool(node, "dma-coherent")) { of_node_put(node); diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c index 6a94eaecf638..d6b849552a1e 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c @@ -286,13 +286,14 @@ int aspeed_pinmux_set_mux(struct pinctrl_dev *pctldev, unsigned int function, static bool aspeed_expr_is_gpio(const struct aspeed_sig_expr *expr) { /* - * The signal type is GPIO if the signal name has "GPIO" as a prefix. + * The signal type is GPIO if the signal name has "GPI" as a prefix. * strncmp (rather than strcmp) is used to implement the prefix * requirement. * - * expr->signal might look like "GPIOT3" in the GPIO case. + * expr->signal might look like "GPIOB1" in the GPIO case. + * expr->signal might look like "GPIT0" in the GPI case. 
*/ - return strncmp(expr->signal, "GPIO", 4) == 0; + return strncmp(expr->signal, "GPI", 3) == 0; } static bool aspeed_gpio_in_exprs(const struct aspeed_sig_expr **exprs) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 154ce3f908cd..1c10ab184783 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -62,10 +62,10 @@ #define PADCFG1_TERM_UP BIT(13) #define PADCFG1_TERM_SHIFT 10 #define PADCFG1_TERM_MASK GENMASK(12, 10) -#define PADCFG1_TERM_20K 4 -#define PADCFG1_TERM_2K 3 -#define PADCFG1_TERM_5K 2 -#define PADCFG1_TERM_1K 1 +#define PADCFG1_TERM_20K BIT(2) +#define PADCFG1_TERM_5K BIT(1) +#define PADCFG1_TERM_1K BIT(0) +#define PADCFG1_TERM_833 (BIT(1) | BIT(0)) #define PADCFG2 0x008 #define PADCFG2_DEBEN BIT(0) @@ -549,12 +549,12 @@ static int intel_config_get_pull(struct intel_pinctrl *pctrl, unsigned int pin, return -EINVAL; switch (term) { + case PADCFG1_TERM_833: + *arg = 833; + break; case PADCFG1_TERM_1K: *arg = 1000; break; - case PADCFG1_TERM_2K: - *arg = 2000; - break; case PADCFG1_TERM_5K: *arg = 5000; break; @@ -570,6 +570,11 @@ static int intel_config_get_pull(struct intel_pinctrl *pctrl, unsigned int pin, return -EINVAL; switch (term) { + case PADCFG1_TERM_833: + if (!(community->features & PINCTRL_FEATURE_1K_PD)) + return -EINVAL; + *arg = 833; + break; case PADCFG1_TERM_1K: if (!(community->features & PINCTRL_FEATURE_1K_PD)) return -EINVAL; @@ -678,6 +683,10 @@ static int intel_config_set_pull(struct intel_pinctrl *pctrl, unsigned int pin, value |= PADCFG1_TERM_UP; + /* Set default strength value in case none is given */ + if (arg == 1) + arg = 5000; + switch (arg) { case 20000: value |= PADCFG1_TERM_20K << PADCFG1_TERM_SHIFT; @@ -685,12 +694,12 @@ static int intel_config_set_pull(struct intel_pinctrl *pctrl, unsigned int pin, case 5000: value |= PADCFG1_TERM_5K << PADCFG1_TERM_SHIFT; break; - case 2000: - value |= PADCFG1_TERM_2K << PADCFG1_TERM_SHIFT; - 
break; case 1000: value |= PADCFG1_TERM_1K << PADCFG1_TERM_SHIFT; break; + case 833: + value |= PADCFG1_TERM_833 << PADCFG1_TERM_SHIFT; + break; default: ret = -EINVAL; } @@ -700,6 +709,10 @@ static int intel_config_set_pull(struct intel_pinctrl *pctrl, unsigned int pin, case PIN_CONFIG_BIAS_PULL_DOWN: value &= ~(PADCFG1_TERM_UP | PADCFG1_TERM_MASK); + /* Set default strength value in case none is given */ + if (arg == 1) + arg = 5000; + switch (arg) { case 20000: value |= PADCFG1_TERM_20K << PADCFG1_TERM_SHIFT; @@ -714,6 +727,13 @@ static int intel_config_set_pull(struct intel_pinctrl *pctrl, unsigned int pin, } value |= PADCFG1_TERM_1K << PADCFG1_TERM_SHIFT; break; + case 833: + if (!(community->features & PINCTRL_FEATURE_1K_PD)) { + ret = -EINVAL; + break; + } + value |= PADCFG1_TERM_833 << PADCFG1_TERM_SHIFT; + break; default: ret = -EINVAL; } diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 9a760f5cd7ed..4aea3e05e8c6 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -156,7 +156,7 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset, pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF); pin_reg &= ~BIT(DB_TMR_LARGE_OFF); } else if (debounce < 250000) { - time = debounce / 15600; + time = debounce / 15625; pin_reg |= time & DB_TMR_OUT_MASK; pin_reg &= ~BIT(DB_TMR_OUT_UNIT_OFF); pin_reg |= BIT(DB_TMR_LARGE_OFF); @@ -166,14 +166,14 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset, pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF); pin_reg |= BIT(DB_TMR_LARGE_OFF); } else { - pin_reg &= ~DB_CNTRl_MASK; + pin_reg &= ~(DB_CNTRl_MASK << DB_CNTRL_OFF); ret = -EINVAL; } } else { pin_reg &= ~BIT(DB_TMR_OUT_UNIT_OFF); pin_reg &= ~BIT(DB_TMR_LARGE_OFF); pin_reg &= ~DB_TMR_OUT_MASK; - pin_reg &= ~DB_CNTRl_MASK; + pin_reg &= ~(DB_CNTRl_MASK << DB_CNTRL_OFF); } writel(pin_reg, gpio_dev->base + offset * 4); raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); diff --git 
a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c index c8e50a58a5e5..621909b01deb 100644 --- a/drivers/pinctrl/pinctrl-ingenic.c +++ b/drivers/pinctrl/pinctrl-ingenic.c @@ -635,44 +635,44 @@ static int jz4770_uart3_data_pins[] = { 0x6c, 0x85, }; static int jz4770_uart3_hwflow_pins[] = { 0x88, 0x89, }; static int jz4770_ssi0_dt_a_pins[] = { 0x15, }; static int jz4770_ssi0_dt_b_pins[] = { 0x35, }; -static int jz4770_ssi0_dt_d_pins[] = { 0x55, }; -static int jz4770_ssi0_dt_e_pins[] = { 0x71, }; +static int jz4770_ssi0_dt_d_pins[] = { 0x75, }; +static int jz4770_ssi0_dt_e_pins[] = { 0x91, }; static int jz4770_ssi0_dr_a_pins[] = { 0x14, }; static int jz4770_ssi0_dr_b_pins[] = { 0x34, }; -static int jz4770_ssi0_dr_d_pins[] = { 0x54, }; -static int jz4770_ssi0_dr_e_pins[] = { 0x6e, }; +static int jz4770_ssi0_dr_d_pins[] = { 0x74, }; +static int jz4770_ssi0_dr_e_pins[] = { 0x8e, }; static int jz4770_ssi0_clk_a_pins[] = { 0x12, }; static int jz4770_ssi0_clk_b_pins[] = { 0x3c, }; -static int jz4770_ssi0_clk_d_pins[] = { 0x58, }; -static int jz4770_ssi0_clk_e_pins[] = { 0x6f, }; +static int jz4770_ssi0_clk_d_pins[] = { 0x78, }; +static int jz4770_ssi0_clk_e_pins[] = { 0x8f, }; static int jz4770_ssi0_gpc_b_pins[] = { 0x3e, }; -static int jz4770_ssi0_gpc_d_pins[] = { 0x56, }; -static int jz4770_ssi0_gpc_e_pins[] = { 0x73, }; +static int jz4770_ssi0_gpc_d_pins[] = { 0x76, }; +static int jz4770_ssi0_gpc_e_pins[] = { 0x93, }; static int jz4770_ssi0_ce0_a_pins[] = { 0x13, }; static int jz4770_ssi0_ce0_b_pins[] = { 0x3d, }; -static int jz4770_ssi0_ce0_d_pins[] = { 0x59, }; -static int jz4770_ssi0_ce0_e_pins[] = { 0x70, }; +static int jz4770_ssi0_ce0_d_pins[] = { 0x79, }; +static int jz4770_ssi0_ce0_e_pins[] = { 0x90, }; static int jz4770_ssi0_ce1_b_pins[] = { 0x3f, }; -static int jz4770_ssi0_ce1_d_pins[] = { 0x57, }; -static int jz4770_ssi0_ce1_e_pins[] = { 0x72, }; +static int jz4770_ssi0_ce1_d_pins[] = { 0x77, }; +static int jz4770_ssi0_ce1_e_pins[] = { 
0x92, }; static int jz4770_ssi1_dt_b_pins[] = { 0x35, }; -static int jz4770_ssi1_dt_d_pins[] = { 0x55, }; -static int jz4770_ssi1_dt_e_pins[] = { 0x71, }; +static int jz4770_ssi1_dt_d_pins[] = { 0x75, }; +static int jz4770_ssi1_dt_e_pins[] = { 0x91, }; static int jz4770_ssi1_dr_b_pins[] = { 0x34, }; -static int jz4770_ssi1_dr_d_pins[] = { 0x54, }; -static int jz4770_ssi1_dr_e_pins[] = { 0x6e, }; +static int jz4770_ssi1_dr_d_pins[] = { 0x74, }; +static int jz4770_ssi1_dr_e_pins[] = { 0x8e, }; static int jz4770_ssi1_clk_b_pins[] = { 0x3c, }; -static int jz4770_ssi1_clk_d_pins[] = { 0x58, }; -static int jz4770_ssi1_clk_e_pins[] = { 0x6f, }; +static int jz4770_ssi1_clk_d_pins[] = { 0x78, }; +static int jz4770_ssi1_clk_e_pins[] = { 0x8f, }; static int jz4770_ssi1_gpc_b_pins[] = { 0x3e, }; -static int jz4770_ssi1_gpc_d_pins[] = { 0x56, }; -static int jz4770_ssi1_gpc_e_pins[] = { 0x73, }; +static int jz4770_ssi1_gpc_d_pins[] = { 0x76, }; +static int jz4770_ssi1_gpc_e_pins[] = { 0x93, }; static int jz4770_ssi1_ce0_b_pins[] = { 0x3d, }; -static int jz4770_ssi1_ce0_d_pins[] = { 0x59, }; -static int jz4770_ssi1_ce0_e_pins[] = { 0x70, }; +static int jz4770_ssi1_ce0_d_pins[] = { 0x79, }; +static int jz4770_ssi1_ce0_e_pins[] = { 0x90, }; static int jz4770_ssi1_ce1_b_pins[] = { 0x3f, }; -static int jz4770_ssi1_ce1_d_pins[] = { 0x57, }; -static int jz4770_ssi1_ce1_e_pins[] = { 0x72, }; +static int jz4770_ssi1_ce1_d_pins[] = { 0x77, }; +static int jz4770_ssi1_ce1_e_pins[] = { 0x92, }; static int jz4770_mmc0_1bit_a_pins[] = { 0x12, 0x13, 0x14, }; static int jz4770_mmc0_4bit_a_pins[] = { 0x15, 0x16, 0x17, }; static int jz4770_mmc0_1bit_e_pins[] = { 0x9c, 0x9d, 0x94, }; @@ -1050,35 +1050,35 @@ static int jz4780_ssi0_dt_a_19_pins[] = { 0x13, }; static int jz4780_ssi0_dt_a_21_pins[] = { 0x15, }; static int jz4780_ssi0_dt_a_28_pins[] = { 0x1c, }; static int jz4780_ssi0_dt_b_pins[] = { 0x3d, }; -static int jz4780_ssi0_dt_d_pins[] = { 0x59, }; +static int jz4780_ssi0_dt_d_pins[] = { 0x79, 
}; static int jz4780_ssi0_dr_a_20_pins[] = { 0x14, }; static int jz4780_ssi0_dr_a_27_pins[] = { 0x1b, }; static int jz4780_ssi0_dr_b_pins[] = { 0x34, }; -static int jz4780_ssi0_dr_d_pins[] = { 0x54, }; +static int jz4780_ssi0_dr_d_pins[] = { 0x74, }; static int jz4780_ssi0_clk_a_pins[] = { 0x12, }; static int jz4780_ssi0_clk_b_5_pins[] = { 0x25, }; static int jz4780_ssi0_clk_b_28_pins[] = { 0x3c, }; -static int jz4780_ssi0_clk_d_pins[] = { 0x58, }; +static int jz4780_ssi0_clk_d_pins[] = { 0x78, }; static int jz4780_ssi0_gpc_b_pins[] = { 0x3e, }; -static int jz4780_ssi0_gpc_d_pins[] = { 0x56, }; +static int jz4780_ssi0_gpc_d_pins[] = { 0x76, }; static int jz4780_ssi0_ce0_a_23_pins[] = { 0x17, }; static int jz4780_ssi0_ce0_a_25_pins[] = { 0x19, }; static int jz4780_ssi0_ce0_b_pins[] = { 0x3f, }; -static int jz4780_ssi0_ce0_d_pins[] = { 0x57, }; +static int jz4780_ssi0_ce0_d_pins[] = { 0x77, }; static int jz4780_ssi0_ce1_b_pins[] = { 0x35, }; -static int jz4780_ssi0_ce1_d_pins[] = { 0x55, }; +static int jz4780_ssi0_ce1_d_pins[] = { 0x75, }; static int jz4780_ssi1_dt_b_pins[] = { 0x3d, }; -static int jz4780_ssi1_dt_d_pins[] = { 0x59, }; +static int jz4780_ssi1_dt_d_pins[] = { 0x79, }; static int jz4780_ssi1_dr_b_pins[] = { 0x34, }; -static int jz4780_ssi1_dr_d_pins[] = { 0x54, }; +static int jz4780_ssi1_dr_d_pins[] = { 0x74, }; static int jz4780_ssi1_clk_b_pins[] = { 0x3c, }; -static int jz4780_ssi1_clk_d_pins[] = { 0x58, }; +static int jz4780_ssi1_clk_d_pins[] = { 0x78, }; static int jz4780_ssi1_gpc_b_pins[] = { 0x3e, }; -static int jz4780_ssi1_gpc_d_pins[] = { 0x56, }; +static int jz4780_ssi1_gpc_d_pins[] = { 0x76, }; static int jz4780_ssi1_ce0_b_pins[] = { 0x3f, }; -static int jz4780_ssi1_ce0_d_pins[] = { 0x57, }; +static int jz4780_ssi1_ce0_d_pins[] = { 0x77, }; static int jz4780_ssi1_ce1_b_pins[] = { 0x35, }; -static int jz4780_ssi1_ce1_d_pins[] = { 0x55, }; +static int jz4780_ssi1_ce1_d_pins[] = { 0x75, }; static int jz4780_mmc0_8bit_a_pins[] = { 0x04, 0x05, 
0x06, 0x07, 0x18, }; static int jz4780_i2c3_pins[] = { 0x6a, 0x6b, }; static int jz4780_i2c4_e_pins[] = { 0x8c, 0x8d, }; diff --git a/drivers/pinctrl/pinctrl-mcp23s08_spi.c b/drivers/pinctrl/pinctrl-mcp23s08_spi.c index 1f47a661b0a7..9ae10318f6f3 100644 --- a/drivers/pinctrl/pinctrl-mcp23s08_spi.c +++ b/drivers/pinctrl/pinctrl-mcp23s08_spi.c @@ -119,13 +119,15 @@ static int mcp23s08_spi_regmap_init(struct mcp23s08 *mcp, struct device *dev, return -EINVAL; } - copy = devm_kmemdup(dev, &config, sizeof(config), GFP_KERNEL); + copy = devm_kmemdup(dev, config, sizeof(*config), GFP_KERNEL); if (!copy) return -ENOMEM; copy->name = name; mcp->regmap = devm_regmap_init(dev, &mcp23sxx_spi_regmap, mcp, copy); + if (IS_ERR(mcp->regmap)) + dev_err(dev, "regmap init failed for %s\n", mcp->chip.label); return PTR_ERR_OR_ZERO(mcp->regmap); } diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index 0401c1da79dd..aa1a1c850d05 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -3155,7 +3155,9 @@ static int rockchip_gpio_to_irq(struct gpio_chip *gc, unsigned offset) if (!bank->domain) return -ENXIO; + clk_enable(bank->clk); virq = irq_create_mapping(bank->domain, offset); + clk_disable(bank->clk); return (virq) ? 
: -ENXIO; } @@ -3194,7 +3196,7 @@ static void rockchip_irq_demux(struct irq_desc *desc) irq = __ffs(pend); pend &= ~BIT(irq); - virq = irq_linear_revmap(bank->domain, irq); + virq = irq_find_mapping(bank->domain, irq); if (!virq) { dev_err(bank->drvdata->dev, "unmapped irq %d\n", irq); @@ -3373,7 +3375,7 @@ static int rockchip_interrupts_register(struct platform_device *pdev, unsigned int clr = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; struct irq_chip_generic *gc; int ret; - int i, j; + int i; for (i = 0; i < ctrl->nr_banks; ++i, ++bank) { if (!bank->valid) { @@ -3400,7 +3402,7 @@ static int rockchip_interrupts_register(struct platform_device *pdev, ret = irq_alloc_domain_generic_chips(bank->domain, 32, 1, "rockchip_gpio_irq", handle_level_irq, - clr, 0, IRQ_GC_INIT_MASK_CACHE); + clr, 0, 0); if (ret) { dev_err(&pdev->dev, "could not alloc generic chips for bank %s\n", bank->name); @@ -3409,14 +3411,6 @@ static int rockchip_interrupts_register(struct platform_device *pdev, continue; } - /* - * Linux assumes that all interrupts start out disabled/masked. - * Our driver only uses the concept of masked and always keeps - * things enabled, so for us that's all masked and all enabled. - */ - writel_relaxed(0xffffffff, bank->reg_base + GPIO_INTMASK); - writel_relaxed(0xffffffff, bank->reg_base + GPIO_INTEN); - gc = irq_get_domain_generic_chip(bank->domain, 0); gc->reg_base = bank->reg_base; gc->private = bank; @@ -3433,13 +3427,17 @@ static int rockchip_interrupts_register(struct platform_device *pdev, gc->chip_types[0].chip.irq_set_type = rockchip_irq_set_type; gc->wake_enabled = IRQ_MSK(bank->nr_pins); + /* + * Linux assumes that all interrupts start out disabled/masked. + * Our driver only uses the concept of masked and always keeps + * things enabled, so for us that's all masked and all enabled. 
+ */ + writel_relaxed(0xffffffff, bank->reg_base + GPIO_INTMASK); + writel_relaxed(0xffffffff, bank->reg_base + GPIO_INTEN); + gc->mask_cache = 0xffffffff; + irq_set_chained_handler_and_data(bank->irq, rockchip_irq_demux, bank); - - /* map the gpio irqs here, when the clock is still running */ - for (j = 0 ; j < 32 ; j++) - irq_create_mapping(bank->domain, j); - clk_disable(bank->clk); } diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index c4bcda90aac4..77a25bdf0da7 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -815,21 +815,14 @@ static void msm_gpio_irq_clear_unmask(struct irq_data *d, bool status_clear) static void msm_gpio_irq_enable(struct irq_data *d) { - /* - * Clear the interrupt that may be pending before we enable - * the line. - * This is especially a problem with the GPIOs routed to the - * PDC. These GPIOs are direct-connect interrupts to the GIC. - * Disabling the interrupt line at the PDC does not prevent - * the interrupt from being latched at the GIC. The state at - * GIC needs to be cleared before enabling. - */ - if (d->parent_data) { - irq_chip_set_parent_state(d, IRQCHIP_STATE_PENDING, 0); + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct msm_pinctrl *pctrl = gpiochip_get_data(gc); + + if (d->parent_data) irq_chip_enable_parent(d); - } - msm_gpio_irq_clear_unmask(d, true); + if (!test_bit(d->hwirq, pctrl->skip_wake_irqs)) + msm_gpio_irq_clear_unmask(d, true); } static void msm_gpio_irq_disable(struct irq_data *d) @@ -1104,6 +1097,19 @@ static int msm_gpio_irq_reqres(struct irq_data *d) ret = -EINVAL; goto out; } + + /* + * Clear the interrupt that may be pending before we enable + * the line. + * This is especially a problem with the GPIOs routed to the + * PDC. These GPIOs are direct-connect interrupts to the GIC. + * Disabling the interrupt line at the PDC does not prevent + * the interrupt from being latched at the GIC. 
The state at + * GIC needs to be cleared before enabling. + */ + if (d->parent_data && test_bit(d->hwirq, pctrl->skip_wake_irqs)) + irq_chip_set_parent_state(d, IRQCHIP_STATE_PENDING, 0); + return 0; out: module_put(gc->owner); diff --git a/drivers/pinctrl/qcom/pinctrl-sm8250.c b/drivers/pinctrl/qcom/pinctrl-sm8250.c index 826df0d637ea..af144e724bd9 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8250.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8250.c @@ -1313,6 +1313,22 @@ static const struct msm_pingroup sm8250_groups[] = { [183] = SDC_PINGROUP(sdc2_data, 0xb7000, 9, 0), }; +static const struct msm_gpio_wakeirq_map sm8250_pdc_map[] = { + { 0, 79 }, { 1, 84 }, { 2, 80 }, { 3, 82 }, { 4, 107 }, { 7, 43 }, + { 11, 42 }, { 14, 44 }, { 15, 52 }, { 19, 67 }, { 23, 68 }, { 24, 105 }, + { 27, 92 }, { 28, 106 }, { 31, 69 }, { 35, 70 }, { 39, 37 }, + { 40, 108 }, { 43, 71 }, { 45, 72 }, { 47, 83 }, { 51, 74 }, { 55, 77 }, + { 59, 78 }, { 63, 75 }, { 64, 81 }, { 65, 87 }, { 66, 88 }, { 67, 89 }, + { 68, 54 }, { 70, 85 }, { 77, 46 }, { 80, 90 }, { 81, 91 }, { 83, 97 }, + { 84, 98 }, { 86, 99 }, { 87, 100 }, { 88, 101 }, { 89, 102 }, + { 92, 103 }, { 93, 104 }, { 100, 53 }, { 103, 47 }, { 104, 48 }, + { 108, 49 }, { 109, 94 }, { 110, 95 }, { 111, 96 }, { 112, 55 }, + { 113, 56 }, { 118, 50 }, { 121, 51 }, { 122, 57 }, { 123, 58 }, + { 124, 45 }, { 126, 59 }, { 128, 76 }, { 129, 86 }, { 132, 93 }, + { 133, 65 }, { 134, 66 }, { 136, 62 }, { 137, 63 }, { 138, 64 }, + { 142, 60 }, { 143, 61 } +}; + static const struct msm_pinctrl_soc_data sm8250_pinctrl = { .pins = sm8250_pins, .npins = ARRAY_SIZE(sm8250_pins), @@ -1323,6 +1339,8 @@ static const struct msm_pinctrl_soc_data sm8250_pinctrl = { .ngpios = 181, .tiles = sm8250_tiles, .ntiles = ARRAY_SIZE(sm8250_tiles), + .wakeirq_map = sm8250_pdc_map, + .nwakeirq_map = ARRAY_SIZE(sm8250_pdc_map), }; static int sm8250_pinctrl_probe(struct platform_device *pdev) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 
b8f573a02713..7a160b86adc6 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -1627,12 +1627,12 @@ start: */ fallthrough; case CLKS_OFF: - ufshcd_scsi_block_requests(hba); hba->clk_gating.state = REQ_CLKS_ON; trace_ufshcd_clk_gating(dev_name(hba->dev), hba->clk_gating.state); - queue_work(hba->clk_gating.clk_gating_workq, - &hba->clk_gating.ungate_work); + if (queue_work(hba->clk_gating.clk_gating_workq, + &hba->clk_gating.ungate_work)) + ufshcd_scsi_block_requests(hba); /* * fall through to check if we should wait for this * work to be done or not. @@ -2115,10 +2115,20 @@ ufshcd_wait_for_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) unsigned long flags; if (wait_for_completion_timeout(&uic_cmd->done, - msecs_to_jiffies(UIC_CMD_TIMEOUT))) + msecs_to_jiffies(UIC_CMD_TIMEOUT))) { ret = uic_cmd->argument2 & MASK_UIC_COMMAND_RESULT; - else + } else { ret = -ETIMEDOUT; + dev_err(hba->dev, + "uic cmd 0x%x with arg3 0x%x completion timeout\n", + uic_cmd->command, uic_cmd->argument3); + + if (!uic_cmd->cmd_active) { + dev_err(hba->dev, "%s: UIC cmd has been completed, return the result\n", + __func__); + ret = uic_cmd->argument2 & MASK_UIC_COMMAND_RESULT; + } + } spin_lock_irqsave(hba->host->host_lock, flags); hba->active_uic_cmd = NULL; @@ -2150,6 +2160,7 @@ __ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd, if (completion) init_completion(&uic_cmd->done); + uic_cmd->cmd_active = 1; ufshcd_dispatch_uic_cmd(hba, uic_cmd); return 0; @@ -3807,10 +3818,18 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) dev_err(hba->dev, "pwr ctrl cmd 0x%x with mode 0x%x completion timeout\n", cmd->command, cmd->argument3); + + if (!cmd->cmd_active) { + dev_err(hba->dev, "%s: Power Mode Change operation has been completed, go check UPMCRS\n", + __func__); + goto check_upmcrs; + } + ret = -ETIMEDOUT; goto out; } +check_upmcrs: status = ufshcd_get_upmcrs(hba); if (status != PWR_LOCAL) { dev_err(hba->dev, @@ 
-4902,11 +4921,14 @@ static irqreturn_t ufshcd_uic_cmd_compl(struct ufs_hba *hba, u32 intr_status) ufshcd_get_uic_cmd_result(hba); hba->active_uic_cmd->argument3 = ufshcd_get_dme_attr_val(hba); + if (!hba->uic_async_done) + hba->active_uic_cmd->cmd_active = 0; complete(&hba->active_uic_cmd->done); retval = IRQ_HANDLED; } if ((intr_status & UFSHCD_UIC_PWR_MASK) && hba->uic_async_done) { + hba->active_uic_cmd->cmd_active = 0; complete(hba->uic_async_done); retval = IRQ_HANDLED; } @@ -8906,6 +8928,7 @@ void ufshcd_remove(struct ufs_hba *hba) blk_mq_free_tag_set(&hba->tmf_tag_set); blk_cleanup_queue(hba->cmd_queue); scsi_remove_host(hba->host); + destroy_workqueue(hba->eh_wq); /* disable interrupts */ ufshcd_disable_intr(hba, hba->intr_mask); ufshcd_hba_stop(hba); @@ -9206,6 +9229,7 @@ out_remove_scsi_host: exit_gating: ufshcd_exit_clk_scaling(hba); ufshcd_exit_clk_gating(hba); + destroy_workqueue(hba->eh_wq); out_disable: hba->is_irq_enabled = false; ufshcd_hba_exit(hba); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 47eb1430274c..e0f00a42371c 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -64,6 +64,7 @@ enum dev_cmd_type { * @argument1: UIC command argument 1 * @argument2: UIC command argument 2 * @argument3: UIC command argument 3 + * @cmd_active: Indicate if UIC command is outstanding * @done: UIC command completion */ struct uic_command { @@ -71,6 +72,7 @@ struct uic_command { u32 argument1; u32 argument2; u32 argument3; + int cmd_active; struct completion done; }; diff --git a/drivers/thunderbolt/debugfs.c b/drivers/thunderbolt/debugfs.c index 3680b2784ea1..ed65d2b13964 100644 --- a/drivers/thunderbolt/debugfs.c +++ b/drivers/thunderbolt/debugfs.c @@ -9,6 +9,7 @@ #include <linux/debugfs.h> #include <linux/pm_runtime.h> +#include <linux/uaccess.h> #include "tb.h" diff --git a/drivers/thunderbolt/icm.c b/drivers/thunderbolt/icm.c index b51fc3f62b1f..977ba91f4d0e 100644 --- a/drivers/thunderbolt/icm.c +++ 
b/drivers/thunderbolt/icm.c @@ -2284,6 +2284,8 @@ struct tb *icm_probe(struct tb_nhi *nhi) case PCI_DEVICE_ID_INTEL_TGL_NHI0: case PCI_DEVICE_ID_INTEL_TGL_NHI1: + case PCI_DEVICE_ID_INTEL_TGL_H_NHI0: + case PCI_DEVICE_ID_INTEL_TGL_H_NHI1: icm->is_supported = icm_tgl_is_supported; icm->driver_ready = icm_icl_driver_ready; icm->set_uuid = icm_icl_set_uuid; diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 3f79baa54829..db80dc5dfeba 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -406,12 +406,23 @@ static int ring_request_msix(struct tb_ring *ring, bool no_suspend) ring->vector = ret; - ring->irq = pci_irq_vector(ring->nhi->pdev, ring->vector); - if (ring->irq < 0) - return ring->irq; + ret = pci_irq_vector(ring->nhi->pdev, ring->vector); + if (ret < 0) + goto err_ida_remove; + + ring->irq = ret; irqflags = no_suspend ? IRQF_NO_SUSPEND : 0; - return request_irq(ring->irq, ring_msix, irqflags, "thunderbolt", ring); + ret = request_irq(ring->irq, ring_msix, irqflags, "thunderbolt", ring); + if (ret) + goto err_ida_remove; + + return 0; + +err_ida_remove: + ida_simple_remove(&nhi->msix_ida, ring->vector); + + return ret; } static void ring_release_msix(struct tb_ring *ring) @@ -1334,6 +1345,10 @@ static struct pci_device_id nhi_ids[] = { .driver_data = (kernel_ulong_t)&icl_nhi_ops }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL_NHI1), .driver_data = (kernel_ulong_t)&icl_nhi_ops }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL_H_NHI0), + .driver_data = (kernel_ulong_t)&icl_nhi_ops }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL_H_NHI1), + .driver_data = (kernel_ulong_t)&icl_nhi_ops }, /* Any USB4 compliant host */ { PCI_DEVICE_CLASS(PCI_CLASS_SERIAL_USB_USB4, ~0) }, diff --git a/drivers/thunderbolt/nhi.h b/drivers/thunderbolt/nhi.h index 80162e4b013f..4e0861d75072 100644 --- a/drivers/thunderbolt/nhi.h +++ b/drivers/thunderbolt/nhi.h @@ -75,6 +75,8 @@ extern const struct tb_nhi_ops icl_nhi_ops; #define 
PCI_DEVICE_ID_INTEL_ICL_NHI0 0x8a17 #define PCI_DEVICE_ID_INTEL_TGL_NHI0 0x9a1b #define PCI_DEVICE_ID_INTEL_TGL_NHI1 0x9a1d +#define PCI_DEVICE_ID_INTEL_TGL_H_NHI0 0x9a1f +#define PCI_DEVICE_ID_INTEL_TGL_H_NHI1 0x9a21 #define PCI_CLASS_SERIAL_USB_USB4 0x0c0340 diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h index a9995e21b916..8ea360b0ff77 100644 --- a/drivers/thunderbolt/tb.h +++ b/drivers/thunderbolt/tb.h @@ -784,6 +784,8 @@ static inline bool tb_switch_is_tiger_lake(const struct tb_switch *sw) switch (sw->config.device_id) { case PCI_DEVICE_ID_INTEL_TGL_NHI0: case PCI_DEVICE_ID_INTEL_TGL_NHI1: + case PCI_DEVICE_ID_INTEL_TGL_H_NHI0: + case PCI_DEVICE_ID_INTEL_TGL_H_NHI1: return true; } } diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c index 40f13579a3fe..f2583b4053e4 100644 --- a/drivers/thunderbolt/usb4.c +++ b/drivers/thunderbolt/usb4.c @@ -421,8 +421,12 @@ int usb4_switch_set_wake(struct tb_switch *sw, unsigned int flags) * upstream USB4 port. */ tb_switch_for_each_port(sw, port) { + if (!tb_port_is_null(port)) + continue; if (!route && tb_is_upstream_port(port)) continue; + if (!port->cap_usb4) + continue; ret = tb_port_read(port, &val, TB_CFG_PORT, port->cap_usb4 + PORT_CS_19, 1); diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c index 48907853732a..c00ad817042e 100644 --- a/drivers/thunderbolt/xdomain.c +++ b/drivers/thunderbolt/xdomain.c @@ -881,6 +881,7 @@ static void enumerate_services(struct tb_xdomain *xd) id = ida_simple_get(&xd->service_ids, 0, 0, GFP_KERNEL); if (id < 0) { + kfree(svc->key); kfree(svc); break; } diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 6dca744e39e9..be06f1a961c2 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -413,10 +413,10 @@ static int uio_get_minor(struct uio_device *idev) return retval; } -static void uio_free_minor(struct uio_device *idev) +static void uio_free_minor(unsigned long minor) { mutex_lock(&minor_lock); - idr_remove(&uio_idr, 
idev->minor); + idr_remove(&uio_idr, minor); mutex_unlock(&minor_lock); } @@ -990,7 +990,7 @@ err_request_irq: err_uio_dev_add_attributes: device_del(&idev->dev); err_device_create: - uio_free_minor(idev); + uio_free_minor(idev->minor); put_device(&idev->dev); return ret; } @@ -1042,11 +1042,13 @@ EXPORT_SYMBOL_GPL(__devm_uio_register_device); void uio_unregister_device(struct uio_info *info) { struct uio_device *idev; + unsigned long minor; if (!info || !info->uio_dev) return; idev = info->uio_dev; + minor = idev->minor; mutex_lock(&idev->info_lock); uio_dev_del_attributes(idev); @@ -1062,7 +1064,7 @@ void uio_unregister_device(struct uio_info *info) device_unregister(&idev->dev); - uio_free_minor(idev); + uio_free_minor(minor); return; } diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 1e7568867910..f52f1bc0559f 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1693,6 +1693,15 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x0870, 0x0001), /* Metricom GS Modem */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, + { USB_DEVICE(0x045b, 0x023c), /* Renesas USB Download mode */ + .driver_info = DISABLE_ECHO, /* Don't echo banner */ + }, + { USB_DEVICE(0x045b, 0x0248), /* Renesas USB Download mode */ + .driver_info = DISABLE_ECHO, /* Don't echo banner */ + }, + { USB_DEVICE(0x045b, 0x024D), /* Renesas USB Download mode */ + .driver_info = DISABLE_ECHO, /* Don't echo banner */ + }, { USB_DEVICE(0x0e8d, 0x0003), /* FIREFLY, MediaTek Inc; andrey.arapov@gmail.com */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, diff --git a/drivers/usb/host/xhci-histb.c b/drivers/usb/host/xhci-histb.c index 5546e7e013a8..08369857686e 100644 --- a/drivers/usb/host/xhci-histb.c +++ b/drivers/usb/host/xhci-histb.c @@ -240,7 +240,7 @@ static int xhci_histb_probe(struct platform_device *pdev) /* Initialize dma_mask and coherent_dma_mask to 32-bits */ ret = dma_set_mask_and_coherent(dev, 
DMA_BIT_MASK(32)); if (ret) - return ret; + goto disable_pm; hcd = usb_create_hcd(driver, dev, dev_name(dev)); if (!hcd) { diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index 30085b2be7b9..5892f3ce0cdc 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -429,10 +429,12 @@ static int dsps_musb_init(struct musb *musb) struct platform_device *parent = to_platform_device(dev->parent); const struct dsps_musb_wrapper *wrp = glue->wrp; void __iomem *reg_base; + struct resource *r; u32 rev, val; int ret; - reg_base = devm_platform_ioremap_resource_byname(parent, "control"); + r = platform_get_resource_byname(parent, IORESOURCE_MEM, "control"); + reg_base = devm_ioremap_resource(dev, r); if (IS_ERR(reg_base)) return PTR_ERR(reg_base); musb->ctrl_base = reg_base; diff --git a/drivers/usb/typec/ucsi/psy.c b/drivers/usb/typec/ucsi/psy.c index 26ed0b520749..571a51e16234 100644 --- a/drivers/usb/typec/ucsi/psy.c +++ b/drivers/usb/typec/ucsi/psy.c @@ -238,4 +238,13 @@ void ucsi_unregister_port_psy(struct ucsi_connector *con) return; power_supply_unregister(con->psy); + con->psy = NULL; +} + +void ucsi_port_psy_changed(struct ucsi_connector *con) +{ + if (IS_ERR_OR_NULL(con->psy)) + return; + + power_supply_changed(con->psy); } diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 758b988ac518..51a570d40a42 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -643,8 +643,10 @@ static void ucsi_handle_connector_change(struct work_struct *work) role = !!(con->status.flags & UCSI_CONSTAT_PWR_DIR); if (con->status.change & UCSI_CONSTAT_POWER_OPMODE_CHANGE || - con->status.change & UCSI_CONSTAT_POWER_LEVEL_CHANGE) + con->status.change & UCSI_CONSTAT_POWER_LEVEL_CHANGE) { ucsi_pwr_opmode_change(con); + ucsi_port_psy_changed(con); + } if (con->status.change & UCSI_CONSTAT_POWER_DIR_CHANGE) { typec_set_pwr_role(con->port, role); @@ -674,6 +676,8 @@ static void 
ucsi_handle_connector_change(struct work_struct *work) ucsi_register_partner(con); else ucsi_unregister_partner(con); + + ucsi_port_psy_changed(con); } if (con->status.change & UCSI_CONSTAT_CAM_CHANGE) { @@ -994,6 +998,7 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) !!(con->status.flags & UCSI_CONSTAT_PWR_DIR)); ucsi_pwr_opmode_change(con); ucsi_register_partner(con); + ucsi_port_psy_changed(con); } if (con->partner) { diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index cba6f77bea61..b7a92f246050 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -340,9 +340,11 @@ int ucsi_resume(struct ucsi *ucsi); #if IS_ENABLED(CONFIG_POWER_SUPPLY) int ucsi_register_port_psy(struct ucsi_connector *con); void ucsi_unregister_port_psy(struct ucsi_connector *con); +void ucsi_port_psy_changed(struct ucsi_connector *con); #else static inline int ucsi_register_port_psy(struct ucsi_connector *con) { return 0; } static inline void ucsi_unregister_port_psy(struct ucsi_connector *con) { } +static inline void ucsi_port_psy_changed(struct ucsi_connector *con) { } #endif /* CONFIG_POWER_SUPPLY */ #if IS_ENABLED(CONFIG_TYPEC_DP_ALTMODE) diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index e36fb1a0ecdb..5bc86f481a78 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -47,6 +47,7 @@ #include <linux/module.h> #include <linux/kernel.h> +#include <linux/vmalloc.h> #include <linux/init.h> #include <linux/completion.h> #include <linux/fb.h> diff --git a/drivers/virt/nitro_enclaves/ne_misc_dev.c b/drivers/virt/nitro_enclaves/ne_misc_dev.c index f06622b48d69..f1964ea4b826 100644 --- a/drivers/virt/nitro_enclaves/ne_misc_dev.c +++ b/drivers/virt/nitro_enclaves/ne_misc_dev.c @@ -1505,10 +1505,8 @@ static __poll_t ne_enclave_poll(struct file *file, poll_table *wait) poll_wait(file, &ne_enclave->eventq, wait); - if (!ne_enclave->has_event) - return mask; - - mask = 
POLLHUP; + if (ne_enclave->has_event) + mask |= EPOLLHUP; return mask; } diff --git a/fs/afs/write.c b/fs/afs/write.c index 50371207f327..c9195fc67fd8 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -169,11 +169,14 @@ int afs_write_end(struct file *file, struct address_space *mapping, unsigned int f, from = pos & (PAGE_SIZE - 1); unsigned int t, to = from + copied; loff_t i_size, maybe_i_size; - int ret; + int ret = 0; _enter("{%llx:%llu},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index); + if (copied == 0) + goto out; + maybe_i_size = pos + copied; i_size = i_size_read(&vnode->vfs_inode); @@ -1572,7 +1572,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb, * we return to userspace. */ if (S_ISREG(file_inode(file)->i_mode)) { - __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true); + sb_start_write(file_inode(file)->i_sb); __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE); } req->ki_flags |= IOCB_WRITE; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1b399cafb15a..bf9429484462 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1231,13 +1231,13 @@ struct ext4_inode_info { blocks */ #define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated file systems */ -#define EXT4_MOUNT2_DAX_NEVER 0x00000008 /* Do not allow Direct Access */ -#define EXT4_MOUNT2_DAX_INODE 0x00000010 /* For printing options only */ - #define EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM 0x00000008 /* User explicitly specified journal checksum */ #define EXT4_MOUNT2_JOURNAL_FAST_COMMIT 0x00000010 /* Journal fast commit */ +#define EXT4_MOUNT2_DAX_NEVER 0x00000020 /* Do not allow Direct Access */ +#define EXT4_MOUNT2_DAX_INODE 0x00000040 /* For printing options only */ + #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ ~EXT4_MOUNT_##opt diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c3b864588a0b..6633b20224d5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -289,18 +289,7 @@ void ext4_superblock_csum_set(struct super_block 
*sb) if (!ext4_has_metadata_csum(sb)) return; - /* - * Locking the superblock prevents the scenario - * where: - * 1) a first thread pauses during checksum calculation. - * 2) a second thread updates the superblock, recalculates - * the checksum, and updates s_checksum - * 3) the first thread resumes and finishes its checksum calculation - * and updates s_checksum with a potentially stale or torn value. - */ - lock_buffer(EXT4_SB(sb)->s_sbh); es->s_checksum = ext4_superblock_csum(sb, es); - unlock_buffer(EXT4_SB(sb)->s_sbh); } ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, diff --git a/fs/io_uring.c b/fs/io_uring.c index 8018c7076b25..4ead291b2976 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3547,8 +3547,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, * we return to userspace. */ if (req->flags & REQ_F_ISREG) { - __sb_start_write(file_inode(req->file)->i_sb, - SB_FREEZE_WRITE, true); + sb_start_write(file_inode(req->file)->i_sb); __sb_writers_release(file_inode(req->file)->i_sb, SB_FREEZE_WRITE); } @@ -9226,6 +9225,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, * to a power-of-two, if it isn't already. We do NOT impose * any cq vs sq ring sizing. 
*/ + p->cq_entries = roundup_pow_of_two(p->cq_entries); if (p->cq_entries < p->sq_entries) return -EINVAL; if (p->cq_entries > IORING_MAX_CQ_ENTRIES) { @@ -9233,7 +9233,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, return -EINVAL; p->cq_entries = IORING_MAX_CQ_ENTRIES; } - p->cq_entries = roundup_pow_of_two(p->cq_entries); } else { p->cq_entries = 2 * p->sq_entries; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 1d91dd1e8711..2febc76e9de7 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1713,6 +1713,7 @@ static void ocfs2_inode_init_once(void *data) oi->ip_blkno = 0ULL; oi->ip_clusters = 0; + oi->ip_next_orphan = NULL; ocfs2_resv_init_once(&oi->ip_la_data_resv); diff --git a/fs/super.c b/fs/super.c index a51c2083cd6b..98bb0629ee10 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1631,55 +1631,6 @@ int super_setup_bdi(struct super_block *sb) } EXPORT_SYMBOL(super_setup_bdi); -/* - * This is an internal function, please use sb_end_{write,pagefault,intwrite} - * instead. - */ -void __sb_end_write(struct super_block *sb, int level) -{ - percpu_up_read(sb->s_writers.rw_sem + level-1); -} -EXPORT_SYMBOL(__sb_end_write); - -/* - * This is an internal function, please use sb_start_{write,pagefault,intwrite} - * instead. - */ -int __sb_start_write(struct super_block *sb, int level, bool wait) -{ - bool force_trylock = false; - int ret = 1; - -#ifdef CONFIG_LOCKDEP - /* - * We want lockdep to tell us about possible deadlocks with freezing - * but it's it bit tricky to properly instrument it. Getting a freeze - * protection works as getting a read lock but there are subtle - * problems. XFS for example gets freeze protection on internal level - * twice in some cases, which is OK only because we already hold a - * freeze protection also on higher level. Due to these cases we have - * to use wait == F (trylock mode) which must not fail. 
- */ - if (wait) { - int i; - - for (i = 0; i < level - 1; i++) - if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) { - force_trylock = true; - break; - } - } -#endif - if (wait && !force_trylock) - percpu_down_read(sb->s_writers.rw_sem + level-1); - else - ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1); - - WARN_ON(force_trylock && !ret); - return ret; -} -EXPORT_SYMBOL(__sb_start_write); - /** * sb_wait_write - wait until all writers to given file system finish * @sb: the super for which we wait diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 340c83f76c80..2668ebe02865 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -1514,7 +1514,7 @@ xfs_rmap_convert_shared( * record for our insertion point. This will also give us the record for * start block contiguity tests. */ - error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, flags, + error = xfs_rmap_lookup_le_range(cur, bno, owner, offset, oldext, &PREV, &i); if (error) goto done; diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index beb81c84a937..577a66381327 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -243,8 +243,8 @@ xfs_rmapbt_key_diff( else if (y > x) return -1; - x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset)); - y = rec->rm_offset; + x = be64_to_cpu(kp->rm_offset); + y = xfs_rmap_irec_offset_pack(rec); if (x > y) return 1; else if (y > x) @@ -275,8 +275,8 @@ xfs_rmapbt_diff_two_keys( else if (y > x) return -1; - x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset)); - y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset)); + x = be64_to_cpu(kp1->rm_offset); + y = be64_to_cpu(kp2->rm_offset); if (x > y) return 1; else if (y > x) @@ -390,8 +390,8 @@ xfs_rmapbt_keys_inorder( return 1; else if (a > b) return 0; - a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset)); - b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset)); + a = be64_to_cpu(k1->rmap.rm_offset); + b = be64_to_cpu(k2->rmap.rm_offset); if (a 
<= b) return 1; return 0; @@ -420,8 +420,8 @@ xfs_rmapbt_recs_inorder( return 1; else if (a > b) return 0; - a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset)); - b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset)); + a = be64_to_cpu(r1->rmap.rm_offset); + b = be64_to_cpu(r2->rmap.rm_offset); if (a <= b) return 1; return 0; diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 955302e7cdde..412e2ec55e38 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -113,6 +113,8 @@ xchk_bmap_get_rmap( if (info->whichfork == XFS_ATTR_FORK) rflags |= XFS_RMAP_ATTR_FORK; + if (irec->br_state == XFS_EXT_UNWRITTEN) + rflags |= XFS_RMAP_UNWRITTEN; /* * CoW staging extents are owned (on disk) by the refcountbt, so diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index beaeb6fa3119..dd672e6bbc75 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -170,7 +170,6 @@ xchk_refcountbt_process_rmap_fragments( */ INIT_LIST_HEAD(&worklist); rbno = NULLAGBLOCK; - nr = 1; /* Make sure the fragments actually /are/ in agbno order. */ bno = 0; @@ -184,15 +183,14 @@ xchk_refcountbt_process_rmap_fragments( * Find all the rmaps that start at or before the refc extent, * and put them on the worklist. 
*/ + nr = 0; list_for_each_entry_safe(frag, n, &refchk->fragments, list) { - if (frag->rm.rm_startblock > refchk->bno) - goto done; + if (frag->rm.rm_startblock > refchk->bno || nr > target_nr) + break; bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; if (bno < rbno) rbno = bno; list_move_tail(&frag->list, &worklist); - if (nr == target_nr) - break; nr++; } diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index b101feb2aab4..f3082a957d5e 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -134,7 +134,7 @@ xfs_fs_map_blocks( goto out_unlock; error = invalidate_inode_pages2(inode->i_mapping); if (WARN_ON_ONCE(error)) - return error; + goto out_unlock; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length); offset_fsb = XFS_B_TO_FSBT(mp, offset); diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 798027bb89be..640f09479bdf 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -13,6 +13,7 @@ #ifndef __ASSEMBLY__ +#include <linux/compiler.h> #include <asm/rwonce.h> #ifndef nop diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 35e4a53b83e6..6432a7fade91 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -114,21 +114,21 @@ do { \ #define __this_cpu_generic_read_nopreempt(pcp) \ ({ \ - typeof(pcp) __ret; \ + typeof(pcp) ___ret; \ preempt_disable_notrace(); \ - __ret = READ_ONCE(*raw_cpu_ptr(&(pcp))); \ + ___ret = READ_ONCE(*raw_cpu_ptr(&(pcp))); \ preempt_enable_notrace(); \ - __ret; \ + ___ret; \ }) #define __this_cpu_generic_read_noirq(pcp) \ ({ \ - typeof(pcp) __ret; \ - unsigned long __flags; \ - raw_local_irq_save(__flags); \ - __ret = raw_cpu_generic_read(pcp); \ - raw_local_irq_restore(__flags); \ - __ret; \ + typeof(pcp) ___ret; \ + unsigned long ___flags; \ + raw_local_irq_save(___flags); \ + ___ret = raw_cpu_generic_read(pcp); \ + raw_local_irq_restore(___flags); \ + ___ret; \ }) #define this_cpu_generic_read(pcp) \ diff --git 
a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 1d94acd0bc85..fc85f50fa0e9 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -24,13 +24,11 @@ struct kvm_pmu { int irq_num; struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS]; DECLARE_BITMAP(chained, ARMV8_PMU_MAX_COUNTER_PAIRS); - bool ready; bool created; bool irq_level; struct irq_work overflow_work; }; -#define kvm_arm_pmu_v3_ready(v) ((v)->arch.pmu.ready) #define kvm_arm_pmu_irq_initialized(v) ((v)->arch.pmu.irq_num >= VGIC_NR_SGIS) u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx); void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val); @@ -61,7 +59,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu); struct kvm_pmu { }; -#define kvm_arm_pmu_v3_ready(v) (false) #define kvm_arm_pmu_irq_initialized(v) (false) static inline u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index a8d8fdcd3723..3d74f1060bd1 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -402,6 +402,7 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq, struct kvm_kernel_irq_routing_entry *irq_entry); int vgic_v4_load(struct kvm_vcpu *vcpu); +void vgic_v4_commit(struct kvm_vcpu *vcpu); int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db); #endif /* __KVM_ARM_VGIC_H */ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 230604e7f057..dd7233c48bf3 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -60,12 +60,6 @@ #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif -/* The following are for compatibility with GCC, from compiler-gcc.h, - * and may be redefined here because they should not be shared with other - * compilers, like ICC. 
- */ -#define barrier() __asm__ __volatile__("" : : : "memory") - #if __has_feature(shadow_call_stack) # define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) #endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 5deb37024574..74c6c0486eed 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -15,25 +15,6 @@ # error Sorry, your version of GCC is too old - please use 4.9 or newer. #endif -/* Optimization barrier */ - -/* The "volatile" is due to gcc bugs */ -#define barrier() __asm__ __volatile__("": : :"memory") -/* - * This version is i.e. to prevent dead stores elimination on @ptr - * where gcc and llvm may behave differently when otherwise using - * normal barrier(): while gcc behavior gets along with a normal - * barrier(), llvm needs an explicit input variable to be assumed - * clobbered. The issue is as follows: while the inline asm might - * access any memory it wants, the compiler could have fit all of - * @ptr into memory registers instead, and since @ptr never escaped - * from that, it proved that the inline asm wasn't touching any of - * it. This version works well with both compilers, i.e. we're telling - * the compiler that the inline asm absolutely may see the contents - * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495 - */ -#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory") - /* * This macro obfuscates arithmetic on a variable address so that gcc * shouldn't recognize the original var, and make assumptions about it. 
diff --git a/include/linux/compiler.h b/include/linux/compiler.h index e512f5505dad..b8fe0c23cfff 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -80,11 +80,25 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, /* Optimization barrier */ #ifndef barrier -# define barrier() __memory_barrier() +/* The "volatile" is due to gcc bugs */ +# define barrier() __asm__ __volatile__("": : :"memory") #endif #ifndef barrier_data -# define barrier_data(ptr) barrier() +/* + * This version is i.e. to prevent dead stores elimination on @ptr + * where gcc and llvm may behave differently when otherwise using + * normal barrier(): while gcc behavior gets along with a normal + * barrier(), llvm needs an explicit input variable to be assumed + * clobbered. The issue is as follows: while the inline asm might + * access any memory it wants, the compiler could have fit all of + * @ptr into memory registers instead, and since @ptr never escaped + * from that, it proved that the inline asm wasn't touching any of + * it. This version works well with both compilers, i.e. we're telling + * the compiler that the inline asm absolutely may see the contents + * of @ptr. See also: https://llvm.org/bugs/show_bug.cgi?id=15495 + */ +# define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory") #endif /* workaround for GCC PR82365 if needed */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 21cc971fd960..8667d0cdc71e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1580,8 +1580,24 @@ extern struct timespec64 current_time(struct inode *inode); * Snapshotting support. */ -void __sb_end_write(struct super_block *sb, int level); -int __sb_start_write(struct super_block *sb, int level, bool wait); +/* + * These are internal functions, please use sb_start_{write,pagefault,intwrite} + * instead. 
+ */ +static inline void __sb_end_write(struct super_block *sb, int level) +{ + percpu_up_read(sb->s_writers.rw_sem + level-1); +} + +static inline void __sb_start_write(struct super_block *sb, int level) +{ + percpu_down_read(sb->s_writers.rw_sem + level - 1); +} + +static inline bool __sb_start_write_trylock(struct super_block *sb, int level) +{ + return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1); +} #define __sb_writers_acquired(sb, lev) \ percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) @@ -1645,12 +1661,12 @@ static inline void sb_end_intwrite(struct super_block *sb) */ static inline void sb_start_write(struct super_block *sb) { - __sb_start_write(sb, SB_FREEZE_WRITE, true); + __sb_start_write(sb, SB_FREEZE_WRITE); } -static inline int sb_start_write_trylock(struct super_block *sb) +static inline bool sb_start_write_trylock(struct super_block *sb) { - return __sb_start_write(sb, SB_FREEZE_WRITE, false); + return __sb_start_write_trylock(sb, SB_FREEZE_WRITE); } /** @@ -1674,7 +1690,7 @@ static inline int sb_start_write_trylock(struct super_block *sb) */ static inline void sb_start_pagefault(struct super_block *sb) { - __sb_start_write(sb, SB_FREEZE_PAGEFAULT, true); + __sb_start_write(sb, SB_FREEZE_PAGEFAULT); } /* @@ -1692,12 +1708,12 @@ static inline void sb_start_pagefault(struct super_block *sb) */ static inline void sb_start_intwrite(struct super_block *sb) { - __sb_start_write(sb, SB_FREEZE_FS, true); + __sb_start_write(sb, SB_FREEZE_FS); } -static inline int sb_start_intwrite_trylock(struct super_block *sb) +static inline bool sb_start_intwrite_trylock(struct super_block *sb) { - return __sb_start_write(sb, SB_FREEZE_FS, false); + return __sb_start_write_trylock(sb, SB_FREEZE_FS); } @@ -2756,14 +2772,14 @@ static inline void file_start_write(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return; - __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true); + sb_start_write(file_inode(file)->i_sb); } 
static inline bool file_start_write_trylock(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) return true; - return __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, false); + return sb_start_write_trylock(file_inode(file)->i_sb); } static inline void file_end_write(struct file *file) diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 38f23d757013..03da3f603d30 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -315,7 +315,7 @@ static inline int get_disk_ro(struct gendisk *disk) extern void disk_block_events(struct gendisk *disk); extern void disk_unblock_events(struct gendisk *disk); extern void disk_flush_events(struct gendisk *disk, unsigned int mask); -void set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, +bool set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, bool update_bdev); /* drivers/char/random.c */ diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 6976b8331b60..943c3411ca10 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -39,6 +39,8 @@ struct its_vpe { irq_hw_number_t vpe_db_lpi; /* VPE resident */ bool resident; + /* VPT parse complete */ + bool ready; union { /* GICv4.0 implementations */ struct { @@ -104,6 +106,7 @@ enum its_vcpu_info_cmd_type { PROP_UPDATE_AND_INV_VLPI, SCHEDULE_VPE, DESCHEDULE_VPE, + COMMIT_VPE, INVALL_VPE, PROP_UPDATE_VSGI, }; @@ -129,6 +132,7 @@ int its_alloc_vcpu_irqs(struct its_vm *vm); void its_free_vcpu_irqs(struct its_vm *vm); int its_make_vpe_resident(struct its_vpe *vpe, bool g0en, bool g1en); int its_make_vpe_non_resident(struct its_vpe *vpe, bool db); +int its_commit_vpe(struct its_vpe *vpe); int its_invall_vpe(struct its_vpe *vpe); int its_map_vlpi(int irq, struct its_vlpi_map *map); int its_get_vlpi(int irq, struct its_vlpi_map *map); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e391e3c56de5..a80c59af2c60 100644 
--- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -900,12 +900,19 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, static inline void memcg_memory_event(struct mem_cgroup *memcg, enum memcg_memory_event event) { + bool swap_event = event == MEMCG_SWAP_HIGH || event == MEMCG_SWAP_MAX || + event == MEMCG_SWAP_FAIL; + atomic_long_inc(&memcg->memory_events_local[event]); - cgroup_file_notify(&memcg->events_local_file); + if (!swap_event) + cgroup_file_notify(&memcg->events_local_file); do { atomic_long_inc(&memcg->memory_events[event]); - cgroup_file_notify(&memcg->events_file); + if (swap_event) + cgroup_file_notify(&memcg->swap_events_file); + else + cgroup_file_notify(&memcg->events_file); if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) break; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 0c19d279b97f..96450f6fb1de 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1022,13 +1022,7 @@ struct perf_sample_data { struct perf_callchain_entry *callchain; u64 aux_size; - /* - * regs_user may point to task_pt_regs or to regs_user_copy, depending - * on arch details. 
- */ struct perf_regs regs_user; - struct pt_regs regs_user_copy; - struct perf_regs regs_intr; u64 stack_user_size; @@ -1400,11 +1394,14 @@ perf_event_addr_filters(struct perf_event *event) extern void perf_event_addr_filters_sync(struct perf_event *event); extern int perf_output_begin(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size); extern int perf_output_begin_forward(struct perf_output_handle *handle, - struct perf_event *event, - unsigned int size); + struct perf_sample_data *data, + struct perf_event *event, + unsigned int size); extern int perf_output_begin_backward(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size); diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h index 2d12e97d5e7b..f632c5725f16 100644 --- a/include/linux/perf_regs.h +++ b/include/linux/perf_regs.h @@ -20,8 +20,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx); int perf_reg_validate(u64 mask); u64 perf_reg_abi(struct task_struct *task); void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy); + struct pt_regs *regs); #else #define PERF_REG_EXTENDED_MASK 0 @@ -42,8 +41,7 @@ static inline u64 perf_reg_abi(struct task_struct *task) } static inline void perf_get_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { regs_user->regs = task_pt_regs(current); regs_user->abi = perf_reg_abi(current); diff --git a/include/linux/psci.h b/include/linux/psci.h index 2a1bfb890e58..4ca0060a3fc4 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -34,6 +34,15 @@ struct psci_operations { extern struct psci_operations psci_ops; +struct psci_0_1_function_ids { + u32 cpu_suspend; + u32 cpu_on; + u32 cpu_off; + u32 migrate; +}; + +struct psci_0_1_function_ids get_psci_0_1_function_ids(void); + #if defined(CONFIG_ARM_PSCI_FW) int 
__init psci_dt_init(void); #else diff --git a/include/uapi/linux/gpio.h b/include/uapi/linux/gpio.h index 07865c601099..2072c260f5d0 100644 --- a/include/uapi/linux/gpio.h +++ b/include/uapi/linux/gpio.h @@ -26,7 +26,7 @@ * struct gpiochip_info - Information about a certain GPIO chip * @name: the Linux kernel name of this GPIO chip * @label: a functional name for this GPIO chip, such as a product - * number, may be empty + * number, may be empty (i.e. label[0] == '\0') * @lines: number of GPIO lines on this chip */ struct gpiochip_info { @@ -98,7 +98,7 @@ struct gpio_v2_line_values { * identifying which field of the attribute union is in use. * @GPIO_V2_LINE_ATTR_ID_FLAGS: flags field is in use * @GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES: values field is in use - * @GPIO_V2_LINE_ATTR_ID_DEBOUNCE: debounce_period_us is in use + * @GPIO_V2_LINE_ATTR_ID_DEBOUNCE: debounce_period_us field is in use */ enum gpio_v2_line_attr_id { GPIO_V2_LINE_ATTR_ID_FLAGS = 1, @@ -110,17 +110,17 @@ enum gpio_v2_line_attr_id { * struct gpio_v2_line_attribute - a configurable attribute of a line * @id: attribute identifier with value from &enum gpio_v2_line_attr_id * @padding: reserved for future use and must be zero filled - * @flags: if id is GPIO_V2_LINE_ATTR_ID_FLAGS, the flags for the GPIO - * line, with values from enum gpio_v2_line_flag, such as - * GPIO_V2_LINE_FLAG_ACTIVE_LOW, GPIO_V2_LINE_FLAG_OUTPUT etc, OR:ed + * @flags: if id is %GPIO_V2_LINE_ATTR_ID_FLAGS, the flags for the GPIO + * line, with values from &enum gpio_v2_line_flag, such as + * %GPIO_V2_LINE_FLAG_ACTIVE_LOW, %GPIO_V2_LINE_FLAG_OUTPUT etc, added * together. This overrides the default flags contained in the &struct * gpio_v2_line_config for the associated line. 
- * @values: if id is GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES, a bitmap + * @values: if id is %GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES, a bitmap * containing the values to which the lines will be set, with each bit * number corresponding to the index into &struct * gpio_v2_line_request.offsets. - * @debounce_period_us: if id is GPIO_V2_LINE_ATTR_ID_DEBOUNCE, the desired - * debounce period, in microseconds + * @debounce_period_us: if id is %GPIO_V2_LINE_ATTR_ID_DEBOUNCE, the + * desired debounce period, in microseconds */ struct gpio_v2_line_attribute { __u32 id; @@ -147,12 +147,12 @@ struct gpio_v2_line_config_attribute { /** * struct gpio_v2_line_config - Configuration for GPIO lines - * @flags: flags for the GPIO lines, with values from enum - * gpio_v2_line_flag, such as GPIO_V2_LINE_FLAG_ACTIVE_LOW, - * GPIO_V2_LINE_FLAG_OUTPUT etc, OR:ed together. This is the default for + * @flags: flags for the GPIO lines, with values from &enum + * gpio_v2_line_flag, such as %GPIO_V2_LINE_FLAG_ACTIVE_LOW, + * %GPIO_V2_LINE_FLAG_OUTPUT etc, added together. This is the default for * all requested lines but may be overridden for particular lines using - * attrs. - * @num_attrs: the number of attributes in attrs + * @attrs. + * @num_attrs: the number of attributes in @attrs * @padding: reserved for future use and must be zero filled * @attrs: the configuration attributes associated with the requested * lines. Any attribute should only be associated with a particular line @@ -175,17 +175,17 @@ struct gpio_v2_line_config { * "my-bitbanged-relay" * @config: requested configuration for the lines. * @num_lines: number of lines requested in this request, i.e. the number - * of valid fields in the GPIO_V2_LINES_MAX sized arrays, set to 1 to + * of valid fields in the %GPIO_V2_LINES_MAX sized arrays, set to 1 to * request a single line * @event_buffer_size: a suggested minimum number of line events that the * kernel should buffer. 
This is only relevant if edge detection is * enabled in the configuration. Note that this is only a suggested value * and the kernel may allocate a larger buffer or cap the size of the * buffer. If this field is zero then the buffer size defaults to a minimum - * of num_lines*16. + * of @num_lines * 16. * @padding: reserved for future use and must be zero filled * @fd: if successful this field will contain a valid anonymous file handle - * after a GPIO_GET_LINE_IOCTL operation, zero or negative value means + * after a %GPIO_GET_LINE_IOCTL operation, zero or negative value means * error */ struct gpio_v2_line_request { @@ -203,15 +203,16 @@ struct gpio_v2_line_request { * struct gpio_v2_line_info - Information about a certain GPIO line * @name: the name of this GPIO line, such as the output pin of the line on * the chip, a rail or a pin header name on a board, as specified by the - * GPIO chip, may be empty + * GPIO chip, may be empty (i.e. name[0] == '\0') * @consumer: a functional name for the consumer of this GPIO line as set * by whatever is using it, will be empty if there is no current user but * may also be empty if the consumer doesn't set this up - * @flags: flags for the GPIO line, such as GPIO_V2_LINE_FLAG_ACTIVE_LOW, - * GPIO_V2_LINE_FLAG_OUTPUT etc, OR:ed together * @offset: the local offset on this GPIO chip, fill this in when * requesting the line information from the kernel - * @num_attrs: the number of attributes in attrs + * @num_attrs: the number of attributes in @attrs + * @flags: flags for the GPIO lines, with values from &enum + * gpio_v2_line_flag, such as %GPIO_V2_LINE_FLAG_ACTIVE_LOW, + * %GPIO_V2_LINE_FLAG_OUTPUT etc, added together. 
* @attrs: the configuration attributes associated with the line * @padding: reserved for future use */ @@ -244,7 +245,7 @@ enum gpio_v2_line_changed_type { * of a GPIO line * @info: updated line information * @timestamp_ns: estimate of time of status change occurrence, in nanoseconds - * @event_type: the type of change with a value from enum + * @event_type: the type of change with a value from &enum * gpio_v2_line_changed_type * @padding: reserved for future use */ @@ -269,10 +270,10 @@ enum gpio_v2_line_event_id { /** * struct gpio_v2_line_event - The actual event being pushed to userspace * @timestamp_ns: best estimate of time of event occurrence, in nanoseconds. - * The timestamp_ns is read from CLOCK_MONOTONIC and is intended to allow the - * accurate measurement of the time between events. It does not provide + * The @timestamp_ns is read from %CLOCK_MONOTONIC and is intended to allow + * the accurate measurement of the time between events. It does not provide * the wall-clock time. - * @id: event identifier with value from enum gpio_v2_line_event_id + * @id: event identifier with value from &enum gpio_v2_line_event_id * @offset: the offset of the line that triggered the event * @seqno: the sequence number for this event in the sequence of events for * all the lines in this line request @@ -291,7 +292,7 @@ struct gpio_v2_line_event { }; /* - * ABI v1 + * ABI v1 * * This version of the ABI is deprecated. * Use the latest version of the ABI, defined above, instead. @@ -314,13 +315,13 @@ struct gpio_v2_line_event { * @flags: various flags for this line * @name: the name of this GPIO line, such as the output pin of the line on the * chip, a rail or a pin header name on a board, as specified by the gpio - * chip, may be empty + * chip, may be empty (i.e. 
name[0] == '\0') * @consumer: a functional name for the consumer of this GPIO line as set by * whatever is using it, will be empty if there is no current user but may * also be empty if the consumer doesn't set this up * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_info instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_info instead. */ struct gpioline_info { __u32 line_offset; @@ -344,17 +345,18 @@ enum { * of a GPIO line * @info: updated line information * @timestamp: estimate of time of status change occurrence, in nanoseconds - * @event_type: one of GPIOLINE_CHANGED_REQUESTED, GPIOLINE_CHANGED_RELEASED - * and GPIOLINE_CHANGED_CONFIG + * @event_type: one of %GPIOLINE_CHANGED_REQUESTED, + * %GPIOLINE_CHANGED_RELEASED and %GPIOLINE_CHANGED_CONFIG + * @padding: reserved for future use * - * Note: struct gpioline_info embedded here has 32-bit alignment on its own, + * The &struct gpioline_info embedded here has 32-bit alignment on its own, * but it works fine with 64-bit alignment too. With its 72 byte size, we can * guarantee there are no implicit holes between it and subsequent members. * The 20-byte padding at the end makes sure we don't add any implicit padding * at the end of the structure on 64-bit architectures. * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_info_changed instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_info_changed instead. */ struct gpioline_info_changed { struct gpioline_info info; @@ -378,13 +380,13 @@ struct gpioline_info_changed { * @lineoffsets: an array of desired lines, specified by offset index for the * associated GPIO device * @flags: desired flags for the desired GPIO lines, such as - * GPIOHANDLE_REQUEST_OUTPUT, GPIOHANDLE_REQUEST_ACTIVE_LOW etc, OR:ed + * %GPIOHANDLE_REQUEST_OUTPUT, %GPIOHANDLE_REQUEST_ACTIVE_LOW etc, added * together. 
Note that even if multiple lines are requested, the same flags * must be applicable to all of them, if you want lines with individual * flags set, request them one by one. It is possible to select * a batch of input or output lines, but they must all have the same * characteristics, i.e. all inputs or all outputs, all active low etc - * @default_values: if the GPIOHANDLE_REQUEST_OUTPUT is set for a requested + * @default_values: if the %GPIOHANDLE_REQUEST_OUTPUT is set for a requested * line, this specifies the default output value, should be 0 (low) or * 1 (high), anything else than 0 or 1 will be interpreted as 1 (high) * @consumer_label: a desired consumer label for the selected GPIO line(s) @@ -392,11 +394,11 @@ struct gpioline_info_changed { * @lines: number of lines requested in this request, i.e. the number of * valid fields in the above arrays, set to 1 to request a single line * @fd: if successful this field will contain a valid anonymous file handle - * after a GPIO_GET_LINEHANDLE_IOCTL operation, zero or negative value + * after a %GPIO_GET_LINEHANDLE_IOCTL operation, zero or negative value * means error * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_request instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_request instead. 
*/ struct gpiohandle_request { __u32 lineoffsets[GPIOHANDLES_MAX]; @@ -410,15 +412,15 @@ struct gpiohandle_request { /** * struct gpiohandle_config - Configuration for a GPIO handle request * @flags: updated flags for the requested GPIO lines, such as - * GPIOHANDLE_REQUEST_OUTPUT, GPIOHANDLE_REQUEST_ACTIVE_LOW etc, OR:ed + * %GPIOHANDLE_REQUEST_OUTPUT, %GPIOHANDLE_REQUEST_ACTIVE_LOW etc, added * together - * @default_values: if the GPIOHANDLE_REQUEST_OUTPUT is set in flags, + * @default_values: if the %GPIOHANDLE_REQUEST_OUTPUT is set in flags, * this specifies the default output value, should be 0 (low) or * 1 (high), anything else than 0 or 1 will be interpreted as 1 (high) * @padding: reserved for future use and should be zero filled * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_config instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_config instead. */ struct gpiohandle_config { __u32 flags; @@ -432,8 +434,8 @@ struct gpiohandle_config { * state of a line, when setting the state of lines these should contain * the desired target state * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_values instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_values instead. 
*/ struct gpiohandle_data { __u8 values[GPIOHANDLES_MAX]; @@ -449,17 +451,17 @@ struct gpiohandle_data { * @lineoffset: the desired line to subscribe to events from, specified by * offset index for the associated GPIO device * @handleflags: desired handle flags for the desired GPIO line, such as - * GPIOHANDLE_REQUEST_ACTIVE_LOW or GPIOHANDLE_REQUEST_OPEN_DRAIN + * %GPIOHANDLE_REQUEST_ACTIVE_LOW or %GPIOHANDLE_REQUEST_OPEN_DRAIN * @eventflags: desired flags for the desired GPIO event line, such as - * GPIOEVENT_REQUEST_RISING_EDGE or GPIOEVENT_REQUEST_FALLING_EDGE + * %GPIOEVENT_REQUEST_RISING_EDGE or %GPIOEVENT_REQUEST_FALLING_EDGE * @consumer_label: a desired consumer label for the selected GPIO line(s) * such as "my-listener" * @fd: if successful this field will contain a valid anonymous file handle - * after a GPIO_GET_LINEEVENT_IOCTL operation, zero or negative value + * after a %GPIO_GET_LINEEVENT_IOCTL operation, zero or negative value * means error * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_request instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_request instead. */ struct gpioevent_request { __u32 lineoffset; @@ -469,7 +471,7 @@ struct gpioevent_request { int fd; }; -/** +/* * GPIO event types */ #define GPIOEVENT_EVENT_RISING_EDGE 0x01 @@ -480,8 +482,8 @@ struct gpioevent_request { * @timestamp: best estimate of time of event occurrence, in nanoseconds * @id: event identifier * - * This struct is part of ABI v1 and is deprecated. - * Use struct gpio_v2_line_event instead. + * Note: This struct is part of ABI v1 and is deprecated. + * Use &struct gpio_v2_line_event instead. 
*/ struct gpioevent_data { __u64 timestamp; diff --git a/init/main.c b/init/main.c index 130376ec10ba..20baced721ad 100644 --- a/init/main.c +++ b/init/main.c @@ -269,14 +269,24 @@ static void * __init get_boot_config_from_initrd(u32 *_size, u32 *_csum) u32 size, csum; char *data; u32 *hdr; + int i; if (!initrd_end) return NULL; data = (char *)initrd_end - BOOTCONFIG_MAGIC_LEN; - if (memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN)) - return NULL; + /* + * Since Grub may align the size of initrd to 4, we must + * check the preceding 3 bytes as well. + */ + for (i = 0; i < 4; i++) { + if (!memcmp(data, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN)) + goto found; + data--; + } + return NULL; +found: hdr = (u32 *)(data - 8); size = hdr[0]; csum = hdr[1]; diff --git a/kernel/events/core.c b/kernel/events/core.c index 5a29ab09e72d..dc568ca295bd 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2312,9 +2312,6 @@ group_sched_out(struct perf_event *group_event, event_sched_out(event, cpuctx, ctx); perf_pmu_enable(ctx->pmu); - - if (group_event->attr.exclusive) - cpuctx->exclusive = 0; } #define DETACH_GROUP 0x01UL @@ -2583,11 +2580,8 @@ group_sched_in(struct perf_event *group_event, pmu->start_txn(pmu, PERF_PMU_TXN_ADD); - if (event_sched_in(group_event, cpuctx, ctx)) { - pmu->cancel_txn(pmu); - perf_mux_hrtimer_restart(cpuctx); - return -EAGAIN; - } + if (event_sched_in(group_event, cpuctx, ctx)) + goto error; /* * Schedule in siblings as one group (if any): @@ -2616,10 +2610,8 @@ group_error: } event_sched_out(group_event, cpuctx, ctx); +error: pmu->cancel_txn(pmu); - - perf_mux_hrtimer_restart(cpuctx); - return -EAGAIN; } @@ -2645,7 +2637,7 @@ static int group_can_go_on(struct perf_event *event, * If this group is exclusive and there are already * events on the CPU, it can't go on. 
*/ - if (event->attr.exclusive && cpuctx->active_oncpu) + if (event->attr.exclusive && !list_empty(get_event_list(event))) return 0; /* * Otherwise, try to add it if all previous groups were able @@ -3679,6 +3671,7 @@ static int merge_sched_in(struct perf_event *event, void *data) *can_add_hw = 0; ctx->rotate_necessary = 1; + perf_mux_hrtimer_restart(cpuctx); } return 0; @@ -6374,14 +6367,13 @@ perf_output_sample_regs(struct perf_output_handle *handle, } static void perf_sample_regs_user(struct perf_regs *regs_user, - struct pt_regs *regs, - struct pt_regs *regs_user_copy) + struct pt_regs *regs) { if (user_mode(regs)) { regs_user->abi = perf_reg_abi(current); regs_user->regs = regs; } else if (!(current->flags & PF_KTHREAD)) { - perf_get_regs_user(regs_user, regs, regs_user_copy); + perf_get_regs_user(regs_user, regs); } else { regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; regs_user->regs = NULL; @@ -7083,8 +7075,7 @@ void perf_prepare_sample(struct perf_event_header *header, } if (sample_type & (PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER)) - perf_sample_regs_user(&data->regs_user, regs, - &data->regs_user_copy); + perf_sample_regs_user(&data->regs_user, regs); if (sample_type & PERF_SAMPLE_REGS_USER) { /* regs dump ABI info */ @@ -7186,6 +7177,7 @@ __perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs, int (*output_begin)(struct perf_output_handle *, + struct perf_sample_data *, struct perf_event *, unsigned int)) { @@ -7198,7 +7190,7 @@ __perf_event_output(struct perf_event *event, perf_prepare_sample(&header, data, event, regs); - err = output_begin(&handle, event, header.size); + err = output_begin(&handle, data, event, header.size); if (err) goto exit; @@ -7264,7 +7256,7 @@ perf_event_read_event(struct perf_event *event, int ret; perf_event_header__init_id(&read_event.header, &sample, event); - ret = perf_output_begin(&handle, event, read_event.header.size); + ret = perf_output_begin(&handle, &sample, event, 
read_event.header.size); if (ret) return; @@ -7533,7 +7525,7 @@ static void perf_event_task_output(struct perf_event *event, perf_event_header__init_id(&task_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, task_event->event_id.header.size); if (ret) goto out; @@ -7636,7 +7628,7 @@ static void perf_event_comm_output(struct perf_event *event, return; perf_event_header__init_id(&comm_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, comm_event->event_id.header.size); if (ret) @@ -7736,7 +7728,7 @@ static void perf_event_namespaces_output(struct perf_event *event, perf_event_header__init_id(&namespaces_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, namespaces_event->event_id.header.size); if (ret) goto out; @@ -7863,7 +7855,7 @@ static void perf_event_cgroup_output(struct perf_event *event, void *data) perf_event_header__init_id(&cgroup_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, cgroup_event->event_id.header.size); if (ret) goto out; @@ -7989,7 +7981,7 @@ static void perf_event_mmap_output(struct perf_event *event, } perf_event_header__init_id(&mmap_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, mmap_event->event_id.header.size); if (ret) goto out; @@ -8299,7 +8291,7 @@ void perf_event_aux_event(struct perf_event *event, unsigned long head, int ret; perf_event_header__init_id(&rec.header, &sample, event); - ret = perf_output_begin(&handle, event, rec.header.size); + ret = perf_output_begin(&handle, &sample, event, rec.header.size); if (ret) return; @@ -8333,7 +8325,7 @@ void perf_log_lost_samples(struct perf_event *event, u64 lost) 
perf_event_header__init_id(&lost_samples_event.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, lost_samples_event.header.size); if (ret) return; @@ -8388,7 +8380,7 @@ static void perf_event_switch_output(struct perf_event *event, void *data) perf_event_header__init_id(&se->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, se->event_id.header.size); + ret = perf_output_begin(&handle, &sample, event, se->event_id.header.size); if (ret) return; @@ -8463,7 +8455,7 @@ static void perf_log_throttle(struct perf_event *event, int enable) perf_event_header__init_id(&throttle_event.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, throttle_event.header.size); if (ret) return; @@ -8506,7 +8498,7 @@ static void perf_event_ksymbol_output(struct perf_event *event, void *data) perf_event_header__init_id(&ksymbol_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, &sample, event, ksymbol_event->event_id.header.size); if (ret) return; @@ -8596,7 +8588,7 @@ static void perf_event_bpf_output(struct perf_event *event, void *data) perf_event_header__init_id(&bpf_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, + ret = perf_output_begin(&handle, data, event, bpf_event->event_id.header.size); if (ret) return; @@ -8705,7 +8697,8 @@ static void perf_event_text_poke_output(struct perf_event *event, void *data) perf_event_header__init_id(&text_poke_event->event_id.header, &sample, event); - ret = perf_output_begin(&handle, event, text_poke_event->event_id.header.size); + ret = perf_output_begin(&handle, &sample, event, + text_poke_event->event_id.header.size); if (ret) return; @@ -8786,7 +8779,7 @@ static void perf_log_itrace_start(struct perf_event *event) rec.tid = perf_event_tid(event, current); 
perf_event_header__init_id(&rec.header, &sample, event); - ret = perf_output_begin(&handle, event, rec.header.size); + ret = perf_output_begin(&handle, &sample, event, rec.header.size); if (ret) return; diff --git a/kernel/events/internal.h b/kernel/events/internal.h index fcbf5616a441..228801e20788 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -205,16 +205,12 @@ DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user) static inline int get_recursion_context(int *recursion) { - int rctx; - - if (unlikely(in_nmi())) - rctx = 3; - else if (in_irq()) - rctx = 2; - else if (in_softirq()) - rctx = 1; - else - rctx = 0; + unsigned int pc = preempt_count(); + unsigned char rctx = 0; + + rctx += !!(pc & (NMI_MASK)); + rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK)); + rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)); if (recursion[rctx]) return -1; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 192b8abc6330..ef91ae75ca56 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -147,6 +147,7 @@ ring_buffer_has_space(unsigned long head, unsigned long tail, static __always_inline int __perf_output_begin(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size, bool backward) { @@ -237,18 +238,16 @@ __perf_output_begin(struct perf_output_handle *handle, handle->size = (1UL << page_shift) - offset; if (unlikely(have_lost)) { - struct perf_sample_data sample_data; - lost_event.header.size = sizeof(lost_event); lost_event.header.type = PERF_RECORD_LOST; lost_event.header.misc = 0; lost_event.id = event->id; lost_event.lost = local_xchg(&rb->lost, 0); - perf_event_header__init_id(&lost_event.header, - &sample_data, event); + /* XXX mostly redundant; @data is already fully initializes */ + perf_event_header__init_id(&lost_event.header, data, event); perf_output_put(handle, lost_event); - perf_event__output_id_sample(event, handle, 
&sample_data); + perf_event__output_id_sample(event, handle, data); } return 0; @@ -263,22 +262,25 @@ out: } int perf_output_begin_forward(struct perf_output_handle *handle, - struct perf_event *event, unsigned int size) + struct perf_sample_data *data, + struct perf_event *event, unsigned int size) { - return __perf_output_begin(handle, event, size, false); + return __perf_output_begin(handle, data, event, size, false); } int perf_output_begin_backward(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size) { - return __perf_output_begin(handle, event, size, true); + return __perf_output_begin(handle, data, event, size, true); } int perf_output_begin(struct perf_output_handle *handle, + struct perf_sample_data *data, struct perf_event *event, unsigned int size) { - return __perf_output_begin(handle, event, size, + return __perf_output_begin(handle, data, event, size, unlikely(is_write_backward(event))); } diff --git a/kernel/futex.c b/kernel/futex.c index ac328874f6e5..00259c7e288e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -788,8 +788,9 @@ static void put_pi_state(struct futex_pi_state *pi_state) */ if (pi_state->owner) { struct task_struct *owner; + unsigned long flags; - raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); + raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags); owner = pi_state->owner; if (owner) { raw_spin_lock(&owner->pi_lock); @@ -797,7 +798,7 @@ static void put_pi_state(struct futex_pi_state *pi_state) raw_spin_unlock(&owner->pi_lock); } rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner); - raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); + raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags); } if (current->pi_state_cache) { diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index b71ad8d9f1c9..d9fb9e19d2ed 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2765,7 +2765,9 @@ print_deadlock_bug(struct task_struct *curr, 
struct held_lock *prev, * (Note that this has to be done separately, because the graph cannot * detect such classes of deadlocks.) * - * Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read + * Returns: 0 on deadlock detected, 1 on OK, 2 if another lock with the same + * lock class is held but nest_lock is also held, i.e. we rely on the + * nest_lock to avoid the deadlock. */ static int check_deadlock(struct task_struct *curr, struct held_lock *next) @@ -2788,7 +2790,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next) * lock class (i.e. read_lock(lock)+read_lock(lock)): */ if ((next->read == 2) && prev->read) - return 2; + continue; /* * We're holding the nest_lock, which serializes this lock's @@ -3593,15 +3595,12 @@ static int validate_chain(struct task_struct *curr, if (!ret) return 0; /* - * Mark recursive read, as we jump over it when - * building dependencies (just like we jump over - * trylock entries): - */ - if (ret == 2) - hlock->read = 2; - /* * Add dependency only if this lock is not the head - * of the chain, and if it's not a secondary read-lock: + * of the chain, and if the new lock introduces no more + * lock dependency (because we already hold a lock with the + * same lock class) nor deadlock (because the nest_lock + * serializes nesting locks), see the comments for + * check_deadlock(). 
*/ if (!chain_head && ret != 2) { if (!check_prevs_add(curr, hlock)) diff --git a/kernel/panic.c b/kernel/panic.c index 396142ee43fd..332736a72a58 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -605,7 +605,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, panic("panic_on_warn set ...\n"); } - dump_stack(); + if (!regs) + dump_stack(); print_irqtrace_events(current); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 2a52f42f64b6..bd04b09b84b3 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4077,7 +4077,6 @@ void rcu_cpu_starting(unsigned int cpu) smp_mb(); /* Ensure RCU read-side usage follows above initialization. */ } -#ifdef CONFIG_HOTPLUG_CPU /* * The outgoing function has no further need of RCU, so remove it from * the rcu_node tree's ->qsmaskinitnext bit masks. @@ -4117,6 +4116,7 @@ void rcu_report_dead(unsigned int cpu) rdp->cpu_started = false; } +#ifdef CONFIG_HOTPLUG_CPU /* * The outgoing CPU has just passed through the dying-idle state, and we * are being invoked from the CPU that was IPIed to continue the offline diff --git a/kernel/reboot.c b/kernel/reboot.c index e7b78d5ae1ab..af6f23d8bea1 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -551,22 +551,22 @@ static int __init reboot_setup(char *str) break; case 's': - { - int rc; - - if (isdigit(*(str+1))) { - rc = kstrtoint(str+1, 0, &reboot_cpu); - if (rc) - return rc; - } else if (str[1] == 'm' && str[2] == 'p' && - isdigit(*(str+3))) { - rc = kstrtoint(str+3, 0, &reboot_cpu); - if (rc) - return rc; - } else + if (isdigit(*(str+1))) + reboot_cpu = simple_strtoul(str+1, NULL, 0); + else if (str[1] == 'm' && str[2] == 'p' && + isdigit(*(str+3))) + reboot_cpu = simple_strtoul(str+3, NULL, 0); + else *mode = REBOOT_SOFT; + if (reboot_cpu >= num_possible_cpus()) { + pr_err("Ignoring the CPU number in reboot= option. 
" + "CPU %d exceeds possible cpu number %d\n", + reboot_cpu, num_possible_cpus()); + reboot_cpu = 0; + break; + } break; - } + case 'g': *mode = REBOOT_GPIO; break; diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 0655524700d2..2357921580f9 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -251,7 +251,7 @@ static int sd_ctl_doflags(struct ctl_table *table, int write, unsigned long flags = *(unsigned long *)table->data; size_t data_size = 0; size_t len = 0; - char *tmp; + char *tmp, *buf; int idx; if (write) @@ -269,17 +269,17 @@ static int sd_ctl_doflags(struct ctl_table *table, int write, return 0; } - tmp = kcalloc(data_size + 1, sizeof(*tmp), GFP_KERNEL); - if (!tmp) + buf = kcalloc(data_size + 1, sizeof(*buf), GFP_KERNEL); + if (!buf) return -ENOMEM; for_each_set_bit(idx, &flags, __SD_FLAG_CNT) { char *name = sd_flag_debug[idx].name; - len += snprintf(tmp + len, strlen(name) + 2, "%s ", name); + len += snprintf(buf + len, strlen(name) + 2, "%s ", name); } - tmp += *ppos; + tmp = buf + *ppos; len -= *ppos; if (len > *lenp) @@ -294,7 +294,7 @@ static int sd_ctl_doflags(struct ctl_table *table, int write, *lenp = len; *ppos += len; - kfree(tmp); + kfree(buf); return 0; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 290f9e38378c..8917d2d715ef 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6172,21 +6172,21 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t static int select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target) { - unsigned long best_cap = 0; + unsigned long task_util, best_cap = 0; int cpu, best_cpu = -1; struct cpumask *cpus; - sync_entity_load_avg(&p->se); - cpus = this_cpu_cpumask_var_ptr(select_idle_mask); cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); + task_util = uclamp_task_util(p); + for_each_cpu_wrap(cpu, cpus, target) { unsigned long cpu_cap = capacity_of(cpu); if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu)) 
continue; - if (task_fits_capacity(p, cpu_cap)) + if (fits_capacity(task_util, cpu_cap)) return cpu; if (cpu_cap > best_cap) { @@ -6198,44 +6198,42 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target) return best_cpu; } +static inline bool asym_fits_capacity(int task_util, int cpu) +{ + if (static_branch_unlikely(&sched_asym_cpucapacity)) + return fits_capacity(task_util, capacity_of(cpu)); + + return true; +} + /* * Try and locate an idle core/thread in the LLC cache domain. */ static int select_idle_sibling(struct task_struct *p, int prev, int target) { struct sched_domain *sd; + unsigned long task_util; int i, recent_used_cpu; /* - * For asymmetric CPU capacity systems, our domain of interest is - * sd_asym_cpucapacity rather than sd_llc. + * On asymmetric system, update task utilization because we will check + * that the task fits with cpu's capacity. */ if (static_branch_unlikely(&sched_asym_cpucapacity)) { - sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target)); - /* - * On an asymmetric CPU capacity system where an exclusive - * cpuset defines a symmetric island (i.e. one unique - * capacity_orig value through the cpuset), the key will be set - * but the CPUs within that cpuset will not have a domain with - * SD_ASYM_CPUCAPACITY. These should follow the usual symmetric - * capacity path. - */ - if (!sd) - goto symmetric; - - i = select_idle_capacity(p, sd, target); - return ((unsigned)i < nr_cpumask_bits) ? 
i : target; + sync_entity_load_avg(&p->se); + task_util = uclamp_task_util(p); } -symmetric: - if (available_idle_cpu(target) || sched_idle_cpu(target)) + if ((available_idle_cpu(target) || sched_idle_cpu(target)) && + asym_fits_capacity(task_util, target)) return target; /* * If the previous CPU is cache affine and idle, don't be stupid: */ if (prev != target && cpus_share_cache(prev, target) && - (available_idle_cpu(prev) || sched_idle_cpu(prev))) + (available_idle_cpu(prev) || sched_idle_cpu(prev)) && + asym_fits_capacity(task_util, prev)) return prev; /* @@ -6258,7 +6256,8 @@ symmetric: recent_used_cpu != target && cpus_share_cache(recent_used_cpu, target) && (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) && - cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr)) { + cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) && + asym_fits_capacity(task_util, recent_used_cpu)) { /* * Replace recent_used_cpu with prev as it is a potential * candidate for the next wake: @@ -6267,6 +6266,26 @@ symmetric: return recent_used_cpu; } + /* + * For asymmetric CPU capacity systems, our domain of interest is + * sd_asym_cpucapacity rather than sd_llc. + */ + if (static_branch_unlikely(&sched_asym_cpucapacity)) { + sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target)); + /* + * On an asymmetric CPU capacity system where an exclusive + * cpuset defines a symmetric island (i.e. one unique + * capacity_orig value through the cpuset), the key will be set + * but the CPUs within that cpuset will not have a domain with + * SD_ASYM_CPUCAPACITY. These should follow the usual symmetric + * capacity path. + */ + if (sd) { + i = select_idle_capacity(p, sd, target); + return ((unsigned)i < nr_cpumask_bits) ? i : target; + } + } + sd = rcu_dereference(per_cpu(sd_llc, target)); if (!sd) return target; @@ -9031,7 +9050,8 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s * emptying busiest. 
*/ if (local->group_type == group_has_spare) { - if (busiest->group_type > group_fully_busy) { + if ((busiest->group_type > group_fully_busy) && + !(env->sd->flags & SD_SHARE_PKG_RESOURCES)) { /* * If busiest is overloaded, try to fill spare * capacity. This might end up creating spare capacity diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 5abb5b22ad13..71109065bd8e 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -44,8 +44,6 @@ int __read_mostly soft_watchdog_user_enabled = 1; int __read_mostly watchdog_thresh = 10; static int __read_mostly nmi_watchdog_available; -static struct cpumask watchdog_allowed_mask __read_mostly; - struct cpumask watchdog_cpumask __read_mostly; unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); @@ -162,6 +160,8 @@ static void lockup_detector_update_enable(void) int __read_mostly sysctl_softlockup_all_cpu_backtrace; #endif +static struct cpumask watchdog_allowed_mask __read_mostly; + /* Global variables, exported for sysctl */ unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; diff --git a/mm/compaction.c b/mm/compaction.c index 6e0ee5641788..13cb7a961b31 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -817,6 +817,10 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, * delay for some time until fewer pages are isolated */ while (unlikely(too_many_isolated(pgdat))) { + /* stop isolation if there are still pages not migrated */ + if (cc->nr_migratepages) + return 0; + /* async migration should just abort */ if (cc->mode == MIGRATE_ASYNC) return 0; @@ -1012,8 +1016,8 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, isolate_success: list_add(&page->lru, &cc->migratepages); - cc->nr_migratepages++; - nr_isolated++; + cc->nr_migratepages += compound_nr(page); + nr_isolated += compound_nr(page); /* * Avoid isolating too much unless this block is being @@ -1021,7 +1025,7 @@ isolate_success: * or a lock 
is contended. For contention, isolate quickly to * potentially remove one source of contention. */ - if (cc->nr_migratepages == COMPACT_CLUSTER_MAX && + if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX && !cc->rescan && !cc->contended) { ++low_pfn; break; @@ -1132,7 +1136,7 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn, if (!pfn) break; - if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) + if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX) break; } @@ -1647,8 +1647,11 @@ check_again: /* * drop the above get_user_pages reference. */ - for (i = 0; i < nr_pages; i++) - put_page(pages[i]); + if (gup_flags & FOLL_PIN) + unpin_user_pages(pages, nr_pages); + else + for (i = 0; i < nr_pages; i++) + put_page(pages[i]); if (migrate_pages(&cma_page_list, alloc_migration_target, NULL, (unsigned long)&mtc, MIGRATE_SYNC, MR_CONTIG_RANGE)) { @@ -1728,8 +1731,11 @@ static long __gup_longterm_locked(struct mm_struct *mm, goto out; if (check_dax_vmas(vmas_tmp, rc)) { - for (i = 0; i < rc; i++) - put_page(pages[i]); + if (gup_flags & FOLL_PIN) + unpin_user_pages(pages, rc); + else + for (i = 0; i < rc; i++) + put_page(pages[i]); rc = -EOPNOTSUPP; goto out; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 5a620f690911..37f15c3c24dc 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1568,103 +1568,23 @@ int PageHeadHuge(struct page *page_head) } /* - * Find address_space associated with hugetlbfs page. - * Upon entry page is locked and page 'was' mapped although mapped state - * could change. If necessary, use anon_vma to find vma and associated - * address space. The returned mapping may be stale, but it can not be - * invalid as page lock (which is held) is required to destroy mapping. 
- */ -static struct address_space *_get_hugetlb_page_mapping(struct page *hpage) -{ - struct anon_vma *anon_vma; - pgoff_t pgoff_start, pgoff_end; - struct anon_vma_chain *avc; - struct address_space *mapping = page_mapping(hpage); - - /* Simple file based mapping */ - if (mapping) - return mapping; - - /* - * Even anonymous hugetlbfs mappings are associated with an - * underlying hugetlbfs file (see hugetlb_file_setup in mmap - * code). Find a vma associated with the anonymous vma, and - * use the file pointer to get address_space. - */ - anon_vma = page_lock_anon_vma_read(hpage); - if (!anon_vma) - return mapping; /* NULL */ - - /* Use first found vma */ - pgoff_start = page_to_pgoff(hpage); - pgoff_end = pgoff_start + pages_per_huge_page(page_hstate(hpage)) - 1; - anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, - pgoff_start, pgoff_end) { - struct vm_area_struct *vma = avc->vma; - - mapping = vma->vm_file->f_mapping; - break; - } - - anon_vma_unlock_read(anon_vma); - return mapping; -} - -/* * Find and lock address space (mapping) in write mode. * - * Upon entry, the page is locked which allows us to find the mapping - * even in the case of an anon page. However, locking order dictates - * the i_mmap_rwsem be acquired BEFORE the page lock. This is hugetlbfs - * specific. So, we first try to lock the sema while still holding the - * page lock. If this works, great! If not, then we need to drop the - * page lock and then acquire i_mmap_rwsem and reacquire page lock. Of - * course, need to revalidate state along the way. + * Upon entry, the page is locked which means that page_mapping() is + * stable. Due to locking order, we can only trylock_write. If we can + * not get the lock, simply return NULL to caller. 
*/ struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage) { - struct address_space *mapping, *mapping2; + struct address_space *mapping = page_mapping(hpage); - mapping = _get_hugetlb_page_mapping(hpage); -retry: if (!mapping) return mapping; - /* - * If no contention, take lock and return - */ if (i_mmap_trylock_write(mapping)) return mapping; - /* - * Must drop page lock and wait on mapping sema. - * Note: Once page lock is dropped, mapping could become invalid. - * As a hack, increase map count until we lock page again. - */ - atomic_inc(&hpage->_mapcount); - unlock_page(hpage); - i_mmap_lock_write(mapping); - lock_page(hpage); - atomic_add_negative(-1, &hpage->_mapcount); - - /* verify page is still mapped */ - if (!page_mapped(hpage)) { - i_mmap_unlock_write(mapping); - return NULL; - } - - /* - * Get address space again and verify it is the same one - * we locked. If not, drop lock and retry. - */ - mapping2 = _get_hugetlb_page_mapping(hpage); - if (mapping2 != mapping) { - i_mmap_unlock_write(mapping); - mapping = mapping2; - goto retry; - } - - return mapping; + return NULL; } pgoff_t __basepage_index(struct page *page) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index c0bb186bba62..5d880d4eb9a2 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1057,27 +1057,25 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn, if (!PageHuge(hpage)) { unmap_success = try_to_unmap(hpage, ttu); } else { - /* - * For hugetlb pages, try_to_unmap could potentially call - * huge_pmd_unshare. Because of this, take semaphore in - * write mode here and set TTU_RMAP_LOCKED to indicate we - * have taken the lock at this higer level. - * - * Note that the call to hugetlb_page_mapping_lock_write - * is necessary even if mapping is already set. It handles - * ugliness of potentially having to drop page lock to obtain - * i_mmap_rwsem. 
- */ - mapping = hugetlb_page_mapping_lock_write(hpage); - - if (mapping) { - unmap_success = try_to_unmap(hpage, + if (!PageAnon(hpage)) { + /* + * For hugetlb pages in shared mappings, try_to_unmap + * could potentially call huge_pmd_unshare. Because of + * this, take semaphore in write mode here and set + * TTU_RMAP_LOCKED to indicate we have taken the lock + * at this higer level. + */ + mapping = hugetlb_page_mapping_lock_write(hpage); + if (mapping) { + unmap_success = try_to_unmap(hpage, ttu|TTU_RMAP_LOCKED); - i_mmap_unlock_write(mapping); + i_mmap_unlock_write(mapping); + } else { + pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn); + unmap_success = false; + } } else { - pr_info("Memory failure: %#lx: could not find mapping for mapped huge page\n", - pfn); - unmap_success = false; + unmap_success = try_to_unmap(hpage, ttu); } } if (!unmap_success) diff --git a/mm/migrate.c b/mm/migrate.c index 5ca5842df5db..5795cb82e27c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1328,34 +1328,38 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, goto put_anon; if (page_mapped(hpage)) { - /* - * try_to_unmap could potentially call huge_pmd_unshare. - * Because of this, take semaphore in write mode here and - * set TTU_RMAP_LOCKED to let lower levels know we have - * taken the lock. - */ - mapping = hugetlb_page_mapping_lock_write(hpage); - if (unlikely(!mapping)) - goto unlock_put_anon; + bool mapping_locked = false; + enum ttu_flags ttu = TTU_MIGRATION|TTU_IGNORE_MLOCK| + TTU_IGNORE_ACCESS; + + if (!PageAnon(hpage)) { + /* + * In shared mappings, try_to_unmap could potentially + * call huge_pmd_unshare. Because of this, take + * semaphore in write mode here and set TTU_RMAP_LOCKED + * to let lower levels know we have taken the lock. 
+ */ + mapping = hugetlb_page_mapping_lock_write(hpage); + if (unlikely(!mapping)) + goto unlock_put_anon; + + mapping_locked = true; + ttu |= TTU_RMAP_LOCKED; + } - try_to_unmap(hpage, - TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS| - TTU_RMAP_LOCKED); + try_to_unmap(hpage, ttu); page_was_mapped = 1; - /* - * Leave mapping locked until after subsequent call to - * remove_migration_ptes() - */ + + if (mapping_locked) + i_mmap_unlock_write(mapping); } if (!page_mapped(hpage)) rc = move_to_new_page(new_hpage, hpage, mode); - if (page_was_mapped) { + if (page_was_mapped) remove_migration_ptes(hpage, - rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, true); - i_mmap_unlock_write(mapping); - } + rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage, false); unlock_put_anon: unlock_page(new_hpage); diff --git a/mm/percpu.c b/mm/percpu.c index 66a93f096394..ad7a37ee74ef 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1315,8 +1315,8 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, region_size = ALIGN(start_offset + map_size, lcm_align); /* allocate chunk */ - alloc_size = sizeof(struct pcpu_chunk) + - BITS_TO_LONGS(region_size >> PAGE_SHIFT) * sizeof(unsigned long); + alloc_size = struct_size(chunk, populated, + BITS_TO_LONGS(region_size >> PAGE_SHIFT)); chunk = memblock_alloc(alloc_size, SMP_CACHE_BYTES); if (!chunk) panic("%s: Failed to allocate %zu bytes\n", __func__, @@ -2521,8 +2521,8 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_atom_size = ai->atom_size; - pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + - BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); + pcpu_chunk_struct_size = struct_size(chunk, populated, + BITS_TO_LONGS(pcpu_unit_pages)); pcpu_stats_save_ai(ai); diff --git a/mm/rmap.c b/mm/rmap.c index 1b84945d655c..31b29321adfe 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1413,9 +1413,6 @@ static 
bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, /* * If sharing is possible, start and end will be adjusted * accordingly. - * - * If called for a huge page, caller must hold i_mmap_rwsem - * in write mode as it is possible to call huge_pmd_unshare. */ adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end); @@ -1462,7 +1459,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); address = pvmw.address; - if (PageHuge(page)) { + if (PageHuge(page) && !PageAnon(page)) { /* * To call huge_pmd_unshare, i_mmap_rwsem must be * held in write mode. Caller needs to explicitly diff --git a/mm/slub.c b/mm/slub.c index b30be2385d1c..34dcc09e2ec9 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2852,7 +2852,7 @@ redo: object = c->freelist; page = c->page; - if (unlikely(!object || !node_match(page, node))) { + if (unlikely(!object || !page || !node_match(page, node))) { object = __slab_alloc(s, gfpflags, node, addr, c); } else { void *next_object = get_freepointer_safe(s, object); diff --git a/mm/vmscan.c b/mm/vmscan.c index 1b8f0e059767..7b4e31eac2cf 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1516,7 +1516,8 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, nr_reclaimed = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc, TTU_IGNORE_ACCESS, &stat, true); list_splice(&clean_pages, page_list); - mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, -nr_reclaimed); + mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, + -(long)nr_reclaimed); /* * Since lazyfree pages are isolated from file LRU from the beginning, * they will rotate back to anonymous LRU in the end if it failed to @@ -1526,7 +1527,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON, stat.nr_lazyfree_fail); mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, - -stat.nr_lazyfree_fail); + 
-(long)stat.nr_lazyfree_fail); return nr_reclaimed; } diff --git a/security/selinux/ibpkey.c b/security/selinux/ibpkey.c index f68a7617cfb9..3a63a989e55e 100644 --- a/security/selinux/ibpkey.c +++ b/security/selinux/ibpkey.c @@ -151,8 +151,10 @@ static int sel_ib_pkey_sid_slow(u64 subnet_prefix, u16 pkey_num, u32 *sid) * is valid, it just won't be added to the cache. */ new = kzalloc(sizeof(*new), GFP_ATOMIC); - if (!new) + if (!new) { + ret = -ENOMEM; goto out; + } new->psec.subnet_prefix = subnet_prefix; new->psec.pkey = pkey_num; diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 33218a395d9f..486932164cf2 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -42,12 +42,16 @@ #define for_each_reg(i) \ for ((i) = 0; (i) < reg_list->n; ++(i)) +#define for_each_reg_filtered(i) \ + for_each_reg(i) \ + if (!filter_reg(reg_list->reg[i])) + #define for_each_missing_reg(i) \ for ((i) = 0; (i) < blessed_n; ++(i)) \ if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i])) #define for_each_new_reg(i) \ - for ((i) = 0; (i) < reg_list->n; ++(i)) \ + for_each_reg_filtered(i) \ if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i])) @@ -57,6 +61,18 @@ static __u64 base_regs[], vregs[], sve_regs[], rejects_set[]; static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n; static __u64 *blessed_reg, blessed_n; +static bool filter_reg(__u64 reg) +{ + /* + * DEMUX register presence depends on the host's CLIDR_EL1. + * This means there's no set of them that we can bless. 
+ */ + if ((reg & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX) + return true; + + return false; +} + static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg) { int i; @@ -325,7 +341,7 @@ int main(int ac, char **av) struct kvm_vcpu_init init = { .target = -1, }; int new_regs = 0, missing_regs = 0, i; int failed_get = 0, failed_set = 0, failed_reject = 0; - bool print_list = false, fixup_core_regs = false; + bool print_list = false, print_filtered = false, fixup_core_regs = false; struct kvm_vm *vm; __u64 *vec_regs; @@ -336,8 +352,10 @@ int main(int ac, char **av) fixup_core_regs = true; else if (strcmp(av[i], "--list") == 0) print_list = true; + else if (strcmp(av[i], "--list-filtered") == 0) + print_filtered = true; else - fprintf(stderr, "Ignoring unknown option: %s\n", av[i]); + TEST_FAIL("Unknown option: %s\n", av[i]); } vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); @@ -350,10 +368,14 @@ int main(int ac, char **av) if (fixup_core_regs) core_reg_fixup(); - if (print_list) { + if (print_list || print_filtered) { putchar('\n'); - for_each_reg(i) - print_reg(reg_list->reg[i]); + for_each_reg(i) { + __u64 id = reg_list->reg[i]; + if ((print_list && !filter_reg(id)) || + (print_filtered && filter_reg(id))) + print_reg(id); + } putchar('\n'); return 0; } @@ -458,6 +480,8 @@ int main(int ac, char **av) /* * The current blessed list was primed with the output of kernel version * v4.15 with --core-reg-fixup and then later updated with new registers. 
+ * + * The blessed list is up to date with kernel version v5.10-rc5 */ static __u64 base_regs[] = { KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]), @@ -736,9 +760,6 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */ ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */ ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */ - KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 0, - KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1, - KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2, }; static __u64 base_regs_n = ARRAY_SIZE(base_regs); |