diff options
author | Paolo Bonzini | 2020-10-20 08:14:25 -0400 |
---|---|---|
committer | Paolo Bonzini | 2020-10-20 08:14:25 -0400 |
commit | 1b21c8db0e3b71523ada0cf568372ebfcf0d3466 (patch) | |
tree | 70e537fadc469c5fddabbdf116ea6090baaf4cf8 /arch/arm64/include | |
parent | 628ade2d0816b2675ab61ba6aadfc9a94e3e1589 (diff) | |
parent | 4e5dc64c43192b4fd4c96ac150a8f013065f5f5b (diff) |
Merge tag 'kvmarm-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 updates for Linux 5.10
- New page table code for both hypervisor and guest stage-2
- Introduction of a new EL2-private host context
- Allow EL2 to have its own private per-CPU variables
- Support of PMU event filtering
- Complete rework of the Spectre mitigation
Diffstat (limited to 'arch/arm64/include')
-rw-r--r-- | arch/arm64/include/asm/assembler.h | 29 | ||||
-rw-r--r-- | arch/arm64/include/asm/cpucaps.h | 4 | ||||
-rw-r--r-- | arch/arm64/include/asm/cpufeature.h | 24 | ||||
-rw-r--r-- | arch/arm64/include/asm/hyp_image.h | 36 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_asm.h | 192 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_emulate.h | 14 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_host.h | 75 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_hyp.h | 9 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_mmu.h | 341 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_pgtable.h | 309 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_ptrauth.h | 6 | ||||
-rw-r--r-- | arch/arm64/include/asm/mmu.h | 11 | ||||
-rw-r--r-- | arch/arm64/include/asm/percpu.h | 28 | ||||
-rw-r--r-- | arch/arm64/include/asm/pgtable-hwdef.h | 24 | ||||
-rw-r--r-- | arch/arm64/include/asm/pgtable-prot.h | 19 | ||||
-rw-r--r-- | arch/arm64/include/asm/processor.h | 44 | ||||
-rw-r--r-- | arch/arm64/include/asm/spectre.h | 32 | ||||
-rw-r--r-- | arch/arm64/include/asm/stage2_pgtable.h | 215 | ||||
-rw-r--r-- | arch/arm64/include/uapi/asm/kvm.h | 25 |
19 files changed, 643 insertions, 794 deletions
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 54d181177656..ddbe6bf00e33 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -219,6 +219,23 @@ lr .req x30 // link register .endm /* + * @dst: destination register + */ +#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) + .macro this_cpu_offset, dst + mrs \dst, tpidr_el2 + .endm +#else + .macro this_cpu_offset, dst +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN + mrs \dst, tpidr_el1 +alternative_else + mrs \dst, tpidr_el2 +alternative_endif + .endm +#endif + + /* * @dst: Result of per_cpu(sym, smp_processor_id()) (can be SP) * @sym: The name of the per-cpu variable * @tmp: scratch register @@ -226,11 +243,7 @@ lr .req x30 // link register .macro adr_this_cpu, dst, sym, tmp adrp \tmp, \sym add \dst, \tmp, #:lo12:\sym -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - mrs \tmp, tpidr_el1 -alternative_else - mrs \tmp, tpidr_el2 -alternative_endif + this_cpu_offset \tmp add \dst, \dst, \tmp .endm @@ -241,11 +254,7 @@ alternative_endif */ .macro ldr_this_cpu dst, sym, tmp adr_l \dst, \sym -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - mrs \tmp, tpidr_el1 -alternative_else - mrs \tmp, tpidr_el2 -alternative_endif + this_cpu_offset \tmp ldr \dst, [\dst, \tmp] .endm diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 07b643a70710..c4ac9a13ad5f 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -31,13 +31,13 @@ #define ARM64_HAS_DCPOP 21 #define ARM64_SVE 22 #define ARM64_UNMAP_KERNEL_AT_EL0 23 -#define ARM64_HARDEN_BRANCH_PREDICTOR 24 +#define ARM64_SPECTRE_V2 24 #define ARM64_HAS_RAS_EXTN 25 #define ARM64_WORKAROUND_843419 26 #define ARM64_HAS_CACHE_IDC 27 #define ARM64_HAS_CACHE_DIC 28 #define ARM64_HW_DBM 29 -#define ARM64_SSBD 30 +#define ARM64_SPECTRE_V4 30 #define ARM64_MISMATCHED_CACHE_TYPE 31 #define ARM64_HAS_STAGE2_FWB 32 #define ARM64_HAS_CRC32 33 diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 89b4f0142c28..fba6700b457b 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -698,30 +698,6 @@ static inline bool system_supports_tlb_range(void) cpus_have_const_cap(ARM64_HAS_TLB_RANGE); } -#define ARM64_BP_HARDEN_UNKNOWN -1 -#define ARM64_BP_HARDEN_WA_NEEDED 0 -#define ARM64_BP_HARDEN_NOT_REQUIRED 1 - -int get_spectre_v2_workaround_state(void); - -#define ARM64_SSBD_UNKNOWN -1 -#define ARM64_SSBD_FORCE_DISABLE 0 -#define ARM64_SSBD_KERNEL 1 -#define ARM64_SSBD_FORCE_ENABLE 2 -#define ARM64_SSBD_MITIGATED 3 - -static inline int arm64_get_ssbd_state(void) -{ -#ifdef CONFIG_ARM64_SSBD - extern int ssbd_state; - return ssbd_state; -#else - return ARM64_SSBD_UNKNOWN; -#endif -} - -void arm64_set_ssbd_mitigation(bool state); - extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt); static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange) diff --git a/arch/arm64/include/asm/hyp_image.h b/arch/arm64/include/asm/hyp_image.h new file mode 100644 index 000000000000..daa1a1da539e --- /dev/null +++ b/arch/arm64/include/asm/hyp_image.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 Google LLC. + * Written by David Brazdil <dbrazdil@google.com> + */ + +#ifndef __ARM64_HYP_IMAGE_H__ +#define __ARM64_HYP_IMAGE_H__ + +/* + * KVM nVHE code has its own symbol namespace prefixed with __kvm_nvhe_, + * to separate it from the kernel proper. + */ +#define kvm_nvhe_sym(sym) __kvm_nvhe_##sym + +#ifdef LINKER_SCRIPT + +/* + * KVM nVHE ELF section names are prefixed with .hyp, to separate them + * from the kernel proper. + */ +#define HYP_SECTION_NAME(NAME) .hyp##NAME + +/* Defines an ELF hyp section from input section @NAME and its subsections. */ +#define HYP_SECTION(NAME) \ + HYP_SECTION_NAME(NAME) : { *(NAME NAME##.*) } + +/* + * Defines a linker script alias of a kernel-proper symbol referenced by + * KVM nVHE hyp code. + */ +#define KVM_NVHE_ALIAS(sym) kvm_nvhe_sym(sym) = sym; + +#endif /* LINKER_SCRIPT */ + +#endif /* __ARM64_HYP_IMAGE_H__ */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 6f98fbd0ac81..54387ccd1ab2 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -7,11 +7,9 @@ #ifndef __ARM_KVM_ASM_H__ #define __ARM_KVM_ASM_H__ +#include <asm/hyp_image.h> #include <asm/virt.h> -#define VCPU_WORKAROUND_2_FLAG_SHIFT 0 -#define VCPU_WORKAROUND_2_FLAG (_AC(1, UL) << VCPU_WORKAROUND_2_FLAG_SHIFT) - #define ARM_EXIT_WITH_SERROR_BIT 31 #define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT)) #define ARM_EXCEPTION_IS_TRAP(x) (ARM_EXCEPTION_CODE((x)) == ARM_EXCEPTION_TRAP) @@ -38,17 +36,34 @@ #define __SMCCC_WORKAROUND_1_SMC_SZ 36 +#define KVM_HOST_SMCCC_ID(id) \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + (id)) + +#define KVM_HOST_SMCCC_FUNC(name) KVM_HOST_SMCCC_ID(__KVM_HOST_SMCCC_FUNC_##name) + +#define __KVM_HOST_SMCCC_FUNC___kvm_hyp_init 0 +#define __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run 1 +#define __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context 2 +#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa 3 +#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid 4 +#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_local_vmid 5 +#define __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff 6 +#define __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs 7 +#define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_ich_vtr_el2 8 +#define __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr 9 +#define __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr 10 +#define __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs 11 +#define __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 12 +#define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs 13 +#define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs 14 + #ifndef __ASSEMBLY__ #include <linux/mm.h> -/* - * Translate name of a symbol defined in nVHE hyp to the name seen - * by kernel proper. All nVHE symbols are prefixed by the build system - * to avoid clashes with the VHE variants. - */ -#define kvm_nvhe_sym(sym) __kvm_nvhe_##sym - #define DECLARE_KVM_VHE_SYM(sym) extern char sym[] #define DECLARE_KVM_NVHE_SYM(sym) extern char kvm_nvhe_sym(sym)[] @@ -60,10 +75,53 @@ DECLARE_KVM_VHE_SYM(sym); \ DECLARE_KVM_NVHE_SYM(sym) +#define DECLARE_KVM_VHE_PER_CPU(type, sym) \ + DECLARE_PER_CPU(type, sym) +#define DECLARE_KVM_NVHE_PER_CPU(type, sym) \ + DECLARE_PER_CPU(type, kvm_nvhe_sym(sym)) + +#define DECLARE_KVM_HYP_PER_CPU(type, sym) \ + DECLARE_KVM_VHE_PER_CPU(type, sym); \ + DECLARE_KVM_NVHE_PER_CPU(type, sym) + +/* + * Compute pointer to a symbol defined in nVHE percpu region. + * Returns NULL if percpu memory has not been allocated yet. + */ +#define this_cpu_ptr_nvhe_sym(sym) per_cpu_ptr_nvhe_sym(sym, smp_processor_id()) +#define per_cpu_ptr_nvhe_sym(sym, cpu) \ + ({ \ + unsigned long base, off; \ + base = kvm_arm_hyp_percpu_base[cpu]; \ + off = (unsigned long)&CHOOSE_NVHE_SYM(sym) - \ + (unsigned long)&CHOOSE_NVHE_SYM(__per_cpu_start); \ + base ? (typeof(CHOOSE_NVHE_SYM(sym))*)(base + off) : NULL; \ + }) + +#if defined(__KVM_NVHE_HYPERVISOR__) + +#define CHOOSE_NVHE_SYM(sym) sym +#define CHOOSE_HYP_SYM(sym) CHOOSE_NVHE_SYM(sym) + +/* The nVHE hypervisor shouldn't even try to access VHE symbols */ +extern void *__nvhe_undefined_symbol; +#define CHOOSE_VHE_SYM(sym) __nvhe_undefined_symbol +#define this_cpu_ptr_hyp_sym(sym) (&__nvhe_undefined_symbol) +#define per_cpu_ptr_hyp_sym(sym, cpu) (&__nvhe_undefined_symbol) + +#elif defined(__KVM_VHE_HYPERVISOR__) + #define CHOOSE_VHE_SYM(sym) sym -#define CHOOSE_NVHE_SYM(sym) kvm_nvhe_sym(sym) +#define CHOOSE_HYP_SYM(sym) CHOOSE_VHE_SYM(sym) + +/* The VHE hypervisor shouldn't even try to access nVHE symbols */ +extern void *__vhe_undefined_symbol; +#define CHOOSE_NVHE_SYM(sym) __vhe_undefined_symbol +#define this_cpu_ptr_hyp_sym(sym) (&__vhe_undefined_symbol) +#define per_cpu_ptr_hyp_sym(sym, cpu) (&__vhe_undefined_symbol) + +#else -#ifndef __KVM_NVHE_HYPERVISOR__ /* * BIG FAT WARNINGS: * @@ -75,12 +133,21 @@ * - Don't let the nVHE hypervisor have access to this, as it will * pick the *wrong* symbol (yes, it runs at EL2...). */ -#define CHOOSE_HYP_SYM(sym) (is_kernel_in_hyp_mode() ? CHOOSE_VHE_SYM(sym) \ +#define CHOOSE_HYP_SYM(sym) (is_kernel_in_hyp_mode() \ + ? CHOOSE_VHE_SYM(sym) \ : CHOOSE_NVHE_SYM(sym)) -#else -/* The nVHE hypervisor shouldn't even try to access anything */ -extern void *__nvhe_undefined_symbol; -#define CHOOSE_HYP_SYM(sym) __nvhe_undefined_symbol + +#define this_cpu_ptr_hyp_sym(sym) (is_kernel_in_hyp_mode() \ + ? this_cpu_ptr(&sym) \ + : this_cpu_ptr_nvhe_sym(sym)) + +#define per_cpu_ptr_hyp_sym(sym, cpu) (is_kernel_in_hyp_mode() \ + ? per_cpu_ptr(&sym, cpu) \ + : per_cpu_ptr_nvhe_sym(sym, cpu)) + +#define CHOOSE_VHE_SYM(sym) sym +#define CHOOSE_NVHE_SYM(sym) kvm_nvhe_sym(sym) + #endif /* Translate a kernel address @ptr into its equivalent linear mapping */ @@ -98,15 +165,19 @@ struct kvm_vcpu; struct kvm_s2_mmu; DECLARE_KVM_NVHE_SYM(__kvm_hyp_init); +DECLARE_KVM_NVHE_SYM(__kvm_hyp_host_vector); DECLARE_KVM_HYP_SYM(__kvm_hyp_vector); #define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init) +#define __kvm_hyp_host_vector CHOOSE_NVHE_SYM(__kvm_hyp_host_vector) #define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector) -#ifdef CONFIG_KVM_INDIRECT_VECTORS +extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; +DECLARE_KVM_NVHE_SYM(__per_cpu_start); +DECLARE_KVM_NVHE_SYM(__per_cpu_end); + extern atomic_t arm64_el2_vector_last_slot; DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs); #define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs) -#endif extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, @@ -149,26 +220,6 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; addr; \ }) -/* - * Home-grown __this_cpu_{ptr,read} variants that always work at HYP, - * provided that sym is really a *symbol* and not a pointer obtained from - * a data structure. As for SHIFT_PERCPU_PTR(), the creative casting keeps - * sparse quiet. - */ -#define __hyp_this_cpu_ptr(sym) \ - ({ \ - void *__ptr; \ - __verify_pcpu_ptr(&sym); \ - __ptr = hyp_symbol_addr(sym); \ - __ptr += read_sysreg(tpidr_el2); \ - (typeof(sym) __kernel __force *)__ptr; \ - }) - -#define __hyp_this_cpu_read(sym) \ - ({ \ - *__hyp_this_cpu_ptr(sym); \ - }) - #define __KVM_EXTABLE(from, to) \ " .pushsection __kvm_ex_table, \"a\"\n" \ " .align 3\n" \ @@ -199,20 +250,8 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; #else /* __ASSEMBLY__ */ -.macro hyp_adr_this_cpu reg, sym, tmp - adr_l \reg, \sym - mrs \tmp, tpidr_el2 - add \reg, \reg, \tmp -.endm - -.macro hyp_ldr_this_cpu reg, sym, tmp - adr_l \reg, \sym - mrs \tmp, tpidr_el2 - ldr \reg, [\reg, \tmp] -.endm - .macro get_host_ctxt reg, tmp - hyp_adr_this_cpu \reg, kvm_host_data, \tmp + adr_this_cpu \reg, kvm_host_data, \tmp add \reg, \reg, #HOST_DATA_CONTEXT .endm @@ -221,6 +260,16 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU] .endm +.macro get_loaded_vcpu vcpu, ctxt + adr_this_cpu \ctxt, kvm_hyp_ctxt, \vcpu + ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU] +.endm + +.macro set_loaded_vcpu vcpu, ctxt, tmp + adr_this_cpu \ctxt, kvm_hyp_ctxt, \tmp + str \vcpu, [\ctxt, #HOST_CONTEXT_VCPU] +.endm + /* * KVM extable for unexpected exceptions. * In the same format _asm_extable, but output to a different section so that @@ -236,6 +285,45 @@ extern char __smccc_workaround_1_smc[__SMCCC_WORKAROUND_1_SMC_SZ]; .popsection .endm +#define CPU_XREG_OFFSET(x) (CPU_USER_PT_REGS + 8*x) +#define CPU_LR_OFFSET CPU_XREG_OFFSET(30) +#define CPU_SP_EL0_OFFSET (CPU_LR_OFFSET + 8) + +/* + * We treat x18 as callee-saved as the host may use it as a platform + * register (e.g. for shadow call stack). + */ +.macro save_callee_saved_regs ctxt + str x18, [\ctxt, #CPU_XREG_OFFSET(18)] + stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] + stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] + stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] + stp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] + stp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] + stp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] +.endm + +.macro restore_callee_saved_regs ctxt + // We require \ctxt is not x18-x28 + ldr x18, [\ctxt, #CPU_XREG_OFFSET(18)] + ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] + ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] + ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] + ldp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] + ldp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] + ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] +.endm + +.macro save_sp_el0 ctxt, tmp + mrs \tmp, sp_el0 + str \tmp, [\ctxt, #CPU_SP_EL0_OFFSET] +.endm + +.macro restore_sp_el0 ctxt, tmp + ldr \tmp, [\ctxt, #CPU_SP_EL0_OFFSET] + msr sp_el0, \tmp +.endm + #endif #endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 1cc5f5f72d0b..5ef2669ccd6c 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -391,20 +391,6 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu) return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK; } -static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG; -} - -static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu, - bool flag) -{ - if (flag) - vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG; - else - vcpu->arch.workaround_flags &= ~VCPU_WORKAROUND_2_FLAG; -} - static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) { if (vcpu_mode_is_32bit(vcpu)) { diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 905c2b87e05a..0aecbab6a7fb 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -11,6 +11,7 @@ #ifndef __ARM64_KVM_HOST_H__ #define __ARM64_KVM_HOST_H__ +#include <linux/arm-smccc.h> #include <linux/bitmap.h> #include <linux/types.h> #include <linux/jump_label.h> @@ -79,8 +80,8 @@ struct kvm_s2_mmu { * for vEL1/EL0 with vHCR_EL2.VM == 0. In that case, we use the * canonical stage-2 page tables. */ - pgd_t *pgd; phys_addr_t pgd_phys; + struct kvm_pgtable *pgt; /* The last vcpu id that ran on each physical CPU */ int __percpu *last_vcpu_ran; @@ -110,6 +111,13 @@ struct kvm_arch { * supported. */ bool return_nisv_io_abort_to_user; + + /* + * VM-wide PMU filter, implemented as a bitmap and big enough for + * up to 2^10 events (ARMv8.0) or 2^16 events (ARMv8.1+). + */ + unsigned long *pmu_filter; + unsigned int pmuver; }; struct kvm_vcpu_fault_info { @@ -262,8 +270,6 @@ struct kvm_host_data { struct kvm_pmu_events pmu_events; }; -typedef struct kvm_host_data kvm_host_data_t; - struct vcpu_reset_state { unsigned long pc; unsigned long r0; @@ -480,18 +486,15 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); -u64 __kvm_call_hyp(void *hypfn, ...); - -#define kvm_call_hyp_nvhe(f, ...) \ - do { \ - DECLARE_KVM_NVHE_SYM(f); \ - __kvm_call_hyp(kvm_ksym_ref_nvhe(f), ##__VA_ARGS__); \ - } while(0) - -#define kvm_call_hyp_nvhe_ret(f, ...) \ +#define kvm_call_hyp_nvhe(f, ...) \ ({ \ - DECLARE_KVM_NVHE_SYM(f); \ - __kvm_call_hyp(kvm_ksym_ref_nvhe(f), ##__VA_ARGS__); \ + struct arm_smccc_res res; \ + \ + arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(f), \ + ##__VA_ARGS__, &res); \ + WARN_ON(res.a0 != SMCCC_RET_SUCCESS); \ + \ + res.a1; \ }) /* @@ -517,7 +520,7 @@ u64 __kvm_call_hyp(void *hypfn, ...); ret = f(__VA_ARGS__); \ isb(); \ } else { \ - ret = kvm_call_hyp_nvhe_ret(f, ##__VA_ARGS__); \ + ret = kvm_call_hyp_nvhe(f, ##__VA_ARGS__); \ } \ \ ret; \ @@ -565,7 +568,7 @@ void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); -DECLARE_PER_CPU(kvm_host_data_t, kvm_host_data); +DECLARE_KVM_HYP_PER_CPU(struct kvm_host_data, kvm_host_data); static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) { @@ -631,46 +634,6 @@ static inline void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) {} static inline void kvm_clr_pmu_events(u32 clr) {} #endif -#define KVM_BP_HARDEN_UNKNOWN -1 -#define KVM_BP_HARDEN_WA_NEEDED 0 -#define KVM_BP_HARDEN_NOT_REQUIRED 1 - -static inline int kvm_arm_harden_branch_predictor(void) -{ - switch (get_spectre_v2_workaround_state()) { - case ARM64_BP_HARDEN_WA_NEEDED: - return KVM_BP_HARDEN_WA_NEEDED; - case ARM64_BP_HARDEN_NOT_REQUIRED: - return KVM_BP_HARDEN_NOT_REQUIRED; - case ARM64_BP_HARDEN_UNKNOWN: - default: - return KVM_BP_HARDEN_UNKNOWN; - } -} - -#define KVM_SSBD_UNKNOWN -1 -#define KVM_SSBD_FORCE_DISABLE 0 -#define KVM_SSBD_KERNEL 1 -#define KVM_SSBD_FORCE_ENABLE 2 -#define KVM_SSBD_MITIGATED 3 - -static inline int kvm_arm_have_ssbd(void) -{ - switch (arm64_get_ssbd_state()) { - case ARM64_SSBD_FORCE_DISABLE: - return KVM_SSBD_FORCE_DISABLE; - case ARM64_SSBD_KERNEL: - return KVM_SSBD_KERNEL; - case ARM64_SSBD_FORCE_ENABLE: - return KVM_SSBD_FORCE_ENABLE; - case ARM64_SSBD_MITIGATED: - return KVM_SSBD_MITIGATED; - case ARM64_SSBD_UNKNOWN: - default: - return KVM_SSBD_UNKNOWN; - } -} - void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu); void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 46689e7db46c..6b664de5ec1f 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -12,6 +12,9 @@ #include <asm/alternative.h> #include <asm/sysreg.h> +DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); +DECLARE_PER_CPU(unsigned long, kvm_hyp_vector); + #define read_sysreg_elx(r,nvh,vh) \ ({ \ u64 reg; \ @@ -87,11 +90,11 @@ void activate_traps_vhe_load(struct kvm_vcpu *vcpu); void deactivate_traps_vhe_put(void); #endif -u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); +u64 __guest_enter(struct kvm_vcpu *vcpu); -void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt); +void __noreturn hyp_panic(void); #ifdef __KVM_NVHE_HYPERVISOR__ -void __noreturn __hyp_do_panic(unsigned long, ...); +void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par); #endif #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 189839c3706a..331394306cce 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -9,6 +9,7 @@ #include <asm/page.h> #include <asm/memory.h> +#include <asm/mmu.h> #include <asm/cpufeature.h> /* @@ -43,16 +44,6 @@ * HYP_VA_MIN = 1 << (VA_BITS - 1) * HYP_VA_MAX = HYP_VA_MIN + (1 << (VA_BITS - 1)) - 1 * - * This of course assumes that the trampoline page exists within the - * VA_BITS range. If it doesn't, then it means we're in the odd case - * where the kernel idmap (as well as HYP) uses more levels than the - * kernel runtime page tables (as seen when the kernel is configured - * for 4k pages, 39bits VA, and yet memory lives just above that - * limit, forcing the idmap to use 4 levels of page tables while the - * kernel itself only uses 3). In this particular case, it doesn't - * matter which side of VA_BITS we use, as we're guaranteed not to - * conflict with anything. - * * When using VHE, there are no separate hyp mappings and all KVM * functionality is already mapped as part of the main kernel * mappings, and none of this applies in that case. @@ -117,15 +108,10 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) #define kvm_phys_size(kvm) (_AC(1, ULL) << kvm_phys_shift(kvm)) #define kvm_phys_mask(kvm) (kvm_phys_size(kvm) - _AC(1, ULL)) -static inline bool kvm_page_empty(void *ptr) -{ - struct page *ptr_page = virt_to_page(ptr); - return page_count(ptr_page) == 1; -} - +#include <asm/kvm_pgtable.h> #include <asm/stage2_pgtable.h> -int create_hyp_mappings(void *from, void *to, pgprot_t prot); +int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot); int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, void __iomem **kaddr, void __iomem **haddr); @@ -141,149 +127,9 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, int kvm_handle_guest_abort(struct kvm_vcpu *vcpu); -void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); - phys_addr_t kvm_mmu_get_httbr(void); phys_addr_t kvm_get_idmap_vector(void); int kvm_mmu_init(void); -void kvm_clear_hyp_idmap(void); - -#define kvm_mk_pmd(ptep) \ - __pmd(__phys_to_pmd_val(__pa(ptep)) | PMD_TYPE_TABLE) -#define kvm_mk_pud(pmdp) \ - __pud(__phys_to_pud_val(__pa(pmdp)) | PMD_TYPE_TABLE) -#define kvm_mk_p4d(pmdp) \ - __p4d(__phys_to_p4d_val(__pa(pmdp)) | PUD_TYPE_TABLE) - -#define kvm_set_pud(pudp, pud) set_pud(pudp, pud) - -#define kvm_pfn_pte(pfn, prot) pfn_pte(pfn, prot) -#define kvm_pfn_pmd(pfn, prot) pfn_pmd(pfn, prot) -#define kvm_pfn_pud(pfn, prot) pfn_pud(pfn, prot) - -#define kvm_pud_pfn(pud) pud_pfn(pud) - -#define kvm_pmd_mkhuge(pmd) pmd_mkhuge(pmd) -#define kvm_pud_mkhuge(pud) pud_mkhuge(pud) - -static inline pte_t kvm_s2pte_mkwrite(pte_t pte) -{ - pte_val(pte) |= PTE_S2_RDWR; - return pte; -} - -static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd) -{ - pmd_val(pmd) |= PMD_S2_RDWR; - return pmd; -} - -static inline pud_t kvm_s2pud_mkwrite(pud_t pud) -{ - pud_val(pud) |= PUD_S2_RDWR; - return pud; -} - -static inline pte_t kvm_s2pte_mkexec(pte_t pte) -{ - pte_val(pte) &= ~PTE_S2_XN; - return pte; -} - -static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd) -{ - pmd_val(pmd) &= ~PMD_S2_XN; - return pmd; -} - -static inline pud_t kvm_s2pud_mkexec(pud_t pud) -{ - pud_val(pud) &= ~PUD_S2_XN; - return pud; -} - -static inline void kvm_set_s2pte_readonly(pte_t *ptep) -{ - pteval_t old_pteval, pteval; - - pteval = READ_ONCE(pte_val(*ptep)); - do { - old_pteval = pteval; - pteval &= ~PTE_S2_RDWR; - pteval |= PTE_S2_RDONLY; - pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval); - } while (pteval != old_pteval); -} - -static inline bool kvm_s2pte_readonly(pte_t *ptep) -{ - return (READ_ONCE(pte_val(*ptep)) & PTE_S2_RDWR) == PTE_S2_RDONLY; -} - -static inline bool kvm_s2pte_exec(pte_t *ptep) -{ - return !(READ_ONCE(pte_val(*ptep)) & PTE_S2_XN); -} - -static inline void kvm_set_s2pmd_readonly(pmd_t *pmdp) -{ - kvm_set_s2pte_readonly((pte_t *)pmdp); -} - -static inline bool kvm_s2pmd_readonly(pmd_t *pmdp) -{ - return kvm_s2pte_readonly((pte_t *)pmdp); -} - -static inline bool kvm_s2pmd_exec(pmd_t *pmdp) -{ - return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN); -} - -static inline void kvm_set_s2pud_readonly(pud_t *pudp) -{ - kvm_set_s2pte_readonly((pte_t *)pudp); -} - -static inline bool kvm_s2pud_readonly(pud_t *pudp) -{ - return kvm_s2pte_readonly((pte_t *)pudp); -} - -static inline bool kvm_s2pud_exec(pud_t *pudp) -{ - return !(READ_ONCE(pud_val(*pudp)) & PUD_S2_XN); -} - -static inline pud_t kvm_s2pud_mkyoung(pud_t pud) -{ - return pud_mkyoung(pud); -} - -static inline bool kvm_s2pud_young(pud_t pud) -{ - return pud_young(pud); -} - -#define hyp_pte_table_empty(ptep) kvm_page_empty(ptep) - -#ifdef __PAGETABLE_PMD_FOLDED -#define hyp_pmd_table_empty(pmdp) (0) -#else -#define hyp_pmd_table_empty(pmdp) kvm_page_empty(pmdp) -#endif - -#ifdef __PAGETABLE_PUD_FOLDED -#define hyp_pud_table_empty(pudp) (0) -#else -#define hyp_pud_table_empty(pudp) kvm_page_empty(pudp) -#endif - -#ifdef __PAGETABLE_P4D_FOLDED -#define hyp_p4d_table_empty(p4dp) (0) -#else -#define hyp_p4d_table_empty(p4dp) kvm_page_empty(p4dp) -#endif struct kvm; @@ -325,77 +171,9 @@ static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn, } } -static inline void __kvm_flush_dcache_pte(pte_t pte) -{ - if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) { - struct page *page = pte_page(pte); - kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE); - } -} - -static inline void __kvm_flush_dcache_pmd(pmd_t pmd) -{ - if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) { - struct page *page = pmd_page(pmd); - kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE); - } -} - -static inline void __kvm_flush_dcache_pud(pud_t pud) -{ - if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) { - struct page *page = pud_page(pud); - kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE); - } -} - void kvm_set_way_flush(struct kvm_vcpu *vcpu); void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); -static inline bool __kvm_cpu_uses_extended_idmap(void) -{ - return __cpu_uses_extended_idmap_level(); -} - -static inline unsigned long __kvm_idmap_ptrs_per_pgd(void) -{ - return idmap_ptrs_per_pgd; -} - -/* - * Can't use pgd_populate here, because the extended idmap adds an extra level - * above CONFIG_PGTABLE_LEVELS (which is 2 or 3 if we're using the extended - * idmap), and pgd_populate is only available if CONFIG_PGTABLE_LEVELS = 4. - */ -static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, - pgd_t *hyp_pgd, - pgd_t *merged_hyp_pgd, - unsigned long hyp_idmap_start) -{ - int idmap_idx; - u64 pgd_addr; - - /* - * Use the first entry to access the HYP mappings. It is - * guaranteed to be free, otherwise we wouldn't use an - * extended idmap. - */ - VM_BUG_ON(pgd_val(merged_hyp_pgd[0])); - pgd_addr = __phys_to_pgd_val(__pa(hyp_pgd)); - merged_hyp_pgd[0] = __pgd(pgd_addr | PMD_TYPE_TABLE); - - /* - * Create another extended level entry that points to the boot HYP map, - * which contains an ID mapping of the HYP init code. We essentially - * merge the boot and runtime HYP maps by doing so, but they don't - * overlap anyway, so this is fine. - */ - idmap_idx = hyp_idmap_start >> VA_BITS; - VM_BUG_ON(pgd_val(merged_hyp_pgd[idmap_idx])); - pgd_addr = __phys_to_pgd_val(__pa(boot_hyp_pgd)); - merged_hyp_pgd[idmap_idx] = __pgd(pgd_addr | PMD_TYPE_TABLE); -} - static inline unsigned int kvm_get_vmid_bits(void) { int reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); @@ -430,19 +208,17 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, return ret; } -#ifdef CONFIG_KVM_INDIRECT_VECTORS /* * EL2 vectors can be mapped and rerouted in a number of ways, * depending on the kernel configuration and CPU present: * - * - If the CPU has the ARM64_HARDEN_BRANCH_PREDICTOR cap, the - * hardening sequence is placed in one of the vector slots, which is - * executed before jumping to the real vectors. + * - If the CPU is affected by Spectre-v2, the hardening sequence is + * placed in one of the vector slots, which is executed before jumping + * to the real vectors. * - * - If the CPU has both the ARM64_HARDEN_EL2_VECTORS cap and the - * ARM64_HARDEN_BRANCH_PREDICTOR cap, the slot containing the - * hardening sequence is mapped next to the idmap page, and executed - * before jumping to the real vectors. + * - If the CPU also has the ARM64_HARDEN_EL2_VECTORS cap, the slot + * containing the hardening sequence is mapped next to the idmap page, + * and executed before jumping to the real vectors. * * - If the CPU only has the ARM64_HARDEN_EL2_VECTORS cap, then an * empty slot is selected, mapped next to the idmap page, and @@ -452,19 +228,16 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, * VHE, as we don't have hypervisor-specific mappings. If the system * is VHE and yet selects this capability, it will be ignored. */ -#include <asm/mmu.h> - extern void *__kvm_bp_vect_base; extern int __kvm_harden_el2_vector_slot; -/* This is called on both VHE and !VHE systems */ static inline void *kvm_get_hyp_vector(void) { struct bp_hardening_data *data = arm64_get_bp_hardening_data(); void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); int slot = -1; - if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) && data->fn) { + if (cpus_have_const_cap(ARM64_SPECTRE_V2) && data->fn) { vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); slot = data->hyp_vectors_slot; } @@ -481,102 +254,8 @@ static inline void *kvm_get_hyp_vector(void) return vect; } -/* This is only called on a !VHE system */ -static inline int kvm_map_vectors(void) -{ - /* - * HBP = ARM64_HARDEN_BRANCH_PREDICTOR - * HEL2 = ARM64_HARDEN_EL2_VECTORS - * - * !HBP + !HEL2 -> use direct vectors - * HBP + !HEL2 -> use hardened vectors in place - * !HBP + HEL2 -> allocate one vector slot and use exec mapping - * HBP + HEL2 -> use hardened vertors and use exec mapping - */ - if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) { - __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs); - __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base); - } - - if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) { - phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs); - unsigned long size = __BP_HARDEN_HYP_VECS_SZ; - - /* - * Always allocate a spare vector slot, as we don't - * know yet which CPUs have a BP hardening slot that - * we can reuse. - */ - __kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot); - BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS); - return create_hyp_exec_mappings(vect_pa, size, - &__kvm_bp_vect_base); - } - - return 0; -} -#else -static inline void *kvm_get_hyp_vector(void) -{ - return kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); -} - -static inline int kvm_map_vectors(void) -{ - return 0; -} -#endif - -#ifdef CONFIG_ARM64_SSBD -DECLARE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); - -static inline int hyp_map_aux_data(void) -{ - int cpu, err; - - for_each_possible_cpu(cpu) { - u64 *ptr; - - ptr = per_cpu_ptr(&arm64_ssbd_callback_required, cpu); - err = create_hyp_mappings(ptr, ptr + 1, PAGE_HYP); - if (err) - return err; - } - return 0; -} -#else -static inline int hyp_map_aux_data(void) -{ - return 0; -} -#endif - #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) -/* - * Get the magic number 'x' for VTTBR:BADDR of this KVM instance. - * With v8.2 LVA extensions, 'x' should be a minimum of 6 with - * 52bit IPS. - */ -static inline int arm64_vttbr_x(u32 ipa_shift, u32 levels) -{ - int x = ARM64_VTTBR_X(ipa_shift, levels); - - return (IS_ENABLED(CONFIG_ARM64_PA_BITS_52) && x < 6) ? 6 : x; -} - -static inline u64 vttbr_baddr_mask(u32 ipa_shift, u32 levels) -{ - unsigned int x = arm64_vttbr_x(ipa_shift, levels); - - return GENMASK_ULL(PHYS_MASK_SHIFT - 1, x); -} - -static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm) -{ - return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm)); -} - static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu) { struct kvm_vmid *vmid = &mmu->vmid; diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h new file mode 100644 index 000000000000..52ab38db04c7 --- /dev/null +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Google LLC + * Author: Will Deacon <will@kernel.org> + */ + +#ifndef __ARM64_KVM_PGTABLE_H__ +#define __ARM64_KVM_PGTABLE_H__ + +#include <linux/bits.h> +#include <linux/kvm_host.h> +#include <linux/types.h> + +typedef u64 kvm_pte_t; + +/** + * struct kvm_pgtable - KVM page-table. + * @ia_bits: Maximum input address size, in bits. + * @start_level: Level at which the page-table walk starts. + * @pgd: Pointer to the first top-level entry of the page-table. + * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables. + */ +struct kvm_pgtable { + u32 ia_bits; + u32 start_level; + kvm_pte_t *pgd; + + /* Stage-2 only */ + struct kvm_s2_mmu *mmu; +}; + +/** + * enum kvm_pgtable_prot - Page-table permissions and attributes. + * @KVM_PGTABLE_PROT_X: Execute permission. + * @KVM_PGTABLE_PROT_W: Write permission. + * @KVM_PGTABLE_PROT_R: Read permission. + * @KVM_PGTABLE_PROT_DEVICE: Device attributes. + */ +enum kvm_pgtable_prot { + KVM_PGTABLE_PROT_X = BIT(0), + KVM_PGTABLE_PROT_W = BIT(1), + KVM_PGTABLE_PROT_R = BIT(2), + + KVM_PGTABLE_PROT_DEVICE = BIT(3), +}; + +#define PAGE_HYP (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W) +#define PAGE_HYP_EXEC (KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X) +#define PAGE_HYP_RO (KVM_PGTABLE_PROT_R) +#define PAGE_HYP_DEVICE (PAGE_HYP | KVM_PGTABLE_PROT_DEVICE) + +/** + * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk. + * @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid + * entries. + * @KVM_PGTABLE_WALK_TABLE_PRE: Visit table entries before their + * children. + * @KVM_PGTABLE_WALK_TABLE_POST: Visit table entries after their + * children. + */ +enum kvm_pgtable_walk_flags { + KVM_PGTABLE_WALK_LEAF = BIT(0), + KVM_PGTABLE_WALK_TABLE_PRE = BIT(1), + KVM_PGTABLE_WALK_TABLE_POST = BIT(2), +}; + +typedef int (*kvm_pgtable_visitor_fn_t)(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg); + +/** + * struct kvm_pgtable_walker - Hook into a page-table walk. + * @cb: Callback function to invoke during the walk. + * @arg: Argument passed to the callback function. + * @flags: Bitwise-OR of flags to identify the entry types on which to + * invoke the callback function. + */ +struct kvm_pgtable_walker { + const kvm_pgtable_visitor_fn_t cb; + void * const arg; + const enum kvm_pgtable_walk_flags flags; +}; + +/** + * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table. + * @pgt: Uninitialised page-table structure to initialise. + * @va_bits: Maximum virtual address bits. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits); + +/** + * kvm_pgtable_hyp_destroy() - Destroy an unused hypervisor stage-1 page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init(). + * + * The page-table is assumed to be unreachable by any hardware walkers prior + * to freeing and therefore no TLB invalidation is performed. + */ +void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt); + +/** + * kvm_pgtable_hyp_map() - Install a mapping in a hypervisor stage-1 page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init(). + * @addr: Virtual address at which to place the mapping. + * @size: Size of the mapping. + * @phys: Physical address of the memory to map. + * @prot: Permissions and attributes for the mapping. + * + * The offset of @addr within a page is ignored, @size is rounded-up to + * the next page boundary and @phys is rounded-down to the previous page + * boundary. + * + * If device attributes are not explicitly requested in @prot, then the + * mapping will be normal, cacheable. Attempts to install a new mapping + * for a virtual address that is already mapped will be rejected with an + * error and a WARN(). + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, + enum kvm_pgtable_prot prot); + +/** + * kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table. + * @pgt: Uninitialised page-table structure to initialise. + * @kvm: KVM structure representing the guest virtual machine. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm *kvm); + +/** + * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * + * The page-table is assumed to be unreachable by any hardware walkers prior + * to freeing and therefore no TLB invalidation is performed. + */ +void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); + +/** + * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @addr: Intermediate physical address at which to place the mapping. + * @size: Size of the mapping. + * @phys: Physical address of the memory to map. + * @prot: Permissions and attributes for the mapping. + * @mc: Cache of pre-allocated GFP_PGTABLE_USER memory from which to + * allocate page-table pages. + * + * The offset of @addr within a page is ignored, @size is rounded-up to + * the next page boundary and @phys is rounded-down to the previous page + * boundary. + * + * If device attributes are not explicitly requested in @prot, then the + * mapping will be normal, cacheable. + * + * Note that this function will both coalesce existing table entries and split + * existing block mappings, relying on page-faults to fault back areas outside + * of the new mapping lazily. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, + u64 phys, enum kvm_pgtable_prot prot, + struct kvm_mmu_memory_cache *mc); + +/** + * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @addr: Intermediate physical address from which to remove the mapping. + * @size: Size of the mapping. + * + * The offset of @addr within a page is ignored and @size is rounded-up to + * the next page boundary. + * + * TLB invalidation is performed for each page-table entry cleared during the + * unmapping operation and the reference count for the page-table page + * containing the cleared entry is decremented, with unreferenced pages being + * freed. Unmapping a cacheable page will ensure that it is clean to the PoC if + * FWB is not supported by the CPU. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size); + +/** + * kvm_pgtable_stage2_wrprotect() - Write-protect guest stage-2 address range + * without TLB invalidation. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @addr: Intermediate physical address from which to write-protect, + * @size: Size of the range. + * + * The offset of @addr within a page is ignored and @size is rounded-up to + * the next page boundary. + * + * Note that it is the caller's responsibility to invalidate the TLB after + * calling this function to ensure that the updated permissions are visible + * to the CPUs. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size); + +/** + * kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @addr: Intermediate physical address to identify the page-table entry. + * + * The offset of @addr within a page is ignored. + * + * If there is a valid, leaf page-table entry used to translate @addr, then + * set the access flag in that entry. + * + * Return: The old page-table entry prior to setting the flag, 0 on failure. + */ +kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr); + +/** + * kvm_pgtable_stage2_mkold() - Clear the access flag in a page-table entry. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @addr: Intermediate physical address to identify the page-table entry. + * + * The offset of @addr within a page is ignored. + * + * If there is a valid, leaf page-table entry used to translate @addr, then + * clear the access flag in that entry. + * + * Note that it is the caller's responsibility to invalidate the TLB after + * calling this function to ensure that the updated permissions are visible + * to the CPUs. + * + * Return: The old page-table entry prior to clearing the flag, 0 on failure. + */ +kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr); + +/** + * kvm_pgtable_stage2_relax_perms() - Relax the permissions enforced by a + * page-table entry. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @addr: Intermediate physical address to identify the page-table entry. + * @prot: Additional permissions to grant for the mapping. + * + * The offset of @addr within a page is ignored. + * + * If there is a valid, leaf page-table entry used to translate @addr, then + * relax the permissions in that entry according to the read, write and + * execute permissions specified by @prot. No permissions are removed, and + * TLB invalidation is performed after updating the entry. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, + enum kvm_pgtable_prot prot); + +/** + * kvm_pgtable_stage2_is_young() - Test whether a page-table entry has the + * access flag set. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @addr: Intermediate physical address to identify the page-table entry. + * + * The offset of @addr within a page is ignored. + * + * Return: True if the page-table entry has the access flag set, false otherwise. + */ +bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr); + +/** + * kvm_pgtable_stage2_flush_range() - Clean and invalidate data cache to Point + * of Coherency for guest stage-2 address + * range. + * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init(). + * @addr: Intermediate physical address from which to flush. + * @size: Size of the range. + * + * The offset of @addr within a page is ignored and @size is rounded-up to + * the next page boundary. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size); + +/** + * kvm_pgtable_walk() - Walk a page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_*_init(). + * @addr: Input address for the start of the walk. + * @size: Size of the range to walk. + * @walker: Walker callback description. + * + * The offset of @addr within a page is ignored and @size is rounded-up to + * the next page boundary. + * + * The walker will walk the page-table entries corresponding to the input + * address range specified, visiting entries according to the walker flags. + * Invalid entries are treated as leaf entries. Leaf entries are reloaded + * after invoking the walker callback, allowing the walker to descend into + * a newly installed table. + * + * Returning a negative error code from the walker callback function will + * terminate the walk immediately with the same error code. + * + * Return: 0 on success, negative error code on failure. + */ +int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, + struct kvm_pgtable_walker *walker); + +#endif /* __ARM64_KVM_PGTABLE_H__ */ diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h index 0ddf98c3ba9f..0cd0965255d2 100644 --- a/arch/arm64/include/asm/kvm_ptrauth.h +++ b/arch/arm64/include/asm/kvm_ptrauth.h @@ -60,7 +60,7 @@ .endm /* - * Both ptrauth_switch_to_guest and ptrauth_switch_to_host macros will + * Both ptrauth_switch_to_guest and ptrauth_switch_to_hyp macros will * check for the presence ARM64_HAS_ADDRESS_AUTH, which is defined as * (ARM64_HAS_ADDRESS_AUTH_ARCH || ARM64_HAS_ADDRESS_AUTH_IMP_DEF) and * then proceed ahead with the save/restore of Pointer Authentication @@ -78,7 +78,7 @@ alternative_else_nop_endif .L__skip_switch\@: .endm -.macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3 +.macro ptrauth_switch_to_hyp g_ctxt, h_ctxt, reg1, reg2, reg3 alternative_if_not ARM64_HAS_ADDRESS_AUTH b .L__skip_switch\@ alternative_else_nop_endif @@ -96,7 +96,7 @@ alternative_else_nop_endif #else /* !CONFIG_ARM64_PTR_AUTH */ .macro ptrauth_switch_to_guest g_ctxt, reg1, reg2, reg3 .endm -.macro ptrauth_switch_to_host g_ctxt, h_ctxt, reg1, reg2, reg3 +.macro ptrauth_switch_to_hyp g_ctxt, h_ctxt, reg1, reg2, reg3 .endm #endif /* CONFIG_ARM64_PTR_AUTH */ #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index a7a5ecaa2e83..cbff2d42c1d8 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -45,7 +45,6 @@ struct bp_hardening_data { bp_hardening_cb_t fn; }; -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) @@ -57,21 +56,13 @@ static inline void arm64_apply_bp_hardening(void) { struct bp_hardening_data *d; - if (!cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) + if (!cpus_have_const_cap(ARM64_SPECTRE_V2)) return; d = arm64_get_bp_hardening_data(); if (d->fn) d->fn(); } -#else -static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void) -{ - return NULL; -} - -static inline void arm64_apply_bp_hardening(void) { } -#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ extern void arm64_memblock_init(void); extern void paging_init(void); diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 0b6409b89e5e..1599e17379d8 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -19,7 +19,16 @@ static inline void set_my_cpu_offset(unsigned long off) :: "r" (off) : "memory"); } -static inline unsigned long __my_cpu_offset(void) +static inline unsigned long __hyp_my_cpu_offset(void) +{ + /* + * Non-VHE hyp code runs with preemption disabled. No need to hazard + * the register access against barrier() as in __kern_my_cpu_offset. + */ + return read_sysreg(tpidr_el2); +} + +static inline unsigned long __kern_my_cpu_offset(void) { unsigned long off; @@ -35,7 +44,12 @@ static inline unsigned long __my_cpu_offset(void) return off; } -#define __my_cpu_offset __my_cpu_offset() + +#ifdef __KVM_NVHE_HYPERVISOR__ +#define __my_cpu_offset __hyp_my_cpu_offset() +#else +#define __my_cpu_offset __kern_my_cpu_offset() +#endif #define PERCPU_RW_OPS(sz) \ static inline unsigned long __percpu_read_##sz(void *ptr) \ @@ -227,4 +241,14 @@ PERCPU_RET_OP(add, add, ldadd) #include <asm-generic/percpu.h> +/* Redefine macros for nVHE hyp under DEBUG_PREEMPT to avoid its dependencies. */ +#if defined(__KVM_NVHE_HYPERVISOR__) && defined(CONFIG_DEBUG_PREEMPT) +#undef this_cpu_ptr +#define this_cpu_ptr raw_cpu_ptr +#undef __this_cpu_read +#define __this_cpu_read raw_cpu_read +#undef __this_cpu_write +#define __this_cpu_write raw_cpu_write +#endif + #endif /* __ASM_PERCPU_H */ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index d400a4d9aee2..2b5f822fb033 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -156,7 +156,6 @@ #define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ #define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */ -#define PTE_HYP_XN (_AT(pteval_t, 1) << 54) /* HYP XN */ #define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) #ifdef CONFIG_ARM64_PA_BITS_52 @@ -173,34 +172,11 @@ #define PTE_ATTRINDX_MASK (_AT(pteval_t, 7) << 2) /* - * 2nd stage PTE definitions - */ -#define PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[2:1] */ -#define PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ -#define PTE_S2_XN (_AT(pteval_t, 2) << 53) /* XN[1:0] */ -#define PTE_S2_SW_RESVD (_AT(pteval_t, 15) << 55) /* Reserved for SW */ - -#define PMD_S2_RDONLY (_AT(pmdval_t, 1) << 6) /* HAP[2:1] */ -#define PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ -#define PMD_S2_XN (_AT(pmdval_t, 2) << 53) /* XN[1:0] */ -#define PMD_S2_SW_RESVD (_AT(pmdval_t, 15) << 55) /* Reserved for SW */ - -#define PUD_S2_RDONLY (_AT(pudval_t, 1) << 6) /* HAP[2:1] */ -#define PUD_S2_RDWR (_AT(pudval_t, 3) << 6) /* HAP[2:1] */ -#define PUD_S2_XN (_AT(pudval_t, 2) << 53) /* XN[1:0] */ - -/* * Memory Attribute override for Stage-2 (MemAttr[3:0]) */ #define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2) /* - * EL2/HYP PTE/PMD definitions - */ -#define PMD_HYP PMD_SECT_USER -#define PTE_HYP PTE_USER - -/* * Highest possible physical address supported. */ #define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS) diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 4d867c6446c4..8f094c43072a 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -56,7 +56,6 @@ extern bool arm64_use_ng_mappings; #define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) #define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) -#define _HYP_PAGE_DEFAULT _PAGE_DEFAULT #define PAGE_KERNEL __pgprot(PROT_NORMAL) #define PAGE_KERNEL_RO __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) @@ -64,11 +63,6 @@ extern bool arm64_use_ng_mappings; #define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN) #define PAGE_KERNEL_EXEC_CONT __pgprot((PROT_NORMAL & ~PTE_PXN) | PTE_CONT) -#define PAGE_HYP __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN) -#define PAGE_HYP_EXEC __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY) -#define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) -#define PAGE_HYP_DEVICE __pgprot(_PROT_DEFAULT | PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_HYP | PTE_HYP_XN) - #define PAGE_S2_MEMATTR(attr) \ ({ \ u64 __val; \ @@ -79,19 +73,6 @@ extern bool arm64_use_ng_mappings; __val; \ }) -#define PAGE_S2_XN \ - ({ \ - u64 __val; \ - if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) \ - __val = 0; \ - else \ - __val = PTE_S2_XN; \ - __val; \ - }) - -#define PAGE_S2 __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(NORMAL) | PTE_S2_RDONLY | PAGE_S2_XN) -#define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN) - #define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) /* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */ #define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 240fe5e5b720..7d90ea2e2063 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -38,6 +38,7 @@ #include <asm/pgtable-hwdef.h> #include <asm/pointer_auth.h> #include <asm/ptrace.h> +#include <asm/spectre.h> #include <asm/types.h> /* @@ -197,40 +198,15 @@ static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) regs->pmr_save = GIC_PRIO_IRQON; } -static inline void set_ssbs_bit(struct pt_regs *regs) -{ - regs->pstate |= PSR_SSBS_BIT; -} - -static inline void set_compat_ssbs_bit(struct pt_regs *regs) -{ - regs->pstate |= PSR_AA32_SSBS_BIT; -} - static inline void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { start_thread_common(regs, pc); regs->pstate = PSR_MODE_EL0t; - - if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) - set_ssbs_bit(regs); - + spectre_v4_enable_task_mitigation(current); regs->sp = sp; } -static inline bool is_ttbr0_addr(unsigned long addr) -{ - /* entry assembly clears tags for TTBR0 addrs */ - return addr < TASK_SIZE; -} - -static inline bool is_ttbr1_addr(unsigned long addr) -{ - /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */ - return arch_kasan_reset_tag(addr) >= PAGE_OFFSET; -} - #ifdef CONFIG_COMPAT static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) @@ -244,13 +220,23 @@ static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, regs->pstate |= PSR_AA32_E_BIT; #endif - if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE) - set_compat_ssbs_bit(regs); - + spectre_v4_enable_task_mitigation(current); regs->compat_sp = sp; } #endif +static inline bool is_ttbr0_addr(unsigned long addr) +{ + /* entry assembly clears tags for TTBR0 addrs */ + return addr < TASK_SIZE; +} + +static inline bool is_ttbr1_addr(unsigned long addr) +{ + /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */ + return arch_kasan_reset_tag(addr) >= PAGE_OFFSET; +} + /* Forward declaration, a strange C thing */ struct task_struct; diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h new file mode 100644 index 000000000000..fcdfbce302bd --- /dev/null +++ b/arch/arm64/include/asm/spectre.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Interface for managing mitigations for Spectre vulnerabilities. + * + * Copyright (C) 2020 Google LLC + * Author: Will Deacon <will@kernel.org> + */ + +#ifndef __ASM_SPECTRE_H +#define __ASM_SPECTRE_H + +#include <asm/cpufeature.h> + +/* Watch out, ordering is important here. */ +enum mitigation_state { + SPECTRE_UNAFFECTED, + SPECTRE_MITIGATED, + SPECTRE_VULNERABLE, +}; + +struct task_struct; + +enum mitigation_state arm64_get_spectre_v2_state(void); +bool has_spectre_v2(const struct arm64_cpu_capabilities *cap, int scope); +void spectre_v2_enable_mitigation(const struct arm64_cpu_capabilities *__unused); + +enum mitigation_state arm64_get_spectre_v4_state(void); +bool has_spectre_v4(const struct arm64_cpu_capabilities *cap, int scope); +void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused); +void spectre_v4_enable_task_mitigation(struct task_struct *tsk); + +#endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h index 996bf98f0cab..fe341a6578c3 100644 --- a/arch/arm64/include/asm/stage2_pgtable.h +++ b/arch/arm64/include/asm/stage2_pgtable.h @@ -8,7 +8,6 @@ #ifndef __ARM64_S2_PGTABLE_H_ #define __ARM64_S2_PGTABLE_H_ -#include <linux/hugetlb.h> #include <linux/pgtable.h> /* @@ -37,217 +36,12 @@ #define stage2_pgdir_mask(kvm) ~(stage2_pgdir_size(kvm) - 1) /* - * The number of PTRS across all concatenated stage2 tables given by the - * number of bits resolved at the initial level. - * If we force more levels than necessary, we may have (stage2_pgdir_shift > IPA), - * in which case, stage2_pgd_ptrs will have one entry. - */ -#define pgd_ptrs_shift(ipa, pgdir_shift) \ - ((ipa) > (pgdir_shift) ? ((ipa) - (pgdir_shift)) : 0) -#define __s2_pgd_ptrs(ipa, lvls) \ - (1 << (pgd_ptrs_shift((ipa), pt_levels_pgdir_shift(lvls)))) -#define __s2_pgd_size(ipa, lvls) (__s2_pgd_ptrs((ipa), (lvls)) * sizeof(pgd_t)) - -#define stage2_pgd_ptrs(kvm) __s2_pgd_ptrs(kvm_phys_shift(kvm), kvm_stage2_levels(kvm)) -#define stage2_pgd_size(kvm) __s2_pgd_size(kvm_phys_shift(kvm), kvm_stage2_levels(kvm)) - -/* * kvm_mmmu_cache_min_pages() is the number of pages required to install * a stage-2 translation. We pre-allocate the entry level page table at * the VM creation. */ #define kvm_mmu_cache_min_pages(kvm) (kvm_stage2_levels(kvm) - 1) -/* Stage2 PUD definitions when the level is present */ -static inline bool kvm_stage2_has_pud(struct kvm *kvm) -{ - return (CONFIG_PGTABLE_LEVELS > 3) && (kvm_stage2_levels(kvm) > 3); -} - -#define S2_PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1) -#define S2_PUD_SIZE (1UL << S2_PUD_SHIFT) -#define S2_PUD_MASK (~(S2_PUD_SIZE - 1)) - -#define stage2_pgd_none(kvm, pgd) pgd_none(pgd) -#define stage2_pgd_clear(kvm, pgd) pgd_clear(pgd) -#define stage2_pgd_present(kvm, pgd) pgd_present(pgd) -#define stage2_pgd_populate(kvm, pgd, p4d) pgd_populate(NULL, pgd, p4d) - -static inline p4d_t *stage2_p4d_offset(struct kvm *kvm, - pgd_t *pgd, unsigned long address) -{ - return p4d_offset(pgd, address); -} - -static inline void stage2_p4d_free(struct kvm *kvm, p4d_t *p4d) -{ -} - -static inline bool stage2_p4d_table_empty(struct kvm *kvm, p4d_t *p4dp) -{ - return false; -} - -static inline phys_addr_t stage2_p4d_addr_end(struct kvm *kvm, - phys_addr_t addr, phys_addr_t end) -{ - return end; -} - -static inline bool stage2_p4d_none(struct kvm *kvm, p4d_t p4d) -{ - if (kvm_stage2_has_pud(kvm)) - return p4d_none(p4d); - else - return 0; -} - -static inline void stage2_p4d_clear(struct kvm *kvm, p4d_t *p4dp) -{ - if (kvm_stage2_has_pud(kvm)) - p4d_clear(p4dp); -} - -static inline bool stage2_p4d_present(struct kvm *kvm, p4d_t p4d) -{ - if (kvm_stage2_has_pud(kvm)) - return p4d_present(p4d); - else - return 1; -} - -static inline void stage2_p4d_populate(struct kvm *kvm, p4d_t *p4d, pud_t *pud) -{ - if (kvm_stage2_has_pud(kvm)) - p4d_populate(NULL, p4d, pud); -} - -static inline pud_t *stage2_pud_offset(struct kvm *kvm, - p4d_t *p4d, unsigned long address) -{ - if (kvm_stage2_has_pud(kvm)) - return pud_offset(p4d, address); - else - return (pud_t *)p4d; -} - -static inline void stage2_pud_free(struct kvm *kvm, pud_t *pud) -{ - if (kvm_stage2_has_pud(kvm)) - free_page((unsigned long)pud); -} - -static inline bool stage2_pud_table_empty(struct kvm *kvm, pud_t *pudp) -{ - if (kvm_stage2_has_pud(kvm)) - return kvm_page_empty(pudp); - else - return false; -} - -static inline phys_addr_t -stage2_pud_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) -{ - if (kvm_stage2_has_pud(kvm)) { - phys_addr_t boundary = (addr + S2_PUD_SIZE) & S2_PUD_MASK; - - return (boundary - 1 < end - 1) ? boundary : end; - } else { - return end; - } -} - -/* Stage2 PMD definitions when the level is present */ -static inline bool kvm_stage2_has_pmd(struct kvm *kvm) -{ - return (CONFIG_PGTABLE_LEVELS > 2) && (kvm_stage2_levels(kvm) > 2); -} - -#define S2_PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2) -#define S2_PMD_SIZE (1UL << S2_PMD_SHIFT) -#define S2_PMD_MASK (~(S2_PMD_SIZE - 1)) - -static inline bool stage2_pud_none(struct kvm *kvm, pud_t pud) -{ - if (kvm_stage2_has_pmd(kvm)) - return pud_none(pud); - else - return 0; -} - -static inline void stage2_pud_clear(struct kvm *kvm, pud_t *pud) -{ - if (kvm_stage2_has_pmd(kvm)) - pud_clear(pud); -} - -static inline bool stage2_pud_present(struct kvm *kvm, pud_t pud) -{ - if (kvm_stage2_has_pmd(kvm)) - return pud_present(pud); - else - return 1; -} - -static inline void stage2_pud_populate(struct kvm *kvm, pud_t *pud, pmd_t *pmd) -{ - if (kvm_stage2_has_pmd(kvm)) - pud_populate(NULL, pud, pmd); -} - -static inline pmd_t *stage2_pmd_offset(struct kvm *kvm, - pud_t *pud, unsigned long address) -{ - if (kvm_stage2_has_pmd(kvm)) - return pmd_offset(pud, address); - else - return (pmd_t *)pud; -} - -static inline void stage2_pmd_free(struct kvm *kvm, pmd_t *pmd) -{ - if (kvm_stage2_has_pmd(kvm)) - free_page((unsigned long)pmd); -} - -static inline bool stage2_pud_huge(struct kvm *kvm, pud_t pud) -{ - if (kvm_stage2_has_pmd(kvm)) - return pud_huge(pud); - else - return 0; -} - -static inline bool stage2_pmd_table_empty(struct kvm *kvm, pmd_t *pmdp) -{ - if (kvm_stage2_has_pmd(kvm)) - return kvm_page_empty(pmdp); - else - return 0; -} - -static inline phys_addr_t -stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) -{ - if (kvm_stage2_has_pmd(kvm)) { - phys_addr_t boundary = (addr + S2_PMD_SIZE) & S2_PMD_MASK; - - return (boundary - 1 < end - 1) ? boundary : end; - } else { - return end; - } -} - -static inline bool stage2_pte_table_empty(struct kvm *kvm, pte_t *ptep) -{ - return kvm_page_empty(ptep); -} - -static inline unsigned long stage2_pgd_index(struct kvm *kvm, phys_addr_t addr) -{ - return (((addr) >> stage2_pgdir_shift(kvm)) & (stage2_pgd_ptrs(kvm) - 1)); -} - static inline phys_addr_t stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) { @@ -256,13 +50,4 @@ stage2_pgd_addr_end(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) return (boundary - 1 < end - 1) ? boundary : end; } -/* - * Level values for the ARMv8.4-TTL extension, mapping PUD/PMD/PTE and - * the architectural page-table level. - */ -#define S2_NO_LEVEL_HINT 0 -#define S2_PUD_LEVEL 1 -#define S2_PMD_LEVEL 2 -#define S2_PTE_LEVEL 3 - #endif /* __ARM64_S2_PGTABLE_H_ */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index ba85bb23f060..1c17c3a24411 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -159,6 +159,21 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* + * PMU filter structure. Describe a range of events with a particular + * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. + */ +struct kvm_pmu_event_filter { + __u16 base_event; + __u16 nevents; + +#define KVM_PMU_EVENT_ALLOW 0 +#define KVM_PMU_EVENT_DENY 1 + + __u8 action; + __u8 pad[3]; +}; + /* for KVM_GET/SET_VCPU_EVENTS */ struct kvm_vcpu_events { struct { @@ -242,6 +257,15 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2 + +/* + * Only two states can be presented by the host kernel: + * - NOT_REQUIRED: the guest doesn't need to do anything + * - NOT_AVAIL: the guest isn't mitigated (it can still use SSBS if available) + * + * All the other values are deprecated. The host still accepts all + * values (they are ABI), but will narrow them to the above two. + */ #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2) #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1 @@ -329,6 +353,7 @@ struct kvm_vcpu_events { #define KVM_ARM_VCPU_PMU_V3_CTRL 0 #define KVM_ARM_VCPU_PMU_V3_IRQ 0 #define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_PMU_V3_FILTER 2 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 |