diff options
author | Linus Torvalds | 2020-12-12 10:08:16 -0800 |
---|---|---|
committer | Linus Torvalds | 2020-12-12 10:08:16 -0800 |
commit | 7b1b868e1d9156484ccce9bf11122c053de82617 (patch) | |
tree | df9820018f1454d8b44aec832e9e9867bbdd4b36 /arch | |
parent | b53966ffd4c0676c02987d4fc33b99bdfc548cf0 (diff) | |
parent | 111d0bda8eeb4b54e0c63897b071effbf9fd9251 (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
"Bugfixes for ARM, x86 and tools"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
tools/kvm_stat: Exempt time-based counters
KVM: mmu: Fix SPTE encoding of MMIO generation upper half
kvm: x86/mmu: Use cpuid to determine max gfn
kvm: svm: de-allocate svm_cpu_data for all cpus in svm_cpu_uninit()
selftests: kvm/set_memory_region_test: Fix race in move region test
KVM: arm64: Add usage of stage 2 fault lookup level in user_mem_abort()
KVM: arm64: Fix handling of merging tables into a block entry
KVM: arm64: Fix memory leak on stage2 update of a valid PTE
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm64/include/asm/esr.h | 1 | ||||
-rw-r--r-- | arch/arm64/include/asm/kvm_emulate.h | 5 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/pgtable.c | 17 | ||||
-rw-r--r-- | arch/arm64/kvm/mmu.c | 11 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/spte.c | 4 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/spte.h | 25 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/tdp_mmu.c | 4 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.c | 4 |
8 files changed, 55 insertions, 16 deletions
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 22c81f1edda2..85a3e49f92f4 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -104,6 +104,7 @@ /* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */ #define ESR_ELx_FSC (0x3F) #define ESR_ELx_FSC_TYPE (0x3C) +#define ESR_ELx_FSC_LEVEL (0x03) #define ESR_ELx_FSC_EXTABT (0x10) #define ESR_ELx_FSC_SERROR (0x11) #define ESR_ELx_FSC_ACCESS (0x08) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 5ef2669ccd6c..00bc6f1234ba 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -350,6 +350,11 @@ static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vc return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE; } +static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL; +} + static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu) { switch (kvm_vcpu_trap_get_fault(vcpu)) { diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 0271b4a3b9fe..bdf8e55ed308 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -470,6 +470,15 @@ static bool stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, if (!kvm_block_mapping_supported(addr, end, phys, level)) return false; + /* + * If the PTE was already valid, drop the refcount on the table + * early, as it will be bumped-up again in stage2_map_walk_leaf(). + * This ensures that the refcount stays constant across a valid to + * valid PTE update. + */ + if (kvm_pte_valid(*ptep)) + put_page(virt_to_page(ptep)); + if (kvm_set_valid_leaf_pte(ptep, phys, data->attr, level)) goto out; @@ -493,7 +502,13 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level, return 0; kvm_set_invalid_pte(ptep); - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, 0); + + /* + * Invalidate the whole stage-2, as we may have numerous leaf + * entries below us which would otherwise need invalidating + * individually. + */ + kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu); data->anchor = ptep; return 0; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 1a01da9fdc99..75814a02d189 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -754,10 +754,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, gfn_t gfn; kvm_pfn_t pfn; bool logging_active = memslot_is_logging(memslot); - unsigned long vma_pagesize; + unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu); + unsigned long vma_pagesize, fault_granule; enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; struct kvm_pgtable *pgt; + fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level); write_fault = kvm_is_write_fault(vcpu); exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu); VM_BUG_ON(write_fault && exec_fault); @@ -896,7 +898,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, else if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC)) prot |= KVM_PGTABLE_PROT_X; - if (fault_status == FSC_PERM && !(logging_active && writable)) { + /* + * Under the premise of getting a FSC_PERM fault, we just need to relax + * permissions only if vma_pagesize equals fault_granule. Otherwise, + * kvm_pgtable_stage2_map() should be called to change block size. + */ + if (fault_status == FSC_PERM && vma_pagesize == fault_granule) { ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot); } else { ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize, diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index fcac2cac78fe..c51ad544f25b 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -40,8 +40,8 @@ static u64 generation_mmio_spte_mask(u64 gen) WARN_ON(gen & ~MMIO_SPTE_GEN_MASK); BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK); - mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK; - mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK; + mask = (gen << MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_SPTE_GEN_LOW_MASK; + mask |= (gen << MMIO_SPTE_GEN_HIGH_SHIFT) & MMIO_SPTE_GEN_HIGH_MASK; return mask; } diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 5c75a451c000..2b3a30bd38b0 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -56,11 +56,11 @@ #define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1)) /* - * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of + * Due to limited space in PTEs, the MMIO generation is a 18 bit subset of * the memslots generation and is derived as follows: * * Bits 0-8 of the MMIO generation are propagated to spte bits 3-11 - * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61 + * Bits 9-17 of the MMIO generation are propagated to spte bits 54-62 * * The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in * the MMIO generation number, as doing so would require stealing a bit from @@ -69,18 +69,29 @@ * requires a full MMU zap). The flag is instead explicitly queried when * checking for MMIO spte cache hits. */ -#define MMIO_SPTE_GEN_MASK GENMASK_ULL(17, 0) #define MMIO_SPTE_GEN_LOW_START 3 #define MMIO_SPTE_GEN_LOW_END 11 -#define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \ - MMIO_SPTE_GEN_LOW_START) #define MMIO_SPTE_GEN_HIGH_START PT64_SECOND_AVAIL_BITS_SHIFT #define MMIO_SPTE_GEN_HIGH_END 62 + +#define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \ + MMIO_SPTE_GEN_LOW_START) #define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \ MMIO_SPTE_GEN_HIGH_START) +#define MMIO_SPTE_GEN_LOW_BITS (MMIO_SPTE_GEN_LOW_END - MMIO_SPTE_GEN_LOW_START + 1) +#define MMIO_SPTE_GEN_HIGH_BITS (MMIO_SPTE_GEN_HIGH_END - MMIO_SPTE_GEN_HIGH_START + 1) + +/* remember to adjust the comment above as well if you change these */ +static_assert(MMIO_SPTE_GEN_LOW_BITS == 9 && MMIO_SPTE_GEN_HIGH_BITS == 9); + +#define MMIO_SPTE_GEN_LOW_SHIFT (MMIO_SPTE_GEN_LOW_START - 0) +#define MMIO_SPTE_GEN_HIGH_SHIFT (MMIO_SPTE_GEN_HIGH_START - MMIO_SPTE_GEN_LOW_BITS) + +#define MMIO_SPTE_GEN_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_BITS + MMIO_SPTE_GEN_HIGH_BITS - 1, 0) + extern u64 __read_mostly shadow_nx_mask; extern u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ extern u64 __read_mostly shadow_user_mask; @@ -228,8 +239,8 @@ static inline u64 get_mmio_spte_generation(u64 spte) { u64 gen; - gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START; - gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START; + gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_SHIFT; + gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_SHIFT; return gen; } diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index ff28a5c6abd6..84c8f06bec26 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -66,7 +66,7 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root) { - gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT); + gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT); lockdep_assert_held(&kvm->mmu_lock); @@ -456,7 +456,7 @@ bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end) void kvm_tdp_mmu_zap_all(struct kvm *kvm) { - gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT); + gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT); bool flush; flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 79b3a564f1c9..da7eb4aaf44f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -530,12 +530,12 @@ static int svm_hardware_enable(void) static void svm_cpu_uninit(int cpu) { - struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id()); + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); if (!sd) return; - per_cpu(svm_data, raw_smp_processor_id()) = NULL; + per_cpu(svm_data, cpu) = NULL; kfree(sd->sev_vmcbs); __free_page(sd->save_area); kfree(sd); |