Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/kvm/Kconfig   |  7
-rw-r--r--  arch/x86/kvm/mmu/mmu.c | 42
-rw-r--r--  arch/x86/kvm/svm/sev.c | 23
-rw-r--r--  arch/x86/kvm/x86.c     | 12
4 files changed, 71 insertions, 13 deletions
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 87e3da7b0439..65ed14b6540b 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -80,9 +80,10 @@ config KVM_SW_PROTECTED_VM
 	depends on KVM && X86_64
 	select KVM_GENERIC_PRIVATE_MEM
 	help
-	  Enable support for KVM software-protected VMs. Currently "protected"
-	  means the VM can be backed with memory provided by
-	  KVM_CREATE_GUEST_MEMFD.
+	  Enable support for KVM software-protected VMs. Currently, software-
+	  protected VMs are purely a development and testing vehicle for
+	  KVM_CREATE_GUEST_MEMFD. Attempting to run a "real" VM workload as a
+	  software-protected VM will fail miserably.
 
 	  If unsure, say "N".
 
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 2d6cdeab1f8a..0544700ca50b 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4405,6 +4405,31 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 	fault->mmu_seq = vcpu->kvm->mmu_invalidate_seq;
 	smp_rmb();
 
+	/*
+	 * Check for a relevant mmu_notifier invalidation event before getting
+	 * the pfn from the primary MMU, and before acquiring mmu_lock.
+	 *
+	 * For mmu_lock, if there is an in-progress invalidation and the kernel
+	 * allows preemption, the invalidation task may drop mmu_lock and yield
+	 * in response to mmu_lock being contended, which is *very* counter-
+	 * productive as this vCPU can't actually make forward progress until
+	 * the invalidation completes.
+	 *
+	 * Retrying now can also avoid unnecessary lock contention in the primary
+	 * MMU, as the primary MMU doesn't necessarily hold a single lock for
+	 * the duration of the invalidation, i.e. faulting in a conflicting pfn
+	 * can cause the invalidation to take longer by holding locks that are
+	 * needed to complete the invalidation.
+	 *
+	 * Do the pre-check even for non-preemptible kernels, i.e. even if KVM
+	 * will never yield mmu_lock in response to contention, as this vCPU is
+	 * *guaranteed* to need to retry, i.e. waiting until mmu_lock is held
+	 * to detect retry guarantees the worst case latency for the vCPU.
+	 */
+	if (fault->slot &&
+	    mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn))
+		return RET_PF_RETRY;
+
 	ret = __kvm_faultin_pfn(vcpu, fault);
 	if (ret != RET_PF_CONTINUE)
 		return ret;
@@ -4415,6 +4440,18 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 	if (unlikely(!fault->slot))
 		return kvm_handle_noslot_fault(vcpu, fault, access);
 
+	/*
+	 * Check again for a relevant mmu_notifier invalidation event purely to
+	 * avoid contending mmu_lock. Most invalidations will be detected by
+	 * the previous check, but checking is extremely cheap relative to the
+	 * overall cost of failing to detect the invalidation until after
+	 * mmu_lock is acquired.
+	 */
+	if (mmu_invalidate_retry_gfn_unsafe(vcpu->kvm, fault->mmu_seq, fault->gfn)) {
+		kvm_release_pfn_clean(fault->pfn);
+		return RET_PF_RETRY;
+	}
+
 	return RET_PF_CONTINUE;
 }
 
@@ -4442,6 +4479,11 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
 	if (!sp && kvm_test_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
 		return true;
 
+	/*
+	 * Check for a relevant mmu_notifier invalidation event one last time
+	 * now that mmu_lock is held, as the "unsafe" checks performed without
+	 * holding mmu_lock can get false negatives.
+	 */
 	return fault->slot &&
 	       mmu_invalidate_retry_gfn(vcpu->kvm, fault->mmu_seq, fault->gfn);
 }
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index f760106c31f8..a8ce5226b3b5 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -57,7 +57,7 @@ static bool sev_es_enabled = true;
 module_param_named(sev_es, sev_es_enabled, bool, 0444);
 
 /* enable/disable SEV-ES DebugSwap support */
-static bool sev_es_debug_swap_enabled = true;
+static bool sev_es_debug_swap_enabled = false;
 module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444);
 #else
 #define sev_enabled false
@@ -612,8 +612,11 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
 	save->xss = svm->vcpu.arch.ia32_xss;
 	save->dr6 = svm->vcpu.arch.dr6;
 
-	if (sev_es_debug_swap_enabled)
+	if (sev_es_debug_swap_enabled) {
 		save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP;
+		pr_warn_once("Enabling DebugSwap with KVM_SEV_ES_INIT. "
+			     "This will not work starting with Linux 6.10\n");
+	}
 
 	pr_debug("Virtual Machine Save Area (VMSA):\n");
 	print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);
@@ -1975,20 +1978,22 @@ int sev_mem_enc_register_region(struct kvm *kvm,
 		goto e_free;
 	}
 
-	region->uaddr = range->addr;
-	region->size = range->size;
-
-	list_add_tail(&region->list, &sev->regions_list);
-	mutex_unlock(&kvm->lock);
-
 	/*
 	 * The guest may change the memory encryption attribute from C=0 -> C=1
 	 * or vice versa for this memory range. Lets make sure caches are
 	 * flushed to ensure that guest data gets written into memory with
-	 * correct C-bit.
+	 * correct C-bit. Note, this must be done before dropping kvm->lock,
+	 * as region and its array of pages can be freed by a different task
+	 * once kvm->lock is released.
 	 */
 	sev_clflush_pages(region->pages, region->npages);
 
+	region->uaddr = range->addr;
+	region->size = range->size;
+
+	list_add_tail(&region->list, &sev->regions_list);
+	mutex_unlock(&kvm->lock);
+
 	return ret;
 
 e_free:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 48a61d283406..e02cc710f56d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4580,7 +4580,7 @@ static bool kvm_is_vm_type_supported(unsigned long type)
 {
 	return type == KVM_X86_DEFAULT_VM ||
 	       (type == KVM_X86_SW_PROTECTED_VM &&
-		IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_enabled);
+		IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_mmu_enabled);
 }
 
 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -8007,6 +8007,16 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
 	if (r < 0)
 		return X86EMUL_UNHANDLEABLE;
+
+	/*
+	 * Mark the page dirty _before_ checking whether or not the CMPXCHG was
+	 * successful, as the old value is written back on failure. Note, for
+	 * live migration, this is unnecessarily conservative as CMPXCHG writes
+	 * back the original value and the access is atomic, but KVM's ABI is
+	 * that all writes are dirty logged, regardless of the value written.
+	 */
+	kvm_vcpu_mark_page_dirty(vcpu, gpa_to_gfn(gpa));
+
 	if (r)
 		return X86EMUL_CMPXCHG_FAILED;
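
For context on the x86.c hunk above: kvm_is_vm_type_supported() now requires the TDP MMU (tdp_mmu_enabled) rather than merely tdp_enabled before advertising KVM_X86_SW_PROTECTED_VM. A minimal userspace sketch of probing for the type on a given kernel follows; KVM_CREATE_VM and KVM_X86_SW_PROTECTED_VM are the standard KVM UAPI, while the file handling and messages are purely illustrative.

/*
 * Sketch: try to create a software-protected VM.  After this series the
 * type additionally requires CONFIG_KVM_SW_PROTECTED_VM=y and the TDP MMU.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);

	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	/* KVM_CREATE_VM takes the machine type as its sole argument. */
	int vm = ioctl(kvm, KVM_CREATE_VM, KVM_X86_SW_PROTECTED_VM);

	if (vm < 0)
		perror("KVM_X86_SW_PROTECTED_VM rejected");
	else
		printf("software-protected VM created, fd %d\n", vm);

	close(kvm);
	return vm < 0;
}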