diff options
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r-- | arch/x86/kvm/pmu.h | 9 | ||||
-rw-r--r-- | arch/x86/kvm/svm/pmu.c | 1 | ||||
-rw-r--r-- | arch/x86/kvm/svm/sev.c | 67 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.c | 1 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.h | 2 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/nested.c | 5 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/pmu_intel.c | 8 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 5 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.h | 1 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 60 |
10 files changed, 95 insertions, 64 deletions
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 9e66fba1d6a3..22992b049d38 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -138,6 +138,15 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value) return sample_period; } +static inline void pmc_update_sample_period(struct kvm_pmc *pmc) +{ + if (!pmc->perf_event || pmc->is_paused) + return; + + perf_event_period(pmc->perf_event, + get_sample_period(pmc, pmc->counter)); +} + void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel); void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx); void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx); diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index 24eb935b6f85..b14860863c39 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -257,6 +257,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER); if (pmc) { pmc->counter += data - pmc_read_counter(pmc); + pmc_update_sample_period(pmc); return 0; } /* MSR_EVNTSELn */ diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 537aaddc852f..0ad70c12c7c3 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2226,51 +2226,47 @@ int sev_cpu_init(struct svm_cpu_data *sd) * Pages used by hardware to hold guest encrypted state must be flushed before * returning them to the system. */ -static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va, - unsigned long len) +static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va) { + int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid; + /* - * If hardware enforced cache coherency for encrypted mappings of the - * same physical page is supported, nothing to do. + * Note! The address must be a kernel address, as regular page walk + * checks are performed by VM_PAGE_FLUSH, i.e. operating on a user + * address is non-deterministic and unsafe. This function deliberately + * takes a pointer to deter passing in a user address. */ - if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) - return; + unsigned long addr = (unsigned long)va; /* - * If the VM Page Flush MSR is supported, use it to flush the page - * (using the page virtual address and the guest ASID). + * If CPU enforced cache coherency for encrypted mappings of the + * same physical page is supported, use CLFLUSHOPT instead. NOTE: cache + * flush is still needed in order to work properly with DMA devices. */ - if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) { - struct kvm_sev_info *sev; - unsigned long va_start; - u64 start, stop; - - /* Align start and stop to page boundaries. */ - va_start = (unsigned long)va; - start = (u64)va_start & PAGE_MASK; - stop = PAGE_ALIGN((u64)va_start + len); - - if (start < stop) { - sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info; + if (boot_cpu_has(X86_FEATURE_SME_COHERENT)) { + clflush_cache_range(va, PAGE_SIZE); + return; + } - while (start < stop) { - wrmsrl(MSR_AMD64_VM_PAGE_FLUSH, - start | sev->asid); + /* + * VM Page Flush takes a host virtual address and a guest ASID. Fall + * back to WBINVD if this faults so as not to make any problems worse + * by leaving stale encrypted data in the cache. + */ + if (WARN_ON_ONCE(wrmsrl_safe(MSR_AMD64_VM_PAGE_FLUSH, addr | asid))) + goto do_wbinvd; - start += PAGE_SIZE; - } + return; - return; - } +do_wbinvd: + wbinvd_on_all_cpus(); +} - WARN(1, "Address overflow, using WBINVD\n"); - } +void sev_guest_memory_reclaimed(struct kvm *kvm) +{ + if (!sev_guest(kvm)) + return; - /* - * Hardware should always have one of the above features, - * but if not, use WBINVD and issue a warning. - */ - WARN_ONCE(1, "Using WBINVD to flush guest memory\n"); wbinvd_on_all_cpus(); } @@ -2284,7 +2280,8 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu) svm = to_svm(vcpu); if (vcpu->arch.guest_state_protected) - sev_flush_guest_memory(svm, svm->sev_es.vmsa, PAGE_SIZE); + sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa); + __free_page(virt_to_page(svm->sev_es.vmsa)); if (svm->sev_es.ghcb_sa_free) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index bd4c64b362d2..7e45d03cd018 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4620,6 +4620,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .mem_enc_ioctl = sev_mem_enc_ioctl, .mem_enc_register_region = sev_mem_enc_register_region, .mem_enc_unregister_region = sev_mem_enc_unregister_region, + .guest_memory_reclaimed = sev_guest_memory_reclaimed, .vm_copy_enc_context_from = sev_vm_copy_enc_context_from, .vm_move_enc_context_from = sev_vm_move_enc_context_from, diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index f77a7d2d39dd..f76deff71002 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -609,6 +609,8 @@ int sev_mem_enc_unregister_region(struct kvm *kvm, struct kvm_enc_region *range); int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd); int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd); +void sev_guest_memory_reclaimed(struct kvm *kvm); + void pre_sev_run(struct vcpu_svm *svm, int cpu); void __init sev_set_cpu_caps(void); void __init sev_hardware_setup(void); diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f18744f7ff82..856c87563883 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -4618,6 +4618,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); } + if (vmx->nested.update_vmcs01_apicv_status) { + vmx->nested.update_vmcs01_apicv_status = false; + kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu); + } + if ((vm_exit_reason != -1) && (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))) vmx->nested.need_vmcs12_to_shadow_sync = true; diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index bc3f8512bb64..b82b6709d7a8 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -431,15 +431,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) !(msr & MSR_PMC_FULL_WIDTH_BIT)) data = (s64)(s32)data; pmc->counter += data - pmc_read_counter(pmc); - if (pmc->perf_event && !pmc->is_paused) - perf_event_period(pmc->perf_event, - get_sample_period(pmc, data)); + pmc_update_sample_period(pmc); return 0; } else if ((pmc = get_fixed_pmc(pmu, msr))) { pmc->counter += data - pmc_read_counter(pmc); - if (pmc->perf_event && !pmc->is_paused) - perf_event_period(pmc->perf_event, - get_sample_period(pmc, data)); + pmc_update_sample_period(pmc); return 0; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { if (data == pmc->eventsel) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 04d170c4b61e..d58b763df855 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4174,6 +4174,11 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + if (is_guest_mode(vcpu)) { + vmx->nested.update_vmcs01_apicv_status = true; + return; + } + pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx)); if (cpu_has_secondary_exec_ctrls()) { if (kvm_vcpu_apicv_active(vcpu)) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 9c6bfcd84008..b98c7e96697a 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -183,6 +183,7 @@ struct nested_vmx { bool change_vmcs01_virtual_apic_mode; bool reload_vmcs01_apic_access_page; bool update_vmcs01_cpu_dirty_logging; + bool update_vmcs01_apicv_status; /* * Enlightened VMCS has been enabled. It does not mean that L1 has to diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 547ba00ef64f..a6ab19afc638 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9111,7 +9111,7 @@ static void kvm_apicv_init(struct kvm *kvm) if (!enable_apicv) set_or_clear_apicv_inhibit(inhibits, - APICV_INHIBIT_REASON_ABSENT, true); + APICV_INHIBIT_REASON_DISABLE, true); } static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id) @@ -9889,6 +9889,11 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); } +void kvm_arch_guest_memory_reclaimed(struct kvm *kvm) +{ + static_call_cond(kvm_x86_guest_memory_reclaimed)(kvm); +} + static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) { if (!lapic_in_kernel(vcpu)) @@ -10097,7 +10102,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) /* Store vcpu->apicv_active before vcpu->mode. */ smp_store_release(&vcpu->mode, IN_GUEST_MODE); - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); + kvm_vcpu_srcu_read_unlock(vcpu); /* * 1) We should set ->mode before checking ->requests. Please see @@ -10128,7 +10133,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) smp_wmb(); local_irq_enable(); preempt_enable(); - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + kvm_vcpu_srcu_read_lock(vcpu); r = 1; goto cancel_injection; } @@ -10254,7 +10259,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) local_irq_enable(); preempt_enable(); - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + kvm_vcpu_srcu_read_lock(vcpu); /* * Profile KVM exit RIPs: @@ -10284,7 +10289,7 @@ out: } /* Called within kvm->srcu read side. */ -static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) +static inline int vcpu_block(struct kvm_vcpu *vcpu) { bool hv_timer; @@ -10300,12 +10305,12 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) if (hv_timer) kvm_lapic_switch_to_sw_timer(vcpu); - srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); + kvm_vcpu_srcu_read_unlock(vcpu); if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) kvm_vcpu_halt(vcpu); else kvm_vcpu_block(vcpu); - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); + kvm_vcpu_srcu_read_lock(vcpu); if (hv_timer) kvm_lapic_switch_to_hv_timer(vcpu); @@ -10347,7 +10352,6 @@ static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) static int vcpu_run(struct kvm_vcpu *vcpu) { int r; - struct kvm *kvm = vcpu->kvm; vcpu->arch.l1tf_flush_l1d = true; @@ -10355,7 +10359,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu) if (kvm_vcpu_running(vcpu)) { r = vcpu_enter_guest(vcpu); } else { - r = vcpu_block(kvm, vcpu); + r = vcpu_block(vcpu); } if (r <= 0) @@ -10374,9 +10378,9 @@ static int vcpu_run(struct kvm_vcpu *vcpu) } if (__xfer_to_guest_mode_work_pending()) { - srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); + kvm_vcpu_srcu_read_unlock(vcpu); r = xfer_to_guest_mode_handle_work(vcpu); - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); + kvm_vcpu_srcu_read_lock(vcpu); if (r) return r; } @@ -10387,12 +10391,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu) static inline int complete_emulated_io(struct kvm_vcpu *vcpu) { - int r; - - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE); - srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); - return r; + return kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE); } static int complete_emulated_pio(struct kvm_vcpu *vcpu) @@ -10484,7 +10483,6 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { struct kvm_run *kvm_run = vcpu->run; - struct kvm *kvm = vcpu->kvm; int r; vcpu_load(vcpu); @@ -10492,7 +10490,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_run->flags = 0; kvm_load_guest_fpu(vcpu); - vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + kvm_vcpu_srcu_read_lock(vcpu); if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { if (kvm_run->immediate_exit) { r = -EINTR; @@ -10504,9 +10502,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu)); - srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); + kvm_vcpu_srcu_read_unlock(vcpu); kvm_vcpu_block(vcpu); - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); + kvm_vcpu_srcu_read_lock(vcpu); if (kvm_apic_accept_events(vcpu) < 0) { r = 0; @@ -10567,7 +10565,7 @@ out: if (kvm_run->kvm_valid_regs) store_regs(vcpu); post_kvm_run_save(vcpu); - srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); + kvm_vcpu_srcu_read_unlock(vcpu); kvm_sigset_deactivate(vcpu); vcpu_put(vcpu); @@ -10985,6 +10983,9 @@ static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm) struct kvm_vcpu *vcpu; unsigned long i; + if (!enable_apicv) + return; + down_write(&kvm->arch.apicv_update_lock); kvm_for_each_vcpu(i, vcpu, kvm) { @@ -11196,8 +11197,21 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) r = kvm_create_lapic(vcpu, lapic_timer_advance_ns); if (r < 0) goto fail_mmu_destroy; - if (kvm_apicv_activated(vcpu->kvm)) + + /* + * Defer evaluating inhibits until the vCPU is first run, as + * this vCPU will not get notified of any changes until this + * vCPU is visible to other vCPUs (marked online and added to + * the set of vCPUs). Opportunistically mark APICv active as + * VMX in particularly is highly unlikely to have inhibits. + * Ignore the current per-VM APICv state so that vCPU creation + * is guaranteed to run with a deterministic value, the request + * will ensure the vCPU gets the correct state before VM-Entry. + */ + if (enable_apicv) { vcpu->arch.apicv_active = true; + kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu); + } } else static_branch_inc(&kvm_has_noapic_vcpu); |