author		Linus Torvalds	2022-03-24 11:58:57 -0700
committer	Linus Torvalds	2022-03-24 11:58:57 -0700
commit		1ebdbeb03efe89f01f15df038a589077df3d21f5 (patch)
tree		06b6b7bb565668d136c060c5104481e48cbf71e2 /arch/x86/kvm/vmx/vmx.c
parent		efee6c79298fd823c569d501d041de85caa102a6 (diff)
parent		c9b8fecddb5bb4b67e351bbaeaa648a6f7456912 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
"ARM:
- Proper emulation of the OSLock feature of the debug architecture
- Scalability improvements for the MMU lock when dirty logging is on
- New VMID allocator, which will eventually help with SVA in VMs
- Better support for PMUs in heterogeneous systems
- PSCI 1.1 support, enabling support for SYSTEM_RESET2
- Implement CONFIG_DEBUG_LIST at EL2
- Make CONFIG_ARM64_ERRATUM_2077057 default y
- Reduce the overhead of VM exit when no interrupt is pending
- Remove traces of 32bit ARM host support from the documentation
- Updated vgic selftests
- Various cleanups, doc updates and spelling fixes
RISC-V:
- Prevent KVM_COMPAT from being selected
- Optimize __kvm_riscv_switch_to() implementation
- RISC-V SBI v0.3 support
s390:
- memop selftest
- fix SCK locking
- adapter interruption virtualization for secure guests
- add Claudio Imbrenda as maintainer
- first step to do proper storage key checking
x86:
- Continue switching kvm_x86_ops to static_call(); introduce
static_call_cond() and __static_call_return0 when applicable.
- Cleanup unused arguments in several functions
- Synthesize AMD 0x80000021 leaf
- Fixes and optimization for Hyper-V sparse-bank hypercalls
- Implement Hyper-V's enlightened MSR bitmap for nested SVM
- Remove MMU auditing
- Eager splitting of page tables (new aka "TDP" MMU only) when dirty
page tracking is enabled
- Cleanup the implementation of the guest PGD cache
- Preparation for the implementation of Intel IPI virtualization
- Fix some segment descriptor checks in the emulator
- Allow AMD AVIC support on systems with physical APIC ID above 255
- Better API to disable virtualization quirks
- Fixes and optimizations for the zapping of page tables:
- Zap roots in two passes, avoiding RCU read-side critical
sections that last too long for very large guests backed by 4
KiB SPTEs.
- Zap invalid and defunct roots asynchronously via
concurrency-managed work queue.
- Allow yielding when zapping TDP MMU roots in response to the
root's last reference being put.
- Batch more TLB flushes with an RCU trick. Whoever frees the
paging structure now holds RCU as a proxy for all vCPUs running
in the guest, i.e. it prolongs the grace period on their behalf.
It then kicks the vCPUs out of guest mode before doing
rcu_read_unlock().
Generic:
- Introduce __vcalloc and use it for very large allocations that need
memcg accounting"
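
The __vcalloc() mentioned under "Generic" is the vmalloc-backed,
overflow-checked sibling of kvcalloc(). A minimal sketch of the usage
pattern, assuming the array-allocation helpers from this cycle are
declared in <linux/vmalloc.h>; the stats_alloc()/stats_free() helpers
are hypothetical names used purely for illustration:

	#include <linux/types.h>
	#include <linux/gfp.h>		/* GFP_KERNEL_ACCOUNT */
	#include <linux/vmalloc.h>	/* __vcalloc(), vfree() */

	/*
	 * Allocate a zeroed array of nentries 64-bit counters.  __vcalloc()
	 * rejects nentries * sizeof(u64) on multiplication overflow, and
	 * GFP_KERNEL_ACCOUNT charges the potentially huge allocation to the
	 * caller's memcg, which is the point of this change.
	 */
	static u64 *stats_alloc(size_t nentries)
	{
		return __vcalloc(nentries, sizeof(u64), GFP_KERNEL_ACCOUNT);
	}

	static void stats_free(u64 *stats)
	{
		vfree(stats);	/* the allocation is vmalloc-backed */
	}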
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (246 commits)
KVM: use kvcalloc for array allocations
KVM: x86: Introduce KVM_CAP_DISABLE_QUIRKS2
kvm: x86: Require const tsc for RT
KVM: x86: synthesize CPUID leaf 0x80000021h if useful
KVM: x86: add support for CPUID leaf 0x80000021
KVM: x86: do not use KVM_X86_OP_OPTIONAL_RET0 for get_mt_mask
Revert "KVM: x86/mmu: Zap only TDP MMU leafs in kvm_zap_gfn_range()"
kvm: x86/mmu: Flush TLB before zap_gfn_range releases RCU
KVM: arm64: fix typos in comments
KVM: arm64: Generalise VM features into a set of flags
KVM: s390: selftests: Add error memop tests
KVM: s390: selftests: Add more copy memop tests
KVM: s390: selftests: Add named stages for memop test
KVM: s390: selftests: Add macro as abstraction for MEM_OP
KVM: s390: selftests: Split memop tests
KVM: s390x: fix SCK locking
RISC-V: KVM: Implement SBI HSM suspend call
RISC-V: KVM: Add common kvm_riscv_vcpu_wfi() function
RISC-V: Add SBI HSM suspend related defines
RISC-V: KVM: Implement SBI v0.3 SRST extension
...
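
For the KVM_CAP_DISABLE_QUIRKS2 entry above, the new capability is both
discoverable and validated: KVM_CHECK_EXTENSION returns the mask of
quirks that may be disabled, and KVM_ENABLE_CAP rejects unknown bits. A
hedged userspace sketch (error handling trimmed; the quirk chosen here
is just an example):

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Disable the LAPIC MMIO-hole quirk for one VM, if the kernel allows it. */
	static int disable_lapic_mmio_hole(int vm_fd)
	{
		struct kvm_enable_cap cap = {
			.cap = KVM_CAP_DISABLE_QUIRKS2,
			/* args[0]: mask of KVM_X86_QUIRK_* bits to disable */
			.args[0] = KVM_X86_QUIRK_LAPIC_MMIO_HOLE,
		};
		int supported = ioctl(vm_fd, KVM_CHECK_EXTENSION,
				      KVM_CAP_DISABLE_QUIRKS2);

		if (supported <= 0 || !(supported & KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
			return -1;	/* quirk not disableable on this kernel */

		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}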
Diffstat (limited to 'arch/x86/kvm/vmx/vmx.c')
-rw-r--r--	arch/x86/kvm/vmx/vmx.c	116
1 file changed, 59 insertions(+), 57 deletions(-)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b730d799c26e..e8963f5af618 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -541,11 +541,6 @@ static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
 	return flexpriority_enabled && lapic_in_kernel(vcpu);
 }
 
-static inline bool report_flexpriority(void)
-{
-	return flexpriority_enabled;
-}
-
 static int possible_passthrough_msr_slot(u32 msr)
 {
 	u32 i;
@@ -645,10 +640,10 @@ static void __loaded_vmcs_clear(void *arg)
 
 	/*
 	 * Ensure all writes to loaded_vmcs, including deleting it from its
-	 * current percpu list, complete before setting loaded_vmcs->vcpu to
-	 * -1, otherwise a different cpu can see vcpu == -1 first and add
-	 * loaded_vmcs to its percpu list before it's deleted from this cpu's
-	 * list. Pairs with the smp_rmb() in vmx_vcpu_load_vmcs().
+	 * current percpu list, complete before setting loaded_vmcs->cpu to
+	 * -1, otherwise a different cpu can see loaded_vmcs->cpu == -1 first
+	 * and add loaded_vmcs to its percpu list before it's deleted from this
+	 * cpu's list. Pairs with the smp_rmb() in vmx_vcpu_load_vmcs().
 	 */
 	smp_wmb();
 
@@ -2334,7 +2329,7 @@ fault:
 	return -EFAULT;
 }
 
-static int hardware_enable(void)
+static int vmx_hardware_enable(void)
 {
 	int cpu = raw_smp_processor_id();
 	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
@@ -2375,7 +2370,7 @@ static void vmclear_local_loaded_vmcss(void)
 		__loaded_vmcs_clear(v);
 }
 
-static void hardware_disable(void)
+static void vmx_hardware_disable(void)
 {
 	vmclear_local_loaded_vmcss();
 
@@ -2950,7 +2945,7 @@ static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu)
 static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
-	u64 root_hpa = mmu->root_hpa;
+	u64 root_hpa = mmu->root.hpa;
 
 	/* No flush required if the current context is invalid. */
 	if (!VALID_PAGE(root_hpa))
@@ -3930,31 +3925,33 @@ static inline void kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
 #ifdef CONFIG_SMP
 	if (vcpu->mode == IN_GUEST_MODE) {
 		/*
-		 * The vector of interrupt to be delivered to vcpu had
-		 * been set in PIR before this function.
+		 * The vector of the virtual has already been set in the PIR.
+		 * Send a notification event to deliver the virtual interrupt
+		 * unless the vCPU is the currently running vCPU, i.e. the
+		 * event is being sent from a fastpath VM-Exit handler, in
+		 * which case the PIR will be synced to the vIRR before
+		 * re-entering the guest.
 		 *
-		 * Following cases will be reached in this block, and
-		 * we always send a notification event in all cases as
-		 * explained below.
+		 * When the target is not the running vCPU, the following
+		 * possibilities emerge:
 		 *
-		 * Case 1: vcpu keeps in non-root mode. Sending a
-		 * notification event posts the interrupt to vcpu.
+		 * Case 1: vCPU stays in non-root mode. Sending a notification
+		 * event posts the interrupt to the vCPU.
 		 *
-		 * Case 2: vcpu exits to root mode and is still
-		 * runnable. PIR will be synced to vIRR before the
-		 * next vcpu entry. Sending a notification event in
-		 * this case has no effect, as vcpu is not in root
-		 * mode.
+		 * Case 2: vCPU exits to root mode and is still runnable. The
+		 * PIR will be synced to the vIRR before re-entering the guest.
+		 * Sending a notification event is ok as the host IRQ handler
+		 * will ignore the spurious event.
 		 *
-		 * Case 3: vcpu exits to root mode and is blocked.
-		 * vcpu_block() has already synced PIR to vIRR and
-		 * never blocks vcpu if vIRR is not cleared. Therefore,
-		 * a blocked vcpu here does not wait for any requested
-		 * interrupts in PIR, and sending a notification event
-		 * which has no effect is safe here.
+		 * Case 3: vCPU exits to root mode and is blocked. vcpu_block()
+		 * has already synced PIR to vIRR and never blocks the vCPU if
+		 * the vIRR is not empty. Therefore, a blocked vCPU here does
+		 * not wait for any requested interrupts in PIR, and sending a
+		 * notification event also results in a benign, spurious event.
 		 */
-		apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
+		if (vcpu != kvm_get_running_vcpu())
+			apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec);
 		return;
 	}
 #endif
@@ -5177,7 +5174,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
 	if (!kvm_require_dr(vcpu, dr))
 		return 1;
 
-	if (kvm_x86_ops.get_cpl(vcpu) > 0)
+	if (vmx_get_cpl(vcpu) > 0)
 		goto out;
 
 	dr7 = vmcs_readl(GUEST_DR7);
@@ -5310,9 +5307,16 @@ static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
 static int handle_apic_write(struct kvm_vcpu *vcpu)
 {
 	unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
-	u32 offset = exit_qualification & 0xfff;
 
-	/* APIC-write VM exit is trap-like and thus no need to adjust IP */
+	/*
+	 * APIC-write VM-Exit is trap-like, KVM doesn't need to advance RIP and
+	 * hardware has done any necessary aliasing, offset adjustments, etc...
+	 * for the access.  I.e. the correct value has already been written to
+	 * the vAPIC page for the correct 16-byte chunk.  KVM needs only to
+	 * retrieve the register value and emulate the access.
+	 */
+	u32 offset = exit_qualification & 0xff0;
+
 	kvm_apic_write_nodecode(vcpu, offset);
 	return 1;
 }
@@ -6975,7 +6979,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	return vmx_exit_handlers_fastpath(vcpu);
 }
 
-static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
+static void vmx_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
@@ -6986,7 +6990,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 	free_loaded_vmcs(vmx->loaded_vmcs);
 }
 
-static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
+static int vmx_vcpu_create(struct kvm_vcpu *vcpu)
 {
 	struct vmx_uret_msr *tsx_ctrl;
 	struct vcpu_vmx *vmx;
@@ -7348,11 +7352,11 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 			      vmx_secondary_exec_control(vmx));
 
 	if (nested_vmx_allowed(vcpu))
-		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
+		vmx->msr_ia32_feature_control_valid_bits |=
 			FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
 			FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
 	else
-		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
+		vmx->msr_ia32_feature_control_valid_bits &=
 			~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
 			  FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX);
 
@@ -7691,7 +7695,7 @@ static void vmx_migrate_timers(struct kvm_vcpu *vcpu)
 	}
 }
 
-static void hardware_unsetup(void)
+static void vmx_hardware_unsetup(void)
 {
 	kvm_set_posted_intr_wakeup_handler(NULL);
 
@@ -7714,21 +7718,20 @@ static bool vmx_check_apicv_inhibit_reasons(ulong bit)
 static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.name = "kvm_intel",
 
-	.hardware_unsetup = hardware_unsetup,
+	.hardware_unsetup = vmx_hardware_unsetup,
 
-	.hardware_enable = hardware_enable,
-	.hardware_disable = hardware_disable,
-	.cpu_has_accelerated_tpr = report_flexpriority,
+	.hardware_enable = vmx_hardware_enable,
+	.hardware_disable = vmx_hardware_disable,
 	.has_emulated_msr = vmx_has_emulated_msr,
 
 	.vm_size = sizeof(struct kvm_vmx),
 	.vm_init = vmx_vm_init,
 
-	.vcpu_create = vmx_create_vcpu,
-	.vcpu_free = vmx_free_vcpu,
+	.vcpu_create = vmx_vcpu_create,
+	.vcpu_free = vmx_vcpu_free,
 	.vcpu_reset = vmx_vcpu_reset,
 
-	.prepare_guest_switch = vmx_prepare_switch_to_guest,
+	.prepare_switch_to_guest = vmx_prepare_switch_to_guest,
 	.vcpu_load = vmx_vcpu_load,
 	.vcpu_put = vmx_vcpu_put,
 
@@ -7756,21 +7759,21 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.set_rflags = vmx_set_rflags,
 	.get_if_flag = vmx_get_if_flag,
 
-	.tlb_flush_all = vmx_flush_tlb_all,
-	.tlb_flush_current = vmx_flush_tlb_current,
-	.tlb_flush_gva = vmx_flush_tlb_gva,
-	.tlb_flush_guest = vmx_flush_tlb_guest,
+	.flush_tlb_all = vmx_flush_tlb_all,
+	.flush_tlb_current = vmx_flush_tlb_current,
+	.flush_tlb_gva = vmx_flush_tlb_gva,
+	.flush_tlb_guest = vmx_flush_tlb_guest,
 
 	.vcpu_pre_run = vmx_vcpu_pre_run,
-	.run = vmx_vcpu_run,
+	.vcpu_run = vmx_vcpu_run,
 	.handle_exit = vmx_handle_exit,
 	.skip_emulated_instruction = vmx_skip_emulated_instruction,
 	.update_emulated_instruction = vmx_update_emulated_instruction,
 	.set_interrupt_shadow = vmx_set_interrupt_shadow,
 	.get_interrupt_shadow = vmx_get_interrupt_shadow,
 	.patch_hypercall = vmx_patch_hypercall,
-	.set_irq = vmx_inject_irq,
-	.set_nmi = vmx_inject_nmi,
+	.inject_irq = vmx_inject_irq,
+	.inject_nmi = vmx_inject_nmi,
 	.queue_exception = vmx_queue_exception,
 	.cancel_injection = vmx_cancel_injection,
 	.interrupt_allowed = vmx_interrupt_allowed,
@@ -7823,8 +7826,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.pmu_ops = &intel_pmu_ops,
 	.nested_ops = &vmx_nested_ops,
 
-	.update_pi_irte = pi_update_irte,
-	.start_assignment = vmx_pi_start_assignment,
+	.pi_update_irte = vmx_pi_update_irte,
+	.pi_start_assignment = vmx_pi_start_assignment,
 
 #ifdef CONFIG_X86_64
 	.set_hv_timer = vmx_set_hv_timer,
@@ -8059,7 +8062,7 @@ static __init int hardware_setup(void)
 	vmx_set_cpu_caps();
 
 	r = alloc_kvm_area();
-	if (r)
+	if (r && nested)
 		nested_vmx_hardware_unsetup();
 
 	kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler);
@@ -8139,7 +8142,6 @@ static int __init vmx_init(void)
 	    ms_hyperv.hints & HV_X64_ENLIGHTENED_VMCS_RECOMMENDED &&
 	    (ms_hyperv.nested_features & HV_X64_ENLIGHTENED_VMCS_VERSION) >=
 	     KVM_EVMCS_VERSION) {
-		int cpu;
 
 		/* Check that we have assist pages on all online CPUs */
 		for_each_online_cpu(cpu) {
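
Footnote on the bulk renames in vmx_x86_ops above (hardware_enable →
vmx_hardware_enable, .run → .vcpu_run, .set_irq → .inject_irq, ...):
per the changelog they support the ongoing static_call() conversion,
which generates one trampoline per ops-table member, named after the
member, so member names and vendor function names have to line up
mechanically. A self-contained toy sketch of that pattern, assuming
only <linux/static_call.h>; the demo_* names are illustrative, not
KVM's:

	#include <linux/static_call.h>

	/* A toy ops table standing in for kvm_x86_ops/vmx_x86_ops. */
	struct demo_ops {
		int (*vcpu_run)(int cpu);
	};

	static int demo_vmx_vcpu_run(int cpu)
	{
		return cpu;	/* pretend to enter the guest */
	}

	static struct demo_ops demo_ops = {
		.vcpu_run = demo_vmx_vcpu_run,	/* member name == fn suffix */
	};

	/* One trampoline per member, typed from the member itself. */
	DEFINE_STATIC_CALL_NULL(demo_vcpu_run, *(((struct demo_ops *)0)->vcpu_run));

	static void demo_hardware_setup(void)
	{
		/* Bind the trampoline to the vendor implementation once. */
		static_call_update(demo_vcpu_run, demo_ops.vcpu_run);
	}

	static int demo_enter_guest(int cpu)
	{
		/* Direct-patched call: no indirect branch at the call site. */
		return static_call(demo_vcpu_run)(cpu);
	}

static_call_cond() is the companion used for optional void-returning
hooks: if the trampoline was left NULL, the call degrades to a no-op
instead of a fault.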