aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/cpuid.h8
-rw-r--r--arch/x86/kvm/i8254.c1
-rw-r--r--arch/x86/kvm/mtrr.c25
-rw-r--r--arch/x86/kvm/svm.c4
-rw-r--r--arch/x86/kvm/vmx.c12
-rw-r--r--arch/x86/kvm/x86.c74
6 files changed, 75 insertions, 49 deletions
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 06332cb7e7d1..3f5c48ddba45 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -38,6 +38,14 @@ static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
return best && (best->ecx & bit(X86_FEATURE_XSAVE));
}
+static inline bool guest_cpuid_has_mtrr(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 1, 0);
+ return best && (best->edx & bit(X86_FEATURE_MTRR));
+}
+
static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 08116ff227cc..b0ea42b78ccd 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -420,6 +420,7 @@ void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val, int hpet_legacy_s
u8 saved_mode;
if (hpet_legacy_start) {
/* save existing mode for later reenablement */
+ WARN_ON(channel != 0);
saved_mode = kvm->arch.vpit->pit_state.channels[0].mode;
kvm->arch.vpit->pit_state.channels[0].mode = 0xff; /* disable timer */
pit_load_count(kvm, channel, val);
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index 9e8bf13572e6..3f8c732117ec 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -120,14 +120,22 @@ static u8 mtrr_default_type(struct kvm_mtrr *mtrr_state)
return mtrr_state->deftype & IA32_MTRR_DEF_TYPE_TYPE_MASK;
}
-static u8 mtrr_disabled_type(void)
+static u8 mtrr_disabled_type(struct kvm_vcpu *vcpu)
{
/*
* Intel SDM 11.11.2.2: all MTRRs are disabled when
* IA32_MTRR_DEF_TYPE.E bit is cleared, and the UC
* memory type is applied to all of physical memory.
+ *
+ * However, virtual machines can be run with CPUID such that
+ * there are no MTRRs. In that case, the firmware will never
+ * enable MTRRs and it is obviously undesirable to run the
+ * guest entirely with UC memory and we use WB.
*/
- return MTRR_TYPE_UNCACHABLE;
+ if (guest_cpuid_has_mtrr(vcpu))
+ return MTRR_TYPE_UNCACHABLE;
+ else
+ return MTRR_TYPE_WRBACK;
}
/*
@@ -267,7 +275,7 @@ static int fixed_mtrr_addr_to_seg(u64 addr)
for (seg = 0; seg < seg_num; seg++) {
mtrr_seg = &fixed_seg_table[seg];
- if (mtrr_seg->start >= addr && addr < mtrr_seg->end)
+ if (mtrr_seg->start <= addr && addr < mtrr_seg->end)
return seg;
}
@@ -300,7 +308,6 @@ static void var_mtrr_range(struct kvm_mtrr_range *range, u64 *start, u64 *end)
*start = range->base & PAGE_MASK;
mask = range->mask & PAGE_MASK;
- mask |= ~0ULL << boot_cpu_data.x86_phys_bits;
/* This cannot overflow because writing to the reserved bits of
* variable MTRRs causes a #GP.
@@ -356,10 +363,14 @@ static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
if (var_mtrr_range_is_valid(cur))
list_del(&mtrr_state->var_ranges[index].node);
+ /* Extend the mask with all 1 bits to the left, since those
+ * bits must implicitly be 0. The bits are then cleared
+ * when reading them.
+ */
if (!is_mtrr_mask)
cur->base = data;
else
- cur->mask = data;
+ cur->mask = data | (-1LL << cpuid_maxphyaddr(vcpu));
/* add it to the list if it's enabled. */
if (var_mtrr_range_is_valid(cur)) {
@@ -426,6 +437,8 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
*pdata = vcpu->arch.mtrr_state.var_ranges[index].base;
else
*pdata = vcpu->arch.mtrr_state.var_ranges[index].mask;
+
+ *pdata &= (1ULL << cpuid_maxphyaddr(vcpu)) - 1;
}
return 0;
@@ -670,7 +683,7 @@ u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
}
if (iter.mtrr_disabled)
- return mtrr_disabled_type();
+ return mtrr_disabled_type(vcpu);
/* not contained in any MTRRs. */
if (type == -1)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 83a1c643f9a5..899c40f826dd 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3422,6 +3422,8 @@ static int handle_exit(struct kvm_vcpu *vcpu)
struct kvm_run *kvm_run = vcpu->run;
u32 exit_code = svm->vmcb->control.exit_code;
+ trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM);
+
if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
vcpu->arch.cr0 = svm->vmcb->save.cr0;
if (npt_enabled)
@@ -3892,8 +3894,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
- trace_kvm_exit(svm->vmcb->control.exit_code, vcpu, KVM_ISA_SVM);
-
if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
kvm_before_handle_nmi(&svm->vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 87acc5221740..44976a596fa6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2803,7 +2803,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vcpu->arch.ia32_xss;
break;
case MSR_TSC_AUX:
- if (!guest_cpuid_has_rdtscp(vcpu))
+ if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated)
return 1;
/* Otherwise falls through */
default:
@@ -2909,7 +2909,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
break;
case MSR_TSC_AUX:
- if (!guest_cpuid_has_rdtscp(vcpu))
+ if (!guest_cpuid_has_rdtscp(vcpu) && !msr_info->host_initiated)
return 1;
/* Check reserved bit, higher 32 bits should be zero */
if ((data >> 32) != 0)
@@ -7394,11 +7394,6 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
switch (type) {
case VMX_VPID_EXTENT_ALL_CONTEXT:
- if (get_vmcs12(vcpu)->virtual_processor_id == 0) {
- nested_vmx_failValid(vcpu,
- VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
- return 1;
- }
__vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02);
nested_vmx_succeed(vcpu);
break;
@@ -8047,6 +8042,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
u32 exit_reason = vmx->exit_reason;
u32 vectoring_info = vmx->idt_vectoring_info;
+ trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX);
+
/*
* Flush logged GPAs PML buffer, this will make dirty_bitmap more
* updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before
@@ -8673,7 +8670,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->loaded_vmcs->launched = 1;
vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
- trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX);
/*
* the KVM_REQ_EVENT optimization bit is only on for one entry, and if
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00462bd63129..97592e190413 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2763,6 +2763,26 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
return 0;
}
+static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
+{
+ return (!lapic_in_kernel(vcpu) ||
+ kvm_apic_accept_pic_intr(vcpu));
+}
+
+/*
+ * if userspace requested an interrupt window, check that the
+ * interrupt window is open.
+ *
+ * No need to exit to userspace if we already have an interrupt queued.
+ */
+static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
+{
+ return kvm_arch_interrupt_allowed(vcpu) &&
+ !kvm_cpu_has_interrupt(vcpu) &&
+ !kvm_event_needs_reinjection(vcpu) &&
+ kvm_cpu_accept_dm_intr(vcpu);
+}
+
static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
{
@@ -2786,6 +2806,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
return -EEXIST;
vcpu->arch.pending_external_vector = irq->irq;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
return 0;
}
@@ -3551,9 +3572,11 @@ static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
{
+ int i;
mutex_lock(&kvm->arch.vpit->pit_state.lock);
memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
- kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
+ for (i = 0; i < 3; i++)
+ kvm_pit_load_count(kvm, i, ps->channels[i].count, 0);
mutex_unlock(&kvm->arch.vpit->pit_state.lock);
return 0;
}
@@ -3572,6 +3595,7 @@ static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
{
int start = 0;
+ int i;
u32 prev_legacy, cur_legacy;
mutex_lock(&kvm->arch.vpit->pit_state.lock);
prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
@@ -3581,7 +3605,9 @@ static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
memcpy(&kvm->arch.vpit->pit_state.channels, &ps->channels,
sizeof(kvm->arch.vpit->pit_state.channels));
kvm->arch.vpit->pit_state.flags = ps->flags;
- kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
+ for (i = 0; i < 3; i++)
+ kvm_pit_load_count(kvm, i, kvm->arch.vpit->pit_state.channels[i].count,
+ start && i == 0);
mutex_unlock(&kvm->arch.vpit->pit_state.lock);
return 0;
}
@@ -5910,23 +5936,10 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
}
-/*
- * Check if userspace requested an interrupt window, and that the
- * interrupt window is open.
- *
- * No need to exit to userspace if we already have an interrupt queued.
- */
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
{
- if (!vcpu->run->request_interrupt_window || pic_in_kernel(vcpu->kvm))
- return false;
-
- if (kvm_cpu_has_interrupt(vcpu))
- return false;
-
- return (irqchip_split(vcpu->kvm)
- ? kvm_apic_accept_pic_intr(vcpu)
- : kvm_arch_interrupt_allowed(vcpu));
+ return vcpu->run->request_interrupt_window &&
+ likely(!pic_in_kernel(vcpu->kvm));
}
static void post_kvm_run_save(struct kvm_vcpu *vcpu)
@@ -5937,17 +5950,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
- if (!irqchip_in_kernel(vcpu->kvm))
- kvm_run->ready_for_interrupt_injection =
- kvm_arch_interrupt_allowed(vcpu) &&
- !kvm_cpu_has_interrupt(vcpu) &&
- !kvm_event_needs_reinjection(vcpu);
- else if (!pic_in_kernel(vcpu->kvm))
- kvm_run->ready_for_interrupt_injection =
- kvm_apic_accept_pic_intr(vcpu) &&
- !kvm_cpu_has_interrupt(vcpu);
- else
- kvm_run->ready_for_interrupt_injection = 1;
+ kvm_run->ready_for_interrupt_injection =
+ pic_in_kernel(vcpu->kvm) ||
+ kvm_vcpu_ready_for_interrupt_injection(vcpu);
}
static void update_cr8_intercept(struct kvm_vcpu *vcpu)
@@ -6360,8 +6365,10 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
int r;
- bool req_int_win = !lapic_in_kernel(vcpu) &&
- vcpu->run->request_interrupt_window;
+ bool req_int_win =
+ dm_request_for_irq_injection(vcpu) &&
+ kvm_cpu_accept_dm_intr(vcpu);
+
bool req_immediate_exit = false;
if (vcpu->requests) {
@@ -6513,6 +6520,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (req_immediate_exit)
smp_send_reschedule(vcpu->cpu);
+ trace_kvm_entry(vcpu->vcpu_id);
+ wait_lapic_expire(vcpu);
__kvm_guest_enter();
if (unlikely(vcpu->arch.switch_db_regs)) {
@@ -6525,8 +6534,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
}
- trace_kvm_entry(vcpu->vcpu_id);
- wait_lapic_expire(vcpu);
kvm_x86_ops->run(vcpu);
/*
@@ -6663,7 +6670,8 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
if (kvm_cpu_has_pending_timer(vcpu))
kvm_inject_pending_timer_irqs(vcpu);
- if (dm_request_for_irq_injection(vcpu)) {
+ if (dm_request_for_irq_injection(vcpu) &&
+ kvm_vcpu_ready_for_interrupt_injection(vcpu)) {
r = 0;
vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
++vcpu->stat.request_irq_exits;