From d9565a7399d665fa7313122504778cb3d5ef3e19 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 22 Jul 2016 16:20:40 +0000 Subject: KVM: Move kvm_setup_default/empty_irq_routing declaration in arch specific header kvm_setup_default_irq_routing and kvm_setup_empty_irq_routing are not used by generic code. So let's move the declarations in x86 irq.h header instead of kvm_host.h. Signed-off-by: Eric Auger Suggested-by: Andre Przywara Acked-by: Radim Krčmář Signed-off-by: Marc Zyngier --- arch/x86/kvm/irq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 61ebdc13a29a..035731eb3897 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -120,4 +120,7 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu); int apic_has_pending_timer(struct kvm_vcpu *vcpu); +int kvm_setup_default_irq_routing(struct kvm *kvm); +int kvm_setup_empty_irq_routing(struct kvm *kvm); + #endif -- cgit v1.2.3 From 180ae7b1182344ca617d8b5200306b02a6b5075d Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 22 Jul 2016 16:20:41 +0000 Subject: KVM: arm/arm64: Enable irqchip routing This patch adds compilation and link against irqchip. Main motivation behind using irqchip code is to enable MSI routing code. In the future irqchip routing may also be useful when targeting multiple irqchips. Routing standard callbacks now are implemented in vgic-irqfd: - kvm_set_routing_entry - kvm_set_irq - kvm_set_msi They only are supported with new_vgic code. Both HAVE_KVM_IRQCHIP and HAVE_KVM_IRQ_ROUTING are defined. KVM_CAP_IRQ_ROUTING is advertised and KVM_SET_GSI_ROUTING is allowed. So from now on IRQCHIP routing is enabled and a routing table entry must exist for irqfd injection to succeed for a given SPI. This patch builds a default flat irqchip routing table (gsi=irqchip.pin) covering all the VGIC SPI indexes. This routing table is overwritten by the first first user-space call to KVM_SET_GSI_ROUTING ioctl. MSI routing setup is not yet allowed. Signed-off-by: Eric Auger Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 12 +++-- arch/arm/kvm/Kconfig | 2 + arch/arm/kvm/Makefile | 1 + arch/arm/kvm/irq.h | 19 ++++++++ arch/arm64/kvm/Kconfig | 2 + arch/arm64/kvm/Makefile | 1 + arch/arm64/kvm/irq.h | 19 ++++++++ include/kvm/arm_vgic.h | 7 +++ virt/kvm/arm/vgic/vgic-init.c | 4 ++ virt/kvm/arm/vgic/vgic-irqfd.c | 100 +++++++++++++++++++++++++++++++------- virt/kvm/arm/vgic/vgic.c | 7 --- 11 files changed, 145 insertions(+), 29 deletions(-) create mode 100644 arch/arm/kvm/irq.h create mode 100644 arch/arm64/kvm/irq.h (limited to 'arch') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 415cde1647e9..7e5f9afcc693 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1433,13 +1433,16 @@ KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed. 4.52 KVM_SET_GSI_ROUTING Capability: KVM_CAP_IRQ_ROUTING -Architectures: x86 s390 +Architectures: x86 s390 arm arm64 Type: vm ioctl Parameters: struct kvm_irq_routing (in) Returns: 0 on success, -1 on error Sets the GSI routing table entries, overwriting any previously set entries. +On arm/arm64, GSI routing has the following limitation: +- GSI routing does not apply to KVM_IRQ_LINE but only to KVM_IRQFD. + struct kvm_irq_routing { __u32 nr; __u32 flags; @@ -2374,9 +2377,10 @@ Note that closing the resamplefd is not sufficient to disable the irqfd. The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment and need not be specified with KVM_IRQFD_FLAG_DEASSIGN. -On ARM/ARM64, the gsi field in the kvm_irqfd struct specifies the Shared -Peripheral Interrupt (SPI) index, such that the GIC interrupt ID is -given by gsi + 32. +On arm/arm64, gsi routing being supported, the following can happen: +- in case no routing entry is associated to this gsi, injection fails +- in case the gsi is associated to an irqchip routing entry, + irqchip.pin + 32 corresponds to the injected SPI ID. 4.76 KVM_PPC_ALLOCATE_HTAB diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 95a000515e43..3e1cd0452d67 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -32,6 +32,8 @@ config KVM select KVM_VFIO select HAVE_KVM_EVENTFD select HAVE_KVM_IRQFD + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER ---help--- Support hosting virtualized guest machines. diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 5e28df80dca7..10d77a66cad5 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -29,4 +29,5 @@ obj-y += $(KVM)/arm/vgic/vgic-v2.o obj-y += $(KVM)/arm/vgic/vgic-mmio.o obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o +obj-y += $(KVM)/irqchip.o obj-y += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/kvm/irq.h b/arch/arm/kvm/irq.h new file mode 100644 index 000000000000..b74099b905fd --- /dev/null +++ b/arch/arm/kvm/irq.h @@ -0,0 +1,19 @@ +/* + * irq.h: in kernel interrupt controller related definitions + * Copyright (c) 2016 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This header is included by irqchip.c. However, on ARM, interrupt + * controller declarations are located in include/kvm/arm_vgic.h since + * they are mostly shared between arm and arm64. + */ + +#ifndef __IRQ_H +#define __IRQ_H + +#include + +#endif diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 9d2eff0b3ad3..9c9edc98d271 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -37,6 +37,8 @@ config KVM select KVM_ARM_VGIC_V3 select KVM_ARM_PMU if HW_PERF_EVENTS select HAVE_KVM_MSI + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING ---help--- Support hosting virtualized guest machines. We don't support KVM with 16K page tables yet, due to the multiple diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index a5b96642a9cb..695eb3c7ef41 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -30,5 +30,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o diff --git a/arch/arm64/kvm/irq.h b/arch/arm64/kvm/irq.h new file mode 100644 index 000000000000..b74099b905fd --- /dev/null +++ b/arch/arm64/kvm/irq.h @@ -0,0 +1,19 @@ +/* + * irq.h: in kernel interrupt controller related definitions + * Copyright (c) 2016 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This header is included by irqchip.c. However, on ARM, interrupt + * controller declarations are located in include/kvm/arm_vgic.h since + * they are mostly shared between arm and arm64. + */ + +#ifndef __IRQ_H +#define __IRQ_H + +#include + +#endif diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 540da5149ba7..19b698ef3336 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -34,6 +34,7 @@ #define VGIC_MAX_SPI 1019 #define VGIC_MAX_RESERVED 1023 #define VGIC_MIN_LPI 8192 +#define KVM_IRQCHIP_NUM_PINS (1020 - 32) enum vgic_type { VGIC_V2, /* Good ol' GICv2 */ @@ -314,4 +315,10 @@ static inline int kvm_vgic_get_max_vcpus(void) int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); +/** + * kvm_vgic_setup_default_irq_routing: + * Setup a default flat gsi routing table mapping all SPIs + */ +int kvm_vgic_setup_default_irq_routing(struct kvm *kvm); + #endif /* __KVM_ARM_VGIC_H */ diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 01a60dcd05d6..1aba785cd498 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -264,6 +264,10 @@ int vgic_init(struct kvm *kvm) kvm_for_each_vcpu(i, vcpu, kvm) kvm_vgic_vcpu_init(vcpu); + ret = kvm_vgic_setup_default_irq_routing(kvm); + if (ret) + goto out; + dist->initialized = true; out: return ret; diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c index c675513270bb..6e84d530d9f7 100644 --- a/virt/kvm/arm/vgic/vgic-irqfd.c +++ b/virt/kvm/arm/vgic/vgic-irqfd.c @@ -17,36 +17,100 @@ #include #include #include +#include +#include "vgic.h" -int kvm_irq_map_gsi(struct kvm *kvm, - struct kvm_kernel_irq_routing_entry *entries, - int gsi) +/** + * vgic_irqfd_set_irq: inject the IRQ corresponding to the + * irqchip routing entry + * + * This is the entry point for irqfd IRQ injection + */ +static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) { - return 0; + unsigned int spi_id = e->irqchip.pin + VGIC_NR_PRIVATE_IRQS; + + if (!vgic_valid_spi(kvm, spi_id)) + return -EINVAL; + return kvm_vgic_inject_irq(kvm, 0, spi_id, level); } -int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned int irqchip, - unsigned int pin) +/** + * kvm_set_routing_entry: populate a kvm routing entry + * from a user routing entry + * + * @e: kvm kernel routing entry handle + * @ue: user api routing entry handle + * return 0 on success, -EINVAL on errors. + */ +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) { - return pin; + int r = -EINVAL; + + switch (ue->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + e->set = vgic_irqfd_set_irq; + e->irqchip.irqchip = ue->u.irqchip.irqchip; + e->irqchip.pin = ue->u.irqchip.pin; + if ((e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) || + (e->irqchip.irqchip >= KVM_NR_IRQCHIPS)) + goto out; + break; + default: + goto out; + } + r = 0; +out: + return r; } -int kvm_set_irq(struct kvm *kvm, int irq_source_id, - u32 irq, int level, bool line_status) +/** + * kvm_set_msi: inject the MSI corresponding to the + * MSI routing entry + * + * This is the entry point for irqfd MSI injection + * and userspace MSI injection. + */ +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) { - unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS; + struct kvm_msi msi; - trace_kvm_set_irq(irq, level, irq_source_id); + msi.address_lo = e->msi.address_lo; + msi.address_hi = e->msi.address_hi; + msi.data = e->msi.data; + msi.flags = e->msi.flags; + msi.devid = e->msi.devid; - BUG_ON(!vgic_initialized(kvm)); + if (!vgic_has_its(kvm)) + return -ENODEV; - return kvm_vgic_inject_irq(kvm, 0, spi, level); + return vgic_its_inject_msi(kvm, &msi); } -/* MSI not implemented yet */ -int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, - int level, bool line_status) +int kvm_vgic_setup_default_irq_routing(struct kvm *kvm) { - return 0; + struct kvm_irq_routing_entry *entries; + struct vgic_dist *dist = &kvm->arch.vgic; + u32 nr = dist->nr_spis; + int i, ret; + + entries = kcalloc(nr, sizeof(struct kvm_kernel_irq_routing_entry), + GFP_KERNEL); + if (!entries) + return -ENOMEM; + + for (i = 0; i < nr; i++) { + entries[i].gsi = i; + entries[i].type = KVM_IRQ_ROUTING_IRQCHIP; + entries[i].u.irqchip.irqchip = 0; + entries[i].u.irqchip.pin = i; + } + ret = kvm_set_irq_routing(kvm, entries, nr, 0); + kfree(entries); + return ret; } diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 39f3358c6d91..e7aeac719e09 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -711,10 +711,3 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq) return map_is_active; } -int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) -{ - if (vgic_has_its(kvm)) - return vgic_its_inject_msi(kvm, msi); - else - return -ENODEV; -} -- cgit v1.2.3 From 89581f06b2bc225f0c9822fa52e714aa2e3810dd Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 22 Jul 2016 10:38:46 -0400 Subject: arm64: KVM: Set cpsr before spsr on fault injection We need to set cpsr before determining the spsr bank, as the bank depends on the target exception level of the injection, not the current mode of the vcpu. Normally this is one in the same (EL1), but not when we manage to trap an EL0 fault. It still doesn't really matter for the 64-bit EL0 case though, as vcpu_spsr() unconditionally uses the EL1 bank for that. However the 32-bit EL0 case gets fun, as that path will lead to the BUG() in vcpu_spsr32(). This patch fixes the assignment order and also modifies some white space in order to better group pairs of lines that have strict order. Cc: stable@vger.kernel.org # v4.5 Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier --- arch/arm64/kvm/inject_fault.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index e9e0e6db73f6..898c0e6aedd4 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -132,16 +132,14 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); - bool is_aarch32; + bool is_aarch32 = vcpu_mode_is_32bit(vcpu); u32 esr = 0; - is_aarch32 = vcpu_mode_is_32bit(vcpu); - - *vcpu_spsr(vcpu) = cpsr; *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); - *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); + *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_spsr(vcpu) = cpsr; vcpu_sys_reg(vcpu, FAR_EL1) = addr; @@ -172,11 +170,11 @@ static void inject_undef64(struct kvm_vcpu *vcpu) unsigned long cpsr = *vcpu_cpsr(vcpu); u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); - *vcpu_spsr(vcpu) = cpsr; *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); - *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); + *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_spsr(vcpu) = cpsr; /* * Build an unknown exception, depending on the instruction -- cgit v1.2.3 From 3aed64f6d341cdb62bb2d6232589fb13448ce063 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 9 Jun 2016 13:06:08 +0200 Subject: pvclock: introduce seqcount-like API The version field in struct pvclock_vcpu_time_info basically implements a seqcount. Wrap it with the usual read_begin and read_retry functions, and use these APIs instead of peppering the code with smp_rmb()s. While at it, change it to the more pedantically correct virt_rmb(). With this change, __pvclock_read_cycles can be simplified noticeably. Signed-off-by: Paolo Bonzini --- arch/x86/entry/vdso/vclock_gettime.c | 9 ++------- arch/x86/include/asm/pvclock.h | 39 +++++++++++++++++++++--------------- arch/x86/kernel/pvclock.c | 17 ++++++---------- 3 files changed, 31 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 2f02d23a05ef..db1e3b4c3693 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -123,9 +123,7 @@ static notrace cycle_t vread_pvclock(int *mode) */ do { - version = pvti->version; - - smp_rmb(); + version = pvclock_read_begin(pvti); if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) { *mode = VCLOCK_NONE; @@ -137,10 +135,7 @@ static notrace cycle_t vread_pvclock(int *mode) pvti_tsc_shift = pvti->tsc_shift; pvti_system_time = pvti->system_time; pvti_tsc = pvti->tsc_timestamp; - - /* Make sure that the version double-check is last. */ - smp_rmb(); - } while (unlikely((version & 1) || version != pvti->version)); + } while (pvclock_read_retry(pvti, version)); delta = tsc - pvti_tsc; ret = pvti_system_time + diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 7c1c89598688..d019f0cc80ec 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -25,6 +25,24 @@ void pvclock_resume(void); void pvclock_touch_watchdogs(void); +static __always_inline +unsigned pvclock_read_begin(const struct pvclock_vcpu_time_info *src) +{ + unsigned version = src->version & ~1; + /* Make sure that the version is read before the data. */ + virt_rmb(); + return version; +} + +static __always_inline +bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src, + unsigned version) +{ + /* Make sure that the version is re-read after the data. */ + virt_rmb(); + return unlikely(version != src->version); +} + /* * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, * yielding a 64-bit result. @@ -69,23 +87,12 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) } static __always_inline -unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, - cycle_t *cycles, u8 *flags) +cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src) { - unsigned version; - cycle_t offset; - u64 delta; - - version = src->version; - /* Make the latest version visible */ - smp_rmb(); - - delta = rdtsc_ordered() - src->tsc_timestamp; - offset = pvclock_scale_delta(delta, src->tsc_to_system_mul, - src->tsc_shift); - *cycles = src->system_time + offset; - *flags = src->flags; - return version; + u64 delta = rdtsc_ordered() - src->tsc_timestamp; + cycle_t offset = pvclock_scale_delta(delta, src->tsc_to_system_mul, + src->tsc_shift); + return src->system_time + offset; } struct pvclock_vsyscall_time_info { diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 06c58ce46762..3599404e3089 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -64,14 +64,9 @@ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src) u8 flags; do { - version = src->version; - /* Make the latest version visible */ - smp_rmb(); - + version = pvclock_read_begin(src); flags = src->flags; - /* Make sure that the version double-check is last. */ - smp_rmb(); - } while ((src->version & 1) || version != src->version); + } while (pvclock_read_retry(src, version)); return flags & valid_flags; } @@ -84,10 +79,10 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) u8 flags; do { - version = __pvclock_read_cycles(src, &ret, &flags); - /* Make sure that the version double-check is last. */ - smp_rmb(); - } while ((src->version & 1) || version != src->version); + version = pvclock_read_begin(src); + ret = __pvclock_read_cycles(src); + flags = src->flags; + } while (pvclock_read_retry(src, version)); if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) { src->flags &= ~PVCLOCK_GUEST_STOPPED; -- cgit v1.2.3 From abe9efa79be02cf2ba27f643b214b07877bb050b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 9 Jun 2016 20:12:34 +0200 Subject: x86: vdso: use __pvclock_read_cycles The new simplified __pvclock_read_cycles does the same computation as vread_pvclock, except that (because it takes the pvclock_vcpu_time_info pointer) it has to be moved inside the loop. Since the loop is expected to never roll, this makes no difference. Acked-by: Andy Lutomirski Signed-off-by: Paolo Bonzini --- arch/x86/entry/vdso/vclock_gettime.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index db1e3b4c3693..94d54d0defa7 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -96,9 +96,8 @@ static notrace cycle_t vread_pvclock(int *mode) { const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; cycle_t ret; - u64 tsc, pvti_tsc; - u64 last, delta, pvti_system_time; - u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift; + u64 last; + u32 version; /* * Note: The kernel and hypervisor must guarantee that cpu ID @@ -130,18 +129,9 @@ static notrace cycle_t vread_pvclock(int *mode) return 0; } - tsc = rdtsc_ordered(); - pvti_tsc_to_system_mul = pvti->tsc_to_system_mul; - pvti_tsc_shift = pvti->tsc_shift; - pvti_system_time = pvti->system_time; - pvti_tsc = pvti->tsc_timestamp; + ret = __pvclock_read_cycles(pvti); } while (pvclock_read_retry(pvti, version)); - delta = tsc - pvti_tsc; - ret = pvti_system_time + - pvclock_scale_delta(delta, pvti_tsc_to_system_mul, - pvti_tsc_shift); - /* refer to vread_tsc() comment for rationale */ last = gtod->cycle_last; -- cgit v1.2.3 From 910053002ef1fdc1d42b7015d5b2400172f43e42 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 3 Aug 2016 12:04:12 +0800 Subject: KVM: lapic: fix access preemption timer stuff even if kernel_irqchip=off BUG: unable to handle kernel NULL pointer dereference at 000000000000008c IP: [] kvm_lapic_hv_timer_in_use+0x10/0x20 [kvm] PGD 0 Oops: 0000 [#1] SMP Call Trace: kvm_arch_vcpu_load+0x86/0x260 [kvm] vcpu_load+0x46/0x60 [kvm] kvm_vcpu_ioctl+0x79/0x7c0 [kvm] ? __lock_is_held+0x54/0x70 do_vfs_ioctl+0x96/0x6a0 ? __fget_light+0x2a/0x90 SyS_ioctl+0x79/0x90 do_syscall_64+0x7c/0x1e0 entry_SYSCALL64_slow_path+0x25/0x25 RIP [] kvm_lapic_hv_timer_in_use+0x10/0x20 [kvm] RSP CR2: 000000000000008c ---[ end trace a55fb79d2b3b4ee8 ]--- This can be reproduced steadily by kernel_irqchip=off. We should not access preemption timer stuff if lapic is emulated in userspace. This patch fix it by avoiding access preemption timer stuff when kernel_irqchip=off. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Yunhong Jiang Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 730cf174090a..b62c85229711 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1349,6 +1349,9 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu) { + if (!lapic_in_kernel(vcpu)) + return false; + return vcpu->arch.apic->lapic_timer.hv_timer_in_use; } EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); -- cgit v1.2.3 From 03331b4b8bd1f0fef34539afa6bb6c45ec25c47b Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 2 Aug 2016 16:32:35 -0400 Subject: nvmx: remove comment about missing nested vpid support Nested vpid is already supported and both single/global modes are advertised to the guest Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bc354f003ce1..b26c222ebeef 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2945,7 +2945,6 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) vmx->nested.nested_vmx_secondary_ctls_high); break; case MSR_IA32_VMX_EPT_VPID_CAP: - /* Currently, no nested vpid support */ *pdata = vmx->nested.nested_vmx_ept_caps | ((u64)vmx->nested.nested_vmx_vpid_caps << 32); break; -- cgit v1.2.3 From 45e11817d5703eceb65a673927a8bc74dc1286d6 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 2 Aug 2016 16:32:36 -0400 Subject: nvmx: mark ept single context invalidation as supported Commit 4b855078601f ("KVM: nVMX: Don't advertise single context invalidation for invept") removed advertising single context invalidation since the spec does not mandate it. However, some hypervisors (such as ESX) require it to be present before willing to use ept in a nested environment. Advertise it and fallback to the global case. Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b26c222ebeef..a45d8580f91e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2809,12 +2809,8 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXECUTE_ONLY_BIT; vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept; - /* - * For nested guests, we don't do anything specific - * for single context invalidation. Hence, only advertise - * support for global context invalidation. - */ - vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT; + vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | + VMX_EPT_EXTENT_CONTEXT_BIT; } else vmx->nested.nested_vmx_ept_caps = 0; @@ -7608,12 +7604,16 @@ static int handle_invept(struct kvm_vcpu *vcpu) switch (type) { case VMX_EPT_EXTENT_GLOBAL: + /* + * TODO: track mappings and invalidate + * single context requests appropriately + */ + case VMX_EPT_EXTENT_CONTEXT: kvm_mmu_sync_roots(vcpu); kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); nested_vmx_succeed(vcpu); break; default: - /* Trap single context invalidation invept calls */ BUG_ON(1); break; } -- cgit v1.2.3