diff options
author | Linus Torvalds | 2016-08-06 09:18:21 -0400 |
---|---|---|
committer | Linus Torvalds | 2016-08-06 09:18:21 -0400 |
commit | 80fac0f577a35c437219a2786c1804ab8ca1e998 (patch) | |
tree | dfe87e66f937b2b42ba6b87c4538b7e5bea06d07 | |
parent | 4305f42401b29e2e024bd064618faf25aef5cb69 (diff) | |
parent | 45e11817d5703eceb65a673927a8bc74dc1286d6 (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull more KVM updates from Paolo Bonzini:
- ARM bugfix and MSI injection support
- x86 nested virt tweak and OOPS fix
- Simplify pvclock code (vdso bits acked by Andy Lutomirski).
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
nvmx: mark ept single context invalidation as supported
nvmx: remove comment about missing nested vpid support
KVM: lapic: fix access preemption timer stuff even if kernel_irqchip=off
KVM: documentation: fix KVM_CAP_X2APIC_API information
x86: vdso: use __pvclock_read_cycles
pvclock: introduce seqcount-like API
arm64: KVM: Set cpsr before spsr on fault injection
KVM: arm: vgic-irqfd: Workaround changing kvm_set_routing_entry prototype
KVM: arm/arm64: Enable MSI routing
KVM: arm/arm64: Enable irqchip routing
KVM: Move kvm_setup_default/empty_irq_routing declaration in arch specific header
KVM: irqchip: Convey devid to kvm_set_msi
KVM: Add devid in kvm_kernel_irq_routing_entry
KVM: api: Pass the devid in the msi routing entry
-rw-r--r-- | Documentation/virtual/kvm/api.txt | 53 | ||||
-rw-r--r-- | arch/arm/kvm/Kconfig | 2 | ||||
-rw-r--r-- | arch/arm/kvm/Makefile | 1 | ||||
-rw-r--r-- | arch/arm/kvm/irq.h | 19 | ||||
-rw-r--r-- | arch/arm64/kvm/Kconfig | 2 | ||||
-rw-r--r-- | arch/arm64/kvm/Makefile | 1 | ||||
-rw-r--r-- | arch/arm64/kvm/inject_fault.c | 12 | ||||
-rw-r--r-- | arch/arm64/kvm/irq.h | 19 | ||||
-rw-r--r-- | arch/x86/entry/vdso/vclock_gettime.c | 25 | ||||
-rw-r--r-- | arch/x86/include/asm/pvclock.h | 39 | ||||
-rw-r--r-- | arch/x86/kernel/pvclock.c | 17 | ||||
-rw-r--r-- | arch/x86/kvm/irq.h | 3 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 3 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 15 | ||||
-rw-r--r-- | include/kvm/arm_vgic.h | 7 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 12 | ||||
-rw-r--r-- | include/uapi/linux/kvm.h | 5 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-init.c | 4 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic-irqfd.c | 116 | ||||
-rw-r--r-- | virt/kvm/arm/vgic/vgic.c | 7 | ||||
-rw-r--r-- | virt/kvm/irqchip.c | 28 |
21 files changed, 273 insertions, 117 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 5237e1b2fd66..739db9ab16b2 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1433,13 +1433,16 @@ KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed. 4.52 KVM_SET_GSI_ROUTING Capability: KVM_CAP_IRQ_ROUTING -Architectures: x86 s390 +Architectures: x86 s390 arm arm64 Type: vm ioctl Parameters: struct kvm_irq_routing (in) Returns: 0 on success, -1 on error Sets the GSI routing table entries, overwriting any previously set entries. +On arm/arm64, GSI routing has the following limitation: +- GSI routing does not apply to KVM_IRQ_LINE but only to KVM_IRQFD. + struct kvm_irq_routing { __u32 nr; __u32 flags; @@ -1468,7 +1471,13 @@ struct kvm_irq_routing_entry { #define KVM_IRQ_ROUTING_S390_ADAPTER 3 #define KVM_IRQ_ROUTING_HV_SINT 4 -No flags are specified so far, the corresponding field must be set to zero. +flags: +- KVM_MSI_VALID_DEVID: used along with KVM_IRQ_ROUTING_MSI routing entry + type, specifies that the devid field contains a valid value. The per-VM + KVM_CAP_MSI_DEVID capability advertises the requirement to provide + the device ID. If this capability is not available, userspace should + never set the KVM_MSI_VALID_DEVID flag as the ioctl might fail. +- zero otherwise struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1479,9 +1488,16 @@ struct kvm_irq_routing_msi { __u32 address_lo; __u32 address_hi; __u32 data; - __u32 pad; + union { + __u32 pad; + __u32 devid; + }; }; +If KVM_MSI_VALID_DEVID is set, devid contains a unique device identifier +for the device that wrote the MSI message. For PCI, this is usually a +BFD identifier in the lower 16 bits. + On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS feature of KVM_CAP_X2APIC_API capability is enabled. If it is enabled, address_hi bits 31-8 provide bits 31-8 of the destination id. Bits 7-0 of @@ -2199,18 +2215,19 @@ struct kvm_msi { __u8 pad[12]; }; -flags: KVM_MSI_VALID_DEVID: devid contains a valid value -devid: If KVM_MSI_VALID_DEVID is set, contains a unique device identifier - for the device that wrote the MSI message. - For PCI, this is usually a BFD identifier in the lower 16 bits. +flags: KVM_MSI_VALID_DEVID: devid contains a valid value. The per-VM + KVM_CAP_MSI_DEVID capability advertises the requirement to provide + the device ID. If this capability is not available, userspace + should never set the KVM_MSI_VALID_DEVID flag as the ioctl might fail. -The per-VM KVM_CAP_MSI_DEVID capability advertises the need to provide -the device ID. If this capability is not set, userland cannot rely on -the kernel to allow the KVM_MSI_VALID_DEVID flag being set. +If KVM_MSI_VALID_DEVID is set, devid contains a unique device identifier +for the device that wrote the MSI message. For PCI, this is usually a +BFD identifier in the lower 16 bits. -On x86, address_hi is ignored unless the KVM_CAP_X2APIC_API capability is -enabled. If it is enabled, address_hi bits 31-8 provide bits 31-8 of the -destination id. Bits 7-0 of address_hi must be zero. +On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS +feature of KVM_CAP_X2APIC_API capability is enabled. If it is enabled, +address_hi bits 31-8 provide bits 31-8 of the destination id. Bits 7-0 of +address_hi must be zero. 4.71 KVM_CREATE_PIT2 @@ -2383,9 +2400,13 @@ Note that closing the resamplefd is not sufficient to disable the irqfd. The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment and need not be specified with KVM_IRQFD_FLAG_DEASSIGN. -On ARM/ARM64, the gsi field in the kvm_irqfd struct specifies the Shared -Peripheral Interrupt (SPI) index, such that the GIC interrupt ID is -given by gsi + 32. +On arm/arm64, gsi routing being supported, the following can happen: +- in case no routing entry is associated to this gsi, injection fails +- in case the gsi is associated to an irqchip routing entry, + irqchip.pin + 32 corresponds to the injected SPI ID. +- in case the gsi is associated to an MSI routing entry, the MSI + message and device ID are translated into an LPI (support restricted + to GICv3 ITS in-kernel emulation). 4.76 KVM_PPC_ALLOCATE_HTAB diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 95a000515e43..3e1cd0452d67 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -32,6 +32,8 @@ config KVM select KVM_VFIO select HAVE_KVM_EVENTFD select HAVE_KVM_IRQFD + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER ---help--- Support hosting virtualized guest machines. diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 5e28df80dca7..10d77a66cad5 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -29,4 +29,5 @@ obj-y += $(KVM)/arm/vgic/vgic-v2.o obj-y += $(KVM)/arm/vgic/vgic-mmio.o obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o +obj-y += $(KVM)/irqchip.o obj-y += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/kvm/irq.h b/arch/arm/kvm/irq.h new file mode 100644 index 000000000000..b74099b905fd --- /dev/null +++ b/arch/arm/kvm/irq.h @@ -0,0 +1,19 @@ +/* + * irq.h: in kernel interrupt controller related definitions + * Copyright (c) 2016 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This header is included by irqchip.c. However, on ARM, interrupt + * controller declarations are located in include/kvm/arm_vgic.h since + * they are mostly shared between arm and arm64. + */ + +#ifndef __IRQ_H +#define __IRQ_H + +#include <kvm/arm_vgic.h> + +#endif diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 9d2eff0b3ad3..9c9edc98d271 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -37,6 +37,8 @@ config KVM select KVM_ARM_VGIC_V3 select KVM_ARM_PMU if HW_PERF_EVENTS select HAVE_KVM_MSI + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING ---help--- Support hosting virtualized guest machines. We don't support KVM with 16K page tables yet, due to the multiple diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index a5b96642a9cb..695eb3c7ef41 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -30,5 +30,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index e9e0e6db73f6..898c0e6aedd4 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -132,16 +132,14 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); - bool is_aarch32; + bool is_aarch32 = vcpu_mode_is_32bit(vcpu); u32 esr = 0; - is_aarch32 = vcpu_mode_is_32bit(vcpu); - - *vcpu_spsr(vcpu) = cpsr; *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); - *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); + *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_spsr(vcpu) = cpsr; vcpu_sys_reg(vcpu, FAR_EL1) = addr; @@ -172,11 +170,11 @@ static void inject_undef64(struct kvm_vcpu *vcpu) unsigned long cpsr = *vcpu_cpsr(vcpu); u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); - *vcpu_spsr(vcpu) = cpsr; *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); - *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); + *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_spsr(vcpu) = cpsr; /* * Build an unknown exception, depending on the instruction diff --git a/arch/arm64/kvm/irq.h b/arch/arm64/kvm/irq.h new file mode 100644 index 000000000000..b74099b905fd --- /dev/null +++ b/arch/arm64/kvm/irq.h @@ -0,0 +1,19 @@ +/* + * irq.h: in kernel interrupt controller related definitions + * Copyright (c) 2016 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This header is included by irqchip.c. However, on ARM, interrupt + * controller declarations are located in include/kvm/arm_vgic.h since + * they are mostly shared between arm and arm64. + */ + +#ifndef __IRQ_H +#define __IRQ_H + +#include <kvm/arm_vgic.h> + +#endif diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 2f02d23a05ef..94d54d0defa7 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -96,9 +96,8 @@ static notrace cycle_t vread_pvclock(int *mode) { const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; cycle_t ret; - u64 tsc, pvti_tsc; - u64 last, delta, pvti_system_time; - u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift; + u64 last; + u32 version; /* * Note: The kernel and hypervisor must guarantee that cpu ID @@ -123,29 +122,15 @@ static notrace cycle_t vread_pvclock(int *mode) */ do { - version = pvti->version; - - smp_rmb(); + version = pvclock_read_begin(pvti); if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) { *mode = VCLOCK_NONE; return 0; } - tsc = rdtsc_ordered(); - pvti_tsc_to_system_mul = pvti->tsc_to_system_mul; - pvti_tsc_shift = pvti->tsc_shift; - pvti_system_time = pvti->system_time; - pvti_tsc = pvti->tsc_timestamp; - - /* Make sure that the version double-check is last. */ - smp_rmb(); - } while (unlikely((version & 1) || version != pvti->version)); - - delta = tsc - pvti_tsc; - ret = pvti_system_time + - pvclock_scale_delta(delta, pvti_tsc_to_system_mul, - pvti_tsc_shift); + ret = __pvclock_read_cycles(pvti); + } while (pvclock_read_retry(pvti, version)); /* refer to vread_tsc() comment for rationale */ last = gtod->cycle_last; diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 7c1c89598688..d019f0cc80ec 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -25,6 +25,24 @@ void pvclock_resume(void); void pvclock_touch_watchdogs(void); +static __always_inline +unsigned pvclock_read_begin(const struct pvclock_vcpu_time_info *src) +{ + unsigned version = src->version & ~1; + /* Make sure that the version is read before the data. */ + virt_rmb(); + return version; +} + +static __always_inline +bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src, + unsigned version) +{ + /* Make sure that the version is re-read after the data. */ + virt_rmb(); + return unlikely(version != src->version); +} + /* * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, * yielding a 64-bit result. @@ -69,23 +87,12 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) } static __always_inline -unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, - cycle_t *cycles, u8 *flags) +cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src) { - unsigned version; - cycle_t offset; - u64 delta; - - version = src->version; - /* Make the latest version visible */ - smp_rmb(); - - delta = rdtsc_ordered() - src->tsc_timestamp; - offset = pvclock_scale_delta(delta, src->tsc_to_system_mul, - src->tsc_shift); - *cycles = src->system_time + offset; - *flags = src->flags; - return version; + u64 delta = rdtsc_ordered() - src->tsc_timestamp; + cycle_t offset = pvclock_scale_delta(delta, src->tsc_to_system_mul, + src->tsc_shift); + return src->system_time + offset; } struct pvclock_vsyscall_time_info { diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 06c58ce46762..3599404e3089 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -64,14 +64,9 @@ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src) u8 flags; do { - version = src->version; - /* Make the latest version visible */ - smp_rmb(); - + version = pvclock_read_begin(src); flags = src->flags; - /* Make sure that the version double-check is last. */ - smp_rmb(); - } while ((src->version & 1) || version != src->version); + } while (pvclock_read_retry(src, version)); return flags & valid_flags; } @@ -84,10 +79,10 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) u8 flags; do { - version = __pvclock_read_cycles(src, &ret, &flags); - /* Make sure that the version double-check is last. */ - smp_rmb(); - } while ((src->version & 1) || version != src->version); + version = pvclock_read_begin(src); + ret = __pvclock_read_cycles(src); + flags = src->flags; + } while (pvclock_read_retry(src, version)); if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) { src->flags &= ~PVCLOCK_GUEST_STOPPED; diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 61ebdc13a29a..035731eb3897 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -120,4 +120,7 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu); int apic_has_pending_timer(struct kvm_vcpu *vcpu); +int kvm_setup_default_irq_routing(struct kvm *kvm); +int kvm_setup_empty_irq_routing(struct kvm *kvm); + #endif diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 730cf174090a..b62c85229711 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1349,6 +1349,9 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu) { + if (!lapic_in_kernel(vcpu)) + return false; + return vcpu->arch.apic->lapic_timer.hv_timer_in_use; } EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bc354f003ce1..a45d8580f91e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2809,12 +2809,8 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXECUTE_ONLY_BIT; vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept; - /* - * For nested guests, we don't do anything specific - * for single context invalidation. Hence, only advertise - * support for global context invalidation. - */ - vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT; + vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | + VMX_EPT_EXTENT_CONTEXT_BIT; } else vmx->nested.nested_vmx_ept_caps = 0; @@ -2945,7 +2941,6 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) vmx->nested.nested_vmx_secondary_ctls_high); break; case MSR_IA32_VMX_EPT_VPID_CAP: - /* Currently, no nested vpid support */ *pdata = vmx->nested.nested_vmx_ept_caps | ((u64)vmx->nested.nested_vmx_vpid_caps << 32); break; @@ -7609,12 +7604,16 @@ static int handle_invept(struct kvm_vcpu *vcpu) switch (type) { case VMX_EPT_EXTENT_GLOBAL: + /* + * TODO: track mappings and invalidate + * single context requests appropriately + */ + case VMX_EPT_EXTENT_CONTEXT: kvm_mmu_sync_roots(vcpu); kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); nested_vmx_succeed(vcpu); break; default: - /* Trap single context invalidation invept calls */ BUG_ON(1); break; } diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 540da5149ba7..19b698ef3336 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -34,6 +34,7 @@ #define VGIC_MAX_SPI 1019 #define VGIC_MAX_RESERVED 1023 #define VGIC_MIN_LPI 8192 +#define KVM_IRQCHIP_NUM_PINS (1020 - 32) enum vgic_type { VGIC_V2, /* Good ol' GICv2 */ @@ -314,4 +315,10 @@ static inline int kvm_vgic_get_max_vcpus(void) int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); +/** + * kvm_vgic_setup_default_irq_routing: + * Setup a default flat gsi routing table mapping all SPIs + */ +int kvm_vgic_setup_default_irq_routing(struct kvm *kvm); + #endif /* __KVM_ARM_VGIC_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index aafd702f3e21..01e908ac4a39 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -317,7 +317,13 @@ struct kvm_kernel_irq_routing_entry { unsigned irqchip; unsigned pin; } irqchip; - struct msi_msg msi; + struct { + u32 address_lo; + u32 address_hi; + u32 data; + u32 flags; + u32 devid; + } msi; struct kvm_s390_adapter_int adapter; struct kvm_hv_sint hv_sint; }; @@ -1003,12 +1009,12 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) #ifdef CONFIG_S390 #define KVM_MAX_IRQ_ROUTES 4096 //FIXME: we can have more than that... +#elif defined(CONFIG_ARM64) +#define KVM_MAX_IRQ_ROUTES 4096 #else #define KVM_MAX_IRQ_ROUTES 1024 #endif -int kvm_setup_default_irq_routing(struct kvm *kvm); -int kvm_setup_empty_irq_routing(struct kvm *kvm); int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e98bb4cce639..300ef255d1e0 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -882,7 +882,10 @@ struct kvm_irq_routing_msi { __u32 address_lo; __u32 address_hi; __u32 data; - __u32 pad; + union { + __u32 pad; + __u32 devid; + }; }; struct kvm_irq_routing_s390_adapter { diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 1e30ce08700d..fb4b0a79a950 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -264,6 +264,10 @@ int vgic_init(struct kvm *kvm) kvm_for_each_vcpu(i, vcpu, kvm) kvm_vgic_vcpu_init(vcpu); + ret = kvm_vgic_setup_default_irq_routing(kvm); + if (ret) + goto out; + dist->initialized = true; out: return ret; diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c index c675513270bb..b31a51a14efb 100644 --- a/virt/kvm/arm/vgic/vgic-irqfd.c +++ b/virt/kvm/arm/vgic/vgic-irqfd.c @@ -17,36 +17,116 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <trace/events/kvm.h> +#include <kvm/arm_vgic.h> +#include "vgic.h" -int kvm_irq_map_gsi(struct kvm *kvm, - struct kvm_kernel_irq_routing_entry *entries, - int gsi) +/** + * vgic_irqfd_set_irq: inject the IRQ corresponding to the + * irqchip routing entry + * + * This is the entry point for irqfd IRQ injection + */ +static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) { - return 0; + unsigned int spi_id = e->irqchip.pin + VGIC_NR_PRIVATE_IRQS; + + if (!vgic_valid_spi(kvm, spi_id)) + return -EINVAL; + return kvm_vgic_inject_irq(kvm, 0, spi_id, level); } -int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned int irqchip, - unsigned int pin) +/** + * kvm_set_routing_entry: populate a kvm routing entry + * from a user routing entry + * + * @kvm: the VM this entry is applied to + * @e: kvm kernel routing entry handle + * @ue: user api routing entry handle + * return 0 on success, -EINVAL on errors. + */ +#ifdef KVM_CAP_X2APIC_API +int kvm_set_routing_entry(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +#else +/* Remove this version and the ifdefery once merged into 4.8 */ +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +#endif { - return pin; + int r = -EINVAL; + + switch (ue->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + e->set = vgic_irqfd_set_irq; + e->irqchip.irqchip = ue->u.irqchip.irqchip; + e->irqchip.pin = ue->u.irqchip.pin; + if ((e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) || + (e->irqchip.irqchip >= KVM_NR_IRQCHIPS)) + goto out; + break; + case KVM_IRQ_ROUTING_MSI: + e->set = kvm_set_msi; + e->msi.address_lo = ue->u.msi.address_lo; + e->msi.address_hi = ue->u.msi.address_hi; + e->msi.data = ue->u.msi.data; + e->msi.flags = ue->flags; + e->msi.devid = ue->u.msi.devid; + break; + default: + goto out; + } + r = 0; +out: + return r; } -int kvm_set_irq(struct kvm *kvm, int irq_source_id, - u32 irq, int level, bool line_status) +/** + * kvm_set_msi: inject the MSI corresponding to the + * MSI routing entry + * + * This is the entry point for irqfd MSI injection + * and userspace MSI injection. + */ +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) { - unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS; + struct kvm_msi msi; - trace_kvm_set_irq(irq, level, irq_source_id); + msi.address_lo = e->msi.address_lo; + msi.address_hi = e->msi.address_hi; + msi.data = e->msi.data; + msi.flags = e->msi.flags; + msi.devid = e->msi.devid; - BUG_ON(!vgic_initialized(kvm)); + if (!vgic_has_its(kvm)) + return -ENODEV; - return kvm_vgic_inject_irq(kvm, 0, spi, level); + return vgic_its_inject_msi(kvm, &msi); } -/* MSI not implemented yet */ -int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, - int level, bool line_status) +int kvm_vgic_setup_default_irq_routing(struct kvm *kvm) { - return 0; + struct kvm_irq_routing_entry *entries; + struct vgic_dist *dist = &kvm->arch.vgic; + u32 nr = dist->nr_spis; + int i, ret; + + entries = kcalloc(nr, sizeof(struct kvm_kernel_irq_routing_entry), + GFP_KERNEL); + if (!entries) + return -ENOMEM; + + for (i = 0; i < nr; i++) { + entries[i].gsi = i; + entries[i].type = KVM_IRQ_ROUTING_IRQCHIP; + entries[i].u.irqchip.irqchip = 0; + entries[i].u.irqchip.pin = i; + } + ret = kvm_set_irq_routing(kvm, entries, nr, 0); + kfree(entries); + return ret; } diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 39f3358c6d91..e7aeac719e09 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -711,10 +711,3 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq) return map_is_active; } -int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) -{ - if (vgic_has_its(kvm)) - return vgic_its_inject_msi(kvm, msi); - else - return -ENODEV; -} diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index df99e9c3b64d..3bcc9990adf7 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -62,12 +62,14 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) { struct kvm_kernel_irq_routing_entry route; - if (!irqchip_in_kernel(kvm) || msi->flags != 0) + if (!irqchip_in_kernel(kvm) || (msi->flags & ~KVM_MSI_VALID_DEVID)) return -EINVAL; route.msi.address_lo = msi->address_lo; route.msi.address_hi = msi->address_hi; route.msi.data = msi->data; + route.msi.flags = msi->flags; + route.msi.devid = msi->devid; return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); } @@ -177,6 +179,7 @@ int kvm_set_irq_routing(struct kvm *kvm, unsigned flags) { struct kvm_irq_routing_table *new, *old; + struct kvm_kernel_irq_routing_entry *e; u32 i, j, nr_rt_entries = 0; int r; @@ -200,23 +203,25 @@ int kvm_set_irq_routing(struct kvm *kvm, new->chip[i][j] = -1; for (i = 0; i < nr; ++i) { - struct kvm_kernel_irq_routing_entry *e; - r = -ENOMEM; e = kzalloc(sizeof(*e), GFP_KERNEL); if (!e) goto out; r = -EINVAL; - if (ue->flags) { - kfree(e); - goto out; + switch (ue->type) { + case KVM_IRQ_ROUTING_MSI: + if (ue->flags & ~KVM_MSI_VALID_DEVID) + goto free_entry; + break; + default: + if (ue->flags) + goto free_entry; + break; } r = setup_routing_entry(kvm, new, e, ue); - if (r) { - kfree(e); - goto out; - } + if (r) + goto free_entry; ++ue; } @@ -233,7 +238,10 @@ int kvm_set_irq_routing(struct kvm *kvm, new = old; r = 0; + goto out; +free_entry: + kfree(e); out: free_irq_routing_table(new); |