diff options
author | Linus Torvalds | 2017-03-04 11:36:19 -0800 |
---|---|---|
committer | Linus Torvalds | 2017-03-04 11:36:19 -0800 |
commit | 2d62e0768d3c28536d4cfe4c40ba1e5e8e442a93 (patch) | |
tree | 333f8cbcdb3b650813d758711a9e4ceee7b6fbce | |
parent | be834aafdf5f8a37c191e697ac8ee6d53ab5020c (diff) | |
parent | 16ce771b93ab569490fd27415694132a7ade0d79 (diff) |
Merge tag 'kvm-4.11-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull more KVM updates from Radim Krčmář:
"Second batch of KVM changes for the 4.11 merge window:
PPC:
- correct assumption about ASDR on POWER9
- fix MMIO emulation on POWER9
x86:
- add a simple test for ioperm
- cleanup TSS (going through KVM tree as the whole undertaking was
caused by VMX's use of TSS)
- fix nVMX interrupt delivery
- fix some performance counters in the guest
... and two cleanup patches"
* tag 'kvm-4.11-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
KVM: nVMX: Fix pending events injection
x86/kvm/vmx: remove unused variable in segment_base()
selftests/x86: Add a basic selftest for ioperm
x86/asm: Tidy up TSS limit code
kvm: convert kvm.users_count from atomic_t to refcount_t
KVM: x86: never specify a sample period for virtualized in_tx_cp counters
KVM: PPC: Book3S HV: Don't use ASDR for real-mode HPT faults on POWER9
KVM: PPC: Book3S HV: Fix software walk of guest process page tables
-rw-r--r-- | arch/powerpc/include/asm/book3s/64/mmu.h | 3 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_radix.c | 5 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rmhandlers.S | 8 | ||||
-rw-r--r-- | arch/x86/include/asm/desc.h | 18 | ||||
-rw-r--r-- | arch/x86/kernel/ioport.c | 8 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 6 | ||||
-rw-r--r-- | arch/x86/kvm/pmu.c | 13 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 9 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 3 | ||||
-rw-r--r-- | tools/testing/selftests/x86/Makefile | 2 | ||||
-rw-r--r-- | tools/testing/selftests/x86/ioperm.c | 170 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 8 |
12 files changed, 223 insertions, 30 deletions
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 1145dc8e726d..805d4105e9bb 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -46,7 +46,7 @@ extern struct patb_entry *partition_tb; /* Bits in patb0 field */ #define PATB_HR (1UL << 63) -#define RPDB_MASK 0x0ffffffffffff00fUL +#define RPDB_MASK 0x0fffffffffffff00UL #define RPDB_SHIFT (1UL << 8) #define RTS1_SHIFT 61 /* top 2 bits of radix tree size */ #define RTS1_MASK (3UL << RTS1_SHIFT) @@ -57,6 +57,7 @@ extern struct patb_entry *partition_tb; /* Bits in patb1 field */ #define PATB_GR (1UL << 63) /* guest uses radix; must match HR */ #define PRTS_MASK 0x1f /* process table size field */ +#define PRTB_MASK 0x0ffffffffffff000UL /* * Limit process table to PAGE_SIZE table. This diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 4344651f408c..f6b3e67c5762 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -32,6 +32,7 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, u32 pid; int ret, level, ps; __be64 prte, rpte; + unsigned long ptbl; unsigned long root, pte, index; unsigned long rts, bits, offset; unsigned long gpa; @@ -53,8 +54,8 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, return -EINVAL; /* Read partition table to find root of tree for effective PID */ - ret = kvm_read_guest(kvm, kvm->arch.process_table + pid * 16, - &prte, sizeof(prte)); + ptbl = (kvm->arch.process_table & PRTB_MASK) + (pid * 16); + ret = kvm_read_guest(kvm, ptbl, &prte, sizeof(prte)); if (ret) return ret; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 47414a6fe2dd..7c6477d1840a 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1787,12 +1787,12 @@ kvmppc_hdsi: /* HPTE not found fault or protection fault? */ andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h beq 1f /* if not, send it to the guest */ + andi. r0, r11, MSR_DR /* data relocation enabled? */ + beq 3f BEGIN_FTR_SECTION mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */ b 4f END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - andi. r0, r11, MSR_DR /* data relocation enabled? */ - beq 3f clrrdi r0, r4, 28 PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */ li r0, BOOK3S_INTERRUPT_DATA_SEGMENT @@ -1879,12 +1879,12 @@ kvmppc_hisi: bne .Lradix_hisi /* for radix, just save ASDR */ andis. r0, r11, SRR1_ISI_NOPT@h beq 1f + andi. r0, r11, MSR_IR /* instruction relocation enabled? */ + beq 3f BEGIN_FTR_SECTION mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */ b 4f END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - andi. r0, r11, MSR_IR /* instruction relocation enabled? */ - beq 3f clrrdi r0, r10, 28 PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */ li r0, BOOK3S_INTERRUPT_INST_SEGMENT diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index cb8f9149f6c8..1548ca92ad3f 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -205,6 +205,8 @@ static inline void native_load_tr_desc(void) asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); } +DECLARE_PER_CPU(bool, __tss_limit_invalid); + static inline void force_reload_TR(void) { struct desc_struct *d = get_cpu_gdt_table(smp_processor_id()); @@ -220,18 +222,20 @@ static inline void force_reload_TR(void) write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS); load_TR_desc(); + this_cpu_write(__tss_limit_invalid, false); } -DECLARE_PER_CPU(bool, need_tr_refresh); - -static inline void refresh_TR(void) +/* + * Call this if you need the TSS limit to be correct, which should be the case + * if and only if you have TIF_IO_BITMAP set or you're switching to a task + * with TIF_IO_BITMAP set. + */ +static inline void refresh_tss_limit(void) { DEBUG_LOCKS_WARN_ON(preemptible()); - if (unlikely(this_cpu_read(need_tr_refresh))) { + if (unlikely(this_cpu_read(__tss_limit_invalid))) force_reload_TR(); - this_cpu_write(need_tr_refresh, false); - } } /* @@ -250,7 +254,7 @@ static inline void invalidate_tss_limit(void) if (unlikely(test_thread_flag(TIF_IO_BITMAP))) force_reload_TR(); else - this_cpu_write(need_tr_refresh, true); + this_cpu_write(__tss_limit_invalid, true); } static inline void native_load_gdt(const struct desc_ptr *dtr) diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index ca49bab3e467..9c3cf0944bce 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -48,8 +48,14 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) t->io_bitmap_ptr = bitmap; set_thread_flag(TIF_IO_BITMAP); + /* + * Now that we have an IO bitmap, we need our TSS limit to be + * correct. It's fine if we are preempted after doing this: + * with TIF_IO_BITMAP set, context switches will keep our TSS + * limit correct. + */ preempt_disable(); - refresh_TR(); + refresh_tss_limit(); preempt_enable(); } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 56b059486c3b..f67591561711 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -69,8 +69,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { }; EXPORT_PER_CPU_SYMBOL(cpu_tss); -DEFINE_PER_CPU(bool, need_tr_refresh); -EXPORT_PER_CPU_SYMBOL_GPL(need_tr_refresh); +DEFINE_PER_CPU(bool, __tss_limit_invalid); +EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); /* * this gets called so that we can store lazy state into memory and copy the @@ -222,7 +222,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, * Make sure that the TSS limit is correct for the CPU * to notice the IO bitmap. */ - refresh_TR(); + refresh_tss_limit(); } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { /* * Clear any possible leftover bits: diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 06ce377dcbc9..026db42a86c3 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -113,12 +113,19 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, .config = config, }; + attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc); + if (in_tx) attr.config |= HSW_IN_TX; - if (in_tx_cp) + if (in_tx_cp) { + /* + * HSW_IN_TX_CHECKPOINTED is not supported with nonzero + * period. Just clear the sample period so at least + * allocating the counter doesn't fail. + */ + attr.sample_period = 0; attr.config |= HSW_IN_TX_CHECKPOINTED; - - attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc); + } event = perf_event_create_kernel_counter(&attr, -1, current, intr ? kvm_perf_overflow_intr : diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ef4ba71dbb66..283aa8601833 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2053,7 +2053,6 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) static unsigned long segment_base(u16 selector) { struct desc_ptr *gdt = this_cpu_ptr(&host_gdt); - struct desc_struct *d; struct desc_struct *table; unsigned long v; @@ -10642,6 +10641,11 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) { struct vcpu_vmx *vmx = to_vmx(vcpu); + if (vcpu->arch.exception.pending || + vcpu->arch.nmi_injected || + vcpu->arch.interrupt.pending) + return -EBUSY; + if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && vmx->nested.preemption_timer_expired) { if (vmx->nested.nested_run_pending) @@ -10651,8 +10655,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) } if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) { - if (vmx->nested.nested_run_pending || - vcpu->arch.interrupt.pending) + if (vmx->nested.nested_run_pending) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, NMI_VECTOR | INTR_TYPE_NMI_INTR | diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8d69d5150748..2c14ad9809da 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -26,6 +26,7 @@ #include <linux/context_tracking.h> #include <linux/irqbypass.h> #include <linux/swait.h> +#include <linux/refcount.h> #include <asm/signal.h> #include <linux/kvm.h> @@ -401,7 +402,7 @@ struct kvm { #endif struct kvm_vm_stat stat; struct kvm_arch arch; - atomic_t users_count; + refcount_t users_count; #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; spinlock_t ring_lock; diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 3a5ebae5303e..38e0a9ca5d71 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -5,7 +5,7 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \ - check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test \ + check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \ protection_keys test_vdso TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ diff --git a/tools/testing/selftests/x86/ioperm.c b/tools/testing/selftests/x86/ioperm.c new file mode 100644 index 000000000000..b77313ba2ab1 --- /dev/null +++ b/tools/testing/selftests/x86/ioperm.c @@ -0,0 +1,170 @@ +/* + * ioperm.c - Test case for ioperm(2) + * Copyright (c) 2015 Andrew Lutomirski + */ + +#define _GNU_SOURCE +#include <err.h> +#include <stdio.h> +#include <stdint.h> +#include <signal.h> +#include <setjmp.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdbool.h> +#include <sched.h> +#include <sys/io.h> + +static int nerrs = 0; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); + +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static jmp_buf jmpbuf; + +static void sigsegv(int sig, siginfo_t *si, void *ctx_void) +{ + siglongjmp(jmpbuf, 1); +} + +static bool try_outb(unsigned short port) +{ + sethandler(SIGSEGV, sigsegv, SA_RESETHAND); + if (sigsetjmp(jmpbuf, 1) != 0) { + return false; + } else { + asm volatile ("outb %%al, %w[port]" + : : [port] "Nd" (port), "a" (0)); + return true; + } + clearhandler(SIGSEGV); +} + +static void expect_ok(unsigned short port) +{ + if (!try_outb(port)) { + printf("[FAIL]\toutb to 0x%02hx failed\n", port); + exit(1); + } + + printf("[OK]\toutb to 0x%02hx worked\n", port); +} + +static void expect_gp(unsigned short port) +{ + if (try_outb(port)) { + printf("[FAIL]\toutb to 0x%02hx worked\n", port); + exit(1); + } + + printf("[OK]\toutb to 0x%02hx failed\n", port); +} + +int main(void) +{ + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + err(1, "sched_setaffinity to CPU 0"); + + expect_gp(0x80); + expect_gp(0xed); + + /* + * Probe for ioperm support. Note that clearing ioperm bits + * works even as nonroot. + */ + printf("[RUN]\tenable 0x80\n"); + if (ioperm(0x80, 1, 1) != 0) { + printf("[OK]\tioperm(0x80, 1, 1) failed (%d) -- try running as root\n", + errno); + return 0; + } + expect_ok(0x80); + expect_gp(0xed); + + printf("[RUN]\tdisable 0x80\n"); + if (ioperm(0x80, 1, 0) != 0) { + printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno); + return 1; + } + expect_gp(0x80); + expect_gp(0xed); + + /* Make sure that fork() preserves ioperm. */ + if (ioperm(0x80, 1, 1) != 0) { + printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno); + return 1; + } + + pid_t child = fork(); + if (child == -1) + err(1, "fork"); + + if (child == 0) { + printf("[RUN]\tchild: check that we inherited permissions\n"); + expect_ok(0x80); + expect_gp(0xed); + return 0; + } else { + int status; + if (waitpid(child, &status, 0) != child || + !WIFEXITED(status)) { + printf("[FAIL]\tChild died\n"); + nerrs++; + } else if (WEXITSTATUS(status) != 0) { + printf("[FAIL]\tChild failed\n"); + nerrs++; + } else { + printf("[OK]\tChild succeeded\n"); + } + } + + /* Test the capability checks. */ + + printf("\tDrop privileges\n"); + if (setresuid(1, 1, 1) != 0) { + printf("[WARN]\tDropping privileges failed\n"); + return 0; + } + + printf("[RUN]\tdisable 0x80\n"); + if (ioperm(0x80, 1, 0) != 0) { + printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno); + return 1; + } + printf("[OK]\tit worked\n"); + + printf("[RUN]\tenable 0x80 again\n"); + if (ioperm(0x80, 1, 1) == 0) { + printf("[FAIL]\tit succeeded but should have failed.\n"); + return 1; + } + printf("[OK]\tit failed\n"); + return 0; +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 799499417f5b..a17d78759727 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -619,7 +619,7 @@ static struct kvm *kvm_create_vm(unsigned long type) mutex_init(&kvm->lock); mutex_init(&kvm->irq_lock); mutex_init(&kvm->slots_lock); - atomic_set(&kvm->users_count, 1); + refcount_set(&kvm->users_count, 1); INIT_LIST_HEAD(&kvm->devices); r = kvm_arch_init_vm(kvm, type); @@ -749,13 +749,13 @@ static void kvm_destroy_vm(struct kvm *kvm) void kvm_get_kvm(struct kvm *kvm) { - atomic_inc(&kvm->users_count); + refcount_inc(&kvm->users_count); } EXPORT_SYMBOL_GPL(kvm_get_kvm); void kvm_put_kvm(struct kvm *kvm) { - if (atomic_dec_and_test(&kvm->users_count)) + if (refcount_dec_and_test(&kvm->users_count)) kvm_destroy_vm(kvm); } EXPORT_SYMBOL_GPL(kvm_put_kvm); @@ -3641,7 +3641,7 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file, * To avoid the race between open and the removal of the debugfs * directory we test against the users count. */ - if (!atomic_add_unless(&stat_data->kvm->users_count, 1, 0)) + if (!refcount_inc_not_zero(&stat_data->kvm->users_count)) return -ENOENT; if (simple_attr_open(inode, file, get, set, fmt)) { |