From 1ea252afcd4b264b71d9c3f55358ff5ba4c04f1b Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Thu, 8 Mar 2007 11:48:09 +0200
Subject: KVM: Fix bogus sign extension in mmu mapping audit

When auditing a 32-bit guest on a 64-bit host, sign extension of the
page table directory pointer table index caused bogus addresses to be
shown on audit errors.

Fix by declaring the index unsigned.

Signed-off-by: Avi Kivity
---
 drivers/kvm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/kvm/mmu.c')

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index cab26f301eab..2d905770fd88 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -1360,7 +1360,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
 
 static void audit_mappings(struct kvm_vcpu *vcpu)
 {
-    int i;
+    unsigned i;
 
     if (vcpu->mmu.root_level == 4)
         audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4);
--
cgit v1.2.3

From 039576c03c35e2f990ad9bb9c39e1bad3cd60d34 Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Tue, 20 Mar 2007 12:46:50 +0200
Subject: KVM: Avoid guest virtual addresses in string pio userspace interface

The current string pio interface communicates using guest virtual
addresses, relying on userspace to translate addresses and to check
permissions. This interface cannot fully support guest smp, as the
check needs to take into account two pages at once in case an unaligned
string transfer straddles a page boundary.

Change the interface not to communicate guest addresses at all;
instead use a buffer page (mmaped by userspace) and do transfers there.
The kernel manages the virtual to physical translation and can perform
the checks atomically by taking the appropriate locks.

Signed-off-by: Avi Kivity
---
 drivers/kvm/kvm.h      |  21 +++++-
 drivers/kvm/kvm_main.c | 183 +++++++++++++++++++++++++++++++++++++++++++++----
 drivers/kvm/mmu.c      |   9 +++
 drivers/kvm/svm.c      |  40 +++++------
 drivers/kvm/vmx.c      |  40 +++++------
 include/linux/kvm.h    |  11 +--
 6 files changed, 238 insertions(+), 66 deletions(-)

(limited to 'drivers/kvm/mmu.c')

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 1c4a581938bf..7866b34b6c96 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -74,6 +74,8 @@
 
 #define IOPL_SHIFT 12
 
+#define KVM_PIO_PAGE_OFFSET 1
+
 /*
  * Address types:
  *
@@ -220,6 +222,18 @@ enum {
     VCPU_SREG_LDTR,
 };
 
+struct kvm_pio_request {
+    unsigned long count;
+    int cur_count;
+    struct page *guest_pages[2];
+    unsigned guest_page_offset;
+    int in;
+    int size;
+    int string;
+    int down;
+    int rep;
+};
+
 struct kvm_vcpu {
     struct kvm *kvm;
     union {
@@ -275,7 +289,8 @@ struct kvm_vcpu {
     int mmio_size;
     unsigned char mmio_data[8];
     gpa_t mmio_phys_addr;
-    int pio_pending;
+    struct kvm_pio_request pio;
+    void *pio_data;
 
     int sigset_active;
     sigset_t sigset;
@@ -421,6 +436,7 @@ hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa);
 #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
 static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
 hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva);
+struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
 
 void kvm_emulator_want_group7_invlpg(void);
 
@@ -453,6 +469,9 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value,
 
 struct x86_emulate_ctxt;
 
+int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
+          int size, unsigned long count, int string, int down,
+          gva_t address, int rep, unsigned port);
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
 int emulate_clts(struct kvm_vcpu *vcpu);

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index ba7f43a4459e..205998c141fb 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -346,6 +346,17 @@ static void kvm_free_physmem(struct kvm *kvm)
         kvm_free_physmem_slot(&kvm->memslots[i], NULL);
 }
 
+static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
+{
+    int i;
+
+    for (i = 0; i < 2; ++i)
+        if (vcpu->pio.guest_pages[i]) {
+            __free_page(vcpu->pio.guest_pages[i]);
+            vcpu->pio.guest_pages[i] = NULL;
+        }
+}
+
 static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
 {
     if (!vcpu->vmcs)
@@ -357,6 +368,9 @@ static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
     kvm_arch_ops->vcpu_free(vcpu);
     free_page((unsigned long)vcpu->run);
     vcpu->run = NULL;
+    free_page((unsigned long)vcpu->pio_data);
+    vcpu->pio_data = NULL;
+    free_pio_guest_pages(vcpu);
 }
 
 static void kvm_free_vcpus(struct kvm *kvm)
@@ -1550,44 +1564,168 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
 
-static void complete_pio(struct kvm_vcpu *vcpu)
+static int pio_copy_data(struct kvm_vcpu *vcpu)
 {
-    struct kvm_io *io = &vcpu->run->io;
+    void *p = vcpu->pio_data;
+    void *q;
+    unsigned bytes;
+    int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1;
+
+    kvm_arch_ops->vcpu_put(vcpu);
+    q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
+         PAGE_KERNEL);
+    if (!q) {
+        kvm_arch_ops->vcpu_load(vcpu);
+        free_pio_guest_pages(vcpu);
+        return -ENOMEM;
+    }
+    q += vcpu->pio.guest_page_offset;
+    bytes = vcpu->pio.size * vcpu->pio.cur_count;
+    if (vcpu->pio.in)
+        memcpy(q, p, bytes);
+    else
+        memcpy(p, q, bytes);
+    q -= vcpu->pio.guest_page_offset;
+    vunmap(q);
+    kvm_arch_ops->vcpu_load(vcpu);
+    free_pio_guest_pages(vcpu);
+    return 0;
+}
+
+static int complete_pio(struct kvm_vcpu *vcpu)
+{
+    struct kvm_pio_request *io = &vcpu->pio;
     long delta;
+    int r;
 
     kvm_arch_ops->cache_regs(vcpu);
 
     if (!io->string) {
-        if (io->direction == KVM_EXIT_IO_IN)
-            memcpy(&vcpu->regs[VCPU_REGS_RAX], &io->value,
+        if (io->in)
+            memcpy(&vcpu->regs[VCPU_REGS_RAX], vcpu->pio_data,
                    io->size);
     } else {
+        if (io->in) {
+            r = pio_copy_data(vcpu);
+            if (r) {
+                kvm_arch_ops->cache_regs(vcpu);
+                return r;
+            }
+        }
+
         delta = 1;
         if (io->rep) {
-            delta *= io->count;
+            delta *= io->cur_count;
             /*
              * The size of the register should really depend on
              * current address size.
              */
            vcpu->regs[VCPU_REGS_RCX] -= delta;
         }
-        if (io->string_down)
+        if (io->down)
             delta = -delta;
         delta *= io->size;
-        if (io->direction == KVM_EXIT_IO_IN)
+        if (io->in)
             vcpu->regs[VCPU_REGS_RDI] += delta;
         else
             vcpu->regs[VCPU_REGS_RSI] += delta;
     }
 
-    vcpu->pio_pending = 0;
     vcpu->run->io_completed = 0;
 
     kvm_arch_ops->decache_regs(vcpu);
 
-    kvm_arch_ops->skip_emulated_instruction(vcpu);
+    io->count -= io->cur_count;
+    io->cur_count = 0;
+
+    if (!io->count)
+        kvm_arch_ops->skip_emulated_instruction(vcpu);
+    return 0;
 }
 
+int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
+          int size, unsigned long count, int string, int down,
+          gva_t address, int rep, unsigned port)
+{
+    unsigned now, in_page;
+    int i;
+    int nr_pages = 1;
+    struct page *page;
+
+    vcpu->run->exit_reason = KVM_EXIT_IO;
+    vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
+    vcpu->run->io.size = size;
+    vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
+    vcpu->run->io.count = count;
+    vcpu->run->io.port = port;
+    vcpu->pio.count = count;
+    vcpu->pio.cur_count = count;
+    vcpu->pio.size = size;
+    vcpu->pio.in = in;
+    vcpu->pio.string = string;
+    vcpu->pio.down = down;
+    vcpu->pio.guest_page_offset = offset_in_page(address);
+    vcpu->pio.rep = rep;
+
+    if (!string) {
+        kvm_arch_ops->cache_regs(vcpu);
+        memcpy(vcpu->pio_data, &vcpu->regs[VCPU_REGS_RAX], 4);
+        kvm_arch_ops->decache_regs(vcpu);
+        return 0;
+    }
+
+    if (!count) {
+        kvm_arch_ops->skip_emulated_instruction(vcpu);
+        return 1;
+    }
+
+    now = min(count, PAGE_SIZE / size);
+
+    if (!down)
+        in_page = PAGE_SIZE - offset_in_page(address);
+    else
+        in_page = offset_in_page(address) + size;
+    now = min(count, (unsigned long)in_page / size);
+    if (!now) {
+        /*
+         * String I/O straddles page boundary.  Pin two guest pages
+         * so that we satisfy atomicity constraints.  Do just one
+         * transaction to avoid complexity.
+         */
+        nr_pages = 2;
+        now = 1;
+    }
+    if (down) {
+        /*
+         * String I/O in reverse.  Yuck.  Kill the guest, fix later.
+         */
+        printk(KERN_ERR "kvm: guest string pio down\n");
+        inject_gp(vcpu);
+        return 1;
+    }
+    vcpu->run->io.count = now;
+    vcpu->pio.cur_count = now;
+
+    for (i = 0; i < nr_pages; ++i) {
+        spin_lock(&vcpu->kvm->lock);
+        page = gva_to_page(vcpu, address + i * PAGE_SIZE);
+        if (page)
+            get_page(page);
+        vcpu->pio.guest_pages[i] = page;
+        spin_unlock(&vcpu->kvm->lock);
+        if (!page) {
+            inject_gp(vcpu);
+            free_pio_guest_pages(vcpu);
+            return 1;
+        }
+    }
+
+    if (!vcpu->pio.in)
+        return pio_copy_data(vcpu);
+    return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_setup_pio);
+
 static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
     int r;
@@ -1602,9 +1740,11 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
     vcpu->cr8 = kvm_run->cr8;
 
     if (kvm_run->io_completed) {
-        if (vcpu->pio_pending)
-            complete_pio(vcpu);
-        else {
+        if (vcpu->pio.cur_count) {
+            r = complete_pio(vcpu);
+            if (r)
+                goto out;
+        } else {
             memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
             vcpu->mmio_read_completed = 1;
         }
@@ -1620,6 +1760,7 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
     r = kvm_arch_ops->run(vcpu, kvm_run);
 
+out:
     if (vcpu->sigset_active)
         sigprocmask(SIG_SETMASK, &sigsaved, NULL);
 
@@ -1995,9 +2136,12 @@ static struct page *kvm_vcpu_nopage(struct vm_area_struct *vma,
     *type = VM_FAULT_MINOR;
     pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
-    if (pgoff != 0)
+    if (pgoff == 0)
+        page = virt_to_page(vcpu->run);
+    else if (pgoff == KVM_PIO_PAGE_OFFSET)
+        page = virt_to_page(vcpu->pio_data);
+    else
         return NOPAGE_SIGBUS;
-    page = virt_to_page(vcpu->run);
     get_page(page);
     return page;
 }
@@ -2094,6 +2238,12 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
         goto out_unlock;
     vcpu->run = page_address(page);
 
+    page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+    r = -ENOMEM;
+    if (!page)
+        goto out_free_run;
+    vcpu->pio_data = page_address(page);
+
     vcpu->host_fx_image = (char*)ALIGN((hva_t)vcpu->fx_buf,
                        FX_IMAGE_ALIGN);
     vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE;
@@ -2123,6 +2273,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n)
 
 out_free_vcpus:
     kvm_free_vcpu(vcpu);
+out_free_run:
+    free_page((unsigned long)vcpu->run);
+    vcpu->run = NULL;
 out_unlock:
     mutex_unlock(&vcpu->mutex);
 out:
@@ -2491,7 +2644,7 @@ static long kvm_dev_ioctl(struct file *filp,
         r = -EINVAL;
         if (arg)
             goto out;
-        r = PAGE_SIZE;
+        r = 2 * PAGE_SIZE;
         break;
     default:
         ;

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 2d905770fd88..4843e95e54e1 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -735,6 +735,15 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
     return gpa_to_hpa(vcpu, gpa);
 }
 
+struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
+{
+    gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
+
+    if (gpa == UNMAPPED_GVA)
+        return NULL;
+    return pfn_to_page(gpa_to_hpa(vcpu, gpa) >> PAGE_SHIFT);
+}
+
 static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 {
 }

diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 2396ada23777..64afc5cf890d 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -984,7 +984,7 @@ static int io_get_override(struct kvm_vcpu *vcpu,
     return 0;
 }
 
-static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, u64 *address)
+static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, gva_t *address)
 {
     unsigned long addr_mask;
     unsigned long *reg;
@@ -1028,40 +1028,38 @@ static unsigned long io_adress(struct kvm_vcpu *vcpu, int ins, u64 *address)
 static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
     u32 io_info = vcpu->svm->vmcb->control.exit_info_1; //address size bug?
-    int _in = io_info & SVM_IOIO_TYPE_MASK;
+    int size, down, in, string, rep;
+    unsigned port;
+    unsigned long count;
+    gva_t address = 0;
 
     ++kvm_stat.io_exits;
 
     vcpu->svm->next_rip = vcpu->svm->vmcb->control.exit_info_2;
 
-    kvm_run->exit_reason = KVM_EXIT_IO;
-    kvm_run->io.port = io_info >> 16;
-    kvm_run->io.direction = (_in) ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
-    kvm_run->io.size = ((io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT);
-    kvm_run->io.string = (io_info & SVM_IOIO_STR_MASK) != 0;
-    kvm_run->io.rep = (io_info & SVM_IOIO_REP_MASK) != 0;
-    kvm_run->io.count = 1;
+    in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
+    port = io_info >> 16;
+    size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
+    string = (io_info & SVM_IOIO_STR_MASK) != 0;
+    rep = (io_info & SVM_IOIO_REP_MASK) != 0;
+    count = 1;
+    down = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0;
 
-    if (kvm_run->io.string) {
+    if (string) {
         unsigned addr_mask;
 
-        addr_mask = io_adress(vcpu, _in, &kvm_run->io.address);
+        addr_mask = io_adress(vcpu, in, &address);
         if (!addr_mask) {
             printk(KERN_DEBUG "%s: get io address failed\n",
                    __FUNCTION__);
             return 1;
         }
 
-        if (kvm_run->io.rep) {
-            kvm_run->io.count
-                = vcpu->regs[VCPU_REGS_RCX] & addr_mask;
-            kvm_run->io.string_down = (vcpu->svm->vmcb->save.rflags
-                           & X86_EFLAGS_DF) != 0;
-        }
-    } else
-        kvm_run->io.value = vcpu->svm->vmcb->save.rax;
-    vcpu->pio_pending = 1;
-    return 0;
+        if (rep)
+            count = vcpu->regs[VCPU_REGS_RCX] & addr_mask;
+    }
+    return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down,
+                 address, rep, port);
 }
 
 static int nop_on_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)

diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index e69bab6d811d..0d9bf0b36d37 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1394,7 +1394,7 @@ static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
     return 0;
 }
 
-static int get_io_count(struct kvm_vcpu *vcpu, u64 *count)
+static int get_io_count(struct kvm_vcpu *vcpu, unsigned long *count)
 {
     u64 inst;
     gva_t rip;
@@ -1439,35 +1439,35 @@ static int get_io_count(struct kvm_vcpu *vcpu, u64 *count)
 done:
     countr_size *= 8;
     *count = vcpu->regs[VCPU_REGS_RCX] & (~0ULL >> (64 - countr_size));
+    //printk("cx: %lx\n", vcpu->regs[VCPU_REGS_RCX]);
     return 1;
 }
 
 static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
     u64 exit_qualification;
+    int size, down, in, string, rep;
+    unsigned port;
+    unsigned long count;
+    gva_t address;
 
     ++kvm_stat.io_exits;
     exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
-    kvm_run->exit_reason = KVM_EXIT_IO;
-    if (exit_qualification & 8)
-        kvm_run->io.direction = KVM_EXIT_IO_IN;
-    else
-        kvm_run->io.direction = KVM_EXIT_IO_OUT;
-    kvm_run->io.size = (exit_qualification & 7) + 1;
-    kvm_run->io.string = (exit_qualification & 16) != 0;
-    kvm_run->io.string_down
-        = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0;
-    kvm_run->io.rep = (exit_qualification & 32) != 0;
-    kvm_run->io.port = exit_qualification >> 16;
-    kvm_run->io.count = 1;
-    if (kvm_run->io.string) {
-        if (!get_io_count(vcpu, &kvm_run->io.count))
+    in = (exit_qualification & 8) != 0;
+    size = (exit_qualification & 7) + 1;
+    string = (exit_qualification & 16) != 0;
+    down = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_DF) != 0;
+    count = 1;
+    rep = (exit_qualification & 32) != 0;
+    port = exit_qualification >> 16;
+    address = 0;
+    if (string) {
+        if (rep && !get_io_count(vcpu, &count))
             return 1;
-        kvm_run->io.address = vmcs_readl(GUEST_LINEAR_ADDRESS);
-    } else
-        kvm_run->io.value = vcpu->regs[VCPU_REGS_RAX]; /* rax */
-    vcpu->pio_pending = 1;
-    return 0;
+        address = vmcs_readl(GUEST_LINEAR_ADDRESS);
+    }
+    return kvm_setup_pio(vcpu, kvm_run, in, size, count, string, down,
+                 address, rep, port);
 }
 
 static void

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index dad90816cad8..728b24cf5d77 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -86,16 +86,9 @@ struct kvm_run {
 #define KVM_EXIT_IO_OUT 1
             __u8 direction;
             __u8 size; /* bytes */
-            __u8 string;
-            __u8 string_down;
-            __u8 rep;
-            __u8 pad;
             __u16 port;
-            __u64 count;
-            union {
-                __u64 address;
-                __u32 value;
-            };
+            __u32 count;
+            __u64 data_offset; /* relative to kvm_run start */
         } io;
         struct {
         } debug;
--
cgit v1.2.3

From aac012245a59d78372dc66d292ba567367d86b60 Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Tue, 20 Mar 2007 14:34:28 +0200
Subject: KVM: MMU: Remove global pte tracking

The initial, noncaching, version of the kvm mmu flushed all the
nonglobal shadow page table translations (much like a native tlb
flush). The new implementation flushes translations only when they
change, rendering global pte tracking superfluous.

This removes the unused tracking mechanism and storage space.

Signed-off-by: Avi Kivity
---
 drivers/kvm/kvm.h | 1 -
 drivers/kvm/mmu.c | 9 ---------
 2 files changed, 10 deletions(-)

(limited to 'drivers/kvm/mmu.c')

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 7866b34b6c96..a4331da816d0 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -136,7 +136,6 @@ struct kvm_mmu_page {
     unsigned long slot_bitmap; /* One bit set per slot which has memory
                                 * in this shadow page.
                                 */
-    int global;       /* Set if all ptes in this page are global */
     int multimapped;  /* More than one parent_pte? */
     int root_count;   /* Currently serving as active root */
     union {

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 4843e95e54e1..2930d7cc7c06 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -461,7 +461,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
     list_add(&page->link, &vcpu->kvm->active_mmu_pages);
     ASSERT(is_empty_shadow_page(page->page_hpa));
     page->slot_bitmap = 0;
-    page->global = 1;
     page->multimapped = 0;
     page->parent_pte = parent_pte;
     --vcpu->kvm->n_free_mmu_pages;
@@ -927,11 +926,6 @@ static void paging_new_cr3(struct kvm_vcpu *vcpu)
     kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
 }
 
-static void mark_pagetable_nonglobal(void *shadow_pte)
-{
-    page_header(__pa(shadow_pte))->global = 0;
-}
-
 static inline void set_pte_common(struct kvm_vcpu *vcpu,
                   u64 *shadow_pte,
                   gpa_t gaddr,
@@ -949,9 +943,6 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu,
 
     *shadow_pte |= access_bits;
 
-    if (!(*shadow_pte & PT_GLOBAL_MASK))
-        mark_pagetable_nonglobal(shadow_pte);
-
     if (is_error_hpa(paddr)) {
         *shadow_pte |= gaddr;
         *shadow_pte |= PT_SHADOW_IO_MARK;
--
cgit v1.2.3

From d28c6cfbbc5e2d4fccfe6d733995ed5971ca87f6 Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Fri, 23 Mar 2007 09:55:25 +0200
Subject: KVM: MMU: Fix hugepage pdes mapping same physical address with different access

The kvm mmu keeps a shadow page for hugepage pdes; if several such pdes
map the same physical address, they share the same shadow page.  This
is a fairly common case (kernel mappings on i386 nonpae Linux, for
example).

However, if the two pdes map the same memory but with different
permissions, kvm will happily use the cached shadow page.  If the
access through the more permissive pde occurs after the access through
the stricter pde, an endless pagefault loop will be generated and the
guest will make no progress.

Fix by making the access permissions part of the cache lookup key.

The fix allows Xen pae to boot on kvm and run guest domains.

Thanks to Jeremy Fitzhardinge for reporting the bug and testing the fix.
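For illustration only, a minimal sketch of the idea (the pde variable
below is hypothetical; the role layout is the one this patch adds):

    /* sketch: the pde's permission bits join the shadow page lookup key */
    union kvm_mmu_page_role role = { .word = 0 };

    role.level = level;
    role.metaphysical = 1;
    /* keep only the user/writable bits, shifted down to bits 0:1 */
    role.hugepage_access = (pde & (PT_USER_MASK | PT_WRITABLE_MASK))
                >> PT_WRITABLE_SHIFT;
    /* two pdes that differ only in u/w now get distinct shadow pages */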
Signed-off-by: Avi Kivity
---
 drivers/kvm/kvm.h         | 2 ++
 drivers/kvm/mmu.c         | 8 +++++---
 drivers/kvm/paging_tmpl.h | 7 ++++++-
 3 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'drivers/kvm/mmu.c')

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 7361c45d70c9..f5e343cb06b0 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -109,6 +109,7 @@ struct kvm_pte_chain {
  * bits 4:7 - page table level for this shadow (1-4)
  * bits 8:9 - page table quadrant for 2-level guests
  * bit   16 - "metaphysical" - gfn is not a real page (huge page/real mode)
+ * bits 17:18 - "access" - the user and writable bits of a huge page pde
  */
 union kvm_mmu_page_role {
     unsigned word;
@@ -118,6 +119,7 @@ union kvm_mmu_page_role {
         unsigned quadrant : 2;
         unsigned pad_for_nice_hex_output : 6;
         unsigned metaphysical : 1;
+        unsigned hugepage_access : 2;
     };
 };
 
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 2930d7cc7c06..c738fb1cea30 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -568,6 +568,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
                          gva_t gaddr,
                          unsigned level,
                          int metaphysical,
+                         unsigned hugepage_access,
                          u64 *parent_pte)
 {
     union kvm_mmu_page_role role;
@@ -581,6 +582,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
     role.glevels = vcpu->mmu.root_level;
     role.level = level;
     role.metaphysical = metaphysical;
+    role.hugepage_access = hugepage_access;
     if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) {
         quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
         quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
@@ -780,7 +782,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
                 >> PAGE_SHIFT;
             new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
                              v, level - 1,
-                             1, &table[index]);
+                             1, 0, &table[index]);
             if (!new_table) {
                 pgprintk("nonpaging_map: ENOMEM\n");
                 return -ENOMEM;
@@ -835,7 +837,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 
         ASSERT(!VALID_PAGE(root));
         page = kvm_mmu_get_page(vcpu, root_gfn, 0,
-                    PT64_ROOT_LEVEL, 0, NULL);
+                    PT64_ROOT_LEVEL, 0, 0, NULL);
         root = page->page_hpa;
         ++page->root_count;
         vcpu->mmu.root_hpa = root;
@@ -852,7 +854,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
             root_gfn = 0;
         page = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
                     PT32_ROOT_LEVEL, !is_paging(vcpu),
-                    NULL);
+                    0, NULL);
         root = page->page_hpa;
         ++page->root_count;
         vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;

diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 17bd4400c92b..b94010dad809 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -247,6 +247,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
         u64 shadow_pte;
         int metaphysical;
         gfn_t table_gfn;
+        unsigned hugepage_access = 0;
 
         if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) {
             if (level == PT_PAGE_TABLE_LEVEL)
@@ -276,6 +277,9 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
         if (level - 1 == PT_PAGE_TABLE_LEVEL
             && walker->level == PT_DIRECTORY_LEVEL) {
             metaphysical = 1;
+            hugepage_access = *guest_ent;
+            hugepage_access &= PT_USER_MASK | PT_WRITABLE_MASK;
+            hugepage_access >>= PT_WRITABLE_SHIFT;
             table_gfn = (*guest_ent & PT_BASE_ADDR_MASK)
                 >> PAGE_SHIFT;
         } else {
@@ -283,7 +287,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
             table_gfn = walker->table_gfn[level - 2];
         }
         shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
-                           metaphysical, shadow_ent);
+                           metaphysical, hugepage_access,
+                           shadow_ent);
         shadow_addr = shadow_page->page_hpa;
         shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
             | PT_WRITABLE_MASK | PT_USER_MASK;
--
cgit v1.2.3

From 36868f7b0efd0b6a1d45fe3b40a6c4bc63222659 Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Mon, 26 Mar 2007 19:31:52 +0200
Subject: KVM: Use list_move()

Use list_move() where possible.  Noticed by Dor Laor.

Signed-off-by: Avi Kivity
---
 drivers/kvm/mmu.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

(limited to 'drivers/kvm/mmu.c')

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index c738fb1cea30..d81b9cd3465f 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -437,9 +437,8 @@ static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa)
     struct kvm_mmu_page *page_head = page_header(page_hpa);
 
     ASSERT(is_empty_shadow_page(page_hpa));
-    list_del(&page_head->link);
     page_head->page_hpa = page_hpa;
-    list_add(&page_head->link, &vcpu->free_pages);
+    list_move(&page_head->link, &vcpu->free_pages);
     ++vcpu->kvm->n_free_mmu_pages;
 }
 
@@ -457,8 +456,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
         return NULL;
 
     page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link);
-    list_del(&page->link);
-    list_add(&page->link, &vcpu->kvm->active_mmu_pages);
+    list_move(&page->link, &vcpu->kvm->active_mmu_pages);
     ASSERT(is_empty_shadow_page(page->page_hpa));
     page->slot_bitmap = 0;
     page->multimapped = 0;
@@ -670,10 +668,8 @@ static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu,
     if (!page->root_count) {
         hlist_del(&page->hash_link);
         kvm_mmu_free_page(vcpu, page->page_hpa);
-    } else {
-        list_del(&page->link);
-        list_add(&page->link, &vcpu->kvm->active_mmu_pages);
-    }
+    } else
+        list_move(&page->link, &vcpu->kvm->active_mmu_pages);
 }
 
 static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
--
cgit v1.2.3

From e0fa826f969c262c23908953bf85add487cc2e6c Mon Sep 17 00:00:00 2001
From: Dor Laor
Date: Fri, 30 Mar 2007 13:06:33 +0300
Subject: KVM: Add mmu cache clear function

Functions that play around with the physical memory map need a way to
clear mappings to possibly nonexistent or invalid memory.  Both the mmu
cache and the processor tlb are cleared.
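A hedged sketch of a typical caller (the function below is hypothetical
and not part of this patch):

    /* sketch: invalidate everything after the physical memory map changes */
    static void example_memory_map_changed(struct kvm *kvm)
    {
        int i;

        /* zap shadow pages and flush the tlb for every vcpu */
        for (i = 0; i < KVM_MAX_VCPUS; ++i)
            kvm_mmu_zap_all(&kvm->vcpus[i]);
    }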
Signed-off-by: Dor Laor
Signed-off-by: Avi Kivity
---
 drivers/kvm/kvm.h |  1 +
 drivers/kvm/mmu.c | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)

(limited to 'drivers/kvm/mmu.c')

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 6d0bd7aab92e..59357bea5b61 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -430,6 +430,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu);
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 
 void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot);
+void kvm_mmu_zap_all(struct kvm_vcpu *vcpu);
 
 hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa);
 #define HPA_MSB ((sizeof(hpa_t) * 8) - 1)

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index d81b9cd3465f..376800a33968 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -1314,6 +1314,23 @@ void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot)
     }
 }
 
+void kvm_mmu_zap_all(struct kvm_vcpu *vcpu)
+{
+    destroy_kvm_mmu(vcpu);
+
+    while (!list_empty(&vcpu->kvm->active_mmu_pages)) {
+        struct kvm_mmu_page *page;
+
+        page = container_of(vcpu->kvm->active_mmu_pages.next,
+                    struct kvm_mmu_page, link);
+        kvm_mmu_zap_page(vcpu, page);
+    }
+
+    mmu_free_memory_caches(vcpu);
+    kvm_arch_ops->tlb_flush(vcpu);
+    init_kvm_mmu(vcpu);
+}
+
 #ifdef AUDIT
 
 static const char *audit_msg;
--
cgit v1.2.3

From 954bbbc236afe23b368abdf4942f313a5f6e1d50 Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Fri, 30 Mar 2007 14:02:32 +0300
Subject: KVM: Simplify gfn_to_page()

Mapping a guest page to a host page is a common operation.  Currently,
one has first to find the memory slot where the page belongs
(gfn_to_memslot), then locate the page itself (gfn_to_page()).

This is clumsy, and also won't work well with memory aliases.  So
simplify gfn_to_page() not to require memory slot translation first,
and instead do it internally.

Signed-off-by: Avi Kivity
---
 drivers/kvm/kvm.h      | 12 +-----------
 drivers/kvm/kvm_main.c | 45 +++++++++++++++++++++++++--------------------
 drivers/kvm/mmu.c      | 12 ++++--------
 drivers/kvm/vmx.c      |  6 +++---
 4 files changed, 33 insertions(+), 42 deletions(-)

(limited to 'drivers/kvm/mmu.c')

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 59357bea5b61..d19985a5508a 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -443,11 +443,7 @@ void kvm_emulator_want_group7_invlpg(void);
 
 extern hpa_t bad_page_address;
 
-static inline struct page *gfn_to_page(struct kvm_memory_slot *slot, gfn_t gfn)
-{
-    return slot->phys_mem[gfn - slot->base_gfn];
-}
-
+struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
@@ -523,12 +519,6 @@ static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
     return vcpu->mmu.page_fault(vcpu, gva, error_code);
 }
 
-static inline struct page *_gfn_to_page(struct kvm *kvm, gfn_t gfn)
-{
-    struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
-    return (slot) ? slot->phys_mem[gfn - slot->base_gfn] : NULL;
-}
-
 static inline int is_long_mode(struct kvm_vcpu *vcpu)
 {
 #ifdef CONFIG_X86_64

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 33eade7e237c..a0ec5dda3305 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -420,12 +420,12 @@ static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
     u64 pdpte;
     u64 *pdpt;
     int ret;
-    struct kvm_memory_slot *memslot;
+    struct page *page;
 
     spin_lock(&vcpu->kvm->lock);
-    memslot = gfn_to_memslot(vcpu->kvm, pdpt_gfn);
-    /* FIXME: !memslot - emulate? 0xff? */
-    pdpt = kmap_atomic(gfn_to_page(memslot, pdpt_gfn), KM_USER0);
+    page = gfn_to_page(vcpu->kvm, pdpt_gfn);
+    /* FIXME: !page - emulate? 0xff? */
+    pdpt = kmap_atomic(page, KM_USER0);
 
     ret = 1;
     for (i = 0; i < 4; ++i) {
@@ -861,6 +861,17 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(gfn_to_memslot);
 
+struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+{
+    struct kvm_memory_slot *slot;
+
+    slot = gfn_to_memslot(kvm, gfn);
+    if (!slot)
+        return NULL;
+    return slot->phys_mem[gfn - slot->base_gfn];
+}
+EXPORT_SYMBOL_GPL(gfn_to_page);
+
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 {
     int i;
@@ -899,20 +910,20 @@ static int emulator_read_std(unsigned long addr,
         unsigned offset = addr & (PAGE_SIZE-1);
         unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
         unsigned long pfn;
-        struct kvm_memory_slot *memslot;
-        void *page;
+        struct page *page;
+        void *page_virt;
 
         if (gpa == UNMAPPED_GVA)
             return X86EMUL_PROPAGATE_FAULT;
         pfn = gpa >> PAGE_SHIFT;
-        memslot = gfn_to_memslot(vcpu->kvm, pfn);
-        if (!memslot)
+        page = gfn_to_page(vcpu->kvm, pfn);
+        if (!page)
             return X86EMUL_UNHANDLEABLE;
-        page = kmap_atomic(gfn_to_page(memslot, pfn), KM_USER0);
+        page_virt = kmap_atomic(page, KM_USER0);
 
-        memcpy(data, page + offset, tocopy);
+        memcpy(data, page_virt + offset, tocopy);
 
-        kunmap_atomic(page, KM_USER0);
+        kunmap_atomic(page_virt, KM_USER0);
 
         bytes -= tocopy;
         data += tocopy;
@@ -963,16 +974,14 @@ static int emulator_read_emulated(unsigned long addr,
 static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
                    unsigned long val, int bytes)
 {
-    struct kvm_memory_slot *m;
     struct page *page;
     void *virt;
 
     if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT))
         return 0;
-    m = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT);
-    if (!m)
+    page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+    if (!page)
         return 0;
-    page = gfn_to_page(m, gpa >> PAGE_SHIFT);
     kvm_mmu_pre_write(vcpu, gpa, bytes);
     mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT);
     virt = kmap_atomic(page, KM_USER0);
@@ -2516,15 +2525,11 @@ static struct page *kvm_vm_nopage(struct vm_area_struct *vma,
 {
     struct kvm *kvm = vma->vm_file->private_data;
     unsigned long pgoff;
-    struct kvm_memory_slot *slot;
     struct page *page;
 
     *type = VM_FAULT_MINOR;
     pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
-    slot = gfn_to_memslot(kvm, pgoff);
-    if (!slot)
-        return NOPAGE_SIGBUS;
-    page = gfn_to_page(slot, pgoff);
+    page = gfn_to_page(kvm, pgoff);
     if (!page)
         return NOPAGE_SIGBUS;
     get_page(page);

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 376800a33968..8bdb9ca1811c 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -390,13 +390,11 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
 {
     struct kvm *kvm = vcpu->kvm;
     struct page *page;
-    struct kvm_memory_slot *slot;
     struct kvm_rmap_desc *desc;
     u64 *spte;
 
-    slot = gfn_to_memslot(kvm, gfn);
-    BUG_ON(!slot);
-    page = gfn_to_page(slot, gfn);
+    page = gfn_to_page(kvm, gfn);
+    BUG_ON(!page);
 
     while (page_private(page)) {
         if (!(page_private(page) & 1))
@@ -711,14 +709,12 @@ hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
 
 hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
 {
-    struct kvm_memory_slot *slot;
     struct page *page;
 
     ASSERT((gpa & HPA_ERR_MASK) == 0);
-    slot = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT);
-    if (!slot)
+    page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+    if (!page)
         return gpa | HPA_ERR_MASK;
-    page = gfn_to_page(slot, gpa >> PAGE_SHIFT);
     return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT) | (gpa & (PAGE_SIZE-1));
 }
 
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index b64b7b792e84..61a611691e50 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -926,9 +926,9 @@ static int init_rmode_tss(struct kvm* kvm)
     gfn_t fn = rmode_tss_base(kvm) >> PAGE_SHIFT;
     char *page;
 
-    p1 = _gfn_to_page(kvm, fn++);
-    p2 = _gfn_to_page(kvm, fn++);
-    p3 = _gfn_to_page(kvm, fn);
+    p1 = gfn_to_page(kvm, fn++);
+    p2 = gfn_to_page(kvm, fn++);
+    p3 = gfn_to_page(kvm, fn);
 
     if (!p1 || !p2 || !p3) {
         kvm_printf(kvm,"%s: gfn_to_page failed\n", __FUNCTION__);
--
cgit v1.2.3

From 417726a3fbecb2092f1054bbaee87bc442b05ef3 Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Thu, 12 Apr 2007 17:35:58 +0300
Subject: KVM: Handle partial pae pdptr

Some guests (Solaris) do not set up all four pdptrs, but leave some
invalid.  kvm incorrectly treated these as valid page directories,
pinning the wrong pages and causing general confusion.

Fix by checking the valid bit of a pae pdpte.  This closes sourceforge
bug 1698922.

Signed-off-by: Avi Kivity
---
 drivers/kvm/mmu.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'drivers/kvm/mmu.c')

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 8bdb9ca1811c..9ff74805c7d1 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -806,10 +806,12 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
     for (i = 0; i < 4; ++i) {
         hpa_t root = vcpu->mmu.pae_root[i];
 
-        ASSERT(VALID_PAGE(root));
-        root &= PT64_BASE_ADDR_MASK;
-        page = page_header(root);
-        --page->root_count;
+        if (root) {
+            ASSERT(VALID_PAGE(root));
+            root &= PT64_BASE_ADDR_MASK;
+            page = page_header(root);
+            --page->root_count;
+        }
         vcpu->mmu.pae_root[i] = INVALID_PAGE;
     }
     vcpu->mmu.root_hpa = INVALID_PAGE;
@@ -840,9 +842,13 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
         hpa_t root = vcpu->mmu.pae_root[i];
 
         ASSERT(!VALID_PAGE(root));
-        if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL)
+        if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL) {
+            if (!is_present_pte(vcpu->pdptrs[i])) {
+                vcpu->mmu.pae_root[i] = 0;
+                continue;
+            }
             root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT;
-        else if (vcpu->mmu.root_level == 0)
+        } else if (vcpu->mmu.root_level == 0)
             root_gfn = 0;
         page = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
                     PT32_ROOT_LEVEL, !is_paging(vcpu),
--
cgit v1.2.3

From b5a33a75720c03d58d8281a72b45ffd214f00ed7 Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Sun, 15 Apr 2007 16:31:09 +0300
Subject: KVM: Use slab caches to allocate mmu data structures

Better leak detection, statistics, memory use, speed -- goodness all
around.
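As a rough sketch of the allocation pattern this switches to (names
taken from the patch; error handling elided):

    /* sketch: a named slab makes leaks visible under /proc/slabinfo */
    struct kmem_cache *cache = kmem_cache_create("kvm_pte_chain",
                     sizeof(struct kvm_pte_chain),
                     0, 0, NULL, NULL);
    void *obj = kmem_cache_zalloc(cache, GFP_KERNEL); /* zeroed object */

    kmem_cache_free(cache, obj);
    kmem_cache_destroy(cache);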
Signed-off-by: Avi Kivity
---
 drivers/kvm/kvm.h      |  3 +++
 drivers/kvm/kvm_main.c |  7 +++++++
 drivers/kvm/mmu.c      | 39 +++++++++++++++++++++++++++++++++++----
 3 files changed, 45 insertions(+), 4 deletions(-)

(limited to 'drivers/kvm/mmu.c')

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index fceeb840a255..b9c318a9e334 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -433,6 +433,9 @@ extern struct kvm_arch_ops *kvm_arch_ops;
 int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module);
 void kvm_exit_arch(void);
 
+int kvm_mmu_module_init(void);
+void kvm_mmu_module_exit(void);
+
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
 int kvm_mmu_setup(struct kvm_vcpu *vcpu);

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 0b30631585bd..ab4dbd7fa5f8 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -3063,6 +3063,10 @@ static __init int kvm_init(void)
     static struct page *bad_page;
     int r;
 
+    r = kvm_mmu_module_init();
+    if (r)
+        goto out4;
+
     r = register_filesystem(&kvm_fs_type);
     if (r)
         goto out3;
@@ -3091,6 +3095,8 @@ out:
 out2:
     unregister_filesystem(&kvm_fs_type);
 out3:
+    kvm_mmu_module_exit();
+out4:
     return r;
 }
 
@@ -3100,6 +3106,7 @@ static __exit void kvm_exit(void)
     __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT));
     mntput(kvmfs_mnt);
     unregister_filesystem(&kvm_fs_type);
+    kvm_mmu_module_exit();
 }
 
 module_init(kvm_init)

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 9ff74805c7d1..a368ea8297f3 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -159,6 +159,9 @@ struct kvm_rmap_desc {
     struct kvm_rmap_desc *more;
 };
 
+static struct kmem_cache *pte_chain_cache;
+static struct kmem_cache *rmap_desc_cache;
+
 static int is_write_protection(struct kvm_vcpu *vcpu)
 {
     return vcpu->cr0 & CR0_WP_MASK;
@@ -196,14 +199,14 @@ static int is_rmap_pte(u64 pte)
 }
 
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
-                  size_t objsize, int min)
+                  struct kmem_cache *base_cache, int min)
 {
     void *obj;
 
     if (cache->nobjs >= min)
         return 0;
     while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
-        obj = kzalloc(objsize, GFP_NOWAIT);
+        obj = kmem_cache_zalloc(base_cache, GFP_NOWAIT);
         if (!obj)
             return -ENOMEM;
         cache->objects[cache->nobjs++] = obj;
@@ -222,11 +225,11 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
     int r;
 
     r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache,
-                   sizeof(struct kvm_pte_chain), 4);
+                   pte_chain_cache, 4);
     if (r)
         goto out;
     r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
-                   sizeof(struct kvm_rmap_desc), 1);
+                   rmap_desc_cache, 1);
 out:
     return r;
 }
@@ -1333,6 +1336,34 @@ void kvm_mmu_zap_all(struct kvm_vcpu *vcpu)
     init_kvm_mmu(vcpu);
 }
 
+void kvm_mmu_module_exit(void)
+{
+    if (pte_chain_cache)
+        kmem_cache_destroy(pte_chain_cache);
+    if (rmap_desc_cache)
+        kmem_cache_destroy(rmap_desc_cache);
+}
+
+int kvm_mmu_module_init(void)
+{
+    pte_chain_cache = kmem_cache_create("kvm_pte_chain",
+                        sizeof(struct kvm_pte_chain),
+                        0, 0, NULL, NULL);
+    if (!pte_chain_cache)
+        goto nomem;
+    rmap_desc_cache = kmem_cache_create("kvm_rmap_desc",
+                        sizeof(struct kvm_rmap_desc),
+                        0, 0, NULL, NULL);
+    if (!rmap_desc_cache)
+        goto nomem;
+
+    return 0;
+
+nomem:
+    kvm_mmu_module_exit();
+    return -ENOMEM;
+}
+
 #ifdef AUDIT
 
 static const char *audit_msg;
--
cgit v1.2.3

From 8c4385024d31cb909ad84a2cafa5c83a4c5fab61 Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Mon, 16 Apr 2007 11:53:17 +0300
Subject: KVM: Retry sleeping allocation if atomic allocation fails

This avoids -ENOMEM under memory pressure.

Signed-off-by: Avi Kivity
---
 drivers/kvm/mmu.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

(limited to 'drivers/kvm/mmu.c')

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index a368ea8297f3..c814394a966d 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -199,14 +199,15 @@ static int is_rmap_pte(u64 pte)
 }
 
 static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
-                  struct kmem_cache *base_cache, int min)
+                  struct kmem_cache *base_cache, int min,
+                  gfp_t gfp_flags)
 {
     void *obj;
 
     if (cache->nobjs >= min)
         return 0;
     while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
-        obj = kmem_cache_zalloc(base_cache, GFP_NOWAIT);
+        obj = kmem_cache_zalloc(base_cache, gfp_flags);
         if (!obj)
             return -ENOMEM;
         cache->objects[cache->nobjs++] = obj;
@@ -220,20 +221,35 @@ static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
         kfree(mc->objects[--mc->nobjs]);
 }
 
-static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
+static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags)
 {
     int r;
 
     r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache,
-                   pte_chain_cache, 4);
+                   pte_chain_cache, 4, gfp_flags);
     if (r)
         goto out;
     r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
-                   rmap_desc_cache, 1);
+                   rmap_desc_cache, 1, gfp_flags);
 out:
     return r;
 }
 
+static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
+{
+    int r;
+
+    r = __mmu_topup_memory_caches(vcpu, GFP_NOWAIT);
+    if (r < 0) {
+        spin_unlock(&vcpu->kvm->lock);
+        kvm_arch_ops->vcpu_put(vcpu);
+        r = __mmu_topup_memory_caches(vcpu, GFP_KERNEL);
+        kvm_arch_ops->vcpu_load(vcpu);
+        spin_lock(&vcpu->kvm->lock);
+    }
+    return r;
+}
+
 static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 {
     mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
--
cgit v1.2.3

From d6c69ee9a24b307ce94e55ebfba6208a830c9ecb Mon Sep 17 00:00:00 2001
From: Yaozu Dong
Date: Wed, 25 Apr 2007 14:17:25 +0800
Subject: KVM: MMU: Avoid heavy ASSERT in non-debug mode.

Signed-off-by: Avi Kivity
---
 drivers/kvm/mmu.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'drivers/kvm/mmu.c')

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index c814394a966d..8ccf84e3fda8 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -52,11 +52,15 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}
 static int dbg = 1;
 #endif
 
+#ifndef MMU_DEBUG
+#define ASSERT(x) do { } while (0)
+#else
 #define ASSERT(x)                                                       \
     if (!(x)) {                                                         \
         printk(KERN_WARNING "assertion failed %s:%d: %s\n",             \
                __FILE__, __LINE__, #x);                                 \
     }
+#endif
 
 #define PT64_PT_BITS 9
 #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
@@ -434,6 +438,7 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
     }
 }
 
+#ifdef MMU_DEBUG
 static int is_empty_shadow_page(hpa_t page_hpa)
 {
     u64 *pos;
@@ -448,6 +453,7 @@ static int is_empty_shadow_page(hpa_t page_hpa)
     }
     return 1;
 }
+#endif
 
 static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa)
 {
--
cgit v1.2.3

From 1165f5fec18c077bdba88e7125fd41f8e3617cb4 Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Thu, 19 Apr 2007 17:27:43 +0300
Subject: KVM: Per-vcpu statistics

Make the exit statistics per-vcpu instead of global.  This gives a 3.5%
boost when running one virtual machine per core on my two socket dual
core (4 cores total) machine.
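A sketch of how a single counter is then read back, mirroring the
stat_get() helper added below (the kvm pointer here is hypothetical):

    /* sketch: a counter is named by its byte offset inside kvm_vcpu */
    unsigned offset = offsetof(struct kvm_vcpu, stat.halt_exits);
    u64 total = 0;
    int i;

    for (i = 0; i < KVM_MAX_VCPUS; ++i)
        total += *(u32 *)((void *)&kvm->vcpus[i] + offset);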
Signed-off-by: Avi Kivity
---
 drivers/kvm/kvm.h         | 35 ++++++++++++++-------------
 drivers/kvm/kvm_main.c    | 61 +++++++++++++++++++++++++++++++++--------------
 drivers/kvm/mmu.c         |  2 +-
 drivers/kvm/paging_tmpl.h |  2 +-
 drivers/kvm/svm.c         | 14 +++++------
 drivers/kvm/vmx.c         | 18 +++++++-------
 6 files changed, 79 insertions(+), 53 deletions(-)

(limited to 'drivers/kvm/mmu.c')

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index b9c318a9e334..d1a90c5d76ce 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -236,6 +236,22 @@ struct kvm_pio_request {
     int rep;
 };
 
+struct kvm_stat {
+    u32 pf_fixed;
+    u32 pf_guest;
+    u32 tlb_flush;
+    u32 invlpg;
+
+    u32 exits;
+    u32 io_exits;
+    u32 mmio_exits;
+    u32 signal_exits;
+    u32 irq_window_exits;
+    u32 halt_exits;
+    u32 request_irq_exits;
+    u32 irq_exits;
+};
+
 struct kvm_vcpu {
     struct kvm *kvm;
     union {
@@ -298,6 +314,8 @@ struct kvm_vcpu {
     int sigset_active;
     sigset_t sigset;
 
+    struct kvm_stat stat;
+
     struct {
         int active;
         u8 save_iopl;
@@ -347,22 +365,6 @@ struct kvm {
     struct file *filp;
 };
 
-struct kvm_stat {
-    u32 pf_fixed;
-    u32 pf_guest;
-    u32 tlb_flush;
-    u32 invlpg;
-
-    u32 exits;
-    u32 io_exits;
-    u32 mmio_exits;
-    u32 signal_exits;
-    u32 irq_window_exits;
-    u32 halt_exits;
-    u32 request_irq_exits;
-    u32 irq_exits;
-};
-
 struct descriptor_table {
     u16 limit;
     unsigned long base;
@@ -424,7 +426,6 @@ struct kvm_arch_ops {
                    unsigned char *hypercall_addr);
 };
 
-extern struct kvm_stat kvm_stat;
 extern struct kvm_arch_ops *kvm_arch_ops;
 
 #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index f5356358acff..911c8175cc08 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -51,27 +51,27 @@ static DEFINE_SPINLOCK(kvm_lock);
 static LIST_HEAD(vm_list);
 
 struct kvm_arch_ops *kvm_arch_ops;
-struct kvm_stat kvm_stat;
-EXPORT_SYMBOL_GPL(kvm_stat);
+
+#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
 
 static struct kvm_stats_debugfs_item {
     const char *name;
-    u32 *data;
+    int offset;
     struct dentry *dentry;
 } debugfs_entries[] = {
-    { "pf_fixed", &kvm_stat.pf_fixed },
-    { "pf_guest", &kvm_stat.pf_guest },
-    { "tlb_flush", &kvm_stat.tlb_flush },
-    { "invlpg", &kvm_stat.invlpg },
-    { "exits", &kvm_stat.exits },
-    { "io_exits", &kvm_stat.io_exits },
-    { "mmio_exits", &kvm_stat.mmio_exits },
-    { "signal_exits", &kvm_stat.signal_exits },
-    { "irq_window", &kvm_stat.irq_window_exits },
-    { "halt_exits", &kvm_stat.halt_exits },
-    { "request_irq", &kvm_stat.request_irq_exits },
-    { "irq_exits", &kvm_stat.irq_exits },
-    { NULL, NULL }
+    { "pf_fixed", STAT_OFFSET(pf_fixed) },
+    { "pf_guest", STAT_OFFSET(pf_guest) },
+    { "tlb_flush", STAT_OFFSET(tlb_flush) },
+    { "invlpg", STAT_OFFSET(invlpg) },
+    { "exits", STAT_OFFSET(exits) },
+    { "io_exits", STAT_OFFSET(io_exits) },
+    { "mmio_exits", STAT_OFFSET(mmio_exits) },
+    { "signal_exits", STAT_OFFSET(signal_exits) },
+    { "irq_window", STAT_OFFSET(irq_window_exits) },
+    { "halt_exits", STAT_OFFSET(halt_exits) },
+    { "request_irq", STAT_OFFSET(request_irq_exits) },
+    { "irq_exits", STAT_OFFSET(irq_exits) },
+    { NULL }
 };
 
 static struct dentry *debugfs_dir;
@@ -2930,14 +2930,39 @@ static struct notifier_block kvm_cpu_notifier = {
     .priority = 20, /* must be > scheduler priority */
 };
 
+static u64 stat_get(void *_offset)
+{
+    unsigned offset = (long)_offset;
+    u64 total = 0;
+    struct kvm *kvm;
+    struct kvm_vcpu *vcpu;
+    int i;
+
+    spin_lock(&kvm_lock);
+    list_for_each_entry(kvm, &vm_list, vm_list)
+        for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+            vcpu = &kvm->vcpus[i];
+            total += *(u32 *)((void *)vcpu + offset);
+        }
+    spin_unlock(&kvm_lock);
+    return total;
+}
+
+static void stat_set(void *offset, u64 val)
+{
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(stat_fops, stat_get, stat_set, "%llu\n");
+
 static __init void kvm_init_debug(void)
 {
     struct kvm_stats_debugfs_item *p;
 
     debugfs_dir = debugfs_create_dir("kvm", NULL);
     for (p = debugfs_entries; p->name; ++p)
-        p->dentry = debugfs_create_u32(p->name, 0444, debugfs_dir,
-                           p->data);
+        p->dentry = debugfs_create_file(p->name, 0444, debugfs_dir,
+                        (void *)(long)p->offset,
+                        &stat_fops);
 }
 
 static void kvm_exit_debug(void)

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 8ccf84e3fda8..32c64f682081 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -936,7 +936,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)
 
 static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 {
-    ++kvm_stat.tlb_flush;
+    ++vcpu->stat.tlb_flush;
     kvm_arch_ops->tlb_flush(vcpu);
 }
 
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index b94010dad809..73ffbffb1097 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -448,7 +448,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
     if (is_io_pte(*shadow_pte))
         return 1;
 
-    ++kvm_stat.pf_fixed;
+    ++vcpu->stat.pf_fixed;
     kvm_mmu_audit(vcpu, "post page fault (fixed)");
 
     return write_pt;

diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 61ed7352e506..644efc5381ad 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -914,7 +914,7 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
     case EMULATE_DONE:
         return 1;
     case EMULATE_DO_MMIO:
-        ++kvm_stat.mmio_exits;
+        ++vcpu->stat.mmio_exits;
         kvm_run->exit_reason = KVM_EXIT_MMIO;
         return 0;
     case EMULATE_FAIL:
@@ -1054,7 +1054,7 @@ static int io_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
     unsigned long count;
     gva_t address = 0;
 
-    ++kvm_stat.io_exits;
+    ++vcpu->stat.io_exits;
 
     vcpu->svm->next_rip = vcpu->svm->vmcb->control.exit_info_2;
 
@@ -1096,7 +1096,7 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
         return 1;
     kvm_run->exit_reason = KVM_EXIT_HLT;
-    ++kvm_stat.halt_exits;
+    ++vcpu->stat.halt_exits;
     return 0;
 }
 
@@ -1264,7 +1264,7 @@ static int interrupt_window_interception(struct kvm_vcpu *vcpu,
      */
     if (kvm_run->request_interrupt_window &&
         !vcpu->irq_summary) {
-        ++kvm_stat.irq_window_exits;
+        ++vcpu->stat.irq_window_exits;
         kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
         return 0;
     }
@@ -1636,14 +1636,14 @@ again:
     r = handle_exit(vcpu, kvm_run);
     if (r > 0) {
         if (signal_pending(current)) {
-            ++kvm_stat.signal_exits;
+            ++vcpu->stat.signal_exits;
             post_kvm_run_save(vcpu, kvm_run);
             kvm_run->exit_reason = KVM_EXIT_INTR;
             return -EINTR;
         }
 
         if (dm_request_for_irq_injection(vcpu, kvm_run)) {
-            ++kvm_stat.request_irq_exits;
+            ++vcpu->stat.request_irq_exits;
             post_kvm_run_save(vcpu, kvm_run);
             kvm_run->exit_reason = KVM_EXIT_INTR;
             return -EINTR;
@@ -1672,7 +1672,7 @@ static void svm_inject_page_fault(struct kvm_vcpu *vcpu,
 {
     uint32_t exit_int_info = vcpu->svm->vmcb->control.exit_int_info;
 
-    ++kvm_stat.pf_guest;
+    ++vcpu->stat.pf_guest;
 
     if (is_page_fault(exit_int_info)) {

diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 37537af126d1..10845b7ff297 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1396,7 +1396,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
     case EMULATE_DONE:
         return 1;
     case EMULATE_DO_MMIO:
-        ++kvm_stat.mmio_exits;
+        ++vcpu->stat.mmio_exits;
         kvm_run->exit_reason = KVM_EXIT_MMIO;
         return 0;
     case EMULATE_FAIL:
@@ -1425,7 +1425,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 static int handle_external_interrupt(struct kvm_vcpu *vcpu,
                      struct kvm_run *kvm_run)
 {
-    ++kvm_stat.irq_exits;
+    ++vcpu->stat.irq_exits;
     return 1;
 }
 
@@ -1492,7 +1492,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
     unsigned long count;
     gva_t address;
 
-    ++kvm_stat.io_exits;
+    ++vcpu->stat.io_exits;
     exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
     in = (exit_qualification & 8) != 0;
     size = (exit_qualification & 7) + 1;
@@ -1682,7 +1682,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
     if (kvm_run->request_interrupt_window &&
         !vcpu->irq_summary) {
         kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
-        ++kvm_stat.irq_window_exits;
+        ++vcpu->stat.irq_window_exits;
         return 0;
     }
     return 1;
@@ -1695,7 +1695,7 @@ static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
         return 1;
     kvm_run->exit_reason = KVM_EXIT_HLT;
-    ++kvm_stat.halt_exits;
+    ++vcpu->stat.halt_exits;
     return 0;
 }
 
@@ -1956,7 +1956,7 @@ again:
         reload_tss();
     }
 
-    ++kvm_stat.exits;
+    ++vcpu->stat.exits;
 
 #ifdef CONFIG_X86_64
     if (is_long_mode(vcpu)) {
@@ -1988,14 +1988,14 @@ again:
     if (r > 0) {
         /* Give scheduler a change to reschedule. */
         if (signal_pending(current)) {
-            ++kvm_stat.signal_exits;
+            ++vcpu->stat.signal_exits;
             post_kvm_run_save(vcpu, kvm_run);
             kvm_run->exit_reason = KVM_EXIT_INTR;
             return -EINTR;
         }
 
         if (dm_request_for_irq_injection(vcpu, kvm_run)) {
-            ++kvm_stat.request_irq_exits;
+            ++vcpu->stat.request_irq_exits;
             post_kvm_run_save(vcpu, kvm_run);
             kvm_run->exit_reason = KVM_EXIT_INTR;
             return -EINTR;
@@ -2021,7 +2021,7 @@ static void vmx_inject_page_fault(struct kvm_vcpu *vcpu,
 {
     u32 vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 
-    ++kvm_stat.pf_guest;
+    ++vcpu->stat.pf_guest;
 
     if (is_page_fault(vect_info)) {
         printk(KERN_DEBUG "inject_page_fault: "
--
cgit v1.2.3

From 2807696c3791d6dd1dcf20f022eaa2dc7615bc5d Mon Sep 17 00:00:00 2001
From: Adrian Bunk
Date: Sat, 28 Apr 2007 21:20:48 +0200
Subject: KVM: fix an if() condition

It might have worked in this case since PT_PRESENT_MASK is 1, but
let's express this correctly.

Signed-off-by: Adrian Bunk
Signed-off-by: Avi Kivity
---
 drivers/kvm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/kvm/mmu.c')

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 32c64f682081..e8e228118de9 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -1408,7 +1408,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
     for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) {
         u64 ent = pt[i];
 
-        if (!ent & PT_PRESENT_MASK)
+        if (!(ent & PT_PRESENT_MASK))
             continue;
 
         va = canonicalize(va);
--
cgit v1.2.3
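A short note on the fix above: '!' binds tighter than '&', so the old
test evaluated (!ent) & 1. An illustrative sketch with a hypothetical
value:

    u64 ent = 0x182;    /* present bit (bit 0) clear, other bits set */

    int old = !ent & PT_PRESENT_MASK;     /* (!ent) & 1 == 0: not skipped */
    int fixed = !(ent & PT_PRESENT_MASK); /* == 1: correctly skipped */

    /* the two agree only when ent == 0, which is why it appeared to work */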