aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kvm/mmu.c
diff options
context:
space:
mode:
authorLinus Torvalds2016-03-22 16:28:22 -0700
committerLinus Torvalds2016-03-22 16:28:22 -0700
commitb91d9c6716319dcd9e6ffcfc9defaf79e705daab (patch)
tree97cec2bac36d3ff0581bbdc743a6152e733a672c /arch/x86/kvm/mmu.c
parentb8ba4526832fcccba7f46e55ce9a8b79902bdcec (diff)
parenta6adb106225f9e2f177d3d883596e011df321965 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull more KVM updates from Paolo Bonzini: "Second round of KVM changes for 4.6: - build fixes for PPC KVM - miscellaneous bugfixes for ARM KVM - cleanup of memory barrier and removal of redundant barriers - x86 fixes: page tracking oops, support for old buggy KVM nested on 4.5 - support for protection keys in guests - lockdep fix - another conversion to simple wait queues and raw spinlocks, backported from PREEMPT_RT" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (27 commits) KVM: page_track: fix access to NULL slot KVM: PPC: do not compile in vfio.o unconditionally kvm, rt: change async pagefault code locking for PREEMPT_RT KVM/PPC: update the comment of memory barrier in the kvmppc_prepare_to_enter() KVM/x86: update the comment of memory barrier in the vcpu_enter_guest() KVM: Replace smp_mb() with smp_load_acquire() in the kvm_flush_remote_tlbs() KVM/x86: Call smp_wmb() before increasing tlbs_dirty KVM: Replace smp_mb() with smp_mb_after_atomic() in the kvm_make_all_cpus_request() KVM/x86: Replace smp_mb() with smp_store_mb/release() in the walk_shadow_page_lockless_begin/end() KVM: Remove redundant smp_mb() in the kvm_mmu_commit_zap_page() KVM, pkeys: expose CPUID/CR4 to guest KVM, pkeys: add pkeys support for permission_fault KVM, pkeys: introduce pkru_mask to cache conditions KVM, pkeys: save/restore PKRU when guest/host switches x86: pkey: introduce write_pkru() for KVM KVM, pkeys: add pkeys support for xsave state KVM, pkeys: disable pkeys for guests in non-paging mode KVM: x86: remove magic number with enum cpuid_leafs KVM: MMU: return page fault error code from permission_fault KVM: fix spin_lock_init order on x86 ...
Diffstat (limited to 'arch/x86/kvm/mmu.c')
-rw-r--r--arch/x86/kvm/mmu.c102
1 files changed, 90 insertions, 12 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index c512f095cdac..6bdfbc23ecaa 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -632,12 +632,12 @@ static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
* kvm_flush_remote_tlbs() IPI to all active vcpus.
*/
local_irq_disable();
- vcpu->mode = READING_SHADOW_PAGE_TABLES;
+
/*
* Make sure a following spte read is not reordered ahead of the write
* to vcpu->mode.
*/
- smp_mb();
+ smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES);
}
static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
@@ -647,8 +647,7 @@ static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
* reads to sptes. If it does, kvm_commit_zap_page() can see us
* OUTSIDE_GUEST_MODE and proceed to free the shadow page table.
*/
- smp_mb();
- vcpu->mode = OUTSIDE_GUEST_MODE;
+ smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE);
local_irq_enable();
}
@@ -2390,14 +2389,13 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
return;
/*
- * wmb: make sure everyone sees our modifications to the page tables
- * rmb: make sure we see changes to vcpu->mode
- */
- smp_mb();
-
- /*
- * Wait for all vcpus to exit guest mode and/or lockless shadow
- * page table walks.
+ * We need to make sure everyone sees our modifications to
+ * the page tables and see changes to vcpu->mode here. The barrier
+ * in the kvm_flush_remote_tlbs() achieves this. This pairs
+ * with vcpu_enter_guest and walk_shadow_page_lockless_begin/end.
+ *
+ * In addition, kvm_flush_remote_tlbs waits for all vcpus to exit
+ * guest mode and/or lockless shadow page table walks.
*/
kvm_flush_remote_tlbs(kvm);
@@ -3923,6 +3921,81 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
}
}
+/*
+* PKU is an additional mechanism by which the paging controls access to
+* user-mode addresses based on the value in the PKRU register. Protection
+* key violations are reported through a bit in the page fault error code.
+* Unlike other bits of the error code, the PK bit is not known at the
+* call site of e.g. gva_to_gpa; it must be computed directly in
+* permission_fault based on two bits of PKRU, on some machine state (CR4,
+* CR0, EFER, CPL), and on other bits of the error code and the page tables.
+*
+* In particular the following conditions come from the error code, the
+* page tables and the machine state:
+* - PK is always zero unless CR4.PKE=1 and EFER.LMA=1
+* - PK is always zero if RSVD=1 (reserved bit set) or F=1 (instruction fetch)
+* - PK is always zero if U=0 in the page tables
+* - PKRU.WD is ignored if CR0.WP=0 and the access is a supervisor access.
+*
+* The PKRU bitmask caches the result of these four conditions. The error
+* code (minus the P bit) and the page table's U bit form an index into the
+* PKRU bitmask. Two bits of the PKRU bitmask are then extracted and ANDed
+* with the two bits of the PKRU register corresponding to the protection key.
+* For the first three conditions above the bits will be 00, thus masking
+* away both AD and WD. For all reads or if the last condition holds, WD
+* only will be masked away.
+*/
+static void update_pkru_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ bool ept)
+{
+ unsigned bit;
+ bool wp;
+
+ if (ept) {
+ mmu->pkru_mask = 0;
+ return;
+ }
+
+ /* PKEY is enabled only if CR4.PKE and EFER.LMA are both set. */
+ if (!kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || !is_long_mode(vcpu)) {
+ mmu->pkru_mask = 0;
+ return;
+ }
+
+ wp = is_write_protection(vcpu);
+
+ for (bit = 0; bit < ARRAY_SIZE(mmu->permissions); ++bit) {
+ unsigned pfec, pkey_bits;
+ bool check_pkey, check_write, ff, uf, wf, pte_user;
+
+ pfec = bit << 1;
+ ff = pfec & PFERR_FETCH_MASK;
+ uf = pfec & PFERR_USER_MASK;
+ wf = pfec & PFERR_WRITE_MASK;
+
+ /* PFEC.RSVD is replaced by ACC_USER_MASK. */
+ pte_user = pfec & PFERR_RSVD_MASK;
+
+ /*
+ * Only need to check the access which is not an
+ * instruction fetch and is to a user page.
+ */
+ check_pkey = (!ff && pte_user);
+ /*
+ * write access is controlled by PKRU if it is a
+ * user access or CR0.WP = 1.
+ */
+ check_write = check_pkey && wf && (uf || wp);
+
+ /* PKRU.AD stops both read and write access. */
+ pkey_bits = !!check_pkey;
+ /* PKRU.WD stops write access. */
+ pkey_bits |= (!!check_write) << 1;
+
+ mmu->pkru_mask |= (pkey_bits & 3) << pfec;
+ }
+}
+
static void update_last_nonleaf_level(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
{
unsigned root_level = mmu->root_level;
@@ -3941,6 +4014,7 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
reset_rsvds_bits_mask(vcpu, context);
update_permission_bitmask(vcpu, context, false);
+ update_pkru_bitmask(vcpu, context, false);
update_last_nonleaf_level(vcpu, context);
MMU_WARN_ON(!is_pae(vcpu));
@@ -3968,6 +4042,7 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
reset_rsvds_bits_mask(vcpu, context);
update_permission_bitmask(vcpu, context, false);
+ update_pkru_bitmask(vcpu, context, false);
update_last_nonleaf_level(vcpu, context);
context->page_fault = paging32_page_fault;
@@ -4026,6 +4101,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
}
update_permission_bitmask(vcpu, context, false);
+ update_pkru_bitmask(vcpu, context, false);
update_last_nonleaf_level(vcpu, context);
reset_tdp_shadow_zero_bits_mask(vcpu, context);
}
@@ -4078,6 +4154,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
context->direct_map = false;
update_permission_bitmask(vcpu, context, true);
+ update_pkru_bitmask(vcpu, context, true);
reset_rsvds_bits_mask_ept(vcpu, context, execonly);
reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
}
@@ -4132,6 +4209,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
}
update_permission_bitmask(vcpu, g_context, false);
+ update_pkru_bitmask(vcpu, g_context, false);
update_last_nonleaf_level(vcpu, g_context);
}