aboutsummaryrefslogtreecommitdiff
path: root/virt/kvm
diff options
context:
space:
mode:
authorDavid Woodhouse2022-03-03 15:41:12 +0000
committerPaolo Bonzini2022-04-02 05:34:41 -0400
commitcf1d88b36ba7e83bdaa50bccc4c47864e8f08cbe (patch)
tree1e6264614c9f79e91364fa1dd988ee8a14d4584f /virt/kvm
parentd0d96121d03d6d9cf608d948247a9f24f5a02da9 (diff)
KVM: Remove dirty handling from gfn_to_pfn_cache completely
It isn't OK to cache the dirty status of a page in internal structures for an indefinite period of time. Any time a vCPU exits the run loop to userspace might be its last; the VMM might do its final check of the dirty log, flush the last remaining dirty pages to the destination and complete a live migration. If we have internal 'dirty' state which doesn't get flushed until the vCPU is finally destroyed on the source after migration is complete, then we have lost data because that will escape the final copy. This problem already exists with the use of kvm_vcpu_unmap() to mark pages dirty in e.g. VMX nesting. Note that the actual Linux MM already considers the page to be dirty since we have a writeable mapping of it. This is just about the KVM dirty logging. For the nesting-style use cases (KVM_GUEST_USES_PFN) we will need to track which gfn_to_pfn_caches have been used and explicitly mark the corresponding pages dirty before returning to userspace. But we would have needed external tracking of that anyway, rather than walking the full list of GPCs to find those belonging to this vCPU which are dirty. So let's rely *solely* on that external tracking, and keep it simple rather than laying a tempting trap for callers to fall into. Signed-off-by: David Woodhouse <dwmw@amazon.co.uk> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Message-Id: <20220303154127.202856-3-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'virt/kvm')
-rw-r--r--virt/kvm/pfncache.c41
1 files changed, 8 insertions, 33 deletions
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index efb69c923027..dd84676615f1 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -49,19 +49,6 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
}
__set_bit(gpc->vcpu->vcpu_idx, vcpu_bitmap);
}
-
- /*
- * We cannot call mark_page_dirty() from here because
- * this physical CPU might not have an active vCPU
- * with which to do the KVM dirty tracking.
- *
- * Neither is there any point in telling the kernel MM
- * that the underlying page is dirty. A vCPU in guest
- * mode might still be writing to it up to the point
- * where we wake them a few lines further down anyway.
- *
- * So all the dirty marking happens on the unmap.
- */
}
write_unlock_irq(&gpc->lock);
}
@@ -108,8 +95,7 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_check);
-static void __release_gpc(struct kvm *kvm, kvm_pfn_t pfn, void *khva,
- gpa_t gpa, bool dirty)
+static void __release_gpc(struct kvm *kvm, kvm_pfn_t pfn, void *khva, gpa_t gpa)
{
/* Unmap the old page if it was mapped before, and release it */
if (!is_error_noslot_pfn(pfn)) {
@@ -122,9 +108,7 @@ static void __release_gpc(struct kvm *kvm, kvm_pfn_t pfn, void *khva,
#endif
}
- kvm_release_pfn(pfn, dirty);
- if (dirty)
- mark_page_dirty(kvm, gpa);
+ kvm_release_pfn(pfn, false);
}
}
@@ -156,7 +140,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, unsigned long uhva)
}
int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
- gpa_t gpa, unsigned long len, bool dirty)
+ gpa_t gpa, unsigned long len)
{
struct kvm_memslots *slots = kvm_memslots(kvm);
unsigned long page_offset = gpa & ~PAGE_MASK;
@@ -164,7 +148,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
unsigned long old_uhva;
gpa_t old_gpa;
void *old_khva;
- bool old_valid, old_dirty;
+ bool old_valid;
int ret = 0;
/*
@@ -181,14 +165,12 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
old_khva = gpc->khva - offset_in_page(gpc->khva);
old_uhva = gpc->uhva;
old_valid = gpc->valid;
- old_dirty = gpc->dirty;
/* If the userspace HVA is invalid, refresh that first */
if (gpc->gpa != gpa || gpc->generation != slots->generation ||
kvm_is_error_hva(gpc->uhva)) {
gfn_t gfn = gpa_to_gfn(gpa);
- gpc->dirty = false;
gpc->gpa = gpa;
gpc->generation = slots->generation;
gpc->memslot = __gfn_to_memslot(slots, gfn);
@@ -260,14 +242,9 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
}
out:
- if (ret)
- gpc->dirty = false;
- else
- gpc->dirty = dirty;
-
write_unlock_irq(&gpc->lock);
- __release_gpc(kvm, old_pfn, old_khva, old_gpa, old_dirty);
+ __release_gpc(kvm, old_pfn, old_khva, old_gpa);
return ret;
}
@@ -277,7 +254,6 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
{
void *old_khva;
kvm_pfn_t old_pfn;
- bool old_dirty;
gpa_t old_gpa;
write_lock_irq(&gpc->lock);
@@ -285,7 +261,6 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
gpc->valid = false;
old_khva = gpc->khva - offset_in_page(gpc->khva);
- old_dirty = gpc->dirty;
old_gpa = gpc->gpa;
old_pfn = gpc->pfn;
@@ -298,14 +273,14 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
write_unlock_irq(&gpc->lock);
- __release_gpc(kvm, old_pfn, old_khva, old_gpa, old_dirty);
+ __release_gpc(kvm, old_pfn, old_khva, old_gpa);
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap);
int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
- gpa_t gpa, unsigned long len, bool dirty)
+ gpa_t gpa, unsigned long len)
{
WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage);
@@ -324,7 +299,7 @@ int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
list_add(&gpc->list, &kvm->gpc_list);
spin_unlock(&kvm->gpc_lock);
}
- return kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len, dirty);
+ return kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len);
}
EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_init);