Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r-- | drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 11
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_ggtt.c | 2
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_gtt.c | 4
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_gtt.h | 3
-rw-r--r-- | drivers/gpu/drm/i915/i915_drv.h | 1
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 12
-rw-r--r-- | drivers/gpu/drm/i915/i915_module.c | 3
-rw-r--r-- | drivers/gpu/drm/i915/i915_vma.c | 205
-rw-r--r-- | drivers/gpu/drm/i915/i915_vma.h | 3
-rw-r--r-- | drivers/gpu/drm/i915/i915_vma_resource.c | 354
-rw-r--r-- | drivers/gpu/drm/i915/i915_vma_resource.h | 48
11 files changed, 579 insertions, 67 deletions
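For orientation before the hunks themselves: condensed to a call sketch, the change makes an unbind publish a fence instead of blocking on it. The sketch below is assembled from the i915_vma.c and i915_vma_resource.c hunks that follow; error paths, locking and reference counting are trimmed, so treat it as a reading aid rather than patch text.

i915_gem_object_unbind(obj, flags | I915_GEM_OBJECT_UNBIND_ASYNC)
  -> i915_vma_unbind_async(vma, trylock_vm)    /* object's dma-resv held */
       -> __i915_vma_unbind_async(vma)         /* under vm->mutex */
            -> i915_sw_fence_await_active(&vma->resource->chain, &vma->active, ...)
            -> __i915_vma_evict(vma, true)
                 -> i915_vma_resource_unbind(vma_res)
                      /* dependencies still pending? insert vma_res into
                       * vm->pending_unbind and defer ops->unbind_vma() to
                       * i915_vma_resource_unbind_work() on system_unbound_wq;
                       * otherwise unbind immediately */
       -> dma_resv_add_shared_fence(obj->base.resv, fence)

A later bind into an overlapping range then either blocks on, or asynchronously awaits, the fences found in vm->pending_unbind (i915_vma_resource_bind_dep_sync() / i915_vma_resource_bind_dep_await() in the i915_vma_resource.c hunk).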
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 8653855d808b..1de306c03aaf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -142,7 +142,16 @@ int i915_ttm_move_notify(struct ttm_buffer_object *bo) struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); int ret; - ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); + /* + * Note: The async unbinding here will actually transform the + * blocking wait for unbind into a wait before finally submitting + * the evict / migration blit and thus stall the migration timeline, + * which may not be good for overall throughput. We should make + * sure we await the unbind fences *after* the migration blit + * instead of *before* as we currently do. + */ + ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE | + I915_GEM_OBJECT_UNBIND_ASYNC); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index bb885d862ec4..91ea2882efda 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -131,7 +131,7 @@ void i915_ggtt_suspend(struct i915_ggtt *ggtt) continue; if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { - __i915_vma_evict(vma); + __i915_vma_evict(vma, false); drm_mm_remove_node(&vma->node); } } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index c3846ff33118..058c88682970 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -160,6 +160,9 @@ static void __i915_vm_release(struct work_struct *work) struct i915_address_space *vm = container_of(work, struct i915_address_space, release_work); + /* Synchronize async unbinds. 
*/ + i915_vma_resource_bind_dep_sync_all(vm); + vm->cleanup(vm); i915_address_space_fini(vm); @@ -188,6 +191,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) if (!kref_read(&vm->resv_ref)) kref_init(&vm->resv_ref); + vm->pending_unbind = RB_ROOT_CACHED; INIT_WORK(&vm->release_work, __i915_vm_release); atomic_set(&vm->open, 1); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 01519e1d1651..ac1c4479e697 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -265,6 +265,9 @@ struct i915_address_space { /* Flags used when creating page-table objects for this vm */ unsigned long lmem_pt_obj_flags; + /* Interval tree for pending unbind vma resources */ + struct rb_root_cached pending_unbind; + struct drm_i915_gem_object * (*alloc_pt_dma)(struct i915_address_space *vm, int sz); struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 63a579e952b9..b38810f84181 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1878,6 +1878,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj, #define I915_GEM_OBJECT_UNBIND_BARRIER BIT(1) #define I915_GEM_OBJECT_UNBIND_TEST BIT(2) #define I915_GEM_OBJECT_UNBIND_VM_TRYLOCK BIT(3) +#define I915_GEM_OBJECT_UNBIND_ASYNC BIT(4) void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e3730096abd9..3d6c00f845a3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -156,10 +156,16 @@ try_again: spin_unlock(&obj->vma.lock); if (vma) { + bool vm_trylock = !!(flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK); ret = -EBUSY; - if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE || - !i915_vma_is_active(vma)) { - if (flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK) { + if (flags & I915_GEM_OBJECT_UNBIND_ASYNC) { + assert_object_held(vma->obj); + ret = i915_vma_unbind_async(vma, vm_trylock); + } + + if (ret == -EBUSY && (flags & I915_GEM_OBJECT_UNBIND_ACTIVE || + !i915_vma_is_active(vma))) { + if (vm_trylock) { if (mutex_trylock(&vma->vm->mutex)) { ret = __i915_vma_unbind(vma); mutex_unlock(&vma->vm->mutex); diff --git a/drivers/gpu/drm/i915/i915_module.c b/drivers/gpu/drm/i915/i915_module.c index ab2295dd4500..8d18714cf75f 100644 --- a/drivers/gpu/drm/i915/i915_module.c +++ b/drivers/gpu/drm/i915/i915_module.c @@ -17,6 +17,7 @@ #include "i915_scheduler.h" #include "i915_selftest.h" #include "i915_vma.h" +#include "i915_vma_resource.h" static int i915_check_nomodeset(void) { @@ -64,6 +65,8 @@ static const struct { .exit = i915_scheduler_module_exit }, { .init = i915_vma_module_init, .exit = i915_vma_module_exit }, + { .init = i915_vma_resource_module_init, + .exit = i915_vma_resource_module_exit }, { .init = i915_mock_selftests }, { .init = i915_pmu_init, .exit = i915_pmu_exit }, diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 9f00dfa5e022..b86666f653ca 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -285,9 +285,10 @@ struct i915_vma_work { struct dma_fence_work base; struct i915_address_space *vm; struct i915_vm_pt_stash stash; - struct i915_vma *vma; + struct i915_vma_resource *vma_res; struct drm_i915_gem_object *pinned; struct i915_sw_dma_fence_cb cb; + struct i915_refct_sgt *rsgt; enum i915_cache_level cache_level; unsigned int flags; }; @@ -295,10 +296,11 @@ struct i915_vma_work { 
static void __vma_bind(struct dma_fence_work *work) { struct i915_vma_work *vw = container_of(work, typeof(*vw), base); - struct i915_vma *vma = vw->vma; + struct i915_vma_resource *vma_res = vw->vma_res; + + vma_res->ops->bind_vma(vma_res->vm, &vw->stash, + vma_res, vw->cache_level, vw->flags); - vma->ops->bind_vma(vw->vm, &vw->stash, - vma->resource, vw->cache_level, vw->flags); } static void __vma_release(struct dma_fence_work *work) @@ -310,6 +312,10 @@ static void __vma_release(struct dma_fence_work *work) i915_vm_free_pt_stash(vw->vm, &vw->stash); i915_vm_put(vw->vm); + if (vw->vma_res) + i915_vma_resource_put(vw->vma_res); + if (vw->rsgt) + i915_refct_sgt_put(vw->rsgt); } static const struct dma_fence_work_ops bind_ops = { @@ -379,13 +385,11 @@ i915_vma_resource_init_from_vma(struct i915_vma_resource *vma_res, { struct drm_i915_gem_object *obj = vma->obj; - i915_vma_resource_init(vma_res, vma->pages, &vma->page_sizes, + i915_vma_resource_init(vma_res, vma->vm, vma->pages, &vma->page_sizes, i915_gem_object_is_readonly(obj), i915_gem_object_is_lmem(obj), - vma->private, - vma->node.start, - vma->node.size, - vma->size); + vma->ops, vma->private, vma->node.start, + vma->node.size, vma->size); } /** @@ -409,6 +413,7 @@ int i915_vma_bind(struct i915_vma *vma, { u32 bind_flags; u32 vma_flags; + int ret; lockdep_assert_held(&vma->vm->mutex); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); @@ -417,12 +422,12 @@ int i915_vma_bind(struct i915_vma *vma, if (GEM_DEBUG_WARN_ON(range_overflows(vma->node.start, vma->node.size, vma->vm->total))) { - kfree(vma_res); + i915_vma_resource_free(vma_res); return -ENODEV; } if (GEM_DEBUG_WARN_ON(!flags)) { - kfree(vma_res); + i915_vma_resource_free(vma_res); return -EINVAL; } @@ -434,12 +439,30 @@ int i915_vma_bind(struct i915_vma *vma, bind_flags &= ~vma_flags; if (bind_flags == 0) { - kfree(vma_res); + i915_vma_resource_free(vma_res); return 0; } GEM_BUG_ON(!atomic_read(&vma->pages_count)); + /* Wait for or await async unbinds touching our range */ + if (work && bind_flags & vma->vm->bind_async_flags) + ret = i915_vma_resource_bind_dep_await(vma->vm, + &work->base.chain, + vma->node.start, + vma->node.size, + true, + GFP_NOWAIT | + __GFP_RETRY_MAYFAIL | + __GFP_NOWARN); + else + ret = i915_vma_resource_bind_dep_sync(vma->vm, vma->node.start, + vma->node.size, true); + if (ret) { + i915_vma_resource_free(vma_res); + return ret; + } + if (vma->resource || !vma_res) { /* Rebinding with an additional I915_VMA_*_BIND */ GEM_WARN_ON(!vma_flags); @@ -452,9 +475,11 @@ int i915_vma_bind(struct i915_vma *vma, if (work && bind_flags & vma->vm->bind_async_flags) { struct dma_fence *prev; - work->vma = vma; + work->vma_res = i915_vma_resource_get(vma->resource); work->cache_level = cache_level; work->flags = bind_flags; + if (vma->obj->mm.rsgt) + work->rsgt = i915_refct_sgt_get(vma->obj->mm.rsgt); /* * Note we only want to chain up to the migration fence on @@ -475,14 +500,24 @@ int i915_vma_bind(struct i915_vma *vma, work->base.dma.error = 0; /* enable the queue_work() */ - work->pinned = i915_gem_object_get(vma->obj); + /* + * If we don't have the refcounted pages list, keep a reference + * on the object to avoid waiting for the async bind to + * complete in the object destruction path. 
+ */ + if (!work->rsgt) + work->pinned = i915_gem_object_get(vma->obj); } else { if (vma->obj) { int ret; ret = i915_gem_object_wait_moving_fence(vma->obj, true); - if (ret) + if (ret) { + i915_vma_resource_free(vma->resource); + vma->resource = NULL; + return ret; + } } vma->ops->bind_vma(vma->vm, NULL, vma->resource, cache_level, bind_flags); @@ -1700,8 +1735,9 @@ int _i915_vma_move_to_active(struct i915_vma *vma, return 0; } -void __i915_vma_evict(struct i915_vma *vma) +struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async) { + struct i915_vma_resource *vma_res = vma->resource; struct dma_fence *unbind_fence; GEM_BUG_ON(i915_vma_is_pinned(vma)); @@ -1734,27 +1770,39 @@ void __i915_vma_evict(struct i915_vma *vma) GEM_BUG_ON(vma->fence); GEM_BUG_ON(i915_vma_has_userfault(vma)); - if (likely(atomic_read(&vma->vm->open))) { - trace_i915_vma_unbind(vma); - vma->ops->unbind_vma(vma->vm, vma->resource); - } + /* Object backend must be async capable. */ + GEM_WARN_ON(async && !vma->obj->mm.rsgt); + + /* If vm is not open, unbind is a nop. */ + vma_res->needs_wakeref = i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND) && + atomic_read(&vma->vm->open); + trace_i915_vma_unbind(vma); + + unbind_fence = i915_vma_resource_unbind(vma_res); + vma->resource = NULL; + atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE), &vma->flags); - unbind_fence = i915_vma_resource_unbind(vma->resource); - i915_vma_resource_put(vma->resource); - vma->resource = NULL; + /* Object backend must be async capable. */ + GEM_WARN_ON(async && !vma->obj->mm.rsgt); i915_vma_detach(vma); - vma_unbind_pages(vma); + + if (!async && unbind_fence) { + dma_fence_wait(unbind_fence, false); + dma_fence_put(unbind_fence); + unbind_fence = NULL; + } /* - * This uninterruptible wait under the vm mutex is currently - * only ever blocking while the vma is being captured from. - * With async unbinding, this wait here will be removed. + * Binding itself may not have completed until the unbind fence signals, + * so don't drop the pages until that happens, unless the resource is + * async_capable. */ - dma_fence_wait(unbind_fence, false); - dma_fence_put(unbind_fence); + + vma_unbind_pages(vma); + return unbind_fence; } int __i915_vma_unbind(struct i915_vma *vma) @@ -1781,12 +1829,47 @@ int __i915_vma_unbind(struct i915_vma *vma) return ret; GEM_BUG_ON(i915_vma_is_active(vma)); - __i915_vma_evict(vma); + __i915_vma_evict(vma, false); drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */ return 0; } +static struct dma_fence *__i915_vma_unbind_async(struct i915_vma *vma) +{ + struct dma_fence *fence; + + lockdep_assert_held(&vma->vm->mutex); + + if (!drm_mm_node_allocated(&vma->node)) + return NULL; + + if (i915_vma_is_pinned(vma) || + &vma->obj->mm.rsgt->table != vma->resource->bi.pages) + return ERR_PTR(-EAGAIN); + + /* + * We probably need to replace this with awaiting the fences of the + * object's dma_resv when the vma active goes away. When doing that + * we need to be careful to not add the vma_resource unbind fence + * immediately to the object's dma_resv, because then unbinding + * the next vma from the object, in case there are many, will + * actually await the unbinding of the previous vmas, which is + * undesirable. 
+ */ + if (i915_sw_fence_await_active(&vma->resource->chain, &vma->active, + I915_ACTIVE_AWAIT_EXCL | + I915_ACTIVE_AWAIT_ACTIVE) < 0) { + return ERR_PTR(-EBUSY); + } + + fence = __i915_vma_evict(vma, true); + + drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */ + + return fence; +} + int i915_vma_unbind(struct i915_vma *vma) { struct i915_address_space *vm = vma->vm; @@ -1823,6 +1906,68 @@ out_rpm: return err; } +int i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm) +{ + struct drm_i915_gem_object *obj = vma->obj; + struct i915_address_space *vm = vma->vm; + intel_wakeref_t wakeref = 0; + struct dma_fence *fence; + int err; + + /* + * We need the dma-resv lock since we add the + * unbind fence to the dma-resv object. + */ + assert_object_held(obj); + + if (!drm_mm_node_allocated(&vma->node)) + return 0; + + if (i915_vma_is_pinned(vma)) { + vma_print_allocator(vma, "is pinned"); + return -EAGAIN; + } + + if (!obj->mm.rsgt) + return -EBUSY; + + err = dma_resv_reserve_shared(obj->base.resv, 1); + if (err) + return -EBUSY; + + /* + * It would be great if we could grab this wakeref from the + * async unbind work if needed, but we can't because it uses + * kmalloc and it's in the dma-fence signalling critical path. + */ + if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) + wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm); + + if (trylock_vm && !mutex_trylock(&vm->mutex)) { + err = -EBUSY; + goto out_rpm; + } else if (!trylock_vm) { + err = mutex_lock_interruptible_nested(&vm->mutex, !wakeref); + if (err) + goto out_rpm; + } + + fence = __i915_vma_unbind_async(vma); + mutex_unlock(&vm->mutex); + if (IS_ERR_OR_NULL(fence)) { + err = PTR_ERR_OR_ZERO(fence); + goto out_rpm; + } + + dma_resv_add_shared_fence(obj->base.resv, fence); + dma_fence_put(fence); + +out_rpm: + if (wakeref) + intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref); + return err; +} + struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma) { i915_gem_object_make_unshrinkable(vma->obj); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 1df57ec832bd..a560bae04e7e 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -213,9 +213,10 @@ bool i915_vma_misplaced(const struct i915_vma *vma, u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); void i915_vma_revoke_mmap(struct i915_vma *vma); -void __i915_vma_evict(struct i915_vma *vma); +struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async); int __i915_vma_unbind(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); +int __must_check i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm); void i915_vma_unlink_ctx(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); void i915_vma_reopen(struct i915_vma *vma); diff --git a/drivers/gpu/drm/i915/i915_vma_resource.c b/drivers/gpu/drm/i915/i915_vma_resource.c index b50e67035d15..3e55a30b2da7 100644 --- a/drivers/gpu/drm/i915/i915_vma_resource.c +++ b/drivers/gpu/drm/i915/i915_vma_resource.c @@ -2,39 +2,44 @@ /* * Copyright © 2021 Intel Corporation */ + +#include <linux/interval_tree_generic.h> #include <linux/slab.h> +#include "i915_sw_fence.h" #include "i915_vma_resource.h" +#include "i915_drv.h" -/* Callbacks for the unbind dma-fence. 
*/ -static const char *get_driver_name(struct dma_fence *fence) -{ - return "vma unbind fence"; -} +static struct kmem_cache *slab_vma_resources; -static const char *get_timeline_name(struct dma_fence *fence) -{ - return "unbound"; -} - -static struct dma_fence_ops unbind_fence_ops = { - .get_driver_name = get_driver_name, - .get_timeline_name = get_timeline_name, -}; /** - * __i915_vma_resource_init - Initialize a vma resource. - * @vma_res: The vma resource to initialize + * DOC: + * We use a per-vm interval tree to keep track of vma_resources + * scheduled for unbind but not yet unbound. The tree is protected by + * the vm mutex, and nodes are removed just after the unbind fence signals. + * The removal takes the vm mutex from a kernel thread which we need to + * keep in mind so that we don't grab the mutex and try to wait for all + * pending unbinds to complete, because that will temporarily block many + * of the workqueue threads, and people will get angry. * - * Initializes the private members of a vma resource. + * We should consider using a single ordered fence per VM instead but that + * requires ordering the unbinds and might introduce unnecessary waiting + * for unrelated unbinds. The amount of code will probably be roughly the same + * due to the simplicity of using the interval tree interface. + * + * Another drawback of this interval tree is that the complexity of insertion + * and removal of fences increases as O(ln(pending_unbinds)) instead of + * O(1) for a single fence without interval tree. */ -void __i915_vma_resource_init(struct i915_vma_resource *vma_res) -{ - spin_lock_init(&vma_res->lock); - dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops, - &vma_res->lock, 0, 0); - refcount_set(&vma_res->hold_count, 1); -} +#define VMA_RES_START(_node) ((_node)->start) +#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size - 1) +INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb, + u64, __subtree_last, + VMA_RES_START, VMA_RES_LAST, static, vma_res_itree); + +/* Callbacks for the unbind dma-fence. */ /** * i915_vma_resource_alloc - Allocate a vma resource @@ -45,15 +50,73 @@ void __i915_vma_resource_init(struct i915_vma_resource *vma_res) struct i915_vma_resource *i915_vma_resource_alloc(void) { struct i915_vma_resource *vma_res = - kzalloc(sizeof(*vma_res), GFP_KERNEL); + kmem_cache_zalloc(slab_vma_resources, GFP_KERNEL); return vma_res ? vma_res : ERR_PTR(-ENOMEM); } +/** + * i915_vma_resource_free - Free a vma resource + * @vma_res: The vma resource to free. 
+ */ +void i915_vma_resource_free(struct i915_vma_resource *vma_res) +{ + kmem_cache_free(slab_vma_resources, vma_res); +} + +static const char *get_driver_name(struct dma_fence *fence) +{ + return "vma unbind fence"; +} + +static const char *get_timeline_name(struct dma_fence *fence) +{ + return "unbound"; +} + +static void unbind_fence_free_rcu(struct rcu_head *head) +{ + struct i915_vma_resource *vma_res = + container_of(head, typeof(*vma_res), unbind_fence.rcu); + + i915_vma_resource_free(vma_res); +} + +static void unbind_fence_release(struct dma_fence *fence) +{ + struct i915_vma_resource *vma_res = + container_of(fence, typeof(*vma_res), unbind_fence); + + i915_sw_fence_fini(&vma_res->chain); + + call_rcu(&fence->rcu, unbind_fence_free_rcu); +} + +static struct dma_fence_ops unbind_fence_ops = { + .get_driver_name = get_driver_name, + .get_timeline_name = get_timeline_name, + .release = unbind_fence_release, +}; + static void __i915_vma_resource_unhold(struct i915_vma_resource *vma_res) { - if (refcount_dec_and_test(&vma_res->hold_count)) - dma_fence_signal(&vma_res->unbind_fence); + struct i915_address_space *vm; + + if (!refcount_dec_and_test(&vma_res->hold_count)) + return; + + dma_fence_signal(&vma_res->unbind_fence); + + vm = vma_res->vm; + if (vma_res->wakeref) + intel_runtime_pm_put(&vm->i915->runtime_pm, vma_res->wakeref); + + vma_res->vm = NULL; + if (!RB_EMPTY_NODE(&vma_res->rb)) { + mutex_lock(&vm->mutex); + vma_res_itree_remove(vma_res, &vm->pending_unbind); + mutex_unlock(&vm->mutex); + } } /** @@ -102,6 +165,49 @@ bool i915_vma_resource_hold(struct i915_vma_resource *vma_res, return held; } +static void i915_vma_resource_unbind_work(struct work_struct *work) +{ + struct i915_vma_resource *vma_res = + container_of(work, typeof(*vma_res), work); + struct i915_address_space *vm = vma_res->vm; + bool lockdep_cookie; + + lockdep_cookie = dma_fence_begin_signalling(); + if (likely(atomic_read(&vm->open))) + vma_res->ops->unbind_vma(vm, vma_res); + + dma_fence_end_signalling(lockdep_cookie); + __i915_vma_resource_unhold(vma_res); + i915_vma_resource_put(vma_res); +} + +static int +i915_vma_resource_fence_notify(struct i915_sw_fence *fence, + enum i915_sw_fence_notify state) +{ + struct i915_vma_resource *vma_res = + container_of(fence, typeof(*vma_res), chain); + struct dma_fence *unbind_fence = + &vma_res->unbind_fence; + + switch (state) { + case FENCE_COMPLETE: + dma_fence_get(unbind_fence); + if (vma_res->immediate_unbind) { + i915_vma_resource_unbind_work(&vma_res->work); + } else { + INIT_WORK(&vma_res->work, i915_vma_resource_unbind_work); + queue_work(system_unbound_wq, &vma_res->work); + } + break; + case FENCE_FREE: + i915_vma_resource_put(vma_res); + break; + } + + return NOTIFY_DONE; +} + /** * i915_vma_resource_unbind - Unbind a vma resource * @vma_res: The vma resource to unbind. @@ -112,10 +218,196 @@ bool i915_vma_resource_hold(struct i915_vma_resource *vma_res, * Return: A refcounted pointer to a dma-fence that signals when unbinding is * complete. */ -struct dma_fence * -i915_vma_resource_unbind(struct i915_vma_resource *vma_res) +struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res) { - __i915_vma_resource_unhold(vma_res); - dma_fence_get(&vma_res->unbind_fence); + struct i915_address_space *vm = vma_res->vm; + + /* Reference for the sw fence */ + i915_vma_resource_get(vma_res); + + /* Caller must already have a wakeref in this case. 
*/ + if (vma_res->needs_wakeref) + vma_res->wakeref = intel_runtime_pm_get_if_in_use(&vm->i915->runtime_pm); + + if (atomic_read(&vma_res->chain.pending) <= 1) { + RB_CLEAR_NODE(&vma_res->rb); + vma_res->immediate_unbind = 1; + } else { + vma_res_itree_insert(vma_res, &vma_res->vm->pending_unbind); + } + + i915_sw_fence_commit(&vma_res->chain); + return &vma_res->unbind_fence; } + +/** + * __i915_vma_resource_init - Initialize a vma resource. + * @vma_res: The vma resource to initialize + * + * Initializes the private members of a vma resource. + */ +void __i915_vma_resource_init(struct i915_vma_resource *vma_res) +{ + spin_lock_init(&vma_res->lock); + dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops, + &vma_res->lock, 0, 0); + refcount_set(&vma_res->hold_count, 1); + i915_sw_fence_init(&vma_res->chain, i915_vma_resource_fence_notify); +} + +static void +i915_vma_resource_color_adjust_range(struct i915_address_space *vm, + u64 *start, + u64 *end) +{ + if (i915_vm_has_cache_coloring(vm)) { + if (*start) + *start -= I915_GTT_PAGE_SIZE; + *end += I915_GTT_PAGE_SIZE; + } +} + +/** + * i915_vma_resource_bind_dep_sync - Wait for / sync all unbinds touching a + * certain vm range. + * @vm: The vm to look at. + * @offset: The range start. + * @size: The range size. + * @intr: Whether to wait interruptible. + * + * The function needs to be called with the vm lock held. + * + * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true + */ +int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm, + u64 offset, + u64 size, + bool intr) +{ + struct i915_vma_resource *node; + u64 last = offset + size - 1; + + lockdep_assert_held(&vm->mutex); + might_sleep(); + + i915_vma_resource_color_adjust_range(vm, &offset, &last); + node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last); + while (node) { + int ret = dma_fence_wait(&node->unbind_fence, intr); + + if (ret) + return ret; + + node = vma_res_itree_iter_next(node, offset, last); + } + + return 0; +} + +/** + * i915_vma_resource_bind_dep_sync_all - Wait for / sync all unbinds of a vm, + * releasing the vm lock while waiting. + * @vm: The vm to look at. + * + * The function may not be called with the vm lock held. + * Typically this is called at vm destruction to finish any pending + * unbind operations. The vm mutex is released while waiting to avoid + * stalling kernel workqueues trying to grab the mutex. + */ +void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm) +{ + struct i915_vma_resource *node; + struct dma_fence *fence; + + do { + fence = NULL; + mutex_lock(&vm->mutex); + node = vma_res_itree_iter_first(&vm->pending_unbind, 0, + U64_MAX); + if (node) + fence = dma_fence_get_rcu(&node->unbind_fence); + mutex_unlock(&vm->mutex); + + if (fence) { + /* + * The wait makes sure the node eventually removes + * itself from the tree. + */ + dma_fence_wait(fence, false); + dma_fence_put(fence); + } + } while (node); +} + +/** + * i915_vma_resource_bind_dep_await - Have a struct i915_sw_fence await all + * pending unbinds in a certain range of a vm. + * @vm: The vm to look at. + * @sw_fence: The struct i915_sw_fence that will be awaiting the unbinds. + * @offset: The range start. + * @size: The range size. + * @intr: Whether to wait interruptible. + * @gfp: Allocation mode for memory allocations. + * + * The function makes @sw_fence await all pending unbinds in a certain + * vm range before calling the complete notifier. 
To be able to await + * each individual unbind, the function needs to allocate memory using + * the @gfp allocation mode. If that fails, the function will instead + * wait for the unbind fence to signal, using @intr to judge whether to + * wait interruptibly or not. Note that @gfp should ideally be selected so + * as to avoid any expensive memory allocation stalls and rather fail and + * synchronize itself. For now the vm mutex is required when calling this + * function, which means that @gfp can't call into direct reclaim. In reality + * this means that during heavy memory pressure, we will sync in this + * function. + * + * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true + */ +int i915_vma_resource_bind_dep_await(struct i915_address_space *vm, + struct i915_sw_fence *sw_fence, + u64 offset, + u64 size, + bool intr, + gfp_t gfp) +{ + struct i915_vma_resource *node; + u64 last = offset + size - 1; + + lockdep_assert_held(&vm->mutex); + might_alloc(gfp); + might_sleep(); + + i915_vma_resource_color_adjust_range(vm, &offset, &last); + node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last); + while (node) { + int ret; + + ret = i915_sw_fence_await_dma_fence(sw_fence, + &node->unbind_fence, + 0, gfp); + if (ret < 0) { + ret = dma_fence_wait(&node->unbind_fence, intr); + if (ret) + return ret; + } + + node = vma_res_itree_iter_next(node, offset, last); + } + + return 0; +} + +void i915_vma_resource_module_exit(void) +{ + kmem_cache_destroy(slab_vma_resources); +} + +int __init i915_vma_resource_module_init(void) +{ + slab_vma_resources = KMEM_CACHE(i915_vma_resource, SLAB_HWCACHE_ALIGN); + if (!slab_vma_resources) + return -ENOMEM; + + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_vma_resource.h b/drivers/gpu/drm/i915/i915_vma_resource.h index 8071b40f4f9c..cb3f4e1e8457 100644 --- a/drivers/gpu/drm/i915/i915_vma_resource.h +++ b/drivers/gpu/drm/i915/i915_vma_resource.h @@ -10,6 +10,8 @@ #include <linux/refcount.h> #include "i915_gem.h" +#include "i915_sw_fence.h" +#include "intel_runtime_pm.h" struct i915_page_sizes { /** @@ -38,6 +40,13 @@ struct i915_page_sizes { * @hold_count: Number of holders blocking the fence from finishing. * The vma itself is keeping a hold, which is released when unbind * is scheduled. + * @work: Work struct for deferred unbind work. + * @chain: Embedded struct i915_sw_fence used to await dependencies. + * @rb: Rb node for the vm's pending unbind interval tree. + * @__subtree_last: Interval tree private member. + * @vm: non-refcounted pointer to the vm. This is for internal use only and + * this member is cleared after vm_resource unbind. + * @ops: Pointer to the backend i915_vma_ops. * @private: Bind backend private info. * @start: Offset into the address space of bind range start. * @node_size: Size of the allocated range manager node. @@ -45,6 +54,8 @@ struct i915_page_sizes { * @page_sizes_gtt: Resulting page sizes from the bind operation. * @bound_flags: Flags indicating binding status. * @allocated: Backend private data. TODO: Should move into @private. + * @immediate_unbind: Unbind can be done immediately and doesn't need to be + * deferred to a work item awaiting unsignaled fences. * * The lifetime of a struct i915_vma_resource is from a binding request until * the actual, possibly asynchronous, unbind has completed. @@ -54,6 +65,12 @@ struct i915_vma_resource { /* See above for description of the lock. 
*/ spinlock_t lock; refcount_t hold_count; + struct work_struct work; + struct i915_sw_fence chain; + struct rb_node rb; + u64 __subtree_last; + struct i915_address_space *vm; + intel_wakeref_t wakeref; /** * struct i915_vma_bindinfo - Information needed for async bind @@ -73,13 +90,17 @@ struct i915_vma_resource { bool lmem:1; } bi; + const struct i915_vma_ops *ops; void *private; u64 start; u64 node_size; u64 vma_size; u32 page_sizes_gtt; + u32 bound_flags; bool allocated:1; + bool immediate_unbind:1; + bool needs_wakeref:1; }; bool i915_vma_resource_hold(struct i915_vma_resource *vma_res, @@ -90,6 +111,8 @@ void i915_vma_resource_unhold(struct i915_vma_resource *vma_res, struct i915_vma_resource *i915_vma_resource_alloc(void); +void i915_vma_resource_free(struct i915_vma_resource *vma_res); + struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res); void __i915_vma_resource_init(struct i915_vma_resource *vma_res); @@ -119,10 +142,12 @@ static inline void i915_vma_resource_put(struct i915_vma_resource *vma_res) /** * i915_vma_resource_init - Initialize a vma resource. * @vma_res: The vma resource to initialize + * @vm: Pointer to the vm. * @pages: The pages sg-table. * @page_sizes: Page sizes of the pages. * @readonly: Whether the vma should be bound read-only. * @lmem: Whether the vma points to lmem. + * @ops: The backend ops. * @private: Bind backend private info. * @start: Offset into the address space of bind range start. * @node_size: Size of the allocated range manager node. @@ -134,20 +159,24 @@ static inline void i915_vma_resource_put(struct i915_vma_resource *vma_res) * allocation is not allowed. */ static inline void i915_vma_resource_init(struct i915_vma_resource *vma_res, + struct i915_address_space *vm, struct sg_table *pages, const struct i915_page_sizes *page_sizes, bool readonly, bool lmem, + const struct i915_vma_ops *ops, void *private, u64 start, u64 node_size, u64 size) { __i915_vma_resource_init(vma_res); + vma_res->vm = vm; vma_res->bi.pages = pages; vma_res->bi.page_sizes = *page_sizes; vma_res->bi.readonly = readonly; vma_res->bi.lmem = lmem; + vma_res->ops = ops; vma_res->private = private; vma_res->start = start; vma_res->node_size = node_size; @@ -157,6 +186,25 @@ static inline void i915_vma_resource_init(struct i915_vma_resource *vma_res, static inline void i915_vma_resource_fini(struct i915_vma_resource *vma_res) { GEM_BUG_ON(refcount_read(&vma_res->hold_count) != 1); + i915_sw_fence_fini(&vma_res->chain); } +int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm, + u64 first, + u64 last, + bool intr); + +int i915_vma_resource_bind_dep_await(struct i915_address_space *vm, + struct i915_sw_fence *sw_fence, + u64 first, + u64 last, + bool intr, + gfp_t gfp); + +void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm); + +void i915_vma_resource_module_exit(void); + +int i915_vma_resource_module_init(void); + #endif
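A closing note on the interval tree backing vm->pending_unbind: the INTERVAL_TREE_DEFINE() invocation in the i915_vma_resource.c hunk instantiates a static helper set keyed on each resource's bound range. Roughly, as a sketch of the interface generated by <linux/interval_tree_generic.h> (this is not text from the patch itself):

/* Generated by INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb, u64,
 *                                   __subtree_last, VMA_RES_START,
 *                                   VMA_RES_LAST, static, vma_res_itree):
 */
static void vma_res_itree_insert(struct i915_vma_resource *node,
                                 struct rb_root_cached *root);
static void vma_res_itree_remove(struct i915_vma_resource *node,
                                 struct rb_root_cached *root);
static struct i915_vma_resource *
vma_res_itree_iter_first(struct rb_root_cached *root, u64 start, u64 last);
static struct i915_vma_resource *
vma_res_itree_iter_next(struct i915_vma_resource *node, u64 start, u64 last);

/* Each node covers [start, start + node_size - 1]. A dependency walk over
 * a range, as in i915_vma_resource_bind_dep_sync() above, then reduces to:
 */
for (node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last);
     node;
     node = vma_res_itree_iter_next(node, offset, last))
        dma_fence_wait(&node->unbind_fence, intr);

This is what gives the logarithmic insertion/removal cost the DOC comment mentions, in exchange for not ordering unrelated unbinds behind a single per-vm fence.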