From d935cc61d466f6cc7514032835f4fc379cb7e2ca Mon Sep 17 00:00:00 2001
From: Daniel Vetter
Date: Thu, 16 Sep 2010 15:13:11 +0200
Subject: drm_mm: add support for range-restricted fair-lru scans

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 include/drm/drm_mm.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/drm')

diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index bf01531193d5..e39177778601 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -62,11 +62,14 @@ struct drm_mm {
 	struct list_head unused_nodes;
 	int num_unused;
 	spinlock_t unused_lock;
+	unsigned int scan_check_range : 1;
 	unsigned scan_alignment;
 	unsigned long scan_size;
 	unsigned long scan_hit_start;
 	unsigned scan_hit_size;
 	unsigned scanned_blocks;
+	unsigned long scan_start;
+	unsigned long scan_end;
 };
 
 /*
@@ -145,6 +148,10 @@ static inline struct drm_mm *drm_get_mm(struct drm_mm_node *block)
 
 void drm_mm_init_scan(struct drm_mm *mm, unsigned long size,
 		      unsigned alignment);
+void drm_mm_init_scan_with_range(struct drm_mm *mm, unsigned long size,
+				 unsigned alignment,
+				 unsigned long start,
+				 unsigned long end);
 int drm_mm_scan_add_block(struct drm_mm_node *node);
 int drm_mm_scan_remove_block(struct drm_mm_node *node);
 
-- 
cgit v1.2.3


From a00b10c360b35d6431a94cbf130a4e162870d661 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Fri, 24 Sep 2010 21:15:47 +0100
Subject: drm/i915: Only enforce fence limits inside the GTT.

So long as we adhere to the fence registers rules for alignment and no
overlaps (including with unfenced accesses to linear memory) and account
for the tiled access in our size allocation, we do not have to allocate
the full fenced region for the object. This allows us to fight the bloat
tiling imposed on pre-i965 chipsets and frees up RAM for real use. [Inside
the GTT we still suffer the additional alignment constraints, so it doesn't
magic allow us to render larger scenes without stalls -- we need the
expanded GTT and fence pipelining to overcome those...]

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |   3 +-
 drivers/gpu/drm/i915/i915_dma.c         |   3 +
 drivers/gpu/drm/i915/i915_drv.h         |   8 +-
 drivers/gpu/drm/i915/i915_gem.c         | 290 +++++++++++++++++---------------
 drivers/gpu/drm/i915/i915_gem_tiling.c  |  32 ++--
 drivers/gpu/drm/i915/intel_display.c    |   7 +-
 drivers/gpu/drm/i915/intel_overlay.c    |   4 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c |   4 +-
 include/drm/i915_drm.h                  |   1 +
 9 files changed, 197 insertions(+), 155 deletions(-)

(limited to 'include/drm')

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 4fc1e05b769f..ba2af4e046ed 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -130,7 +130,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 	if (obj->fence_reg != I915_FENCE_REG_NONE)
 		seq_printf(m, " (fence: %d)", obj->fence_reg);
 	if (obj->gtt_space != NULL)
-		seq_printf(m, " (gtt_offset: %08x)", obj->gtt_offset);
+		seq_printf(m, " (gtt offset: %08x, size: %08x)",
+			   obj->gtt_offset, (unsigned int)obj->gtt_space->size);
 	if (obj->pin_mappable || obj->fault_mappable)
 		seq_printf(m, " (mappable)");
 	if (obj->ring != NULL)
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index dddca007912a..00d8fb3e989f 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -770,6 +770,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_BLT:
 		value = HAS_BLT(dev);
 		break;
+	case I915_PARAM_HAS_RELAXED_FENCING:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG_DRIVER("Unknown parameter %d\n",
 				 param->param);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3df8a624ddc9..7aa7f8abf892 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -746,6 +746,8 @@ struct drm_i915_gem_object {
 	 * Advice: are the backing pages purgeable?
 	 */
 	unsigned int madv : 2;
+	unsigned int fenceable : 1;
+	unsigned int mappable : 1;
 
 	/**
 	 * Current tiling mode for the object.
@@ -1005,7 +1007,7 @@ struct drm_gem_object * i915_gem_alloc_object(struct drm_device *dev,
 					      size_t size);
 void i915_gem_free_object(struct drm_gem_object *obj);
 int i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment,
-			bool mappable);
+			bool mappable, bool need_fence);
 void i915_gem_object_unpin(struct drm_gem_object *obj);
 int i915_gem_object_unbind(struct drm_gem_object *obj);
 void i915_gem_release_mmap(struct drm_gem_object *obj);
@@ -1068,10 +1070,6 @@ int i915_gem_evict_inactive(struct drm_device *dev);
 void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
 void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj);
 void i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj);
-bool i915_tiling_ok(struct drm_device *dev, int stride, int size,
-		    int tiling_mode);
-bool i915_gem_object_fence_offset_ok(struct drm_gem_object *obj,
-				     int tiling_mode);
 
 /* i915_gem_debug.c */
 void i915_gem_dump_object(struct drm_gem_object *obj, int len,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 08f57aedaf51..07ad1e354084 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -36,7 +36,8 @@
 #include <linux/pci.h>
 #include <linux/intel-gtt.h>
 
-static uint32_t i915_gem_get_gtt_alignment(struct drm_gem_object *obj);
+static uint32_t i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj_priv);
+static uint32_t i915_gem_get_gtt_size(struct drm_i915_gem_object *obj_priv);
 
 static int i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj,
 						  bool pipelined);
@@ -51,7 +52,9 @@ static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *o
 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj,
 					  bool interruptible);
 static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
-				       unsigned alignment, bool mappable);
+				       unsigned alignment,
+				       bool mappable,
+				       bool need_fence);
 static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 				struct drm_i915_gem_pwrite *args,
@@ -79,30 +82,26 @@ static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
 }
 
 static void i915_gem_info_add_gtt(struct drm_i915_private *dev_priv,
-				  struct drm_gem_object *obj)
+				  struct drm_i915_gem_object *obj)
 {
-	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
 	dev_priv->mm.gtt_count++;
-	dev_priv->mm.gtt_memory += obj->size;
-	if (obj_priv->gtt_offset < dev_priv->mm.gtt_mappable_end) {
+	dev_priv->mm.gtt_memory += obj->gtt_space->size;
+	if (obj->gtt_offset < dev_priv->mm.gtt_mappable_end) {
 		dev_priv->mm.mappable_gtt_used +=
-			min_t(size_t, obj->size,
-			      dev_priv->mm.gtt_mappable_end
-					- obj_priv->gtt_offset);
+			min_t(size_t, obj->gtt_space->size,
+			      dev_priv->mm.gtt_mappable_end - obj->gtt_offset);
 	}
 }
 
 static void i915_gem_info_remove_gtt(struct drm_i915_private *dev_priv,
-				     struct drm_gem_object *obj)
+				     struct drm_i915_gem_object *obj)
 {
-	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
 	dev_priv->mm.gtt_count--;
-	dev_priv->mm.gtt_memory -= obj->size;
-	if (obj_priv->gtt_offset < dev_priv->mm.gtt_mappable_end) {
+	dev_priv->mm.gtt_memory -= obj->gtt_space->size;
+	if (obj->gtt_offset < dev_priv->mm.gtt_mappable_end) {
 		dev_priv->mm.mappable_gtt_used -=
-			min_t(size_t, obj->size,
-			      dev_priv->mm.gtt_mappable_end
-					- obj_priv->gtt_offset);
+			min_t(size_t, obj->gtt_space->size,
+			      dev_priv->mm.gtt_mappable_end - obj->gtt_offset);
 	}
 }
 
@@ -113,47 +112,43 @@ static void i915_gem_info_remove_gtt(struct drm_i915_private *dev_priv,
  */
 static void
 i915_gem_info_update_mappable(struct drm_i915_private *dev_priv,
-			      struct drm_gem_object *obj,
+			      struct drm_i915_gem_object *obj,
 			      bool mappable)
 {
-	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
-
 	if (mappable) {
-		if (obj_priv->pin_mappable && obj_priv->fault_mappable)
+		if (obj->pin_mappable && obj->fault_mappable)
 			/* Combined state was already mappable. */
 			return;
 		dev_priv->mm.gtt_mappable_count++;
-		dev_priv->mm.gtt_mappable_memory += obj->size;
+		dev_priv->mm.gtt_mappable_memory += obj->gtt_space->size;
 	} else {
-		if (obj_priv->pin_mappable || obj_priv->fault_mappable)
+		if (obj->pin_mappable || obj->fault_mappable)
 			/* Combined state still mappable. */
 			return;
 		dev_priv->mm.gtt_mappable_count--;
-		dev_priv->mm.gtt_mappable_memory -= obj->size;
+		dev_priv->mm.gtt_mappable_memory -= obj->gtt_space->size;
 	}
 }
 
 static void i915_gem_info_add_pin(struct drm_i915_private *dev_priv,
-				  struct drm_gem_object *obj,
+				  struct drm_i915_gem_object *obj,
 				  bool mappable)
 {
-	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
 	dev_priv->mm.pin_count++;
-	dev_priv->mm.pin_memory += obj->size;
+	dev_priv->mm.pin_memory += obj->gtt_space->size;
 	if (mappable) {
-		obj_priv->pin_mappable = true;
+		obj->pin_mappable = true;
 		i915_gem_info_update_mappable(dev_priv, obj, true);
 	}
 }
 
 static void i915_gem_info_remove_pin(struct drm_i915_private *dev_priv,
-				     struct drm_gem_object *obj)
+				     struct drm_i915_gem_object *obj)
 {
-	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
 	dev_priv->mm.pin_count--;
-	dev_priv->mm.pin_memory -= obj->size;
-	if (obj_priv->pin_mappable) {
-		obj_priv->pin_mappable = false;
+	dev_priv->mm.pin_memory -= obj->gtt_space->size;
+	if (obj->pin_mappable) {
+		obj->pin_mappable = false;
 		i915_gem_info_update_mappable(dev_priv, obj, false);
 	}
 }
@@ -309,16 +304,6 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-static bool
-i915_gem_object_cpu_accessible(struct drm_i915_gem_object *obj)
-{
-	struct drm_device *dev = obj->base.dev;
-	drm_i915_private_t *dev_priv = dev->dev_private;
-
-	return obj->gtt_space == NULL ||
-		obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
-}
-
 static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
 {
 	drm_i915_private_t *dev_priv = obj->dev->dev_private;
@@ -1083,7 +1068,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	else if (obj_priv->tiling_mode == I915_TILING_NONE &&
 		 obj_priv->gtt_space &&
 		 obj->write_domain != I915_GEM_DOMAIN_CPU) {
-		ret = i915_gem_object_pin(obj, 0, true);
+		ret = i915_gem_object_pin(obj, 0, true, false);
 		if (ret)
 			goto out;
 
@@ -1307,11 +1292,19 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	/* Now bind it into the GTT if needed */
 	mutex_lock(&dev->struct_mutex);
 	BUG_ON(obj_priv->pin_count && !obj_priv->pin_mappable);
-	if (!i915_gem_object_cpu_accessible(obj_priv))
-		i915_gem_object_unbind(obj);
+
+	if (obj_priv->gtt_space) {
+		if (!obj_priv->mappable ||
+		    (obj_priv->tiling_mode && !obj_priv->fenceable)) {
+			ret = i915_gem_object_unbind(obj);
+			if (ret)
+				goto unlock;
+		}
+	}
 
 	if (!obj_priv->gtt_space) {
-		ret = i915_gem_object_bind_to_gtt(obj, 0, true);
+		ret = i915_gem_object_bind_to_gtt(obj, 0,
+						  true, obj_priv->tiling_mode);
 		if (ret)
 			goto unlock;
 	}
@@ -1322,7 +1315,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 	if (!obj_priv->fault_mappable) {
 		obj_priv->fault_mappable = true;
-		i915_gem_info_update_mappable(dev_priv, obj, true);
+		i915_gem_info_update_mappable(dev_priv, obj_priv, true);
 	}
 
 	/* Need a new fence register? */
@@ -1448,7 +1441,7 @@ i915_gem_release_mmap(struct drm_gem_object *obj)
 
 	if (obj_priv->fault_mappable) {
 		obj_priv->fault_mappable = false;
-		i915_gem_info_update_mappable(dev_priv, obj, false);
+		i915_gem_info_update_mappable(dev_priv, obj_priv, false);
 	}
 }
 
@@ -1473,32 +1466,51 @@ i915_gem_free_mmap_offset(struct drm_gem_object *obj)
  * potential fence register mapping if needed.
  */
 static uint32_t
-i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
+i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj_priv)
 {
-	struct drm_device *dev = obj->dev;
-	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
-	int start, i;
+	struct drm_device *dev = obj_priv->base.dev;
 
 	/*
 	 * Minimum alignment is 4k (GTT page size), but might be greater
 	 * if a fence register is needed for the object.
 	 */
-	if (INTEL_INFO(dev)->gen >= 4 || obj_priv->tiling_mode == I915_TILING_NONE)
+	if (INTEL_INFO(dev)->gen >= 4 ||
+	    obj_priv->tiling_mode == I915_TILING_NONE)
 		return 4096;
 
+	/*
+	 * Previous chips need to be aligned to the size of the smallest
+	 * fence register that can contain the object.
+	 */
+	return i915_gem_get_gtt_size(obj_priv);
+}
+
+static uint32_t
+i915_gem_get_gtt_size(struct drm_i915_gem_object *obj_priv)
+{
+	struct drm_device *dev = obj_priv->base.dev;
+	uint32_t size;
+
+	/*
+	 * Minimum alignment is 4k (GTT page size), but might be greater
+	 * if a fence register is needed for the object.
+	 */
+	if (INTEL_INFO(dev)->gen >= 4)
+		return obj_priv->base.size;
+
 	/*
 	 * Previous chips need to be aligned to the size of the smallest
 	 * fence register that can contain the object.
 	 */
 	if (INTEL_INFO(dev)->gen == 3)
-		start = 1024*1024;
+		size = 1024*1024;
 	else
-		start = 512*1024;
+		size = 512*1024;
 
-	for (i = start; i < obj->size; i <<= 1)
-		;
+	while (size < obj_priv->base.size)
+		size <<= 1;
 
-	return i;
+	return size;
 }
 
 /**
@@ -2253,8 +2265,10 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
 
 	i915_gem_object_put_pages_gtt(obj);
 
-	i915_gem_info_remove_gtt(dev_priv, obj);
+	i915_gem_info_remove_gtt(dev_priv, obj_priv);
 	list_del_init(&obj_priv->mm_list);
+	obj_priv->fenceable = true;
+	obj_priv->mappable = true;
 
 	drm_mm_put_block(obj_priv->gtt_space);
 	obj_priv->gtt_space = NULL;
@@ -2311,16 +2325,16 @@ i915_gpu_idle(struct drm_device *dev)
 	return 0;
 }
 
-static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg)
+static void sandybridge_write_fence_reg(struct drm_gem_object *obj)
 {
-	struct drm_gem_object *obj = reg->obj;
 	struct drm_device *dev = obj->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
+	u32 size = i915_gem_get_gtt_size(obj_priv);
 	int regnum = obj_priv->fence_reg;
 	uint64_t val;
 
-	val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
+	val = (uint64_t)((obj_priv->gtt_offset + size - 4096) &
 		    0xfffff000) << 32;
 	val |= obj_priv->gtt_offset & 0xfffff000;
 	val |= (uint64_t)((obj_priv->stride / 128) - 1) <<
@@ -2333,16 +2347,16 @@ static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg)
 	I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val);
 }
 
-static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
+static void i965_write_fence_reg(struct drm_gem_object *obj)
 {
-	struct drm_gem_object *obj = reg->obj;
 	struct drm_device *dev = obj->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
+	u32 size = i915_gem_get_gtt_size(obj_priv);
 	int regnum = obj_priv->fence_reg;
 	uint64_t val;
 
-	val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
+	val = (uint64_t)((obj_priv->gtt_offset + size - 4096) &
 		    0xfffff000) << 32;
 	val |= obj_priv->gtt_offset & 0xfffff000;
 	val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
@@ -2353,21 +2367,20 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
 	I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
 }
 
-static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
+static void i915_write_fence_reg(struct drm_gem_object *obj)
 {
-	struct drm_gem_object *obj = reg->obj;
 	struct drm_device *dev = obj->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
-	int regnum = obj_priv->fence_reg;
+	u32 size = i915_gem_get_gtt_size(obj_priv);
+	uint32_t fence_reg, val, pitch_val;
 	int tile_width;
-	uint32_t fence_reg, val;
-	uint32_t pitch_val;
 
 	if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
-	    (obj_priv->gtt_offset & (obj->size - 1))) {
-		WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n",
-		     __func__, obj_priv->gtt_offset, obj->size);
+	    (obj_priv->gtt_offset & (size - 1))) {
+		WARN(1, "%s: object 0x%08x [fenceable? %d] not 1M or size (0x%08x) aligned [gtt_space offset=%lx, size=%lx]\n",
+		     __func__, obj_priv->gtt_offset, obj_priv->fenceable, size,
+		     obj_priv->gtt_space->start, obj_priv->gtt_space->size);
 		return;
 	}
 
@@ -2390,23 +2403,24 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
 	val = obj_priv->gtt_offset;
 	if (obj_priv->tiling_mode == I915_TILING_Y)
 		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
-	val |= I915_FENCE_SIZE_BITS(obj->size);
+	val |= I915_FENCE_SIZE_BITS(size);
 	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
 	val |= I830_FENCE_REG_VALID;
 
-	if (regnum < 8)
-		fence_reg = FENCE_REG_830_0 + (regnum * 4);
+	fence_reg = obj_priv->fence_reg;
+	if (fence_reg < 8)
+		fence_reg = FENCE_REG_830_0 + fence_reg * 4;
 	else
-		fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4);
+		fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
 	I915_WRITE(fence_reg, val);
 }
 
-static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
+static void i830_write_fence_reg(struct drm_gem_object *obj)
 {
-	struct drm_gem_object *obj = reg->obj;
 	struct drm_device *dev = obj->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
+	u32 size = i915_gem_get_gtt_size(obj_priv);
 	int regnum = obj_priv->fence_reg;
 	uint32_t val;
 	uint32_t pitch_val;
@@ -2426,7 +2440,7 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
 	val = obj_priv->gtt_offset;
 	if (obj_priv->tiling_mode == I915_TILING_Y)
 		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
-	fence_size_bits = I830_FENCE_SIZE_BITS(obj->size);
+	fence_size_bits = I830_FENCE_SIZE_BITS(size);
 	WARN_ON(fence_size_bits & ~0x00000f00);
 	val |= fence_size_bits;
 	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
@@ -2438,10 +2452,9 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
 static int i915_find_fence_reg(struct drm_device *dev,
 			       bool interruptible)
 {
-	struct drm_i915_fence_reg *reg = NULL;
-	struct drm_i915_gem_object *obj_priv = NULL;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_gem_object *obj = NULL;
+	struct drm_i915_fence_reg *reg;
+	struct drm_i915_gem_object *obj_priv = NULL;
 	int i, avail, ret;
 
 	/* First try to find a free reg */
@@ -2460,33 +2473,31 @@ static int i915_find_fence_reg(struct drm_device *dev,
 		return -ENOSPC;
 
 	/* None available, try to steal one or wait for a user to finish */
-	i = I915_FENCE_REG_NONE;
+	avail = I915_FENCE_REG_NONE;
 	list_for_each_entry(reg, &dev_priv->mm.fence_list,
 			    lru_list) {
-		obj = reg->obj;
-		obj_priv = to_intel_bo(obj);
-
+		obj_priv = to_intel_bo(reg->obj);
 		if (obj_priv->pin_count)
 			continue;
 
 		/* found one! */
-		i = obj_priv->fence_reg;
+		avail = obj_priv->fence_reg;
 		break;
 	}
 
-	BUG_ON(i == I915_FENCE_REG_NONE);
+	BUG_ON(avail == I915_FENCE_REG_NONE);
 
 	/* We only have a reference on obj from the active list. put_fence_reg
 	 * might drop that one, causing a use-after-free in it. So hold a
 	 * private reference to obj like the other callers of put_fence_reg
 	 * (set_tiling ioctl) do. */
-	drm_gem_object_reference(obj);
-	ret = i915_gem_object_put_fence_reg(obj, interruptible);
-	drm_gem_object_unreference(obj);
+	drm_gem_object_reference(&obj_priv->base);
+	ret = i915_gem_object_put_fence_reg(&obj_priv->base, interruptible);
+	drm_gem_object_unreference(&obj_priv->base);
 	if (ret != 0)
 		return ret;
 
-	return i;
+	return avail;
 }
 
 /**
@@ -2551,22 +2562,23 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj,
 
 	switch (INTEL_INFO(dev)->gen) {
 	case 6:
-		sandybridge_write_fence_reg(reg);
+		sandybridge_write_fence_reg(obj);
 		break;
 	case 5:
 	case 4:
-		i965_write_fence_reg(reg);
+		i965_write_fence_reg(obj);
 		break;
 	case 3:
-		i915_write_fence_reg(reg);
+		i915_write_fence_reg(obj);
 		break;
 	case 2:
-		i830_write_fence_reg(reg);
+		i830_write_fence_reg(obj);
 		break;
 	}
 
-	trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg,
-			obj_priv->tiling_mode);
+	trace_i915_gem_object_get_fence(obj,
+					obj_priv->fence_reg,
+					obj_priv->tiling_mode);
 
 	return 0;
 }
@@ -2671,13 +2683,15 @@ i915_gem_object_put_fence_reg(struct drm_gem_object *obj,
 static int
 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
 			    unsigned alignment,
-			    bool mappable)
+			    bool mappable,
+			    bool need_fence)
 {
 	struct drm_device *dev = obj->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
 	struct drm_mm_node *free_space;
-	gfp_t gfpmask =  __GFP_NORETRY | __GFP_NOWARN;
+	gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
+	u32 size, fence_size, fence_alignment;
 	int ret;
 
 	if (obj_priv->madv != I915_MADV_WILLNEED) {
@@ -2685,13 +2699,18 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
 		return -EINVAL;
 	}
 
+	fence_size = i915_gem_get_gtt_size(obj_priv);
+	fence_alignment = i915_gem_get_gtt_alignment(obj_priv);
+
 	if (alignment == 0)
-		alignment = i915_gem_get_gtt_alignment(obj);
-	if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
+		alignment = need_fence ? fence_alignment : 4096;
+	if (need_fence && alignment & (fence_alignment - 1)) {
 		DRM_ERROR("Invalid object alignment requested %u\n", alignment);
 		return -EINVAL;
 	}
 
+	size = need_fence ? fence_size : obj->size;
+
 	/* If the object is bigger than the entire aperture, reject it early
 	 * before evicting everything in a vain attempt to find space.
 	 */
@@ -2705,32 +2724,29 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
 	if (mappable)
 		free_space =
 			drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
-						    obj->size, alignment, 0,
+						    size, alignment, 0,
 						    dev_priv->mm.gtt_mappable_end,
 						    0);
 	else
 		free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
-						obj->size, alignment, 0);
+						size, alignment, 0);
 
 	if (free_space != NULL) {
 		if (mappable)
 			obj_priv->gtt_space =
 				drm_mm_get_block_range_generic(free_space,
-							       obj->size,
-							       alignment, 0,
+							       size, alignment, 0,
 							       dev_priv->mm.gtt_mappable_end,
 							       0);
 		else
 			obj_priv->gtt_space =
-				drm_mm_get_block(free_space, obj->size,
-						 alignment);
+				drm_mm_get_block(free_space, size, alignment);
 	}
 	if (obj_priv->gtt_space == NULL) {
 		/* If the gtt is empty and we're still having trouble
 		 * fitting our object in, we're out of memory.
 		 */
-		ret = i915_gem_evict_something(dev, obj->size, alignment,
-					       mappable);
+		ret = i915_gem_evict_something(dev, size, alignment, mappable);
 		if (ret)
 			return ret;
 
@@ -2744,7 +2760,7 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
 
 		if (ret == -ENOMEM) {
 			/* first try to clear up some space from the GTT */
-			ret = i915_gem_evict_something(dev, obj->size,
+			ret = i915_gem_evict_something(dev, size,
 						       alignment, mappable);
 			if (ret) {
 				/* now try to shrink everyone else */
@@ -2775,8 +2791,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
 		drm_mm_put_block(obj_priv->gtt_space);
 		obj_priv->gtt_space = NULL;
 
-		ret = i915_gem_evict_something(dev, obj->size, alignment,
-					       mappable);
+		ret = i915_gem_evict_something(dev, size,
+					       alignment, mappable);
 		if (ret)
 			return ret;
 
@@ -2787,7 +2803,7 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
 
 	/* keep track of bounds object by adding it to the inactive list */
 	list_add_tail(&obj_priv->mm_list, &dev_priv->mm.inactive_list);
-	i915_gem_info_add_gtt(dev_priv, obj);
+	i915_gem_info_add_gtt(dev_priv, obj_priv);
 
 	/* Assert that the object is not currently in any GPU domain. As it
 	 * wasn't in the GTT, there shouldn't be any way it could have been in
@@ -2798,6 +2814,13 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
 
 	trace_i915_gem_object_bind(obj, obj_priv->gtt_offset, mappable);
 
+	obj_priv->fenceable =
+		obj_priv->gtt_space->size == fence_size &&
+		(obj_priv->gtt_space->start & (fence_alignment -1)) == 0;
+
+	obj_priv->mappable =
+		obj_priv->gtt_offset + obj->size <= dev_priv->mm.gtt_mappable_end;
+
 	return 0;
 }
 
@@ -3516,9 +3539,8 @@ i915_gem_execbuffer_pin(struct drm_device *dev,
 				entry->relocation_count ? true : need_fence;
 
 			/* Check fence reg constraints and rebind if necessary */
-			if (need_fence &&
-			    !i915_gem_object_fence_offset_ok(&obj->base,
-							     obj->tiling_mode)) {
+			if ((need_fence && !obj->fenceable) ||
+			    (need_mappable && !obj->mappable)) {
 				ret = i915_gem_object_unbind(&obj->base);
 				if (ret)
 					break;
@@ -3526,7 +3548,8 @@ i915_gem_execbuffer_pin(struct drm_device *dev,
 
 			ret = i915_gem_object_pin(&obj->base,
 						  entry->alignment,
-						  need_mappable);
+						  need_mappable,
+						  need_fence);
 			if (ret)
 				break;
 
@@ -4097,7 +4120,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
 
 int
 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment,
-		    bool mappable)
+		    bool mappable, bool need_fence)
 {
 	struct drm_device *dev = obj->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -4108,14 +4131,15 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment,
 	WARN_ON(i915_verify_lists(dev));
 
 	if (obj_priv->gtt_space != NULL) {
-		if (alignment == 0)
-			alignment = i915_gem_get_gtt_alignment(obj);
-		if (obj_priv->gtt_offset & (alignment - 1) ||
-		    (mappable && !i915_gem_object_cpu_accessible(obj_priv))) {
+		if ((alignment && obj_priv->gtt_offset & (alignment - 1)) ||
+		    (need_fence && !obj_priv->fenceable) ||
+		    (mappable && !obj_priv->mappable)) {
 			WARN(obj_priv->pin_count,
 			     "bo is already pinned with incorrect alignment:"
-			     " offset=%x, req.alignment=%x\n",
-			     obj_priv->gtt_offset, alignment);
+			     " offset=%x, req.alignment=%x, need_fence=%d, fenceable=%d, mappable=%d, cpu_accessible=%d\n",
+			     obj_priv->gtt_offset, alignment,
+			     need_fence, obj_priv->fenceable,
+			     mappable, obj_priv->mappable);
 			ret = i915_gem_object_unbind(obj);
 			if (ret)
 				return ret;
@@ -4123,13 +4147,14 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment,
 	}
 
 	if (obj_priv->gtt_space == NULL) {
-		ret = i915_gem_object_bind_to_gtt(obj, alignment, mappable);
+		ret = i915_gem_object_bind_to_gtt(obj, alignment,
+						  mappable, need_fence);
 		if (ret)
 			return ret;
 	}
 
 	if (obj_priv->pin_count++ == 0) {
-		i915_gem_info_add_pin(dev_priv, obj, mappable);
+		i915_gem_info_add_pin(dev_priv, obj_priv, mappable);
 		if (!obj_priv->active)
 			list_move_tail(&obj_priv->mm_list,
 				       &dev_priv->mm.pinned_list);
@@ -4155,7 +4180,7 @@ i915_gem_object_unpin(struct drm_gem_object *obj)
 		if (!obj_priv->active)
 			list_move_tail(&obj_priv->mm_list,
 				       &dev_priv->mm.inactive_list);
-		i915_gem_info_remove_pin(dev_priv, obj);
+		i915_gem_info_remove_pin(dev_priv, obj_priv);
 	}
 	WARN_ON(i915_verify_lists(dev));
 }
@@ -4196,7 +4221,8 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
 	obj_priv->user_pin_count++;
 	obj_priv->pin_filp = file_priv;
 	if (obj_priv->user_pin_count == 1) {
-		ret = i915_gem_object_pin(obj, args->alignment, true);
+		ret = i915_gem_object_pin(obj, args->alignment,
+					  true, obj_priv->tiling_mode);
 		if (ret)
 			goto out;
 	}
@@ -4389,6 +4415,8 @@ struct drm_gem_object * i915_gem_alloc_object(struct drm_device *dev,
 	INIT_LIST_HEAD(&obj->ring_list);
 	INIT_LIST_HEAD(&obj->gpu_write_list);
 	obj->madv = I915_MADV_WILLNEED;
+	obj->fenceable = true;
+	obj->mappable = true;
 
 	return &obj->base;
 }
@@ -4508,7 +4536,7 @@ i915_gem_init_pipe_control(struct drm_device *dev)
 	obj_priv = to_intel_bo(obj);
 	obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
 
-	ret = i915_gem_object_pin(obj, 4096, true);
+	ret = i915_gem_object_pin(obj, 4096, true, false);
 	if (ret)
 		goto err_unref;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index af352de70be1..0597a737ebad 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -181,7 +181,7 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
 }
 
 /* Check pitch constriants for all chips & tiling formats */
-bool
+static bool
 i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
 {
 	int tile_width;
@@ -232,25 +232,35 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
 	return true;
 }
 
-bool
-i915_gem_object_fence_offset_ok(struct drm_gem_object *obj, int tiling_mode)
+/* Is the current GTT allocation valid for the change in tiling? */
+static bool
+i915_gem_object_fence_ok(struct drm_gem_object *obj, int tiling_mode)
 {
-	struct drm_device *dev = obj->dev;
 	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
-
-	if (obj_priv->gtt_space == NULL)
-		return true;
+	u32 size;
 
 	if (tiling_mode == I915_TILING_NONE)
 		return true;
 
-	if (INTEL_INFO(dev)->gen >= 4)
+	if (INTEL_INFO(obj->dev)->gen >= 4)
 		return true;
 
-	if (obj_priv->gtt_offset & (obj->size - 1))
+	/*
+	 * Previous chips need to be aligned to the size of the smallest
+	 * fence register that can contain the object.
+	 */
+	if (INTEL_INFO(obj->dev)->gen == 3)
+		size = 1024*1024;
+	else
+		size = 512*1024;
+
+	while (size < obj_priv->base.size)
+		size <<= 1;
+
+	if (obj_priv->gtt_offset & (size - 1))
 		return false;
 
-	if (IS_GEN3(dev)) {
+	if (INTEL_INFO(obj->dev)->gen == 3) {
 		if (obj_priv->gtt_offset & ~I915_FENCE_START_MASK)
 			return false;
 	} else {
@@ -331,7 +341,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 		 * tiling mode. Otherwise we can just leave it alone, but
 		 * need to ensure that any fence register is cleared.
 		 */
-		if (!i915_gem_object_fence_offset_ok(obj, args->tiling_mode))
+		if (!i915_gem_object_fence_ok(obj, args->tiling_mode))
 			ret = i915_gem_object_unbind(obj);
 		else if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
 			ret = i915_gem_object_put_fence_reg(obj, true);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index c9c4c707cf1a..4954af23b7c8 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1461,7 +1461,8 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
 		BUG();
 	}
 
-	ret = i915_gem_object_pin(obj, alignment, true);
+	ret = i915_gem_object_pin(obj, alignment,
+				  !pipelined, obj_priv->tiling_mode);
 	if (ret)
 		return ret;
 
@@ -4353,7 +4354,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc,
 	/* we only need to pin inside GTT if cursor is non-phy */
 	mutex_lock(&dev->struct_mutex);
 	if (!dev_priv->info->cursor_needs_physical) {
-		ret = i915_gem_object_pin(bo, PAGE_SIZE, true);
+		ret = i915_gem_object_pin(bo, PAGE_SIZE, true, false);
 		if (ret) {
 			DRM_ERROR("failed to pin cursor bo\n");
 			goto fail_locked;
@@ -5517,7 +5518,7 @@ intel_alloc_context_page(struct drm_device *dev)
 	}
 
 	mutex_lock(&dev->struct_mutex);
-	ret = i915_gem_object_pin(ctx, 4096, true);
+	ret = i915_gem_object_pin(ctx, 4096, false, false);
 	if (ret) {
 		DRM_ERROR("failed to pin power context: %d\n", ret);
 		goto err_unref;
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index beda2016eb16..e62e1b3d243f 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -781,7 +781,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	if (ret != 0)
 		return ret;
 
-	ret = i915_gem_object_pin(new_bo, PAGE_SIZE, true);
+	ret = i915_gem_object_pin(new_bo, PAGE_SIZE, false, false);
 	if (ret != 0)
 		return ret;
 
@@ -1423,7 +1423,7 @@ void intel_setup_overlay(struct drm_device *dev)
                 }
 		overlay->flip_addr = overlay->reg_bo->phys_obj->handle->busaddr;
 	} else {
-		ret = i915_gem_object_pin(reg_bo, PAGE_SIZE, true);
+		ret = i915_gem_object_pin(reg_bo, PAGE_SIZE, true, false);
 		if (ret) {
                         DRM_ERROR("failed to pin overlay register bo\n");
                         goto out_free_bo;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e88214ef24b1..632a98e0ba5c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -547,7 +547,7 @@ static int init_status_page(struct intel_ring_buffer *ring)
 	obj_priv = to_intel_bo(obj);
 	obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
 
-	ret = i915_gem_object_pin(obj, 4096, true);
+	ret = i915_gem_object_pin(obj, 4096, true, false);
 	if (ret != 0) {
 		goto err_unref;
 	}
@@ -603,7 +603,7 @@ int intel_init_ring_buffer(struct drm_device *dev,
 
 	ring->gem_object = obj;
 
-	ret = i915_gem_object_pin(obj, PAGE_SIZE, true);
+	ret = i915_gem_object_pin(obj, PAGE_SIZE, true, false);
 	if (ret)
 		goto err_unref;
 
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 8c641bed9bbd..b20dbb2d7174 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -287,6 +287,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXECBUF2          9
 #define I915_PARAM_HAS_BSD		 10
 #define I915_PARAM_HAS_BLT		 11
+#define I915_PARAM_HAS_RELAXED_FENCING	 12
 
 typedef struct drm_i915_getparam {
 	int param;
-- 
cgit v1.2.3


From 0f8c6d7ca9257d6a01671ab69b897860d3ae9bc0 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Mon, 1 Nov 2010 12:38:44 +0000
Subject: drm/i915: Move the invalidate|flush information out of the device
 struct

... and into a local structure scoped for the single function in which
it is used.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h |  2 --
 drivers/gpu/drm/i915/i915_gem.c | 45 +++++++++++++++++++++--------------------
 include/drm/drmP.h              |  2 --
 3 files changed, 23 insertions(+), 26 deletions(-)

(limited to 'include/drm')

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3f8786049cb6..a0063f82fa33 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -642,8 +642,6 @@ typedef struct drm_i915_private {
 		/* storage for physical objects */
 		struct drm_i915_gem_phys_object *phys_objs[I915_MAX_PHYS_OBJECT];
 
-		uint32_t flush_rings;
-
 		/* accounting, useful for userland debugging */
 		size_t object_memory;
 		size_t pin_memory;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c797d2b9b233..ffea847c8a0d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -36,6 +36,12 @@
 #include <linux/pci.h>
 #include <linux/intel-gtt.h>
 
+struct change_domains {
+	uint32_t invalidate_domains;
+	uint32_t flush_domains;
+	uint32_t flush_rings;
+};
+
 static uint32_t i915_gem_get_gtt_alignment(struct drm_i915_gem_object *obj_priv);
 static uint32_t i915_gem_get_gtt_size(struct drm_i915_gem_object *obj_priv);
 
@@ -3167,10 +3173,9 @@ i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
  */
 static void
 i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj,
-				  struct intel_ring_buffer *ring)
+				  struct intel_ring_buffer *ring,
+				  struct change_domains *cd)
 {
-	struct drm_device		*dev = obj->dev;
-	struct drm_i915_private		*dev_priv = dev->dev_private;
 	struct drm_i915_gem_object	*obj_priv = to_intel_bo(obj);
 	uint32_t			invalidate_domains = 0;
 	uint32_t			flush_domains = 0;
@@ -3216,12 +3221,12 @@ i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj,
 	if (flush_domains == 0 && obj->pending_write_domain == 0)
 		obj->pending_write_domain = obj->write_domain;
 
-	dev->invalidate_domains |= invalidate_domains;
-	dev->flush_domains |= flush_domains;
+	cd->invalidate_domains |= invalidate_domains;
+	cd->flush_domains |= flush_domains;
 	if (flush_domains & I915_GEM_GPU_DOMAINS)
-		dev_priv->mm.flush_rings |= obj_priv->ring->id;
+		cd->flush_rings |= obj_priv->ring->id;
 	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
-		dev_priv->mm.flush_rings |= ring->id;
+		cd->flush_rings |= ring->id;
 }
 
 /**
@@ -3590,30 +3595,26 @@ i915_gem_execbuffer_move_to_gpu(struct drm_device *dev,
 				struct drm_gem_object **objects,
 				int count)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct change_domains cd;
 	int ret, i;
 
-	/* Zero the global flush/invalidate flags. These
-	 * will be modified as new domains are computed
-	 * for each object
-	 */
-	dev->invalidate_domains = 0;
-	dev->flush_domains = 0;
-	dev_priv->mm.flush_rings = 0;
+	cd.invalidate_domains = 0;
+	cd.flush_domains = 0;
+	cd.flush_rings = 0;
 	for (i = 0; i < count; i++)
-		i915_gem_object_set_to_gpu_domain(objects[i], ring);
+		i915_gem_object_set_to_gpu_domain(objects[i], ring, &cd);
 
-	if (dev->invalidate_domains | dev->flush_domains) {
+	if (cd.invalidate_domains | cd.flush_domains) {
 #if WATCH_EXEC
 		DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
 			  __func__,
-			 dev->invalidate_domains,
-			 dev->flush_domains);
+			 cd.invalidate_domains,
+			 cd.flush_domains);
 #endif
 		i915_gem_flush(dev, file,
-			       dev->invalidate_domains,
-			       dev->flush_domains,
-			       dev_priv->mm.flush_rings);
+			       cd.invalidate_domains,
+			       cd.flush_domains,
+			       cd.flush_rings);
 	}
 
 	for (i = 0; i < count; i++) {
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 274eaaa15c36..d4bc0f5cab8f 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1041,8 +1041,6 @@ struct drm_device {
 	/*@{ */
 	spinlock_t object_name_lock;
 	struct idr object_name_idr;
-	uint32_t invalidate_domains;    /* domains pending invalidation */
-	uint32_t flush_domains;         /* domains pending flush */
 	/*@} */
 
 };
-- 
cgit v1.2.3


From 27641c3f003e7f3b6585c01d8a788883603eb262 Mon Sep 17 00:00:00 2001
From: Mario Kleiner
Date: Sat, 23 Oct 2010 04:20:23 +0200
Subject: drm/vblank: Add support for precise vblank timestamping.

The DRI2 swap & sync implementation needs precise
vblank counts and precise timestamps corresponding
to those vblank counts. For conformance to the OpenML
OML_sync_control extension specification the DRM
timestamp associated with a vblank count should
correspond to the start of video scanout of the first
scanline of the video frame following the vblank
interval for that vblank count.

Therefore we need to carry around precise timestamps
for vblanks. Currently the DRM and KMS drivers generate
timestamps ad-hoc via do_gettimeofday() in some
places. The resulting timestamps are sometimes not
very precise due to interrupt handling delays, they
don't conform to OML_sync_control and some are wrong,
as they aren't taken synchronized to the vblank.

This patch implements support inside the drm core
for precise and robust timestamping. It consists
of the following interrelated pieces.

1. Vblank timestamp caching:

A per-crtc ringbuffer stores the most recent vblank
timestamps corresponding to vblank counts.

The ringbuffer can be read out lock-free via the
accessor function:

struct timeval timestamp;
vblankcount = drm_vblank_count_and_time(dev, crtcid, &timestamp).

The function returns the current vblank count and
the corresponding timestamp for start of video
scanout following the vblank interval. It can be
used anywhere between enclosing drm_vblank_get(dev, crtcid)
and drm_vblank_put(dev,crtcid) statements. It is used
inside the drmWaitVblank ioctl and in the vblank event
queueing and handling. It should be used by kms drivers for
timestamping of bufferswap completion.

The timestamp ringbuffer is reinitialized each time
vblank irq's get reenabled in drm_vblank_get()/
drm_update_vblank_count(). It is invalidated when
vblank irq's get disabled.

The ringbuffer is updated inside drm_handle_vblank()
at each vblank irq.

2. Calculation of precise vblank timestamps:

drm_get_last_vbltimestamp() is used to compute the
timestamp for the end of the most recent vblank (if
inside active scanout), or the expected end of the
current vblank interval (if called inside a vblank
interval). The function calls into a new optional kms
driver entry point dev->driver->get_vblank_timestamp()
which is supposed to provide the precise timestamp.
If a kms driver doesn't implement the entry point or
if the call fails, a simple do_gettimeofday() timestamp
is returned as crude approximation of the true vblank time.

A new drm module parameter drm.timestamp_precision_usec
allows to disable high precision timestamps (if set to
zero) or to specify the maximum acceptable error in
the timestamps in microseconds.

Kms drivers could implement their get_vblank_timestamp()
function in a gpu specific way, as long as returned
timestamps conform to OML_sync_control, e.g., by use
of gpu specific hardware timestamps.

Optionally, kms drivers can simply wrap and use the new
utility function drm_calc_vbltimestamp_from_scanoutpos().
This function calls a new optional kms driver function
dev->driver->get_scanout_position() which returns the
current horizontal and vertical video scanout position
of the crtc. The scanout position together with the
drm_display_timing of the current video mode is used
to calculate elapsed time relative to start of active scanout
for the current video frame. This elapsed time is subtracted
from the current do_gettimeofday() time to get the timestamp
corresponding to start of video scanout. Currently
non-interlaced, non-doublescan video modes, with or
without panel scaling are handled correctly. Interlaced/
doublescan modes are tbd in a future patch.

3. Filtering of redundant vblank irq's and removal of
some race-conditions in the vblank irq enable/disable path:

Some gpu's (e.g., Radeon R500/R600) send spurious vblank
irq's outside the vblank if vblank irq's get reenabled.
These get detected by use of the vblank timestamps and
filtered out to avoid miscounting of vblanks.

Some race-conditions between the vblank irq enable/disable
functions, the vblank irq handler and the gpu itself (updating
its hardware vblank counter in the "wrong" moment) are
fixed inside vblank_disable_and_save() and
drm_update_vblank_count() by use of the vblank timestamps and
a new spinlock dev->vblank_time_lock.

The time until vblank irq disable is now configurable via
a new drm module parameter drm.vblankoffdelay to allow
experimentation with timeouts that are much shorter than
the current 5 seconds and should allow longer vblank off
periods for better power savings.

Followup patches will use these new functions to
implement precise timestamping for the intel and radeon
kms drivers.

Signed-off-by: Mario Kleiner <mario.kleiner@tuebingen.mpg.de>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_crtc_helper.c |  13 +-
 drivers/gpu/drm/drm_irq.c         | 563 ++++++++++++++++++++++++++++++++++++--
 drivers/gpu/drm/drm_stub.c        |  10 +
 include/drm/drmP.h                |  93 +++++++
 include/drm/drm_crtc.h            |   9 +
 5 files changed, 662 insertions(+), 26 deletions(-)

(limited to 'include/drm')

diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index f7af91cb273d..4c200931a6bc 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -336,7 +336,7 @@ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc,
 			      struct drm_framebuffer *old_fb)
 {
 	struct drm_device *dev = crtc->dev;
-	struct drm_display_mode *adjusted_mode, saved_mode;
+	struct drm_display_mode *adjusted_mode, saved_mode, saved_hwmode;
 	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
 	struct drm_encoder_helper_funcs *encoder_funcs;
 	int saved_x, saved_y;
@@ -350,6 +350,7 @@ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc,
 	if (!crtc->enabled)
 		return true;
 
+	saved_hwmode = crtc->hwmode;
 	saved_mode = crtc->mode;
 	saved_x = crtc->x;
 	saved_y = crtc->y;
@@ -427,11 +428,21 @@ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc,
 
 	}
 
+	/* Store real post-adjustment hardware mode. */
+	crtc->hwmode = *adjusted_mode;
+
+	/* Calculate and store various constants which
+	 * are later needed by vblank and swap-completion
+	 * timestamping. They are derived from true hwmode.
+	 */
+	drm_calc_timestamping_constants(crtc);
+
 	/* XXX free adjustedmode */
 	drm_mode_destroy(dev, adjusted_mode);
 	/* FIXME: add subpixel order */
 done:
 	if (!ret) {
+		crtc->hwmode = saved_hwmode;
 		crtc->mode = saved_mode;
 		crtc->x = saved_x;
 		crtc->y = saved_y;
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 9d3a5030b6e1..4e82d0d3c378 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -40,6 +40,22 @@
 #include <linux/slab.h>
 
 #include <linux/vgaarb.h>
+
+/* Access macro for slots in vblank timestamp ringbuffer. */
+#define vblanktimestamp(dev, crtc, count) ( \
+	(dev)->_vblank_time[(crtc) * DRM_VBLANKTIME_RBSIZE + \
+	((count) % DRM_VBLANKTIME_RBSIZE)])
+
+/* Retry timestamp calculation up to 3 times to satisfy
+ * drm_timestamp_precision before giving up.
+ */
+#define DRM_TIMESTAMP_MAXRETRIES 3
+
+/* Threshold in nanoseconds for detection of redundant
+ * vblank irq in drm_handle_vblank(). 1 msec should be ok.
+ */
+#define DRM_REDUNDANT_VBLIRQ_THRESH_NS 1000000
+
 /**
  * Get interrupt from bus id.
  *
@@ -77,6 +93,87 @@ int drm_irq_by_busid(struct drm_device *dev, void *data,
 	return 0;
 }
 
+/*
+ * Clear vblank timestamp buffer for a crtc.
+ */
+static void clear_vblank_timestamps(struct drm_device *dev, int crtc)
+{
+	memset(&dev->_vblank_time[crtc * DRM_VBLANKTIME_RBSIZE], 0,
+		DRM_VBLANKTIME_RBSIZE * sizeof(struct timeval));
+}
+
+/*
+ * Disable vblank irq's on crtc, make sure that last vblank count
+ * of hardware and corresponding consistent software vblank counter
+ * are preserved, even if there are any spurious vblank irq's after
+ * disable.
+ */
+static void vblank_disable_and_save(struct drm_device *dev, int crtc)
+{
+	unsigned long irqflags;
+	u32 vblcount;
+	s64 diff_ns;
+	int vblrc;
+	struct timeval tvblank;
+
+	/* Prevent vblank irq processing while disabling vblank irqs,
+	 * so no updates of timestamps or count can happen after we've
+	 * disabled. Needed to prevent races in case of delayed irq's.
+	 * Disable preemption, so vblank_time_lock is held as short as
+	 * possible, even under a kernel with PREEMPT_RT patches.
+	 */
+	preempt_disable();
+	spin_lock_irqsave(&dev->vblank_time_lock, irqflags);
+
+	dev->driver->disable_vblank(dev, crtc);
+	dev->vblank_enabled[crtc] = 0;
+
+	/* No further vblank irq's will be processed after
+	 * this point. Get current hardware vblank count and
+	 * vblank timestamp, repeat until they are consistent.
+	 *
+	 * FIXME: There is still a race condition here and in
+	 * drm_update_vblank_count() which can cause off-by-one
+	 * reinitialization of software vblank counter. If gpu
+	 * vblank counter doesn't increment exactly at the leading
+	 * edge of a vblank interval, then we can lose 1 count if
+	 * we happen to execute between start of vblank and the
+	 * delayed gpu counter increment.
+	 */
+	do {
+		dev->last_vblank[crtc] = dev->driver->get_vblank_counter(dev, crtc);
+		vblrc = drm_get_last_vbltimestamp(dev, crtc, &tvblank, 0);
+	} while (dev->last_vblank[crtc] != dev->driver->get_vblank_counter(dev, crtc));
+
+	/* Compute time difference to stored timestamp of last vblank
+	 * as updated by last invocation of drm_handle_vblank() in vblank irq.
+	 */
+	vblcount = atomic_read(&dev->_vblank_count[crtc]);
+	diff_ns = timeval_to_ns(&tvblank) -
+		  timeval_to_ns(&vblanktimestamp(dev, crtc, vblcount));
+
+	/* If there is at least 1 msec difference between the last stored
+	 * timestamp and tvblank, then we are currently executing our
+	 * disable inside a new vblank interval, the tvblank timestamp
+	 * corresponds to this new vblank interval and the irq handler
+	 * for this vblank didn't run yet and won't run due to our disable.
+	 * Therefore we need to do the job of drm_handle_vblank() and
+	 * increment the vblank counter by one to account for this vblank.
+	 *
+	 * Skip this step if there isn't any high precision timestamp
+	 * available. In that case we can't account for this and just
+	 * hope for the best.
+	 */
+	if ((vblrc > 0) && (abs(diff_ns) > 1000000))
+		atomic_inc(&dev->_vblank_count[crtc]);
+
+	/* Invalidate all timestamps while vblank irq's are off. */
+	clear_vblank_timestamps(dev, crtc);
+
+	spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags);
+	preempt_enable();
+}
+
 static void vblank_disable_fn(unsigned long arg)
 {
 	struct drm_device *dev = (struct drm_device *)arg;
@@ -91,10 +188,7 @@ static void vblank_disable_fn(unsigned long arg)
 		if (atomic_read(&dev->vblank_refcount[i]) == 0 &&
 		    dev->vblank_enabled[i]) {
 			DRM_DEBUG("disabling vblank on crtc %d\n", i);
-			dev->last_vblank[i] =
-				dev->driver->get_vblank_counter(dev, i);
-			dev->driver->disable_vblank(dev, i);
-			dev->vblank_enabled[i] = 0;
+			vblank_disable_and_save(dev, i);
 		}
 		spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
 	}
@@ -117,6 +211,7 @@ void drm_vblank_cleanup(struct drm_device *dev)
 	kfree(dev->last_vblank);
 	kfree(dev->last_vblank_wait);
 	kfree(dev->vblank_inmodeset);
+	kfree(dev->_vblank_time);
 
 	dev->num_crtcs = 0;
 }
@@ -129,6 +224,8 @@ int drm_vblank_init(struct drm_device *dev, int num_crtcs)
 	setup_timer(&dev->vblank_disable_timer, vblank_disable_fn,
 		    (unsigned long)dev);
 	spin_lock_init(&dev->vbl_lock);
+	spin_lock_init(&dev->vblank_time_lock);
+
 	dev->num_crtcs = num_crtcs;
 
 	dev->vbl_queue = kmalloc(sizeof(wait_queue_head_t) * num_crtcs,
@@ -161,6 +258,19 @@ int drm_vblank_init(struct drm_device *dev, int num_crtcs)
 	if (!dev->vblank_inmodeset)
 		goto err;
 
+	dev->_vblank_time = kcalloc(num_crtcs * DRM_VBLANKTIME_RBSIZE,
+				    sizeof(struct timeval), GFP_KERNEL);
+	if (!dev->_vblank_time)
+		goto err;
+
+	DRM_INFO("Supports vblank timestamp caching Rev 1 (10.10.2010).\n");
+
+	/* Driver specific high-precision vblank timestamping supported? */
+	if (dev->driver->get_vblank_timestamp)
+		DRM_INFO("Driver supports precise vblank timestamp query.\n");
+	else
+		DRM_INFO("No driver support for vblank timestamp query.\n");
+
 	/* Zero per-crtc vblank stuff */
 	for (i = 0; i < num_crtcs; i++) {
 		init_waitqueue_head(&dev->vbl_queue[i]);
@@ -279,7 +389,7 @@ EXPORT_SYMBOL(drm_irq_install);
  *
  * Calls the driver's \c drm_driver_irq_uninstall() function, and stops the irq.
  */
-int drm_irq_uninstall(struct drm_device * dev)
+int drm_irq_uninstall(struct drm_device *dev)
 {
 	unsigned long irqflags;
 	int irq_enabled, i;
@@ -335,7 +445,9 @@ int drm_control(struct drm_device *dev, void *data,
 {
 	struct drm_control *ctl = data;
 
-	/* if we haven't irq we fallback for compatibility reasons - this used to be a separate function in drm_dma.h */
+	/* if we haven't irq we fallback for compatibility reasons -
+	 * this used to be a separate function in drm_dma.h
+	 */
 
 
 	switch (ctl->func) {
@@ -359,6 +471,287 @@ int drm_control(struct drm_device *dev, void *data,
 	}
 }
 
+/**
+ * drm_calc_timestamping_constants - Calculate and
+ * store various constants which are later needed by
+ * vblank and swap-completion timestamping, e.g, by
+ * drm_calc_vbltimestamp_from_scanoutpos().
+ * They are derived from crtc's true scanout timing,
+ * so they take things like panel scaling or other
+ * adjustments into account.
+ *
+ * @crtc drm_crtc whose timestamp constants should be updated.
+ *
+ */
+void drm_calc_timestamping_constants(struct drm_crtc *crtc)
+{
+	s64 linedur_ns = 0, pixeldur_ns = 0, framedur_ns = 0;
+	u64 dotclock;
+
+	/* Dot clock in Hz: */
+	dotclock = (u64) crtc->hwmode.clock * 1000;
+
+	/* Valid dotclock? */
+	if (dotclock > 0) {
+		/* Convert scanline length in pixels and video dot clock to
+		 * line duration, frame duration and pixel duration in
+		 * nanoseconds:
+		 */
+		pixeldur_ns = (s64) div64_u64(1000000000, dotclock);
+		linedur_ns  = (s64) div64_u64(((u64) crtc->hwmode.crtc_htotal *
+					      1000000000), dotclock);
+		framedur_ns = (s64) crtc->hwmode.crtc_vtotal * linedur_ns;
+	} else
+		DRM_ERROR("crtc %d: Can't calculate constants, dotclock = 0!\n",
+			  crtc->base.id);
+
+	crtc->pixeldur_ns = pixeldur_ns;
+	crtc->linedur_ns  = linedur_ns;
+	crtc->framedur_ns = framedur_ns;
+
+	DRM_DEBUG("crtc %d: hwmode: htotal %d, vtotal %d, vdisplay %d\n",
+		  crtc->base.id, crtc->hwmode.crtc_htotal,
+		  crtc->hwmode.crtc_vtotal, crtc->hwmode.crtc_vdisplay);
+	DRM_DEBUG("crtc %d: clock %d kHz framedur %d linedur %d, pixeldur %d\n",
+		  crtc->base.id, (int) dotclock/1000, (int) framedur_ns,
+		  (int) linedur_ns, (int) pixeldur_ns);
+}
+EXPORT_SYMBOL(drm_calc_timestamping_constants);
+
+/**
+ * drm_calc_vbltimestamp_from_scanoutpos - helper routine for kms
+ * drivers. Implements calculation of exact vblank timestamps from
+ * given drm_display_mode timings and current video scanout position
+ * of a crtc. This can be called from within get_vblank_timestamp()
+ * implementation of a kms driver to implement the actual timestamping.
+ *
+ * Should return timestamps conforming to the OML_sync_control OpenML
+ * extension specification. The timestamp corresponds to the end of
+ * the vblank interval, aka start of scanout of topmost-leftmost display
+ * pixel in the following video frame.
+ *
+ * Requires support for optional dev->driver->get_scanout_position()
+ * in kms driver, plus a bit of setup code to provide a drm_display_mode
+ * that corresponds to the true scanout timing.
+ *
+ * The current implementation only handles standard video modes. It
+ * returns as no operation if a doublescan or interlaced video mode is
+ * active. Higher level code is expected to handle this.
+ *
+ * @dev: DRM device.
+ * @crtc: Which crtc's vblank timestamp to retrieve.
+ * @max_error: Desired maximum allowable error in timestamps (nanosecs).
+ *             On return contains true maximum error of timestamp.
+ * @vblank_time: Pointer to struct timeval which should receive the timestamp.
+ * @flags: Flags to pass to driver:
+ *         0 = Default.
+ *         DRM_CALLED_FROM_VBLIRQ = If function is called from vbl irq handler.
+ * @refcrtc: drm_crtc* of crtc which defines scanout timing.
+ *
+ * Returns negative value on error, failure or if not supported in current
+ * video mode:
+ *
+ * -EINVAL   - Invalid crtc.
+ * -EAGAIN   - Temporary unavailable, e.g., called before initial modeset.
+ * -ENOTSUPP - Function not supported in current display mode.
+ * -EIO      - Failed, e.g., due to failed scanout position query.
+ *
+ * Returns or'ed positive status flags on success:
+ *
+ * DRM_VBLANKTIME_SCANOUTPOS_METHOD - Signal this method used for timestamping.
+ * DRM_VBLANKTIME_INVBL - Timestamp taken while scanout was in vblank interval.
+ *
+ */
+int drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev, int crtc,
+					  int *max_error,
+					  struct timeval *vblank_time,
+					  unsigned flags,
+					  struct drm_crtc *refcrtc)
+{
+	struct timeval stime, raw_time;
+	struct drm_display_mode *mode;
+	int vbl_status, vtotal, vdisplay;
+	int vpos, hpos, i;
+	s64 framedur_ns, linedur_ns, pixeldur_ns, delta_ns, duration_ns;
+	bool invbl;
+
+	if (crtc < 0 || crtc >= dev->num_crtcs) {
+		DRM_ERROR("Invalid crtc %d\n", crtc);
+		return -EINVAL;
+	}
+
+	/* Scanout position query not supported? Should not happen. */
+	if (!dev->driver->get_scanout_position) {
+		DRM_ERROR("Called from driver w/o get_scanout_position()!?\n");
+		return -EIO;
+	}
+
+	mode = &refcrtc->hwmode;
+	vtotal = mode->crtc_vtotal;
+	vdisplay = mode->crtc_vdisplay;
+
+	/* Durations of frames, lines, pixels in nanoseconds. */
+	framedur_ns = refcrtc->framedur_ns;
+	linedur_ns  = refcrtc->linedur_ns;
+	pixeldur_ns = refcrtc->pixeldur_ns;
+
+	/* If mode timing undefined, just return as no-op:
+	 * Happens during initial modesetting of a crtc.
+	 */
+	if (vtotal <= 0 || vdisplay <= 0 || framedur_ns == 0) {
+		DRM_DEBUG("crtc %d: Noop due to uninitialized mode.\n", crtc);
+		return -EAGAIN;
+	}
+
+	/* Don't know yet how to handle interlaced or
+	 * double scan modes. Just no-op for now.
+	 */
+	if (mode->flags & (DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLSCAN)) {
+		DRM_DEBUG("crtc %d: Noop due to unsupported mode.\n", crtc);
+		return -ENOTSUPP;
+	}
+
+	/* Get current scanout position with system timestamp.
+	 * Repeat query up to DRM_TIMESTAMP_MAXRETRIES times
+	 * if single query takes longer than max_error nanoseconds.
+	 *
+	 * This guarantees a tight bound on maximum error if
+	 * code gets preempted or delayed for some reason.
+	 */
+	for (i = 0; i < DRM_TIMESTAMP_MAXRETRIES; i++) {
+		/* Disable preemption to make it very likely to
+		 * succeed in the first iteration even on PREEMPT_RT kernel.
+		 */
+		preempt_disable();
+
+		/* Get system timestamp before query. */
+		do_gettimeofday(&stime);
+
+		/* Get vertical and horizontal scanout pos. vpos, hpos. */
+		vbl_status = dev->driver->get_scanout_position(dev, crtc, &vpos, &hpos);
+
+		/* Get system timestamp after query. */
+		do_gettimeofday(&raw_time);
+
+		preempt_enable();
+
+		/* Return as no-op if scanout query unsupported or failed. */
+		if (!(vbl_status & DRM_SCANOUTPOS_VALID)) {
+			DRM_DEBUG("crtc %d : scanoutpos query failed [%d].\n",
+				  crtc, vbl_status);
+			return -EIO;
+		}
+
+		duration_ns = timeval_to_ns(&raw_time) - timeval_to_ns(&stime);
+
+		/* Accept result with <  max_error nsecs timing uncertainty. */
+		if (duration_ns <= (s64) *max_error)
+			break;
+	}
+
+	/* Noisy system timing? */
+	if (i == DRM_TIMESTAMP_MAXRETRIES) {
+		DRM_DEBUG("crtc %d: Noisy timestamp %d us > %d us [%d reps].\n",
+			  crtc, (int) duration_ns/1000, *max_error/1000, i);
+	}
+
+	/* Return upper bound of timestamp precision error. */
+	*max_error = (int) duration_ns;
+
+	/* Check if in vblank area:
+	 * vpos is >=0 in video scanout area, but negative
+	 * within vblank area, counting down the number of lines until
+	 * start of scanout.
+	 */
+	invbl = vbl_status & DRM_SCANOUTPOS_INVBL;
+
+	/* Convert scanout position into elapsed time at raw_time query
+	 * since start of scanout at first display scanline. delta_ns
+	 * can be negative if start of scanout hasn't happened yet.
+	 */
+	delta_ns = (s64) vpos * linedur_ns + (s64) hpos * pixeldur_ns;
+
+	/* Is vpos outside nominal vblank area, but less than
+	 * 1/100 of a frame height away from start of vblank?
+	 * If so, assume this isn't a massively delayed vblank
+	 * interrupt, but a vblank interrupt that fired a few
+	 * microseconds before true start of vblank. Compensate
+	 * by adding a full frame duration to the final timestamp.
+	 * Happens, e.g., on ATI R500, R600.
+	 *
+	 * We only do this if DRM_CALLED_FROM_VBLIRQ.
+	 */
+	if ((flags & DRM_CALLED_FROM_VBLIRQ) && !invbl &&
+	    ((vdisplay - vpos) < vtotal / 100)) {
+		delta_ns = delta_ns - framedur_ns;
+
+		/* Signal this correction as "applied". */
+		vbl_status |= 0x8;
+	}
+
+	/* Subtract time delta from raw timestamp to get final
+	 * vblank_time timestamp for end of vblank.
+	 */
+	*vblank_time = ns_to_timeval(timeval_to_ns(&raw_time) - delta_ns);
+
+	DRM_DEBUG("crtc %d : v %d p(%d,%d)@ %d.%d -> %d.%d [e %d us, %d rep]\n",
+		  crtc, (int) vbl_status, hpos, vpos, raw_time.tv_sec,
+		  raw_time.tv_usec, vblank_time->tv_sec, vblank_time->tv_usec,
+		  (int) duration_ns/1000, i);
+
+	vbl_status = DRM_VBLANKTIME_SCANOUTPOS_METHOD;
+	if (invbl)
+		vbl_status |= DRM_VBLANKTIME_INVBL;
+
+	return vbl_status;
+}
+EXPORT_SYMBOL(drm_calc_vbltimestamp_from_scanoutpos);
+
+/**
+ * drm_get_last_vbltimestamp - retrieve raw timestamp for the most recent
+ * vblank interval.
+ *
+ * @dev: DRM device
+ * @crtc: which crtc's vblank timestamp to retrieve
+ * @tvblank: Pointer to target struct timeval which should receive the timestamp
+ * @flags: Flags to pass to driver:
+ *         0 = Default.
+ *         DRM_CALLED_FROM_VBLIRQ = If function is called from vbl irq handler.
+ *
+ * Fetches the system timestamp corresponding to the time of the most recent
+ * vblank interval on specified crtc. May call into kms-driver to
+ * compute the timestamp with a high-precision GPU specific method.
+ *
+ * Returns zero if timestamp originates from uncorrected do_gettimeofday()
+ * call, i.e., it isn't very precisely locked to the true vblank.
+ *
+ * Returns non-zero if timestamp is considered to be very precise.
+ */
+u32 drm_get_last_vbltimestamp(struct drm_device *dev, int crtc,
+			      struct timeval *tvblank, unsigned flags)
+{
+	int ret = 0;
+
+	/* Define requested maximum error on timestamps (nanoseconds). */
+	int max_error = (int) drm_timestamp_precision * 1000;
+
+	/* Query driver if possible and precision timestamping enabled. */
+	if (dev->driver->get_vblank_timestamp && (max_error > 0)) {
+		ret = dev->driver->get_vblank_timestamp(dev, crtc, &max_error,
+							tvblank, flags);
+		if (ret > 0)
+			return (u32) ret;
+	}
+
+	/* GPU high precision timestamp query unsupported or failed.
+	 * Return gettimeofday timestamp as best estimate.
+	 */
+	do_gettimeofday(tvblank);
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_get_last_vbltimestamp);
+
 /**
  * drm_vblank_count - retrieve "cooked" vblank counter value
  * @dev: DRM device
@@ -374,6 +767,40 @@ u32 drm_vblank_count(struct drm_device *dev, int crtc)
 }
 EXPORT_SYMBOL(drm_vblank_count);
 
+/**
+ * drm_vblank_count_and_time - retrieve "cooked" vblank counter value
+ * and the system timestamp corresponding to that vblank counter value.
+ *
+ * @dev: DRM device
+ * @crtc: which counter to retrieve
+ * @vblanktime: Pointer to struct timeval to receive the vblank timestamp.
+ *
+ * Fetches the "cooked" vblank count value that represents the number of
+ * vblank events since the system was booted, including lost events due to
+ * modesetting activity. Returns corresponding system timestamp of the time
+ * of the vblank interval that corresponds to the current value vblank counter
+ * value.
+ */
+u32 drm_vblank_count_and_time(struct drm_device *dev, int crtc,
+			      struct timeval *vblanktime)
+{
+	u32 cur_vblank;
+
+	/* Read timestamp from slot of _vblank_time ringbuffer
+	 * that corresponds to current vblank count. Retry if
+	 * count has incremented during readout. This works like
+	 * a seqlock.
+	 */
+	do {
+		cur_vblank = atomic_read(&dev->_vblank_count[crtc]);
+		*vblanktime = vblanktimestamp(dev, crtc, cur_vblank);
+		smp_rmb();
+	} while (cur_vblank != atomic_read(&dev->_vblank_count[crtc]));
+
+	return cur_vblank;
+}
+EXPORT_SYMBOL(drm_vblank_count_and_time);
+
 /**
  * drm_update_vblank_count - update the master vblank counter
  * @dev: DRM device
@@ -392,7 +819,8 @@ EXPORT_SYMBOL(drm_vblank_count);
  */
 static void drm_update_vblank_count(struct drm_device *dev, int crtc)
 {
-	u32 cur_vblank, diff;
+	u32 cur_vblank, diff, tslot, rc;
+	struct timeval t_vblank;
 
 	/*
 	 * Interrupts were disabled prior to this call, so deal with counter
@@ -400,8 +828,18 @@ static void drm_update_vblank_count(struct drm_device *dev, int crtc)
 	 * NOTE!  It's possible we lost a full dev->max_vblank_count events
 	 * here if the register is small or we had vblank interrupts off for
 	 * a long time.
+	 *
+	 * We repeat the hardware vblank counter & timestamp query until
+	 * we get consistent results. This to prevent races between gpu
+	 * updating its hardware counter while we are retrieving the
+	 * corresponding vblank timestamp.
 	 */
-	cur_vblank = dev->driver->get_vblank_counter(dev, crtc);
+	do {
+		cur_vblank = dev->driver->get_vblank_counter(dev, crtc);
+		rc = drm_get_last_vbltimestamp(dev, crtc, &t_vblank, 0);
+	} while (cur_vblank != dev->driver->get_vblank_counter(dev, crtc));
+
+	/* Deal with counter wrap */
 	diff = cur_vblank - dev->last_vblank[crtc];
 	if (cur_vblank < dev->last_vblank[crtc]) {
 		diff += dev->max_vblank_count;
@@ -413,6 +851,16 @@ static void drm_update_vblank_count(struct drm_device *dev, int crtc)
 	DRM_DEBUG("enabling vblank interrupts on crtc %d, missed %d\n",
 		  crtc, diff);
 
+	/* Reinitialize corresponding vblank timestamp if high-precision query
+	 * available. Skip this step if query unsupported or failed. Will
+	 * reinitialize delayed at next vblank interrupt in that case.
+	 */
+	if (rc) {
+		tslot = atomic_read(&dev->_vblank_count[crtc]) + diff;
+		vblanktimestamp(dev, crtc, tslot) = t_vblank;
+		smp_wmb();
+	}
+
 	atomic_add(diff, &dev->_vblank_count[crtc]);
 }
 
@@ -429,15 +877,27 @@ static void drm_update_vblank_count(struct drm_device *dev, int crtc)
  */
 int drm_vblank_get(struct drm_device *dev, int crtc)
 {
-	unsigned long irqflags;
+	unsigned long irqflags, irqflags2;
 	int ret = 0;
 
 	spin_lock_irqsave(&dev->vbl_lock, irqflags);
 	/* Going from 0->1 means we have to enable interrupts again */
 	if (atomic_add_return(1, &dev->vblank_refcount[crtc]) == 1) {
+		/* Disable preemption while holding vblank_time_lock. Do
+		 * it explicitely to guard against PREEMPT_RT kernel.
+		 */
+		preempt_disable();
+		spin_lock_irqsave(&dev->vblank_time_lock, irqflags2);
 		if (!dev->vblank_enabled[crtc]) {
+			/* Enable vblank irqs under vblank_time_lock protection.
+			 * All vblank count & timestamp updates are held off
+			 * until we are done reinitializing master counter and
+			 * timestamps. Filtercode in drm_handle_vblank() will
+			 * prevent double-accounting of same vblank interval.
+			 */
 			ret = dev->driver->enable_vblank(dev, crtc);
-			DRM_DEBUG("enabling vblank on crtc %d, ret: %d\n", crtc, ret);
+			DRM_DEBUG("enabling vblank on crtc %d, ret: %d\n",
+				  crtc, ret);
 			if (ret)
 				atomic_dec(&dev->vblank_refcount[crtc]);
 			else {
@@ -445,6 +905,8 @@ int drm_vblank_get(struct drm_device *dev, int crtc)
 				drm_update_vblank_count(dev, crtc);
 			}
 		}
+		spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags2);
+		preempt_enable();
 	} else {
 		if (!dev->vblank_enabled[crtc]) {
 			atomic_dec(&dev->vblank_refcount[crtc]);
@@ -463,15 +925,17 @@ EXPORT_SYMBOL(drm_vblank_get);
  * @crtc: which counter to give up
  *
  * Release ownership of a given vblank counter, turning off interrupts
- * if possible.
+ * if possible. Disable interrupts after drm_vblank_offdelay milliseconds.
  */
 void drm_vblank_put(struct drm_device *dev, int crtc)
 {
-	BUG_ON (atomic_read (&dev->vblank_refcount[crtc]) == 0);
+	BUG_ON(atomic_read(&dev->vblank_refcount[crtc]) == 0);
 
 	/* Last user schedules interrupt disable */
-	if (atomic_dec_and_test(&dev->vblank_refcount[crtc]))
-		mod_timer(&dev->vblank_disable_timer, jiffies + 5*DRM_HZ);
+	if (atomic_dec_and_test(&dev->vblank_refcount[crtc]) &&
+	    (drm_vblank_offdelay > 0))
+		mod_timer(&dev->vblank_disable_timer,
+			  jiffies + ((drm_vblank_offdelay * DRM_HZ)/1000));
 }
 EXPORT_SYMBOL(drm_vblank_put);
 
@@ -480,10 +944,8 @@ void drm_vblank_off(struct drm_device *dev, int crtc)
 	unsigned long irqflags;
 
 	spin_lock_irqsave(&dev->vbl_lock, irqflags);
-	dev->driver->disable_vblank(dev, crtc);
+	vblank_disable_and_save(dev, crtc);
 	DRM_WAKEUP(&dev->vbl_queue[crtc]);
-	dev->vblank_enabled[crtc] = 0;
-	dev->last_vblank[crtc] = dev->driver->get_vblank_counter(dev, crtc);
 	spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
 }
 EXPORT_SYMBOL(drm_vblank_off);
@@ -599,7 +1061,6 @@ static int drm_queue_vblank_event(struct drm_device *dev, int pipe,
 	e->base.file_priv = file_priv;
 	e->base.destroy = (void (*) (struct drm_pending_event *)) kfree;
 
-	do_gettimeofday(&now);
 	spin_lock_irqsave(&dev->event_lock, flags);
 
 	if (file_priv->event_space < sizeof e->event) {
@@ -609,7 +1070,8 @@ static int drm_queue_vblank_event(struct drm_device *dev, int pipe,
 	}
 
 	file_priv->event_space -= sizeof e->event;
-	seq = drm_vblank_count(dev, pipe);
+	seq = drm_vblank_count_and_time(dev, pipe, &now);
+
 	if ((vblwait->request.type & _DRM_VBLANK_NEXTONMISS) &&
 	    (seq - vblwait->request.sequence) <= (1 << 23)) {
 		vblwait->request.sequence = seq + 1;
@@ -718,11 +1180,10 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
 	if (ret != -EINTR) {
 		struct timeval now;
 
-		do_gettimeofday(&now);
-
+		vblwait->reply.sequence = drm_vblank_count_and_time(dev, crtc, &now);
 		vblwait->reply.tval_sec = now.tv_sec;
 		vblwait->reply.tval_usec = now.tv_usec;
-		vblwait->reply.sequence = drm_vblank_count(dev, crtc);
+
 		DRM_DEBUG("returning %d to client\n",
 			  vblwait->reply.sequence);
 	} else {
@@ -741,8 +1202,7 @@ void drm_handle_vblank_events(struct drm_device *dev, int crtc)
 	unsigned long flags;
 	unsigned int seq;
 
-	do_gettimeofday(&now);
-	seq = drm_vblank_count(dev, crtc);
+	seq = drm_vblank_count_and_time(dev, crtc, &now);
 
 	spin_lock_irqsave(&dev->event_lock, flags);
 
@@ -780,11 +1240,64 @@ void drm_handle_vblank_events(struct drm_device *dev, int crtc)
  */
 void drm_handle_vblank(struct drm_device *dev, int crtc)
 {
+	u32 vblcount;
+	s64 diff_ns;
+	struct timeval tvblank;
+	unsigned long irqflags;
+
 	if (!dev->num_crtcs)
 		return;
 
-	atomic_inc(&dev->_vblank_count[crtc]);
+	/* Need timestamp lock to prevent concurrent execution with
+	 * vblank enable/disable, as this would cause inconsistent
+	 * or corrupted timestamps and vblank counts.
+	 */
+	spin_lock_irqsave(&dev->vblank_time_lock, irqflags);
+
+	/* Vblank irq handling disabled. Nothing to do. */
+	if (!dev->vblank_enabled[crtc]) {
+		spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags);
+		return;
+	}
+
+	/* Fetch corresponding timestamp for this vblank interval from
+	 * driver and store it in proper slot of timestamp ringbuffer.
+	 */
+
+	/* Get current timestamp and count. */
+	vblcount = atomic_read(&dev->_vblank_count[crtc]);
+	drm_get_last_vbltimestamp(dev, crtc, &tvblank, DRM_CALLED_FROM_VBLIRQ);
+
+	/* Compute time difference to timestamp of last vblank */
+	diff_ns = timeval_to_ns(&tvblank) -
+		  timeval_to_ns(&vblanktimestamp(dev, crtc, vblcount));
+
+	/* Update vblank timestamp and count if at least
+	 * DRM_REDUNDANT_VBLIRQ_THRESH_NS nanoseconds
+	 * difference between last stored timestamp and current
+	 * timestamp. A smaller difference means basically
+	 * identical timestamps. Happens if this vblank has
+	 * been already processed and this is a redundant call,
+	 * e.g., due to spurious vblank interrupts. We need to
+	 * ignore those for accounting.
+	 */
+	if (abs(diff_ns) > DRM_REDUNDANT_VBLIRQ_THRESH_NS) {
+		/* Store new timestamp in ringbuffer. */
+		vblanktimestamp(dev, crtc, vblcount + 1) = tvblank;
+		smp_wmb();
+
+		/* Increment cooked vblank count. This also atomically commits
+		 * the timestamp computed above.
+		 */
+		atomic_inc(&dev->_vblank_count[crtc]);
+	} else {
+		DRM_DEBUG("crtc %d: Redundant vblirq ignored. diff_ns = %d\n",
+			  crtc, (int) diff_ns);
+	}
+
 	DRM_WAKEUP(&dev->vbl_queue[crtc]);
 	drm_handle_vblank_events(dev, crtc);
+
+	spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags);
 }
 EXPORT_SYMBOL(drm_handle_vblank);
diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c
index cdc89ee042cc..d59edc18301f 100644
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c
@@ -40,12 +40,22 @@
 unsigned int drm_debug = 0;	/* 1 to enable debug output */
 EXPORT_SYMBOL(drm_debug);
 
+unsigned int drm_vblank_offdelay = 5000;    /* Default to 5000 msecs. */
+EXPORT_SYMBOL(drm_vblank_offdelay);
+
+unsigned int drm_timestamp_precision = 20;  /* Default to 20 usecs. */
+EXPORT_SYMBOL(drm_timestamp_precision);
+
 MODULE_AUTHOR(CORE_AUTHOR);
 MODULE_DESCRIPTION(CORE_DESC);
 MODULE_LICENSE("GPL and additional rights");
 MODULE_PARM_DESC(debug, "Enable debug output");
+MODULE_PARM_DESC(vblankoffdelay, "Delay until vblank irq auto-disable [msecs]");
+MODULE_PARM_DESC(timestamp_precision_usec, "Max. error on timestamps [usecs]");
 
 module_param_named(debug, drm_debug, int, 0600);
+module_param_named(vblankoffdelay, drm_vblank_offdelay, int, 0600);
+module_param_named(timestamp_precision_usec, drm_timestamp_precision, int, 0600);
 
 struct idr drm_minors_idr;
 
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 274eaaa15c36..2b3398004aa5 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -683,6 +683,21 @@ struct drm_master {
 	void *driver_priv; /**< Private structure for driver to use */
 };
 
+/* Size of ringbuffer for vblank timestamps. Just double-buffer
+ * in initial implementation.
+ */
+#define DRM_VBLANKTIME_RBSIZE 2
+
+/* Flags and return codes for get_vblank_timestamp() driver function. */
+#define DRM_CALLED_FROM_VBLIRQ 1
+#define DRM_VBLANKTIME_SCANOUTPOS_METHOD (1 << 0)
+#define DRM_VBLANKTIME_INVBL             (1 << 1)
+
+/* get_scanout_position() return flags */
+#define DRM_SCANOUTPOS_VALID        (1 << 0)
+#define DRM_SCANOUTPOS_INVBL        (1 << 1)
+#define DRM_SCANOUTPOS_ACCURATE     (1 << 2)
+
 /**
  * DRM driver structure. This structure represent the common code for
  * a family of cards. There will one drm_device for each card present
@@ -760,6 +775,68 @@ struct drm_driver {
 	 */
 	int (*device_is_agp) (struct drm_device *dev);
 
+	/**
+	 * Called by vblank timestamping code.
+	 *
+	 * Return the current display scanout position from a crtc.
+	 *
+	 * \param dev  DRM device.
+	 * \param crtc Id of the crtc to query.
+	 * \param *vpos Target location for current vertical scanout position.
+	 * \param *hpos Target location for current horizontal scanout position.
+	 *
+	 * Returns vpos as a positive number while in active scanout area.
+	 * Returns vpos as a negative number inside vblank, counting the number
+	 * of scanlines to go until end of vblank, e.g., -1 means "one scanline
+	 * until start of active scanout / end of vblank."
+	 *
+	 * \return Flags, or'ed together as follows:
+	 *
+	 * DRM_SCANOUTPOS_VALID = Query successfull.
+	 * DRM_SCANOUTPOS_INVBL = Inside vblank.
+	 * DRM_SCANOUTPOS_ACCURATE = Returned position is accurate. A lack of
+	 * this flag means that returned position may be offset by a constant
+	 * but unknown small number of scanlines wrt. real scanout position.
+	 *
+	 */
+	int (*get_scanout_position) (struct drm_device *dev, int crtc,
+				     int *vpos, int *hpos);
+
+	/**
+	 * Called by \c drm_get_last_vbltimestamp. Should return a precise
+	 * timestamp when the most recent VBLANK interval ended or will end.
+	 *
+	 * Specifically, the timestamp in @vblank_time should correspond as
+	 * closely as possible to the time when the first video scanline of
+	 * the video frame after the end of VBLANK will start scanning out,
+	 * the time immmediately after end of the VBLANK interval. If the
+	 * @crtc is currently inside VBLANK, this will be a time in the future.
+	 * If the @crtc is currently scanning out a frame, this will be the
+	 * past start time of the current scanout. This is meant to adhere
+	 * to the OpenML OML_sync_control extension specification.
+	 *
+	 * \param dev dev DRM device handle.
+	 * \param crtc crtc for which timestamp should be returned.
+	 * \param *max_error Maximum allowable timestamp error in nanoseconds.
+	 *                   Implementation should strive to provide timestamp
+	 *                   with an error of at most *max_error nanoseconds.
+	 *                   Returns true upper bound on error for timestamp.
+	 * \param *vblank_time Target location for returned vblank timestamp.
+	 * \param flags 0 = Defaults, no special treatment needed.
+	 * \param       DRM_CALLED_FROM_VBLIRQ = Function is called from vblank
+	 *	        irq handler. Some drivers need to apply some workarounds
+	 *              for gpu-specific vblank irq quirks if flag is set.
+	 *
+	 * \returns
+	 * Zero if timestamping isn't supported in current display mode or a
+	 * negative number on failure. A positive status code on success,
+	 * which describes how the vblank_time timestamp was computed.
+	 */
+	int (*get_vblank_timestamp) (struct drm_device *dev, int crtc,
+				     int *max_error,
+				     struct timeval *vblank_time,
+				     unsigned flags);
+
 	/* these have to be filled in */
 
 	irqreturn_t(*irq_handler) (DRM_IRQ_ARGS);
@@ -983,6 +1060,8 @@ struct drm_device {
 
 	wait_queue_head_t *vbl_queue;   /**< VBLANK wait queue */
 	atomic_t *_vblank_count;        /**< number of VBLANK interrupts (driver must alloc the right number of counters) */
+	struct timeval *_vblank_time;   /**< timestamp of current vblank_count (drivers must alloc right number of fields) */
+	spinlock_t vblank_time_lock;    /**< Protects vblank count and time updates during vblank enable/disable */
 	spinlock_t vbl_lock;
 	atomic_t *vblank_refcount;      /* number of users of vblank interruptsper crtc */
 	u32 *last_vblank;               /* protected by dev->vbl_lock, used */
@@ -1284,11 +1363,22 @@ extern int drm_wait_vblank(struct drm_device *dev, void *data,
 			   struct drm_file *filp);
 extern int drm_vblank_wait(struct drm_device *dev, unsigned int *vbl_seq);
 extern u32 drm_vblank_count(struct drm_device *dev, int crtc);
+extern u32 drm_vblank_count_and_time(struct drm_device *dev, int crtc,
+				     struct timeval *vblanktime);
 extern void drm_handle_vblank(struct drm_device *dev, int crtc);
 extern int drm_vblank_get(struct drm_device *dev, int crtc);
 extern void drm_vblank_put(struct drm_device *dev, int crtc);
 extern void drm_vblank_off(struct drm_device *dev, int crtc);
 extern void drm_vblank_cleanup(struct drm_device *dev);
+extern u32 drm_get_last_vbltimestamp(struct drm_device *dev, int crtc,
+				     struct timeval *tvblank, unsigned flags);
+extern int drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev,
+						 int crtc, int *max_error,
+						 struct timeval *vblank_time,
+						 unsigned flags,
+						 struct drm_crtc *refcrtc);
+extern void drm_calc_timestamping_constants(struct drm_crtc *crtc);
+
 /* Modesetting support */
 extern void drm_vblank_pre_modeset(struct drm_device *dev, int crtc);
 extern void drm_vblank_post_modeset(struct drm_device *dev, int crtc);
@@ -1340,6 +1430,9 @@ extern void drm_put_dev(struct drm_device *dev);
 extern int drm_put_minor(struct drm_minor **minor);
 extern unsigned int drm_debug;
 
+extern unsigned int drm_vblank_offdelay;
+extern unsigned int drm_timestamp_precision;
+
 extern struct class *drm_class;
 extern struct proc_dir_entry *drm_proc_root;
 extern struct dentry *drm_debugfs_root;
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index 029aa688e787..acd7fade160d 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -351,8 +351,14 @@ struct drm_crtc {
 
 	bool enabled;
 
+	/* Requested mode from modesetting. */
 	struct drm_display_mode mode;
 
+	/* Programmed mode in hw, after adjustments for encoders,
+	 * crtc, panel scaling etc. Needed for timestamping etc.
+	 */
+	struct drm_display_mode hwmode;
+
 	int x, y;
 	const struct drm_crtc_funcs *funcs;
 
@@ -360,6 +366,9 @@ struct drm_crtc {
 	uint32_t gamma_size;
 	uint16_t *gamma_store;
 
+	/* Constants needed for precise vblank and swap timestamping. */
+	s64 framedur_ns, linedur_ns, pixeldur_ns;
+
 	/* if you are using the helper */
 	void *helper_private;
 };
-- 
cgit v1.2.3


From d6ea88865d3e5b0c62040531310c1f2c6a994f46 Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Mon, 22 Nov 2010 13:24:40 +1000
Subject: drm/ttm: Add a bo list reserve fastpath (v2)

Makes it possible to reserve a list of buffer objects with a single
spin lock / unlock if there is no contention.
Should improve cpu usage on SMP kernels.

v2: Initialize private list members on reserve and don't call
ttm_bo_list_ref_sub() with zero put_count.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c           |  32 ++++-----
 drivers/gpu/drm/ttm/ttm_execbuf_util.c | 124 ++++++++++++++++++++++++++++++---
 include/drm/ttm/ttm_bo_api.h           |  38 ++++++++++
 include/drm/ttm/ttm_bo_driver.h        |  14 ++++
 include/drm/ttm/ttm_execbuf_util.h     |   6 +-
 5 files changed, 186 insertions(+), 28 deletions(-)

(limited to 'include/drm')

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 148a322d8f5d..a586378b1b2b 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -169,7 +169,7 @@ int ttm_bo_wait_unreserved(struct ttm_buffer_object *bo, bool interruptible)
 }
 EXPORT_SYMBOL(ttm_bo_wait_unreserved);
 
-static void ttm_bo_add_to_lru(struct ttm_buffer_object *bo)
+void ttm_bo_add_to_lru(struct ttm_buffer_object *bo)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
 	struct ttm_mem_type_manager *man;
@@ -191,11 +191,7 @@ static void ttm_bo_add_to_lru(struct ttm_buffer_object *bo)
 	}
 }
 
-/**
- * Call with the lru_lock held.
- */
-
-static int ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
+int ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
 {
 	int put_count = 0;
 
@@ -267,6 +263,15 @@ static void ttm_bo_ref_bug(struct kref *list_kref)
 	BUG();
 }
 
+void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count,
+			 bool never_free)
+{
+	while (count--)
+		kref_put(&bo->list_kref,
+			 (never_free || (count >= 0)) ? ttm_bo_ref_bug :
+			 ttm_bo_release_list);
+}
+
 int ttm_bo_reserve(struct ttm_buffer_object *bo,
 		   bool interruptible,
 		   bool no_wait, bool use_sequence, uint32_t sequence)
@@ -282,8 +287,7 @@ int ttm_bo_reserve(struct ttm_buffer_object *bo,
 		put_count = ttm_bo_del_from_lru(bo);
 	spin_unlock(&glob->lru_lock);
 
-	while (put_count--)
-		kref_put(&bo->list_kref, ttm_bo_ref_bug);
+	ttm_bo_list_ref_sub(bo, put_count, true);
 
 	return ret;
 }
@@ -496,8 +500,7 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 		spin_unlock(&glob->lru_lock);
 		ttm_bo_cleanup_memtype_use(bo);
 
-		while (put_count--)
-			kref_put(&bo->list_kref, ttm_bo_ref_bug);
+		ttm_bo_list_ref_sub(bo, put_count, true);
 
 		return;
 	} else {
@@ -580,8 +583,7 @@ retry:
 	spin_unlock(&glob->lru_lock);
 	ttm_bo_cleanup_memtype_use(bo);
 
-	while (put_count--)
-		kref_put(&bo->list_kref, ttm_bo_ref_bug);
+	ttm_bo_list_ref_sub(bo, put_count, true);
 
 	return 0;
 }
@@ -802,8 +804,7 @@ retry:
 
 	BUG_ON(ret != 0);
 
-	while (put_count--)
-		kref_put(&bo->list_kref, ttm_bo_ref_bug);
+	ttm_bo_list_ref_sub(bo, put_count, true);
 
 	ret = ttm_bo_evict(bo, interruptible, no_wait_reserve, no_wait_gpu);
 	ttm_bo_unreserve(bo);
@@ -1783,8 +1784,7 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 	put_count = ttm_bo_del_from_lru(bo);
 	spin_unlock(&glob->lru_lock);
 
-	while (put_count--)
-		kref_put(&bo->list_kref, ttm_bo_ref_bug);
+	ttm_bo_list_ref_sub(bo, put_count, true);
 
 	/**
 	 * Wait for GPU, then move to system cached.
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index c285c2902d15..201a71d111ec 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -32,6 +32,72 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 
+static void ttm_eu_backoff_reservation_locked(struct list_head *list)
+{
+	struct ttm_validate_buffer *entry;
+
+	list_for_each_entry(entry, list, head) {
+		struct ttm_buffer_object *bo = entry->bo;
+		if (!entry->reserved)
+			continue;
+
+		if (entry->removed) {
+			ttm_bo_add_to_lru(bo);
+			entry->removed = false;
+
+		}
+		entry->reserved = false;
+		atomic_set(&bo->reserved, 0);
+		wake_up_all(&bo->event_queue);
+	}
+}
+
+static void ttm_eu_del_from_lru_locked(struct list_head *list)
+{
+	struct ttm_validate_buffer *entry;
+
+	list_for_each_entry(entry, list, head) {
+		struct ttm_buffer_object *bo = entry->bo;
+		if (!entry->reserved)
+			continue;
+
+		if (!entry->removed) {
+			entry->put_count = ttm_bo_del_from_lru(bo);
+			entry->removed = true;
+		}
+	}
+}
+
+static void ttm_eu_list_ref_sub(struct list_head *list)
+{
+	struct ttm_validate_buffer *entry;
+
+	list_for_each_entry(entry, list, head) {
+		struct ttm_buffer_object *bo = entry->bo;
+
+		if (entry->put_count) {
+			ttm_bo_list_ref_sub(bo, entry->put_count, true);
+			entry->put_count = 0;
+		}
+	}
+}
+
+static int ttm_eu_wait_unreserved_locked(struct list_head *list,
+					 struct ttm_buffer_object *bo)
+{
+	struct ttm_bo_global *glob = bo->glob;
+	int ret;
+
+	ttm_eu_del_from_lru_locked(list);
+	spin_unlock(&glob->lru_lock);
+	ret = ttm_bo_wait_unreserved(bo, true);
+	spin_lock(&glob->lru_lock);
+	if (unlikely(ret != 0))
+		ttm_eu_backoff_reservation_locked(list);
+	return ret;
+}
+
+
 void ttm_eu_backoff_reservation(struct list_head *list)
 {
 	struct ttm_validate_buffer *entry;
@@ -61,35 +127,71 @@ EXPORT_SYMBOL(ttm_eu_backoff_reservation);
 
 int ttm_eu_reserve_buffers(struct list_head *list, uint32_t val_seq)
 {
+	struct ttm_bo_global *glob;
 	struct ttm_validate_buffer *entry;
 	int ret;
 
+	if (list_empty(list))
+		return 0;
+
+	list_for_each_entry(entry, list, head) {
+		entry->reserved = false;
+		entry->put_count = 0;
+		entry->removed = false;
+	}
+
+	entry = list_first_entry(list, struct ttm_validate_buffer, head);
+	glob = entry->bo->glob;
+
 retry:
+	spin_lock(&glob->lru_lock);
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
 
-		entry->reserved = false;
-		ret = ttm_bo_reserve(bo, true, false, true, val_seq);
-		if (ret != 0) {
-			ttm_eu_backoff_reservation(list);
-			if (ret == -EAGAIN) {
-				ret = ttm_bo_wait_unreserved(bo, true);
-				if (unlikely(ret != 0))
-					return ret;
-				goto retry;
-			} else
+retry_this_bo:
+		ret = ttm_bo_reserve_locked(bo, true, true, true, val_seq);
+		switch (ret) {
+		case 0:
+			break;
+		case -EBUSY:
+			ret = ttm_eu_wait_unreserved_locked(list, bo);
+			if (unlikely(ret != 0)) {
+				spin_unlock(&glob->lru_lock);
+				ttm_eu_list_ref_sub(list);
+				return ret;
+			}
+			goto retry_this_bo;
+		case -EAGAIN:
+			ttm_eu_backoff_reservation_locked(list);
+			spin_unlock(&glob->lru_lock);
+			ttm_eu_list_ref_sub(list);
+			ret = ttm_bo_wait_unreserved(bo, true);
+			if (unlikely(ret != 0))
 				return ret;
+			goto retry;
+		default:
+			ttm_eu_backoff_reservation_locked(list);
+			spin_unlock(&glob->lru_lock);
+			ttm_eu_list_ref_sub(list);
+			return ret;
 		}
 
 		entry->reserved = true;
 		if (unlikely(atomic_read(&bo->cpu_writers) > 0)) {
-			ttm_eu_backoff_reservation(list);
+			ttm_eu_backoff_reservation_locked(list);
+			spin_unlock(&glob->lru_lock);
+			ttm_eu_list_ref_sub(list);
 			ret = ttm_bo_wait_cpu(bo, false);
 			if (ret)
 				return ret;
 			goto retry;
 		}
 	}
+
+	ttm_eu_del_from_lru_locked(list);
+	spin_unlock(&glob->lru_lock);
+	ttm_eu_list_ref_sub(list);
+
 	return 0;
 }
 EXPORT_SYMBOL(ttm_eu_reserve_buffers);
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index beafc156a535..b0fc9c12554b 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -364,6 +364,44 @@ extern int ttm_bo_validate(struct ttm_buffer_object *bo,
  */
 extern void ttm_bo_unref(struct ttm_buffer_object **bo);
 
+
+/**
+ * ttm_bo_list_ref_sub
+ *
+ * @bo: The buffer object.
+ * @count: The number of references with which to decrease @bo::list_kref;
+ * @never_free: The refcount should not reach zero with this operation.
+ *
+ * Release @count lru list references to this buffer object.
+ */
+extern void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count,
+				bool never_free);
+
+/**
+ * ttm_bo_add_to_lru
+ *
+ * @bo: The buffer object.
+ *
+ * Add this bo to the relevant mem type lru and, if it's backed by
+ * system pages (ttms) to the swap list.
+ * This function must be called with struct ttm_bo_global::lru_lock held, and
+ * is typically called immediately prior to unreserving a bo.
+ */
+extern void ttm_bo_add_to_lru(struct ttm_buffer_object *bo);
+
+/**
+ * ttm_bo_del_from_lru
+ *
+ * @bo: The buffer object.
+ *
+ * Remove this bo from all lru lists used to lookup and reserve an object.
+ * This function must be called with struct ttm_bo_global::lru_lock held,
+ * and is usually called just immediately after the bo has been reserved to
+ * avoid recursive reservation from lru lists.
+ */
+extern int ttm_bo_del_from_lru(struct ttm_buffer_object *bo);
+
+
 /**
  * ttm_bo_lock_delayed_workqueue
  *
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 8e0c848326b6..95068e6024db 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -864,6 +864,20 @@ extern int ttm_bo_reserve(struct ttm_buffer_object *bo,
 			  bool interruptible,
 			  bool no_wait, bool use_sequence, uint32_t sequence);
 
+
+/**
+ * ttm_bo_reserve_locked:
+ *
+ * Similar to ttm_bo_reserve, but must be called with the glob::lru_lock
+ * spinlock held, and will not remove reserved buffers from the lru lists.
+ * The function may release the LRU spinlock if it needs to sleep.
+ */
+
+extern int ttm_bo_reserve_locked(struct ttm_buffer_object *bo,
+				 bool interruptible,
+				 bool no_wait, bool use_sequence,
+				 uint32_t sequence);
+
 /**
  * ttm_bo_unreserve
  *
diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h
index cd2c475da9ea..fd09b8438977 100644
--- a/include/drm/ttm/ttm_execbuf_util.h
+++ b/include/drm/ttm/ttm_execbuf_util.h
@@ -41,7 +41,9 @@
  * @bo:             refcounted buffer object pointer.
  * @new_sync_obj_arg: New sync_obj_arg for @bo, to be used once
  * adding a new sync object.
- * @reservied:      Indicates whether @bo has been reserved for validation.
+ * @reserved:       Indicates whether @bo has been reserved for validation.
+ * @removed:        Indicates whether @bo has been removed from lru lists.
+ * @put_count:      Number of outstanding references on bo::list_kref.
  */
 
 struct ttm_validate_buffer {
@@ -49,6 +51,8 @@ struct ttm_validate_buffer {
 	struct ttm_buffer_object *bo;
 	void *new_sync_obj_arg;
 	bool reserved;
+	bool removed;
+	int put_count;
 };
 
 /**
-- 
cgit v1.2.3


From 96726fe50feae74812a2ccf5d5da23cb01c0a413 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom
Date: Wed, 17 Nov 2010 12:28:28 +0000
Subject: drm/ttm: Don't deadlock on recursive multi-bo reservations

Add an aid for the driver to detect deadlocks on multi-bo reservations
Update documentation.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Jerome Glisse <j.glisse@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c    | 15 ++++++++++++---
 include/drm/ttm/ttm_bo_driver.h | 25 ++++++++++++++++++++++---
 2 files changed, 34 insertions(+), 6 deletions(-)

(limited to 'include/drm')

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 9ef893d5da88..5d8750830dc3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -223,9 +223,18 @@ int ttm_bo_reserve_locked(struct ttm_buffer_object *bo,
 		/**
 		 * Deadlock avoidance for multi-bo reserving.
 		 */
-		if (use_sequence && bo->seq_valid &&
-			(sequence - bo->val_seq < (1 << 31))) {
-			return -EAGAIN;
+		if (use_sequence && bo->seq_valid) {
+			/**
+			 * We've already reserved this one.
+			 */
+			if (unlikely(sequence == bo->val_seq))
+				return -EDEADLK;
+			/**
+			 * Already reserved by a thread that will not back
+			 * off for us. We need to back off.
+			 */
+			if (unlikely(sequence - bo->val_seq < (1 << 31)))
+				return -EAGAIN;
 		}
 
 		if (no_wait)
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 95068e6024db..1e25a40c688e 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -859,6 +859,9 @@ extern void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo);
  * try again. (only if use_sequence == 1).
  * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by
  * a signal. Release all buffer reservations and return to user-space.
+ * -EBUSY: The function needed to sleep, but @no_wait was true
+ * -EDEADLK: Bo already reserved using @sequence. This error code will only
+ * be returned if @use_sequence is set to true.
  */
 extern int ttm_bo_reserve(struct ttm_buffer_object *bo,
 			  bool interruptible,
@@ -868,11 +871,27 @@ extern int ttm_bo_reserve(struct ttm_buffer_object *bo,
 /**
  * ttm_bo_reserve_locked:
  *
- * Similar to ttm_bo_reserve, but must be called with the glob::lru_lock
- * spinlock held, and will not remove reserved buffers from the lru lists.
+ * @bo: A pointer to a struct ttm_buffer_object.
+ * @interruptible: Sleep interruptible if waiting.
+ * @no_wait: Don't sleep while trying to reserve, rather return -EBUSY.
+ * @use_sequence: If @bo is already reserved, Only sleep waiting for
+ * it to become unreserved if @sequence < (@bo)->sequence.
+ *
+ * Must be called with struct ttm_bo_global::lru_lock held,
+ * and will not remove reserved buffers from the lru lists.
  * The function may release the LRU spinlock if it needs to sleep.
+ * Otherwise identical to ttm_bo_reserve.
+ *
+ * Returns:
+ * -EAGAIN: The reservation may cause a deadlock.
+ * Release all buffer reservations, wait for @bo to become unreserved and
+ * try again. (only if use_sequence == 1).
+ * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by
+ * a signal. Release all buffer reservations and return to user-space.
+ * -EBUSY: The function needed to sleep, but @no_wait was true
+ * -EDEADLK: Bo already reserved using @sequence. This error code will only
+ * be returned if @use_sequence is set to true.
  */
-
 extern int ttm_bo_reserve_locked(struct ttm_buffer_object *bo,
 				 bool interruptible,
 				 bool no_wait, bool use_sequence,
-- 
cgit v1.2.3


From 702adba22433c175e8429a47760f35ca16caf1cd Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom
Date: Wed, 17 Nov 2010 12:28:29 +0000
Subject: drm/ttm/radeon/nouveau: Kill the bo lock in favour of a bo device
 fence_lock

The bo lock used only to protect the bo sync object members, and since it
is a per bo lock, fencing a buffer list will see a lot of locks and unlocks.
Replace it with a per-device lock that protects the sync object members on
*all* bos. Reading and setting these members will always be very quick, so
the risc of heavy lock contention is microscopic. Note that waiting for
sync objects will always take place outside of this lock.

The bo device fence lock will eventually be replaced with a seqlock /
rcu mechanism so we can determine that a bo is idle under a
rcu / read seqlock.

However this change will allow us to batch fencing and unreserving of
buffers with a minimal amount of locking.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Jerome Glisse <j.glisse@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_gem.c  | 12 ++++----
 drivers/gpu/drm/radeon/radeon_object.c |  4 +--
 drivers/gpu/drm/radeon/radeon_object.h |  4 +--
 drivers/gpu/drm/ttm/ttm_bo.c           | 55 +++++++++++++++++-----------------
 drivers/gpu/drm/ttm/ttm_bo_util.c      |  7 ++---
 drivers/gpu/drm/ttm/ttm_bo_vm.c        |  6 ++--
 drivers/gpu/drm/ttm/ttm_execbuf_util.c |  7 +++--
 include/drm/ttm/ttm_bo_api.h           |  6 ++--
 include/drm/ttm/ttm_bo_driver.h        |  3 ++
 9 files changed, 53 insertions(+), 51 deletions(-)

(limited to 'include/drm')

diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 9a1fdcf400c2..1f2301d26c0a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -234,10 +234,10 @@ validate_fini_list(struct list_head *list, struct nouveau_fence *fence)
 		if (likely(fence)) {
 			struct nouveau_fence *prev_fence;
 
-			spin_lock(&nvbo->bo.lock);
+			spin_lock(&nvbo->bo.bdev->fence_lock);
 			prev_fence = nvbo->bo.sync_obj;
 			nvbo->bo.sync_obj = nouveau_fence_ref(fence);
-			spin_unlock(&nvbo->bo.lock);
+			spin_unlock(&nvbo->bo.bdev->fence_lock);
 			nouveau_fence_unref((void *)&prev_fence);
 		}
 
@@ -557,9 +557,9 @@ nouveau_gem_pushbuf_reloc_apply(struct drm_device *dev,
 				data |= r->vor;
 		}
 
-		spin_lock(&nvbo->bo.lock);
+		spin_lock(&nvbo->bo.bdev->fence_lock);
 		ret = ttm_bo_wait(&nvbo->bo, false, false, false);
-		spin_unlock(&nvbo->bo.lock);
+		spin_unlock(&nvbo->bo.bdev->fence_lock);
 		if (ret) {
 			NV_ERROR(dev, "reloc wait_idle failed: %d\n", ret);
 			break;
@@ -791,9 +791,9 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
 	}
 
 	if (req->flags & NOUVEAU_GEM_CPU_PREP_NOBLOCK) {
-		spin_lock(&nvbo->bo.lock);
+		spin_lock(&nvbo->bo.bdev->fence_lock);
 		ret = ttm_bo_wait(&nvbo->bo, false, false, no_wait);
-		spin_unlock(&nvbo->bo.lock);
+		spin_unlock(&nvbo->bo.bdev->fence_lock);
 	} else {
 		ret = ttm_bo_synccpu_write_grab(&nvbo->bo, no_wait);
 		if (ret == 0)
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 1d067743fee0..e939cb6a91cc 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -369,11 +369,11 @@ void radeon_bo_list_fence(struct list_head *head, void *fence)
 
 	list_for_each_entry(lobj, head, list) {
 		bo = lobj->bo;
-		spin_lock(&bo->tbo.lock);
+		spin_lock(&bo->tbo.bdev->fence_lock);
 		old_fence = (struct radeon_fence *)bo->tbo.sync_obj;
 		bo->tbo.sync_obj = radeon_fence_ref(fence);
 		bo->tbo.sync_obj_arg = NULL;
-		spin_unlock(&bo->tbo.lock);
+		spin_unlock(&bo->tbo.bdev->fence_lock);
 		if (old_fence) {
 			radeon_fence_unref(&old_fence);
 		}
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index d143702b244a..fd536751f865 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -126,12 +126,12 @@ static inline int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
 	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
 	if (unlikely(r != 0))
 		return r;
-	spin_lock(&bo->tbo.lock);
+	spin_lock(&bo->tbo.bdev->fence_lock);
 	if (mem_type)
 		*mem_type = bo->tbo.mem.mem_type;
 	if (bo->tbo.sync_obj)
 		r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
-	spin_unlock(&bo->tbo.lock);
+	spin_unlock(&bo->tbo.bdev->fence_lock);
 	ttm_bo_unreserve(&bo->tbo);
 	return r;
 }
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 5d8750830dc3..d93c73b1c471 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -427,11 +427,9 @@ moved:
 	}
 
 	if (bo->mem.mm_node) {
-		spin_lock(&bo->lock);
 		bo->offset = (bo->mem.start << PAGE_SHIFT) +
 		    bdev->man[bo->mem.mem_type].gpu_offset;
 		bo->cur_placement = bo->mem.placement;
-		spin_unlock(&bo->lock);
 	} else
 		bo->offset = 0;
 
@@ -485,14 +483,14 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 	int put_count;
 	int ret;
 
-	spin_lock(&bo->lock);
+	spin_lock(&bdev->fence_lock);
 	(void) ttm_bo_wait(bo, false, false, true);
 	if (!bo->sync_obj) {
 
 		spin_lock(&glob->lru_lock);
 
 		/**
-		 * Lock inversion between bo::reserve and bo::lock here,
+		 * Lock inversion between bo:reserve and bdev::fence_lock here,
 		 * but that's OK, since we're only trylocking.
 		 */
 
@@ -501,7 +499,7 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 		if (unlikely(ret == -EBUSY))
 			goto queue;
 
-		spin_unlock(&bo->lock);
+		spin_unlock(&bdev->fence_lock);
 		put_count = ttm_bo_del_from_lru(bo);
 
 		spin_unlock(&glob->lru_lock);
@@ -522,7 +520,7 @@ queue:
 	kref_get(&bo->list_kref);
 	list_add_tail(&bo->ddestroy, &bdev->ddestroy);
 	spin_unlock(&glob->lru_lock);
-	spin_unlock(&bo->lock);
+	spin_unlock(&bdev->fence_lock);
 
 	if (sync_obj) {
 		driver->sync_obj_flush(sync_obj, sync_obj_arg);
@@ -547,14 +545,15 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 			       bool no_wait_reserve,
 			       bool no_wait_gpu)
 {
+	struct ttm_bo_device *bdev = bo->bdev;
 	struct ttm_bo_global *glob = bo->glob;
 	int put_count;
 	int ret = 0;
 
 retry:
-	spin_lock(&bo->lock);
+	spin_lock(&bdev->fence_lock);
 	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-	spin_unlock(&bo->lock);
+	spin_unlock(&bdev->fence_lock);
 
 	if (unlikely(ret != 0))
 		return ret;
@@ -707,9 +706,9 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
 	struct ttm_placement placement;
 	int ret = 0;
 
-	spin_lock(&bo->lock);
+	spin_lock(&bdev->fence_lock);
 	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-	spin_unlock(&bo->lock);
+	spin_unlock(&bdev->fence_lock);
 
 	if (unlikely(ret != 0)) {
 		if (ret != -ERESTARTSYS) {
@@ -1044,6 +1043,7 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 {
 	int ret = 0;
 	struct ttm_mem_reg mem;
+	struct ttm_bo_device *bdev = bo->bdev;
 
 	BUG_ON(!atomic_read(&bo->reserved));
 
@@ -1052,9 +1052,9 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 	 * Have the driver move function wait for idle when necessary,
 	 * instead of doing it here.
 	 */
-	spin_lock(&bo->lock);
+	spin_lock(&bdev->fence_lock);
 	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-	spin_unlock(&bo->lock);
+	spin_unlock(&bdev->fence_lock);
 	if (ret)
 		return ret;
 	mem.num_pages = bo->num_pages;
@@ -1171,7 +1171,6 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
 	}
 	bo->destroy = destroy;
 
-	spin_lock_init(&bo->lock);
 	kref_init(&bo->kref);
 	kref_init(&bo->list_kref);
 	atomic_set(&bo->cpu_writers, 0);
@@ -1535,7 +1534,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
 	bdev->dev_mapping = NULL;
 	bdev->glob = glob;
 	bdev->need_dma32 = need_dma32;
-
+	spin_lock_init(&bdev->fence_lock);
 	mutex_lock(&glob->device_list_mutex);
 	list_add_tail(&bdev->device_list, &glob->device_list);
 	mutex_unlock(&glob->device_list_mutex);
@@ -1659,6 +1658,7 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
 		bool lazy, bool interruptible, bool no_wait)
 {
 	struct ttm_bo_driver *driver = bo->bdev->driver;
+	struct ttm_bo_device *bdev = bo->bdev;
 	void *sync_obj;
 	void *sync_obj_arg;
 	int ret = 0;
@@ -1672,9 +1672,9 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
 			void *tmp_obj = bo->sync_obj;
 			bo->sync_obj = NULL;
 			clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
-			spin_unlock(&bo->lock);
+			spin_unlock(&bdev->fence_lock);
 			driver->sync_obj_unref(&tmp_obj);
-			spin_lock(&bo->lock);
+			spin_lock(&bdev->fence_lock);
 			continue;
 		}
 
@@ -1683,29 +1683,29 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
 
 		sync_obj = driver->sync_obj_ref(bo->sync_obj);
 		sync_obj_arg = bo->sync_obj_arg;
-		spin_unlock(&bo->lock);
+		spin_unlock(&bdev->fence_lock);
 		ret = driver->sync_obj_wait(sync_obj, sync_obj_arg,
 					    lazy, interruptible);
 		if (unlikely(ret != 0)) {
 			driver->sync_obj_unref(&sync_obj);
-			spin_lock(&bo->lock);
+			spin_lock(&bdev->fence_lock);
 			return ret;
 		}
-		spin_lock(&bo->lock);
+		spin_lock(&bdev->fence_lock);
 		if (likely(bo->sync_obj == sync_obj &&
 			   bo->sync_obj_arg == sync_obj_arg)) {
 			void *tmp_obj = bo->sync_obj;
 			bo->sync_obj = NULL;
 			clear_bit(TTM_BO_PRIV_FLAG_MOVING,
 				  &bo->priv_flags);
-			spin_unlock(&bo->lock);
+			spin_unlock(&bdev->fence_lock);
 			driver->sync_obj_unref(&sync_obj);
 			driver->sync_obj_unref(&tmp_obj);
-			spin_lock(&bo->lock);
+			spin_lock(&bdev->fence_lock);
 		} else {
-			spin_unlock(&bo->lock);
+			spin_unlock(&bdev->fence_lock);
 			driver->sync_obj_unref(&sync_obj);
-			spin_lock(&bo->lock);
+			spin_lock(&bdev->fence_lock);
 		}
 	}
 	return 0;
@@ -1714,6 +1714,7 @@ EXPORT_SYMBOL(ttm_bo_wait);
 
 int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait)
 {
+	struct ttm_bo_device *bdev = bo->bdev;
 	int ret = 0;
 
 	/*
@@ -1723,9 +1724,9 @@ int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait)
 	ret = ttm_bo_reserve(bo, true, no_wait, false, 0);
 	if (unlikely(ret != 0))
 		return ret;
-	spin_lock(&bo->lock);
+	spin_lock(&bdev->fence_lock);
 	ret = ttm_bo_wait(bo, false, true, no_wait);
-	spin_unlock(&bo->lock);
+	spin_unlock(&bdev->fence_lock);
 	if (likely(ret == 0))
 		atomic_inc(&bo->cpu_writers);
 	ttm_bo_unreserve(bo);
@@ -1797,9 +1798,9 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 	 * Wait for GPU, then move to system cached.
 	 */
 
-	spin_lock(&bo->lock);
+	spin_lock(&bo->bdev->fence_lock);
 	ret = ttm_bo_wait(bo, false, false, false);
-	spin_unlock(&bo->lock);
+	spin_unlock(&bo->bdev->fence_lock);
 
 	if (unlikely(ret != 0))
 		goto out;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 3106d5bcce32..4b75133d6606 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -337,7 +337,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	 * TODO: Explicit member copy would probably be better here.
 	 */
 
-	spin_lock_init(&fbo->lock);
 	init_waitqueue_head(&fbo->event_queue);
 	INIT_LIST_HEAD(&fbo->ddestroy);
 	INIT_LIST_HEAD(&fbo->lru);
@@ -520,7 +519,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 	struct ttm_buffer_object *ghost_obj;
 	void *tmp_obj = NULL;
 
-	spin_lock(&bo->lock);
+	spin_lock(&bdev->fence_lock);
 	if (bo->sync_obj) {
 		tmp_obj = bo->sync_obj;
 		bo->sync_obj = NULL;
@@ -529,7 +528,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 	bo->sync_obj_arg = sync_obj_arg;
 	if (evict) {
 		ret = ttm_bo_wait(bo, false, false, false);
-		spin_unlock(&bo->lock);
+		spin_unlock(&bdev->fence_lock);
 		if (tmp_obj)
 			driver->sync_obj_unref(&tmp_obj);
 		if (ret)
@@ -552,7 +551,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 		 */
 
 		set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
-		spin_unlock(&bo->lock);
+		spin_unlock(&bdev->fence_lock);
 		if (tmp_obj)
 			driver->sync_obj_unref(&tmp_obj);
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index fe6cb77899f4..8dd446cb778e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -118,17 +118,17 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	 * move.
 	 */
 
-	spin_lock(&bo->lock);
+	spin_lock(&bdev->fence_lock);
 	if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) {
 		ret = ttm_bo_wait(bo, false, true, false);
-		spin_unlock(&bo->lock);
+		spin_unlock(&bdev->fence_lock);
 		if (unlikely(ret != 0)) {
 			retval = (ret != -ERESTARTSYS) ?
 			    VM_FAULT_SIGBUS : VM_FAULT_NOPAGE;
 			goto out_unlock;
 		}
 	} else
-		spin_unlock(&bo->lock);
+		spin_unlock(&bdev->fence_lock);
 
 
 	ret = ttm_mem_io_reserve(bdev, &bo->mem);
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 7dcc6470e2f5..c3a2100bace6 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -203,14 +203,15 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
 
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
-		struct ttm_bo_driver *driver = bo->bdev->driver;
+		struct ttm_bo_device *bdev = bo->bdev;
+		struct ttm_bo_driver *driver = bdev->driver;
 		void *old_sync_obj;
 
-		spin_lock(&bo->lock);
+		spin_lock(&bdev->fence_lock);
 		old_sync_obj = bo->sync_obj;
 		bo->sync_obj = driver->sync_obj_ref(sync_obj);
 		bo->sync_obj_arg = entry->new_sync_obj_arg;
-		spin_unlock(&bo->lock);
+		spin_unlock(&bdev->fence_lock);
 		ttm_bo_unreserve(bo);
 		entry->reserved = false;
 		if (old_sync_obj)
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index b0fc9c12554b..edacd483c59c 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -154,7 +154,6 @@ struct ttm_tt;
  * keeps one refcount. When this refcount reaches zero,
  * the object is destroyed.
  * @event_queue: Queue for processes waiting on buffer object status change.
- * @lock: spinlock protecting mostly synchronization members.
  * @mem: structure describing current placement.
  * @persistant_swap_storage: Usually the swap storage is deleted for buffers
  * pinned in physical memory. If this behaviour is not desired, this member
@@ -213,7 +212,6 @@ struct ttm_buffer_object {
 	struct kref kref;
 	struct kref list_kref;
 	wait_queue_head_t event_queue;
-	spinlock_t lock;
 
 	/**
 	 * Members protected by the bo::reserved lock.
@@ -248,10 +246,10 @@ struct ttm_buffer_object {
 	atomic_t reserved;
 
 	/**
-	 * Members protected by the bo::lock
+	 * Members protected by struct buffer_object_device::fence_lock
 	 * In addition, setting sync_obj to anything else
 	 * than NULL requires bo::reserved to be held. This allows for
-	 * checking NULL while reserved but not holding bo::lock.
+	 * checking NULL while reserved but not holding the mentioned lock.
 	 */
 
 	void *sync_obj_arg;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 1e25a40c688e..ca8131e98300 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -510,6 +510,8 @@ struct ttm_bo_global {
  *
  * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver.
  * @man: An array of mem_type_managers.
+ * @fence_lock: Protects the synchronizing members on *all* bos belonging
+ * to this device.
  * @addr_space_mm: Range manager for the device address space.
  * lru_lock: Spinlock that protects the buffer+device lru lists and
  * ddestroy lists.
@@ -531,6 +533,7 @@ struct ttm_bo_device {
 	struct ttm_bo_driver *driver;
 	rwlock_t vm_lock;
 	struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES];
+	spinlock_t fence_lock;
 	/*
 	 * Protected by the vm lock.
 	 */
-- 
cgit v1.2.3


From 95762c2b34069bf4adb7929969f1f5f5fc8a38df Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom
Date: Wed, 17 Nov 2010 12:28:30 +0000
Subject: drm/ttm: Improved fencing of buffer object lists

Drastically reduce the number of spin lock / unlock operations by performing
unreserving and fencing under global locks.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Jerome Glisse <j.glisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c           | 11 ++++++++---
 drivers/gpu/drm/ttm/ttm_execbuf_util.c | 36 +++++++++++++++++++++++-----------
 include/drm/ttm/ttm_bo_driver.h        | 10 ++++++++++
 include/drm/ttm/ttm_execbuf_util.h     |  2 ++
 4 files changed, 45 insertions(+), 14 deletions(-)

(limited to 'include/drm')

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index d93c73b1c471..551a5d31cadf 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -299,14 +299,19 @@ int ttm_bo_reserve(struct ttm_buffer_object *bo,
 	return ret;
 }
 
+void ttm_bo_unreserve_locked(struct ttm_buffer_object *bo)
+{
+	ttm_bo_add_to_lru(bo);
+	atomic_set(&bo->reserved, 0);
+	wake_up_all(&bo->event_queue);
+}
+
 void ttm_bo_unreserve(struct ttm_buffer_object *bo)
 {
 	struct ttm_bo_global *glob = bo->glob;
 
 	spin_lock(&glob->lru_lock);
-	ttm_bo_add_to_lru(bo);
-	atomic_set(&bo->reserved, 0);
-	wake_up_all(&bo->event_queue);
+	ttm_bo_unreserve_locked(bo);
 	spin_unlock(&glob->lru_lock);
 }
 EXPORT_SYMBOL(ttm_bo_unreserve);
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index c3a2100bace6..b6da65cc502a 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -200,22 +200,36 @@ EXPORT_SYMBOL(ttm_eu_reserve_buffers);
 void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
 {
 	struct ttm_validate_buffer *entry;
+	struct ttm_buffer_object *bo;
+	struct ttm_bo_global *glob;
+	struct ttm_bo_device *bdev;
+	struct ttm_bo_driver *driver;
 
-	list_for_each_entry(entry, list, head) {
-		struct ttm_buffer_object *bo = entry->bo;
-		struct ttm_bo_device *bdev = bo->bdev;
-		struct ttm_bo_driver *driver = bdev->driver;
-		void *old_sync_obj;
+	if (list_empty(list))
+		return;
+
+	bo = list_first_entry(list, struct ttm_validate_buffer, head)->bo;
+	bdev = bo->bdev;
+	driver = bdev->driver;
+	glob = bo->glob;
 
-		spin_lock(&bdev->fence_lock);
-		old_sync_obj = bo->sync_obj;
+	spin_lock(&bdev->fence_lock);
+	spin_lock(&glob->lru_lock);
+
+	list_for_each_entry(entry, list, head) {
+		bo = entry->bo;
+		entry->old_sync_obj = bo->sync_obj;
 		bo->sync_obj = driver->sync_obj_ref(sync_obj);
 		bo->sync_obj_arg = entry->new_sync_obj_arg;
-		spin_unlock(&bdev->fence_lock);
-		ttm_bo_unreserve(bo);
+		ttm_bo_unreserve_locked(bo);
 		entry->reserved = false;
-		if (old_sync_obj)
-			driver->sync_obj_unref(&old_sync_obj);
+	}
+	spin_unlock(&glob->lru_lock);
+	spin_unlock(&bdev->fence_lock);
+
+	list_for_each_entry(entry, list, head) {
+		if (entry->old_sync_obj)
+			driver->sync_obj_unref(&entry->old_sync_obj);
 	}
 }
 EXPORT_SYMBOL(ttm_eu_fence_buffer_objects);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index ca8131e98300..cfb9ca4ec1c4 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -909,6 +909,16 @@ extern int ttm_bo_reserve_locked(struct ttm_buffer_object *bo,
  */
 extern void ttm_bo_unreserve(struct ttm_buffer_object *bo);
 
+/**
+ * ttm_bo_unreserve_locked
+ *
+ * @bo: A pointer to a struct ttm_buffer_object.
+ *
+ * Unreserve a previous reservation of @bo.
+ * Needs to be called with struct ttm_bo_global::lru_lock held.
+ */
+extern void ttm_bo_unreserve_locked(struct ttm_buffer_object *bo);
+
 /**
  * ttm_bo_wait_unreserved
  *
diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h
index fd09b8438977..535ab00407e0 100644
--- a/include/drm/ttm/ttm_execbuf_util.h
+++ b/include/drm/ttm/ttm_execbuf_util.h
@@ -44,6 +44,7 @@
  * @reserved:       Indicates whether @bo has been reserved for validation.
  * @removed:        Indicates whether @bo has been removed from lru lists.
  * @put_count:      Number of outstanding references on bo::list_kref.
+ * @old_sync_obj:   Pointer to a sync object about to be unreferenced
  */
 
 struct ttm_validate_buffer {
@@ -53,6 +54,7 @@ struct ttm_validate_buffer {
 	bool reserved;
 	bool removed;
 	int put_count;
+	void *old_sync_obj;
 };
 
 /**
-- 
cgit v1.2.3


From 65705962025df490d13df59ec57c5329d1bd0a16 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom
Date: Wed, 17 Nov 2010 12:28:31 +0000
Subject: drm/ttm/vmwgfx: Have TTM manage the validation sequence.

Rather than having the driver supply the validation sequence, leave that
responsibility to TTM. This saves some confusion and a function argument.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c            | 1 +
 drivers/gpu/drm/ttm/ttm_execbuf_util.c  | 5 ++++-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h     | 1 -
 drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 3 +--
 include/drm/ttm/ttm_bo_driver.h         | 2 ++
 include/drm/ttm/ttm_execbuf_util.h      | 3 +--
 6 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include/drm')

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 551a5d31cadf..25e4c2a1d1d8 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1539,6 +1539,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
 	bdev->dev_mapping = NULL;
 	bdev->glob = glob;
 	bdev->need_dma32 = need_dma32;
+	bdev->val_seq = 0;
 	spin_lock_init(&bdev->fence_lock);
 	mutex_lock(&glob->device_list_mutex);
 	list_add_tail(&bdev->device_list, &glob->device_list);
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index b6da65cc502a..3832fe10b4df 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -126,11 +126,12 @@ EXPORT_SYMBOL(ttm_eu_backoff_reservation);
  * buffers in different orders.
  */
 
-int ttm_eu_reserve_buffers(struct list_head *list, uint32_t val_seq)
+int ttm_eu_reserve_buffers(struct list_head *list)
 {
 	struct ttm_bo_global *glob;
 	struct ttm_validate_buffer *entry;
 	int ret;
+	uint32_t val_seq;
 
 	if (list_empty(list))
 		return 0;
@@ -146,6 +147,8 @@ int ttm_eu_reserve_buffers(struct list_head *list, uint32_t val_seq)
 
 retry:
 	spin_lock(&glob->lru_lock);
+	val_seq = entry->bo->bdev->val_seq++;
+
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index e7a58d055041..10fc01f69c40 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -264,7 +264,6 @@ struct vmw_private {
 	 */
 
 	struct vmw_sw_context ctx;
-	uint32_t val_seq;
 	struct mutex cmdbuf_mutex;
 
 	/**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 76954e3528c1..41b95ed6dbcd 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -653,8 +653,7 @@ int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
 	ret = vmw_cmd_check_all(dev_priv, sw_context, cmd, arg->command_size);
 	if (unlikely(ret != 0))
 		goto out_err;
-	ret = ttm_eu_reserve_buffers(&sw_context->validate_nodes,
-				     dev_priv->val_seq++);
+	ret = ttm_eu_reserve_buffers(&sw_context->validate_nodes);
 	if (unlikely(ret != 0))
 		goto out_err;
 
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index cfb9ca4ec1c4..e3b2e245db1b 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -515,6 +515,7 @@ struct ttm_bo_global {
  * @addr_space_mm: Range manager for the device address space.
  * lru_lock: Spinlock that protects the buffer+device lru lists and
  * ddestroy lists.
+ * @val_seq: Current validation sequence.
  * @nice_mode: Try nicely to wait for buffer idle when cleaning a manager.
  * If a GPU lockup has been detected, this is forced to 0.
  * @dev_mapping: A pointer to the struct address_space representing the
@@ -544,6 +545,7 @@ struct ttm_bo_device {
 	 * Protected by the global:lru lock.
 	 */
 	struct list_head ddestroy;
+	uint32_t val_seq;
 
 	/*
 	 * Protected by load / firstopen / lastclose /unload sync.
diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h
index 535ab00407e0..26cc7f9ffa41 100644
--- a/include/drm/ttm/ttm_execbuf_util.h
+++ b/include/drm/ttm/ttm_execbuf_util.h
@@ -72,7 +72,6 @@ extern void ttm_eu_backoff_reservation(struct list_head *list);
  * function ttm_eu_reserve_buffers
  *
  * @list:    thread private list of ttm_validate_buffer structs.
- * @val_seq: A unique sequence number.
  *
  * Tries to reserve bos pointed to by the list entries for validation.
  * If the function returns 0, all buffers are marked as "unfenced",
@@ -94,7 +93,7 @@ extern void ttm_eu_backoff_reservation(struct list_head *list);
  * has failed.
  */
 
-extern int ttm_eu_reserve_buffers(struct list_head *list, uint32_t val_seq);
+extern int ttm_eu_reserve_buffers(struct list_head *list);
 
 /**
  * function ttm_eu_fence_buffer_objects.
-- 
cgit v1.2.3


From eba67093f535322cb4f1c4b737319c0907a0c81d Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom
Date: Thu, 11 Nov 2010 09:41:57 +0100
Subject: drm/ttm: Fix up io_mem_reserve / io_mem_free calling

This patch attempts to fix up shortcomings with the current calling
sequences.

1) There's a fastpath where no locking occurs and only io_mem_reserved is
   called to obtain needed info for mapping. The fastpath is set per
   memory type manager.
2) If the fastpath is disabled, io_mem_reserve and io_mem_free will be exactly
   balanced and not called recursively for the same struct ttm_mem_reg.
3) Optionally the driver can choose to enable a per memory type manager LRU
   eviction mechanism that, when io_mem_reserve returns -EAGAIN will attempt
   to kill user-space mappings of memory in that manager to free up needed
   resources

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c      |  44 ++++++++++---
 drivers/gpu/drm/ttm/ttm_bo_util.c | 129 +++++++++++++++++++++++++++++++++-----
 drivers/gpu/drm/ttm/ttm_bo_vm.c   |  23 ++++---
 include/drm/ttm/ttm_bo_api.h      |   6 +-
 include/drm/ttm/ttm_bo_driver.h   | 104 +++++++++++++++---------------
 5 files changed, 226 insertions(+), 80 deletions(-)

(limited to 'include/drm')

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 25e4c2a1d1d8..cf2ec562550e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -378,8 +378,13 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 	int ret = 0;
 
 	if (old_is_pci || new_is_pci ||
-	    ((mem->placement & bo->mem.placement & TTM_PL_MASK_CACHING) == 0))
-		ttm_bo_unmap_virtual(bo);
+	    ((mem->placement & bo->mem.placement & TTM_PL_MASK_CACHING) == 0)) {
+		ret = ttm_mem_io_lock(old_man, true);
+		if (unlikely(ret != 0))
+			goto out_err;
+		ttm_bo_unmap_virtual_locked(bo);
+		ttm_mem_io_unlock(old_man);
+	}
 
 	/*
 	 * Create and bind a ttm if required.
@@ -466,7 +471,6 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
 		ttm_tt_destroy(bo->ttm);
 		bo->ttm = NULL;
 	}
-
 	ttm_bo_mem_put(bo, &bo->mem);
 
 	atomic_set(&bo->reserved, 0);
@@ -665,6 +669,7 @@ static void ttm_bo_release(struct kref *kref)
 	struct ttm_buffer_object *bo =
 	    container_of(kref, struct ttm_buffer_object, kref);
 	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type];
 
 	if (likely(bo->vm_node != NULL)) {
 		rb_erase(&bo->vm_rb, &bdev->addr_space_rb);
@@ -672,6 +677,9 @@ static void ttm_bo_release(struct kref *kref)
 		bo->vm_node = NULL;
 	}
 	write_unlock(&bdev->vm_lock);
+	ttm_mem_io_lock(man, false);
+	ttm_mem_io_free_vm(bo);
+	ttm_mem_io_unlock(man);
 	ttm_bo_cleanup_refs_or_queue(bo);
 	kref_put(&bo->list_kref, ttm_bo_release_list);
 	write_lock(&bdev->vm_lock);
@@ -728,7 +736,8 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
 
 	evict_mem = bo->mem;
 	evict_mem.mm_node = NULL;
-	evict_mem.bus.io_reserved = false;
+	evict_mem.bus.io_reserved_vm = false;
+	evict_mem.bus.io_reserved_count = 0;
 
 	placement.fpfn = 0;
 	placement.lpfn = 0;
@@ -1065,7 +1074,8 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 	mem.num_pages = bo->num_pages;
 	mem.size = mem.num_pages << PAGE_SHIFT;
 	mem.page_alignment = bo->mem.page_alignment;
-	mem.bus.io_reserved = false;
+	mem.bus.io_reserved_vm = false;
+	mem.bus.io_reserved_count = 0;
 	/*
 	 * Determine where to move the buffer.
 	 */
@@ -1184,6 +1194,7 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
 	INIT_LIST_HEAD(&bo->lru);
 	INIT_LIST_HEAD(&bo->ddestroy);
 	INIT_LIST_HEAD(&bo->swap);
+	INIT_LIST_HEAD(&bo->io_reserve_lru);
 	bo->bdev = bdev;
 	bo->glob = bdev->glob;
 	bo->type = type;
@@ -1193,7 +1204,8 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
 	bo->mem.num_pages = bo->num_pages;
 	bo->mem.mm_node = NULL;
 	bo->mem.page_alignment = page_alignment;
-	bo->mem.bus.io_reserved = false;
+	bo->mem.bus.io_reserved_vm = false;
+	bo->mem.bus.io_reserved_count = 0;
 	bo->buffer_start = buffer_start & PAGE_MASK;
 	bo->priv_flags = 0;
 	bo->mem.placement = (TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED);
@@ -1367,6 +1379,10 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
 	BUG_ON(type >= TTM_NUM_MEM_TYPES);
 	man = &bdev->man[type];
 	BUG_ON(man->has_type);
+	man->io_reserve_fastpath = true;
+	man->use_io_reserve_lru = false;
+	mutex_init(&man->io_reserve_mutex);
+	INIT_LIST_HEAD(&man->io_reserve_lru);
 
 	ret = bdev->driver->init_mem_type(bdev, type, man);
 	if (ret)
@@ -1574,7 +1590,7 @@ bool ttm_mem_reg_is_pci(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 	return true;
 }
 
-void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo)
+void ttm_bo_unmap_virtual_locked(struct ttm_buffer_object *bo)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
 	loff_t offset = (loff_t) bo->addr_space_offset;
@@ -1583,8 +1599,20 @@ void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo)
 	if (!bdev->dev_mapping)
 		return;
 	unmap_mapping_range(bdev->dev_mapping, offset, holelen, 1);
-	ttm_mem_io_free(bdev, &bo->mem);
+	ttm_mem_io_free_vm(bo);
 }
+
+void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type];
+
+	ttm_mem_io_lock(man, false);
+	ttm_bo_unmap_virtual_locked(bo);
+	ttm_mem_io_unlock(man);
+}
+
+
 EXPORT_SYMBOL(ttm_bo_unmap_virtual);
 
 static void ttm_bo_vm_insert_rb(struct ttm_buffer_object *bo)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 4b75133d6606..a89839f83f6c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -75,37 +75,123 @@ int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_move_ttm);
 
-int ttm_mem_io_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+int ttm_mem_io_lock(struct ttm_mem_type_manager *man, bool interruptible)
 {
-	int ret;
+	if (likely(man->io_reserve_fastpath))
+		return 0;
+
+	if (interruptible)
+		return mutex_lock_interruptible(&man->io_reserve_mutex);
+
+	mutex_lock(&man->io_reserve_mutex);
+	return 0;
+}
 
-	if (!mem->bus.io_reserved) {
-		mem->bus.io_reserved = true;
+void ttm_mem_io_unlock(struct ttm_mem_type_manager *man)
+{
+	if (likely(man->io_reserve_fastpath))
+		return;
+
+	mutex_unlock(&man->io_reserve_mutex);
+}
+
+static int ttm_mem_io_evict(struct ttm_mem_type_manager *man)
+{
+	struct ttm_buffer_object *bo;
+
+	if (!man->use_io_reserve_lru || list_empty(&man->io_reserve_lru))
+		return -EAGAIN;
+
+	bo = list_first_entry(&man->io_reserve_lru,
+			      struct ttm_buffer_object,
+			      io_reserve_lru);
+	list_del_init(&bo->io_reserve_lru);
+	ttm_bo_unmap_virtual_locked(bo);
+
+	return 0;
+}
+
+static int ttm_mem_io_reserve(struct ttm_bo_device *bdev,
+			      struct ttm_mem_reg *mem)
+{
+	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
+	int ret = 0;
+
+	if (!bdev->driver->io_mem_reserve)
+		return 0;
+	if (likely(man->io_reserve_fastpath))
+		return bdev->driver->io_mem_reserve(bdev, mem);
+
+	if (bdev->driver->io_mem_reserve &&
+	    mem->bus.io_reserved_count++ == 0) {
+retry:
 		ret = bdev->driver->io_mem_reserve(bdev, mem);
+		if (ret == -EAGAIN) {
+			ret = ttm_mem_io_evict(man);
+			if (ret == 0)
+				goto retry;
+		}
+	}
+	return ret;
+}
+
+static void ttm_mem_io_free(struct ttm_bo_device *bdev,
+			    struct ttm_mem_reg *mem)
+{
+	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
+
+	if (likely(man->io_reserve_fastpath))
+		return;
+
+	if (bdev->driver->io_mem_reserve &&
+	    --mem->bus.io_reserved_count == 0 &&
+	    bdev->driver->io_mem_free)
+		bdev->driver->io_mem_free(bdev, mem);
+
+}
+
+int ttm_mem_io_reserve_vm(struct ttm_buffer_object *bo)
+{
+	struct ttm_mem_reg *mem = &bo->mem;
+	int ret;
+
+	if (!mem->bus.io_reserved_vm) {
+		struct ttm_mem_type_manager *man =
+			&bo->bdev->man[mem->mem_type];
+
+		ret = ttm_mem_io_reserve(bo->bdev, mem);
 		if (unlikely(ret != 0))
 			return ret;
+		mem->bus.io_reserved_vm = true;
+		if (man->use_io_reserve_lru)
+			list_add_tail(&bo->io_reserve_lru,
+				      &man->io_reserve_lru);
 	}
 	return 0;
 }
 
-void ttm_mem_io_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+void ttm_mem_io_free_vm(struct ttm_buffer_object *bo)
 {
-	if (bdev->driver->io_mem_reserve) {
-		if (mem->bus.io_reserved) {
-			mem->bus.io_reserved = false;
-			bdev->driver->io_mem_free(bdev, mem);
-		}
+	struct ttm_mem_reg *mem = &bo->mem;
+
+	if (mem->bus.io_reserved_vm) {
+		mem->bus.io_reserved_vm = false;
+		list_del_init(&bo->io_reserve_lru);
+		ttm_mem_io_free(bo->bdev, mem);
 	}
 }
 
 int ttm_mem_reg_ioremap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem,
 			void **virtual)
 {
+	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
 	int ret;
 	void *addr;
 
 	*virtual = NULL;
+	(void) ttm_mem_io_lock(man, false);
 	ret = ttm_mem_io_reserve(bdev, mem);
+	ttm_mem_io_unlock(man);
 	if (ret || !mem->bus.is_iomem)
 		return ret;
 
@@ -117,7 +203,9 @@ int ttm_mem_reg_ioremap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem,
 		else
 			addr = ioremap_nocache(mem->bus.base + mem->bus.offset, mem->bus.size);
 		if (!addr) {
+			(void) ttm_mem_io_lock(man, false);
 			ttm_mem_io_free(bdev, mem);
+			ttm_mem_io_unlock(man);
 			return -ENOMEM;
 		}
 	}
@@ -134,7 +222,9 @@ void ttm_mem_reg_iounmap(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem,
 
 	if (virtual && mem->bus.addr == NULL)
 		iounmap(virtual);
+	(void) ttm_mem_io_lock(man, false);
 	ttm_mem_io_free(bdev, mem);
+	ttm_mem_io_unlock(man);
 }
 
 static int ttm_copy_io_page(void *dst, void *src, unsigned long page)
@@ -231,7 +321,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
 	struct ttm_mem_type_manager *man = &bdev->man[new_mem->mem_type];
 	struct ttm_tt *ttm = bo->ttm;
 	struct ttm_mem_reg *old_mem = &bo->mem;
-	struct ttm_mem_reg old_copy = *old_mem;
+	struct ttm_mem_reg old_copy;
 	void *old_iomap;
 	void *new_iomap;
 	int ret;
@@ -281,7 +371,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
 	mb();
 out2:
 	ttm_bo_free_old_node(bo);
-
+	old_copy = *old_mem;
 	*old_mem = *new_mem;
 	new_mem->mm_node = NULL;
 
@@ -292,7 +382,7 @@ out2:
 	}
 
 out1:
-	ttm_mem_reg_iounmap(bdev, new_mem, new_iomap);
+	ttm_mem_reg_iounmap(bdev, old_mem, new_iomap);
 out:
 	ttm_mem_reg_iounmap(bdev, &old_copy, old_iomap);
 	return ret;
@@ -341,6 +431,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	INIT_LIST_HEAD(&fbo->ddestroy);
 	INIT_LIST_HEAD(&fbo->lru);
 	INIT_LIST_HEAD(&fbo->swap);
+	INIT_LIST_HEAD(&fbo->io_reserve_lru);
 	fbo->vm_node = NULL;
 	atomic_set(&fbo->cpu_writers, 0);
 
@@ -452,6 +543,8 @@ int ttm_bo_kmap(struct ttm_buffer_object *bo,
 		unsigned long start_page, unsigned long num_pages,
 		struct ttm_bo_kmap_obj *map)
 {
+	struct ttm_mem_type_manager *man =
+		&bo->bdev->man[bo->mem.mem_type];
 	unsigned long offset, size;
 	int ret;
 
@@ -466,7 +559,9 @@ int ttm_bo_kmap(struct ttm_buffer_object *bo,
 	if (num_pages > 1 && !DRM_SUSER(DRM_CURPROC))
 		return -EPERM;
 #endif
+	(void) ttm_mem_io_lock(man, false);
 	ret = ttm_mem_io_reserve(bo->bdev, &bo->mem);
+	ttm_mem_io_unlock(man);
 	if (ret)
 		return ret;
 	if (!bo->mem.bus.is_iomem) {
@@ -481,12 +576,15 @@ EXPORT_SYMBOL(ttm_bo_kmap);
 
 void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map)
 {
+	struct ttm_buffer_object *bo = map->bo;
+	struct ttm_mem_type_manager *man =
+		&bo->bdev->man[bo->mem.mem_type];
+
 	if (!map->virtual)
 		return;
 	switch (map->bo_kmap_type) {
 	case ttm_bo_map_iomap:
 		iounmap(map->virtual);
-		ttm_mem_io_free(map->bo->bdev, &map->bo->mem);
 		break;
 	case ttm_bo_map_vmap:
 		vunmap(map->virtual);
@@ -499,6 +597,9 @@ void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map)
 	default:
 		BUG();
 	}
+	(void) ttm_mem_io_lock(man, false);
+	ttm_mem_io_free(map->bo->bdev, &map->bo->mem);
+	ttm_mem_io_unlock(man);
 	map->virtual = NULL;
 	map->page = NULL;
 }
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 8dd446cb778e..221b924acebe 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -83,6 +83,8 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	int i;
 	unsigned long address = (unsigned long)vmf->virtual_address;
 	int retval = VM_FAULT_NOPAGE;
+	struct ttm_mem_type_manager *man =
+		&bdev->man[bo->mem.mem_type];
 
 	/*
 	 * Work around locking order reversal in fault / nopfn
@@ -130,12 +132,16 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	} else
 		spin_unlock(&bdev->fence_lock);
 
-
-	ret = ttm_mem_io_reserve(bdev, &bo->mem);
-	if (ret) {
-		retval = VM_FAULT_SIGBUS;
+	ret = ttm_mem_io_lock(man, true);
+	if (unlikely(ret != 0)) {
+		retval = VM_FAULT_NOPAGE;
 		goto out_unlock;
 	}
+	ret = ttm_mem_io_reserve_vm(bo);
+	if (unlikely(ret != 0)) {
+		retval = VM_FAULT_SIGBUS;
+		goto out_io_unlock;
+	}
 
 	page_offset = ((address - vma->vm_start) >> PAGE_SHIFT) +
 	    bo->vm_node->start - vma->vm_pgoff;
@@ -144,7 +150,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 	if (unlikely(page_offset >= bo->num_pages)) {
 		retval = VM_FAULT_SIGBUS;
-		goto out_unlock;
+		goto out_io_unlock;
 	}
 
 	/*
@@ -182,7 +188,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 			page = ttm_tt_get_page(ttm, page_offset);
 			if (unlikely(!page && i == 0)) {
 				retval = VM_FAULT_OOM;
-				goto out_unlock;
+				goto out_io_unlock;
 			} else if (unlikely(!page)) {
 				break;
 			}
@@ -200,14 +206,15 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		else if (unlikely(ret != 0)) {
 			retval =
 			    (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
-			goto out_unlock;
+			goto out_io_unlock;
 		}
 
 		address += PAGE_SIZE;
 		if (unlikely(++page_offset >= page_last))
 			break;
 	}
-
+out_io_unlock:
+	ttm_mem_io_unlock(man);
 out_unlock:
 	ttm_bo_unreserve(bo);
 	return retval;
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index edacd483c59c..50852aad260a 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -74,6 +74,8 @@ struct ttm_placement {
  * @is_iomem:		is this io memory ?
  * @size:		size in byte
  * @offset:		offset from the base address
+ * @io_reserved_vm:     The VM system has a refcount in @io_reserved_count
+ * @io_reserved_count:  Refcounting the numbers of callers to ttm_mem_io_reserve
  *
  * Structure indicating the bus placement of an object.
  */
@@ -83,7 +85,8 @@ struct ttm_bus_placement {
 	unsigned long	size;
 	unsigned long	offset;
 	bool		is_iomem;
-	bool		io_reserved;
+	bool		io_reserved_vm;
+	uint64_t        io_reserved_count;
 };
 
 
@@ -235,6 +238,7 @@ struct ttm_buffer_object {
 	struct list_head lru;
 	struct list_head ddestroy;
 	struct list_head swap;
+	struct list_head io_reserve_lru;
 	uint32_t val_seq;
 	bool seq_valid;
 
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index e3b2e245db1b..1da8af6ac884 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -179,30 +179,6 @@ struct ttm_tt {
 #define TTM_MEMTYPE_FLAG_MAPPABLE      (1 << 1)	/* Memory mappable */
 #define TTM_MEMTYPE_FLAG_CMA           (1 << 3)	/* Can't map aperture */
 
-/**
- * struct ttm_mem_type_manager
- *
- * @has_type: The memory type has been initialized.
- * @use_type: The memory type is enabled.
- * @flags: TTM_MEMTYPE_XX flags identifying the traits of the memory
- * managed by this memory type.
- * @gpu_offset: If used, the GPU offset of the first managed page of
- * fixed memory or the first managed location in an aperture.
- * @size: Size of the managed region.
- * @available_caching: A mask of available caching types, TTM_PL_FLAG_XX,
- * as defined in ttm_placement_common.h
- * @default_caching: The default caching policy used for a buffer object
- * placed in this memory type if the user doesn't provide one.
- * @manager: The range manager used for this memory type. FIXME: If the aperture
- * has a page size different from the underlying system, the granularity
- * of this manager should take care of this. But the range allocating code
- * in ttm_bo.c needs to be modified for this.
- * @lru: The lru list for this memory type.
- *
- * This structure is used to identify and manage memory types for a device.
- * It's set up by the ttm_bo_driver::init_mem_type method.
- */
-
 struct ttm_mem_type_manager;
 
 struct ttm_mem_type_manager_func {
@@ -287,6 +263,36 @@ struct ttm_mem_type_manager_func {
 	void (*debug)(struct ttm_mem_type_manager *man, const char *prefix);
 };
 
+/**
+ * struct ttm_mem_type_manager
+ *
+ * @has_type: The memory type has been initialized.
+ * @use_type: The memory type is enabled.
+ * @flags: TTM_MEMTYPE_XX flags identifying the traits of the memory
+ * managed by this memory type.
+ * @gpu_offset: If used, the GPU offset of the first managed page of
+ * fixed memory or the first managed location in an aperture.
+ * @size: Size of the managed region.
+ * @available_caching: A mask of available caching types, TTM_PL_FLAG_XX,
+ * as defined in ttm_placement_common.h
+ * @default_caching: The default caching policy used for a buffer object
+ * placed in this memory type if the user doesn't provide one.
+ * @func: structure pointer implementing the range manager. See above
+ * @priv: Driver private closure for @func.
+ * @io_reserve_mutex: Mutex optionally protecting shared io_reserve structures
+ * @use_io_reserve_lru: Use an lru list to try to unreserve io_mem_regions
+ * reserved by the TTM vm system.
+ * @io_reserve_lru: Optional lru list for unreserving io mem regions.
+ * @io_reserve_fastpath: Only use bdev::driver::io_mem_reserve to obtain
+ * static information. bdev::driver::io_mem_free is never used.
+ * @lru: The lru list for this memory type.
+ *
+ * This structure is used to identify and manage memory types for a device.
+ * It's set up by the ttm_bo_driver::init_mem_type method.
+ */
+
+
+
 struct ttm_mem_type_manager {
 	struct ttm_bo_device *bdev;
 
@@ -303,6 +309,15 @@ struct ttm_mem_type_manager {
 	uint32_t default_caching;
 	const struct ttm_mem_type_manager_func *func;
 	void *priv;
+	struct mutex io_reserve_mutex;
+	bool use_io_reserve_lru;
+	bool io_reserve_fastpath;
+
+	/*
+	 * Protected by @io_reserve_mutex:
+	 */
+
+	struct list_head io_reserve_lru;
 
 	/*
 	 * Protected by the global->lru_lock.
@@ -758,31 +773,6 @@ extern void ttm_bo_mem_put_locked(struct ttm_buffer_object *bo,
 
 extern int ttm_bo_wait_cpu(struct ttm_buffer_object *bo, bool no_wait);
 
-/**
- * ttm_bo_pci_offset - Get the PCI offset for the buffer object memory.
- *
- * @bo Pointer to a struct ttm_buffer_object.
- * @bus_base On return the base of the PCI region
- * @bus_offset On return the byte offset into the PCI region
- * @bus_size On return the byte size of the buffer object or zero if
- * the buffer object memory is not accessible through a PCI region.
- *
- * Returns:
- * -EINVAL if the buffer object is currently not mappable.
- * 0 otherwise.
- */
-
-extern int ttm_bo_pci_offset(struct ttm_bo_device *bdev,
-			     struct ttm_mem_reg *mem,
-			     unsigned long *bus_base,
-			     unsigned long *bus_offset,
-			     unsigned long *bus_size);
-
-extern int ttm_mem_io_reserve(struct ttm_bo_device *bdev,
-				struct ttm_mem_reg *mem);
-extern void ttm_mem_io_free(struct ttm_bo_device *bdev,
-				struct ttm_mem_reg *mem);
-
 extern void ttm_bo_global_release(struct drm_global_reference *ref);
 extern int ttm_bo_global_init(struct drm_global_reference *ref);
 
@@ -814,6 +804,22 @@ extern int ttm_bo_device_init(struct ttm_bo_device *bdev,
  */
 extern void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo);
 
+/**
+ * ttm_bo_unmap_virtual
+ *
+ * @bo: tear down the virtual mappings for this BO
+ *
+ * The caller must take ttm_mem_io_lock before calling this function.
+ */
+extern void ttm_bo_unmap_virtual_locked(struct ttm_buffer_object *bo);
+
+extern int ttm_mem_io_reserve_vm(struct ttm_buffer_object *bo);
+extern void ttm_mem_io_free_vm(struct ttm_buffer_object *bo);
+extern int ttm_mem_io_lock(struct ttm_mem_type_manager *man,
+			   bool interruptible);
+extern void ttm_mem_io_unlock(struct ttm_mem_type_manager *man);
+
+
 /**
  * ttm_bo_reserve:
  *
-- 
cgit v1.2.3


From 11fa1618e327af7f08085e134b3325de9caf7a94 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Mon, 22 Nov 2010 17:56:37 -0500
Subject: drm/radeon/kms: add Ontario Fusion APU pci ids

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drm_pciids.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/drm')

diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index 883c1d439899..e6b28a39942f 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -419,6 +419,10 @@
 	{0x1002, 0x9713, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
 	{0x1002, 0x9714, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
 	{0x1002, 0x9715, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9802, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9803, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9804, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x9805, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
 	{0, 0, 0}
 
 #define r128_PCI_IDS \
-- 
cgit v1.2.3


From c64f7ba5f1006d8c20eacafecf98d4d00a6902a0 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Tue, 23 Nov 2010 14:24:24 +0000
Subject: agp/intel: Remove confusion of stolen entries not stolen memory

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/char/agp/intel-gtt.c    | 40 +++++++---------------------------------
 drivers/gpu/drm/i915/i915_dma.c |  4 ++--
 drivers/gpu/drm/i915/i915_drv.h |  2 +-
 include/drm/intel-gtt.h         | 11 +++++------
 4 files changed, 15 insertions(+), 42 deletions(-)

(limited to 'include/drm')

diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 19919ef9d661..d2733e526a68 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -477,26 +477,17 @@ static const struct aper_size_info_fixed const intel_fake_agp_sizes[] = {
 	{512, 131072, 7},
 };
 
-static unsigned int intel_gtt_stolen_entries(void)
+static unsigned int intel_gtt_stolen_size(void)
 {
 	u16 gmch_ctrl;
 	u8 rdct;
 	int local = 0;
 	static const int ddt[4] = { 0, 16, 32, 64 };
-	unsigned int overhead_entries;
 	unsigned int stolen_size = 0;
 
 	pci_read_config_word(intel_private.bridge_dev,
 			     I830_GMCH_CTRL, &gmch_ctrl);
 
-	if (INTEL_GTT_GEN > 4 || IS_PINEVIEW)
-		overhead_entries = 0;
-	else
-		overhead_entries = intel_private.base.gtt_mappable_entries
-			/ 1024;
-
-	overhead_entries += 1; /* BIOS popup */
-
 	if (intel_private.bridge_dev->device == PCI_DEVICE_ID_INTEL_82830_HB ||
 	    intel_private.bridge_dev->device == PCI_DEVICE_ID_INTEL_82845G_HB) {
 		switch (gmch_ctrl & I830_GMCH_GMS_MASK) {
@@ -631,7 +622,7 @@ static unsigned int intel_gtt_stolen_entries(void)
 		stolen_size = 0;
 	}
 
-	return stolen_size/KB(4) - overhead_entries;
+	return stolen_size;
 }
 
 static void i965_adjust_pgetbl_size(unsigned int size_flag)
@@ -817,8 +808,8 @@ static int intel_gtt_init(void)
 	global_cache_flush();   /* FIXME: ? */
 
 	/* we have to call this as early as possible after the MMIO base address is known */
-	intel_private.base.gtt_stolen_entries = intel_gtt_stolen_entries();
-	if (intel_private.base.gtt_stolen_entries == 0) {
+	intel_private.base.stolen_size = intel_gtt_stolen_size();
+	if (intel_private.base.stolen_size == 0) {
 		intel_private.driver->cleanup();
 		iounmap(intel_private.registers);
 		iounmap(intel_private.gtt);
@@ -1006,8 +997,7 @@ static int intel_fake_agp_configure(void)
 
 	agp_bridge->gart_bus_addr = intel_private.gma_bus_addr;
 
-	for (i = intel_private.base.gtt_stolen_entries;
-			i < intel_private.base.gtt_total_entries; i++) {
+	for (i = 0; i < intel_private.base.gtt_total_entries; i++) {
 		intel_private.driver->write_entry(intel_private.scratch_page_dma,
 						  i, 0);
 	}
@@ -1065,17 +1055,7 @@ static int intel_fake_agp_insert_entries(struct agp_memory *mem,
 	if (mem->page_count == 0)
 		goto out;
 
-	if (pg_start < intel_private.base.gtt_stolen_entries) {
-		dev_printk(KERN_DEBUG, &intel_private.pcidev->dev,
-			   "pg_start == 0x%.8lx, gtt_stolen_entries == 0x%.8x\n",
-			   pg_start, intel_private.base.gtt_stolen_entries);
-
-		dev_info(&intel_private.pcidev->dev,
-			 "trying to insert into local/stolen memory\n");
-		goto out_err;
-	}
-
-	if ((pg_start + mem->page_count) > intel_private.base.gtt_total_entries)
+	if (pg_start + mem->page_count > intel_private.base.gtt_total_entries)
 		goto out_err;
 
 	if (type != mem->type)
@@ -1118,12 +1098,6 @@ static int intel_fake_agp_remove_entries(struct agp_memory *mem,
 	if (mem->page_count == 0)
 		return 0;
 
-	if (pg_start < intel_private.base.gtt_stolen_entries) {
-		dev_info(&intel_private.pcidev->dev,
-			 "trying to disable local/stolen memory\n");
-		return -EINVAL;
-	}
-
 	if (USE_PCI_DMA_API && INTEL_GTT_GEN > 2)
 		intel_agp_unmap_memory(mem);
 
@@ -1629,7 +1603,7 @@ int intel_gmch_probe(struct pci_dev *pdev,
 }
 EXPORT_SYMBOL(intel_gmch_probe);
 
-struct intel_gtt *intel_gtt_get(void)
+const struct intel_gtt *intel_gtt_get(void)
 {
 	return &intel_private.base;
 }
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 3df271215ab9..86b0e8052ec9 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1044,7 +1044,7 @@ static unsigned long i915_stolen_to_phys(struct drm_device *dev, u32 offset)
 		pci_read_config_byte(pdev, 0x9c, &val);
 		base = val >> 3 << 27;
 	}
-	base -= dev_priv->mm.gtt->gtt_stolen_entries << PAGE_SHIFT;
+	base -= dev_priv->mm.gtt->stolen_size;
 #endif
 
 	return base + offset;
@@ -1168,7 +1168,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 	unsigned long prealloc_size, gtt_size, mappable_size;
 	int ret = 0;
 
-	prealloc_size = dev_priv->mm.gtt->gtt_stolen_entries << PAGE_SHIFT;
+	prealloc_size = dev_priv->mm.gtt->stolen_size;
 	gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT;
 	mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 745e46b0673f..b62ff5d6fa7e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -542,7 +542,7 @@ typedef struct drm_i915_private {
 
 	struct {
 		/** Bridge to intel-gtt-ko */
-		struct intel_gtt *gtt;
+		const struct intel_gtt *gtt;
 		/** Memory allocator for GTT stolen memory */
 		struct drm_mm stolen;
 		/** Memory allocator for GTT */
diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
index d3c81946f613..020f8aab7e5b 100644
--- a/include/drm/intel-gtt.h
+++ b/include/drm/intel-gtt.h
@@ -2,17 +2,16 @@
 
 #ifndef _DRM_INTEL_GTT_H
 #define	_DRM_INTEL_GTT_H
-struct intel_gtt {
-	/* Number of stolen gtt entries at the beginning. */
-	unsigned int gtt_stolen_entries;
+
+const struct intel_gtt {
+	/* Size of memory reserved for graphics by the BIOS */
+	unsigned int stolen_size;
 	/* Total number of gtt entries. */
 	unsigned int gtt_total_entries;
 	/* Part of the gtt that is mappable by the cpu, for those chips where
 	 * this is not the full gtt. */
 	unsigned int gtt_mappable_entries;
-};
-
-struct intel_gtt *intel_gtt_get(void);
+} *intel_gtt_get(void);
 
 #endif
 
-- 
cgit v1.2.3


From 23ed992a5ebe6964ebe312b54142fbc5e8185cdc Mon Sep 17 00:00:00 2001
From: Daniel Vetter
Date: Fri, 5 Nov 2010 18:04:52 +0100
Subject: drm/i915|intel-gtt: consolidate intel-gtt.h headers

... and a few other defines.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/char/agp/intel-gtt.c    |  5 -----
 drivers/gpu/drm/i915/i915_gem.c |  1 -
 include/drm/intel-gtt.h         | 12 ++++++++++++
 include/linux/intel-gtt.h       | 20 --------------------
 4 files changed, 12 insertions(+), 26 deletions(-)
 delete mode 100644 include/linux/intel-gtt.h

(limited to 'include/drm')

diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 72267c801637..291ac5113576 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -24,7 +24,6 @@
 #include <asm/smp.h>
 #include "agp.h"
 #include "intel-agp.h"
-#include <linux/intel-gtt.h>
 #include <drm/intel-gtt.h>
 
 /*
@@ -39,10 +38,6 @@
 #define USE_PCI_DMA_API 0
 #endif
 
-#define AGP_DCACHE_MEMORY	1
-#define AGP_PHYS_MEMORY		2
-#define INTEL_AGP_CACHED_MEMORY 3
-
 struct intel_gtt_driver {
 	unsigned int gen : 8;
 	unsigned int is_g33 : 1;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bf05ac414b1f..68492357658c 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -34,7 +34,6 @@
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/pci.h>
-#include <linux/intel-gtt.h>
 
 struct change_domains {
 	uint32_t invalidate_domains;
diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
index 020f8aab7e5b..9f91cbe35157 100644
--- a/include/drm/intel-gtt.h
+++ b/include/drm/intel-gtt.h
@@ -13,5 +13,17 @@ const struct intel_gtt {
 	unsigned int gtt_mappable_entries;
 } *intel_gtt_get(void);
 
+
+/* Special gtt memory types */
+#define AGP_DCACHE_MEMORY	1
+#define AGP_PHYS_MEMORY		2
+
+/* New caching attributes for gen6/sandybridge */
+#define AGP_USER_CACHED_MEMORY_LLC_MLC (AGP_USER_TYPES + 2)
+#define AGP_USER_UNCACHED_MEMORY (AGP_USER_TYPES + 4)
+
+/* flag for GFDT type */
+#define AGP_USER_CACHED_MEMORY_GFDT (1 << 3)
+
 #endif
 
diff --git a/include/linux/intel-gtt.h b/include/linux/intel-gtt.h
deleted file mode 100644
index 1d19ab2afa39..000000000000
--- a/include/linux/intel-gtt.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Common Intel AGPGART and GTT definitions.
- */
-#ifndef _INTEL_GTT_H
-#define _INTEL_GTT_H
-
-#include <linux/agp_backend.h>
-
-/* This is for Intel only GTT controls.
- *
- * Sandybridge: AGP_USER_CACHED_MEMORY default to LLC only
- */
-
-#define AGP_USER_CACHED_MEMORY_LLC_MLC (AGP_USER_TYPES + 2)
-#define AGP_USER_UNCACHED_MEMORY (AGP_USER_TYPES + 4)
-
-/* flag for GFDT type */
-#define AGP_USER_CACHED_MEMORY_GFDT (1 << 3)
-
-#endif
-- 
cgit v1.2.3


From 40ce6575102b23e432932b5ce41c44bf7cc5023b Mon Sep 17 00:00:00 2001
From: Daniel Vetter
Date: Fri, 5 Nov 2010 18:12:18 +0100
Subject: drm/i915/gtt: call chipset flush directly

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/char/agp/intel-gtt.c    | 7 +++++++
 drivers/gpu/drm/i915/i915_gem.c | 9 ++++-----
 include/drm/intel-gtt.h         | 2 +-
 3 files changed, 12 insertions(+), 6 deletions(-)

(limited to 'include/drm')

diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 291ac5113576..8e2e208c925b 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -1476,6 +1476,13 @@ const struct intel_gtt *intel_gtt_get(void)
 }
 EXPORT_SYMBOL(intel_gtt_get);
 
+void intel_gtt_chipset_flush(void)
+{
+	if (intel_private.driver->chipset_flush)
+		intel_private.driver->chipset_flush();
+}
+EXPORT_SYMBOL(intel_gtt_chipset_flush);
+
 void intel_gmch_remove(struct pci_dev *pdev)
 {
 	if (intel_private.pcidev)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 68492357658c..b663d2da1db3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2192,7 +2192,7 @@ i915_gem_flush(struct drm_device *dev,
 	drm_i915_private_t *dev_priv = dev->dev_private;
 
 	if (flush_domains & I915_GEM_DOMAIN_CPU)
-		drm_agp_chipset_flush(dev);
+		intel_gtt_chipset_flush();
 
 	if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
 		if (flush_rings & RING_RENDER)
@@ -2920,14 +2920,13 @@ i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
 static void
 i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
 {
-	struct drm_device *dev = obj->dev;
 	uint32_t old_write_domain;
 
 	if (obj->write_domain != I915_GEM_DOMAIN_CPU)
 		return;
 
 	i915_gem_clflush_object(obj);
-	drm_agp_chipset_flush(dev);
+	intel_gtt_chipset_flush();
 	old_write_domain = obj->write_domain;
 	obj->write_domain = 0;
 
@@ -5069,7 +5068,7 @@ void i915_gem_detach_phys_object(struct drm_device *dev,
 			page_cache_release(page);
 		}
 	}
-	drm_agp_chipset_flush(dev);
+	intel_gtt_chipset_flush();
 
 	obj_priv->phys_obj->cur_obj = NULL;
 	obj_priv->phys_obj = NULL;
@@ -5161,7 +5160,7 @@ i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
 			return -EFAULT;
 	}
 
-	drm_agp_chipset_flush(dev);
+	intel_gtt_chipset_flush();
 	return 0;
 }
 
diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
index 9f91cbe35157..c35817a11690 100644
--- a/include/drm/intel-gtt.h
+++ b/include/drm/intel-gtt.h
@@ -13,6 +13,7 @@ const struct intel_gtt {
 	unsigned int gtt_mappable_entries;
 } *intel_gtt_get(void);
 
+void intel_gtt_chipset_flush(void);
 
 /* Special gtt memory types */
 #define AGP_DCACHE_MEMORY	1
@@ -26,4 +27,3 @@ const struct intel_gtt {
 #define AGP_USER_CACHED_MEMORY_GFDT (1 << 3)
 
 #endif
-
-- 
cgit v1.2.3


From 4af72e2865a23ac090884a421bd1a8b19e247a22 Mon Sep 17 00:00:00 2001
From: Daniel Vetter
Date: Fri, 5 Nov 2010 18:13:43 +0100
Subject: drm: kill drm_agp_chipset_flush

No longer used.

Cc: Dave Airlie <airlied@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/drm_agpsupport.c | 6 ------
 include/drm/drmP.h               | 1 -
 2 files changed, 7 deletions(-)

(limited to 'include/drm')

diff --git a/drivers/gpu/drm/drm_agpsupport.c b/drivers/gpu/drm/drm_agpsupport.c
index 252fdb98b73a..0cb2ba50af53 100644
--- a/drivers/gpu/drm/drm_agpsupport.c
+++ b/drivers/gpu/drm/drm_agpsupport.c
@@ -466,10 +466,4 @@ drm_agp_bind_pages(struct drm_device *dev,
 }
 EXPORT_SYMBOL(drm_agp_bind_pages);
 
-void drm_agp_chipset_flush(struct drm_device *dev)
-{
-	agp_flush_chipset(dev->agp->bridge);
-}
-EXPORT_SYMBOL(drm_agp_chipset_flush);
-
 #endif /* __OS_HAS_AGP */
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index d4bc0f5cab8f..628f76772d22 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1319,7 +1319,6 @@ extern int drm_agp_unbind_ioctl(struct drm_device *dev, void *data,
 extern int drm_agp_bind(struct drm_device *dev, struct drm_agp_binding *request);
 extern int drm_agp_bind_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
-extern void drm_agp_chipset_flush(struct drm_device *dev);
 
 				/* Stub support (drm_stub.h) */
 extern int drm_setmaster_ioctl(struct drm_device *dev, void *data,
-- 
cgit v1.2.3


From 4080775b60cc26044e7c4aba5e76e5041b0d7004 Mon Sep 17 00:00:00 2001
From: Daniel Vetter
Date: Sat, 6 Nov 2010 11:18:58 +0100
Subject: intel-gtt: export api for drm/i915

Just some minor shuffling to get rid of any agp traces in the
exported functions.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/char/agp/intel-gtt.c | 120 ++++++++++++++++++++++++-------------------
 include/drm/intel-gtt.h      |  12 +++++
 2 files changed, 80 insertions(+), 52 deletions(-)

(limited to 'include/drm')

diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 1603e4f8ae73..5a2b7360f5be 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -87,41 +87,29 @@ static struct _intel_private {
 #define IS_IRONLAKE	intel_private.driver->is_ironlake
 #define HAS_PGTBL_EN	intel_private.driver->has_pgtbl_enable
 
-static void intel_agp_free_sglist(struct agp_memory *mem)
-{
-	struct sg_table st;
-
-	st.sgl = mem->sg_list;
-	st.orig_nents = st.nents = mem->page_count;
-
-	sg_free_table(&st);
-
-	mem->sg_list = NULL;
-	mem->num_sg = 0;
-}
-
-static int intel_agp_map_memory(struct agp_memory *mem)
+int intel_gtt_map_memory(struct page **pages, unsigned int num_entries,
+			 struct scatterlist **sg_list, int *num_sg)
 {
 	struct sg_table st;
 	struct scatterlist *sg;
 	int i;
 
-	if (mem->sg_list)
+	if (*sg_list)
 		return 0; /* already mapped (for e.g. resume */
 
-	DBG("try mapping %lu pages\n", (unsigned long)mem->page_count);
+	DBG("try mapping %lu pages\n", (unsigned long)num_entries);
 
-	if (sg_alloc_table(&st, mem->page_count, GFP_KERNEL))
+	if (sg_alloc_table(&st, num_entries, GFP_KERNEL))
 		goto err;
 
-	mem->sg_list = sg = st.sgl;
+	*sg_list = sg = st.sgl;
 
-	for (i = 0 ; i < mem->page_count; i++, sg = sg_next(sg))
-		sg_set_page(sg, mem->pages[i], PAGE_SIZE, 0);
+	for (i = 0 ; i < num_entries; i++, sg = sg_next(sg))
+		sg_set_page(sg, pages[i], PAGE_SIZE, 0);
 
-	mem->num_sg = pci_map_sg(intel_private.pcidev, mem->sg_list,
-				 mem->page_count, PCI_DMA_BIDIRECTIONAL);
-	if (unlikely(!mem->num_sg))
+	*num_sg = pci_map_sg(intel_private.pcidev, *sg_list,
+				 num_entries, PCI_DMA_BIDIRECTIONAL);
+	if (unlikely(!*num_sg))
 		goto err;
 
 	return 0;
@@ -130,15 +118,22 @@ err:
 	sg_free_table(&st);
 	return -ENOMEM;
 }
+EXPORT_SYMBOL(intel_gtt_map_memory);
 
-static void intel_agp_unmap_memory(struct agp_memory *mem)
+void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg)
 {
+	struct sg_table st;
 	DBG("try unmapping %lu pages\n", (unsigned long)mem->page_count);
 
-	pci_unmap_sg(intel_private.pcidev, mem->sg_list,
-		     mem->page_count, PCI_DMA_BIDIRECTIONAL);
-	intel_agp_free_sglist(mem);
+	pci_unmap_sg(intel_private.pcidev, sg_list,
+		     num_sg, PCI_DMA_BIDIRECTIONAL);
+
+	st.sgl = sg_list;
+	st.orig_nents = st.nents = num_sg;
+
+	sg_free_table(&st);
 }
+EXPORT_SYMBOL(intel_gtt_unmap_memory);
 
 static void intel_fake_agp_enable(struct agp_bridge_data *bridge, u32 mode)
 {
@@ -307,7 +302,7 @@ static int intel_gtt_setup_scratch_page(void)
 	get_page(page);
 	set_pages_uc(page, 1);
 
-	if (USE_PCI_DMA_API && INTEL_GTT_GEN > 2) {
+	if (intel_private.base.needs_dmar) {
 		dma_addr = pci_map_page(intel_private.pcidev, page, 0,
 				    PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 		if (pci_dma_mapping_error(intel_private.pcidev, dma_addr))
@@ -699,6 +694,8 @@ static int intel_gtt_init(void)
 		return ret;
 	}
 
+	intel_private.base.needs_dmar = USE_PCI_DMA_API && INTEL_GTT_GEN > 2;
+
 	return 0;
 }
 
@@ -892,10 +889,10 @@ static bool i830_check_flags(unsigned int flags)
 	return false;
 }
 
-static void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
-					unsigned int sg_len,
-					unsigned int pg_start,
-					unsigned int flags)
+void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
+				 unsigned int sg_len,
+				 unsigned int pg_start,
+				 unsigned int flags)
 {
 	struct scatterlist *sg;
 	unsigned int len, m;
@@ -916,11 +913,25 @@ static void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
 	}
 	readl(intel_private.gtt+j-1);
 }
+EXPORT_SYMBOL(intel_gtt_insert_sg_entries);
+
+void intel_gtt_insert_pages(unsigned int first_entry, unsigned int num_entries,
+			    struct page **pages, unsigned int flags)
+{
+	int i, j;
+
+	for (i = 0, j = first_entry; i < num_entries; i++, j++) {
+		dma_addr_t addr = page_to_phys(pages[i]);
+		intel_private.driver->write_entry(addr,
+						  j, flags);
+	}
+	readl(intel_private.gtt+j-1);
+}
+EXPORT_SYMBOL(intel_gtt_insert_pages);
 
 static int intel_fake_agp_insert_entries(struct agp_memory *mem,
 					 off_t pg_start, int type)
 {
-	int i, j;
 	int ret = -EINVAL;
 
 	if (INTEL_GTT_GEN == 1 && type == AGP_DCACHE_MEMORY)
@@ -941,21 +952,17 @@ static int intel_fake_agp_insert_entries(struct agp_memory *mem,
 	if (!mem->is_flushed)
 		global_cache_flush();
 
-	if (USE_PCI_DMA_API && INTEL_GTT_GEN > 2) {
-		ret = intel_agp_map_memory(mem);
+	if (intel_private.base.needs_dmar) {
+		ret = intel_gtt_map_memory(mem->pages, mem->page_count,
+					   &mem->sg_list, &mem->num_sg);
 		if (ret != 0)
 			return ret;
 
 		intel_gtt_insert_sg_entries(mem->sg_list, mem->num_sg,
 					    pg_start, type);
-	} else {
-		for (i = 0, j = pg_start; i < mem->page_count; i++, j++) {
-			dma_addr_t addr = page_to_phys(mem->pages[i]);
-			intel_private.driver->write_entry(addr,
-							  j, type);
-		}
-		readl(intel_private.gtt+j-1);
-	}
+	} else
+		intel_gtt_insert_pages(pg_start, mem->page_count, mem->pages,
+				       type);
 
 out:
 	ret = 0;
@@ -964,22 +971,31 @@ out_err:
 	return ret;
 }
 
+void intel_gtt_clear_range(unsigned int first_entry, unsigned int num_entries)
+{
+	unsigned int i;
+
+	for (i = first_entry; i < (first_entry + num_entries); i++) {
+		intel_private.driver->write_entry(intel_private.scratch_page_dma,
+						  i, 0);
+	}
+	readl(intel_private.gtt+i-1);
+}
+EXPORT_SYMBOL(intel_gtt_clear_range);
+
 static int intel_fake_agp_remove_entries(struct agp_memory *mem,
 					 off_t pg_start, int type)
 {
-	int i;
-
 	if (mem->page_count == 0)
 		return 0;
 
-	if (USE_PCI_DMA_API && INTEL_GTT_GEN > 2)
-		intel_agp_unmap_memory(mem);
-
-	for (i = pg_start; i < (mem->page_count + pg_start); i++) {
-		intel_private.driver->write_entry(intel_private.scratch_page_dma,
-						  i, 0);
+	if (intel_private.base.needs_dmar) {
+		intel_gtt_unmap_memory(mem->sg_list, mem->num_sg);
+		mem->sg_list = NULL;
+		mem->num_sg = 0;
 	}
-	readl(intel_private.gtt+i-1);
+
+	intel_gtt_clear_range(pg_start, mem->page_count);
 
 	return 0;
 }
diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
index c35817a11690..9e343c0998b4 100644
--- a/include/drm/intel-gtt.h
+++ b/include/drm/intel-gtt.h
@@ -11,9 +11,21 @@ const struct intel_gtt {
 	/* Part of the gtt that is mappable by the cpu, for those chips where
 	 * this is not the full gtt. */
 	unsigned int gtt_mappable_entries;
+	/* Whether i915 needs to use the dmar apis or not. */
+	unsigned int needs_dmar : 1;
 } *intel_gtt_get(void);
 
 void intel_gtt_chipset_flush(void);
+void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg);
+void intel_gtt_clear_range(unsigned int first_entry, unsigned int num_entries);
+int intel_gtt_map_memory(struct page **pages, unsigned int num_entries,
+			 struct scatterlist **sg_list, int *num_sg);
+void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
+				 unsigned int sg_len,
+				 unsigned int pg_start,
+				 unsigned int flags);
+void intel_gtt_insert_pages(unsigned int first_entry, unsigned int num_entries,
+			    struct page **pages, unsigned int flags);
 
 /* Special gtt memory types */
 #define AGP_DCACHE_MEMORY	1
-- 
cgit v1.2.3


From 21e86c1c8a844bf978f8fc431a59c9f5a578812d Mon Sep 17 00:00:00 2001
From: Ben Skeggs
Date: Mon, 11 Oct 2010 11:48:45 +1000
Subject: drm/nouveau: remove cpu_writers lock

No other driver uses this, and userspace should be responsible for handling
locking between them if they share BOs.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_drv.h |  1 -
 drivers/gpu/drm/nouveau/nouveau_gem.c | 64 +++--------------------------------
 include/drm/nouveau_drm.h             |  1 -
 3 files changed, 4 insertions(+), 62 deletions(-)

(limited to 'include/drm')

diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index c3f102125083..b78663fc334c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -96,7 +96,6 @@ struct nouveau_bo {
 	struct nouveau_tile_reg *tile;
 
 	struct drm_gem_object *gem;
-	struct drm_file *cpu_filp;
 	int pin_refcnt;
 };
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 454d5ceb28f1..e14d10e38870 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -48,9 +48,6 @@ nouveau_gem_object_del(struct drm_gem_object *gem)
 		return;
 	nvbo->gem = NULL;
 
-	if (unlikely(nvbo->cpu_filp))
-		ttm_bo_synccpu_write_release(bo);
-
 	if (unlikely(nvbo->pin_refcnt)) {
 		nvbo->pin_refcnt = 1;
 		nouveau_bo_unpin(nvbo);
@@ -334,23 +331,6 @@ retry:
 			validate_fini(op, NULL);
 			return -EINVAL;
 		}
-
-		if (unlikely(atomic_read(&nvbo->bo.cpu_writers) > 0)) {
-			validate_fini(op, NULL);
-
-			if (nvbo->cpu_filp == file_priv) {
-				NV_ERROR(dev, "bo %p mapped by process trying "
-					      "to validate it!\n", nvbo);
-				return -EINVAL;
-			}
-
-			ret = ttm_bo_wait_cpu(&nvbo->bo, false);
-			if (ret) {
-				NV_ERROR(dev, "fail wait_cpu\n");
-				return ret;
-			}
-			goto retry;
-		}
 	}
 
 	return 0;
@@ -791,26 +771,9 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
 		return -ENOENT;
 	nvbo = nouveau_gem_object(gem);
 
-	if (nvbo->cpu_filp) {
-		if (nvbo->cpu_filp == file_priv)
-			goto out;
-
-		ret = ttm_bo_wait_cpu(&nvbo->bo, no_wait);
-		if (ret)
-			goto out;
-	}
-
-	if (req->flags & NOUVEAU_GEM_CPU_PREP_NOBLOCK) {
-		spin_lock(&nvbo->bo.bdev->fence_lock);
-		ret = ttm_bo_wait(&nvbo->bo, false, false, no_wait);
-		spin_unlock(&nvbo->bo.bdev->fence_lock);
-	} else {
-		ret = ttm_bo_synccpu_write_grab(&nvbo->bo, no_wait);
-		if (ret == 0)
-			nvbo->cpu_filp = file_priv;
-	}
-
-out:
+	spin_lock(&nvbo->bo.bdev->fence_lock);
+	ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait);
+	spin_unlock(&nvbo->bo.bdev->fence_lock);
 	drm_gem_object_unreference_unlocked(gem);
 	return ret;
 }
@@ -819,26 +782,7 @@ int
 nouveau_gem_ioctl_cpu_fini(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv)
 {
-	struct drm_nouveau_gem_cpu_prep *req = data;
-	struct drm_gem_object *gem;
-	struct nouveau_bo *nvbo;
-	int ret = -EINVAL;
-
-	gem = drm_gem_object_lookup(dev, file_priv, req->handle);
-	if (!gem)
-		return -ENOENT;
-	nvbo = nouveau_gem_object(gem);
-
-	if (nvbo->cpu_filp != file_priv)
-		goto out;
-	nvbo->cpu_filp = NULL;
-
-	ttm_bo_synccpu_write_release(&nvbo->bo);
-	ret = 0;
-
-out:
-	drm_gem_object_unreference_unlocked(gem);
-	return ret;
+	return 0;
 }
 
 int
diff --git a/include/drm/nouveau_drm.h b/include/drm/nouveau_drm.h
index bc5590b1a1ac..60a7b3e9c2e9 100644
--- a/include/drm/nouveau_drm.h
+++ b/include/drm/nouveau_drm.h
@@ -171,7 +171,6 @@ struct drm_nouveau_gem_pushbuf {
 };
 
 #define NOUVEAU_GEM_CPU_PREP_NOWAIT                                  0x00000001
-#define NOUVEAU_GEM_CPU_PREP_NOBLOCK                                 0x00000002
 #define NOUVEAU_GEM_CPU_PREP_WRITE                                   0x00000004
 struct drm_nouveau_gem_cpu_prep {
 	uint32_t handle;
-- 
cgit v1.2.3


From 332b242f47786d1a43bd7a19a0513dd5d493db8e Mon Sep 17 00:00:00 2001
From: Francisco Jerez
Date: Wed, 20 Oct 2010 23:35:40 +0200
Subject: drm/nouveau: Implement the pageflip ioctl.

nv0x-nv4x should be mostly fine, nv50 doesn't work yet.

Signed-off-by: Francisco Jerez <currojerez@riseup.net>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_bo.c      |  12 ++
 drivers/gpu/drm/nouveau/nouveau_channel.c |   1 +
 drivers/gpu/drm/nouveau/nouveau_display.c | 180 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/nouveau/nouveau_drv.h     |  16 +++
 drivers/gpu/drm/nouveau/nouveau_gem.c     |  11 +-
 drivers/gpu/drm/nouveau/nouveau_state.c   |   3 +
 drivers/gpu/drm/nouveau/nv04_crtc.c       |   1 +
 drivers/gpu/drm/nouveau/nv04_graph.c      |  16 +++
 drivers/gpu/drm/nouveau/nv10_graph.c      |   4 +
 drivers/gpu/drm/nouveau/nv20_graph.c      |   8 ++
 drivers/gpu/drm/nouveau/nv40_graph.c      |   4 +
 drivers/gpu/drm/nouveau/nv50_crtc.c       |   1 +
 drivers/gpu/drm/nouveau/nv50_graph.c      |  16 +++
 include/drm/nouveau_drm.h                 |   1 +
 14 files changed, 265 insertions(+), 9 deletions(-)

(limited to 'include/drm')

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index c09928322eb9..cdc8f544d47f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -942,6 +942,18 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
 	return ttm_bo_validate(bo, &nvbo->placement, false, true, false);
 }
 
+void
+nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence)
+{
+	spin_lock(&nvbo->bo.bdev->fence_lock);
+	__nouveau_fence_unref(&nvbo->bo.sync_obj);
+
+	if (likely(fence))
+		nvbo->bo.sync_obj = nouveau_fence_ref(fence);
+
+	spin_unlock(&nvbo->bo.bdev->fence_lock);
+}
+
 struct ttm_bo_driver nouveau_bo_driver = {
 	.create_ttm_backend_entry = nouveau_bo_create_ttm_backend_entry,
 	.invalidate_caches = nouveau_bo_invalidate_caches,
diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c
index c9cdbd786dae..11b2370e16da 100644
--- a/drivers/gpu/drm/nouveau/nouveau_channel.c
+++ b/drivers/gpu/drm/nouveau/nouveau_channel.c
@@ -148,6 +148,7 @@ nouveau_channel_alloc(struct drm_device *dev, struct nouveau_channel **chan_ret,
 
 	NV_DEBUG(dev, "initialising channel %d\n", chan->id);
 	INIT_LIST_HEAD(&chan->nvsw.vbl_wait);
+	INIT_LIST_HEAD(&chan->nvsw.flip);
 	INIT_LIST_HEAD(&chan->fence.pending);
 
 	/* Allocate DMA push buffer */
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index f8987bcb7f51..505c6bfb4d75 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -30,6 +30,8 @@
 #include "nouveau_fb.h"
 #include "nouveau_fbcon.h"
 #include "nouveau_hw.h"
+#include "nouveau_crtc.h"
+#include "nouveau_dma.h"
 
 static void
 nouveau_user_framebuffer_destroy(struct drm_framebuffer *drm_fb)
@@ -131,3 +133,181 @@ nouveau_vblank_disable(struct drm_device *dev, int crtc)
 	else
 		NVWriteCRTC(dev, crtc, NV_PCRTC_INTR_EN_0, 0);
 }
+
+static int
+nouveau_page_flip_reserve(struct nouveau_bo *old_bo,
+			  struct nouveau_bo *new_bo)
+{
+	int ret;
+
+	ret = nouveau_bo_pin(new_bo, TTM_PL_FLAG_VRAM);
+	if (ret)
+		return ret;
+
+	ret = ttm_bo_reserve(&new_bo->bo, false, false, false, 0);
+	if (ret)
+		goto fail;
+
+	ret = ttm_bo_reserve(&old_bo->bo, false, false, false, 0);
+	if (ret)
+		goto fail_unreserve;
+
+	return 0;
+
+fail_unreserve:
+	ttm_bo_unreserve(&new_bo->bo);
+fail:
+	nouveau_bo_unpin(new_bo);
+	return ret;
+}
+
+static void
+nouveau_page_flip_unreserve(struct nouveau_bo *old_bo,
+			    struct nouveau_bo *new_bo,
+			    struct nouveau_fence *fence)
+{
+	nouveau_bo_fence(new_bo, fence);
+	ttm_bo_unreserve(&new_bo->bo);
+
+	nouveau_bo_fence(old_bo, fence);
+	ttm_bo_unreserve(&old_bo->bo);
+
+	nouveau_bo_unpin(old_bo);
+}
+
+static int
+nouveau_page_flip_emit(struct nouveau_channel *chan,
+		       struct nouveau_bo *old_bo,
+		       struct nouveau_bo *new_bo,
+		       struct nouveau_page_flip_state *s,
+		       struct nouveau_fence **pfence)
+{
+	struct drm_device *dev = chan->dev;
+	unsigned long flags;
+	int ret;
+
+	/* Queue it to the pending list */
+	spin_lock_irqsave(&dev->event_lock, flags);
+	list_add_tail(&s->head, &chan->nvsw.flip);
+	spin_unlock_irqrestore(&dev->event_lock, flags);
+
+	/* Synchronize with the old framebuffer */
+	ret = nouveau_fence_sync(old_bo->bo.sync_obj, chan);
+	if (ret)
+		goto fail;
+
+	/* Emit the pageflip */
+	ret = RING_SPACE(chan, 2);
+	if (ret)
+		goto fail;
+
+	BEGIN_RING(chan, NvSubSw, NV_SW_PAGE_FLIP, 1);
+	OUT_RING(chan, 0);
+	FIRE_RING(chan);
+
+	ret = nouveau_fence_new(chan, pfence, true);
+	if (ret)
+		goto fail;
+
+	return 0;
+fail:
+	spin_lock_irqsave(&dev->event_lock, flags);
+	list_del(&s->head);
+	spin_unlock_irqrestore(&dev->event_lock, flags);
+	return ret;
+}
+
+int
+nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+		       struct drm_pending_vblank_event *event)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_bo *old_bo = nouveau_framebuffer(crtc->fb)->nvbo;
+	struct nouveau_bo *new_bo = nouveau_framebuffer(fb)->nvbo;
+	struct nouveau_page_flip_state *s;
+	struct nouveau_channel *chan;
+	struct nouveau_fence *fence;
+	int ret;
+
+	if (dev_priv->engine.graph.accel_blocked)
+		return -ENODEV;
+
+	s = kzalloc(sizeof(*s), GFP_KERNEL);
+	if (!s)
+		return -ENOMEM;
+
+	/* Don't let the buffers go away while we flip */
+	ret = nouveau_page_flip_reserve(old_bo, new_bo);
+	if (ret)
+		goto fail_free;
+
+	/* Initialize a page flip struct */
+	*s = (struct nouveau_page_flip_state)
+		{ { }, s->event, nouveau_crtc(crtc)->index,
+		  fb->bits_per_pixel, fb->pitch, crtc->x, crtc->y,
+		  new_bo->bo.offset };
+
+	/* Choose the channel the flip will be handled in */
+	chan = nouveau_fence_channel(new_bo->bo.sync_obj);
+	if (!chan)
+		chan = nouveau_channel_get_unlocked(dev_priv->channel);
+	mutex_lock(&chan->mutex);
+
+	/* Emit a page flip */
+	ret = nouveau_page_flip_emit(chan, old_bo, new_bo, s, &fence);
+	nouveau_channel_put(&chan);
+	if (ret)
+		goto fail_unreserve;
+
+	/* Update the crtc struct and cleanup */
+	crtc->fb = fb;
+
+	nouveau_page_flip_unreserve(old_bo, new_bo, fence);
+	nouveau_fence_unref(&fence);
+	return 0;
+
+fail_unreserve:
+	nouveau_page_flip_unreserve(old_bo, new_bo, NULL);
+fail_free:
+	kfree(s);
+	return ret;
+}
+
+int
+nouveau_finish_page_flip(struct nouveau_channel *chan,
+			 struct nouveau_page_flip_state *ps)
+{
+	struct drm_device *dev = chan->dev;
+	struct nouveau_page_flip_state *s;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->event_lock, flags);
+
+	if (list_empty(&chan->nvsw.flip)) {
+		NV_ERROR(dev, "Unexpected pageflip in channel %d.\n", chan->id);
+		spin_unlock_irqrestore(&dev->event_lock, flags);
+		return -EINVAL;
+	}
+
+	s = list_first_entry(&chan->nvsw.flip,
+			     struct nouveau_page_flip_state, head);
+	if (s->event) {
+		struct drm_pending_vblank_event *e = s->event;
+		struct timeval now;
+
+		do_gettimeofday(&now);
+		e->event.sequence = 0;
+		e->event.tv_sec = now.tv_sec;
+		e->event.tv_usec = now.tv_usec;
+		list_add_tail(&e->base.link, &e->base.file_priv->event_list);
+		wake_up_interruptible(&e->base.file_priv->event_wait);
+	}
+
+	list_del(&s->head);
+	*ps = *s;
+	kfree(s);
+
+	spin_unlock_irqrestore(&dev->event_lock, flags);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 7cf034fd5cd0..2bb1f1572a55 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -166,6 +166,13 @@ struct nouveau_gpuobj {
 	void *priv;
 };
 
+struct nouveau_page_flip_state {
+	struct list_head head;
+	struct drm_pending_vblank_event *event;
+	int crtc, bpp, pitch, x, y;
+	uint64_t offset;
+};
+
 struct nouveau_channel {
 	struct drm_device *dev;
 	int id;
@@ -253,6 +260,7 @@ struct nouveau_channel {
 		uint32_t vblsem_offset;
 		uint32_t vblsem_rval;
 		struct list_head vbl_wait;
+		struct list_head flip;
 	} nvsw;
 
 	struct {
@@ -1076,6 +1084,8 @@ extern void nv04_graph_destroy_context(struct nouveau_channel *);
 extern int  nv04_graph_load_context(struct nouveau_channel *);
 extern int  nv04_graph_unload_context(struct drm_device *);
 extern void nv04_graph_context_switch(struct drm_device *);
+extern int  nv04_graph_mthd_page_flip(struct nouveau_channel *chan,
+				      u32 class, u32 mthd, u32 data);
 
 /* nv10_graph.c */
 extern int  nv10_graph_init(struct drm_device *);
@@ -1249,6 +1259,7 @@ extern u16 nouveau_bo_rd16(struct nouveau_bo *nvbo, unsigned index);
 extern void nouveau_bo_wr16(struct nouveau_bo *nvbo, unsigned index, u16 val);
 extern u32 nouveau_bo_rd32(struct nouveau_bo *nvbo, unsigned index);
 extern void nouveau_bo_wr32(struct nouveau_bo *nvbo, unsigned index, u32 val);
+extern void nouveau_bo_fence(struct nouveau_bo *, struct nouveau_fence *);
 
 /* nouveau_fence.c */
 struct nouveau_fence;
@@ -1315,6 +1326,10 @@ extern int nouveau_gem_ioctl_info(struct drm_device *, void *,
 /* nouveau_display.c */
 int nouveau_vblank_enable(struct drm_device *dev, int crtc);
 void nouveau_vblank_disable(struct drm_device *dev, int crtc);
+int nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+			   struct drm_pending_vblank_event *event);
+int nouveau_finish_page_flip(struct nouveau_channel *,
+			     struct nouveau_page_flip_state *);
 
 /* nv10_gpio.c */
 int nv10_gpio_get(struct drm_device *dev, enum dcb_gpio_tag tag);
@@ -1514,5 +1529,6 @@ nv_match_device(struct drm_device *dev, unsigned device,
 #define NV_SW_VBLSEM_OFFSET                                          0x00000400
 #define NV_SW_VBLSEM_RELEASE_VALUE                                   0x00000404
 #define NV_SW_VBLSEM_RELEASE                                         0x00000408
+#define NV_SW_PAGE_FLIP                                              0x00000500
 
 #endif /* __NOUVEAU_DRV_H__ */
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index de8535b58710..9886b644f27d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -231,15 +231,8 @@ validate_fini_list(struct list_head *list, struct nouveau_fence *fence)
 
 	list_for_each_safe(entry, tmp, list) {
 		nvbo = list_entry(entry, struct nouveau_bo, entry);
-		if (likely(fence)) {
-			struct nouveau_fence *prev_fence;
-
-			spin_lock(&nvbo->bo.bdev->fence_lock);
-			prev_fence = nvbo->bo.sync_obj;
-			nvbo->bo.sync_obj = nouveau_fence_ref(fence);
-			spin_unlock(&nvbo->bo.bdev->fence_lock);
-			nouveau_fence_unref(&prev_fence);
-		}
+
+		nouveau_bo_fence(nvbo, fence);
 
 		if (unlikely(nvbo->validate_mapped)) {
 			ttm_bo_kunmap(&nvbo->kmap);
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index d72aa8d19a19..ec9d193b6f61 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -1090,6 +1090,9 @@ int nouveau_ioctl_getparam(struct drm_device *dev, void *data,
 	case NOUVEAU_GETPARAM_HAS_BO_USAGE:
 		getparam->value = 1;
 		break;
+	case NOUVEAU_GETPARAM_HAS_PAGEFLIP:
+		getparam->value = (dev_priv->card_type < NV_50);
+		break;
 	case NOUVEAU_GETPARAM_GRAPH_UNITS:
 		/* NV40 and NV50 versions are quite different, but register
 		 * address is the same. User is supposed to know the card
diff --git a/drivers/gpu/drm/nouveau/nv04_crtc.c b/drivers/gpu/drm/nouveau/nv04_crtc.c
index b61a7ff8b7b7..e4aea721971f 100644
--- a/drivers/gpu/drm/nouveau/nv04_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv04_crtc.c
@@ -989,6 +989,7 @@ static const struct drm_crtc_funcs nv04_crtc_funcs = {
 	.cursor_move = nv04_crtc_cursor_move,
 	.gamma_set = nv_crtc_gamma_set,
 	.set_config = drm_crtc_helper_set_config,
+	.page_flip = nouveau_crtc_page_flip,
 	.destroy = nv_crtc_destroy,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nv04_graph.c b/drivers/gpu/drm/nouveau/nv04_graph.c
index 81aba097ef95..239519aefce6 100644
--- a/drivers/gpu/drm/nouveau/nv04_graph.c
+++ b/drivers/gpu/drm/nouveau/nv04_graph.c
@@ -26,6 +26,7 @@
 #include "drm.h"
 #include "nouveau_drm.h"
 #include "nouveau_drv.h"
+#include "nouveau_hw.h"
 
 static int nv04_graph_register(struct drm_device *dev);
 
@@ -553,6 +554,20 @@ nv04_graph_mthd_set_ref(struct nouveau_channel *chan,
 	return 0;
 }
 
+int
+nv04_graph_mthd_page_flip(struct nouveau_channel *chan,
+			  u32 class, u32 mthd, u32 data)
+{
+	struct drm_device *dev = chan->dev;
+	struct nouveau_page_flip_state s;
+
+	if (!nouveau_finish_page_flip(chan, &s))
+		nv_set_crtc_base(dev, s.crtc,
+				 s.offset + s.y * s.pitch + s.x * s.bpp / 8);
+
+	return 0;
+}
+
 /*
  * Software methods, why they are needed, and how they all work:
  *
@@ -1204,6 +1219,7 @@ nv04_graph_register(struct drm_device *dev)
 	/* nvsw */
 	NVOBJ_CLASS(dev, 0x506e, SW);
 	NVOBJ_MTHD (dev, 0x506e, 0x0150, nv04_graph_mthd_set_ref);
+	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv04_graph_mthd_page_flip);
 
 	dev_priv->engine.graph.registered = true;
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/nv10_graph.c b/drivers/gpu/drm/nouveau/nv10_graph.c
index a571bfd4471b..3fbb49dfc09c 100644
--- a/drivers/gpu/drm/nouveau/nv10_graph.c
+++ b/drivers/gpu/drm/nouveau/nv10_graph.c
@@ -1113,6 +1113,10 @@ nv10_graph_register(struct drm_device *dev)
 		NVOBJ_MTHD (dev, 0x0099, 0x1658, nv17_graph_mthd_lma_enable);
 	}
 
+	/* nvsw */
+	NVOBJ_CLASS(dev, 0x506e, SW);
+	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv04_graph_mthd_page_flip);
+
 	dev_priv->engine.graph.registered = true;
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nv20_graph.c b/drivers/gpu/drm/nouveau/nv20_graph.c
index 7720bccb3c98..51b9dd12949d 100644
--- a/drivers/gpu/drm/nouveau/nv20_graph.c
+++ b/drivers/gpu/drm/nouveau/nv20_graph.c
@@ -801,6 +801,10 @@ nv20_graph_register(struct drm_device *dev)
 	else
 		NVOBJ_CLASS(dev, 0x0597, GR);
 
+	/* nvsw */
+	NVOBJ_CLASS(dev, 0x506e, SW);
+	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv04_graph_mthd_page_flip);
+
 	dev_priv->engine.graph.registered = true;
 	return 0;
 }
@@ -841,6 +845,10 @@ nv30_graph_register(struct drm_device *dev)
 	if (0x000001e0 & (1 << (dev_priv->chipset & 0x0f)))
 		NVOBJ_CLASS(dev, 0x0497, GR);
 
+	/* nvsw */
+	NVOBJ_CLASS(dev, 0x506e, SW);
+	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv04_graph_mthd_page_flip);
+
 	dev_priv->engine.graph.registered = true;
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c b/drivers/gpu/drm/nouveau/nv40_graph.c
index b9361e28687c..159bdcd757d4 100644
--- a/drivers/gpu/drm/nouveau/nv40_graph.c
+++ b/drivers/gpu/drm/nouveau/nv40_graph.c
@@ -446,6 +446,10 @@ nv40_graph_register(struct drm_device *dev)
 	else
 		NVOBJ_CLASS(dev, 0x4097, GR);
 
+	/* nvsw */
+	NVOBJ_CLASS(dev, 0x506e, SW);
+	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv04_graph_mthd_page_flip);
+
 	dev_priv->engine.graph.registered = true;
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nv50_crtc.c b/drivers/gpu/drm/nouveau/nv50_crtc.c
index 56476d0c6de8..1225ea0a841b 100644
--- a/drivers/gpu/drm/nouveau/nv50_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv50_crtc.c
@@ -437,6 +437,7 @@ static const struct drm_crtc_funcs nv50_crtc_funcs = {
 	.cursor_move = nv50_crtc_cursor_move,
 	.gamma_set = nv50_crtc_gamma_set,
 	.set_config = drm_crtc_helper_set_config,
+	.page_flip = nouveau_crtc_page_flip,
 	.destroy = nv50_crtc_destroy,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nv50_graph.c b/drivers/gpu/drm/nouveau/nv50_graph.c
index ac7f62d524bb..6d81f4dab37d 100644
--- a/drivers/gpu/drm/nouveau/nv50_graph.c
+++ b/drivers/gpu/drm/nouveau/nv50_graph.c
@@ -29,6 +29,8 @@
 #include "nouveau_drv.h"
 #include "nouveau_ramht.h"
 #include "nouveau_grctx.h"
+#include "nouveau_dma.h"
+#include "nv50_evo.h"
 
 static int nv50_graph_register(struct drm_device *);
 
@@ -389,6 +391,19 @@ nv50_graph_nvsw_vblsem_release(struct nouveau_channel *chan,
 	return 0;
 }
 
+static int
+nv50_graph_nvsw_mthd_page_flip(struct nouveau_channel *chan,
+			       u32 class, u32 mthd, u32 data)
+{
+	struct nouveau_page_flip_state s;
+
+	if (!nouveau_finish_page_flip(chan, &s)) {
+		/* XXX - Do something here */
+	}
+
+	return 0;
+}
+
 static int
 nv50_graph_register(struct drm_device *dev)
 {
@@ -402,6 +417,7 @@ nv50_graph_register(struct drm_device *dev)
 	NVOBJ_MTHD (dev, 0x506e, 0x0400, nv50_graph_nvsw_vblsem_offset);
 	NVOBJ_MTHD (dev, 0x506e, 0x0404, nv50_graph_nvsw_vblsem_release_val);
 	NVOBJ_MTHD (dev, 0x506e, 0x0408, nv50_graph_nvsw_vblsem_release);
+	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv50_graph_nvsw_mthd_page_flip);
 
 	NVOBJ_CLASS(dev, 0x0030, GR); /* null */
 	NVOBJ_CLASS(dev, 0x5039, GR); /* m2mf */
diff --git a/include/drm/nouveau_drm.h b/include/drm/nouveau_drm.h
index 60a7b3e9c2e9..39aa0fd6a529 100644
--- a/include/drm/nouveau_drm.h
+++ b/include/drm/nouveau_drm.h
@@ -81,6 +81,7 @@ struct drm_nouveau_gpuobj_free {
 #define NOUVEAU_GETPARAM_GRAPH_UNITS     13
 #define NOUVEAU_GETPARAM_PTIMER_TIME     14
 #define NOUVEAU_GETPARAM_HAS_BO_USAGE    15
+#define NOUVEAU_GETPARAM_HAS_PAGEFLIP    16
 struct drm_nouveau_getparam {
 	uint64_t param;
 	uint64_t value;
-- 
cgit v1.2.3


From 6d6c5a157af45a5bd50ab913b07d826811a9ea0a Mon Sep 17 00:00:00 2001
From: Ben Skeggs
Date: Tue, 16 Nov 2010 10:17:53 +1000
Subject: drm/nouveau: remove some useless GETPARAMs

These have been unused since UMS support was ripped out, so lets remove
them completely.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_state.c | 17 +----------------
 include/drm/nouveau_drm.h               |  3 ---
 2 files changed, 1 insertion(+), 19 deletions(-)

(limited to 'include/drm')

diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 75e70022fbd3..35b28406caf6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -1074,21 +1074,6 @@ int nouveau_ioctl_getparam(struct drm_device *dev, void *data,
 		else
 			getparam->value = NV_PCI;
 		break;
-	case NOUVEAU_GETPARAM_FB_PHYSICAL:
-		getparam->value = dev_priv->fb_phys;
-		break;
-	case NOUVEAU_GETPARAM_AGP_PHYSICAL:
-		getparam->value = dev_priv->gart_info.aper_base;
-		break;
-	case NOUVEAU_GETPARAM_PCI_PHYSICAL:
-		if (dev->sg) {
-			getparam->value = (unsigned long)dev->sg->virtual;
-		} else {
-			NV_ERROR(dev, "Requested PCIGART address, "
-					"while no PCIGART was created\n");
-			return -EINVAL;
-		}
-		break;
 	case NOUVEAU_GETPARAM_FB_SIZE:
 		getparam->value = dev_priv->fb_available_size;
 		break;
@@ -1096,7 +1081,7 @@ int nouveau_ioctl_getparam(struct drm_device *dev, void *data,
 		getparam->value = dev_priv->gart_info.aper_size;
 		break;
 	case NOUVEAU_GETPARAM_VM_VRAM_BASE:
-		getparam->value = dev_priv->vm_vram_base;
+		getparam->value = 0; /* deprecated */
 		break;
 	case NOUVEAU_GETPARAM_PTIMER_TIME:
 		getparam->value = dev_priv->engine.timer.read(dev);
diff --git a/include/drm/nouveau_drm.h b/include/drm/nouveau_drm.h
index 39aa0fd6a529..e2cfe80f6fca 100644
--- a/include/drm/nouveau_drm.h
+++ b/include/drm/nouveau_drm.h
@@ -71,11 +71,8 @@ struct drm_nouveau_gpuobj_free {
 #define NOUVEAU_GETPARAM_PCI_VENDOR      3
 #define NOUVEAU_GETPARAM_PCI_DEVICE      4
 #define NOUVEAU_GETPARAM_BUS_TYPE        5
-#define NOUVEAU_GETPARAM_FB_PHYSICAL     6
-#define NOUVEAU_GETPARAM_AGP_PHYSICAL    7
 #define NOUVEAU_GETPARAM_FB_SIZE         8
 #define NOUVEAU_GETPARAM_AGP_SIZE        9
-#define NOUVEAU_GETPARAM_PCI_PHYSICAL    10
 #define NOUVEAU_GETPARAM_CHIPSET_ID      11
 #define NOUVEAU_GETPARAM_VM_VRAM_BASE    12
 #define NOUVEAU_GETPARAM_GRAPH_UNITS     13
-- 
cgit v1.2.3


From 72bfa19c8deb4d1db5ad068c34fd580cb295cbe8 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Sun, 19 Dec 2010 11:42:05 +0000
Subject: drm/i915: Allow the application to choose the constant addressing
 mode

The relative-to-general state default is useless as it means having to
rewrite the streaming kernels for each batch. Relative-to-surface is
more useful, as that stream usually needs to be rewritten for each
batch. And absolute addressing mode, vital if you start streaming
state, is also only available by adjusting the register...

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_dma.c            |  3 +++
 drivers/gpu/drm/i915/i915_drv.h            |  1 +
 drivers/gpu/drm/i915/i915_gem.c            |  2 ++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 35 +++++++++++++++++++++++++++++-
 include/drm/i915_drm.h                     | 12 ++++++++++
 5 files changed, 52 insertions(+), 1 deletion(-)

(limited to 'include/drm')

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 3f7b20392e26..18746e6cb129 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -778,6 +778,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_COHERENT_RINGS:
 		value = 1;
 		break;
+	case I915_PARAM_HAS_EXEC_CONSTANTS:
+		value = INTEL_INFO(dev)->gen >= 4;
+		break;
 	default:
 		DRM_DEBUG_DRIVER("Unknown parameter %d\n",
 				 param->param);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2a653cc80395..aac1bf332f75 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -258,6 +258,7 @@ typedef struct drm_i915_private {
 	const struct intel_device_info *info;
 
 	int has_gem;
+	int relative_constants_mode;
 
 	void __iomem *regs;
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5a0fbe59dd5b..c79c0b62ef60 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3735,6 +3735,8 @@ i915_gem_load(struct drm_device *dev)
 		}
 	}
 
+	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
+
 	/* Old X drivers will take 0-2 for front, back, depth buffers */
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		dev_priv->fence_reg_start = 3;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index fda0dc858a1f..61129e6759eb 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -957,7 +957,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	struct intel_ring_buffer *ring;
 	u32 exec_start, exec_len;
 	u32 seqno;
-	int ret, i;
+	int ret, mode, i;
 
 	if (!i915_gem_check_execbuffer(args)) {
 		DRM_ERROR("execbuf with invalid offset/length\n");
@@ -997,6 +997,39 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
+	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
+	switch (mode) {
+	case I915_EXEC_CONSTANTS_REL_GENERAL:
+	case I915_EXEC_CONSTANTS_ABSOLUTE:
+	case I915_EXEC_CONSTANTS_REL_SURFACE:
+		if (ring == &dev_priv->ring[RCS] &&
+		    mode != dev_priv->relative_constants_mode) {
+			if (INTEL_INFO(dev)->gen < 4)
+				return -EINVAL;
+
+			if (INTEL_INFO(dev)->gen > 5 &&
+			    mode == I915_EXEC_CONSTANTS_REL_SURFACE)
+				return -EINVAL;
+
+			ret = intel_ring_begin(ring, 4);
+			if (ret)
+				return ret;
+
+			intel_ring_emit(ring, MI_NOOP);
+			intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+			intel_ring_emit(ring, INSTPM);
+			intel_ring_emit(ring,
+					I915_EXEC_CONSTANTS_MASK << 16 | mode);
+			intel_ring_advance(ring);
+
+			dev_priv->relative_constants_mode = mode;
+		}
+		break;
+	default:
+		DRM_ERROR("execbuf with unknown constants: %d\n", mode);
+		return -EINVAL;
+	}
+
 	if (args->buffer_count < 1) {
 		DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
 		return -EINVAL;
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index a2776e2807a4..0039f1f97ad8 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -289,6 +289,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_BLT		 11
 #define I915_PARAM_HAS_RELAXED_FENCING	 12
 #define I915_PARAM_HAS_COHERENT_RINGS	 13
+#define I915_PARAM_HAS_EXEC_CONSTANTS	 14
 
 typedef struct drm_i915_getparam {
 	int param;
@@ -635,6 +636,17 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_RENDER                 (1<<0)
 #define I915_EXEC_BSD                    (2<<0)
 #define I915_EXEC_BLT                    (3<<0)
+
+/* Used for switching the constants addressing mode on gen4+ RENDER ring.
+ * Gen6+ only supports relative addressing to dynamic state (default) and
+ * absolute addressing.
+ *
+ * These flags are ignored for the BSD and BLT rings.
+ */
+#define I915_EXEC_CONSTANTS_MASK 	(3<<6)
+#define I915_EXEC_CONSTANTS_REL_GENERAL (0<<6) /* default */
+#define I915_EXEC_CONSTANTS_ABSOLUTE 	(1<<6)
+#define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
 	__u64 flags;
 	__u64 rsvd1;
 	__u64 rsvd2;
-- 
cgit v1.2.3


From 5bcf719b7db0f9366cedaf102b081f99b1c325ae Mon Sep 17 00:00:00 2001
From: Dave Airlie
Date: Tue, 7 Dec 2010 09:20:40 +1000
Subject: drm/switcheroo: track state of switch in drivers.

We need to track the state of the switch in drivers, so that after s/r
we don't resume the card we've explicitly switched off before. Also
don't allow a userspace open to occur if we've switched the gpu off.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_fops.c              |  2 ++
 drivers/gpu/drm/i915/i915_dma.c         |  4 ++++
 drivers/gpu/drm/i915/i915_drv.c         | 12 +++++++++++-
 drivers/gpu/drm/nouveau/nouveau_drv.c   |  6 ++++++
 drivers/gpu/drm/nouveau/nouveau_drv.h   |  2 ++
 drivers/gpu/drm/nouveau/nouveau_state.c |  4 ++++
 drivers/gpu/drm/radeon/radeon.h         |  1 -
 drivers/gpu/drm/radeon/radeon_device.c  | 12 ++++++------
 drivers/gpu/drm/radeon/radeon_kms.c     |  4 ----
 include/drm/drmP.h                      |  6 +++++-
 10 files changed, 40 insertions(+), 13 deletions(-)

(limited to 'include/drm')

diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index a39794bac04b..2ec7d48fc4a8 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -236,6 +236,8 @@ static int drm_open_helper(struct inode *inode, struct file *filp,
 		return -EBUSY;	/* No exclusive opens */
 	if (!drm_cpu_valid())
 		return -EINVAL;
+	if (dev->switch_power_state != DRM_SWITCH_POWER_ON)
+		return -EINVAL;
 
 	DRM_DEBUG("pid = %d, minor = %d\n", task_pid_nr(current), minor_id);
 
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index ec1f650f6fab..0568dbdc10ef 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1151,12 +1151,16 @@ static void i915_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_
 	pm_message_t pmm = { .event = PM_EVENT_SUSPEND };
 	if (state == VGA_SWITCHEROO_ON) {
 		printk(KERN_INFO "i915: switched on\n");
+		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 		/* i915 resume handler doesn't set to D0 */
 		pci_set_power_state(dev->pdev, PCI_D0);
 		i915_resume(dev);
+		dev->switch_power_state = DRM_SWITCH_POWER_ON;
 	} else {
 		printk(KERN_ERR "i915: switched off\n");
+		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 		i915_suspend(dev, pmm);
+		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
 	}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 9eee6cf7901e..872493331988 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -271,6 +271,8 @@ static int i915_drm_freeze(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	drm_kms_helper_poll_disable(dev);
+
 	pci_save_state(dev->pdev);
 
 	/* If KMS is active, we do the leavevt stuff here */
@@ -307,7 +309,9 @@ int i915_suspend(struct drm_device *dev, pm_message_t state)
 	if (state.event == PM_EVENT_PRETHAW)
 		return 0;
 
-	drm_kms_helper_poll_disable(dev);
+
+	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+		return 0;
 
 	error = i915_drm_freeze(dev);
 	if (error)
@@ -361,6 +365,9 @@ int i915_resume(struct drm_device *dev)
 {
 	int ret;
 
+	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+		return 0;
+
 	if (pci_enable_device(dev->pdev))
 		return -EIO;
 
@@ -569,6 +576,9 @@ static int i915_pm_suspend(struct device *dev)
 		return -ENODEV;
 	}
 
+	if (drm_dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+		return 0;
+
 	error = i915_drm_freeze(drm_dev);
 	if (error)
 		return error;
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c
index bb170570938b..13bb672a16f4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.c
@@ -171,6 +171,9 @@ nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state)
 	if (pm_state.event == PM_EVENT_PRETHAW)
 		return 0;
 
+	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+		return 0;
+
 	NV_INFO(dev, "Disabling fbcon acceleration...\n");
 	nouveau_fbcon_save_disable_accel(dev);
 
@@ -254,6 +257,9 @@ nouveau_pci_resume(struct pci_dev *pdev)
 	struct drm_crtc *crtc;
 	int ret, i;
 
+	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+		return 0;
+
 	nouveau_fbcon_save_disable_accel(dev);
 
 	NV_INFO(dev, "We're back, enabling device...\n");
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index e81575687354..e59f5bcab1ad 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -753,6 +753,8 @@ struct drm_nouveau_private {
 
 	struct nouveau_fbdev *nfbdev;
 	struct apertures_struct *apertures;
+
+	bool powered_down;
 };
 
 static inline struct drm_nouveau_private *
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 1b87eee22fa9..a54fc431fe98 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -596,12 +596,16 @@ static void nouveau_switcheroo_set_state(struct pci_dev *pdev,
 	pm_message_t pmm = { .event = PM_EVENT_SUSPEND };
 	if (state == VGA_SWITCHEROO_ON) {
 		printk(KERN_ERR "VGA switcheroo: switched nouveau on\n");
+		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 		nouveau_pci_resume(pdev);
 		drm_kms_helper_poll_enable(dev);
+		dev->switch_power_state = DRM_SWITCH_POWER_ON;
 	} else {
 		printk(KERN_ERR "VGA switcheroo: switched nouveau off\n");
+		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 		drm_kms_helper_poll_disable(dev);
 		nouveau_pci_suspend(pdev, pmm);
+		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
 	}
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index d2697f8f2da8..140eaceab279 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1167,7 +1167,6 @@ struct radeon_device {
 	uint8_t			audio_status_bits;
 	uint8_t			audio_category_code;
 
-	bool powered_down;
 	struct notifier_block acpi_nb;
 	/* only one userspace can use Hyperz features at a time */
 	struct drm_file *hyperz_filp;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 1a1017f0d9db..4ee0c53b28a7 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -642,20 +642,20 @@ void radeon_check_arguments(struct radeon_device *rdev)
 static void radeon_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
-	struct radeon_device *rdev = dev->dev_private;
 	pm_message_t pmm = { .event = PM_EVENT_SUSPEND };
 	if (state == VGA_SWITCHEROO_ON) {
 		printk(KERN_INFO "radeon: switched on\n");
 		/* don't suspend or resume card normally */
-		rdev->powered_down = false;
+		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 		radeon_resume_kms(dev);
+		dev->switch_power_state = DRM_SWITCH_POWER_ON;
 		drm_kms_helper_poll_enable(dev);
 	} else {
 		printk(KERN_INFO "radeon: switched off\n");
 		drm_kms_helper_poll_disable(dev);
+		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 		radeon_suspend_kms(dev, pmm);
-		/* don't suspend or resume card normally */
-		rdev->powered_down = true;
+		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
 	}
 }
 
@@ -842,7 +842,7 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state)
 	}
 	rdev = dev->dev_private;
 
-	if (rdev->powered_down)
+	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
 		return 0;
 
 	/* turn off display hw */
@@ -900,7 +900,7 @@ int radeon_resume_kms(struct drm_device *dev)
 	struct drm_connector *connector;
 	struct radeon_device *rdev = dev->dev_private;
 
-	if (rdev->powered_down)
+	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
 		return 0;
 
 	acquire_console_sem();
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 4bf423ca4c12..b2686334d46b 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -203,10 +203,6 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
  */
 int radeon_driver_firstopen_kms(struct drm_device *dev)
 {
-	struct radeon_device *rdev = dev->dev_private;
-
-	if (rdev->powered_down)
-		return -EINVAL;
 	return 0;
 }
 
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 0f14f94ed8f4..a4694c610330 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -1121,9 +1121,13 @@ struct drm_device {
 	spinlock_t object_name_lock;
 	struct idr object_name_idr;
 	/*@} */
-
+	int switch_power_state;
 };
 
+#define DRM_SWITCH_POWER_ON 0
+#define DRM_SWITCH_POWER_OFF 1
+#define DRM_SWITCH_POWER_CHANGING 2
+
 static __inline__ int drm_core_check_feature(struct drm_device *dev,
 					     int feature)
 {
-- 
cgit v1.2.3


From 9eba4a93ce520a627e876b0d1851d4f78a701c2b Mon Sep 17 00:00:00 2001
From: Marek Olšák
Date: Wed, 5 Jan 2011 05:46:48 +0100
Subject: drm/radeon/kms: manage r300 CMASK RAM access and allow CMASK clear

The CMASK RAM is for colorbuffer compression (used in conjunction
with MSAA). Only one user (filp) can access it.

The CMASK RAM access is managed in the same way as Hyper-Z, but there is
a separate ioctl, because an app that uses MSAA does not necessarily
have to use zbuffering.

Signed-off-by: Marek Olšák <maraeo@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/radeon/r300.c       |  9 +++++++++
 drivers/gpu/drm/radeon/r300d.h      |  1 +
 drivers/gpu/drm/radeon/radeon.h     |  3 ++-
 drivers/gpu/drm/radeon/radeon_drv.c |  2 +-
 drivers/gpu/drm/radeon/radeon_kms.c | 39 +++++++++++++++++++++++++------------
 include/drm/radeon_drm.h            |  1 +
 6 files changed, 41 insertions(+), 14 deletions(-)

(limited to 'include/drm')

diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 36b4f7b48d6a..23fee54c3b75 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -745,6 +745,11 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 		break;
 	case 0x4E00:
 		/* RB3D_CCTL */
+		if ((idx_value & (1 << 10)) && /* CMASK_ENABLE */
+		    p->rdev->cmask_filp != p->filp) {
+			DRM_ERROR("Invalid RB3D_CCTL: Cannot enable CMASK.\n");
+			return -EINVAL;
+		}
 		track->num_cb = ((idx_value >> 5) & 0x3) + 1;
 		break;
 	case 0x4E38:
@@ -1206,6 +1211,10 @@ static int r300_packet3_check(struct radeon_cs_parser *p,
 		if (p->rdev->hyperz_filp != p->filp)
 			return -EINVAL;
 		break;
+	case PACKET3_3D_CLEAR_CMASK:
+		if (p->rdev->cmask_filp != p->filp)
+			return -EINVAL;
+		break;
 	case PACKET3_NOP:
 		break;
 	default:
diff --git a/drivers/gpu/drm/radeon/r300d.h b/drivers/gpu/drm/radeon/r300d.h
index 0c036c60d9df..1f519a5ffb8c 100644
--- a/drivers/gpu/drm/radeon/r300d.h
+++ b/drivers/gpu/drm/radeon/r300d.h
@@ -54,6 +54,7 @@
 #define		PACKET3_3D_DRAW_IMMD_2		0x35
 #define		PACKET3_3D_DRAW_INDX_2		0x36
 #define		PACKET3_3D_CLEAR_HIZ		0x37
+#define		PACKET3_3D_CLEAR_CMASK		0x38
 #define		PACKET3_BITBLT_MULTI		0x9B
 
 #define PACKET0(reg, n)	(CP_PACKET0 |					\
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 140eaceab279..a835d95021d1 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1168,8 +1168,9 @@ struct radeon_device {
 	uint8_t			audio_category_code;
 
 	struct notifier_block acpi_nb;
-	/* only one userspace can use Hyperz features at a time */
+	/* only one userspace can use Hyperz features or CMASK at a time */
 	struct drm_file *hyperz_filp;
+	struct drm_file *cmask_filp;
 	/* i2c buses */
 	struct radeon_i2c_chan *i2c_bus[RADEON_MAX_I2C_BUS];
 };
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 18225f21ee0f..be5cb4f28c29 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -48,7 +48,7 @@
  * - 2.5.0 - add get accel 2 to work around ddx breakage for evergreen
  * - 2.6.0 - add tiling config query (r6xx+), add initial HiZ support (r300->r500)
  *   2.7.0 - fixups for r600 2D tiling support. (no external ABI change), add eg dyn gpr regs
- *   2.8.0 - pageflip support, r500 US_FORMAT regs. r500 ARGB2101010 colorbuf
+ *   2.8.0 - pageflip support, r500 US_FORMAT regs. r500 ARGB2101010 colorbuf, r300->r500 CMASK
  */
 #define KMS_DRIVER_MAJOR	2
 #define KMS_DRIVER_MINOR	8
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index b2686334d46b..28a53e4a925f 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -96,9 +96,27 @@ out:
 	return r;
 }
 
+static void radeon_set_filp_rights(struct drm_device *dev,
+				   struct drm_file **owner,
+				   struct drm_file *applier,
+				   uint32_t *value)
+{
+	mutex_lock(&dev->struct_mutex);
+	if (*value == 1) {
+		/* wants rights */
+		if (!*owner)
+			*owner = applier;
+	} else if (*value == 0) {
+		/* revokes rights */
+		if (*owner == applier)
+			*owner = NULL;
+	}
+	*value = *owner == applier ? 1 : 0;
+	mutex_unlock(&dev->struct_mutex);
+}
 
 /*
- * Userspace get informations ioctl
+ * Userspace get information ioctl
  */
 int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
@@ -173,18 +191,15 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 			DRM_DEBUG_KMS("WANT_HYPERZ: invalid value %d\n", value);
 			return -EINVAL;
 		}
-		mutex_lock(&dev->struct_mutex);
-		if (value == 1) {
-			/* wants hyper-z */
-			if (!rdev->hyperz_filp)
-				rdev->hyperz_filp = filp;
-		} else if (value == 0) {
-			/* revokes hyper-z */
-			if (rdev->hyperz_filp == filp)
-				rdev->hyperz_filp = NULL;
+		radeon_set_filp_rights(dev, &rdev->hyperz_filp, filp, &value);
+		break;
+	case RADEON_INFO_WANT_CMASK:
+		/* The same logic as Hyper-Z. */
+		if (value >= 2) {
+			DRM_DEBUG_KMS("WANT_CMASK: invalid value %d\n", value);
+			return -EINVAL;
 		}
-		value = rdev->hyperz_filp == filp ?  1 : 0;
-		mutex_unlock(&dev->struct_mutex);
+		radeon_set_filp_rights(dev, &rdev->cmask_filp, filp, &value);
 		break;
 	default:
 		DRM_DEBUG_KMS("Invalid request %d\n", info->request);
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index 10f8b53bdd40..e95a86b8b689 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -906,6 +906,7 @@ struct drm_radeon_cs {
 #define RADEON_INFO_ACCEL_WORKING2	0x05
 #define RADEON_INFO_TILING_CONFIG	0x06
 #define RADEON_INFO_WANT_HYPERZ		0x07
+#define RADEON_INFO_WANT_CMASK		0x08 /* get access to CMASK on r300 */
 
 struct drm_radeon_info {
 	uint32_t		request;
-- 
cgit v1.2.3


From dfe63bb0ad9810db13aab0058caba97866e0a681 Mon Sep 17 00:00:00 2001
From: James Simmons
Date: Thu, 23 Dec 2010 16:40:37 +0000
Subject: drm: Update fbdev fb_fix_screeninfo

If you change the color depth via fbset or some other framebuffer aware
userland application struct fb_fix_screeninfo is not updated to this new
information. This patch fixes this issue. Also the function is changed to
just pass in struct drm_framebuffer so in the future we could use more
fields. I'm hoping some day fix->smem* could be set here :-)

Signed-off-by: James Simmons <jsimmons@infradead.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/drm_fb_helper.c         | 41 +++++++++++++++++----------------
 drivers/gpu/drm/i915/intel_fb.c         |  1 -
 drivers/gpu/drm/nouveau/nouveau_fbcon.c |  1 -
 drivers/gpu/drm/radeon/radeon_fb.c      |  2 --
 include/drm/drm_fb_helper.h             |  3 ---
 5 files changed, 21 insertions(+), 27 deletions(-)

(limited to 'include/drm')

diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 5c4f9b9ecdc0..0307d601f5e5 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -607,6 +607,25 @@ void drm_fb_helper_fini(struct drm_fb_helper *fb_helper)
 }
 EXPORT_SYMBOL(drm_fb_helper_fini);
 
+void drm_fb_helper_fill_fix(struct fb_info *info, struct drm_framebuffer *fb)
+{
+	info->fix.type = FB_TYPE_PACKED_PIXELS;
+	info->fix.visual = fb->depth == 8 ? FB_VISUAL_PSEUDOCOLOR :
+		FB_VISUAL_TRUECOLOR;
+	info->fix.mmio_start = 0;
+	info->fix.mmio_len = 0;
+	info->fix.type_aux = 0;
+	info->fix.xpanstep = 1; /* doing it in hw */
+	info->fix.ypanstep = 1; /* doing it in hw */
+	info->fix.ywrapstep = 0;
+	info->fix.accel = FB_ACCEL_NONE;
+	info->fix.type_aux = 0;
+
+	info->fix.line_length = fb->pitch;
+	return;
+}
+EXPORT_SYMBOL(drm_fb_helper_fill_fix);
+
 static int setcolreg(struct drm_crtc *crtc, u16 red, u16 green,
 		     u16 blue, u16 regno, struct fb_info *info)
 {
@@ -816,6 +835,7 @@ int drm_fb_helper_set_par(struct fb_info *info)
 			mutex_unlock(&dev->mode_config.mutex);
 			return ret;
 		}
+		drm_fb_helper_fill_fix(info, fb_helper->fb);
 	}
 	mutex_unlock(&dev->mode_config.mutex);
 
@@ -953,6 +973,7 @@ int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 
 	if (new_fb) {
 		info->var.pixclock = 0;
+		drm_fb_helper_fill_fix(info, fb_helper->fb);
 		if (register_framebuffer(info) < 0) {
 			return -EINVAL;
 		}
@@ -979,26 +1000,6 @@ int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 }
 EXPORT_SYMBOL(drm_fb_helper_single_fb_probe);
 
-void drm_fb_helper_fill_fix(struct fb_info *info, uint32_t pitch,
-			    uint32_t depth)
-{
-	info->fix.type = FB_TYPE_PACKED_PIXELS;
-	info->fix.visual = depth == 8 ? FB_VISUAL_PSEUDOCOLOR :
-		FB_VISUAL_TRUECOLOR;
-	info->fix.mmio_start = 0;
-	info->fix.mmio_len = 0;
-	info->fix.type_aux = 0;
-	info->fix.xpanstep = 1; /* doing it in hw */
-	info->fix.ypanstep = 1; /* doing it in hw */
-	info->fix.ywrapstep = 0;
-	info->fix.accel = FB_ACCEL_NONE;
-	info->fix.type_aux = 0;
-
-	info->fix.line_length = pitch;
-	return;
-}
-EXPORT_SYMBOL(drm_fb_helper_fill_fix);
-
 void drm_fb_helper_fill_var(struct fb_info *info, struct drm_fb_helper *fb_helper,
 			    uint32_t fb_width, uint32_t fb_height)
 {
diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
index 67738f32dfd4..701e830d0012 100644
--- a/drivers/gpu/drm/i915/intel_fb.c
+++ b/drivers/gpu/drm/i915/intel_fb.c
@@ -150,7 +150,6 @@ static int intelfb_create(struct intel_fbdev *ifbdev,
 
 //	memset(info->screen_base, 0, size);
 
-	drm_fb_helper_fill_fix(info, fb->pitch, fb->depth);
 	drm_fb_helper_fill_var(info, &ifbdev->helper, sizes->fb_width, sizes->fb_height);
 
 	info->pixmap.size = 64*1024;
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 6d56a54b6e2e..a26d04740c88 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -359,7 +359,6 @@ nouveau_fbcon_create(struct nouveau_fbdev *nfbdev,
 	info->screen_base = nvbo_kmap_obj_iovirtual(nouveau_fb->nvbo);
 	info->screen_size = size;
 
-	drm_fb_helper_fill_fix(info, fb->pitch, fb->depth);
 	drm_fb_helper_fill_var(info, &nfbdev->helper, sizes->fb_width, sizes->fb_height);
 
 	/* Set aperture base/size for vesafb takeover */
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index 66324b5bb5ba..ca32e9c1e91d 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -225,8 +225,6 @@ static int radeonfb_create(struct radeon_fbdev *rfbdev,
 
 	strcpy(info->fix.id, "radeondrmfb");
 
-	drm_fb_helper_fill_fix(info, fb->pitch, fb->depth);
-
 	info->flags = FBINFO_DEFAULT | FBINFO_CAN_FORCE_OUTPUT;
 	info->fbops = &radeonfb_ops;
 
diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
index f22e7fe4b6db..aac27bd56e89 100644
--- a/include/drm/drm_fb_helper.h
+++ b/include/drm/drm_fb_helper.h
@@ -121,9 +121,6 @@ int drm_fb_helper_setcolreg(unsigned regno,
 void drm_fb_helper_restore(void);
 void drm_fb_helper_fill_var(struct fb_info *info, struct drm_fb_helper *fb_helper,
 			    uint32_t fb_width, uint32_t fb_height);
-void drm_fb_helper_fill_fix(struct fb_info *info, uint32_t pitch,
-			    uint32_t depth);
-
 int drm_fb_helper_setcmap(struct fb_cmap *cmap, struct fb_info *info);
 
 bool drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper);
-- 
cgit v1.2.3


From 2b2fd604bda01bfea9f1657ca468c134448e03a5 Mon Sep 17 00:00:00 2001
From: Alex Deucher
Date: Thu, 6 Jan 2011 21:19:35 -0500
Subject: drm/radeon/kms: add NI pci ids

Signed-off-by: Alex Deucher <alexdeucher@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/drm/drm_pciids.h | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'include/drm')

diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index e6b28a39942f..fe29ae328bd9 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -142,6 +142,42 @@
 	{0x1002, 0x5e4c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x5e4d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x5e4f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6720, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6721, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6722, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6723, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6724, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6725, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6726, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6727, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6728, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6729, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6738, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6739, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6740, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6741, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6742, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6743, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6744, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6745, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6746, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6747, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6748, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6749, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6750, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6758, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6759, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6760, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6761, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6762, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6763, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6764, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6765, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6766, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6767, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6768, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6770, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
+	{0x1002, 0x6779, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6880, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6888, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x6889, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \
-- 
cgit v1.2.3