author    | Dave Airlie | 2022-07-22 15:51:26 +1000
committer | Dave Airlie | 2022-07-22 15:51:31 +1000
commit    | 417c1c1963549e9a48b83ada59d90258e38c6594 (patch)
tree      | 1b5c36833e1c7b2ea3756767f498bf615436d952 /drivers/gpu/drm
parent    | cb6b81b21bd9cf09d72b7fe711be1b55001eb166 (diff)
parent    | 17cd10a44a8962860ff4ba351b2a290e752dbbde (diff)
Merge tag 'drm-intel-gt-next-2022-07-13' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
Driver uAPI changes:
- All related to Small BAR support (all by Matt Auld):
* add probed_cpu_visible_size
* expose the available memory region tracking
* apply ALLOC_GPU_ONLY by default
* add NEEDS_CPU_ACCESS hint (see the userspace sketch after this list)
* tweak error capture on recoverable contexts
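
A minimal userspace sketch of how the new uAPI pieces fit together on small-BAR parts (the helper name, include path and error handling are illustrative and not part of this pull; the flag, ioctl and structs are the i915 uAPI ones the diff touches). The rule enforced in i915_gem_create_ext_ioctl() below is that a NEEDS_CPU_ACCESS object must also list system memory as a placement, so it can always spill out of the CPU-mappable portion of LMEM:

```c
/*
 * Illustrative only: create a buffer that may live in device-local memory
 * but must remain CPU-accessible (e.g. a persistently mapped staging
 * buffer on a small-BAR system).
 */
#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static int create_cpu_visible_lmem_bo(int drm_fd, __u64 size, __u32 *handle)
{
	/* Prefer device-local memory, but allow spilling to system memory. */
	struct drm_i915_gem_memory_class_instance regions[] = {
		{ .memory_class = I915_MEMORY_CLASS_DEVICE, .memory_instance = 0 },
		{ .memory_class = I915_MEMORY_CLASS_SYSTEM, .memory_instance = 0 },
	};
	struct drm_i915_gem_create_ext_memory_regions ext_regions = {
		.base.name = I915_GEM_CREATE_EXT_MEMORY_REGIONS,
		.num_regions = 2,
		.regions = (__u64)(uintptr_t)regions,
	};
	struct drm_i915_gem_create_ext create = {
		.size = size,
		/* New hint: object must be mappable through the CPU-visible BAR. */
		.flags = I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS,
		.extensions = (__u64)(uintptr_t)&ext_regions,
	};

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create))
		return -errno;

	*handle = create.handle;
	return 0;
}
```

Without the flag, the same two-placement object now gets I915_BO_ALLOC_GPU_ONLY applied by default, per the i915_gem_create_ext_ioctl() change in the diff.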
Driver highlights:
- Add Small BAR support (Matt)
- Add MeteorLake support (RK)
- Add support for LMEM PCIe resizable BAR (Akeem)
Driver important fixes:
- ttm related fixes (Matt Auld)
- Fix a performance regression related to waitboost (Chris)
- Fix GT resets (Chris)
Driver others:
- Adding GuC SLPC selftest (Vinay)
- Fix ADL-N GuC load (Daniele)
- Add platform workaround (Gustavo, Matt Roper)
- DG2 and ATS-M device ID updates (Matt Roper)
- Add VM_BIND doc rfc with uAPI documentation (Niranjana)
- Fix use-after-free in vma destruction (Thomas)
- Async flush of GuC log regions (Alan)
- Fixes in selftests (Chris, Dan, Andrzej)
- Convert to drm_dbg (Umesh)
- Disable OA sseu config param for newer hardware (Umesh)
- Multi-cast register steering changes (Matt Roper)
- Add lmem_bar_size modparam (Priyanka)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Ys85pcMYLkqF/HtB@intel.com
Diffstat (limited to 'drivers/gpu/drm')
51 files changed, 1181 insertions, 432 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index 5802692ea604..33673fe7ee0a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -241,6 +241,7 @@ struct create_ext { struct drm_i915_private *i915; struct intel_memory_region *placements[INTEL_REGION_UNKNOWN]; unsigned int n_placements; + unsigned int placement_mask; unsigned long flags; }; @@ -337,6 +338,7 @@ static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args, for (i = 0; i < args->num_regions; i++) ext_data->placements[i] = placements[i]; + ext_data->placement_mask = mask; return 0; out_dump: @@ -411,7 +413,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_object *obj; int ret; - if (args->flags) + if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) return -EINVAL; ret = i915_user_extensions(u64_to_user_ptr(args->extensions), @@ -427,6 +429,22 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data, ext_data.n_placements = 1; } + if (args->flags & I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) { + if (ext_data.n_placements == 1) + return -EINVAL; + + /* + * We always need to be able to spill to system memory, if we + * can't place in the mappable part of LMEM. + */ + if (!(ext_data.placement_mask & BIT(INTEL_REGION_SMEM))) + return -EINVAL; + } else { + if (ext_data.n_placements > 1 || + ext_data.placements[0]->type != INTEL_MEMORY_SYSTEM) + ext_data.flags |= I915_BO_ALLOC_GPU_ONLY; + } + obj = __i915_gem_object_create_user_ext(i915, args->size, ext_data.placements, ext_data.n_placements, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 30fe847c6664..b7b2c14fd9e1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1951,7 +1951,7 @@ eb_find_first_request_added(struct i915_execbuffer *eb) #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) /* Stage with GFP_KERNEL allocations before we enter the signaling critical path */ -static void eb_capture_stage(struct i915_execbuffer *eb) +static int eb_capture_stage(struct i915_execbuffer *eb) { const unsigned int count = eb->buffer_count; unsigned int i = count, j; @@ -1964,6 +1964,10 @@ static void eb_capture_stage(struct i915_execbuffer *eb) if (!(flags & EXEC_OBJECT_CAPTURE)) continue; + if (i915_gem_context_is_recoverable(eb->gem_context) && + (IS_DGFX(eb->i915) || GRAPHICS_VER_FULL(eb->i915) > IP_VER(12, 0))) + return -EINVAL; + for_each_batch_create_order(eb, j) { struct i915_capture_list *capture; @@ -1976,6 +1980,8 @@ static void eb_capture_stage(struct i915_execbuffer *eb) eb->capture_lists[j] = capture; } } + + return 0; } /* Commit once we're in the critical path */ @@ -2017,8 +2023,9 @@ static void eb_capture_list_clear(struct i915_execbuffer *eb) #else -static void eb_capture_stage(struct i915_execbuffer *eb) +static int eb_capture_stage(struct i915_execbuffer *eb) { + return 0; } static void eb_capture_commit(struct i915_execbuffer *eb) @@ -3410,7 +3417,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, } ww_acquire_done(&eb.ww.ctx); - eb_capture_stage(&eb); + err = eb_capture_stage(&eb); + if (err) + goto err_vma; out_fence = eb_requests_create(&eb, in_fence, out_fence_fd); if (IS_ERR(out_fence)) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 06b1b188ce5a..ccec4055fde3 100644 --- 
a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -717,6 +717,32 @@ bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj, return false; } +/** + * i915_gem_object_needs_ccs_pages - Check whether the object requires extra + * pages when placed in system-memory, in order to save and later restore the + * flat-CCS aux state when the object is moved between local-memory and + * system-memory + * @obj: Pointer to the object + * + * Return: True if the object needs extra ccs pages. False otherwise. + */ +bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj) +{ + bool lmem_placement = false; + int i; + + for (i = 0; i < obj->mm.n_placements; i++) { + /* Compression is not allowed for the objects with smem placement */ + if (obj->mm.placements[i]->type == INTEL_MEMORY_SYSTEM) + return false; + if (!lmem_placement && + obj->mm.placements[i]->type == INTEL_MEMORY_LOCAL) + lmem_placement = true; + } + + return lmem_placement; +} + void i915_gem_init__objects(struct drm_i915_private *i915) { INIT_DELAYED_WORK(&i915->mm.free_work, __i915_gem_free_work); @@ -783,10 +809,31 @@ int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, intr, MAX_SCHEDULE_TIMEOUT); if (!ret) ret = -ETIME; + else if (ret > 0 && i915_gem_object_has_unknown_state(obj)) + ret = -EIO; return ret < 0 ? ret : 0; } +/** + * i915_gem_object_has_unknown_state - Return true if the object backing pages are + * in an unknown_state. This means that userspace must NEVER be allowed to touch + * the pages, with either the GPU or CPU. + * + * ONLY valid to be called after ensuring that all kernel fences have signalled + * (in particular the fence for moving/clearing the object). + */ +bool i915_gem_object_has_unknown_state(struct drm_i915_gem_object *obj) +{ + /* + * The below barrier pairs with the dma_fence_signal() in + * __memcpy_work(). We should only sample the unknown_state after all + * the kernel fences have signalled. 
+ */ + smp_rmb(); + return obj->mm.unknown_state; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/huge_gem_object.c" #include "selftests/huge_pages.c" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index e11d82a9f7c3..6f0a3ce35567 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -524,6 +524,7 @@ int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj, struct dma_fence **fence); int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj, bool intr); +bool i915_gem_object_has_unknown_state(struct drm_i915_gem_object *obj); void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj, unsigned int cache_level); @@ -617,6 +618,8 @@ int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj, bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj, enum intel_memory_type type); +bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj); + int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, size_t size, struct intel_memory_region *mr, struct address_space *mapping, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 2c88bdb8ff7c..5cf36a130061 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -548,6 +548,24 @@ struct drm_i915_gem_object { bool ttm_shrinkable; /** + * @unknown_state: Indicate that the object is effectively + * borked. This is write-once and set if we somehow encounter a + * fatal error when moving/clearing the pages, and we are not + * able to fallback to memcpy/memset, like on small-BAR systems. + * The GPU should also be wedged (or in the process) at this + * point. + * + * Only valid to read this after acquiring the dma-resv lock and + * waiting for all DMA_RESV_USAGE_KERNEL fences to be signalled, + * or if we otherwise know that the moving fence has signalled, + * and we are certain the pages underneath are valid for + * immediate access (under normal operation), like just prior to + * binding the object or when setting up the CPU fault handler. + * See i915_gem_object_has_unknown_state(); + */ + bool unknown_state; + + /** * Priority list of potential placements for this object. 
*/ struct intel_memory_region **placements; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index f46ee16a323a..a4fb577eceb4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -60,6 +60,8 @@ __i915_gem_object_create_region(struct intel_memory_region *mem, if (page_size) default_page_size = page_size; + /* We should be able to fit a page within an sg entry */ + GEM_BUG_ON(overflows_type(default_page_size, u32)); GEM_BUG_ON(!is_power_of_2_u64(default_page_size)); GEM_BUG_ON(default_page_size < PAGE_SIZE); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 4c25d9b2f138..f131dc065f47 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -266,24 +266,6 @@ static const struct i915_refct_sgt_ops tt_rsgt_ops = { .release = i915_ttm_tt_release }; -static inline bool -i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj) -{ - bool lmem_placement = false; - int i; - - for (i = 0; i < obj->mm.n_placements; i++) { - /* Compression is not allowed for the objects with smem placement */ - if (obj->mm.placements[i]->type == INTEL_MEMORY_SYSTEM) - return false; - if (!lmem_placement && - obj->mm.placements[i]->type == INTEL_MEMORY_LOCAL) - lmem_placement = true; - } - - return lmem_placement; -} - static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags) { @@ -620,10 +602,15 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, struct ttm_resource *res) { struct ttm_buffer_object *bo = i915_gem_to_ttm(obj); + u32 page_alignment; if (!i915_ttm_gtt_binds_lmem(res)) return i915_ttm_tt_get_st(bo->ttm); + page_alignment = bo->page_alignment << PAGE_SHIFT; + if (!page_alignment) + page_alignment = obj->mm.region->min_page_size; + /* * If CPU mapping differs, we need to add the ttm_tt pages to * the resulting st. Might make sense for GGTT. @@ -634,7 +621,8 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, struct i915_refct_sgt *rsgt; rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region, - res); + res, + page_alignment); if (IS_ERR(rsgt)) return rsgt; @@ -643,7 +631,8 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, return i915_refct_sgt_get(obj->ttm.cached_io_rsgt); } - return intel_region_ttm_resource_to_rsgt(obj->mm.region, res); + return intel_region_ttm_resource_to_rsgt(obj->mm.region, res, + page_alignment); } static int i915_ttm_truncate(struct drm_i915_gem_object *obj) @@ -675,7 +664,15 @@ static void i915_ttm_swap_notify(struct ttm_buffer_object *bo) i915_ttm_purge(obj); } -static bool i915_ttm_resource_mappable(struct ttm_resource *res) +/** + * i915_ttm_resource_mappable - Return true if the ttm resource is CPU + * accessible. + * @res: The TTM resource to check. + * + * This is interesting on small-BAR systems where we may encounter lmem objects + * that can't be accessed via the CPU. 
+ */ +bool i915_ttm_resource_mappable(struct ttm_resource *res) { struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); @@ -687,6 +684,22 @@ static bool i915_ttm_resource_mappable(struct ttm_resource *res) static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) { + struct drm_i915_gem_object *obj = i915_ttm_to_gem(mem->bo); + bool unknown_state; + + if (!obj) + return -EINVAL; + + if (!kref_get_unless_zero(&obj->base.refcount)) + return -EINVAL; + + assert_object_held(obj); + + unknown_state = i915_gem_object_has_unknown_state(obj); + i915_gem_object_put(obj); + if (unknown_state) + return -EINVAL; + if (!i915_ttm_cpu_maps_iomem(mem)) return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h index 73e371aa3850..e4842b4296fc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h @@ -92,4 +92,7 @@ static inline bool i915_ttm_cpu_maps_iomem(struct ttm_resource *mem) /* Once / if we support GGTT, this is also false for cached ttm_tts */ return mem->mem_type != I915_PL_SYSTEM; } + +bool i915_ttm_resource_mappable(struct ttm_resource *res); + #endif diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index a10716f4e717..9a7e50534b84 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -33,6 +33,7 @@ #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) static bool fail_gpu_migration; static bool fail_work_allocation; +static bool ban_memcpy; void i915_ttm_migrate_set_failure_modes(bool gpu_migration, bool work_allocation) @@ -40,6 +41,11 @@ void i915_ttm_migrate_set_failure_modes(bool gpu_migration, fail_gpu_migration = gpu_migration; fail_work_allocation = work_allocation; } + +void i915_ttm_migrate_set_ban_memcpy(bool ban) +{ + ban_memcpy = ban; +} #endif static enum i915_cache_level @@ -258,15 +264,23 @@ struct i915_ttm_memcpy_arg { * from the callback for lockdep reasons. * @cb: Callback for the accelerated migration fence. * @arg: The argument for the memcpy functionality. + * @i915: The i915 pointer. + * @obj: The GEM object. + * @memcpy_allowed: Instead of processing the @arg, and falling back to memcpy + * or memset, we wedge the device and set the @obj unknown_state, to prevent + * further access to the object with the CPU or GPU. On some devices we might + * only be permitted to use the blitter engine for such operations. */ struct i915_ttm_memcpy_work { struct dma_fence fence; struct work_struct work; - /* The fence lock */ spinlock_t lock; struct irq_work irq_work; struct dma_fence_cb cb; struct i915_ttm_memcpy_arg arg; + struct drm_i915_private *i915; + struct drm_i915_gem_object *obj; + bool memcpy_allowed; }; static void i915_ttm_move_memcpy(struct i915_ttm_memcpy_arg *arg) @@ -317,14 +331,42 @@ static void __memcpy_work(struct work_struct *work) struct i915_ttm_memcpy_work *copy_work = container_of(work, typeof(*copy_work), work); struct i915_ttm_memcpy_arg *arg = ©_work->arg; - bool cookie = dma_fence_begin_signalling(); + bool cookie; + + /* + * FIXME: We need to take a closer look here. We should be able to plonk + * this into the fence critical section. 
+ */ + if (!copy_work->memcpy_allowed) { + struct intel_gt *gt; + unsigned int id; + + for_each_gt(gt, copy_work->i915, id) + intel_gt_set_wedged(gt); + } + + cookie = dma_fence_begin_signalling(); + + if (copy_work->memcpy_allowed) { + i915_ttm_move_memcpy(arg); + } else { + /* + * Prevent further use of the object. Any future GTT binding or + * CPU access is not allowed once we signal the fence. Outside + * of the fence critical section, we then also then wedge the gpu + * to indicate the device is not functional. + * + * The below dma_fence_signal() is our write-memory-barrier. + */ + copy_work->obj->mm.unknown_state = true; + } - i915_ttm_move_memcpy(arg); dma_fence_end_signalling(cookie); dma_fence_signal(©_work->fence); i915_ttm_memcpy_release(arg); + i915_gem_object_put(copy_work->obj); dma_fence_put(©_work->fence); } @@ -336,6 +378,7 @@ static void __memcpy_irq_work(struct irq_work *irq_work) dma_fence_signal(©_work->fence); i915_ttm_memcpy_release(arg); + i915_gem_object_put(copy_work->obj); dma_fence_put(©_work->fence); } @@ -389,6 +432,19 @@ i915_ttm_memcpy_work_arm(struct i915_ttm_memcpy_work *work, return &work->fence; } +static bool i915_ttm_memcpy_allowed(struct ttm_buffer_object *bo, + struct ttm_resource *dst_mem) +{ + if (i915_gem_object_needs_ccs_pages(i915_ttm_to_gem(bo))) + return false; + + if (!(i915_ttm_resource_mappable(bo->resource) && + i915_ttm_resource_mappable(dst_mem))) + return false; + + return I915_SELFTEST_ONLY(ban_memcpy) ? false : true; +} + static struct dma_fence * __i915_ttm_move(struct ttm_buffer_object *bo, const struct ttm_operation_ctx *ctx, bool clear, @@ -396,6 +452,9 @@ __i915_ttm_move(struct ttm_buffer_object *bo, struct i915_refct_sgt *dst_rsgt, bool allow_accel, const struct i915_deps *move_deps) { + const bool memcpy_allowed = i915_ttm_memcpy_allowed(bo, dst_mem); + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct drm_i915_private *i915 = to_i915(bo->base.dev); struct i915_ttm_memcpy_work *copy_work = NULL; struct i915_ttm_memcpy_arg _arg, *arg = &_arg; struct dma_fence *fence = ERR_PTR(-EINVAL); @@ -423,9 +482,14 @@ __i915_ttm_move(struct ttm_buffer_object *bo, copy_work = kzalloc(sizeof(*copy_work), GFP_KERNEL); if (copy_work) { + copy_work->i915 = i915; + copy_work->memcpy_allowed = memcpy_allowed; + copy_work->obj = i915_gem_object_get(obj); arg = ©_work->arg; - i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm, - dst_rsgt); + if (memcpy_allowed) + i915_ttm_memcpy_init(arg, bo, clear, dst_mem, + dst_ttm, dst_rsgt); + fence = i915_ttm_memcpy_work_arm(copy_work, dep); } else { dma_fence_wait(dep, false); @@ -450,17 +514,23 @@ __i915_ttm_move(struct ttm_buffer_object *bo, } /* Error intercept failed or no accelerated migration to start with */ - if (!copy_work) - i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm, - dst_rsgt); - i915_ttm_move_memcpy(arg); - i915_ttm_memcpy_release(arg); + + if (memcpy_allowed) { + if (!copy_work) + i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm, + dst_rsgt); + i915_ttm_move_memcpy(arg); + i915_ttm_memcpy_release(arg); + } + if (copy_work) + i915_gem_object_put(copy_work->obj); kfree(copy_work); - return NULL; + return memcpy_allowed ? 
NULL : ERR_PTR(-EIO); out: if (!fence && copy_work) { i915_ttm_memcpy_release(arg); + i915_gem_object_put(copy_work->obj); kfree(copy_work); } @@ -539,8 +609,11 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, } if (migration_fence) { - ret = ttm_bo_move_accel_cleanup(bo, migration_fence, evict, - true, dst_mem); + if (I915_SELFTEST_ONLY(evict && fail_gpu_migration)) + ret = -EIO; /* never feed non-migrate fences into ttm */ + else + ret = ttm_bo_move_accel_cleanup(bo, migration_fence, evict, + true, dst_mem); if (ret) { dma_fence_wait(migration_fence, false); ttm_bo_move_sync_cleanup(bo, dst_mem); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h index d2e7f149e05c..8a5d5ab0cc34 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h @@ -22,6 +22,7 @@ int i915_ttm_move_notify(struct ttm_buffer_object *bo); I915_SELFTEST_DECLARE(void i915_ttm_migrate_set_failure_modes(bool gpu_migration, bool work_allocation)); +I915_SELFTEST_DECLARE(void i915_ttm_migrate_set_ban_memcpy(bool ban)); int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst, struct drm_i915_gem_object *src, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 319936f91ac5..e6e01c2a74a6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -9,6 +9,7 @@ #include <linux/jiffies.h> #include "gt/intel_engine.h" +#include "gt/intel_rps.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" @@ -31,6 +32,37 @@ i915_gem_object_wait_fence(struct dma_fence *fence, timeout); } +static void +i915_gem_object_boost(struct dma_resv *resv, unsigned int flags) +{ + struct dma_resv_iter cursor; + struct dma_fence *fence; + + /* + * Prescan all fences for potential boosting before we begin waiting. + * + * When we wait, we wait on outstanding fences serially. If the + * dma-resv contains a sequence such as 1:1, 1:2 instead of a reduced + * form 1:2, then as we look at each wait in turn we see that each + * request is currently executing and not worthy of boosting. But if + * we only happen to look at the final fence in the sequence (because + * of request coalescing or splitting between read/write arrays by + * the iterator), then we would boost. As such our decision to boost + * or not is delicately balanced on the order we wait on fences. + * + * So instead of looking for boosts sequentially, look for all boosts + * upfront and then wait on the outstanding fences. 
+ */ + + dma_resv_iter_begin(&cursor, resv, + dma_resv_usage_rw(flags & I915_WAIT_ALL)); + dma_resv_for_each_fence_unlocked(&cursor, fence) + if (dma_fence_is_i915(fence) && + !i915_request_started(to_request(fence))) + intel_rps_boost(to_request(fence)); + dma_resv_iter_end(&cursor); +} + static long i915_gem_object_wait_reservation(struct dma_resv *resv, unsigned int flags, @@ -40,6 +72,8 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, struct dma_fence *fence; long ret = timeout ?: 1; + i915_gem_object_boost(resv, flags); + dma_resv_iter_begin(&cursor, resv, dma_resv_usage_rw(flags & I915_WAIT_ALL)); dma_resv_for_each_fence_unlocked(&cursor, fence) { diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index ef15967be51a..72ce2c9f42fd 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1623,6 +1623,7 @@ static int igt_shrink_thp(void *arg) struct file *file; unsigned int flags = PIN_USER; unsigned int n; + intel_wakeref_t wf; bool should_swap; int err; @@ -1659,9 +1660,11 @@ static int igt_shrink_thp(void *arg) goto out_put; } + wf = intel_runtime_pm_get(&i915->runtime_pm); /* active shrink */ + err = i915_vma_pin(vma, 0, 0, flags); if (err) - goto out_put; + goto out_wf; if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) { pr_info("failed to allocate THP, finishing test early\n"); @@ -1732,6 +1735,8 @@ static int igt_shrink_thp(void *arg) out_unpin: i915_vma_unpin(vma); +out_wf: + intel_runtime_pm_put(&i915->runtime_pm, wf); out_put: i915_gem_object_put(obj); out_vm: diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c index 801af51aff62..fe6c37fd7859 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c @@ -9,6 +9,7 @@ #include "i915_deps.h" +#include "selftests/igt_reset.h" #include "selftests/igt_spinner.h" static int igt_fill_check_buffer(struct drm_i915_gem_object *obj, @@ -109,7 +110,8 @@ static int igt_same_create_migrate(void *arg) static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, struct drm_i915_gem_object *obj, - struct i915_vma *vma) + struct i915_vma *vma, + bool silent_migrate) { int err; @@ -138,7 +140,8 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, if (i915_gem_object_is_lmem(obj)) { err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM); if (err) { - pr_err("Object failed migration to smem\n"); + if (!silent_migrate) + pr_err("Object failed migration to smem\n"); if (err) return err; } @@ -156,7 +159,8 @@ static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww, } else { err = i915_gem_object_migrate(obj, ww, INTEL_REGION_LMEM_0); if (err) { - pr_err("Object failed migration to lmem\n"); + if (!silent_migrate) + pr_err("Object failed migration to lmem\n"); if (err) return err; } @@ -179,7 +183,8 @@ static int __igt_lmem_pages_migrate(struct intel_gt *gt, struct i915_address_space *vm, struct i915_deps *deps, struct igt_spinner *spin, - struct dma_fence *spin_fence) + struct dma_fence *spin_fence, + bool borked_migrate) { struct drm_i915_private *i915 = gt->i915; struct drm_i915_gem_object *obj; @@ -242,7 +247,8 @@ static int __igt_lmem_pages_migrate(struct intel_gt *gt, */ for (i = 1; i <= 5; ++i) { for_i915_gem_ww(&ww, err, true) - err = lmem_pages_migrate_one(&ww, obj, vma); + err = lmem_pages_migrate_one(&ww, obj, vma, + borked_migrate); if (err) goto 
out_put; } @@ -283,23 +289,70 @@ out_put: static int igt_lmem_pages_failsafe_migrate(void *arg) { - int fail_gpu, fail_alloc, ret; + int fail_gpu, fail_alloc, ban_memcpy, ret; struct intel_gt *gt = arg; for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) { for (fail_alloc = 0; fail_alloc < 2; ++fail_alloc) { - pr_info("Simulated failure modes: gpu: %d, alloc: %d\n", - fail_gpu, fail_alloc); - i915_ttm_migrate_set_failure_modes(fail_gpu, - fail_alloc); - ret = __igt_lmem_pages_migrate(gt, NULL, NULL, NULL, NULL); - if (ret) - goto out_err; + for (ban_memcpy = 0; ban_memcpy < 2; ++ban_memcpy) { + pr_info("Simulated failure modes: gpu: %d, alloc:%d, ban_memcpy: %d\n", + fail_gpu, fail_alloc, ban_memcpy); + i915_ttm_migrate_set_ban_memcpy(ban_memcpy); + i915_ttm_migrate_set_failure_modes(fail_gpu, + fail_alloc); + ret = __igt_lmem_pages_migrate(gt, NULL, NULL, + NULL, NULL, + ban_memcpy && + fail_gpu); + + if (ban_memcpy && fail_gpu) { + struct intel_gt *__gt; + unsigned int id; + + if (ret != -EIO) { + pr_err("expected -EIO, got (%d)\n", ret); + ret = -EINVAL; + } else { + ret = 0; + } + + for_each_gt(__gt, gt->i915, id) { + intel_wakeref_t wakeref; + bool wedged; + + mutex_lock(&__gt->reset.mutex); + wedged = test_bit(I915_WEDGED, &__gt->reset.flags); + mutex_unlock(&__gt->reset.mutex); + + if (fail_gpu && !fail_alloc) { + if (!wedged) { + pr_err("gt(%u) not wedged\n", id); + ret = -EINVAL; + continue; + } + } else if (wedged) { + pr_err("gt(%u) incorrectly wedged\n", id); + ret = -EINVAL; + } else { + continue; + } + + wakeref = intel_runtime_pm_get(__gt->uncore->rpm); + igt_global_reset_lock(__gt); + intel_gt_reset(__gt, ALL_ENGINES, NULL); + igt_global_reset_unlock(__gt); + intel_runtime_pm_put(__gt->uncore->rpm, wakeref); + } + if (ret) + goto out_err; + } + } } } out_err: i915_ttm_migrate_set_failure_modes(false, false); + i915_ttm_migrate_set_ban_memcpy(false); return ret; } @@ -370,7 +423,7 @@ static int igt_async_migrate(struct intel_gt *gt) goto out_ce; err = __igt_lmem_pages_migrate(gt, &ppgtt->vm, &deps, &spin, - spin_fence); + spin_fence, false); i915_deps_fini(&deps); dma_fence_put(spin_fence); if (err) @@ -394,23 +447,67 @@ out_spin: #define ASYNC_FAIL_ALLOC 1 static int igt_lmem_async_migrate(void *arg) { - int fail_gpu, fail_alloc, ret; + int fail_gpu, fail_alloc, ban_memcpy, ret; struct intel_gt *gt = arg; for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) { for (fail_alloc = 0; fail_alloc < ASYNC_FAIL_ALLOC; ++fail_alloc) { - pr_info("Simulated failure modes: gpu: %d, alloc: %d\n", - fail_gpu, fail_alloc); - i915_ttm_migrate_set_failure_modes(fail_gpu, - fail_alloc); - ret = igt_async_migrate(gt); - if (ret) - goto out_err; + for (ban_memcpy = 0; ban_memcpy < 2; ++ban_memcpy) { + pr_info("Simulated failure modes: gpu: %d, alloc: %d, ban_memcpy: %d\n", + fail_gpu, fail_alloc, ban_memcpy); + i915_ttm_migrate_set_ban_memcpy(ban_memcpy); + i915_ttm_migrate_set_failure_modes(fail_gpu, + fail_alloc); + ret = igt_async_migrate(gt); + + if (fail_gpu && ban_memcpy) { + struct intel_gt *__gt; + unsigned int id; + + if (ret != -EIO) { + pr_err("expected -EIO, got (%d)\n", ret); + ret = -EINVAL; + } else { + ret = 0; + } + + for_each_gt(__gt, gt->i915, id) { + intel_wakeref_t wakeref; + bool wedged; + + mutex_lock(&__gt->reset.mutex); + wedged = test_bit(I915_WEDGED, &__gt->reset.flags); + mutex_unlock(&__gt->reset.mutex); + + if (fail_gpu && !fail_alloc) { + if (!wedged) { + pr_err("gt(%u) not wedged\n", id); + ret = -EINVAL; + continue; + } + } else if (wedged) { + pr_err("gt(%u) incorrectly 
wedged\n", id); + ret = -EINVAL; + } else { + continue; + } + + wakeref = intel_runtime_pm_get(__gt->uncore->rpm); + igt_global_reset_lock(__gt); + intel_gt_reset(__gt, ALL_ENGINES, NULL); + igt_global_reset_unlock(__gt); + intel_runtime_pm_put(__gt->uncore->rpm, wakeref); + } + } + if (ret) + goto out_err; + } } } out_err: i915_ttm_migrate_set_failure_modes(false, false); + i915_ttm_migrate_set_ban_memcpy(false); return ret; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index 5bc93a1ce3e3..3ced9948a331 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -10,6 +10,7 @@ #include "gem/i915_gem_internal.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_ttm.h" +#include "gem/i915_gem_ttm_move.h" #include "gt/intel_engine_pm.h" #include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" @@ -21,6 +22,7 @@ #include "i915_selftest.h" #include "selftests/i915_random.h" #include "selftests/igt_flush_test.h" +#include "selftests/igt_reset.h" #include "selftests/igt_mmap.h" struct tile { @@ -979,6 +981,9 @@ static int igt_mmap(void *arg) }; int i; + if (mr->private) + continue; + for (i = 0; i < ARRAY_SIZE(sizes); i++) { struct drm_i915_gem_object *obj; int err; @@ -1160,6 +1165,7 @@ out_unmap: #define IGT_MMAP_MIGRATE_FILL (1 << 1) #define IGT_MMAP_MIGRATE_EVICTABLE (1 << 2) #define IGT_MMAP_MIGRATE_UNFAULTABLE (1 << 3) +#define IGT_MMAP_MIGRATE_FAIL_GPU (1 << 4) static int __igt_mmap_migrate(struct intel_memory_region **placements, int n_placements, struct intel_memory_region *expected_mr, @@ -1221,8 +1227,10 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, expand32(POISON_INUSE), &rq); i915_gem_object_unpin_pages(obj); if (rq) { - dma_resv_add_fence(obj->base.resv, &rq->fence, - DMA_RESV_USAGE_KERNEL); + err = dma_resv_reserve_fences(obj->base.resv, 1); + if (!err) + dma_resv_add_fence(obj->base.resv, &rq->fence, + DMA_RESV_USAGE_KERNEL); i915_request_put(rq); } i915_gem_object_unlock(obj); @@ -1232,13 +1240,62 @@ static int __igt_mmap_migrate(struct intel_memory_region **placements, if (flags & IGT_MMAP_MIGRATE_EVICTABLE) igt_make_evictable(&objects); + if (flags & IGT_MMAP_MIGRATE_FAIL_GPU) { + err = i915_gem_object_lock(obj, NULL); + if (err) + goto out_put; + + /* + * Ensure we only simulate the gpu failuire when faulting the + * pages. 
+ */ + err = i915_gem_object_wait_moving_fence(obj, true); + i915_gem_object_unlock(obj); + if (err) + goto out_put; + i915_ttm_migrate_set_failure_modes(true, false); + } + err = ___igt_mmap_migrate(i915, obj, addr, flags & IGT_MMAP_MIGRATE_UNFAULTABLE); + if (!err && obj->mm.region != expected_mr) { pr_err("%s region mismatch %s\n", __func__, expected_mr->name); err = -EINVAL; } + if (flags & IGT_MMAP_MIGRATE_FAIL_GPU) { + struct intel_gt *gt; + unsigned int id; + + i915_ttm_migrate_set_failure_modes(false, false); + + for_each_gt(gt, i915, id) { + intel_wakeref_t wakeref; + bool wedged; + + mutex_lock(>->reset.mutex); + wedged = test_bit(I915_WEDGED, >->reset.flags); + mutex_unlock(>->reset.mutex); + if (!wedged) { + pr_err("gt(%u) not wedged\n", id); + err = -EINVAL; + continue; + } + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + igt_global_reset_lock(gt); + intel_gt_reset(gt, ALL_ENGINES, NULL); + igt_global_reset_unlock(gt); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + } + + if (!i915_gem_object_has_unknown_state(obj)) { + pr_err("object missing unknown_state\n"); + err = -EINVAL; + } + } + out_put: i915_gem_object_put(obj); igt_close_objects(i915, &objects); @@ -1319,6 +1376,23 @@ static int igt_mmap_migrate(void *arg) IGT_MMAP_MIGRATE_TOPDOWN | IGT_MMAP_MIGRATE_FILL | IGT_MMAP_MIGRATE_UNFAULTABLE); + if (err) + goto out_io_size; + + /* + * Allocate in the non-mappable portion, but force migrating to + * the mappable portion on fault (LMEM -> LMEM). We then also + * simulate a gpu error when moving the pages when faulting the + * pages, which should result in wedging the gpu and returning + * SIGBUS in the fault handler, since we can't fallback to + * memcpy. + */ + err = __igt_mmap_migrate(single, ARRAY_SIZE(single), mr, + IGT_MMAP_MIGRATE_TOPDOWN | + IGT_MMAP_MIGRATE_FILL | + IGT_MMAP_MIGRATE_EVICTABLE | + IGT_MMAP_MIGRATE_FAIL_GPU | + IGT_MMAP_MIGRATE_UNFAULTABLE); out_io_size: mr->io_size = saved_io_size; i915_ttm_buddy_man_force_visible_size(man, @@ -1435,6 +1509,9 @@ static int igt_mmap_access(void *arg) struct drm_i915_gem_object *obj; int err; + if (mr->private) + continue; + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1); if (obj == ERR_PTR(-ENODEV)) continue; @@ -1580,6 +1657,9 @@ static int igt_mmap_gpu(void *arg) struct drm_i915_gem_object *obj; int err; + if (mr->private) + continue; + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1); if (obj == ERR_PTR(-ENODEV)) continue; @@ -1727,6 +1807,9 @@ static int igt_mmap_revoke(void *arg) struct drm_i915_gem_object *obj; int err; + if (mr->private) + continue; + obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1); if (obj == ERR_PTR(-ENODEV)) continue; diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index 9dc9dccf7b09..ecc990ec1b95 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -399,7 +399,8 @@ static void insert_breadcrumb(struct i915_request *rq) * the request as it may have completed and raised the interrupt as * we were attaching it into the lists. 
*/ - irq_work_queue(&b->irq_work); + if (!b->irq_armed || __i915_request_is_complete(rq)) + irq_work_queue(&b->irq_work); } bool i915_request_enable_breadcrumb(struct i915_request *rq) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 283870c65991..37fa813af766 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1517,7 +1517,6 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, struct intel_instdone *instdone) { struct drm_i915_private *i915 = engine->i915; - const struct sseu_dev_info *sseu = &engine->gt->info.sseu; struct intel_uncore *uncore = engine->uncore; u32 mmio_base = engine->mmio_base; int slice; @@ -1542,32 +1541,19 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2); } - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { - for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { - instdone->sampler[slice][subslice] = - intel_gt_mcr_read(engine->gt, - GEN7_SAMPLER_INSTDONE, - slice, subslice); - instdone->row[slice][subslice] = - intel_gt_mcr_read(engine->gt, - GEN7_ROW_INSTDONE, - slice, subslice); - } - } else { - for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { - instdone->sampler[slice][subslice] = - intel_gt_mcr_read(engine->gt, - GEN7_SAMPLER_INSTDONE, - slice, subslice); - instdone->row[slice][subslice] = - intel_gt_mcr_read(engine->gt, - GEN7_ROW_INSTDONE, - slice, subslice); - } + for_each_ss_steering(iter, engine->gt, slice, subslice) { + instdone->sampler[slice][subslice] = + intel_gt_mcr_read(engine->gt, + GEN7_SAMPLER_INSTDONE, + slice, subslice); + instdone->row[slice][subslice] = + intel_gt_mcr_read(engine->gt, + GEN7_ROW_INSTDONE, + slice, subslice); } if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { - for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) + for_each_ss_steering(iter, engine->gt, slice, subslice) instdone->geom_svg[slice][subslice] = intel_gt_mcr_read(engine->gt, XEHPG_INSTDONE_GEOM_SVG, diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 2286f96f5f87..633a7e5dba3b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -647,26 +647,4 @@ intel_engine_uses_wa_hold_ccs_switchout(struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; } -#define instdone_has_slice(dev_priv___, sseu___, slice___) \ - ((GRAPHICS_VER(dev_priv___) == 7 ? 1 : ((sseu___)->slice_mask)) & BIT(slice___)) - -#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \ - (GRAPHICS_VER(dev_priv__) == 7 ? 
(1 & BIT(subslice__)) : \ - intel_sseu_has_subslice(sseu__, 0, subslice__)) - -#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \ - for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \ - (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \ - (slice_) += ((subslice_) == 0)) \ - for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \ - (instdone_has_subslice(dev_priv_, sseu_, slice_, \ - subslice_))) - -#define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \ - for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \ - (iter_) < GEN_SS_MASK_SIZE; \ - (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \ - (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \ - for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_))) - #endif /* __INTEL_ENGINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 8da3314bb6bf..68c2b0d8f187 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -952,6 +952,20 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) mutex_lock(>->tlb_invalidate_lock); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */ + + for_each_engine(engine, gt, id) { + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + } + + spin_unlock_irq(&uncore->lock); + for_each_engine(engine, gt, id) { /* * HW architecture suggest typical invalidation time at 40us, @@ -966,7 +980,6 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) if (!i915_mmio_reg_offset(rb.reg)) continue; - intel_uncore_write_fw(uncore, rb.reg, rb.bit); if (__intel_wait_for_register_fw(uncore, rb.reg, rb.bit, 0, timeout_us, timeout_ms, diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 777025d5bd66..e79405a45312 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -495,3 +495,28 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, } } +/** + * intel_gt_mcr_get_ss_steering - returns the group/instance steering for a SS + * @gt: GT structure + * @dss: DSS ID to obtain steering for + * @group: pointer to storage for steering group ID + * @instance: pointer to storage for steering instance ID + * + * Returns the steering IDs (via the @group and @instance parameters) that + * correspond to a specific subslice/DSS ID. 
+ */ +void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, + unsigned int *group, unsigned int *instance) +{ + if (IS_PONTEVECCHIO(gt->i915)) { + *group = dss / GEN_DSS_PER_CSLICE; + *instance = dss % GEN_DSS_PER_CSLICE; + } else if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) { + *group = dss / GEN_DSS_PER_GSLICE; + *instance = dss % GEN_DSS_PER_GSLICE; + } else { + *group = dss / GEN_MAX_SS_PER_HSW_SLICE; + *instance = dss % GEN_MAX_SS_PER_HSW_SLICE; + return; + } +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h index 506b0cbc8db3..77a8b11c287d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h @@ -31,4 +31,28 @@ void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt, void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt, bool dump_table); +void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss, + unsigned int *group, unsigned int *instance); + +/* + * Helper for for_each_ss_steering loop. On pre-Xe_HP platforms, subslice + * presence is determined by using the group/instance as direct lookups in the + * slice/subslice topology. On Xe_HP and beyond, the steering is unrelated to + * the topology, so we lookup the DSS ID directly in "slice 0." + */ +#define _HAS_SS(ss_, gt_, group_, instance_) ( \ + GRAPHICS_VER_FULL(gt_->i915) >= IP_VER(12, 50) ? \ + intel_sseu_has_subslice(&(gt_)->info.sseu, 0, ss_) : \ + intel_sseu_has_subslice(&(gt_)->info.sseu, group_, instance_)) + +/* + * Loop over each subslice/DSS and determine the group and instance IDs that + * should be used to steer MCR accesses toward this DSS. + */ +#define for_each_ss_steering(ss_, gt_, group_, instance_) \ + for (ss_ = 0, intel_gt_mcr_get_ss_steering(gt_, 0, &group_, &instance_); \ + ss_ < I915_MAX_SS_FUSE_BITS; \ + ss_++, intel_gt_mcr_get_ss_steering(gt_, ss_, &group_, &instance_)) \ + for_each_if(_HAS_SS(ss_, gt_, group_, instance_)) + #endif /* __INTEL_GT_MCR__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 37c1095d8603..60d6eb5f245b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -371,6 +371,9 @@ #define GEN9_WM_CHICKEN3 _MMIO(0x5588) #define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9) +#define CHICKEN_RASTER_1 _MMIO(0x6204) +#define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) + #define VFLSKPD _MMIO(0x62a8) #define DIS_OVER_FETCH_CACHE REG_BIT(1) #define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) @@ -918,6 +921,10 @@ #define GEN7_L3CNTLREG1 _MMIO(0xb01c) #define GEN7_WA_FOR_GEN7_L3_CONTROL 0x3C47FF8C #define GEN7_L3AGDIS (1 << 19) + +#define XEHPC_LNCFMISCCFGREG0 _MMIO(0xb01c) +#define XEHPC_OVRLSCCC REG_BIT(0) + #define GEN7_L3CNTLREG2 _MMIO(0xb020) /* MOCS (Memory Object Control State) registers */ diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index d09b996a9759..6e90032e12e9 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -15,6 +15,103 @@ #include "gt/intel_gt_mcr.h" #include "gt/intel_gt_regs.h" +static void _release_bars(struct pci_dev *pdev) +{ + int resno; + + for (resno = PCI_STD_RESOURCES; resno < PCI_STD_RESOURCE_END; resno++) { + if (pci_resource_len(pdev, resno)) + pci_release_resource(pdev, resno); + } +} + +static void +_resize_bar(struct drm_i915_private *i915, int resno, resource_size_t size) +{ + struct pci_dev *pdev = 
to_pci_dev(i915->drm.dev); + int bar_size = pci_rebar_bytes_to_size(size); + int ret; + + _release_bars(pdev); + + ret = pci_resize_resource(pdev, resno, bar_size); + if (ret) { + drm_info(&i915->drm, "Failed to resize BAR%d to %dM (%pe)\n", + resno, 1 << bar_size, ERR_PTR(ret)); + return; + } + + drm_info(&i915->drm, "BAR%d resized to %dM\n", resno, 1 << bar_size); +} + +#define LMEM_BAR_NUM 2 +static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t lmem_size) +{ + struct pci_dev *pdev = to_pci_dev(i915->drm.dev); + struct pci_bus *root = pdev->bus; + struct resource *root_res; + resource_size_t rebar_size; + resource_size_t current_size; + u32 pci_cmd; + int i; + + current_size = roundup_pow_of_two(pci_resource_len(pdev, LMEM_BAR_NUM)); + + if (i915->params.lmem_bar_size) { + u32 bar_sizes; + + rebar_size = i915->params.lmem_bar_size * + (resource_size_t)SZ_1M; + bar_sizes = pci_rebar_get_possible_sizes(pdev, + LMEM_BAR_NUM); + + if (rebar_size == current_size) + return; + + if (!(bar_sizes & BIT(pci_rebar_bytes_to_size(rebar_size))) || + rebar_size >= roundup_pow_of_two(lmem_size)) { + rebar_size = lmem_size; + + drm_info(&i915->drm, + "Given bar size is not within supported size, setting it to default: %llu\n", + (u64)lmem_size >> 20); + } + } else { + rebar_size = current_size; + + if (rebar_size != roundup_pow_of_two(lmem_size)) + rebar_size = lmem_size; + else + return; + } + + /* Find out if root bus contains 64bit memory addressing */ + while (root->parent) + root = root->parent; + + pci_bus_for_each_resource(root, root_res, i) { + if (root_res && root_res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && + root_res->start > 0x100000000ull) + break; + } + + /* pci_resize_resource will fail anyways */ + if (!root_res) { + drm_info(&i915->drm, "Can't resize LMEM BAR - platform support is missing\n"); + return; + } + + /* First disable PCI memory decoding references */ + pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd); + pci_write_config_dword(pdev, PCI_COMMAND, + pci_cmd & ~PCI_COMMAND_MEMORY); + + _resize_bar(i915, LMEM_BAR_NUM, rebar_size); + + pci_assign_unassigned_bus_resources(pdev->bus); + pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); +} + static int region_lmem_release(struct intel_memory_region *mem) { @@ -112,12 +209,6 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) flat_ccs_base = intel_gt_mcr_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR); flat_ccs_base = (flat_ccs_base >> XEHP_CCS_BASE_SHIFT) * SZ_64K; - /* FIXME: Remove this when we have small-bar enabled */ - if (pci_resource_len(pdev, 2) < lmem_size) { - drm_err(&i915->drm, "System requires small-BAR support, which is currently unsupported on this kernel\n"); - return ERR_PTR(-EINVAL); - } - if (GEM_WARN_ON(lmem_size < flat_ccs_base)) return ERR_PTR(-EIO); @@ -134,6 +225,8 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) lmem_size = intel_uncore_read64(&i915->uncore, GEN12_GSMBASE); } + i915_resize_lmem_bar(i915, lmem_size); + if (i915->params.lmem_size > 0) { lmem_size = min_t(resource_size_t, lmem_size, mul_u32_u32(i915->params.lmem_size, SZ_1M)); @@ -170,6 +263,10 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt) drm_info(&i915->drm, "Local memory available: %pa\n", &lmem_size); + if (io_size < lmem_size) + drm_info(&i915->drm, "Using a reduced BAR size of %lluMiB. 
Consider enabling 'Resizable BAR' or similar, if available in the BIOS.\n", + (u64)io_size >> 20); + return mem; err_region_put: diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index a5338c3fde7a..c68d36fb5bbd 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -300,9 +300,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) return err; } -static int gen6_reset_engines(struct intel_gt *gt, - intel_engine_mask_t engine_mask, - unsigned int retry) +static int __gen6_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) { struct intel_engine_cs *engine; u32 hw_mask; @@ -321,6 +321,20 @@ static int gen6_reset_engines(struct intel_gt *gt, return gen6_hw_domain_reset(gt, hw_mask); } +static int gen6_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(>->uncore->lock, flags); + ret = __gen6_reset_engines(gt, engine_mask, retry); + spin_unlock_irqrestore(>->uncore->lock, flags); + + return ret; +} + static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine) { int vecs_id; @@ -487,9 +501,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine) rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit); } -static int gen11_reset_engines(struct intel_gt *gt, - intel_engine_mask_t engine_mask, - unsigned int retry) +static int __gen11_reset_engines(struct intel_gt *gt, + intel_engine_mask_t engine_mask, + unsigned int retry) { struct intel_engine_cs *engine; intel_engine_mask_t tmp; @@ -583,8 +597,11 @@ static int gen8_reset_engines(struct intel_gt *gt, struct intel_engine_cs *engine; const bool reset_non_ready = retry >= 1; intel_engine_mask_t tmp; + unsigned long flags; int ret; + spin_lock_irqsave(>->uncore->lock, flags); + for_each_engine_masked(engine, gt, engine_mask, tmp) { ret = gen8_engine_reset_prepare(engine); if (ret && !reset_non_ready) @@ -612,17 +629,19 @@ static int gen8_reset_engines(struct intel_gt *gt, * This is best effort, so ignore any error from the initial reset. 
*/ if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES) - gen11_reset_engines(gt, gt->info.engine_mask, 0); + __gen11_reset_engines(gt, gt->info.engine_mask, 0); if (GRAPHICS_VER(gt->i915) >= 11) - ret = gen11_reset_engines(gt, engine_mask, retry); + ret = __gen11_reset_engines(gt, engine_mask, retry); else - ret = gen6_reset_engines(gt, engine_mask, retry); + ret = __gen6_reset_engines(gt, engine_mask, retry); skip_reset: for_each_engine_masked(engine, gt, engine_mask, tmp) gen8_engine_reset_cancel(engine); + spin_unlock_irqrestore(>->uncore->lock, flags); + return ret; } diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 3213c593a55f..e8111fce56d0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -689,6 +689,9 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); + + /* Wa_15010599737:dg2 */ + wa_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); } static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, @@ -2687,6 +2690,9 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li * performance guide section. */ wa_write(wal, XEHPC_L3SCRUB, SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK); + + /* Wa_16016694945 */ + wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC); } if (IS_XEHPSDV(i915)) { diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 8b2c11dbe354..1109088fe8f6 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -176,8 +176,8 @@ static int live_lrc_layout(void *arg) continue; hw = shmem_pin_map(engine->default_state); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); + if (!hw) { + err = -ENOMEM; break; } hw += LRC_STATE_OFFSET / sizeof(*hw); @@ -365,8 +365,8 @@ static int live_lrc_fixed(void *arg) continue; hw = shmem_pin_map(engine->default_state); - if (IS_ERR(hw)) { - err = PTR_ERR(hw); + if (!hw) { + err = -ENOMEM; break; } hw += LRC_STATE_OFFSET / sizeof(*hw); diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index b768cea5943d..ac29691e0b1a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -8,6 +8,11 @@ #define delay_for_h2g() usleep_range(H2G_DELAY, H2G_DELAY + 10000) #define FREQUENCY_REQ_UNIT DIV_ROUND_CLOSEST(GT_FREQUENCY_MULTIPLIER, \ GEN9_FREQ_SCALER) +enum test_type { + VARY_MIN, + VARY_MAX, + MAX_GRANTED +}; static int slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 freq) { @@ -36,147 +41,114 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq) return ret; } -static int live_slpc_clamp_min(void *arg) +static int vary_max_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, + u32 *max_act_freq) { - struct drm_i915_private *i915 = arg; - struct intel_gt *gt = to_gt(i915); - struct intel_guc_slpc *slpc = >->uc.guc.slpc; - struct intel_rps *rps = >->rps; - struct intel_engine_cs *engine; - enum intel_engine_id id; - struct igt_spinner spin; - u32 slpc_min_freq, slpc_max_freq; + u32 step, max_freq, req_freq; + u32 act_freq; int err = 0; - if (!intel_uc_uses_guc_slpc(>->uc)) - return 0; + /* Go from max to min in 5 steps */ + step = (slpc->rp0_freq - slpc->min_freq) / NUM_STEPS; + 
*max_act_freq = slpc->min_freq; + for (max_freq = slpc->rp0_freq; max_freq > slpc->min_freq; + max_freq -= step) { + err = slpc_set_max_freq(slpc, max_freq); + if (err) + break; - if (igt_spinner_init(&spin, gt)) - return -ENOMEM; + req_freq = intel_rps_read_punit_req_frequency(rps); - if (intel_guc_slpc_get_max_freq(slpc, &slpc_max_freq)) { - pr_err("Could not get SLPC max freq\n"); - return -EIO; - } + /* GuC requests freq in multiples of 50/3 MHz */ + if (req_freq > (max_freq + FREQUENCY_REQ_UNIT)) { + pr_err("SWReq is %d, should be at most %d\n", req_freq, + max_freq + FREQUENCY_REQ_UNIT); + err = -EINVAL; + } - if (intel_guc_slpc_get_min_freq(slpc, &slpc_min_freq)) { - pr_err("Could not get SLPC min freq\n"); - return -EIO; - } + act_freq = intel_rps_read_actual_frequency(rps); + if (act_freq > *max_act_freq) + *max_act_freq = act_freq; - if (slpc_min_freq == slpc_max_freq) { - pr_err("Min/Max are fused to the same value\n"); - return -EINVAL; + if (err) + break; } - intel_gt_pm_wait_for_idle(gt); - intel_gt_pm_get(gt); - for_each_engine(engine, gt, id) { - struct i915_request *rq; - u32 step, min_freq, req_freq; - u32 act_freq, max_act_freq; + return err; +} - if (!intel_engine_can_store_dword(engine)) - continue; +static int vary_min_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, + u32 *max_act_freq) +{ + u32 step, min_freq, req_freq; + u32 act_freq; + int err = 0; - /* Go from min to max in 5 steps */ - step = (slpc_max_freq - slpc_min_freq) / NUM_STEPS; - max_act_freq = slpc_min_freq; - for (min_freq = slpc_min_freq; min_freq < slpc_max_freq; - min_freq += step) { - err = slpc_set_min_freq(slpc, min_freq); - if (err) - break; - - st_engine_heartbeat_disable(engine); - - rq = igt_spinner_create_request(&spin, - engine->kernel_context, - MI_NOOP); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - st_engine_heartbeat_enable(engine); - break; - } + /* Go from min to max in 5 steps */ + step = (slpc->rp0_freq - slpc->min_freq) / NUM_STEPS; + *max_act_freq = slpc->min_freq; + for (min_freq = slpc->min_freq; min_freq < slpc->rp0_freq; + min_freq += step) { + err = slpc_set_min_freq(slpc, min_freq); + if (err) + break; - i915_request_add(rq); + req_freq = intel_rps_read_punit_req_frequency(rps); - if (!igt_wait_for_spinner(&spin, rq)) { - pr_err("%s: Spinner did not start\n", - engine->name); - igt_spinner_end(&spin); - st_engine_heartbeat_enable(engine); - intel_gt_set_wedged(engine->gt); - err = -EIO; - break; - } + /* GuC requests freq in multiples of 50/3 MHz */ + if (req_freq < (min_freq - FREQUENCY_REQ_UNIT)) { + pr_err("SWReq is %d, should be at least %d\n", req_freq, + min_freq - FREQUENCY_REQ_UNIT); + err = -EINVAL; + } - /* Wait for GuC to detect business and raise - * requested frequency if necessary. 
- */ - delay_for_h2g(); + act_freq = intel_rps_read_actual_frequency(rps); + if (act_freq > *max_act_freq) + *max_act_freq = act_freq; - req_freq = intel_rps_read_punit_req_frequency(rps); + if (err) + break; + } - /* GuC requests freq in multiples of 50/3 MHz */ - if (req_freq < (min_freq - FREQUENCY_REQ_UNIT)) { - pr_err("SWReq is %d, should be at least %d\n", req_freq, - min_freq - FREQUENCY_REQ_UNIT); - igt_spinner_end(&spin); - st_engine_heartbeat_enable(engine); - err = -EINVAL; - break; - } + return err; +} - act_freq = intel_rps_read_actual_frequency(rps); - if (act_freq > max_act_freq) - max_act_freq = act_freq; +static int max_granted_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, u32 *max_act_freq) +{ + struct intel_gt *gt = rps_to_gt(rps); + u32 perf_limit_reasons; + int err = 0; - igt_spinner_end(&spin); - st_engine_heartbeat_enable(engine); - } + err = slpc_set_min_freq(slpc, slpc->rp0_freq); + if (err) + return err; - pr_info("Max actual frequency for %s was %d\n", - engine->name, max_act_freq); + *max_act_freq = intel_rps_read_actual_frequency(rps); + if (*max_act_freq != slpc->rp0_freq) { + /* Check if there was some throttling by pcode */ + perf_limit_reasons = intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS); - /* Actual frequency should rise above min */ - if (max_act_freq == slpc_min_freq) { - pr_err("Actual freq did not rise above min\n"); + /* If not, this is an error */ + if (!(perf_limit_reasons & GT0_PERF_LIMIT_REASONS_MASK)) { + pr_err("Pcode did not grant max freq\n"); err = -EINVAL; + } else { + pr_info("Pcode throttled frequency 0x%x\n", perf_limit_reasons); } - - if (err) - break; } - /* Restore min/max frequencies */ - slpc_set_max_freq(slpc, slpc_max_freq); - slpc_set_min_freq(slpc, slpc_min_freq); - - if (igt_flush_test(gt->i915)) - err = -EIO; - - intel_gt_pm_put(gt); - igt_spinner_fini(&spin); - intel_gt_pm_wait_for_idle(gt); - return err; } -static int live_slpc_clamp_max(void *arg) +static int run_test(struct intel_gt *gt, int test_type) { - struct drm_i915_private *i915 = arg; - struct intel_gt *gt = to_gt(i915); - struct intel_guc_slpc *slpc; - struct intel_rps *rps; + struct intel_guc_slpc *slpc = >->uc.guc.slpc; + struct intel_rps *rps = >->rps; struct intel_engine_cs *engine; enum intel_engine_id id; struct igt_spinner spin; - int err = 0; u32 slpc_min_freq, slpc_max_freq; - - slpc = >->uc.guc.slpc; - rps = >->rps; + int err = 0; if (!intel_uc_uses_guc_slpc(>->uc)) return 0; @@ -194,7 +166,7 @@ static int live_slpc_clamp_max(void *arg) return -EIO; } - if (slpc_min_freq == slpc_max_freq) { + if (slpc->min_freq == slpc->rp0_freq) { pr_err("Min/Max are fused to the same value\n"); return -EINVAL; } @@ -203,93 +175,82 @@ static int live_slpc_clamp_max(void *arg) intel_gt_pm_get(gt); for_each_engine(engine, gt, id) { struct i915_request *rq; - u32 max_freq, req_freq; - u32 act_freq, max_act_freq; - u32 step; + u32 max_act_freq; if (!intel_engine_can_store_dword(engine)) continue; - /* Go from max to min in 5 steps */ - step = (slpc_max_freq - slpc_min_freq) / NUM_STEPS; - max_act_freq = slpc_min_freq; - for (max_freq = slpc_max_freq; max_freq > slpc_min_freq; - max_freq -= step) { - err = slpc_set_max_freq(slpc, max_freq); - if (err) - break; - - st_engine_heartbeat_disable(engine); - - rq = igt_spinner_create_request(&spin, - engine->kernel_context, - MI_NOOP); - if (IS_ERR(rq)) { - st_engine_heartbeat_enable(engine); - err = PTR_ERR(rq); - break; - } + st_engine_heartbeat_disable(engine); - i915_request_add(rq); + rq = 
igt_spinner_create_request(&spin, + engine->kernel_context, + MI_NOOP); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + st_engine_heartbeat_enable(engine); + break; + } - if (!igt_wait_for_spinner(&spin, rq)) { - pr_err("%s: SLPC spinner did not start\n", - engine->name); - igt_spinner_end(&spin); - st_engine_heartbeat_enable(engine); - intel_gt_set_wedged(engine->gt); - err = -EIO; - break; - } + i915_request_add(rq); - delay_for_h2g(); + if (!igt_wait_for_spinner(&spin, rq)) { + pr_err("%s: Spinner did not start\n", + engine->name); + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); + intel_gt_set_wedged(engine->gt); + err = -EIO; + break; + } - /* Verify that SWREQ indeed was set to specific value */ - req_freq = intel_rps_read_punit_req_frequency(rps); + switch (test_type) { + case VARY_MIN: + err = vary_min_freq(slpc, rps, &max_act_freq); + break; - /* GuC requests freq in multiples of 50/3 MHz */ - if (req_freq > (max_freq + FREQUENCY_REQ_UNIT)) { - pr_err("SWReq is %d, should be at most %d\n", req_freq, - max_freq + FREQUENCY_REQ_UNIT); + case VARY_MAX: + err = vary_max_freq(slpc, rps, &max_act_freq); + break; + + case MAX_GRANTED: + /* Media engines have a different RP0 */ + if (engine->class == VIDEO_DECODE_CLASS || + engine->class == VIDEO_ENHANCEMENT_CLASS) { igt_spinner_end(&spin); st_engine_heartbeat_enable(engine); - err = -EINVAL; - break; + err = 0; + continue; } - act_freq = intel_rps_read_actual_frequency(rps); - if (act_freq > max_act_freq) - max_act_freq = act_freq; - - st_engine_heartbeat_enable(engine); - igt_spinner_end(&spin); - - if (err) - break; + err = max_granted_freq(slpc, rps, &max_act_freq); + break; } pr_info("Max actual frequency for %s was %d\n", engine->name, max_act_freq); /* Actual frequency should rise above min */ - if (max_act_freq == slpc_min_freq) { + if (max_act_freq <= slpc_min_freq) { pr_err("Actual freq did not rise above min\n"); + pr_err("Perf Limit Reasons: 0x%x\n", + intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS)); err = -EINVAL; } - if (igt_flush_test(gt->i915)) { - err = -EIO; - break; - } + igt_spinner_end(&spin); + st_engine_heartbeat_enable(engine); if (err) break; } - /* Restore min/max freq */ + /* Restore min/max frequencies */ slpc_set_max_freq(slpc, slpc_max_freq); slpc_set_min_freq(slpc, slpc_min_freq); + if (igt_flush_test(gt->i915)) + err = -EIO; + intel_gt_pm_put(gt); igt_spinner_fini(&spin); intel_gt_pm_wait_for_idle(gt); @@ -297,11 +258,37 @@ static int live_slpc_clamp_max(void *arg) return err; } +static int live_slpc_vary_min(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_gt *gt = to_gt(i915); + + return run_test(gt, VARY_MIN); +} + +static int live_slpc_vary_max(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_gt *gt = to_gt(i915); + + return run_test(gt, VARY_MAX); +} + +/* check if pcode can grant RP0 */ +static int live_slpc_max_granted(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_gt *gt = to_gt(i915); + + return run_test(gt, MAX_GRANTED); +} + int intel_slpc_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { - SUBTEST(live_slpc_clamp_max), - SUBTEST(live_slpc_clamp_min), + SUBTEST(live_slpc_vary_max), + SUBTEST(live_slpc_vary_min), + SUBTEST(live_slpc_max_granted), }; if (intel_gt_is_wedged(to_gt(i915))) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index 97a32e610c30..75257bd20ff0 100644 --- 
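/*
 * A minimal standalone sketch of the decision made by max_granted_freq()
 * above: a shortfall from RP0 only counts as a failure when the performance
 * limit reasons register reports no throttling, i.e. pcode simply did not
 * grant the request.  LIMIT_REASONS_MASK and the values in main() are
 * stand-ins, not the hardware register layout.
 */
#include <stdio.h>

#define LIMIT_REASONS_MASK 0xffffu      /* stand-in for GT0_PERF_LIMIT_REASONS_MASK */

static int check_max_granted(unsigned int rp0_mhz, unsigned int actual_mhz,
                             unsigned int limit_reasons)
{
        if (actual_mhz == rp0_mhz)
                return 0;                       /* pcode granted the full RP0 request */

        if (!(limit_reasons & LIMIT_REASONS_MASK)) {
                fprintf(stderr, "pcode did not grant max freq\n");
                return -1;                      /* no throttle reason reported: real failure */
        }

        printf("throttled, reasons 0x%x\n", limit_reasons);
        return 0;                               /* throttling explains the shortfall */
}

int main(void)
{
        /* Shortfall explained by a reported throttle reason: not a failure. */
        return check_max_granted(1300, 1100, 0x4) == 0 ? 0 : 1;
}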
a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -9,6 +9,7 @@ #include "gt/intel_engine_regs.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_mcr.h" #include "gt/intel_gt_regs.h" #include "gt/intel_lrc.h" #include "guc_capture_fwif.h" @@ -281,8 +282,7 @@ guc_capture_alloc_steered_lists_xe_lpd(struct intel_guc *guc, const struct __guc_mmio_reg_descr_group *lists) { struct intel_gt *gt = guc_to_gt(guc); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; - int slice, subslice, i, num_steer_regs, num_tot_regs = 0; + int slice, subslice, iter, i, num_steer_regs, num_tot_regs = 0; const struct __guc_mmio_reg_descr_group *list; struct __guc_mmio_reg_descr_group *extlists; struct __guc_mmio_reg_descr *extarray; @@ -298,7 +298,7 @@ guc_capture_alloc_steered_lists_xe_lpd(struct intel_guc *guc, num_steer_regs = ARRAY_SIZE(xe_extregs); sseu = >->info.sseu; - for_each_instdone_slice_subslice(i915, sseu, slice, subslice) + for_each_ss_steering(iter, gt, slice, subslice) num_tot_regs += num_steer_regs; if (!num_tot_regs) @@ -315,7 +315,7 @@ guc_capture_alloc_steered_lists_xe_lpd(struct intel_guc *guc, } extarray = extlists[0].extlist; - for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { + for_each_ss_steering(iter, gt, slice, subslice) { for (i = 0; i < num_steer_regs; ++i) { __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice); ++extarray; @@ -359,9 +359,8 @@ guc_capture_alloc_steered_lists_xe_hpg(struct intel_guc *guc, num_steer_regs += ARRAY_SIZE(xehpg_extregs); sseu = >->info.sseu; - for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { + for_each_ss_steering(iter, gt, slice, subslice) num_tot_regs += num_steer_regs; - } if (!num_tot_regs) return; @@ -377,7 +376,7 @@ guc_capture_alloc_steered_lists_xe_hpg(struct intel_guc *guc, } extarray = extlists[0].extlist; - for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { + for_each_ss_steering(iter, gt, slice, subslice) { for (i = 0; i < ARRAY_SIZE(xe_extregs); ++i) { __fill_ext_reg(extarray, &xe_extregs[i], slice, subslice); ++extarray; @@ -1261,7 +1260,8 @@ static int __guc_capture_flushlog_complete(struct intel_guc *guc) GUC_CAPTURE_LOG_BUFFER }; - return intel_guc_send(guc, action, ARRAY_SIZE(action)); + return intel_guc_send_nb(guc, action, ARRAY_SIZE(action), 0); + } static void __guc_capture_process_output(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 02311ad90264..25b2d7ce6640 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -31,7 +31,7 @@ static int guc_action_flush_log_complete(struct intel_guc *guc) GUC_DEBUG_LOG_BUFFER }; - return intel_guc_send(guc, action, ARRAY_SIZE(action)); + return intel_guc_send_nb(guc, action, ARRAY_SIZE(action), 0); } static int guc_action_flush_log(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index c06e83872c34..27363091e1af 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -162,6 +162,15 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) u8 rev = INTEL_REVID(i915); int i; + /* + * The only difference between the ADL GuC FWs is the HWConfig support. 
+ * ADL-N does not support HWConfig, so we should use the same binary as + * ADL-S, otherwise the GuC might attempt to fetch a config table that + * does not exist. + */ + if (IS_ADLP_N(i915)) + p = INTEL_ALDERLAKE_S; + GEM_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all)); fw_blobs = blobs_all[uc_fw->type].blobs; fw_count = blobs_all[uc_fw->type].count; diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index ee2b3a375362..7412abf166a8 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -974,7 +974,7 @@ void i915_active_acquire_barrier(struct i915_active *ref) GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); llist_add(barrier_to_ll(node), &engine->barrier_tasks); - intel_engine_pm_put_delay(engine, 1); + intel_engine_pm_put_delay(engine, 2); } } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c22f29c3faa0..d25647be25d1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1005,7 +1005,12 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_XEHPSDV(dev_priv) IS_PLATFORM(dev_priv, INTEL_XEHPSDV) #define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, INTEL_DG2) #define IS_PONTEVECCHIO(dev_priv) IS_PLATFORM(dev_priv, INTEL_PONTEVECCHIO) +#define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_METEORLAKE) +#define IS_METEORLAKE_M(dev_priv) \ + IS_SUBPLATFORM(dev_priv, INTEL_METEORLAKE, INTEL_SUBPLATFORM_M) +#define IS_METEORLAKE_P(dev_priv) \ + IS_SUBPLATFORM(dev_priv, INTEL_METEORLAKE, INTEL_SUBPLATFORM_P) #define IS_DG2_G10(dev_priv) \ IS_SUBPLATFORM(dev_priv, INTEL_DG2, INTEL_SUBPLATFORM_G10) #define IS_DG2_G11(dev_priv) \ diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index f9b1969ed7ed..32e92651ef7c 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -46,6 +46,7 @@ #include "gem/i915_gem_lmem.h" #include "gt/intel_engine_regs.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_mcr.h" #include "gt/intel_gt_pm.h" #include "gt/intel_gt_regs.h" #include "gt/uc/intel_guc_capture.h" @@ -436,7 +437,6 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m) static void error_print_instdone(struct drm_i915_error_state_buf *m, const struct intel_engine_coredump *ee) { - const struct sseu_dev_info *sseu = &ee->engine->gt->info.sseu; int slice; int subslice; int iter; @@ -453,33 +453,21 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, if (GRAPHICS_VER(m->i915) <= 6) return; - if (GRAPHICS_VER_FULL(m->i915) >= IP_VER(12, 50)) { - for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) - err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", - slice, subslice, - ee->instdone.sampler[slice][subslice]); - - for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) - err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", - slice, subslice, - ee->instdone.row[slice][subslice]); - } else { - for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) - err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", - slice, subslice, - ee->instdone.sampler[slice][subslice]); + for_each_ss_steering(iter, ee->engine->gt, slice, subslice) + err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.sampler[slice][subslice]); - for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) - err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", - slice, subslice, - ee->instdone.row[slice][subslice]); 
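/*
 * A minimal standalone sketch of the firmware selection above: ADL-N is
 * redirected to the ADL-S GuC entry before the blob table is walked, since
 * the only difference between the ADL GuC builds is HWConfig support, which
 * ADL-N lacks.  The enum, table and file names below are invented for the
 * example; only the "alias one platform to another before lookup" shape
 * follows the driver change.
 */
#include <stdio.h>

enum platform { PLAT_ADL_S, PLAT_ADL_P, PLAT_ADL_N, PLAT_COUNT };

static const char * const guc_blob[PLAT_COUNT] = {
        [PLAT_ADL_S] = "adls_guc.bin",
        [PLAT_ADL_P] = "adlp_guc.bin",
        /* no dedicated ADL-N entry: it reuses the ADL-S blob */
};

static const char *select_guc_fw(enum platform p)
{
        /* ADL-N must not fetch a HWConfig table, so use the ADL-S binary */
        if (p == PLAT_ADL_N)
                p = PLAT_ADL_S;

        return guc_blob[p];
}

int main(void)
{
        printf("ADL-N firmware: %s\n", select_guc_fw(PLAT_ADL_N));
        return 0;
}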
- } + for_each_ss_steering(iter, ee->engine->gt, slice, subslice) + err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.row[slice][subslice]); if (GRAPHICS_VER(m->i915) < 12) return; if (GRAPHICS_VER_FULL(m->i915) >= IP_VER(12, 55)) { - for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) + for_each_ss_steering(iter, ee->engine->gt, slice, subslice) err_printf(m, " GEOM_SVGUNIT_INSTDONE[%d][%d]: 0x%08x\n", slice, subslice, ee->instdone.geom_svg[slice][subslice]); @@ -1129,11 +1117,15 @@ i915_vma_coredump_create(const struct intel_gt *gt, dma_addr_t dma; for_each_sgt_daddr(dma, iter, vma_res->bi.pages) { + dma_addr_t offset = dma - mem->region.start; void __iomem *s; - s = io_mapping_map_wc(&mem->iomap, - dma - mem->region.start, - PAGE_SIZE); + if (offset + PAGE_SIZE > mem->io_size) { + ret = -EINVAL; + break; + } + + s = io_mapping_map_wc(&mem->iomap, offset, PAGE_SIZE); ret = compress_page(compress, (void __force *)s, dst, true); diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 701fbc98afa0..6fc475a5db61 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -204,6 +204,8 @@ i915_param_named_unsafe(request_timeout_ms, uint, 0600, i915_param_named_unsafe(lmem_size, uint, 0400, "Set the lmem size(in MiB) for each region. (default: 0, all memory)"); +i915_param_named_unsafe(lmem_bar_size, uint, 0400, + "Set the lmem bar size(in MiB)."); static __always_inline void _print_param(struct drm_printer *p, const char *name, diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index b5e7ea45d191..2733cb6cfe09 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -74,6 +74,7 @@ struct drm_printer; param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE, 0400) \ param(unsigned int, request_timeout_ms, CONFIG_DRM_I915_REQUEST_TIMEOUT, CONFIG_DRM_I915_REQUEST_TIMEOUT ? 0600 : 0) \ param(unsigned int, lmem_size, 0, 0400) \ + param(unsigned int, lmem_bar_size, 0, 0400) \ /* leave bools at the end to not create holes */ \ param(bool, enable_hangcheck, true, 0600) \ param(bool, load_detect_test, false, 0600) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 5edc8fbf1dff..aacc10f2e73f 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1075,7 +1075,6 @@ static const struct intel_device_info dg2_info = { .require_force_probe = 1, }; -__maybe_unused static const struct intel_device_info ats_m_info = { DG2_FEATURES, .display = { 0 }, @@ -1108,6 +1107,31 @@ static const struct intel_device_info pvc_info = { .require_force_probe = 1, }; +#define XE_LPDP_FEATURES \ + XE_LPD_FEATURES, \ + .display.ver = 14, \ + .display.has_cdclk_crawl = 1 + +__maybe_unused +static const struct intel_device_info mtl_info = { + XE_HP_FEATURES, + XE_LPDP_FEATURES, + /* + * Real graphics IP version will be obtained from hardware GMD_ID + * register. Value provided here is just for sanity checking. 
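/*
 * A minimal standalone sketch of the small-BAR guard added to
 * i915_vma_coredump_create() above: with a resizable/small BAR only the
 * first io_size bytes of LMEM are CPU-mappable, so a page offset past that
 * window is rejected instead of being iomapped.  The struct and constants
 * below are simplified stand-ins, not the driver types.
 */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SZ 4096u

struct region {
        uint64_t start;         /* first DMA address of the region */
        uint64_t io_size;       /* CPU-visible (mappable) span, <= total size */
};

/* Return the mappable offset for one page, or -1 if it is not CPU visible. */
static int64_t mappable_offset(const struct region *mem, uint64_t dma)
{
        uint64_t offset = dma - mem->start;

        if (offset + PAGE_SZ > mem->io_size)
                return -1;      /* page lives in the non-mappable tail of LMEM */

        return (int64_t)offset;
}

int main(void)
{
        struct region lmem = { .start = 0x100000000ull, .io_size = 256ull << 20 };

        printf("%lld\n", (long long)mappable_offset(&lmem, lmem.start));                    /* 0  */
        printf("%lld\n", (long long)mappable_offset(&lmem, lmem.start + (512ull << 20)));   /* -1 */
        return 0;
}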
+ */ + .graphics.ver = 12, + .graphics.rel = 70, + .media.ver = 13, + PLATFORM(INTEL_METEORLAKE), + .display.has_modular_fia = 1, + .has_flat_ccs = 0, + .has_snoop = 1, + .memory_regions = REGION_SMEM | REGION_STOLEN_LMEM, + .platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(CCS0), + .require_force_probe = 1, +}; + #undef PLATFORM /* @@ -1189,6 +1213,8 @@ static const struct pci_device_id pciidlist[] = { INTEL_RPLS_IDS(&adl_s_info), INTEL_RPLP_IDS(&adl_p_info), INTEL_DG2_IDS(&dg2_info), + INTEL_ATS_M_IDS(&ats_m_info), + INTEL_MTL_IDS(&mtl_info), {0, 0, 0} }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 1577ab6754db..f3c23fe9ad9c 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -885,8 +885,9 @@ static int gen8_oa_read(struct i915_perf_stream *stream, if (ret) return ret; - DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", - stream->period_exponent); + drm_dbg(&stream->perf->i915->drm, + "OA buffer overflow (exponent = %d): force restart\n", + stream->period_exponent); stream->perf->ops.oa_disable(stream); stream->perf->ops.oa_enable(stream); @@ -1108,8 +1109,9 @@ static int gen7_oa_read(struct i915_perf_stream *stream, if (ret) return ret; - DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", - stream->period_exponent); + drm_dbg(&stream->perf->i915->drm, + "OA buffer overflow (exponent = %d): force restart\n", + stream->period_exponent); stream->perf->ops.oa_disable(stream); stream->perf->ops.oa_enable(stream); @@ -2863,7 +2865,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, int ret; if (!props->engine) { - DRM_DEBUG("OA engine not specified\n"); + drm_dbg(&stream->perf->i915->drm, + "OA engine not specified\n"); return -EINVAL; } @@ -2873,18 +2876,21 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, * IDs */ if (!perf->metrics_kobj) { - DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); + drm_dbg(&stream->perf->i915->drm, + "OA metrics weren't advertised via sysfs\n"); return -EINVAL; } if (!(props->sample_flags & SAMPLE_OA_REPORT) && (GRAPHICS_VER(perf->i915) < 12 || !stream->ctx)) { - DRM_DEBUG("Only OA report sampling supported\n"); + drm_dbg(&stream->perf->i915->drm, + "Only OA report sampling supported\n"); return -EINVAL; } if (!perf->ops.enable_metric_set) { - DRM_DEBUG("OA unit not supported\n"); + drm_dbg(&stream->perf->i915->drm, + "OA unit not supported\n"); return -ENODEV; } @@ -2894,12 +2900,14 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, * we currently only allow exclusive access */ if (perf->exclusive_stream) { - DRM_DEBUG("OA unit already in use\n"); + drm_dbg(&stream->perf->i915->drm, + "OA unit already in use\n"); return -EBUSY; } if (!props->oa_format) { - DRM_DEBUG("OA report format not specified\n"); + drm_dbg(&stream->perf->i915->drm, + "OA report format not specified\n"); return -EINVAL; } @@ -2929,20 +2937,23 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, if (stream->ctx) { ret = oa_get_render_ctx_id(stream); if (ret) { - DRM_DEBUG("Invalid context id to filter with\n"); + drm_dbg(&stream->perf->i915->drm, + "Invalid context id to filter with\n"); return ret; } } ret = alloc_noa_wait(stream); if (ret) { - DRM_DEBUG("Unable to allocate NOA wait batch buffer\n"); + drm_dbg(&stream->perf->i915->drm, + "Unable to allocate NOA wait batch buffer\n"); goto err_noa_wait_alloc; } stream->oa_config = i915_perf_get_oa_config(perf, 
props->metrics_set); if (!stream->oa_config) { - DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set); + drm_dbg(&stream->perf->i915->drm, + "Invalid OA config id=%i\n", props->metrics_set); ret = -EINVAL; goto err_config; } @@ -2973,11 +2984,13 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, ret = i915_perf_stream_enable_sync(stream); if (ret) { - DRM_DEBUG("Unable to enable metric set\n"); + drm_dbg(&stream->perf->i915->drm, + "Unable to enable metric set\n"); goto err_enable; } - DRM_DEBUG("opening stream oa config uuid=%s\n", + drm_dbg(&stream->perf->i915->drm, + "opening stream oa config uuid=%s\n", stream->oa_config->uuid); hrtimer_init(&stream->poll_check_timer, @@ -3429,7 +3442,8 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle); if (IS_ERR(specific_ctx)) { - DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n", + drm_dbg(&perf->i915->drm, + "Failed to look up context with ID %u for opening perf stream\n", ctx_handle); ret = PTR_ERR(specific_ctx); goto err; @@ -3463,7 +3477,8 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, if (props->hold_preemption) { if (!props->single_context) { - DRM_DEBUG("preemption disable with no context\n"); + drm_dbg(&perf->i915->drm, + "preemption disable with no context\n"); ret = -EINVAL; goto err; } @@ -3485,7 +3500,8 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, */ if (privileged_op && i915_perf_stream_paranoid && !perfmon_capable()) { - DRM_DEBUG("Insufficient privileges to open i915 perf stream\n"); + drm_dbg(&perf->i915->drm, + "Insufficient privileges to open i915 perf stream\n"); ret = -EACCES; goto err_ctx; } @@ -3592,7 +3608,8 @@ static int read_properties_unlocked(struct i915_perf *perf, props->poll_oa_period = DEFAULT_POLL_PERIOD_NS; if (!n_props) { - DRM_DEBUG("No i915 perf properties given\n"); + drm_dbg(&perf->i915->drm, + "No i915 perf properties given\n"); return -EINVAL; } @@ -3601,7 +3618,8 @@ static int read_properties_unlocked(struct i915_perf *perf, I915_ENGINE_CLASS_RENDER, 0); if (!props->engine) { - DRM_DEBUG("No RENDER-capable engines\n"); + drm_dbg(&perf->i915->drm, + "No RENDER-capable engines\n"); return -EINVAL; } @@ -3612,7 +3630,8 @@ static int read_properties_unlocked(struct i915_perf *perf, * from userspace. 
*/ if (n_props >= DRM_I915_PERF_PROP_MAX) { - DRM_DEBUG("More i915 perf properties specified than exist\n"); + drm_dbg(&perf->i915->drm, + "More i915 perf properties specified than exist\n"); return -EINVAL; } @@ -3629,7 +3648,8 @@ static int read_properties_unlocked(struct i915_perf *perf, return ret; if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) { - DRM_DEBUG("Unknown i915 perf property ID\n"); + drm_dbg(&perf->i915->drm, + "Unknown i915 perf property ID\n"); return -EINVAL; } @@ -3644,19 +3664,22 @@ static int read_properties_unlocked(struct i915_perf *perf, break; case DRM_I915_PERF_PROP_OA_METRICS_SET: if (value == 0) { - DRM_DEBUG("Unknown OA metric set ID\n"); + drm_dbg(&perf->i915->drm, + "Unknown OA metric set ID\n"); return -EINVAL; } props->metrics_set = value; break; case DRM_I915_PERF_PROP_OA_FORMAT: if (value == 0 || value >= I915_OA_FORMAT_MAX) { - DRM_DEBUG("Out-of-range OA report format %llu\n", + drm_dbg(&perf->i915->drm, + "Out-of-range OA report format %llu\n", value); return -EINVAL; } if (!oa_format_valid(perf, value)) { - DRM_DEBUG("Unsupported OA report format %llu\n", + drm_dbg(&perf->i915->drm, + "Unsupported OA report format %llu\n", value); return -EINVAL; } @@ -3664,7 +3687,8 @@ static int read_properties_unlocked(struct i915_perf *perf, break; case DRM_I915_PERF_PROP_OA_EXPONENT: if (value > OA_EXPONENT_MAX) { - DRM_DEBUG("OA timer exponent too high (> %u)\n", + drm_dbg(&perf->i915->drm, + "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX); return -EINVAL; } @@ -3692,7 +3716,8 @@ static int read_properties_unlocked(struct i915_perf *perf, oa_freq_hz = 0; if (oa_freq_hz > i915_oa_max_sample_rate && !perfmon_capable()) { - DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without CAP_PERFMON or CAP_SYS_ADMIN privileges\n", + drm_dbg(&perf->i915->drm, + "OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without CAP_PERFMON or CAP_SYS_ADMIN privileges\n", i915_oa_max_sample_rate); return -EACCES; } @@ -3706,16 +3731,25 @@ static int read_properties_unlocked(struct i915_perf *perf, case DRM_I915_PERF_PROP_GLOBAL_SSEU: { struct drm_i915_gem_context_param_sseu user_sseu; + if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 50)) { + drm_dbg(&perf->i915->drm, + "SSEU config not supported on gfx %x\n", + GRAPHICS_VER_FULL(perf->i915)); + return -ENODEV; + } + if (copy_from_user(&user_sseu, u64_to_user_ptr(value), sizeof(user_sseu))) { - DRM_DEBUG("Unable to copy global sseu parameter\n"); + drm_dbg(&perf->i915->drm, + "Unable to copy global sseu parameter\n"); return -EFAULT; } ret = get_sseu_config(&props->sseu, props->engine, &user_sseu); if (ret) { - DRM_DEBUG("Invalid SSEU configuration\n"); + drm_dbg(&perf->i915->drm, + "Invalid SSEU configuration\n"); return ret; } props->has_sseu = true; @@ -3723,7 +3757,8 @@ static int read_properties_unlocked(struct i915_perf *perf, } case DRM_I915_PERF_PROP_POLL_OA_PERIOD: if (value < 100000 /* 100us */) { - DRM_DEBUG("OA availability timer too small (%lluns < 100us)\n", + drm_dbg(&perf->i915->drm, + "OA availability timer too small (%lluns < 100us)\n", value); return -EINVAL; } @@ -3774,7 +3809,8 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data, int ret; if (!perf->i915) { - DRM_DEBUG("i915 perf interface not available for this system\n"); + drm_dbg(&perf->i915->drm, + "i915 perf interface not available for this system\n"); return -ENOTSUPP; } @@ -3782,7 +3818,8 @@ int i915_perf_open_ioctl(struct drm_device *dev, 
void *data, I915_PERF_FLAG_FD_NONBLOCK | I915_PERF_FLAG_DISABLED; if (param->flags & ~known_open_flags) { - DRM_DEBUG("Unknown drm_i915_perf_open_param flag\n"); + drm_dbg(&perf->i915->drm, + "Unknown drm_i915_perf_open_param flag\n"); return -EINVAL; } @@ -4028,7 +4065,8 @@ static struct i915_oa_reg *alloc_oa_regs(struct i915_perf *perf, goto addr_err; if (!is_valid(perf, addr)) { - DRM_DEBUG("Invalid oa_reg address: %X\n", addr); + drm_dbg(&perf->i915->drm, + "Invalid oa_reg address: %X\n", addr); err = -EINVAL; goto addr_err; } @@ -4102,30 +4140,35 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, int err, id; if (!perf->i915) { - DRM_DEBUG("i915 perf interface not available for this system\n"); + drm_dbg(&perf->i915->drm, + "i915 perf interface not available for this system\n"); return -ENOTSUPP; } if (!perf->metrics_kobj) { - DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); + drm_dbg(&perf->i915->drm, + "OA metrics weren't advertised via sysfs\n"); return -EINVAL; } if (i915_perf_stream_paranoid && !perfmon_capable()) { - DRM_DEBUG("Insufficient privileges to add i915 OA config\n"); + drm_dbg(&perf->i915->drm, + "Insufficient privileges to add i915 OA config\n"); return -EACCES; } if ((!args->mux_regs_ptr || !args->n_mux_regs) && (!args->boolean_regs_ptr || !args->n_boolean_regs) && (!args->flex_regs_ptr || !args->n_flex_regs)) { - DRM_DEBUG("No OA registers given\n"); + drm_dbg(&perf->i915->drm, + "No OA registers given\n"); return -EINVAL; } oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL); if (!oa_config) { - DRM_DEBUG("Failed to allocate memory for the OA config\n"); + drm_dbg(&perf->i915->drm, + "Failed to allocate memory for the OA config\n"); return -ENOMEM; } @@ -4133,7 +4176,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, kref_init(&oa_config->ref); if (!uuid_is_valid(args->uuid)) { - DRM_DEBUG("Invalid uuid format for OA config\n"); + drm_dbg(&perf->i915->drm, + "Invalid uuid format for OA config\n"); err = -EINVAL; goto reg_err; } @@ -4150,7 +4194,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, args->n_mux_regs); if (IS_ERR(regs)) { - DRM_DEBUG("Failed to create OA config for mux_regs\n"); + drm_dbg(&perf->i915->drm, + "Failed to create OA config for mux_regs\n"); err = PTR_ERR(regs); goto reg_err; } @@ -4163,7 +4208,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, args->n_boolean_regs); if (IS_ERR(regs)) { - DRM_DEBUG("Failed to create OA config for b_counter_regs\n"); + drm_dbg(&perf->i915->drm, + "Failed to create OA config for b_counter_regs\n"); err = PTR_ERR(regs); goto reg_err; } @@ -4182,7 +4228,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, args->n_flex_regs); if (IS_ERR(regs)) { - DRM_DEBUG("Failed to create OA config for flex_regs\n"); + drm_dbg(&perf->i915->drm, + "Failed to create OA config for flex_regs\n"); err = PTR_ERR(regs); goto reg_err; } @@ -4198,7 +4245,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, */ idr_for_each_entry(&perf->metrics_idr, tmp, id) { if (!strcmp(tmp->uuid, oa_config->uuid)) { - DRM_DEBUG("OA config already exists with this uuid\n"); + drm_dbg(&perf->i915->drm, + "OA config already exists with this uuid\n"); err = -EADDRINUSE; goto sysfs_err; } @@ -4206,7 +4254,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, err = create_dynamic_oa_sysfs_entry(perf, oa_config); if (err) { - DRM_DEBUG("Failed to create sysfs entry for OA config\n"); + drm_dbg(&perf->i915->drm, + 
"Failed to create sysfs entry for OA config\n"); goto sysfs_err; } @@ -4215,14 +4264,16 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, oa_config, 2, 0, GFP_KERNEL); if (oa_config->id < 0) { - DRM_DEBUG("Failed to create sysfs entry for OA config\n"); + drm_dbg(&perf->i915->drm, + "Failed to create sysfs entry for OA config\n"); err = oa_config->id; goto sysfs_err; } mutex_unlock(&perf->metrics_lock); - DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id); + drm_dbg(&perf->i915->drm, + "Added config %s id=%i\n", oa_config->uuid, oa_config->id); return oa_config->id; @@ -4230,7 +4281,8 @@ sysfs_err: mutex_unlock(&perf->metrics_lock); reg_err: i915_oa_config_put(oa_config); - DRM_DEBUG("Failed to add new OA config\n"); + drm_dbg(&perf->i915->drm, + "Failed to add new OA config\n"); return err; } @@ -4254,12 +4306,14 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, int ret; if (!perf->i915) { - DRM_DEBUG("i915 perf interface not available for this system\n"); + drm_dbg(&perf->i915->drm, + "i915 perf interface not available for this system\n"); return -ENOTSUPP; } if (i915_perf_stream_paranoid && !perfmon_capable()) { - DRM_DEBUG("Insufficient privileges to remove i915 OA config\n"); + drm_dbg(&perf->i915->drm, + "Insufficient privileges to remove i915 OA config\n"); return -EACCES; } @@ -4269,7 +4323,8 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, oa_config = idr_find(&perf->metrics_idr, *arg); if (!oa_config) { - DRM_DEBUG("Failed to remove unknown OA config\n"); + drm_dbg(&perf->i915->drm, + "Failed to remove unknown OA config\n"); ret = -ENOENT; goto err_unlock; } @@ -4282,7 +4337,8 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, mutex_unlock(&perf->metrics_lock); - DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id); + drm_dbg(&perf->i915->drm, + "Removed config %s id=%i\n", oa_config->uuid, oa_config->id); i915_oa_config_put(oa_config); diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index 0094f67c63f2..6ec9c9fb7b0d 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -498,7 +498,21 @@ static int query_memregion_info(struct drm_i915_private *i915, info.region.memory_class = mr->type; info.region.memory_instance = mr->instance; info.probed_size = mr->total; - info.unallocated_size = mr->avail; + + if (mr->type == INTEL_MEMORY_LOCAL) + info.probed_cpu_visible_size = mr->io_size; + else + info.probed_cpu_visible_size = mr->total; + + if (perfmon_capable()) { + intel_memory_region_avail(mr, + &info.unallocated_size, + &info.unallocated_cpu_visible_size); + } else { + info.unallocated_size = info.probed_size; + info.unallocated_cpu_visible_size = + info.probed_cpu_visible_size; + } if (__copy_to_user(info_ptr, &info, sizeof(info))) return -EFAULT; diff --git a/drivers/gpu/drm/i915/i915_scatterlist.c b/drivers/gpu/drm/i915/i915_scatterlist.c index 159571b9bd24..dcc081874ec8 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.c +++ b/drivers/gpu/drm/i915/i915_scatterlist.c @@ -68,6 +68,7 @@ void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size) * drm_mm_node * @node: The drm_mm_node. * @region_start: An offset to add to the dma addresses of the sg list. + * @page_alignment: Required page alignment for each sg entry. Power of two. 
* * Create a struct sg_table, initializing it from a struct drm_mm_node, * taking a maximum segment length into account, splitting into segments @@ -77,22 +78,25 @@ void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size) * error code cast to an error pointer on failure. */ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, - u64 region_start) + u64 region_start, + u32 page_alignment) { - const u64 max_segment = SZ_1G; /* Do we have a limit on this? */ - u64 segment_pages = max_segment >> PAGE_SHIFT; + const u32 max_segment = round_down(UINT_MAX, page_alignment); + const u32 segment_pages = max_segment >> PAGE_SHIFT; u64 block_size, offset, prev_end; struct i915_refct_sgt *rsgt; struct sg_table *st; struct scatterlist *sg; + GEM_BUG_ON(!max_segment); + rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL); if (!rsgt) return ERR_PTR(-ENOMEM); i915_refct_sgt_init(rsgt, node->size << PAGE_SHIFT); st = &rsgt->table; - if (sg_alloc_table(st, DIV_ROUND_UP(node->size, segment_pages), + if (sg_alloc_table(st, DIV_ROUND_UP_ULL(node->size, segment_pages), GFP_KERNEL)) { i915_refct_sgt_put(rsgt); return ERR_PTR(-ENOMEM); @@ -112,12 +116,14 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, sg = __sg_next(sg); sg_dma_address(sg) = region_start + offset; + GEM_BUG_ON(!IS_ALIGNED(sg_dma_address(sg), + page_alignment)); sg_dma_len(sg) = 0; sg->length = 0; st->nents++; } - len = min(block_size, max_segment - sg->length); + len = min_t(u64, block_size, max_segment - sg->length); sg->length += len; sg_dma_len(sg) += len; @@ -138,6 +144,7 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, * i915_buddy_block list * @res: The struct i915_ttm_buddy_resource. * @region_start: An offset to add to the dma addresses of the sg list. + * @page_alignment: Required page alignment for each sg entry. Power of two. * * Create a struct sg_table, initializing it from struct i915_buddy_block list, * taking a maximum segment length into account, splitting into segments @@ -147,11 +154,12 @@ struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, * error code cast to an error pointer on failure. 
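/*
 * A minimal standalone sketch of the capping arithmetic used by the
 * scatterlist builders above: the per-entry limit becomes
 * round_down(UINT_MAX, page_alignment), so the 32-bit sg length cannot
 * overflow and the cap itself stays a multiple of the backing page size.
 * The helper names and the sizes in main() are stand-ins for the example.
 */
#include <stdint.h>
#include <stdio.h>

/* Largest 32-bit entry length that is still a multiple of `alignment`. */
static uint32_t max_segment_for(uint32_t alignment)
{
        return UINT32_MAX - (UINT32_MAX % alignment);   /* round_down(UINT_MAX, alignment) */
}

/* Count how many sg-style entries a block of `size` bytes needs. */
static unsigned int count_segments(uint64_t size, uint32_t alignment)
{
        uint32_t max_segment = max_segment_for(alignment);
        unsigned int n = 0;

        while (size) {
                uint64_t len = size < max_segment ? size : max_segment;

                size -= len;
                n++;
        }
        return n;
}

int main(void)
{
        /* A 16 GiB block with 64 KiB pages needs five entries of <= ~4 GiB each. */
        printf("%u\n", count_segments(16ull << 30, 64 << 10));
        return 0;
}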
*/ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, - u64 region_start) + u64 region_start, + u32 page_alignment) { struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res); const u64 size = res->num_pages << PAGE_SHIFT; - const u64 max_segment = rounddown(UINT_MAX, PAGE_SIZE); + const u32 max_segment = round_down(UINT_MAX, page_alignment); struct drm_buddy *mm = bman_res->mm; struct list_head *blocks = &bman_res->blocks; struct drm_buddy_block *block; @@ -161,6 +169,7 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, resource_size_t prev_end; GEM_BUG_ON(list_empty(blocks)); + GEM_BUG_ON(!max_segment); rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL); if (!rsgt) @@ -191,12 +200,14 @@ struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, sg = __sg_next(sg); sg_dma_address(sg) = region_start + offset; + GEM_BUG_ON(!IS_ALIGNED(sg_dma_address(sg), + page_alignment)); sg_dma_len(sg) = 0; sg->length = 0; st->nents++; } - len = min(block_size, max_segment - sg->length); + len = min_t(u64, block_size, max_segment - sg->length); sg->length += len; sg_dma_len(sg) += len; diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h index 12c6a1684081..9ddb3e743a3e 100644 --- a/drivers/gpu/drm/i915/i915_scatterlist.h +++ b/drivers/gpu/drm/i915/i915_scatterlist.h @@ -213,9 +213,11 @@ static inline void __i915_refct_sgt_init(struct i915_refct_sgt *rsgt, void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size); struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node, - u64 region_start); + u64 region_start, + u32 page_alignment); struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res, - u64 region_start); + u64 region_start, + u32 page_alignment); #endif diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c index a5109548abc0..427de1aaab36 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c @@ -104,18 +104,15 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, min_page_size, &bman_res->blocks, bman_res->flags); - mutex_unlock(&bman->lock); if (unlikely(err)) goto err_free_blocks; if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { u64 original_size = (u64)bman_res->base.num_pages << PAGE_SHIFT; - mutex_lock(&bman->lock); drm_buddy_block_trim(mm, original_size, &bman_res->blocks); - mutex_unlock(&bman->lock); } if (lpfn <= bman->visible_size) { @@ -137,11 +134,10 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, } } - if (bman_res->used_visible_size) { - mutex_lock(&bman->lock); + if (bman_res->used_visible_size) bman->visible_avail -= bman_res->used_visible_size; - mutex_unlock(&bman->lock); - } + + mutex_unlock(&bman->lock); if (place->lpfn - place->fpfn == n_pages) bman_res->base.start = place->fpfn; @@ -154,7 +150,6 @@ static int i915_ttm_buddy_man_alloc(struct ttm_resource_manager *man, return 0; err_free_blocks: - mutex_lock(&bman->lock); drm_buddy_free_list(mm, &bman_res->blocks); mutex_unlock(&bman->lock); err_free_res: @@ -365,6 +360,26 @@ u64 i915_ttm_buddy_man_visible_size(struct ttm_resource_manager *man) return bman->visible_size; } +/** + * i915_ttm_buddy_man_avail - Query the avail tracking for the manager. + * + * @man: The buddy allocator ttm manager + * @avail: The total available memory in pages for the entire manager. 
+ * @visible_avail: The total available memory in pages for the CPU visible + * portion. Note that this will always give the same value as @avail on + * configurations that don't have a small BAR. + */ +void i915_ttm_buddy_man_avail(struct ttm_resource_manager *man, + u64 *avail, u64 *visible_avail) +{ + struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); + + mutex_lock(&bman->lock); + *avail = bman->mm.avail >> PAGE_SHIFT; + *visible_avail = bman->visible_avail; + mutex_unlock(&bman->lock); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) void i915_ttm_buddy_man_force_visible_size(struct ttm_resource_manager *man, u64 size) diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h index 52d9586d242c..d64620712830 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.h @@ -61,6 +61,9 @@ int i915_ttm_buddy_man_reserve(struct ttm_resource_manager *man, u64 i915_ttm_buddy_man_visible_size(struct ttm_resource_manager *man); +void i915_ttm_buddy_man_avail(struct ttm_resource_manager *man, + u64 *avail, u64 *avail_visible); + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) void i915_ttm_buddy_man_force_visible_size(struct ttm_resource_manager *man, u64 size); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 5d5828b9a242..ef3b04c7e153 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -310,7 +310,7 @@ struct i915_vma_work { struct i915_address_space *vm; struct i915_vm_pt_stash stash; struct i915_vma_resource *vma_res; - struct drm_i915_gem_object *pinned; + struct drm_i915_gem_object *obj; struct i915_sw_dma_fence_cb cb; enum i915_cache_level cache_level; unsigned int flags; @@ -321,17 +321,25 @@ static void __vma_bind(struct dma_fence_work *work) struct i915_vma_work *vw = container_of(work, typeof(*vw), base); struct i915_vma_resource *vma_res = vw->vma_res; + /* + * We are about the bind the object, which must mean we have already + * signaled the work to potentially clear/move the pages underneath. If + * something went wrong at that stage then the object should have + * unknown_state set, in which case we need to skip the bind. + */ + if (i915_gem_object_has_unknown_state(vw->obj)) + return; + vma_res->ops->bind_vma(vma_res->vm, &vw->stash, vma_res, vw->cache_level, vw->flags); - } static void __vma_release(struct dma_fence_work *work) { struct i915_vma_work *vw = container_of(work, typeof(*vw), base); - if (vw->pinned) - i915_gem_object_put(vw->pinned); + if (vw->obj) + i915_gem_object_put(vw->obj); i915_vm_free_pt_stash(vw->vm, &vw->stash); if (vw->vma_res) @@ -517,14 +525,7 @@ int i915_vma_bind(struct i915_vma *vma, } work->base.dma.error = 0; /* enable the queue_work() */ - - /* - * If we don't have the refcounted pages list, keep a reference - * on the object to avoid waiting for the async bind to - * complete in the object destruction path. 
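/*
 * A minimal standalone sketch of the pattern used by
 * i915_ttm_buddy_man_avail() above: both counters are snapshotted under the
 * manager's lock so the pair is internally consistent, and the caller turns
 * pages into bytes.  A plain pthread version with made-up field names:
 */
#include <pthread.h>
#include <stdint.h>

#define PAGE_SHIFT 12

struct buddy_mgr {
        pthread_mutex_t lock;
        uint64_t avail_pages;           /* all free pages */
        uint64_t visible_avail_pages;   /* free pages in the CPU-visible window */
};

/* Report available bytes; both values come from one locked snapshot. */
static void mgr_avail_bytes(struct buddy_mgr *m, uint64_t *avail,
                            uint64_t *visible_avail)
{
        pthread_mutex_lock(&m->lock);
        *avail = m->avail_pages << PAGE_SHIFT;
        *visible_avail = m->visible_avail_pages << PAGE_SHIFT;
        pthread_mutex_unlock(&m->lock);
}

int main(void)
{
        struct buddy_mgr m = { PTHREAD_MUTEX_INITIALIZER, 1024, 256 };
        uint64_t a, v;

        mgr_avail_bytes(&m, &a, &v);
        return (a == (1024ull << PAGE_SHIFT) && v == (256ull << PAGE_SHIFT)) ? 0 : 1;
}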
- */ - if (!work->vma_res->bi.pages_rsgt) - work->pinned = i915_gem_object_get(vma->obj); + work->obj = i915_gem_object_get(vma->obj); } else { ret = i915_gem_object_wait_moving_fence(vma->obj, true); if (ret) { @@ -1645,10 +1646,10 @@ static void force_unbind(struct i915_vma *vma) GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); } -static void release_references(struct i915_vma *vma, bool vm_ddestroy) +static void release_references(struct i915_vma *vma, struct intel_gt *gt, + bool vm_ddestroy) { struct drm_i915_gem_object *obj = vma->obj; - struct intel_gt *gt = vma->vm->gt; GEM_BUG_ON(i915_vma_is_active(vma)); @@ -1703,11 +1704,12 @@ void i915_vma_destroy_locked(struct i915_vma *vma) force_unbind(vma); list_del_init(&vma->vm_link); - release_references(vma, false); + release_references(vma, vma->vm->gt, false); } void i915_vma_destroy(struct i915_vma *vma) { + struct intel_gt *gt; bool vm_ddestroy; mutex_lock(&vma->vm->mutex); @@ -1715,8 +1717,11 @@ void i915_vma_destroy(struct i915_vma *vma) list_del_init(&vma->vm_link); vm_ddestroy = vma->vm_ddestroy; vma->vm_ddestroy = false; + + /* vma->vm may be freed when releasing vma->vm->mutex. */ + gt = vma->vm->gt; mutex_unlock(&vma->vm->mutex); - release_references(vma, vm_ddestroy); + release_references(vma, gt, vm_ddestroy); } void i915_vma_parked(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 7eb893666595..d98fbbd589aa 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -73,6 +73,7 @@ static const char * const platform_names[] = { PLATFORM_NAME(XEHPSDV), PLATFORM_NAME(DG2), PLATFORM_NAME(PONTEVECCHIO), + PLATFORM_NAME(METEORLAKE), }; #undef PLATFORM_NAME @@ -189,16 +190,26 @@ static const u16 subplatform_rpl_ids[] = { static const u16 subplatform_g10_ids[] = { INTEL_DG2_G10_IDS(0), + INTEL_ATS_M150_IDS(0), }; static const u16 subplatform_g11_ids[] = { INTEL_DG2_G11_IDS(0), + INTEL_ATS_M75_IDS(0), }; static const u16 subplatform_g12_ids[] = { INTEL_DG2_G12_IDS(0), }; +static const u16 subplatform_m_ids[] = { + INTEL_MTL_M_IDS(0), +}; + +static const u16 subplatform_p_ids[] = { + INTEL_MTL_P_IDS(0), +}; + static bool find_devid(u16 id, const u16 *p, unsigned int num) { for (; num; num--, p++) { @@ -253,6 +264,12 @@ void intel_device_info_subplatform_init(struct drm_i915_private *i915) } else if (find_devid(devid, subplatform_g12_ids, ARRAY_SIZE(subplatform_g12_ids))) { mask = BIT(INTEL_SUBPLATFORM_G12); + } else if (find_devid(devid, subplatform_m_ids, + ARRAY_SIZE(subplatform_m_ids))) { + mask = BIT(INTEL_SUBPLATFORM_M); + } else if (find_devid(devid, subplatform_p_ids, + ARRAY_SIZE(subplatform_p_ids))) { + mask = BIT(INTEL_SUBPLATFORM_P); } GEM_BUG_ON(mask & ~INTEL_SUBPLATFORM_MASK); diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 1c150cd7dceb..23bf230aa104 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -89,6 +89,7 @@ enum intel_platform { INTEL_XEHPSDV, INTEL_DG2, INTEL_PONTEVECCHIO, + INTEL_METEORLAKE, INTEL_MAX_PLATFORMS }; @@ -126,6 +127,10 @@ enum intel_platform { */ #define INTEL_SUBPLATFORM_N 1 +/* MTL */ +#define INTEL_SUBPLATFORM_M 0 +#define INTEL_SUBPLATFORM_P 1 + enum intel_ppgtt_type { INTEL_PPGTT_NONE = I915_GEM_PPGTT_NONE, INTEL_PPGTT_ALIASING = I915_GEM_PPGTT_ALIASING, diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index 
e38d2db1c3e3..9a4a7fb55582 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -198,8 +198,7 @@ void intel_memory_region_debug(struct intel_memory_region *mr, if (mr->region_private) ttm_resource_manager_debug(mr->region_private, printer); else - drm_printf(printer, "total:%pa, available:%pa bytes\n", - &mr->total, &mr->avail); + drm_printf(printer, "total:%pa bytes\n", &mr->total); } static int intel_memory_region_memtest(struct intel_memory_region *mem, @@ -242,7 +241,6 @@ intel_memory_region_create(struct drm_i915_private *i915, mem->min_page_size = min_page_size; mem->ops = ops; mem->total = size; - mem->avail = mem->total; mem->type = type; mem->instance = instance; @@ -279,6 +277,20 @@ void intel_memory_region_set_name(struct intel_memory_region *mem, va_end(ap); } +void intel_memory_region_avail(struct intel_memory_region *mr, + u64 *avail, u64 *visible_avail) +{ + if (mr->type == INTEL_MEMORY_LOCAL) { + i915_ttm_buddy_man_avail(mr->region_private, + avail, visible_avail); + *avail <<= PAGE_SHIFT; + *visible_avail <<= PAGE_SHIFT; + } else { + *avail = mr->total; + *visible_avail = mr->total; + } +} + void intel_memory_region_destroy(struct intel_memory_region *mem) { int ret = 0; diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 3d8378c1b447..2953ed5c3248 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -75,7 +75,6 @@ struct intel_memory_region { resource_size_t io_size; resource_size_t min_page_size; resource_size_t total; - resource_size_t avail; u16 type; u16 instance; @@ -127,6 +126,9 @@ int intel_memory_region_reserve(struct intel_memory_region *mem, void intel_memory_region_debug(struct intel_memory_region *mr, struct drm_printer *printer); +void intel_memory_region_avail(struct intel_memory_region *mr, + u64 *avail, u64 *visible_avail); + struct intel_memory_region * i915_gem_ttm_system_setup(struct drm_i915_private *i915, u16 type, u16 instance); diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 62ff77445b01..575d67bc6ffe 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -152,6 +152,7 @@ int intel_region_ttm_fini(struct intel_memory_region *mem) * Convert an opaque TTM resource manager resource to a refcounted sg_table. * @mem: The memory region. * @res: The resource manager resource obtained from the TTM resource manager. + * @page_alignment: Required page alignment for each sg entry. Power of two. * * The gem backends typically use sg-tables for operations on the underlying * io_memory. 
So provide a way for the backends to translate the @@ -161,16 +162,19 @@ int intel_region_ttm_fini(struct intel_memory_region *mem) */ struct i915_refct_sgt * intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem, - struct ttm_resource *res) + struct ttm_resource *res, + u32 page_alignment) { if (mem->is_range_manager) { struct ttm_range_mgr_node *range_node = to_ttm_range_mgr_node(res); return i915_rsgt_from_mm_node(&range_node->mm_nodes[0], - mem->region.start); + mem->region.start, + page_alignment); } else { - return i915_rsgt_from_buddy_resource(res, mem->region.start); + return i915_rsgt_from_buddy_resource(res, mem->region.start, + page_alignment); } } diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h index cf9d86dcf409..5bb8d8b582ae 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.h +++ b/drivers/gpu/drm/i915/intel_region_ttm.h @@ -24,7 +24,8 @@ int intel_region_ttm_fini(struct intel_memory_region *mem); struct i915_refct_sgt * intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem, - struct ttm_resource *res); + struct ttm_resource *res, + u32 page_alignment); void intel_region_ttm_resource_free(struct intel_memory_region *mem, struct ttm_resource *res); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 8633bec18fa7..ab9f17fc85bc 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -742,7 +742,7 @@ static int pot_hole(struct i915_address_space *vm, u64 addr; for (addr = round_up(hole_start + min_alignment, step) - min_alignment; - addr <= round_down(hole_end - (2 * min_alignment), step) - min_alignment; + hole_end > addr && hole_end - addr >= 2 * min_alignment; addr += step) { err = i915_vma_pin(vma, 0, 0, addr | flags); if (err) { diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 73eb53edb8de..3b18e5905c86 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -451,7 +451,6 @@ out_put: static int igt_mock_max_segment(void *arg) { - const unsigned int max_segment = rounddown(UINT_MAX, PAGE_SIZE); struct intel_memory_region *mem = arg; struct drm_i915_private *i915 = mem->i915; struct i915_ttm_buddy_resource *res; @@ -460,7 +459,10 @@ static int igt_mock_max_segment(void *arg) struct drm_buddy *mm; struct list_head *blocks; struct scatterlist *sg; + I915_RND_STATE(prng); LIST_HEAD(objects); + unsigned int max_segment; + unsigned int ps; u64 size; int err = 0; @@ -472,7 +474,13 @@ static int igt_mock_max_segment(void *arg) */ size = SZ_8G; - mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0); + ps = PAGE_SIZE; + if (i915_prandom_u64_state(&prng) & 1) + ps = SZ_64K; /* For something like DG2 */ + + max_segment = round_down(UINT_MAX, ps); + + mem = mock_region_create(i915, 0, size, ps, 0, 0); if (IS_ERR(mem)) return PTR_ERR(mem); @@ -498,12 +506,21 @@ static int igt_mock_max_segment(void *arg) } for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) { + dma_addr_t daddr = sg_dma_address(sg); + if (sg->length > max_segment) { pr_err("%s: Created an oversized scatterlist entry, %u > %u\n", __func__, sg->length, max_segment); err = -EINVAL; goto out_close; } + + if (!IS_ALIGNED(daddr, ps)) { + pr_err("%s: Created an unaligned scatterlist entry, addr=%pa, ps=%u\n", + __func__, &daddr, ps); + err = -EINVAL; + goto out_close; + } } out_close: diff 
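/*
 * A minimal standalone sketch of the loop-bound change in pot_hole() above:
 * the walk is now bounded by "hole_end > addr && hole_end - addr >= 2 *
 * min_alignment", which checks the remaining space directly and, unlike an
 * end address computed by subtraction, cannot wrap around for small holes.
 * The two helpers below contrast the forms on plain uint64_t values.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Subtracting first can wrap when the hole is smaller than the reservation. */
static bool bound_by_subtraction(uint64_t addr, uint64_t hole_end, uint64_t need)
{
        return addr <= hole_end - need;         /* wraps if hole_end < need */
}

/* Checking the remaining space never wraps. */
static bool bound_by_remaining(uint64_t addr, uint64_t hole_end, uint64_t need)
{
        return hole_end > addr && hole_end - addr >= need;
}

int main(void)
{
        /* A 4 KiB hole that cannot fit a 2 * 64 KiB reservation. */
        uint64_t addr = 0, hole_end = 4096, need = 2 * 65536;

        printf("subtraction says fit: %d\n", bound_by_subtraction(addr, hole_end, need)); /* 1 (wrong) */
        printf("remaining   says fit: %d\n", bound_by_remaining(addr, hole_end, need));   /* 0 */
        return 0;
}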
--git a/drivers/gpu/drm/i915/selftests/mock_region.c b/drivers/gpu/drm/i915/selftests/mock_region.c index 670557ce1024..bac21fe84ca5 100644 --- a/drivers/gpu/drm/i915/selftests/mock_region.c +++ b/drivers/gpu/drm/i915/selftests/mock_region.c @@ -33,7 +33,8 @@ static int mock_region_get_pages(struct drm_i915_gem_object *obj) return PTR_ERR(obj->mm.res); obj->mm.rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region, - obj->mm.res); + obj->mm.res, + obj->mm.region->min_page_size); if (IS_ERR(obj->mm.rsgt)) { err = PTR_ERR(obj->mm.rsgt); goto err_free_resource; |
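/*
 * A minimal standalone sketch of the use-after-free fix in
 * i915_vma_destroy() further above: vma->vm->gt is copied into a local
 * before vma->vm->mutex is dropped, because the vm may be freed once the
 * mutex is released, and the saved pointer is then handed to
 * release_references() instead of being re-read through vma->vm.  The toy
 * types below only illustrate that "snapshot before unlocking" shape.
 */
#include <pthread.h>

struct gt { int id; };

struct vm {
        pthread_mutex_t mutex;
        struct gt *gt;
};

struct vma {
        struct vm *vm;
};

static void release_references(struct vma *vma, struct gt *gt)
{
        /* Must not dereference vma->vm here: it may already be freed. */
        (void)vma;
        (void)gt;
}

static void vma_destroy(struct vma *vma)
{
        struct gt *gt;

        pthread_mutex_lock(&vma->vm->mutex);
        /* vma->vm may be freed once the mutex is released, so snapshot gt now. */
        gt = vma->vm->gt;
        pthread_mutex_unlock(&vma->vm->mutex);

        release_references(vma, gt);
}

int main(void)
{
        struct gt gt = { .id = 0 };
        struct vm vm = { PTHREAD_MUTEX_INITIALIZER, &gt };
        struct vma vma = { .vm = &vm };

        vma_destroy(&vma);
        return 0;
}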