author     Jani Nikula   2022-08-29 14:44:38 +0300
committer  Jani Nikula   2022-08-29 15:14:59 +0300
commit     917bda9ab155032a02be1a57ebd4d949ae9e1528 (patch)
tree       7758d62783f5fb81777c37d0ad4e26eea6312c0f /drivers/gpu/drm/i915
parent     95086cb969b2cb8abe4984457f219ec70d24052e (diff)
parent     2c2d7a67defa198a8b8148dbaddc9e5554efebc8 (diff)
Merge drm/drm-next into drm-intel-next
Sync drm-intel-next with v6.0-rc as well as recent drm-intel-gt-next.
Since drm-next does not yet have commit f0c70d41e4e8 ("drm/i915/guc: remove
runtime info printing from time stamp logging"), which is so far only in
drm-intel-gt-next, that change needs to be applied as part of the merge
here in order to build.
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915')
56 files changed, 1264 insertions, 442 deletions
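The headline i915 change pulled in below replaces the unconditional TLB flush on every object unbind (the old I915_BO_WAS_BOUND_BIT path) with batched invalidations tracked by a per-GT sequence counter: an unbind records the seqno of the next full invalidation, and the flush at page-release time is skipped if such a barrier has already completed. The sketch below is a minimal userspace model of that idea, not the driver code: the struct and function names stand in for gt->tlb.seqno, intel_gt_next_invalidate_tlb_full() and intel_gt_invalidate_tlb() in the diff, and the arithmetic is simplified (the real code uses a seqcount_mutex_t, ALIGN(), runtime-PM checks and per-engine MMIO writes).

/*
 * Minimal model of the batched TLB invalidation scheme merged below
 * (illustrative only; simplified locking and arithmetic).
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct gt_model {
	uint32_t tlb_seqno;	/* stand-in for gt->tlb.seqno; even when idle */
};

/* Called when a VMA is unbound: remember which barrier would cover us. */
static uint32_t gt_next_invalidate_tlb_full(const struct gt_model *gt)
{
	return gt->tlb_seqno | 1;	/* next odd value >= current seqno */
}

/* True once a full invalidation has completed after @seqno was recorded. */
static bool tlb_seqno_passed(const struct gt_model *gt, uint32_t seqno)
{
	return (int32_t)(gt->tlb_seqno - seqno) > 0;
}

/* Called when the object's pages are finally released. */
static void gt_invalidate_tlb(struct gt_model *gt, uint32_t seqno)
{
	if (tlb_seqno_passed(gt, seqno))
		return;			/* a barrier already covered us */

	/* ... the driver writes the per-engine invalidation registers here ... */
	gt->tlb_seqno += 2;		/* one more full barrier completed */
}

int main(void)
{
	struct gt_model gt = { .tlb_seqno = 0 };
	uint32_t obj_a = gt_next_invalidate_tlb_full(&gt);	/* unbind A */
	uint32_t obj_b = gt_next_invalidate_tlb_full(&gt);	/* unbind B */

	gt_invalidate_tlb(&gt, obj_a);	/* releasing A flushes for real */
	printf("B can skip its flush: %s\n",
	       tlb_seqno_passed(&gt, obj_b) ? "yes" : "no");	/* prints "yes" */
	return 0;
}

In the actual patch the recorded value lives in obj->mm.tlb and the invalidation only runs while the GT holds a power reference (with_intel_gt_pm_if_awake), as can be seen in the i915_gem_pages.c and intel_gt.c hunks that follow.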
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 651b2309593a..458f010e46f3 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -27,7 +27,6 @@ #include <acpi/video.h> #include <linux/i2c.h> #include <linux/input.h> -#include <linux/intel-iommu.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/dma-resv.h> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index b7b2c14fd9e1..cd75b0ca2555 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -6,7 +6,6 @@ #include <linux/dma-resv.h> #include <linux/highmem.h> -#include <linux/intel-iommu.h> #include <linux/sync_file.h> #include <linux/uaccess.h> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index ccec4055fde3..389e9f157ca5 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -268,7 +268,7 @@ static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj) */ void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj) { - assert_object_held(obj); + assert_object_held_shared(obj); if (!list_empty(&obj->vma.list)) { struct i915_vma *vma; @@ -331,15 +331,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, continue; } - if (!i915_gem_object_trylock(obj, NULL)) { - /* busy, toss it back to the pile */ - if (llist_add(&obj->freed, &i915->mm.free_list)) - queue_delayed_work(i915->wq, &i915->mm.free_work, msecs_to_jiffies(10)); - continue; - } - __i915_gem_object_pages_fini(obj); - i915_gem_object_unlock(obj); __i915_gem_free_object(obj); /* But keep the pointer alive for RCU-protected lookups */ @@ -359,7 +351,7 @@ void i915_gem_flush_free_objects(struct drm_i915_private *i915) static void __i915_gem_free_work(struct work_struct *work) { struct drm_i915_private *i915 = - container_of(work, struct drm_i915_private, mm.free_work.work); + container_of(work, struct drm_i915_private, mm.free_work); i915_gem_flush_free_objects(i915); } @@ -391,7 +383,7 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj) */ if (llist_add(&obj->freed, &i915->mm.free_list)) - queue_delayed_work(i915->wq, &i915->mm.free_work, 0); + queue_work(i915->wq, &i915->mm.free_work); } void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj, @@ -745,7 +737,7 @@ bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj) void i915_gem_init__objects(struct drm_i915_private *i915) { - INIT_DELAYED_WORK(&i915->mm.free_work, __i915_gem_free_work); + INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); } void i915_objects_module_exit(void) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 5cf36a130061..9f6b14ec189a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -335,7 +335,6 @@ struct drm_i915_gem_object { #define I915_BO_READONLY BIT(7) #define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */ #define I915_BO_PROTECTED BIT(9) -#define I915_BO_WAS_BOUND_BIT 10 /** * @mem_flags - Mutable placement-related flags * @@ -616,6 +615,8 @@ struct drm_i915_gem_object { * pages were last acquired. 
*/ bool dirty:1; + + u32 tlb; } mm; struct { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 8df8ae0e1dea..4df50b049cea 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -6,14 +6,15 @@ #include <drm/drm_cache.h> +#include "gt/intel_gt.h" +#include "gt/intel_gt_pm.h" + #include "i915_drv.h" #include "i915_gem_object.h" #include "i915_scatterlist.h" #include "i915_gem_lmem.h" #include "i915_gem_mman.h" -#include "gt/intel_gt.h" - void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages, unsigned int sg_page_sizes) @@ -190,6 +191,18 @@ static void unmap_object(struct drm_i915_gem_object *obj, void *ptr) vunmap(ptr); } +static void flush_tlb_invalidate(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct intel_gt *gt = to_gt(i915); + + if (!obj->mm.tlb) + return; + + intel_gt_invalidate_tlb(gt, obj->mm.tlb); + obj->mm.tlb = 0; +} + struct sg_table * __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) { @@ -215,13 +228,7 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) __i915_gem_object_reset_page_iter(obj); obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; - if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); - intel_wakeref_t wakeref; - - with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref) - intel_gt_invalidate_tlbs(to_gt(i915)); - } + flush_tlb_invalidate(obj); return pages; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 4eed3dd90ba8..f42ca1179f37 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -75,7 +75,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, if (size > resource_size(&mr->region)) return -ENOMEM; - if (sg_alloc_table(st, page_count, GFP_KERNEL)) + if (sg_alloc_table(st, page_count, GFP_KERNEL | __GFP_NOWARN)) return -ENOMEM; /* @@ -137,7 +137,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st, * trigger the out-of-memory killer and for * this we want __GFP_RETRY_MAYFAIL. 
*/ - gfp |= __GFP_RETRY_MAYFAIL; + gfp |= __GFP_RETRY_MAYFAIL | __GFP_NOWARN; } } while (1); @@ -209,7 +209,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS); rebuild_st: - st = kmalloc(sizeof(*st), GFP_KERNEL); + st = kmalloc(sizeof(*st), GFP_KERNEL | __GFP_NOWARN); if (!st) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index 1030053571a2..8dc5c8874d8a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -426,7 +426,8 @@ void i915_gem_driver_register__shrinker(struct drm_i915_private *i915) i915->mm.shrinker.count_objects = i915_gem_shrinker_count; i915->mm.shrinker.seeks = DEFAULT_SEEKS; i915->mm.shrinker.batch = 4096; - drm_WARN_ON(&i915->drm, register_shrinker(&i915->mm.shrinker)); + drm_WARN_ON(&i915->drm, register_shrinker(&i915->mm.shrinker, + "drm-i915_gem")); i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom; drm_WARN_ON(&i915->drm, register_oom_notifier(&i915->mm.oom_notifier)); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 094f06b4ce33..8423df021b71 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -216,8 +216,8 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, * However...! * * The mmu-notifier can be invalidated for a - * migrate_page, that is alreadying holding the lock - * on the page. Such a try_to_unmap() will result + * migrate_folio, that is alreadying holding the lock + * on the folio. Such a try_to_unmap() will result * in us calling put_pages() and so recursively try * to lock the page. 
We avoid that deadlock with * a trylock_page() and in exchange we risk missing diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index d2d75d9c0c8d..04eacae1aca5 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -275,10 +275,17 @@ struct intel_context { u8 child_index; /** @guc: GuC specific members for parallel submission */ struct { - /** @wqi_head: head pointer in work queue */ + /** @wqi_head: cached head pointer in work queue */ u16 wqi_head; - /** @wqi_tail: tail pointer in work queue */ + /** @wqi_tail: cached tail pointer in work queue */ u16 wqi_tail; + /** @wq_head: pointer to the actual head in work queue */ + u32 *wq_head; + /** @wq_tail: pointer to the actual head in work queue */ + u32 *wq_tail; + /** @wq_status: pointer to the status in work queue */ + u32 *wq_status; + /** * @parent_page: page in context state (ce->state) used * by parent for work queue, process descriptor diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 7b059fdf8cbc..e4bac2431e41 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -11,7 +11,9 @@ #include "pxp/intel_pxp.h" #include "i915_drv.h" +#include "i915_perf_oa_regs.h" #include "intel_context.h" +#include "intel_engine_pm.h" #include "intel_engine_regs.h" #include "intel_ggtt_gmch.h" #include "intel_gt.h" @@ -37,8 +39,6 @@ static void __intel_gt_init_early(struct intel_gt *gt) { spin_lock_init(>->irq_lock); - mutex_init(>->tlb_invalidate_lock); - INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); @@ -49,6 +49,8 @@ static void __intel_gt_init_early(struct intel_gt *gt) intel_gt_init_reset(gt); intel_gt_init_requests(gt); intel_gt_init_timelines(gt); + mutex_init(>->tlb.invalidate_lock); + seqcount_mutex_init(>->tlb.seqno, >->tlb.invalidate_lock); intel_gt_pm_init_early(gt); intel_uc_init_early(>->uc); @@ -769,6 +771,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915) intel_gt_fini_requests(gt); intel_gt_fini_reset(gt); intel_gt_fini_timelines(gt); + mutex_destroy(>->tlb.invalidate_lock); intel_engines_free(gt); } } @@ -907,7 +910,7 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, return rb; } -void intel_gt_invalidate_tlbs(struct intel_gt *gt) +static void mmio_invalidate_full(struct intel_gt *gt) { static const i915_reg_t gen8_regs[] = { [RENDER_CLASS] = GEN8_RTCR, @@ -925,13 +928,11 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) struct drm_i915_private *i915 = gt->i915; struct intel_uncore *uncore = gt->uncore; struct intel_engine_cs *engine; + intel_engine_mask_t awake, tmp; enum intel_engine_id id; const i915_reg_t *regs; unsigned int num = 0; - if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) - return; - if (GRAPHICS_VER(i915) == 12) { regs = gen12_regs; num = ARRAY_SIZE(gen12_regs); @@ -946,28 +947,41 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) "Platform does not implement TLB invalidation!")) return; - GEM_TRACE("\n"); - - assert_rpm_wakelock_held(&i915->runtime_pm); - - mutex_lock(>->tlb_invalidate_lock); intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */ + awake = 0; for_each_engine(engine, gt, id) { struct reg_and_bit rb; + if (!intel_engine_pm_is_awake(engine)) + continue; + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); if (!i915_mmio_reg_offset(rb.reg)) continue; 
intel_uncore_write_fw(uncore, rb.reg, rb.bit); + awake |= engine->mask; } + GT_TRACE(gt, "invalidated engines %08x\n", awake); + + /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */ + if (awake && + (IS_TIGERLAKE(i915) || + IS_DG1(i915) || + IS_ROCKETLAKE(i915) || + IS_ALDERLAKE_S(i915) || + IS_ALDERLAKE_P(i915))) + intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1); + spin_unlock_irq(&uncore->lock); - for_each_engine(engine, gt, id) { + for_each_engine_masked(engine, gt, awake, tmp) { + struct reg_and_bit rb; + /* * HW architecture suggest typical invalidation time at 40us, * with pessimistic cases up to 100us and a recommendation to @@ -975,12 +989,8 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) */ const unsigned int timeout_us = 100; const unsigned int timeout_ms = 4; - struct reg_and_bit rb; rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); - if (!i915_mmio_reg_offset(rb.reg)) - continue; - if (__intel_wait_for_register_fw(uncore, rb.reg, rb.bit, 0, timeout_us, timeout_ms, @@ -997,5 +1007,38 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) * transitions. */ intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL); - mutex_unlock(>->tlb_invalidate_lock); +} + +static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno) +{ + u32 cur = intel_gt_tlb_seqno(gt); + + /* Only skip if a *full* TLB invalidate barrier has passed */ + return (s32)(cur - ALIGN(seqno, 2)) > 0; +} + +void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno) +{ + intel_wakeref_t wakeref; + + if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) + return; + + if (intel_gt_is_wedged(gt)) + return; + + if (tlb_seqno_passed(gt, seqno)) + return; + + with_intel_gt_pm_if_awake(gt, wakeref) { + mutex_lock(>->tlb.invalidate_lock); + if (tlb_seqno_passed(gt, seqno)) + goto unlock; + + mmio_invalidate_full(gt); + + write_seqcount_invalidate(>->tlb.seqno); +unlock: + mutex_unlock(>->tlb.invalidate_lock); + } } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 82d6f248d876..40b06adf509a 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -101,6 +101,16 @@ void intel_gt_info_print(const struct intel_gt_info *info, void intel_gt_watchdog_work(struct work_struct *work); -void intel_gt_invalidate_tlbs(struct intel_gt *gt); +static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt) +{ + return seqprop_sequence(>->tlb.seqno); +} + +static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt) +{ + return intel_gt_tlb_seqno(gt) | 1; +} + +void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno); #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index bc898df7a48c..6c9a46452364 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -55,6 +55,17 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt) for (tmp = 1, intel_gt_pm_get(gt); tmp; \ intel_gt_pm_put(gt), tmp = 0) +/** + * with_intel_gt_pm_if_awake - if GT is PM awake, get a reference to prevent + * it to sleep, run some code and then asynchrously put the reference + * away. + * + * @gt: pointer to the gt + * @wf: pointer to a temporary wakeref. 
+ */ +#define with_intel_gt_pm_if_awake(gt, wf) \ + for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0) + static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) { return intel_wakeref_wait_for_idle(>->wakeref); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 60d6eb5f245b..94f9ddcfb3a5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -259,6 +259,9 @@ #define GEN9_PREEMPT_GPGPU_COMMAND_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(1, 0) #define GEN9_PREEMPT_GPGPU_LEVEL_MASK GEN9_PREEMPT_GPGPU_LEVEL(1, 1) +#define DRAW_WATERMARK _MMIO(0x26c0) +#define VERT_WM_VAL REG_GENMASK(9, 0) + #define GEN12_GLOBAL_MOCS(i) _MMIO(0x4000 + (i) * 4) /* Global MOCS regs */ #define RENDER_HWS_PGA_GEN7 _MMIO(0x4080) @@ -374,6 +377,9 @@ #define CHICKEN_RASTER_1 _MMIO(0x6204) #define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) +#define CHICKEN_RASTER_2 _MMIO(0x6208) +#define TBIMR_FAST_CLIP REG_BIT(5) + #define VFLSKPD _MMIO(0x62a8) #define DIS_OVER_FETCH_CACHE REG_BIT(1) #define DIS_MULT_MISS_RD_SQUASH REG_BIT(0) @@ -1007,6 +1013,8 @@ #define GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC (1 << 9) #define GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC (1 << 7) +#define GUCPMTIMESTAMP _MMIO(0xc3e8) + #define __GEN9_RCS0_MOCS0 0xc800 #define GEN9_GFX_MOCS(i) _MMIO(__GEN9_RCS0_MOCS0 + (i) * 4) #define __GEN9_VCS0_MOCS0 0xc900 @@ -1078,6 +1086,7 @@ #define GEN10_SAMPLER_MODE _MMIO(0xe18c) #define ENABLE_SMALLPL REG_BIT(15) +#define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) #define GEN11_SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) #define GEN9_HALF_SLICE_CHICKEN7 _MMIO(0xe194) @@ -1123,6 +1132,8 @@ #define RT_CTRL _MMIO(0xe530) #define DIS_NULL_QUERY REG_BIT(10) +#define STACKID_CTRL REG_GENMASK(6, 5) +#define STACKID_CTRL_512 REG_FIELD_PREP(STACKID_CTRL, 0x2) #define EU_PERF_CNTL1 _MMIO(0xe558) #define EU_PERF_CNTL5 _MMIO(0xe55c) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c index 9e4ebf53379b..d651ccd0ab20 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c @@ -22,11 +22,6 @@ bool is_object_gt(struct kobject *kobj) return !strncmp(kobj->name, "gt", 2); } -static struct intel_gt *kobj_to_gt(struct kobject *kobj) -{ - return container_of(kobj, struct intel_gt, sysfs_gt); -} - struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, const char *name) { @@ -101,6 +96,10 @@ void intel_gt_sysfs_register(struct intel_gt *gt) gt->i915->sysfs_gt, "gt%d", gt->info.id)) goto exit_fail; + gt->sysfs_defaults = kobject_create_and_add(".defaults", >->sysfs_gt); + if (!gt->sysfs_defaults) + goto exit_fail; + intel_gt_sysfs_pm_init(gt, >->sysfs_gt); return; @@ -113,5 +112,6 @@ exit_fail: void intel_gt_sysfs_unregister(struct intel_gt *gt) { + kobject_put(gt->sysfs_defaults); kobject_put(>->sysfs_gt); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h index a99aa7e8b01a..6232923a420d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h @@ -10,6 +10,7 @@ #include <linux/kobject.h> #include "i915_gem.h" /* GEM_BUG_ON() */ +#include "intel_gt_types.h" struct intel_gt; @@ -22,6 +23,11 @@ intel_gt_create_kobj(struct intel_gt *gt, struct kobject *dir, const char *name); +static inline struct intel_gt *kobj_to_gt(struct kobject *kobj) +{ + return container_of(kobj, struct intel_gt, sysfs_gt); +} + void intel_gt_sysfs_register(struct 
intel_gt *gt); void intel_gt_sysfs_unregister(struct intel_gt *gt); struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index 73a8b46e0234..e066cc33d9f2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -727,6 +727,34 @@ static const struct attribute *media_perf_power_attrs[] = { NULL }; +static ssize_t +default_min_freq_mhz_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_gt *gt = kobj_to_gt(kobj->parent); + + return sysfs_emit(buf, "%u\n", gt->defaults.min_freq); +} + +static struct kobj_attribute default_min_freq_mhz = +__ATTR(rps_min_freq_mhz, 0444, default_min_freq_mhz_show, NULL); + +static ssize_t +default_max_freq_mhz_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_gt *gt = kobj_to_gt(kobj->parent); + + return sysfs_emit(buf, "%u\n", gt->defaults.max_freq); +} + +static struct kobj_attribute default_max_freq_mhz = +__ATTR(rps_max_freq_mhz, 0444, default_max_freq_mhz_show, NULL); + +static const struct attribute * const rps_defaults_attrs[] = { + &default_min_freq_mhz.attr, + &default_max_freq_mhz.attr, + NULL +}; + static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj, const struct attribute * const *attrs) { @@ -776,4 +804,10 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj) "failed to create gt%u media_perf_power_attrs sysfs (%pe)\n", gt->info.id, ERR_PTR(ret)); } + + ret = sysfs_create_files(gt->sysfs_defaults, rps_defaults_attrs); + if (ret) + drm_warn(>->i915->drm, + "failed to add gt%u rps defaults (%pe)\n", + gt->info.id, ERR_PTR(ret)); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index df708802889d..4d56f7d5a3be 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -11,6 +11,7 @@ #include <linux/llist.h> #include <linux/mutex.h> #include <linux/notifier.h> +#include <linux/seqlock.h> #include <linux/spinlock.h> #include <linux/types.h> #include <linux/workqueue.h> @@ -75,6 +76,11 @@ enum intel_submission_method { INTEL_SUBMISSION_GUC, }; +struct gt_defaults { + u32 min_freq; + u32 max_freq; +}; + struct intel_gt { struct drm_i915_private *i915; struct intel_uncore *uncore; @@ -83,7 +89,22 @@ struct intel_gt { struct intel_uc uc; struct intel_gsc gsc; - struct mutex tlb_invalidate_lock; + struct { + /* Serialize global tlb invalidations */ + struct mutex invalidate_lock; + + /* + * Batch TLB invalidations + * + * After unbinding the PTE, we need to ensure the TLB + * are invalidated prior to releasing the physical pages. + * But we only need one such invalidation for all unbinds, + * so we track how many TLB invalidations have been + * performed since unbind the PTE and only emit an extra + * invalidate if no full barrier has been passed. 
+ */ + seqcount_mutex_t seqno; + } tlb; struct i915_wa_list wa_list; @@ -235,6 +256,10 @@ struct intel_gt { /* gt/gtN sysfs */ struct kobject sysfs_gt; + + /* sysfs defaults per gt */ + struct gt_defaults defaults; + struct kobject *sysfs_defaults; }; enum intel_gt_scratch_field { diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index 2c35324b5f68..aaaf1906026c 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -511,44 +511,16 @@ static inline u32 *i915_flush_dw(u32 *cmd, u32 flags) return cmd; } -static u32 calc_ctrl_surf_instr_size(struct drm_i915_private *i915, int size) -{ - u32 num_cmds, num_blks, total_size; - - if (!GET_CCS_BYTES(i915, size)) - return 0; - - /* - * XY_CTRL_SURF_COPY_BLT transfers CCS in 256 byte - * blocks. one XY_CTRL_SURF_COPY_BLT command can - * transfer upto 1024 blocks. - */ - num_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size), - NUM_CCS_BYTES_PER_BLOCK); - num_cmds = DIV_ROUND_UP(num_blks, NUM_CCS_BLKS_PER_XFER); - total_size = XY_CTRL_SURF_INSTR_SIZE * num_cmds; - - /* - * Adding a flush before and after XY_CTRL_SURF_COPY_BLT - */ - total_size += 2 * MI_FLUSH_DW_SIZE; - - return total_size; -} - static int emit_copy_ccs(struct i915_request *rq, u32 dst_offset, u8 dst_access, u32 src_offset, u8 src_access, int size) { struct drm_i915_private *i915 = rq->engine->i915; int mocs = rq->engine->gt->mocs.uc_index << 1; - u32 num_ccs_blks, ccs_ring_size; + u32 num_ccs_blks; u32 *cs; - ccs_ring_size = calc_ctrl_surf_instr_size(i915, size); - WARN_ON(!ccs_ring_size); - - cs = intel_ring_begin(rq, round_up(ccs_ring_size, 2)); + cs = intel_ring_begin(rq, 12); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -583,8 +555,7 @@ static int emit_copy_ccs(struct i915_request *rq, FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs); cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS); - if (ccs_ring_size & 1) - *cs++ = MI_NOOP; + *cs++ = MI_NOOP; intel_ring_advance(rq, cs); @@ -638,40 +609,38 @@ static int emit_copy(struct i915_request *rq, return 0; } -static int scatter_list_length(struct scatterlist *sg) +static u64 scatter_list_length(struct scatterlist *sg) { - int len = 0; + u64 len = 0; while (sg && sg_dma_len(sg)) { len += sg_dma_len(sg); sg = sg_next(sg); - }; + } return len; } -static void +static int calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem, - int *src_sz, u32 bytes_to_cpy, u32 ccs_bytes_to_cpy) + u64 bytes_to_cpy, u64 ccs_bytes_to_cpy) { - if (ccs_bytes_to_cpy) { - if (!src_is_lmem) - /* - * When CHUNK_SZ is passed all the pages upto CHUNK_SZ - * will be taken for the blt. in Flat-ccs supported - * platform Smem obj will have more pages than required - * for main meory hence limit it to the required size - * for main memory - */ - *src_sz = min_t(int, bytes_to_cpy, CHUNK_SZ); - } else { /* ccs handling is not required */ - *src_sz = CHUNK_SZ; - } + if (ccs_bytes_to_cpy && !src_is_lmem) + /* + * When CHUNK_SZ is passed all the pages upto CHUNK_SZ + * will be taken for the blt. 
in Flat-ccs supported + * platform Smem obj will have more pages than required + * for main meory hence limit it to the required size + * for main memory + */ + return min_t(u64, bytes_to_cpy, CHUNK_SZ); + else + return CHUNK_SZ; } -static void get_ccs_sg_sgt(struct sgt_dma *it, u32 bytes_to_cpy) +static void get_ccs_sg_sgt(struct sgt_dma *it, u64 bytes_to_cpy) { - u32 len; + u64 len; do { GEM_BUG_ON(!it->sg || !sg_dma_len(it->sg)); @@ -702,13 +671,13 @@ intel_context_migrate_copy(struct intel_context *ce, { struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst), it_ccs; struct drm_i915_private *i915 = ce->engine->i915; - u32 ccs_bytes_to_cpy = 0, bytes_to_cpy; + u64 ccs_bytes_to_cpy = 0, bytes_to_cpy; enum i915_cache_level ccs_cache_level; u32 src_offset, dst_offset; u8 src_access, dst_access; struct i915_request *rq; - int src_sz, dst_sz; - bool ccs_is_src; + u64 src_sz, dst_sz; + bool ccs_is_src, overwrite_ccs; int err; GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm); @@ -749,6 +718,8 @@ intel_context_migrate_copy(struct intel_context *ce, get_ccs_sg_sgt(&it_ccs, bytes_to_cpy); } + overwrite_ccs = HAS_FLAT_CCS(i915) && !ccs_bytes_to_cpy && dst_is_lmem; + src_offset = 0; dst_offset = CHUNK_SZ; if (HAS_64K_PAGES(ce->engine->i915)) { @@ -788,8 +759,8 @@ intel_context_migrate_copy(struct intel_context *ce, if (err) goto out_rq; - calculate_chunk_sz(i915, src_is_lmem, &src_sz, - bytes_to_cpy, ccs_bytes_to_cpy); + src_sz = calculate_chunk_sz(i915, src_is_lmem, + bytes_to_cpy, ccs_bytes_to_cpy); len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem, src_offset, src_sz); @@ -852,6 +823,25 @@ intel_context_migrate_copy(struct intel_context *ce, if (err) goto out_rq; ccs_bytes_to_cpy -= ccs_sz; + } else if (overwrite_ccs) { + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto out_rq; + + /* + * While we can't always restore/manage the CCS state, + * we still need to ensure we don't leak the CCS state + * from the previous user, so make sure we overwrite it + * with something. + */ + err = emit_copy_ccs(rq, dst_offset, INDIRECT_ACCESS, + dst_offset, DIRECT_ACCESS, len); + if (err) + goto out_rq; + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto out_rq; } /* Arbitration is re-enabled between requests. 
*/ diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c index bf8570ae749a..7ecfa672f738 100644 --- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c @@ -206,8 +206,12 @@ void ppgtt_bind_vma(struct i915_address_space *vm, void ppgtt_unbind_vma(struct i915_address_space *vm, struct i915_vma_resource *vma_res) { - if (vma_res->allocated) - vm->clear_range(vm, vma_res->start, vma_res->vma_size); + if (!vma_res->allocated) + return; + + vm->clear_range(vm, vma_res->start, vma_res->vma_size); + if (vma_res->tlb) + vma_invalidate_tlb(vm, vma_res->tlb); } static unsigned long pd_count(u64 size, int shift) diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index 360b11fd57bb..f3ad93db0b21 100644 --- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -17,6 +17,7 @@ #include "gt/intel_gt_mcr.h" #include "gt/intel_gt_regs.h" +#ifdef CONFIG_64BIT static void _release_bars(struct pci_dev *pdev) { int resno; @@ -111,6 +112,9 @@ static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t pci_assign_unassigned_bus_resources(pdev->bus); pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd); } +#else +static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t lmem_size) {} +#endif static int region_lmem_release(struct intel_memory_region *mem) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index c68d36fb5bbd..1211774e1d91 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1281,9 +1281,6 @@ static void intel_gt_reset_global(struct intel_gt *gt, intel_wedge_on_timeout(&w, gt, 5 * HZ) { intel_display_prepare_reset(gt->i915); - /* Flush everyone using a resource about to be clobbered */ - synchronize_srcu_expedited(>->reset.backoff_srcu); - intel_gt_reset(gt, engine_mask, reason); intel_display_finish_reset(gt->i915); @@ -1392,6 +1389,9 @@ void intel_gt_handle_error(struct intel_gt *gt, } } + /* Flush everyone using a resource about to be clobbered */ + synchronize_srcu_expedited(>->reset.backoff_srcu); + intel_gt_reset_global(gt, engine_mask, msg); if (!intel_uc_uses_guc_submission(>->uc)) { diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index fb3f57ee450b..c7d381ad90cf 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1979,7 +1979,9 @@ void intel_rps_init(struct intel_rps *rps) /* Derive initial user preferences/limits from the hardware limits */ rps->max_freq_softlimit = rps->max_freq; + rps_to_gt(rps)->defaults.max_freq = rps->max_freq_softlimit; rps->min_freq_softlimit = rps->min_freq; + rps_to_gt(rps)->defaults.min_freq = rps->min_freq_softlimit; /* After setting max-softlimit, find the overclock max freq */ if (GRAPHICS_VER(i915) == 6 || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) { diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index e8111fce56d0..31e129329fb0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -568,6 +568,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { + wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP); wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK, 
REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)); wa_add(wal, @@ -2102,13 +2103,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) /* Wa_1509235366:dg2 */ wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE); - - /* - * The following are not actually "workarounds" but rather - * recommended tuning settings documented in the bspec's - * performance guide section. - */ - wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); } if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { @@ -2119,6 +2113,13 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); } + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || + IS_DG2_G11(i915) || IS_DG2_G12(i915)) { + /* Wa_1509727124:dg2 */ + wa_masked_en(wal, GEN10_SAMPLER_MODE, + SC_DISABLE_POWER_OPTIMIZATION_EBB); + } + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) || IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { /* Wa_14012419201:dg2 */ @@ -2195,15 +2196,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || - IS_DG2_G11(i915)) { - /* Wa_22012654132:dg2 */ - wa_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), - 0 /* write-only, so skip validation */, - true); - } - /* Wa_14013202645:dg2 */ if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) @@ -2670,6 +2662,49 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } /* + * The bspec performance guide has recommended MMIO tuning settings. These + * aren't truly "workarounds" but we want to program them with the same + * workaround infrastructure to ensure that they're automatically added to + * the GuC save/restore lists, re-applied at the right times, and checked for + * any conflicting programming requested by real workarounds. + * + * Programming settings should be added here only if their registers are not + * part of an engine's register state context. If a register is part of a + * context, then any tuning settings should be programmed in an appropriate + * function invoked by __intel_engine_init_ctx_wa(). + */ +static void +add_render_compute_tuning_settings(struct drm_i915_private *i915, + struct i915_wa_list *wal) +{ + if (IS_PONTEVECCHIO(i915)) { + wa_write(wal, XEHPC_L3SCRUB, + SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK); + } + + if (IS_DG2(i915)) { + wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); + wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); + wa_write_clr_set(wal, DRAW_WATERMARK, VERT_WM_VAL, + REG_FIELD_PREP(VERT_WM_VAL, 0x3FF)); + + /* + * This is also listed as Wa_22012654132 for certain DG2 + * steppings, but the tuning setting programming is a superset + * since it applies to all DG2 variants and steppings. + * + * Note that register 0xE420 is write-only and cannot be read + * back for verification on DG2 (due to Wa_14012342262), so + * we need to explicitly skip the readback. + */ + wa_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), + 0 /* write-only, so skip validation */, + true); + } +} + +/* * The workarounds in this function apply to shared registers in * the general render reset domain that aren't tied to a * specific engine. 
Since all render+compute engines get reset @@ -2683,14 +2718,9 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li { struct drm_i915_private *i915 = engine->i915; - if (IS_PONTEVECCHIO(i915)) { - /* - * The following is not actually a "workaround" but rather - * a recommended tuning setting documented in the bspec's - * performance guide section. - */ - wa_write(wal, XEHPC_L3SCRUB, SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK); + add_render_compute_tuning_settings(i915, wal); + if (IS_PONTEVECCHIO(i915)) { /* Wa_16016694945 */ wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC); } diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index 09f8cd2d0e2c..1e08b2473b99 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -2077,7 +2077,7 @@ static int __cancel_active0(struct live_preempt_cancel *arg) goto out; } - intel_context_set_banned(rq->context); + intel_context_ban(rq->context, rq); err = intel_engine_pulse(arg->engine); if (err) goto out; @@ -2136,7 +2136,7 @@ static int __cancel_active1(struct live_preempt_cancel *arg) if (err) goto out; - intel_context_set_banned(rq[1]->context); + intel_context_ban(rq[1]->context, rq[1]); err = intel_engine_pulse(arg->engine); if (err) goto out; @@ -2219,7 +2219,7 @@ static int __cancel_queued(struct live_preempt_cancel *arg) if (err) goto out; - intel_context_set_banned(rq[2]->context); + intel_context_ban(rq[2]->context, rq[2]); err = intel_engine_pulse(arg->engine); if (err) goto out; @@ -2234,7 +2234,13 @@ static int __cancel_queued(struct live_preempt_cancel *arg) goto out; } - if (rq[1]->fence.error != 0) { + /* + * The behavior between having semaphores and not is different. With + * semaphores the subsequent request is on the hardware and not cancelled + * while without the request is held in the driver and cancelled. 
+ */ + if (intel_engine_has_semaphores(rq[1]->engine) && + rq[1]->fence.error != 0) { pr_err("Normal inflight1 request did not complete\n"); err = -EINVAL; goto out; @@ -2282,7 +2288,7 @@ static int __cancel_hostile(struct live_preempt_cancel *arg) goto out; } - intel_context_set_banned(rq->context); + intel_context_ban(rq->context, rq); err = intel_engine_pulse(arg->engine); /* force reset */ if (err) goto out; diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 6493265d5f64..7f3bb1d34dfb 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -1302,13 +1302,15 @@ static int igt_reset_wait(void *arg) { struct intel_gt *gt = arg; struct i915_gpu_error *global = >->i915->gpu_error; - struct intel_engine_cs *engine = gt->engine[RCS0]; + struct intel_engine_cs *engine; struct i915_request *rq; unsigned int reset_count; struct hang h; long timeout; int err; + engine = intel_selftest_find_any_engine(gt); + if (!engine || !intel_engine_can_store_dword(engine)) return 0; @@ -1432,7 +1434,7 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, int (*fn)(void *), unsigned int flags) { - struct intel_engine_cs *engine = gt->engine[RCS0]; + struct intel_engine_cs *engine; struct drm_i915_gem_object *obj; struct task_struct *tsk = NULL; struct i915_request *rq; @@ -1444,6 +1446,8 @@ static int __igt_reset_evict_vma(struct intel_gt *gt, if (!gt->ggtt->num_fences && flags & EXEC_OBJECT_NEEDS_FENCE) return 0; + engine = intel_selftest_find_any_engine(gt); + if (!engine || !intel_engine_can_store_dword(engine)) return 0; @@ -1819,12 +1823,14 @@ static int igt_handle_error(void *arg) { struct intel_gt *gt = arg; struct i915_gpu_error *global = >->i915->gpu_error; - struct intel_engine_cs *engine = gt->engine[RCS0]; + struct intel_engine_cs *engine; struct hang h; struct i915_request *rq; struct i915_gpu_coredump *error; int err; + engine = intel_selftest_find_any_engine(gt); + /* Check that we can issue a global GPU and engine reset */ if (!intel_has_reset_engine(gt)) diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index 4ef9990ed7f8..29ef8afc8c2e 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -122,6 +122,9 @@ enum intel_guc_action { INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002, INTEL_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003, INTEL_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004, + INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY = 0x1005, + INTEL_GUC_ACTION_V69_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006, + INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007, INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008, INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009, INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B, diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h index df83c1cc7c7a..28b8387f97b7 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h @@ -37,6 +37,7 @@ * | | | - _`GUC_CTB_STATUS_OVERFLOW` = 1 (head/tail too large) | * | | | - _`GUC_CTB_STATUS_UNDERFLOW` = 2 (truncated message) | * | | | - _`GUC_CTB_STATUS_MISMATCH` = 4 (head/tail modified) | + * | | | - _`GUC_CTB_STATUS_UNUSED` = 8 (CTB is not in use) | * 
+---+-------+--------------------------------------------------------------+ * |...| | RESERVED = MBZ | * +---+-------+--------------------------------------------------------------+ @@ -49,9 +50,10 @@ struct guc_ct_buffer_desc { u32 tail; u32 status; #define GUC_CTB_STATUS_NO_ERROR 0 -#define GUC_CTB_STATUS_OVERFLOW (1 << 0) -#define GUC_CTB_STATUS_UNDERFLOW (1 << 1) -#define GUC_CTB_STATUS_MISMATCH (1 << 2) +#define GUC_CTB_STATUS_OVERFLOW BIT(0) +#define GUC_CTB_STATUS_UNDERFLOW BIT(1) +#define GUC_CTB_STATUS_MISMATCH BIT(2) +#define GUC_CTB_STATUS_UNUSED BIT(3) u32 reserved[13]; } __packed; static_assert(sizeof(struct guc_ct_buffer_desc) == 64); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 2706a8c65090..50af247e84c4 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -389,6 +389,23 @@ void intel_guc_write_params(struct intel_guc *guc) intel_uncore_forcewake_put(uncore, FORCEWAKE_GT); } +void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p) +{ + struct intel_gt *gt = guc_to_gt(guc); + intel_wakeref_t wakeref; + u32 stamp = 0; + u64 ktime; + + with_intel_runtime_pm(>->i915->runtime_pm, wakeref) + stamp = intel_uncore_read(gt->uncore, GUCPMTIMESTAMP); + ktime = ktime_get_boottime_ns(); + + drm_printf(p, "Kernel timestamp: 0x%08llX [%llu]\n", ktime, ktime); + drm_printf(p, "GuC timestamp: 0x%08X [%u]\n", stamp, stamp); + drm_printf(p, "CS timestamp frequency: %u Hz, %u ns\n", + gt->clock_frequency, gt->clock_period_ns); +} + int intel_guc_init(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index d0d99f178f2d..804133df1ac9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -170,6 +170,11 @@ struct intel_guc { /** @ads_engine_usage_size: size of engine usage in the ADS */ u32 ads_engine_usage_size; + /** @lrc_desc_pool_v69: object allocated to hold the GuC LRC descriptor pool */ + struct i915_vma *lrc_desc_pool_v69; + /** @lrc_desc_pool_vaddr_v69: contents of the GuC LRC descriptor pool */ + void *lrc_desc_pool_vaddr_v69; + /** * @context_lookup: used to resolve intel_context from guc_id, if a * context is present in this structure it is registered with the GuC @@ -459,4 +464,6 @@ void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p); void intel_guc_write_barrier(struct intel_guc *guc); +void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p); + #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index ba7541f3ca61..74cbe8eaf531 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -464,7 +464,11 @@ static void fill_engine_enable_masks(struct intel_gt *gt, } #define LR_HW_CONTEXT_SIZE (80 * sizeof(u32)) -#define LRC_SKIP_SIZE (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE) +#define XEHP_LR_HW_CONTEXT_SIZE (96 * sizeof(u32)) +#define LR_HW_CONTEXT_SZ(i915) (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50) ? \ + XEHP_LR_HW_CONTEXT_SIZE : \ + LR_HW_CONTEXT_SIZE) +#define LRC_SKIP_SIZE(i915) (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SZ(i915)) static int guc_prep_golden_context(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -525,7 +529,7 @@ static int guc_prep_golden_context(struct intel_guc *guc) * on all engines). 
*/ ads_blob_write(guc, ads.eng_state_size[guc_class], - real_size - LRC_SKIP_SIZE); + real_size - LRC_SKIP_SIZE(gt->i915)); ads_blob_write(guc, ads.golden_context_lrca[guc_class], addr_ggtt); @@ -599,7 +603,7 @@ static void guc_init_golden_context(struct intel_guc *guc) } GEM_BUG_ON(ads_blob_read(guc, ads.eng_state_size[guc_class]) != - real_size - LRC_SKIP_SIZE); + real_size - LRC_SKIP_SIZE(gt->i915)); GEM_BUG_ON(ads_blob_read(guc, ads.golden_context_lrca[guc_class]) != addr_ggtt); addr_ggtt += alloc_size; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index 75257bd20ff0..b54b7883320b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -600,10 +600,8 @@ intel_guc_capture_getnullheader(struct intel_guc *guc, return 0; } -#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3 - -int -intel_guc_capture_output_min_size_est(struct intel_guc *guc) +static int +guc_capture_output_min_size_est(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); struct intel_engine_cs *engine; @@ -623,13 +621,8 @@ intel_guc_capture_output_min_size_est(struct intel_guc *guc) * For each engine instance, there would be 1 x guc_state_capture_group_t output * followed by 3 x guc_state_capture_t lists. The latter is how the register * dumps are split across different register types (where the '3' are global vs class - * vs instance). Finally, let's multiply the whole thing by 3x (just so we are - * not limited to just 1 round of data in a worst case full register dump log) - * - * NOTE: intel_guc_log that allocates the log buffer would round this size up to - * a power of two. + * vs instance). */ - for_each_engine(engine, gt, id) { worst_min_size += sizeof(struct guc_state_capture_group_header_t) + (3 * sizeof(struct guc_state_capture_header_t)); @@ -649,7 +642,30 @@ intel_guc_capture_output_min_size_est(struct intel_guc *guc) worst_min_size += (num_regs * sizeof(struct guc_mmio_reg)); - return (worst_min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER); + return worst_min_size; +} + +/* + * Add on a 3x multiplier to allow for multiple back-to-back captures occurring + * before the i915 can read the data out and process it + */ +#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3 + +static void check_guc_capture_size(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + int min_size = guc_capture_output_min_size_est(guc); + int spare_size = min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER; + + if (min_size < 0) + drm_warn(&i915->drm, "Failed to calculate GuC error state capture buffer minimum size: %d!\n", + min_size); + else if (min_size > CAPTURE_BUFFER_SIZE) + drm_warn(&i915->drm, "GuC error state capture buffer is too small: %d < %d\n", + CAPTURE_BUFFER_SIZE, min_size); + else if (spare_size > CAPTURE_BUFFER_SIZE) + drm_notice(&i915->drm, "GuC error state capture buffer maybe too small: %d < %d (min = %d)\n", + CAPTURE_BUFFER_SIZE, spare_size, min_size); } /* @@ -1580,5 +1596,7 @@ int intel_guc_capture_init(struct intel_guc *guc) INIT_LIST_HEAD(&guc->capture->outlist); INIT_LIST_HEAD(&guc->capture->cachelist); + check_guc_capture_size(guc); + return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h index d3d7bd0b6db6..fbd3713c7832 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h @@ -21,7 +21,6 @@ int intel_guc_capture_print_engine_node(struct 
drm_i915_error_state_buf *m, void intel_guc_capture_get_matching_node(struct intel_gt *gt, struct intel_engine_coredump *ee, struct intel_context *ce); void intel_guc_capture_process(struct intel_guc *guc); -int intel_guc_capture_output_min_size_est(struct intel_guc *guc); int intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classid, void **outptr); int intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index f01325cd1b62..2b22065e87bf 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -455,6 +455,7 @@ corrupted: /** * wait_for_ct_request_update - Wait for CT request state update. + * @ct: pointer to CT * @req: pointer to pending request * @status: placeholder for status * @@ -467,9 +468,10 @@ corrupted: * * 0 response received (status is valid) * * -ETIMEDOUT no response within hardcoded timeout */ -static int wait_for_ct_request_update(struct ct_request *req, u32 *status) +static int wait_for_ct_request_update(struct intel_guc_ct *ct, struct ct_request *req, u32 *status) { int err; + bool ct_enabled; /* * Fast commands should complete in less than 10us, so sample quickly @@ -481,12 +483,15 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status) #define GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS 10 #define GUC_CTB_RESPONSE_TIMEOUT_LONG_MS 1000 #define done \ - (FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \ + (!(ct_enabled = intel_guc_ct_enabled(ct)) || \ + FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \ GUC_HXG_ORIGIN_GUC) err = wait_for_us(done, GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS); if (err) err = wait_for(done, GUC_CTB_RESPONSE_TIMEOUT_LONG_MS); #undef done + if (!ct_enabled) + err = -ENODEV; *status = req->status; return err; @@ -703,11 +708,18 @@ retry: intel_guc_notify(ct_to_guc(ct)); - err = wait_for_ct_request_update(&request, status); + err = wait_for_ct_request_update(ct, &request, status); g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN); if (unlikely(err)) { - CT_ERROR(ct, "No response for request %#x (fence %u)\n", - action[0], request.fence); + if (err == -ENODEV) + /* wait_for_ct_request_update returns -ENODEV on reset/suspend in progress. + * In this case, output is debug rather than error info + */ + CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is disabled\n", + action[0], request.fence); + else + CT_ERROR(ct, "No response for request %#x (fence %u)\n", + action[0], request.fence); goto unlink; } @@ -771,8 +783,9 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len, ret = ct_send(ct, action, len, response_buf, response_buf_size, &status); if (unlikely(ret < 0)) { - CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n", - action[0], ERR_PTR(ret), status); + if (ret != -ENODEV) + CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n", + action[0], ERR_PTR(ret), status); } else if (unlikely(ret)) { CT_DEBUG(ct, "send action %#x returned %d (%#x)\n", action[0], ret, ret); @@ -816,8 +829,22 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg) if (unlikely(ctb->broken)) return -EPIPE; - if (unlikely(desc->status)) - goto corrupted; + if (unlikely(desc->status)) { + u32 status = desc->status; + + if (status & GUC_CTB_STATUS_UNUSED) { + /* + * Potentially valid if a CLIENT_RESET request resulted in + * contexts/engines being reset. 
But should never happen as + * no contexts should be active when CLIENT_RESET is sent. + */ + CT_ERROR(ct, "Unexpected G2H after GuC has stopped!\n"); + status &= ~GUC_CTB_STATUS_UNUSED; + } + + if (status) + goto corrupted; + } GEM_BUG_ON(head > size); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index b3c9a9327f76..323b055e5db9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -204,6 +204,20 @@ struct guc_wq_item { u32 fence_id; } __packed; +struct guc_process_desc_v69 { + u32 stage_id; + u64 db_base_addr; + u32 head; + u32 tail; + u32 error_offset; + u64 wq_base_addr; + u32 wq_size_bytes; + u32 wq_status; + u32 engine_presence; + u32 priority; + u32 reserved[36]; +} __packed; + struct guc_sched_wq_desc { u32 head; u32 tail; @@ -228,6 +242,37 @@ struct guc_ctxt_registration_info { }; #define CONTEXT_REGISTRATION_FLAG_KMD BIT(0) +/* Preempt to idle on quantum expiry */ +#define CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69 BIT(0) + +/* + * GuC Context registration descriptor. + * FIXME: This is only required to exist during context registration. + * The current 1:1 between guc_lrc_desc and LRCs for the lifetime of the LRC + * is not required. + */ +struct guc_lrc_desc_v69 { + u32 hw_context_desc; + u32 slpm_perf_mode_hint; /* SPLC v1 only */ + u32 slpm_freq_hint; + u32 engine_submit_mask; /* In logical space */ + u8 engine_class; + u8 reserved0[3]; + u32 priority; + u32 process_desc; + u32 wq_addr; + u32 wq_size; + u32 context_flags; /* CONTEXT_REGISTRATION_* */ + /* Time for one workload to execute. (in micro seconds) */ + u32 execution_quantum; + /* Time to wait for a preemption request to complete before issuing a + * reset. (in micro seconds). + */ + u32 preemption_timeout; + u32 policy_flags; /* CONTEXT_POLICY_* */ + u32 reserved1[19]; +} __packed; + /* 32-bit KLV structure as used by policy updates and others */ struct guc_klv_generic_dw_t { u32 kl; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 25b2d7ce6640..4722d4b18ed1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -15,6 +15,32 @@ static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log); +static u32 intel_guc_log_size(struct intel_guc_log *log) +{ + /* + * GuC Log buffer Layout: + * + * NB: Ordering must follow "enum guc_log_buffer_type". 
+ * + * +===============================+ 00B + * | Debug state header | + * +-------------------------------+ 32B + * | Crash dump state header | + * +-------------------------------+ 64B + * | Capture state header | + * +-------------------------------+ 96B + * | | + * +===============================+ PAGE_SIZE (4KB) + * | Debug logs | + * +===============================+ + DEBUG_SIZE + * | Crash Dump logs | + * +===============================+ + CRASH_SIZE + * | Capture logs | + * +===============================+ + CAPTURE_SIZE + */ + return PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE + CAPTURE_BUFFER_SIZE; +} + /** * DOC: GuC firmware log * @@ -461,32 +487,7 @@ int intel_guc_log_create(struct intel_guc_log *log) GEM_BUG_ON(log->vma); - /* - * GuC Log buffer Layout - * (this ordering must follow "enum guc_log_buffer_type" definition) - * - * +===============================+ 00B - * | Debug state header | - * +-------------------------------+ 32B - * | Crash dump state header | - * +-------------------------------+ 64B - * | Capture state header | - * +-------------------------------+ 96B - * | | - * +===============================+ PAGE_SIZE (4KB) - * | Debug logs | - * +===============================+ + DEBUG_SIZE - * | Crash Dump logs | - * +===============================+ + CRASH_SIZE - * | Capture logs | - * +===============================+ + CAPTURE_SIZE - */ - if (intel_guc_capture_output_min_size_est(guc) > CAPTURE_BUFFER_SIZE) - DRM_WARN("GuC log buffer for state_capture maybe too small. %d < %d\n", - CAPTURE_BUFFER_SIZE, intel_guc_capture_output_min_size_est(guc)); - - guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE + - CAPTURE_BUFFER_SIZE; + guc_log_size = intel_guc_log_size(log); vma = intel_guc_allocate_vma(guc, guc_log_size); if (IS_ERR(vma)) { @@ -749,8 +750,9 @@ int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p, struct intel_guc *guc = log_to_guc(log); struct intel_uc *uc = container_of(guc, struct intel_uc, guc); struct drm_i915_gem_object *obj = NULL; - u32 *map; - int i = 0; + void *map; + u32 *page; + int i, j; if (!intel_guc_is_supported(guc)) return -ENODEV; @@ -763,21 +765,34 @@ int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p, if (!obj) return 0; + page = (u32 *)__get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + intel_guc_dump_time_info(guc, p); + map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); if (IS_ERR(map)) { DRM_DEBUG("Failed to pin object\n"); drm_puts(p, "(log data unaccessible)\n"); + free_page((unsigned long)page); return PTR_ERR(map); } - for (i = 0; i < obj->base.size / sizeof(u32); i += 4) - drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n", - *(map + i), *(map + i + 1), - *(map + i + 2), *(map + i + 3)); + for (i = 0; i < obj->base.size; i += PAGE_SIZE) { + if (!i915_memcpy_from_wc(page, map + i, PAGE_SIZE)) + memcpy(page, map + i, PAGE_SIZE); + + for (j = 0; j < PAGE_SIZE / sizeof(u32); j += 4) + drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n", + *(page + j + 0), *(page + j + 1), + *(page + j + 2), *(page + j + 3)); + } drm_puts(p, "\n"); i915_gem_object_unpin_map(obj); + free_page((unsigned long)page); return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h index 18007e639be9..dc9715411d62 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h @@ -22,11 +22,11 @@ struct intel_guc; #elif defined(CONFIG_DRM_I915_DEBUG_GEM) #define CRASH_BUFFER_SIZE SZ_1M 
#define DEBUG_BUFFER_SIZE SZ_2M -#define CAPTURE_BUFFER_SIZE SZ_1M +#define CAPTURE_BUFFER_SIZE SZ_4M #else #define CRASH_BUFFER_SIZE SZ_8K #define DEBUG_BUFFER_SIZE SZ_64K -#define CAPTURE_BUFFER_SIZE SZ_16K +#define CAPTURE_BUFFER_SIZE SZ_2M #endif /* diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h index 8dc063f087eb..a7092f711e9c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h @@ -102,6 +102,10 @@ #define GUC_SEND_TRIGGER (1<<0) #define GEN11_GUC_HOST_INTERRUPT _MMIO(0x1901f0) +#define GEN12_GUC_SEM_INTR_ENABLES _MMIO(0xc71c) +#define GUC_SEM_INTR_ROUTE_TO_GUC BIT(31) +#define GUC_SEM_INTR_ENABLE_ALL (0xff) + #define GUC_NUM_DOORBELLS 256 /* format of the HW-monitored doorbell cacheline */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index ec9c4ca0f615..e1fa1f32f29e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -575,20 +575,24 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc) * unless they have deviated from defaults, in which case, * we retain the values and set min/max accordingly. */ - if (!slpc->max_freq_softlimit) + if (!slpc->max_freq_softlimit) { slpc->max_freq_softlimit = slpc->rp0_freq; - else if (slpc->max_freq_softlimit != slpc->rp0_freq) + slpc_to_gt(slpc)->defaults.max_freq = slpc->max_freq_softlimit; + } else if (slpc->max_freq_softlimit != slpc->rp0_freq) { ret = intel_guc_slpc_set_max_freq(slpc, slpc->max_freq_softlimit); + } if (unlikely(ret)) return ret; - if (!slpc->min_freq_softlimit) + if (!slpc->min_freq_softlimit) { slpc->min_freq_softlimit = slpc->min_freq; - else if (slpc->min_freq_softlimit != slpc->min_freq) + slpc_to_gt(slpc)->defaults.min_freq = slpc->min_freq_softlimit; + } else if (slpc->min_freq_softlimit != slpc->min_freq) { return intel_guc_slpc_set_min_freq(slpc, slpc->min_freq_softlimit); + } return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 40f726c61e95..0d17da77e787 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -414,12 +414,15 @@ struct sync_semaphore { }; struct parent_scratch { - struct guc_sched_wq_desc wq_desc; + union guc_descs { + struct guc_sched_wq_desc wq_desc; + struct guc_process_desc_v69 pdesc; + } descs; struct sync_semaphore go; struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1]; - u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) - + u8 unused[WQ_OFFSET - sizeof(union guc_descs) - sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)]; u32 wq[WQ_SIZE / sizeof(u32)]; @@ -456,17 +459,23 @@ __get_parent_scratch(struct intel_context *ce) LRC_STATE_OFFSET) / sizeof(u32))); } +static struct guc_process_desc_v69 * +__get_process_desc_v69(struct intel_context *ce) +{ + struct parent_scratch *ps = __get_parent_scratch(ce); + + return &ps->descs.pdesc; +} + static struct guc_sched_wq_desc * -__get_wq_desc(struct intel_context *ce) +__get_wq_desc_v70(struct intel_context *ce) { struct parent_scratch *ps = __get_parent_scratch(ce); - return &ps->wq_desc; + return &ps->descs.wq_desc; } -static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc, - struct intel_context *ce, - u32 wqi_size) +static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size) { /* * Check for space in work queue. 
Caching a value of head pointer in @@ -476,7 +485,7 @@ static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc, #define AVAILABLE_SPACE \ CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE) if (wqi_size > AVAILABLE_SPACE) { - ce->parallel.guc.wqi_head = READ_ONCE(wq_desc->head); + ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head); if (wqi_size > AVAILABLE_SPACE) return NULL; @@ -495,11 +504,55 @@ static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id) return ce; } +static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index) +{ + struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69; + + if (!base) + return NULL; + + GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID); + + return &base[index]; +} + +static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc) +{ + u32 size; + int ret; + + size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) * + GUC_MAX_CONTEXT_ID); + ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69, + (void **)&guc->lrc_desc_pool_vaddr_v69); + if (ret) + return ret; + + return 0; +} + +static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc) +{ + if (!guc->lrc_desc_pool_vaddr_v69) + return; + + guc->lrc_desc_pool_vaddr_v69 = NULL; + i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP); +} + static inline bool guc_submission_initialized(struct intel_guc *guc) { return guc->submission_initialized; } +static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id) +{ + struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id); + + if (desc) + memset(desc, 0, sizeof(*desc)); +} + static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id) { return __get_context(guc, id); @@ -526,6 +579,8 @@ static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id) if (unlikely(!guc_submission_initialized(guc))) return; + _reset_lrc_desc_v69(guc, id); + /* * xarray API doesn't have xa_erase_irqsave wrapper, so calling * the lower level functions directly. 
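The reworked get_wq_pointer() above checks for space against a locally cached head and only re-reads the head the GuC publishes (now reached through ce->parallel.guc.wq_head, so the same path serves both the v69 process descriptor and the v70 wq descriptor) when the cached value no longer shows enough room. A minimal standalone sketch of that cached-head ring check, in plain C with illustrative names (wq_reserve, shared_head, RING_SPACE modelled on the kernel's CIRC_SPACE idea), not driver code:

#include <stdint.h>
#include <stdio.h>

#define WQ_SIZE 0x1000u                           /* power-of-two ring, as in the driver */
#define RING_SPACE(tail, head, size) \
	(((head) - ((tail) + 1)) & ((size) - 1))  /* free bytes the producer may write */

struct wq {
	const volatile uint32_t *shared_head;     /* consumer-updated location (GuC) */
	uint32_t cached_head;                     /* producer's lazily refreshed snapshot */
	uint32_t tail;                            /* producer-owned write offset */
};

/* Return the byte offset to write wqi_size bytes at, or -1 if the ring is full. */
static int wq_reserve(struct wq *wq, uint32_t wqi_size)
{
	if (wqi_size > RING_SPACE(wq->tail, wq->cached_head, WQ_SIZE)) {
		/* Slow path: refresh from the head the consumer last published. */
		wq->cached_head = *wq->shared_head;
		if (wqi_size > RING_SPACE(wq->tail, wq->cached_head, WQ_SIZE))
			return -1;
	}
	return (int)wq->tail;
}

int main(void)
{
	uint32_t guc_head = 0;
	struct wq wq = { .shared_head = &guc_head, .cached_head = 0, .tail = 0 };

	printf("reserve 64 bytes -> offset %d\n", wq_reserve(&wq, 64));
	return 0;
}

Falling back to the shared location only on the slow path keeps most submissions from re-reading memory the GuC updates.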
@@ -611,7 +666,7 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout) true, timeout); } -static int guc_context_policy_init(struct intel_context *ce, bool loop); +static int guc_context_policy_init_v70(struct intel_context *ce, bool loop); static int try_context_registration(struct intel_context *ce, bool loop); static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) @@ -639,7 +694,7 @@ static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq) GEM_BUG_ON(context_guc_id_invalid(ce)); if (context_policy_required(ce)) { - err = guc_context_policy_init(ce, false); + err = guc_context_policy_init_v70(ce, false); if (err) return err; } @@ -737,9 +792,7 @@ static u32 wq_space_until_wrap(struct intel_context *ce) return (WQ_SIZE - ce->parallel.guc.wqi_tail); } -static void write_wqi(struct guc_sched_wq_desc *wq_desc, - struct intel_context *ce, - u32 wqi_size) +static void write_wqi(struct intel_context *ce, u32 wqi_size) { BUILD_BUG_ON(!is_power_of_2(WQ_SIZE)); @@ -750,13 +803,12 @@ static void write_wqi(struct guc_sched_wq_desc *wq_desc, ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) & (WQ_SIZE - 1); - WRITE_ONCE(wq_desc->tail, ce->parallel.guc.wqi_tail); + WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail); } static int guc_wq_noop_append(struct intel_context *ce) { - struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); - u32 *wqi = get_wq_pointer(wq_desc, ce, wq_space_until_wrap(ce)); + u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce)); u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1; if (!wqi) @@ -775,7 +827,6 @@ static int __guc_wq_item_append(struct i915_request *rq) { struct intel_context *ce = request_to_scheduling_context(rq); struct intel_context *child; - struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); unsigned int wqi_size = (ce->parallel.number_children + 4) * sizeof(u32); u32 *wqi; @@ -795,7 +846,7 @@ static int __guc_wq_item_append(struct i915_request *rq) return ret; } - wqi = get_wq_pointer(wq_desc, ce, wqi_size); + wqi = get_wq_pointer(ce, wqi_size); if (!wqi) return -EBUSY; @@ -810,7 +861,7 @@ static int __guc_wq_item_append(struct i915_request *rq) for_each_child(ce, child) *wqi++ = child->ring->tail / sizeof(u64); - write_wqi(wq_desc, ce, wqi_size); + write_wqi(ce, wqi_size); return 0; } @@ -1812,20 +1863,34 @@ static void reset_fail_worker_func(struct work_struct *w); int intel_guc_submission_init(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); + int ret; if (guc->submission_initialized) return 0; + if (guc->fw.major_ver_found < 70) { + ret = guc_lrc_desc_pool_create_v69(guc); + if (ret) + return ret; + } + guc->submission_state.guc_ids_bitmap = bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); - if (!guc->submission_state.guc_ids_bitmap) - return -ENOMEM; + if (!guc->submission_state.guc_ids_bitmap) { + ret = -ENOMEM; + goto destroy_pool; + } guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; guc->timestamp.shift = gpm_timestamp_shift(gt); guc->submission_initialized = true; return 0; + +destroy_pool: + guc_lrc_desc_pool_destroy_v69(guc); + + return ret; } void intel_guc_submission_fini(struct intel_guc *guc) @@ -1834,6 +1899,7 @@ void intel_guc_submission_fini(struct intel_guc *guc) return; guc_flush_destroyed_contexts(guc); + guc_lrc_desc_pool_destroy_v69(guc); i915_sched_engine_put(guc->sched_engine); bitmap_free(guc->submission_state.guc_ids_bitmap); guc->submission_initialized = false; @@ -2091,10 +2157,34 @@ 
static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) spin_unlock_irqrestore(&guc->submission_state.lock, flags); } -static int __guc_action_register_multi_lrc(struct intel_guc *guc, - struct intel_context *ce, - struct guc_ctxt_registration_info *info, - bool loop) +static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc, + struct intel_context *ce, + u32 guc_id, + u32 offset, + bool loop) +{ + struct intel_context *child; + u32 action[4 + MAX_ENGINE_INSTANCE]; + int len = 0; + + GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE); + + action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; + action[len++] = guc_id; + action[len++] = ce->parallel.number_children + 1; + action[len++] = offset; + for_each_child(ce, child) { + offset += sizeof(struct guc_lrc_desc_v69); + action[len++] = offset; + } + + return guc_submission_send_busy_loop(guc, action, len, 0, loop); +} + +static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc, + struct intel_context *ce, + struct guc_ctxt_registration_info *info, + bool loop) { struct intel_context *child; u32 action[13 + (MAX_ENGINE_INSTANCE * 2)]; @@ -2134,9 +2224,24 @@ static int __guc_action_register_multi_lrc(struct intel_guc *guc, return guc_submission_send_busy_loop(guc, action, len, 0, loop); } -static int __guc_action_register_context(struct intel_guc *guc, - struct guc_ctxt_registration_info *info, - bool loop) +static int __guc_action_register_context_v69(struct intel_guc *guc, + u32 guc_id, + u32 offset, + bool loop) +{ + u32 action[] = { + INTEL_GUC_ACTION_REGISTER_CONTEXT, + guc_id, + offset, + }; + + return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), + 0, loop); +} + +static int __guc_action_register_context_v70(struct intel_guc *guc, + struct guc_ctxt_registration_info *info, + bool loop) { u32 action[] = { INTEL_GUC_ACTION_REGISTER_CONTEXT, @@ -2157,24 +2262,52 @@ static int __guc_action_register_context(struct intel_guc *guc, 0, loop); } -static void prepare_context_registration_info(struct intel_context *ce, - struct guc_ctxt_registration_info *info); +static void prepare_context_registration_info_v69(struct intel_context *ce); +static void prepare_context_registration_info_v70(struct intel_context *ce, + struct guc_ctxt_registration_info *info); -static int register_context(struct intel_context *ce, bool loop) +static int +register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop) +{ + u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) + + ce->guc_id.id * sizeof(struct guc_lrc_desc_v69); + + prepare_context_registration_info_v69(ce); + + if (intel_context_is_parent(ce)) + return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id, + offset, loop); + else + return __guc_action_register_context_v69(guc, ce->guc_id.id, + offset, loop); +} + +static int +register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop) { struct guc_ctxt_registration_info info; + + prepare_context_registration_info_v70(ce, &info); + + if (intel_context_is_parent(ce)) + return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop); + else + return __guc_action_register_context_v70(guc, &info, loop); +} + +static int register_context(struct intel_context *ce, bool loop) +{ struct intel_guc *guc = ce_to_guc(ce); int ret; GEM_BUG_ON(intel_context_is_child(ce)); trace_intel_context_register(ce); - prepare_context_registration_info(ce, &info); - - if (intel_context_is_parent(ce)) - ret = __guc_action_register_multi_lrc(guc, 
ce, &info, loop); + if (guc->fw.major_ver_found >= 70) + ret = register_context_v70(guc, ce, loop); else - ret = __guc_action_register_context(guc, &info, loop); + ret = register_context_v69(guc, ce, loop); + if (likely(!ret)) { unsigned long flags; @@ -2182,7 +2315,8 @@ static int register_context(struct intel_context *ce, bool loop) set_context_registered(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); - guc_context_policy_init(ce, loop); + if (guc->fw.major_ver_found >= 70) + guc_context_policy_init_v70(ce, loop); } return ret; @@ -2279,14 +2413,13 @@ static int __guc_context_set_context_policies(struct intel_guc *guc, 0, loop); } -static int guc_context_policy_init(struct intel_context *ce, bool loop) +static int guc_context_policy_init_v70(struct intel_context *ce, bool loop) { struct intel_engine_cs *engine = ce->engine; struct intel_guc *guc = &engine->gt->uc.guc; struct context_policy policy; u32 execution_quantum; u32 preemption_timeout; - bool missing = false; unsigned long flags; int ret; @@ -2304,32 +2437,9 @@ static int guc_context_policy_init(struct intel_context *ce, bool loop) __guc_context_policy_add_preempt_to_idle(&policy, 1); ret = __guc_context_set_context_policies(guc, &policy, loop); - missing = ret != 0; - - if (!missing && intel_context_is_parent(ce)) { - struct intel_context *child; - - for_each_child(ce, child) { - __guc_context_policy_start_klv(&policy, child->guc_id.id); - - if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) - __guc_context_policy_add_preempt_to_idle(&policy, 1); - - child->guc_state.prio = ce->guc_state.prio; - __guc_context_policy_add_priority(&policy, ce->guc_state.prio); - __guc_context_policy_add_execution_quantum(&policy, execution_quantum); - __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); - - ret = __guc_context_set_context_policies(guc, &policy, loop); - if (ret) { - missing = true; - break; - } - } - } spin_lock_irqsave(&ce->guc_state.lock, flags); - if (missing) + if (ret != 0) set_context_policy_required(ce); else clr_context_policy_required(ce); @@ -2338,6 +2448,19 @@ static int guc_context_policy_init(struct intel_context *ce, bool loop) return ret; } +static void guc_context_policy_init_v69(struct intel_engine_cs *engine, + struct guc_lrc_desc_v69 *desc) +{ + desc->policy_flags = 0; + + if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION) + desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69; + + /* NB: For both of these, zero means disabled. */ + desc->execution_quantum = engine->props.timeslice_duration_ms * 1000; + desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; +} + static u32 map_guc_prio_to_lrc_desc_prio(u8 prio) { /* @@ -2358,8 +2481,75 @@ static u32 map_guc_prio_to_lrc_desc_prio(u8 prio) } } -static void prepare_context_registration_info(struct intel_context *ce, - struct guc_ctxt_registration_info *info) +static void prepare_context_registration_info_v69(struct intel_context *ce) +{ + struct intel_engine_cs *engine = ce->engine; + struct intel_guc *guc = &engine->gt->uc.guc; + u32 ctx_id = ce->guc_id.id; + struct guc_lrc_desc_v69 *desc; + struct intel_context *child; + + GEM_BUG_ON(!engine->mask); + + /* + * Ensure LRC + CT vmas are is same region as write barrier is done + * based on CT vma region. 
+ */ + GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) != + i915_gem_object_is_lmem(ce->ring->vma->obj)); + + desc = __get_lrc_desc_v69(guc, ctx_id); + desc->engine_class = engine_class_to_guc_class(engine->class); + desc->engine_submit_mask = engine->logical_mask; + desc->hw_context_desc = ce->lrc.lrca; + desc->priority = ce->guc_state.prio; + desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; + guc_context_policy_init_v69(engine, desc); + + /* + * If context is a parent, we need to register a process descriptor + * describing a work queue and register all child contexts. + */ + if (intel_context_is_parent(ce)) { + struct guc_process_desc_v69 *pdesc; + + ce->parallel.guc.wqi_tail = 0; + ce->parallel.guc.wqi_head = 0; + + desc->process_desc = i915_ggtt_offset(ce->state) + + __get_parent_scratch_offset(ce); + desc->wq_addr = i915_ggtt_offset(ce->state) + + __get_wq_offset(ce); + desc->wq_size = WQ_SIZE; + + pdesc = __get_process_desc_v69(ce); + memset(pdesc, 0, sizeof(*(pdesc))); + pdesc->stage_id = ce->guc_id.id; + pdesc->wq_base_addr = desc->wq_addr; + pdesc->wq_size_bytes = desc->wq_size; + pdesc->wq_status = WQ_STATUS_ACTIVE; + + ce->parallel.guc.wq_head = &pdesc->head; + ce->parallel.guc.wq_tail = &pdesc->tail; + ce->parallel.guc.wq_status = &pdesc->wq_status; + + for_each_child(ce, child) { + desc = __get_lrc_desc_v69(guc, child->guc_id.id); + + desc->engine_class = + engine_class_to_guc_class(engine->class); + desc->hw_context_desc = child->lrc.lrca; + desc->priority = ce->guc_state.prio; + desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; + guc_context_policy_init_v69(engine, desc); + } + + clear_children_join_go_memory(ce); + } +} + +static void prepare_context_registration_info_v70(struct intel_context *ce, + struct guc_ctxt_registration_info *info) { struct intel_engine_cs *engine = ce->engine; struct intel_guc *guc = &engine->gt->uc.guc; @@ -2409,10 +2599,14 @@ static void prepare_context_registration_info(struct intel_context *ce, info->wq_base_hi = upper_32_bits(wq_base_offset); info->wq_size = WQ_SIZE; - wq_desc = __get_wq_desc(ce); + wq_desc = __get_wq_desc_v70(ce); memset(wq_desc, 0, sizeof(*wq_desc)); wq_desc->wq_status = WQ_STATUS_ACTIVE; + ce->parallel.guc.wq_head = &wq_desc->head; + ce->parallel.guc.wq_tail = &wq_desc->tail; + ce->parallel.guc.wq_status = &wq_desc->wq_status; + clear_children_join_go_memory(ce); } } @@ -2727,11 +2921,21 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc, u16 guc_id, u32 preemption_timeout) { - struct context_policy policy; + if (guc->fw.major_ver_found >= 70) { + struct context_policy policy; - __guc_context_policy_start_klv(&policy, guc_id); - __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); - __guc_context_set_context_policies(guc, &policy, true); + __guc_context_policy_start_klv(&policy, guc_id); + __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout); + __guc_context_set_context_policies(guc, &policy, true); + } else { + u32 action[] = { + INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT, + guc_id, + preemption_timeout + }; + + intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); + } } static void @@ -2982,11 +3186,21 @@ static int guc_context_alloc(struct intel_context *ce) static void __guc_context_set_prio(struct intel_guc *guc, struct intel_context *ce) { - struct context_policy policy; + if (guc->fw.major_ver_found >= 70) { + struct context_policy policy; - __guc_context_policy_start_klv(&policy, ce->guc_id.id); - 
__guc_context_policy_add_priority(&policy, ce->guc_state.prio); - __guc_context_set_context_policies(guc, &policy, true); + __guc_context_policy_start_klv(&policy, ce->guc_id.id); + __guc_context_policy_add_priority(&policy, ce->guc_state.prio); + __guc_context_set_context_policies(guc, &policy, true); + } else { + u32 action[] = { + INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY, + ce->guc_id.id, + ce->guc_state.prio, + }; + + guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); + } } static void guc_context_set_prio(struct intel_guc *guc, @@ -3953,13 +4167,27 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine) void intel_guc_submission_enable(struct intel_guc *guc) { + struct intel_gt *gt = guc_to_gt(guc); + + /* Enable and route to GuC */ + if (GRAPHICS_VER(gt->i915) >= 12) + intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, + GUC_SEM_INTR_ROUTE_TO_GUC | + GUC_SEM_INTR_ENABLE_ALL); + guc_init_lrc_mapping(guc); guc_init_engine_stats(guc); } void intel_guc_submission_disable(struct intel_guc *guc) { + struct intel_gt *gt = guc_to_gt(guc); + /* Note: By the time we're here, GuC may have already been reset */ + + /* Disable and route to host */ + if (GRAPHICS_VER(gt->i915) >= 12) + intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, 0x0); } static bool __guc_submission_supported(struct intel_guc *guc) @@ -4496,17 +4724,19 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, guc_log_context_priority(p, ce); if (intel_context_is_parent(ce)) { - struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce); struct intel_context *child; drm_printf(p, "\t\tNumber children: %u\n", ce->parallel.number_children); - drm_printf(p, "\t\tWQI Head: %u\n", - READ_ONCE(wq_desc->head)); - drm_printf(p, "\t\tWQI Tail: %u\n", - READ_ONCE(wq_desc->tail)); - drm_printf(p, "\t\tWQI Status: %u\n\n", - READ_ONCE(wq_desc->wq_status)); + + if (ce->parallel.guc.wq_status) { + drm_printf(p, "\t\tWQI Head: %u\n", + READ_ONCE(*ce->parallel.guc.wq_head)); + drm_printf(p, "\t\tWQI Tail: %u\n", + READ_ONCE(*ce->parallel.guc.wq_tail)); + drm_printf(p, "\t\tWQI Status: %u\n\n", + READ_ONCE(*ce->parallel.guc.wq_status)); + } if (ce->engine->emit_bb_start == emit_bb_start_parent_no_preempt_mid_batch) { @@ -4923,4 +5153,5 @@ bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_guc.c" #include "selftest_guc_multi_lrc.c" +#include "selftest_guc_hangcheck.c" #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 27363091e1af..58547292efa0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -53,7 +53,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * firmware as TGL. 
*/ #define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \ - fw_def(DG2, 0, guc_def(dg2, 70, 1, 2)) \ + fw_def(DG2, 0, guc_def(dg2, 70, 4, 1)) \ fw_def(ALDERLAKE_P, 0, guc_def(adlp, 70, 1, 1)) \ fw_def(ALDERLAKE_S, 0, guc_def(tgl, 70, 1, 1)) \ fw_def(DG1, 0, guc_def(dg1, 70, 1, 1)) \ @@ -70,6 +70,10 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, fw_def(BROXTON, 0, guc_def(bxt, 70, 1, 1)) \ fw_def(SKYLAKE, 0, guc_def(skl, 70, 1, 1)) +#define INTEL_GUC_FIRMWARE_DEFS_FALLBACK(fw_def, guc_def) \ + fw_def(ALDERLAKE_P, 0, guc_def(adlp, 69, 0, 3)) \ + fw_def(ALDERLAKE_S, 0, guc_def(tgl, 69, 0, 3)) + #define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \ fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \ fw_def(ALDERLAKE_S, 0, huc_def(tgl, 7, 9, 3)) \ @@ -105,6 +109,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, MODULE_FIRMWARE(uc_); INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH) +INTEL_GUC_FIRMWARE_DEFS_FALLBACK(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH) INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH) /* The below structs and macros are used to iterate across the list of blobs */ @@ -149,6 +154,9 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) static const struct uc_fw_platform_requirement blobs_guc[] = { INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB) }; + static const struct uc_fw_platform_requirement blobs_guc_fallback[] = { + INTEL_GUC_FIRMWARE_DEFS_FALLBACK(MAKE_FW_LIST, GUC_FW_BLOB) + }; static const struct uc_fw_platform_requirement blobs_huc[] = { INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB) }; @@ -179,12 +187,29 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) { const struct uc_fw_blob *blob = &fw_blobs[i].blob; uc_fw->path = blob->path; + uc_fw->wanted_path = blob->path; uc_fw->major_ver_wanted = blob->major; uc_fw->minor_ver_wanted = blob->minor; break; } } + if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) { + const struct uc_fw_platform_requirement *blobs = blobs_guc_fallback; + u32 count = ARRAY_SIZE(blobs_guc_fallback); + + for (i = 0; i < count && p <= blobs[i].p; i++) { + if (p == blobs[i].p && rev >= blobs[i].rev) { + const struct uc_fw_blob *blob = &blobs[i].blob; + + uc_fw->fallback.path = blob->path; + uc_fw->fallback.major_ver = blob->major; + uc_fw->fallback.minor_ver = blob->minor; + break; + } + } + } + /* make sure the list is ordered as expected */ if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) { for (i = 1; i < fw_count; i++) { @@ -195,11 +220,11 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) fw_blobs[i].rev < fw_blobs[i - 1].rev) continue; - pr_err("invalid FW blob order: %s r%u comes before %s r%u\n", - intel_platform_name(fw_blobs[i - 1].p), - fw_blobs[i - 1].rev, - intel_platform_name(fw_blobs[i].p), - fw_blobs[i].rev); + drm_err(&i915->drm, "Invalid FW blob order: %s r%u comes before %s r%u\n", + intel_platform_name(fw_blobs[i - 1].p), + fw_blobs[i - 1].rev, + intel_platform_name(fw_blobs[i].p), + fw_blobs[i].rev); uc_fw->path = NULL; } @@ -412,7 +437,24 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) __force_fw_fetch_failures(uc_fw, -EINVAL); __force_fw_fetch_failures(uc_fw, -ESTALE); - err = request_firmware(&fw, uc_fw->path, dev); + err = firmware_request_nowarn(&fw, uc_fw->path, dev); + if (err && !intel_uc_fw_is_overridden(uc_fw) && uc_fw->fallback.path) { + err = firmware_request_nowarn(&fw, uc_fw->fallback.path, dev); + if (!err) { + drm_notice(&i915->drm, + "%s 
firmware %s is recommended, but only %s was found\n", + intel_uc_fw_type_repr(uc_fw->type), + uc_fw->wanted_path, + uc_fw->fallback.path); + drm_info(&i915->drm, + "Consider updating your linux-firmware pkg or downloading from %s\n", + INTEL_UC_FIRMWARE_URL); + + uc_fw->path = uc_fw->fallback.path; + uc_fw->major_ver_wanted = uc_fw->fallback.major_ver; + uc_fw->minor_ver_wanted = uc_fw->fallback.minor_ver; + } + } if (err) goto fail; @@ -460,8 +502,8 @@ fail: INTEL_UC_FIRMWARE_MISSING : INTEL_UC_FIRMWARE_ERROR); - drm_notice(&i915->drm, "%s firmware %s: fetch failed with error %d\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); + i915_probe_error(i915, "%s firmware %s: fetch failed with error %d\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err); drm_info(&i915->drm, "%s firmware(s) can be downloaded from %s\n", intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL); @@ -822,7 +864,13 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len) void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p) { drm_printf(p, "%s firmware: %s\n", - intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); + intel_uc_fw_type_repr(uc_fw->type), uc_fw->wanted_path); + if (uc_fw->fallback.path) { + drm_printf(p, "%s firmware fallback: %s\n", + intel_uc_fw_type_repr(uc_fw->type), uc_fw->fallback.path); + drm_printf(p, "fallback selected: %s\n", + str_yes_no(uc_fw->path == uc_fw->fallback.path)); + } drm_printf(p, "\tstatus: %s\n", intel_uc_fw_status_repr(uc_fw->status)); drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n", diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 4f169035f504..7aa2644400b9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -74,6 +74,7 @@ struct intel_uc_fw { const enum intel_uc_fw_status status; enum intel_uc_fw_status __status; /* no accidental overwrites */ }; + const char *wanted_path; const char *path; bool user_overridden; size_t size; @@ -98,6 +99,12 @@ struct intel_uc_fw { u16 major_ver_found; u16 minor_ver_found; + struct { + const char *path; + u16 major_ver; + u16 minor_ver; + } fallback; + u32 rsa_size; u32 ucode_size; diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c index 1df71d0796ae..20e0c39259fb 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -62,7 +62,7 @@ static int intel_guc_scrub_ctbs(void *arg) ce = intel_context_create(engine); if (IS_ERR(ce)) { ret = PTR_ERR(ce); - pr_err("Failed to create context, %d: %d\n", i, ret); + drm_err(>->i915->drm, "Failed to create context, %d: %d\n", i, ret); goto err; } @@ -83,7 +83,7 @@ static int intel_guc_scrub_ctbs(void *arg) if (IS_ERR(rq)) { ret = PTR_ERR(rq); - pr_err("Failed to create request, %d: %d\n", i, ret); + drm_err(>->i915->drm, "Failed to create request, %d: %d\n", i, ret); goto err; } @@ -93,7 +93,7 @@ static int intel_guc_scrub_ctbs(void *arg) for (i = 0; i < 3; ++i) { ret = i915_request_wait(last[i], 0, HZ); if (ret < 0) { - pr_err("Last request failed to complete: %d\n", ret); + drm_err(>->i915->drm, "Last request failed to complete: %d\n", ret); goto err; } i915_request_put(last[i]); @@ -110,7 +110,7 @@ static int intel_guc_scrub_ctbs(void *arg) /* GT will not idle if G2H are lost */ ret = intel_gt_wait_for_idle(gt, HZ); if (ret < 0) { - pr_err("GT failed to idle: %d\n", ret); + drm_err(>->i915->drm, "GT failed to idle: %d\n", ret); 
goto err; } @@ -150,7 +150,7 @@ static int intel_guc_steal_guc_ids(void *arg) ce = kcalloc(GUC_MAX_CONTEXT_ID, sizeof(*ce), GFP_KERNEL); if (!ce) { - pr_err("Context array allocation failed\n"); + drm_err(>->i915->drm, "Context array allocation failed\n"); return -ENOMEM; } @@ -164,24 +164,24 @@ static int intel_guc_steal_guc_ids(void *arg) if (IS_ERR(ce[context_index])) { ret = PTR_ERR(ce[context_index]); ce[context_index] = NULL; - pr_err("Failed to create context: %d\n", ret); + drm_err(>->i915->drm, "Failed to create context: %d\n", ret); goto err_wakeref; } ret = igt_spinner_init(&spin, engine->gt); if (ret) { - pr_err("Failed to create spinner: %d\n", ret); + drm_err(>->i915->drm, "Failed to create spinner: %d\n", ret); goto err_contexts; } spin_rq = igt_spinner_create_request(&spin, ce[context_index], MI_ARB_CHECK); if (IS_ERR(spin_rq)) { ret = PTR_ERR(spin_rq); - pr_err("Failed to create spinner request: %d\n", ret); + drm_err(>->i915->drm, "Failed to create spinner request: %d\n", ret); goto err_contexts; } ret = request_add_spin(spin_rq, &spin); if (ret) { - pr_err("Failed to add Spinner request: %d\n", ret); + drm_err(>->i915->drm, "Failed to add Spinner request: %d\n", ret); goto err_spin_rq; } @@ -191,7 +191,7 @@ static int intel_guc_steal_guc_ids(void *arg) if (IS_ERR(ce[context_index])) { ret = PTR_ERR(ce[context_index--]); ce[context_index] = NULL; - pr_err("Failed to create context: %d\n", ret); + drm_err(>->i915->drm, "Failed to create context: %d\n", ret); goto err_spin_rq; } @@ -200,8 +200,8 @@ static int intel_guc_steal_guc_ids(void *arg) ret = PTR_ERR(rq); rq = NULL; if (ret != -EAGAIN) { - pr_err("Failed to create request, %d: %d\n", - context_index, ret); + drm_err(>->i915->drm, "Failed to create request, %d: %d\n", + context_index, ret); goto err_spin_rq; } } else { @@ -215,7 +215,7 @@ static int intel_guc_steal_guc_ids(void *arg) igt_spinner_end(&spin); ret = intel_selftest_wait_for_rq(spin_rq); if (ret) { - pr_err("Spin request failed to complete: %d\n", ret); + drm_err(>->i915->drm, "Spin request failed to complete: %d\n", ret); i915_request_put(last); goto err_spin_rq; } @@ -227,7 +227,7 @@ static int intel_guc_steal_guc_ids(void *arg) ret = i915_request_wait(last, 0, HZ * 30); i915_request_put(last); if (ret < 0) { - pr_err("Last request failed to complete: %d\n", ret); + drm_err(>->i915->drm, "Last request failed to complete: %d\n", ret); goto err_spin_rq; } @@ -235,7 +235,7 @@ static int intel_guc_steal_guc_ids(void *arg) rq = nop_user_request(ce[context_index], NULL); if (IS_ERR(rq)) { ret = PTR_ERR(rq); - pr_err("Failed to steal guc_id, %d: %d\n", context_index, ret); + drm_err(>->i915->drm, "Failed to steal guc_id, %d: %d\n", context_index, ret); goto err_spin_rq; } @@ -243,21 +243,20 @@ static int intel_guc_steal_guc_ids(void *arg) ret = i915_request_wait(rq, 0, HZ); i915_request_put(rq); if (ret < 0) { - pr_err("Request with stolen guc_id failed to complete: %d\n", - ret); + drm_err(>->i915->drm, "Request with stolen guc_id failed to complete: %d\n", ret); goto err_spin_rq; } /* Wait for idle */ ret = intel_gt_wait_for_idle(gt, HZ * 30); if (ret < 0) { - pr_err("GT failed to idle: %d\n", ret); + drm_err(>->i915->drm, "GT failed to idle: %d\n", ret); goto err_spin_rq; } /* Verify a guc_id was stolen */ if (guc->number_guc_id_stolen == number_guc_id_stolen) { - pr_err("No guc_id was stolen"); + drm_err(>->i915->drm, "No guc_id was stolen"); ret = -EINVAL; } else { ret = 0; diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c 
b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c new file mode 100644 index 000000000000..01f8cd3c3134 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2022 Intel Corporation + */ + +#include "selftests/igt_spinner.h" +#include "selftests/igt_reset.h" +#include "selftests/intel_scheduler_helpers.h" +#include "gt/intel_engine_heartbeat.h" +#include "gem/selftests/mock_context.h" + +#define BEAT_INTERVAL 100 + +static struct i915_request *nop_request(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + rq = intel_engine_create_kernel_request(engine); + if (IS_ERR(rq)) + return rq; + + i915_request_get(rq); + i915_request_add(rq); + + return rq; +} + +static int intel_hang_guc(void *arg) +{ + struct intel_gt *gt = arg; + int ret = 0; + struct i915_gem_context *ctx; + struct intel_context *ce; + struct igt_spinner spin; + struct i915_request *rq; + intel_wakeref_t wakeref; + struct i915_gpu_error *global = >->i915->gpu_error; + struct intel_engine_cs *engine; + unsigned int reset_count; + u32 guc_status; + u32 old_beat; + + ctx = kernel_context(gt->i915, NULL); + if (IS_ERR(ctx)) { + drm_err(>->i915->drm, "Failed get kernel context: %ld\n", PTR_ERR(ctx)); + return PTR_ERR(ctx); + } + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + + ce = intel_context_create(gt->engine[BCS0]); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); + drm_err(>->i915->drm, "Failed to create spinner request: %d\n", ret); + goto err; + } + + engine = ce->engine; + reset_count = i915_reset_count(global); + + old_beat = engine->props.heartbeat_interval_ms; + ret = intel_engine_set_heartbeat(engine, BEAT_INTERVAL); + if (ret) { + drm_err(>->i915->drm, "Failed to boost heatbeat interval: %d\n", ret); + goto err; + } + + ret = igt_spinner_init(&spin, engine->gt); + if (ret) { + drm_err(>->i915->drm, "Failed to create spinner: %d\n", ret); + goto err; + } + + rq = igt_spinner_create_request(&spin, ce, MI_NOOP); + intel_context_put(ce); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + drm_err(>->i915->drm, "Failed to create spinner request: %d\n", ret); + goto err_spin; + } + + ret = request_add_spin(rq, &spin); + if (ret) { + i915_request_put(rq); + drm_err(>->i915->drm, "Failed to add Spinner request: %d\n", ret); + goto err_spin; + } + + ret = intel_reset_guc(gt); + if (ret) { + i915_request_put(rq); + drm_err(>->i915->drm, "Failed to reset GuC, ret = %d\n", ret); + goto err_spin; + } + + guc_status = intel_uncore_read(gt->uncore, GUC_STATUS); + if (!(guc_status & GS_MIA_IN_RESET)) { + i915_request_put(rq); + drm_err(>->i915->drm, "GuC failed to reset: status = 0x%08X\n", guc_status); + ret = -EIO; + goto err_spin; + } + + /* Wait for the heartbeat to cause a reset */ + ret = intel_selftest_wait_for_rq(rq); + i915_request_put(rq); + if (ret) { + drm_err(>->i915->drm, "Request failed to complete: %d\n", ret); + goto err_spin; + } + + if (i915_reset_count(global) == reset_count) { + drm_err(>->i915->drm, "Failed to record a GPU reset\n"); + ret = -EINVAL; + goto err_spin; + } + +err_spin: + igt_spinner_end(&spin); + igt_spinner_fini(&spin); + intel_engine_set_heartbeat(engine, old_beat); + + if (ret == 0) { + rq = nop_request(engine); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + goto err; + } + + ret = intel_selftest_wait_for_rq(rq); + i915_request_put(rq); + if (ret) { + drm_err(>->i915->drm, "No-op failed to complete: %d\n", ret); + goto err; + } + } + +err: + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + 
kernel_context_close(ctx); + + return ret; +} + +int intel_guc_hang_check(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(intel_hang_guc), + }; + struct intel_gt *gt = to_gt(i915); + + if (intel_gt_is_wedged(gt)) + return 0; + + if (!intel_uc_uses_guc_submission(>->uc)) + return 0; + + return intel_gt_live_subtests(tests, gt); +} diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c index 812220a43df8..d17982c36d25 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c @@ -115,30 +115,30 @@ static int __intel_guc_multi_lrc_basic(struct intel_gt *gt, unsigned int class) parent = multi_lrc_create_parent(gt, class, 0); if (IS_ERR(parent)) { - pr_err("Failed creating contexts: %ld", PTR_ERR(parent)); + drm_err(>->i915->drm, "Failed creating contexts: %ld", PTR_ERR(parent)); return PTR_ERR(parent); } else if (!parent) { - pr_debug("Not enough engines in class: %d", class); + drm_dbg(>->i915->drm, "Not enough engines in class: %d", class); return 0; } rq = multi_lrc_nop_request(parent); if (IS_ERR(rq)) { ret = PTR_ERR(rq); - pr_err("Failed creating requests: %d", ret); + drm_err(>->i915->drm, "Failed creating requests: %d", ret); goto out; } ret = intel_selftest_wait_for_rq(rq); if (ret) - pr_err("Failed waiting on request: %d", ret); + drm_err(>->i915->drm, "Failed waiting on request: %d", ret); i915_request_put(rq); if (ret >= 0) { ret = intel_gt_wait_for_idle(gt, HZ * 5); if (ret < 0) - pr_err("GT failed to idle: %d\n", ret); + drm_err(>->i915->drm, "GT failed to idle: %d\n", ret); } out: diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 0ba2a3455d99..de13f102d4fd 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -3117,9 +3117,9 @@ void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu) continue; vaddr = shmem_pin_map(engine->default_state); - if (IS_ERR(vaddr)) { - gvt_err("failed to map %s->default state, err:%zd\n", - engine->name, PTR_ERR(vaddr)); + if (!vaddr) { + gvt_err("failed to map %s->default state\n", + engine->name); return; } diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index aee1a45da74b..705689e64011 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -226,7 +226,6 @@ struct intel_vgpu { unsigned long nr_cache_entries; struct mutex cache_lock; - struct notifier_block iommu_notifier; atomic_t released; struct kvm_page_track_notifier_node track_node; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index e2f6c56ab342..e3cd58946477 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -231,57 +231,38 @@ static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt) static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size) { - struct drm_i915_private *i915 = vgpu->gvt->gt->i915; - int total_pages; - int npage; - int ret; - - total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE; - - for (npage = 0; npage < total_pages; npage++) { - unsigned long cur_gfn = gfn + npage; - - ret = vfio_unpin_pages(&vgpu->vfio_device, &cur_gfn, 1); - drm_WARN_ON(&i915->drm, ret != 1); - } + vfio_unpin_pages(&vgpu->vfio_device, gfn << PAGE_SHIFT, + DIV_ROUND_UP(size, PAGE_SIZE)); } /* Pin a normal or compound guest page for dma. 
*/ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, unsigned long size, struct page **page) { - unsigned long base_pfn = 0; - int total_pages; + int total_pages = DIV_ROUND_UP(size, PAGE_SIZE); + struct page *base_page = NULL; int npage; int ret; - total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE; /* * We pin the pages one-by-one to avoid allocating a big arrary * on stack to hold pfns. */ for (npage = 0; npage < total_pages; npage++) { - unsigned long cur_gfn = gfn + npage; - unsigned long pfn; + dma_addr_t cur_iova = (gfn + npage) << PAGE_SHIFT; + struct page *cur_page; - ret = vfio_pin_pages(&vgpu->vfio_device, &cur_gfn, 1, - IOMMU_READ | IOMMU_WRITE, &pfn); + ret = vfio_pin_pages(&vgpu->vfio_device, cur_iova, 1, + IOMMU_READ | IOMMU_WRITE, &cur_page); if (ret != 1) { - gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n", - cur_gfn, ret); - goto err; - } - - if (!pfn_valid(pfn)) { - gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn); - npage++; - ret = -EFAULT; + gvt_vgpu_err("vfio_pin_pages failed for iova %pad, ret %d\n", + &cur_iova, ret); goto err; } if (npage == 0) - base_pfn = pfn; - else if (base_pfn + npage != pfn) { + base_page = cur_page; + else if (base_page + npage != cur_page) { gvt_vgpu_err("The pages are not continuous\n"); ret = -EINVAL; npage++; @@ -289,7 +270,7 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, } } - *page = pfn_to_page(base_pfn); + *page = base_page; return 0; err: gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE); @@ -729,34 +710,25 @@ int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num) return ret; } -static int intel_vgpu_iommu_notifier(struct notifier_block *nb, - unsigned long action, void *data) +static void intel_vgpu_dma_unmap(struct vfio_device *vfio_dev, u64 iova, + u64 length) { - struct intel_vgpu *vgpu = - container_of(nb, struct intel_vgpu, iommu_notifier); - - if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) { - struct vfio_iommu_type1_dma_unmap *unmap = data; - struct gvt_dma *entry; - unsigned long iov_pfn, end_iov_pfn; + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); + struct gvt_dma *entry; + u64 iov_pfn = iova >> PAGE_SHIFT; + u64 end_iov_pfn = iov_pfn + length / PAGE_SIZE; - iov_pfn = unmap->iova >> PAGE_SHIFT; - end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE; + mutex_lock(&vgpu->cache_lock); + for (; iov_pfn < end_iov_pfn; iov_pfn++) { + entry = __gvt_cache_find_gfn(vgpu, iov_pfn); + if (!entry) + continue; - mutex_lock(&vgpu->cache_lock); - for (; iov_pfn < end_iov_pfn; iov_pfn++) { - entry = __gvt_cache_find_gfn(vgpu, iov_pfn); - if (!entry) - continue; - - gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr, - entry->size); - __gvt_cache_remove_entry(vgpu, entry); - } - mutex_unlock(&vgpu->cache_lock); + gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr, + entry->size); + __gvt_cache_remove_entry(vgpu, entry); } - - return NOTIFY_OK; + mutex_unlock(&vgpu->cache_lock); } static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu) @@ -783,36 +755,20 @@ out: static int intel_vgpu_open_device(struct vfio_device *vfio_dev) { struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); - unsigned long events; - int ret; - - vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier; - - events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; - ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events, - &vgpu->iommu_notifier); - if (ret != 0) { - gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n", - ret); - goto out; - } - ret = -EEXIST; if (vgpu->attached) - 
goto undo_iommu; + return -EEXIST; - ret = -ESRCH; if (!vgpu->vfio_device.kvm || vgpu->vfio_device.kvm->mm != current->mm) { gvt_vgpu_err("KVM is required to use Intel vGPU\n"); - goto undo_iommu; + return -ESRCH; } kvm_get_kvm(vgpu->vfio_device.kvm); - ret = -EEXIST; if (__kvmgt_vgpu_exist(vgpu)) - goto undo_iommu; + return -EEXIST; vgpu->attached = true; @@ -831,12 +787,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) atomic_set(&vgpu->released, 0); return 0; - -undo_iommu: - vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, - &vgpu->iommu_notifier); -out: - return ret; } static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu) @@ -853,8 +803,6 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu) static void intel_vgpu_close_device(struct vfio_device *vfio_dev) { struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); - struct drm_i915_private *i915 = vgpu->gvt->gt->i915; - int ret; if (!vgpu->attached) return; @@ -864,11 +812,6 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev) intel_gvt_release_vgpu(vgpu); - ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_IOMMU_NOTIFY, - &vgpu->iommu_notifier); - drm_WARN(&i915->drm, ret, - "vfio_unregister_notifier for iommu failed: %d\n", ret); - debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs)); kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm, @@ -1610,6 +1553,7 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = { .write = intel_vgpu_write, .mmap = intel_vgpu_mmap, .ioctl = intel_vgpu_ioctl, + .dma_unmap = intel_vgpu_dma_unmap, }; static int intel_vgpu_probe(struct mdev_device *mdev) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f12f3aa3bc13..443ed6dac92a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -162,7 +162,7 @@ struct i915_gem_mm { * List of objects which are pending destruction. */ struct llist_head free_list; - struct delayed_work free_work; + struct work_struct free_work; /** * Count of objects pending destructions. Used to skip needlessly * waiting on an RCU barrier if no objects are waiting to be freed. @@ -1134,7 +1134,7 @@ static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915) * armed the work again. 
*/ while (atomic_read(&i915->mm.free_count)) { - flush_delayed_work(&i915->mm.free_work); + flush_work(&i915->mm.free_work); flush_delayed_work(&i915->bdev.wq); rcu_barrier(); } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index d10f4eb0e272..b5fbc2252784 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -670,6 +670,18 @@ static void err_print_pciid(struct drm_i915_error_state_buf *m, pdev->subsystem_device); } +static void err_print_guc_ctb(struct drm_i915_error_state_buf *m, + const char *name, + const struct intel_ctb_coredump *ctb) +{ + if (!ctb->size) + return; + + err_printf(m, "GuC %s CTB: raw: 0x%08X, 0x%08X/%08X, cached: 0x%08X/%08X, desc = 0x%08X, buf = 0x%08X x 0x%08X\n", + name, ctb->raw_status, ctb->raw_head, ctb->raw_tail, + ctb->head, ctb->tail, ctb->desc_offset, ctb->cmds_offset, ctb->size); +} + static void err_print_uc(struct drm_i915_error_state_buf *m, const struct intel_uc_coredump *error_uc) { @@ -677,7 +689,12 @@ static void err_print_uc(struct drm_i915_error_state_buf *m, intel_uc_fw_dump(&error_uc->guc_fw, &p); intel_uc_fw_dump(&error_uc->huc_fw, &p); - intel_gpu_error_print_vma(m, NULL, error_uc->guc_log); + err_printf(m, "GuC timestamp: 0x%08x\n", error_uc->guc.timestamp); + intel_gpu_error_print_vma(m, NULL, error_uc->guc.vma_log); + err_printf(m, "GuC CTB fence: %d\n", error_uc->guc.last_fence); + err_print_guc_ctb(m, "Send", error_uc->guc.ctb + 0); + err_print_guc_ctb(m, "Recv", error_uc->guc.ctb + 1); + intel_gpu_error_print_vma(m, NULL, error_uc->guc.vma_ctb); } static void err_free_sgl(struct scatterlist *sgl) @@ -719,6 +736,8 @@ static void err_print_gt_global_nonguc(struct drm_i915_error_state_buf *m, int i; err_printf(m, "GT awake: %s\n", str_yes_no(gt->awake)); + err_printf(m, "CS timestamp frequency: %u Hz, %d ns\n", + gt->clock_frequency, gt->clock_period_ns); err_printf(m, "EIR: 0x%08x\n", gt->eir); err_printf(m, "PGTBL_ER: 0x%08x\n", gt->pgtbl_er); @@ -850,7 +869,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, if (error->gt) { bool print_guc_capture = false; - if (error->gt->uc && error->gt->uc->is_guc_capture) + if (error->gt->uc && error->gt->uc->guc.is_guc_capture) print_guc_capture = true; err_print_gt_display(m, error->gt); @@ -1005,7 +1024,8 @@ static void cleanup_uc(struct intel_uc_coredump *uc) { kfree(uc->guc_fw.path); kfree(uc->huc_fw.path); - i915_vma_coredump_free(uc->guc_log); + i915_vma_coredump_free(uc->guc.vma_log); + i915_vma_coredump_free(uc->guc.vma_ctb); kfree(uc); } @@ -1654,6 +1674,23 @@ gt_record_engines(struct intel_gt_coredump *gt, } } +static void gt_record_guc_ctb(struct intel_ctb_coredump *saved, + const struct intel_guc_ct_buffer *ctb, + const void *blob_ptr, struct intel_guc *guc) +{ + if (!ctb || !ctb->desc) + return; + + saved->raw_status = ctb->desc->status; + saved->raw_head = ctb->desc->head; + saved->raw_tail = ctb->desc->tail; + saved->head = ctb->head; + saved->tail = ctb->tail; + saved->size = ctb->size; + saved->desc_offset = ((void *)ctb->desc) - blob_ptr; + saved->cmds_offset = ((void *)ctb->cmds) - blob_ptr; +} + static struct intel_uc_coredump * gt_record_uc(struct intel_gt_coredump *gt, struct i915_vma_compress *compress) @@ -1674,8 +1711,22 @@ gt_record_uc(struct intel_gt_coredump *gt, */ error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL); error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL); - error_uc->guc_log = create_vma_coredump(gt->_gt, uc->guc.log.vma, - "GuC log 
buffer", compress); + + /* + * Save the GuC log and include a timestamp reference for converting the + * log times to system times (in conjunction with the error->boottime and + * gt->clock_frequency fields saved elsewhere). + */ + error_uc->guc.timestamp = intel_uncore_read(gt->_gt->uncore, GUCPMTIMESTAMP); + error_uc->guc.vma_log = create_vma_coredump(gt->_gt, uc->guc.log.vma, + "GuC log buffer", compress); + error_uc->guc.vma_ctb = create_vma_coredump(gt->_gt, uc->guc.ct.vma, + "GuC CT buffer", compress); + error_uc->guc.last_fence = uc->guc.ct.requests.last_fence; + gt_record_guc_ctb(error_uc->guc.ctb + 0, &uc->guc.ct.ctbs.send, + uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc); + gt_record_guc_ctb(error_uc->guc.ctb + 1, &uc->guc.ct.ctbs.recv, + uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc); return error_uc; } @@ -1832,6 +1883,8 @@ static void gt_record_global_regs(struct intel_gt_coredump *gt) static void gt_record_info(struct intel_gt_coredump *gt) { memcpy(>->info, >->_gt->info, sizeof(struct intel_gt_info)); + gt->clock_frequency = gt->_gt->clock_frequency; + gt->clock_period_ns = gt->_gt->clock_period_ns; } /* @@ -2026,9 +2079,9 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 du error->gt->uc = gt_record_uc(error->gt, compress); if (error->gt->uc) { if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE) - error->gt->uc->is_guc_capture = true; + error->gt->uc->guc.is_guc_capture = true; else - GEM_BUG_ON(error->gt->uc->is_guc_capture); + GEM_BUG_ON(error->gt->uc->guc.is_guc_capture); } } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 55a143b92d10..efc75cc2ffdb 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -125,6 +125,15 @@ struct intel_engine_coredump { struct intel_engine_coredump *next; }; +struct intel_ctb_coredump { + u32 raw_head, head; + u32 raw_tail, tail; + u32 raw_status; + u32 desc_offset; + u32 cmds_offset; + u32 size; +}; + struct intel_gt_coredump { const struct intel_gt *_gt; bool awake; @@ -150,6 +159,8 @@ struct intel_gt_coredump { u32 gtt_cache; u32 aux_err; /* gen12 */ u32 gam_done; /* gen12 */ + u32 clock_frequency; + u32 clock_period_ns; /* Display related */ u32 derrmr; @@ -163,8 +174,14 @@ struct intel_gt_coredump { struct intel_uc_coredump { struct intel_uc_fw guc_fw; struct intel_uc_fw huc_fw; - struct i915_vma_coredump *guc_log; - bool is_guc_capture; + struct guc_info { + struct intel_ctb_coredump ctb[2]; + struct i915_vma_coredump *vma_ctb; + struct i915_vma_coredump *vma_log; + u32 timestamp; + u16 last_fence; + bool is_guc_capture; + } guc; } *uc; struct intel_gt_coredump *next; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index ef3b04c7e153..260371716490 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -538,8 +538,6 @@ int i915_vma_bind(struct i915_vma *vma, bind_flags); } - set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags); - atomic_or(bind_flags, &vma->flags); return 0; } @@ -1310,6 +1308,19 @@ err_unpin: return err; } +void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb) +{ + /* + * Before we release the pages that were bound by this vma, we + * must invalidate all the TLBs that may still have a reference + * back to our physical address. 
It only needs to be done once, + * so after updating the PTE to point away from the pages, record + * the most recent TLB invalidation seqno, and if we have not yet + * flushed the TLBs upon release, perform a full invalidation. + */ + WRITE_ONCE(*tlb, intel_gt_next_invalidate_tlb_full(vm->gt)); +} + static void __vma_put_pages(struct i915_vma *vma, unsigned int count) { /* We allocate under vma_get_pages, so beware the shrinker */ @@ -1941,7 +1952,12 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async) vma->vm->skip_pte_rewrite; trace_i915_vma_unbind(vma); - unbind_fence = i915_vma_resource_unbind(vma_res); + if (async) + unbind_fence = i915_vma_resource_unbind(vma_res, + &vma->obj->mm.tlb); + else + unbind_fence = i915_vma_resource_unbind(vma_res, NULL); + vma->resource = NULL; atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE), @@ -1949,10 +1965,13 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async) i915_vma_detach(vma); - if (!async && unbind_fence) { - dma_fence_wait(unbind_fence, false); - dma_fence_put(unbind_fence); - unbind_fence = NULL; + if (!async) { + if (unbind_fence) { + dma_fence_wait(unbind_fence, false); + dma_fence_put(unbind_fence); + unbind_fence = NULL; + } + vma_invalidate_tlb(vma->vm, &vma->obj->mm.tlb); } /* diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 88ca0bd9c900..33a58f605d75 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -213,6 +213,7 @@ bool i915_vma_misplaced(const struct i915_vma *vma, u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); void i915_vma_revoke_mmap(struct i915_vma *vma); +void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb); struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async); int __i915_vma_unbind(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); diff --git a/drivers/gpu/drm/i915/i915_vma_resource.c b/drivers/gpu/drm/i915/i915_vma_resource.c index 27c55027387a..de1342dbfa12 100644 --- a/drivers/gpu/drm/i915/i915_vma_resource.c +++ b/drivers/gpu/drm/i915/i915_vma_resource.c @@ -216,6 +216,10 @@ i915_vma_resource_fence_notify(struct i915_sw_fence *fence, /** * i915_vma_resource_unbind - Unbind a vma resource * @vma_res: The vma resource to unbind. + * @tlb: pointer to vma->obj->mm.tlb associated with the resource + * to be stored at vma_res->tlb. When not-NULL, it will be used + * to do TLB cache invalidation before freeing a VMA resource. + * Used only for async unbind. * * At this point this function does little more than publish a fence that * signals immediately unless signaling is held back. @@ -223,10 +227,13 @@ i915_vma_resource_fence_notify(struct i915_sw_fence *fence, * Return: A refcounted pointer to a dma-fence that signals when unbinding is * complete. */ -struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res) +struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res, + u32 *tlb) { struct i915_address_space *vm = vma_res->vm; + vma_res->tlb = tlb; + /* Reference for the sw fence */ i915_vma_resource_get(vma_res); diff --git a/drivers/gpu/drm/i915/i915_vma_resource.h b/drivers/gpu/drm/i915/i915_vma_resource.h index 5d8427caa2ba..06923d1816e7 100644 --- a/drivers/gpu/drm/i915/i915_vma_resource.h +++ b/drivers/gpu/drm/i915/i915_vma_resource.h @@ -67,6 +67,7 @@ struct i915_page_sizes { * taken when the unbind is scheduled. 
* @skip_pte_rewrite: During ggtt suspend and vm takedown pte rewriting * needs to be skipped for unbind. + * @tlb: pointer for obj->mm.tlb, if async unbind. Otherwise, NULL * * The lifetime of a struct i915_vma_resource is from a binding request to * the actual possible asynchronous unbind has completed. @@ -119,6 +120,8 @@ struct i915_vma_resource { bool immediate_unbind:1; bool needs_wakeref:1; bool skip_pte_rewrite:1; + + u32 *tlb; }; bool i915_vma_resource_hold(struct i915_vma_resource *vma_res, @@ -131,7 +134,8 @@ struct i915_vma_resource *i915_vma_resource_alloc(void); void i915_vma_resource_free(struct i915_vma_resource *vma_res); -struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res); +struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res, + u32 *tlb); void __i915_vma_resource_init(struct i915_vma_resource *vma_res); diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index bdd290f2bf3c..aaf8a380e5c7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -49,5 +49,6 @@ selftest(perf, i915_perf_live_selftests) selftest(slpc, intel_slpc_live_selftests) selftest(guc, intel_guc_live_selftests) selftest(guc_multi_lrc, intel_guc_multi_lrc_live_selftests) +selftest(guc_hang, intel_guc_hang_check) /* Here be dragons: keep last to run last! */ selftest(late_gt_pm, intel_gt_pm_late_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index c56a0c2cd2f7..ec05f578a698 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -971,7 +971,7 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915) if (err) goto err; - /* Force the wait wait now to avoid including it in the benchmark */ + /* Force the wait now to avoid including it in the benchmark */ err = i915_vma_sync(vma); if (err) goto err_pin; |