aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915
diff options
context:
space:
mode:
authorJani Nikula2022-08-29 14:44:38 +0300
committerJani Nikula2022-08-29 15:14:59 +0300
commit917bda9ab155032a02be1a57ebd4d949ae9e1528 (patch)
tree7758d62783f5fb81777c37d0ad4e26eea6312c0f /drivers/gpu/drm/i915
parent95086cb969b2cb8abe4984457f219ec70d24052e (diff)
parent2c2d7a67defa198a8b8148dbaddc9e5554efebc8 (diff)
Merge drm/drm-next into drm-intel-next
Sync drm-intel-next with v6.0-rc as well as recent drm-intel-gt-next. Since drm-next does not have commit f0c70d41e4e8 ("drm/i915/guc: remove runtime info printing from time stamp logging") yet, only drm-intel-gt-next, will need to do that as part of the merge here to build. Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915')
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c1
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c1
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.c16
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_types.h3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c25
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shmem.c6
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shrinker.c3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_userptr.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c77
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.h11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_regs.h11
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs.c10
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs.h6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c34
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h27
-rw-r--r--drivers/gpu/drm/i915/gt/intel_migrate.c104
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ppgtt.c8
-rw-r--r--drivers/gpu/drm/i915/gt/intel_region_lmem.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c76
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_execlists.c16
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_hangcheck.c12
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h3
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h8
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c17
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h7
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c10
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c40
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c45
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h45
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.c79
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.h4
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h4
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c12
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c393
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c68
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h7
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc.c37
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c159
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c10
-rw-r--r--drivers/gpu/drm/i915/gvt/cmd_parser.c6
-rw-r--r--drivers/gpu/drm/i915/gvt/gvt.h1
-rw-r--r--drivers/gpu/drm/i915/gvt/kvmgt.c120
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h4
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c67
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.h21
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c33
-rw-r--r--drivers/gpu/drm/i915/i915_vma.h1
-rw-r--r--drivers/gpu/drm/i915/i915_vma_resource.c9
-rw-r--r--drivers/gpu/drm/i915/i915_vma_resource.h6
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_live_selftests.h1
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_request.c2
56 files changed, 1264 insertions, 442 deletions
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 651b2309593a..458f010e46f3 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -27,7 +27,6 @@
#include <acpi/video.h>
#include <linux/i2c.h>
#include <linux/input.h>
-#include <linux/intel-iommu.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/dma-resv.h>
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index b7b2c14fd9e1..cd75b0ca2555 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -6,7 +6,6 @@
#include <linux/dma-resv.h>
#include <linux/highmem.h>
-#include <linux/intel-iommu.h>
#include <linux/sync_file.h>
#include <linux/uaccess.h>
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index ccec4055fde3..389e9f157ca5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -268,7 +268,7 @@ static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj)
*/
void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj)
{
- assert_object_held(obj);
+ assert_object_held_shared(obj);
if (!list_empty(&obj->vma.list)) {
struct i915_vma *vma;
@@ -331,15 +331,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
continue;
}
- if (!i915_gem_object_trylock(obj, NULL)) {
- /* busy, toss it back to the pile */
- if (llist_add(&obj->freed, &i915->mm.free_list))
- queue_delayed_work(i915->wq, &i915->mm.free_work, msecs_to_jiffies(10));
- continue;
- }
-
__i915_gem_object_pages_fini(obj);
- i915_gem_object_unlock(obj);
__i915_gem_free_object(obj);
/* But keep the pointer alive for RCU-protected lookups */
@@ -359,7 +351,7 @@ void i915_gem_flush_free_objects(struct drm_i915_private *i915)
static void __i915_gem_free_work(struct work_struct *work)
{
struct drm_i915_private *i915 =
- container_of(work, struct drm_i915_private, mm.free_work.work);
+ container_of(work, struct drm_i915_private, mm.free_work);
i915_gem_flush_free_objects(i915);
}
@@ -391,7 +383,7 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj)
*/
if (llist_add(&obj->freed, &i915->mm.free_list))
- queue_delayed_work(i915->wq, &i915->mm.free_work, 0);
+ queue_work(i915->wq, &i915->mm.free_work);
}
void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
@@ -745,7 +737,7 @@ bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj)
void i915_gem_init__objects(struct drm_i915_private *i915)
{
- INIT_DELAYED_WORK(&i915->mm.free_work, __i915_gem_free_work);
+ INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
}
void i915_objects_module_exit(void)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 5cf36a130061..9f6b14ec189a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -335,7 +335,6 @@ struct drm_i915_gem_object {
#define I915_BO_READONLY BIT(7)
#define I915_TILING_QUIRK_BIT 8 /* unknown swizzling; do not release! */
#define I915_BO_PROTECTED BIT(9)
-#define I915_BO_WAS_BOUND_BIT 10
/**
* @mem_flags - Mutable placement-related flags
*
@@ -616,6 +615,8 @@ struct drm_i915_gem_object {
* pages were last acquired.
*/
bool dirty:1;
+
+ u32 tlb;
} mm;
struct {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 8df8ae0e1dea..4df50b049cea 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -6,14 +6,15 @@
#include <drm/drm_cache.h>
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_pm.h"
+
#include "i915_drv.h"
#include "i915_gem_object.h"
#include "i915_scatterlist.h"
#include "i915_gem_lmem.h"
#include "i915_gem_mman.h"
-#include "gt/intel_gt.h"
-
void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
struct sg_table *pages,
unsigned int sg_page_sizes)
@@ -190,6 +191,18 @@ static void unmap_object(struct drm_i915_gem_object *obj, void *ptr)
vunmap(ptr);
}
+static void flush_tlb_invalidate(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct intel_gt *gt = to_gt(i915);
+
+ if (!obj->mm.tlb)
+ return;
+
+ intel_gt_invalidate_tlb(gt, obj->mm.tlb);
+ obj->mm.tlb = 0;
+}
+
struct sg_table *
__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
{
@@ -215,13 +228,7 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
__i915_gem_object_reset_page_iter(obj);
obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
- if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
- struct drm_i915_private *i915 = to_i915(obj->base.dev);
- intel_wakeref_t wakeref;
-
- with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
- intel_gt_invalidate_tlbs(to_gt(i915));
- }
+ flush_tlb_invalidate(obj);
return pages;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 4eed3dd90ba8..f42ca1179f37 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -75,7 +75,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
if (size > resource_size(&mr->region))
return -ENOMEM;
- if (sg_alloc_table(st, page_count, GFP_KERNEL))
+ if (sg_alloc_table(st, page_count, GFP_KERNEL | __GFP_NOWARN))
return -ENOMEM;
/*
@@ -137,7 +137,7 @@ int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
* trigger the out-of-memory killer and for
* this we want __GFP_RETRY_MAYFAIL.
*/
- gfp |= __GFP_RETRY_MAYFAIL;
+ gfp |= __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
}
} while (1);
@@ -209,7 +209,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
rebuild_st:
- st = kmalloc(sizeof(*st), GFP_KERNEL);
+ st = kmalloc(sizeof(*st), GFP_KERNEL | __GFP_NOWARN);
if (!st)
return -ENOMEM;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 1030053571a2..8dc5c8874d8a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -426,7 +426,8 @@ void i915_gem_driver_register__shrinker(struct drm_i915_private *i915)
i915->mm.shrinker.count_objects = i915_gem_shrinker_count;
i915->mm.shrinker.seeks = DEFAULT_SEEKS;
i915->mm.shrinker.batch = 4096;
- drm_WARN_ON(&i915->drm, register_shrinker(&i915->mm.shrinker));
+ drm_WARN_ON(&i915->drm, register_shrinker(&i915->mm.shrinker,
+ "drm-i915_gem"));
i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
drm_WARN_ON(&i915->drm, register_oom_notifier(&i915->mm.oom_notifier));
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 094f06b4ce33..8423df021b71 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -216,8 +216,8 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
* However...!
*
* The mmu-notifier can be invalidated for a
- * migrate_page, that is alreadying holding the lock
- * on the page. Such a try_to_unmap() will result
+ * migrate_folio, that is alreadying holding the lock
+ * on the folio. Such a try_to_unmap() will result
* in us calling put_pages() and so recursively try
* to lock the page. We avoid that deadlock with
* a trylock_page() and in exchange we risk missing
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index d2d75d9c0c8d..04eacae1aca5 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -275,10 +275,17 @@ struct intel_context {
u8 child_index;
/** @guc: GuC specific members for parallel submission */
struct {
- /** @wqi_head: head pointer in work queue */
+ /** @wqi_head: cached head pointer in work queue */
u16 wqi_head;
- /** @wqi_tail: tail pointer in work queue */
+ /** @wqi_tail: cached tail pointer in work queue */
u16 wqi_tail;
+ /** @wq_head: pointer to the actual head in work queue */
+ u32 *wq_head;
+ /** @wq_tail: pointer to the actual head in work queue */
+ u32 *wq_tail;
+ /** @wq_status: pointer to the status in work queue */
+ u32 *wq_status;
+
/**
* @parent_page: page in context state (ce->state) used
* by parent for work queue, process descriptor
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 7b059fdf8cbc..e4bac2431e41 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -11,7 +11,9 @@
#include "pxp/intel_pxp.h"
#include "i915_drv.h"
+#include "i915_perf_oa_regs.h"
#include "intel_context.h"
+#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_ggtt_gmch.h"
#include "intel_gt.h"
@@ -37,8 +39,6 @@ static void __intel_gt_init_early(struct intel_gt *gt)
{
spin_lock_init(&gt->irq_lock);
- mutex_init(&gt->tlb_invalidate_lock);
-
INIT_LIST_HEAD(&gt->closed_vma);
spin_lock_init(&gt->closed_lock);
@@ -49,6 +49,8 @@ static void __intel_gt_init_early(struct intel_gt *gt)
intel_gt_init_reset(gt);
intel_gt_init_requests(gt);
intel_gt_init_timelines(gt);
+ mutex_init(&gt->tlb.invalidate_lock);
+ seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
intel_gt_pm_init_early(gt);
intel_uc_init_early(&gt->uc);
@@ -769,6 +771,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915)
intel_gt_fini_requests(gt);
intel_gt_fini_reset(gt);
intel_gt_fini_timelines(gt);
+ mutex_destroy(&gt->tlb.invalidate_lock);
intel_engines_free(gt);
}
}
@@ -907,7 +910,7 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
return rb;
}
-void intel_gt_invalidate_tlbs(struct intel_gt *gt)
+static void mmio_invalidate_full(struct intel_gt *gt)
{
static const i915_reg_t gen8_regs[] = {
[RENDER_CLASS] = GEN8_RTCR,
@@ -925,13 +928,11 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
struct drm_i915_private *i915 = gt->i915;
struct intel_uncore *uncore = gt->uncore;
struct intel_engine_cs *engine;
+ intel_engine_mask_t awake, tmp;
enum intel_engine_id id;
const i915_reg_t *regs;
unsigned int num = 0;
- if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
- return;
-
if (GRAPHICS_VER(i915) == 12) {
regs = gen12_regs;
num = ARRAY_SIZE(gen12_regs);
@@ -946,28 +947,41 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
"Platform does not implement TLB invalidation!"))
return;
- GEM_TRACE("\n");
-
- assert_rpm_wakelock_held(&i915->runtime_pm);
-
- mutex_lock(&gt->tlb_invalidate_lock);
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
+ awake = 0;
for_each_engine(engine, gt, id) {
struct reg_and_bit rb;
+ if (!intel_engine_pm_is_awake(engine))
+ continue;
+
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
if (!i915_mmio_reg_offset(rb.reg))
continue;
intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+ awake |= engine->mask;
}
+ GT_TRACE(gt, "invalidated engines %08x\n", awake);
+
+ /* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
+ if (awake &&
+ (IS_TIGERLAKE(i915) ||
+ IS_DG1(i915) ||
+ IS_ROCKETLAKE(i915) ||
+ IS_ALDERLAKE_S(i915) ||
+ IS_ALDERLAKE_P(i915)))
+ intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
+
spin_unlock_irq(&uncore->lock);
- for_each_engine(engine, gt, id) {
+ for_each_engine_masked(engine, gt, awake, tmp) {
+ struct reg_and_bit rb;
+
/*
* HW architecture suggest typical invalidation time at 40us,
* with pessimistic cases up to 100us and a recommendation to
@@ -975,12 +989,8 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
*/
const unsigned int timeout_us = 100;
const unsigned int timeout_ms = 4;
- struct reg_and_bit rb;
rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
- if (!i915_mmio_reg_offset(rb.reg))
- continue;
-
if (__intel_wait_for_register_fw(uncore,
rb.reg, rb.bit, 0,
timeout_us, timeout_ms,
@@ -997,5 +1007,38 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt)
* transitions.
*/
intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
- mutex_unlock(&gt->tlb_invalidate_lock);
+}
+
+static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
+{
+ u32 cur = intel_gt_tlb_seqno(gt);
+
+ /* Only skip if a *full* TLB invalidate barrier has passed */
+ return (s32)(cur - ALIGN(seqno, 2)) > 0;
+}
+
+void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno)
+{
+ intel_wakeref_t wakeref;
+
+ if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
+ return;
+
+ if (intel_gt_is_wedged(gt))
+ return;
+
+ if (tlb_seqno_passed(gt, seqno))
+ return;
+
+ with_intel_gt_pm_if_awake(gt, wakeref) {
+ mutex_lock(&gt->tlb.invalidate_lock);
+ if (tlb_seqno_passed(gt, seqno))
+ goto unlock;
+
+ mmio_invalidate_full(gt);
+
+ write_seqcount_invalidate(&gt->tlb.seqno);
+unlock:
+ mutex_unlock(&gt->tlb.invalidate_lock);
+ }
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 82d6f248d876..40b06adf509a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -101,6 +101,16 @@ void intel_gt_info_print(const struct intel_gt_info *info,
void intel_gt_watchdog_work(struct work_struct *work);
-void intel_gt_invalidate_tlbs(struct intel_gt *gt);
+static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt)
+{
+ return seqprop_sequence(&gt->tlb.seqno);
+}
+
+static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt)
+{
+ return intel_gt_tlb_seqno(gt) | 1;
+}
+
+void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno);
#endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index bc898df7a48c..6c9a46452364 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -55,6 +55,17 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
for (tmp = 1, intel_gt_pm_get(gt); tmp; \
intel_gt_pm_put(gt), tmp = 0)
+/**
+ * with_intel_gt_pm_if_awake - if GT is PM awake, get a reference to prevent
+ * it to sleep, run some code and then asynchrously put the reference
+ * away.
+ *
+ * @gt: pointer to the gt
+ * @wf: pointer to a temporary wakeref.
+ */
+#define with_intel_gt_pm_if_awake(gt, wf) \
+ for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0)
+
static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
{
return intel_wakeref_wait_for_idle(&gt->wakeref);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 60d6eb5f245b..94f9ddcfb3a5 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -259,6 +259,9 @@
#define GEN9_PREEMPT_GPGPU_COMMAND_LEVEL GEN9_PREEMPT_GPGPU_LEVEL(1, 0)
#define GEN9_PREEMPT_GPGPU_LEVEL_MASK GEN9_PREEMPT_GPGPU_LEVEL(1, 1)
+#define DRAW_WATERMARK _MMIO(0x26c0)
+#define VERT_WM_VAL REG_GENMASK(9, 0)
+
#define GEN12_GLOBAL_MOCS(i) _MMIO(0x4000 + (i) * 4) /* Global MOCS regs */
#define RENDER_HWS_PGA_GEN7 _MMIO(0x4080)
@@ -374,6 +377,9 @@
#define CHICKEN_RASTER_1 _MMIO(0x6204)
#define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8)
+#define CHICKEN_RASTER_2 _MMIO(0x6208)
+#define TBIMR_FAST_CLIP REG_BIT(5)
+
#define VFLSKPD _MMIO(0x62a8)
#define DIS_OVER_FETCH_CACHE REG_BIT(1)
#define DIS_MULT_MISS_RD_SQUASH REG_BIT(0)
@@ -1007,6 +1013,8 @@
#define GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC (1 << 9)
#define GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC (1 << 7)
+#define GUCPMTIMESTAMP _MMIO(0xc3e8)
+
#define __GEN9_RCS0_MOCS0 0xc800
#define GEN9_GFX_MOCS(i) _MMIO(__GEN9_RCS0_MOCS0 + (i) * 4)
#define __GEN9_VCS0_MOCS0 0xc900
@@ -1078,6 +1086,7 @@
#define GEN10_SAMPLER_MODE _MMIO(0xe18c)
#define ENABLE_SMALLPL REG_BIT(15)
+#define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9)
#define GEN11_SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5)
#define GEN9_HALF_SLICE_CHICKEN7 _MMIO(0xe194)
@@ -1123,6 +1132,8 @@
#define RT_CTRL _MMIO(0xe530)
#define DIS_NULL_QUERY REG_BIT(10)
+#define STACKID_CTRL REG_GENMASK(6, 5)
+#define STACKID_CTRL_512 REG_FIELD_PREP(STACKID_CTRL, 0x2)
#define EU_PERF_CNTL1 _MMIO(0xe558)
#define EU_PERF_CNTL5 _MMIO(0xe55c)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
index 9e4ebf53379b..d651ccd0ab20 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
@@ -22,11 +22,6 @@ bool is_object_gt(struct kobject *kobj)
return !strncmp(kobj->name, "gt", 2);
}
-static struct intel_gt *kobj_to_gt(struct kobject *kobj)
-{
- return container_of(kobj, struct intel_gt, sysfs_gt);
-}
-
struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
const char *name)
{
@@ -101,6 +96,10 @@ void intel_gt_sysfs_register(struct intel_gt *gt)
gt->i915->sysfs_gt, "gt%d", gt->info.id))
goto exit_fail;
+ gt->sysfs_defaults = kobject_create_and_add(".defaults", &gt->sysfs_gt);
+ if (!gt->sysfs_defaults)
+ goto exit_fail;
+
intel_gt_sysfs_pm_init(gt, &gt->sysfs_gt);
return;
@@ -113,5 +112,6 @@ exit_fail:
void intel_gt_sysfs_unregister(struct intel_gt *gt)
{
+ kobject_put(gt->sysfs_defaults);
kobject_put(&gt->sysfs_gt);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
index a99aa7e8b01a..6232923a420d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
@@ -10,6 +10,7 @@
#include <linux/kobject.h>
#include "i915_gem.h" /* GEM_BUG_ON() */
+#include "intel_gt_types.h"
struct intel_gt;
@@ -22,6 +23,11 @@ intel_gt_create_kobj(struct intel_gt *gt,
struct kobject *dir,
const char *name);
+static inline struct intel_gt *kobj_to_gt(struct kobject *kobj)
+{
+ return container_of(kobj, struct intel_gt, sysfs_gt);
+}
+
void intel_gt_sysfs_register(struct intel_gt *gt);
void intel_gt_sysfs_unregister(struct intel_gt *gt);
struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
index 73a8b46e0234..e066cc33d9f2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
@@ -727,6 +727,34 @@ static const struct attribute *media_perf_power_attrs[] = {
NULL
};
+static ssize_t
+default_min_freq_mhz_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_gt *gt = kobj_to_gt(kobj->parent);
+
+ return sysfs_emit(buf, "%u\n", gt->defaults.min_freq);
+}
+
+static struct kobj_attribute default_min_freq_mhz =
+__ATTR(rps_min_freq_mhz, 0444, default_min_freq_mhz_show, NULL);
+
+static ssize_t
+default_max_freq_mhz_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+ struct intel_gt *gt = kobj_to_gt(kobj->parent);
+
+ return sysfs_emit(buf, "%u\n", gt->defaults.max_freq);
+}
+
+static struct kobj_attribute default_max_freq_mhz =
+__ATTR(rps_max_freq_mhz, 0444, default_max_freq_mhz_show, NULL);
+
+static const struct attribute * const rps_defaults_attrs[] = {
+ &default_min_freq_mhz.attr,
+ &default_max_freq_mhz.attr,
+ NULL
+};
+
static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj,
const struct attribute * const *attrs)
{
@@ -776,4 +804,10 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj)
"failed to create gt%u media_perf_power_attrs sysfs (%pe)\n",
gt->info.id, ERR_PTR(ret));
}
+
+ ret = sysfs_create_files(gt->sysfs_defaults, rps_defaults_attrs);
+ if (ret)
+ drm_warn(&gt->i915->drm,
+ "failed to add gt%u rps defaults (%pe)\n",
+ gt->info.id, ERR_PTR(ret));
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index df708802889d..4d56f7d5a3be 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -11,6 +11,7 @@
#include <linux/llist.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
+#include <linux/seqlock.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/workqueue.h>
@@ -75,6 +76,11 @@ enum intel_submission_method {
INTEL_SUBMISSION_GUC,
};
+struct gt_defaults {
+ u32 min_freq;
+ u32 max_freq;
+};
+
struct intel_gt {
struct drm_i915_private *i915;
struct intel_uncore *uncore;
@@ -83,7 +89,22 @@ struct intel_gt {
struct intel_uc uc;
struct intel_gsc gsc;
- struct mutex tlb_invalidate_lock;
+ struct {
+ /* Serialize global tlb invalidations */
+ struct mutex invalidate_lock;
+
+ /*
+ * Batch TLB invalidations
+ *
+ * After unbinding the PTE, we need to ensure the TLB
+ * are invalidated prior to releasing the physical pages.
+ * But we only need one such invalidation for all unbinds,
+ * so we track how many TLB invalidations have been
+ * performed since unbind the PTE and only emit an extra
+ * invalidate if no full barrier has been passed.
+ */
+ seqcount_mutex_t seqno;
+ } tlb;
struct i915_wa_list wa_list;
@@ -235,6 +256,10 @@ struct intel_gt {
/* gt/gtN sysfs */
struct kobject sysfs_gt;
+
+ /* sysfs defaults per gt */
+ struct gt_defaults defaults;
+ struct kobject *sysfs_defaults;
};
enum intel_gt_scratch_field {
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index 2c35324b5f68..aaaf1906026c 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -511,44 +511,16 @@ static inline u32 *i915_flush_dw(u32 *cmd, u32 flags)
return cmd;
}
-static u32 calc_ctrl_surf_instr_size(struct drm_i915_private *i915, int size)
-{
- u32 num_cmds, num_blks, total_size;
-
- if (!GET_CCS_BYTES(i915, size))
- return 0;
-
- /*
- * XY_CTRL_SURF_COPY_BLT transfers CCS in 256 byte
- * blocks. one XY_CTRL_SURF_COPY_BLT command can
- * transfer upto 1024 blocks.
- */
- num_blks = DIV_ROUND_UP(GET_CCS_BYTES(i915, size),
- NUM_CCS_BYTES_PER_BLOCK);
- num_cmds = DIV_ROUND_UP(num_blks, NUM_CCS_BLKS_PER_XFER);
- total_size = XY_CTRL_SURF_INSTR_SIZE * num_cmds;
-
- /*
- * Adding a flush before and after XY_CTRL_SURF_COPY_BLT
- */
- total_size += 2 * MI_FLUSH_DW_SIZE;
-
- return total_size;
-}
-
static int emit_copy_ccs(struct i915_request *rq,
u32 dst_offset, u8 dst_access,
u32 src_offset, u8 src_access, int size)
{
struct drm_i915_private *i915 = rq->engine->i915;
int mocs = rq->engine->gt->mocs.uc_index << 1;
- u32 num_ccs_blks, ccs_ring_size;
+ u32 num_ccs_blks;
u32 *cs;
- ccs_ring_size = calc_ctrl_surf_instr_size(i915, size);
- WARN_ON(!ccs_ring_size);
-
- cs = intel_ring_begin(rq, round_up(ccs_ring_size, 2));
+ cs = intel_ring_begin(rq, 12);
if (IS_ERR(cs))
return PTR_ERR(cs);
@@ -583,8 +555,7 @@ static int emit_copy_ccs(struct i915_request *rq,
FIELD_PREP(XY_CTRL_SURF_MOCS_MASK, mocs);
cs = i915_flush_dw(cs, MI_FLUSH_DW_LLC | MI_FLUSH_DW_CCS);
- if (ccs_ring_size & 1)
- *cs++ = MI_NOOP;
+ *cs++ = MI_NOOP;
intel_ring_advance(rq, cs);
@@ -638,40 +609,38 @@ static int emit_copy(struct i915_request *rq,
return 0;
}
-static int scatter_list_length(struct scatterlist *sg)
+static u64 scatter_list_length(struct scatterlist *sg)
{
- int len = 0;
+ u64 len = 0;
while (sg && sg_dma_len(sg)) {
len += sg_dma_len(sg);
sg = sg_next(sg);
- };
+ }
return len;
}
-static void
+static int
calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem,
- int *src_sz, u32 bytes_to_cpy, u32 ccs_bytes_to_cpy)
+ u64 bytes_to_cpy, u64 ccs_bytes_to_cpy)
{
- if (ccs_bytes_to_cpy) {
- if (!src_is_lmem)
- /*
- * When CHUNK_SZ is passed all the pages upto CHUNK_SZ
- * will be taken for the blt. in Flat-ccs supported
- * platform Smem obj will have more pages than required
- * for main meory hence limit it to the required size
- * for main memory
- */
- *src_sz = min_t(int, bytes_to_cpy, CHUNK_SZ);
- } else { /* ccs handling is not required */
- *src_sz = CHUNK_SZ;
- }
+ if (ccs_bytes_to_cpy && !src_is_lmem)
+ /*
+ * When CHUNK_SZ is passed all the pages upto CHUNK_SZ
+ * will be taken for the blt. in Flat-ccs supported
+ * platform Smem obj will have more pages than required
+ * for main meory hence limit it to the required size
+ * for main memory
+ */
+ return min_t(u64, bytes_to_cpy, CHUNK_SZ);
+ else
+ return CHUNK_SZ;
}
-static void get_ccs_sg_sgt(struct sgt_dma *it, u32 bytes_to_cpy)
+static void get_ccs_sg_sgt(struct sgt_dma *it, u64 bytes_to_cpy)
{
- u32 len;
+ u64 len;
do {
GEM_BUG_ON(!it->sg || !sg_dma_len(it->sg));
@@ -702,13 +671,13 @@ intel_context_migrate_copy(struct intel_context *ce,
{
struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst), it_ccs;
struct drm_i915_private *i915 = ce->engine->i915;
- u32 ccs_bytes_to_cpy = 0, bytes_to_cpy;
+ u64 ccs_bytes_to_cpy = 0, bytes_to_cpy;
enum i915_cache_level ccs_cache_level;
u32 src_offset, dst_offset;
u8 src_access, dst_access;
struct i915_request *rq;
- int src_sz, dst_sz;
- bool ccs_is_src;
+ u64 src_sz, dst_sz;
+ bool ccs_is_src, overwrite_ccs;
int err;
GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
@@ -749,6 +718,8 @@ intel_context_migrate_copy(struct intel_context *ce,
get_ccs_sg_sgt(&it_ccs, bytes_to_cpy);
}
+ overwrite_ccs = HAS_FLAT_CCS(i915) && !ccs_bytes_to_cpy && dst_is_lmem;
+
src_offset = 0;
dst_offset = CHUNK_SZ;
if (HAS_64K_PAGES(ce->engine->i915)) {
@@ -788,8 +759,8 @@ intel_context_migrate_copy(struct intel_context *ce,
if (err)
goto out_rq;
- calculate_chunk_sz(i915, src_is_lmem, &src_sz,
- bytes_to_cpy, ccs_bytes_to_cpy);
+ src_sz = calculate_chunk_sz(i915, src_is_lmem,
+ bytes_to_cpy, ccs_bytes_to_cpy);
len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem,
src_offset, src_sz);
@@ -852,6 +823,25 @@ intel_context_migrate_copy(struct intel_context *ce,
if (err)
goto out_rq;
ccs_bytes_to_cpy -= ccs_sz;
+ } else if (overwrite_ccs) {
+ err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ goto out_rq;
+
+ /*
+ * While we can't always restore/manage the CCS state,
+ * we still need to ensure we don't leak the CCS state
+ * from the previous user, so make sure we overwrite it
+ * with something.
+ */
+ err = emit_copy_ccs(rq, dst_offset, INDIRECT_ACCESS,
+ dst_offset, DIRECT_ACCESS, len);
+ if (err)
+ goto out_rq;
+
+ err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ goto out_rq;
}
/* Arbitration is re-enabled between requests. */
diff --git a/drivers/gpu/drm/i915/gt/intel_ppgtt.c b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
index bf8570ae749a..7ecfa672f738 100644
--- a/drivers/gpu/drm/i915/gt/intel_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ppgtt.c
@@ -206,8 +206,12 @@ void ppgtt_bind_vma(struct i915_address_space *vm,
void ppgtt_unbind_vma(struct i915_address_space *vm,
struct i915_vma_resource *vma_res)
{
- if (vma_res->allocated)
- vm->clear_range(vm, vma_res->start, vma_res->vma_size);
+ if (!vma_res->allocated)
+ return;
+
+ vm->clear_range(vm, vma_res->start, vma_res->vma_size);
+ if (vma_res->tlb)
+ vma_invalidate_tlb(vm, vma_res->tlb);
}
static unsigned long pd_count(u64 size, int shift)
diff --git a/drivers/gpu/drm/i915/gt/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
index 360b11fd57bb..f3ad93db0b21 100644
--- a/drivers/gpu/drm/i915/gt/intel_region_lmem.c
+++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c
@@ -17,6 +17,7 @@
#include "gt/intel_gt_mcr.h"
#include "gt/intel_gt_regs.h"
+#ifdef CONFIG_64BIT
static void _release_bars(struct pci_dev *pdev)
{
int resno;
@@ -111,6 +112,9 @@ static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t
pci_assign_unassigned_bus_resources(pdev->bus);
pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
}
+#else
+static void i915_resize_lmem_bar(struct drm_i915_private *i915, resource_size_t lmem_size) {}
+#endif
static int
region_lmem_release(struct intel_memory_region *mem)
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index c68d36fb5bbd..1211774e1d91 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1281,9 +1281,6 @@ static void intel_gt_reset_global(struct intel_gt *gt,
intel_wedge_on_timeout(&w, gt, 5 * HZ) {
intel_display_prepare_reset(gt->i915);
- /* Flush everyone using a resource about to be clobbered */
- synchronize_srcu_expedited(&gt->reset.backoff_srcu);
-
intel_gt_reset(gt, engine_mask, reason);
intel_display_finish_reset(gt->i915);
@@ -1392,6 +1389,9 @@ void intel_gt_handle_error(struct intel_gt *gt,
}
}
+ /* Flush everyone using a resource about to be clobbered */
+ synchronize_srcu_expedited(&gt->reset.backoff_srcu);
+
intel_gt_reset_global(gt, engine_mask, msg);
if (!intel_uc_uses_guc_submission(&gt->uc)) {
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index fb3f57ee450b..c7d381ad90cf 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -1979,7 +1979,9 @@ void intel_rps_init(struct intel_rps *rps)
/* Derive initial user preferences/limits from the hardware limits */
rps->max_freq_softlimit = rps->max_freq;
+ rps_to_gt(rps)->defaults.max_freq = rps->max_freq_softlimit;
rps->min_freq_softlimit = rps->min_freq;
+ rps_to_gt(rps)->defaults.min_freq = rps->min_freq_softlimit;
/* After setting max-softlimit, find the overclock max freq */
if (GRAPHICS_VER(i915) == 6 || IS_IVYBRIDGE(i915) || IS_HASWELL(i915)) {
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index e8111fce56d0..31e129329fb0 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -568,6 +568,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
+ wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
wa_add(wal,
@@ -2102,13 +2103,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
/* Wa_1509235366:dg2 */
wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
GLOBAL_INVALIDATION_MODE);
-
- /*
- * The following are not actually "workarounds" but rather
- * recommended tuning settings documented in the bspec's
- * performance guide section.
- */
- wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
}
if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
@@ -2119,6 +2113,13 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
}
+ if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
+ IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
+ /* Wa_1509727124:dg2 */
+ wa_masked_en(wal, GEN10_SAMPLER_MODE,
+ SC_DISABLE_POWER_OPTIMIZATION_EBB);
+ }
+
if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) ||
IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
/* Wa_14012419201:dg2 */
@@ -2195,15 +2196,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
}
- if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) ||
- IS_DG2_G11(i915)) {
- /* Wa_22012654132:dg2 */
- wa_add(wal, GEN10_CACHE_MODE_SS, 0,
- _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
- 0 /* write-only, so skip validation */,
- true);
- }
-
/* Wa_14013202645:dg2 */
if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
@@ -2670,6 +2662,49 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
}
/*
+ * The bspec performance guide has recommended MMIO tuning settings. These
+ * aren't truly "workarounds" but we want to program them with the same
+ * workaround infrastructure to ensure that they're automatically added to
+ * the GuC save/restore lists, re-applied at the right times, and checked for
+ * any conflicting programming requested by real workarounds.
+ *
+ * Programming settings should be added here only if their registers are not
+ * part of an engine's register state context. If a register is part of a
+ * context, then any tuning settings should be programmed in an appropriate
+ * function invoked by __intel_engine_init_ctx_wa().
+ */
+static void
+add_render_compute_tuning_settings(struct drm_i915_private *i915,
+ struct i915_wa_list *wal)
+{
+ if (IS_PONTEVECCHIO(i915)) {
+ wa_write(wal, XEHPC_L3SCRUB,
+ SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
+ }
+
+ if (IS_DG2(i915)) {
+ wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
+ wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
+ wa_write_clr_set(wal, DRAW_WATERMARK, VERT_WM_VAL,
+ REG_FIELD_PREP(VERT_WM_VAL, 0x3FF));
+
+ /*
+ * This is also listed as Wa_22012654132 for certain DG2
+ * steppings, but the tuning setting programming is a superset
+ * since it applies to all DG2 variants and steppings.
+ *
+ * Note that register 0xE420 is write-only and cannot be read
+ * back for verification on DG2 (due to Wa_14012342262), so
+ * we need to explicitly skip the readback.
+ */
+ wa_add(wal, GEN10_CACHE_MODE_SS, 0,
+ _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
+ 0 /* write-only, so skip validation */,
+ true);
+ }
+}
+
+/*
* The workarounds in this function apply to shared registers in
* the general render reset domain that aren't tied to a
* specific engine. Since all render+compute engines get reset
@@ -2683,14 +2718,9 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
{
struct drm_i915_private *i915 = engine->i915;
- if (IS_PONTEVECCHIO(i915)) {
- /*
- * The following is not actually a "workaround" but rather
- * a recommended tuning setting documented in the bspec's
- * performance guide section.
- */
- wa_write(wal, XEHPC_L3SCRUB, SCRUB_CL_DWNGRADE_SHARED | SCRUB_RATE_4B_PER_CLK);
+ add_render_compute_tuning_settings(i915, wal);
+ if (IS_PONTEVECCHIO(i915)) {
/* Wa_16016694945 */
wa_masked_en(wal, XEHPC_LNCFMISCCFGREG0, XEHPC_OVRLSCCC);
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 09f8cd2d0e2c..1e08b2473b99 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -2077,7 +2077,7 @@ static int __cancel_active0(struct live_preempt_cancel *arg)
goto out;
}
- intel_context_set_banned(rq->context);
+ intel_context_ban(rq->context, rq);
err = intel_engine_pulse(arg->engine);
if (err)
goto out;
@@ -2136,7 +2136,7 @@ static int __cancel_active1(struct live_preempt_cancel *arg)
if (err)
goto out;
- intel_context_set_banned(rq[1]->context);
+ intel_context_ban(rq[1]->context, rq[1]);
err = intel_engine_pulse(arg->engine);
if (err)
goto out;
@@ -2219,7 +2219,7 @@ static int __cancel_queued(struct live_preempt_cancel *arg)
if (err)
goto out;
- intel_context_set_banned(rq[2]->context);
+ intel_context_ban(rq[2]->context, rq[2]);
err = intel_engine_pulse(arg->engine);
if (err)
goto out;
@@ -2234,7 +2234,13 @@ static int __cancel_queued(struct live_preempt_cancel *arg)
goto out;
}
- if (rq[1]->fence.error != 0) {
+ /*
+ * The behavior between having semaphores and not is different. With
+ * semaphores the subsequent request is on the hardware and not cancelled
+ * while without the request is held in the driver and cancelled.
+ */
+ if (intel_engine_has_semaphores(rq[1]->engine) &&
+ rq[1]->fence.error != 0) {
pr_err("Normal inflight1 request did not complete\n");
err = -EINVAL;
goto out;
@@ -2282,7 +2288,7 @@ static int __cancel_hostile(struct live_preempt_cancel *arg)
goto out;
}
- intel_context_set_banned(rq->context);
+ intel_context_ban(rq->context, rq);
err = intel_engine_pulse(arg->engine); /* force reset */
if (err)
goto out;
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 6493265d5f64..7f3bb1d34dfb 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -1302,13 +1302,15 @@ static int igt_reset_wait(void *arg)
{
struct intel_gt *gt = arg;
struct i915_gpu_error *global = &gt->i915->gpu_error;
- struct intel_engine_cs *engine = gt->engine[RCS0];
+ struct intel_engine_cs *engine;
struct i915_request *rq;
unsigned int reset_count;
struct hang h;
long timeout;
int err;
+ engine = intel_selftest_find_any_engine(gt);
+
if (!engine || !intel_engine_can_store_dword(engine))
return 0;
@@ -1432,7 +1434,7 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
int (*fn)(void *),
unsigned int flags)
{
- struct intel_engine_cs *engine = gt->engine[RCS0];
+ struct intel_engine_cs *engine;
struct drm_i915_gem_object *obj;
struct task_struct *tsk = NULL;
struct i915_request *rq;
@@ -1444,6 +1446,8 @@ static int __igt_reset_evict_vma(struct intel_gt *gt,
if (!gt->ggtt->num_fences && flags & EXEC_OBJECT_NEEDS_FENCE)
return 0;
+ engine = intel_selftest_find_any_engine(gt);
+
if (!engine || !intel_engine_can_store_dword(engine))
return 0;
@@ -1819,12 +1823,14 @@ static int igt_handle_error(void *arg)
{
struct intel_gt *gt = arg;
struct i915_gpu_error *global = &gt->i915->gpu_error;
- struct intel_engine_cs *engine = gt->engine[RCS0];
+ struct intel_engine_cs *engine;
struct hang h;
struct i915_request *rq;
struct i915_gpu_coredump *error;
int err;
+ engine = intel_selftest_find_any_engine(gt);
+
/* Check that we can issue a global GPU and engine reset */
if (!intel_has_reset_engine(gt))
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index 4ef9990ed7f8..29ef8afc8c2e 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -122,6 +122,9 @@ enum intel_guc_action {
INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
INTEL_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003,
INTEL_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004,
+ INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY = 0x1005,
+ INTEL_GUC_ACTION_V69_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006,
+ INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007,
INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008,
INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B,
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
index df83c1cc7c7a..28b8387f97b7 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_communication_ctb_abi.h
@@ -37,6 +37,7 @@
* | | | - _`GUC_CTB_STATUS_OVERFLOW` = 1 (head/tail too large) |
* | | | - _`GUC_CTB_STATUS_UNDERFLOW` = 2 (truncated message) |
* | | | - _`GUC_CTB_STATUS_MISMATCH` = 4 (head/tail modified) |
+ * | | | - _`GUC_CTB_STATUS_UNUSED` = 8 (CTB is not in use) |
* +---+-------+--------------------------------------------------------------+
* |...| | RESERVED = MBZ |
* +---+-------+--------------------------------------------------------------+
@@ -49,9 +50,10 @@ struct guc_ct_buffer_desc {
u32 tail;
u32 status;
#define GUC_CTB_STATUS_NO_ERROR 0
-#define GUC_CTB_STATUS_OVERFLOW (1 << 0)
-#define GUC_CTB_STATUS_UNDERFLOW (1 << 1)
-#define GUC_CTB_STATUS_MISMATCH (1 << 2)
+#define GUC_CTB_STATUS_OVERFLOW BIT(0)
+#define GUC_CTB_STATUS_UNDERFLOW BIT(1)
+#define GUC_CTB_STATUS_MISMATCH BIT(2)
+#define GUC_CTB_STATUS_UNUSED BIT(3)
u32 reserved[13];
} __packed;
static_assert(sizeof(struct guc_ct_buffer_desc) == 64);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 2706a8c65090..50af247e84c4 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -389,6 +389,23 @@ void intel_guc_write_params(struct intel_guc *guc)
intel_uncore_forcewake_put(uncore, FORCEWAKE_GT);
}
+void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p)
+{
+ struct intel_gt *gt = guc_to_gt(guc);
+ intel_wakeref_t wakeref;
+ u32 stamp = 0;
+ u64 ktime;
+
+ with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
+ stamp = intel_uncore_read(gt->uncore, GUCPMTIMESTAMP);
+ ktime = ktime_get_boottime_ns();
+
+ drm_printf(p, "Kernel timestamp: 0x%08llX [%llu]\n", ktime, ktime);
+ drm_printf(p, "GuC timestamp: 0x%08X [%u]\n", stamp, stamp);
+ drm_printf(p, "CS timestamp frequency: %u Hz, %u ns\n",
+ gt->clock_frequency, gt->clock_period_ns);
+}
+
int intel_guc_init(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index d0d99f178f2d..804133df1ac9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -170,6 +170,11 @@ struct intel_guc {
/** @ads_engine_usage_size: size of engine usage in the ADS */
u32 ads_engine_usage_size;
+ /** @lrc_desc_pool_v69: object allocated to hold the GuC LRC descriptor pool */
+ struct i915_vma *lrc_desc_pool_v69;
+ /** @lrc_desc_pool_vaddr_v69: contents of the GuC LRC descriptor pool */
+ void *lrc_desc_pool_vaddr_v69;
+
/**
* @context_lookup: used to resolve intel_context from guc_id, if a
* context is present in this structure it is registered with the GuC
@@ -459,4 +464,6 @@ void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);
void intel_guc_write_barrier(struct intel_guc *guc);
+void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p);
+
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index ba7541f3ca61..74cbe8eaf531 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -464,7 +464,11 @@ static void fill_engine_enable_masks(struct intel_gt *gt,
}
#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32))
-#define LRC_SKIP_SIZE (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE)
+#define XEHP_LR_HW_CONTEXT_SIZE (96 * sizeof(u32))
+#define LR_HW_CONTEXT_SZ(i915) (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50) ? \
+ XEHP_LR_HW_CONTEXT_SIZE : \
+ LR_HW_CONTEXT_SIZE)
+#define LRC_SKIP_SIZE(i915) (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SZ(i915))
static int guc_prep_golden_context(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
@@ -525,7 +529,7 @@ static int guc_prep_golden_context(struct intel_guc *guc)
* on all engines).
*/
ads_blob_write(guc, ads.eng_state_size[guc_class],
- real_size - LRC_SKIP_SIZE);
+ real_size - LRC_SKIP_SIZE(gt->i915));
ads_blob_write(guc, ads.golden_context_lrca[guc_class],
addr_ggtt);
@@ -599,7 +603,7 @@ static void guc_init_golden_context(struct intel_guc *guc)
}
GEM_BUG_ON(ads_blob_read(guc, ads.eng_state_size[guc_class]) !=
- real_size - LRC_SKIP_SIZE);
+ real_size - LRC_SKIP_SIZE(gt->i915));
GEM_BUG_ON(ads_blob_read(guc, ads.golden_context_lrca[guc_class]) != addr_ggtt);
addr_ggtt += alloc_size;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
index 75257bd20ff0..b54b7883320b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
@@ -600,10 +600,8 @@ intel_guc_capture_getnullheader(struct intel_guc *guc,
return 0;
}
-#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3
-
-int
-intel_guc_capture_output_min_size_est(struct intel_guc *guc)
+static int
+guc_capture_output_min_size_est(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
struct intel_engine_cs *engine;
@@ -623,13 +621,8 @@ intel_guc_capture_output_min_size_est(struct intel_guc *guc)
* For each engine instance, there would be 1 x guc_state_capture_group_t output
* followed by 3 x guc_state_capture_t lists. The latter is how the register
* dumps are split across different register types (where the '3' are global vs class
- * vs instance). Finally, let's multiply the whole thing by 3x (just so we are
- * not limited to just 1 round of data in a worst case full register dump log)
- *
- * NOTE: intel_guc_log that allocates the log buffer would round this size up to
- * a power of two.
+ * vs instance).
*/
-
for_each_engine(engine, gt, id) {
worst_min_size += sizeof(struct guc_state_capture_group_header_t) +
(3 * sizeof(struct guc_state_capture_header_t));
@@ -649,7 +642,30 @@ intel_guc_capture_output_min_size_est(struct intel_guc *guc)
worst_min_size += (num_regs * sizeof(struct guc_mmio_reg));
- return (worst_min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER);
+ return worst_min_size;
+}
+
+/*
+ * Add on a 3x multiplier to allow for multiple back-to-back captures occurring
+ * before the i915 can read the data out and process it
+ */
+#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3
+
+static void check_guc_capture_size(struct intel_guc *guc)
+{
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ int min_size = guc_capture_output_min_size_est(guc);
+ int spare_size = min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER;
+
+ if (min_size < 0)
+ drm_warn(&i915->drm, "Failed to calculate GuC error state capture buffer minimum size: %d!\n",
+ min_size);
+ else if (min_size > CAPTURE_BUFFER_SIZE)
+ drm_warn(&i915->drm, "GuC error state capture buffer is too small: %d < %d\n",
+ CAPTURE_BUFFER_SIZE, min_size);
+ else if (spare_size > CAPTURE_BUFFER_SIZE)
+ drm_notice(&i915->drm, "GuC error state capture buffer maybe too small: %d < %d (min = %d)\n",
+ CAPTURE_BUFFER_SIZE, spare_size, min_size);
}
/*
@@ -1580,5 +1596,7 @@ int intel_guc_capture_init(struct intel_guc *guc)
INIT_LIST_HEAD(&guc->capture->outlist);
INIT_LIST_HEAD(&guc->capture->cachelist);
+ check_guc_capture_size(guc);
+
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
index d3d7bd0b6db6..fbd3713c7832 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.h
@@ -21,7 +21,6 @@ int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *m,
void intel_guc_capture_get_matching_node(struct intel_gt *gt, struct intel_engine_coredump *ee,
struct intel_context *ce);
void intel_guc_capture_process(struct intel_guc *guc);
-int intel_guc_capture_output_min_size_est(struct intel_guc *guc);
int intel_guc_capture_getlist(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
void **outptr);
int intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index f01325cd1b62..2b22065e87bf 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -455,6 +455,7 @@ corrupted:
/**
* wait_for_ct_request_update - Wait for CT request state update.
+ * @ct: pointer to CT
* @req: pointer to pending request
* @status: placeholder for status
*
@@ -467,9 +468,10 @@ corrupted:
* * 0 response received (status is valid)
* * -ETIMEDOUT no response within hardcoded timeout
*/
-static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
+static int wait_for_ct_request_update(struct intel_guc_ct *ct, struct ct_request *req, u32 *status)
{
int err;
+ bool ct_enabled;
/*
* Fast commands should complete in less than 10us, so sample quickly
@@ -481,12 +483,15 @@ static int wait_for_ct_request_update(struct ct_request *req, u32 *status)
#define GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS 10
#define GUC_CTB_RESPONSE_TIMEOUT_LONG_MS 1000
#define done \
- (FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \
+ (!(ct_enabled = intel_guc_ct_enabled(ct)) || \
+ FIELD_GET(GUC_HXG_MSG_0_ORIGIN, READ_ONCE(req->status)) == \
GUC_HXG_ORIGIN_GUC)
err = wait_for_us(done, GUC_CTB_RESPONSE_TIMEOUT_SHORT_MS);
if (err)
err = wait_for(done, GUC_CTB_RESPONSE_TIMEOUT_LONG_MS);
#undef done
+ if (!ct_enabled)
+ err = -ENODEV;
*status = req->status;
return err;
@@ -703,11 +708,18 @@ retry:
intel_guc_notify(ct_to_guc(ct));
- err = wait_for_ct_request_update(&request, status);
+ err = wait_for_ct_request_update(ct, &request, status);
g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
if (unlikely(err)) {
- CT_ERROR(ct, "No response for request %#x (fence %u)\n",
- action[0], request.fence);
+ if (err == -ENODEV)
+ /* wait_for_ct_request_update returns -ENODEV on reset/suspend in progress.
+ * In this case, output is debug rather than error info
+ */
+ CT_DEBUG(ct, "Request %#x (fence %u) cancelled as CTB is disabled\n",
+ action[0], request.fence);
+ else
+ CT_ERROR(ct, "No response for request %#x (fence %u)\n",
+ action[0], request.fence);
goto unlink;
}
@@ -771,8 +783,9 @@ int intel_guc_ct_send(struct intel_guc_ct *ct, const u32 *action, u32 len,
ret = ct_send(ct, action, len, response_buf, response_buf_size, &status);
if (unlikely(ret < 0)) {
- CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n",
- action[0], ERR_PTR(ret), status);
+ if (ret != -ENODEV)
+ CT_ERROR(ct, "Sending action %#x failed (%pe) status=%#X\n",
+ action[0], ERR_PTR(ret), status);
} else if (unlikely(ret)) {
CT_DEBUG(ct, "send action %#x returned %d (%#x)\n",
action[0], ret, ret);
@@ -816,8 +829,22 @@ static int ct_read(struct intel_guc_ct *ct, struct ct_incoming_msg **msg)
if (unlikely(ctb->broken))
return -EPIPE;
- if (unlikely(desc->status))
- goto corrupted;
+ if (unlikely(desc->status)) {
+ u32 status = desc->status;
+
+ if (status & GUC_CTB_STATUS_UNUSED) {
+ /*
+ * Potentially valid if a CLIENT_RESET request resulted in
+ * contexts/engines being reset. But should never happen as
+ * no contexts should be active when CLIENT_RESET is sent.
+ */
+ CT_ERROR(ct, "Unexpected G2H after GuC has stopped!\n");
+ status &= ~GUC_CTB_STATUS_UNUSED;
+ }
+
+ if (status)
+ goto corrupted;
+ }
GEM_BUG_ON(head > size);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index b3c9a9327f76..323b055e5db9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -204,6 +204,20 @@ struct guc_wq_item {
u32 fence_id;
} __packed;
+struct guc_process_desc_v69 {
+ u32 stage_id;
+ u64 db_base_addr;
+ u32 head;
+ u32 tail;
+ u32 error_offset;
+ u64 wq_base_addr;
+ u32 wq_size_bytes;
+ u32 wq_status;
+ u32 engine_presence;
+ u32 priority;
+ u32 reserved[36];
+} __packed;
+
struct guc_sched_wq_desc {
u32 head;
u32 tail;
@@ -228,6 +242,37 @@ struct guc_ctxt_registration_info {
};
#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0)
+/* Preempt to idle on quantum expiry */
+#define CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69 BIT(0)
+
+/*
+ * GuC Context registration descriptor.
+ * FIXME: This is only required to exist during context registration.
+ * The current 1:1 between guc_lrc_desc and LRCs for the lifetime of the LRC
+ * is not required.
+ */
+struct guc_lrc_desc_v69 {
+ u32 hw_context_desc;
+ u32 slpm_perf_mode_hint; /* SPLC v1 only */
+ u32 slpm_freq_hint;
+ u32 engine_submit_mask; /* In logical space */
+ u8 engine_class;
+ u8 reserved0[3];
+ u32 priority;
+ u32 process_desc;
+ u32 wq_addr;
+ u32 wq_size;
+ u32 context_flags; /* CONTEXT_REGISTRATION_* */
+ /* Time for one workload to execute. (in micro seconds) */
+ u32 execution_quantum;
+ /* Time to wait for a preemption request to complete before issuing a
+ * reset. (in micro seconds).
+ */
+ u32 preemption_timeout;
+ u32 policy_flags; /* CONTEXT_POLICY_* */
+ u32 reserved1[19];
+} __packed;
+
/* 32-bit KLV structure as used by policy updates and others */
struct guc_klv_generic_dw_t {
u32 kl;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index 25b2d7ce6640..4722d4b18ed1 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -15,6 +15,32 @@
static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log);
+static u32 intel_guc_log_size(struct intel_guc_log *log)
+{
+ /*
+ * GuC Log buffer Layout:
+ *
+ * NB: Ordering must follow "enum guc_log_buffer_type".
+ *
+ * +===============================+ 00B
+ * | Debug state header |
+ * +-------------------------------+ 32B
+ * | Crash dump state header |
+ * +-------------------------------+ 64B
+ * | Capture state header |
+ * +-------------------------------+ 96B
+ * | |
+ * +===============================+ PAGE_SIZE (4KB)
+ * | Debug logs |
+ * +===============================+ + DEBUG_SIZE
+ * | Crash Dump logs |
+ * +===============================+ + CRASH_SIZE
+ * | Capture logs |
+ * +===============================+ + CAPTURE_SIZE
+ */
+ return PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE + CAPTURE_BUFFER_SIZE;
+}
+
/**
* DOC: GuC firmware log
*
@@ -461,32 +487,7 @@ int intel_guc_log_create(struct intel_guc_log *log)
GEM_BUG_ON(log->vma);
- /*
- * GuC Log buffer Layout
- * (this ordering must follow "enum guc_log_buffer_type" definition)
- *
- * +===============================+ 00B
- * | Debug state header |
- * +-------------------------------+ 32B
- * | Crash dump state header |
- * +-------------------------------+ 64B
- * | Capture state header |
- * +-------------------------------+ 96B
- * | |
- * +===============================+ PAGE_SIZE (4KB)
- * | Debug logs |
- * +===============================+ + DEBUG_SIZE
- * | Crash Dump logs |
- * +===============================+ + CRASH_SIZE
- * | Capture logs |
- * +===============================+ + CAPTURE_SIZE
- */
- if (intel_guc_capture_output_min_size_est(guc) > CAPTURE_BUFFER_SIZE)
- DRM_WARN("GuC log buffer for state_capture maybe too small. %d < %d\n",
- CAPTURE_BUFFER_SIZE, intel_guc_capture_output_min_size_est(guc));
-
- guc_log_size = PAGE_SIZE + CRASH_BUFFER_SIZE + DEBUG_BUFFER_SIZE +
- CAPTURE_BUFFER_SIZE;
+ guc_log_size = intel_guc_log_size(log);
vma = intel_guc_allocate_vma(guc, guc_log_size);
if (IS_ERR(vma)) {
@@ -749,8 +750,9 @@ int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p,
struct intel_guc *guc = log_to_guc(log);
struct intel_uc *uc = container_of(guc, struct intel_uc, guc);
struct drm_i915_gem_object *obj = NULL;
- u32 *map;
- int i = 0;
+ void *map;
+ u32 *page;
+ int i, j;
if (!intel_guc_is_supported(guc))
return -ENODEV;
@@ -763,21 +765,34 @@ int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p,
if (!obj)
return 0;
+ page = (u32 *)__get_free_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ intel_guc_dump_time_info(guc, p);
+
map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
if (IS_ERR(map)) {
DRM_DEBUG("Failed to pin object\n");
drm_puts(p, "(log data unaccessible)\n");
+ free_page((unsigned long)page);
return PTR_ERR(map);
}
- for (i = 0; i < obj->base.size / sizeof(u32); i += 4)
- drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n",
- *(map + i), *(map + i + 1),
- *(map + i + 2), *(map + i + 3));
+ for (i = 0; i < obj->base.size; i += PAGE_SIZE) {
+ if (!i915_memcpy_from_wc(page, map + i, PAGE_SIZE))
+ memcpy(page, map + i, PAGE_SIZE);
+
+ for (j = 0; j < PAGE_SIZE / sizeof(u32); j += 4)
+ drm_printf(p, "0x%08x 0x%08x 0x%08x 0x%08x\n",
+ *(page + j + 0), *(page + j + 1),
+ *(page + j + 2), *(page + j + 3));
+ }
drm_puts(p, "\n");
i915_gem_object_unpin_map(obj);
+ free_page((unsigned long)page);
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
index 18007e639be9..dc9715411d62 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.h
@@ -22,11 +22,11 @@ struct intel_guc;
#elif defined(CONFIG_DRM_I915_DEBUG_GEM)
#define CRASH_BUFFER_SIZE SZ_1M
#define DEBUG_BUFFER_SIZE SZ_2M
-#define CAPTURE_BUFFER_SIZE SZ_1M
+#define CAPTURE_BUFFER_SIZE SZ_4M
#else
#define CRASH_BUFFER_SIZE SZ_8K
#define DEBUG_BUFFER_SIZE SZ_64K
-#define CAPTURE_BUFFER_SIZE SZ_16K
+#define CAPTURE_BUFFER_SIZE SZ_2M
#endif
/*
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
index 8dc063f087eb..a7092f711e9c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
@@ -102,6 +102,10 @@
#define GUC_SEND_TRIGGER (1<<0)
#define GEN11_GUC_HOST_INTERRUPT _MMIO(0x1901f0)
+#define GEN12_GUC_SEM_INTR_ENABLES _MMIO(0xc71c)
+#define GUC_SEM_INTR_ROUTE_TO_GUC BIT(31)
+#define GUC_SEM_INTR_ENABLE_ALL (0xff)
+
#define GUC_NUM_DOORBELLS 256
/* format of the HW-monitored doorbell cacheline */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index ec9c4ca0f615..e1fa1f32f29e 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -575,20 +575,24 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc)
* unless they have deviated from defaults, in which case,
* we retain the values and set min/max accordingly.
*/
- if (!slpc->max_freq_softlimit)
+ if (!slpc->max_freq_softlimit) {
slpc->max_freq_softlimit = slpc->rp0_freq;
- else if (slpc->max_freq_softlimit != slpc->rp0_freq)
+ slpc_to_gt(slpc)->defaults.max_freq = slpc->max_freq_softlimit;
+ } else if (slpc->max_freq_softlimit != slpc->rp0_freq) {
ret = intel_guc_slpc_set_max_freq(slpc,
slpc->max_freq_softlimit);
+ }
if (unlikely(ret))
return ret;
- if (!slpc->min_freq_softlimit)
+ if (!slpc->min_freq_softlimit) {
slpc->min_freq_softlimit = slpc->min_freq;
- else if (slpc->min_freq_softlimit != slpc->min_freq)
+ slpc_to_gt(slpc)->defaults.min_freq = slpc->min_freq_softlimit;
+ } else if (slpc->min_freq_softlimit != slpc->min_freq) {
return intel_guc_slpc_set_min_freq(slpc,
slpc->min_freq_softlimit);
+ }
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 40f726c61e95..0d17da77e787 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -414,12 +414,15 @@ struct sync_semaphore {
};
struct parent_scratch {
- struct guc_sched_wq_desc wq_desc;
+ union guc_descs {
+ struct guc_sched_wq_desc wq_desc;
+ struct guc_process_desc_v69 pdesc;
+ } descs;
struct sync_semaphore go;
struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];
- u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
+ u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];
u32 wq[WQ_SIZE / sizeof(u32)];
@@ -456,17 +459,23 @@ __get_parent_scratch(struct intel_context *ce)
LRC_STATE_OFFSET) / sizeof(u32)));
}
+static struct guc_process_desc_v69 *
+__get_process_desc_v69(struct intel_context *ce)
+{
+ struct parent_scratch *ps = __get_parent_scratch(ce);
+
+ return &ps->descs.pdesc;
+}
+
static struct guc_sched_wq_desc *
-__get_wq_desc(struct intel_context *ce)
+__get_wq_desc_v70(struct intel_context *ce)
{
struct parent_scratch *ps = __get_parent_scratch(ce);
- return &ps->wq_desc;
+ return &ps->descs.wq_desc;
}
-static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc,
- struct intel_context *ce,
- u32 wqi_size)
+static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
{
/*
* Check for space in work queue. Caching a value of head pointer in
@@ -476,7 +485,7 @@ static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc,
#define AVAILABLE_SPACE \
CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
if (wqi_size > AVAILABLE_SPACE) {
- ce->parallel.guc.wqi_head = READ_ONCE(wq_desc->head);
+ ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);
if (wqi_size > AVAILABLE_SPACE)
return NULL;
@@ -495,11 +504,55 @@ static inline struct intel_context *__get_context(struct intel_guc *guc, u32 id)
return ce;
}
+static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
+{
+ struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;
+
+ if (!base)
+ return NULL;
+
+ GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);
+
+ return &base[index];
+}
+
+static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
+{
+ u32 size;
+ int ret;
+
+ size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
+ GUC_MAX_CONTEXT_ID);
+ ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
+ (void **)&guc->lrc_desc_pool_vaddr_v69);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
+{
+ if (!guc->lrc_desc_pool_vaddr_v69)
+ return;
+
+ guc->lrc_desc_pool_vaddr_v69 = NULL;
+ i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
+}
+
static inline bool guc_submission_initialized(struct intel_guc *guc)
{
return guc->submission_initialized;
}
+static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
+{
+ struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);
+
+ if (desc)
+ memset(desc, 0, sizeof(*desc));
+}
+
static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
{
return __get_context(guc, id);
@@ -526,6 +579,8 @@ static inline void clr_ctx_id_mapping(struct intel_guc *guc, u32 id)
if (unlikely(!guc_submission_initialized(guc)))
return;
+ _reset_lrc_desc_v69(guc, id);
+
/*
* xarray API doesn't have xa_erase_irqsave wrapper, so calling
* the lower level functions directly.
@@ -611,7 +666,7 @@ int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout)
true, timeout);
}
-static int guc_context_policy_init(struct intel_context *ce, bool loop);
+static int guc_context_policy_init_v70(struct intel_context *ce, bool loop);
static int try_context_registration(struct intel_context *ce, bool loop);
static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
@@ -639,7 +694,7 @@ static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
GEM_BUG_ON(context_guc_id_invalid(ce));
if (context_policy_required(ce)) {
- err = guc_context_policy_init(ce, false);
+ err = guc_context_policy_init_v70(ce, false);
if (err)
return err;
}
@@ -737,9 +792,7 @@ static u32 wq_space_until_wrap(struct intel_context *ce)
return (WQ_SIZE - ce->parallel.guc.wqi_tail);
}
-static void write_wqi(struct guc_sched_wq_desc *wq_desc,
- struct intel_context *ce,
- u32 wqi_size)
+static void write_wqi(struct intel_context *ce, u32 wqi_size)
{
BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));
@@ -750,13 +803,12 @@ static void write_wqi(struct guc_sched_wq_desc *wq_desc,
ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
(WQ_SIZE - 1);
- WRITE_ONCE(wq_desc->tail, ce->parallel.guc.wqi_tail);
+ WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
}
static int guc_wq_noop_append(struct intel_context *ce)
{
- struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
- u32 *wqi = get_wq_pointer(wq_desc, ce, wq_space_until_wrap(ce));
+ u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
if (!wqi)
@@ -775,7 +827,6 @@ static int __guc_wq_item_append(struct i915_request *rq)
{
struct intel_context *ce = request_to_scheduling_context(rq);
struct intel_context *child;
- struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
unsigned int wqi_size = (ce->parallel.number_children + 4) *
sizeof(u32);
u32 *wqi;
@@ -795,7 +846,7 @@ static int __guc_wq_item_append(struct i915_request *rq)
return ret;
}
- wqi = get_wq_pointer(wq_desc, ce, wqi_size);
+ wqi = get_wq_pointer(ce, wqi_size);
if (!wqi)
return -EBUSY;
@@ -810,7 +861,7 @@ static int __guc_wq_item_append(struct i915_request *rq)
for_each_child(ce, child)
*wqi++ = child->ring->tail / sizeof(u64);
- write_wqi(wq_desc, ce, wqi_size);
+ write_wqi(ce, wqi_size);
return 0;
}
@@ -1812,20 +1863,34 @@ static void reset_fail_worker_func(struct work_struct *w);
int intel_guc_submission_init(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
+ int ret;
if (guc->submission_initialized)
return 0;
+ if (guc->fw.major_ver_found < 70) {
+ ret = guc_lrc_desc_pool_create_v69(guc);
+ if (ret)
+ return ret;
+ }
+
guc->submission_state.guc_ids_bitmap =
bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
- if (!guc->submission_state.guc_ids_bitmap)
- return -ENOMEM;
+ if (!guc->submission_state.guc_ids_bitmap) {
+ ret = -ENOMEM;
+ goto destroy_pool;
+ }
guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
guc->timestamp.shift = gpm_timestamp_shift(gt);
guc->submission_initialized = true;
return 0;
+
+destroy_pool:
+ guc_lrc_desc_pool_destroy_v69(guc);
+
+ return ret;
}
void intel_guc_submission_fini(struct intel_guc *guc)
@@ -1834,6 +1899,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
return;
guc_flush_destroyed_contexts(guc);
+ guc_lrc_desc_pool_destroy_v69(guc);
i915_sched_engine_put(guc->sched_engine);
bitmap_free(guc->submission_state.guc_ids_bitmap);
guc->submission_initialized = false;
@@ -2091,10 +2157,34 @@ static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce)
spin_unlock_irqrestore(&guc->submission_state.lock, flags);
}
-static int __guc_action_register_multi_lrc(struct intel_guc *guc,
- struct intel_context *ce,
- struct guc_ctxt_registration_info *info,
- bool loop)
+static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc,
+ struct intel_context *ce,
+ u32 guc_id,
+ u32 offset,
+ bool loop)
+{
+ struct intel_context *child;
+ u32 action[4 + MAX_ENGINE_INSTANCE];
+ int len = 0;
+
+ GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
+
+ action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
+ action[len++] = guc_id;
+ action[len++] = ce->parallel.number_children + 1;
+ action[len++] = offset;
+ for_each_child(ce, child) {
+ offset += sizeof(struct guc_lrc_desc_v69);
+ action[len++] = offset;
+ }
+
+ return guc_submission_send_busy_loop(guc, action, len, 0, loop);
+}
+
+static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc,
+ struct intel_context *ce,
+ struct guc_ctxt_registration_info *info,
+ bool loop)
{
struct intel_context *child;
u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
@@ -2134,9 +2224,24 @@ static int __guc_action_register_multi_lrc(struct intel_guc *guc,
return guc_submission_send_busy_loop(guc, action, len, 0, loop);
}
-static int __guc_action_register_context(struct intel_guc *guc,
- struct guc_ctxt_registration_info *info,
- bool loop)
+static int __guc_action_register_context_v69(struct intel_guc *guc,
+ u32 guc_id,
+ u32 offset,
+ bool loop)
+{
+ u32 action[] = {
+ INTEL_GUC_ACTION_REGISTER_CONTEXT,
+ guc_id,
+ offset,
+ };
+
+ return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
+ 0, loop);
+}
+
+static int __guc_action_register_context_v70(struct intel_guc *guc,
+ struct guc_ctxt_registration_info *info,
+ bool loop)
{
u32 action[] = {
INTEL_GUC_ACTION_REGISTER_CONTEXT,
@@ -2157,24 +2262,52 @@ static int __guc_action_register_context(struct intel_guc *guc,
0, loop);
}
-static void prepare_context_registration_info(struct intel_context *ce,
- struct guc_ctxt_registration_info *info);
+static void prepare_context_registration_info_v69(struct intel_context *ce);
+static void prepare_context_registration_info_v70(struct intel_context *ce,
+ struct guc_ctxt_registration_info *info);
-static int register_context(struct intel_context *ce, bool loop)
+static int
+register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop)
+{
+ u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) +
+ ce->guc_id.id * sizeof(struct guc_lrc_desc_v69);
+
+ prepare_context_registration_info_v69(ce);
+
+ if (intel_context_is_parent(ce))
+ return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id,
+ offset, loop);
+ else
+ return __guc_action_register_context_v69(guc, ce->guc_id.id,
+ offset, loop);
+}
+
+static int
+register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop)
{
struct guc_ctxt_registration_info info;
+
+ prepare_context_registration_info_v70(ce, &info);
+
+ if (intel_context_is_parent(ce))
+ return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop);
+ else
+ return __guc_action_register_context_v70(guc, &info, loop);
+}
+
+static int register_context(struct intel_context *ce, bool loop)
+{
struct intel_guc *guc = ce_to_guc(ce);
int ret;
GEM_BUG_ON(intel_context_is_child(ce));
trace_intel_context_register(ce);
- prepare_context_registration_info(ce, &info);
-
- if (intel_context_is_parent(ce))
- ret = __guc_action_register_multi_lrc(guc, ce, &info, loop);
+ if (guc->fw.major_ver_found >= 70)
+ ret = register_context_v70(guc, ce, loop);
else
- ret = __guc_action_register_context(guc, &info, loop);
+ ret = register_context_v69(guc, ce, loop);
+
if (likely(!ret)) {
unsigned long flags;
@@ -2182,7 +2315,8 @@ static int register_context(struct intel_context *ce, bool loop)
set_context_registered(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
- guc_context_policy_init(ce, loop);
+ if (guc->fw.major_ver_found >= 70)
+ guc_context_policy_init_v70(ce, loop);
}
return ret;
@@ -2279,14 +2413,13 @@ static int __guc_context_set_context_policies(struct intel_guc *guc,
0, loop);
}
-static int guc_context_policy_init(struct intel_context *ce, bool loop)
+static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
{
struct intel_engine_cs *engine = ce->engine;
struct intel_guc *guc = &engine->gt->uc.guc;
struct context_policy policy;
u32 execution_quantum;
u32 preemption_timeout;
- bool missing = false;
unsigned long flags;
int ret;
@@ -2304,32 +2437,9 @@ static int guc_context_policy_init(struct intel_context *ce, bool loop)
__guc_context_policy_add_preempt_to_idle(&policy, 1);
ret = __guc_context_set_context_policies(guc, &policy, loop);
- missing = ret != 0;
-
- if (!missing && intel_context_is_parent(ce)) {
- struct intel_context *child;
-
- for_each_child(ce, child) {
- __guc_context_policy_start_klv(&policy, child->guc_id.id);
-
- if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
- __guc_context_policy_add_preempt_to_idle(&policy, 1);
-
- child->guc_state.prio = ce->guc_state.prio;
- __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
- __guc_context_policy_add_execution_quantum(&policy, execution_quantum);
- __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
-
- ret = __guc_context_set_context_policies(guc, &policy, loop);
- if (ret) {
- missing = true;
- break;
- }
- }
- }
spin_lock_irqsave(&ce->guc_state.lock, flags);
- if (missing)
+ if (ret != 0)
set_context_policy_required(ce);
else
clr_context_policy_required(ce);
@@ -2338,6 +2448,19 @@ static int guc_context_policy_init(struct intel_context *ce, bool loop)
return ret;
}
+static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
+ struct guc_lrc_desc_v69 *desc)
+{
+ desc->policy_flags = 0;
+
+ if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
+ desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
+
+ /* NB: For both of these, zero means disabled. */
+ desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
+ desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
+}
+
static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
{
/*
@@ -2358,8 +2481,75 @@ static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
}
}
-static void prepare_context_registration_info(struct intel_context *ce,
- struct guc_ctxt_registration_info *info)
+static void prepare_context_registration_info_v69(struct intel_context *ce)
+{
+ struct intel_engine_cs *engine = ce->engine;
+ struct intel_guc *guc = &engine->gt->uc.guc;
+ u32 ctx_id = ce->guc_id.id;
+ struct guc_lrc_desc_v69 *desc;
+ struct intel_context *child;
+
+ GEM_BUG_ON(!engine->mask);
+
+ /*
+ * Ensure LRC + CT vmas are is same region as write barrier is done
+ * based on CT vma region.
+ */
+ GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
+ i915_gem_object_is_lmem(ce->ring->vma->obj));
+
+ desc = __get_lrc_desc_v69(guc, ctx_id);
+ desc->engine_class = engine_class_to_guc_class(engine->class);
+ desc->engine_submit_mask = engine->logical_mask;
+ desc->hw_context_desc = ce->lrc.lrca;
+ desc->priority = ce->guc_state.prio;
+ desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
+ guc_context_policy_init_v69(engine, desc);
+
+ /*
+ * If context is a parent, we need to register a process descriptor
+ * describing a work queue and register all child contexts.
+ */
+ if (intel_context_is_parent(ce)) {
+ struct guc_process_desc_v69 *pdesc;
+
+ ce->parallel.guc.wqi_tail = 0;
+ ce->parallel.guc.wqi_head = 0;
+
+ desc->process_desc = i915_ggtt_offset(ce->state) +
+ __get_parent_scratch_offset(ce);
+ desc->wq_addr = i915_ggtt_offset(ce->state) +
+ __get_wq_offset(ce);
+ desc->wq_size = WQ_SIZE;
+
+ pdesc = __get_process_desc_v69(ce);
+ memset(pdesc, 0, sizeof(*(pdesc)));
+ pdesc->stage_id = ce->guc_id.id;
+ pdesc->wq_base_addr = desc->wq_addr;
+ pdesc->wq_size_bytes = desc->wq_size;
+ pdesc->wq_status = WQ_STATUS_ACTIVE;
+
+ ce->parallel.guc.wq_head = &pdesc->head;
+ ce->parallel.guc.wq_tail = &pdesc->tail;
+ ce->parallel.guc.wq_status = &pdesc->wq_status;
+
+ for_each_child(ce, child) {
+ desc = __get_lrc_desc_v69(guc, child->guc_id.id);
+
+ desc->engine_class =
+ engine_class_to_guc_class(engine->class);
+ desc->hw_context_desc = child->lrc.lrca;
+ desc->priority = ce->guc_state.prio;
+ desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
+ guc_context_policy_init_v69(engine, desc);
+ }
+
+ clear_children_join_go_memory(ce);
+ }
+}
+
+static void prepare_context_registration_info_v70(struct intel_context *ce,
+ struct guc_ctxt_registration_info *info)
{
struct intel_engine_cs *engine = ce->engine;
struct intel_guc *guc = &engine->gt->uc.guc;
@@ -2409,10 +2599,14 @@ static void prepare_context_registration_info(struct intel_context *ce,
info->wq_base_hi = upper_32_bits(wq_base_offset);
info->wq_size = WQ_SIZE;
- wq_desc = __get_wq_desc(ce);
+ wq_desc = __get_wq_desc_v70(ce);
memset(wq_desc, 0, sizeof(*wq_desc));
wq_desc->wq_status = WQ_STATUS_ACTIVE;
+ ce->parallel.guc.wq_head = &wq_desc->head;
+ ce->parallel.guc.wq_tail = &wq_desc->tail;
+ ce->parallel.guc.wq_status = &wq_desc->wq_status;
+
clear_children_join_go_memory(ce);
}
}
@@ -2727,11 +2921,21 @@ static void __guc_context_set_preemption_timeout(struct intel_guc *guc,
u16 guc_id,
u32 preemption_timeout)
{
- struct context_policy policy;
+ if (guc->fw.major_ver_found >= 70) {
+ struct context_policy policy;
- __guc_context_policy_start_klv(&policy, guc_id);
- __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
- __guc_context_set_context_policies(guc, &policy, true);
+ __guc_context_policy_start_klv(&policy, guc_id);
+ __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
+ __guc_context_set_context_policies(guc, &policy, true);
+ } else {
+ u32 action[] = {
+ INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT,
+ guc_id,
+ preemption_timeout
+ };
+
+ intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
+ }
}
static void
@@ -2982,11 +3186,21 @@ static int guc_context_alloc(struct intel_context *ce)
static void __guc_context_set_prio(struct intel_guc *guc,
struct intel_context *ce)
{
- struct context_policy policy;
+ if (guc->fw.major_ver_found >= 70) {
+ struct context_policy policy;
- __guc_context_policy_start_klv(&policy, ce->guc_id.id);
- __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
- __guc_context_set_context_policies(guc, &policy, true);
+ __guc_context_policy_start_klv(&policy, ce->guc_id.id);
+ __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
+ __guc_context_set_context_policies(guc, &policy, true);
+ } else {
+ u32 action[] = {
+ INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY,
+ ce->guc_id.id,
+ ce->guc_state.prio,
+ };
+
+ guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
+ }
}
static void guc_context_set_prio(struct intel_guc *guc,
@@ -3953,13 +4167,27 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
void intel_guc_submission_enable(struct intel_guc *guc)
{
+ struct intel_gt *gt = guc_to_gt(guc);
+
+ /* Enable and route to GuC */
+ if (GRAPHICS_VER(gt->i915) >= 12)
+ intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES,
+ GUC_SEM_INTR_ROUTE_TO_GUC |
+ GUC_SEM_INTR_ENABLE_ALL);
+
guc_init_lrc_mapping(guc);
guc_init_engine_stats(guc);
}
void intel_guc_submission_disable(struct intel_guc *guc)
{
+ struct intel_gt *gt = guc_to_gt(guc);
+
/* Note: By the time we're here, GuC may have already been reset */
+
+ /* Disable and route to host */
+ if (GRAPHICS_VER(gt->i915) >= 12)
+ intel_uncore_write(gt->uncore, GEN12_GUC_SEM_INTR_ENABLES, 0x0);
}
static bool __guc_submission_supported(struct intel_guc *guc)
@@ -4496,17 +4724,19 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc,
guc_log_context_priority(p, ce);
if (intel_context_is_parent(ce)) {
- struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
struct intel_context *child;
drm_printf(p, "\t\tNumber children: %u\n",
ce->parallel.number_children);
- drm_printf(p, "\t\tWQI Head: %u\n",
- READ_ONCE(wq_desc->head));
- drm_printf(p, "\t\tWQI Tail: %u\n",
- READ_ONCE(wq_desc->tail));
- drm_printf(p, "\t\tWQI Status: %u\n\n",
- READ_ONCE(wq_desc->wq_status));
+
+ if (ce->parallel.guc.wq_status) {
+ drm_printf(p, "\t\tWQI Head: %u\n",
+ READ_ONCE(*ce->parallel.guc.wq_head));
+ drm_printf(p, "\t\tWQI Tail: %u\n",
+ READ_ONCE(*ce->parallel.guc.wq_tail));
+ drm_printf(p, "\t\tWQI Status: %u\n\n",
+ READ_ONCE(*ce->parallel.guc.wq_status));
+ }
if (ce->engine->emit_bb_start ==
emit_bb_start_parent_no_preempt_mid_batch) {
@@ -4923,4 +5153,5 @@ bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_guc.c"
#include "selftest_guc_multi_lrc.c"
+#include "selftest_guc_hangcheck.c"
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 27363091e1af..58547292efa0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -53,7 +53,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
* firmware as TGL.
*/
#define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_def) \
- fw_def(DG2, 0, guc_def(dg2, 70, 1, 2)) \
+ fw_def(DG2, 0, guc_def(dg2, 70, 4, 1)) \
fw_def(ALDERLAKE_P, 0, guc_def(adlp, 70, 1, 1)) \
fw_def(ALDERLAKE_S, 0, guc_def(tgl, 70, 1, 1)) \
fw_def(DG1, 0, guc_def(dg1, 70, 1, 1)) \
@@ -70,6 +70,10 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
fw_def(BROXTON, 0, guc_def(bxt, 70, 1, 1)) \
fw_def(SKYLAKE, 0, guc_def(skl, 70, 1, 1))
+#define INTEL_GUC_FIRMWARE_DEFS_FALLBACK(fw_def, guc_def) \
+ fw_def(ALDERLAKE_P, 0, guc_def(adlp, 69, 0, 3)) \
+ fw_def(ALDERLAKE_S, 0, guc_def(tgl, 69, 0, 3))
+
#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \
fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \
fw_def(ALDERLAKE_S, 0, huc_def(tgl, 7, 9, 3)) \
@@ -105,6 +109,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
MODULE_FIRMWARE(uc_);
INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH)
+INTEL_GUC_FIRMWARE_DEFS_FALLBACK(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH)
INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH)
/* The below structs and macros are used to iterate across the list of blobs */
@@ -149,6 +154,9 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
static const struct uc_fw_platform_requirement blobs_guc[] = {
INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB)
};
+ static const struct uc_fw_platform_requirement blobs_guc_fallback[] = {
+ INTEL_GUC_FIRMWARE_DEFS_FALLBACK(MAKE_FW_LIST, GUC_FW_BLOB)
+ };
static const struct uc_fw_platform_requirement blobs_huc[] = {
INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB)
};
@@ -179,12 +187,29 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) {
const struct uc_fw_blob *blob = &fw_blobs[i].blob;
uc_fw->path = blob->path;
+ uc_fw->wanted_path = blob->path;
uc_fw->major_ver_wanted = blob->major;
uc_fw->minor_ver_wanted = blob->minor;
break;
}
}
+ if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) {
+ const struct uc_fw_platform_requirement *blobs = blobs_guc_fallback;
+ u32 count = ARRAY_SIZE(blobs_guc_fallback);
+
+ for (i = 0; i < count && p <= blobs[i].p; i++) {
+ if (p == blobs[i].p && rev >= blobs[i].rev) {
+ const struct uc_fw_blob *blob = &blobs[i].blob;
+
+ uc_fw->fallback.path = blob->path;
+ uc_fw->fallback.major_ver = blob->major;
+ uc_fw->fallback.minor_ver = blob->minor;
+ break;
+ }
+ }
+ }
+
/* make sure the list is ordered as expected */
if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) {
for (i = 1; i < fw_count; i++) {
@@ -195,11 +220,11 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
fw_blobs[i].rev < fw_blobs[i - 1].rev)
continue;
- pr_err("invalid FW blob order: %s r%u comes before %s r%u\n",
- intel_platform_name(fw_blobs[i - 1].p),
- fw_blobs[i - 1].rev,
- intel_platform_name(fw_blobs[i].p),
- fw_blobs[i].rev);
+ drm_err(&i915->drm, "Invalid FW blob order: %s r%u comes before %s r%u\n",
+ intel_platform_name(fw_blobs[i - 1].p),
+ fw_blobs[i - 1].rev,
+ intel_platform_name(fw_blobs[i].p),
+ fw_blobs[i].rev);
uc_fw->path = NULL;
}
@@ -412,7 +437,24 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
__force_fw_fetch_failures(uc_fw, -EINVAL);
__force_fw_fetch_failures(uc_fw, -ESTALE);
- err = request_firmware(&fw, uc_fw->path, dev);
+ err = firmware_request_nowarn(&fw, uc_fw->path, dev);
+ if (err && !intel_uc_fw_is_overridden(uc_fw) && uc_fw->fallback.path) {
+ err = firmware_request_nowarn(&fw, uc_fw->fallback.path, dev);
+ if (!err) {
+ drm_notice(&i915->drm,
+ "%s firmware %s is recommended, but only %s was found\n",
+ intel_uc_fw_type_repr(uc_fw->type),
+ uc_fw->wanted_path,
+ uc_fw->fallback.path);
+ drm_info(&i915->drm,
+ "Consider updating your linux-firmware pkg or downloading from %s\n",
+ INTEL_UC_FIRMWARE_URL);
+
+ uc_fw->path = uc_fw->fallback.path;
+ uc_fw->major_ver_wanted = uc_fw->fallback.major_ver;
+ uc_fw->minor_ver_wanted = uc_fw->fallback.minor_ver;
+ }
+ }
if (err)
goto fail;
@@ -460,8 +502,8 @@ fail:
INTEL_UC_FIRMWARE_MISSING :
INTEL_UC_FIRMWARE_ERROR);
- drm_notice(&i915->drm, "%s firmware %s: fetch failed with error %d\n",
- intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
+ i915_probe_error(i915, "%s firmware %s: fetch failed with error %d\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
drm_info(&i915->drm, "%s firmware(s) can be downloaded from %s\n",
intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL);
@@ -822,7 +864,13 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len)
void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p)
{
drm_printf(p, "%s firmware: %s\n",
- intel_uc_fw_type_repr(uc_fw->type), uc_fw->path);
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->wanted_path);
+ if (uc_fw->fallback.path) {
+ drm_printf(p, "%s firmware fallback: %s\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->fallback.path);
+ drm_printf(p, "fallback selected: %s\n",
+ str_yes_no(uc_fw->path == uc_fw->fallback.path));
+ }
drm_printf(p, "\tstatus: %s\n",
intel_uc_fw_status_repr(uc_fw->status));
drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n",
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 4f169035f504..7aa2644400b9 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -74,6 +74,7 @@ struct intel_uc_fw {
const enum intel_uc_fw_status status;
enum intel_uc_fw_status __status; /* no accidental overwrites */
};
+ const char *wanted_path;
const char *path;
bool user_overridden;
size_t size;
@@ -98,6 +99,12 @@ struct intel_uc_fw {
u16 major_ver_found;
u16 minor_ver_found;
+ struct {
+ const char *path;
+ u16 major_ver;
+ u16 minor_ver;
+ } fallback;
+
u32 rsa_size;
u32 ucode_size;
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index 1df71d0796ae..20e0c39259fb 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -62,7 +62,7 @@ static int intel_guc_scrub_ctbs(void *arg)
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
ret = PTR_ERR(ce);
- pr_err("Failed to create context, %d: %d\n", i, ret);
+ drm_err(&gt->i915->drm, "Failed to create context, %d: %d\n", i, ret);
goto err;
}
@@ -83,7 +83,7 @@ static int intel_guc_scrub_ctbs(void *arg)
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
- pr_err("Failed to create request, %d: %d\n", i, ret);
+ drm_err(&gt->i915->drm, "Failed to create request, %d: %d\n", i, ret);
goto err;
}
@@ -93,7 +93,7 @@ static int intel_guc_scrub_ctbs(void *arg)
for (i = 0; i < 3; ++i) {
ret = i915_request_wait(last[i], 0, HZ);
if (ret < 0) {
- pr_err("Last request failed to complete: %d\n", ret);
+ drm_err(&gt->i915->drm, "Last request failed to complete: %d\n", ret);
goto err;
}
i915_request_put(last[i]);
@@ -110,7 +110,7 @@ static int intel_guc_scrub_ctbs(void *arg)
/* GT will not idle if G2H are lost */
ret = intel_gt_wait_for_idle(gt, HZ);
if (ret < 0) {
- pr_err("GT failed to idle: %d\n", ret);
+ drm_err(&gt->i915->drm, "GT failed to idle: %d\n", ret);
goto err;
}
@@ -150,7 +150,7 @@ static int intel_guc_steal_guc_ids(void *arg)
ce = kcalloc(GUC_MAX_CONTEXT_ID, sizeof(*ce), GFP_KERNEL);
if (!ce) {
- pr_err("Context array allocation failed\n");
+ drm_err(&gt->i915->drm, "Context array allocation failed\n");
return -ENOMEM;
}
@@ -164,24 +164,24 @@ static int intel_guc_steal_guc_ids(void *arg)
if (IS_ERR(ce[context_index])) {
ret = PTR_ERR(ce[context_index]);
ce[context_index] = NULL;
- pr_err("Failed to create context: %d\n", ret);
+ drm_err(&gt->i915->drm, "Failed to create context: %d\n", ret);
goto err_wakeref;
}
ret = igt_spinner_init(&spin, engine->gt);
if (ret) {
- pr_err("Failed to create spinner: %d\n", ret);
+ drm_err(&gt->i915->drm, "Failed to create spinner: %d\n", ret);
goto err_contexts;
}
spin_rq = igt_spinner_create_request(&spin, ce[context_index],
MI_ARB_CHECK);
if (IS_ERR(spin_rq)) {
ret = PTR_ERR(spin_rq);
- pr_err("Failed to create spinner request: %d\n", ret);
+ drm_err(&gt->i915->drm, "Failed to create spinner request: %d\n", ret);
goto err_contexts;
}
ret = request_add_spin(spin_rq, &spin);
if (ret) {
- pr_err("Failed to add Spinner request: %d\n", ret);
+ drm_err(&gt->i915->drm, "Failed to add Spinner request: %d\n", ret);
goto err_spin_rq;
}
@@ -191,7 +191,7 @@ static int intel_guc_steal_guc_ids(void *arg)
if (IS_ERR(ce[context_index])) {
ret = PTR_ERR(ce[context_index--]);
ce[context_index] = NULL;
- pr_err("Failed to create context: %d\n", ret);
+ drm_err(&gt->i915->drm, "Failed to create context: %d\n", ret);
goto err_spin_rq;
}
@@ -200,8 +200,8 @@ static int intel_guc_steal_guc_ids(void *arg)
ret = PTR_ERR(rq);
rq = NULL;
if (ret != -EAGAIN) {
- pr_err("Failed to create request, %d: %d\n",
- context_index, ret);
+ drm_err(&gt->i915->drm, "Failed to create request, %d: %d\n",
+ context_index, ret);
goto err_spin_rq;
}
} else {
@@ -215,7 +215,7 @@ static int intel_guc_steal_guc_ids(void *arg)
igt_spinner_end(&spin);
ret = intel_selftest_wait_for_rq(spin_rq);
if (ret) {
- pr_err("Spin request failed to complete: %d\n", ret);
+ drm_err(&gt->i915->drm, "Spin request failed to complete: %d\n", ret);
i915_request_put(last);
goto err_spin_rq;
}
@@ -227,7 +227,7 @@ static int intel_guc_steal_guc_ids(void *arg)
ret = i915_request_wait(last, 0, HZ * 30);
i915_request_put(last);
if (ret < 0) {
- pr_err("Last request failed to complete: %d\n", ret);
+ drm_err(&gt->i915->drm, "Last request failed to complete: %d\n", ret);
goto err_spin_rq;
}
@@ -235,7 +235,7 @@ static int intel_guc_steal_guc_ids(void *arg)
rq = nop_user_request(ce[context_index], NULL);
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
- pr_err("Failed to steal guc_id, %d: %d\n", context_index, ret);
+ drm_err(&gt->i915->drm, "Failed to steal guc_id, %d: %d\n", context_index, ret);
goto err_spin_rq;
}
@@ -243,21 +243,20 @@ static int intel_guc_steal_guc_ids(void *arg)
ret = i915_request_wait(rq, 0, HZ);
i915_request_put(rq);
if (ret < 0) {
- pr_err("Request with stolen guc_id failed to complete: %d\n",
- ret);
+ drm_err(&gt->i915->drm, "Request with stolen guc_id failed to complete: %d\n", ret);
goto err_spin_rq;
}
/* Wait for idle */
ret = intel_gt_wait_for_idle(gt, HZ * 30);
if (ret < 0) {
- pr_err("GT failed to idle: %d\n", ret);
+ drm_err(&gt->i915->drm, "GT failed to idle: %d\n", ret);
goto err_spin_rq;
}
/* Verify a guc_id was stolen */
if (guc->number_guc_id_stolen == number_guc_id_stolen) {
- pr_err("No guc_id was stolen");
+ drm_err(&gt->i915->drm, "No guc_id was stolen");
ret = -EINVAL;
} else {
ret = 0;
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
new file mode 100644
index 000000000000..01f8cd3c3134
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "selftests/igt_spinner.h"
+#include "selftests/igt_reset.h"
+#include "selftests/intel_scheduler_helpers.h"
+#include "gt/intel_engine_heartbeat.h"
+#include "gem/selftests/mock_context.h"
+
+#define BEAT_INTERVAL 100
+
+static struct i915_request *nop_request(struct intel_engine_cs *engine)
+{
+ struct i915_request *rq;
+
+ rq = intel_engine_create_kernel_request(engine);
+ if (IS_ERR(rq))
+ return rq;
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ return rq;
+}
+
+static int intel_hang_guc(void *arg)
+{
+ struct intel_gt *gt = arg;
+ int ret = 0;
+ struct i915_gem_context *ctx;
+ struct intel_context *ce;
+ struct igt_spinner spin;
+ struct i915_request *rq;
+ intel_wakeref_t wakeref;
+ struct i915_gpu_error *global = &gt->i915->gpu_error;
+ struct intel_engine_cs *engine;
+ unsigned int reset_count;
+ u32 guc_status;
+ u32 old_beat;
+
+ ctx = kernel_context(gt->i915, NULL);
+ if (IS_ERR(ctx)) {
+ drm_err(&gt->i915->drm, "Failed get kernel context: %ld\n", PTR_ERR(ctx));
+ return PTR_ERR(ctx);
+ }
+
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+
+ ce = intel_context_create(gt->engine[BCS0]);
+ if (IS_ERR(ce)) {
+ ret = PTR_ERR(ce);
+ drm_err(&gt->i915->drm, "Failed to create spinner request: %d\n", ret);
+ goto err;
+ }
+
+ engine = ce->engine;
+ reset_count = i915_reset_count(global);
+
+ old_beat = engine->props.heartbeat_interval_ms;
+ ret = intel_engine_set_heartbeat(engine, BEAT_INTERVAL);
+ if (ret) {
+ drm_err(&gt->i915->drm, "Failed to boost heatbeat interval: %d\n", ret);
+ goto err;
+ }
+
+ ret = igt_spinner_init(&spin, engine->gt);
+ if (ret) {
+ drm_err(&gt->i915->drm, "Failed to create spinner: %d\n", ret);
+ goto err;
+ }
+
+ rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+ intel_context_put(ce);
+ if (IS_ERR(rq)) {
+ ret = PTR_ERR(rq);
+ drm_err(&gt->i915->drm, "Failed to create spinner request: %d\n", ret);
+ goto err_spin;
+ }
+
+ ret = request_add_spin(rq, &spin);
+ if (ret) {
+ i915_request_put(rq);
+ drm_err(&gt->i915->drm, "Failed to add Spinner request: %d\n", ret);
+ goto err_spin;
+ }
+
+ ret = intel_reset_guc(gt);
+ if (ret) {
+ i915_request_put(rq);
+ drm_err(&gt->i915->drm, "Failed to reset GuC, ret = %d\n", ret);
+ goto err_spin;
+ }
+
+ guc_status = intel_uncore_read(gt->uncore, GUC_STATUS);
+ if (!(guc_status & GS_MIA_IN_RESET)) {
+ i915_request_put(rq);
+ drm_err(&gt->i915->drm, "GuC failed to reset: status = 0x%08X\n", guc_status);
+ ret = -EIO;
+ goto err_spin;
+ }
+
+ /* Wait for the heartbeat to cause a reset */
+ ret = intel_selftest_wait_for_rq(rq);
+ i915_request_put(rq);
+ if (ret) {
+ drm_err(&gt->i915->drm, "Request failed to complete: %d\n", ret);
+ goto err_spin;
+ }
+
+ if (i915_reset_count(global) == reset_count) {
+ drm_err(&gt->i915->drm, "Failed to record a GPU reset\n");
+ ret = -EINVAL;
+ goto err_spin;
+ }
+
+err_spin:
+ igt_spinner_end(&spin);
+ igt_spinner_fini(&spin);
+ intel_engine_set_heartbeat(engine, old_beat);
+
+ if (ret == 0) {
+ rq = nop_request(engine);
+ if (IS_ERR(rq)) {
+ ret = PTR_ERR(rq);
+ goto err;
+ }
+
+ ret = intel_selftest_wait_for_rq(rq);
+ i915_request_put(rq);
+ if (ret) {
+ drm_err(&gt->i915->drm, "No-op failed to complete: %d\n", ret);
+ goto err;
+ }
+ }
+
+err:
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+ kernel_context_close(ctx);
+
+ return ret;
+}
+
+int intel_guc_hang_check(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(intel_hang_guc),
+ };
+ struct intel_gt *gt = to_gt(i915);
+
+ if (intel_gt_is_wedged(gt))
+ return 0;
+
+ if (!intel_uc_uses_guc_submission(&gt->uc))
+ return 0;
+
+ return intel_gt_live_subtests(tests, gt);
+}
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
index 812220a43df8..d17982c36d25 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_multi_lrc.c
@@ -115,30 +115,30 @@ static int __intel_guc_multi_lrc_basic(struct intel_gt *gt, unsigned int class)
parent = multi_lrc_create_parent(gt, class, 0);
if (IS_ERR(parent)) {
- pr_err("Failed creating contexts: %ld", PTR_ERR(parent));
+ drm_err(&gt->i915->drm, "Failed creating contexts: %ld", PTR_ERR(parent));
return PTR_ERR(parent);
} else if (!parent) {
- pr_debug("Not enough engines in class: %d", class);
+ drm_dbg(&gt->i915->drm, "Not enough engines in class: %d", class);
return 0;
}
rq = multi_lrc_nop_request(parent);
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);
- pr_err("Failed creating requests: %d", ret);
+ drm_err(&gt->i915->drm, "Failed creating requests: %d", ret);
goto out;
}
ret = intel_selftest_wait_for_rq(rq);
if (ret)
- pr_err("Failed waiting on request: %d", ret);
+ drm_err(&gt->i915->drm, "Failed waiting on request: %d", ret);
i915_request_put(rq);
if (ret >= 0) {
ret = intel_gt_wait_for_idle(gt, HZ * 5);
if (ret < 0)
- pr_err("GT failed to idle: %d\n", ret);
+ drm_err(&gt->i915->drm, "GT failed to idle: %d\n", ret);
}
out:
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 0ba2a3455d99..de13f102d4fd 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -3117,9 +3117,9 @@ void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu)
continue;
vaddr = shmem_pin_map(engine->default_state);
- if (IS_ERR(vaddr)) {
- gvt_err("failed to map %s->default state, err:%zd\n",
- engine->name, PTR_ERR(vaddr));
+ if (!vaddr) {
+ gvt_err("failed to map %s->default state\n",
+ engine->name);
return;
}
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index aee1a45da74b..705689e64011 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -226,7 +226,6 @@ struct intel_vgpu {
unsigned long nr_cache_entries;
struct mutex cache_lock;
- struct notifier_block iommu_notifier;
atomic_t released;
struct kvm_page_track_notifier_node track_node;
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index e2f6c56ab342..e3cd58946477 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -231,57 +231,38 @@ static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
unsigned long size)
{
- struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
- int total_pages;
- int npage;
- int ret;
-
- total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;
-
- for (npage = 0; npage < total_pages; npage++) {
- unsigned long cur_gfn = gfn + npage;
-
- ret = vfio_unpin_pages(&vgpu->vfio_device, &cur_gfn, 1);
- drm_WARN_ON(&i915->drm, ret != 1);
- }
+ vfio_unpin_pages(&vgpu->vfio_device, gfn << PAGE_SHIFT,
+ DIV_ROUND_UP(size, PAGE_SIZE));
}
/* Pin a normal or compound guest page for dma. */
static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
unsigned long size, struct page **page)
{
- unsigned long base_pfn = 0;
- int total_pages;
+ int total_pages = DIV_ROUND_UP(size, PAGE_SIZE);
+ struct page *base_page = NULL;
int npage;
int ret;
- total_pages = roundup(size, PAGE_SIZE) / PAGE_SIZE;
/*
* We pin the pages one-by-one to avoid allocating a big arrary
* on stack to hold pfns.
*/
for (npage = 0; npage < total_pages; npage++) {
- unsigned long cur_gfn = gfn + npage;
- unsigned long pfn;
+ dma_addr_t cur_iova = (gfn + npage) << PAGE_SHIFT;
+ struct page *cur_page;
- ret = vfio_pin_pages(&vgpu->vfio_device, &cur_gfn, 1,
- IOMMU_READ | IOMMU_WRITE, &pfn);
+ ret = vfio_pin_pages(&vgpu->vfio_device, cur_iova, 1,
+ IOMMU_READ | IOMMU_WRITE, &cur_page);
if (ret != 1) {
- gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n",
- cur_gfn, ret);
- goto err;
- }
-
- if (!pfn_valid(pfn)) {
- gvt_vgpu_err("pfn 0x%lx is not mem backed\n", pfn);
- npage++;
- ret = -EFAULT;
+ gvt_vgpu_err("vfio_pin_pages failed for iova %pad, ret %d\n",
+ &cur_iova, ret);
goto err;
}
if (npage == 0)
- base_pfn = pfn;
- else if (base_pfn + npage != pfn) {
+ base_page = cur_page;
+ else if (base_page + npage != cur_page) {
gvt_vgpu_err("The pages are not continuous\n");
ret = -EINVAL;
npage++;
@@ -289,7 +270,7 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
}
}
- *page = pfn_to_page(base_pfn);
+ *page = base_page;
return 0;
err:
gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
@@ -729,34 +710,25 @@ int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num)
return ret;
}
-static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
+static void intel_vgpu_dma_unmap(struct vfio_device *vfio_dev, u64 iova,
+ u64 length)
{
- struct intel_vgpu *vgpu =
- container_of(nb, struct intel_vgpu, iommu_notifier);
-
- if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
- struct vfio_iommu_type1_dma_unmap *unmap = data;
- struct gvt_dma *entry;
- unsigned long iov_pfn, end_iov_pfn;
+ struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
+ struct gvt_dma *entry;
+ u64 iov_pfn = iova >> PAGE_SHIFT;
+ u64 end_iov_pfn = iov_pfn + length / PAGE_SIZE;
- iov_pfn = unmap->iova >> PAGE_SHIFT;
- end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE;
+ mutex_lock(&vgpu->cache_lock);
+ for (; iov_pfn < end_iov_pfn; iov_pfn++) {
+ entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
+ if (!entry)
+ continue;
- mutex_lock(&vgpu->cache_lock);
- for (; iov_pfn < end_iov_pfn; iov_pfn++) {
- entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
- if (!entry)
- continue;
-
- gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
- entry->size);
- __gvt_cache_remove_entry(vgpu, entry);
- }
- mutex_unlock(&vgpu->cache_lock);
+ gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
+ entry->size);
+ __gvt_cache_remove_entry(vgpu, entry);
}
-
- return NOTIFY_OK;
+ mutex_unlock(&vgpu->cache_lock);
}
static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
@@ -783,36 +755,20 @@ out:
static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
{
struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
- unsigned long events;
- int ret;
-
- vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
-
- events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
- ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events,
- &vgpu->iommu_notifier);
- if (ret != 0) {
- gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
- ret);
- goto out;
- }
- ret = -EEXIST;
if (vgpu->attached)
- goto undo_iommu;
+ return -EEXIST;
- ret = -ESRCH;
if (!vgpu->vfio_device.kvm ||
vgpu->vfio_device.kvm->mm != current->mm) {
gvt_vgpu_err("KVM is required to use Intel vGPU\n");
- goto undo_iommu;
+ return -ESRCH;
}
kvm_get_kvm(vgpu->vfio_device.kvm);
- ret = -EEXIST;
if (__kvmgt_vgpu_exist(vgpu))
- goto undo_iommu;
+ return -EEXIST;
vgpu->attached = true;
@@ -831,12 +787,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
atomic_set(&vgpu->released, 0);
return 0;
-
-undo_iommu:
- vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY,
- &vgpu->iommu_notifier);
-out:
- return ret;
}
static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
@@ -853,8 +803,6 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
{
struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
- struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
- int ret;
if (!vgpu->attached)
return;
@@ -864,11 +812,6 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
intel_gvt_release_vgpu(vgpu);
- ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_IOMMU_NOTIFY,
- &vgpu->iommu_notifier);
- drm_WARN(&i915->drm, ret,
- "vfio_unregister_notifier for iommu failed: %d\n", ret);
-
debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
@@ -1610,6 +1553,7 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = {
.write = intel_vgpu_write,
.mmap = intel_vgpu_mmap,
.ioctl = intel_vgpu_ioctl,
+ .dma_unmap = intel_vgpu_dma_unmap,
};
static int intel_vgpu_probe(struct mdev_device *mdev)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f12f3aa3bc13..443ed6dac92a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -162,7 +162,7 @@ struct i915_gem_mm {
* List of objects which are pending destruction.
*/
struct llist_head free_list;
- struct delayed_work free_work;
+ struct work_struct free_work;
/**
* Count of objects pending destructions. Used to skip needlessly
* waiting on an RCU barrier if no objects are waiting to be freed.
@@ -1134,7 +1134,7 @@ static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
* armed the work again.
*/
while (atomic_read(&i915->mm.free_count)) {
- flush_delayed_work(&i915->mm.free_work);
+ flush_work(&i915->mm.free_work);
flush_delayed_work(&i915->bdev.wq);
rcu_barrier();
}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index d10f4eb0e272..b5fbc2252784 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -670,6 +670,18 @@ static void err_print_pciid(struct drm_i915_error_state_buf *m,
pdev->subsystem_device);
}
+static void err_print_guc_ctb(struct drm_i915_error_state_buf *m,
+ const char *name,
+ const struct intel_ctb_coredump *ctb)
+{
+ if (!ctb->size)
+ return;
+
+ err_printf(m, "GuC %s CTB: raw: 0x%08X, 0x%08X/%08X, cached: 0x%08X/%08X, desc = 0x%08X, buf = 0x%08X x 0x%08X\n",
+ name, ctb->raw_status, ctb->raw_head, ctb->raw_tail,
+ ctb->head, ctb->tail, ctb->desc_offset, ctb->cmds_offset, ctb->size);
+}
+
static void err_print_uc(struct drm_i915_error_state_buf *m,
const struct intel_uc_coredump *error_uc)
{
@@ -677,7 +689,12 @@ static void err_print_uc(struct drm_i915_error_state_buf *m,
intel_uc_fw_dump(&error_uc->guc_fw, &p);
intel_uc_fw_dump(&error_uc->huc_fw, &p);
- intel_gpu_error_print_vma(m, NULL, error_uc->guc_log);
+ err_printf(m, "GuC timestamp: 0x%08x\n", error_uc->guc.timestamp);
+ intel_gpu_error_print_vma(m, NULL, error_uc->guc.vma_log);
+ err_printf(m, "GuC CTB fence: %d\n", error_uc->guc.last_fence);
+ err_print_guc_ctb(m, "Send", error_uc->guc.ctb + 0);
+ err_print_guc_ctb(m, "Recv", error_uc->guc.ctb + 1);
+ intel_gpu_error_print_vma(m, NULL, error_uc->guc.vma_ctb);
}
static void err_free_sgl(struct scatterlist *sgl)
@@ -719,6 +736,8 @@ static void err_print_gt_global_nonguc(struct drm_i915_error_state_buf *m,
int i;
err_printf(m, "GT awake: %s\n", str_yes_no(gt->awake));
+ err_printf(m, "CS timestamp frequency: %u Hz, %d ns\n",
+ gt->clock_frequency, gt->clock_period_ns);
err_printf(m, "EIR: 0x%08x\n", gt->eir);
err_printf(m, "PGTBL_ER: 0x%08x\n", gt->pgtbl_er);
@@ -850,7 +869,7 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
if (error->gt) {
bool print_guc_capture = false;
- if (error->gt->uc && error->gt->uc->is_guc_capture)
+ if (error->gt->uc && error->gt->uc->guc.is_guc_capture)
print_guc_capture = true;
err_print_gt_display(m, error->gt);
@@ -1005,7 +1024,8 @@ static void cleanup_uc(struct intel_uc_coredump *uc)
{
kfree(uc->guc_fw.path);
kfree(uc->huc_fw.path);
- i915_vma_coredump_free(uc->guc_log);
+ i915_vma_coredump_free(uc->guc.vma_log);
+ i915_vma_coredump_free(uc->guc.vma_ctb);
kfree(uc);
}
@@ -1654,6 +1674,23 @@ gt_record_engines(struct intel_gt_coredump *gt,
}
}
+static void gt_record_guc_ctb(struct intel_ctb_coredump *saved,
+ const struct intel_guc_ct_buffer *ctb,
+ const void *blob_ptr, struct intel_guc *guc)
+{
+ if (!ctb || !ctb->desc)
+ return;
+
+ saved->raw_status = ctb->desc->status;
+ saved->raw_head = ctb->desc->head;
+ saved->raw_tail = ctb->desc->tail;
+ saved->head = ctb->head;
+ saved->tail = ctb->tail;
+ saved->size = ctb->size;
+ saved->desc_offset = ((void *)ctb->desc) - blob_ptr;
+ saved->cmds_offset = ((void *)ctb->cmds) - blob_ptr;
+}
+
static struct intel_uc_coredump *
gt_record_uc(struct intel_gt_coredump *gt,
struct i915_vma_compress *compress)
@@ -1674,8 +1711,22 @@ gt_record_uc(struct intel_gt_coredump *gt,
*/
error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL);
error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL);
- error_uc->guc_log = create_vma_coredump(gt->_gt, uc->guc.log.vma,
- "GuC log buffer", compress);
+
+ /*
+ * Save the GuC log and include a timestamp reference for converting the
+ * log times to system times (in conjunction with the error->boottime and
+ * gt->clock_frequency fields saved elsewhere).
+ */
+ error_uc->guc.timestamp = intel_uncore_read(gt->_gt->uncore, GUCPMTIMESTAMP);
+ error_uc->guc.vma_log = create_vma_coredump(gt->_gt, uc->guc.log.vma,
+ "GuC log buffer", compress);
+ error_uc->guc.vma_ctb = create_vma_coredump(gt->_gt, uc->guc.ct.vma,
+ "GuC CT buffer", compress);
+ error_uc->guc.last_fence = uc->guc.ct.requests.last_fence;
+ gt_record_guc_ctb(error_uc->guc.ctb + 0, &uc->guc.ct.ctbs.send,
+ uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc);
+ gt_record_guc_ctb(error_uc->guc.ctb + 1, &uc->guc.ct.ctbs.recv,
+ uc->guc.ct.ctbs.send.desc, (struct intel_guc *)&uc->guc);
return error_uc;
}
@@ -1832,6 +1883,8 @@ static void gt_record_global_regs(struct intel_gt_coredump *gt)
static void gt_record_info(struct intel_gt_coredump *gt)
{
memcpy(&gt->info, &gt->_gt->info, sizeof(struct intel_gt_info));
+ gt->clock_frequency = gt->_gt->clock_frequency;
+ gt->clock_period_ns = gt->_gt->clock_period_ns;
}
/*
@@ -2026,9 +2079,9 @@ __i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask, u32 du
error->gt->uc = gt_record_uc(error->gt, compress);
if (error->gt->uc) {
if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
- error->gt->uc->is_guc_capture = true;
+ error->gt->uc->guc.is_guc_capture = true;
else
- GEM_BUG_ON(error->gt->uc->is_guc_capture);
+ GEM_BUG_ON(error->gt->uc->guc.is_guc_capture);
}
}
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
index 55a143b92d10..efc75cc2ffdb 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.h
+++ b/drivers/gpu/drm/i915/i915_gpu_error.h
@@ -125,6 +125,15 @@ struct intel_engine_coredump {
struct intel_engine_coredump *next;
};
+struct intel_ctb_coredump {
+ u32 raw_head, head;
+ u32 raw_tail, tail;
+ u32 raw_status;
+ u32 desc_offset;
+ u32 cmds_offset;
+ u32 size;
+};
+
struct intel_gt_coredump {
const struct intel_gt *_gt;
bool awake;
@@ -150,6 +159,8 @@ struct intel_gt_coredump {
u32 gtt_cache;
u32 aux_err; /* gen12 */
u32 gam_done; /* gen12 */
+ u32 clock_frequency;
+ u32 clock_period_ns;
/* Display related */
u32 derrmr;
@@ -163,8 +174,14 @@ struct intel_gt_coredump {
struct intel_uc_coredump {
struct intel_uc_fw guc_fw;
struct intel_uc_fw huc_fw;
- struct i915_vma_coredump *guc_log;
- bool is_guc_capture;
+ struct guc_info {
+ struct intel_ctb_coredump ctb[2];
+ struct i915_vma_coredump *vma_ctb;
+ struct i915_vma_coredump *vma_log;
+ u32 timestamp;
+ u16 last_fence;
+ bool is_guc_capture;
+ } guc;
} *uc;
struct intel_gt_coredump *next;
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index ef3b04c7e153..260371716490 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -538,8 +538,6 @@ int i915_vma_bind(struct i915_vma *vma,
bind_flags);
}
- set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
-
atomic_or(bind_flags, &vma->flags);
return 0;
}
@@ -1310,6 +1308,19 @@ err_unpin:
return err;
}
+void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb)
+{
+ /*
+ * Before we release the pages that were bound by this vma, we
+ * must invalidate all the TLBs that may still have a reference
+ * back to our physical address. It only needs to be done once,
+ * so after updating the PTE to point away from the pages, record
+ * the most recent TLB invalidation seqno, and if we have not yet
+ * flushed the TLBs upon release, perform a full invalidation.
+ */
+ WRITE_ONCE(*tlb, intel_gt_next_invalidate_tlb_full(vm->gt));
+}
+
static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
{
/* We allocate under vma_get_pages, so beware the shrinker */
@@ -1941,7 +1952,12 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
vma->vm->skip_pte_rewrite;
trace_i915_vma_unbind(vma);
- unbind_fence = i915_vma_resource_unbind(vma_res);
+ if (async)
+ unbind_fence = i915_vma_resource_unbind(vma_res,
+ &vma->obj->mm.tlb);
+ else
+ unbind_fence = i915_vma_resource_unbind(vma_res, NULL);
+
vma->resource = NULL;
atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE),
@@ -1949,10 +1965,13 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async)
i915_vma_detach(vma);
- if (!async && unbind_fence) {
- dma_fence_wait(unbind_fence, false);
- dma_fence_put(unbind_fence);
- unbind_fence = NULL;
+ if (!async) {
+ if (unbind_fence) {
+ dma_fence_wait(unbind_fence, false);
+ dma_fence_put(unbind_fence);
+ unbind_fence = NULL;
+ }
+ vma_invalidate_tlb(vma->vm, &vma->obj->mm.tlb);
}
/*
diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h
index 88ca0bd9c900..33a58f605d75 100644
--- a/drivers/gpu/drm/i915/i915_vma.h
+++ b/drivers/gpu/drm/i915/i915_vma.h
@@ -213,6 +213,7 @@ bool i915_vma_misplaced(const struct i915_vma *vma,
u64 size, u64 alignment, u64 flags);
void __i915_vma_set_map_and_fenceable(struct i915_vma *vma);
void i915_vma_revoke_mmap(struct i915_vma *vma);
+void vma_invalidate_tlb(struct i915_address_space *vm, u32 *tlb);
struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async);
int __i915_vma_unbind(struct i915_vma *vma);
int __must_check i915_vma_unbind(struct i915_vma *vma);
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.c b/drivers/gpu/drm/i915/i915_vma_resource.c
index 27c55027387a..de1342dbfa12 100644
--- a/drivers/gpu/drm/i915/i915_vma_resource.c
+++ b/drivers/gpu/drm/i915/i915_vma_resource.c
@@ -216,6 +216,10 @@ i915_vma_resource_fence_notify(struct i915_sw_fence *fence,
/**
* i915_vma_resource_unbind - Unbind a vma resource
* @vma_res: The vma resource to unbind.
+ * @tlb: pointer to vma->obj->mm.tlb associated with the resource
+ * to be stored at vma_res->tlb. When not-NULL, it will be used
+ * to do TLB cache invalidation before freeing a VMA resource.
+ * Used only for async unbind.
*
* At this point this function does little more than publish a fence that
* signals immediately unless signaling is held back.
@@ -223,10 +227,13 @@ i915_vma_resource_fence_notify(struct i915_sw_fence *fence,
* Return: A refcounted pointer to a dma-fence that signals when unbinding is
* complete.
*/
-struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res)
+struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res,
+ u32 *tlb)
{
struct i915_address_space *vm = vma_res->vm;
+ vma_res->tlb = tlb;
+
/* Reference for the sw fence */
i915_vma_resource_get(vma_res);
diff --git a/drivers/gpu/drm/i915/i915_vma_resource.h b/drivers/gpu/drm/i915/i915_vma_resource.h
index 5d8427caa2ba..06923d1816e7 100644
--- a/drivers/gpu/drm/i915/i915_vma_resource.h
+++ b/drivers/gpu/drm/i915/i915_vma_resource.h
@@ -67,6 +67,7 @@ struct i915_page_sizes {
* taken when the unbind is scheduled.
* @skip_pte_rewrite: During ggtt suspend and vm takedown pte rewriting
* needs to be skipped for unbind.
+ * @tlb: pointer for obj->mm.tlb, if async unbind. Otherwise, NULL
*
* The lifetime of a struct i915_vma_resource is from a binding request to
* the actual possible asynchronous unbind has completed.
@@ -119,6 +120,8 @@ struct i915_vma_resource {
bool immediate_unbind:1;
bool needs_wakeref:1;
bool skip_pte_rewrite:1;
+
+ u32 *tlb;
};
bool i915_vma_resource_hold(struct i915_vma_resource *vma_res,
@@ -131,7 +134,8 @@ struct i915_vma_resource *i915_vma_resource_alloc(void);
void i915_vma_resource_free(struct i915_vma_resource *vma_res);
-struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res);
+struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res,
+ u32 *tlb);
void __i915_vma_resource_init(struct i915_vma_resource *vma_res);
diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
index bdd290f2bf3c..aaf8a380e5c7 100644
--- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h
@@ -49,5 +49,6 @@ selftest(perf, i915_perf_live_selftests)
selftest(slpc, intel_slpc_live_selftests)
selftest(guc, intel_guc_live_selftests)
selftest(guc_multi_lrc, intel_guc_multi_lrc_live_selftests)
+selftest(guc_hang, intel_guc_hang_check)
/* Here be dragons: keep last to run last! */
selftest(late_gt_pm, intel_gt_pm_late_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index c56a0c2cd2f7..ec05f578a698 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -971,7 +971,7 @@ static struct i915_vma *empty_batch(struct drm_i915_private *i915)
if (err)
goto err;
- /* Force the wait wait now to avoid including it in the benchmark */
+ /* Force the wait now to avoid including it in the benchmark */
err = i915_vma_sync(vma);
if (err)
goto err_pin;