From d74a7566bef76b44416071fbb7e34f301eac8d98 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 18 Nov 2019 08:44:12 -0800 Subject: drm/i915/ehl: Update voltage level checks The bspec was recently updated with new cdclk -> voltage level tables to accommodate the new 324/326.4 cdclk values. Bspec: 21809 Fixes: 63c9dae71dc5 ("drm/i915/ehl: Add voltage level requirement table") Cc: José Roberto de Souza Cc: Vivek Kasireddy Cc: Bob Paauwe Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20191118164412.26216-1-matthew.d.roper@intel.com Reviewed-by: José Roberto de Souza (cherry picked from commit d147483884ed08ee6bb618ef610ee0329a27fda7) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_cdclk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c index 0caef2592a7e..ed8c7ce62119 100644 --- a/drivers/gpu/drm/i915/display/intel_cdclk.c +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c @@ -1273,7 +1273,9 @@ static u8 icl_calc_voltage_level(int cdclk) static u8 ehl_calc_voltage_level(int cdclk) { - if (cdclk > 312000) + if (cdclk > 326400) + return 3; + else if (cdclk > 312000) return 2; else if (cdclk > 180000) return 1; -- cgit v1.2.3 From 198dfe671fdfdb79755aa131c191a4cb10a2a8b7 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 18 Nov 2019 10:02:19 -0800 Subject: drm/i915/tgl: Add DKL PHY vswing table for HDMI The bspec initially provided a single DKL PHY vswing table for both HDMI and DP, but was recently updated to include an independent table for HDMI. Bspec: 49292 Fixes: 978c3e539be2 ("drm/i915/tgl: Add dkl phy programming sequences") Cc: Clinton A Taylor Cc: Lucas De Marchi Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20191118180219.9309-1-matthew.d.roper@intel.com Reviewed-by: José Roberto de Souza (cherry picked from commit 362bfb995b78394aefe61f7cc0511ef7c50bb11e) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_ddi.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 0d6e494b4508..c7c2b349858d 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -593,7 +593,7 @@ struct tgl_dkl_phy_ddi_buf_trans { u32 dkl_de_emphasis_control; }; -static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_ddi_translations[] = { +static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_dp_ddi_trans[] = { /* VS pre-emp Non-trans mV Pre-emph dB */ { 0x7, 0x0, 0x00 }, /* 0 0 400mV 0 dB */ { 0x5, 0x0, 0x03 }, /* 0 1 400mV 3.5 dB */ @@ -607,6 +607,20 @@ static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_ddi_translations[] = { { 0x0, 0x0, 0x00 }, /* 3 0 1200mV 0 dB HDMI default */ }; +static const struct tgl_dkl_phy_ddi_buf_trans tgl_dkl_phy_hdmi_ddi_trans[] = { + /* HDMI Preset VS Pre-emph */ + { 0x7, 0x0, 0x0 }, /* 1 400mV 0dB */ + { 0x6, 0x0, 0x0 }, /* 2 500mV 0dB */ + { 0x4, 0x0, 0x0 }, /* 3 650mV 0dB */ + { 0x2, 0x0, 0x0 }, /* 4 800mV 0dB */ + { 0x0, 0x0, 0x0 }, /* 5 1000mV 0dB */ + { 0x0, 0x0, 0x5 }, /* 6 Full -1.5 dB */ + { 0x0, 0x0, 0x6 }, /* 7 Full -1.8 dB */ + { 0x0, 0x0, 0x7 }, /* 8 Full -2 dB */ + { 0x0, 0x0, 0x8 }, /* 9 Full -2.5 dB */ + { 0x0, 0x0, 0xA }, /* 10 Full -3 dB */ +}; + static const struct ddi_buf_trans * bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) { @@ -898,7 +912,7 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por icl_get_combo_buf_trans(dev_priv, INTEL_OUTPUT_HDMI, 0, &n_entries); else - n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); + n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans); default_entry = n_entries - 1; } else if (INTEL_GEN(dev_priv) == 11) { if (intel_phy_is_combo(dev_priv, phy)) @@ -2371,7 +2385,7 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) icl_get_combo_buf_trans(dev_priv, encoder->type, intel_dp->link_rate, &n_entries); else - n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); + n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans); } else if (INTEL_GEN(dev_priv) == 11) { if (intel_phy_is_combo(dev_priv, phy)) icl_get_combo_buf_trans(dev_priv, encoder->type, @@ -2823,8 +2837,13 @@ tgl_dkl_phy_ddi_vswing_sequence(struct intel_encoder *encoder, int link_clock, const struct tgl_dkl_phy_ddi_buf_trans *ddi_translations; u32 n_entries, val, ln, dpcnt_mask, dpcnt_val; - n_entries = ARRAY_SIZE(tgl_dkl_phy_ddi_translations); - ddi_translations = tgl_dkl_phy_ddi_translations; + if (encoder->type == INTEL_OUTPUT_HDMI) { + n_entries = ARRAY_SIZE(tgl_dkl_phy_hdmi_ddi_trans); + ddi_translations = tgl_dkl_phy_hdmi_ddi_trans; + } else { + n_entries = ARRAY_SIZE(tgl_dkl_phy_dp_ddi_trans); + ddi_translations = tgl_dkl_phy_dp_ddi_trans; + } if (level >= n_entries) level = n_entries - 1; -- cgit v1.2.3 From 03a2a606066cf8af7a090669848e4e84351ded06 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 22 Nov 2019 10:41:15 +0000 Subject: drm/i915/query: Align flavour of engine data lookup Commit 750e76b4f9f6 ("drm/i915/gt: Move the [class][inst] lookup for engines onto the GT") changed the engine query to iterate over uabi engines but left the buffer size calculation look at the physical engine count. Difference has no practical consequence but it is nicer to align both queries. Signed-off-by: Tvrtko Ursulin Fixes: 750e76b4f9f6 ("drm/i915/gt: Move the [class][inst] lookup for engines onto the GT") Cc: Chris Wilson Cc: Daniele Ceraolo Spurio Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191122104115.29610-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit 9acc99d8f278e3da398e927774431bd3e947ab2e) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_query.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index c27cfef9281c..ef25ce6e395e 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -103,15 +103,18 @@ query_engine_info(struct drm_i915_private *i915, struct drm_i915_engine_info __user *info_ptr; struct drm_i915_query_engine_info query; struct drm_i915_engine_info info = { }; + unsigned int num_uabi_engines = 0; struct intel_engine_cs *engine; int len, ret; if (query_item->flags) return -EINVAL; + for_each_uabi_engine(engine, i915) + num_uabi_engines++; + len = sizeof(struct drm_i915_query_engine_info) + - RUNTIME_INFO(i915)->num_engines * - sizeof(struct drm_i915_engine_info); + num_uabi_engines * sizeof(struct drm_i915_engine_info); ret = copy_query_item(&query, sizeof(query), len, query_item); if (ret != 0) -- cgit v1.2.3 From 732f9ca4a737aea3983ad86007e88e7fffe8cd6a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Nov 2019 18:22:09 +0000 Subject: drm/i915/gt: Fixup config ifdeffery for pm_suspend_target_state pm_suspend_target_state is declared under CONFIG_PM_SLEEP but only defined under CONFIG_SUSPEND. Play safe and only use the symbol if it is both declared and defined. Reported-by: kbuild-all@lists.01.org Signed-off-by: Chris Wilson Fixes: a70a9e998e8e ("drm/i915: Defer rc6 shutdown to suspend_late") Cc: Andi Shyti Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20191120182209.3967833-1-chris@chris-wilson.co.uk Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 6187cdd06646..7917cc348375 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -272,7 +272,7 @@ void intel_gt_suspend_prepare(struct intel_gt *gt) static suspend_state_t pm_suspend_target(void) { -#if IS_ENABLED(CONFIG_PM_SLEEP) +#if IS_ENABLED(CONFIG_SUSPEND) && IS_ENABLED(CONFIG_PM_SLEEP) return pm_suspend_target_state; #else return PM_SUSPEND_TO_IDLE; -- cgit v1.2.3 From f83d7e3f5189eb78e481cd02da93e4e32a253493 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 18 Nov 2019 23:02:46 +0000 Subject: drm/i915: Wait until the intel_wakeref idle callback is complete When waiting for idle, serialise with any ongoing callback so that it will have completed before completing the wait. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191118230254.2615942-12-chris@chris-wilson.co.uk (cherry picked from commit f4ba0707c825d60f1d0f5ce7bd3d875e68f3e204) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_wakeref.c | 13 +++++++++++-- drivers/gpu/drm/i915/intel_wakeref.h | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c index 868cc78048d0..ad26d7f4ca3d 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.c +++ b/drivers/gpu/drm/i915/intel_wakeref.c @@ -109,8 +109,17 @@ void __intel_wakeref_init(struct intel_wakeref *wf, int intel_wakeref_wait_for_idle(struct intel_wakeref *wf) { - return wait_var_event_killable(&wf->wakeref, - !intel_wakeref_is_active(wf)); + int err; + + might_sleep(); + + err = wait_var_event_killable(&wf->wakeref, + !intel_wakeref_is_active(wf)); + if (err) + return err; + + intel_wakeref_unlock_wait(wf); + return 0; } static void wakeref_auto_timeout(struct timer_list *t) diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h index 5f0c972a80fb..affe4de3746b 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.h +++ b/drivers/gpu/drm/i915/intel_wakeref.h @@ -151,6 +151,21 @@ intel_wakeref_unlock(struct intel_wakeref *wf) mutex_unlock(&wf->mutex); } +/** + * intel_wakeref_unlock_wait: Wait until the active callback is complete + * @wf: the wakeref + * + * Waits for the active callback (under the @wf->mutex or another CPU) is + * complete. + */ +static inline void +intel_wakeref_unlock_wait(struct intel_wakeref *wf) +{ + mutex_lock(&wf->mutex); + mutex_unlock(&wf->mutex); + flush_work(&wf->work); +} + /** * intel_wakeref_is_active: Query whether the wakeref is currently held * @wf: the wakeref -- cgit v1.2.3 From ee33baa83109612d9a31fc2c2a7b74967767b358 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Nov 2019 12:54:33 +0000 Subject: drm/i915: Mark up the calling context for intel_wakeref_put() Previously, we assumed we could use mutex_trylock() within an atomic context, falling back to a worker if contended. However, such trickery is illegal inside interrupt context, and so we need to always use a worker under such circumstances. As we normally are in process context, we can typically use a plain mutex, and only defer to a work when we know we are being called from an interrupt path. Fixes: 51fbd8de87dc ("drm/i915/pmu: Atomically acquire the gt_pm wakeref") References: a0855d24fc22d ("locking/mutex: Complain upon mutex API misuse in IRQ contexts") References: https://bugs.freedesktop.org/show_bug.cgi?id=111626 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191120125433.3767149-1-chris@chris-wilson.co.uk (cherry picked from commit 07779a76ee1f93f930cf697b22be73d16e14f50c) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 3 ++- drivers/gpu/drm/i915/gt/intel_engine_pm.h | 10 ++++++++++ drivers/gpu/drm/i915/gt/intel_gt_pm.c | 1 - drivers/gpu/drm/i915/gt/intel_gt_pm.h | 5 +++++ drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/gt/selftest_engine_pm.c | 7 ++++--- drivers/gpu/drm/i915/i915_active.c | 5 +++-- drivers/gpu/drm/i915/i915_pmu.c | 6 +++--- drivers/gpu/drm/i915/intel_wakeref.c | 8 ++++---- drivers/gpu/drm/i915/intel_wakeref.h | 30 +++++++++++++++++++--------- 11 files changed, 54 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 3c0f490ff2c7..7269c87c1372 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -177,7 +177,8 @@ static int __engine_park(struct intel_wakeref *wf) engine->execlists.no_priolist = false; - intel_gt_pm_put(engine->gt); + /* While gt calls i915_vma_parked(), we have to break the lock cycle */ + intel_gt_pm_put_async(engine->gt); return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index 739c50fefcef..24e20344dc22 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -31,6 +31,16 @@ static inline void intel_engine_pm_put(struct intel_engine_cs *engine) intel_wakeref_put(&engine->wakeref); } +static inline void intel_engine_pm_put_async(struct intel_engine_cs *engine) +{ + intel_wakeref_put_async(&engine->wakeref); +} + +static inline void intel_engine_pm_flush(struct intel_engine_cs *engine) +{ + intel_wakeref_unlock_wait(&engine->wakeref); +} + void intel_engine_init__pm(struct intel_engine_cs *engine); #endif /* INTEL_ENGINE_PM_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 7917cc348375..a459a42ad5c2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -105,7 +105,6 @@ static int __gt_park(struct intel_wakeref *wf) static const struct intel_wakeref_ops wf_ops = { .get = __gt_unpark, .put = __gt_park, - .flags = INTEL_WAKEREF_PUT_ASYNC, }; void intel_gt_pm_init_early(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index b3e17399be9b..990efc27a4e4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -32,6 +32,11 @@ static inline void intel_gt_pm_put(struct intel_gt *gt) intel_wakeref_put(>->wakeref); } +static inline void intel_gt_pm_put_async(struct intel_gt *gt) +{ + intel_wakeref_put_async(>->wakeref); +} + static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) { return intel_wakeref_wait_for_idle(>->wakeref); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 0ac3b26674ad..37fe72aa8e27 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1117,7 +1117,7 @@ __execlists_schedule_out(struct i915_request *rq, intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); - intel_gt_pm_put(engine->gt); + intel_gt_pm_put_async(engine->gt); /* * If this is part of a virtual engine, its next request may diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index f03e000051c1..c97423a76642 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -1114,7 +1114,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) out: intel_engine_cancel_stop_cs(engine); reset_finish_engine(engine); - intel_engine_pm_put(engine); + intel_engine_pm_put_async(engine); return ret; } diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index 20b9c83f43ad..cbf6b0735272 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -51,11 +51,12 @@ static int live_engine_pm(void *arg) pr_err("intel_engine_pm_get_if_awake(%s) failed under %s\n", engine->name, p->name); else - intel_engine_pm_put(engine); - intel_engine_pm_put(engine); + intel_engine_pm_put_async(engine); + intel_engine_pm_put_async(engine); p->critical_section_end(); - /* engine wakeref is sync (instant) */ + intel_engine_pm_flush(engine); + if (intel_engine_pm_is_awake(engine)) { pr_err("%s is still awake after flushing pm\n", engine->name); diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 5448f37c8102..dca15ace88f6 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -672,12 +672,13 @@ void i915_active_acquire_barrier(struct i915_active *ref) * populated by i915_request_add_active_barriers() to point to the * request that will eventually release them. */ - spin_lock_irqsave_nested(&ref->tree_lock, flags, SINGLE_DEPTH_NESTING); llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) { struct active_node *node = barrier_from_ll(pos); struct intel_engine_cs *engine = barrier_to_engine(node); struct rb_node **p, *parent; + spin_lock_irqsave_nested(&ref->tree_lock, flags, + SINGLE_DEPTH_NESTING); parent = NULL; p = &ref->tree.rb_node; while (*p) { @@ -693,12 +694,12 @@ void i915_active_acquire_barrier(struct i915_active *ref) } rb_link_node(&node->node, parent, p); rb_insert_color(&node->node, &ref->tree); + spin_unlock_irqrestore(&ref->tree_lock, flags); GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); llist_add(barrier_to_ll(node), &engine->barrier_tasks); intel_engine_pm_put(engine); } - spin_unlock_irqrestore(&ref->tree_lock, flags); } void i915_request_add_active_barriers(struct i915_request *rq) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 0d40dccd1409..2814218c5ba1 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -190,7 +190,7 @@ static u64 get_rc6(struct intel_gt *gt) val = 0; if (intel_gt_pm_get_if_awake(gt)) { val = __get_rc6(gt); - intel_gt_pm_put(gt); + intel_gt_pm_put_async(gt); } spin_lock_irqsave(&pmu->lock, flags); @@ -343,7 +343,7 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns) skip: spin_unlock_irqrestore(&engine->uncore->lock, flags); - intel_engine_pm_put(engine); + intel_engine_pm_put_async(engine); } } @@ -368,7 +368,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) if (intel_gt_pm_get_if_awake(gt)) { val = intel_uncore_read_notrace(uncore, GEN6_RPSTAT1); val = intel_get_cagf(rps, val); - intel_gt_pm_put(gt); + intel_gt_pm_put_async(gt); } add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_ACT], diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c index ad26d7f4ca3d..59aa1b6f1827 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.c +++ b/drivers/gpu/drm/i915/intel_wakeref.c @@ -54,7 +54,8 @@ int __intel_wakeref_get_first(struct intel_wakeref *wf) static void ____intel_wakeref_put_last(struct intel_wakeref *wf) { - if (!atomic_dec_and_test(&wf->count)) + INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0); + if (unlikely(!atomic_dec_and_test(&wf->count))) goto unlock; /* ops->put() must reschedule its own release on error/deferral */ @@ -67,13 +68,12 @@ unlock: mutex_unlock(&wf->mutex); } -void __intel_wakeref_put_last(struct intel_wakeref *wf) +void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags) { INTEL_WAKEREF_BUG_ON(work_pending(&wf->work)); /* Assume we are not in process context and so cannot sleep. */ - if (wf->ops->flags & INTEL_WAKEREF_PUT_ASYNC || - !mutex_trylock(&wf->mutex)) { + if (flags & INTEL_WAKEREF_PUT_ASYNC || !mutex_trylock(&wf->mutex)) { schedule_work(&wf->work); return; } diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h index affe4de3746b..da6e8fd506e6 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.h +++ b/drivers/gpu/drm/i915/intel_wakeref.h @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -29,9 +30,6 @@ typedef depot_stack_handle_t intel_wakeref_t; struct intel_wakeref_ops { int (*get)(struct intel_wakeref *wf); int (*put)(struct intel_wakeref *wf); - - unsigned long flags; -#define INTEL_WAKEREF_PUT_ASYNC BIT(0) }; struct intel_wakeref { @@ -57,7 +55,7 @@ void __intel_wakeref_init(struct intel_wakeref *wf, } while (0) int __intel_wakeref_get_first(struct intel_wakeref *wf); -void __intel_wakeref_put_last(struct intel_wakeref *wf); +void __intel_wakeref_put_last(struct intel_wakeref *wf, unsigned long flags); /** * intel_wakeref_get: Acquire the wakeref @@ -100,10 +98,9 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf) } /** - * intel_wakeref_put: Release the wakeref - * @i915: the drm_i915_private device + * intel_wakeref_put_flags: Release the wakeref * @wf: the wakeref - * @fn: callback for releasing the wakeref, called only on final release. + * @flags: control flags * * Release our hold on the wakeref. When there are no more users, * the runtime pm wakeref will be released after the @fn callback is called @@ -116,11 +113,25 @@ intel_wakeref_get_if_active(struct intel_wakeref *wf) * code otherwise. */ static inline void -intel_wakeref_put(struct intel_wakeref *wf) +__intel_wakeref_put(struct intel_wakeref *wf, unsigned long flags) +#define INTEL_WAKEREF_PUT_ASYNC BIT(0) { INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count) <= 0); if (unlikely(!atomic_add_unless(&wf->count, -1, 1))) - __intel_wakeref_put_last(wf); + __intel_wakeref_put_last(wf, flags); +} + +static inline void +intel_wakeref_put(struct intel_wakeref *wf) +{ + might_sleep(); + __intel_wakeref_put(wf, 0); +} + +static inline void +intel_wakeref_put_async(struct intel_wakeref *wf) +{ + __intel_wakeref_put(wf, INTEL_WAKEREF_PUT_ASYNC); } /** @@ -185,6 +196,7 @@ intel_wakeref_is_active(const struct intel_wakeref *wf) static inline void __intel_wakeref_defer_park(struct intel_wakeref *wf) { + lockdep_assert_held(&wf->mutex); INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count)); atomic_set_release(&wf->count, 1); } -- cgit v1.2.3 From ca1711d1991f968d1e88725a2e607e57ecd5c5f1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Nov 2019 16:55:13 +0000 Subject: drm/i915/gt: Close race between engine_park and intel_gt_retire_requests The general concept was that intel_timeline.active_count was locked by the intel_timeline.mutex. The exception was for power management, where the engine->kernel_context->timeline could be manipulated under the global wakeref.mutex. This was quite solid, as we always manipulated the timeline only while we held an engine wakeref. And then we started retiring requests outside of struct_mutex, only using the timelines.active_list and the timeline->mutex. There we started manipulating intel_timeline.active_count outside of an engine wakeref, and so introduced a race between __engine_park() and intel_gt_retire_requests(), a race that could result in the engine->kernel_context not being added to the active timelines and so losing requests, which caused us to keep the system permanently powered up [and unloadable]. The race would be easy to close if we could take the engine wakeref for the timeline before we retire -- except timelines are not bound to any engine and so we would need to keep all active engines awake. The alternative is to guard intel_timeline_enter/intel_timeline_exit for use outside of the timeline->mutex. Fixes: e5dadff4b093 ("drm/i915: Protect request retirement with timeline->mutex") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191120165514.3955081-1-chris@chris-wilson.co.uk (cherry picked from commit a6edbca74b305adc165e67065d7ee766006e6a48) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_gt_requests.c | 8 +++--- drivers/gpu/drm/i915/gt/intel_timeline.c | 34 ++++++++++++++++++++------ drivers/gpu/drm/i915/gt/intel_timeline_types.h | 2 +- 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index 353809ac2754..a0112ba08ca7 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -52,8 +52,8 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) } intel_timeline_get(tl); - GEM_BUG_ON(!tl->active_count); - tl->active_count++; /* pin the list element */ + GEM_BUG_ON(!atomic_read(&tl->active_count)); + atomic_inc(&tl->active_count); /* pin the list element */ spin_unlock_irqrestore(&timelines->lock, flags); if (timeout > 0) { @@ -74,7 +74,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) /* Resume iteration after dropping lock */ list_safe_reset_next(tl, tn, link); - if (!--tl->active_count) + if (atomic_dec_and_test(&tl->active_count)) list_del(&tl->link); else active_count += !!rcu_access_pointer(tl->last_request.fence); @@ -83,7 +83,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) /* Defer the final release to after the spinlock */ if (refcount_dec_and_test(&tl->kref.refcount)) { - GEM_BUG_ON(tl->active_count); + GEM_BUG_ON(atomic_read(&tl->active_count)); list_add(&tl->link, &free); } } diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 14ad10acd548..839de6b9aa24 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -339,15 +339,33 @@ void intel_timeline_enter(struct intel_timeline *tl) struct intel_gt_timelines *timelines = &tl->gt->timelines; unsigned long flags; + /* + * Pretend we are serialised by the timeline->mutex. + * + * While generally true, there are a few exceptions to the rule + * for the engine->kernel_context being used to manage power + * transitions. As the engine_park may be called from under any + * timeline, it uses the power mutex as a global serialisation + * lock to prevent any other request entering its timeline. + * + * The rule is generally tl->mutex, otherwise engine->wakeref.mutex. + * + * However, intel_gt_retire_request() does not know which engine + * it is retiring along and so cannot partake in the engine-pm + * barrier, and there we use the tl->active_count as a means to + * pin the timeline in the active_list while the locks are dropped. + * Ergo, as that is outside of the engine-pm barrier, we need to + * use atomic to manipulate tl->active_count. + */ lockdep_assert_held(&tl->mutex); - GEM_BUG_ON(!atomic_read(&tl->pin_count)); - if (tl->active_count++) + + if (atomic_add_unless(&tl->active_count, 1, 0)) return; - GEM_BUG_ON(!tl->active_count); /* overflow? */ spin_lock_irqsave(&timelines->lock, flags); - list_add(&tl->link, &timelines->active_list); + if (!atomic_fetch_inc(&tl->active_count)) + list_add_tail(&tl->link, &timelines->active_list); spin_unlock_irqrestore(&timelines->lock, flags); } @@ -356,14 +374,16 @@ void intel_timeline_exit(struct intel_timeline *tl) struct intel_gt_timelines *timelines = &tl->gt->timelines; unsigned long flags; + /* See intel_timeline_enter() */ lockdep_assert_held(&tl->mutex); - GEM_BUG_ON(!tl->active_count); - if (--tl->active_count) + GEM_BUG_ON(!atomic_read(&tl->active_count)); + if (atomic_add_unless(&tl->active_count, -1, 1)) return; spin_lock_irqsave(&timelines->lock, flags); - list_del(&tl->link); + if (atomic_dec_and_test(&tl->active_count)) + list_del(&tl->link); spin_unlock_irqrestore(&timelines->lock, flags); /* diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index 98d9ee166379..5244615ed1cb 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -42,7 +42,7 @@ struct intel_timeline { * from the intel_context caller plus internal atomicity. */ atomic_t pin_count; - unsigned int active_count; + atomic_t active_count; const u32 *hwsp_seqno; struct i915_vma *hwsp_ggtt; -- cgit v1.2.3 From bf201f5eda23eb57f7217d20ea3e18da8cbcf52b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 Nov 2019 16:55:14 +0000 Subject: drm/i915/gt: Unlock engine-pm after queuing the kernel context switch In commit a79ca656b648 ("drm/i915: Push the wakeref->count deferral to the backend"), I erroneously concluded that we last modify the engine inside __i915_request_commit() meaning that we could enable concurrent submission for userspace as we enqueued this request. However, this falls into a trap with other users of the engine->kernel_context waking up and submitting their request before the idle-switch is queued, with the result that the kernel_context is executed out-of-sequence most likely upsetting the GPU and certainly ourselves when we try to retire the out-of-sequence requests. As such we need to hold onto the effective engine->kernel_context mutex lock (via the engine pm mutex proxy) until we have finish queuing the request to the engine. v2: Serialise against concurrent intel_gt_retire_requests() v3: Describe the hairy locking scheme with intel_gt_retire_requests() for future reference. v4: Combine timeline->lock and engine pm release; it's hairy. Fixes: a79ca656b648 ("drm/i915: Push the wakeref->count deferral to the backend") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191120165514.3955081-2-chris@chris-wilson.co.uk (cherry picked from commit 5cba288466e9b229feb68295675246e7522fb5eb) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 47 ++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 7269c87c1372..373a4b9f159c 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -73,8 +73,25 @@ static inline void __timeline_mark_unlock(struct intel_context *ce, #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */ +static void +__intel_timeline_enter_and_release_pm(struct intel_timeline *tl, + struct intel_engine_cs *engine) +{ + struct intel_gt_timelines *timelines = &engine->gt->timelines; + + spin_lock(&timelines->lock); + + if (!atomic_fetch_inc(&tl->active_count)) + list_add_tail(&tl->link, &timelines->active_list); + + __intel_wakeref_defer_park(&engine->wakeref); + + spin_unlock(&timelines->lock); +} + static bool switch_to_kernel_context(struct intel_engine_cs *engine) { + struct intel_context *ce = engine->kernel_context; struct i915_request *rq; unsigned long flags; bool result = true; @@ -98,16 +115,31 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) * This should hold true as we can only park the engine after * retiring the last request, thus all rings should be empty and * all timelines idle. + * + * For unlocking, there are 2 other parties and the GPU who have a + * stake here. + * + * A new gpu user will be waiting on the engine-pm to start their + * engine_unpark. New waiters are predicated on engine->wakeref.count + * and so intel_wakeref_defer_park() acts like a mutex_unlock of the + * engine->wakeref. + * + * The other party is intel_gt_retire_requests(), which is walking the + * list of active timelines looking for completions. Meanwhile as soon + * as we call __i915_request_queue(), the GPU may complete our request. + * Ergo, if we put ourselves on the timelines.active_list + * (se intel_timeline_enter()) before we increment the + * engine->wakeref.count, we may see the request completion and retire + * it causing an undeflow of the engine->wakeref. */ - flags = __timeline_mark_lock(engine->kernel_context); + flags = __timeline_mark_lock(ce); + GEM_BUG_ON(atomic_read(&ce->timeline->active_count) < 0); - rq = __i915_request_create(engine->kernel_context, GFP_NOWAIT); + rq = __i915_request_create(ce, GFP_NOWAIT); if (IS_ERR(rq)) /* Context switch failed, hope for the best! Maybe reset? */ goto out_unlock; - intel_timeline_enter(i915_request_timeline(rq)); - /* Check again on the next retirement. */ engine->wakeref_serial = engine->serial + 1; i915_request_add_active_barriers(rq); @@ -116,13 +148,14 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) rq->sched.attr.priority = I915_PRIORITY_BARRIER; __i915_request_commit(rq); - /* Release our exclusive hold on the engine */ - __intel_wakeref_defer_park(&engine->wakeref); __i915_request_queue(rq, NULL); + /* Expose ourselves to intel_gt_retire_requests() and new submission */ + __intel_timeline_enter_and_release_pm(ce->timeline, engine); + result = false; out_unlock: - __timeline_mark_unlock(engine->kernel_context, flags); + __timeline_mark_unlock(ce, flags); return result; } -- cgit v1.2.3 From 97f9af78f38d43cd058cfd40220647be67aca9f7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 Nov 2019 09:43:18 +0000 Subject: drm/i915/gt: Mark the execlists->active as the primary volatile access Since we want to do a lockless read of the current active request, and that request is written to by process_csb also without serialisation, we need to instruct gcc to take care in reading the pointer itself. Otherwise, we have observed execlists_active() to report 0x40. [ 2400.760381] igt/para-4098 1..s. 2376479300us : process_csb: rcs0 cs-irq head=3, tail=4 [ 2400.760826] igt/para-4098 1..s. 2376479303us : process_csb: rcs0 csb[4]: status=0x00000001:0x00000000 [ 2400.761271] igt/para-4098 1..s. 2376479306us : trace_ports: rcs0: promote { b9c59:2622, b9c55:2624 } [ 2400.761726] igt/para-4097 0d... 2376479311us : __i915_schedule: rcs0: -2147483648->3, inflight:0000000000000040, rq:ffff888208c1e940 which is impossible! The answer is that as we keep the existing execlists->active pointing into the array as we copy over that array, the unserialised read may see a partial pointer value. Fixes: df403069029d ("drm/i915/execlists: Lift process_csb() out of the irq-off spinlock") Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20191125094318.1630806-1-chris@chris-wilson.co.uk (cherry picked from commit 331bf90591573dfe6c8e892239713ef9702f1396) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_engine.h | 4 +--- drivers/gpu/drm/i915/gt/intel_lrc.c | 27 +++++++++++++++++---------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index bc3b72bfa9e3..01765a7ec18f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -100,9 +100,7 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists) static inline struct i915_request * execlists_active(const struct intel_engine_execlists *execlists) { - GEM_BUG_ON(execlists->active - execlists->inflight > - execlists_num_ports(execlists)); - return READ_ONCE(*execlists->active); + return *READ_ONCE(execlists->active); } static inline void diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 37fe72aa8e27..a692e6c9ea75 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1943,6 +1943,9 @@ cancel_port_requests(struct intel_engine_execlists * const execlists) execlists_schedule_out(rq); memset(execlists->pending, 0, sizeof(execlists->pending)); + /* Mark the end of active before we overwrite *active */ + WRITE_ONCE(execlists->active, execlists->pending); + for (port = execlists->active; (rq = *port); port++) execlists_schedule_out(rq); execlists->active = @@ -2099,23 +2102,27 @@ static void process_csb(struct intel_engine_cs *engine) else promote = gen8_csb_parse(execlists, buf + 2 * head); if (promote) { + struct i915_request * const *old = execlists->active; + + /* Point active to the new ELSP; prevent overwriting */ + WRITE_ONCE(execlists->active, execlists->pending); + set_timeslice(engine); + if (!inject_preempt_hang(execlists)) ring_set_paused(engine, 0); /* cancel old inflight, prepare for switch */ - trace_ports(execlists, "preempted", execlists->active); - while (*execlists->active) - execlists_schedule_out(*execlists->active++); + trace_ports(execlists, "preempted", old); + while (*old) + execlists_schedule_out(*old++); /* switch pending to inflight */ GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); - execlists->active = - memcpy(execlists->inflight, - execlists->pending, - execlists_num_ports(execlists) * - sizeof(*execlists->pending)); - - set_timeslice(engine); + WRITE_ONCE(execlists->active, + memcpy(execlists->inflight, + execlists->pending, + execlists_num_ports(execlists) * + sizeof(*execlists->pending))); WRITE_ONCE(execlists->pending[0], NULL); } else { -- cgit v1.2.3 From 4ec5cc78c1b06f8d30b5c682acccf17fb5191cf2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 Nov 2019 11:25:20 +0000 Subject: drm/i915/execlists: Fixup cancel_port_requests() I rushed a last minute correction to cancel_port_requests() to prevent the snooping of *execlists->active as the inflight array was being updated, without noticing we iterated the inflight array starting from active! Oops. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112387 Fixes: 97f9af78f38d ("drm/i915/gt: Mark the execlists->active as the primary volatile access") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20191125112520.1760492-1-chris@chris-wilson.co.uk (cherry picked from commit da0ef77e1e0ccff703efee82406c629d5c4f4bbb) [Joonas: Fixed Fixes: tag to match drm-intel-next-fixes] Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_lrc.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index a692e6c9ea75..217b32ae0bcc 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1937,19 +1937,17 @@ skip_submit: static void cancel_port_requests(struct intel_engine_execlists * const execlists) { - struct i915_request * const *port, *rq; + struct i915_request * const *port; - for (port = execlists->pending; (rq = *port); port++) - execlists_schedule_out(rq); + for (port = execlists->pending; *port; port++) + execlists_schedule_out(*port); memset(execlists->pending, 0, sizeof(execlists->pending)); /* Mark the end of active before we overwrite *active */ - WRITE_ONCE(execlists->active, execlists->pending); - - for (port = execlists->active; (rq = *port); port++) - execlists_schedule_out(rq); - execlists->active = - memset(execlists->inflight, 0, sizeof(execlists->inflight)); + for (port = xchg(&execlists->active, execlists->pending); *port; port++) + execlists_schedule_out(*port); + WRITE_ONCE(execlists->active, + memset(execlists->inflight, 0, sizeof(execlists->inflight))); } static inline void -- cgit v1.2.3 From a09c2860ae4fcf4d7e25202661f3eb868547cddb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 Nov 2019 10:58:57 +0000 Subject: drm/i915/gt: Adapt engine_park synchronisation rules for engine_retire In the next patch, we will introduce a new asynchronous retirement worker, fed by execlists CS events. Here we may queue a retirement as soon as a request is submitted to HW (and completes instantly), and we also want to process that retirement as early as possible and cannot afford to postpone (as there may not be another opportunity to retire it for a few seconds). To allow the new async retirer to run in parallel with our submission, pull the __i915_request_queue (that passes the request to HW) inside the timelines spinlock so that the retirement cannot release the timeline before we have completed the submission. v2: Actually to play nicely with engine_retire, we have to raise the timeline.active_lock before releasing the HW. intel_gt_retire_requsts() is still serialised by the outer lock so they cannot see this intermediate state, and engine_retire is serialised by HW submission. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191125105858.1718307-2-chris@chris-wilson.co.uk (cherry picked from commit 88a4655e75ac8f55eea5e3f38b176cba9cf653b5) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 373a4b9f159c..0e1ad4a4bd97 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -74,16 +74,33 @@ static inline void __timeline_mark_unlock(struct intel_context *ce, #endif /* !IS_ENABLED(CONFIG_LOCKDEP) */ static void -__intel_timeline_enter_and_release_pm(struct intel_timeline *tl, - struct intel_engine_cs *engine) +__queue_and_release_pm(struct i915_request *rq, + struct intel_timeline *tl, + struct intel_engine_cs *engine) { struct intel_gt_timelines *timelines = &engine->gt->timelines; + GEM_TRACE("%s\n", engine->name); + + /* + * We have to serialise all potential retirement paths with our + * submission, as we don't want to underflow either the + * engine->wakeref.counter or our timeline->active_count. + * + * Equally, we cannot allow a new submission to start until + * after we finish queueing, nor could we allow that submitter + * to retire us before we are ready! + */ spin_lock(&timelines->lock); + /* Let intel_gt_retire_requests() retire us (acquired under lock) */ if (!atomic_fetch_inc(&tl->active_count)) list_add_tail(&tl->link, &timelines->active_list); + /* Hand the request over to HW and so engine_retire() */ + __i915_request_queue(rq, NULL); + + /* Let new submissions commence (and maybe retire this timeline) */ __intel_wakeref_defer_park(&engine->wakeref); spin_unlock(&timelines->lock); @@ -148,10 +165,8 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) rq->sched.attr.priority = I915_PRIORITY_BARRIER; __i915_request_commit(rq); - __i915_request_queue(rq, NULL); - - /* Expose ourselves to intel_gt_retire_requests() and new submission */ - __intel_timeline_enter_and_release_pm(ce->timeline, engine); + /* Expose ourselves to the world */ + __queue_and_release_pm(rq, ce->timeline, engine); result = false; out_unlock: -- cgit v1.2.3 From 311770173fac27845a3a83e2c16100a54d308f72 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 Nov 2019 10:58:58 +0000 Subject: drm/i915/gt: Schedule request retirement when timeline idles The major drawback of commit 7e34f4e4aad3 ("drm/i915/gen8+: Add RC6 CTX corruption WA") is that it disables RC6 while Skylake (and friends) is active, and we do not consider the GPU idle until all outstanding requests have been retired and the engine switched over to the kernel context. If userspace is idle, this task falls onto our background idle worker, which only runs roughly once a second, meaning that userspace has to have been idle for a couple of seconds before we enable RC6 again. Naturally, this causes us to consume considerably more energy than before as powersaving is effectively disabled while a display server (here's looking at you Xorg) is running. As execlists will get a completion event as each context is completed, we can use this interrupt to queue a retire worker bound to this engine to cleanup idle timelines. We will then immediately notice the idle engine (without userspace intervention or the aid of the background retire worker) and start parking the GPU. Thus during light workloads, we will do much more work to idle the GPU faster... Hopefully with commensurate power saving! v2: Watch context completions and only look at those local to the engine when retiring to reduce the amount of excess work we perform. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112315 References: 7e34f4e4aad3 ("drm/i915/gen8+: Add RC6 CTX corruption WA") References: 2248a28384fe ("drm/i915/gen8+: Add RC6 CTX corruption WA") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191125105858.1718307-3-chris@chris-wilson.co.uk (cherry picked from commit 4f88f8747fa43c97c3b3712d8d87295ea757cc51) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 8 +-- drivers/gpu/drm/i915/gt/intel_engine_types.h | 8 +++ drivers/gpu/drm/i915/gt/intel_gt_requests.c | 75 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_gt_requests.h | 7 +++ drivers/gpu/drm/i915/gt/intel_lrc.c | 9 ++++ drivers/gpu/drm/i915/gt/intel_timeline.c | 1 + drivers/gpu/drm/i915/gt/intel_timeline_types.h | 3 ++ 7 files changed, 108 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 5ca3ec911e50..813bd3a610d2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -28,13 +28,13 @@ #include "i915_drv.h" -#include "gt/intel_gt.h" - +#include "intel_context.h" #include "intel_engine.h" #include "intel_engine_pm.h" #include "intel_engine_pool.h" #include "intel_engine_user.h" -#include "intel_context.h" +#include "intel_gt.h" +#include "intel_gt_requests.h" #include "intel_lrc.h" #include "intel_reset.h" #include "intel_ring.h" @@ -616,6 +616,7 @@ static int intel_engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_execlists(engine); intel_engine_init_cmd_parser(engine); intel_engine_init__pm(engine); + intel_engine_init_retire(engine); intel_engine_pool_init(&engine->pool); @@ -838,6 +839,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) cleanup_status_page(engine); + intel_engine_fini_retire(engine); intel_engine_pool_fini(&engine->pool); intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 758f0e8ec672..17f1f1441efc 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -451,6 +451,14 @@ struct intel_engine_cs { struct intel_engine_execlists execlists; + /* + * Keep track of completed timelines on this engine for early + * retirement with the goal of quickly enabling powersaving as + * soon as the engine is idle. + */ + struct intel_timeline *retire; + struct work_struct retire_work; + /* status_notifier: list of callbacks for context-switch changes */ struct atomic_notifier_head context_status_notifier; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index a0112ba08ca7..3dc13ecf41bf 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -4,6 +4,8 @@ * Copyright © 2019 Intel Corporation */ +#include + #include "i915_drv.h" /* for_each_engine() */ #include "i915_request.h" #include "intel_gt.h" @@ -29,6 +31,79 @@ static void flush_submission(struct intel_gt *gt) intel_engine_flush_submission(engine); } +static void engine_retire(struct work_struct *work) +{ + struct intel_engine_cs *engine = + container_of(work, typeof(*engine), retire_work); + struct intel_timeline *tl = xchg(&engine->retire, NULL); + + do { + struct intel_timeline *next = xchg(&tl->retire, NULL); + + /* + * Our goal here is to retire _idle_ timelines as soon as + * possible (as they are idle, we do not expect userspace + * to be cleaning up anytime soon). + * + * If the timeline is currently locked, either it is being + * retired elsewhere or about to be! + */ + if (mutex_trylock(&tl->mutex)) { + retire_requests(tl); + mutex_unlock(&tl->mutex); + } + intel_timeline_put(tl); + + GEM_BUG_ON(!next); + tl = ptr_mask_bits(next, 1); + } while (tl); +} + +static bool add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl) +{ + struct intel_timeline *first; + + /* + * We open-code a llist here to include the additional tag [BIT(0)] + * so that we know when the timeline is already on a + * retirement queue: either this engine or another. + * + * However, we rely on that a timeline can only be active on a single + * engine at any one time and that add_retire() is called before the + * engine releases the timeline and transferred to another to retire. + */ + + if (READ_ONCE(tl->retire)) /* already queued */ + return false; + + intel_timeline_get(tl); + first = READ_ONCE(engine->retire); + do + tl->retire = ptr_pack_bits(first, 1, 1); + while (!try_cmpxchg(&engine->retire, &first, tl)); + + return !first; +} + +void intel_engine_add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl) +{ + if (add_retire(engine, tl)) + schedule_work(&engine->retire_work); +} + +void intel_engine_init_retire(struct intel_engine_cs *engine) +{ + INIT_WORK(&engine->retire_work, engine_retire); +} + +void intel_engine_fini_retire(struct intel_engine_cs *engine) +{ + flush_work(&engine->retire_work); + GEM_BUG_ON(engine->retire); +} + long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) { struct intel_gt_timelines *timelines = >->timelines; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.h b/drivers/gpu/drm/i915/gt/intel_gt_requests.h index bd31cbce47e0..d626fb115386 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.h @@ -7,7 +7,9 @@ #ifndef INTEL_GT_REQUESTS_H #define INTEL_GT_REQUESTS_H +struct intel_engine_cs; struct intel_gt; +struct intel_timeline; long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout); static inline void intel_gt_retire_requests(struct intel_gt *gt) @@ -15,6 +17,11 @@ static inline void intel_gt_retire_requests(struct intel_gt *gt) intel_gt_retire_requests_timeout(gt, 0); } +void intel_engine_init_retire(struct intel_engine_cs *engine); +void intel_engine_add_retire(struct intel_engine_cs *engine, + struct intel_timeline *tl); +void intel_engine_fini_retire(struct intel_engine_cs *engine); + int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout); void intel_gt_init_requests(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 217b32ae0bcc..9fdefbdc3546 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -142,6 +142,7 @@ #include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_gt_requests.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" #include "intel_reset.h" @@ -1115,6 +1116,14 @@ __execlists_schedule_out(struct i915_request *rq, * refrain from doing non-trivial work here. */ + /* + * If we have just completed this context, the engine may now be + * idle and we want to re-enter powersaving. + */ + if (list_is_last(&rq->link, &ce->timeline->requests) && + i915_request_completed(rq)) + intel_engine_add_retire(engine, ce->timeline); + intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); intel_gt_pm_put_async(engine->gt); diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 839de6b9aa24..649798c184fb 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -282,6 +282,7 @@ void intel_timeline_fini(struct intel_timeline *timeline) { GEM_BUG_ON(atomic_read(&timeline->pin_count)); GEM_BUG_ON(!list_empty(&timeline->requests)); + GEM_BUG_ON(timeline->retire); if (timeline->hwsp_cacheline) cacheline_free(timeline->hwsp_cacheline); diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index 5244615ed1cb..aaf15cbe1ce1 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -66,6 +66,9 @@ struct intel_timeline { */ struct i915_active_fence last_request; + /** A chain of completed timelines ready for early retirement. */ + struct intel_timeline *retire; + /** * We track the most recent seqno that we wait on in every context so * that we only have to emit a new await and dependency on a more -- cgit v1.2.3 From 0725d9a31869e6c80630e99da366ede2848295cc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 18 Nov 2019 23:02:40 +0000 Subject: drm/i915/gt: Make intel_ring_unpin() safe for concurrent pint In order to avoid some nasty mutex inversions, commit 09c5ab384f6f ("drm/i915: Keep rings pinned while the context is active") allowed the intel_ring unpinning to be run concurrently with the next context pinning it. Thus each step in intel_ring_unpin() needed to be atomic and ordered in a nice onion with intel_ring_pin() so that the lifetimes overlapped and were always safe. Sadly, a few steps in intel_ring_unpin() were overlooked, such as closing the read/write pointers of the ring and discarding the intel_ring.vaddr, as these steps were not serialised with intel_ring_pin() and so could leave the ring in disarray. Fixes: 09c5ab384f6f ("drm/i915: Keep rings pinned while the context is active") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191118230254.2615942-6-chris@chris-wilson.co.uk (cherry picked from commit a266bf42006004306dd48a9082c35dfbff153307) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_ring.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index ece20504d240..374b28f13ca0 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -57,9 +57,10 @@ int intel_ring_pin(struct intel_ring *ring) i915_vma_make_unshrinkable(vma); - GEM_BUG_ON(ring->vaddr); - ring->vaddr = addr; + /* Discard any unused bytes beyond that submitted to hw. */ + intel_ring_reset(ring, ring->emit); + ring->vaddr = addr; return 0; err_ring: @@ -85,20 +86,14 @@ void intel_ring_unpin(struct intel_ring *ring) if (!atomic_dec_and_test(&ring->pin_count)) return; - /* Discard any unused bytes beyond that submitted to hw. */ - intel_ring_reset(ring, ring->emit); - i915_vma_unset_ggtt_write(vma); if (i915_vma_is_map_and_fenceable(vma)) i915_vma_unpin_iomap(vma); else i915_gem_object_unpin_map(vma->obj); - GEM_BUG_ON(!ring->vaddr); - ring->vaddr = NULL; - - i915_vma_unpin(vma); i915_vma_make_purgeable(vma); + i915_vma_unpin(vma); } static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) -- cgit v1.2.3 From 8d15ede5cc6b66d5176856060b94196bc3382283 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 25 Nov 2019 16:27:37 +0000 Subject: drm/i915: Default to a more lenient forced preemption timeout Based on a sampling of a number of benchmarks across platforms, by default opt for a much more lenient timeout so that we should not adversely affect existing "good" clients. 640ms ought to be enough for anyone. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=112169 Fixes: 3a7a92aba8fb ("drm/i915/execlists: Force preemption") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Eero Tamminen Cc: Dmitry Rogozhkin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191125162737.2161069-1-chris@chris-wilson.co.uk (cherry picked from commit 5766a5ffc6a69595903865518c43636bde0e4ac4) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/Kconfig.profile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile index 1799537a3228..c280b6ae38eb 100644 --- a/drivers/gpu/drm/i915/Kconfig.profile +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -25,7 +25,7 @@ config DRM_I915_HEARTBEAT_INTERVAL config DRM_I915_PREEMPT_TIMEOUT int "Preempt timeout (ms, jiffy granularity)" - default 100 # milliseconds + default 640 # milliseconds help How long to wait (in milliseconds) for a preemption event to occur when submitting a new context via execlists. If the current context -- cgit v1.2.3 From 3cc44feb9861d2f5267af9b962ae92c5ea1b48fd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 Nov 2019 06:55:21 +0000 Subject: drm/i915: Reduce nested prepare_remote_context() to a trylock On context retiring, we may invoke the kernel_context to unpin this context. Elsewhere, we may use the kernel_context to modify this context. This currently leads to an AB-BA lock inversion, so we need to back-off from the contended lock, and repeat. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111732 Signed-off-by: Chris Wilson Fixes: a9877da2d629 ("drm/i915/oa: Reconfigure contexts on the fly") Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191126065521.2331017-1-chris@chris-wilson.co.uk (cherry picked from commit 58b4c1a07ada7fb91ea757bdb9bd47df02207357) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_context.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index ee9d2bcd2c13..ef7bc41ffffa 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -310,10 +310,23 @@ int intel_context_prepare_remote_request(struct intel_context *ce, GEM_BUG_ON(rq->hw_context == ce); if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */ - err = mutex_lock_interruptible_nested(&tl->mutex, - SINGLE_DEPTH_NESTING); - if (err) - return err; + /* + * Ideally, we just want to insert our foreign fence as + * a barrier into the remove context, such that this operation + * occurs after all current operations in that context, and + * all future operations must occur after this. + * + * Currently, the timeline->last_request tracking is guarded + * by its mutex and so we must obtain that to atomically + * insert our barrier. However, since we already hold our + * timeline->mutex, we must be careful against potential + * inversion if we are the kernel_context as the remote context + * will itself poke at the kernel_context when it needs to + * unpin. Ergo, if already locked, we drop both locks and + * try again (through the magic of userspace repeating EAGAIN). + */ + if (!mutex_trylock(&tl->mutex)) + return -EAGAIN; /* Queue this switch after current activity by this context. */ err = i915_active_fence_set(&tl->last_request, rq); -- cgit v1.2.3