From 16e87459673a5cbef35cc0f2e15c664b10a4cdb6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Apr 2020 12:18:12 +0100 Subject: [PATCH 01/97] drm/i915/gt: Move the batch buffer pool from the engine to the gt Since the introduction of 'soft-rc6', we aim to park the device quickly and that results in frequent idling of the whole device. Currently upon idling we free the batch buffer pool, and so this renders the cache ineffective for many workloads. If we want to have an effective cache of recently allocated buffers available for reuse, we need to decouple that cache from the engine powermanagement and make it timer based. As there is no reason then to keep it within the engine (where it once made retirement order easier to track), we can move it up the hierarchy to the owner of the memory allocations. v2: Hook up to debugfs/drop_caches to clear the cache on demand. Signed-off-by: Chris Wilson Cc: Maarten Lankhorst Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200430111819.10262-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 2 +- .../gpu/drm/i915/gem/i915_gem_client_blt.c | 1 - .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 20 +-- .../gpu/drm/i915/gem/i915_gem_object_blt.c | 18 +-- .../gpu/drm/i915/gem/i915_gem_object_blt.h | 1 - drivers/gpu/drm/i915/gt/intel_engine_cs.c | 4 - drivers/gpu/drm/i915/gt/intel_engine_pm.c | 2 - drivers/gpu/drm/i915/gt/intel_engine_pool.h | 34 ------ drivers/gpu/drm/i915/gt/intel_engine_types.h | 8 -- drivers/gpu/drm/i915/gt/intel_gt.c | 3 + ...l_engine_pool.c => intel_gt_buffer_pool.c} | 114 ++++++++++++------ .../gpu/drm/i915/gt/intel_gt_buffer_pool.h | 37 ++++++ ...l_types.h => intel_gt_buffer_pool_types.h} | 15 ++- drivers/gpu/drm/i915/gt/intel_gt_types.h | 11 ++ drivers/gpu/drm/i915/gt/mock_engine.c | 2 - drivers/gpu/drm/i915/i915_debugfs.c | 4 + 16 files changed, 160 insertions(+), 116 deletions(-) delete mode 100644 drivers/gpu/drm/i915/gt/intel_engine_pool.h rename drivers/gpu/drm/i915/gt/{intel_engine_pool.c => intel_gt_buffer_pool.c} (53%) create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h rename drivers/gpu/drm/i915/gt/{intel_engine_pool_types.h => intel_gt_buffer_pool_types.h} (54%) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index caf00d92ea9d..5359c736c789 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -87,11 +87,11 @@ gt-y += \ gt/intel_engine_cs.o \ gt/intel_engine_heartbeat.o \ gt/intel_engine_pm.o \ - gt/intel_engine_pool.o \ gt/intel_engine_user.o \ gt/intel_ggtt.o \ gt/intel_ggtt_fencing.o \ gt/intel_gt.o \ + gt/intel_gt_buffer_pool.o \ gt/intel_gt_clock_utils.o \ gt/intel_gt_irq.o \ gt/intel_gt_pm.o \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c index 0598e5382a1d..3a146aa2593b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c @@ -6,7 +6,6 @@ #include "i915_drv.h" #include "gt/intel_context.h" #include "gt/intel_engine_pm.h" -#include "gt/intel_engine_pool.h" #include "i915_gem_client_blt.h" #include "i915_gem_object_blt.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 964f73f062c1..414859fa2673 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -15,8 +15,8 @@ #include "gem/i915_gem_ioctls.h" #include "gt/intel_context.h" -#include "gt/intel_engine_pool.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_buffer_pool.h" #include "gt/intel_gt_pm.h" #include "gt/intel_ring.h" @@ -1194,13 +1194,13 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, unsigned int len) { struct reloc_cache *cache = &eb->reloc_cache; - struct intel_engine_pool_node *pool; + struct intel_gt_buffer_pool_node *pool; struct i915_request *rq; struct i915_vma *batch; u32 *cmd; int err; - pool = intel_engine_get_pool(eb->engine, PAGE_SIZE); + pool = intel_gt_get_buffer_pool(eb->engine->gt, PAGE_SIZE); if (IS_ERR(pool)) return PTR_ERR(pool); @@ -1229,7 +1229,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, goto err_unpin; } - err = intel_engine_pool_mark_active(pool, rq); + err = intel_gt_buffer_pool_mark_active(pool, rq); if (err) goto err_request; @@ -1270,7 +1270,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, err_unmap: i915_gem_object_unpin_map(pool->obj); out_pool: - intel_engine_pool_put(pool); + intel_gt_buffer_pool_put(pool); return err; } @@ -1887,7 +1887,7 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb, static int eb_parse(struct i915_execbuffer *eb) { struct drm_i915_private *i915 = eb->i915; - struct intel_engine_pool_node *pool; + struct intel_gt_buffer_pool_node *pool; struct i915_vma *shadow, *trampoline; unsigned int len; int err; @@ -1910,7 +1910,7 @@ static int eb_parse(struct i915_execbuffer *eb) len += I915_CMD_PARSER_TRAMPOLINE_SIZE; } - pool = intel_engine_get_pool(eb->engine, len); + pool = intel_gt_get_buffer_pool(eb->engine->gt, len); if (IS_ERR(pool)) return PTR_ERR(pool); @@ -1958,7 +1958,7 @@ static int eb_parse(struct i915_execbuffer *eb) err_shadow: i915_vma_unpin(shadow); err: - intel_engine_pool_put(pool); + intel_gt_buffer_pool_put(pool); return err; } @@ -2643,7 +2643,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, */ eb.request->batch = batch; if (batch->private) - intel_engine_pool_mark_active(batch->private, eb.request); + intel_gt_buffer_pool_mark_active(batch->private, eb.request); trace_i915_request_queue(eb.request, eb.batch_flags); err = eb_submit(&eb, batch); @@ -2672,7 +2672,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, i915_vma_unpin(batch); err_parse: if (batch->private) - intel_engine_pool_put(batch->private); + intel_gt_buffer_pool_put(batch->private); err_vma: if (eb.trampoline) i915_vma_unpin(eb.trampoline); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index e00792158f13..2fc7737ef5f4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -6,8 +6,8 @@ #include "i915_drv.h" #include "gt/intel_context.h" #include "gt/intel_engine_pm.h" -#include "gt/intel_engine_pool.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_buffer_pool.h" #include "gt/intel_ring.h" #include "i915_gem_clflush.h" #include "i915_gem_object_blt.h" @@ -18,7 +18,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, { struct drm_i915_private *i915 = ce->vm->i915; const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */ - struct intel_engine_pool_node *pool; + struct intel_gt_buffer_pool_node *pool; struct i915_vma *batch; u64 offset; u64 count; @@ -33,7 +33,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, count = div_u64(round_up(vma->size, block_size), block_size); size = (1 + 8 * count) * sizeof(u32); size = round_up(size, PAGE_SIZE); - pool = intel_engine_get_pool(ce->engine, size); + pool = intel_gt_get_buffer_pool(ce->engine->gt, size); if (IS_ERR(pool)) { err = PTR_ERR(pool); goto out_pm; @@ -96,7 +96,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, return batch; out_put: - intel_engine_pool_put(pool); + intel_gt_buffer_pool_put(pool); out_pm: intel_engine_pm_put(ce->engine); return ERR_PTR(err); @@ -114,13 +114,13 @@ int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq) if (unlikely(err)) return err; - return intel_engine_pool_mark_active(vma->private, rq); + return intel_gt_buffer_pool_mark_active(vma->private, rq); } void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma) { i915_vma_unpin(vma); - intel_engine_pool_put(vma->private); + intel_gt_buffer_pool_put(vma->private); intel_engine_pm_put(ce->engine); } @@ -213,7 +213,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, { struct drm_i915_private *i915 = ce->vm->i915; const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */ - struct intel_engine_pool_node *pool; + struct intel_gt_buffer_pool_node *pool; struct i915_vma *batch; u64 src_offset, dst_offset; u64 count, rem; @@ -228,7 +228,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, count = div_u64(round_up(dst->size, block_size), block_size); size = (1 + 11 * count) * sizeof(u32); size = round_up(size, PAGE_SIZE); - pool = intel_engine_get_pool(ce->engine, size); + pool = intel_gt_get_buffer_pool(ce->engine->gt, size); if (IS_ERR(pool)) { err = PTR_ERR(pool); goto out_pm; @@ -307,7 +307,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, return batch; out_put: - intel_engine_pool_put(pool); + intel_gt_buffer_pool_put(pool); out_pm: intel_engine_pm_put(ce->engine); return ERR_PTR(err); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h index 243a43a87824..8bcd336a90dc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h @@ -10,7 +10,6 @@ #include "gt/intel_context.h" #include "gt/intel_engine_pm.h" -#include "gt/intel_engine_pool.h" #include "i915_vma.h" struct drm_i915_gem_object; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index c9e46c5ced43..98b326a1568d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -31,7 +31,6 @@ #include "intel_context.h" #include "intel_engine.h" #include "intel_engine_pm.h" -#include "intel_engine_pool.h" #include "intel_engine_user.h" #include "intel_gt.h" #include "intel_gt_requests.h" @@ -631,8 +630,6 @@ static int engine_setup_common(struct intel_engine_cs *engine) intel_engine_init__pm(engine); intel_engine_init_retire(engine); - intel_engine_pool_init(&engine->pool); - /* Use the whole device by default */ engine->sseu = intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu); @@ -829,7 +826,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) cleanup_status_page(engine); intel_engine_fini_retire(engine); - intel_engine_pool_fini(&engine->pool); intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 446e35ac0224..811debefebc0 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -10,7 +10,6 @@ #include "intel_engine.h" #include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" -#include "intel_engine_pool.h" #include "intel_gt.h" #include "intel_gt_pm.h" #include "intel_rc6.h" @@ -254,7 +253,6 @@ static int __engine_park(struct intel_wakeref *wf) intel_engine_park_heartbeat(engine); intel_engine_disarm_breadcrumbs(engine); - intel_engine_pool_park(&engine->pool); /* Must be reset upon idling, or we may miss the busy wakeup. */ GEM_BUG_ON(engine->execlists.queue_priority_hint != INT_MIN); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.h b/drivers/gpu/drm/i915/gt/intel_engine_pool.h deleted file mode 100644 index 1bd89cadc3b7..000000000000 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2014-2018 Intel Corporation - */ - -#ifndef INTEL_ENGINE_POOL_H -#define INTEL_ENGINE_POOL_H - -#include "intel_engine_pool_types.h" -#include "i915_active.h" -#include "i915_request.h" - -struct intel_engine_pool_node * -intel_engine_get_pool(struct intel_engine_cs *engine, size_t size); - -static inline int -intel_engine_pool_mark_active(struct intel_engine_pool_node *node, - struct i915_request *rq) -{ - return i915_active_add_request(&node->active, rq); -} - -static inline void -intel_engine_pool_put(struct intel_engine_pool_node *node) -{ - i915_active_release(&node->active); -} - -void intel_engine_pool_init(struct intel_engine_pool *pool); -void intel_engine_pool_park(struct intel_engine_pool *pool); -void intel_engine_pool_fini(struct intel_engine_pool *pool); - -#endif /* INTEL_ENGINE_POOL_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index f760e2ef285b..3c3225c0332f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -22,7 +22,6 @@ #include "i915_pmu.h" #include "i915_priolist_types.h" #include "i915_selftest.h" -#include "intel_engine_pool_types.h" #include "intel_sseu.h" #include "intel_timeline_types.h" #include "intel_wakeref.h" @@ -405,13 +404,6 @@ struct intel_engine_cs { struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_COUNT]; } pmu; - /* - * A pool of objects to use as shadow copies of client batch buffers - * when the command parser is enabled. Prevents the client from - * modifying the batch contents after software parsing. - */ - struct intel_engine_pool pool; - struct intel_hw_status_page status_page; struct i915_ctx_workarounds wa_ctx; struct i915_wa_list ctx_wa_list; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 52593edf8aa0..f069551e412f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -7,6 +7,7 @@ #include "i915_drv.h" #include "intel_context.h" #include "intel_gt.h" +#include "intel_gt_buffer_pool.h" #include "intel_gt_clock_utils.h" #include "intel_gt_pm.h" #include "intel_gt_requests.h" @@ -28,6 +29,7 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); + intel_gt_init_buffer_pool(gt); intel_gt_init_reset(gt); intel_gt_init_requests(gt); intel_gt_init_timelines(gt); @@ -621,6 +623,7 @@ void intel_gt_driver_release(struct intel_gt *gt) intel_gt_pm_fini(gt); intel_gt_fini_scratch(gt); + intel_gt_fini_buffer_pool(gt); } void intel_gt_driver_late_release(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c similarity index 53% rename from drivers/gpu/drm/i915/gt/intel_engine_pool.c rename to drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c index 397186818305..1495054a4305 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c @@ -1,6 +1,5 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2014-2018 Intel Corporation */ @@ -8,15 +7,15 @@ #include "i915_drv.h" #include "intel_engine_pm.h" -#include "intel_engine_pool.h" +#include "intel_gt_buffer_pool.h" -static struct intel_engine_cs *to_engine(struct intel_engine_pool *pool) +static struct intel_gt *to_gt(struct intel_gt_buffer_pool *pool) { - return container_of(pool, struct intel_engine_cs, pool); + return container_of(pool, struct intel_gt, buffer_pool); } static struct list_head * -bucket_for_size(struct intel_engine_pool *pool, size_t sz) +bucket_for_size(struct intel_gt_buffer_pool *pool, size_t sz) { int n; @@ -32,16 +31,50 @@ bucket_for_size(struct intel_engine_pool *pool, size_t sz) return &pool->cache_list[n]; } -static void node_free(struct intel_engine_pool_node *node) +static void node_free(struct intel_gt_buffer_pool_node *node) { i915_gem_object_put(node->obj); i915_active_fini(&node->active); kfree(node); } +static void pool_free_work(struct work_struct *wrk) +{ + struct intel_gt_buffer_pool *pool = + container_of(wrk, typeof(*pool), work.work); + struct intel_gt_buffer_pool_node *node, *next; + unsigned long old = jiffies - HZ; + bool active = false; + LIST_HEAD(stale); + int n; + + /* Free buffers that have not been used in the past second */ + spin_lock_irq(&pool->lock); + for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) { + struct list_head *list = &pool->cache_list[n]; + + /* Most recent at head; oldest at tail */ + list_for_each_entry_safe_reverse(node, next, list, link) { + if (time_before(node->age, old)) + break; + + list_move(&node->link, &stale); + } + active |= !list_empty(list); + } + spin_unlock_irq(&pool->lock); + + list_for_each_entry_safe(node, next, &stale, link) + node_free(node); + + if (active) + schedule_delayed_work(&pool->work, + round_jiffies_up_relative(HZ)); +} + static int pool_active(struct i915_active *ref) { - struct intel_engine_pool_node *node = + struct intel_gt_buffer_pool_node *node = container_of(ref, typeof(*node), active); struct dma_resv *resv = node->obj->base.resv; int err; @@ -64,29 +97,31 @@ static int pool_active(struct i915_active *ref) __i915_active_call static void pool_retire(struct i915_active *ref) { - struct intel_engine_pool_node *node = + struct intel_gt_buffer_pool_node *node = container_of(ref, typeof(*node), active); - struct intel_engine_pool *pool = node->pool; + struct intel_gt_buffer_pool *pool = node->pool; struct list_head *list = bucket_for_size(pool, node->obj->base.size); unsigned long flags; - GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool))); - i915_gem_object_unpin_pages(node->obj); /* Return this object to the shrinker pool */ i915_gem_object_make_purgeable(node->obj); spin_lock_irqsave(&pool->lock, flags); + node->age = jiffies; list_add(&node->link, list); spin_unlock_irqrestore(&pool->lock, flags); + + schedule_delayed_work(&pool->work, + round_jiffies_up_relative(HZ)); } -static struct intel_engine_pool_node * -node_create(struct intel_engine_pool *pool, size_t sz) +static struct intel_gt_buffer_pool_node * +node_create(struct intel_gt_buffer_pool *pool, size_t sz) { - struct intel_engine_cs *engine = to_engine(pool); - struct intel_engine_pool_node *node; + struct intel_gt *gt = to_gt(pool); + struct intel_gt_buffer_pool_node *node; struct drm_i915_gem_object *obj; node = kmalloc(sizeof(*node), @@ -97,7 +132,7 @@ node_create(struct intel_engine_pool *pool, size_t sz) node->pool = pool; i915_active_init(&node->active, pool_active, pool_retire); - obj = i915_gem_object_create_internal(engine->i915, sz); + obj = i915_gem_object_create_internal(gt->i915, sz); if (IS_ERR(obj)) { i915_active_fini(&node->active); kfree(node); @@ -110,26 +145,15 @@ node_create(struct intel_engine_pool *pool, size_t sz) return node; } -static struct intel_engine_pool *lookup_pool(struct intel_engine_cs *engine) +struct intel_gt_buffer_pool_node * +intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size) { - if (intel_engine_is_virtual(engine)) - engine = intel_virtual_engine_get_sibling(engine, 0); - - GEM_BUG_ON(!engine); - return &engine->pool; -} - -struct intel_engine_pool_node * -intel_engine_get_pool(struct intel_engine_cs *engine, size_t size) -{ - struct intel_engine_pool *pool = lookup_pool(engine); - struct intel_engine_pool_node *node; + struct intel_gt_buffer_pool *pool = >->buffer_pool; + struct intel_gt_buffer_pool_node *node; struct list_head *list; unsigned long flags; int ret; - GEM_BUG_ON(!intel_engine_pm_is_awake(to_engine(pool))); - size = PAGE_ALIGN(size); list = bucket_for_size(pool, size); @@ -157,34 +181,48 @@ intel_engine_get_pool(struct intel_engine_cs *engine, size_t size) return node; } -void intel_engine_pool_init(struct intel_engine_pool *pool) +void intel_gt_init_buffer_pool(struct intel_gt *gt) { + struct intel_gt_buffer_pool *pool = >->buffer_pool; int n; spin_lock_init(&pool->lock); for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) INIT_LIST_HEAD(&pool->cache_list[n]); + INIT_DELAYED_WORK(&pool->work, pool_free_work); } -void intel_engine_pool_park(struct intel_engine_pool *pool) +static void pool_free_imm(struct intel_gt_buffer_pool *pool) { int n; + spin_lock_irq(&pool->lock); for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) { + struct intel_gt_buffer_pool_node *node, *next; struct list_head *list = &pool->cache_list[n]; - struct intel_engine_pool_node *node, *nn; - list_for_each_entry_safe(node, nn, list, link) + list_for_each_entry_safe(node, next, list, link) node_free(node); - INIT_LIST_HEAD(list); } + spin_unlock_irq(&pool->lock); } -void intel_engine_pool_fini(struct intel_engine_pool *pool) +void intel_gt_flush_buffer_pool(struct intel_gt *gt) { + struct intel_gt_buffer_pool *pool = >->buffer_pool; + + if (cancel_delayed_work_sync(&pool->work)) + pool_free_imm(pool); +} + +void intel_gt_fini_buffer_pool(struct intel_gt *gt) +{ + struct intel_gt_buffer_pool *pool = >->buffer_pool; int n; + intel_gt_flush_buffer_pool(gt); + for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) GEM_BUG_ON(!list_empty(&pool->cache_list[n])); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h new file mode 100644 index 000000000000..42cbac003e8a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014-2018 Intel Corporation + */ + +#ifndef INTEL_GT_BUFFER_POOL_H +#define INTEL_GT_BUFFER_POOL_H + +#include + +#include "i915_active.h" +#include "intel_gt_buffer_pool_types.h" + +struct intel_gt; +struct i915_request; + +struct intel_gt_buffer_pool_node * +intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size); + +static inline int +intel_gt_buffer_pool_mark_active(struct intel_gt_buffer_pool_node *node, + struct i915_request *rq) +{ + return i915_active_add_request(&node->active, rq); +} + +static inline void +intel_gt_buffer_pool_put(struct intel_gt_buffer_pool_node *node) +{ + i915_active_release(&node->active); +} + +void intel_gt_init_buffer_pool(struct intel_gt *gt); +void intel_gt_flush_buffer_pool(struct intel_gt *gt); +void intel_gt_fini_buffer_pool(struct intel_gt *gt); + +#endif /* INTEL_GT_BUFFER_POOL_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h similarity index 54% rename from drivers/gpu/drm/i915/gt/intel_engine_pool_types.h rename to drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h index e31ee361b76f..e28bdda771ed 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pool_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h @@ -4,26 +4,29 @@ * Copyright © 2014-2018 Intel Corporation */ -#ifndef INTEL_ENGINE_POOL_TYPES_H -#define INTEL_ENGINE_POOL_TYPES_H +#ifndef INTEL_GT_BUFFER_POOL_TYPES_H +#define INTEL_GT_BUFFER_POOL_TYPES_H #include #include +#include #include "i915_active_types.h" struct drm_i915_gem_object; -struct intel_engine_pool { +struct intel_gt_buffer_pool { spinlock_t lock; struct list_head cache_list[4]; + struct delayed_work work; }; -struct intel_engine_pool_node { +struct intel_gt_buffer_pool_node { struct i915_active active; struct drm_i915_gem_object *obj; struct list_head link; - struct intel_engine_pool *pool; + struct intel_gt_buffer_pool *pool; + unsigned long age; }; -#endif /* INTEL_ENGINE_POOL_TYPES_H */ +#endif /* INTEL_GT_BUFFER_POOL_TYPES_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index d02ccb735e24..0cc1d6b185dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -17,6 +17,7 @@ #include "i915_vma.h" #include "intel_engine_types.h" +#include "intel_gt_buffer_pool_types.h" #include "intel_llc_types.h" #include "intel_reset_types.h" #include "intel_rc6_types.h" @@ -97,6 +98,16 @@ struct intel_gt { */ struct i915_address_space *vm; + /* + * A pool of objects to use as shadow copies of client batch buffers + * when the command parser is enabled. Prevents the client from + * modifying the batch contents after software parsing. + * + * Buffers older than 1s are periodically reaped from the pool, + * or may be reclaimed by the shrinker before then. + */ + struct intel_gt_buffer_pool buffer_pool; + struct i915_vma *scratch; }; diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 4a53ded7c2dd..b8dd3cbc8696 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -28,7 +28,6 @@ #include "i915_drv.h" #include "intel_context.h" #include "intel_engine_pm.h" -#include "intel_engine_pool.h" #include "mock_engine.h" #include "selftests/mock_request.h" @@ -328,7 +327,6 @@ int mock_engine_init(struct intel_engine_cs *engine) intel_engine_init_execlists(engine); intel_engine_init__pm(engine); intel_engine_init_retire(engine); - intel_engine_pool_init(&engine->pool); ce = create_kernel_context(engine); if (IS_ERR(ce)) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 23857a7512b7..c01837a16a4e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -32,6 +32,7 @@ #include #include "gem/i915_gem_context.h" +#include "gt/intel_gt_buffer_pool.h" #include "gt/intel_gt_clock_utils.h" #include "gt/intel_gt_pm.h" #include "gt/intel_gt_requests.h" @@ -1484,6 +1485,9 @@ gt_drop_caches(struct intel_gt *gt, u64 val) if (val & DROP_RESET_ACTIVE && intel_gt_terminally_wedged(gt)) intel_gt_handle_error(gt, ALL_ENGINES, 0, NULL); + if (val & DROP_FREED) + intel_gt_flush_buffer_pool(gt); + return 0; } From 3b55cdeb8f1b71444da866fd2568b1a18ba7f9c3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Apr 2020 19:33:24 +0100 Subject: [PATCH 02/97] drm/i915/pmu: Keep a reference to module while active While a perf event is open, keep a reference to the module so we don't remove the driver internals mid-sampling. Testcase: igt/perf_pmu/module-unload Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: stable@vger.kernel.org Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200430183324.23984-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_pmu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 83c6a8ccd2cb..e991a707bdb7 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -442,6 +442,7 @@ static u64 count_interrupts(struct drm_i915_private *i915) static void i915_pmu_event_destroy(struct perf_event *event) { WARN_ON(event->parent); + module_put(THIS_MODULE); } static int @@ -533,8 +534,10 @@ static int i915_pmu_event_init(struct perf_event *event) if (ret) return ret; - if (!event->parent) + if (!event->parent) { + __module_get(THIS_MODULE); event->destroy = i915_pmu_event_destroy; + } return 0; } From a211da9c771bf97395a3ced83a3aa383372b13a7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 May 2020 13:22:49 +0100 Subject: [PATCH 03/97] drm/i915/gt: Make timeslicing an explicit engine property In order to allow userspace to rely on timeslicing to reorder their batches, we must support preemption of those user batches. Declare timeslicing as an explicit property that is a combination of having the kernel support and HW support. Suggested-by: Tvrtko Ursulin Fixes: 8ee36e048c98 ("drm/i915/execlists: Minimalistic timeslicing") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200501122249.12417-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine.h | 9 --------- drivers/gpu/drm/i915/gt/intel_engine_types.h | 18 ++++++++++++++---- drivers/gpu/drm/i915/gt/intel_lrc.c | 5 ++++- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index d10e52ff059f..19d0b8830905 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -332,13 +332,4 @@ intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) return intel_engine_has_preemption(engine); } -static inline bool -intel_engine_has_timeslices(const struct intel_engine_cs *engine) -{ - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) - return false; - - return intel_engine_has_semaphores(engine); -} - #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 3c3225c0332f..6c676774dcd9 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -492,10 +492,11 @@ struct intel_engine_cs { #define I915_ENGINE_SUPPORTS_STATS BIT(1) #define I915_ENGINE_HAS_PREEMPTION BIT(2) #define I915_ENGINE_HAS_SEMAPHORES BIT(3) -#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) -#define I915_ENGINE_IS_VIRTUAL BIT(5) -#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(6) -#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(7) +#define I915_ENGINE_HAS_TIMESLICES BIT(4) +#define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(5) +#define I915_ENGINE_IS_VIRTUAL BIT(6) +#define I915_ENGINE_HAS_RELATIVE_MMIO BIT(7) +#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(8) unsigned int flags; /* @@ -593,6 +594,15 @@ intel_engine_has_semaphores(const struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_HAS_SEMAPHORES; } +static inline bool +intel_engine_has_timeslices(const struct intel_engine_cs *engine) +{ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return false; + + return engine->flags & I915_ENGINE_HAS_TIMESLICES; +} + static inline bool intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine) { diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 4311b12542fb..d4ef344657b0 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -4801,8 +4801,11 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) engine->flags |= I915_ENGINE_SUPPORTS_STATS; if (!intel_vgpu_active(engine->i915)) { engine->flags |= I915_ENGINE_HAS_SEMAPHORES; - if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) + if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) { engine->flags |= I915_ENGINE_HAS_PREEMPTION; + if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + engine->flags |= I915_ENGINE_HAS_TIMESLICES; + } } if (INTEL_GEN(engine->i915) >= 12) From 9f909e215fea0652023b9ed09d3d7bfe10386423 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 May 2020 15:51:20 +0100 Subject: [PATCH 04/97] drm/i915: Implement vm_ops->access for gdb access into mmaps gdb uses ptrace() to peek and poke bytes of the target's address space. The driver must implement an vm_ops->access() handler or else gdb will be unable to inspect the pointer and report it as out-of-bounds. Worse than useless as it causes immediate suspicion of the valid GTT pointer, distracting the poor programmer trying to find his bug. v2: Write-protect readonly objects (Matthew). Testcase: igt/gem_mmap_gtt/ptrace Testcase: igt/gem_mmap_offset/ptrace Suggested-by: Kristian H. Kristensen Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Cc: Maciej Patelczyk Cc: Kristian H. Kristensen Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200501145120.18830-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 34 +++++ .../drm/i915/gem/selftests/i915_gem_mman.c | 124 ++++++++++++++++++ 2 files changed, 158 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index b39c24dae64e..70f5f82da288 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -396,6 +396,38 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) return i915_error_to_vmf_fault(ret); } +static int +vm_access(struct vm_area_struct *area, unsigned long addr, + void *buf, int len, int write) +{ + struct i915_mmap_offset *mmo = area->vm_private_data; + struct drm_i915_gem_object *obj = mmo->obj; + void *vaddr; + + if (i915_gem_object_is_readonly(obj) && write) + return -EACCES; + + addr -= area->vm_start; + if (addr >= obj->base.size) + return -EINVAL; + + /* As this is primarily for debugging, let's focus on simplicity */ + vaddr = i915_gem_object_pin_map(obj, I915_MAP_FORCE_WC); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + if (write) { + memcpy(vaddr + addr, buf, len); + __i915_gem_object_flush_map(obj, addr, len); + } else { + memcpy(buf, vaddr + addr, len); + } + + i915_gem_object_unpin_map(obj); + + return len; +} + void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj) { struct i915_vma *vma; @@ -745,12 +777,14 @@ static void vm_close(struct vm_area_struct *vma) static const struct vm_operations_struct vm_ops_gtt = { .fault = vm_fault_gtt, + .access = vm_access, .open = vm_open, .close = vm_close, }; static const struct vm_operations_struct vm_ops_cpu = { .fault = vm_fault_cpu, + .access = vm_access, .open = vm_open, .close = vm_close, }; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index ef7abcb3f4ee..9c7402ce5bf9 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -952,6 +952,129 @@ static int igt_mmap(void *arg) return 0; } +static const char *repr_mmap_type(enum i915_mmap_type type) +{ + switch (type) { + case I915_MMAP_TYPE_GTT: return "gtt"; + case I915_MMAP_TYPE_WB: return "wb"; + case I915_MMAP_TYPE_WC: return "wc"; + case I915_MMAP_TYPE_UC: return "uc"; + default: return "unknown"; + } +} + +static bool can_access(const struct drm_i915_gem_object *obj) +{ + unsigned int flags = + I915_GEM_OBJECT_HAS_STRUCT_PAGE | I915_GEM_OBJECT_HAS_IOMEM; + + return i915_gem_object_type_has(obj, flags); +} + +static int __igt_mmap_access(struct drm_i915_private *i915, + struct drm_i915_gem_object *obj, + enum i915_mmap_type type) +{ + struct i915_mmap_offset *mmo; + unsigned long __user *ptr; + unsigned long A, B; + unsigned long x, y; + unsigned long addr; + int err; + + memset(&A, 0xAA, sizeof(A)); + memset(&B, 0xBB, sizeof(B)); + + if (!can_mmap(obj, type) || !can_access(obj)) + return 0; + + mmo = mmap_offset_attach(obj, type, NULL); + if (IS_ERR(mmo)) + return PTR_ERR(mmo); + + addr = igt_mmap_node(i915, &mmo->vma_node, 0, PROT_WRITE, MAP_SHARED); + if (IS_ERR_VALUE(addr)) + return addr; + ptr = (unsigned long __user *)addr; + + err = __put_user(A, ptr); + if (err) { + pr_err("%s(%s): failed to write into user mmap\n", + obj->mm.region->name, repr_mmap_type(type)); + goto out_unmap; + } + + intel_gt_flush_ggtt_writes(&i915->gt); + + err = access_process_vm(current, addr, &x, sizeof(x), 0); + if (err != sizeof(x)) { + pr_err("%s(%s): access_process_vm() read failed\n", + obj->mm.region->name, repr_mmap_type(type)); + goto out_unmap; + } + + err = access_process_vm(current, addr, &B, sizeof(B), FOLL_WRITE); + if (err != sizeof(B)) { + pr_err("%s(%s): access_process_vm() write failed\n", + obj->mm.region->name, repr_mmap_type(type)); + goto out_unmap; + } + + intel_gt_flush_ggtt_writes(&i915->gt); + + err = __get_user(y, ptr); + if (err) { + pr_err("%s(%s): failed to read from user mmap\n", + obj->mm.region->name, repr_mmap_type(type)); + goto out_unmap; + } + + if (x != A || y != B) { + pr_err("%s(%s): failed to read/write values, found (%lx, %lx)\n", + obj->mm.region->name, repr_mmap_type(type), + x, y); + err = -EINVAL; + goto out_unmap; + } + +out_unmap: + vm_munmap(addr, obj->base.size); + return err; +} + +static int igt_mmap_access(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_memory_region *mr; + enum intel_region_id id; + + for_each_memory_region(mr, i915, id) { + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_region(mr, PAGE_SIZE, 0); + if (obj == ERR_PTR(-ENODEV)) + continue; + + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_GTT); + if (err == 0) + err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WB); + if (err == 0) + err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WC); + if (err == 0) + err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_UC); + + i915_gem_object_put(obj); + if (err) + return err; + } + + return 0; +} + static int __igt_mmap_gpu(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, enum i915_mmap_type type) @@ -1229,6 +1352,7 @@ int i915_gem_mman_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_smoke_tiling), SUBTEST(igt_mmap_offset_exhaustion), SUBTEST(igt_mmap), + SUBTEST(igt_mmap_access), SUBTEST(igt_mmap_revoke), SUBTEST(igt_mmap_gpu), }; From 964a9b0f611ee7fedc90641bfcc2efe6ce6206aa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 May 2020 20:29:43 +0100 Subject: [PATCH 05/97] drm/i915/gem: Use chained reloc batches The ring is a precious resource: we anticipate to only use a few hundred bytes for a request, and only try to reserve that before we start. If we go beyond our guess in building the request, then instead of waiting at the start of execbuf before we hold any locks or other resources, we may trigger a wait inside a critical region. One example is in using gpu relocations, where currently we emit a new MI_BB_START from the ring every time we overflow a page of relocation entries. However, instead of insert the command into the precious ring, we can chain the next page of relocation entries as MI_BB_START from the end of the previous. v2: Delay the emit_bb_start until after all the chained vma synchronisation is complete. Since the buffer pool batches are idle, this _should_ be a no-op, but one day we may some fancy async GPU bindings for new vma! v3: Use pool/batch consitently, once we start thinking in terms of the batch vma, use batch->obj. v4: Explain the magic number 4. Tvrtko spotted that we lose propagation of the error for failing to submit the relocation request; that's easier to fix up in the next patch. Testcase: igt/gem_exec_reloc/basic-many-active Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200501192945.22215-1-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 134 +++++++++++++++--- 1 file changed, 115 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 414859fa2673..0411618d66a9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -269,6 +269,7 @@ struct i915_execbuffer { bool needs_unfenced : 1; struct i915_request *rq; + struct i915_vma *rq_vma; u32 *rq_cmd; unsigned int rq_size; } reloc_cache; @@ -975,20 +976,114 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache) return &i915->ggtt; } +#define RELOC_TAIL 4 + +static int reloc_gpu_chain(struct reloc_cache *cache) +{ + struct intel_gt_buffer_pool_node *pool; + struct i915_request *rq = cache->rq; + struct i915_vma *batch; + u32 *cmd; + int err; + + pool = intel_gt_get_buffer_pool(rq->engine->gt, PAGE_SIZE); + if (IS_ERR(pool)) + return PTR_ERR(pool); + + batch = i915_vma_instance(pool->obj, rq->context->vm, NULL); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_pool; + } + + err = i915_vma_pin(batch, 0, 0, PIN_USER | PIN_NONBLOCK); + if (err) + goto out_pool; + + GEM_BUG_ON(cache->rq_size + RELOC_TAIL > PAGE_SIZE / sizeof(u32)); + cmd = cache->rq_cmd + cache->rq_size; + *cmd++ = MI_ARB_CHECK; + if (cache->gen >= 8) { + *cmd++ = MI_BATCH_BUFFER_START_GEN8; + *cmd++ = lower_32_bits(batch->node.start); + *cmd++ = upper_32_bits(batch->node.start); + } else { + *cmd++ = MI_BATCH_BUFFER_START; + *cmd++ = lower_32_bits(batch->node.start); + } + i915_gem_object_flush_map(cache->rq_vma->obj); + i915_gem_object_unpin_map(cache->rq_vma->obj); + cache->rq_vma = NULL; + + err = intel_gt_buffer_pool_mark_active(pool, rq); + if (err == 0) { + i915_vma_lock(batch); + err = i915_request_await_object(rq, batch->obj, false); + if (err == 0) + err = i915_vma_move_to_active(batch, rq, 0); + i915_vma_unlock(batch); + } + i915_vma_unpin(batch); + if (err) + goto out_pool; + + cmd = i915_gem_object_pin_map(batch->obj, + cache->has_llc ? + I915_MAP_FORCE_WB : + I915_MAP_FORCE_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto out_pool; + } + + /* Return with batch mapping (cmd) still pinned */ + cache->rq_cmd = cmd; + cache->rq_size = 0; + cache->rq_vma = batch; + +out_pool: + intel_gt_buffer_pool_put(pool); + return err; +} + +static unsigned int reloc_bb_flags(const struct reloc_cache *cache) +{ + return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE; +} + static void reloc_gpu_flush(struct reloc_cache *cache) { - struct drm_i915_gem_object *obj = cache->rq->batch->obj; + struct i915_request *rq; + int err; - GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); - cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; + rq = fetch_and_zero(&cache->rq); + if (!rq) + return; - __i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1)); - i915_gem_object_unpin_map(obj); + if (cache->rq_vma) { + struct drm_i915_gem_object *obj = cache->rq_vma->obj; - intel_gt_chipset_flush(cache->rq->engine->gt); + GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); + cache->rq_cmd[cache->rq_size++] = MI_BATCH_BUFFER_END; - i915_request_add(cache->rq); - cache->rq = NULL; + __i915_gem_object_flush_map(obj, + 0, sizeof(u32) * cache->rq_size); + i915_gem_object_unpin_map(obj); + } + + err = 0; + if (rq->engine->emit_init_breadcrumb) + err = rq->engine->emit_init_breadcrumb(rq); + if (!err) + err = rq->engine->emit_bb_start(rq, + rq->batch->node.start, + PAGE_SIZE, + reloc_bb_flags(cache)); + if (err) + i915_request_set_error_once(rq, err); + + intel_gt_chipset_flush(rq->engine->gt); + i915_request_add(rq); } static void reloc_cache_reset(struct reloc_cache *cache) @@ -1237,12 +1332,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_request; - err = eb->engine->emit_bb_start(rq, - batch->node.start, PAGE_SIZE, - cache->gen > 5 ? 0 : I915_DISPATCH_SECURE); - if (err) - goto skip_request; - i915_vma_lock(batch); err = i915_request_await_object(rq, batch->obj, false); if (err == 0) @@ -1257,6 +1346,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, cache->rq = rq; cache->rq_cmd = cmd; cache->rq_size = 0; + cache->rq_vma = batch; /* Return with batch mapping (cmd) still pinned */ goto out_pool; @@ -1280,13 +1370,9 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, { struct reloc_cache *cache = &eb->reloc_cache; u32 *cmd; - - if (cache->rq_size > PAGE_SIZE/sizeof(u32) - (len + 1)) - reloc_gpu_flush(cache); + int err; if (unlikely(!cache->rq)) { - int err; - if (!intel_engine_can_store_dword(eb->engine)) return ERR_PTR(-ENODEV); @@ -1295,6 +1381,16 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, return ERR_PTR(err); } + if (unlikely(cache->rq_size + len > + PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) { + err = reloc_gpu_chain(cache); + if (unlikely(err)) { + i915_request_set_error_once(cache->rq, err); + return ERR_PTR(err); + } + } + + GEM_BUG_ON(cache->rq_size + len >= PAGE_SIZE / sizeof(u32)); cmd = cache->rq_cmd + cache->rq_size; cache->rq_size += len; From 0e97fbb080553102af98296382c45e89e2ad8dbc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 May 2020 20:29:44 +0100 Subject: [PATCH 06/97] drm/i915/gem: Use a single chained reloc batches for a single execbuf As we can now keep chaining together a relocation batch to process any number of relocations, we can keep building that relocation batch for all of the target vma. This avoiding emitting a new request into the ring for each target, consuming precious ring space and a potential stall. v2: Propagate the failure from submitting the relocation batch. Testcase: igt/gem_exec_reloc/basic-wide-active Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200501192945.22215-2-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 38 ++++++++++++------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 0411618d66a9..3c01c4193c5f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -268,6 +268,7 @@ struct i915_execbuffer { bool has_fence : 1; bool needs_unfenced : 1; + struct i915_vma *target; struct i915_request *rq; struct i915_vma *rq_vma; u32 *rq_cmd; @@ -1051,14 +1052,14 @@ static unsigned int reloc_bb_flags(const struct reloc_cache *cache) return cache->gen > 5 ? 0 : I915_DISPATCH_SECURE; } -static void reloc_gpu_flush(struct reloc_cache *cache) +static int reloc_gpu_flush(struct reloc_cache *cache) { struct i915_request *rq; int err; rq = fetch_and_zero(&cache->rq); if (!rq) - return; + return 0; if (cache->rq_vma) { struct drm_i915_gem_object *obj = cache->rq_vma->obj; @@ -1084,15 +1085,14 @@ static void reloc_gpu_flush(struct reloc_cache *cache) intel_gt_chipset_flush(rq->engine->gt); i915_request_add(rq); + + return err; } static void reloc_cache_reset(struct reloc_cache *cache) { void *vaddr; - if (cache->rq) - reloc_gpu_flush(cache); - if (!cache->vaddr) return; @@ -1285,7 +1285,6 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) } static int __reloc_gpu_alloc(struct i915_execbuffer *eb, - struct i915_vma *vma, unsigned int len) { struct reloc_cache *cache = &eb->reloc_cache; @@ -1308,7 +1307,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, goto out_pool; } - batch = i915_vma_instance(pool->obj, vma->vm, NULL); + batch = i915_vma_instance(pool->obj, eb->context->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); goto err_unmap; @@ -1328,10 +1327,6 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_request; - err = reloc_move_to_gpu(rq, vma); - if (err) - goto err_request; - i915_vma_lock(batch); err = i915_request_await_object(rq, batch->obj, false); if (err == 0) @@ -1376,11 +1371,21 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, if (!intel_engine_can_store_dword(eb->engine)) return ERR_PTR(-ENODEV); - err = __reloc_gpu_alloc(eb, vma, len); + err = __reloc_gpu_alloc(eb, len); if (unlikely(err)) return ERR_PTR(err); } + if (vma != cache->target) { + err = reloc_move_to_gpu(cache->rq, vma); + if (unlikely(err)) { + i915_request_set_error_once(cache->rq, err); + return ERR_PTR(err); + } + + cache->target = vma; + } + if (unlikely(cache->rq_size + len > PAGE_SIZE / sizeof(u32) - RELOC_TAIL)) { err = reloc_gpu_chain(cache); @@ -1692,15 +1697,20 @@ static int eb_relocate(struct i915_execbuffer *eb) /* The objects are in their final locations, apply the relocations. */ if (eb->args->flags & __EXEC_HAS_RELOC) { struct eb_vma *ev; + int flush; list_for_each_entry(ev, &eb->relocs, reloc_link) { err = eb_relocate_vma(eb, ev); if (err) - return err; + break; } + + flush = reloc_gpu_flush(&eb->reloc_cache); + if (!err) + err = flush; } - return 0; + return err; } static int eb_move_to_gpu(struct i915_execbuffer *eb) From 6f576d6277ce5e3aaf2f35b2410220019c3d3cd9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 1 May 2020 20:29:45 +0100 Subject: [PATCH 07/97] drm/i915/gem: Try an alternate engine for relocations If at first we don't succeed, try try again. Not all engines may support the MI ops we need to perform asynchronous relocation patching, and so we end up falling back to a synchronous operation that has a liability of blocking. However, Tvrtko pointed out we don't need to use the same engine to perform the relocations as we are planning to execute the execbuf on, and so if we switch over to a working engine, we can perform the relocation asynchronously. The user execbuf will be queued after the relocations by virtue of fencing. This patch creates a new context per execbuf requiring asynchronous relocations on an unusable engines. This is perhaps a bit excessive and can be ameliorated by a small context cache, but for the moment we only need it for working around a little used engine on Sandybridge, and only if relocations are actually required to an active batch buffer. Now we just need to teach the relocation code to handle physical addressing for gen2/3, and we should then have universal support! Suggested-by: Tvrtko Ursulin Testcase: igt/gem_exec_reloc/basic-spin # snb Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200501192945.22215-3-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 32 ++++++++++++++++--- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 3c01c4193c5f..cce7df231cb9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1285,6 +1285,7 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) } static int __reloc_gpu_alloc(struct i915_execbuffer *eb, + struct intel_engine_cs *engine, unsigned int len) { struct reloc_cache *cache = &eb->reloc_cache; @@ -1294,7 +1295,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, u32 *cmd; int err; - pool = intel_gt_get_buffer_pool(eb->engine->gt, PAGE_SIZE); + pool = intel_gt_get_buffer_pool(engine->gt, PAGE_SIZE); if (IS_ERR(pool)) return PTR_ERR(pool); @@ -1317,7 +1318,23 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, if (err) goto err_unmap; - rq = i915_request_create(eb->context); + if (engine == eb->context->engine) { + rq = i915_request_create(eb->context); + } else { + struct intel_context *ce; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(rq); + goto err_unpin; + } + + i915_vm_put(ce->vm); + ce->vm = i915_vm_get(eb->context->vm); + + rq = intel_context_create_request(ce); + intel_context_put(ce); + } if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_unpin; @@ -1368,10 +1385,15 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, int err; if (unlikely(!cache->rq)) { - if (!intel_engine_can_store_dword(eb->engine)) - return ERR_PTR(-ENODEV); + struct intel_engine_cs *engine = eb->engine; - err = __reloc_gpu_alloc(eb, len); + if (!intel_engine_can_store_dword(engine)) { + engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0]; + if (!engine || !intel_engine_can_store_dword(engine)) + return ERR_PTR(-ENODEV); + } + + err = __reloc_gpu_alloc(eb, engine, len); if (unlikely(err)) return ERR_PTR(err); } From 389b7f00c7396a3347743fcde5bbf0c00dd0cf71 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 2 May 2020 18:35:12 +0100 Subject: [PATCH 08/97] drm/i915/gt: Sanitize RPS interrupts upon resume Currently we clear and disable the RPS pm interrupts on module load, and presume that they remain disabled forevermore. However, the mask is cleared on suspend and so after resume they may start showing up again unexepectedly. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1811 Fixes: 8e99299a04bc ("drm/i915/gt: Track use of RPS interrupts in flags") Signed-off-by: Chris Wilson Cc: Andi Shyti Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20200502173512.32353-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 2 ++ drivers/gpu/drm/i915/gt/intel_rps.c | 5 ++++- drivers/gpu/drm/i915/gt/intel_rps.h | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 5097786f4375..e59776485457 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -171,6 +171,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force) if (engine->reset.finish) engine->reset.finish(engine); + intel_rps_sanitize(>->rps); + intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); intel_runtime_pm_put(gt->uncore->rpm, wakeref); } diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index c682355ec79e..2f59fc6df3c2 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1844,8 +1844,11 @@ void intel_rps_init(struct intel_rps *rps) if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) < 11) rps->pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC; +} - if (INTEL_GEN(i915) >= 6) +void intel_rps_sanitize(struct intel_rps *rps) +{ + if (INTEL_GEN(rps_to_i915(rps)) >= 6) rps_disable_interrupts(rps); } diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index af07fa5b7584..8d3c9d663662 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -13,6 +13,7 @@ struct i915_request; void intel_rps_init_early(struct intel_rps *rps); void intel_rps_init(struct intel_rps *rps); +void intel_rps_sanitize(struct intel_rps *rps); void intel_rps_driver_register(struct intel_rps *rps); void intel_rps_driver_unregister(struct intel_rps *rps); From 6983dafa310afdb416af091578019e58a773984e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 3 May 2020 18:15:13 +0100 Subject: [PATCH 09/97] drm/i915/gem: Lazily acquire the device wakeref for freeing objects We only need the device wakeref on freeing the objects if we have to unbind the object from the global GTT, or otherwise update device information. If the objects are clean, we never need the wakeref, so avoid taking until required. Signed-off-by: Chris Wilson Cc: Janusz Krzysztofik Reviewed-by: Janusz Krzysztofik Link: https://patchwork.freedesktop.org/patch/msgid/20200503171513.18704-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_object.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 9d1d0131f7c2..99356c00c19e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -162,9 +162,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, struct llist_node *freed) { struct drm_i915_gem_object *obj, *on; - intel_wakeref_t wakeref; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); llist_for_each_entry_safe(obj, on, freed, freed) { struct i915_mmap_offset *mmo, *mn; @@ -224,7 +222,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, call_rcu(&obj->rcu, __i915_gem_free_object_rcu); cond_resched(); } - intel_runtime_pm_put(&i915->runtime_pm, wakeref); } void i915_gem_flush_free_objects(struct drm_i915_private *i915) From 378974f7f9754acfd5630327917c6b813495f1a9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 4 May 2020 05:48:42 +0100 Subject: [PATCH 10/97] drm/i915: Allow some leniency in PCU reads Extend the timeout for pcode reads to 20ms as they should not be performed along critical paths, and succeeding after a short delay is better than failing entirely. References: https://gitlab.freedesktop.org/drm/intel/-/issues/1800 Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200504044903.7626-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_sideband.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 14daf6af6854..d5129c1dd452 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -429,7 +429,7 @@ int sandybridge_pcode_read(struct drm_i915_private *i915, u32 mbox, mutex_lock(&i915->sb_lock); err = __sandybridge_pcode_rw(i915, mbox, val, val1, - 500, 0, + 500, 20, true); mutex_unlock(&i915->sb_lock); From f5b62bdbb6262de1c46205cd4167b4e90cb0d4cf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 4 May 2020 13:51:49 +0100 Subject: [PATCH 11/97] drm/i915/gem: Specify address type for chained reloc batches It is required that a chained batch be in the same address domain as its parent, and also that must be specified in the command for earlier gen as it is not inferred from the chaining until gen6. Fixes: 964a9b0f611e ("drm/i915/gem: Use chained reloc batches") Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200504125149.4396-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index cce7df231cb9..1c247ad0971a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -1004,14 +1004,14 @@ static int reloc_gpu_chain(struct reloc_cache *cache) GEM_BUG_ON(cache->rq_size + RELOC_TAIL > PAGE_SIZE / sizeof(u32)); cmd = cache->rq_cmd + cache->rq_size; *cmd++ = MI_ARB_CHECK; - if (cache->gen >= 8) { + if (cache->gen >= 8) *cmd++ = MI_BATCH_BUFFER_START_GEN8; - *cmd++ = lower_32_bits(batch->node.start); - *cmd++ = upper_32_bits(batch->node.start); - } else { + else if (cache->gen >= 6) *cmd++ = MI_BATCH_BUFFER_START; - *cmd++ = lower_32_bits(batch->node.start); - } + else + *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cmd++ = lower_32_bits(batch->node.start); + *cmd++ = upper_32_bits(batch->node.start); /* Always 0 for gen<8 */ i915_gem_object_flush_map(cache->rq_vma->obj); i915_gem_object_unpin_map(cache->rq_vma->obj); cache->rq_vma = NULL; From e3d291301f99ef98d71bf858478bd4a3c5525bfe Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 4 May 2020 15:06:29 +0100 Subject: [PATCH 12/97] drm/i915/gem: Implement legacy MI_STORE_DATA_IMM The older arches did not convert MI_STORE_DATA_IMM to using the GTT, but left them writing to a physical address. The notes suggest that the primary reason would be so that the writes were cache coherent, as the CPU cache uses physical tagging. As such we did not implement the legacy variant of MI_STORE_DATA_IMM and so left all the relocations synchronous -- but with a small function to convert from the vma address into the physical address, we can implement asynchronous relocs on these older arches, fixing up a few tests that require them. In order to be able to test the legacy paths, refactor the gpu relocations so that we can hook them up to a selftest. v2: Use an array of offsets not enum labels for the selftest v3: Refactor the common igt_hexdump() Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/757 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200504140629.28240-1-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 206 +++++++++++------- .../i915/gem/selftests/i915_gem_client_blt.c | 31 +-- .../i915/gem/selftests/i915_gem_execbuffer.c | 171 +++++++++++++++ drivers/gpu/drm/i915/gt/selftest_lrc.c | 33 +-- drivers/gpu/drm/i915/i915_selftest.h | 2 + .../drm/i915/selftests/i915_live_selftests.h | 1 + .../gpu/drm/i915/selftests/i915_selftest.c | 29 +++ 7 files changed, 337 insertions(+), 136 deletions(-) create mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 1c247ad0971a..966523a8503f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -955,7 +955,7 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment; cache->node.flags = 0; cache->rq = NULL; - cache->rq_size = 0; + cache->target = NULL; } static inline void *unmask_page(unsigned long p) @@ -1325,7 +1325,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, ce = intel_context_create(engine); if (IS_ERR(ce)) { - err = PTR_ERR(rq); + err = PTR_ERR(ce); goto err_unpin; } @@ -1376,6 +1376,11 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, return err; } +static bool reloc_can_use_engine(const struct intel_engine_cs *engine) +{ + return engine->class != VIDEO_DECODE_CLASS || !IS_GEN(engine->i915, 6); +} + static u32 *reloc_gpu(struct i915_execbuffer *eb, struct i915_vma *vma, unsigned int len) @@ -1387,9 +1392,9 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, if (unlikely(!cache->rq)) { struct intel_engine_cs *engine = eb->engine; - if (!intel_engine_can_store_dword(engine)) { + if (!reloc_can_use_engine(engine)) { engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0]; - if (!engine || !intel_engine_can_store_dword(engine)) + if (!engine) return ERR_PTR(-ENODEV); } @@ -1435,91 +1440,138 @@ static inline bool use_reloc_gpu(struct i915_vma *vma) return !dma_resv_test_signaled_rcu(vma->resv, true); } +static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset) +{ + struct page *page; + unsigned long addr; + + GEM_BUG_ON(vma->pages != vma->obj->mm.pages); + + page = i915_gem_object_get_page(vma->obj, offset >> PAGE_SHIFT); + addr = PFN_PHYS(page_to_pfn(page)); + GEM_BUG_ON(overflows_type(addr, u32)); /* expected dma32 */ + + return addr + offset_in_page(offset); +} + +static bool __reloc_entry_gpu(struct i915_execbuffer *eb, + struct i915_vma *vma, + u64 offset, + u64 target_addr) +{ + const unsigned int gen = eb->reloc_cache.gen; + unsigned int len; + u32 *batch; + u64 addr; + + if (gen >= 8) + len = offset & 7 ? 8 : 5; + else if (gen >= 4) + len = 4; + else + len = 3; + + batch = reloc_gpu(eb, vma, len); + if (IS_ERR(batch)) + return false; + + addr = gen8_canonical_addr(vma->node.start + offset); + if (gen >= 8) { + if (offset & 7) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = lower_32_bits(addr); + *batch++ = upper_32_bits(addr); + *batch++ = lower_32_bits(target_addr); + + addr = gen8_canonical_addr(addr + 4); + + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = lower_32_bits(addr); + *batch++ = upper_32_bits(addr); + *batch++ = upper_32_bits(target_addr); + } else { + *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1; + *batch++ = lower_32_bits(addr); + *batch++ = upper_32_bits(addr); + *batch++ = lower_32_bits(target_addr); + *batch++ = upper_32_bits(target_addr); + } + } else if (gen >= 6) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = 0; + *batch++ = addr; + *batch++ = target_addr; + } else if (IS_I965G(eb->i915)) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = 0; + *batch++ = vma_phys_addr(vma, offset); + *batch++ = target_addr; + } else if (gen >= 4) { + *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *batch++ = 0; + *batch++ = addr; + *batch++ = target_addr; + } else if (gen >= 3 && + !(IS_I915G(eb->i915) || IS_I915GM(eb->i915))) { + *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *batch++ = addr; + *batch++ = target_addr; + } else { + *batch++ = MI_STORE_DWORD_IMM; + *batch++ = vma_phys_addr(vma, offset); + *batch++ = target_addr; + } + + return true; +} + +static bool reloc_entry_gpu(struct i915_execbuffer *eb, + struct i915_vma *vma, + u64 offset, + u64 target_addr) +{ + if (eb->reloc_cache.vaddr) + return false; + + if (!use_reloc_gpu(vma)) + return false; + + return __reloc_entry_gpu(eb, vma, offset, target_addr); +} + static u64 relocate_entry(struct i915_vma *vma, const struct drm_i915_gem_relocation_entry *reloc, struct i915_execbuffer *eb, const struct i915_vma *target) { + u64 target_addr = relocation_target(reloc, target); u64 offset = reloc->offset; - u64 target_offset = relocation_target(reloc, target); - bool wide = eb->reloc_cache.use_64bit_reloc; - void *vaddr; - if (!eb->reloc_cache.vaddr && use_reloc_gpu(vma)) { - const unsigned int gen = eb->reloc_cache.gen; - unsigned int len; - u32 *batch; - u64 addr; - - if (wide) - len = offset & 7 ? 8 : 5; - else if (gen >= 4) - len = 4; - else - len = 3; - - batch = reloc_gpu(eb, vma, len); - if (IS_ERR(batch)) - goto repeat; - - addr = gen8_canonical_addr(vma->node.start + offset); - if (wide) { - if (offset & 7) { - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = lower_32_bits(target_offset); - - addr = gen8_canonical_addr(addr + 4); - - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = upper_32_bits(target_offset); - } else { - *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1; - *batch++ = lower_32_bits(addr); - *batch++ = upper_32_bits(addr); - *batch++ = lower_32_bits(target_offset); - *batch++ = upper_32_bits(target_offset); - } - } else if (gen >= 6) { - *batch++ = MI_STORE_DWORD_IMM_GEN4; - *batch++ = 0; - *batch++ = addr; - *batch++ = target_offset; - } else if (gen >= 4) { - *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *batch++ = 0; - *batch++ = addr; - *batch++ = target_offset; - } else { - *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *batch++ = addr; - *batch++ = target_offset; - } - - goto out; - } + if (!reloc_entry_gpu(eb, vma, offset, target_addr)) { + bool wide = eb->reloc_cache.use_64bit_reloc; + void *vaddr; repeat: - vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT); - if (IS_ERR(vaddr)) - return PTR_ERR(vaddr); + vaddr = reloc_vaddr(vma->obj, + &eb->reloc_cache, + offset >> PAGE_SHIFT); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); - clflush_write32(vaddr + offset_in_page(offset), - lower_32_bits(target_offset), - eb->reloc_cache.vaddr); + GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32))); + clflush_write32(vaddr + offset_in_page(offset), + lower_32_bits(target_addr), + eb->reloc_cache.vaddr); - if (wide) { - offset += sizeof(u32); - target_offset >>= 32; - wide = false; - goto repeat; + if (wide) { + offset += sizeof(u32); + target_addr >>= 32; + wide = false; + goto repeat; + } } -out: return target->node.start | UPDATE; } @@ -3022,3 +3074,7 @@ end:; kvfree(exec2_list); return err; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_gem_execbuffer.c" +#endif diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index be268511cb6d..8fe3ad2ee34e 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -302,35 +302,6 @@ static void fill_scratch(struct tiled_blits *t, u32 *vaddr, u32 val) i915_gem_object_flush_map(t->scratch.vma->obj); } -static void hexdump(const void *buf, size_t len) -{ - const size_t rowsize = 8 * sizeof(u32); - const void *prev = NULL; - bool skip = false; - size_t pos; - - for (pos = 0; pos < len; pos += rowsize) { - char line[128]; - - if (prev && !memcmp(prev, buf + pos, rowsize)) { - if (!skip) { - pr_info("*\n"); - skip = true; - } - continue; - } - - WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, - rowsize, sizeof(u32), - line, sizeof(line), - false) >= sizeof(line)); - pr_info("[%04zx] %s\n", pos, line); - - prev = buf + pos; - skip = false; - } -} - static u64 swizzle_bit(unsigned int bit, u64 offset) { return (offset & BIT_ULL(bit)) >> (bit - 6); @@ -426,7 +397,7 @@ static int verify_buffer(const struct tiled_blits *t, pr_err("Invalid %s tiling detected at (%d, %d), start_val %x\n", repr_tiling(buf->tiling), x, y, buf->start_val); - hexdump(vaddr, 4096); + igt_hexdump(vaddr, 4096); } i915_gem_object_unpin_map(buf->vma->obj); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c new file mode 100644 index 000000000000..a49016f8ee0d --- /dev/null +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include "i915_selftest.h" + +#include "gt/intel_engine_pm.h" +#include "selftests/igt_flush_test.h" + +static u64 read_reloc(const u32 *map, int x, const u64 mask) +{ + u64 reloc; + + memcpy(&reloc, &map[x], sizeof(reloc)); + return reloc & mask; +} + +static int __igt_gpu_reloc(struct i915_execbuffer *eb, + struct drm_i915_gem_object *obj) +{ + const unsigned int offsets[] = { 8, 3, 0 }; + const u64 mask = + GENMASK_ULL(eb->reloc_cache.use_64bit_reloc ? 63 : 31, 0); + const u32 *map = page_mask_bits(obj->mm.mapping); + struct i915_request *rq; + struct i915_vma *vma; + int err; + int i; + + vma = i915_vma_instance(obj, eb->context->vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH); + if (err) + return err; + + /* 8-Byte aligned */ + if (!__reloc_entry_gpu(eb, vma, + offsets[0] * sizeof(u32), + 0)) { + err = -EIO; + goto unpin_vma; + } + + /* !8-Byte aligned */ + if (!__reloc_entry_gpu(eb, vma, + offsets[1] * sizeof(u32), + 1)) { + err = -EIO; + goto unpin_vma; + } + + /* Skip to the end of the cmd page */ + i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1; + i -= eb->reloc_cache.rq_size; + memset32(eb->reloc_cache.rq_cmd + eb->reloc_cache.rq_size, + MI_NOOP, i); + eb->reloc_cache.rq_size += i; + + /* Force batch chaining */ + if (!__reloc_entry_gpu(eb, vma, + offsets[2] * sizeof(u32), + 2)) { + err = -EIO; + goto unpin_vma; + } + + GEM_BUG_ON(!eb->reloc_cache.rq); + rq = i915_request_get(eb->reloc_cache.rq); + err = reloc_gpu_flush(&eb->reloc_cache); + if (err) + goto put_rq; + GEM_BUG_ON(eb->reloc_cache.rq); + + err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2); + if (err) { + intel_gt_set_wedged(eb->engine->gt); + goto put_rq; + } + + if (!i915_request_completed(rq)) { + pr_err("%s: did not wait for relocations!\n", eb->engine->name); + err = -EINVAL; + goto put_rq; + } + + for (i = 0; i < ARRAY_SIZE(offsets); i++) { + u64 reloc = read_reloc(map, offsets[i], mask); + + if (reloc != i) { + pr_err("%s[%d]: map[%d] %llx != %x\n", + eb->engine->name, i, offsets[i], reloc, i); + err = -EINVAL; + } + } + if (err) + igt_hexdump(map, 4096); + +put_rq: + i915_request_put(rq); +unpin_vma: + i915_vma_unpin(vma); + return err; +} + +static int igt_gpu_reloc(void *arg) +{ + struct i915_execbuffer eb; + struct drm_i915_gem_object *scratch; + int err = 0; + u32 *map; + + eb.i915 = arg; + + scratch = i915_gem_object_create_internal(eb.i915, 4096); + if (IS_ERR(scratch)) + return PTR_ERR(scratch); + + map = i915_gem_object_pin_map(scratch, I915_MAP_WC); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto err_scratch; + } + + for_each_uabi_engine(eb.engine, eb.i915) { + reloc_cache_init(&eb.reloc_cache, eb.i915); + memset(map, POISON_INUSE, 4096); + + intel_engine_pm_get(eb.engine); + eb.context = intel_context_create(eb.engine); + if (IS_ERR(eb.context)) { + err = PTR_ERR(eb.context); + goto err_pm; + } + + err = intel_context_pin(eb.context); + if (err) + goto err_put; + + err = __igt_gpu_reloc(&eb, scratch); + + intel_context_unpin(eb.context); +err_put: + intel_context_put(eb.context); +err_pm: + intel_engine_pm_put(eb.engine); + if (err) + break; + } + + if (igt_flush_test(eb.i915)) + err = -EIO; + +err_scratch: + i915_gem_object_put(scratch); + return err; +} + +int i915_gem_execbuffer_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gpu_reloc), + }; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return i915_live_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 7529df92f6a2..3ced73533f6b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -4342,35 +4342,6 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) return intel_gt_live_subtests(tests, &i915->gt); } -static void hexdump(const void *buf, size_t len) -{ - const size_t rowsize = 8 * sizeof(u32); - const void *prev = NULL; - bool skip = false; - size_t pos; - - for (pos = 0; pos < len; pos += rowsize) { - char line[128]; - - if (prev && !memcmp(prev, buf + pos, rowsize)) { - if (!skip) { - pr_info("*\n"); - skip = true; - } - continue; - } - - WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, - rowsize, sizeof(u32), - line, sizeof(line), - false) >= sizeof(line)); - pr_info("[%04zx] %s\n", pos, line); - - prev = buf + pos; - skip = false; - } -} - static int emit_semaphore_signal(struct intel_context *ce, void *slot) { const u32 offset = @@ -4518,10 +4489,10 @@ static int live_lrc_layout(void *arg) if (err) { pr_info("%s: HW register image:\n", engine->name); - hexdump(hw, PAGE_SIZE); + igt_hexdump(hw, PAGE_SIZE); pr_info("%s: SW register image:\n", engine->name); - hexdump(lrc, PAGE_SIZE); + igt_hexdump(lrc, PAGE_SIZE); } shmem_unpin_map(engine->default_state, hw); diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index 98bcb6fa0ab4..d53d207ab6eb 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -133,4 +133,6 @@ bool __igt_timeout(unsigned long timeout, const char *fmt, ...); #define igt_timeout(t, fmt, ...) \ __igt_timeout((t), KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +void igt_hexdump(const void *buf, size_t len); + #endif /* !__I915_SELFTEST_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 0a953bfc0585..5dd5d81646c4 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -37,6 +37,7 @@ selftest(gem, i915_gem_live_selftests) selftest(evict, i915_gem_evict_live_selftests) selftest(hugepages, i915_gem_huge_page_live_selftests) selftest(gem_contexts, i915_gem_context_live_selftests) +selftest(gem_execbuf, i915_gem_execbuffer_live_selftests) selftest(blt, i915_gem_object_blt_live_selftests) selftest(client, i915_gem_client_blt_live_selftests) selftest(reset, intel_reset_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c index d3bf9eefb682..1bc11c09faef 100644 --- a/drivers/gpu/drm/i915/selftests/i915_selftest.c +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -396,6 +396,35 @@ bool __igt_timeout(unsigned long timeout, const char *fmt, ...) return true; } +void igt_hexdump(const void *buf, size_t len) +{ + const size_t rowsize = 8 * sizeof(u32); + const void *prev = NULL; + bool skip = false; + size_t pos; + + for (pos = 0; pos < len; pos += rowsize) { + char line[128]; + + if (prev && !memcmp(prev, buf + pos, rowsize)) { + if (!skip) { + pr_info("*\n"); + skip = true; + } + continue; + } + + WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, + rowsize, sizeof(u32), + line, sizeof(line), + false) >= sizeof(line)); + pr_info("[%04zx] %s\n", pos, line); + + prev = buf + pos; + skip = false; + } +} + module_param_named(st_random_seed, i915_selftest.random_seed, uint, 0400); module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400); module_param_named(st_filter, i915_selftest.filter, charp, 0400); From 9728889f42b9ba078f86cd11535a89df29e93b33 Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Thu, 30 Apr 2020 22:17:57 +0300 Subject: [PATCH 13/97] drm/i915: Use bw state for per crtc SAGV evaluation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Future platforms require per-crtc SAGV evaluation and serializing global state when those are changed from different commits. v2: - Add has_sagv check to intel_crtc_can_enable_sagv so that it sets bit in reject mask. - Use bw_state in intel_pre/post_plane_enable_sagv instead of atomic state v3: - Fixed rebase conflict, now using intel_atomic_crtc_state_for_each_plane_state in order to call it from atomic check v4: - Use fb modifier from plane state v5: - Make intel_has_sagv static again(Ville) - Removed unnecessary NULL assignments(Ville) - Removed unnecessary SAGV debug(Ville) - Call intel_compute_sagv_mask only for modesets(Ville) - Serialize global state only if sagv results change, but not mask itself(Ville) v6: - use lock global state instead of serialize(Ville) v7: - use both global state lock and serialize depending on if we need to change only global state or access hw (Ville) Signed-off-by: Stanislav Lisovskiy Cc: Ville Syrjälä Cc: James Ausmus Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200430191757.18206-1-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/display/intel_bw.h | 6 ++ drivers/gpu/drm/i915/intel_pm.c | 117 ++++++++++++++++++------ drivers/gpu/drm/i915/intel_pm.h | 3 +- 3 files changed, 97 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h index ac004d6f4276..d6df91058223 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.h +++ b/drivers/gpu/drm/i915/display/intel_bw.h @@ -18,6 +18,12 @@ struct intel_crtc_state; struct intel_bw_state { struct intel_global_state base; + /* + * Contains a bit mask, used to determine, whether correspondent + * pipe allows SAGV or not. + */ + u8 pipe_sagv_reject; + unsigned int data_rate[I915_MAX_PIPES]; u8 num_active_planes[I915_MAX_PIPES]; }; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index bfb180fe8047..29c37e4a666c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -43,6 +43,7 @@ #include "i915_fixed.h" #include "i915_irq.h" #include "i915_trace.h" +#include "display/intel_bw.h" #include "intel_pm.h" #include "intel_sideband.h" #include "../../../platform/x86/intel_ips.h" @@ -3760,34 +3761,75 @@ intel_disable_sagv(struct drm_i915_private *dev_priv) void intel_sagv_pre_plane_update(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); + const struct intel_bw_state *new_bw_state; - if (!intel_can_enable_sagv(state)) + /* + * Just return if we can't control SAGV or don't have it. + * This is different from situation when we have SAGV but just can't + * afford it due to DBuf limitation - in case if SAGV is completely + * disabled in a BIOS, we are not even allowed to send a PCode request, + * as it will throw an error. So have to check it here. + */ + if (!intel_has_sagv(dev_priv)) + return; + + new_bw_state = intel_atomic_get_new_bw_state(state); + if (!new_bw_state) + return; + + if (!intel_can_enable_sagv(new_bw_state)) intel_disable_sagv(dev_priv); } void intel_sagv_post_plane_update(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); + const struct intel_bw_state *new_bw_state; - if (intel_can_enable_sagv(state)) + /* + * Just return if we can't control SAGV or don't have it. + * This is different from situation when we have SAGV but just can't + * afford it due to DBuf limitation - in case if SAGV is completely + * disabled in a BIOS, we are not even allowed to send a PCode request, + * as it will throw an error. So have to check it here. + */ + if (!intel_has_sagv(dev_priv)) + return; + + new_bw_state = intel_atomic_get_new_bw_state(state); + if (!new_bw_state) + return; + + if (intel_can_enable_sagv(new_bw_state)) intel_enable_sagv(dev_priv); } static bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) { - struct drm_device *dev = crtc_state->uapi.crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_plane *plane; + const struct intel_plane_state *plane_state; int level, latency; + if (!intel_has_sagv(dev_priv)) + return false; + if (!crtc_state->hw.active) return true; + /* + * SKL+ workaround: bspec recommends we disable SAGV when we have + * more then one pipe enabled + */ + if (hweight8(state->active_pipes) > 1) + return false; + if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) return false; - for_each_intel_plane_on_crtc(dev, crtc, plane) { + intel_atomic_crtc_state_for_each_plane_state(plane, plane_state, crtc_state) { const struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane->id]; @@ -3803,7 +3845,7 @@ static bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state latency = dev_priv->wm.skl_latency[level]; if (skl_needs_memory_bw_wa(dev_priv) && - plane->base.state->fb->modifier == + plane_state->uapi.fb->modifier == I915_FORMAT_MOD_X_TILED) latency += 15; @@ -3819,35 +3861,48 @@ static bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state return true; } -bool intel_can_enable_sagv(struct intel_atomic_state *state) +bool intel_can_enable_sagv(const struct intel_bw_state *bw_state) { - struct drm_i915_private *dev_priv = to_i915(state->base.dev); + return bw_state->pipe_sagv_reject == 0; +} + +static int intel_compute_sagv_mask(struct intel_atomic_state *state) +{ + int ret; struct intel_crtc *crtc; - const struct intel_crtc_state *crtc_state; - enum pipe pipe; + struct intel_crtc_state *new_crtc_state; + struct intel_bw_state *new_bw_state = NULL; + const struct intel_bw_state *old_bw_state = NULL; + int i; - if (!intel_has_sagv(dev_priv)) - return false; + for_each_new_intel_crtc_in_state(state, crtc, + new_crtc_state, i) { + new_bw_state = intel_atomic_get_bw_state(state); + if (IS_ERR(new_bw_state)) + return PTR_ERR(new_bw_state); - /* - * If there are no active CRTCs, no additional checks need be performed - */ - if (hweight8(state->active_pipes) == 0) - return true; + old_bw_state = intel_atomic_get_old_bw_state(state); - /* - * SKL+ workaround: bspec recommends we disable SAGV when we have - * more then one pipe enabled - */ - if (hweight8(state->active_pipes) > 1) - return false; + if (intel_crtc_can_enable_sagv(new_crtc_state)) + new_bw_state->pipe_sagv_reject &= ~BIT(crtc->pipe); + else + new_bw_state->pipe_sagv_reject |= BIT(crtc->pipe); + } - /* Since we're now guaranteed to only have one active CRTC... */ - pipe = ffs(state->active_pipes) - 1; - crtc = intel_get_crtc_for_pipe(dev_priv, pipe); - crtc_state = to_intel_crtc_state(crtc->base.state); + if (!new_bw_state) + return 0; - return intel_crtc_can_enable_sagv(crtc_state); + if (intel_can_enable_sagv(new_bw_state) != intel_can_enable_sagv(old_bw_state)) { + ret = intel_atomic_serialize_global_state(&new_bw_state->base); + if (ret) + return ret; + } else if (new_bw_state->pipe_sagv_reject != old_bw_state->pipe_sagv_reject) { + ret = intel_atomic_lock_global_state(&new_bw_state->base); + if (ret) + return ret; + } + + return 0; } /* @@ -5811,6 +5866,12 @@ skl_compute_wm(struct intel_atomic_state *state) if (ret) return ret; + if (state->modeset) { + ret = intel_compute_sagv_mask(state); + if (ret) + return ret; + } + /* * skl_compute_ddb() will have adjusted the final watermarks * based on how much ddb is available. Now we can actually diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index 9a6036ab0f90..fd1dc422e6c5 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -9,6 +9,7 @@ #include #include "i915_reg.h" +#include "display/intel_bw.h" struct drm_device; struct drm_i915_private; @@ -41,7 +42,7 @@ void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc, struct skl_pipe_wm *out); void g4x_wm_sanitize(struct drm_i915_private *dev_priv); void vlv_wm_sanitize(struct drm_i915_private *dev_priv); -bool intel_can_enable_sagv(struct intel_atomic_state *state); +bool intel_can_enable_sagv(const struct intel_bw_state *bw_state); int intel_enable_sagv(struct drm_i915_private *dev_priv); int intel_disable_sagv(struct drm_i915_private *dev_priv); void intel_sagv_pre_plane_update(struct intel_atomic_state *state); From ecab0f3d055d333640bbe2aa5a5141574a65c534 Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Thu, 30 Apr 2020 22:56:34 +0300 Subject: [PATCH 14/97] drm/i915: Track active_pipes in bw_state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to calculate SAGV mask also in a non-modeset commit, however currently active_pipes are only calculated for modesets in global atomic state, thus now we will be tracking those also in bw_state in order to be able to properly access global data. v2: - Removed pre/post plane SAGV updates from modeset(Ville) - Now tracking active pipes in intel_can_enable_sagv(Ville) v3: - lock global state if active_pipes change as well(Ville) Signed-off-by: Stanislav Lisovskiy Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200430195634.7666-1-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/display/intel_bw.h | 3 +++ drivers/gpu/drm/i915/display/intel_display.c | 9 +++---- drivers/gpu/drm/i915/intel_pm.c | 27 ++++++++++---------- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h index d6df91058223..898b4a85ccab 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.h +++ b/drivers/gpu/drm/i915/display/intel_bw.h @@ -26,6 +26,9 @@ struct intel_bw_state { unsigned int data_rate[I915_MAX_PIPES]; u8 num_active_planes[I915_MAX_PIPES]; + + /* bitmask of active pipes */ + u8 active_pipes; }; #define to_intel_bw_state(x) container_of((x), struct intel_bw_state, base) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 6bb87965801e..268d65e23472 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -15379,11 +15379,11 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) intel_set_cdclk_pre_plane_update(state); - intel_sagv_pre_plane_update(state); - intel_modeset_verify_disabled(dev_priv, state); } + intel_sagv_pre_plane_update(state); + /* Complete the events for pipes that have now been disabled */ for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { bool modeset = needs_modeset(new_crtc_state); @@ -15476,11 +15476,10 @@ static void intel_atomic_commit_tail(struct intel_atomic_state *state) intel_check_cpu_fifo_underruns(dev_priv); intel_check_pch_fifo_underruns(dev_priv); - if (state->modeset) { + if (state->modeset) intel_verify_planes(state); - intel_sagv_post_plane_update(state); - } + intel_sagv_post_plane_update(state); drm_atomic_helper_commit_hw_done(&state->base); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 29c37e4a666c..c26f87427e18 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3806,7 +3806,6 @@ void intel_sagv_post_plane_update(struct intel_atomic_state *state) static bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) { - struct intel_atomic_state *state = to_intel_atomic_state(crtc_state->uapi.state); struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_plane *plane; @@ -3819,13 +3818,6 @@ static bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state if (!crtc_state->hw.active) return true; - /* - * SKL+ workaround: bspec recommends we disable SAGV when we have - * more then one pipe enabled - */ - if (hweight8(state->active_pipes) > 1) - return false; - if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) return false; @@ -3863,6 +3855,9 @@ static bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state bool intel_can_enable_sagv(const struct intel_bw_state *bw_state) { + if (bw_state->active_pipes && !is_power_of_2(bw_state->active_pipes)) + return false; + return bw_state->pipe_sagv_reject == 0; } @@ -3892,6 +3887,14 @@ static int intel_compute_sagv_mask(struct intel_atomic_state *state) if (!new_bw_state) return 0; + new_bw_state->active_pipes = + intel_calc_active_pipes(state, old_bw_state->active_pipes); + if (new_bw_state->active_pipes != old_bw_state->active_pipes) { + ret = intel_atomic_lock_global_state(&new_bw_state->base); + if (ret) + return ret; + } + if (intel_can_enable_sagv(new_bw_state) != intel_can_enable_sagv(old_bw_state)) { ret = intel_atomic_serialize_global_state(&new_bw_state->base); if (ret) @@ -5866,11 +5869,9 @@ skl_compute_wm(struct intel_atomic_state *state) if (ret) return ret; - if (state->modeset) { - ret = intel_compute_sagv_mask(state); - if (ret) - return ret; - } + ret = intel_compute_sagv_mask(state); + if (ret) + return ret; /* * skl_compute_ddb() will have adjusted the final watermarks From 9ff79708c54d34a3d4d451b15f8d6e5e042f1d36 Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Thu, 23 Apr 2020 10:59:00 +0300 Subject: [PATCH 15/97] drm/i915: Rename bw_state to new_bw_state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That is a preparation patch before next one where we introduce old_bw_state and a bunch of other changes as well. In a review comment it was suggested to split out at least that renaming into a separate patch, what is done here. v2: Removed spurious space Reviewed-by: Ville Syrjälä Signed-off-by: Stanislav Lisovskiy Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200423075902.21892-8-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/display/intel_bw.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 4aa54fcb0629..6e7cc3a4f1aa 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -414,7 +414,7 @@ int intel_bw_atomic_check(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc_state *new_crtc_state, *old_crtc_state; - struct intel_bw_state *bw_state = NULL; + struct intel_bw_state *new_bw_state = NULL; unsigned int data_rate, max_data_rate; unsigned int num_active_planes; struct intel_crtc *crtc; @@ -443,29 +443,29 @@ int intel_bw_atomic_check(struct intel_atomic_state *state) old_active_planes == new_active_planes) continue; - bw_state = intel_atomic_get_bw_state(state); - if (IS_ERR(bw_state)) - return PTR_ERR(bw_state); + new_bw_state = intel_atomic_get_bw_state(state); + if (IS_ERR(new_bw_state)) + return PTR_ERR(new_bw_state); - bw_state->data_rate[crtc->pipe] = new_data_rate; - bw_state->num_active_planes[crtc->pipe] = new_active_planes; + new_bw_state->data_rate[crtc->pipe] = new_data_rate; + new_bw_state->num_active_planes[crtc->pipe] = new_active_planes; drm_dbg_kms(&dev_priv->drm, "pipe %c data rate %u num active planes %u\n", pipe_name(crtc->pipe), - bw_state->data_rate[crtc->pipe], - bw_state->num_active_planes[crtc->pipe]); + new_bw_state->data_rate[crtc->pipe], + new_bw_state->num_active_planes[crtc->pipe]); } - if (!bw_state) + if (!new_bw_state) return 0; - ret = intel_atomic_lock_global_state(&bw_state->base); + ret = intel_atomic_lock_global_state(&new_bw_state->base); if (ret) return ret; - data_rate = intel_bw_data_rate(dev_priv, bw_state); - num_active_planes = intel_bw_num_active_planes(dev_priv, bw_state); + data_rate = intel_bw_data_rate(dev_priv, new_bw_state); + num_active_planes = intel_bw_num_active_planes(dev_priv, new_bw_state); max_data_rate = intel_max_data_rate(dev_priv, num_active_planes); From 25444ca6cbb9fe375aa9bba58784a735efe2a649 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 29 Apr 2020 13:10:23 +0300 Subject: [PATCH 16/97] drm/i915/fbc: Require linear fb stride to be multiple of 512 bytes on gen9/glk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Display WA #1105 says that FBC requires PLANE_STRIDE to be a multiple of 512 bytes on gen9 and glk. This is definitely true for glk as certain tests (such as igt/kms_big_fb/linear-16bpp-rotate-0) are now failing when the display resolution results in a plane stride which is not a multiple of 512 bytes. Curiously I was not able to reproduce this on a KBL. First I suspected that our use of the FBC override stride explain this, but after trying to use the override stride on glk the test still failed. I did try both the old CHICKEN_MISC_4 way and the new FBC_STRIDE way, neither had any effect on the result. Anyways, we need this at least on glk. But let's trust the spec and apply the w/a for all gen9 as well, despite being unable to reproduce the problem. v2: s/FBC_CHICKEN/FBC_STRIDE/ in commit msg Cc: José Roberto de Souza Fixes: 691f7ba58d52 ("drm/i915/display/fbc: Make fences a nice-to-have for GEN9+") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200429101034.8208-2-ville.syrjala@linux.intel.com Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/display/intel_fbc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index c6afa10e814c..7194f9bc62c5 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -564,7 +564,7 @@ void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv) } static bool stride_is_valid(struct drm_i915_private *dev_priv, - unsigned int stride) + u64 modifier, unsigned int stride) { /* This should have been caught earlier. */ if (drm_WARN_ON_ONCE(&dev_priv->drm, (stride & (64 - 1)) != 0)) @@ -580,6 +580,11 @@ static bool stride_is_valid(struct drm_i915_private *dev_priv, if (IS_GEN(dev_priv, 4) && !IS_G4X(dev_priv) && stride < 2048) return false; + /* Display WA #1105: skl,bxt,kbl,cfl,glk */ + if (IS_GEN(dev_priv, 9) && + modifier == DRM_FORMAT_MOD_LINEAR && stride & 511) + return false; + if (stride > 16384) return false; @@ -810,7 +815,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) return false; } - if (!stride_is_valid(dev_priv, cache->fb.stride)) { + if (!stride_is_valid(dev_priv, cache->fb.modifier, cache->fb.stride)) { fbc->no_fbc_reason = "framebuffer stride not supported"; return false; } From dab3aff7b1a503819c68e38f51b3dcc0508b35cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 30 Apr 2020 15:58:22 +0300 Subject: [PATCH 17/97] drm/i915: Remove cnl pre-prod workarounds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove all the stepping dependent cnl workarounds. Bspec lists more steppings than this so presumably these are classed as pre-production. And this is cnl after all so no one should really care anyway. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200430125822.21985-2-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_rc6.c | 8 +------- drivers/gpu/drm/i915/gt/intel_workarounds.c | 17 ----------------- drivers/gpu/drm/i915/intel_pm.c | 7 ------- drivers/gpu/drm/i915/intel_wopcm.c | 3 +-- 4 files changed, 2 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index 1c1923ec8be7..ab675d35030d 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -113,7 +113,6 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) struct intel_uncore *uncore = rc6_to_uncore(rc6); struct intel_engine_cs *engine; enum intel_engine_id id; - u32 rc6_mode; /* 2b: Program RC6 thresholds.*/ if (INTEL_GEN(rc6_to_i915(rc6)) >= 10) { @@ -165,16 +164,11 @@ static void gen9_rc6_enable(struct intel_rc6 *rc6) /* 3a: Enable RC6 */ set(uncore, GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ - /* WaRsUseTimeoutMode:cnl (pre-prod) */ - if (IS_CNL_REVID(rc6_to_i915(rc6), CNL_REVID_A0, CNL_REVID_C0)) - rc6_mode = GEN7_RC_CTL_TO_MODE; - else - rc6_mode = GEN6_RC_CTL_EI_MODE(1); rc6->ctl_enable = GEN6_RC_CTL_HW_ENABLE | GEN6_RC_CTL_RC6_ENABLE | - rc6_mode; + GEN6_RC_CTL_EI_MODE(1); /* * WaRsDisableCoarsePowerGating:skl,cnl diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index adddc5c93b48..aa90e6b7a118 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -485,25 +485,14 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - struct drm_i915_private *i915 = engine->i915; - /* WaForceContextSaveRestoreNonCoherent:cnl */ WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0, HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); - /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */ - if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0)) - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5); - /* WaDisableReplayBufferBankArbitrationOptimization:cnl */ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */ - if (IS_CNL_REVID(i915, 0, CNL_REVID_B0)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE); - /* WaPushConstantDereferenceHoldDisable:cnl */ WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); @@ -872,12 +861,6 @@ cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { wa_init_mcr(i915, wal); - /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */ - if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0)) - wa_write_or(wal, - GAMT_CHKN_BIT_REG, - GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT); - /* WaInPlaceDecompressionHang:cnl */ wa_write_or(wal, GEN9_GAMT_ECO_REG_RW_IA, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c26f87427e18..a4830a5cd401 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5255,10 +5255,6 @@ static void skl_compute_transition_wm(const struct intel_crtc_state *crtc_state, trans_offset_b; } else { res_blocks = wm0_sel_res_b + trans_offset_b; - - /* WA BUG:1938466 add one block for non y-tile planes */ - if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0)) - res_blocks += 1; } /* @@ -6978,9 +6974,6 @@ static void cnl_init_clock_gating(struct drm_i915_private *dev_priv) val = I915_READ(SLICE_UNIT_LEVEL_CLKGATE); /* ReadHitWriteOnlyDisable:cnl */ val |= RCCUNIT_CLKGATE_DIS; - /* WaSarbUnitClockGatingDisable:cnl (pre-prod) */ - if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) - val |= SARBUNIT_CLKGATE_DIS; I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val); /* Wa_2201832410:cnl */ diff --git a/drivers/gpu/drm/i915/intel_wopcm.c b/drivers/gpu/drm/i915/intel_wopcm.c index 6942487c14a9..ec776591e1cf 100644 --- a/drivers/gpu/drm/i915/intel_wopcm.c +++ b/drivers/gpu/drm/i915/intel_wopcm.c @@ -149,8 +149,7 @@ static bool check_hw_restrictions(struct drm_i915_private *i915, guc_wopcm_size)) return false; - if ((IS_GEN(i915, 9) || - IS_CNL_REVID(i915, CNL_REVID_A0, CNL_REVID_A0)) && + if (IS_GEN(i915, 9) && !gen9_check_huc_fw_fits(i915, guc_wopcm_size, huc_fw_size)) return false; From d19b29be653691a179e54aafc84fc40667a63ee7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 29 Apr 2020 21:54:55 +0300 Subject: [PATCH 18/97] drm/i915: Nuke mode.vrefresh usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mode.vrefresh is rounded to the nearest integer. You don't want to use it anywhere that requires precision. Also I want to nuke it. vtotal*vrefresh == 1000*clock/htotal, so let's use the latter. Cc: Anshuman Gupta Cc: Uma Shankar Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200429185457.26235-1-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson Reviewed-by: Uma Shankar --- drivers/gpu/drm/i915/display/intel_audio.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 36aaee8536f1..f5686e50833f 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -524,14 +524,12 @@ static unsigned int get_hblank_early_enable_config(struct intel_encoder *encoder unsigned int link_clks_available, link_clks_required; unsigned int tu_data, tu_line, link_clks_active; unsigned int hblank_rise, hblank_early_prog; - unsigned int h_active, h_total, hblank_delta, pixel_clk, v_total; - unsigned int fec_coeff, refresh_rate, cdclk, vdsc_bpp; + unsigned int h_active, h_total, hblank_delta, pixel_clk; + unsigned int fec_coeff, cdclk, vdsc_bpp; h_active = crtc_state->hw.adjusted_mode.crtc_hdisplay; h_total = crtc_state->hw.adjusted_mode.crtc_htotal; - v_total = crtc_state->hw.adjusted_mode.crtc_vtotal; pixel_clk = crtc_state->hw.adjusted_mode.crtc_clock; - refresh_rate = crtc_state->hw.adjusted_mode.vrefresh; vdsc_bpp = crtc_state->dsc.compressed_bpp; cdclk = i915->cdclk.hw.cdclk; /* fec= 0.972261, using rounding multiplier of 1000000 */ @@ -549,9 +547,7 @@ static unsigned int get_hblank_early_enable_config(struct intel_encoder *encoder link_clks_available = ((((h_total - h_active) * ((crtc_state->port_clock * ROUNDING_FACTOR) / pixel_clk)) / ROUNDING_FACTOR) - 28); - - link_clks_required = DIV_ROUND_UP(192000, (refresh_rate * - v_total)) * ((48 / + link_clks_required = DIV_ROUND_UP(192000, (1000 * pixel_clk / h_total)) * ((48 / crtc_state->lane_count) + 2); if (link_clks_available > link_clks_required) From 41ee86d6ee820604adf7fdc873062d19804ba1cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 29 Apr 2020 21:54:56 +0300 Subject: [PATCH 19/97] drm/i915: Rename variables to be consistent with bspec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the code seems insistent on using the variable names from the bspec formulat, let's be consistent and use those names for all the things. For some reason 'link_clk' and 'lanes' were left out in the code until now. Cc: Anshuman Gupta Cc: Uma Shankar Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200429185457.26235-2-ville.syrjala@linux.intel.com Reviewed-by: Uma Shankar --- drivers/gpu/drm/i915/display/intel_audio.c | 30 ++++++++++++---------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index f5686e50833f..00f7a3cf9a04 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -526,6 +526,7 @@ static unsigned int get_hblank_early_enable_config(struct intel_encoder *encoder unsigned int hblank_rise, hblank_early_prog; unsigned int h_active, h_total, hblank_delta, pixel_clk; unsigned int fec_coeff, cdclk, vdsc_bpp; + unsigned int link_clk, lanes; h_active = crtc_state->hw.adjusted_mode.crtc_hdisplay; h_total = crtc_state->hw.adjusted_mode.crtc_htotal; @@ -534,40 +535,40 @@ static unsigned int get_hblank_early_enable_config(struct intel_encoder *encoder cdclk = i915->cdclk.hw.cdclk; /* fec= 0.972261, using rounding multiplier of 1000000 */ fec_coeff = 972261; + link_clk = crtc_state->port_clock; + lanes = crtc_state->lane_count; drm_dbg_kms(&i915->drm, "h_active = %u link_clk = %u :" "lanes = %u vdsc_bpp = %u cdclk = %u\n", - h_active, crtc_state->port_clock, crtc_state->lane_count, - vdsc_bpp, cdclk); + h_active, link_clk, lanes, vdsc_bpp, cdclk); - if (WARN_ON(!crtc_state->port_clock || !crtc_state->lane_count || - !crtc_state->dsc.compressed_bpp || !i915->cdclk.hw.cdclk)) + if (WARN_ON(!link_clk || !lanes || !vdsc_bpp || !cdclk)) return 0; link_clks_available = ((((h_total - h_active) * - ((crtc_state->port_clock * ROUNDING_FACTOR) / + ((link_clk * ROUNDING_FACTOR) / pixel_clk)) / ROUNDING_FACTOR) - 28); link_clks_required = DIV_ROUND_UP(192000, (1000 * pixel_clk / h_total)) * ((48 / - crtc_state->lane_count) + 2); + lanes) + 2); if (link_clks_available > link_clks_required) hblank_delta = 32; else hblank_delta = DIV_ROUND_UP(((((5 * ROUNDING_FACTOR) / - crtc_state->port_clock) + ((5 * + link_clk) + ((5 * ROUNDING_FACTOR) / cdclk)) * pixel_clk), ROUNDING_FACTOR); - tu_data = (pixel_clk * vdsc_bpp * 8) / ((crtc_state->port_clock * - crtc_state->lane_count * fec_coeff) / 1000000); - tu_line = (((h_active * crtc_state->port_clock * fec_coeff) / + tu_data = (pixel_clk * vdsc_bpp * 8) / ((link_clk * + lanes * fec_coeff) / 1000000); + tu_line = (((h_active * link_clk * fec_coeff) / 1000000) / (64 * pixel_clk)); link_clks_active = (tu_line - 1) * 64 + tu_data; hblank_rise = ((link_clks_active + 6 * DIV_ROUND_UP(link_clks_active, 250) + 4) * ((pixel_clk * ROUNDING_FACTOR) / - crtc_state->port_clock)) / ROUNDING_FACTOR; + link_clk)) / ROUNDING_FACTOR; hblank_early_prog = h_active - hblank_rise + hblank_delta; @@ -577,16 +578,19 @@ static unsigned int get_hblank_early_enable_config(struct intel_encoder *encoder static unsigned int get_sample_room_req_config(const struct intel_crtc_state *crtc_state) { unsigned int h_active, h_total, pixel_clk; + unsigned int link_clk, lanes; unsigned int samples_room; h_active = crtc_state->hw.adjusted_mode.hdisplay; h_total = crtc_state->hw.adjusted_mode.htotal; pixel_clk = crtc_state->hw.adjusted_mode.clock; + link_clk = crtc_state->port_clock; + lanes = crtc_state->lane_count; - samples_room = ((((h_total - h_active) * ((crtc_state->port_clock * + samples_room = ((((h_total - h_active) * ((link_clk * ROUNDING_FACTOR) / pixel_clk)) / ROUNDING_FACTOR) - 12) / ((48 / - crtc_state->lane_count) + 2); + lanes) + 2); return samples_room; } From 2dd43144e824afffd5ee31ce0df02c47afe83d76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 29 Apr 2020 21:54:57 +0300 Subject: [PATCH 20/97] drm/i915: Streamline the artihmetic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All these ROUNDING_FACTORs and whatnot are making this thing hard to read. Get rid of them. And let's massage some of the fractions to give us less questionable intermediate results and perhaps less divisions. Also looks like a good helping of 64bit math stuff is needed to avoid some of overflows present in the current code. There might still be a few overflows, namely when calculating link_clks_available/samples_room (would require a huge hblank though), and potentially when calculating hblank_rise (not sure how large link_clks_active can get). It looks like we're still not calculating exactly what the spec says since we truncate tu_data and tu_line early. But I'm too lazy to figure out if we could avoid that. v2: Fix typo in commit msg (Uma) Remove ROUNDING_FACTOR define (Uma) s/5*link_clk+5*cdclk/5*(link_clk+cdclk)/ (Chris) Cc: Anshuman Gupta Cc: Uma Shankar Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200429185457.26235-3-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson Reviewed-by: Uma Shankar --- drivers/gpu/drm/i915/display/intel_audio.c | 57 ++++++++-------------- 1 file changed, 19 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 00f7a3cf9a04..ad4aa66fd676 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -514,19 +514,16 @@ static void hsw_audio_codec_disable(struct intel_encoder *encoder, mutex_unlock(&dev_priv->av_mutex); } -/* Add a factor to take care of rounding and truncations */ -#define ROUNDING_FACTOR 10000 - -static unsigned int get_hblank_early_enable_config(struct intel_encoder *encoder, - const struct intel_crtc_state *crtc_state) +static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state) { struct drm_i915_private *i915 = to_i915(encoder->base.dev); unsigned int link_clks_available, link_clks_required; unsigned int tu_data, tu_line, link_clks_active; - unsigned int hblank_rise, hblank_early_prog; unsigned int h_active, h_total, hblank_delta, pixel_clk; unsigned int fec_coeff, cdclk, vdsc_bpp; unsigned int link_clk, lanes; + unsigned int hblank_rise; h_active = crtc_state->hw.adjusted_mode.crtc_hdisplay; h_total = crtc_state->hw.adjusted_mode.crtc_htotal; @@ -542,44 +539,33 @@ static unsigned int get_hblank_early_enable_config(struct intel_encoder *encoder "lanes = %u vdsc_bpp = %u cdclk = %u\n", h_active, link_clk, lanes, vdsc_bpp, cdclk); - if (WARN_ON(!link_clk || !lanes || !vdsc_bpp || !cdclk)) + if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bpp || !cdclk)) return 0; - link_clks_available = ((((h_total - h_active) * - ((link_clk * ROUNDING_FACTOR) / - pixel_clk)) / ROUNDING_FACTOR) - 28); - link_clks_required = DIV_ROUND_UP(192000, (1000 * pixel_clk / h_total)) * ((48 / - lanes) + 2); + link_clks_available = (h_total - h_active) * link_clk / pixel_clk - 28; + link_clks_required = DIV_ROUND_UP(192000 * h_total, 1000 * pixel_clk) * (48 / lanes + 2); if (link_clks_available > link_clks_required) hblank_delta = 32; else - hblank_delta = DIV_ROUND_UP(((((5 * ROUNDING_FACTOR) / - link_clk) + ((5 * - ROUNDING_FACTOR) / - cdclk)) * pixel_clk), - ROUNDING_FACTOR); + hblank_delta = DIV64_U64_ROUND_UP(mul_u32_u32(5 * (link_clk + cdclk), pixel_clk), + mul_u32_u32(link_clk, cdclk)); - tu_data = (pixel_clk * vdsc_bpp * 8) / ((link_clk * - lanes * fec_coeff) / 1000000); - tu_line = (((h_active * link_clk * fec_coeff) / - 1000000) / (64 * pixel_clk)); + tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bpp * 8, 1000000), + mul_u32_u32(link_clk * lanes, fec_coeff)); + tu_line = div64_u64(h_active * mul_u32_u32(link_clk, fec_coeff), + mul_u32_u32(64 * pixel_clk, 1000000)); link_clks_active = (tu_line - 1) * 64 + tu_data; - hblank_rise = ((link_clks_active + 6 * DIV_ROUND_UP(link_clks_active, - 250) + 4) * ((pixel_clk * ROUNDING_FACTOR) / - link_clk)) / ROUNDING_FACTOR; + hblank_rise = (link_clks_active + 6 * DIV_ROUND_UP(link_clks_active, 250) + 4) * pixel_clk / link_clk; - hblank_early_prog = h_active - hblank_rise + hblank_delta; - - return hblank_early_prog; + return h_active - hblank_rise + hblank_delta; } -static unsigned int get_sample_room_req_config(const struct intel_crtc_state *crtc_state) +static unsigned int calc_samples_room(const struct intel_crtc_state *crtc_state) { unsigned int h_active, h_total, pixel_clk; unsigned int link_clk, lanes; - unsigned int samples_room; h_active = crtc_state->hw.adjusted_mode.hdisplay; h_total = crtc_state->hw.adjusted_mode.htotal; @@ -587,12 +573,8 @@ static unsigned int get_sample_room_req_config(const struct intel_crtc_state *cr link_clk = crtc_state->port_clock; lanes = crtc_state->lane_count; - samples_room = ((((h_total - h_active) * ((link_clk * - ROUNDING_FACTOR) / pixel_clk)) / - ROUNDING_FACTOR) - 12) / ((48 / - lanes) + 2); - - return samples_room; + return ((h_total - h_active) * link_clk - 12 * pixel_clk) / + (pixel_clk * (48 / lanes + 2)); } static void enable_audio_dsc_wa(struct intel_encoder *encoder, @@ -618,8 +600,7 @@ static void enable_audio_dsc_wa(struct intel_encoder *encoder, (crtc_state->hw.adjusted_mode.hdisplay >= 3840 && crtc_state->hw.adjusted_mode.vdisplay >= 2160)) { /* Get hblank early enable value required */ - hblank_early_prog = get_hblank_early_enable_config(encoder, - crtc_state); + hblank_early_prog = calc_hblank_early_prog(encoder, crtc_state); if (hblank_early_prog < 32) { val &= ~HBLANK_START_COUNT_MASK(pipe); val |= HBLANK_START_COUNT(pipe, HBLANK_START_COUNT_32); @@ -635,7 +616,7 @@ static void enable_audio_dsc_wa(struct intel_encoder *encoder, } /* Get samples room value required */ - samples_room = get_sample_room_req_config(crtc_state); + samples_room = calc_samples_room(crtc_state); if (samples_room < 3) { val &= ~NUMBER_SAMPLES_PER_LINE_MASK(pipe); val |= NUMBER_SAMPLES_PER_LINE(pipe, samples_room); From 690d22dafa88b82453516387b475664047a6bd14 Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Thu, 30 Apr 2020 14:46:54 -0700 Subject: [PATCH 21/97] drm/i915: Don't enable WaIncreaseLatencyIPCEnabled when IPC is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 5a7d202b1574, a logical AND was erroneously changed to an OR, causing WaIncreaseLatencyIPCEnabled to be enabled unconditionally for kabylake and coffeelake, even when IPC is disabled. Fix the logic so that WaIncreaseLatencyIPCEnabled is only used when IPC is enabled. Fixes: 5a7d202b1574 ("drm/i915: Drop WaIncreaseLatencyIPCEnabled/1140 for cnl") Cc: stable@vger.kernel.org # 5.3.x+ Signed-off-by: Sultan Alsawaf Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200430214654.51314-1-sultan@kerneltoast.com --- drivers/gpu/drm/i915/intel_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a4830a5cd401..416cb1a1e7cb 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5080,7 +5080,7 @@ static void skl_compute_plane_wm(const struct intel_crtc_state *crtc_state, * WaIncreaseLatencyIPCEnabled: kbl,cfl * Display WA #1141: kbl,cfl */ - if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) || + if ((IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) && dev_priv->ipc_enabled) latency += 4; From 0065e5f5cc56136da0be900c4a3121b38a82f37d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 3 May 2020 19:00:34 +0100 Subject: [PATCH 22/97] drm/i915/display: Warn if the FBC is still writing to stolen on removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the FBC is still writing into stolen, it will overwrite any future users of that stolen region. Check before release, just to ease any concerns -- we can remove it again later if it is barking up the wrong tree. References: https://gitlab.freedesktop.org/drm/intel/-/issues/1635 Signed-off-by: Chris Wilson Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200503180034.20010-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/display/intel_fbc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c index 7194f9bc62c5..1c26673acb2d 100644 --- a/drivers/gpu/drm/i915/display/intel_fbc.c +++ b/drivers/gpu/drm/i915/display/intel_fbc.c @@ -540,6 +540,9 @@ static void __intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv) { struct intel_fbc *fbc = &dev_priv->fbc; + if (WARN_ON(intel_fbc_hw_is_active(dev_priv))) + return; + if (!drm_mm_node_allocated(&fbc->compressed_fb)) return; From 8757797ff9c97e846348db634e849e11769e5d3d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 4 May 2020 05:48:47 +0100 Subject: [PATCH 23/97] drm/i915/selftests: Repeat the rps clock frequency measurement Repeat the measurement of the clock frequency a few times and use the median to try and reduce the systematic measurement error. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200504044903.7626-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/selftest_rps.c | 54 +++++++++++++++++++------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index b89a7d7611f6..bfa1a15564f7 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -56,6 +56,18 @@ static int cmp_u64(const void *A, const void *B) return 0; } +static int cmp_u32(const void *A, const void *B) +{ + const u32 *a = A, *b = B; + + if (a < b) + return -1; + else if (a > b) + return 1; + else + return 0; +} + static struct i915_vma * create_spin_counter(struct intel_engine_cs *engine, struct i915_address_space *vm, @@ -236,8 +248,8 @@ int live_rps_clock_interval(void *arg) for_each_engine(engine, gt, id) { unsigned long saved_heartbeat; struct i915_request *rq; - ktime_t dt; u32 cycles; + u64 dt; if (!intel_engine_can_store_dword(engine)) continue; @@ -286,15 +298,29 @@ int live_rps_clock_interval(void *arg) engine->name); err = -ENODEV; } else { - preempt_disable(); - dt = ktime_get(); - cycles = -intel_uncore_read_fw(gt->uncore, - GEN6_RP_CUR_UP_EI); - udelay(1000); - dt = ktime_sub(ktime_get(), dt); - cycles += intel_uncore_read_fw(gt->uncore, - GEN6_RP_CUR_UP_EI); - preempt_enable(); + ktime_t dt_[5]; + u32 cycles_[5]; + int i; + + for (i = 0; i < 5; i++) { + preempt_disable(); + + dt_[i] = ktime_get(); + cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI); + + udelay(1000); + + dt_[i] = ktime_sub(ktime_get(), dt_[i]); + cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI); + + preempt_enable(); + } + + /* Use the median of both cycle/dt; close enough */ + sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL); + cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4; + sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL); + dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4); } intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0); @@ -306,14 +332,14 @@ int live_rps_clock_interval(void *arg) if (err == 0) { u64 time = intel_gt_pm_interval_to_ns(gt, cycles); u32 expected = - intel_gt_ns_to_pm_interval(gt, ktime_to_ns(dt)); + intel_gt_ns_to_pm_interval(gt, dt); pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n", - engine->name, cycles, time, ktime_to_ns(dt), expected, + engine->name, cycles, time, dt, expected, gt->clock_frequency / 1000); - if (10 * time < 8 * ktime_to_ns(dt) || - 8 * time > 10 * ktime_to_ns(dt)) { + if (10 * time < 8 * dt || + 8 * time > 10 * dt) { pr_err("%s: rps clock time does not match walltime!\n", engine->name); err = -EINVAL; From 25fd6de315f43ecc89331c21682b1852a3bc4025 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 4 May 2020 19:05:07 +0100 Subject: [PATCH 24/97] drm/i915/gt: Small tidy of gen8+ breadcrumb emission Use a local to shrink a line under 80 columns, and refactor the common emit_xcs_breadcrumb() wrapper of ggtt-write. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200504180507.6017-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 34 +++++++++++++---------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index d4ef344657b0..c00366387b54 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -4641,8 +4641,7 @@ static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs) } static __always_inline u32* -gen8_emit_fini_breadcrumb_footer(struct i915_request *request, - u32 *cs) +gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs) { *cs++ = MI_USER_INTERRUPT; @@ -4656,14 +4655,16 @@ gen8_emit_fini_breadcrumb_footer(struct i915_request *request, return gen8_emit_wa_tail(request, cs); } -static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) +static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs) { - cs = gen8_emit_ggtt_write(cs, - request->fence.seqno, - i915_request_active_timeline(request)->hwsp_offset, - 0); + u32 addr = i915_request_active_timeline(request)->hwsp_offset; - return gen8_emit_fini_breadcrumb_footer(request, cs); + return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0); +} + +static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs) +{ + return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs)); } static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) @@ -4681,7 +4682,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL); - return gen8_emit_fini_breadcrumb_footer(request, cs); + return gen8_emit_fini_breadcrumb_tail(request, cs); } static u32 * @@ -4697,7 +4698,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE); - return gen8_emit_fini_breadcrumb_footer(request, cs); + return gen8_emit_fini_breadcrumb_tail(request, cs); } /* @@ -4735,7 +4736,7 @@ static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs) } static __always_inline u32* -gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs) +gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs) { *cs++ = MI_USER_INTERRUPT; @@ -4749,14 +4750,9 @@ gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs) return gen8_emit_wa_tail(request, cs); } -static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs) +static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs) { - cs = gen8_emit_ggtt_write(cs, - request->fence.seqno, - i915_request_active_timeline(request)->hwsp_offset, - 0); - - return gen12_emit_fini_breadcrumb_footer(request, cs); + return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs)); } static u32 * @@ -4775,7 +4771,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_HDC_PIPELINE_FLUSH); - return gen12_emit_fini_breadcrumb_footer(request, cs); + return gen12_emit_fini_breadcrumb_tail(request, cs); } static void execlists_park(struct intel_engine_cs *engine) From 054318c7e35f1d7d06b216143fff5f32405047ee Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 4 May 2020 10:58:28 +0300 Subject: [PATCH 25/97] drm/i915/tgl+: Fix interrupt handling for DP AUX transactions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unmask/enable AUX interrupts on all ports on TGL+. So far the interrupts worked only on port A, which meant each transaction on other ports took 10ms. Cc: # v5.4+ Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200504075828.20348-1-imre.deak@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index bd722d0650c8..0b8b0c069ce3 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3361,7 +3361,7 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) u32 de_pipe_masked = gen8_de_pipe_fault_mask(dev_priv) | GEN8_PIPE_CDCLK_CRC_DONE; u32 de_pipe_enables; - u32 de_port_masked = GEN8_AUX_CHANNEL_A; + u32 de_port_masked = gen8_de_port_aux_mask(dev_priv); u32 de_port_enables; u32 de_misc_masked = GEN8_DE_EDP_PSR; enum pipe pipe; @@ -3369,18 +3369,8 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) <= 10) de_misc_masked |= GEN8_DE_MISC_GSE; - if (INTEL_GEN(dev_priv) >= 9) { - de_port_masked |= GEN9_AUX_CHANNEL_B | GEN9_AUX_CHANNEL_C | - GEN9_AUX_CHANNEL_D; - if (IS_GEN9_LP(dev_priv)) - de_port_masked |= BXT_DE_PORT_GMBUS; - } - - if (INTEL_GEN(dev_priv) >= 11) - de_port_masked |= ICL_AUX_CHANNEL_E; - - if (IS_CNL_WITH_PORT_F(dev_priv) || INTEL_GEN(dev_priv) >= 11) - de_port_masked |= CNL_AUX_CHANNEL_F; + if (IS_GEN9_LP(dev_priv)) + de_port_masked |= BXT_DE_PORT_GMBUS; de_pipe_enables = de_pipe_masked | GEN8_PIPE_VBLANK | GEN8_PIPE_FIFO_UNDERRUN; From f136c58a0de98e1b56483b7fc8c209dba0a496d9 Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Tue, 5 May 2020 13:22:45 +0300 Subject: [PATCH 26/97] drm/i915: Added required new PCode commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need a new PCode request commands and reply codes to be added as a prepartion patch for QGV points restricting for new SAGV support. v2: - Extracted those changes into separate patch (Ville Syrjälä) v3: - Moved new PCode masks to another place from PCode commands(Ville) v4: - Moved new PCode masks to correspondent PCode command, with identation(Ville) - Changed naming to ICL_ instead of GEN11_ to fit more nicely into existing definition style. Signed-off-by: Stanislav Lisovskiy Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200505102247.32452-5-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 4 ++++ drivers/gpu/drm/i915/intel_sideband.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index fd9f2904d93c..f23a18ee28f9 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -9064,6 +9064,7 @@ enum { #define GEN7_PCODE_ILLEGAL_DATA 0x3 #define GEN11_PCODE_ILLEGAL_SUBCOMMAND 0x4 #define GEN11_PCODE_LOCKED 0x6 +#define GEN11_PCODE_REJECTED 0x11 #define GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE 0x10 #define GEN6_PCODE_WRITE_RC6VIDS 0x4 #define GEN6_PCODE_READ_RC6VIDS 0x5 @@ -9085,6 +9086,9 @@ enum { #define ICL_PCODE_MEM_SUBSYSYSTEM_INFO 0xd #define ICL_PCODE_MEM_SS_READ_GLOBAL_INFO (0x0 << 8) #define ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point) (((point) << 16) | (0x1 << 8)) +#define ICL_PCODE_SAGV_DE_MEM_SS_CONFIG 0xe +#define ICL_PCODE_POINTS_RESTRICTED 0x0 +#define ICL_PCODE_POINTS_RESTRICTED_MASK 0x1 #define GEN6_PCODE_READ_D_COMP 0x10 #define GEN6_PCODE_WRITE_D_COMP 0x11 #define ICL_PCODE_EXIT_TCCOLD 0x12 diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index d5129c1dd452..916ccd1c0e96 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -371,6 +371,8 @@ static int gen7_check_mailbox_status(u32 mbox) return -ENXIO; case GEN11_PCODE_LOCKED: return -EBUSY; + case GEN11_PCODE_REJECTED: + return -EACCES; case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: return -EOVERFLOW; default: From b68be5c623cdd62b9a8b99eb1443ba9cba812d6d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 5 May 2020 09:46:29 +0100 Subject: [PATCH 27/97] drm/i915/execlists: Record the active CCID from before reset If we cannot trust the reset will flush out the CS event queue such that process_csb() reports an accurate view of HW, we will need to search the active and pending contexts to determine which was actually running at the time we issued the reset. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200505084629.31365-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_types.h | 5 +++++ drivers/gpu/drm/i915/gt/intel_lrc.c | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 6c676774dcd9..b1048f039552 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -179,6 +179,11 @@ struct intel_engine_execlists { */ u32 error_interrupt; + /** + * @reset_ccid: Active CCID [EXECLISTS_STATUS_HI] at the time of reset + */ + u32 reset_ccid; + /** * @no_priolist: priority lists disabled */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index c00366387b54..3ff81c89fe01 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -4074,6 +4074,8 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) */ ring_set_paused(engine, 1); intel_engine_stop_cs(engine); + + engine->execlists.reset_ccid = active_ccid(engine); } static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine) @@ -4116,7 +4118,7 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * its request, it was still running at the time of the * reset and will have been clobbered. */ - rq = execlists_active(execlists); + rq = active_context(engine, engine->execlists.reset_ccid); if (!rq) goto unwind; From 977253df6433f85d5e2cb3ab0f8eb4127f8173dd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 4 May 2020 19:07:45 +0100 Subject: [PATCH 28/97] drm/i915/gt: Stop holding onto the pinned_default_state As we only restore the default context state upon banning a context, we only need enough of the state to run the ring and nothing more. That is we only need our bare protocontext. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Cc: Andi Shyti Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20200504180745.15645-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 14 +----- drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 - drivers/gpu/drm/i915/gt/intel_lrc.c | 14 ++---- drivers/gpu/drm/i915/gt/selftest_context.c | 11 ++-- drivers/gpu/drm/i915/gt/selftest_lrc.c | 53 +++++++++++++++----- 5 files changed, 47 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 811debefebc0..d0a1078ef632 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -21,18 +21,11 @@ static int __engine_unpark(struct intel_wakeref *wf) struct intel_engine_cs *engine = container_of(wf, typeof(*engine), wakeref); struct intel_context *ce; - void *map; ENGINE_TRACE(engine, "\n"); intel_gt_pm_get(engine->gt); - /* Pin the default state for fast resets from atomic context. */ - map = NULL; - if (engine->default_state) - map = shmem_pin_map(engine->default_state); - engine->pinned_default_state = map; - /* Discard stale context state from across idling */ ce = engine->kernel_context; if (ce) { @@ -42,6 +35,7 @@ static int __engine_unpark(struct intel_wakeref *wf) if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && ce->state) { struct drm_i915_gem_object *obj = ce->state->obj; int type = i915_coherent_map_type(engine->i915); + void *map; map = i915_gem_object_pin_map(obj, type); if (!IS_ERR(map)) { @@ -260,12 +254,6 @@ static int __engine_park(struct intel_wakeref *wf) if (engine->park) engine->park(engine); - if (engine->pinned_default_state) { - shmem_unpin_map(engine->default_state, - engine->pinned_default_state); - engine->pinned_default_state = NULL; - } - engine->execlists.no_priolist = false; /* While gt calls i915_vma_parked(), we have to break the lock cycle */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index b1048f039552..c113b7805e65 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -344,7 +344,6 @@ struct intel_engine_cs { unsigned long wakeref_serial; struct intel_wakeref wakeref; struct file *default_state; - void *pinned_default_state; struct { struct intel_ring *ring; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 3ff81c89fe01..dc3f2ee7136d 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1271,14 +1271,11 @@ execlists_check_context(const struct intel_context *ce, static void restore_default_state(struct intel_context *ce, struct intel_engine_cs *engine) { - u32 *regs = ce->lrc_reg_state; + u32 *regs; - if (engine->pinned_default_state) - memcpy(regs, /* skip restoring the vanilla PPHWSP */ - engine->pinned_default_state + LRC_STATE_OFFSET, - engine->context_size - PAGE_SIZE); + regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE); + execlists_init_reg_state(regs, ce, engine, ce->ring, true); - execlists_init_reg_state(regs, ce, engine, ce->ring, false); ce->runtime.last = intel_context_get_runtime(ce); } @@ -4168,8 +4165,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * image back to the expected values to skip over the guilty request. */ __i915_request_reset(rq, stalled); - if (!stalled) - goto out_replay; /* * We want a simple context + ring to execute the breadcrumb update. @@ -4179,9 +4174,6 @@ static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) * future request will be after userspace has had the opportunity * to recreate its own state. */ - GEM_BUG_ON(!intel_context_is_pinned(ce)); - restore_default_state(ce, engine); - out_replay: ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n", head, ce->ring->tail); diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index b8ed3cbe1277..a56dff3b157a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -154,10 +154,7 @@ static int live_context_size(void *arg) */ for_each_engine(engine, gt, id) { - struct { - struct file *state; - void *pinned; - } saved; + struct file *saved; if (!engine->context_size) continue; @@ -171,8 +168,7 @@ static int live_context_size(void *arg) * active state is sufficient, we are only checking that we * don't use more than we planned. */ - saved.state = fetch_and_zero(&engine->default_state); - saved.pinned = fetch_and_zero(&engine->pinned_default_state); + saved = fetch_and_zero(&engine->default_state); /* Overlaps with the execlists redzone */ engine->context_size += I915_GTT_PAGE_SIZE; @@ -181,8 +177,7 @@ static int live_context_size(void *arg) engine->context_size -= I915_GTT_PAGE_SIZE; - engine->pinned_default_state = saved.pinned; - engine->default_state = saved.state; + engine->default_state = saved; intel_engine_pm_put(engine); diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 3ced73533f6b..824f99c4cc7c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -5177,6 +5177,7 @@ store_context(struct intel_context *ce, struct i915_vma *scratch) { struct i915_vma *batch; u32 dw, x, *cs, *hw; + u32 *defaults; batch = create_user_vma(ce->vm, SZ_64K); if (IS_ERR(batch)) @@ -5188,9 +5189,16 @@ store_context(struct intel_context *ce, struct i915_vma *scratch) return ERR_CAST(cs); } + defaults = shmem_pin_map(ce->engine->default_state); + if (!defaults) { + i915_gem_object_unpin_map(batch->obj); + i915_vma_put(batch); + return ERR_PTR(-ENOMEM); + } + x = 0; dw = 0; - hw = ce->engine->pinned_default_state; + hw = defaults; hw += LRC_STATE_OFFSET / sizeof(*hw); do { u32 len = hw[dw] & 0x7f; @@ -5221,6 +5229,8 @@ store_context(struct intel_context *ce, struct i915_vma *scratch) *cs++ = MI_BATCH_BUFFER_END; + shmem_unpin_map(ce->engine->default_state, defaults); + i915_gem_object_flush_map(batch->obj); i915_gem_object_unpin_map(batch->obj); @@ -5331,6 +5341,7 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison) { struct i915_vma *batch; u32 dw, *cs, *hw; + u32 *defaults; batch = create_user_vma(ce->vm, SZ_64K); if (IS_ERR(batch)) @@ -5342,8 +5353,15 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison) return ERR_CAST(cs); } + defaults = shmem_pin_map(ce->engine->default_state); + if (!defaults) { + i915_gem_object_unpin_map(batch->obj); + i915_vma_put(batch); + return ERR_PTR(-ENOMEM); + } + dw = 0; - hw = ce->engine->pinned_default_state; + hw = defaults; hw += LRC_STATE_OFFSET / sizeof(*hw); do { u32 len = hw[dw] & 0x7f; @@ -5371,6 +5389,8 @@ static struct i915_vma *load_context(struct intel_context *ce, u32 poison) *cs++ = MI_BATCH_BUFFER_END; + shmem_unpin_map(ce->engine->default_state, defaults); + i915_gem_object_flush_map(batch->obj); i915_gem_object_unpin_map(batch->obj); @@ -5438,6 +5458,7 @@ static int compare_isolation(struct intel_engine_cs *engine, { u32 x, dw, *hw, *lrc; u32 *A[2], *B[2]; + u32 *defaults; int err = 0; A[0] = i915_gem_object_pin_map(ref[0]->obj, I915_MAP_WC); @@ -5470,9 +5491,15 @@ static int compare_isolation(struct intel_engine_cs *engine, } lrc += LRC_STATE_OFFSET / sizeof(*hw); + defaults = shmem_pin_map(ce->engine->default_state); + if (!defaults) { + err = -ENOMEM; + goto err_lrc; + } + x = 0; dw = 0; - hw = engine->pinned_default_state; + hw = defaults; hw += LRC_STATE_OFFSET / sizeof(*hw); do { u32 len = hw[dw] & 0x7f; @@ -5512,6 +5539,8 @@ static int compare_isolation(struct intel_engine_cs *engine, } while (dw < PAGE_SIZE / sizeof(u32) && (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); + shmem_unpin_map(ce->engine->default_state, defaults); +err_lrc: i915_gem_object_unpin_map(ce->state->obj); err_B1: i915_gem_object_unpin_map(result[1]->obj); @@ -5661,18 +5690,16 @@ static int live_lrc_isolation(void *arg) continue; intel_engine_pm_get(engine); - if (engine->pinned_default_state) { - for (i = 0; i < ARRAY_SIZE(poison); i++) { - int result; + for (i = 0; i < ARRAY_SIZE(poison); i++) { + int result; - result = __lrc_isolation(engine, poison[i]); - if (result && !err) - err = result; + result = __lrc_isolation(engine, poison[i]); + if (result && !err) + err = result; - result = __lrc_isolation(engine, ~poison[i]); - if (result && !err) - err = result; - } + result = __lrc_isolation(engine, ~poison[i]); + if (result && !err) + err = result; } intel_engine_pm_put(engine); if (igt_flush_test(gt->i915)) { From 9b2383a7ac082cf71d4fc462c3ec6d5939cf6a04 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 1 May 2020 14:37:01 -0700 Subject: [PATCH 29/97] drm/i915/icp: Add Wa_14010685332 We need to toggle a SDE chicken bit on and then off as the final step when disabling interrupts in preparation for runtime suspend. Bspec: 33450 Bspec: 8402 Cc: Bob Paauwe Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20200501213701.371443-1-matthew.d.roper@intel.com Reviewed-by: Bob Paauwe --- drivers/gpu/drm/i915/i915_irq.c | 8 ++++++++ drivers/gpu/drm/i915/i915_reg.h | 1 + 2 files changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0b8b0c069ce3..ea4c87784a27 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2870,6 +2870,14 @@ static void gen11_display_irq_reset(struct drm_i915_private *dev_priv) if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) GEN3_IRQ_RESET(uncore, SDE); + + /* Wa_14010685332:icl */ + if (INTEL_PCH_TYPE(dev_priv) == PCH_ICP) { + intel_uncore_rmw(uncore, SOUTH_CHICKEN1, + SBCLK_RUN_REFCLK_DIS, SBCLK_RUN_REFCLK_DIS); + intel_uncore_rmw(uncore, SOUTH_CHICKEN1, + SBCLK_RUN_REFCLK_DIS, 0); + } } static void gen11_irq_reset(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f23a18ee28f9..fde54b86ea20 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8573,6 +8573,7 @@ enum { #define FDI_BC_BIFURCATION_SELECT (1 << 12) #define CHASSIS_CLK_REQ_DURATION_MASK (0xf << 8) #define CHASSIS_CLK_REQ_DURATION(x) ((x) << 8) +#define SBCLK_RUN_REFCLK_DIS (1 << 7) #define SPT_PWM_GRANULARITY (1 << 0) #define SOUTH_CHICKEN2 _MMIO(0xc2004) #define FDI_MPHY_IOSFSB_RESET_STATUS (1 << 13) From 24fe5f2ab2478053d50a3bc629ada895903a5cbc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 6 May 2020 17:21:36 +0100 Subject: [PATCH 30/97] drm/i915: Propagate error from completed fences We need to preserve fatal errors from fences that are being terminated as we hook them up. Fixes: ef4688497512 ("drm/i915: Propagate fence errors") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200506162136.3325-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 22635bbabf06..4d18f808fda2 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1034,8 +1034,10 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) GEM_BUG_ON(to == from); GEM_BUG_ON(to->timeline == from->timeline); - if (i915_request_completed(from)) + if (i915_request_completed(from)) { + i915_sw_fence_set_error_once(&to->submit, from->fence.error); return 0; + } if (to->engine->schedule) { ret = i915_sched_node_add_dependency(&to->sched, &from->sched); From f02ac414ba9497d1887b1de7fe69954284f157ac Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 6 May 2020 17:47:31 +0300 Subject: [PATCH 31/97] Revert "drm/i915/tgl: Include ro parts of l3 to invalidate" This reverts commit 62037ffff229b7d94f1db5ef8d2e2ec819832ef3. L3 ro cache invalidation is part of the dword0 of pipe control. Also it is not relevant to this gen. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200506144734.29297-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 1 - drivers/gpu/drm/i915/gt/intel_lrc.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index ee10122a511e..b3cf09657fb2 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -236,7 +236,6 @@ #define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */ #define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */ #define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */ -#define PIPE_CONTROL_L3_RO_CACHE_INVALIDATE REG_BIT(10) /* gen12 */ #define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) #define PIPE_CONTROL_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */ #define PIPE_CONTROL_NOTIFY (1<<8) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index dc3f2ee7136d..feba021ca572 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -4579,7 +4579,6 @@ static int gen12_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_L3_RO_CACHE_INVALIDATE; flags |= PIPE_CONTROL_STORE_DATA_INDEX; flags |= PIPE_CONTROL_QW_WRITE; From 32d7171ee2ae6e19c63b826904cf62d3d5a7f6fa Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 6 May 2020 17:47:32 +0300 Subject: [PATCH 32/97] drm/i915/gen12: Fix HDC pipeline flush HDC pipeline flush is bit on the first dword of the PIPE_CONTROL, not the second. Make it so. v2: function naming (Chris) Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200506144734.29297-2-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_engine.h | 34 ++++++++++++++++---- drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 2 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 29 +++++++++-------- 3 files changed, 44 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 19d0b8830905..cb789c8bf06b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -241,19 +241,29 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, struct drm_printer *p); -static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) +static inline u32 *__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) { memset(batch, 0, 6 * sizeof(u32)); - batch[0] = GFX_OP_PIPE_CONTROL(6); - batch[1] = flags; + batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0; + batch[1] = flags1; batch[2] = offset; return batch + 6; } +static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) +{ + return __gen8_emit_pipe_control(batch, 0, flags, offset); +} + +static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) +{ + return __gen8_emit_pipe_control(batch, flags0, flags1, offset); +} + static inline u32 * -gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) +__gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1) { /* We're using qword write, offset should be aligned to 8 bytes. */ GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); @@ -262,8 +272,8 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) * need a prior CS_STALL, which is emitted by the flush * following the batch. */ - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = flags | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB; + *cs++ = GFX_OP_PIPE_CONTROL(6) | flags0; + *cs++ = flags1 | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB; *cs++ = gtt_offset; *cs++ = 0; *cs++ = value; @@ -273,6 +283,18 @@ gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) return cs; } +static inline u32* +gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) +{ + return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, 0, flags); +} + +static inline u32* +gen12_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1) +{ + return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, flags0, flags1); +} + static inline u32 * gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags) { diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index b3cf09657fb2..534e435f20bc 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -237,7 +237,7 @@ #define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */ #define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */ #define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) -#define PIPE_CONTROL_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */ +#define PIPE_CONTROL0_HDC_PIPELINE_FLUSH REG_BIT(9) /* gen12 */ #define PIPE_CONTROL_NOTIFY (1<<8) #define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */ #define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index feba021ca572..78f879ed4aa7 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -4553,7 +4553,6 @@ static int gen12_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_DEPTH_STALL; flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; flags |= PIPE_CONTROL_FLUSH_ENABLE; - flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH; flags |= PIPE_CONTROL_STORE_DATA_INDEX; flags |= PIPE_CONTROL_QW_WRITE; @@ -4564,7 +4563,9 @@ static int gen12_emit_flush_render(struct i915_request *request, if (IS_ERR(cs)) return PTR_ERR(cs); - cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + cs = gen12_emit_pipe_control(cs, + PIPE_CONTROL0_HDC_PIPELINE_FLUSH, + flags, LRC_PPHWSP_SCRATCH_ADDR); intel_ring_advance(request, cs); } @@ -4751,18 +4752,18 @@ static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs) static u32 * gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) { - cs = gen8_emit_ggtt_write_rcs(cs, - request->fence.seqno, - i915_request_active_timeline(request)->hwsp_offset, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_TILE_CACHE_FLUSH | - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - /* Wa_1409600907:tgl */ - PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE | - PIPE_CONTROL_HDC_PIPELINE_FLUSH); + cs = gen12_emit_ggtt_write_rcs(cs, + request->fence.seqno, + i915_request_active_timeline(request)->hwsp_offset, + PIPE_CONTROL0_HDC_PIPELINE_FLUSH, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + /* Wa_1409600907:tgl */ + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE); return gen12_emit_fini_breadcrumb_tail(request, cs); } From 0c7c0c8e6f09e0301e02266a0c83611d721adebb Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 6 May 2020 17:47:33 +0300 Subject: [PATCH 33/97] drm/i915/gen12: Flush L3 Flush TDL,L3 and EUs Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200506144734.29297-3-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 78f879ed4aa7..e1235d504837 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -4547,6 +4547,7 @@ static int gen12_emit_flush_render(struct i915_request *request, u32 *cs; flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; + flags |= PIPE_CONTROL_FLUSH_L3; flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; /* Wa_1409600907:tgl */ @@ -4758,6 +4759,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) PIPE_CONTROL0_HDC_PIPELINE_FLUSH, PIPE_CONTROL_CS_STALL | PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | /* Wa_1409600907:tgl */ From d248b371f7479a99caccf91da2ec6adee85e5e70 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 6 May 2020 19:53:10 +0300 Subject: [PATCH 34/97] drm/i915/gen12: Invalidate aux table entries forcibly Aux table invalidation can fail on update. So next access may cause memory access to be into stale entry. Proposed workaround is to invalidate entries between all batchbuffers. v2: correct register address (Yang) v3: respect the order (Chris) References bspec#43904, hsdes#1809175790 Cc: Chris Wilson Cc: Chuansheng Liu Cc: Rafael Antognolli Cc: Yang A Shi Signed-off-by: Mika Kuoppala Acked-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200506165310.1239-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_lrc.c | 16 +++++++++++++++- drivers/gpu/drm/i915/i915_reg.h | 2 ++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index e1235d504837..bbdb0e2a4571 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -4539,6 +4539,17 @@ static u32 preparser_disable(bool state) return MI_ARB_CHECK | 1 << 8 | state; } +static u32 * +gen12_emit_aux_table_inv(struct i915_request *rq, u32 *cs) +{ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN12_GFX_CCS_AUX_NV); + *cs++ = AUX_INV; + *cs++ = MI_NOOP; + + return cs; +} + static int gen12_emit_flush_render(struct i915_request *request, u32 mode) { @@ -4587,7 +4598,7 @@ static int gen12_emit_flush_render(struct i915_request *request, flags |= PIPE_CONTROL_CS_STALL; - cs = intel_ring_begin(request, 8); + cs = intel_ring_begin(request, 8 + 4); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -4600,6 +4611,9 @@ static int gen12_emit_flush_render(struct i915_request *request, cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + /* hsdes: 1809175790 */ + cs = gen12_emit_aux_table_inv(request, cs); + *cs++ = preparser_disable(false); intel_ring_advance(request, cs); } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index fde54b86ea20..dc5952200a07 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2557,6 +2557,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN10_PAT_INDEX(index) _MMIO(0x40e0 + (index) * 4) #define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) #define BSD_HWS_PGA_GEN7 _MMIO(0x04180) +#define GEN12_GFX_CCS_AUX_NV _MMIO(0x4208) +#define AUX_INV REG_BIT(0) #define BLT_HWS_PGA_GEN7 _MMIO(0x04280) #define VEBOX_HWS_PGA_GEN7 _MMIO(0x04380) #define RING_ACTHD(base) _MMIO((base) + 0x74) From 6b6cd2ebd8d071e55998e32b648bb8081f7f02bb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 7 May 2020 16:51:09 +0100 Subject: [PATCH 35/97] drm/i915: Mark concurrent submissions with a weak-dependency We recorded the dependencies for WAIT_FOR_SUBMIT in order that we could correctly perform priority inheritance from the parallel branches to the common trunk. However, for the purpose of timeslicing and reset handling, the dependency is weak -- as we the pair of requests are allowed to run in parallel and not in strict succession. The real significance though is that this allows us to rearrange groups of WAIT_FOR_SUBMIT linked requests along the single engine, and so can resolve user level inter-batch scheduling dependencies from user semaphores. Fixes: c81471f5e95c ("drm/i915: Copy across scheduler behaviour flags across submit fences") Testcase: igt/gem_exec_fence/submit Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: # v5.6+ Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200507155109.8892-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +++ drivers/gpu/drm/i915/i915_request.c | 8 ++++++-- drivers/gpu/drm/i915/i915_scheduler.c | 6 +++--- drivers/gpu/drm/i915/i915_scheduler.h | 3 ++- drivers/gpu/drm/i915/i915_scheduler_types.h | 1 + 5 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index bbdb0e2a4571..dd0fd4c4cf32 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1880,6 +1880,9 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl) struct i915_request *w = container_of(p->waiter, typeof(*w), sched); + if (p->flags & I915_DEPENDENCY_WEAK) + continue; + /* Leave semaphores spinning on the other engines */ if (w->engine != rq->engine) continue; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 4d18f808fda2..3c38d61c90f8 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1040,7 +1040,9 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) } if (to->engine->schedule) { - ret = i915_sched_node_add_dependency(&to->sched, &from->sched); + ret = i915_sched_node_add_dependency(&to->sched, + &from->sched, + I915_DEPENDENCY_EXTERNAL); if (ret < 0) return ret; } @@ -1202,7 +1204,9 @@ __i915_request_await_execution(struct i915_request *to, /* Couple the dependency tree for PI on this exposed to->fence */ if (to->engine->schedule) { - err = i915_sched_node_add_dependency(&to->sched, &from->sched); + err = i915_sched_node_add_dependency(&to->sched, + &from->sched, + I915_DEPENDENCY_WEAK); if (err < 0) return err; } diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 37cfcf5b321b..6e2d4190099f 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -462,7 +462,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, } int i915_sched_node_add_dependency(struct i915_sched_node *node, - struct i915_sched_node *signal) + struct i915_sched_node *signal, + unsigned long flags) { struct i915_dependency *dep; @@ -473,8 +474,7 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node, local_bh_disable(); if (!__i915_sched_node_add_dependency(node, signal, dep, - I915_DEPENDENCY_EXTERNAL | - I915_DEPENDENCY_ALLOC)) + flags | I915_DEPENDENCY_ALLOC)) i915_dependency_free(dep); local_bh_enable(); /* kick submission tasklet */ diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index d1dc4efef77b..6f0bf00fc569 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -34,7 +34,8 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, unsigned long flags); int i915_sched_node_add_dependency(struct i915_sched_node *node, - struct i915_sched_node *signal); + struct i915_sched_node *signal, + unsigned long flags); void i915_sched_node_fini(struct i915_sched_node *node); diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index d18e70550054..7186875088a0 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -78,6 +78,7 @@ struct i915_dependency { unsigned long flags; #define I915_DEPENDENCY_ALLOC BIT(0) #define I915_DEPENDENCY_EXTERNAL BIT(1) +#define I915_DEPENDENCY_WEAK BIT(2) }; #endif /* _I915_SCHEDULER_TYPES_H_ */ From eec39e441c29dd636bbdf5390af765cd9db6b380 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 7 May 2020 16:23:38 +0100 Subject: [PATCH 36/97] drm/i915: Remove wait priority boosting Upon waiting a request (when asked), we gave that request a small priority boost, not enough for it to cause preemption, but enough for it to be scheduled next before all equals. We also used that bit to give new clients a small priority boost, similar to FQ_CODEL, such that we favoured short interactive tasks ahead of long running streams. However, this is causing lots of complications with timeslicing where we both want to honour the boost and yet ignore it. Those complications cause unexpected user behaviour (tasks not being timesliced and run concurrently as epxected), and the easiest way to resolve that is to remove the boost. Hopefully, we can find a compromise again if we need to, but in theory timeslicing itself and future more advanced schedulers should give us the interactivity boost we seek. Testcase: igt/gem_exec_schedule/lateslice Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200507152338.7452-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 9 --------- drivers/gpu/drm/i915/gt/intel_lrc.c | 4 +--- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- drivers/gpu/drm/i915/i915_priolist_types.h | 7 ++----- drivers/gpu/drm/i915/i915_request.c | 3 --- drivers/gpu/drm/i915/i915_scheduler.c | 12 +----------- 6 files changed, 5 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 966523a8503f..d54a4933cc05 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2597,15 +2597,6 @@ static void eb_request_add(struct i915_execbuffer *eb) */ if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) attr.priority |= I915_PRIORITY_NOSEMAPHORE; - - /* - * Boost priorities to new clients (new request flows). - * - * Allow interactive/synchronous clients to jump ahead of - * the bulk clients. (FQ_CODEL) - */ - if (list_empty(&rq->sched.signalers_list)) - attr.priority |= I915_PRIORITY_WAIT; } else { /* Serialise with context_close via the add_to_timeline */ i915_request_set_error_once(rq, -ENOENT); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index dd0fd4c4cf32..59c99e2f0257 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -438,9 +438,7 @@ static int effective_prio(const struct i915_request *rq) if (__i915_request_has_started(rq)) prio |= I915_PRIORITY_NOSEMAPHORE; - /* Restrict mere WAIT boosts from triggering preemption */ - BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); /* only internal */ - return prio | __NO_PREEMPTION; + return prio; } static int queue_prio(const struct intel_engine_execlists *execlists) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index aa6d56e25a10..94eb63f309ce 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -258,7 +258,7 @@ static void guc_submit(struct intel_engine_cs *engine, static inline int rq_prio(const struct i915_request *rq) { - return rq->sched.attr.priority | __NO_PREEMPTION; + return rq->sched.attr.priority; } static struct i915_request *schedule_in(struct i915_request *rq, int idx) diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h index 732aad148881..e18723d8df86 100644 --- a/drivers/gpu/drm/i915/i915_priolist_types.h +++ b/drivers/gpu/drm/i915/i915_priolist_types.h @@ -24,14 +24,13 @@ enum { I915_PRIORITY_DISPLAY, }; -#define I915_USER_PRIORITY_SHIFT 2 +#define I915_USER_PRIORITY_SHIFT 1 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT) #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT) #define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1) -#define I915_PRIORITY_WAIT ((u8)BIT(0)) -#define I915_PRIORITY_NOSEMAPHORE ((u8)BIT(1)) +#define I915_PRIORITY_NOSEMAPHORE ((u8)BIT(0)) /* Smallest priority value that cannot be bumped. */ #define I915_PRIORITY_INVALID (INT_MIN | (u8)I915_PRIORITY_MASK) @@ -47,8 +46,6 @@ enum { #define I915_PRIORITY_UNPREEMPTABLE INT_MAX #define I915_PRIORITY_BARRIER INT_MAX -#define __NO_PREEMPTION (I915_PRIORITY_WAIT) - struct i915_priolist { struct list_head requests[I915_PRIORITY_COUNT]; struct rb_node node; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 3c38d61c90f8..589739bfee25 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1464,8 +1464,6 @@ void i915_request_add(struct i915_request *rq) if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) attr.priority |= I915_PRIORITY_NOSEMAPHORE; - if (list_empty(&rq->sched.signalers_list)) - attr.priority |= I915_PRIORITY_WAIT; __i915_request_queue(rq, &attr); @@ -1651,7 +1649,6 @@ long i915_request_wait(struct i915_request *rq, if (flags & I915_WAIT_PRIORITY) { if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6) intel_rps_boost(rq); - i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT); } wait.tsk = current; diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 6e2d4190099f..bec2a9c25425 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -174,7 +174,7 @@ sched_lock_engine(const struct i915_sched_node *node, static inline int rq_prio(const struct i915_request *rq) { - return rq->sched.attr.priority | __NO_PREEMPTION; + return rq->sched.attr.priority; } static inline bool need_preempt(int prio, int active) @@ -443,16 +443,6 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, list_add_rcu(&dep->signal_link, &node->signalers_list); list_add_rcu(&dep->wait_link, &signal->waiters_list); - /* - * As we do not allow WAIT to preempt inflight requests, - * once we have executed a request, along with triggering - * any execution callbacks, we must preserve its ordering - * within the non-preemptible FIFO. - */ - BUILD_BUG_ON(__NO_PREEMPTION & ~I915_PRIORITY_MASK); - if (flags & I915_DEPENDENCY_EXTERNAL) - __bump_priority(signal, __NO_PREEMPTION); - ret = true; } From 972282c4cf2454af7dca3fec16b8f2cc72d4238c Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Thu, 7 May 2020 17:20:45 +0300 Subject: [PATCH 37/97] drm/i915/gen12: Add aux table invalidate for all engines All engines, exception being blitter as it does not care about the form, can access compressed surfaces. So we need to add forced aux table invalidates for those engines. v2: virtual instance masking (Chris) v3: bug on if not found (Chris) References: d248b371f747 ("drm/i915/gen12: Invalidate aux table entries forcibly") References bspec#43904, hsdes#1809175790 Cc: Chris Wilson Cc: Chuansheng Liu Cc: Rafael Antognolli Signed-off-by: Mika Kuoppala Acked-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200507142045.8668-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_lrc.c | 86 +++++++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_reg.h | 6 ++ 2 files changed, 87 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 59c99e2f0257..400b9b5a6882 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -4540,11 +4540,36 @@ static u32 preparser_disable(bool state) return MI_ARB_CHECK | 1 << 8 | state; } +static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine) +{ + static const i915_reg_t vd[] = { + GEN12_VD0_AUX_NV, + GEN12_VD1_AUX_NV, + GEN12_VD2_AUX_NV, + GEN12_VD3_AUX_NV, + }; + + static const i915_reg_t ve[] = { + GEN12_VE0_AUX_NV, + GEN12_VE1_AUX_NV, + }; + + if (engine->class == VIDEO_DECODE_CLASS) + return vd[engine->instance]; + + if (engine->class == VIDEO_ENHANCEMENT_CLASS) + return ve[engine->instance]; + + GEM_BUG_ON("unknown aux_inv_reg\n"); + + return INVALID_MMIO_REG; +} + static u32 * -gen12_emit_aux_table_inv(struct i915_request *rq, u32 *cs) +gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs) { *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = i915_mmio_reg_offset(GEN12_GFX_CCS_AUX_NV); + *cs++ = i915_mmio_reg_offset(inv_reg); *cs++ = AUX_INV; *cs++ = MI_NOOP; @@ -4613,7 +4638,7 @@ static int gen12_emit_flush_render(struct i915_request *request, cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); /* hsdes: 1809175790 */ - cs = gen12_emit_aux_table_inv(request, cs); + cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs); *cs++ = preparser_disable(false); intel_ring_advance(request, cs); @@ -4622,6 +4647,56 @@ static int gen12_emit_flush_render(struct i915_request *request, return 0; } +static int gen12_emit_flush(struct i915_request *request, u32 mode) +{ + intel_engine_mask_t aux_inv = 0; + u32 cmd, *cs; + + if (mode & EMIT_INVALIDATE) + aux_inv = request->engine->mask & ~BIT(BCS0); + + cs = intel_ring_begin(request, + 4 + (aux_inv ? 2 * hweight8(aux_inv) + 2 : 0)); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cmd = MI_FLUSH_DW + 1; + + /* We always require a command barrier so that subsequent + * commands, such as breadcrumb interrupts, are strictly ordered + * wrt the contents of the write cache being flushed to memory + * (and thus being coherent from the CPU). + */ + cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; + + if (mode & EMIT_INVALIDATE) { + cmd |= MI_INVALIDATE_TLB; + if (request->engine->class == VIDEO_DECODE_CLASS) + cmd |= MI_INVALIDATE_BSD; + } + + *cs++ = cmd; + *cs++ = LRC_PPHWSP_SCRATCH_ADDR; + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ + + if (aux_inv) { /* hsdes: 1809175790 */ + struct intel_engine_cs *engine; + unsigned int tmp; + + *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv)); + for_each_engine_masked(engine, request->engine->gt, + aux_inv, tmp) { + *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine)); + *cs++ = AUX_INV; + } + *cs++ = MI_NOOP; + } + intel_ring_advance(request, cs); + + return 0; +} + /* * Reserve space for 2 NOOPs at the end of each request to be * used as a workaround for not being allowed to do lite @@ -4855,9 +4930,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->emit_flush = gen8_emit_flush; engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb; - if (INTEL_GEN(engine->i915) >= 12) + if (INTEL_GEN(engine->i915) >= 12) { engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb; - + engine->emit_flush = gen12_emit_flush; + } engine->set_default_submission = intel_execlists_set_default_submission; if (INTEL_GEN(engine->i915) < 11) { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index dc5952200a07..6c076a24eb82 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2558,6 +2558,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4) #define BSD_HWS_PGA_GEN7 _MMIO(0x04180) #define GEN12_GFX_CCS_AUX_NV _MMIO(0x4208) +#define GEN12_VD0_AUX_NV _MMIO(0x4218) +#define GEN12_VD1_AUX_NV _MMIO(0x4228) +#define GEN12_VD2_AUX_NV _MMIO(0x4298) +#define GEN12_VD3_AUX_NV _MMIO(0x42A8) +#define GEN12_VE0_AUX_NV _MMIO(0x4238) +#define GEN12_VE1_AUX_NV _MMIO(0x42B8) #define AUX_INV REG_BIT(0) #define BLT_HWS_PGA_GEN7 _MMIO(0x04280) #define VEBOX_HWS_PGA_GEN7 _MMIO(0x04380) From 40dcee1b7c086715d8ce7f6c9c9bdae45f4855b0 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Wed, 6 May 2020 17:43:17 +0800 Subject: [PATCH 38/97] drm/i915/gvt: move workload destroy out of execlist complete To let execlist.c only handle execlist handling and keep other workload cleanup function in scheduler.c to align with other workload specific handling there. This doesn't change current code behavior. Reviewed-by: Yan Zhao Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200506094318.105604-1-zhenyuw@linux.intel.com --- drivers/gpu/drm/i915/gvt/execlist.c | 2 -- drivers/gpu/drm/i915/gvt/scheduler.c | 3 +++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index dd25c3024370..158873f269b1 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -424,8 +424,6 @@ static int complete_execlist_workload(struct intel_vgpu_workload *workload) ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc); out: - intel_vgpu_unpin_mm(workload->shadow_mm); - intel_vgpu_destroy_workload(workload); return ret; } diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 92a055c9d8d8..d25fbae87ca9 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -1014,6 +1014,9 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) workload->complete(workload); + intel_vgpu_unpin_mm(workload->shadow_mm); + intel_vgpu_destroy_workload(workload); + atomic_dec(&s->running_workload_num); wake_up(&scheduler->workload_complete_wq); From bec3df930fbd40fcc7bcead43a39cfd3c5b0419f Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 8 May 2020 11:14:09 +0800 Subject: [PATCH 39/97] drm/i915/gvt: Support PPGTT table load command The PPGTT in context image can be overridden by LRI cmd with another PPGTT's pdps. In such case, the load mm is used instead of the one in the context image. So we need to load its shadow mm in GVT and replace ppgtt pointers in command. This feature is used by guest IGD driver to share gfx VM between different contexts. Verified by IGT "gem_ctx_clone" test. v4: - consolidate shadow mm handlers (Yan) - fix cmd shadow mm pin error path v3: (Zhenyu Wang) - Cleanup PDP register offset check - Add debug check for guest context ppgtt update - Skip 3-level ppgtt guest handling code. The reason is that all guests now use 4-level ppgtt table and the only left case for 3-level table is ancient aliasing ppgtt case. But those guest kernel has no use of PPGTT LRI command. So 3-level ppgtt guest for this feature becomes simply un-testable. v2: (Zhenyu Wang) - Change to list for handling possible multiple ppgtt table loads in one submission. Make sure shadow mm is to replace for each one. Reviewed-by: Yan Zhao Cc: Yan Zhao Signed-off-by: Tina Zhang Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200508031409.2562-1-zhenyuw@linux.intel.com --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 45 +++++++++++ drivers/gpu/drm/i915/gvt/gtt.c | 1 + drivers/gpu/drm/i915/gvt/gtt.h | 1 + drivers/gpu/drm/i915/gvt/handlers.c | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 106 +++++++++++++++++++++++++- drivers/gpu/drm/i915/gvt/scheduler.h | 1 + 6 files changed, 151 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 9e065ad0658f..956e5a9fd11b 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -881,6 +881,47 @@ static int mocs_cmd_reg_handler(struct parser_exec_state *s, return 0; } +static int is_cmd_update_pdps(unsigned int offset, + struct parser_exec_state *s) +{ + u32 base = s->workload->engine->mmio_base; + return i915_mmio_reg_equal(_MMIO(offset), GEN8_RING_PDP_UDW(base, 0)); +} + +static int cmd_pdp_mmio_update_handler(struct parser_exec_state *s, + unsigned int offset, unsigned int index) +{ + struct intel_vgpu *vgpu = s->vgpu; + struct intel_vgpu_mm *shadow_mm = s->workload->shadow_mm; + struct intel_vgpu_mm *mm; + u64 pdps[GEN8_3LVL_PDPES]; + + if (shadow_mm->ppgtt_mm.root_entry_type == + GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { + pdps[0] = (u64)cmd_val(s, 2) << 32; + pdps[0] |= cmd_val(s, 4); + + mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps); + if (!mm) { + gvt_vgpu_err("failed to get the 4-level shadow vm\n"); + return -EINVAL; + } + intel_vgpu_mm_get(mm); + list_add_tail(&mm->ppgtt_mm.link, + &s->workload->lri_shadow_mm); + *cmd_ptr(s, 2) = upper_32_bits(mm->ppgtt_mm.shadow_pdps[0]); + *cmd_ptr(s, 4) = lower_32_bits(mm->ppgtt_mm.shadow_pdps[0]); + } else { + /* Currently all guests use PML4 table and now can't + * have a guest with 3-level table but uses LRI for + * PPGTT update. So this is simply un-testable. */ + GEM_BUG_ON(1); + gvt_vgpu_err("invalid shared shadow vm type\n"); + return -EINVAL; + } + return 0; +} + static int cmd_reg_handler(struct parser_exec_state *s, unsigned int offset, unsigned int index, char *cmd) { @@ -919,6 +960,10 @@ static int cmd_reg_handler(struct parser_exec_state *s, patch_value(s, cmd_ptr(s, index), VGT_PVINFO_PAGE); } + if (is_cmd_update_pdps(offset, s) && + cmd_pdp_mmio_update_handler(s, offset, index)) + return -EINVAL; + /* TODO * In order to let workload with inhibit context to generate * correct image data into memory, vregs values will be loaded to diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index dffd4b79b9a6..1959a6aea5bb 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1900,6 +1900,7 @@ struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu, INIT_LIST_HEAD(&mm->ppgtt_mm.list); INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list); + INIT_LIST_HEAD(&mm->ppgtt_mm.link); if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) mm->ppgtt_mm.guest_pdps[0] = pdps[0]; diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index 88789316807d..320b8d6ad92f 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -160,6 +160,7 @@ struct intel_vgpu_mm { struct list_head list; struct list_head lru_list; + struct list_head link; /* possible LRI shadow mm list */ } ppgtt_mm; struct { void *virtual_ggtt; diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 0182e2a5acff..23a3193a6654 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2808,7 +2808,7 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt) MMIO_D(GAMTARBMODE, D_BDW_PLUS); #define RING_REG(base) _MMIO((base) + 0x270) - MMIO_RING_F(RING_REG, 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL); + MMIO_RING_F(RING_REG, 32, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, NULL, NULL); #undef RING_REG MMIO_RING_GM_RDR(RING_HWS_PGA, D_BDW_PLUS, NULL, hws_pga_write); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index d25fbae87ca9..da6a2a424283 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -647,10 +647,11 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload) } } -static int prepare_workload(struct intel_vgpu_workload *workload) +static int +intel_vgpu_shadow_mm_pin(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; - struct intel_vgpu_submission *s = &vgpu->submission; + struct intel_vgpu_mm *m; int ret = 0; ret = intel_vgpu_pin_mm(workload->shadow_mm); @@ -665,6 +666,52 @@ static int prepare_workload(struct intel_vgpu_workload *workload) return -EINVAL; } + if (!list_empty(&workload->lri_shadow_mm)) { + list_for_each_entry(m, &workload->lri_shadow_mm, + ppgtt_mm.link) { + ret = intel_vgpu_pin_mm(m); + if (ret) { + list_for_each_entry_from_reverse(m, + &workload->lri_shadow_mm, + ppgtt_mm.link) + intel_vgpu_unpin_mm(m); + gvt_vgpu_err("LRI shadow ppgtt fail to pin\n"); + break; + } + } + } + + if (ret) + intel_vgpu_unpin_mm(workload->shadow_mm); + + return ret; +} + +static void +intel_vgpu_shadow_mm_unpin(struct intel_vgpu_workload *workload) +{ + struct intel_vgpu_mm *m; + + if (!list_empty(&workload->lri_shadow_mm)) { + list_for_each_entry(m, &workload->lri_shadow_mm, + ppgtt_mm.link) + intel_vgpu_unpin_mm(m); + } + intel_vgpu_unpin_mm(workload->shadow_mm); +} + +static int prepare_workload(struct intel_vgpu_workload *workload) +{ + struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; + int ret = 0; + + ret = intel_vgpu_shadow_mm_pin(workload); + if (ret) { + gvt_vgpu_err("fail to pin shadow mm\n"); + return ret; + } + update_shadow_pdps(workload); set_context_ppgtt_from_shadow(workload, s->shadow[workload->engine->id]); @@ -711,7 +758,7 @@ static int prepare_workload(struct intel_vgpu_workload *workload) err_shadow_batch: release_shadow_batch_buffer(workload); err_unpin_mm: - intel_vgpu_unpin_mm(workload->shadow_mm); + intel_vgpu_shadow_mm_unpin(workload); return ret; } @@ -821,6 +868,37 @@ pick_next_workload(struct intel_gvt *gvt, struct intel_engine_cs *engine) return workload; } +static void update_guest_pdps(struct intel_vgpu *vgpu, + u64 ring_context_gpa, u32 pdp[8]) +{ + u64 gpa; + int i; + + gpa = ring_context_gpa + RING_CTX_OFF(pdps[0].val); + + for (i = 0; i < 8; i++) + intel_gvt_hypervisor_write_gpa(vgpu, + gpa + i * 8, &pdp[7 - i], 4); +} + +static bool +check_shadow_context_ppgtt(struct execlist_ring_context *c, struct intel_vgpu_mm *m) +{ + if (m->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { + u64 shadow_pdp = c->pdps[7].val | (u64) c->pdps[6].val << 32; + + if (shadow_pdp != m->ppgtt_mm.shadow_pdps[0]) { + gvt_dbg_mm("4-level context ppgtt not match LRI command\n"); + return false; + } + return true; + } else { + /* see comment in LRI handler in cmd_parser.c */ + gvt_dbg_mm("invalid shadow mm type\n"); + return false; + } +} + static void update_guest_context(struct intel_vgpu_workload *workload) { struct i915_request *rq = workload->req; @@ -906,6 +984,15 @@ static void update_guest_context(struct intel_vgpu_workload *workload) shadow_ring_context = (void *) ctx->lrc_reg_state; + if (!list_empty(&workload->lri_shadow_mm)) { + struct intel_vgpu_mm *m = list_last_entry(&workload->lri_shadow_mm, + struct intel_vgpu_mm, + ppgtt_mm.link); + GEM_BUG_ON(!check_shadow_context_ppgtt(shadow_ring_context, m)); + update_guest_pdps(vgpu, workload->ring_context_gpa, + (void *)m->ppgtt_mm.guest_pdps); + } + #define COPY_REG(name) \ intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + \ RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4) @@ -1014,7 +1101,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) workload->complete(workload); - intel_vgpu_unpin_mm(workload->shadow_mm); + intel_vgpu_shadow_mm_unpin(workload); intel_vgpu_destroy_workload(workload); atomic_dec(&s->running_workload_num); @@ -1409,6 +1496,16 @@ void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload) release_shadow_batch_buffer(workload); release_shadow_wa_ctx(&workload->wa_ctx); + if (!list_empty(&workload->lri_shadow_mm)) { + struct intel_vgpu_mm *m, *mm; + list_for_each_entry_safe(m, mm, &workload->lri_shadow_mm, + ppgtt_mm.link) { + list_del(&m->ppgtt_mm.link); + intel_vgpu_mm_put(m); + } + } + + GEM_BUG_ON(!list_empty(&workload->lri_shadow_mm)); if (workload->shadow_mm) intel_vgpu_mm_put(workload->shadow_mm); @@ -1427,6 +1524,7 @@ alloc_workload(struct intel_vgpu *vgpu) INIT_LIST_HEAD(&workload->list); INIT_LIST_HEAD(&workload->shadow_bb); + INIT_LIST_HEAD(&workload->lri_shadow_mm); init_waitqueue_head(&workload->shadow_ctx_status_wq); atomic_set(&workload->shadow_ctx_active, 0); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index bf7fc0ca4cb1..15d317f2a4a4 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -87,6 +87,7 @@ struct intel_vgpu_workload { int status; struct intel_vgpu_mm *shadow_mm; + struct list_head lri_shadow_mm; /* For PPGTT load cmd */ /* different submission model may need different handler */ int (*prepare)(struct intel_vgpu_workload *); From 47e51832ae93534d872511ba557115722582d94c Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Wed, 6 May 2020 17:59:48 +0800 Subject: [PATCH 40/97] drm/i915/gvt: use context lrc_reg_state for shadow ppgtt override We can replace kmap by using context's lrc_reg_state directly for shadow ppgtt table override. Reviewed-by: Yan Zhao Cc: Yan Zhao Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200506095948.124979-1-zhenyuw@linux.intel.com --- drivers/gpu/drm/i915/gvt/scheduler.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index da6a2a424283..54c10dfcd3d2 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -58,10 +58,8 @@ static void set_context_pdp_root_pointer( static void update_shadow_pdps(struct intel_vgpu_workload *workload) { - struct drm_i915_gem_object *ctx_obj = - workload->req->context->state->obj; struct execlist_ring_context *shadow_ring_context; - struct page *page; + struct intel_context *ctx = workload->req->context; if (WARN_ON(!workload->shadow_mm)) return; @@ -69,11 +67,9 @@ static void update_shadow_pdps(struct intel_vgpu_workload *workload) if (WARN_ON(!atomic_read(&workload->shadow_mm->pincount))) return; - page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); - shadow_ring_context = kmap(page); + shadow_ring_context = (struct execlist_ring_context *)ctx->lrc_reg_state; set_context_pdp_root_pointer(shadow_ring_context, (void *)workload->shadow_mm->ppgtt_mm.shadow_pdps); - kunmap(page); } /* From 2045d666ae634f1676660acfb864bcba0e9f86ca Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 May 2020 10:29:25 +0100 Subject: [PATCH 41/97] drm/i915: Ignore submit-fences on the same timeline While we ordinarily do not skip submit-fences due to the accompanying hook that we want to callback on execution, a submit-fence on the same timeline is meaningless. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200508092933.738-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 589739bfee25..be2ce9065a29 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1242,6 +1242,9 @@ i915_request_await_execution(struct i915_request *rq, continue; } + if (fence->context == rq->fence.context) + continue; + /* * We don't squash repeated fence dependencies here as we * want to run our callback in all cases. From ac938052e571a84b40a17ed21d71b3d827cf0236 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 May 2020 10:29:26 +0100 Subject: [PATCH 42/97] drm/i915: Pull waiting on an external dma-fence into its routine As a means for a small code consolidation, but primarily to start thinking more carefully about internal-vs-external linkage, pull the pair of i915_sw_fence_await_dma_fence() calls into a common routine. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200508092933.738-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index be2ce9065a29..94189c7d43cd 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1067,6 +1067,14 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return 0; } +static int +i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) +{ + return i915_sw_fence_await_dma_fence(&rq->submit, fence, + fence->context ? I915_FENCE_TIMEOUT : 0, + I915_FENCE_GFP); +} + int i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) { @@ -1114,9 +1122,7 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) if (dma_fence_is_i915(fence)) ret = i915_request_await_request(rq, to_request(fence)); else - ret = i915_sw_fence_await_dma_fence(&rq->submit, fence, - fence->context ? I915_FENCE_TIMEOUT : 0, - I915_FENCE_GFP); + ret = i915_request_await_external(rq, fence); if (ret < 0) return ret; @@ -1255,9 +1261,7 @@ i915_request_await_execution(struct i915_request *rq, to_request(fence), hook); else - ret = i915_sw_fence_await_dma_fence(&rq->submit, fence, - I915_FENCE_TIMEOUT, - GFP_KERNEL); + ret = i915_request_await_external(rq, fence); if (ret < 0) return ret; } while (--nchild); From e41627db6f364f9e3157b15a7646f8dd35f75508 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 May 2020 11:42:20 +0100 Subject: [PATCH 43/97] drm/i915/gt: Improve precision on defer_request assert The kernel_context does not use initial-breadcrumbs, so when we ask if its requests have started we do so by comparing against the completion seqno of the previous request. This is very imprecise, not precise enough for the defer_request assertion. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1847 Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200508104220.9872-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 400b9b5a6882..28b477f531a5 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1886,7 +1886,8 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl) continue; /* No waiter should start before its signaler */ - GEM_BUG_ON(i915_request_started(w) && + GEM_BUG_ON(w->context->timeline->has_initial_breadcrumb && + i915_request_started(w) && !i915_request_completed(rq)); GEM_BUG_ON(i915_request_is_active(w)); From 3136deb7ba223f7ce3117cdfcd918a6f48e2d221 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 8 May 2020 19:54:48 +0100 Subject: [PATCH 44/97] drm/i915: Peel dma-fence-chains for await To allow faster engine to engine synchronization, peel the layer of dma-fence-chain to expose potential i915 fences so that the i915_request code can emit HW semaphore wait/signal operations in the ring which is faster than waking up the host to submit unblocked workloads after interrupt notification. This is similar to the peeling we do for e.g. dma_fence_array. Signed-off-by: Lionel Landwerlin Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200508185448.29709-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 94189c7d43cd..9df2cb8d66d0 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -23,6 +23,7 @@ */ #include +#include #include #include #include @@ -1068,13 +1069,39 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) } static int -i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) +__i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) { return i915_sw_fence_await_dma_fence(&rq->submit, fence, fence->context ? I915_FENCE_TIMEOUT : 0, I915_FENCE_GFP); } +static int +i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) +{ + struct dma_fence *iter; + int err = 0; + + if (!to_dma_fence_chain(fence)) + return __i915_request_await_external(rq, fence); + + dma_fence_chain_for_each(iter, fence) { + struct dma_fence_chain *chain = to_dma_fence_chain(iter); + + if (!dma_fence_is_i915(chain->fence)) { + err = __i915_request_await_external(rq, iter); + break; + } + + err = i915_request_await_dma_fence(rq, chain->fence); + if (err < 0) + break; + } + + dma_fence_put(iter); + return err; +} + int i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) { From fcae496153a3ce719082746a01c81dd581e7a239 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 8 May 2020 10:29:27 +0100 Subject: [PATCH 45/97] drm/i915: Prevent using semaphores to chain up to external fences The downside of using semaphores is that we lose metadata passing along the signaling chain. This is particularly nasty when we need to pass along a fatal error such as EFAULT or EDEADLK. For fatal errors we want to scrub the request before it is executed, which means that we cannot preload the request onto HW and have it wait upon a semaphore. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200508092933.738-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 26 +++++++++++++++++++++ drivers/gpu/drm/i915/i915_scheduler_types.h | 1 + 2 files changed, 27 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 9df2cb8d66d0..2c0de3a3b8e8 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1003,6 +1003,15 @@ emit_semaphore_wait(struct i915_request *to, if (!rcu_access_pointer(from->hwsp_cacheline)) goto await_fence; + /* + * If this or its dependents are waiting on an external fence + * that may fail catastrophically, then we want to avoid using + * sempahores as they bypass the fence signaling metadata, and we + * lose the fence->error propagation. + */ + if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN) + goto await_fence; + /* Just emit the first semaphore we see as request space is limited. */ if (already_busywaiting(to) & mask) goto await_fence; @@ -1065,12 +1074,29 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return ret; } + if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN) + to->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN; + return 0; } +static void mark_external(struct i915_request *rq) +{ + /* + * The downside of using semaphores is that we lose metadata passing + * along the signaling chain. This is particularly nasty when we + * need to pass along a fatal error such as EFAULT or EDEADLK. For + * fatal errors we want to scrub the request before it is executed, + * which means that we cannot preload the request onto HW and have + * it wait upon a semaphore. + */ + rq->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN; +} + static int __i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) { + mark_external(rq); return i915_sw_fence_await_dma_fence(&rq->submit, fence, fence->context ? I915_FENCE_TIMEOUT : 0, I915_FENCE_GFP); diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index 7186875088a0..6ab2c5289bed 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -66,6 +66,7 @@ struct i915_sched_node { struct i915_sched_attr attr; unsigned int flags; #define I915_SCHED_HAS_SEMAPHORE_CHAIN BIT(0) +#define I915_SCHED_HAS_EXTERNAL_CHAIN BIT(1) intel_engine_mask_t semaphores; }; From 16dc224f1c0ff8d242ef41c40d4da0264d351daa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 May 2020 11:50:21 +0100 Subject: [PATCH 46/97] drm/i915: Replace the hardcoded I915_FENCE_TIMEOUT Expose the hardcoded timeout for unsignaled foreign fences as a Kconfig option, primarily to allow brave systems to disable the timeout and solely rely on correct signaling. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Acked-by: Michael J. Ruhl Link: https://patchwork.freedesktop.org/patch/msgid/20200509105021.12542-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig.profile | 12 ++++++++++++ drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/display/intel_display.c | 5 +++-- drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_client_blt.c | 3 +-- drivers/gpu/drm/i915/gem/i915_gem_fence.c | 4 ++-- drivers/gpu/drm/i915/i915_config.c | 15 +++++++++++++++ drivers/gpu/drm/i915/i915_drv.h | 10 +++++++++- drivers/gpu/drm/i915/i915_request.c | 3 ++- 9 files changed, 46 insertions(+), 9 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_config.c diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile index 0bfd276c19fe..35bbe2b80596 100644 --- a/drivers/gpu/drm/i915/Kconfig.profile +++ b/drivers/gpu/drm/i915/Kconfig.profile @@ -1,3 +1,15 @@ +config DRM_I915_FENCE_TIMEOUT + int "Timeout for unsignaled foreign fences (ms, jiffy granularity)" + default 10000 # milliseconds + help + When listening to a foreign fence, we install a supplementary timer + to ensure that we are always signaled and our userspace is able to + make forward progress. This value specifies the timeout used for an + unsignaled foreign fence. + + May be 0 to disable the timeout, and rely on the foreign fence being + eventually signaled. + config DRM_I915_USERFAULT_AUTOSUSPEND int "Runtime autosuspend delay for userspace GGTT mmaps (ms)" default 250 # milliseconds diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 5359c736c789..b0da6ea6e3f1 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -35,6 +35,7 @@ subdir-ccflags-y += -I$(srctree)/$(src) # core driver code i915-y += i915_drv.o \ + i915_config.o \ i915_irq.o \ i915_getparam.o \ i915_params.o \ diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 268d65e23472..fb41f36c0256 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -15815,7 +15815,7 @@ intel_prepare_plane_fb(struct drm_plane *_plane, if (new_plane_state->uapi.fence) { /* explicit fencing */ ret = i915_sw_fence_await_dma_fence(&state->commit_ready, new_plane_state->uapi.fence, - I915_FENCE_TIMEOUT, + i915_fence_timeout(dev_priv), GFP_KERNEL); if (ret < 0) return ret; @@ -15842,7 +15842,8 @@ intel_prepare_plane_fb(struct drm_plane *_plane, ret = i915_sw_fence_await_reservation(&state->commit_ready, obj->base.resv, NULL, - false, I915_FENCE_TIMEOUT, + false, + i915_fence_timeout(dev_priv), GFP_KERNEL); if (ret < 0) goto unpin_fb; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index 34be4c0ee7c5..bc0223716906 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -108,7 +108,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, if (clflush) { i915_sw_fence_await_reservation(&clflush->base.chain, obj->base.resv, NULL, true, - I915_FENCE_TIMEOUT, + i915_fence_timeout(to_i915(obj->base.dev)), I915_FENCE_GFP); dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma); dma_fence_work_commit(&clflush->base); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c index 3a146aa2593b..d3a86a4d5c04 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_client_blt.c @@ -288,8 +288,7 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj, i915_gem_object_lock(obj); err = i915_sw_fence_await_reservation(&work->wait, - obj->base.resv, NULL, - true, I915_FENCE_TIMEOUT, + obj->base.resv, NULL, true, 0, I915_FENCE_GFP); if (err < 0) { dma_fence_set_error(&work->dma, err); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_fence.c b/drivers/gpu/drm/i915/gem/i915_gem_fence.c index 2f6100ec2608..8ab842c80f99 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_fence.c @@ -72,8 +72,8 @@ i915_gem_object_lock_fence(struct drm_i915_gem_object *obj) 0, 0); if (i915_sw_fence_await_reservation(&stub->chain, - obj->base.resv, NULL, - true, I915_FENCE_TIMEOUT, + obj->base.resv, NULL, true, + i915_fence_timeout(to_i915(obj->base.dev)), I915_FENCE_GFP) < 0) goto err; diff --git a/drivers/gpu/drm/i915/i915_config.c b/drivers/gpu/drm/i915/i915_config.c new file mode 100644 index 000000000000..b79b5f6d2cfa --- /dev/null +++ b/drivers/gpu/drm/i915/i915_config.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include "i915_drv.h" + +unsigned long +i915_fence_context_timeout(const struct drm_i915_private *i915, u64 context) +{ + if (context && IS_ACTIVE(CONFIG_DRM_I915_FENCE_TIMEOUT)) + return msecs_to_jiffies_timeout(CONFIG_DRM_I915_FENCE_TIMEOUT); + + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7af48641dbc9..99d8b103057f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -614,8 +614,16 @@ struct i915_gem_mm { #define I915_IDLE_ENGINES_TIMEOUT (200) /* in ms */ +unsigned long i915_fence_context_timeout(const struct drm_i915_private *i915, + u64 context); + +static inline unsigned long +i915_fence_timeout(const struct drm_i915_private *i915) +{ + return i915_fence_context_timeout(i915, U64_MAX); +} + #define I915_RESET_TIMEOUT (10 * HZ) /* 10s */ -#define I915_FENCE_TIMEOUT (10 * HZ) /* 10s */ #define I915_ENGINE_DEAD_TIMEOUT (4 * HZ) /* Seqno, head and subunits dead */ #define I915_SEQNO_DEAD_TIMEOUT (12 * HZ) /* Seqno dead with active head */ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 2c0de3a3b8e8..2d5b98549ddc 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1098,7 +1098,8 @@ __i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) { mark_external(rq); return i915_sw_fence_await_dma_fence(&rq->submit, fence, - fence->context ? I915_FENCE_TIMEOUT : 0, + i915_fence_context_timeout(rq->i915, + fence->context), I915_FENCE_GFP); } From f1e79c7e183c8e35def44b07ff7ac221fa87bf04 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 May 2020 13:54:08 -0500 Subject: [PATCH 47/97] drm/i915: Replace zero-length array with flexible-array The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to get completely rid of those sorts of issues. This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200507185408.GA14561@embeddedor --- drivers/gpu/drm/i915/display/intel_vbt_defs.h | 4 ++-- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +- drivers/gpu/drm/i915/i915_gpu_error.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_vbt_defs.h b/drivers/gpu/drm/i915/display/intel_vbt_defs.h index 05c7cbe32eb4..aef7fe932d1a 100644 --- a/drivers/gpu/drm/i915/display/intel_vbt_defs.h +++ b/drivers/gpu/drm/i915/display/intel_vbt_defs.h @@ -462,7 +462,7 @@ struct bdb_general_definitions { * number = (block_size - sizeof(bdb_general_definitions))/ * defs->child_dev_size; */ - u8 devices[0]; + u8 devices[]; } __packed; /* @@ -839,7 +839,7 @@ struct bdb_mipi_config { struct bdb_mipi_sequence { u8 version; - u8 data[0]; /* up to 6 variable length blocks */ + u8 data[]; /* up to 6 variable length blocks */ } __packed; /* diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 28b477f531a5..8e254f639751 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -217,7 +217,7 @@ struct virtual_engine { /* And finally, which physical engines this virtual engine maps onto. */ unsigned int num_siblings; - struct intel_engine_cs *siblings[0]; + struct intel_engine_cs *siblings[]; }; static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index fa2d82a6de04..76b80fbfb7e9 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -42,7 +42,7 @@ struct i915_vma_coredump { int num_pages; int page_count; int unused; - u32 *pages[0]; + u32 *pages[]; }; struct i915_request_coredump { From f4d49692ad76804eb3d19e605bc5fc24d463efaa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 May 2020 08:57:03 +0100 Subject: [PATCH 48/97] drm/i915/gt: Mark up the racy read of execlists->context_tag Since we are using bitops on context_tag to allow us to reserve and release inflight tags concurrently, the scan for the next bit is intentionally racy. [ 516.446854] BUG: KCSAN: data-race in execlists_schedule_in.isra.0 [i915] / execlists_schedule_out [i915] [ 516.446874] [ 516.446886] write (marked) to 0xffff8881f7644048 of 8 bytes by interrupt on cpu 2: [ 516.447076] execlists_schedule_out+0x538/0x6a0 [i915] [ 516.447263] process_csb+0x10b/0x3d0 [i915] [ 516.447449] execlists_submission_tasklet+0x30/0x170 [i915] [ 516.447468] tasklet_action_common.isra.0+0x42/0x90 [ 516.447484] __do_softirq+0xc8/0x206 [ 516.447498] irq_exit+0xcd/0xe0 [ 516.447516] do_IRQ+0x44/0xc0 [ 516.447535] ret_from_intr+0x0/0x1c [ 516.447550] cpuidle_enter_state+0x199/0x400 [ 516.447572] cpuidle_enter+0x50/0x90 [ 516.447587] do_idle+0x197/0x1e0 [ 516.447600] cpu_startup_entry+0x14/0x20 [ 516.447619] start_secondary+0xf9/0x130 [ 516.447643] secondary_startup_64+0xa4/0xb0 [ 516.447655] [ 516.447671] read to 0xffff8881f7644048 of 8 bytes by task 460 on cpu 1: [ 516.447863] execlists_schedule_in.isra.0+0x3cf/0x5a0 [i915] [ 516.448064] execlists_dequeue+0xf8f/0x1690 [i915] [ 516.448252] __execlists_submission_tasklet+0x48/0x60 [i915] [ 516.448440] execlists_submit_request+0x2e2/0x310 [i915] [ 516.448634] submit_notify+0x8f/0xc8 [i915] [ 516.448820] __i915_sw_fence_complete+0x61/0x420 [i915] [ 516.449005] i915_sw_fence_complete+0x58/0x80 [i915] [ 516.449208] i915_sw_fence_commit+0x16/0x20 [i915] [ 516.449399] __i915_request_queue+0x60/0x70 [i915] [ 516.449590] i915_gem_do_execbuffer+0x33f1/0x4a00 [i915] [ 516.449782] i915_gem_execbuffer2_ioctl+0x2a2/0x550 [i915] [ 516.449800] drm_ioctl_kernel+0xe9/0x130 [ 516.449814] drm_ioctl+0x27d/0x45e [ 516.449827] ksys_ioctl+0x89/0xb0 [ 516.449842] __x64_sys_ioctl+0x42/0x60 [ 516.449864] do_syscall_64+0x6e/0x2c0 [ 516.449878] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200511075722.13483-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 8e254f639751..ed45fc40f884 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1367,7 +1367,7 @@ __execlists_schedule_in(struct i915_request *rq) ce->lrc.ccid = ce->tag; } else { /* We don't need a strict matching tag, just different values */ - unsigned int tag = ffs(engine->context_tag); + unsigned int tag = ffs(READ_ONCE(engine->context_tag)); GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG); clear_bit(tag - 1, &engine->context_tag); From e31fe02eff2610f40ac8d7efe57ec0b881b75508 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Mon, 11 May 2020 13:22:01 +0300 Subject: [PATCH 49/97] drm/i915: Make intel_timeline_init static Commit fb5970da1b42 ("drm/i915/gt: Use the kernel_context to measure the breadcrumb size") removed the last external user for intel_timeline_init. Mark it static. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200511102201.9275-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/intel_timeline.c | 8 ++++---- drivers/gpu/drm/i915/gt/intel_timeline.h | 5 ----- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index e1fac1b38f27..4546284fede1 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -211,9 +211,9 @@ static void cacheline_free(struct intel_timeline_cacheline *cl) i915_active_release(&cl->active); } -int intel_timeline_init(struct intel_timeline *timeline, - struct intel_gt *gt, - struct i915_vma *hwsp) +static int intel_timeline_init(struct intel_timeline *timeline, + struct intel_gt *gt, + struct i915_vma *hwsp) { void *vaddr; @@ -280,7 +280,7 @@ void intel_gt_init_timelines(struct intel_gt *gt) INIT_LIST_HEAD(&timelines->hwsp_free_list); } -void intel_timeline_fini(struct intel_timeline *timeline) +static void intel_timeline_fini(struct intel_timeline *timeline) { GEM_BUG_ON(atomic_read(&timeline->pin_count)); GEM_BUG_ON(!list_empty(&timeline->requests)); diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h index c8e59a333182..4298b9ac7327 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -31,11 +31,6 @@ #include "i915_syncmap.h" #include "gt/intel_timeline_types.h" -int intel_timeline_init(struct intel_timeline *tl, - struct intel_gt *gt, - struct i915_vma *hwsp); -void intel_timeline_fini(struct intel_timeline *tl); - struct intel_timeline * intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp); From a80d73673bc7676d0bab7f7ab51d00c5e461992d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 May 2020 08:57:07 +0100 Subject: [PATCH 50/97] drm/i915: Tidy awaiting on dma-fences Just tidy up the return handling for completed dma-fences. While it may return errors for invalid fence, we already know that we have a good fence and the only error will be an already signaled fence. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200511075722.13483-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_sw_fence.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 7daf81f55c90..295b9829e2da 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -546,13 +546,11 @@ int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, cb->fence = fence; i915_sw_fence_await(fence); - ret = dma_fence_add_callback(dma, &cb->base, __dma_i915_sw_fence_wake); - if (ret == 0) { - ret = 1; - } else { + ret = 1; + if (dma_fence_add_callback(dma, &cb->base, __dma_i915_sw_fence_wake)) { + /* fence already signaled */ __dma_i915_sw_fence_wake(dma, &cb->base); - if (ret == -ENOENT) /* fence already signaled */ - ret = 0; + ret = 0; } return ret; From 90c49a09c8495f1eac05b42a3f79fdc6bcb1eef0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 7 May 2020 17:41:25 +0300 Subject: [PATCH 51/97] drm/i915/mst: Wait for ACT sent before enabling the pipe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The correct sequence according to bspec is to wait for the ACT sent status before we turn on the pipe. Make it so. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200507144125.2458-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/display/intel_dp_mst.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 74559379384a..8c732418a33f 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -512,10 +512,6 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, intel_ddi_enable_transcoder_func(encoder, pipe_config); - intel_enable_pipe(pipe_config); - - intel_crtc_vblank_on(pipe_config); - drm_dbg_kms(&dev_priv->drm, "active links %d\n", intel_dp->active_mst_links); @@ -526,6 +522,11 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, drm_dp_check_act_status(&intel_dp->mst_mgr); drm_dp_update_payload_part2(&intel_dp->mst_mgr); + + intel_enable_pipe(pipe_config); + + intel_crtc_vblank_on(pipe_config); + if (pipe_config->has_audio) intel_audio_codec_enable(encoder, pipe_config, conn_state); } From 260a6c1bdf1e072ae4d96f0d1ec2917237f1b627 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 30 Apr 2020 15:58:21 +0300 Subject: [PATCH 52/97] drm/i915: Fix glk watermark calculations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GLK wants the +1 adjustement for the "blocks per line" value for x-tile/y-tile, just like cnl+. Also the x-tile and linear cases are almost identical. The only difference is this +1 which is always done for glk+, and only done for linear on skl/bxt. Let's unify it to a single branch with a special case for the +1, just like we do for y-tile. Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200430125822.21985-1-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy --- drivers/gpu/drm/i915/intel_pm.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 416cb1a1e7cb..0e53e277d6a6 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4868,7 +4868,7 @@ skl_wm_method1(const struct drm_i915_private *dev_priv, u32 pixel_rate, wm_intermediate_val = latency * pixel_rate * cpp; ret = div_fixed16(wm_intermediate_val, 1000 * dbuf_block_size); - if (INTEL_GEN(dev_priv) >= 10) + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) ret = add_fixed16_u32(ret, 1); return ret; @@ -5003,18 +5003,19 @@ skl_compute_wm_params(const struct intel_crtc_state *crtc_state, wp->y_min_scanlines, wp->dbuf_block_size); - if (INTEL_GEN(dev_priv) >= 10) + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) interm_pbpl++; wp->plane_blocks_per_line = div_fixed16(interm_pbpl, wp->y_min_scanlines); - } else if (wp->x_tiled && IS_GEN(dev_priv, 9)) { - interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, - wp->dbuf_block_size); - wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl); } else { interm_pbpl = DIV_ROUND_UP(wp->plane_bytes_per_line, - wp->dbuf_block_size) + 1; + wp->dbuf_block_size); + + if (!wp->x_tiled || + INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + interm_pbpl++; + wp->plane_blocks_per_line = u32_to_fixed16(interm_pbpl); } From edc0e09c6ba81fc7a06cc68deec430519b7d18d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 11 Mar 2020 17:54:20 +0200 Subject: [PATCH 53/97] drm/i915: Turn intel_digital_port_connected() in a vfunc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's get rid of the platform if ladders in intel_digital_port_connected() and make it a vfunc. Now the if ladders are at the encoder initialization which makes them a bit less convoluted. v2: Add forward decl for intel_encoder in intel_tc.h v3: Duplicate stuff to avoid exposing platform specific functions across files (Jani) Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200311155422.3043-2-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/display/intel_ddi.c | 109 +++++++++++++ .../drm/i915/display/intel_display_types.h | 1 + drivers/gpu/drm/i915/display/intel_dp.c | 147 +++--------------- drivers/gpu/drm/i915/display/intel_tc.c | 3 +- drivers/gpu/drm/i915/display/intel_tc.h | 3 +- 5 files changed, 135 insertions(+), 128 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 5601673c3f30..75339bb25fd7 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4689,6 +4689,96 @@ intel_ddi_hotplug(struct intel_encoder *encoder, return state; } +static bool lpt_digital_port_connected(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + u32 bit; + + switch (encoder->hpd_pin) { + case HPD_PORT_B: + bit = SDE_PORTB_HOTPLUG_CPT; + break; + case HPD_PORT_C: + bit = SDE_PORTC_HOTPLUG_CPT; + break; + case HPD_PORT_D: + bit = SDE_PORTD_HOTPLUG_CPT; + break; + default: + MISSING_CASE(encoder->hpd_pin); + return false; + } + + return intel_de_read(dev_priv, SDEISR) & bit; +} + +static bool spt_digital_port_connected(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + u32 bit; + + switch (encoder->hpd_pin) { + case HPD_PORT_A: + bit = SDE_PORTA_HOTPLUG_SPT; + break; + case HPD_PORT_E: + bit = SDE_PORTE_HOTPLUG_SPT; + break; + default: + return lpt_digital_port_connected(encoder); + } + + return intel_de_read(dev_priv, SDEISR) & bit; +} + +static bool hsw_digital_port_connected(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + return intel_de_read(dev_priv, DEISR) & DE_DP_A_HOTPLUG_IVB; +} + +static bool bdw_digital_port_connected(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + return intel_de_read(dev_priv, GEN8_DE_PORT_ISR) & GEN8_PORT_DP_A_HOTPLUG; +} + +static bool bxt_digital_port_connected(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + u32 bit; + + switch (encoder->hpd_pin) { + case HPD_PORT_A: + bit = BXT_DE_PORT_HP_DDIA; + break; + case HPD_PORT_B: + bit = BXT_DE_PORT_HP_DDIB; + break; + case HPD_PORT_C: + bit = BXT_DE_PORT_HP_DDIC; + break; + default: + MISSING_CASE(encoder->hpd_pin); + return false; + } + + return intel_de_read(dev_priv, GEN8_DE_PORT_ISR) & bit; +} + +static bool icp_digital_port_connected(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + + if (HAS_PCH_MCC(dev_priv) && phy == PHY_C) + return intel_de_read(dev_priv, SDEISR) & SDE_TC_HOTPLUG_ICP(PORT_TC1); + + return intel_de_read(dev_priv, SDEISR) & SDE_DDI_HOTPLUG_ICP(phy); +} + static struct intel_connector * intel_ddi_init_hdmi_connector(struct intel_digital_port *intel_dig_port) { @@ -4885,6 +4975,25 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) port_name(port)); } + if (INTEL_GEN(dev_priv) >= 11) { + if (intel_phy_is_tc(dev_priv, phy)) + intel_dig_port->connected = intel_tc_port_connected; + else + intel_dig_port->connected = icp_digital_port_connected; + } else if (IS_GEN9_LP(dev_priv)) { + intel_dig_port->connected = bxt_digital_port_connected; + } else if (port == PORT_A) { + if (INTEL_GEN(dev_priv) >= 8) + intel_dig_port->connected = bdw_digital_port_connected; + else + intel_dig_port->connected = hsw_digital_port_connected; + } else { + if (INTEL_PCH_TYPE(dev_priv) >= PCH_SPT) + intel_dig_port->connected = spt_digital_port_connected; + else + intel_dig_port->connected = lpt_digital_port_connected; + } + intel_infoframe_init(intel_dig_port); return; diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index e58e25779bd6..1b689d6bac4c 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -1424,6 +1424,7 @@ struct intel_digital_port { const struct drm_connector_state *conn_state); u32 (*infoframes_enabled)(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config); + bool (*connected)(struct intel_encoder *encoder); }; struct intel_dp_mst_encoder { diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 6952b0295096..99586b20b610 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -6041,25 +6041,6 @@ static bool cpt_digital_port_connected(struct intel_encoder *encoder) return intel_de_read(dev_priv, SDEISR) & bit; } -static bool spt_digital_port_connected(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 bit; - - switch (encoder->hpd_pin) { - case HPD_PORT_A: - bit = SDE_PORTA_HOTPLUG_SPT; - break; - case HPD_PORT_E: - bit = SDE_PORTE_HOTPLUG_SPT; - break; - default: - return cpt_digital_port_connected(encoder); - } - - return intel_de_read(dev_priv, SDEISR) & bit; -} - static bool g4x_digital_port_connected(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -6110,88 +6091,14 @@ static bool ilk_digital_port_connected(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (encoder->hpd_pin == HPD_PORT_A) - return intel_de_read(dev_priv, DEISR) & DE_DP_A_HOTPLUG; - else - return ibx_digital_port_connected(encoder); -} - -static bool snb_digital_port_connected(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - - if (encoder->hpd_pin == HPD_PORT_A) - return intel_de_read(dev_priv, DEISR) & DE_DP_A_HOTPLUG; - else - return cpt_digital_port_connected(encoder); + return intel_de_read(dev_priv, DEISR) & DE_DP_A_HOTPLUG; } static bool ivb_digital_port_connected(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - if (encoder->hpd_pin == HPD_PORT_A) - return intel_de_read(dev_priv, DEISR) & DE_DP_A_HOTPLUG_IVB; - else - return cpt_digital_port_connected(encoder); -} - -static bool bdw_digital_port_connected(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - - if (encoder->hpd_pin == HPD_PORT_A) - return intel_de_read(dev_priv, GEN8_DE_PORT_ISR) & GEN8_PORT_DP_A_HOTPLUG; - else - return cpt_digital_port_connected(encoder); -} - -static bool bxt_digital_port_connected(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 bit; - - switch (encoder->hpd_pin) { - case HPD_PORT_A: - bit = BXT_DE_PORT_HP_DDIA; - break; - case HPD_PORT_B: - bit = BXT_DE_PORT_HP_DDIB; - break; - case HPD_PORT_C: - bit = BXT_DE_PORT_HP_DDIC; - break; - default: - MISSING_CASE(encoder->hpd_pin); - return false; - } - - return intel_de_read(dev_priv, GEN8_DE_PORT_ISR) & bit; -} - -static bool intel_combo_phy_connected(struct drm_i915_private *dev_priv, - enum phy phy) -{ - if (HAS_PCH_MCC(dev_priv) && phy == PHY_C) - return intel_de_read(dev_priv, SDEISR) & SDE_TC_HOTPLUG_ICP(PORT_TC1); - - return intel_de_read(dev_priv, SDEISR) & SDE_DDI_HOTPLUG_ICP(phy); -} - -static bool icp_digital_port_connected(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = enc_to_dig_port(encoder); - enum phy phy = intel_port_to_phy(dev_priv, encoder->port); - - if (intel_phy_is_combo(dev_priv, phy)) - return intel_combo_phy_connected(dev_priv, phy); - else if (intel_phy_is_tc(dev_priv, phy)) - return intel_tc_port_connected(dig_port); - else - MISSING_CASE(encoder->hpd_pin); - - return false; + return intel_de_read(dev_priv, DEISR) & DE_DP_A_HOTPLUG_IVB; } /* @@ -6205,44 +6112,15 @@ static bool icp_digital_port_connected(struct intel_encoder *encoder) * * Return %true if port is connected, %false otherwise. */ -static bool __intel_digital_port_connected(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - - if (HAS_GMCH(dev_priv)) { - if (IS_GM45(dev_priv)) - return gm45_digital_port_connected(encoder); - else - return g4x_digital_port_connected(encoder); - } - - if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) - return icp_digital_port_connected(encoder); - else if (INTEL_PCH_TYPE(dev_priv) >= PCH_SPT) - return spt_digital_port_connected(encoder); - else if (IS_GEN9_LP(dev_priv)) - return bxt_digital_port_connected(encoder); - else if (IS_GEN(dev_priv, 8)) - return bdw_digital_port_connected(encoder); - else if (IS_GEN(dev_priv, 7)) - return ivb_digital_port_connected(encoder); - else if (IS_GEN(dev_priv, 6)) - return snb_digital_port_connected(encoder); - else if (IS_GEN(dev_priv, 5)) - return ilk_digital_port_connected(encoder); - - MISSING_CASE(INTEL_GEN(dev_priv)); - return false; -} - bool intel_digital_port_connected(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); bool is_connected = false; intel_wakeref_t wakeref; with_intel_display_power(dev_priv, POWER_DOMAIN_DISPLAY_CORE, wakeref) - is_connected = __intel_digital_port_connected(encoder); + is_connected = dig_port->connected(encoder); return is_connected; } @@ -8522,6 +8400,23 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_dig_port->hpd_pulse = intel_dp_hpd_pulse; + if (HAS_GMCH(dev_priv)) { + if (IS_GM45(dev_priv)) + intel_dig_port->connected = gm45_digital_port_connected; + else + intel_dig_port->connected = g4x_digital_port_connected; + } else if (port == PORT_A) { + if (IS_IVYBRIDGE(dev_priv)) + intel_dig_port->connected = ivb_digital_port_connected; + else + intel_dig_port->connected = ilk_digital_port_connected; + } else { + if (HAS_PCH_CPT(dev_priv)) + intel_dig_port->connected = cpt_digital_port_connected; + else + intel_dig_port->connected = ibx_digital_port_connected; + } + if (port != PORT_A) intel_infoframe_init(intel_dig_port); diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index d3bd5e798fbc..8aadca51fce6 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -562,8 +562,9 @@ static bool intel_tc_port_needs_reset(struct intel_digital_port *dig_port) * connected ports are usable, and avoids exposing to the users objects they * can't really use. */ -bool intel_tc_port_connected(struct intel_digital_port *dig_port) +bool intel_tc_port_connected(struct intel_encoder *encoder) { + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); bool is_connected; intel_wakeref_t tc_cold_wref; diff --git a/drivers/gpu/drm/i915/display/intel_tc.h b/drivers/gpu/drm/i915/display/intel_tc.h index 463f1b3c836f..b619e4736f85 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.h +++ b/drivers/gpu/drm/i915/display/intel_tc.h @@ -10,8 +10,9 @@ #include struct intel_digital_port; +struct intel_encoder; -bool intel_tc_port_connected(struct intel_digital_port *dig_port); +bool intel_tc_port_connected(struct intel_encoder *encoder); u32 intel_tc_port_get_lane_mask(struct intel_digital_port *dig_port); u32 intel_tc_port_get_pin_assignment_mask(struct intel_digital_port *dig_port); int intel_tc_port_fia_max_lane_count(struct intel_digital_port *dig_port); From 0398993b82f40ad02d88da7c894e3faae2da3b0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 7 May 2020 14:48:08 +0300 Subject: [PATCH 54/97] drm/i915: Stash hpd status bits under dev_priv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of constnantly having to figure out which hpd status bit array to use let's store them under dev_priv. Should perhaps take this further and stash even more stuff to make the hpd handling more abstract yet. v2: Remeber cnp (Imre) Add MISSING_CASE() for unknown PCHs (Imre) Cc: Imre Deak Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200507114808.6150-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_irq.c | 203 ++++++++++++++++++-------------- 2 files changed, 116 insertions(+), 89 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 99d8b103057f..7fad6f2d80fa 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -148,6 +148,8 @@ enum hpd_pin { struct i915_hotplug { struct delayed_work hotplug_work; + const u32 *hpd, *pch_hpd; + struct { unsigned long last_jiffies; int count; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index ea4c87784a27..4dc601dffc08 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -124,7 +124,6 @@ static const u32 hpd_status_i915[HPD_NUM_PINS] = { [HPD_PORT_D] = PORTD_HOTPLUG_INT_STATUS, }; -/* BXT hpd list */ static const u32 hpd_bxt[HPD_NUM_PINS] = { [HPD_PORT_A] = BXT_DE_PORT_HP_DDIA, [HPD_PORT_B] = BXT_DE_PORT_HP_DDIB, @@ -168,6 +167,49 @@ static const u32 hpd_tgp[HPD_NUM_PINS] = { [HPD_PORT_I] = SDE_TC_HOTPLUG_ICP(PORT_TC6), }; +static void intel_hpd_init_pins(struct drm_i915_private *dev_priv) +{ + struct i915_hotplug *hpd = &dev_priv->hotplug; + + if (HAS_GMCH(dev_priv)) { + if (IS_G4X(dev_priv) || IS_VALLEYVIEW(dev_priv) || + IS_CHERRYVIEW(dev_priv)) + hpd->hpd = hpd_status_g4x; + else + hpd->hpd = hpd_status_i915; + return; + } + + if (INTEL_GEN(dev_priv) >= 12) + hpd->hpd = hpd_gen12; + else if (INTEL_GEN(dev_priv) >= 11) + hpd->hpd = hpd_gen11; + else if (IS_GEN9_LP(dev_priv)) + hpd->hpd = hpd_bxt; + else if (INTEL_GEN(dev_priv) >= 8) + hpd->hpd = hpd_bdw; + else if (INTEL_GEN(dev_priv) >= 7) + hpd->hpd = hpd_ivb; + else + hpd->hpd = hpd_ilk; + + if (!HAS_PCH_SPLIT(dev_priv) || HAS_PCH_NOP(dev_priv)) + return; + + if (HAS_PCH_TGP(dev_priv) || HAS_PCH_JSP(dev_priv)) + hpd->pch_hpd = hpd_tgp; + else if (HAS_PCH_ICP(dev_priv) || HAS_PCH_MCC(dev_priv)) + hpd->pch_hpd = hpd_icp; + else if (HAS_PCH_CNP(dev_priv) || HAS_PCH_SPT(dev_priv)) + hpd->pch_hpd = hpd_spt; + else if (HAS_PCH_LPT(dev_priv) || HAS_PCH_CPT(dev_priv)) + hpd->pch_hpd = hpd_cpt; + else if (HAS_PCH_IBX(dev_priv)) + hpd->pch_hpd = hpd_ibx; + else + MISSING_CASE(INTEL_PCH_TYPE(dev_priv)); +} + static void intel_handle_vblank(struct drm_i915_private *dev_priv, enum pipe pipe) { @@ -1504,33 +1546,27 @@ static void i9xx_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 hotplug_status) { u32 pin_mask = 0, long_mask = 0; + u32 hotplug_trigger; - if (IS_G4X(dev_priv) || IS_VALLEYVIEW(dev_priv) || - IS_CHERRYVIEW(dev_priv)) { - u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_G4X; + if (IS_G4X(dev_priv) || + IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_G4X; + else + hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_I915; - if (hotplug_trigger) { - intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, - hotplug_trigger, hotplug_trigger, - hpd_status_g4x, - i9xx_port_hotplug_long_detect); + if (hotplug_trigger) { + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, + hotplug_trigger, hotplug_trigger, + dev_priv->hotplug.hpd, + i9xx_port_hotplug_long_detect); - intel_hpd_irq_handler(dev_priv, pin_mask, long_mask); - } - - if (hotplug_status & DP_AUX_CHANNEL_MASK_INT_STATUS_G4X) - dp_aux_irq_handler(dev_priv); - } else { - u32 hotplug_trigger = hotplug_status & HOTPLUG_INT_STATUS_I915; - - if (hotplug_trigger) { - intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, - hotplug_trigger, hotplug_trigger, - hpd_status_i915, - i9xx_port_hotplug_long_detect); - intel_hpd_irq_handler(dev_priv, pin_mask, long_mask); - } + intel_hpd_irq_handler(dev_priv, pin_mask, long_mask); } + + if ((IS_G4X(dev_priv) || + IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + hotplug_status & DP_AUX_CHANNEL_MASK_INT_STATUS_G4X) + dp_aux_irq_handler(dev_priv); } static irqreturn_t valleyview_irq_handler(int irq, void *arg) @@ -1696,8 +1732,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg) } static void ibx_hpd_irq_handler(struct drm_i915_private *dev_priv, - u32 hotplug_trigger, - const u32 hpd[HPD_NUM_PINS]) + u32 hotplug_trigger) { u32 dig_hotplug_reg, pin_mask = 0, long_mask = 0; @@ -1720,8 +1755,9 @@ static void ibx_hpd_irq_handler(struct drm_i915_private *dev_priv, if (!hotplug_trigger) return; - intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, hotplug_trigger, - dig_hotplug_reg, hpd, + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, + hotplug_trigger, dig_hotplug_reg, + dev_priv->hotplug.pch_hpd, pch_port_hotplug_long_detect); intel_hpd_irq_handler(dev_priv, pin_mask, long_mask); @@ -1732,7 +1768,7 @@ static void ibx_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) enum pipe pipe; u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK; - ibx_hpd_irq_handler(dev_priv, hotplug_trigger, hpd_ibx); + ibx_hpd_irq_handler(dev_priv, hotplug_trigger); if (pch_iir & SDE_AUDIO_POWER_MASK) { int port = ffs((pch_iir & SDE_AUDIO_POWER_MASK) >> @@ -1820,7 +1856,7 @@ static void cpt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) enum pipe pipe; u32 hotplug_trigger = pch_iir & SDE_HOTPLUG_MASK_CPT; - ibx_hpd_irq_handler(dev_priv, hotplug_trigger, hpd_cpt); + ibx_hpd_irq_handler(dev_priv, hotplug_trigger); if (pch_iir & SDE_AUDIO_POWER_MASK_CPT) { int port = ffs((pch_iir & SDE_AUDIO_POWER_MASK_CPT) >> @@ -1857,22 +1893,18 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) u32 ddi_hotplug_trigger, tc_hotplug_trigger; u32 pin_mask = 0, long_mask = 0; bool (*tc_port_hotplug_long_detect)(enum hpd_pin pin, u32 val); - const u32 *pins; if (HAS_PCH_TGP(dev_priv)) { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; tc_hotplug_trigger = pch_iir & SDE_TC_MASK_TGP; tc_port_hotplug_long_detect = tgp_tc_port_hotplug_long_detect; - pins = hpd_tgp; } else if (HAS_PCH_JSP(dev_priv)) { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_TGP; tc_hotplug_trigger = 0; - pins = hpd_tgp; } else if (HAS_PCH_MCC(dev_priv)) { ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; tc_hotplug_trigger = pch_iir & SDE_TC_HOTPLUG_ICP(PORT_TC1); tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect; - pins = hpd_icp; } else { drm_WARN(&dev_priv->drm, !HAS_PCH_ICP(dev_priv), "Unrecognized PCH type 0x%x\n", @@ -1881,7 +1913,6 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) ddi_hotplug_trigger = pch_iir & SDE_DDI_MASK_ICP; tc_hotplug_trigger = pch_iir & SDE_TC_MASK_ICP; tc_port_hotplug_long_detect = icp_tc_port_hotplug_long_detect; - pins = hpd_icp; } if (ddi_hotplug_trigger) { @@ -1891,8 +1922,8 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) I915_WRITE(SHOTPLUG_CTL_DDI, dig_hotplug_reg); intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, - ddi_hotplug_trigger, - dig_hotplug_reg, pins, + ddi_hotplug_trigger, dig_hotplug_reg, + dev_priv->hotplug.pch_hpd, icp_ddi_port_hotplug_long_detect); } @@ -1903,8 +1934,8 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) I915_WRITE(SHOTPLUG_CTL_TC, dig_hotplug_reg); intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, - tc_hotplug_trigger, - dig_hotplug_reg, pins, + tc_hotplug_trigger, dig_hotplug_reg, + dev_priv->hotplug.pch_hpd, tc_port_hotplug_long_detect); } @@ -1929,7 +1960,8 @@ static void spt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) I915_WRITE(PCH_PORT_HOTPLUG, dig_hotplug_reg); intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, - hotplug_trigger, dig_hotplug_reg, hpd_spt, + hotplug_trigger, dig_hotplug_reg, + dev_priv->hotplug.pch_hpd, spt_port_hotplug_long_detect); } @@ -1940,7 +1972,8 @@ static void spt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) I915_WRITE(PCH_PORT_HOTPLUG2, dig_hotplug_reg); intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, - hotplug2_trigger, dig_hotplug_reg, hpd_spt, + hotplug2_trigger, dig_hotplug_reg, + dev_priv->hotplug.pch_hpd, spt_port_hotplug2_long_detect); } @@ -1952,16 +1985,16 @@ static void spt_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) } static void ilk_hpd_irq_handler(struct drm_i915_private *dev_priv, - u32 hotplug_trigger, - const u32 hpd[HPD_NUM_PINS]) + u32 hotplug_trigger) { u32 dig_hotplug_reg, pin_mask = 0, long_mask = 0; dig_hotplug_reg = I915_READ(DIGITAL_PORT_HOTPLUG_CNTRL); I915_WRITE(DIGITAL_PORT_HOTPLUG_CNTRL, dig_hotplug_reg); - intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, hotplug_trigger, - dig_hotplug_reg, hpd, + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, + hotplug_trigger, dig_hotplug_reg, + dev_priv->hotplug.hpd, ilk_port_hotplug_long_detect); intel_hpd_irq_handler(dev_priv, pin_mask, long_mask); @@ -1974,7 +2007,7 @@ static void ilk_display_irq_handler(struct drm_i915_private *dev_priv, u32 hotplug_trigger = de_iir & DE_DP_A_HOTPLUG; if (hotplug_trigger) - ilk_hpd_irq_handler(dev_priv, hotplug_trigger, hpd_ilk); + ilk_hpd_irq_handler(dev_priv, hotplug_trigger); if (de_iir & DE_AUX_CHANNEL_A) dp_aux_irq_handler(dev_priv); @@ -2020,7 +2053,7 @@ static void ivb_display_irq_handler(struct drm_i915_private *dev_priv, u32 hotplug_trigger = de_iir & DE_DP_A_HOTPLUG_IVB; if (hotplug_trigger) - ilk_hpd_irq_handler(dev_priv, hotplug_trigger, hpd_ivb); + ilk_hpd_irq_handler(dev_priv, hotplug_trigger); if (de_iir & DE_ERR_INT_IVB) ivb_err_int_handler(dev_priv); @@ -2130,16 +2163,16 @@ static irqreturn_t ilk_irq_handler(int irq, void *arg) } static void bxt_hpd_irq_handler(struct drm_i915_private *dev_priv, - u32 hotplug_trigger, - const u32 hpd[HPD_NUM_PINS]) + u32 hotplug_trigger) { u32 dig_hotplug_reg, pin_mask = 0, long_mask = 0; dig_hotplug_reg = I915_READ(PCH_PORT_HOTPLUG); I915_WRITE(PCH_PORT_HOTPLUG, dig_hotplug_reg); - intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, hotplug_trigger, - dig_hotplug_reg, hpd, + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, + hotplug_trigger, dig_hotplug_reg, + dev_priv->hotplug.hpd, bxt_port_hotplug_long_detect); intel_hpd_irq_handler(dev_priv, pin_mask, long_mask); @@ -2151,15 +2184,11 @@ static void gen11_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 iir) u32 trigger_tc = iir & GEN11_DE_TC_HOTPLUG_MASK; u32 trigger_tbt = iir & GEN11_DE_TBT_HOTPLUG_MASK; long_pulse_detect_func long_pulse_detect; - const u32 *hpd; - if (INTEL_GEN(dev_priv) >= 12) { + if (INTEL_GEN(dev_priv) >= 12) long_pulse_detect = gen12_port_hotplug_long_detect; - hpd = hpd_gen12; - } else { + else long_pulse_detect = gen11_port_hotplug_long_detect; - hpd = hpd_gen11; - } if (trigger_tc) { u32 dig_hotplug_reg; @@ -2167,8 +2196,10 @@ static void gen11_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 iir) dig_hotplug_reg = I915_READ(GEN11_TC_HOTPLUG_CTL); I915_WRITE(GEN11_TC_HOTPLUG_CTL, dig_hotplug_reg); - intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, trigger_tc, - dig_hotplug_reg, hpd, long_pulse_detect); + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, + trigger_tc, dig_hotplug_reg, + dev_priv->hotplug.hpd, + long_pulse_detect); } if (trigger_tbt) { @@ -2177,8 +2208,10 @@ static void gen11_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 iir) dig_hotplug_reg = I915_READ(GEN11_TBT_HOTPLUG_CTL); I915_WRITE(GEN11_TBT_HOTPLUG_CTL, dig_hotplug_reg); - intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, trigger_tbt, - dig_hotplug_reg, hpd, long_pulse_detect); + intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, + trigger_tbt, dig_hotplug_reg, + dev_priv->hotplug.hpd, + long_pulse_detect); } if (pin_mask) @@ -2309,15 +2342,13 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) if (IS_GEN9_LP(dev_priv)) { tmp_mask = iir & BXT_DE_PORT_HOTPLUG_MASK; if (tmp_mask) { - bxt_hpd_irq_handler(dev_priv, tmp_mask, - hpd_bxt); + bxt_hpd_irq_handler(dev_priv, tmp_mask); found = true; } } else if (IS_BROADWELL(dev_priv)) { tmp_mask = iir & GEN8_PORT_DP_A_HOTPLUG; if (tmp_mask) { - ilk_hpd_irq_handler(dev_priv, - tmp_mask, hpd_bdw); + ilk_hpd_irq_handler(dev_priv, tmp_mask); found = true; } } @@ -2997,13 +3028,12 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) { u32 hotplug_irqs, enabled_irqs; - if (HAS_PCH_IBX(dev_priv)) { + if (HAS_PCH_IBX(dev_priv)) hotplug_irqs = SDE_HOTPLUG_MASK; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_ibx); - } else { + else hotplug_irqs = SDE_HOTPLUG_MASK_CPT; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_cpt); - } + + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.pch_hpd); ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); @@ -3029,13 +3059,12 @@ static void icp_hpd_detection_setup(struct drm_i915_private *dev_priv, static void icp_hpd_irq_setup(struct drm_i915_private *dev_priv, u32 sde_ddi_mask, u32 sde_tc_mask, - u32 ddi_enable_mask, u32 tc_enable_mask, - const u32 *pins) + u32 ddi_enable_mask, u32 tc_enable_mask) { u32 hotplug_irqs, enabled_irqs; hotplug_irqs = sde_ddi_mask | sde_tc_mask; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, pins); + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.pch_hpd); I915_WRITE(SHPD_FILTER_CNT, SHPD_FILTER_CNT_500_ADJ); @@ -3052,8 +3081,7 @@ static void mcc_hpd_irq_setup(struct drm_i915_private *dev_priv) { icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_ICP, SDE_TC_HOTPLUG_ICP(PORT_TC1), - ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE(PORT_TC1), - hpd_icp); + ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE(PORT_TC1)); } /* @@ -3065,8 +3093,7 @@ static void jsp_hpd_irq_setup(struct drm_i915_private *dev_priv) { icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_TGP, 0, - TGP_DDI_HPD_ENABLE_MASK, 0, - hpd_tgp); + TGP_DDI_HPD_ENABLE_MASK, 0); } static void gen11_hpd_detection_setup(struct drm_i915_private *dev_priv) @@ -3091,11 +3118,9 @@ static void gen11_hpd_detection_setup(struct drm_i915_private *dev_priv) static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv) { u32 hotplug_irqs, enabled_irqs; - const u32 *hpd; u32 val; - hpd = INTEL_GEN(dev_priv) >= 12 ? hpd_gen12 : hpd_gen11; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd); + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); hotplug_irqs = GEN11_DE_TC_HOTPLUG_MASK | GEN11_DE_TBT_HOTPLUG_MASK; val = I915_READ(GEN11_DE_HPD_IMR); @@ -3107,12 +3132,10 @@ static void gen11_hpd_irq_setup(struct drm_i915_private *dev_priv) if (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP) icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_TGP, SDE_TC_MASK_TGP, - TGP_DDI_HPD_ENABLE_MASK, - TGP_TC_HPD_ENABLE_MASK, hpd_tgp); + TGP_DDI_HPD_ENABLE_MASK, TGP_TC_HPD_ENABLE_MASK); else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) icp_hpd_irq_setup(dev_priv, SDE_DDI_MASK_ICP, SDE_TC_MASK_ICP, - ICP_DDI_HPD_ENABLE_MASK, - ICP_TC_HPD_ENABLE_MASK, hpd_icp); + ICP_DDI_HPD_ENABLE_MASK, ICP_TC_HPD_ENABLE_MASK); } static void spt_hpd_detection_setup(struct drm_i915_private *dev_priv) @@ -3148,7 +3171,7 @@ static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv) I915_WRITE(SHPD_FILTER_CNT, SHPD_FILTER_CNT_500_ADJ); hotplug_irqs = SDE_HOTPLUG_MASK_SPT; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_spt); + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.pch_hpd); ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); @@ -3177,17 +3200,17 @@ static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) >= 8) { hotplug_irqs = GEN8_PORT_DP_A_HOTPLUG; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_bdw); + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); bdw_update_port_irq(dev_priv, hotplug_irqs, enabled_irqs); } else if (INTEL_GEN(dev_priv) >= 7) { hotplug_irqs = DE_DP_A_HOTPLUG_IVB; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_ivb); + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); ilk_update_display_irq(dev_priv, hotplug_irqs, enabled_irqs); } else { hotplug_irqs = DE_DP_A_HOTPLUG; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_ilk); + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); ilk_update_display_irq(dev_priv, hotplug_irqs, enabled_irqs); } @@ -3238,7 +3261,7 @@ static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv) { u32 hotplug_irqs, enabled_irqs; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_bxt); + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, dev_priv->hotplug.hpd); hotplug_irqs = BXT_DE_PORT_HOTPLUG_MASK; bdw_update_port_irq(dev_priv, hotplug_irqs, enabled_irqs); @@ -3932,6 +3955,8 @@ void intel_irq_init(struct drm_i915_private *dev_priv) struct drm_device *dev = &dev_priv->drm; int i; + intel_hpd_init_pins(dev_priv); + intel_hpd_init_work(dev_priv); INIT_WORK(&dev_priv->l3_parity.error_work, ivb_parity_work); From c7e8a3d674fbaa5b12ddc681bdf46c34a27e55d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 11 Mar 2020 17:54:22 +0200 Subject: [PATCH 55/97] drm/i915: Use stashed away hpd isr bits in intel_digital_port_connected() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Get rid of several platform specific variants of intel_digital_port_connected() and just use the ISR bits we've stashed away. v2: Duplicate stuff to avoid exposing platform specific functions across files (Jani) Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200311155422.3043-4-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/display/intel_ddi.c | 88 +++--------------------- drivers/gpu/drm/i915/display/intel_dp.c | 59 ++-------------- drivers/gpu/drm/i915/display/intel_tc.c | 4 +- 3 files changed, 17 insertions(+), 134 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 75339bb25fd7..798889f72495 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4692,41 +4692,7 @@ intel_ddi_hotplug(struct intel_encoder *encoder, static bool lpt_digital_port_connected(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 bit; - - switch (encoder->hpd_pin) { - case HPD_PORT_B: - bit = SDE_PORTB_HOTPLUG_CPT; - break; - case HPD_PORT_C: - bit = SDE_PORTC_HOTPLUG_CPT; - break; - case HPD_PORT_D: - bit = SDE_PORTD_HOTPLUG_CPT; - break; - default: - MISSING_CASE(encoder->hpd_pin); - return false; - } - - return intel_de_read(dev_priv, SDEISR) & bit; -} - -static bool spt_digital_port_connected(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 bit; - - switch (encoder->hpd_pin) { - case HPD_PORT_A: - bit = SDE_PORTA_HOTPLUG_SPT; - break; - case HPD_PORT_E: - bit = SDE_PORTE_HOTPLUG_SPT; - break; - default: - return lpt_digital_port_connected(encoder); - } + u32 bit = dev_priv->hotplug.pch_hpd[encoder->hpd_pin]; return intel_de_read(dev_priv, SDEISR) & bit; } @@ -4734,51 +4700,19 @@ static bool spt_digital_port_connected(struct intel_encoder *encoder) static bool hsw_digital_port_connected(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + u32 bit = dev_priv->hotplug.hpd[encoder->hpd_pin]; - return intel_de_read(dev_priv, DEISR) & DE_DP_A_HOTPLUG_IVB; + return intel_de_read(dev_priv, DEISR) & bit; } static bool bdw_digital_port_connected(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - - return intel_de_read(dev_priv, GEN8_DE_PORT_ISR) & GEN8_PORT_DP_A_HOTPLUG; -} - -static bool bxt_digital_port_connected(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 bit; - - switch (encoder->hpd_pin) { - case HPD_PORT_A: - bit = BXT_DE_PORT_HP_DDIA; - break; - case HPD_PORT_B: - bit = BXT_DE_PORT_HP_DDIB; - break; - case HPD_PORT_C: - bit = BXT_DE_PORT_HP_DDIC; - break; - default: - MISSING_CASE(encoder->hpd_pin); - return false; - } + u32 bit = dev_priv->hotplug.hpd[encoder->hpd_pin]; return intel_de_read(dev_priv, GEN8_DE_PORT_ISR) & bit; } -static bool icp_digital_port_connected(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - enum phy phy = intel_port_to_phy(dev_priv, encoder->port); - - if (HAS_PCH_MCC(dev_priv) && phy == PHY_C) - return intel_de_read(dev_priv, SDEISR) & SDE_TC_HOTPLUG_ICP(PORT_TC1); - - return intel_de_read(dev_priv, SDEISR) & SDE_DDI_HOTPLUG_ICP(phy); -} - static struct intel_connector * intel_ddi_init_hdmi_connector(struct intel_digital_port *intel_dig_port) { @@ -4979,17 +4913,15 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) if (intel_phy_is_tc(dev_priv, phy)) intel_dig_port->connected = intel_tc_port_connected; else - intel_dig_port->connected = icp_digital_port_connected; - } else if (IS_GEN9_LP(dev_priv)) { - intel_dig_port->connected = bxt_digital_port_connected; - } else if (port == PORT_A) { - if (INTEL_GEN(dev_priv) >= 8) + intel_dig_port->connected = lpt_digital_port_connected; + } else if (INTEL_GEN(dev_priv) >= 8) { + if (port == PORT_A || IS_GEN9_LP(dev_priv)) intel_dig_port->connected = bdw_digital_port_connected; else - intel_dig_port->connected = hsw_digital_port_connected; + intel_dig_port->connected = lpt_digital_port_connected; } else { - if (INTEL_PCH_TYPE(dev_priv) >= PCH_SPT) - intel_dig_port->connected = spt_digital_port_connected; + if (port == PORT_A) + intel_dig_port->connected = hsw_digital_port_connected; else intel_dig_port->connected = lpt_digital_port_connected; } diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 99586b20b610..67723dede1d1 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5998,45 +5998,7 @@ edp_detect(struct intel_dp *intel_dp) static bool ibx_digital_port_connected(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 bit; - - switch (encoder->hpd_pin) { - case HPD_PORT_B: - bit = SDE_PORTB_HOTPLUG; - break; - case HPD_PORT_C: - bit = SDE_PORTC_HOTPLUG; - break; - case HPD_PORT_D: - bit = SDE_PORTD_HOTPLUG; - break; - default: - MISSING_CASE(encoder->hpd_pin); - return false; - } - - return intel_de_read(dev_priv, SDEISR) & bit; -} - -static bool cpt_digital_port_connected(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - u32 bit; - - switch (encoder->hpd_pin) { - case HPD_PORT_B: - bit = SDE_PORTB_HOTPLUG_CPT; - break; - case HPD_PORT_C: - bit = SDE_PORTC_HOTPLUG_CPT; - break; - case HPD_PORT_D: - bit = SDE_PORTD_HOTPLUG_CPT; - break; - default: - MISSING_CASE(encoder->hpd_pin); - return false; - } + u32 bit = dev_priv->hotplug.pch_hpd[encoder->hpd_pin]; return intel_de_read(dev_priv, SDEISR) & bit; } @@ -6090,15 +6052,9 @@ static bool gm45_digital_port_connected(struct intel_encoder *encoder) static bool ilk_digital_port_connected(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + u32 bit = dev_priv->hotplug.hpd[encoder->hpd_pin]; - return intel_de_read(dev_priv, DEISR) & DE_DP_A_HOTPLUG; -} - -static bool ivb_digital_port_connected(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - - return intel_de_read(dev_priv, DEISR) & DE_DP_A_HOTPLUG_IVB; + return intel_de_read(dev_priv, DEISR) & bit; } /* @@ -8405,14 +8361,9 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_dig_port->connected = gm45_digital_port_connected; else intel_dig_port->connected = g4x_digital_port_connected; - } else if (port == PORT_A) { - if (IS_IVYBRIDGE(dev_priv)) - intel_dig_port->connected = ivb_digital_port_connected; - else - intel_dig_port->connected = ilk_digital_port_connected; } else { - if (HAS_PCH_CPT(dev_priv)) - intel_dig_port->connected = cpt_digital_port_connected; + if (port == PORT_A) + intel_dig_port->connected = ilk_digital_port_connected; else intel_dig_port->connected = ibx_digital_port_connected; } diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 8aadca51fce6..b161c15baf86 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -238,8 +238,8 @@ static void tc_port_fixup_legacy_flag(struct intel_digital_port *dig_port, static u32 tc_port_live_status_mask(struct intel_digital_port *dig_port) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); - enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port); struct intel_uncore *uncore = &i915->uncore; + u32 isr_bit = i915->hotplug.pch_hpd[dig_port->base.hpd_pin]; u32 mask = 0; u32 val; @@ -258,7 +258,7 @@ static u32 tc_port_live_status_mask(struct intel_digital_port *dig_port) if (val & TC_LIVE_STATE_TC(dig_port->tc_phy_fia_idx)) mask |= BIT(TC_PORT_DP_ALT); - if (intel_uncore_read(uncore, SDEISR) & SDE_TC_HOTPLUG_ICP(tc_port)) + if (intel_uncore_read(uncore, SDEISR) & isr_bit) mask |= BIT(TC_PORT_LEGACY); /* The sink can be connected only in a single mode. */ From b0a997ae5248b293b6f6d1996ea49c57f7b94227 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 10 May 2020 11:24:29 +0100 Subject: [PATCH 56/97] drm/i915: Emit await(batch) before MI_BB_START Be consistent and ensure that we always emit the asynchronous waits prior to issuing instructions that use the address. This ensures that if we do emit GPU commands to do the await, they are before our use! Reviewed-by: Mika Kuoppala Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200510102431.21959-1-chris@chris-wilson.co.uk --- .../drm/i915/gem/selftests/i915_gem_context.c | 49 ++++++++++++------- .../drm/i915/gem/selftests/igt_gem_utils.c | 25 ++++------ drivers/gpu/drm/i915/gt/intel_renderstate.c | 16 +++--- drivers/gpu/drm/i915/selftests/i915_request.c | 28 +++++------ 4 files changed, 64 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 87d264fe54b2..b81978890641 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -972,12 +972,6 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, goto err_batch; } - err = rq->engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - 0); - if (err) - goto err_request; - i915_vma_lock(batch); err = i915_request_await_object(rq, batch->obj, false); if (err == 0) @@ -994,6 +988,18 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, if (err) goto skip_request; + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) + goto skip_request; + } + + err = rq->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + 0); + if (err) + goto skip_request; + i915_vma_unpin_and_release(&batch, 0); i915_vma_unpin(vma); @@ -1005,7 +1011,6 @@ emit_rpcs_query(struct drm_i915_gem_object *obj, skip_request: i915_request_set_error_once(rq, err); -err_request: i915_request_add(rq); err_batch: i915_vma_unpin_and_release(&batch, 0); @@ -1541,10 +1546,6 @@ static int write_to_scratch(struct i915_gem_context *ctx, goto err_unpin; } - err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0); - if (err) - goto err_request; - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, false); if (err == 0) @@ -1553,6 +1554,16 @@ static int write_to_scratch(struct i915_gem_context *ctx, if (err) goto skip_request; + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) + goto skip_request; + } + + err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0); + if (err) + goto skip_request; + i915_vma_unpin(vma); i915_request_add(rq); @@ -1560,7 +1571,6 @@ static int write_to_scratch(struct i915_gem_context *ctx, goto out_vm; skip_request: i915_request_set_error_once(rq, err); -err_request: i915_request_add(rq); err_unpin: i915_vma_unpin(vma); @@ -1674,10 +1684,6 @@ static int read_from_scratch(struct i915_gem_context *ctx, goto err_unpin; } - err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags); - if (err) - goto err_request; - i915_vma_lock(vma); err = i915_request_await_object(rq, vma->obj, true); if (err == 0) @@ -1686,6 +1692,16 @@ static int read_from_scratch(struct i915_gem_context *ctx, if (err) goto skip_request; + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) + goto skip_request; + } + + err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags); + if (err) + goto skip_request; + i915_vma_unpin(vma); i915_request_add(rq); @@ -1708,7 +1724,6 @@ static int read_from_scratch(struct i915_gem_context *ctx, goto out_vm; skip_request: i915_request_set_error_once(rq, err); -err_request: i915_request_add(rq); err_unpin: i915_vma_unpin(vma); diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index 772d8cba7da9..8b3925b4036f 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -83,6 +83,7 @@ igt_emit_store_dw(struct i915_vma *vma, offset += PAGE_SIZE; } *cmd = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(obj); intel_gt_chipset_flush(vma->vm->gt); @@ -126,16 +127,6 @@ int igt_gpu_fill_dw(struct intel_context *ce, goto err_batch; } - flags = 0; - if (INTEL_GEN(ce->vm->i915) <= 5) - flags |= I915_DISPATCH_SECURE; - - err = rq->engine->emit_bb_start(rq, - batch->node.start, batch->node.size, - flags); - if (err) - goto err_request; - i915_vma_lock(batch); err = i915_request_await_object(rq, batch->obj, false); if (err == 0) @@ -152,15 +143,17 @@ int igt_gpu_fill_dw(struct intel_context *ce, if (err) goto skip_request; - i915_request_add(rq); + flags = 0; + if (INTEL_GEN(ce->vm->i915) <= 5) + flags |= I915_DISPATCH_SECURE; - i915_vma_unpin_and_release(&batch, 0); - - return 0; + err = rq->engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); skip_request: - i915_request_set_error_once(rq, err); -err_request: + if (err) + i915_request_set_error_once(rq, err); i915_request_add(rq); err_batch: i915_vma_unpin_and_release(&batch, 0); diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index 708cb7808865..f59e7875cc5e 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -219,6 +219,14 @@ int intel_renderstate_emit(struct intel_renderstate *so, if (!so->vma) return 0; + i915_vma_lock(so->vma); + err = i915_request_await_object(rq, so->vma->obj, false); + if (err == 0) + err = i915_vma_move_to_active(so->vma, rq, 0); + i915_vma_unlock(so->vma); + if (err) + return err; + err = engine->emit_bb_start(rq, so->batch_offset, so->batch_size, I915_DISPATCH_SECURE); @@ -233,13 +241,7 @@ int intel_renderstate_emit(struct intel_renderstate *so, return err; } - i915_vma_lock(so->vma); - err = i915_request_await_object(rq, so->vma->obj, false); - if (err == 0) - err = i915_vma_move_to_active(so->vma, rq, 0); - i915_vma_unlock(so->vma); - - return err; + return 0; } void intel_renderstate_fini(struct intel_renderstate *so) diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index 15b1ca9f7a01..ffdfcb3805b5 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -865,13 +865,6 @@ static int live_all_engines(void *arg) goto out_request; } - err = engine->emit_bb_start(request[idx], - batch->node.start, - batch->node.size, - 0); - GEM_BUG_ON(err); - request[idx]->batch = batch; - i915_vma_lock(batch); err = i915_request_await_object(request[idx], batch->obj, 0); if (err == 0) @@ -879,6 +872,13 @@ static int live_all_engines(void *arg) i915_vma_unlock(batch); GEM_BUG_ON(err); + err = engine->emit_bb_start(request[idx], + batch->node.start, + batch->node.size, + 0); + GEM_BUG_ON(err); + request[idx]->batch = batch; + i915_request_get(request[idx]); i915_request_add(request[idx]); idx++; @@ -993,13 +993,6 @@ static int live_sequential_engines(void *arg) } } - err = engine->emit_bb_start(request[idx], - batch->node.start, - batch->node.size, - 0); - GEM_BUG_ON(err); - request[idx]->batch = batch; - i915_vma_lock(batch); err = i915_request_await_object(request[idx], batch->obj, false); @@ -1008,6 +1001,13 @@ static int live_sequential_engines(void *arg) i915_vma_unlock(batch); GEM_BUG_ON(err); + err = engine->emit_bb_start(request[idx], + batch->node.start, + batch->node.size, + 0); + GEM_BUG_ON(err); + request[idx]->batch = batch; + i915_request_get(request[idx]); i915_request_add(request[idx]); From 9bad40a27dac1f88012a1e2db0bfc5ae58fa0370 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 May 2020 15:13:03 +0100 Subject: [PATCH 57/97] drm/i915/selftests: Always flush before unpining after writing Be consistent, and even when we know we had used a WC, flush the mapped object after writing into it. The flush understands the mapping type and will only clflush if !I915_MAP_WC, but will always insert a wmb [sfence] so that we can be sure that all writes are visible. v2: Add the unconditional wmb so we are know that we always flush the writes to memory/HW at that point. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200511141304.599-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_object_blt.c | 8 ++++++-- drivers/gpu/drm/i915/gem/i915_gem_pages.c | 1 + drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c | 2 ++ drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c | 1 + drivers/gpu/drm/i915/gt/selftest_ring_submission.c | 2 ++ drivers/gpu/drm/i915/gt/selftest_rps.c | 2 ++ drivers/gpu/drm/i915/selftests/i915_request.c | 9 +++++++-- 7 files changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index 2fc7737ef5f4..f457d7130491 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -78,10 +78,12 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce, } while (rem); *cmd = MI_BATCH_BUFFER_END; - intel_gt_chipset_flush(ce->vm->gt); + i915_gem_object_flush_map(pool->obj); i915_gem_object_unpin_map(pool->obj); + intel_gt_chipset_flush(ce->vm->gt); + batch = i915_vma_instance(pool->obj, ce->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); @@ -289,10 +291,12 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce, } while (rem); *cmd = MI_BATCH_BUFFER_END; - intel_gt_chipset_flush(ce->vm->gt); + i915_gem_object_flush_map(pool->obj); i915_gem_object_unpin_map(pool->obj); + intel_gt_chipset_flush(ce->vm->gt); + batch = i915_vma_instance(pool->obj, ce->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 5d855fcd5c0f..af9e48ee4a33 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -391,6 +391,7 @@ void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj, GEM_BUG_ON(range_overflows_t(typeof(obj->base.size), offset, size, obj->base.size)); + wmb(); /* let all previous writes be visible to coherent partners */ obj->mm.dirty = true; if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 3f6079e1dfb6..87d7d8aa080f 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -158,6 +158,8 @@ static int wc_set(struct context *ctx, unsigned long offset, u32 v) return PTR_ERR(map); map[offset / sizeof(*map)] = v; + + __i915_gem_object_flush_map(ctx->obj, offset, sizeof(*map)); i915_gem_object_unpin_map(ctx->obj); return 0; diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index 8b3925b4036f..e21b5023ca7d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -84,6 +84,7 @@ igt_emit_store_dw(struct i915_vma *vma, } *cmd = MI_BATCH_BUFFER_END; + i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); intel_gt_chipset_flush(vma->vm->gt); diff --git a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c index 9995faadd7e8..3350e7c995bc 100644 --- a/drivers/gpu/drm/i915/gt/selftest_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/selftest_ring_submission.c @@ -54,6 +54,8 @@ static struct i915_vma *create_wally(struct intel_engine_cs *engine) *cs++ = STACK_MAGIC; *cs++ = MI_BATCH_BUFFER_END; + + i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); vma->private = intel_context_create(engine); /* dummy residuals */ diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index bfa1a15564f7..6275d69aa9cc 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -727,6 +727,7 @@ int live_rps_frequency_cs(void *arg) err_vma: *cancel = MI_BATCH_BUFFER_END; + i915_gem_object_flush_map(vma->obj); i915_gem_object_unpin_map(vma->obj); i915_vma_unpin(vma); i915_vma_put(vma); @@ -868,6 +869,7 @@ int live_rps_frequency_srm(void *arg) err_vma: *cancel = MI_BATCH_BUFFER_END; + i915_gem_object_flush_map(vma->obj); i915_gem_object_unpin_map(vma->obj); i915_vma_unpin(vma); i915_vma_put(vma); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index ffdfcb3805b5..6014e8dfcbb1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -816,10 +816,12 @@ static int recursive_batch_resolve(struct i915_vma *batch) return PTR_ERR(cmd); *cmd = MI_BATCH_BUFFER_END; - intel_gt_chipset_flush(batch->vm->gt); + __i915_gem_object_flush_map(batch->obj, 0, sizeof(*cmd)); i915_gem_object_unpin_map(batch->obj); + intel_gt_chipset_flush(batch->vm->gt); + return 0; } @@ -1060,9 +1062,12 @@ static int live_sequential_engines(void *arg) I915_MAP_WC); if (!IS_ERR(cmd)) { *cmd = MI_BATCH_BUFFER_END; - intel_gt_chipset_flush(engine->gt); + __i915_gem_object_flush_map(request[idx]->batch->obj, + 0, sizeof(*cmd)); i915_gem_object_unpin_map(request[idx]->batch->obj); + + intel_gt_chipset_flush(engine->gt); } i915_vma_put(request[idx]->batch); From a1b2eeacbc55573afc56341e08b506aee6451c3d Mon Sep 17 00:00:00 2001 From: Pascal Terjan Date: Sun, 10 May 2020 22:25:21 +0100 Subject: [PATCH 58/97] drm/i915: Remove unused HAS_FWTABLE macro It has been unused since commit ccb2aceaaa5f ("drm/i915: use vfuncs for reg_read/write_fw_domains"). Signed-off-by: Pascal Terjan Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200510212521.128869-1-pterjan@google.com --- drivers/gpu/drm/i915/intel_uncore.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 078f5b2eb8a4..a61cb8ca4d50 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -899,11 +899,6 @@ find_fw_domain(struct intel_uncore *uncore, u32 offset) #define GEN_FW_RANGE(s, e, d) \ { .start = (s), .end = (e), .domains = (d) } -#define HAS_FWTABLE(dev_priv) \ - (INTEL_GEN(dev_priv) >= 9 || \ - IS_CHERRYVIEW(dev_priv) || \ - IS_VALLEYVIEW(dev_priv)) - /* *Must* be sorted by offset ranges! See intel_fw_table_check(). */ static const struct intel_forcewake_range __vlv_fw_ranges[] = { GEN_FW_RANGE(0x2000, 0x3fff, FORCEWAKE_RENDER), From 84eac0c65940d9633247b0c8c826d4bcb7307351 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Mon, 11 May 2020 19:08:03 +0300 Subject: [PATCH 59/97] drm/i915/gt: Force pte cacheline to main memory We have problems of tgl not seeing a valid pte entry when iommu is enabled. Add heavy handed flushing of entry modification by flushing the cpu, cacheline and then wcb. This forces the pte out to main memory past this point regarless of promises of coherency. This is an evolution of an experimental patch from Chris Wilson of adding wmb for coherent partners, by adding a clflush to force the cache->memory step. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1840 Testcase: igt/gem_exec_fence/parallel Signed-off-by: Mika Kuoppala Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200511160803.15407-1-mika.kuoppala@linux.intel.com --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 94e746af8926..2dc88e76ebec 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -389,6 +389,16 @@ static int gen8_ppgtt_alloc(struct i915_address_space *vm, return err; } +static __always_inline inline void +write_pte(gen8_pte_t *pte, const gen8_pte_t val) +{ + /* Magic delays? Or can we refine these to flush all in one pass? */ + *pte = val; + wmb(); /* cpu to cache */ + clflush(pte); /* cache to memory */ + wmb(); /* visible to all */ +} + static __always_inline u64 gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, struct i915_page_directory *pdp, @@ -405,7 +415,8 @@ gen8_ppgtt_insert_pte(struct i915_ppgtt *ppgtt, vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1))); do { GEM_BUG_ON(iter->sg->length < I915_GTT_PAGE_SIZE); - vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma; + write_pte(&vaddr[gen8_pd_index(idx, 0)], + pte_encode | iter->dma); iter->dma += I915_GTT_PAGE_SIZE; if (iter->dma >= iter->max) { @@ -487,7 +498,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, do { GEM_BUG_ON(iter->sg->length < page_size); - vaddr[index++] = encode | iter->dma; + write_pte(&vaddr[index++], encode | iter->dma); start += page_size; iter->dma += page_size; From 1c8ee8b92fb6ac9d5975147cc902e8c142eca338 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 10 May 2020 11:24:31 +0100 Subject: [PATCH 60/97] drm/i915/gt: Restore Cherryview back to full-ppgtt This reverts commit 0b718ba1e884f64dce27c19311dd2859b87e56b9. There are still some residual issues with asynchronous binding and execution, but since commit 92581f9fb99c ("drm/i915: Immediately execute the fenced work") we prefer not to use asynchronous binds, and the remaining issues do not seem restricted to Cherryview [at least the ones seen over a few dozen CI runs, less frequent issues are sure to be discovered!] These issues seem to be mitigated, if not eliminated entirely, by the previous commit 84eac0c65940 ("drm/i915/gt: Force pte cacheline to main memory"). Signed-off-by: Chris Wilson Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200510102431.21959-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 54 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_pci.c | 2 +- 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index ed45fc40f884..15716e4d6b76 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3522,6 +3522,54 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) return 0; } +static int emit_pdps(struct i915_request *rq) +{ + const struct intel_engine_cs * const engine = rq->engine; + struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm); + int err, i; + u32 *cs; + + GEM_BUG_ON(intel_vgpu_active(rq->i915)); + + /* + * Beware ye of the dragons, this sequence is magic! + * + * Small changes to this sequence can cause anything from + * GPU hangs to forcewake errors and machine lockups! + */ + + /* Flush any residual operations from the context load */ + err = engine->emit_flush(rq, EMIT_FLUSH); + if (err) + return err; + + /* Magic required to prevent forcewake errors! */ + err = engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + return err; + + cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Ensure the LRI have landed before we invalidate & continue */ + *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; + for (i = GEN8_3LVL_PDPES; i--; ) { + const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); + u32 base = engine->mmio_base; + + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); + *cs++ = upper_32_bits(pd_daddr); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); + *cs++ = lower_32_bits(pd_daddr); + } + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + + return 0; +} + static int execlists_request_alloc(struct i915_request *request) { int ret; @@ -3543,6 +3591,12 @@ static int execlists_request_alloc(struct i915_request *request) * to cancel/unwind this request now. */ + if (!i915_vm_is_4lvl(request->context->vm)) { + ret = emit_pdps(request); + if (ret) + return ret; + } + /* Unconditionally invalidate GPU caches and TLBs. */ ret = request->engine->emit_flush(request, EMIT_INVALIDATE); if (ret) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 2fc25ec12c3d..193048ce3c3a 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -615,7 +615,7 @@ static const struct intel_device_info chv_info = { .has_logical_ring_contexts = 1, .display.has_gmch = 1, .dma_mask_size = 39, - .ppgtt_type = INTEL_PPGTT_ALIASING, + .ppgtt_type = INTEL_PPGTT_FULL, .ppgtt_size = 32, .has_reset_engine = 1, .has_snoop = true, From 73e28cc40bf00b5d168cb8f5cff1ae63e9097446 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 9 May 2020 12:52:17 +0100 Subject: [PATCH 61/97] drm/i915: Handle idling during i915_gem_evict_something busy loops i915_gem_evict_something() is charged with finding a slot within the GTT that we may reuse. Since our goal is not to stall, we first look for a slot that only overlaps idle vma. To this end, on the first pass we move any active vma to the end of the search list. However, we only stopped moving active vma after we see the first active vma twice. If during the search, that first active vma completed, we would not notice and keep on extending the search list. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1746 Fixes: 2850748ef876 ("drm/i915: Pull i915_vma_pin under the vm->mutex") Fixes: b1e3177bd1d8 ("drm/i915: Coordinate i915_active with its own mutex") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: # v5.5+ Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200509115217.26853-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_evict.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 0ba7b1e881c0..6501939929d5 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -128,6 +128,13 @@ i915_gem_evict_something(struct i915_address_space *vm, active = NULL; INIT_LIST_HEAD(&eviction_list); list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) { + if (vma == active) { /* now seen this vma twice */ + if (flags & PIN_NONBLOCK) + break; + + active = ERR_PTR(-EAGAIN); + } + /* * We keep this list in a rough least-recently scanned order * of active elements (inactive elements are cheap to reap). @@ -143,21 +150,12 @@ i915_gem_evict_something(struct i915_address_space *vm, * To notice when we complete one full cycle, we record the * first active element seen, before moving it to the tail. */ - if (i915_vma_is_active(vma)) { - if (vma == active) { - if (flags & PIN_NONBLOCK) - break; + if (active != ERR_PTR(-EAGAIN) && i915_vma_is_active(vma)) { + if (!active) + active = vma; - active = ERR_PTR(-EAGAIN); - } - - if (active != ERR_PTR(-EAGAIN)) { - if (!active) - active = vma; - - list_move_tail(&vma->vm_link, &vm->bound_list); - continue; - } + list_move_tail(&vma->vm_link, &vm->bound_list); + continue; } if (mark_free(&scan, vma, flags, &eviction_list)) From 61b088c5374a9f886efa1edbb49ce552bd1f9cba Mon Sep 17 00:00:00 2001 From: Swathi Dhanavanthri Date: Tue, 12 May 2020 11:00:50 -0700 Subject: [PATCH 62/97] drm/i915/ehl: Restrict w/a 1607087056 for EHL/JSL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This w/a is fixed in B0 stepping and needs to be restricted for A0 stepping only. Bspec: 33451 Signed-off-by: Swathi Dhanavanthri Reviewed-by: José Roberto de Souza Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20200512180050.6785-1-swathi.dhanavanthri@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 11 +++++++---- drivers/gpu/drm/i915/i915_drv.h | 5 +++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index aa90e6b7a118..90a2b9e399b0 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -917,10 +917,13 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_L3_COH_PIPE); - /* Wa_1607087056:icl */ - wa_write_or(wal, - SLICE_UNIT_LEVEL_CLKGATE, - L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); + /* Wa_1607087056:icl,ehl,jsl */ + if (IS_ICELAKE(i915) || + IS_EHL_REVID(i915, EHL_REVID_A0, EHL_REVID_A0)) { + wa_write_or(wal, + SLICE_UNIT_LEVEL_CLKGATE, + L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); + } } static void diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7fad6f2d80fa..5ca932c1fe37 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1514,6 +1514,11 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_ICL_REVID(p, since, until) \ (IS_ICELAKE(p) && IS_REVID(p, since, until)) +#define EHL_REVID_A0 0x0 + +#define IS_EHL_REVID(p, since, until) \ + (IS_ELKHARTLAKE(p) && IS_REVID(p, since, until)) + #define TGL_REVID_A0 0x0 #define TGL_REVID_B0 0x1 #define TGL_REVID_C0 0x2 From b428d57006663d18e3f6f98644ff9e8702a33ca4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 May 2020 11:01:20 +0100 Subject: [PATCH 63/97] drm/i915/gt: Reset execlists registers before HWSP Upon gt resume, we first poison then sanitize the engine. However, our testing shows that gen9 will very rarely retain the poisoned value from the HWSP mappings of the execlists status registers. This suggests that it is reading back from the HWSP, so rejig the register reset. v2: Maybe RING_CONTEXT_STATUS_PTR is write masked. It is. References: https://gitlab.freedesktop.org/drm/intel/-/issues/1812 Signed-off-by: Chris Wilson Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200513100120.11617-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 15716e4d6b76..32feb2a27dfc 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3938,6 +3938,14 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) ring_set_paused(engine, 0); + /* + * Sometimes Icelake forgets to reset its pointers on a GPU reset. + * Bludgeon them with a mmio update to be sure. + */ + ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, + 0xffff << 16 | reset_value << 8 | reset_value); + ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); + /* * After a reset, the HW starts writing into CSB entry [0]. We * therefore have to set our HEAD pointer back one entry so that @@ -3951,16 +3959,15 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) WRITE_ONCE(*execlists->csb_write, reset_value); wmb(); /* Make sure this is visible to HW (paranoia?) */ - /* - * Sometimes Icelake forgets to reset its pointers on a GPU reset. - * Bludgeon them with a mmio update to be sure. - */ - ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, - reset_value << 8 | reset_value); - ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); - invalidate_csb_entries(&execlists->csb_status[0], &execlists->csb_status[reset_value]); + + /* Once more for luck and our trusty paranoia */ + ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, + 0xffff << 16 | reset_value << 8 | reset_value); + ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); + + GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value); } static void execlists_sanitize(struct intel_engine_cs *engine) From d9162348db12487754e61f73497bdcfcea753590 Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Wed, 13 May 2020 12:38:11 +0300 Subject: [PATCH 64/97] drm/i915: Introduce skl_plane_wm_level accessor. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For future Gen12 SAGV implementation we need to seemlessly alter wm levels calculated, depending on whether we are allowed to enable SAGV or not. So this accessor will give additional flexibility to do that. Currently this accessor is still simply working as "pass-through" function. This will be changed in next coming patches from this series. v2: - plane_id -> plane->id(Ville Syrjälä) - Moved wm_level var to have more local scope (Ville Syrjälä) - Renamed yuv to color_plane(Ville Syrjälä) in skl_plane_wm_level v3: - plane->id -> plane_id(this time for real, Ville Syrjälä) - Changed colorplane id type from boolean to int as index (Ville Syrjälä) - Moved crtc_state param so that it is first now (Ville Syrjälä) - Moved wm_level declaration to tigher scope in skl_write_plane_wm(Ville Syrjälä) v4: - Started to use enum values for color plane - Do sizeof for a type what we are memset'ing - Zero out wm_uv as well(Ville Syrjälä) v5: - Fixed rebase conflict caused by COLOR_PLANE_* enum removal v6: - Do not use skl_plane_wm_level accessor in skl_allocate_pipe_ddb v7: - Get rid of wm_uv, which is not used in skl_plane_write_wm(Ville) Signed-off-by: Stanislav Lisovskiy Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200513093816.11466-2-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 0e53e277d6a6..24f1ef8e8a08 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4632,6 +4632,17 @@ icl_get_total_relative_data_rate(struct intel_crtc_state *crtc_state, return total_data_rate; } +static const struct skl_wm_level * +skl_plane_wm_level(const struct intel_crtc_state *crtc_state, + enum plane_id plane_id, + int level) +{ + const struct skl_plane_wm *wm = + &crtc_state->wm.skl.optimal.planes[plane_id]; + + return &wm->wm[level]; +} + static int skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state) { @@ -5440,8 +5451,12 @@ void skl_write_plane_wm(struct intel_plane *plane, &crtc_state->wm.skl.plane_ddb_uv[plane_id]; for (level = 0; level <= max_level; level++) { + const struct skl_wm_level *wm_level; + + wm_level = skl_plane_wm_level(crtc_state, plane_id, level); + skl_write_wm_level(dev_priv, PLANE_WM(pipe, plane_id, level), - &wm->wm[level]); + wm_level); } skl_write_wm_level(dev_priv, PLANE_WM_TRANS(pipe, plane_id), &wm->trans_wm); @@ -5474,8 +5489,12 @@ void skl_write_cursor_wm(struct intel_plane *plane, &crtc_state->wm.skl.plane_ddb_y[plane_id]; for (level = 0; level <= max_level; level++) { + const struct skl_wm_level *wm_level; + + wm_level = skl_plane_wm_level(crtc_state, plane_id, level); + skl_write_wm_level(dev_priv, CUR_WM(pipe, level), - &wm->wm[level]); + wm_level); } skl_write_wm_level(dev_priv, CUR_WM_TRANS(pipe), &wm->trans_wm); From 1d0a6c8486aa53f7545e80f5f0293ed99e48ffc0 Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Wed, 13 May 2020 12:38:12 +0300 Subject: [PATCH 65/97] drm/i915: Extract skl SAGV checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce platform dependent SAGV checking in combination with bandwidth state pipe SAGV mask. This is preparation to adding TGL support, which requires different way of SAGV checking. v2, v3, v4, v5, v6: Fix rebase conflict v7: - Nuke icl specific function, use skl for icl as well, gen specific active_pipes check to be added in the next patch(Ville) v8: - Use more generic intel_crtc_can_enable_sagv for checking(Ville) Signed-off-by: Stanislav Lisovskiy Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200513093816.11466-3-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 24f1ef8e8a08..3df8e60c6153 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3804,7 +3804,7 @@ void intel_sagv_post_plane_update(struct intel_atomic_state *state) intel_enable_sagv(dev_priv); } -static bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) +static bool skl_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -3853,6 +3853,11 @@ static bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state return true; } +static bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) +{ + return skl_crtc_can_enable_sagv(crtc_state); +} + bool intel_can_enable_sagv(const struct intel_bw_state *bw_state) { if (bw_state->active_pipes && !is_power_of_2(bw_state->active_pipes)) @@ -3865,7 +3870,7 @@ static int intel_compute_sagv_mask(struct intel_atomic_state *state) { int ret; struct intel_crtc *crtc; - struct intel_crtc_state *new_crtc_state; + const struct intel_crtc_state *new_crtc_state; struct intel_bw_state *new_bw_state = NULL; const struct intel_bw_state *old_bw_state = NULL; int i; @@ -3889,6 +3894,7 @@ static int intel_compute_sagv_mask(struct intel_atomic_state *state) new_bw_state->active_pipes = intel_calc_active_pipes(state, old_bw_state->active_pipes); + if (new_bw_state->active_pipes != old_bw_state->active_pipes) { ret = intel_atomic_lock_global_state(&new_bw_state->base); if (ret) From d8d5afe35e3f88f73436f79f974d96a67e879637 Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Wed, 13 May 2020 12:38:13 +0300 Subject: [PATCH 66/97] drm/i915: Make active_pipes check skl specific MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Seems that only skl needs to have SAGV turned off for multipipe scenarios, so lets do it this way. If anything blows up - we can always revert this patch. v2: Changed if condition to look better (Ville). Signed-off-by: Stanislav Lisovskiy [vsyrjala: wrapped long line to appease checkpatch] Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200513093816.11466-4-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 14 +++++++++----- drivers/gpu/drm/i915/intel_pm.h | 3 ++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3df8e60c6153..f7bd1dbb625e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3777,7 +3777,7 @@ void intel_sagv_pre_plane_update(struct intel_atomic_state *state) if (!new_bw_state) return; - if (!intel_can_enable_sagv(new_bw_state)) + if (!intel_can_enable_sagv(dev_priv, new_bw_state)) intel_disable_sagv(dev_priv); } @@ -3800,7 +3800,7 @@ void intel_sagv_post_plane_update(struct intel_atomic_state *state) if (!new_bw_state) return; - if (intel_can_enable_sagv(new_bw_state)) + if (intel_can_enable_sagv(dev_priv, new_bw_state)) intel_enable_sagv(dev_priv); } @@ -3858,9 +3858,11 @@ static bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state return skl_crtc_can_enable_sagv(crtc_state); } -bool intel_can_enable_sagv(const struct intel_bw_state *bw_state) +bool intel_can_enable_sagv(struct drm_i915_private *dev_priv, + const struct intel_bw_state *bw_state) { - if (bw_state->active_pipes && !is_power_of_2(bw_state->active_pipes)) + if (INTEL_GEN(dev_priv) < 11 && + bw_state->active_pipes && !is_power_of_2(bw_state->active_pipes)) return false; return bw_state->pipe_sagv_reject == 0; @@ -3868,6 +3870,7 @@ bool intel_can_enable_sagv(const struct intel_bw_state *bw_state) static int intel_compute_sagv_mask(struct intel_atomic_state *state) { + struct drm_i915_private *dev_priv = to_i915(state->base.dev); int ret; struct intel_crtc *crtc; const struct intel_crtc_state *new_crtc_state; @@ -3901,7 +3904,8 @@ static int intel_compute_sagv_mask(struct intel_atomic_state *state) return ret; } - if (intel_can_enable_sagv(new_bw_state) != intel_can_enable_sagv(old_bw_state)) { + if (intel_can_enable_sagv(dev_priv, new_bw_state) != + intel_can_enable_sagv(dev_priv, old_bw_state)) { ret = intel_atomic_serialize_global_state(&new_bw_state->base); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index fd1dc422e6c5..614ac7f8d4cc 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -42,7 +42,8 @@ void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc, struct skl_pipe_wm *out); void g4x_wm_sanitize(struct drm_i915_private *dev_priv); void vlv_wm_sanitize(struct drm_i915_private *dev_priv); -bool intel_can_enable_sagv(const struct intel_bw_state *bw_state); +bool intel_can_enable_sagv(struct drm_i915_private *dev_priv, + const struct intel_bw_state *bw_state); int intel_enable_sagv(struct drm_i915_private *dev_priv); int intel_disable_sagv(struct drm_i915_private *dev_priv); void intel_sagv_pre_plane_update(struct intel_atomic_state *state); From 4a0ca47a8e2fdfb7c9f5b23bba79fa632a5cd8fc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 May 2020 13:28:26 +0100 Subject: [PATCH 67/97] drm/i915/gt: Suspend tasklets before resume sanitization It is possible for a residual tasklet to be pending execution as we resume (whether that's some prior test kicking off the tasklet, or if we are in a suspend/resume stress test). As such, we do not want that tasklet to execute in the middle of our sanitization, such that it sees the poisoned state. For example, <4>[ 449.386553] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI <4>[ 449.386555] CPU: 1 PID: 5115 Comm: i915_selftest Tainted: G U W 5.7.0-rc4-CI-CI_DRM_8472+ #1 <4>[ 449.386556] Hardware name: Intel Corporation Ice Lake Client Platform/IceLake U DDR4 SODIMM PD RVP TLC, BIOS ICLSFWR1.R00.3183.A00.1905020411 05/02/2019 <4>[ 449.386585] RIP: 0010:process_csb+0x6bf/0x830 [i915] <4>[ 449.386588] Code: 00 48 c7 c2 10 bc 4c a0 48 c7 c7 d4 75 34 a0 e8 87 0e e6 e0 bf 01 00 00 00 e8 9d e0 e5 e0 31 f6 bf 09 00 00 00 e8 e1 ba d6 e0 <0f> 0b 8b 87 10 05 00 00 85 c0 0f 85 5f f9 ff ff 48 c7 c1 70 a5 4f <4>[ 449.386591] RSP: 0018:ffffc90000170ea0 EFLAGS: 00010297 <4>[ 449.386594] RAX: 0000000080000101 RBX: 0000000000000000 RCX: 0000000000000000 <4>[ 449.386596] RDX: ffff88849d5bc040 RSI: 0000000000000000 RDI: 0000000000000009 <4>[ 449.386598] RBP: ffffc90000170f00 R08: 0000000000000000 R09: 0000000000000000 <4>[ 449.386600] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88843ccea018 <4>[ 449.386602] R13: ffff88843ccea658 R14: ffff88843ccea640 R15: ffff88843ccea000 <4>[ 449.386605] FS: 00007f826a813300(0000) GS:ffff88849fe80000(0000) knlGS:0000000000000000 <4>[ 449.386607] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4>[ 449.386609] CR2: 0000560366b94280 CR3: 000000048ba02002 CR4: 0000000000760ee0 <4>[ 449.386611] PKRU: 55555554 <4>[ 449.386613] Call Trace: <4>[ 449.386616] <4>[ 449.386646] ? execlists_submission_tasklet+0xcf/0x140 [i915] <4>[ 449.386674] execlists_submission_tasklet+0x2f/0x140 [i915] <4>[ 449.386679] tasklet_action_common.isra.16+0x6c/0x1c0 <4>[ 449.386684] __do_softirq+0xdf/0x49e <4>[ 449.386687] irq_exit+0xba/0xc0 <4>[ 449.386690] smp_apic_timer_interrupt+0xb7/0x280 <4>[ 449.386693] apic_timer_interrupt+0xf/0x20 <4>[ 449.386695] <4>[ 449.386698] RIP: 0010:_raw_spin_unlock_irqrestore+0x49/0x60 <4>[ 449.386701] Code: c7 02 75 1f 53 9d e8 26 ab 75 ff bf 01 00 00 00 e8 7c a3 69 ff 65 8b 05 7d 9b 5c 7e 85 c0 74 0c 5b 5d c3 e8 09 aa 75 ff 53 9d df e8 ca 39 5b ff 5b 5d c3 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 <4>[ 449.386703] RSP: 0018:ffffc90000a6b950 EFLAGS: 00000202 ORIG_RAX: ffffffffffffff13 <4>[ 449.386706] RAX: 0000000080000001 RBX: 0000000000000202 RCX: 0000000000000000 <4>[ 449.386708] RDX: ffff88849d5bc040 RSI: ffff88849d5bc900 RDI: ffffffff82386f12 <4>[ 449.386710] RBP: ffff88847d400f00 R08: ffff88849d5bc900 R09: 0000000000000000 <4>[ 449.386712] R10: 0000000000000000 R11: 0000000000000000 R12: 00000000ffff0b0b <4>[ 449.386714] R13: 000000000000000c R14: ffff88847d40bf70 R15: ffff88847d40cef8 <4>[ 449.386742] reset_csb_pointers+0x59/0x140 [i915] <4>[ 449.386769] execlists_sanitize+0x3e/0x60 [i915] <4>[ 449.386797] gt_sanitize+0xd6/0x260 [i915] As part of the reset preparation, engine->reset.prepare() prevents the tasklet from running, so pull the sanitization inside the critical section for reset. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1812 Fixes: 23122a4d992b ("drm/i915/gt: Scrub execlists state on resume") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200513122826.27484-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index e59776485457..6bdb434a442d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -150,10 +150,6 @@ static void gt_sanitize(struct intel_gt *gt, bool force) if (intel_gt_is_wedged(gt)) intel_gt_unset_wedged(gt); - for_each_engine(engine, gt, id) - if (engine->sanitize) - engine->sanitize(engine); - intel_uc_sanitize(>->uc); for_each_engine(engine, gt, id) @@ -162,6 +158,10 @@ static void gt_sanitize(struct intel_gt *gt, bool force) intel_uc_reset_prepare(>->uc); + for_each_engine(engine, gt, id) + if (engine->sanitize) + engine->sanitize(engine); + if (reset_engines(gt) || force) { for_each_engine(engine, gt, id) __intel_engine_reset(engine, false); From b2379ba2b9c207f6a76b4b8c3d7252a82cfd8f7d Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 13 May 2020 11:23:40 -0700 Subject: [PATCH 68/97] drm/i915: Remove duplicate inline specifier on write_pte When building with clang: drivers/gpu/drm/i915/gt/gen8_ppgtt.c:392:24: warning: duplicate 'inline' declaration specifier [-Wduplicate-decl-specifier] declaration specifier [-Wduplicate-decl-specifier] static __always_inline inline void ^ include/linux/compiler_types.h:138:16: note: expanded from macro 'inline' #define inline inline __gnu_inline __inline_maybe_unused notrace ^ 1 warning generated. __always_inline is defined as 'inline __attribute__((__always_inline))' so we do not need to specify it twice. Fixes: 84eac0c65940 ("drm/i915/gt: Force pte cacheline to main memory") Link: https://github.com/ClangBuiltLinux/linux/issues/1024 Reported-by: kbuild test robot Signed-off-by: Nathan Chancellor Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20200513182340.3968668-1-natechancellor@gmail.com --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 2dc88e76ebec..699125928272 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -389,7 +389,7 @@ static int gen8_ppgtt_alloc(struct i915_address_space *vm, return err; } -static __always_inline inline void +static __always_inline void write_pte(gen8_pte_t *pte, const gen8_pte_t val) { /* Magic delays? Or can we refine these to flush all in one pass? */ From 889333c772c50a3e50787aa1073a9f82dafb64f8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 May 2020 19:09:37 +0100 Subject: [PATCH 69/97] drm/i915/gem: Remove redundant exec_fence Since there can only be one of in_fence/exec_fence, just use the single in_fence local. v2: Consolidate lookup Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200513180937.28992-1-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 40 +++++++------------ 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index d54a4933cc05..3c98aaaa8d11 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2622,7 +2622,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, struct drm_i915_private *i915 = to_i915(dev); struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; - struct dma_fence *exec_fence = NULL; struct sync_file *out_fence = NULL; struct i915_vma *batch; int out_fence_fd = -1; @@ -2665,30 +2664,22 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (args->flags & I915_EXEC_IS_PINNED) eb.batch_flags |= I915_DISPATCH_PINNED; - if (args->flags & I915_EXEC_FENCE_IN) { +#define IN_FENCES (I915_EXEC_FENCE_IN | I915_EXEC_FENCE_SUBMIT) + if (args->flags & IN_FENCES) { + if ((args->flags & IN_FENCES) == IN_FENCES) + return -EINVAL; + in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); if (!in_fence) return -EINVAL; } - - if (args->flags & I915_EXEC_FENCE_SUBMIT) { - if (in_fence) { - err = -EINVAL; - goto err_in_fence; - } - - exec_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); - if (!exec_fence) { - err = -EINVAL; - goto err_in_fence; - } - } +#undef IN_FENCES if (args->flags & I915_EXEC_FENCE_OUT) { out_fence_fd = get_unused_fd_flags(O_CLOEXEC); if (out_fence_fd < 0) { err = out_fence_fd; - goto err_exec_fence; + goto err_in_fence; } } @@ -2779,14 +2770,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, } if (in_fence) { - err = i915_request_await_dma_fence(eb.request, in_fence); - if (err < 0) - goto err_request; - } - - if (exec_fence) { - err = i915_request_await_execution(eb.request, exec_fence, - eb.engine->bond_execute); + if (args->flags & I915_EXEC_FENCE_SUBMIT) + err = i915_request_await_execution(eb.request, + in_fence, + eb.engine->bond_execute); + else + err = i915_request_await_dma_fence(eb.request, + in_fence); if (err < 0) goto err_request; } @@ -2855,8 +2845,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, err_out_fence: if (out_fence_fd != -1) put_unused_fd(out_fence_fd); -err_exec_fence: - dma_fence_put(exec_fence); err_in_fence: dma_fence_put(in_fence); return err; From 795d4d7fa34154fc621c1048f8b92e4f6bd3926f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 May 2020 17:59:32 +0100 Subject: [PATCH 70/97] drm/i915: Mark the addition of the initial-breadcrumb in the request The initial-breadcrumb is used to mark the end of the awaiting and the beginning of the user payload. We verify that we do not start the user payload before all signaler are completed, checking our semaphore setup by looking for the initial breadcrumb being written too early. We also want to ensure that we do not add semaphore waits after we have already closed the semaphore section, an issue for later deferred waits. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200513165937.9508-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 5 ++++- drivers/gpu/drm/i915/i915_request.c | 7 ++++++- drivers/gpu/drm/i915/i915_request.h | 27 ++++++++++++++++++++------- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 32feb2a27dfc..51470e96cdd4 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1886,7 +1886,7 @@ static void defer_request(struct i915_request *rq, struct list_head * const pl) continue; /* No waiter should start before its signaler */ - GEM_BUG_ON(w->context->timeline->has_initial_breadcrumb && + GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) && i915_request_started(w) && !i915_request_completed(rq)); @@ -3493,6 +3493,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) { u32 *cs; + GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq)); if (!i915_request_timeline(rq)->has_initial_breadcrumb) return 0; @@ -3519,6 +3520,8 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq) /* Record the updated position of the request's payload */ rq->infix = intel_ring_offset(rq, cs); + __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags); + return 0; } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 2d5b98549ddc..00b7c4eb3f32 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -951,6 +951,7 @@ __emit_semaphore_wait(struct i915_request *to, u32 *cs; GEM_BUG_ON(INTEL_GEN(to->i915) < 8); + GEM_BUG_ON(i915_request_has_initial_breadcrumb(to)); /* We need to pin the signaler's HWSP until we are finished reading. */ err = intel_timeline_read_hwsp(from, to, &hwsp_offset); @@ -1000,6 +1001,9 @@ emit_semaphore_wait(struct i915_request *to, if (!intel_context_use_semaphores(to->context)) goto await_fence; + if (i915_request_has_initial_breadcrumb(to)) + goto await_fence; + if (!rcu_access_pointer(from->hwsp_cacheline)) goto await_fence; @@ -1256,7 +1260,8 @@ __i915_request_await_execution(struct i915_request *to, * immediate execution, and so we must wait until it reaches the * active slot. */ - if (intel_engine_has_semaphores(to->engine)) { + if (intel_engine_has_semaphores(to->engine) && + !i915_request_has_initial_breadcrumb(to)) { err = __emit_semaphore_wait(to, from, from->fence.seqno - 1); if (err < 0) return err; diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index d8ce908e1346..98ae2dc82371 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -83,6 +83,20 @@ enum { */ I915_FENCE_FLAG_PQUEUE, + /* + * I915_FENCE_FLAG_HOLD - this request is currently on hold + * + * This request has been suspended, pending an ongoing investigation. + */ + I915_FENCE_FLAG_HOLD, + + /* + * I915_FENCE_FLAG_INITIAL_BREADCRUMB - this request has the initial + * breadcrumb that marks the end of semaphore waits and start of the + * user payload. + */ + I915_FENCE_FLAG_INITIAL_BREADCRUMB, + /* * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list * @@ -91,13 +105,6 @@ enum { */ I915_FENCE_FLAG_SIGNAL, - /* - * I915_FENCE_FLAG_HOLD - this request is currently on hold - * - * This request has been suspended, pending an ongoing investigation. - */ - I915_FENCE_FLAG_HOLD, - /* * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted * @@ -390,6 +397,12 @@ static inline bool i915_request_in_priority_queue(const struct i915_request *rq) return test_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); } +static inline bool +i915_request_has_initial_breadcrumb(const struct i915_request *rq) +{ + return test_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags); +} + /** * Returns true if seq1 is later than seq2. */ From 701f026521980dd0151130f818558e17c608ed2e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 May 2020 08:47:59 +0100 Subject: [PATCH 71/97] drm/i915: Drop I915_RESET_TIMEOUT and friends These were used to set various timeouts for the reset procedure (deciding when the engine was dead, and even if the reset itself was not making forward progress). No longer used. Signed-off-by: Chris Wilson Reviewed-by: Maciej Patelczyk Link: https://patchwork.freedesktop.org/patch/msgid/20200513074809.18194-14-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5ca932c1fe37..0f76b93e0d26 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -625,13 +625,6 @@ i915_fence_timeout(const struct drm_i915_private *i915) return i915_fence_context_timeout(i915, U64_MAX); } -#define I915_RESET_TIMEOUT (10 * HZ) /* 10s */ - -#define I915_ENGINE_DEAD_TIMEOUT (4 * HZ) /* Seqno, head and subunits dead */ -#define I915_SEQNO_DEAD_TIMEOUT (12 * HZ) /* Seqno dead with active head */ - -#define I915_ENGINE_WEDGED_TIMEOUT (60 * HZ) /* Reset but no recovery? */ - /* Amount of SAGV/QGV points, BSpec precisely defines this */ #define I915_NUM_QGV_POINTS 8 From 18e4af04d2183e99e6808f55dcef30f66ac0b155 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 May 2020 18:35:04 +0100 Subject: [PATCH 72/97] drm/i915: Drop no-semaphore boosting Now that we have fast timeslicing on semaphores, we no longer need to prioritise none-semaphore work as we will yield any work blocked on a semaphore to the next in the queue. Previously with no timeslicing, blocking on the semaphore caused extremely bad scheduling with multiple clients utilising multiple rings. Now, there is no impact and we can remove the complication. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200513173504.28322-1-chris@chris-wilson.co.uk --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 15 ------- drivers/gpu/drm/i915/gt/intel_lrc.c | 9 ----- drivers/gpu/drm/i915/gt/selftest_context.c | 1 + drivers/gpu/drm/i915/i915_priolist_types.h | 4 +- drivers/gpu/drm/i915/i915_request.c | 40 ++----------------- drivers/gpu/drm/i915/i915_request.h | 1 - drivers/gpu/drm/i915/i915_scheduler.c | 11 ++--- drivers/gpu/drm/i915/i915_scheduler_types.h | 3 +- 8 files changed, 11 insertions(+), 73 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 3c98aaaa8d11..c0d59d48e198 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2582,21 +2582,6 @@ static void eb_request_add(struct i915_execbuffer *eb) /* Check that the context wasn't destroyed before submission */ if (likely(!intel_context_is_closed(eb->context))) { attr = eb->gem_context->sched; - - /* - * Boost actual workloads past semaphores! - * - * With semaphores we spin on one engine waiting for another, - * simply to reduce the latency of starting our work when - * the signaler completes. However, if there is any other - * work that we could be doing on this engine instead, that - * is better utilisation and will reduce the overall duration - * of the current work. To avoid PI boosting a semaphore - * far in the distance past over useful work, we keep a history - * of any semaphore use along our dependency chain. - */ - if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) - attr.priority |= I915_PRIORITY_NOSEMAPHORE; } else { /* Serialise with context_close via the add_to_timeline */ i915_request_set_error_once(rq, -ENOENT); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 51470e96cdd4..32fdb7cb3ae8 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -429,15 +429,6 @@ static int effective_prio(const struct i915_request *rq) if (i915_request_has_nopreempt(rq)) prio = I915_PRIORITY_UNPREEMPTABLE; - /* - * On unwinding the active request, we give it a priority bump - * if it has completed waiting on any semaphore. If we know that - * the request has already started, we can prevent an unwanted - * preempt-to-idle cycle by taking that into account now. - */ - if (__i915_request_has_started(rq)) - prio |= I915_PRIORITY_NOSEMAPHORE; - return prio; } diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index a56dff3b157a..52af1cee9a94 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -24,6 +24,7 @@ static int request_sync(struct i915_request *rq) /* Opencode i915_request_add() so we can keep the timeline locked. */ __i915_request_commit(rq); + rq->sched.attr.priority = I915_PRIORITY_BARRIER; __i915_request_queue(rq, NULL); timeout = i915_request_wait(rq, 0, HZ / 10); diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h index e18723d8df86..5003a71113cb 100644 --- a/drivers/gpu/drm/i915/i915_priolist_types.h +++ b/drivers/gpu/drm/i915/i915_priolist_types.h @@ -24,14 +24,12 @@ enum { I915_PRIORITY_DISPLAY, }; -#define I915_USER_PRIORITY_SHIFT 1 +#define I915_USER_PRIORITY_SHIFT 0 #define I915_USER_PRIORITY(x) ((x) << I915_USER_PRIORITY_SHIFT) #define I915_PRIORITY_COUNT BIT(I915_USER_PRIORITY_SHIFT) #define I915_PRIORITY_MASK (I915_PRIORITY_COUNT - 1) -#define I915_PRIORITY_NOSEMAPHORE ((u8)BIT(0)) - /* Smallest priority value that cannot be bumped. */ #define I915_PRIORITY_INVALID (INT_MIN | (u8)I915_PRIORITY_MASK) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 00b7c4eb3f32..526c1e9acbd5 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -368,8 +368,6 @@ __await_execution(struct i915_request *rq, } spin_unlock_irq(&signal->lock); - /* Copy across semaphore status as we need the same behaviour */ - rq->sched.flags |= signal->sched.flags; return 0; } @@ -537,10 +535,8 @@ void __i915_request_unsubmit(struct i915_request *request) spin_unlock(&request->lock); /* We've already spun, don't charge on resubmitting. */ - if (request->sched.semaphores && i915_request_started(request)) { - request->sched.attr.priority |= I915_PRIORITY_NOSEMAPHORE; + if (request->sched.semaphores && i915_request_started(request)) request->sched.semaphores = 0; - } /* * We don't need to wake_up any waiters on request->execute, they @@ -598,15 +594,6 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) return NOTIFY_DONE; } -static void irq_semaphore_cb(struct irq_work *wrk) -{ - struct i915_request *rq = - container_of(wrk, typeof(*rq), semaphore_work); - - i915_schedule_bump_priority(rq, I915_PRIORITY_NOSEMAPHORE); - i915_request_put(rq); -} - static int __i915_sw_fence_call semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { @@ -614,11 +601,6 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) switch (state) { case FENCE_COMPLETE: - if (!(READ_ONCE(rq->sched.attr.priority) & I915_PRIORITY_NOSEMAPHORE)) { - i915_request_get(rq); - init_irq_work(&rq->semaphore_work, irq_semaphore_cb); - irq_work_queue(&rq->semaphore_work); - } break; case FENCE_FREE: @@ -997,6 +979,7 @@ emit_semaphore_wait(struct i915_request *to, gfp_t gfp) { const intel_engine_mask_t mask = READ_ONCE(from->engine)->mask; + struct i915_sw_fence *wait = &to->submit; if (!intel_context_use_semaphores(to->context)) goto await_fence; @@ -1031,11 +1014,10 @@ emit_semaphore_wait(struct i915_request *to, goto await_fence; to->sched.semaphores |= mask; - to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN; - return 0; + wait = &to->semaphore; await_fence: - return i915_sw_fence_await_dma_fence(&to->submit, + return i915_sw_fence_await_dma_fence(wait, &from->fence, 0, I915_FENCE_GFP); } @@ -1070,17 +1052,6 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) if (ret < 0) return ret; - if (to->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN) { - ret = i915_sw_fence_await_dma_fence(&to->semaphore, - &from->fence, 0, - I915_FENCE_GFP); - if (ret < 0) - return ret; - } - - if (from->sched.flags & I915_SCHED_HAS_EXTERNAL_CHAIN) - to->sched.flags |= I915_SCHED_HAS_EXTERNAL_CHAIN; - return 0; } @@ -1528,9 +1499,6 @@ void i915_request_add(struct i915_request *rq) attr = ctx->sched; rcu_read_unlock(); - if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) - attr.priority |= I915_PRIORITY_NOSEMAPHORE; - __i915_request_queue(rq, &attr); mutex_unlock(&tl->mutex); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 98ae2dc82371..8ec7ee4dbadc 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -216,7 +216,6 @@ struct i915_request { }; struct list_head execute_cb; struct i915_sw_fence semaphore; - struct irq_work semaphore_work; /* * A list of everyone we wait upon, and everyone who waits upon us. diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index bec2a9c25425..f4ea318781f0 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -51,11 +51,11 @@ static void assert_priolists(struct intel_engine_execlists * const execlists) GEM_BUG_ON(rb_first_cached(&execlists->queue) != rb_first(&execlists->queue.rb_root)); - last_prio = (INT_MAX >> I915_USER_PRIORITY_SHIFT) + 1; + last_prio = INT_MAX; for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { const struct i915_priolist *p = to_priolist(rb); - GEM_BUG_ON(p->priority >= last_prio); + GEM_BUG_ON(p->priority > last_prio); last_prio = p->priority; GEM_BUG_ON(!p->used); @@ -434,15 +434,12 @@ bool __i915_sched_node_add_dependency(struct i915_sched_node *node, dep->waiter = node; dep->flags = flags; - /* Keep track of whether anyone on this chain has a semaphore */ - if (signal->flags & I915_SCHED_HAS_SEMAPHORE_CHAIN && - !node_started(signal)) - node->flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN; - /* All set, now publish. Beware the lockless walkers. */ list_add_rcu(&dep->signal_link, &node->signalers_list); list_add_rcu(&dep->wait_link, &signal->waiters_list); + /* Propagate the chains */ + node->flags |= signal->flags; ret = true; } diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index 6ab2c5289bed..f72e6c397b08 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -65,8 +65,7 @@ struct i915_sched_node { struct list_head link; struct i915_sched_attr attr; unsigned int flags; -#define I915_SCHED_HAS_SEMAPHORE_CHAIN BIT(0) -#define I915_SCHED_HAS_EXTERNAL_CHAIN BIT(1) +#define I915_SCHED_HAS_EXTERNAL_CHAIN BIT(0) intel_engine_mask_t semaphores; }; From 7a0ba6b43bc0b0078df5504393fd3966a5c7e808 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 14 May 2020 07:29:05 +0100 Subject: [PATCH 73/97] drm/i915: Show per-engine default property values in sysfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By providing the default values configured into the kernel via sysfs, it is much more convenient for userspace to restore those sane defaults, or at least know what are considered good baseline. This is useful, for example, to cleanup after any failed userspace prior to commencing new jobs. /sys/class/drm/card0/engine/rcs0/ ├── capabilities ├── class ├── .defaults │   ├── heartbeat_interval_ms │   ├── max_busywait_duration_ns │   ├── preempt_timeout_ms │   ├── stop_timeout_ms │   └── timeslice_duration_ms ├── heartbeat_interval_ms ├── instance ├── known_capabilities ├── max_busywait_duration_ns ├── mmio_base ├── name ├── preempt_timeout_ms ├── stop_timeout_ms └── timeslice_duration_ms Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Reviewed-by: Maciej Patelczyk Link: https://patchwork.freedesktop.org/patch/msgid/20200514062905.28668-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 + drivers/gpu/drm/i915/gt/intel_engine_types.h | 2 +- drivers/gpu/drm/i915/gt/sysfs_engines.c | 94 ++++++++++++++++++++ 3 files changed, 97 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 98b326a1568d..da5b61085257 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -326,6 +326,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id) if (INTEL_GEN(i915) == 12 && engine->class == RENDER_CLASS) engine->props.preempt_timeout_ms = 0; + engine->defaults = engine->props; /* never to change again */ + engine->context_size = intel_engine_context_size(gt, engine->class); if (WARN_ON(engine->context_size > BIT(20))) engine->context_size = 0; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index c113b7805e65..c39db7e249e2 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -565,7 +565,7 @@ struct intel_engine_cs { unsigned long preempt_timeout_ms; unsigned long stop_timeout_ms; unsigned long timeslice_duration_ms; - } props; + } props, defaults; }; static inline bool diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c index 8f9b2f33dbaf..535cc1169e54 100644 --- a/drivers/gpu/drm/i915/gt/sysfs_engines.c +++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c @@ -191,6 +191,17 @@ max_spin_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) static struct kobj_attribute max_spin_attr = __ATTR(max_busywait_duration_ns, 0644, max_spin_show, max_spin_store); +static ssize_t +max_spin_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->defaults.max_busywait_duration_ns); +} + +static struct kobj_attribute max_spin_def = +__ATTR(max_busywait_duration_ns, 0444, max_spin_default, NULL); + static ssize_t timeslice_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) @@ -233,6 +244,17 @@ timeslice_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) static struct kobj_attribute timeslice_duration_attr = __ATTR(timeslice_duration_ms, 0644, timeslice_show, timeslice_store); +static ssize_t +timeslice_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->defaults.timeslice_duration_ms); +} + +static struct kobj_attribute timeslice_duration_def = +__ATTR(timeslice_duration_ms, 0444, timeslice_default, NULL); + static ssize_t stop_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) @@ -272,6 +294,17 @@ stop_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) static struct kobj_attribute stop_timeout_attr = __ATTR(stop_timeout_ms, 0644, stop_show, stop_store); +static ssize_t +stop_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->defaults.stop_timeout_ms); +} + +static struct kobj_attribute stop_timeout_def = +__ATTR(stop_timeout_ms, 0444, stop_default, NULL); + static ssize_t preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) @@ -316,6 +349,18 @@ preempt_timeout_show(struct kobject *kobj, struct kobj_attribute *attr, static struct kobj_attribute preempt_timeout_attr = __ATTR(preempt_timeout_ms, 0644, preempt_timeout_show, preempt_timeout_store); +static ssize_t +preempt_timeout_default(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->defaults.preempt_timeout_ms); +} + +static struct kobj_attribute preempt_timeout_def = +__ATTR(preempt_timeout_ms, 0444, preempt_timeout_default, NULL); + static ssize_t heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) @@ -359,6 +404,17 @@ heartbeat_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) static struct kobj_attribute heartbeat_interval_attr = __ATTR(heartbeat_interval_ms, 0644, heartbeat_show, heartbeat_store); +static ssize_t +heartbeat_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + struct intel_engine_cs *engine = kobj_to_engine(kobj); + + return sprintf(buf, "%lu\n", engine->defaults.heartbeat_interval_ms); +} + +static struct kobj_attribute heartbeat_interval_def = +__ATTR(heartbeat_interval_ms, 0444, heartbeat_default, NULL); + static void kobj_engine_release(struct kobject *kobj) { kfree(kobj); @@ -390,6 +446,42 @@ kobj_engine(struct kobject *dir, struct intel_engine_cs *engine) return &ke->base; } +static void add_defaults(struct kobj_engine *parent) +{ + static const struct attribute *files[] = { + &max_spin_def.attr, + &stop_timeout_def.attr, +#if CONFIG_DRM_I915_HEARTBEAT_INTERVAL + &heartbeat_interval_def.attr, +#endif + NULL + }; + struct kobj_engine *ke; + + ke = kzalloc(sizeof(*ke), GFP_KERNEL); + if (!ke) + return; + + kobject_init(&ke->base, &kobj_engine_type); + ke->engine = parent->engine; + + if (kobject_add(&ke->base, &parent->base, "%s", ".defaults")) { + kobject_put(&ke->base); + return; + } + + if (sysfs_create_files(&ke->base, files)) + return; + + if (intel_engine_has_timeslices(ke->engine) && + sysfs_create_file(&ke->base, ×lice_duration_def.attr)) + return; + + if (intel_engine_has_preempt_reset(ke->engine) && + sysfs_create_file(&ke->base, &preempt_timeout_def.attr)) + return; +} + void intel_engines_add_sysfs(struct drm_i915_private *i915) { static const struct attribute *files[] = { @@ -433,6 +525,8 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915) sysfs_create_file(kobj, &preempt_timeout_attr.attr)) goto err_engine; + add_defaults(container_of(kobj, struct kobj_engine, base)); + if (0) { err_object: kobject_put(kobj); From ed610f43606efd390ecd334d725beb5f2cf53104 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 May 2020 08:48:01 +0100 Subject: [PATCH 74/97] drm/i915/selftests: Always call the provided engine->emit_init_breadcrumb While this does not appear to fix any issues, the backend itself knows when it wants to emit a breadcrumb, so let it make the final call. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200513074809.18194-16-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_perf.c | 3 +-- drivers/gpu/drm/i915/selftests/igt_spinner.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c index 5608fab98d5d..ca0c9dbab713 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf.c +++ b/drivers/gpu/drm/i915/selftests/i915_perf.c @@ -221,8 +221,7 @@ static int live_noa_delay(void *arg) goto out; } - if (rq->engine->emit_init_breadcrumb && - i915_request_timeline(rq)->has_initial_breadcrumb) { + if (rq->engine->emit_init_breadcrumb) { err = rq->engine->emit_init_breadcrumb(rq); if (err) { i915_request_add(rq); diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index 9ad4ab088466..e35ba5f9e73f 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -169,8 +169,7 @@ igt_spinner_create_request(struct igt_spinner *spin, intel_gt_chipset_flush(engine->gt); - if (engine->emit_init_breadcrumb && - i915_request_timeline(rq)->has_initial_breadcrumb) { + if (engine->emit_init_breadcrumb) { err = engine->emit_init_breadcrumb(rq); if (err) goto cancel_rq; From 0f4013fb28ec00986f3934bc6c7d0c2a0eec695c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 13 May 2020 08:47:46 +0100 Subject: [PATCH 75/97] drm/i915/gt: Transfer old virtual breadcrumbs to irq_worker The second try at staging the transfer of the breadcrumb. In part one, we realised we could not simply move to the second engine as we were only holding the breadcrumb lock on the first. So in commit 6c81e21a4742 ("drm/i915/gt: Stage the transfer of the virtual breadcrumb"), we removed it from the first engine and marked up this request to reattach the signaling on the new engine. However, this failed to take into account that we only attach the breadcrumb if the new request is added at the start of the queue, which if we are transferring, it is because we know there to be a request to be signaled (and hence we would not be attached). In this attempt, we try to transfer the completed requests to the irq_worker on its rq->engine->breadcrumbs. This preserves the coupling between the rq and its breadcrumbs, so that i915_request_cancel_breadcrumb() does not attempt to manipulate the list under the wrong lock. v2: Code sharing is fun. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1862 Fixes: 6c81e21a4742 ("drm/i915/gt: Stage the transfer of the virtual breadcrumb") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200513074809.18194-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 52 ++++++++++++++++---- drivers/gpu/drm/i915/gt/intel_engine.h | 3 ++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 2 + drivers/gpu/drm/i915/gt/intel_lrc.c | 34 ++++--------- 4 files changed, 57 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index cbedba857d43..d907d538176e 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -142,6 +142,18 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl) intel_engine_add_retire(engine, tl); } +static void __signal_request(struct i915_request *rq, struct list_head *signals) +{ + GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); + clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); + + if (!__dma_fence_signal(&rq->fence)) + return; + + i915_request_get(rq); + list_add_tail(&rq->signal_link, signals); +} + static void signal_irq_work(struct irq_work *work) { struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work); @@ -155,6 +167,8 @@ static void signal_irq_work(struct irq_work *work) if (b->irq_armed && list_empty(&b->signalers)) __intel_breadcrumbs_disarm_irq(b); + list_splice_init(&b->signaled_requests, &signal); + list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) { GEM_BUG_ON(list_empty(&ce->signals)); @@ -163,24 +177,15 @@ static void signal_irq_work(struct irq_work *work) list_entry(pos, typeof(*rq), signal_link); GEM_BUG_ON(!check_signal_order(ce, rq)); - if (!__request_completed(rq)) break; - GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, - &rq->fence.flags)); - clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags); - - if (!__dma_fence_signal(&rq->fence)) - continue; - /* * Queue for execution after dropping the signaling * spinlock as the callback chain may end up adding * more signalers to the same context or engine. */ - i915_request_get(rq); - list_add_tail(&rq->signal_link, &signal); + __signal_request(rq, &signal); } /* @@ -255,6 +260,7 @@ void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) spin_lock_init(&b->irq_lock); INIT_LIST_HEAD(&b->signalers); + INIT_LIST_HEAD(&b->signaled_requests); init_irq_work(&b->irq_work, signal_irq_work); } @@ -274,6 +280,32 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) spin_unlock_irqrestore(&b->irq_lock, flags); } +void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine, + struct intel_context *ce) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned long flags; + + spin_lock_irqsave(&b->irq_lock, flags); + if (!list_empty(&ce->signals)) { + struct i915_request *rq, *next; + + /* Queue for executing the signal callbacks in the irq_work */ + list_for_each_entry_safe(rq, next, &ce->signals, signal_link) { + GEM_BUG_ON(rq->engine != engine); + GEM_BUG_ON(!__request_completed(rq)); + + __signal_request(rq, &b->signaled_requests); + } + + INIT_LIST_HEAD(&ce->signals); + list_del_init(&ce->signal_link); + + irq_work_queue(&b->irq_work); + } + spin_unlock_irqrestore(&b->irq_lock, flags); +} + void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) { } diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index cb789c8bf06b..9bf6d4989968 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -238,6 +238,9 @@ intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine, + struct intel_context *ce); + void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, struct drm_printer *p); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index c39db7e249e2..2b6cdf47d428 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -377,6 +377,8 @@ struct intel_engine_cs { spinlock_t irq_lock; struct list_head signalers; + struct list_head signaled_requests; + struct irq_work irq_work; /* for use from inside irq_lock */ unsigned int irq_enabled; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 32fdb7cb3ae8..87e6c5bdd2dc 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1812,30 +1812,16 @@ static bool virtual_matches(const struct virtual_engine *ve, return true; } -static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, - struct i915_request *rq) +static void virtual_xfer_breadcrumbs(struct virtual_engine *ve) { - struct intel_engine_cs *old = ve->siblings[0]; - - /* All unattached (rq->engine == old) must already be completed */ - - spin_lock(&old->breadcrumbs.irq_lock); - if (!list_empty(&ve->context.signal_link)) { - list_del_init(&ve->context.signal_link); - - /* - * We cannot acquire the new engine->breadcrumbs.irq_lock - * (as we are holding a breadcrumbs.irq_lock already), - * so attach this request to the signaler on submission. - * The queued irq_work will occur when we finally drop - * the engine->active.lock after dequeue. - */ - set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags); - - /* Also transfer the pending irq_work for the old breadcrumb. */ - intel_engine_signal_breadcrumbs(rq->engine); - } - spin_unlock(&old->breadcrumbs.irq_lock); + /* + * All the outstanding signals on ve->siblings[0] must have + * been completed, just pending the interrupt handler. As those + * signals still refer to the old sibling (via rq->engine), we must + * transfer those to the old irq_worker to keep our locking + * consistent. + */ + intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context); } #define for_each_waiter(p__, rq__) \ @@ -2270,7 +2256,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) engine); if (!list_empty(&ve->context.signals)) - virtual_xfer_breadcrumbs(ve, rq); + virtual_xfer_breadcrumbs(ve); /* * Move the bound engine to the top of the list From f45ce9336ff0640e491c642a84ea02f21daac3a4 Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 14 May 2020 09:07:19 +0300 Subject: [PATCH 76/97] video/hdmi: Add Unpack only function for DRM infoframe It adds an unpack only function for DRM infoframe for dynamic range and mastering infoframe readout. It unpacks the information data block contained in the binary buffer into a structured frame of the HDMI Dynamic Range and Mastering (DRM) information frame. In contrast to hdmi_drm_infoframe_unpack() function, it does not verify a checksum. It can be used for unpacking a DP HDR Metadata Infoframe SDP case. DP HDR Metadata Infoframe SDP uses the same Dynamic Range and Mastering (DRM) information (CTA-861-G spec.) such as HDMI DRM infoframe. But DP SDP header and payload structure are different from HDMI DRM Infoframe. Therefore unpacking DRM infoframe for DP requires skipping of a verifying checksum. v9: Add clear comments to hdmi_drm_infoframe_unpack_only() and hdmi_drm_infoframe_unpack() (Laurent Pinchart) Signed-off-by: Gwan-gyeong Mun Reviewed-by: Uma Shankar Cc: Laurent Pinchart Cc: Ville Syrjala Acked-by: Daniel Vetter Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200514060732.3378396-2-gwan-gyeong.mun@intel.com --- drivers/video/hdmi.c | 65 +++++++++++++++++++++++++++++++------------- include/linux/hdmi.h | 2 ++ 2 files changed, 48 insertions(+), 19 deletions(-) diff --git a/drivers/video/hdmi.c b/drivers/video/hdmi.c index 856a8c4e84a2..e70792b3e367 100644 --- a/drivers/video/hdmi.c +++ b/drivers/video/hdmi.c @@ -1768,20 +1768,21 @@ hdmi_vendor_any_infoframe_unpack(union hdmi_vendor_any_infoframe *frame, } /** - * hdmi_drm_infoframe_unpack() - unpack binary buffer to a HDMI DRM infoframe + * hdmi_drm_infoframe_unpack_only() - unpack binary buffer of CTA-861-G DRM + * infoframe DataBytes to a HDMI DRM + * infoframe * @frame: HDMI DRM infoframe * @buffer: source buffer * @size: size of buffer * - * Unpacks the information contained in binary @buffer into a structured - * @frame of the HDMI Dynamic Range and Mastering (DRM) information frame. - * Also verifies the checksum as required by section 5.3.5 of the HDMI 1.4 - * specification. + * Unpacks CTA-861-G DRM infoframe DataBytes contained in the binary @buffer + * into a structured @frame of the HDMI Dynamic Range and Mastering (DRM) + * infoframe. * * Returns 0 on success or a negative error code on failure. */ -static int hdmi_drm_infoframe_unpack(struct hdmi_drm_infoframe *frame, - const void *buffer, size_t size) +int hdmi_drm_infoframe_unpack_only(struct hdmi_drm_infoframe *frame, + const void *buffer, size_t size) { const u8 *ptr = buffer; const u8 *temp; @@ -1790,23 +1791,13 @@ static int hdmi_drm_infoframe_unpack(struct hdmi_drm_infoframe *frame, int ret; int i; - if (size < HDMI_INFOFRAME_SIZE(DRM)) - return -EINVAL; - - if (ptr[0] != HDMI_INFOFRAME_TYPE_DRM || - ptr[1] != 1 || - ptr[2] != HDMI_DRM_INFOFRAME_SIZE) - return -EINVAL; - - if (hdmi_infoframe_checksum(buffer, HDMI_INFOFRAME_SIZE(DRM)) != 0) + if (size < HDMI_DRM_INFOFRAME_SIZE) return -EINVAL; ret = hdmi_drm_infoframe_init(frame); if (ret) return ret; - ptr += HDMI_INFOFRAME_HEADER_SIZE; - frame->eotf = ptr[0] & 0x7; frame->metadata_type = ptr[1] & 0x7; @@ -1814,7 +1805,7 @@ static int hdmi_drm_infoframe_unpack(struct hdmi_drm_infoframe *frame, for (i = 0; i < 3; i++) { x_lsb = *temp++; x_msb = *temp++; - frame->display_primaries[i].x = (x_msb << 8) | x_lsb; + frame->display_primaries[i].x = (x_msb << 8) | x_lsb; y_lsb = *temp++; y_msb = *temp++; frame->display_primaries[i].y = (y_msb << 8) | y_lsb; @@ -1830,6 +1821,42 @@ static int hdmi_drm_infoframe_unpack(struct hdmi_drm_infoframe *frame, return 0; } +EXPORT_SYMBOL(hdmi_drm_infoframe_unpack_only); + +/** + * hdmi_drm_infoframe_unpack() - unpack binary buffer to a HDMI DRM infoframe + * @frame: HDMI DRM infoframe + * @buffer: source buffer + * @size: size of buffer + * + * Unpacks the CTA-861-G DRM infoframe contained in the binary @buffer into + * a structured @frame of the HDMI Dynamic Range and Mastering (DRM) + * infoframe. It also verifies the checksum as required by section 5.3.5 of + * the HDMI 1.4 specification. + * + * Returns 0 on success or a negative error code on failure. + */ +static int hdmi_drm_infoframe_unpack(struct hdmi_drm_infoframe *frame, + const void *buffer, size_t size) +{ + const u8 *ptr = buffer; + int ret; + + if (size < HDMI_INFOFRAME_SIZE(DRM)) + return -EINVAL; + + if (ptr[0] != HDMI_INFOFRAME_TYPE_DRM || + ptr[1] != 1 || + ptr[2] != HDMI_DRM_INFOFRAME_SIZE) + return -EINVAL; + + if (hdmi_infoframe_checksum(buffer, HDMI_INFOFRAME_SIZE(DRM)) != 0) + return -EINVAL; + + ret = hdmi_drm_infoframe_unpack_only(frame, ptr + HDMI_INFOFRAME_HEADER_SIZE, + size - HDMI_INFOFRAME_HEADER_SIZE); + return ret; +} /** * hdmi_infoframe_unpack() - unpack binary buffer to a HDMI infoframe diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index 9613d796cfb1..50c31f1a0a2d 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -219,6 +219,8 @@ ssize_t hdmi_drm_infoframe_pack(struct hdmi_drm_infoframe *frame, void *buffer, ssize_t hdmi_drm_infoframe_pack_only(const struct hdmi_drm_infoframe *frame, void *buffer, size_t size); int hdmi_drm_infoframe_check(struct hdmi_drm_infoframe *frame); +int hdmi_drm_infoframe_unpack_only(struct hdmi_drm_infoframe *frame, + const void *buffer, size_t size); enum hdmi_spd_sdi { HDMI_SPD_SDI_UNKNOWN, From 1b404b7dbb100cd5d94b95bf86d8d92c31108b49 Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 14 May 2020 09:07:20 +0300 Subject: [PATCH 77/97] drm/i915/dp: Read out DP SDPs It adds code to read the DP SDPs from the video DIP and unpack them into the crtc state. It adds routines that read out DP VSC SDP and DP HDR Metadata Infoframe SDP In order to unpack DP VSC SDP, it adds intel_dp_vsc_sdp_unpack() function. It follows DP 1.4a spec. [Table 2-116: VSC SDP Header Bytes] and [Table 2-117: VSC SDP Payload for DB16 through DB18] In order to unpack DP HDR Metadata Infoframe SDP, it adds intel_dp_hdr_metadata_infoframe_sdp_unpack(). And it follows DP 1.4a spec. ([Table 2-125: INFOFRAME SDP v1.2 Header Bytes] and [Table 2-126: INFOFRAME SDP v1.2 Payload Data Bytes - DB0 through DB31]) and CTA-861-G spec. [Table-42 Dynamic Range and Mastering InfoFrame]. A naming rule and style of intel_read_dp_sdp() function references intel_read_infoframe() function of intel_hdmi.c v2: Minor style fix v3: Replace a structure name to drm_dp_vsc_sdp from intel_dp_vsc_sdp v4: Use struct drm_device logging macros v5: Addressed review comments from Uma - Polish commit message and comments - Combine the if checks of sdp.HB2 and sdp.HB3 - Add 6bpc to unpacking of VSC SDP Signed-off-by: Gwan-gyeong Mun Reviewed-by: Uma Shankar Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200514060732.3378396-3-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 187 ++++++++++++++++++++++++ drivers/gpu/drm/i915/display/intel_dp.h | 3 + 2 files changed, 190 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 67723dede1d1..84c0105be61b 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -4971,6 +4971,193 @@ void intel_dp_set_infoframes(struct intel_encoder *encoder, intel_write_dp_sdp(encoder, crtc_state, HDMI_PACKET_TYPE_GAMUT_METADATA); } +static int intel_dp_vsc_sdp_unpack(struct drm_dp_vsc_sdp *vsc, + const void *buffer, size_t size) +{ + const struct dp_sdp *sdp = buffer; + + if (size < sizeof(struct dp_sdp)) + return -EINVAL; + + memset(vsc, 0, size); + + if (sdp->sdp_header.HB0 != 0) + return -EINVAL; + + if (sdp->sdp_header.HB1 != DP_SDP_VSC) + return -EINVAL; + + vsc->sdp_type = sdp->sdp_header.HB1; + vsc->revision = sdp->sdp_header.HB2; + vsc->length = sdp->sdp_header.HB3; + + if ((sdp->sdp_header.HB2 == 0x2 && sdp->sdp_header.HB3 == 0x8) || + (sdp->sdp_header.HB2 == 0x4 && sdp->sdp_header.HB3 == 0xe)) { + /* + * - HB2 = 0x2, HB3 = 0x8 + * VSC SDP supporting 3D stereo + PSR + * - HB2 = 0x4, HB3 = 0xe + * VSC SDP supporting 3D stereo + PSR2 with Y-coordinate of + * first scan line of the SU region (applies to eDP v1.4b + * and higher). + */ + return 0; + } else if (sdp->sdp_header.HB2 == 0x5 && sdp->sdp_header.HB3 == 0x13) { + /* + * - HB2 = 0x5, HB3 = 0x13 + * VSC SDP supporting 3D stereo + PSR2 + Pixel Encoding/Colorimetry + * Format. + */ + vsc->pixelformat = (sdp->db[16] >> 4) & 0xf; + vsc->colorimetry = sdp->db[16] & 0xf; + vsc->dynamic_range = (sdp->db[17] >> 7) & 0x1; + + switch (sdp->db[17] & 0x7) { + case 0x0: + vsc->bpc = 6; + break; + case 0x1: + vsc->bpc = 8; + break; + case 0x2: + vsc->bpc = 10; + break; + case 0x3: + vsc->bpc = 12; + break; + case 0x4: + vsc->bpc = 16; + break; + default: + MISSING_CASE(sdp->db[17] & 0x7); + return -EINVAL; + } + + vsc->content_type = sdp->db[18] & 0x7; + } else { + return -EINVAL; + } + + return 0; +} + +static int +intel_dp_hdr_metadata_infoframe_sdp_unpack(struct hdmi_drm_infoframe *drm_infoframe, + const void *buffer, size_t size) +{ + int ret; + + const struct dp_sdp *sdp = buffer; + + if (size < sizeof(struct dp_sdp)) + return -EINVAL; + + if (sdp->sdp_header.HB0 != 0) + return -EINVAL; + + if (sdp->sdp_header.HB1 != HDMI_INFOFRAME_TYPE_DRM) + return -EINVAL; + + /* + * Least Significant Eight Bits of (Data Byte Count – 1) + * 1Dh (i.e., Data Byte Count = 30 bytes). + */ + if (sdp->sdp_header.HB2 != 0x1D) + return -EINVAL; + + /* Most Significant Two Bits of (Data Byte Count – 1), Clear to 00b. */ + if ((sdp->sdp_header.HB3 & 0x3) != 0) + return -EINVAL; + + /* INFOFRAME SDP Version Number */ + if (((sdp->sdp_header.HB3 >> 2) & 0x3f) != 0x13) + return -EINVAL; + + /* CTA Header Byte 2 (INFOFRAME Version Number) */ + if (sdp->db[0] != 1) + return -EINVAL; + + /* CTA Header Byte 3 (Length of INFOFRAME): HDMI_DRM_INFOFRAME_SIZE */ + if (sdp->db[1] != HDMI_DRM_INFOFRAME_SIZE) + return -EINVAL; + + ret = hdmi_drm_infoframe_unpack_only(drm_infoframe, &sdp->db[2], + HDMI_DRM_INFOFRAME_SIZE); + + return ret; +} + +static void intel_read_dp_vsc_sdp(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + struct drm_dp_vsc_sdp *vsc) +{ + struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); + struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + unsigned int type = DP_SDP_VSC; + struct dp_sdp sdp = {}; + int ret; + + /* When PSR is enabled, VSC SDP is handled by PSR routine */ + if (intel_psr_enabled(intel_dp)) + return; + + if ((crtc_state->infoframes.enable & + intel_hdmi_infoframe_enable(type)) == 0) + return; + + intel_dig_port->read_infoframe(encoder, crtc_state, type, &sdp, sizeof(sdp)); + + ret = intel_dp_vsc_sdp_unpack(vsc, &sdp, sizeof(sdp)); + + if (ret) + drm_dbg_kms(&dev_priv->drm, "Failed to unpack DP VSC SDP\n"); +} + +static void intel_read_dp_hdr_metadata_infoframe_sdp(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + struct hdmi_drm_infoframe *drm_infoframe) +{ + struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + unsigned int type = HDMI_PACKET_TYPE_GAMUT_METADATA; + struct dp_sdp sdp = {}; + int ret; + + if ((crtc_state->infoframes.enable & + intel_hdmi_infoframe_enable(type)) == 0) + return; + + intel_dig_port->read_infoframe(encoder, crtc_state, type, &sdp, + sizeof(sdp)); + + ret = intel_dp_hdr_metadata_infoframe_sdp_unpack(drm_infoframe, &sdp, + sizeof(sdp)); + + if (ret) + drm_dbg_kms(&dev_priv->drm, + "Failed to unpack DP HDR Metadata Infoframe SDP\n"); +} + +void intel_read_dp_sdp(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + unsigned int type) +{ + switch (type) { + case DP_SDP_VSC: + intel_read_dp_vsc_sdp(encoder, crtc_state, + &crtc_state->infoframes.vsc); + break; + case HDMI_PACKET_TYPE_GAMUT_METADATA: + intel_read_dp_hdr_metadata_infoframe_sdp(encoder, crtc_state, + &crtc_state->infoframes.drm.drm); + break; + default: + MISSING_CASE(type); + break; + } +} + static void intel_dp_setup_vsc_sdp(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state, diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 6659ce15a693..faa2a3c5ee4e 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -117,6 +117,9 @@ void intel_dp_hdr_metadata_enable(struct intel_dp *intel_dp, void intel_dp_set_infoframes(struct intel_encoder *encoder, bool enable, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); +void intel_read_dp_sdp(struct intel_encoder *encoder, + struct intel_crtc_state *crtc_state, + unsigned int type); bool intel_digital_port_connected(struct intel_encoder *encoder); void intel_dp_process_phy_request(struct intel_dp *intel_dp); From 2ba6221cca7e25bd05a416b0e64afb6a5b28dc9b Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 14 May 2020 09:07:21 +0300 Subject: [PATCH 78/97] drm: Add logging function for DP VSC SDP When receiving video it is very useful to be able to log DP VSC SDP. This greatly simplifies debugging. v2: Minor style fix v3: Move logging functions to drm core [Jani N] v5: Rebased v10: Rebased Signed-off-by: Gwan-gyeong Mun Reviewed-by: Uma Shankar Acked-by: Daniel Vetter Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200514060732.3378396-4-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/drm_dp_helper.c | 174 ++++++++++++++++++++++++++++++++ include/drm/drm_dp_helper.h | 3 + 2 files changed, 177 insertions(+) diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index 612a59ec8116..43e57632b00a 100644 --- a/drivers/gpu/drm/drm_dp_helper.c +++ b/drivers/gpu/drm/drm_dp_helper.c @@ -1629,3 +1629,177 @@ int drm_dp_set_phy_test_pattern(struct drm_dp_aux *aux, return 0; } EXPORT_SYMBOL(drm_dp_set_phy_test_pattern); + +static const char *dp_pixelformat_get_name(enum dp_pixelformat pixelformat) +{ + if (pixelformat < 0 || pixelformat > DP_PIXELFORMAT_RESERVED) + return "Invalid"; + + switch (pixelformat) { + case DP_PIXELFORMAT_RGB: + return "RGB"; + case DP_PIXELFORMAT_YUV444: + return "YUV444"; + case DP_PIXELFORMAT_YUV422: + return "YUV422"; + case DP_PIXELFORMAT_YUV420: + return "YUV420"; + case DP_PIXELFORMAT_Y_ONLY: + return "Y_ONLY"; + case DP_PIXELFORMAT_RAW: + return "RAW"; + default: + return "Reserved"; + } +} + +static const char *dp_colorimetry_get_name(enum dp_pixelformat pixelformat, + enum dp_colorimetry colorimetry) +{ + if (pixelformat < 0 || pixelformat > DP_PIXELFORMAT_RESERVED) + return "Invalid"; + + switch (colorimetry) { + case DP_COLORIMETRY_DEFAULT: + switch (pixelformat) { + case DP_PIXELFORMAT_RGB: + return "sRGB"; + case DP_PIXELFORMAT_YUV444: + case DP_PIXELFORMAT_YUV422: + case DP_PIXELFORMAT_YUV420: + return "BT.601"; + case DP_PIXELFORMAT_Y_ONLY: + return "DICOM PS3.14"; + case DP_PIXELFORMAT_RAW: + return "Custom Color Profile"; + default: + return "Reserved"; + } + case DP_COLORIMETRY_RGB_WIDE_FIXED: /* and DP_COLORIMETRY_BT709_YCC */ + switch (pixelformat) { + case DP_PIXELFORMAT_RGB: + return "Wide Fixed"; + case DP_PIXELFORMAT_YUV444: + case DP_PIXELFORMAT_YUV422: + case DP_PIXELFORMAT_YUV420: + return "BT.709"; + default: + return "Reserved"; + } + case DP_COLORIMETRY_RGB_WIDE_FLOAT: /* and DP_COLORIMETRY_XVYCC_601 */ + switch (pixelformat) { + case DP_PIXELFORMAT_RGB: + return "Wide Float"; + case DP_PIXELFORMAT_YUV444: + case DP_PIXELFORMAT_YUV422: + case DP_PIXELFORMAT_YUV420: + return "xvYCC 601"; + default: + return "Reserved"; + } + case DP_COLORIMETRY_OPRGB: /* and DP_COLORIMETRY_XVYCC_709 */ + switch (pixelformat) { + case DP_PIXELFORMAT_RGB: + return "OpRGB"; + case DP_PIXELFORMAT_YUV444: + case DP_PIXELFORMAT_YUV422: + case DP_PIXELFORMAT_YUV420: + return "xvYCC 709"; + default: + return "Reserved"; + } + case DP_COLORIMETRY_DCI_P3_RGB: /* and DP_COLORIMETRY_SYCC_601 */ + switch (pixelformat) { + case DP_PIXELFORMAT_RGB: + return "DCI-P3"; + case DP_PIXELFORMAT_YUV444: + case DP_PIXELFORMAT_YUV422: + case DP_PIXELFORMAT_YUV420: + return "sYCC 601"; + default: + return "Reserved"; + } + case DP_COLORIMETRY_RGB_CUSTOM: /* and DP_COLORIMETRY_OPYCC_601 */ + switch (pixelformat) { + case DP_PIXELFORMAT_RGB: + return "Custom Profile"; + case DP_PIXELFORMAT_YUV444: + case DP_PIXELFORMAT_YUV422: + case DP_PIXELFORMAT_YUV420: + return "OpYCC 601"; + default: + return "Reserved"; + } + case DP_COLORIMETRY_BT2020_RGB: /* and DP_COLORIMETRY_BT2020_CYCC */ + switch (pixelformat) { + case DP_PIXELFORMAT_RGB: + return "BT.2020 RGB"; + case DP_PIXELFORMAT_YUV444: + case DP_PIXELFORMAT_YUV422: + case DP_PIXELFORMAT_YUV420: + return "BT.2020 CYCC"; + default: + return "Reserved"; + } + case DP_COLORIMETRY_BT2020_YCC: + switch (pixelformat) { + case DP_PIXELFORMAT_YUV444: + case DP_PIXELFORMAT_YUV422: + case DP_PIXELFORMAT_YUV420: + return "BT.2020 YCC"; + default: + return "Reserved"; + } + default: + return "Invalid"; + } +} + +static const char *dp_dynamic_range_get_name(enum dp_dynamic_range dynamic_range) +{ + switch (dynamic_range) { + case DP_DYNAMIC_RANGE_VESA: + return "VESA range"; + case DP_DYNAMIC_RANGE_CTA: + return "CTA range"; + default: + return "Invalid"; + } +} + +static const char *dp_content_type_get_name(enum dp_content_type content_type) +{ + switch (content_type) { + case DP_CONTENT_TYPE_NOT_DEFINED: + return "Not defined"; + case DP_CONTENT_TYPE_GRAPHICS: + return "Graphics"; + case DP_CONTENT_TYPE_PHOTO: + return "Photo"; + case DP_CONTENT_TYPE_VIDEO: + return "Video"; + case DP_CONTENT_TYPE_GAME: + return "Game"; + default: + return "Reserved"; + } +} + +void drm_dp_vsc_sdp_log(const char *level, struct device *dev, + const struct drm_dp_vsc_sdp *vsc) +{ +#define DP_SDP_LOG(fmt, ...) dev_printk(level, dev, fmt, ##__VA_ARGS__) + DP_SDP_LOG("DP SDP: %s, revision %u, length %u\n", "VSC", + vsc->revision, vsc->length); + DP_SDP_LOG(" pixelformat: %s\n", + dp_pixelformat_get_name(vsc->pixelformat)); + DP_SDP_LOG(" colorimetry: %s\n", + dp_colorimetry_get_name(vsc->pixelformat, vsc->colorimetry)); + DP_SDP_LOG(" bpc: %u\n", vsc->bpc); + DP_SDP_LOG(" dynamic range: %s\n", + dp_dynamic_range_get_name(vsc->dynamic_range)); + DP_SDP_LOG(" content type: %s\n", + dp_content_type_get_name(vsc->content_type)); +#undef DP_SDP_LOG +} +EXPORT_SYMBOL(drm_dp_vsc_sdp_log); diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 3beb2aac8c4c..cd44509772cb 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -1348,6 +1348,9 @@ struct drm_dp_vsc_sdp { enum dp_content_type content_type; }; +void drm_dp_vsc_sdp_log(const char *level, struct device *dev, + const struct drm_dp_vsc_sdp *vsc); + int drm_dp_psr_setup_time(const u8 psr_cap[EDP_PSR_RECEIVER_CAP_SIZE]); static inline int From bfbeba29b9bc65c2fbe3b130d5ce0d096f487eb4 Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 14 May 2020 09:07:22 +0300 Subject: [PATCH 79/97] drm/i915: Include HDMI DRM infoframe in the crtc state dump Dump out the HDMI Dynamic Range and Mastering (DRM) infoframe in the normal crtc state dump. Signed-off-by: Gwan-gyeong Mun Reviewed-by: Uma Shankar Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200514060732.3378396-5-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index fb41f36c0256..b532b988935a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -13024,6 +13024,9 @@ static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config, if (pipe_config->infoframes.enable & intel_hdmi_infoframe_enable(HDMI_INFOFRAME_TYPE_VENDOR)) intel_dump_infoframe(dev_priv, &pipe_config->infoframes.hdmi); + if (pipe_config->infoframes.enable & + intel_hdmi_infoframe_enable(HDMI_INFOFRAME_TYPE_DRM)) + intel_dump_infoframe(dev_priv, &pipe_config->infoframes.drm); drm_dbg_kms(&dev_priv->drm, "requested mode:\n"); drm_mode_debug_printmodeline(&pipe_config->hw.mode); From e274fb32ffc8508408c3a4d65931d9fdd08f2c89 Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 14 May 2020 09:07:23 +0300 Subject: [PATCH 80/97] drm/i915: Include DP HDR Metadata Infoframe SDP in the crtc state dump Dump out the DP HDR Metadata Infoframe SDP in the normal crtc state dump. HDMI Dynamic Range and Mastering (DRM) infoframe and DP HDR Metadata Infoframe SDP use the same member variable in infoframes of crtc state. Signed-off-by: Gwan-gyeong Mun Reviewed-by: Uma Shankar Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200514060732.3378396-6-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index b532b988935a..ef35d808c209 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -13027,6 +13027,9 @@ static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config, if (pipe_config->infoframes.enable & intel_hdmi_infoframe_enable(HDMI_INFOFRAME_TYPE_DRM)) intel_dump_infoframe(dev_priv, &pipe_config->infoframes.drm); + if (pipe_config->infoframes.enable & + intel_hdmi_infoframe_enable(HDMI_PACKET_TYPE_GAMUT_METADATA)) + intel_dump_infoframe(dev_priv, &pipe_config->infoframes.drm); drm_dbg_kms(&dev_priv->drm, "requested mode:\n"); drm_mode_debug_printmodeline(&pipe_config->hw.mode); From 42890250d0ec79ff60b5084c93ff63fcdeffc5d0 Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 14 May 2020 09:07:24 +0300 Subject: [PATCH 81/97] drm/i915: Include DP VSC SDP in the crtc state dump Dump out the DP VSC SDP in the normal crtc state dump v3: Replace a structure name to drm_dp_vsc_sdp from intel_dp_vsc_sdp Use drm core's DP VSC SDP logging function Signed-off-by: Gwan-gyeong Mun Reviewed-by: Uma Shankar Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200514060732.3378396-7-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index ef35d808c209..a9c3125423f8 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -12867,6 +12867,16 @@ intel_dump_infoframe(struct drm_i915_private *dev_priv, hdmi_infoframe_log(KERN_DEBUG, dev_priv->drm.dev, frame); } +static void +intel_dump_dp_vsc_sdp(struct drm_i915_private *dev_priv, + const struct drm_dp_vsc_sdp *vsc) +{ + if (!drm_debug_enabled(DRM_UT_KMS)) + return; + + drm_dp_vsc_sdp_log(KERN_DEBUG, dev_priv->drm.dev, vsc); +} + #define OUTPUT_TYPE(x) [INTEL_OUTPUT_ ## x] = #x static const char * const output_type_str[] = { @@ -13030,6 +13040,9 @@ static void intel_dump_pipe_config(const struct intel_crtc_state *pipe_config, if (pipe_config->infoframes.enable & intel_hdmi_infoframe_enable(HDMI_PACKET_TYPE_GAMUT_METADATA)) intel_dump_infoframe(dev_priv, &pipe_config->infoframes.drm); + if (pipe_config->infoframes.enable & + intel_hdmi_infoframe_enable(DP_SDP_VSC)) + intel_dump_dp_vsc_sdp(dev_priv, &pipe_config->infoframes.vsc); drm_dbg_kms(&dev_priv->drm, "requested mode:\n"); drm_mode_debug_printmodeline(&pipe_config->hw.mode); From 1bf3657c03ddede977d3664792e78b4ed99452be Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 14 May 2020 09:07:25 +0300 Subject: [PATCH 82/97] drm/i915: Program DP SDPs with computed configs In order to use computed config for DP SDPs (DP VSC SDP and DP HDR Metadata Infoframe SDP), it replaces intel_dp_vsc_enable() function and intel_dp_hdr_metadata_enable() function to intel_dp_set_infoframes() function. And it removes unused functions. Before: intel_dp_vsc_enable() and intel_dp_hdr_metadata_enable() compute sdp configs and program sdp registers on enable callback of encoder. After: It separates computing of sdp configs and programming of sdp register. The compute config callback of encoder calls computing sdp configs. The enable callback of encoder calls programming sdp register. v3: Rebased v5: Polish commit message [Uma] v10: Rebased Signed-off-by: Gwan-gyeong Mun Reviewed-by: Uma Shankar Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200514060732.3378396-8-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 3 +- drivers/gpu/drm/i915/display/intel_dp.c | 229 ----------------------- drivers/gpu/drm/i915/display/intel_dp.h | 6 - 3 files changed, 1 insertion(+), 237 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 798889f72495..75c7ffb1874c 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3681,8 +3681,7 @@ static void intel_enable_ddi_dp(struct intel_atomic_state *state, intel_edp_backlight_on(crtc_state, conn_state); intel_psr_enable(intel_dp, crtc_state); - intel_dp_vsc_enable(intel_dp, crtc_state, conn_state); - intel_dp_hdr_metadata_enable(intel_dp, crtc_state, conn_state); + intel_dp_set_infoframes(encoder, true, crtc_state, conn_state); intel_edp_drrs_enable(intel_dp, crtc_state); if (crtc_state->has_audio) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 84c0105be61b..4ccd6b7a3d83 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -5158,235 +5158,6 @@ void intel_read_dp_sdp(struct intel_encoder *encoder, } } -static void -intel_dp_setup_vsc_sdp(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state) -{ - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct dp_sdp vsc_sdp = {}; - - /* Prepare VSC Header for SU as per DP 1.4a spec, Table 2-119 */ - vsc_sdp.sdp_header.HB0 = 0; - vsc_sdp.sdp_header.HB1 = 0x7; - - /* - * VSC SDP supporting 3D stereo, PSR2, and Pixel Encoding/ - * Colorimetry Format indication. - */ - vsc_sdp.sdp_header.HB2 = 0x5; - - /* - * VSC SDP supporting 3D stereo, + PSR2, + Pixel Encoding/ - * Colorimetry Format indication (HB2 = 05h). - */ - vsc_sdp.sdp_header.HB3 = 0x13; - - /* DP 1.4a spec, Table 2-120 */ - switch (crtc_state->output_format) { - case INTEL_OUTPUT_FORMAT_YCBCR444: - vsc_sdp.db[16] = 0x1 << 4; /* YCbCr 444 : DB16[7:4] = 1h */ - break; - case INTEL_OUTPUT_FORMAT_YCBCR420: - vsc_sdp.db[16] = 0x3 << 4; /* YCbCr 420 : DB16[7:4] = 3h */ - break; - case INTEL_OUTPUT_FORMAT_RGB: - default: - /* RGB: DB16[7:4] = 0h */ - break; - } - - switch (conn_state->colorspace) { - case DRM_MODE_COLORIMETRY_BT709_YCC: - vsc_sdp.db[16] |= 0x1; - break; - case DRM_MODE_COLORIMETRY_XVYCC_601: - vsc_sdp.db[16] |= 0x2; - break; - case DRM_MODE_COLORIMETRY_XVYCC_709: - vsc_sdp.db[16] |= 0x3; - break; - case DRM_MODE_COLORIMETRY_SYCC_601: - vsc_sdp.db[16] |= 0x4; - break; - case DRM_MODE_COLORIMETRY_OPYCC_601: - vsc_sdp.db[16] |= 0x5; - break; - case DRM_MODE_COLORIMETRY_BT2020_CYCC: - case DRM_MODE_COLORIMETRY_BT2020_RGB: - vsc_sdp.db[16] |= 0x6; - break; - case DRM_MODE_COLORIMETRY_BT2020_YCC: - vsc_sdp.db[16] |= 0x7; - break; - case DRM_MODE_COLORIMETRY_DCI_P3_RGB_D65: - case DRM_MODE_COLORIMETRY_DCI_P3_RGB_THEATER: - vsc_sdp.db[16] |= 0x4; /* DCI-P3 (SMPTE RP 431-2) */ - break; - default: - /* sRGB (IEC 61966-2-1) / ITU-R BT.601: DB16[0:3] = 0h */ - - /* RGB->YCBCR color conversion uses the BT.709 color space. */ - if (crtc_state->output_format == INTEL_OUTPUT_FORMAT_YCBCR420) - vsc_sdp.db[16] |= 0x1; /* 0x1, ITU-R BT.709 */ - break; - } - - /* - * For pixel encoding formats YCbCr444, YCbCr422, YCbCr420, and Y Only, - * the following Component Bit Depth values are defined: - * 001b = 8bpc. - * 010b = 10bpc. - * 011b = 12bpc. - * 100b = 16bpc. - */ - switch (crtc_state->pipe_bpp) { - case 24: /* 8bpc */ - vsc_sdp.db[17] = 0x1; - break; - case 30: /* 10bpc */ - vsc_sdp.db[17] = 0x2; - break; - case 36: /* 12bpc */ - vsc_sdp.db[17] = 0x3; - break; - case 48: /* 16bpc */ - vsc_sdp.db[17] = 0x4; - break; - default: - MISSING_CASE(crtc_state->pipe_bpp); - break; - } - - /* - * Dynamic Range (Bit 7) - * 0 = VESA range, 1 = CTA range. - * all YCbCr are always limited range - */ - vsc_sdp.db[17] |= 0x80; - - /* - * Content Type (Bits 2:0) - * 000b = Not defined. - * 001b = Graphics. - * 010b = Photo. - * 011b = Video. - * 100b = Game - * All other values are RESERVED. - * Note: See CTA-861-G for the definition and expected - * processing by a stream sink for the above contect types. - */ - vsc_sdp.db[18] = 0; - - intel_dig_port->write_infoframe(&intel_dig_port->base, - crtc_state, DP_SDP_VSC, &vsc_sdp, sizeof(vsc_sdp)); -} - -static void -intel_dp_setup_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state) -{ - struct drm_i915_private *i915 = dp_to_i915(intel_dp); - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct dp_sdp infoframe_sdp = {}; - struct hdmi_drm_infoframe drm_infoframe = {}; - const int infoframe_size = HDMI_INFOFRAME_HEADER_SIZE + HDMI_DRM_INFOFRAME_SIZE; - unsigned char buf[HDMI_INFOFRAME_HEADER_SIZE + HDMI_DRM_INFOFRAME_SIZE]; - ssize_t len; - int ret; - - ret = drm_hdmi_infoframe_set_hdr_metadata(&drm_infoframe, conn_state); - if (ret) { - drm_dbg_kms(&i915->drm, - "couldn't set HDR metadata in infoframe\n"); - return; - } - - len = hdmi_drm_infoframe_pack_only(&drm_infoframe, buf, sizeof(buf)); - if (len < 0) { - drm_dbg_kms(&i915->drm, - "buffer size is smaller than hdr metadata infoframe\n"); - return; - } - - if (len != infoframe_size) { - drm_dbg_kms(&i915->drm, "wrong static hdr metadata size\n"); - return; - } - - /* - * Set up the infoframe sdp packet for HDR static metadata. - * Prepare VSC Header for SU as per DP 1.4a spec, - * Table 2-100 and Table 2-101 - */ - - /* Packet ID, 00h for non-Audio INFOFRAME */ - infoframe_sdp.sdp_header.HB0 = 0; - /* - * Packet Type 80h + Non-audio INFOFRAME Type value - * HDMI_INFOFRAME_TYPE_DRM: 0x87, - */ - infoframe_sdp.sdp_header.HB1 = drm_infoframe.type; - /* - * Least Significant Eight Bits of (Data Byte Count – 1) - * infoframe_size - 1, - */ - infoframe_sdp.sdp_header.HB2 = 0x1D; - /* INFOFRAME SDP Version Number */ - infoframe_sdp.sdp_header.HB3 = (0x13 << 2); - /* CTA Header Byte 2 (INFOFRAME Version Number) */ - infoframe_sdp.db[0] = drm_infoframe.version; - /* CTA Header Byte 3 (Length of INFOFRAME): HDMI_DRM_INFOFRAME_SIZE */ - infoframe_sdp.db[1] = drm_infoframe.length; - /* - * Copy HDMI_DRM_INFOFRAME_SIZE size from a buffer after - * HDMI_INFOFRAME_HEADER_SIZE - */ - BUILD_BUG_ON(sizeof(infoframe_sdp.db) < HDMI_DRM_INFOFRAME_SIZE + 2); - memcpy(&infoframe_sdp.db[2], &buf[HDMI_INFOFRAME_HEADER_SIZE], - HDMI_DRM_INFOFRAME_SIZE); - - /* - * Size of DP infoframe sdp packet for HDR static metadata is consist of - * - DP SDP Header(struct dp_sdp_header): 4 bytes - * - Two Data Blocks: 2 bytes - * CTA Header Byte2 (INFOFRAME Version Number) - * CTA Header Byte3 (Length of INFOFRAME) - * - HDMI_DRM_INFOFRAME_SIZE: 26 bytes - * - * Prior to GEN11's GMP register size is identical to DP HDR static metadata - * infoframe size. But GEN11+ has larger than that size, write_infoframe - * will pad rest of the size. - */ - intel_dig_port->write_infoframe(&intel_dig_port->base, crtc_state, - HDMI_PACKET_TYPE_GAMUT_METADATA, - &infoframe_sdp, - sizeof(struct dp_sdp_header) + 2 + HDMI_DRM_INFOFRAME_SIZE); -} - -void intel_dp_vsc_enable(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state) -{ - if (!intel_dp_needs_vsc_sdp(crtc_state, conn_state)) - return; - - intel_dp_setup_vsc_sdp(intel_dp, crtc_state, conn_state); -} - -void intel_dp_hdr_metadata_enable(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state) -{ - if (!conn_state->hdr_output_metadata) - return; - - intel_dp_setup_hdr_metadata_infoframe_sdp(intel_dp, - crtc_state, - conn_state); -} - static u8 intel_dp_autotest_link_training(struct intel_dp *intel_dp) { struct drm_i915_private *i915 = dp_to_i915(intel_dp); diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index faa2a3c5ee4e..0d2de15703c3 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -108,12 +108,6 @@ int intel_dp_link_required(int pixel_clock, int bpp); int intel_dp_max_data_rate(int max_link_clock, int max_lanes); bool intel_dp_needs_vsc_sdp(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); -void intel_dp_vsc_enable(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state); -void intel_dp_hdr_metadata_enable(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state, - const struct drm_connector_state *conn_state); void intel_dp_set_infoframes(struct intel_encoder *encoder, bool enable, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); From dee66f3e071b394de16da18e2807f371b789b1be Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 14 May 2020 09:07:26 +0300 Subject: [PATCH 83/97] drm/i915: Add state readout for DP HDR Metadata Infoframe SDP Added state readout for DP HDR Metadata Infoframe SDP. v9: Rebased v10: Rebased Signed-off-by: Gwan-gyeong Mun Reviewed-by: Uma Shankar Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200514060732.3378396-9-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 75c7ffb1874c..f2bb7527ba0c 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4234,6 +4234,9 @@ void intel_ddi_get_config(struct intel_encoder *encoder, pipe_config->fec_enable); } + pipe_config->infoframes.enable |= + intel_hdmi_infoframes_enabled(encoder, pipe_config); + break; case TRANS_DDI_MODE_SELECT_DP_MST: pipe_config->output_types |= BIT(INTEL_OUTPUT_DP_MST); @@ -4245,6 +4248,9 @@ void intel_ddi_get_config(struct intel_encoder *encoder, REG_FIELD_GET(TRANS_DDI_MST_TRANSPORT_SELECT_MASK, temp); intel_dp_get_m_n(intel_crtc, pipe_config); + + pipe_config->infoframes.enable |= + intel_hdmi_infoframes_enabled(encoder, pipe_config); break; default: break; @@ -4299,6 +4305,8 @@ void intel_ddi_get_config(struct intel_encoder *encoder, if (INTEL_GEN(dev_priv) >= 8) bdw_get_trans_port_sync_config(pipe_config); + + intel_read_dp_sdp(encoder, pipe_config, HDMI_PACKET_TYPE_GAMUT_METADATA); } static enum intel_output_type From 2c3928e4d87e37c5d2f778345e955b86c5cec9df Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 14 May 2020 09:07:27 +0300 Subject: [PATCH 84/97] drm/i915: Add state readout for DP VSC SDP Added state readout for DP VSC SDP and enabled state validation for DP VSC SDP. v2: Minor style fix v3: Replace a structure name to drm_dp_vsc_sdp from intel_dp_vsc_sdp v4: Use struct drm_device logging macros v10: Skip checking of VSC SDP when a crtc config has psr. Signed-off-by: Gwan-gyeong Mun Reviewed-by: Uma Shankar Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200514060732.3378396-10-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 1 + drivers/gpu/drm/i915/display/intel_display.c | 44 ++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index f2bb7527ba0c..2b54ea47e822 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -4307,6 +4307,7 @@ void intel_ddi_get_config(struct intel_encoder *encoder, bdw_get_trans_port_sync_config(pipe_config); intel_read_dp_sdp(encoder, pipe_config, HDMI_PACKET_TYPE_GAMUT_METADATA); + intel_read_dp_sdp(encoder, pipe_config, DP_SDP_VSC); } static enum intel_output_type diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index a9c3125423f8..4747d96ec66d 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -13490,6 +13490,13 @@ intel_compare_infoframe(const union hdmi_infoframe *a, return memcmp(a, b, sizeof(*a)) == 0; } +static bool +intel_compare_dp_vsc_sdp(const struct drm_dp_vsc_sdp *a, + const struct drm_dp_vsc_sdp *b) +{ + return memcmp(a, b, sizeof(*a)) == 0; +} + static void pipe_config_infoframe_mismatch(struct drm_i915_private *dev_priv, bool fastset, const char *name, @@ -13515,6 +13522,31 @@ pipe_config_infoframe_mismatch(struct drm_i915_private *dev_priv, } } +static void +pipe_config_dp_vsc_sdp_mismatch(struct drm_i915_private *dev_priv, + bool fastset, const char *name, + const struct drm_dp_vsc_sdp *a, + const struct drm_dp_vsc_sdp *b) +{ + if (fastset) { + if (!drm_debug_enabled(DRM_UT_KMS)) + return; + + drm_dbg_kms(&dev_priv->drm, + "fastset mismatch in %s dp sdp\n", name); + drm_dbg_kms(&dev_priv->drm, "expected:\n"); + drm_dp_vsc_sdp_log(KERN_DEBUG, dev_priv->drm.dev, a); + drm_dbg_kms(&dev_priv->drm, "found:\n"); + drm_dp_vsc_sdp_log(KERN_DEBUG, dev_priv->drm.dev, b); + } else { + drm_err(&dev_priv->drm, "mismatch in %s dp sdp\n", name); + drm_err(&dev_priv->drm, "expected:\n"); + drm_dp_vsc_sdp_log(KERN_ERR, dev_priv->drm.dev, a); + drm_err(&dev_priv->drm, "found:\n"); + drm_dp_vsc_sdp_log(KERN_ERR, dev_priv->drm.dev, b); + } +} + static void __printf(4, 5) pipe_config_mismatch(bool fastset, const struct intel_crtc *crtc, const char *name, const char *format, ...) @@ -13716,6 +13748,17 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, } \ } while (0) +#define PIPE_CONF_CHECK_DP_VSC_SDP(name) do { \ + if (!current_config->has_psr && !pipe_config->has_psr && \ + !intel_compare_dp_vsc_sdp(¤t_config->infoframes.name, \ + &pipe_config->infoframes.name)) { \ + pipe_config_dp_vsc_sdp_mismatch(dev_priv, fastset, __stringify(name), \ + ¤t_config->infoframes.name, \ + &pipe_config->infoframes.name); \ + ret = false; \ + } \ +} while (0) + #define PIPE_CONF_CHECK_COLOR_LUT(name1, name2, bit_precision) do { \ if (current_config->name1 != pipe_config->name1) { \ pipe_config_mismatch(fastset, crtc, __stringify(name1), \ @@ -13893,6 +13936,7 @@ intel_pipe_config_compare(const struct intel_crtc_state *current_config, PIPE_CONF_CHECK_INFOFRAME(spd); PIPE_CONF_CHECK_INFOFRAME(hdmi); PIPE_CONF_CHECK_INFOFRAME(drm); + PIPE_CONF_CHECK_DP_VSC_SDP(vsc); PIPE_CONF_CHECK_X(sync_mode_slaves_mask); PIPE_CONF_CHECK_I(master_transcoder); From ce58867ee17afecda7917e74a0d10afd7138c6d4 Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 14 May 2020 09:07:28 +0300 Subject: [PATCH 85/97] drm/i915: Fix enabled infoframe states of lspcon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compared to implementation of DP and HDMI's encoder->infoframes_enabled, the lspcon's implementation returns its active state. (we expect enabled infoframe states of HW.) It leads to pipe state mismatch error when ddi_get_config is called. Because the current implementation of lspcon is not ready to support readout infoframes, we need to return 0 here. In order to support readout to lspcon, we need to implement read_infoframe and infoframes_enabled. And set_infoframes also have to set an appropriate bit on crtc_state->infoframes.enable Cc: Ville Syrjälä Signed-off-by: Gwan-gyeong Mun Reviewed-by: Uma Shankar Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200514060732.3378396-11-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/display/intel_lspcon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_lspcon.c b/drivers/gpu/drm/i915/display/intel_lspcon.c index d807c5648c87..6ff7b226f0a1 100644 --- a/drivers/gpu/drm/i915/display/intel_lspcon.c +++ b/drivers/gpu/drm/i915/display/intel_lspcon.c @@ -522,7 +522,7 @@ u32 lspcon_infoframes_enabled(struct intel_encoder *encoder, const struct intel_crtc_state *pipe_config) { /* FIXME actually read this from the hw */ - return enc_to_intel_lspcon(encoder)->active; + return 0; } void lspcon_resume(struct intel_lspcon *lspcon) From 76d45d0665d234d4bc7249160094cae5550d919f Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 14 May 2020 09:07:29 +0300 Subject: [PATCH 86/97] drm/i915: Program DP SDPs on pipe updates Call intel_dp_set_infoframes() function on pipe updates to make sure that we send VSC SDP and HDR Metadata Infoframe SDP (when applicable) on fastsets. Signed-off-by: Gwan-gyeong Mun Reviewed-by: Uma Shankar Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200514060732.3378396-12-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index 2b54ea47e822..e3f5db787343 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3864,6 +3864,7 @@ static void intel_ddi_update_pipe_dp(struct intel_atomic_state *state, intel_ddi_set_dp_msa(crtc_state, conn_state); intel_psr_update(intel_dp, crtc_state); + intel_dp_set_infoframes(encoder, true, crtc_state, conn_state); intel_edp_drrs_enable(intel_dp, crtc_state); intel_panel_update_backlight(state, encoder, crtc_state, conn_state); From fa37a213275c6f80b8bc15189e28faeac948ba4c Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 14 May 2020 09:07:30 +0300 Subject: [PATCH 87/97] drm/i915: Stop sending DP SDPs on ddi disable Call intel_dp_set_infoframes(false) function on intel_ddi_post_disable_dp() to make sure not to send VSC SDP and HDR Metadata Infoframe SDP. v5: Polish commit message [Uma] Signed-off-by: Gwan-gyeong Mun Reviewed-by: Uma Shankar Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200514060732.3378396-13-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index e3f5db787343..a5eb8b89946b 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3472,6 +3472,8 @@ static void intel_ddi_post_disable_dp(struct intel_atomic_state *state, INTEL_OUTPUT_DP_MST); enum phy phy = intel_port_to_phy(dev_priv, encoder->port); + intel_dp_set_infoframes(encoder, false, old_crtc_state, old_conn_state); + /* * Power down sink before disabling the port, otherwise we end * up getting interrupts from the sink on detecting link loss. From cafac5a983619944afa639c53f0d5d885616a3d2 Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 14 May 2020 09:07:31 +0300 Subject: [PATCH 88/97] drm/i915/dp: Add compute routine for DP PSR VSC SDP In order to use a common VSC SDP Colorimetry calculating code on PSR, it adds a compute routine for PSR VSC SDP. As PSR routine can not use infoframes.vsc of crtc state, it also adds new writing of DP SDPs (Secondary Data Packet) for PSR. PSR routine has its own scenario and timings of writing a VSC SDP. v3: Replace a structure name to drm_dp_vsc_sdp from intel_dp_vsc_sdp v4: Use struct drm_device logging macros v10: 1) Fix packing of VSC SDP where Pixel Encoding/Colorimetry Format is not supported. 2) Change a checking of PSR state. Signed-off-by: Gwan-gyeong Mun Reviewed-by: Uma Shankar Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200514060732.3378396-14-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/display/intel_dp.c | 66 ++++++++++++++++++++++++- drivers/gpu/drm/i915/display/intel_dp.h | 8 +++ 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 4ccd6b7a3d83..408c3c1c5e81 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2487,8 +2487,8 @@ static void intel_dp_compute_vsc_sdp(struct intel_dp *intel_dp, { struct drm_dp_vsc_sdp *vsc = &crtc_state->infoframes.vsc; - /* When PSR is enabled, VSC SDP is handled by PSR routine */ - if (intel_psr_enabled(intel_dp)) + /* When a crtc state has PSR, VSC SDP will be handled by PSR routine */ + if (crtc_state->has_psr) return; if (!intel_dp_needs_vsc_sdp(crtc_state, conn_state)) @@ -2500,6 +2500,42 @@ static void intel_dp_compute_vsc_sdp(struct intel_dp *intel_dp, &crtc_state->infoframes.vsc); } +void intel_dp_compute_psr_vsc_sdp(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state, + struct drm_dp_vsc_sdp *vsc) +{ + struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); + + vsc->sdp_type = DP_SDP_VSC; + + if (dev_priv->psr.psr2_enabled) { + if (dev_priv->psr.colorimetry_support && + intel_dp_needs_vsc_sdp(crtc_state, conn_state)) { + /* [PSR2, +Colorimetry] */ + intel_dp_compute_vsc_colorimetry(crtc_state, conn_state, + vsc); + } else { + /* + * [PSR2, -Colorimetry] + * Prepare VSC Header for SU as per eDP 1.4 spec, Table 6-11 + * 3D stereo + PSR/PSR2 + Y-coordinate. + */ + vsc->revision = 0x4; + vsc->length = 0xe; + } + } else { + /* + * [PSR1] + * Prepare VSC Header for SU as per DP 1.4 spec, Table 2-118 + * VSC SDP supporting 3D stereo + PSR (applies to eDP v1.3 or + * higher). + */ + vsc->revision = 0x2; + vsc->length = 0x8; + } +} + static void intel_dp_compute_hdr_metadata_infoframe_sdp(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state, @@ -4791,6 +4827,13 @@ static ssize_t intel_dp_vsc_sdp_pack(const struct drm_dp_vsc_sdp *vsc, sdp->sdp_header.HB2 = vsc->revision; /* Revision Number */ sdp->sdp_header.HB3 = vsc->length; /* Number of Valid Data Bytes */ + /* + * Only revision 0x5 supports Pixel Encoding/Colorimetry Format as + * per DP 1.4a spec. + */ + if (vsc->revision != 0x5) + goto out; + /* VSC SDP Payload for DB16 through DB18 */ /* Pixel Encoding and Colorimetry Formats */ sdp->db[16] = (vsc->pixelformat & 0xf) << 4; /* DB16[7:4] */ @@ -4823,6 +4866,7 @@ static ssize_t intel_dp_vsc_sdp_pack(const struct drm_dp_vsc_sdp *vsc, /* Content Type */ sdp->db[18] = vsc->content_type & 0x7; +out: return length; } @@ -4935,6 +4979,24 @@ static void intel_write_dp_sdp(struct intel_encoder *encoder, intel_dig_port->write_infoframe(encoder, crtc_state, type, &sdp, len); } +void intel_write_dp_vsc_sdp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + struct drm_dp_vsc_sdp *vsc) +{ + struct intel_digital_port *intel_dig_port = enc_to_dig_port(encoder); + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct dp_sdp sdp = {}; + ssize_t len; + + len = intel_dp_vsc_sdp_pack(vsc, &sdp, sizeof(sdp)); + + if (drm_WARN_ON(&dev_priv->drm, len < 0)) + return; + + intel_dig_port->write_infoframe(encoder, crtc_state, DP_SDP_VSC, + &sdp, len); +} + void intel_dp_set_infoframes(struct intel_encoder *encoder, bool enable, const struct intel_crtc_state *crtc_state, diff --git a/drivers/gpu/drm/i915/display/intel_dp.h b/drivers/gpu/drm/i915/display/intel_dp.h index 0d2de15703c3..1702959ca079 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.h +++ b/drivers/gpu/drm/i915/display/intel_dp.h @@ -16,6 +16,7 @@ struct drm_connector_state; struct drm_encoder; struct drm_i915_private; struct drm_modeset_acquire_ctx; +struct drm_dp_vsc_sdp; struct intel_connector; struct intel_crtc_state; struct intel_digital_port; @@ -108,6 +109,13 @@ int intel_dp_link_required(int pixel_clock, int bpp); int intel_dp_max_data_rate(int max_link_clock, int max_lanes); bool intel_dp_needs_vsc_sdp(const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); +void intel_dp_compute_psr_vsc_sdp(struct intel_dp *intel_dp, + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state, + struct drm_dp_vsc_sdp *vsc); +void intel_write_dp_vsc_sdp(struct intel_encoder *encoder, + const struct intel_crtc_state *crtc_state, + struct drm_dp_vsc_sdp *vsc); void intel_dp_set_infoframes(struct intel_encoder *encoder, bool enable, const struct intel_crtc_state *crtc_state, const struct drm_connector_state *conn_state); From 7a00e68b431716d9258cbf4dc72c307ac39e8733 Mon Sep 17 00:00:00 2001 From: Gwan-gyeong Mun Date: Thu, 14 May 2020 09:07:32 +0300 Subject: [PATCH 89/97] drm/i915/psr: Use new DP VSC SDP compute routine on PSR In order to use a common VSC SDP Colorimetry calculating code on PSR, it uses a new psr vsc sdp compute routine. Because PSR routine has its own scenario and timings of writing a VSC SDP, the current PSR routine needs to have its own drm_dp_vsc_sdp structure member variable on struct i915_psr. In order to calculate colorimetry information, intel_psr_update() function and intel_psr_enable() function extend a drm_connector_state argument. There are no changes to PSR mechanism. v3: Replace a structure name to drm_dp_vsc_sdp from intel_dp_vsc_sdp v4: Rebased v8: Rebased v10: When a PSR is enabled, it needs to add DP_SDP_VSC to infoframes.enable. It is needed for comparing between HW and pipe_state of VSC_SDP. v11: If PSR is disabled by flag, it don't enable psr on pipe compute. v12: Fix an inconsistent indenting Signed-off-by: Gwan-gyeong Mun Reviewed-by: Uma Shankar Reported-by: kbuild test robot Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200514060732.3378396-15-gwan-gyeong.mun@intel.com --- drivers/gpu/drm/i915/display/intel_ddi.c | 4 +- drivers/gpu/drm/i915/display/intel_psr.c | 58 ++++++++---------------- drivers/gpu/drm/i915/display/intel_psr.h | 6 ++- drivers/gpu/drm/i915/i915_drv.h | 1 + 4 files changed, 26 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index a5eb8b89946b..aa22465bb56e 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -3682,7 +3682,7 @@ static void intel_enable_ddi_dp(struct intel_atomic_state *state, intel_dp_stop_link_train(intel_dp); intel_edp_backlight_on(crtc_state, conn_state); - intel_psr_enable(intel_dp, crtc_state); + intel_psr_enable(intel_dp, crtc_state, conn_state); intel_dp_set_infoframes(encoder, true, crtc_state, conn_state); intel_edp_drrs_enable(intel_dp, crtc_state); @@ -3865,7 +3865,7 @@ static void intel_ddi_update_pipe_dp(struct intel_atomic_state *state, intel_ddi_set_dp_msa(crtc_state, conn_state); - intel_psr_update(intel_dp, crtc_state); + intel_psr_update(intel_dp, crtc_state, conn_state); intel_dp_set_infoframes(encoder, true, crtc_state, conn_state); intel_edp_drrs_enable(intel_dp, crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c index a0569fdfeb16..b7a2c102648a 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.c +++ b/drivers/gpu/drm/i915/display/intel_psr.c @@ -30,6 +30,7 @@ #include "intel_display_types.h" #include "intel_psr.h" #include "intel_sprite.h" +#include "intel_hdmi.h" /** * DOC: Panel Self Refresh (PSR/SRD) @@ -357,39 +358,6 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp) } } -static void intel_psr_setup_vsc(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) -{ - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); - struct dp_sdp psr_vsc; - - if (dev_priv->psr.psr2_enabled) { - /* Prepare VSC Header for SU as per EDP 1.4 spec, Table 6.11 */ - memset(&psr_vsc, 0, sizeof(psr_vsc)); - psr_vsc.sdp_header.HB0 = 0; - psr_vsc.sdp_header.HB1 = 0x7; - if (dev_priv->psr.colorimetry_support) { - psr_vsc.sdp_header.HB2 = 0x5; - psr_vsc.sdp_header.HB3 = 0x13; - } else { - psr_vsc.sdp_header.HB2 = 0x4; - psr_vsc.sdp_header.HB3 = 0xe; - } - } else { - /* Prepare VSC packet as per EDP 1.3 spec, Table 3.10 */ - memset(&psr_vsc, 0, sizeof(psr_vsc)); - psr_vsc.sdp_header.HB0 = 0; - psr_vsc.sdp_header.HB1 = 0x7; - psr_vsc.sdp_header.HB2 = 0x2; - psr_vsc.sdp_header.HB3 = 0x8; - } - - intel_dig_port->write_infoframe(&intel_dig_port->base, - crtc_state, - DP_SDP_VSC, &psr_vsc, sizeof(psr_vsc)); -} - static void hsw_psr_setup_aux(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); @@ -756,6 +724,8 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, if (intel_dp != dev_priv->psr.dp) return; + if (!psr_global_enabled(dev_priv)) + return; /* * HSW spec explicitly says PSR is tied to port A. * BDW+ platforms have a instance of PSR registers per transcoder but @@ -798,6 +768,7 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, crtc_state->has_psr = true; crtc_state->has_psr2 = intel_psr2_config_valid(intel_dp, crtc_state); + crtc_state->infoframes.enable |= intel_hdmi_infoframe_enable(DP_SDP_VSC); } static void intel_psr_activate(struct intel_dp *intel_dp) @@ -880,9 +851,12 @@ static void intel_psr_enable_source(struct intel_dp *intel_dp, } static void intel_psr_enable_locked(struct drm_i915_private *dev_priv, - const struct intel_crtc_state *crtc_state) + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { struct intel_dp *intel_dp = dev_priv->psr.dp; + struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); + struct intel_encoder *encoder = &intel_dig_port->base; u32 val; drm_WARN_ON(&dev_priv->drm, dev_priv->psr.enabled); @@ -921,7 +895,9 @@ static void intel_psr_enable_locked(struct drm_i915_private *dev_priv, drm_dbg_kms(&dev_priv->drm, "Enabling PSR%s\n", dev_priv->psr.psr2_enabled ? "2" : "1"); - intel_psr_setup_vsc(intel_dp, crtc_state); + intel_dp_compute_psr_vsc_sdp(intel_dp, crtc_state, conn_state, + &dev_priv->psr.vsc); + intel_write_dp_vsc_sdp(encoder, crtc_state, &dev_priv->psr.vsc); intel_psr_enable_sink(intel_dp); intel_psr_enable_source(intel_dp, crtc_state); dev_priv->psr.enabled = true; @@ -933,11 +909,13 @@ static void intel_psr_enable_locked(struct drm_i915_private *dev_priv, * intel_psr_enable - Enable PSR * @intel_dp: Intel DP * @crtc_state: new CRTC state + * @conn_state: new CONNECTOR state * * This function can only be called after the pipe is fully trained and enabled. */ void intel_psr_enable(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); @@ -958,7 +936,7 @@ void intel_psr_enable(struct intel_dp *intel_dp, goto unlock; } - intel_psr_enable_locked(dev_priv, crtc_state); + intel_psr_enable_locked(dev_priv, crtc_state, conn_state); unlock: mutex_unlock(&dev_priv->psr.lock); @@ -1091,13 +1069,15 @@ static void psr_force_hw_tracking_exit(struct drm_i915_private *dev_priv) * intel_psr_update - Update PSR state * @intel_dp: Intel DP * @crtc_state: new CRTC state + * @conn_state: new CONNECTOR state * * This functions will update PSR states, disabling, enabling or switching PSR * version when executing fastsets. For full modeset, intel_psr_disable() and * intel_psr_enable() should be called instead. */ void intel_psr_update(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state) + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = dp_to_i915(intel_dp); struct i915_psr *psr = &dev_priv->psr; @@ -1134,7 +1114,7 @@ void intel_psr_update(struct intel_dp *intel_dp, intel_psr_disable_locked(intel_dp); if (enable) - intel_psr_enable_locked(dev_priv, crtc_state); + intel_psr_enable_locked(dev_priv, crtc_state, conn_state); unlock: mutex_unlock(&dev_priv->psr.lock); diff --git a/drivers/gpu/drm/i915/display/intel_psr.h b/drivers/gpu/drm/i915/display/intel_psr.h index 274fc6bb6221..b4515186d5f4 100644 --- a/drivers/gpu/drm/i915/display/intel_psr.h +++ b/drivers/gpu/drm/i915/display/intel_psr.h @@ -17,11 +17,13 @@ struct intel_dp; #define CAN_PSR(dev_priv) (HAS_PSR(dev_priv) && dev_priv->psr.sink_support) void intel_psr_init_dpcd(struct intel_dp *intel_dp); void intel_psr_enable(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state); + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); void intel_psr_disable(struct intel_dp *intel_dp, const struct intel_crtc_state *old_crtc_state); void intel_psr_update(struct intel_dp *intel_dp, - const struct intel_crtc_state *crtc_state); + const struct intel_crtc_state *crtc_state, + const struct drm_connector_state *conn_state); int intel_psr_debug_set(struct drm_i915_private *dev_priv, u64 value); void intel_psr_invalidate(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0f76b93e0d26..ad373b57699a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -512,6 +512,7 @@ struct i915_psr { u32 dc3co_exit_delay; struct delayed_work dc3co_work; bool force_mode_changed; + struct drm_dp_vsc_sdp vsc; }; #define QUIRK_LVDS_SSC_DISABLE (1<<1) From 7241c57d3140ad3b613777a8515ffe1f653d4800 Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Thu, 14 May 2020 10:48:51 +0300 Subject: [PATCH 90/97] drm/i915: Add TGL+ SAGV support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting from TGL we need to have a separate wm0 values for SAGV and non-SAGV which affects how calculations are done. v2: Remove long lines v3: Removed COLOR_PLANE enum references v4, v5, v6: Fixed rebase conflict v7: - Removed skl_plane_wm_level accessor from skl_allocate_pipe_ddb(Ville) - Removed sagv_uv_wm0(Ville) - can_sagv->use_sagv_wm(Ville) v8: - Moved tgl_crtc_can_enable_sagv function up(Ville) - Changed comment regarding pipe_wm usage(Ville) - Call intel_can_enable_sagv and tgl_compute_sagv_wm only for Gen12(Ville) - Some sagv debugs removed(Ville) - skl_print_wm_changes improvements(Ville) - Do assignment instead of memcpy in skl_pipe_wm_get_hw_state(Ville) v9: - Removed can_sagv variable(Ville) - Removed spurious line(Ville) - Changed u32 to unsigned int as agreed(Ville) - Assign sagv only for gen12 in skl_pipe_wm_get_hw_state(Ville) Signed-off-by: Stanislav Lisovskiy [vsyrjala: Remove the dead 'return false' from intel_crtc_can_enable_sagv()] Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200514074853.9508-2-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/display/intel_display.c | 8 +- .../drm/i915/display/intel_display_types.h | 2 + drivers/gpu/drm/i915/intel_pm.c | 108 +++++++++++++++--- 3 files changed, 99 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 4747d96ec66d..9c8af50011e7 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -14025,7 +14025,9 @@ static void verify_wm_state(struct intel_crtc *crtc, /* Watermarks */ for (level = 0; level <= max_level; level++) { if (skl_wm_level_equals(&hw_plane_wm->wm[level], - &sw_plane_wm->wm[level])) + &sw_plane_wm->wm[level]) || + (level == 0 && skl_wm_level_equals(&hw_plane_wm->wm[level], + &sw_plane_wm->sagv_wm0))) continue; drm_err(&dev_priv->drm, @@ -14080,7 +14082,9 @@ static void verify_wm_state(struct intel_crtc *crtc, /* Watermarks */ for (level = 0; level <= max_level; level++) { if (skl_wm_level_equals(&hw_plane_wm->wm[level], - &sw_plane_wm->wm[level])) + &sw_plane_wm->wm[level]) || + (level == 0 && skl_wm_level_equals(&hw_plane_wm->wm[level], + &sw_plane_wm->sagv_wm0))) continue; drm_err(&dev_priv->drm, diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h index 1b689d6bac4c..6a27e72ccf01 100644 --- a/drivers/gpu/drm/i915/display/intel_display_types.h +++ b/drivers/gpu/drm/i915/display/intel_display_types.h @@ -688,11 +688,13 @@ struct skl_plane_wm { struct skl_wm_level wm[8]; struct skl_wm_level uv_wm[8]; struct skl_wm_level trans_wm; + struct skl_wm_level sagv_wm0; bool is_planar; }; struct skl_pipe_wm { struct skl_plane_wm planes[I915_MAX_PLANES]; + bool use_sagv_wm; }; enum vlv_wm_level { diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f7bd1dbb625e..f067e226ca7f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3853,9 +3853,36 @@ static bool skl_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) return true; } +static bool tgl_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + enum plane_id plane_id; + + if (!crtc_state->hw.active) + return true; + + for_each_plane_id_on_crtc(crtc, plane_id) { + const struct skl_ddb_entry *plane_alloc = + &crtc_state->wm.skl.plane_ddb_y[plane_id]; + const struct skl_plane_wm *wm = + &crtc_state->wm.skl.optimal.planes[plane_id]; + + if (skl_ddb_entry_size(plane_alloc) < wm->sagv_wm0.min_ddb_alloc) + return false; + } + + return true; +} + static bool intel_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) { - return skl_crtc_can_enable_sagv(crtc_state); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + if (INTEL_GEN(dev_priv) >= 12) + return tgl_crtc_can_enable_sagv(crtc_state); + else + return skl_crtc_can_enable_sagv(crtc_state); } bool intel_can_enable_sagv(struct drm_i915_private *dev_priv, @@ -3873,7 +3900,7 @@ static int intel_compute_sagv_mask(struct intel_atomic_state *state) struct drm_i915_private *dev_priv = to_i915(state->base.dev); int ret; struct intel_crtc *crtc; - const struct intel_crtc_state *new_crtc_state; + struct intel_crtc_state *new_crtc_state; struct intel_bw_state *new_bw_state = NULL; const struct intel_bw_state *old_bw_state = NULL; int i; @@ -3904,6 +3931,20 @@ static int intel_compute_sagv_mask(struct intel_atomic_state *state) return ret; } + for_each_new_intel_crtc_in_state(state, crtc, + new_crtc_state, i) { + struct skl_pipe_wm *pipe_wm = &new_crtc_state->wm.skl.optimal; + + /* + * We store use_sagv_wm in the crtc state rather than relying on + * that bw state since we have no convenient way to get at the + * latter from the plane commit hooks (especially in the legacy + * cursor case) + */ + pipe_wm->use_sagv_wm = INTEL_GEN(dev_priv) >= 12 && + intel_can_enable_sagv(dev_priv, new_bw_state); + } + if (intel_can_enable_sagv(dev_priv, new_bw_state) != intel_can_enable_sagv(dev_priv, old_bw_state)) { ret = intel_atomic_serialize_global_state(&new_bw_state->base); @@ -4647,8 +4688,11 @@ skl_plane_wm_level(const struct intel_crtc_state *crtc_state, enum plane_id plane_id, int level) { - const struct skl_plane_wm *wm = - &crtc_state->wm.skl.optimal.planes[plane_id]; + const struct skl_pipe_wm *pipe_wm = &crtc_state->wm.skl.optimal; + const struct skl_plane_wm *wm = &pipe_wm->planes[plane_id]; + + if (level == 0 && pipe_wm->use_sagv_wm) + return &wm->sagv_wm0; return &wm->wm[level]; } @@ -4689,7 +4733,6 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state) plane_data_rate, uv_plane_data_rate); - skl_ddb_get_pipe_allocation_limits(dev_priv, crtc_state, total_data_rate, alloc, &num_active); alloc_size = skl_ddb_entry_size(alloc); @@ -5225,6 +5268,20 @@ skl_compute_wm_levels(const struct intel_crtc_state *crtc_state, } } +static void tgl_compute_sagv_wm(const struct intel_crtc_state *crtc_state, + const struct skl_wm_params *wm_params, + struct skl_plane_wm *plane_wm) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev); + struct skl_wm_level *sagv_wm = &plane_wm->sagv_wm0; + struct skl_wm_level *levels = plane_wm->wm; + unsigned int latency = dev_priv->wm.skl_latency[0] + dev_priv->sagv_block_time_us; + + skl_compute_plane_wm(crtc_state, 0, latency, + wm_params, &levels[0], + sagv_wm); +} + static void skl_compute_transition_wm(const struct intel_crtc_state *crtc_state, const struct skl_wm_params *wp, struct skl_plane_wm *wm) @@ -5292,6 +5349,8 @@ static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state, enum plane_id plane_id, int color_plane) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct skl_plane_wm *wm = &crtc_state->wm.skl.optimal.planes[plane_id]; struct skl_wm_params wm_params; int ret; @@ -5302,6 +5361,10 @@ static int skl_build_plane_wm_single(struct intel_crtc_state *crtc_state, return ret; skl_compute_wm_levels(crtc_state, &wm_params, wm->wm); + + if (INTEL_GEN(dev_priv) >= 12) + tgl_compute_sagv_wm(crtc_state, &wm_params, wm); + skl_compute_transition_wm(crtc_state, &wm_params, wm); return 0; @@ -5668,23 +5731,25 @@ skl_print_wm_changes(struct intel_atomic_state *state) continue; drm_dbg_kms(&dev_priv->drm, - "[PLANE:%d:%s] level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm" - " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm\n", + "[PLANE:%d:%s] level %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm,%cswm" + " -> %cwm0,%cwm1,%cwm2,%cwm3,%cwm4,%cwm5,%cwm6,%cwm7,%ctwm,%cswm\n", plane->base.base.id, plane->base.name, enast(old_wm->wm[0].plane_en), enast(old_wm->wm[1].plane_en), enast(old_wm->wm[2].plane_en), enast(old_wm->wm[3].plane_en), enast(old_wm->wm[4].plane_en), enast(old_wm->wm[5].plane_en), enast(old_wm->wm[6].plane_en), enast(old_wm->wm[7].plane_en), enast(old_wm->trans_wm.plane_en), + enast(old_wm->sagv_wm0.plane_en), enast(new_wm->wm[0].plane_en), enast(new_wm->wm[1].plane_en), enast(new_wm->wm[2].plane_en), enast(new_wm->wm[3].plane_en), enast(new_wm->wm[4].plane_en), enast(new_wm->wm[5].plane_en), enast(new_wm->wm[6].plane_en), enast(new_wm->wm[7].plane_en), - enast(new_wm->trans_wm.plane_en)); + enast(new_wm->trans_wm.plane_en), + enast(new_wm->sagv_wm0.plane_en)); drm_dbg_kms(&dev_priv->drm, - "[PLANE:%d:%s] lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d" - " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n", + "[PLANE:%d:%s] lines %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d" + " -> %c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d,%c%3d\n", plane->base.base.id, plane->base.name, enast(old_wm->wm[0].ignore_lines), old_wm->wm[0].plane_res_l, enast(old_wm->wm[1].ignore_lines), old_wm->wm[1].plane_res_l, @@ -5695,6 +5760,7 @@ skl_print_wm_changes(struct intel_atomic_state *state) enast(old_wm->wm[6].ignore_lines), old_wm->wm[6].plane_res_l, enast(old_wm->wm[7].ignore_lines), old_wm->wm[7].plane_res_l, enast(old_wm->trans_wm.ignore_lines), old_wm->trans_wm.plane_res_l, + enast(old_wm->sagv_wm0.ignore_lines), old_wm->sagv_wm0.plane_res_l, enast(new_wm->wm[0].ignore_lines), new_wm->wm[0].plane_res_l, enast(new_wm->wm[1].ignore_lines), new_wm->wm[1].plane_res_l, @@ -5704,37 +5770,42 @@ skl_print_wm_changes(struct intel_atomic_state *state) enast(new_wm->wm[5].ignore_lines), new_wm->wm[5].plane_res_l, enast(new_wm->wm[6].ignore_lines), new_wm->wm[6].plane_res_l, enast(new_wm->wm[7].ignore_lines), new_wm->wm[7].plane_res_l, - enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l); + enast(new_wm->trans_wm.ignore_lines), new_wm->trans_wm.plane_res_l, + enast(new_wm->sagv_wm0.ignore_lines), new_wm->sagv_wm0.plane_res_l); drm_dbg_kms(&dev_priv->drm, - "[PLANE:%d:%s] blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" - " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", + "[PLANE:%d:%s] blocks %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" + " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", plane->base.base.id, plane->base.name, old_wm->wm[0].plane_res_b, old_wm->wm[1].plane_res_b, old_wm->wm[2].plane_res_b, old_wm->wm[3].plane_res_b, old_wm->wm[4].plane_res_b, old_wm->wm[5].plane_res_b, old_wm->wm[6].plane_res_b, old_wm->wm[7].plane_res_b, old_wm->trans_wm.plane_res_b, + old_wm->sagv_wm0.plane_res_b, new_wm->wm[0].plane_res_b, new_wm->wm[1].plane_res_b, new_wm->wm[2].plane_res_b, new_wm->wm[3].plane_res_b, new_wm->wm[4].plane_res_b, new_wm->wm[5].plane_res_b, new_wm->wm[6].plane_res_b, new_wm->wm[7].plane_res_b, - new_wm->trans_wm.plane_res_b); + new_wm->trans_wm.plane_res_b, + new_wm->sagv_wm0.plane_res_b); drm_dbg_kms(&dev_priv->drm, - "[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" - " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", + "[PLANE:%d:%s] min_ddb %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d" + " -> %4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d,%4d\n", plane->base.base.id, plane->base.name, old_wm->wm[0].min_ddb_alloc, old_wm->wm[1].min_ddb_alloc, old_wm->wm[2].min_ddb_alloc, old_wm->wm[3].min_ddb_alloc, old_wm->wm[4].min_ddb_alloc, old_wm->wm[5].min_ddb_alloc, old_wm->wm[6].min_ddb_alloc, old_wm->wm[7].min_ddb_alloc, old_wm->trans_wm.min_ddb_alloc, + old_wm->sagv_wm0.min_ddb_alloc, new_wm->wm[0].min_ddb_alloc, new_wm->wm[1].min_ddb_alloc, new_wm->wm[2].min_ddb_alloc, new_wm->wm[3].min_ddb_alloc, new_wm->wm[4].min_ddb_alloc, new_wm->wm[5].min_ddb_alloc, new_wm->wm[6].min_ddb_alloc, new_wm->wm[7].min_ddb_alloc, - new_wm->trans_wm.min_ddb_alloc); + new_wm->trans_wm.min_ddb_alloc, + new_wm->sagv_wm0.min_ddb_alloc); } } } @@ -6027,6 +6098,9 @@ void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc, skl_wm_level_from_reg_val(val, &wm->wm[level]); } + if (INTEL_GEN(dev_priv) >= 12) + wm->sagv_wm0 = wm->wm[0]; + if (plane_id != PLANE_CURSOR) val = I915_READ(PLANE_WM_TRANS(pipe, plane_id)); else From 20f505f2253106f695ba6fa0a415159145a8fb2a Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Thu, 14 May 2020 10:48:52 +0300 Subject: [PATCH 91/97] drm/i915: Restrict qgv points which don't have enough bandwidth. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to BSpec 53998, we should try to restrict qgv points, which can't provide enough bandwidth for desired display configuration. Currently we are just comparing against all of those and take minimum(worst case). v2: Fixed wrong PCode reply mask, removed hardcoded values. v3: Forbid simultaneous legacy SAGV PCode requests and restricting qgv points. Put the actual restriction to commit function, added serialization(thanks to Ville) to prevent commit being applied out of order in case of nonblocking and/or nomodeset commits. v4: - Minor code refactoring, fixed few typos(thanks to James Ausmus) - Change the naming of qgv point masking/unmasking functions(James Ausmus). - Simplify the masking/unmasking operation itself, as we don't need to mask only single point per request(James Ausmus) - Reject and stick to highest bandwidth point if SAGV can't be enabled(BSpec) v5: - Add new mailbox reply codes, which seems to happen during boot time for TGL and indicate that QGV setting is not yet available. v6: - Increase number of supported QGV points to be in sync with BSpec. v7: - Rebased and resolved conflict to fix build failure. - Fix NUM_QGV_POINTS to 8 and moved that to header file(James Ausmus) v8: - Don't report an error if we can't restrict qgv points, as SAGV can be disabled by BIOS, which is completely legal. So don't make CI panic. Instead if we detect that there is only 1 QGV point accessible just analyze if we can fit the required bandwidth requirements, but no need in restricting. v9: - Fix wrong QGV transition if we have 0 planes and no SAGV simultaneously. v10: - Fix CDCLK corruption, because of global state getting serialized without modeset, which caused copying of non-calculated cdclk to be copied to dev_priv(thanks to Ville for the hint). v11: - Remove unneeded headers and spaces(Matthew Roper) - Remove unneeded intel_qgv_info qi struct from bw check and zero out the needed one(Matthew Roper) - Changed QGV error message to have more clear meaning(Matthew Roper) - Use state->modeset_set instead of any_ms(Matthew Roper) - Moved NUM_SAGV_POINTS from i915_reg.h to i915_drv.h where it's used - Keep using crtc_state->hw.active instead of .enable(Matthew Roper) - Moved unrelated changes to other patch(using latency as parameter for plane wm calculation, moved to SAGV refactoring patch) v12: - Fix rebase conflict with own temporary SAGV/QGV fix. - Remove unnecessary mask being zero check when unmasking qgv points as this is completely legal(Matt Roper) - Check if we are setting the same mask as already being set in hardware to prevent error from PCode. - Fix error message when restricting/unrestricting qgv points to "mask/unmask" which sounds more accurate(Matt Roper) - Move sagv status setting to icl_get_bw_info from atomic check as this should be calculated only once.(Matt Roper) - Edited comments for the case when we can't enable SAGV and use only 1 QGV point with highest bandwidth to be more understandable.(Matt Roper) v13: - Moved max_data_rate in bw check to closer scope(Ville Syrjälä) - Changed comment for zero new_mask in qgv points masking function to better reflect reality(Ville Syrjälä) - Simplified bit mask operation in qgv points masking function (Ville Syrjälä) - Moved intel_qgv_points_mask closer to gen11 SAGV disabling, however this still can't be under modeset condition(Ville Syrjälä) - Packed qgv_points_mask as u8 and moved closer to pipe_sagv_mask (Ville Syrjälä) - Extracted PCode changes to separate patch.(Ville Syrjälä) - Now treat num_planes 0 same as 1 to avoid confusion and returning max_bw as 0, which would prevent choosing QGV point having max bandwidth in case if SAGV is not allowed, as per BSpec(Ville Syrjälä) - Do the actual qgv_points_mask swap in the same place as all other global state parts like cdclk are swapped. In the next patch, this all will be moved to bw state as global state, once new global state patch series from Ville lands v14: - Now using global state to serialize access to qgv points - Added global state locking back, otherwise we seem to read bw state in a wrong way. v15: - Added TODO comment for near atomic global state locking in bw code. v16: - Fixed intel_atomic_bw_* functions to be intel_bw_* as discussed with Jani Nikula. - Take bw_state_changed flag into use. v17: - Moved qgv point related manipulations next to SAGV code, as those are semantically related(Ville Syrjälä) - Renamed those into intel_sagv_(pre)|(post)_plane_update (Ville Syrjälä) v18: - Move sagv related calls from commit tail into intel_sagv_(pre)|(post)_plane_update(Ville Syrjälä) v19: - Use intel_atomic_get_bw_(old)|(new)_state which is intended for commit tail stage. v20: - Return max bandwidth for 0 planes(Ville) - Constify old_bw_state in bw_atomic_check(Ville) - Removed some debugs(Ville) - Added data rate to debug print when no QGV points(Ville) - Removed some comments(Ville) v21, v22, v23: - Fixed rebase conflict v24: - Changed PCode mask to use ICL_ prefix v25: - Resolved rebase conflict v26: - Removed redundant NULL checks(Ville) - Removed redundant error prints(Ville) v27: - Use device specific drm_err(Ville) - Fixed parenthesis ident reported by checkpatch Line over 100 warns to be fixed together with existing code style. Signed-off-by: Stanislav Lisovskiy Cc: Ville Syrjälä Cc: James Ausmus [vsyrjala: Drop duplicate intel_sagv_{pre,post}_plane_update() prototypes and drop unused NUM_SAGV_POINTS define] Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200514074853.9508-3-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/display/intel_bw.c | 141 +++++++++++++++++------- drivers/gpu/drm/i915/display/intel_bw.h | 9 ++ drivers/gpu/drm/i915/intel_pm.c | 53 ++++++++- 3 files changed, 164 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c index 6e7cc3a4f1aa..fef04e2d954e 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.c +++ b/drivers/gpu/drm/i915/display/intel_bw.c @@ -8,6 +8,9 @@ #include "intel_bw.h" #include "intel_display_types.h" #include "intel_sideband.h" +#include "intel_atomic.h" +#include "intel_pm.h" + /* Parameters for Qclk Geyserville (QGV) */ struct intel_qgv_point { @@ -113,6 +116,26 @@ static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv, return 0; } +int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv, + u32 points_mask) +{ + int ret; + + /* bspec says to keep retrying for at least 1 ms */ + ret = skl_pcode_request(dev_priv, ICL_PCODE_SAGV_DE_MEM_SS_CONFIG, + points_mask, + ICL_PCODE_POINTS_RESTRICTED_MASK, + ICL_PCODE_POINTS_RESTRICTED, + 1); + + if (ret < 0) { + drm_err(&dev_priv->drm, "Failed to disable qgv points (%d)\n", ret); + return ret; + } + + return 0; +} + static int icl_get_qgv_points(struct drm_i915_private *dev_priv, struct intel_qgv_info *qi) { @@ -240,6 +263,16 @@ static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel break; } + /* + * In case if SAGV is disabled in BIOS, we always get 1 + * SAGV point, but we can't send PCode commands to restrict it + * as it will fail and pointless anyway. + */ + if (qi.num_points == 1) + dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; + else + dev_priv->sagv_status = I915_SAGV_ENABLED; + return 0; } @@ -248,6 +281,11 @@ static unsigned int icl_max_bw(struct drm_i915_private *dev_priv, { int i; + /* + * Let's return max bw for 0 planes + */ + num_planes = max(1, num_planes); + for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) { const struct intel_bw_info *bi = &dev_priv->max_bw[i]; @@ -277,34 +315,6 @@ void intel_bw_init_hw(struct drm_i915_private *dev_priv) icl_get_bw_info(dev_priv, &icl_sa_info); } -static unsigned int intel_max_data_rate(struct drm_i915_private *dev_priv, - int num_planes) -{ - if (INTEL_GEN(dev_priv) >= 11) { - /* - * Any bw group has same amount of QGV points - */ - const struct intel_bw_info *bi = - &dev_priv->max_bw[0]; - unsigned int min_bw = UINT_MAX; - int i; - - /* - * FIXME with SAGV disabled maybe we can assume - * point 1 will always be used? Seems to match - * the behaviour observed in the wild. - */ - for (i = 0; i < bi->num_qgv_points; i++) { - unsigned int bw = icl_max_bw(dev_priv, num_planes, i); - - min_bw = min(bw, min_bw); - } - return min_bw; - } else { - return UINT_MAX; - } -} - static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state) { /* @@ -415,10 +425,15 @@ int intel_bw_atomic_check(struct intel_atomic_state *state) struct drm_i915_private *dev_priv = to_i915(state->base.dev); struct intel_crtc_state *new_crtc_state, *old_crtc_state; struct intel_bw_state *new_bw_state = NULL; - unsigned int data_rate, max_data_rate; + const struct intel_bw_state *old_bw_state = NULL; + unsigned int data_rate; unsigned int num_active_planes; struct intel_crtc *crtc; int i, ret; + u32 allowed_points = 0; + unsigned int max_bw_point = 0, max_bw = 0; + unsigned int num_qgv_points = dev_priv->max_bw[0].num_qgv_points; + u32 mask = (1 << num_qgv_points) - 1; /* FIXME earlier gens need some checks too */ if (INTEL_GEN(dev_priv) < 11) @@ -465,19 +480,71 @@ int intel_bw_atomic_check(struct intel_atomic_state *state) return ret; data_rate = intel_bw_data_rate(dev_priv, new_bw_state); - num_active_planes = intel_bw_num_active_planes(dev_priv, new_bw_state); - - max_data_rate = intel_max_data_rate(dev_priv, num_active_planes); - data_rate = DIV_ROUND_UP(data_rate, 1000); - if (data_rate > max_data_rate) { - drm_dbg_kms(&dev_priv->drm, - "Bandwidth %u MB/s exceeds max available %d MB/s (%d active planes)\n", - data_rate, max_data_rate, num_active_planes); + num_active_planes = intel_bw_num_active_planes(dev_priv, new_bw_state); + + for (i = 0; i < num_qgv_points; i++) { + unsigned int max_data_rate; + + max_data_rate = icl_max_bw(dev_priv, num_active_planes, i); + /* + * We need to know which qgv point gives us + * maximum bandwidth in order to disable SAGV + * if we find that we exceed SAGV block time + * with watermarks. By that moment we already + * have those, as it is calculated earlier in + * intel_atomic_check, + */ + if (max_data_rate > max_bw) { + max_bw_point = i; + max_bw = max_data_rate; + } + if (max_data_rate >= data_rate) + allowed_points |= BIT(i); + drm_dbg_kms(&dev_priv->drm, "QGV point %d: max bw %d required %d\n", + i, max_data_rate, data_rate); + } + + /* + * BSpec states that we always should have at least one allowed point + * left, so if we couldn't - simply reject the configuration for obvious + * reasons. + */ + if (allowed_points == 0) { + drm_dbg_kms(&dev_priv->drm, "No QGV points provide sufficient memory" + " bandwidth %d for display configuration(%d active planes).\n", + data_rate, num_active_planes); return -EINVAL; } + /* + * Leave only single point with highest bandwidth, if + * we can't enable SAGV due to the increased memory latency it may + * cause. + */ + if (!intel_can_enable_sagv(dev_priv, new_bw_state)) { + allowed_points = BIT(max_bw_point); + drm_dbg_kms(&dev_priv->drm, "No SAGV, using single QGV point %d\n", + max_bw_point); + } + /* + * We store the ones which need to be masked as that is what PCode + * actually accepts as a parameter. + */ + new_bw_state->qgv_points_mask = ~allowed_points & mask; + + old_bw_state = intel_atomic_get_old_bw_state(state); + /* + * If the actual mask had changed we need to make sure that + * the commits are serialized(in case this is a nomodeset, nonblocking) + */ + if (new_bw_state->qgv_points_mask != old_bw_state->qgv_points_mask) { + ret = intel_atomic_serialize_global_state(&new_bw_state->base); + if (ret) + return ret; + } + return 0; } diff --git a/drivers/gpu/drm/i915/display/intel_bw.h b/drivers/gpu/drm/i915/display/intel_bw.h index 898b4a85ccab..bbcaaa73ec1b 100644 --- a/drivers/gpu/drm/i915/display/intel_bw.h +++ b/drivers/gpu/drm/i915/display/intel_bw.h @@ -24,6 +24,13 @@ struct intel_bw_state { */ u8 pipe_sagv_reject; + /* + * Current QGV points mask, which restricts + * some particular SAGV states, not to confuse + * with pipe_sagv_mask. + */ + u8 qgv_points_mask; + unsigned int data_rate[I915_MAX_PIPES]; u8 num_active_planes[I915_MAX_PIPES]; @@ -47,5 +54,7 @@ int intel_bw_init(struct drm_i915_private *dev_priv); int intel_bw_atomic_check(struct intel_atomic_state *state); void intel_bw_crtc_update(struct intel_bw_state *bw_state, const struct intel_crtc_state *crtc_state); +int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv, + u32 points_mask); #endif /* __INTEL_BW_H__ */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f067e226ca7f..68d88634bc29 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3762,6 +3762,8 @@ void intel_sagv_pre_plane_update(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); const struct intel_bw_state *new_bw_state; + const struct intel_bw_state *old_bw_state; + u32 new_mask = 0; /* * Just return if we can't control SAGV or don't have it. @@ -3777,14 +3779,42 @@ void intel_sagv_pre_plane_update(struct intel_atomic_state *state) if (!new_bw_state) return; - if (!intel_can_enable_sagv(dev_priv, new_bw_state)) + if (INTEL_GEN(dev_priv) < 11 && !intel_can_enable_sagv(dev_priv, new_bw_state)) { intel_disable_sagv(dev_priv); + return; + } + + old_bw_state = intel_atomic_get_old_bw_state(state); + /* + * Nothing to mask + */ + if (new_bw_state->qgv_points_mask == old_bw_state->qgv_points_mask) + return; + + new_mask = old_bw_state->qgv_points_mask | new_bw_state->qgv_points_mask; + + /* + * If new mask is zero - means there is nothing to mask, + * we can only unmask, which should be done in unmask. + */ + if (!new_mask) + return; + + /* + * Restrict required qgv points before updating the configuration. + * According to BSpec we can't mask and unmask qgv points at the same + * time. Also masking should be done before updating the configuration + * and unmasking afterwards. + */ + icl_pcode_restrict_qgv_points(dev_priv, new_mask); } void intel_sagv_post_plane_update(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); const struct intel_bw_state *new_bw_state; + const struct intel_bw_state *old_bw_state; + u32 new_mask = 0; /* * Just return if we can't control SAGV or don't have it. @@ -3800,8 +3830,27 @@ void intel_sagv_post_plane_update(struct intel_atomic_state *state) if (!new_bw_state) return; - if (intel_can_enable_sagv(dev_priv, new_bw_state)) + if (INTEL_GEN(dev_priv) < 11 && intel_can_enable_sagv(dev_priv, new_bw_state)) { intel_enable_sagv(dev_priv); + return; + } + + old_bw_state = intel_atomic_get_old_bw_state(state); + /* + * Nothing to unmask + */ + if (new_bw_state->qgv_points_mask == old_bw_state->qgv_points_mask) + return; + + new_mask = new_bw_state->qgv_points_mask; + + /* + * Allow required qgv points after updating the configuration. + * According to BSpec we can't mask and unmask qgv points at the same + * time. Also masking should be done before updating the configuration + * and unmasking afterwards. + */ + icl_pcode_restrict_qgv_points(dev_priv, new_mask); } static bool skl_crtc_can_enable_sagv(const struct intel_crtc_state *crtc_state) From 8ca6d0237d1696060cd4f5a3ee93ee001c1a9d5b Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Thu, 14 May 2020 10:48:53 +0300 Subject: [PATCH 92/97] drm/i915: Enable SAGV support for Gen12 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flip the switch and enable SAGV support for Gen12 also. Signed-off-by: Stanislav Lisovskiy Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200514074853.9508-4-stanislav.lisovskiy@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 68d88634bc29..696491d71a1d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3638,10 +3638,6 @@ static bool skl_needs_memory_bw_wa(struct drm_i915_private *dev_priv) static bool intel_has_sagv(struct drm_i915_private *dev_priv) { - /* HACK! */ - if (IS_GEN(dev_priv, 12)) - return false; - return (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) && dev_priv->sagv_status != I915_SAGV_NOT_CONTROLLED; } From 2e2701582a8039b2f8a2fa811237ac8ec98355fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 2 Mar 2020 16:39:38 +0200 Subject: [PATCH 93/97] drm/i915: Nuke pointless div by 64bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bunch of places use a 64bit divisor needlessly. Switch to 32bit divisor. Cc: Lionel Landwerlin Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200302143943.32676-1-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_perf.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index c533f569dd42..2e611551d809 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1612,10 +1612,9 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) struct drm_i915_gem_object *bo; struct i915_vma *vma; const u64 delay_ticks = 0xffffffffffffffff - - DIV64_U64_ROUND_UP( - atomic64_read(&stream->perf->noa_programming_delay) * - RUNTIME_INFO(i915)->cs_timestamp_frequency_khz, - 1000000ull); + DIV_ROUND_UP_ULL(atomic64_read(&stream->perf->noa_programming_delay) * + RUNTIME_INFO(i915)->cs_timestamp_frequency_khz, + 1000000); const u32 base = stream->engine->mmio_base; #define CS_GPR(x) GEN8_RING_CS_GPR(base, x) u32 *batch, *ts0, *cs, *jump; @@ -3485,8 +3484,8 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent) { - return div64_u64(1000000000ULL * (2ULL << exponent), - 1000ULL * RUNTIME_INFO(perf->i915)->cs_timestamp_frequency_khz); + return div_u64(1000000 * (2ULL << exponent), + RUNTIME_INFO(perf->i915)->cs_timestamp_frequency_khz); } /** From 56f1b31f1dd60db4b02024a13eea45b5bbccc44e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 2 Mar 2020 16:39:39 +0200 Subject: [PATCH 94/97] drm/i915: Store CS timestamp frequency in Hz MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kHz isn't accurate enough for storing the CS timestamp frequency on some of the platforms. Store the value in Hz instead. Cc: Lionel Landwerlin Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200302143943.32676-2-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson Reviewed-by: Lionel Landwerlin --- drivers/gpu/drm/i915/i915_debugfs.c | 6 ++-- drivers/gpu/drm/i915/i915_getparam.c | 2 +- drivers/gpu/drm/i915/i915_perf.c | 12 ++++---- drivers/gpu/drm/i915/intel_device_info.c | 34 +++++++++++----------- drivers/gpu/drm/i915/intel_device_info.h | 2 +- drivers/gpu/drm/i915/selftests/i915_perf.c | 4 +-- 6 files changed, 30 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index c01837a16a4e..1469ea1e85f2 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1304,8 +1304,8 @@ static int i915_engine_info(struct seq_file *m, void *unused) seq_printf(m, "GT awake? %s [%d]\n", yesno(dev_priv->gt.awake), atomic_read(&dev_priv->gt.wakeref.count)); - seq_printf(m, "CS timestamp frequency: %u kHz\n", - RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz); + seq_printf(m, "CS timestamp frequency: %u Hz\n", + RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_hz); p = drm_seq_file_printer(m); for_each_uabi_engine(engine, dev_priv) @@ -1404,7 +1404,7 @@ static int i915_perf_noa_delay_set(void *data, u64 val) { struct drm_i915_private *i915 = data; - const u32 clk = RUNTIME_INFO(i915)->cs_timestamp_frequency_khz; + const u32 clk = RUNTIME_INFO(i915)->cs_timestamp_frequency_hz / 1000; /* * This would lead to infinite waits as we're doing timestamp diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index 54fce81d5724..d042644b9cd2 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -153,7 +153,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, return -ENODEV; break; case I915_PARAM_CS_TIMESTAMP_FREQUENCY: - value = 1000 * RUNTIME_INFO(i915)->cs_timestamp_frequency_khz; + value = RUNTIME_INFO(i915)->cs_timestamp_frequency_hz; break; case I915_PARAM_MMAP_GTT_COHERENT: value = INTEL_INFO(i915)->has_coherent_ggtt; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 2e611551d809..205327c6c342 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1613,8 +1613,8 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) struct i915_vma *vma; const u64 delay_ticks = 0xffffffffffffffff - DIV_ROUND_UP_ULL(atomic64_read(&stream->perf->noa_programming_delay) * - RUNTIME_INFO(i915)->cs_timestamp_frequency_khz, - 1000000); + RUNTIME_INFO(i915)->cs_timestamp_frequency_hz, + 1000000000); const u32 base = stream->engine->mmio_base; #define CS_GPR(x) GEN8_RING_CS_GPR(base, x) u32 *batch, *ts0, *cs, *jump; @@ -3484,8 +3484,8 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent) { - return div_u64(1000000 * (2ULL << exponent), - RUNTIME_INFO(perf->i915)->cs_timestamp_frequency_khz); + return div_u64(1000000000 * (2ULL << exponent), + RUNTIME_INFO(perf->i915)->cs_timestamp_frequency_hz); } /** @@ -4343,8 +4343,8 @@ void i915_perf_init(struct drm_i915_private *i915) if (perf->ops.enable_metric_set) { mutex_init(&perf->lock); - oa_sample_rate_hard_limit = 1000 * - (RUNTIME_INFO(i915)->cs_timestamp_frequency_khz / 2); + oa_sample_rate_hard_limit = + RUNTIME_INFO(i915)->cs_timestamp_frequency_hz / 2; mutex_init(&perf->metrics_lock); idr_init(&perf->metrics_idr); diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 91bb7891c70c..7277d86f0d3f 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -136,8 +136,8 @@ void intel_device_info_print_runtime(const struct intel_runtime_info *info, sseu_dump(&info->sseu, p); drm_printf(p, "rawclk rate: %u kHz\n", info->rawclk_freq); - drm_printf(p, "CS timestamp frequency: %u kHz\n", - info->cs_timestamp_frequency_khz); + drm_printf(p, "CS timestamp frequency: %u Hz\n", + info->cs_timestamp_frequency_hz); } static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice, @@ -678,12 +678,12 @@ static u32 read_reference_ts_freq(struct drm_i915_private *dev_priv) base_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >> GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1; - base_freq *= 1000; + base_freq *= 1000000; frac_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >> GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT); - frac_freq = 1000 / (frac_freq + 1); + frac_freq = 1000000 / (frac_freq + 1); return base_freq + frac_freq; } @@ -691,8 +691,8 @@ static u32 read_reference_ts_freq(struct drm_i915_private *dev_priv) static u32 gen10_get_crystal_clock_freq(struct drm_i915_private *dev_priv, u32 rpm_config_reg) { - u32 f19_2_mhz = 19200; - u32 f24_mhz = 24000; + u32 f19_2_mhz = 19200000; + u32 f24_mhz = 24000000; u32 crystal_clock = (rpm_config_reg & GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; @@ -711,10 +711,10 @@ static u32 gen10_get_crystal_clock_freq(struct drm_i915_private *dev_priv, static u32 gen11_get_crystal_clock_freq(struct drm_i915_private *dev_priv, u32 rpm_config_reg) { - u32 f19_2_mhz = 19200; - u32 f24_mhz = 24000; - u32 f25_mhz = 25000; - u32 f38_4_mhz = 38400; + u32 f19_2_mhz = 19200000; + u32 f24_mhz = 24000000; + u32 f25_mhz = 25000000; + u32 f38_4_mhz = 38400000; u32 crystal_clock = (rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; @@ -736,9 +736,9 @@ static u32 gen11_get_crystal_clock_freq(struct drm_i915_private *dev_priv, static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) { - u32 f12_5_mhz = 12500; - u32 f19_2_mhz = 19200; - u32 f24_mhz = 24000; + u32 f12_5_mhz = 12500000; + u32 f19_2_mhz = 19200000; + u32 f24_mhz = 24000000; if (INTEL_GEN(dev_priv) <= 4) { /* PRMs say: @@ -747,7 +747,7 @@ static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) * hclks." (through the “Clocking Configuration” * (“CLKCFG”) MCHBAR register) */ - return RUNTIME_INFO(dev_priv)->rawclk_freq / 16; + return RUNTIME_INFO(dev_priv)->rawclk_freq * 1000 / 16; } else if (INTEL_GEN(dev_priv) <= 8) { /* PRMs say: * @@ -1048,11 +1048,11 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) drm_dbg(&dev_priv->drm, "rawclk rate: %d kHz\n", runtime->rawclk_freq); /* Initialize command stream timestamp frequency */ - runtime->cs_timestamp_frequency_khz = + runtime->cs_timestamp_frequency_hz = read_timestamp_frequency(dev_priv); - if (runtime->cs_timestamp_frequency_khz) { + if (runtime->cs_timestamp_frequency_hz) { runtime->cs_timestamp_period_ns = - div_u64(1e6, runtime->cs_timestamp_frequency_khz); + div_u64(1e9, runtime->cs_timestamp_frequency_hz); drm_dbg(&dev_priv->drm, "CS timestamp wraparound in %lldms\n", div_u64(mul_u32_u32(runtime->cs_timestamp_period_ns, diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 69c9257c6c6a..62e03ffa377e 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -221,7 +221,7 @@ struct intel_runtime_info { u32 rawclk_freq; - u32 cs_timestamp_frequency_khz; + u32 cs_timestamp_frequency_hz; u32 cs_timestamp_period_ns; /* Media engine access to SFC per instance */ diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c index ca0c9dbab713..5edfcfd42a31 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf.c +++ b/drivers/gpu/drm/i915/selftests/i915_perf.c @@ -262,8 +262,8 @@ static int live_noa_delay(void *arg) delay = intel_read_status_page(stream->engine, 0x102); delay -= intel_read_status_page(stream->engine, 0x100); - delay = div_u64(mul_u32_u32(delay, 1000 * 1000), - RUNTIME_INFO(i915)->cs_timestamp_frequency_khz); + delay = div_u64(mul_u32_u32(delay, 1000000000), + RUNTIME_INFO(i915)->cs_timestamp_frequency_hz); pr_info("GPU delay: %uns, expected %lluns\n", delay, expected); From 802a5820fc0c0f12b40280db3dbaaf8359b07243 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 2 Mar 2020 16:39:42 +0200 Subject: [PATCH 95/97] drm/i915: Extract i915_cs_timestamp_{ns_to_ticks,tick_to_ns}() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull the code to do the CS timestamp ns<->ticks conversion into helpers and use them all over. The check in i915_perf_noa_delay_set() seems a bit dubious, so we switch it to do what I assume it wanted to do all along (ie. make sure the resulting delay in CS timestamp ticks doesn't exceed 32bits)? Cc: Lionel Landwerlin Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200302143943.32676-5-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 3 +-- drivers/gpu/drm/i915/i915_drv.h | 12 ++++++++++++ drivers/gpu/drm/i915/i915_perf.c | 7 ++----- drivers/gpu/drm/i915/intel_device_info.c | 2 +- drivers/gpu/drm/i915/selftests/i915_perf.c | 3 +-- 5 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1469ea1e85f2..4481feb34bc5 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1404,13 +1404,12 @@ static int i915_perf_noa_delay_set(void *data, u64 val) { struct drm_i915_private *i915 = data; - const u32 clk = RUNTIME_INFO(i915)->cs_timestamp_frequency_hz / 1000; /* * This would lead to infinite waits as we're doing timestamp * difference on the CS with only 32bits. */ - if (val > mul_u32_u32(U32_MAX, clk)) + if (i915_cs_timestamp_ns_to_ticks(i915, val) > U32_MAX) return -EINVAL; atomic64_set(&i915->perf.noa_programming_delay, val); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ad373b57699a..13937175ab66 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1921,4 +1921,16 @@ i915_coherent_map_type(struct drm_i915_private *i915) return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC; } +static inline u64 i915_cs_timestamp_ns_to_ticks(struct drm_i915_private *i915, u64 val) +{ + return DIV_ROUND_UP_ULL(val * RUNTIME_INFO(i915)->cs_timestamp_frequency_hz, + 1000000000); +} + +static inline u64 i915_cs_timestamp_ticks_to_ns(struct drm_i915_private *i915, u64 val) +{ + return div_u64(val * 1000000000, + RUNTIME_INFO(i915)->cs_timestamp_frequency_hz); +} + #endif diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 205327c6c342..f35712d04ba4 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1612,9 +1612,7 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) struct drm_i915_gem_object *bo; struct i915_vma *vma; const u64 delay_ticks = 0xffffffffffffffff - - DIV_ROUND_UP_ULL(atomic64_read(&stream->perf->noa_programming_delay) * - RUNTIME_INFO(i915)->cs_timestamp_frequency_hz, - 1000000000); + i915_cs_timestamp_ns_to_ticks(i915, atomic64_read(&stream->perf->noa_programming_delay)); const u32 base = stream->engine->mmio_base; #define CS_GPR(x) GEN8_RING_CS_GPR(base, x) u32 *batch, *ts0, *cs, *jump; @@ -3484,8 +3482,7 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent) { - return div_u64(1000000000 * (2ULL << exponent), - RUNTIME_INFO(perf->i915)->cs_timestamp_frequency_hz); + return i915_cs_timestamp_ticks_to_ns(perf->i915, 2ULL << exponent); } /** diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 7277d86f0d3f..8a635bd4d5d8 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -1052,7 +1052,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) read_timestamp_frequency(dev_priv); if (runtime->cs_timestamp_frequency_hz) { runtime->cs_timestamp_period_ns = - div_u64(1e9, runtime->cs_timestamp_frequency_hz); + i915_cs_timestamp_ticks_to_ns(dev_priv, 1); drm_dbg(&dev_priv->drm, "CS timestamp wraparound in %lldms\n", div_u64(mul_u32_u32(runtime->cs_timestamp_period_ns, diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c index 5edfcfd42a31..8eb3108f1767 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf.c +++ b/drivers/gpu/drm/i915/selftests/i915_perf.c @@ -262,8 +262,7 @@ static int live_noa_delay(void *arg) delay = intel_read_status_page(stream->engine, 0x102); delay -= intel_read_status_page(stream->engine, 0x100); - delay = div_u64(mul_u32_u32(delay, 1000000000), - RUNTIME_INFO(i915)->cs_timestamp_frequency_hz); + delay = i915_cs_timestamp_ticks_to_ns(i915, delay); pr_info("GPU delay: %uns, expected %lluns\n", delay, expected); From ca69a3c68e2150ffb73ff1ff7d2b5390d76b3eb9 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 30 Aug 2019 13:50:53 +0300 Subject: [PATCH 96/97] drm/i915: Document locking guidelines To ensure cross-driver locking compatibility, document the expected guidelines for implementing the GEM locking in i915. Note that this is a description of how things should end up after being reworked, and does not reflect the current state of things. v2: Use rst note:: tag (Rodrigo) Signed-off-by: Joonas Lahtinen Signed-off-by: Daniel Vetter Signed-off-by: Chris Wilson Cc: Dave Airlie Cc: Matthew Auld Cc: Abdiel Janulgue Cc: CQ Tang Reviewed-by: Rodrigo Vivi Acked-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20190830105053.17491-1-joonas.lahtinen@linux.intel.com --- Documentation/gpu/i915.rst | 46 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 429b08aac797..33cc6ddf8f64 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -329,6 +329,52 @@ for execution also include a list of all locations within buffers that refer to GPU-addresses so that the kernel can edit the buffer correctly. This process is dubbed relocation. +Locking Guidelines +------------------ + +.. note:: + This is a description of how the locking should be after + refactoring is done. Does not necessarily reflect what the locking + looks like while WIP. + +#. All locking rules and interface contracts with cross-driver interfaces + (dma-buf, dma_fence) need to be followed. + +#. No struct_mutex anywhere in the code + +#. dma_resv will be the outermost lock (when needed) and ww_acquire_ctx + is to be hoisted at highest level and passed down within i915_gem_ctx + in the call chain + +#. While holding lru/memory manager (buddy, drm_mm, whatever) locks + system memory allocations are not allowed + + * Enforce this by priming lockdep (with fs_reclaim). If we + allocate memory while holding these looks we get a rehash + of the shrinker vs. struct_mutex saga, and that would be + real bad. + +#. Do not nest different lru/memory manager locks within each other. + Take them in turn to update memory allocations, relying on the object’s + dma_resv ww_mutex to serialize against other operations. + +#. The suggestion for lru/memory managers locks is that they are small + enough to be spinlocks. + +#. All features need to come with exhaustive kernel selftests and/or + IGT tests when appropriate + +#. All LMEM uAPI paths need to be fully restartable (_interruptible() + for all locks/waits/sleeps) + + * Error handling validation through signal injection. + Still the best strategy we have for validating GEM uAPI + corner cases. + Must be excessively used in the IGT, and we need to check + that we really have full path coverage of all error cases. + + * -EDEADLK handling with ww_mutex + GEM BO Management Implementation Details ---------------------------------------- From 3a36aa237e4ed04553c0998cf5f47eda3e206e4f Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Fri, 15 May 2020 14:49:24 +0300 Subject: [PATCH 97/97] drm/i915: Update DRIVER_DATE to 20200515 Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 13937175ab66..e993c64a02b0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -108,8 +108,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20200430" -#define DRIVER_TIMESTAMP 1588234401 +#define DRIVER_DATE "20200515" +#define DRIVER_TIMESTAMP 1589543364 struct drm_i915_gem_object;