mirror of https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-06 05:13:18 +00:00
Merge tag 'drm-intel-gt-next-2021-12-09' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
Core Changes:

- Fix PENDING_ERROR leak in dma_fence_array_signaled() (Thomas Hellström)

Driver Changes:

- Fix runtime PM handling during PXP suspend (Tejas Upadhyay)
- Improve eviction performance on discrete by implementing async TTM moves (Thomas Hellström, Maarten Lankhorst)
- Improve robustness of error capture under memory pressure (Thomas Hellström)
- Fix GuC PMU versus GPU reset handling (Umesh Nerlige Ramappa)
- Use per device iommu check (Tvrtko Ursulin)
- Make error capture work with async migration (Thomas Hellström)
- Revert incorrect implementation of Wa_1508744258 causing hangs (José Roberto de Souza)
- Disable coarse power gating on some DG2 steppings workaround (Matt Roper)
- Add IC cache invalidation workaround on DG2 (Ramalingam C)
- Move two Icelake workarounds to the right place (Raviteja Goud Talla)
- Fix error pointer dereference in i915_gem_do_execbuffer() (Dan Carpenter)
- Fixup a couple of generic and DG2 specific issues in migration code (Matthew Auld)
- Fix kernel-doc warnings in i915_gem_object.c (Randy Dunlap)
- Drop stealing of bits from i915_sw_fence function pointer (Matthew Brost)
- Introduce new macros for i915 PTE (Michael Cheng)
- Prep work for engine reset by reset domain lookup (Tejas Upadhyay)
- Fixup drm-intel-gt-next build failure (Matthew Auld)
- Fix live_engine_busy_stats selftests in GuC mode (Umesh Nerlige Ramappa)
- Remove dma_resv_prune (Maarten Lankhorst)
- Preserve huge pages enablement after driver reload (Matthew Auld)
- Fix a NULL pointer dereference in igt_request_rewind() (selftests) (Zhou Qingyang)
- Add workaround numbers to GEN7_COMMON_SLICE_CHICKEN1 whitelisting (José Roberto de Souza)
- Increase timeouts in i915_gem_contexts selftests to handle GuC being slower (Bruce Chang)

Signed-off-by: Dave Airlie <airlied@redhat.com>

# Conflicts:
#	drivers/gpu/drm/i915/display/intel_fbc.c

From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/YbIBOeqhn+nPzaYD@tursulin-mobl2
This commit is contained in commit 211b4dbc07.
@@ -9496,6 +9496,7 @@ INTEL DRM DRIVERS (excluding Poulsbo, Moorestown and derivative chipsets)
 M:	Jani Nikula <jani.nikula@linux.intel.com>
 M:	Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
 M:	Rodrigo Vivi <rodrigo.vivi@intel.com>
+M:	Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
 L:	intel-gfx@lists.freedesktop.org
 S:	Supported
 W:	https://01.org/linuxgraphics/
@@ -104,7 +104,11 @@ static bool dma_fence_array_signaled(struct dma_fence *fence)
 {
 	struct dma_fence_array *array = to_dma_fence_array(fence);
 
-	return atomic_read(&array->num_pending) <= 0;
+	if (atomic_read(&array->num_pending) > 0)
+		return false;
+
+	dma_fence_array_clear_pending_error(array);
+	return true;
 }
 
 static void dma_fence_array_release(struct dma_fence *fence)
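Note: the hunk above clears the PENDING_ERROR sentinel once the array is observed as signaled, so the sentinel is never reported as a real fence error. The following is a minimal user-space analogue of that pattern (hypothetical types and names, not kernel code) illustrating why the clear has to happen on the signaled path:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define PENDING_ERROR 1	/* sentinel meaning "no real error reported yet" */

struct fake_fence_array {
	atomic_int num_pending;
	atomic_int error;	/* holds PENDING_ERROR until a real error or signal */
};

/* Analogue of dma_fence_array_clear_pending_error(): drop the sentinel. */
static void clear_pending_error(struct fake_fence_array *a)
{
	int expected = PENDING_ERROR;

	atomic_compare_exchange_strong(&a->error, &expected, 0);
}

/* Analogue of the fixed signaled() check above. */
static bool fake_array_signaled(struct fake_fence_array *a)
{
	if (atomic_load(&a->num_pending) > 0)
		return false;

	clear_pending_error(a);	/* don't leak the sentinel as a real error */
	return true;
}

int main(void)
{
	struct fake_fence_array a = { .num_pending = 0, .error = PENDING_ERROR };

	/* error reads back as 0 after the signaled check, not the sentinel */
	printf("signaled=%d error=%d\n", fake_array_signaled(&a),
	       atomic_load(&a.error));
	return 0;
}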
@@ -153,6 +153,7 @@ gem-y += \
 	gem/i915_gem_throttle.o \
 	gem/i915_gem_tiling.o \
 	gem/i915_gem_ttm.o \
+	gem/i915_gem_ttm_move.o \
 	gem/i915_gem_ttm_pm.o \
 	gem/i915_gem_userptr.o \
 	gem/i915_gem_wait.o \
@@ -172,6 +173,7 @@ i915-y += \
 	i915_trace_points.o \
 	i915_ttm_buddy_manager.o \
 	i915_vma.o \
+	i915_vma_snapshot.o \
 	intel_wopcm.o
 
 # general-purpose microcontroller (GuC) support
@@ -634,7 +634,7 @@ static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
 	for_each_pipe(dev_priv, pipe)
 		data_rate += bw_state->data_rate[pipe];
 
-	if (DISPLAY_VER(dev_priv) >= 13 && intel_vtd_active())
+	if (DISPLAY_VER(dev_priv) >= 13 && intel_vtd_active(dev_priv))
 		data_rate = data_rate * 105 / 100;
 
 	return data_rate;
@@ -1293,7 +1293,7 @@ static bool needs_async_flip_vtd_wa(const struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev);
 
-	return crtc_state->uapi.async_flip && intel_vtd_active() &&
+	return crtc_state->uapi.async_flip && intel_vtd_active(i915) &&
 		(DISPLAY_VER(i915) == 9 || IS_BROADWELL(i915) || IS_HASWELL(i915));
 }
 
@@ -8815,7 +8815,7 @@ static void intel_atomic_commit_work(struct work_struct *work)
 	intel_atomic_commit_tail(state);
 }
 
-static int __i915_sw_fence_call
+static int
 intel_atomic_commit_ready(struct i915_sw_fence *fence,
 			  enum i915_sw_fence_notify notify)
 {
@@ -1677,7 +1677,7 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *i915)
 static bool need_fbc_vtd_wa(struct drm_i915_private *i915)
 {
 	/* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */
-	if (intel_vtd_active() &&
+	if (intel_vtd_active(i915) &&
 	    (IS_SKYLAKE(i915) || IS_BROXTON(i915))) {
 		drm_info(&i915->drm,
 			 "Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n");
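Note: several hunks in this merge convert intel_vtd_active() from a global check into one that takes the device pointer, matching the "Use per device iommu check" entry in the tag above. The helper itself is not part of this excerpt; a plausible sketch of a per-device check is shown below, built on the generic device_iommu_mapped() API rather than the actual i915 implementation, so treat it as an assumption:

/*
 * Hypothetical sketch only (not the actual i915 helper): decide VT-d/IOMMU
 * activity from the GPU's own struct device instead of a system-wide flag.
 * In kernel context this needs i915_drv.h and <linux/device.h>.
 */
static inline bool intel_vtd_active(struct drm_i915_private *i915)
{
	return device_iommu_mapped(i915->drm.dev);
}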
@@ -69,10 +69,16 @@ static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj)
 bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 			     unsigned int flags)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct clflush *clflush;
 
 	assert_object_held(obj);
 
+	if (IS_DGFX(i915)) {
+		WARN_ON_ONCE(obj->cache_dirty);
+		return false;
+	}
+
 	/*
 	 * Stolen memory is always coherent with the GPU as it is explicitly
 	 * marked as wc by the system, or the system is cache-coherent.
@@ -105,16 +111,24 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	if (clflush) {
 		i915_sw_fence_await_reservation(&clflush->base.chain,
 						obj->base.resv, NULL, true,
-						i915_fence_timeout(to_i915(obj->base.dev)),
+						i915_fence_timeout(i915),
 						I915_FENCE_GFP);
 		dma_resv_add_excl_fence(obj->base.resv, &clflush->base.dma);
 		dma_fence_work_commit(&clflush->base);
+		/*
+		 * We must have successfully populated the pages(since we are
+		 * holding a pin on the pages as per the flush worker) to reach
+		 * this point, which must mean we have already done the required
+		 * flush-on-acquire, hence resetting cache_dirty here should be
+		 * safe.
+		 */
+		obj->cache_dirty = false;
 	} else if (obj->mm.pages) {
 		__do_clflush(obj);
+		obj->cache_dirty = false;
 	} else {
 		GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
 	}
 
-	obj->cache_dirty = false;
 	return true;
 }
@@ -1001,7 +1001,7 @@ static void free_engines_rcu(struct rcu_head *rcu)
 	free_engines(engines);
 }
 
-static int __i915_sw_fence_call
+static int
 engines_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 {
 	struct i915_gem_engines *engines =
@@ -248,8 +248,19 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
 	if (IS_ERR(pages))
 		return PTR_ERR(pages);
 
-	/* XXX: consider doing a vmap flush or something */
-	if (!HAS_LLC(i915) || i915_gem_object_can_bypass_llc(obj))
+	/*
+	 * DG1 is special here since it still snoops transactions even with
+	 * CACHE_NONE. This is not the case with other HAS_SNOOP platforms. We
+	 * might need to revisit this as we add new discrete platforms.
+	 *
+	 * XXX: Consider doing a vmap flush or something, where possible.
+	 * Currently we just do a heavy handed wbinvd_on_all_cpus() here since
+	 * the underlying sg_table might not even point to struct pages, so we
+	 * can't just call drm_clflush_sg or similar, like we do elsewhere in
+	 * the driver.
+	 */
+	if (i915_gem_object_can_bypass_llc(obj) ||
+	    (!HAS_LLC(i915) && !IS_DG1(i915)))
 		wbinvd_on_all_cpus();
 
 	sg_page_sizes = i915_sg_dma_sizes(pages->sgl);
@@ -18,10 +18,32 @@
 
 static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+	if (IS_DGFX(i915))
+		return false;
+
 	return !(obj->cache_level == I915_CACHE_NONE ||
 		 obj->cache_level == I915_CACHE_WT);
 }
 
+bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+	if (obj->cache_dirty)
+		return false;
+
+	if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+		return true;
+
+	if (IS_DGFX(i915))
+		return false;
+
+	/* Currently in use by HW (display engine)? Keep flushed. */
+	return i915_gem_object_is_framebuffer(obj);
+}
+
 static void
 flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
 {
@ -29,6 +29,7 @@
|
||||
#include "i915_gem_ioctls.h"
|
||||
#include "i915_trace.h"
|
||||
#include "i915_user_extensions.h"
|
||||
#include "i915_vma_snapshot.h"
|
||||
|
||||
struct eb_vma {
|
||||
struct i915_vma *vma;
|
||||
@ -307,11 +308,15 @@ struct i915_execbuffer {
|
||||
|
||||
struct eb_fence *fences;
|
||||
unsigned long num_fences;
|
||||
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
|
||||
struct i915_capture_list *capture_lists[MAX_ENGINE_INSTANCE + 1];
|
||||
#endif
|
||||
};
|
||||
|
||||
static int eb_parse(struct i915_execbuffer *eb);
|
||||
static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle);
|
||||
static void eb_unpin_engine(struct i915_execbuffer *eb);
|
||||
static void eb_capture_release(struct i915_execbuffer *eb);
|
||||
|
||||
static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
|
||||
{
|
||||
@ -990,7 +995,7 @@ static int eb_validate_vmas(struct i915_execbuffer *eb)
|
||||
}
|
||||
|
||||
if (!(ev->flags & EXEC_OBJECT_WRITE)) {
|
||||
err = dma_resv_reserve_shared(vma->resv, 1);
|
||||
err = dma_resv_reserve_shared(vma->obj->base.resv, 1);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
@ -1043,6 +1048,7 @@ static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
|
||||
i915_vma_put(vma);
|
||||
}
|
||||
|
||||
eb_capture_release(eb);
|
||||
eb_unpin_engine(eb);
|
||||
}
|
||||
|
||||
@ -1880,6 +1886,100 @@ eb_find_first_request_added(struct i915_execbuffer *eb)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
|
||||
|
||||
/* Stage with GFP_KERNEL allocations before we enter the signaling critical path */
|
||||
static void eb_capture_stage(struct i915_execbuffer *eb)
|
||||
{
|
||||
const unsigned int count = eb->buffer_count;
|
||||
unsigned int i = count, j;
|
||||
struct i915_vma_snapshot *vsnap;
|
||||
|
||||
while (i--) {
|
||||
struct eb_vma *ev = &eb->vma[i];
|
||||
struct i915_vma *vma = ev->vma;
|
||||
unsigned int flags = ev->flags;
|
||||
|
||||
if (!(flags & EXEC_OBJECT_CAPTURE))
|
||||
continue;
|
||||
|
||||
vsnap = i915_vma_snapshot_alloc(GFP_KERNEL);
|
||||
if (!vsnap)
|
||||
continue;
|
||||
|
||||
i915_vma_snapshot_init(vsnap, vma, "user");
|
||||
for_each_batch_create_order(eb, j) {
|
||||
struct i915_capture_list *capture;
|
||||
|
||||
capture = kmalloc(sizeof(*capture), GFP_KERNEL);
|
||||
if (!capture)
|
||||
continue;
|
||||
|
||||
capture->next = eb->capture_lists[j];
|
||||
capture->vma_snapshot = i915_vma_snapshot_get(vsnap);
|
||||
eb->capture_lists[j] = capture;
|
||||
}
|
||||
i915_vma_snapshot_put(vsnap);
|
||||
}
|
||||
}
|
||||
|
||||
/* Commit once we're in the critical path */
|
||||
static void eb_capture_commit(struct i915_execbuffer *eb)
|
||||
{
|
||||
unsigned int j;
|
||||
|
||||
for_each_batch_create_order(eb, j) {
|
||||
struct i915_request *rq = eb->requests[j];
|
||||
|
||||
if (!rq)
|
||||
break;
|
||||
|
||||
rq->capture_list = eb->capture_lists[j];
|
||||
eb->capture_lists[j] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Release anything that didn't get committed due to errors.
|
||||
* The capture_list will otherwise be freed at request retire.
|
||||
*/
|
||||
static void eb_capture_release(struct i915_execbuffer *eb)
|
||||
{
|
||||
unsigned int j;
|
||||
|
||||
for_each_batch_create_order(eb, j) {
|
||||
if (eb->capture_lists[j]) {
|
||||
i915_request_free_capture_list(eb->capture_lists[j]);
|
||||
eb->capture_lists[j] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void eb_capture_list_clear(struct i915_execbuffer *eb)
|
||||
{
|
||||
memset(eb->capture_lists, 0, sizeof(eb->capture_lists));
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static void eb_capture_stage(struct i915_execbuffer *eb)
|
||||
{
|
||||
}
|
||||
|
||||
static void eb_capture_commit(struct i915_execbuffer *eb)
|
||||
{
|
||||
}
|
||||
|
||||
static void eb_capture_release(struct i915_execbuffer *eb)
|
||||
{
|
||||
}
|
||||
|
||||
static void eb_capture_list_clear(struct i915_execbuffer *eb)
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static int eb_move_to_gpu(struct i915_execbuffer *eb)
|
||||
{
|
||||
const unsigned int count = eb->buffer_count;
|
||||
@ -1894,23 +1994,6 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
|
||||
|
||||
assert_vma_held(vma);
|
||||
|
||||
if (flags & EXEC_OBJECT_CAPTURE) {
|
||||
struct i915_capture_list *capture;
|
||||
|
||||
for_each_batch_create_order(eb, j) {
|
||||
if (!eb->requests[j])
|
||||
break;
|
||||
|
||||
capture = kmalloc(sizeof(*capture), GFP_KERNEL);
|
||||
if (capture) {
|
||||
capture->next =
|
||||
eb->requests[j]->capture_list;
|
||||
capture->vma = vma;
|
||||
eb->requests[j]->capture_list = capture;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If the GPU is not _reading_ through the CPU cache, we need
|
||||
* to make sure that any writes (both previous GPU writes from
|
||||
@ -1990,6 +2073,8 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
|
||||
|
||||
/* Unconditionally flush any chipset caches (for streaming writes). */
|
||||
intel_gt_chipset_flush(eb->gt);
|
||||
eb_capture_commit(eb);
|
||||
|
||||
return 0;
|
||||
|
||||
err_skip:
|
||||
@ -2164,7 +2249,7 @@ static int eb_parse(struct i915_execbuffer *eb)
|
||||
goto err_trampoline;
|
||||
}
|
||||
|
||||
err = dma_resv_reserve_shared(shadow->resv, 1);
|
||||
err = dma_resv_reserve_shared(shadow->obj->base.resv, 1);
|
||||
if (err)
|
||||
goto err_trampoline;
|
||||
|
||||
@ -3114,7 +3199,7 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
|
||||
/* Allocate a request for this batch buffer nice and early. */
|
||||
eb->requests[i] = i915_request_create(eb_find_context(eb, i));
|
||||
if (IS_ERR(eb->requests[i])) {
|
||||
out_fence = ERR_PTR(PTR_ERR(eb->requests[i]));
|
||||
out_fence = ERR_CAST(eb->requests[i]);
|
||||
eb->requests[i] = NULL;
|
||||
return out_fence;
|
||||
}
|
||||
@ -3132,13 +3217,14 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
|
||||
}
|
||||
|
||||
/*
|
||||
* Whilst this request exists, batch_obj will be on the
|
||||
* active_list, and so will hold the active reference. Only when
|
||||
* this request is retired will the batch_obj be moved onto
|
||||
* the inactive_list and lose its active reference. Hence we do
|
||||
* not need to explicitly hold another reference here.
|
||||
* Not really on stack, but we don't want to call
|
||||
* kfree on the batch_snapshot when we put it, so use the
|
||||
* _onstack interface.
|
||||
*/
|
||||
eb->requests[i]->batch = eb->batches[i]->vma;
|
||||
if (eb->batches[i]->vma)
|
||||
i915_vma_snapshot_init_onstack(&eb->requests[i]->batch_snapshot,
|
||||
eb->batches[i]->vma,
|
||||
"batch");
|
||||
if (eb->batch_pool) {
|
||||
GEM_BUG_ON(intel_context_is_parallel(eb->context));
|
||||
intel_gt_buffer_pool_mark_active(eb->batch_pool,
|
||||
@ -3187,6 +3273,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
|
||||
eb.fences = NULL;
|
||||
eb.num_fences = 0;
|
||||
|
||||
eb_capture_list_clear(&eb);
|
||||
|
||||
memset(eb.requests, 0, sizeof(struct i915_request *) *
|
||||
ARRAY_SIZE(eb.requests));
|
||||
eb.composite_fence = NULL;
|
||||
@ -3273,10 +3361,12 @@ i915_gem_do_execbuffer(struct drm_device *dev,
|
||||
}
|
||||
|
||||
ww_acquire_done(&eb.ww.ctx);
|
||||
eb_capture_stage(&eb);
|
||||
|
||||
out_fence = eb_requests_create(&eb, in_fence, out_fence_fd);
|
||||
if (IS_ERR(out_fence)) {
|
||||
err = PTR_ERR(out_fence);
|
||||
out_fence = NULL;
|
||||
if (eb.requests[0])
|
||||
goto err_request;
|
||||
else
|
||||
|
@ -145,24 +145,10 @@ static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
|
||||
.put_pages = i915_gem_object_put_pages_internal,
|
||||
};
|
||||
|
||||
/**
|
||||
* i915_gem_object_create_internal: create an object with volatile pages
|
||||
* @i915: the i915 device
|
||||
* @size: the size in bytes of backing storage to allocate for the object
|
||||
*
|
||||
* Creates a new object that wraps some internal memory for private use.
|
||||
* This object is not backed by swappable storage, and as such its contents
|
||||
* are volatile and only valid whilst pinned. If the object is reaped by the
|
||||
* shrinker, its pages and data will be discarded. Equally, it is not a full
|
||||
* GEM object and so not valid for access from userspace. This makes it useful
|
||||
* for hardware interfaces like ringbuffers (which are pinned from the time
|
||||
* the request is written to the time the hardware stops accessing it), but
|
||||
* not for contexts (which need to be preserved when not active for later
|
||||
* reuse). Note that it is not cleared upon allocation.
|
||||
*/
|
||||
struct drm_i915_gem_object *
|
||||
i915_gem_object_create_internal(struct drm_i915_private *i915,
|
||||
phys_addr_t size)
|
||||
__i915_gem_object_create_internal(struct drm_i915_private *i915,
|
||||
const struct drm_i915_gem_object_ops *ops,
|
||||
phys_addr_t size)
|
||||
{
|
||||
static struct lock_class_key lock_class;
|
||||
struct drm_i915_gem_object *obj;
|
||||
@ -179,7 +165,7 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
drm_gem_private_object_init(&i915->drm, &obj->base, size);
|
||||
i915_gem_object_init(obj, &i915_gem_object_internal_ops, &lock_class, 0);
|
||||
i915_gem_object_init(obj, ops, &lock_class, 0);
|
||||
obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
|
||||
|
||||
/*
|
||||
@ -199,3 +185,25 @@ i915_gem_object_create_internal(struct drm_i915_private *i915,
|
||||
|
||||
return obj;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_gem_object_create_internal: create an object with volatile pages
|
||||
* @i915: the i915 device
|
||||
* @size: the size in bytes of backing storage to allocate for the object
|
||||
*
|
||||
* Creates a new object that wraps some internal memory for private use.
|
||||
* This object is not backed by swappable storage, and as such its contents
|
||||
* are volatile and only valid whilst pinned. If the object is reaped by the
|
||||
* shrinker, its pages and data will be discarded. Equally, it is not a full
|
||||
* GEM object and so not valid for access from userspace. This makes it useful
|
||||
* for hardware interfaces like ringbuffers (which are pinned from the time
|
||||
* the request is written to the time the hardware stops accessing it), but
|
||||
* not for contexts (which need to be preserved when not active for later
|
||||
* reuse). Note that it is not cleared upon allocation.
|
||||
*/
|
||||
struct drm_i915_gem_object *
|
||||
i915_gem_object_create_internal(struct drm_i915_private *i915,
|
||||
phys_addr_t size)
|
||||
{
|
||||
return __i915_gem_object_create_internal(i915, &i915_gem_object_internal_ops, size);
|
||||
}
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include "i915_gem_context.h"
|
||||
#include "i915_gem_mman.h"
|
||||
#include "i915_gem_object.h"
|
||||
#include "i915_gem_ttm.h"
|
||||
#include "i915_memcpy.h"
|
||||
#include "i915_trace.h"
|
||||
|
||||
@ -91,7 +92,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_gem_object_fini - Clean up a GEM object initialization
|
||||
* __i915_gem_object_fini - Clean up a GEM object initialization
|
||||
* @obj: The gem object to cleanup
|
||||
*
|
||||
* This function cleans up gem object fields that are set up by
|
||||
@ -107,25 +108,29 @@ void __i915_gem_object_fini(struct drm_i915_gem_object *obj)
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark up the object's coherency levels for a given cache_level
|
||||
* i915_gem_object_set_cache_coherency - Mark up the object's coherency levels
|
||||
* for a given cache_level
|
||||
* @obj: #drm_i915_gem_object
|
||||
* @cache_level: cache level
|
||||
*/
|
||||
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
|
||||
unsigned int cache_level)
|
||||
{
|
||||
struct drm_i915_private *i915 = to_i915(obj->base.dev);
|
||||
|
||||
obj->cache_level = cache_level;
|
||||
|
||||
if (cache_level != I915_CACHE_NONE)
|
||||
obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
|
||||
I915_BO_CACHE_COHERENT_FOR_WRITE);
|
||||
else if (HAS_LLC(to_i915(obj->base.dev)))
|
||||
else if (HAS_LLC(i915))
|
||||
obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
|
||||
else
|
||||
obj->cache_coherent = 0;
|
||||
|
||||
obj->cache_dirty =
|
||||
!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
|
||||
!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) &&
|
||||
!IS_DGFX(i915);
|
||||
}
|
||||
|
||||
bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj)
|
||||
@ -363,15 +368,6 @@ static void i915_gem_free_object(struct drm_gem_object *gem_obj)
|
||||
*/
|
||||
atomic_inc(&i915->mm.free_count);
|
||||
|
||||
/*
|
||||
* This serializes freeing with the shrinker. Since the free
|
||||
* is delayed, first by RCU then by the workqueue, we want the
|
||||
* shrinker to be able to free pages of unreferenced objects,
|
||||
* or else we may oom whilst there are plenty of deferred
|
||||
* freed objects.
|
||||
*/
|
||||
i915_gem_object_make_unshrinkable(obj);
|
||||
|
||||
/*
|
||||
* Since we require blocking on struct_mutex to unbind the freed
|
||||
* object from the GPU before releasing resources back to the
|
||||
@ -456,7 +452,7 @@ i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 offset
|
||||
* from can't cross a page boundary. The caller must ensure that @obj pages
|
||||
* are pinned and that @obj is synced wrt. any related writes.
|
||||
*
|
||||
* Returns 0 on success or -ENODEV if the type of @obj's backing store is
|
||||
* Return: %0 on success or -ENODEV if the type of @obj's backing store is
|
||||
* unsupported.
|
||||
*/
|
||||
int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset, void *dst, int size)
|
||||
@ -732,6 +728,57 @@ static const struct drm_gem_object_funcs i915_gem_object_funcs = {
|
||||
.export = i915_gem_prime_export,
|
||||
};
|
||||
|
||||
/**
|
||||
* i915_gem_object_get_moving_fence - Get the object's moving fence if any
|
||||
* @obj: The object whose moving fence to get.
|
||||
*
|
||||
* A non-signaled moving fence means that there is an async operation
|
||||
* pending on the object that needs to be waited on before setting up
|
||||
* any GPU- or CPU PTEs to the object's pages.
|
||||
*
|
||||
* Return: A refcounted pointer to the object's moving fence if any,
|
||||
* NULL otherwise.
|
||||
*/
|
||||
struct dma_fence *
|
||||
i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
return dma_fence_get(i915_gem_to_ttm(obj)->moving);
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_gem_object_wait_moving_fence - Wait for the object's moving fence if any
|
||||
* @obj: The object whose moving fence to wait for.
|
||||
* @intr: Whether to wait interruptible.
|
||||
*
|
||||
* If the moving fence signaled without an error, it is detached from the
|
||||
* object and put.
|
||||
*
|
||||
* Return: 0 if successful, -ERESTARTSYS if the wait was interrupted,
|
||||
* negative error code if the async operation represented by the
|
||||
* moving fence failed.
|
||||
*/
|
||||
int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
|
||||
bool intr)
|
||||
{
|
||||
struct dma_fence *fence = i915_gem_to_ttm(obj)->moving;
|
||||
int ret;
|
||||
|
||||
assert_object_held(obj);
|
||||
if (!fence)
|
||||
return 0;
|
||||
|
||||
ret = dma_fence_wait(fence, intr);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (fence->error)
|
||||
return fence->error;
|
||||
|
||||
i915_gem_to_ttm(obj)->moving = NULL;
|
||||
dma_fence_put(fence);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
|
||||
#include "selftests/huge_gem_object.c"
|
||||
#include "selftests/huge_pages.c"
|
||||
|
@ -93,7 +93,6 @@ void i915_gem_flush_free_objects(struct drm_i915_private *i915);
|
||||
|
||||
struct sg_table *
|
||||
__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj);
|
||||
void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
|
||||
|
||||
/**
|
||||
* i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
|
||||
@ -295,6 +294,12 @@ i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj)
|
||||
return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_SHRINKABLE);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
i915_gem_object_has_self_managed_shrink_list(const struct drm_i915_gem_object *obj)
|
||||
{
|
||||
return i915_gem_object_type_has(obj, I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj)
|
||||
{
|
||||
@ -449,7 +454,7 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
|
||||
}
|
||||
|
||||
int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
|
||||
void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
|
||||
int i915_gem_object_truncate(struct drm_i915_gem_object *obj);
|
||||
void i915_gem_object_writeback(struct drm_i915_gem_object *obj);
|
||||
|
||||
/**
|
||||
@ -512,11 +517,18 @@ i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
|
||||
i915_gem_object_unpin_pages(obj);
|
||||
}
|
||||
|
||||
struct dma_fence *
|
||||
i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj);
|
||||
|
||||
int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
|
||||
bool intr);
|
||||
|
||||
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
|
||||
unsigned int cache_level);
|
||||
bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj);
|
||||
void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
|
||||
void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
|
||||
bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj);
|
||||
|
||||
int __must_check
|
||||
i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
|
||||
@ -533,25 +545,15 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
|
||||
|
||||
void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj);
|
||||
void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
|
||||
void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
|
||||
void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj);
|
||||
void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj);
|
||||
|
||||
static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
if (obj->cache_dirty)
|
||||
return false;
|
||||
|
||||
if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
|
||||
return true;
|
||||
|
||||
/* Currently in use by HW (display engine)? Keep flushed. */
|
||||
return i915_gem_object_is_framebuffer(obj);
|
||||
}
|
||||
|
||||
static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
obj->read_domains = I915_GEM_DOMAIN_CPU;
|
||||
obj->write_domain = I915_GEM_DOMAIN_CPU;
|
||||
if (cpu_write_needs_clflush(obj))
|
||||
if (i915_gem_cpu_write_needs_clflush(obj))
|
||||
obj->cache_dirty = true;
|
||||
}
|
||||
|
||||
@ -613,6 +615,14 @@ int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj,
|
||||
bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
|
||||
enum intel_memory_type type);
|
||||
|
||||
int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
|
||||
size_t size, struct intel_memory_region *mr,
|
||||
struct address_space *mapping,
|
||||
unsigned int max_segment);
|
||||
void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
|
||||
bool dirty, bool backup);
|
||||
void __shmem_writeback(size_t size, struct address_space *mapping);
|
||||
|
||||
#ifdef CONFIG_MMU_NOTIFIER
|
||||
static inline bool
|
||||
i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
|
||||
|
@ -34,9 +34,11 @@ struct i915_lut_handle {
|
||||
|
||||
struct drm_i915_gem_object_ops {
|
||||
unsigned int flags;
|
||||
#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1)
|
||||
#define I915_GEM_OBJECT_IS_PROXY BIT(2)
|
||||
#define I915_GEM_OBJECT_NO_MMAP BIT(3)
|
||||
#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1)
|
||||
/* Skip the shrinker management in set_pages/unset_pages */
|
||||
#define I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST BIT(2)
|
||||
#define I915_GEM_OBJECT_IS_PROXY BIT(3)
|
||||
#define I915_GEM_OBJECT_NO_MMAP BIT(4)
|
||||
|
||||
/* Interface between the GEM object and its backing storage.
|
||||
* get_pages() is called once prior to the use of the associated set
|
||||
@ -54,8 +56,11 @@ struct drm_i915_gem_object_ops {
|
||||
int (*get_pages)(struct drm_i915_gem_object *obj);
|
||||
void (*put_pages)(struct drm_i915_gem_object *obj,
|
||||
struct sg_table *pages);
|
||||
void (*truncate)(struct drm_i915_gem_object *obj);
|
||||
int (*truncate)(struct drm_i915_gem_object *obj);
|
||||
void (*writeback)(struct drm_i915_gem_object *obj);
|
||||
int (*shrinker_release_pages)(struct drm_i915_gem_object *obj,
|
||||
bool no_gpu_wait,
|
||||
bool should_writeback);
|
||||
|
||||
int (*pread)(struct drm_i915_gem_object *obj,
|
||||
const struct drm_i915_gem_pread *arg);
|
||||
@ -486,8 +491,36 @@ struct drm_i915_gem_object {
|
||||
* instead go through the pin/unpin interfaces.
|
||||
*/
|
||||
atomic_t pages_pin_count;
|
||||
|
||||
/**
|
||||
* @shrink_pin: Prevents the pages from being made visible to
|
||||
* the shrinker, while the shrink_pin is non-zero. Most users
|
||||
* should pretty much never have to care about this, outside of
|
||||
* some special use cases.
|
||||
*
|
||||
* By default most objects will start out as visible to the
|
||||
* shrinker(if I915_GEM_OBJECT_IS_SHRINKABLE) as soon as the
|
||||
* backing pages are attached to the object, like in
|
||||
* __i915_gem_object_set_pages(). They will then be removed the
|
||||
* shrinker list once the pages are released.
|
||||
*
|
||||
* The @shrink_pin is incremented by calling
|
||||
* i915_gem_object_make_unshrinkable(), which will also remove
|
||||
* the object from the shrinker list, if the pin count was zero.
|
||||
*
|
||||
* Callers will then typically call
|
||||
* i915_gem_object_make_shrinkable() or
|
||||
* i915_gem_object_make_purgeable() to decrement the pin count,
|
||||
* and make the pages visible again.
|
||||
*/
|
||||
atomic_t shrink_pin;
|
||||
|
||||
/**
|
||||
* @ttm_shrinkable: True when the object is using shmem pages
|
||||
* underneath. Protected by the object lock.
|
||||
*/
|
||||
bool ttm_shrinkable;
|
||||
|
||||
/**
|
||||
* Priority list of potential placements for this object.
|
||||
*/
|
||||
@ -512,6 +545,7 @@ struct drm_i915_gem_object {
|
||||
*/
|
||||
struct list_head region_link;
|
||||
|
||||
struct i915_refct_sgt *rsgt;
|
||||
struct sg_table *pages;
|
||||
void *mapping;
|
||||
|
||||
@ -547,7 +581,7 @@ struct drm_i915_gem_object {
|
||||
struct i915_gem_object_page_iter get_dma_page;
|
||||
|
||||
/**
|
||||
* Element within i915->mm.unbound_list or i915->mm.bound_list,
|
||||
* Element within i915->mm.shrink_list or i915->mm.purge_list,
|
||||
* locked by i915->mm.obj_lock.
|
||||
*/
|
||||
struct list_head link;
|
||||
@ -565,7 +599,7 @@ struct drm_i915_gem_object {
|
||||
} mm;
|
||||
|
||||
struct {
|
||||
struct sg_table *cached_io_st;
|
||||
struct i915_refct_sgt *cached_io_rsgt;
|
||||
struct i915_gem_object_page_iter get_io_page;
|
||||
struct drm_i915_gem_object *backup;
|
||||
bool created:1;
|
||||
|
@ -26,6 +26,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
|
||||
|
||||
/* Make the pages coherent with the GPU (flushing any swapin). */
|
||||
if (obj->cache_dirty) {
|
||||
WARN_ON_ONCE(IS_DGFX(i915));
|
||||
obj->write_domain = 0;
|
||||
if (i915_gem_object_has_struct_page(obj))
|
||||
drm_clflush_sg(pages);
|
||||
@ -68,7 +69,7 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
|
||||
shrinkable = false;
|
||||
}
|
||||
|
||||
if (shrinkable) {
|
||||
if (shrinkable && !i915_gem_object_has_self_managed_shrink_list(obj)) {
|
||||
struct list_head *list;
|
||||
unsigned long flags;
|
||||
|
||||
@ -158,11 +159,13 @@ int i915_gem_object_pin_pages_unlocked(struct drm_i915_gem_object *obj)
|
||||
}
|
||||
|
||||
/* Immediately discard the backing storage */
|
||||
void i915_gem_object_truncate(struct drm_i915_gem_object *obj)
|
||||
int i915_gem_object_truncate(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
drm_gem_free_mmap_offset(&obj->base);
|
||||
if (obj->ops->truncate)
|
||||
obj->ops->truncate(obj);
|
||||
return obj->ops->truncate(obj);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Try to discard unwanted pages */
|
||||
@ -208,7 +211,8 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
|
||||
if (i915_gem_object_is_volatile(obj))
|
||||
obj->mm.madv = I915_MADV_WILLNEED;
|
||||
|
||||
i915_gem_object_make_unshrinkable(obj);
|
||||
if (!i915_gem_object_has_self_managed_shrink_list(obj))
|
||||
i915_gem_object_make_unshrinkable(obj);
|
||||
|
||||
if (obj->mm.mapping) {
|
||||
unmap_object(obj, page_mask_bits(obj->mm.mapping));
|
||||
@ -414,6 +418,12 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
|
||||
}
|
||||
|
||||
if (!ptr) {
|
||||
err = i915_gem_object_wait_moving_fence(obj, true);
|
||||
if (err) {
|
||||
ptr = ERR_PTR(err);
|
||||
goto err_unpin;
|
||||
}
|
||||
|
||||
if (GEM_WARN_ON(type == I915_MAP_WC &&
|
||||
!static_cpu_has(X86_FEATURE_PAT)))
|
||||
ptr = ERR_PTR(-ENODEV);
|
||||
|
@ -11,7 +11,7 @@
|
||||
void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj,
|
||||
struct intel_memory_region *mem)
|
||||
{
|
||||
obj->mm.region = intel_memory_region_get(mem);
|
||||
obj->mm.region = mem;
|
||||
|
||||
mutex_lock(&mem->objects.lock);
|
||||
list_add(&obj->mm.region_link, &mem->objects.list);
|
||||
@ -25,8 +25,6 @@ void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj)
|
||||
mutex_lock(&mem->objects.lock);
|
||||
list_del(&obj->mm.region_link);
|
||||
mutex_unlock(&mem->objects.lock);
|
||||
|
||||
intel_memory_region_put(mem);
|
||||
}
|
||||
|
||||
struct drm_i915_gem_object *
|
||||
|
@ -25,62 +25,67 @@ static void check_release_pagevec(struct pagevec *pvec)
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
static int shmem_get_pages(struct drm_i915_gem_object *obj)
|
||||
void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
|
||||
bool dirty, bool backup)
|
||||
{
|
||||
struct drm_i915_private *i915 = to_i915(obj->base.dev);
|
||||
struct intel_memory_region *mem = obj->mm.region;
|
||||
const unsigned long page_count = obj->base.size / PAGE_SIZE;
|
||||
unsigned long i;
|
||||
struct address_space *mapping;
|
||||
struct sg_table *st;
|
||||
struct scatterlist *sg;
|
||||
struct sgt_iter sgt_iter;
|
||||
struct pagevec pvec;
|
||||
struct page *page;
|
||||
|
||||
mapping_clear_unevictable(mapping);
|
||||
|
||||
pagevec_init(&pvec);
|
||||
for_each_sgt_page(page, sgt_iter, st) {
|
||||
if (dirty)
|
||||
set_page_dirty(page);
|
||||
|
||||
if (backup)
|
||||
mark_page_accessed(page);
|
||||
|
||||
if (!pagevec_add(&pvec, page))
|
||||
check_release_pagevec(&pvec);
|
||||
}
|
||||
if (pagevec_count(&pvec))
|
||||
check_release_pagevec(&pvec);
|
||||
|
||||
sg_free_table(st);
|
||||
}
|
||||
|
||||
int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
|
||||
size_t size, struct intel_memory_region *mr,
|
||||
struct address_space *mapping,
|
||||
unsigned int max_segment)
|
||||
{
|
||||
const unsigned long page_count = size / PAGE_SIZE;
|
||||
unsigned long i;
|
||||
struct scatterlist *sg;
|
||||
struct page *page;
|
||||
unsigned long last_pfn = 0; /* suppress gcc warning */
|
||||
unsigned int max_segment = i915_sg_segment_size();
|
||||
unsigned int sg_page_sizes;
|
||||
gfp_t noreclaim;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Assert that the object is not currently in any GPU domain. As it
|
||||
* wasn't in the GTT, there shouldn't be any way it could have been in
|
||||
* a GPU cache
|
||||
*/
|
||||
GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
|
||||
GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
|
||||
|
||||
/*
|
||||
* If there's no chance of allocating enough pages for the whole
|
||||
* object, bail early.
|
||||
*/
|
||||
if (obj->base.size > resource_size(&mem->region))
|
||||
if (size > resource_size(&mr->region))
|
||||
return -ENOMEM;
|
||||
|
||||
st = kmalloc(sizeof(*st), GFP_KERNEL);
|
||||
if (!st)
|
||||
if (sg_alloc_table(st, page_count, GFP_KERNEL))
|
||||
return -ENOMEM;
|
||||
|
||||
rebuild_st:
|
||||
if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
|
||||
kfree(st);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the list of pages out of our struct file. They'll be pinned
|
||||
* at this point until we release them.
|
||||
*
|
||||
* Fail silently without starting the shrinker
|
||||
*/
|
||||
mapping = obj->base.filp->f_mapping;
|
||||
mapping_set_unevictable(mapping);
|
||||
noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
|
||||
noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
|
||||
|
||||
sg = st->sgl;
|
||||
st->nents = 0;
|
||||
sg_page_sizes = 0;
|
||||
for (i = 0; i < page_count; i++) {
|
||||
const unsigned int shrink[] = {
|
||||
I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
|
||||
@ -135,10 +140,9 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
|
||||
if (!i ||
|
||||
sg->length >= max_segment ||
|
||||
page_to_pfn(page) != last_pfn + 1) {
|
||||
if (i) {
|
||||
sg_page_sizes |= sg->length;
|
||||
if (i)
|
||||
sg = sg_next(sg);
|
||||
}
|
||||
|
||||
st->nents++;
|
||||
sg_set_page(sg, page, PAGE_SIZE, 0);
|
||||
} else {
|
||||
@ -149,14 +153,67 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
|
||||
/* Check that the i965g/gm workaround works. */
|
||||
GEM_BUG_ON(gfp & __GFP_DMA32 && last_pfn >= 0x00100000UL);
|
||||
}
|
||||
if (sg) { /* loop terminated early; short sg table */
|
||||
sg_page_sizes |= sg->length;
|
||||
if (sg) /* loop terminated early; short sg table */
|
||||
sg_mark_end(sg);
|
||||
}
|
||||
|
||||
/* Trim unused sg entries to avoid wasting memory. */
|
||||
i915_sg_trim(st);
|
||||
|
||||
return 0;
|
||||
err_sg:
|
||||
sg_mark_end(sg);
|
||||
if (sg != st->sgl) {
|
||||
shmem_sg_free_table(st, mapping, false, false);
|
||||
} else {
|
||||
mapping_clear_unevictable(mapping);
|
||||
sg_free_table(st);
|
||||
}
|
||||
|
||||
/*
|
||||
* shmemfs first checks if there is enough memory to allocate the page
|
||||
* and reports ENOSPC should there be insufficient, along with the usual
|
||||
* ENOMEM for a genuine allocation failure.
|
||||
*
|
||||
* We use ENOSPC in our driver to mean that we have run out of aperture
|
||||
* space and so want to translate the error from shmemfs back to our
|
||||
* usual understanding of ENOMEM.
|
||||
*/
|
||||
if (ret == -ENOSPC)
|
||||
ret = -ENOMEM;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int shmem_get_pages(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
struct drm_i915_private *i915 = to_i915(obj->base.dev);
|
||||
struct intel_memory_region *mem = obj->mm.region;
|
||||
struct address_space *mapping = obj->base.filp->f_mapping;
|
||||
const unsigned long page_count = obj->base.size / PAGE_SIZE;
|
||||
unsigned int max_segment = i915_sg_segment_size();
|
||||
struct sg_table *st;
|
||||
struct sgt_iter sgt_iter;
|
||||
struct page *page;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Assert that the object is not currently in any GPU domain. As it
|
||||
* wasn't in the GTT, there shouldn't be any way it could have been in
|
||||
* a GPU cache
|
||||
*/
|
||||
GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
|
||||
GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
|
||||
|
||||
rebuild_st:
|
||||
st = kmalloc(sizeof(*st), GFP_KERNEL);
|
||||
if (!st)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = shmem_sg_alloc_table(i915, st, obj->base.size, mem, mapping,
|
||||
max_segment);
|
||||
if (ret)
|
||||
goto err_st;
|
||||
|
||||
ret = i915_gem_gtt_prepare_pages(obj, st);
|
||||
if (ret) {
|
||||
/*
|
||||
@ -168,6 +225,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
|
||||
for_each_sgt_page(page, sgt_iter, st)
|
||||
put_page(page);
|
||||
sg_free_table(st);
|
||||
kfree(st);
|
||||
|
||||
max_segment = PAGE_SIZE;
|
||||
goto rebuild_st;
|
||||
@ -185,28 +243,12 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
|
||||
if (i915_gem_object_can_bypass_llc(obj))
|
||||
obj->cache_dirty = true;
|
||||
|
||||
__i915_gem_object_set_pages(obj, st, sg_page_sizes);
|
||||
__i915_gem_object_set_pages(obj, st, i915_sg_dma_sizes(st->sgl));
|
||||
|
||||
return 0;
|
||||
|
||||
err_sg:
|
||||
sg_mark_end(sg);
|
||||
err_pages:
|
||||
mapping_clear_unevictable(mapping);
|
||||
if (sg != st->sgl) {
|
||||
struct pagevec pvec;
|
||||
|
||||
pagevec_init(&pvec);
|
||||
for_each_sgt_page(page, sgt_iter, st) {
|
||||
if (!pagevec_add(&pvec, page))
|
||||
check_release_pagevec(&pvec);
|
||||
}
|
||||
if (pagevec_count(&pvec))
|
||||
check_release_pagevec(&pvec);
|
||||
}
|
||||
sg_free_table(st);
|
||||
kfree(st);
|
||||
|
||||
shmem_sg_free_table(st, mapping, false, false);
|
||||
/*
|
||||
* shmemfs first checks if there is enough memory to allocate the page
|
||||
* and reports ENOSPC should there be insufficient, along with the usual
|
||||
@ -216,13 +258,16 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
|
||||
* space and so want to translate the error from shmemfs back to our
|
||||
* usual understanding of ENOMEM.
|
||||
*/
|
||||
err_st:
|
||||
if (ret == -ENOSPC)
|
||||
ret = -ENOMEM;
|
||||
|
||||
kfree(st);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
static int
|
||||
shmem_truncate(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
/*
|
||||
@ -234,12 +279,12 @@ shmem_truncate(struct drm_i915_gem_object *obj)
|
||||
shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
|
||||
obj->mm.madv = __I915_MADV_PURGED;
|
||||
obj->mm.pages = ERR_PTR(-EFAULT);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
shmem_writeback(struct drm_i915_gem_object *obj)
|
||||
void __shmem_writeback(size_t size, struct address_space *mapping)
|
||||
{
|
||||
struct address_space *mapping;
|
||||
struct writeback_control wbc = {
|
||||
.sync_mode = WB_SYNC_NONE,
|
||||
.nr_to_write = SWAP_CLUSTER_MAX,
|
||||
@ -255,10 +300,9 @@ shmem_writeback(struct drm_i915_gem_object *obj)
|
||||
* instead of invoking writeback so they are aged and paged out
|
||||
* as normal.
|
||||
*/
|
||||
mapping = obj->base.filp->f_mapping;
|
||||
|
||||
/* Begin writeback on each dirty page */
|
||||
for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
|
||||
for (i = 0; i < size >> PAGE_SHIFT; i++) {
|
||||
struct page *page;
|
||||
|
||||
page = find_lock_page(mapping, i);
|
||||
@ -281,6 +325,12 @@ shmem_writeback(struct drm_i915_gem_object *obj)
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
shmem_writeback(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
__shmem_writeback(obj->base.size, obj->base.filp->f_mapping);
|
||||
}
|
||||
|
||||
void
|
||||
__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
|
||||
struct sg_table *pages,
|
||||
@ -313,11 +363,6 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
|
||||
|
||||
void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages)
|
||||
{
|
||||
struct sgt_iter sgt_iter;
|
||||
struct pagevec pvec;
|
||||
struct page *page;
|
||||
|
||||
GEM_WARN_ON(IS_DGFX(to_i915(obj->base.dev)));
|
||||
__i915_gem_object_release_shmem(obj, pages, true);
|
||||
|
||||
i915_gem_gtt_finish_pages(obj, pages);
|
||||
@ -325,25 +370,10 @@ void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_
|
||||
if (i915_gem_object_needs_bit17_swizzle(obj))
|
||||
i915_gem_object_save_bit_17_swizzle(obj, pages);
|
||||
|
||||
mapping_clear_unevictable(file_inode(obj->base.filp)->i_mapping);
|
||||
|
||||
pagevec_init(&pvec);
|
||||
for_each_sgt_page(page, sgt_iter, pages) {
|
||||
if (obj->mm.dirty)
|
||||
set_page_dirty(page);
|
||||
|
||||
if (obj->mm.madv == I915_MADV_WILLNEED)
|
||||
mark_page_accessed(page);
|
||||
|
||||
if (!pagevec_add(&pvec, page))
|
||||
check_release_pagevec(&pvec);
|
||||
}
|
||||
if (pagevec_count(&pvec))
|
||||
check_release_pagevec(&pvec);
|
||||
obj->mm.dirty = false;
|
||||
|
||||
sg_free_table(pages);
|
||||
shmem_sg_free_table(pages, file_inode(obj->base.filp)->i_mapping,
|
||||
obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
|
||||
kfree(pages);
|
||||
obj->mm.dirty = false;
|
||||
}
|
||||
|
||||
static void
|
||||
@ -634,9 +664,10 @@ static int init_shmem(struct intel_memory_region *mem)
|
||||
return 0; /* Don't error, we can simply fallback to the kernel mnt */
|
||||
}
|
||||
|
||||
static void release_shmem(struct intel_memory_region *mem)
|
||||
static int release_shmem(struct intel_memory_region *mem)
|
||||
{
|
||||
i915_gemfs_fini(mem->i915);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct intel_memory_region_ops shmem_region_ops = {
|
||||
|
@ -55,19 +55,25 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj,
|
||||
return false;
|
||||
}
|
||||
|
||||
static void try_to_writeback(struct drm_i915_gem_object *obj,
|
||||
unsigned int flags)
|
||||
static int try_to_writeback(struct drm_i915_gem_object *obj, unsigned int flags)
|
||||
{
|
||||
if (obj->ops->shrinker_release_pages)
|
||||
return obj->ops->shrinker_release_pages(obj,
|
||||
!(flags & I915_SHRINK_ACTIVE),
|
||||
flags & I915_SHRINK_WRITEBACK);
|
||||
|
||||
switch (obj->mm.madv) {
|
||||
case I915_MADV_DONTNEED:
|
||||
i915_gem_object_truncate(obj);
|
||||
return;
|
||||
return 0;
|
||||
case __I915_MADV_PURGED:
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (flags & I915_SHRINK_WRITEBACK)
|
||||
i915_gem_object_writeback(obj);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -221,8 +227,8 @@ i915_gem_shrink(struct i915_gem_ww_ctx *ww,
|
||||
}
|
||||
|
||||
if (!__i915_gem_object_put_pages(obj)) {
|
||||
try_to_writeback(obj, shrink);
|
||||
count += obj->base.size >> PAGE_SHIFT;
|
||||
if (!try_to_writeback(obj, shrink))
|
||||
count += obj->base.size >> PAGE_SHIFT;
|
||||
}
|
||||
if (!ww)
|
||||
i915_gem_object_unlock(obj);
|
||||
@ -455,6 +461,16 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
|
||||
|
||||
#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
|
||||
|
||||
/**
|
||||
* i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
|
||||
* default all object types that support shrinking(see IS_SHRINKABLE), will also
|
||||
* make the object visible to the shrinker after allocating the system memory
|
||||
* pages.
|
||||
* @obj: The GEM object.
|
||||
*
|
||||
* This is typically used for special kernel internal objects that can't be
|
||||
* easily processed by the shrinker, like if they are perma-pinned.
|
||||
*/
|
||||
void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
struct drm_i915_private *i915 = obj_to_i915(obj);
|
||||
@ -479,13 +495,12 @@ void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
|
||||
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
|
||||
}
|
||||
|
||||
static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
|
||||
struct list_head *head)
|
||||
static void ___i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
|
||||
struct list_head *head)
|
||||
{
|
||||
struct drm_i915_private *i915 = obj_to_i915(obj);
|
||||
unsigned long flags;
|
||||
|
||||
GEM_BUG_ON(!i915_gem_object_has_pages(obj));
|
||||
if (!i915_gem_object_is_shrinkable(obj))
|
||||
return;
|
||||
|
||||
@ -505,14 +520,67 @@ static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
|
||||
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
|
||||
}
|
||||
|
||||
void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
|
||||
/**
|
||||
* __i915_gem_object_make_shrinkable - Move the object to the tail of the
|
||||
* shrinkable list. Objects on this list might be swapped out. Used with
|
||||
* WILLNEED objects.
|
||||
* @obj: The GEM object.
|
||||
*
|
||||
* DO NOT USE. This is intended to be called on very special objects that don't
|
||||
* yet have mm.pages, but are guaranteed to have potentially reclaimable pages
|
||||
* underneath.
|
||||
*/
|
||||
void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
__i915_gem_object_make_shrinkable(obj,
|
||||
&obj_to_i915(obj)->mm.shrink_list);
|
||||
___i915_gem_object_make_shrinkable(obj,
|
||||
&obj_to_i915(obj)->mm.shrink_list);
|
||||
}
|
||||
|
||||
/**
|
||||
* __i915_gem_object_make_purgeable - Move the object to the tail of the
|
||||
* purgeable list. Objects on this list might be swapped out. Used with
|
||||
* DONTNEED objects.
|
||||
* @obj: The GEM object.
|
||||
*
|
||||
* DO NOT USE. This is intended to be called on very special objects that don't
|
||||
* yet have mm.pages, but are guaranteed to have potentially reclaimable pages
|
||||
* underneath.
|
||||
*/
|
||||
void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
___i915_gem_object_make_shrinkable(obj,
|
||||
&obj_to_i915(obj)->mm.purge_list);
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_gem_object_make_shrinkable - Move the object to the tail of the
|
||||
* shrinkable list. Objects on this list might be swapped out. Used with
|
||||
* WILLNEED objects.
|
||||
* @obj: The GEM object.
|
||||
*
|
||||
* MUST only be called on objects which have backing pages.
|
||||
*
|
||||
* MUST be balanced with previous call to i915_gem_object_make_unshrinkable().
|
||||
*/
|
||||
void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
GEM_BUG_ON(!i915_gem_object_has_pages(obj));
|
||||
__i915_gem_object_make_shrinkable(obj);
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_gem_object_make_purgeable - Move the object to the tail of the purgeable
|
||||
* list. Used with DONTNEED objects. Unlike with shrinkable objects, the
|
||||
* shrinker will attempt to discard the backing pages, instead of trying to swap
|
||||
* them out.
|
||||
* @obj: The GEM object.
|
||||
*
|
||||
* MUST only be called on objects which have backing pages.
|
||||
*
|
||||
* MUST be balanced with previous call to i915_gem_object_make_unshrinkable().
|
||||
*/
|
||||
void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
__i915_gem_object_make_shrinkable(obj,
|
||||
&obj_to_i915(obj)->mm.purge_list);
|
||||
GEM_BUG_ON(!i915_gem_object_has_pages(obj));
|
||||
__i915_gem_object_make_purgeable(obj);
|
||||
}
|
||||
|
@ -399,7 +399,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (intel_vtd_active() && GRAPHICS_VER(i915) < 8) {
|
||||
if (intel_vtd_active(i915) && GRAPHICS_VER(i915) < 8) {
|
||||
drm_notice(&i915->drm,
|
||||
"%s, disabling use of stolen memory\n",
|
||||
"DMAR active");
|
||||
@ -720,9 +720,10 @@ static int init_stolen_smem(struct intel_memory_region *mem)
|
||||
return i915_gem_init_stolen(mem);
|
||||
}
|
||||
|
||||
static void release_stolen_smem(struct intel_memory_region *mem)
|
||||
static int release_stolen_smem(struct intel_memory_region *mem)
|
||||
{
|
||||
i915_gem_cleanup_stolen(mem->i915);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct intel_memory_region_ops i915_region_stolen_smem_ops = {
|
||||
@ -759,10 +760,11 @@ static int init_stolen_lmem(struct intel_memory_region *mem)
|
||||
return err;
|
||||
}
|
||||
|
||||
static void release_stolen_lmem(struct intel_memory_region *mem)
|
||||
static int release_stolen_lmem(struct intel_memory_region *mem)
|
||||
{
|
||||
io_mapping_fini(&mem->iomap);
|
||||
i915_gem_cleanup_stolen(mem->i915);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = {
|
||||
|
File diff suppressed because it is too large.
@ -5,6 +5,8 @@
|
||||
#ifndef _I915_GEM_TTM_H_
|
||||
#define _I915_GEM_TTM_H_
|
||||
|
||||
#include <drm/ttm/ttm_placement.h>
|
||||
|
||||
#include "gem/i915_gem_object_types.h"
|
||||
|
||||
/**
|
||||
@ -35,7 +37,7 @@ void i915_ttm_bo_destroy(struct ttm_buffer_object *bo);
|
||||
static inline struct drm_i915_gem_object *
|
||||
i915_ttm_to_gem(struct ttm_buffer_object *bo)
|
||||
{
|
||||
if (GEM_WARN_ON(bo->destroy != i915_ttm_bo_destroy))
|
||||
if (bo->destroy != i915_ttm_bo_destroy)
|
||||
return NULL;
|
||||
|
||||
return container_of(bo, struct drm_i915_gem_object, __do_not_access);
|
||||
@ -47,10 +49,6 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
|
||||
resource_size_t page_size,
|
||||
unsigned int flags);
|
||||
|
||||
int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
|
||||
struct drm_i915_gem_object *src,
|
||||
bool allow_accel, bool intr);
|
||||
|
||||
/* Internal I915 TTM declarations and definitions below. */
|
||||
|
||||
#define I915_PL_LMEM0 TTM_PL_PRIV
|
||||
@ -60,4 +58,37 @@ int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
|
||||
|
||||
struct ttm_placement *i915_ttm_sys_placement(void);
|
||||
|
||||
void i915_ttm_free_cached_io_rsgt(struct drm_i915_gem_object *obj);
|
||||
|
||||
struct i915_refct_sgt *
|
||||
i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
|
||||
struct ttm_resource *res);
|
||||
|
||||
void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj);
|
||||
|
||||
int i915_ttm_purge(struct drm_i915_gem_object *obj);
|
||||
|
||||
/**
|
||||
* i915_ttm_gtt_binds_lmem - Should the memory be viewed as LMEM by the GTT?
|
||||
* @mem: struct ttm_resource representing the memory.
|
||||
*
|
||||
* Return: true if memory should be viewed as LMEM for GTT binding purposes,
|
||||
* false otherwise.
|
||||
*/
|
||||
static inline bool i915_ttm_gtt_binds_lmem(struct ttm_resource *mem)
|
||||
{
|
||||
return mem->mem_type != I915_PL_SYSTEM;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_ttm_cpu_maps_iomem - Should the memory be viewed as IOMEM by the CPU?
|
||||
* @mem: struct ttm_resource representing the memory.
|
||||
*
|
||||
* Return: true if memory should be viewed as IOMEM for CPU mapping purposes.
|
||||
*/
|
||||
static inline bool i915_ttm_cpu_maps_iomem(struct ttm_resource *mem)
|
||||
{
|
||||
/* Once / if we support GGTT, this is also false for cached ttm_tts */
|
||||
return mem->mem_type != I915_PL_SYSTEM;
|
||||
}
|
||||
#endif
|
||||
|
drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c (new file, 874 lines)
@@ -0,0 +1,874 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
* Copyright © 2021 Intel Corporation
|
||||
*/
|
||||
|
||||
#include <linux/dma-fence-array.h>
|
||||
|
||||
#include <drm/ttm/ttm_bo_driver.h>
|
||||
|
||||
#include "i915_drv.h"
|
||||
#include "intel_memory_region.h"
|
||||
#include "intel_region_ttm.h"
|
||||
|
||||
#include "gem/i915_gem_object.h"
|
||||
#include "gem/i915_gem_region.h"
|
||||
#include "gem/i915_gem_ttm.h"
|
||||
#include "gem/i915_gem_ttm_move.h"
|
||||
|
||||
#include "gt/intel_engine_pm.h"
|
||||
#include "gt/intel_gt.h"
|
||||
#include "gt/intel_migrate.h"
|
||||
|
||||
/**
* DOC: Selftest failure modes for failsafe migration:
*
* For fail_gpu_migration, the gpu blit scheduled is always a clear blit
* rather than a copy blit, and then we force the failure paths as if
* the blit fence returned an error.
*
* For fail_work_allocation we fail the kmalloc of the async worker and
* sync the gpu blit. If it then fails, or fail_gpu_migration is set to
* true, then a memcpy operation is performed synchronously.
*/
|
||||
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
|
||||
static bool fail_gpu_migration;
|
||||
static bool fail_work_allocation;
|
||||
|
||||
void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
|
||||
bool work_allocation)
|
||||
{
|
||||
fail_gpu_migration = gpu_migration;
|
||||
fail_work_allocation = work_allocation;
|
||||
}
|
||||
#endif
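As a rough illustration of how these two knobs are meant to be driven (this mirrors the igt_lmem_pages_failsafe_migrate() selftest added later in this patch; exercise_one_migration() is only a placeholder for whatever migration path a test wants to exercise):

static int exercise_failsafe_paths(void *arg)
{
	int fail_gpu, fail_alloc, err = 0;

	for (fail_gpu = 0; fail_gpu < 2 && !err; ++fail_gpu) {
		for (fail_alloc = 0; fail_alloc < 2 && !err; ++fail_alloc) {
			/* Force the selected combination of failure modes. */
			i915_ttm_migrate_set_failure_modes(fail_gpu, fail_alloc);
			err = exercise_one_migration(arg); /* placeholder */
		}
	}

	/* Always restore normal operation before returning. */
	i915_ttm_migrate_set_failure_modes(false, false);
	return err;
}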
|
||||
|
||||
/**
* DOC: Set of utilities to dynamically collect dependencies and
* eventually coalesce them into a single fence which is fed into
* the GT migration code, since it only accepts a single dependency
* fence.
* The single fence returned from these utilities, in the case of
* dependencies from multiple fence contexts, is a struct dma_fence_array,
* since the i915 request code can break that up and await the individual
* fences.
*
* Once we can do async unbinding, this is also needed to coalesce
* the migration fence with the unbind fences.
*
* While collecting the individual dependencies, we store the refcounted
* struct dma_fence pointers in a realloc-managed pointer array, since
* that can be easily fed into a dma_fence_array. Other options are
* available, like for example an xarray for similarity with drm/sched.
* Can be changed easily if needed.
*
* A struct i915_deps needs to be initialized using i915_deps_init().
* If i915_deps_add_dependency() or i915_deps_add_resv() returns an
* error code, it will internally call i915_deps_fini(), which frees
* all internal references and allocations. After a call to
* i915_deps_to_fence() or i915_deps_sync(), the struct should similarly
* be viewed as uninitialized.
*
* We might want to break this out into a separate file as a utility.
*/
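For orientation, a minimal sketch of the intended call pattern, with error handling trimmed; prev_fence() further down in this file is the real in-tree user of essentially this sequence:

	struct i915_deps deps;
	struct dma_fence *fence;
	int ret;

	i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);

	/* Collect the individual dependencies... */
	ret = i915_deps_add_dependency(&deps, bo->moving, ctx);
	if (!ret)
		ret = i915_deps_add_resv(&deps, bo->base.resv, true, false, ctx);
	if (ret)
		return ERR_PTR(ret); /* the deps struct has already been finalized */

	/* ...and coalesce them into a single fence (possibly a dma_fence_array). */
	fence = i915_deps_to_fence(&deps, ctx);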
|
||||
|
||||
#define I915_DEPS_MIN_ALLOC_CHUNK 8U
|
||||
|
||||
/**
|
||||
* struct i915_deps - Collect dependencies into a single dma-fence
|
||||
* @single: Storage for pointer if the collection is a single fence.
|
||||
* @fences: Allocated array of fence pointers if more than a single fence;
|
||||
* otherwise points to the address of @single.
|
||||
* @num_deps: Current number of dependency fences.
|
||||
* @fences_size: Size of the @fences array in number of pointers.
|
||||
* @gfp: Allocation mode.
|
||||
*/
|
||||
struct i915_deps {
|
||||
struct dma_fence *single;
|
||||
struct dma_fence **fences;
|
||||
unsigned int num_deps;
|
||||
unsigned int fences_size;
|
||||
gfp_t gfp;
|
||||
};
|
||||
|
||||
static void i915_deps_reset_fences(struct i915_deps *deps)
|
||||
{
|
||||
if (deps->fences != &deps->single)
|
||||
kfree(deps->fences);
|
||||
deps->num_deps = 0;
|
||||
deps->fences_size = 1;
|
||||
deps->fences = &deps->single;
|
||||
}
|
||||
|
||||
static void i915_deps_init(struct i915_deps *deps, gfp_t gfp)
|
||||
{
|
||||
deps->fences = NULL;
|
||||
deps->gfp = gfp;
|
||||
i915_deps_reset_fences(deps);
|
||||
}
|
||||
|
||||
static void i915_deps_fini(struct i915_deps *deps)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < deps->num_deps; ++i)
|
||||
dma_fence_put(deps->fences[i]);
|
||||
|
||||
if (deps->fences != &deps->single)
|
||||
kfree(deps->fences);
|
||||
}
|
||||
|
||||
static int i915_deps_grow(struct i915_deps *deps, struct dma_fence *fence,
|
||||
const struct ttm_operation_ctx *ctx)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (deps->num_deps >= deps->fences_size) {
|
||||
unsigned int new_size = 2 * deps->fences_size;
|
||||
struct dma_fence **new_fences;
|
||||
|
||||
new_size = max(new_size, I915_DEPS_MIN_ALLOC_CHUNK);
|
||||
new_fences = kmalloc_array(new_size, sizeof(*new_fences), deps->gfp);
|
||||
if (!new_fences)
|
||||
goto sync;
|
||||
|
||||
memcpy(new_fences, deps->fences,
|
||||
deps->fences_size * sizeof(*new_fences));
|
||||
swap(new_fences, deps->fences);
|
||||
if (new_fences != &deps->single)
|
||||
kfree(new_fences);
|
||||
deps->fences_size = new_size;
|
||||
}
|
||||
deps->fences[deps->num_deps++] = dma_fence_get(fence);
|
||||
return 0;
|
||||
|
||||
sync:
|
||||
if (ctx->no_wait_gpu && !dma_fence_is_signaled(fence)) {
|
||||
ret = -EBUSY;
|
||||
goto unref;
|
||||
}
|
||||
|
||||
ret = dma_fence_wait(fence, ctx->interruptible);
|
||||
if (ret)
|
||||
goto unref;
|
||||
|
||||
ret = fence->error;
|
||||
if (ret)
|
||||
goto unref;
|
||||
|
||||
return 0;
|
||||
|
||||
unref:
|
||||
i915_deps_fini(deps);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int i915_deps_sync(struct i915_deps *deps,
|
||||
const struct ttm_operation_ctx *ctx)
|
||||
{
|
||||
struct dma_fence **fences = deps->fences;
|
||||
unsigned int i;
|
||||
int ret = 0;
|
||||
|
||||
for (i = 0; i < deps->num_deps; ++i, ++fences) {
|
||||
if (ctx->no_wait_gpu && !dma_fence_is_signaled(*fences)) {
|
||||
ret = -EBUSY;
|
||||
break;
|
||||
}
|
||||
|
||||
ret = dma_fence_wait(*fences, ctx->interruptible);
|
||||
if (!ret)
|
||||
ret = (*fences)->error;
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
||||
i915_deps_fini(deps);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int i915_deps_add_dependency(struct i915_deps *deps,
|
||||
struct dma_fence *fence,
|
||||
const struct ttm_operation_ctx *ctx)
|
||||
{
|
||||
unsigned int i;
|
||||
int ret;
|
||||
|
||||
if (!fence)
|
||||
return 0;
|
||||
|
||||
if (dma_fence_is_signaled(fence)) {
|
||||
ret = fence->error;
|
||||
if (ret)
|
||||
i915_deps_fini(deps);
|
||||
return ret;
|
||||
}
|
||||
|
||||
for (i = 0; i < deps->num_deps; ++i) {
|
||||
struct dma_fence *entry = deps->fences[i];
|
||||
|
||||
if (!entry->context || entry->context != fence->context)
|
||||
continue;
|
||||
|
||||
if (dma_fence_is_later(fence, entry)) {
|
||||
dma_fence_put(entry);
|
||||
deps->fences[i] = dma_fence_get(fence);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
return i915_deps_grow(deps, fence, ctx);
|
||||
}
|
||||
|
||||
static struct dma_fence *i915_deps_to_fence(struct i915_deps *deps,
|
||||
const struct ttm_operation_ctx *ctx)
|
||||
{
|
||||
struct dma_fence_array *array;
|
||||
|
||||
if (deps->num_deps == 0)
|
||||
return NULL;
|
||||
|
||||
if (deps->num_deps == 1) {
|
||||
deps->num_deps = 0;
|
||||
return deps->fences[0];
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: Alter the allocation mode here to not try too hard to
|
||||
* make things async.
|
||||
*/
|
||||
array = dma_fence_array_create(deps->num_deps, deps->fences, 0, 0,
|
||||
false);
|
||||
if (!array)
|
||||
return ERR_PTR(i915_deps_sync(deps, ctx));
|
||||
|
||||
deps->fences = NULL;
|
||||
i915_deps_reset_fences(deps);
|
||||
|
||||
return &array->base;
|
||||
}
|
||||
|
||||
static int i915_deps_add_resv(struct i915_deps *deps, struct dma_resv *resv,
|
||||
bool all, const bool no_excl,
|
||||
const struct ttm_operation_ctx *ctx)
|
||||
{
|
||||
struct dma_resv_iter iter;
|
||||
struct dma_fence *fence;
|
||||
|
||||
dma_resv_assert_held(resv);
|
||||
dma_resv_for_each_fence(&iter, resv, all, fence) {
|
||||
int ret;
|
||||
|
||||
if (no_excl && dma_resv_iter_is_exclusive(&iter))
|
||||
continue;
|
||||
|
||||
ret = i915_deps_add_dependency(deps, fence, ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static enum i915_cache_level
|
||||
i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
|
||||
struct ttm_tt *ttm)
|
||||
{
|
||||
return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
|
||||
!i915_ttm_gtt_binds_lmem(res) &&
|
||||
ttm->caching == ttm_cached) ? I915_CACHE_LLC :
|
||||
I915_CACHE_NONE;
|
||||
}
|
||||
|
||||
static struct intel_memory_region *
|
||||
i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
|
||||
{
|
||||
struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
|
||||
|
||||
/* There's some room for optimization here... */
|
||||
GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM &&
|
||||
ttm_mem_type < I915_PL_LMEM0);
|
||||
if (ttm_mem_type == I915_PL_SYSTEM)
|
||||
return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM,
|
||||
0);
|
||||
|
||||
return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL,
|
||||
ttm_mem_type - I915_PL_LMEM0);
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_ttm_adjust_domains_after_move - Adjust the GEM domains after a
|
||||
* TTM move
|
||||
* @obj: The gem object
|
||||
*/
|
||||
void i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
|
||||
|
||||
if (i915_ttm_cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) {
|
||||
obj->write_domain = I915_GEM_DOMAIN_WC;
|
||||
obj->read_domains = I915_GEM_DOMAIN_WC;
|
||||
} else {
|
||||
obj->write_domain = I915_GEM_DOMAIN_CPU;
|
||||
obj->read_domains = I915_GEM_DOMAIN_CPU;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_ttm_adjust_gem_after_move - Adjust the GEM state after a TTM move
|
||||
* @obj: The gem object
|
||||
*
|
||||
* Adjusts the GEM object's region, mem_flags and cache coherency after a
|
||||
* TTM move.
|
||||
*/
|
||||
void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
|
||||
unsigned int cache_level;
|
||||
unsigned int i;
|
||||
|
||||
/*
|
||||
* If object was moved to an allowable region, update the object
|
||||
* region to consider it migrated. Note that if it's currently not
|
||||
* in an allowable region, it's evicted and we don't update the
|
||||
* object region.
|
||||
*/
|
||||
if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) {
|
||||
for (i = 0; i < obj->mm.n_placements; ++i) {
|
||||
struct intel_memory_region *mr = obj->mm.placements[i];
|
||||
|
||||
if (intel_region_to_ttm_type(mr) == bo->resource->mem_type &&
|
||||
mr != obj->mm.region) {
|
||||
i915_gem_object_release_memory_region(obj);
|
||||
i915_gem_object_init_memory_region(obj, mr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM);
|
||||
|
||||
obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM :
|
||||
I915_BO_FLAG_STRUCT_PAGE;
|
||||
|
||||
cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
|
||||
bo->ttm);
|
||||
i915_gem_object_set_cache_coherency(obj, cache_level);
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_ttm_move_notify - Prepare an object for move
|
||||
* @bo: The ttm buffer object.
|
||||
*
|
||||
* This function prepares an object for move by removing all GPU bindings,
|
||||
* removing all CPU mappings and finally releasing the pages sg-table.
|
||||
*
|
||||
* Return: 0 if successful, negative error code on error.
|
||||
*/
|
||||
int i915_ttm_move_notify(struct ttm_buffer_object *bo)
|
||||
{
|
||||
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
|
||||
int ret;
|
||||
|
||||
ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = __i915_gem_object_put_pages(obj);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
|
||||
bool clear,
|
||||
struct ttm_resource *dst_mem,
|
||||
struct ttm_tt *dst_ttm,
|
||||
struct sg_table *dst_st,
|
||||
struct dma_fence *dep)
|
||||
{
|
||||
struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
|
||||
bdev);
|
||||
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
|
||||
struct i915_request *rq;
|
||||
struct ttm_tt *src_ttm = bo->ttm;
|
||||
enum i915_cache_level src_level, dst_level;
|
||||
int ret;
|
||||
|
||||
if (!i915->gt.migrate.context || intel_gt_is_wedged(&i915->gt))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
/* With fail_gpu_migration, we always perform a GPU clear. */
|
||||
if (I915_SELFTEST_ONLY(fail_gpu_migration))
|
||||
clear = true;
|
||||
|
||||
dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm);
|
||||
if (clear) {
|
||||
if (bo->type == ttm_bo_type_kernel &&
|
||||
!I915_SELFTEST_ONLY(fail_gpu_migration))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
intel_engine_pm_get(i915->gt.migrate.context->engine);
|
||||
ret = intel_context_migrate_clear(i915->gt.migrate.context, dep,
|
||||
dst_st->sgl, dst_level,
|
||||
i915_ttm_gtt_binds_lmem(dst_mem),
|
||||
0, &rq);
|
||||
} else {
|
||||
struct i915_refct_sgt *src_rsgt =
|
||||
i915_ttm_resource_get_st(obj, bo->resource);
|
||||
|
||||
if (IS_ERR(src_rsgt))
|
||||
return ERR_CAST(src_rsgt);
|
||||
|
||||
src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
|
||||
intel_engine_pm_get(i915->gt.migrate.context->engine);
|
||||
ret = intel_context_migrate_copy(i915->gt.migrate.context,
|
||||
dep, src_rsgt->table.sgl,
|
||||
src_level,
|
||||
i915_ttm_gtt_binds_lmem(bo->resource),
|
||||
dst_st->sgl, dst_level,
|
||||
i915_ttm_gtt_binds_lmem(dst_mem),
|
||||
&rq);
|
||||
|
||||
i915_refct_sgt_put(src_rsgt);
|
||||
}
|
||||
|
||||
intel_engine_pm_put(i915->gt.migrate.context->engine);
|
||||
|
||||
if (ret && rq) {
|
||||
i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
|
||||
i915_request_put(rq);
|
||||
}
|
||||
|
||||
return ret ? ERR_PTR(ret) : &rq->fence;
|
||||
}
|
||||
|
||||
/**
|
||||
* struct i915_ttm_memcpy_arg - argument for the bo memcpy functionality.
|
||||
* @_dst_iter: Storage space for the destination kmap iterator.
|
||||
* @_src_iter: Storage space for the source kmap iterator.
|
||||
* @dst_iter: Pointer to the destination kmap iterator.
|
||||
* @src_iter: Pointer to the source kmap iterator.
|
||||
* @num_pages: Number of pages to copy or clear.
* @clear: Whether to clear instead of copy.
|
||||
* @src_rsgt: Refcounted scatter-gather list of source memory.
|
||||
* @dst_rsgt: Refcounted scatter-gather list of destination memory.
|
||||
*/
|
||||
struct i915_ttm_memcpy_arg {
|
||||
union {
|
||||
struct ttm_kmap_iter_tt tt;
|
||||
struct ttm_kmap_iter_iomap io;
|
||||
} _dst_iter,
|
||||
_src_iter;
|
||||
struct ttm_kmap_iter *dst_iter;
|
||||
struct ttm_kmap_iter *src_iter;
|
||||
unsigned long num_pages;
|
||||
bool clear;
|
||||
struct i915_refct_sgt *src_rsgt;
|
||||
struct i915_refct_sgt *dst_rsgt;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct i915_ttm_memcpy_work - Async memcpy worker under a dma-fence.
|
||||
* @fence: The dma-fence.
|
||||
* @work: The work struct used for the memcpy work.
|
||||
* @lock: The fence lock. Not used to protect anything else ATM.
|
||||
* @irq_work: Low latency worker to signal the fence since it can't be done
|
||||
* from the callback for lockdep reasons.
|
||||
* @cb: Callback for the accelerated migration fence.
|
||||
* @arg: The argument for the memcpy functionality.
|
||||
*/
|
||||
struct i915_ttm_memcpy_work {
|
||||
struct dma_fence fence;
|
||||
struct work_struct work;
|
||||
/* The fence lock */
|
||||
spinlock_t lock;
|
||||
struct irq_work irq_work;
|
||||
struct dma_fence_cb cb;
|
||||
struct i915_ttm_memcpy_arg arg;
|
||||
};
|
||||
|
||||
static void i915_ttm_move_memcpy(struct i915_ttm_memcpy_arg *arg)
|
||||
{
|
||||
ttm_move_memcpy(arg->clear, arg->num_pages,
|
||||
arg->dst_iter, arg->src_iter);
|
||||
}
|
||||
|
||||
static void i915_ttm_memcpy_init(struct i915_ttm_memcpy_arg *arg,
|
||||
struct ttm_buffer_object *bo, bool clear,
|
||||
struct ttm_resource *dst_mem,
|
||||
struct ttm_tt *dst_ttm,
|
||||
struct i915_refct_sgt *dst_rsgt)
|
||||
{
|
||||
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
|
||||
struct intel_memory_region *dst_reg, *src_reg;
|
||||
|
||||
dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
|
||||
src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
|
||||
GEM_BUG_ON(!dst_reg || !src_reg);
|
||||
|
||||
arg->dst_iter = !i915_ttm_cpu_maps_iomem(dst_mem) ?
|
||||
ttm_kmap_iter_tt_init(&arg->_dst_iter.tt, dst_ttm) :
|
||||
ttm_kmap_iter_iomap_init(&arg->_dst_iter.io, &dst_reg->iomap,
|
||||
&dst_rsgt->table, dst_reg->region.start);
|
||||
|
||||
arg->src_iter = !i915_ttm_cpu_maps_iomem(bo->resource) ?
|
||||
ttm_kmap_iter_tt_init(&arg->_src_iter.tt, bo->ttm) :
|
||||
ttm_kmap_iter_iomap_init(&arg->_src_iter.io, &src_reg->iomap,
|
||||
&obj->ttm.cached_io_rsgt->table,
|
||||
src_reg->region.start);
|
||||
arg->clear = clear;
|
||||
arg->num_pages = bo->base.size >> PAGE_SHIFT;
|
||||
|
||||
arg->dst_rsgt = i915_refct_sgt_get(dst_rsgt);
|
||||
arg->src_rsgt = clear ? NULL :
|
||||
i915_ttm_resource_get_st(obj, bo->resource);
|
||||
}
|
||||
|
||||
static void i915_ttm_memcpy_release(struct i915_ttm_memcpy_arg *arg)
|
||||
{
|
||||
i915_refct_sgt_put(arg->src_rsgt);
|
||||
i915_refct_sgt_put(arg->dst_rsgt);
|
||||
}
|
||||
|
||||
static void __memcpy_work(struct work_struct *work)
|
||||
{
|
||||
struct i915_ttm_memcpy_work *copy_work =
|
||||
container_of(work, typeof(*copy_work), work);
|
||||
struct i915_ttm_memcpy_arg *arg = &copy_work->arg;
|
||||
bool cookie = dma_fence_begin_signalling();
|
||||
|
||||
i915_ttm_move_memcpy(arg);
|
||||
dma_fence_end_signalling(cookie);
|
||||
|
||||
dma_fence_signal(&copy_work->fence);
|
||||
|
||||
i915_ttm_memcpy_release(arg);
|
||||
dma_fence_put(&copy_work->fence);
|
||||
}
|
||||
|
||||
static void __memcpy_irq_work(struct irq_work *irq_work)
|
||||
{
|
||||
struct i915_ttm_memcpy_work *copy_work =
|
||||
container_of(irq_work, typeof(*copy_work), irq_work);
|
||||
struct i915_ttm_memcpy_arg *arg = &copy_work->arg;
|
||||
|
||||
dma_fence_signal(&copy_work->fence);
|
||||
i915_ttm_memcpy_release(arg);
|
||||
dma_fence_put(&copy_work->fence);
|
||||
}
|
||||
|
||||
static void __memcpy_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
|
||||
{
|
||||
struct i915_ttm_memcpy_work *copy_work =
|
||||
container_of(cb, typeof(*copy_work), cb);
|
||||
|
||||
if (unlikely(fence->error || I915_SELFTEST_ONLY(fail_gpu_migration))) {
|
||||
INIT_WORK(&copy_work->work, __memcpy_work);
|
||||
queue_work(system_unbound_wq, &copy_work->work);
|
||||
} else {
|
||||
init_irq_work(&copy_work->irq_work, __memcpy_irq_work);
|
||||
irq_work_queue(&copy_work->irq_work);
|
||||
}
|
||||
}
|
||||
|
||||
static const char *get_driver_name(struct dma_fence *fence)
|
||||
{
|
||||
return "i915_ttm_memcpy_work";
|
||||
}
|
||||
|
||||
static const char *get_timeline_name(struct dma_fence *fence)
|
||||
{
|
||||
return "unbound";
|
||||
}
|
||||
|
||||
static const struct dma_fence_ops dma_fence_memcpy_ops = {
|
||||
.get_driver_name = get_driver_name,
|
||||
.get_timeline_name = get_timeline_name,
|
||||
};
|
||||
|
||||
static struct dma_fence *
|
||||
i915_ttm_memcpy_work_arm(struct i915_ttm_memcpy_work *work,
|
||||
struct dma_fence *dep)
|
||||
{
|
||||
int ret;
|
||||
|
||||
spin_lock_init(&work->lock);
|
||||
dma_fence_init(&work->fence, &dma_fence_memcpy_ops, &work->lock, 0, 0);
|
||||
dma_fence_get(&work->fence);
|
||||
ret = dma_fence_add_callback(dep, &work->cb, __memcpy_cb);
|
||||
if (ret) {
|
||||
if (ret != -ENOENT)
|
||||
dma_fence_wait(dep, false);
|
||||
|
||||
return ERR_PTR(I915_SELFTEST_ONLY(fail_gpu_migration) ? -EINVAL :
|
||||
dep->error);
|
||||
}
|
||||
|
||||
return &work->fence;
|
||||
}
|
||||
|
||||
static struct dma_fence *
|
||||
__i915_ttm_move(struct ttm_buffer_object *bo, bool clear,
|
||||
struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm,
|
||||
struct i915_refct_sgt *dst_rsgt, bool allow_accel,
|
||||
struct dma_fence *move_dep)
|
||||
{
|
||||
struct i915_ttm_memcpy_work *copy_work = NULL;
|
||||
struct i915_ttm_memcpy_arg _arg, *arg = &_arg;
|
||||
struct dma_fence *fence = ERR_PTR(-EINVAL);
|
||||
|
||||
if (allow_accel) {
|
||||
fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
|
||||
&dst_rsgt->table, move_dep);
|
||||
|
||||
/*
|
||||
* We only need to intercept the error when moving to lmem.
|
||||
* When moving to system, TTM or shmem will provide us with
|
||||
* cleared pages.
|
||||
*/
|
||||
if (!IS_ERR(fence) && !i915_ttm_gtt_binds_lmem(dst_mem) &&
|
||||
!I915_SELFTEST_ONLY(fail_gpu_migration ||
|
||||
fail_work_allocation))
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* If we've scheduled gpu migration, try to arm error intercept. */
|
||||
if (!IS_ERR(fence)) {
|
||||
struct dma_fence *dep = fence;
|
||||
|
||||
if (!I915_SELFTEST_ONLY(fail_work_allocation))
|
||||
copy_work = kzalloc(sizeof(*copy_work), GFP_KERNEL);
|
||||
|
||||
if (copy_work) {
|
||||
arg = &copy_work->arg;
|
||||
i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm,
|
||||
dst_rsgt);
|
||||
fence = i915_ttm_memcpy_work_arm(copy_work, dep);
|
||||
} else {
|
||||
dma_fence_wait(dep, false);
|
||||
fence = ERR_PTR(I915_SELFTEST_ONLY(fail_gpu_migration) ?
|
||||
-EINVAL : fence->error);
|
||||
}
|
||||
dma_fence_put(dep);
|
||||
|
||||
if (!IS_ERR(fence))
|
||||
goto out;
|
||||
} else if (move_dep) {
|
||||
int err = dma_fence_wait(move_dep, true);
|
||||
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
/* Error intercept failed or no accelerated migration to start with */
|
||||
if (!copy_work)
|
||||
i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm,
|
||||
dst_rsgt);
|
||||
i915_ttm_move_memcpy(arg);
|
||||
i915_ttm_memcpy_release(arg);
|
||||
kfree(copy_work);
|
||||
|
||||
return NULL;
|
||||
out:
|
||||
if (!fence && copy_work) {
|
||||
i915_ttm_memcpy_release(arg);
|
||||
kfree(copy_work);
|
||||
}
|
||||
|
||||
return fence;
|
||||
}
|
||||
|
||||
static struct dma_fence *prev_fence(struct ttm_buffer_object *bo,
|
||||
struct ttm_operation_ctx *ctx)
|
||||
{
|
||||
struct i915_deps deps;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Instead of trying hard with GFP_KERNEL to allocate memory,
|
||||
* the dependency collection will just sync if it doesn't
|
||||
* succeed.
|
||||
*/
|
||||
i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
|
||||
ret = i915_deps_add_dependency(&deps, bo->moving, ctx);
|
||||
if (!ret)
|
||||
/*
|
||||
* TODO: Only await excl fence here, and shared fences before
|
||||
* signaling the migration fence.
|
||||
*/
|
||||
ret = i915_deps_add_resv(&deps, bo->base.resv, true, false, ctx);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
return i915_deps_to_fence(&deps, ctx);
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_ttm_move - The TTM move callback used by i915.
|
||||
* @bo: The buffer object.
|
||||
* @evict: Whether this is an eviction.
* @ctx: Pointer to a struct ttm_operation_ctx describing how the waits
* should be performed.
|
||||
* @dst_mem: The destination ttm resource.
|
||||
* @hop: If we need multihop, what temporary memory type to move to.
|
||||
*
|
||||
* Return: 0 if successful, negative error code otherwise.
|
||||
*/
|
||||
int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
|
||||
struct ttm_operation_ctx *ctx,
|
||||
struct ttm_resource *dst_mem,
|
||||
struct ttm_place *hop)
|
||||
{
|
||||
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
|
||||
struct ttm_resource_manager *dst_man =
|
||||
ttm_manager_type(bo->bdev, dst_mem->mem_type);
|
||||
struct dma_fence *migration_fence = NULL;
|
||||
struct ttm_tt *ttm = bo->ttm;
|
||||
struct i915_refct_sgt *dst_rsgt;
|
||||
bool clear;
|
||||
int ret;
|
||||
|
||||
if (GEM_WARN_ON(!obj)) {
|
||||
ttm_bo_move_null(bo, dst_mem);
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = i915_ttm_move_notify(bo);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (obj->mm.madv != I915_MADV_WILLNEED) {
|
||||
i915_ttm_purge(obj);
|
||||
ttm_resource_free(bo, &dst_mem);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Populate ttm with pages if needed. Typically system memory. */
|
||||
if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_TT_FLAG_SWAPPED))) {
|
||||
ret = ttm_tt_populate(bo->bdev, ttm, ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
dst_rsgt = i915_ttm_resource_get_st(obj, dst_mem);
|
||||
if (IS_ERR(dst_rsgt))
|
||||
return PTR_ERR(dst_rsgt);
|
||||
|
||||
clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
|
||||
if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) {
|
||||
struct dma_fence *dep = prev_fence(bo, ctx);
|
||||
|
||||
if (IS_ERR(dep)) {
|
||||
i915_refct_sgt_put(dst_rsgt);
|
||||
return PTR_ERR(dep);
|
||||
}
|
||||
|
||||
migration_fence = __i915_ttm_move(bo, clear, dst_mem, bo->ttm,
|
||||
dst_rsgt, true, dep);
|
||||
dma_fence_put(dep);
|
||||
}
|
||||
|
||||
/* We can possibly get an -ERESTARTSYS here */
|
||||
if (IS_ERR(migration_fence)) {
|
||||
i915_refct_sgt_put(dst_rsgt);
|
||||
return PTR_ERR(migration_fence);
|
||||
}
|
||||
|
||||
if (migration_fence) {
|
||||
ret = ttm_bo_move_accel_cleanup(bo, migration_fence, evict,
|
||||
true, dst_mem);
|
||||
if (ret) {
|
||||
dma_fence_wait(migration_fence, false);
|
||||
ttm_bo_move_sync_cleanup(bo, dst_mem);
|
||||
}
|
||||
dma_fence_put(migration_fence);
|
||||
} else {
|
||||
ttm_bo_move_sync_cleanup(bo, dst_mem);
|
||||
}
|
||||
|
||||
i915_ttm_adjust_domains_after_move(obj);
|
||||
i915_ttm_free_cached_io_rsgt(obj);
|
||||
|
||||
if (i915_ttm_gtt_binds_lmem(dst_mem) || i915_ttm_cpu_maps_iomem(dst_mem)) {
|
||||
obj->ttm.cached_io_rsgt = dst_rsgt;
|
||||
obj->ttm.get_io_page.sg_pos = dst_rsgt->table.sgl;
|
||||
obj->ttm.get_io_page.sg_idx = 0;
|
||||
} else {
|
||||
i915_refct_sgt_put(dst_rsgt);
|
||||
}
|
||||
|
||||
i915_ttm_adjust_lru(obj);
|
||||
i915_ttm_adjust_gem_after_move(obj);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_gem_obj_copy_ttm - Copy the contents of one ttm-based gem object to
|
||||
* another
|
||||
* @dst: The destination object
|
||||
* @src: The source object
|
||||
* @allow_accel: Allow using the blitter. Otherwise TTM memcpy is used.
|
||||
* @intr: Whether to perform waits interruptibly.
|
||||
*
|
||||
* Note: The caller is responsible for assuring that the underlying
|
||||
* TTM objects are populated if needed and locked.
|
||||
*
|
||||
* Return: Zero on success. Negative error code on error. If @intr == true,
|
||||
* then it may return -ERESTARTSYS or -EINTR.
|
||||
*/
|
||||
int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
|
||||
struct drm_i915_gem_object *src,
|
||||
bool allow_accel, bool intr)
|
||||
{
|
||||
struct ttm_buffer_object *dst_bo = i915_gem_to_ttm(dst);
|
||||
struct ttm_buffer_object *src_bo = i915_gem_to_ttm(src);
|
||||
struct ttm_operation_ctx ctx = {
|
||||
.interruptible = intr,
|
||||
};
|
||||
struct i915_refct_sgt *dst_rsgt;
|
||||
struct dma_fence *copy_fence, *dep_fence;
|
||||
struct i915_deps deps;
|
||||
int ret, shared_err;
|
||||
|
||||
assert_object_held(dst);
|
||||
assert_object_held(src);
|
||||
i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
|
||||
|
||||
/*
|
||||
* We plan to add a shared fence only for the source. If that
|
||||
* fails, we await all source fences before commencing
|
||||
* the copy instead of only the exclusive.
|
||||
*/
|
||||
shared_err = dma_resv_reserve_shared(src_bo->base.resv, 1);
|
||||
ret = i915_deps_add_resv(&deps, dst_bo->base.resv, true, false, &ctx);
|
||||
if (!ret)
|
||||
ret = i915_deps_add_resv(&deps, src_bo->base.resv,
|
||||
!!shared_err, false, &ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
dep_fence = i915_deps_to_fence(&deps, &ctx);
|
||||
if (IS_ERR(dep_fence))
|
||||
return PTR_ERR(dep_fence);
|
||||
|
||||
dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource);
|
||||
copy_fence = __i915_ttm_move(src_bo, false, dst_bo->resource,
|
||||
dst_bo->ttm, dst_rsgt, allow_accel,
|
||||
dep_fence);
|
||||
|
||||
i915_refct_sgt_put(dst_rsgt);
|
||||
if (IS_ERR_OR_NULL(copy_fence))
|
||||
return PTR_ERR_OR_ZERO(copy_fence);
|
||||
|
||||
dma_resv_add_excl_fence(dst_bo->base.resv, copy_fence);
|
||||
|
||||
/* If we failed to reserve a shared slot, add an exclusive fence */
|
||||
if (shared_err)
|
||||
dma_resv_add_excl_fence(src_bo->base.resv, copy_fence);
|
||||
else
|
||||
dma_resv_add_shared_fence(src_bo->base.resv, copy_fence);
|
||||
|
||||
dma_fence_put(copy_fence);
|
||||
|
||||
return 0;
|
||||
}
drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h (new file, 41 lines)
@ -0,0 +1,41 @@
|
||||
/* SPDX-License-Identifier: MIT */
|
||||
/*
|
||||
* Copyright © 2021 Intel Corporation
|
||||
*/
|
||||
#ifndef _I915_GEM_TTM_MOVE_H_
|
||||
#define _I915_GEM_TTM_MOVE_H_
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
#include "i915_selftest.h"
|
||||
|
||||
struct ttm_buffer_object;
|
||||
struct ttm_operation_ctx;
|
||||
struct ttm_place;
|
||||
struct ttm_resource;
|
||||
struct ttm_tt;
|
||||
|
||||
struct drm_i915_gem_object;
|
||||
struct i915_refct_sgt;
|
||||
|
||||
int i915_ttm_move_notify(struct ttm_buffer_object *bo);
|
||||
|
||||
I915_SELFTEST_DECLARE(void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
|
||||
bool work_allocation));
|
||||
|
||||
int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
|
||||
struct drm_i915_gem_object *src,
|
||||
bool allow_accel, bool intr);
|
||||
|
||||
/* Internal I915 TTM declarations and definitions below. */
|
||||
|
||||
int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
|
||||
struct ttm_operation_ctx *ctx,
|
||||
struct ttm_resource *dst_mem,
|
||||
struct ttm_place *hop);
|
||||
|
||||
void i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj);
|
||||
|
||||
void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj);
|
||||
|
||||
#endif
|
@ -12,6 +12,7 @@
|
||||
|
||||
#include "gem/i915_gem_region.h"
|
||||
#include "gem/i915_gem_ttm.h"
|
||||
#include "gem/i915_gem_ttm_move.h"
|
||||
#include "gem/i915_gem_ttm_pm.h"
|
||||
|
||||
/**
|
||||
@ -79,6 +80,7 @@ static int i915_ttm_backup(struct i915_gem_apply_to_region *apply,
|
||||
|
||||
err = i915_gem_obj_copy_ttm(backup, obj, pm_apply->allow_gpu, false);
|
||||
GEM_WARN_ON(err);
|
||||
ttm_bo_wait_ctx(backup_bo, &ctx);
|
||||
|
||||
obj->ttm.backup = backup;
|
||||
return 0;
|
||||
@ -169,6 +171,7 @@ static int i915_ttm_restore(struct i915_gem_apply_to_region *apply,
|
||||
err = i915_gem_obj_copy_ttm(obj, backup, pm_apply->allow_gpu,
|
||||
false);
|
||||
GEM_WARN_ON(err);
|
||||
ttm_bo_wait_ctx(backup_bo, &ctx);
|
||||
|
||||
obj->ttm.backup = NULL;
|
||||
err = 0;
|
||||
|
@ -254,6 +254,6 @@ int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj,
|
||||
unsigned int flags)
|
||||
{
|
||||
might_sleep();
|
||||
/* NOP for now. */
|
||||
return 0;
|
||||
|
||||
return i915_gem_object_wait_moving_fence(obj, !!(flags & I915_WAIT_INTERRUPTIBLE));
|
||||
}
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
int i915_gemfs_init(struct drm_i915_private *i915)
|
||||
{
|
||||
char huge_opt[] = "huge=within_size"; /* r/w */
|
||||
struct file_system_type *type;
|
||||
struct vfsmount *gemfs;
|
||||
char *opts;
|
||||
@ -31,10 +32,8 @@ int i915_gemfs_init(struct drm_i915_private *i915)
|
||||
*/
|
||||
|
||||
opts = NULL;
|
||||
if (intel_vtd_active()) {
|
||||
if (intel_vtd_active(i915)) {
|
||||
if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
|
||||
static char huge_opt[] = "huge=within_size"; /* r/w */
|
||||
|
||||
opts = huge_opt;
|
||||
drm_info(&i915->drm,
|
||||
"Transparent Hugepage mode '%s'\n",
|
||||
|
@ -22,6 +22,22 @@
|
||||
#include "selftests/mock_region.h"
|
||||
#include "selftests/i915_random.h"
|
||||
|
||||
static struct i915_gem_context *hugepage_ctx(struct drm_i915_private *i915,
|
||||
struct file *file)
|
||||
{
|
||||
struct i915_gem_context *ctx = live_context(i915, file);
|
||||
struct i915_address_space *vm;
|
||||
|
||||
if (IS_ERR(ctx))
|
||||
return ctx;
|
||||
|
||||
vm = ctx->vm;
|
||||
if (vm)
|
||||
WRITE_ONCE(vm->scrub_64K, true);
|
||||
|
||||
return ctx;
|
||||
}
|
||||
|
||||
static const unsigned int page_sizes[] = {
|
||||
I915_GTT_PAGE_SIZE_2M,
|
||||
I915_GTT_PAGE_SIZE_64K,
|
||||
@ -552,7 +568,7 @@ static int igt_mock_memory_region_huge_pages(void *arg)
|
||||
out_put:
|
||||
i915_gem_object_put(obj);
|
||||
out_region:
|
||||
intel_memory_region_put(mem);
|
||||
intel_memory_region_destroy(mem);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -959,6 +975,8 @@ static int igt_mock_ppgtt_64K(void *arg)
|
||||
__i915_gem_object_put_pages(obj);
|
||||
i915_gem_object_unlock(obj);
|
||||
i915_gem_object_put(obj);
|
||||
|
||||
i915_gem_drain_freed_objects(i915);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1080,10 +1098,6 @@ static int __igt_write_huge(struct intel_context *ce,
|
||||
if (IS_ERR(vma))
|
||||
return PTR_ERR(vma);
|
||||
|
||||
err = i915_vma_unbind(vma);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = i915_vma_pin(vma, size, 0, flags | offset);
|
||||
if (err) {
|
||||
/*
|
||||
@ -1117,7 +1131,7 @@ static int __igt_write_huge(struct intel_context *ce,
|
||||
return err;
|
||||
}
|
||||
|
||||
static int igt_write_huge(struct i915_gem_context *ctx,
|
||||
static int igt_write_huge(struct drm_i915_private *i915,
|
||||
struct drm_i915_gem_object *obj)
|
||||
{
|
||||
struct i915_gem_engines *engines;
|
||||
@ -1127,6 +1141,8 @@ static int igt_write_huge(struct i915_gem_context *ctx,
|
||||
IGT_TIMEOUT(end_time);
|
||||
unsigned int max_page_size;
|
||||
unsigned int count;
|
||||
struct i915_gem_context *ctx;
|
||||
struct file *file;
|
||||
u64 max;
|
||||
u64 num;
|
||||
u64 size;
|
||||
@ -1134,6 +1150,16 @@ static int igt_write_huge(struct i915_gem_context *ctx,
|
||||
int i, n;
|
||||
int err = 0;
|
||||
|
||||
file = mock_file(i915);
|
||||
if (IS_ERR(file))
|
||||
return PTR_ERR(file);
|
||||
|
||||
ctx = hugepage_ctx(i915, file);
|
||||
if (IS_ERR(ctx)) {
|
||||
err = PTR_ERR(ctx);
|
||||
goto out;
|
||||
}
|
||||
|
||||
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
|
||||
|
||||
size = obj->base.size;
|
||||
@ -1153,7 +1179,7 @@ static int igt_write_huge(struct i915_gem_context *ctx,
|
||||
}
|
||||
i915_gem_context_unlock_engines(ctx);
|
||||
if (!n)
|
||||
return 0;
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* To keep things interesting when alternating between engines in our
|
||||
@ -1215,6 +1241,8 @@ static int igt_write_huge(struct i915_gem_context *ctx,
|
||||
|
||||
kfree(order);
|
||||
|
||||
out:
|
||||
fput(file);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -1277,8 +1305,7 @@ static u32 igt_random_size(struct rnd_state *prng,
|
||||
|
||||
static int igt_ppgtt_smoke_huge(void *arg)
|
||||
{
|
||||
struct i915_gem_context *ctx = arg;
|
||||
struct drm_i915_private *i915 = ctx->i915;
|
||||
struct drm_i915_private *i915 = arg;
|
||||
struct drm_i915_gem_object *obj;
|
||||
I915_RND_STATE(prng);
|
||||
struct {
|
||||
@ -1302,6 +1329,7 @@ static int igt_ppgtt_smoke_huge(void *arg)
|
||||
u32 min = backends[i].min;
|
||||
u32 max = backends[i].max;
|
||||
u32 size = max;
|
||||
|
||||
try_again:
|
||||
size = igt_random_size(&prng, min, rounddown_pow_of_two(size));
|
||||
|
||||
@ -1336,7 +1364,7 @@ static int igt_ppgtt_smoke_huge(void *arg)
|
||||
goto out_unpin;
|
||||
}
|
||||
|
||||
err = igt_write_huge(ctx, obj);
|
||||
err = igt_write_huge(i915, obj);
|
||||
if (err) {
|
||||
pr_err("%s write-huge failed with size=%u, i=%d\n",
|
||||
__func__, size, i);
|
||||
@ -1363,8 +1391,7 @@ static int igt_ppgtt_smoke_huge(void *arg)
|
||||
|
||||
static int igt_ppgtt_sanity_check(void *arg)
|
||||
{
|
||||
struct i915_gem_context *ctx = arg;
|
||||
struct drm_i915_private *i915 = ctx->i915;
|
||||
struct drm_i915_private *i915 = arg;
|
||||
unsigned int supported = INTEL_INFO(i915)->page_sizes;
|
||||
struct {
|
||||
igt_create_fn fn;
|
||||
@ -1431,7 +1458,7 @@ static int igt_ppgtt_sanity_check(void *arg)
|
||||
if (pages)
|
||||
obj->mm.page_sizes.sg = pages;
|
||||
|
||||
err = igt_write_huge(ctx, obj);
|
||||
err = igt_write_huge(i915, obj);
|
||||
|
||||
i915_gem_object_lock(obj, NULL);
|
||||
i915_gem_object_unpin_pages(obj);
|
||||
@ -1458,15 +1485,27 @@ static int igt_ppgtt_sanity_check(void *arg)
|
||||
|
||||
static int igt_tmpfs_fallback(void *arg)
|
||||
{
|
||||
struct i915_gem_context *ctx = arg;
|
||||
struct drm_i915_private *i915 = ctx->i915;
|
||||
struct drm_i915_private *i915 = arg;
|
||||
struct i915_address_space *vm;
|
||||
struct i915_gem_context *ctx;
|
||||
struct vfsmount *gemfs = i915->mm.gemfs;
|
||||
struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx);
|
||||
struct drm_i915_gem_object *obj;
|
||||
struct i915_vma *vma;
|
||||
struct file *file;
|
||||
u32 *vaddr;
|
||||
int err = 0;
|
||||
|
||||
file = mock_file(i915);
|
||||
if (IS_ERR(file))
|
||||
return PTR_ERR(file);
|
||||
|
||||
ctx = hugepage_ctx(i915, file);
|
||||
if (IS_ERR(ctx)) {
|
||||
err = PTR_ERR(ctx);
|
||||
goto out;
|
||||
}
|
||||
vm = i915_gem_context_get_eb_vm(ctx);
|
||||
|
||||
/*
|
||||
* Make sure that we don't burst into a ball of flames upon falling back
|
||||
* to tmpfs, which we rely on if on the off-chance we encounter a failure
|
||||
@ -1510,33 +1549,47 @@ static int igt_tmpfs_fallback(void *arg)
|
||||
i915->mm.gemfs = gemfs;
|
||||
|
||||
i915_vm_put(vm);
|
||||
out:
|
||||
fput(file);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int igt_shrink_thp(void *arg)
|
||||
{
|
||||
struct i915_gem_context *ctx = arg;
|
||||
struct drm_i915_private *i915 = ctx->i915;
|
||||
struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx);
|
||||
struct drm_i915_private *i915 = arg;
|
||||
struct i915_address_space *vm;
|
||||
struct i915_gem_context *ctx;
|
||||
struct drm_i915_gem_object *obj;
|
||||
struct i915_gem_engines_iter it;
|
||||
struct intel_context *ce;
|
||||
struct i915_vma *vma;
|
||||
struct file *file;
|
||||
unsigned int flags = PIN_USER;
|
||||
unsigned int n;
|
||||
bool should_swap;
|
||||
int err = 0;
|
||||
int err;
|
||||
|
||||
if (!igt_can_allocate_thp(i915)) {
|
||||
pr_info("missing THP support, skipping\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
file = mock_file(i915);
|
||||
if (IS_ERR(file))
|
||||
return PTR_ERR(file);
|
||||
|
||||
ctx = hugepage_ctx(i915, file);
|
||||
if (IS_ERR(ctx)) {
|
||||
err = PTR_ERR(ctx);
|
||||
goto out;
|
||||
}
|
||||
vm = i915_gem_context_get_eb_vm(ctx);
|
||||
|
||||
/*
|
||||
* Sanity check shrinking huge-paged object -- make sure nothing blows
|
||||
* up.
|
||||
*/
|
||||
|
||||
if (!igt_can_allocate_thp(i915)) {
|
||||
pr_info("missing THP support, skipping\n");
|
||||
goto out_vm;
|
||||
}
|
||||
|
||||
obj = i915_gem_object_create_shmem(i915, SZ_2M);
|
||||
if (IS_ERR(obj)) {
|
||||
err = PTR_ERR(obj);
|
||||
@ -1626,7 +1679,8 @@ static int igt_shrink_thp(void *arg)
|
||||
i915_gem_object_put(obj);
|
||||
out_vm:
|
||||
i915_vm_put(vm);
|
||||
|
||||
out:
|
||||
fput(file);
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -1687,10 +1741,6 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
|
||||
SUBTEST(igt_ppgtt_smoke_huge),
|
||||
SUBTEST(igt_ppgtt_sanity_check),
|
||||
};
|
||||
struct i915_gem_context *ctx;
|
||||
struct i915_address_space *vm;
|
||||
struct file *file;
|
||||
int err;
|
||||
|
||||
if (!HAS_PPGTT(i915)) {
|
||||
pr_info("PPGTT not supported, skipping live-selftests\n");
|
||||
@ -1700,23 +1750,5 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
|
||||
if (intel_gt_is_wedged(&i915->gt))
|
||||
return 0;
|
||||
|
||||
file = mock_file(i915);
|
||||
if (IS_ERR(file))
|
||||
return PTR_ERR(file);
|
||||
|
||||
ctx = live_context(i915, file);
|
||||
if (IS_ERR(ctx)) {
|
||||
err = PTR_ERR(ctx);
|
||||
goto out_file;
|
||||
}
|
||||
|
||||
vm = ctx->vm;
|
||||
if (vm)
|
||||
WRITE_ONCE(vm->scrub_64K, true);
|
||||
|
||||
err = i915_subtests(tests, ctx);
|
||||
|
||||
out_file:
|
||||
fput(file);
|
||||
return err;
|
||||
return i915_live_subtests(tests, i915);
|
||||
}
|
||||
|
@ -88,7 +88,7 @@ static int live_nop_switch(void *arg)
|
||||
rq = i915_request_get(this);
|
||||
i915_request_add(this);
|
||||
}
|
||||
if (i915_request_wait(rq, 0, HZ) < 0) {
|
||||
if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
|
||||
pr_err("Failed to populated %d contexts\n", nctx);
|
||||
intel_gt_set_wedged(&i915->gt);
|
||||
i915_request_put(rq);
|
||||
|
@ -102,7 +102,7 @@ static int igt_dmabuf_import_same_driver_lmem(void *arg)
|
||||
obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &lmem, 1);
|
||||
if (IS_ERR(obj)) {
|
||||
pr_err("__i915_gem_object_create_user failed with err=%ld\n",
|
||||
PTR_ERR(dmabuf));
|
||||
PTR_ERR(obj));
|
||||
err = PTR_ERR(obj);
|
||||
goto out_ret;
|
||||
}
|
||||
@ -158,7 +158,7 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
|
||||
regions, num_regions);
|
||||
if (IS_ERR(obj)) {
|
||||
pr_err("__i915_gem_object_create_user failed with err=%ld\n",
|
||||
PTR_ERR(dmabuf));
|
||||
PTR_ERR(obj));
|
||||
err = PTR_ERR(obj);
|
||||
goto out_ret;
|
||||
}
|
||||
|
@ -4,6 +4,7 @@
|
||||
*/
|
||||
|
||||
#include "gt/intel_migrate.h"
|
||||
#include "gem/i915_gem_ttm_move.h"
|
||||
|
||||
static int igt_fill_check_buffer(struct drm_i915_gem_object *obj,
|
||||
bool fill)
|
||||
@ -227,13 +228,34 @@ static int igt_lmem_pages_migrate(void *arg)
|
||||
return err;
|
||||
}
|
||||
|
||||
static int igt_lmem_pages_failsafe_migrate(void *arg)
|
||||
{
|
||||
int fail_gpu, fail_alloc, ret;
|
||||
|
||||
for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) {
|
||||
for (fail_alloc = 0; fail_alloc < 2; ++fail_alloc) {
|
||||
pr_info("Simulated failure modes: gpu: %d, alloc: %d\n",
|
||||
fail_gpu, fail_alloc);
|
||||
i915_ttm_migrate_set_failure_modes(fail_gpu,
|
||||
fail_alloc);
|
||||
ret = igt_lmem_pages_migrate(arg);
|
||||
if (ret)
|
||||
goto out_err;
|
||||
}
|
||||
}
|
||||
|
||||
out_err:
|
||||
i915_ttm_migrate_set_failure_modes(false, false);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int i915_gem_migrate_live_selftests(struct drm_i915_private *i915)
|
||||
{
|
||||
static const struct i915_subtest tests[] = {
|
||||
SUBTEST(igt_smem_create_migrate),
|
||||
SUBTEST(igt_lmem_create_migrate),
|
||||
SUBTEST(igt_same_create_migrate),
|
||||
SUBTEST(igt_lmem_pages_migrate),
|
||||
SUBTEST(igt_lmem_pages_failsafe_migrate),
|
||||
};
|
||||
|
||||
if (!HAS_LMEM(i915))
|
||||
|
@ -185,7 +185,6 @@ static void gen6_alloc_va_range(struct i915_address_space *vm,
|
||||
|
||||
pt = stash->pt[0];
|
||||
__i915_gem_object_pin_pages(pt->base);
|
||||
i915_gem_object_make_unshrinkable(pt->base);
|
||||
|
||||
fill32_px(pt, vm->scratch[0]->encode);
|
||||
|
||||
@ -262,13 +261,10 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
|
||||
{
|
||||
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm));
|
||||
|
||||
__i915_vma_put(ppgtt->vma);
|
||||
|
||||
gen6_ppgtt_free_pd(ppgtt);
|
||||
free_scratch(vm);
|
||||
|
||||
mutex_destroy(&ppgtt->flush);
|
||||
mutex_destroy(&ppgtt->pin_mutex);
|
||||
|
||||
free_pd(&ppgtt->base.vm, ppgtt->base.pd);
|
||||
}
|
||||
@ -331,37 +327,6 @@ static const struct i915_vma_ops pd_vma_ops = {
|
||||
.unbind_vma = pd_vma_unbind,
|
||||
};
|
||||
|
||||
static struct i915_vma *pd_vma_create(struct gen6_ppgtt *ppgtt, int size)
|
||||
{
|
||||
struct i915_ggtt *ggtt = ppgtt->base.vm.gt->ggtt;
|
||||
struct i915_vma *vma;
|
||||
|
||||
GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
|
||||
GEM_BUG_ON(size > ggtt->vm.total);
|
||||
|
||||
vma = i915_vma_alloc();
|
||||
if (!vma)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
i915_active_init(&vma->active, NULL, NULL, 0);
|
||||
|
||||
kref_init(&vma->ref);
|
||||
mutex_init(&vma->pages_mutex);
|
||||
vma->vm = i915_vm_get(&ggtt->vm);
|
||||
vma->ops = &pd_vma_ops;
|
||||
vma->private = ppgtt;
|
||||
|
||||
vma->size = size;
|
||||
vma->fence_size = size;
|
||||
atomic_set(&vma->flags, I915_VMA_GGTT);
|
||||
vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
|
||||
|
||||
INIT_LIST_HEAD(&vma->obj_link);
|
||||
INIT_LIST_HEAD(&vma->closed_link);
|
||||
|
||||
return vma;
|
||||
}
|
||||
|
||||
int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
|
||||
{
|
||||
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
|
||||
@ -378,24 +343,85 @@ int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww)
|
||||
if (atomic_add_unless(&ppgtt->pin_count, 1, 0))
|
||||
return 0;
|
||||
|
||||
if (mutex_lock_interruptible(&ppgtt->pin_mutex))
|
||||
return -EINTR;
|
||||
/* grab the ppgtt resv to pin the object */
|
||||
err = i915_vm_lock_objects(&ppgtt->base.vm, ww);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/*
|
||||
* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
|
||||
* allocator works in address space sizes, so it's multiplied by page
|
||||
* size. We allocate at the top of the GTT to avoid fragmentation.
|
||||
*/
|
||||
err = 0;
|
||||
if (!atomic_read(&ppgtt->pin_count))
|
||||
if (!atomic_read(&ppgtt->pin_count)) {
|
||||
err = i915_ggtt_pin(ppgtt->vma, ww, GEN6_PD_ALIGN, PIN_HIGH);
|
||||
|
||||
GEM_BUG_ON(ppgtt->vma->fence);
|
||||
clear_bit(I915_VMA_CAN_FENCE_BIT, __i915_vma_flags(ppgtt->vma));
|
||||
}
|
||||
if (!err)
|
||||
atomic_inc(&ppgtt->pin_count);
|
||||
mutex_unlock(&ppgtt->pin_mutex);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int pd_dummy_obj_get_pages(struct drm_i915_gem_object *obj)
|
||||
{
|
||||
obj->mm.pages = ZERO_SIZE_PTR;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void pd_dummy_obj_put_pages(struct drm_i915_gem_object *obj,
|
||||
struct sg_table *pages)
|
||||
{
|
||||
}
|
||||
|
||||
static const struct drm_i915_gem_object_ops pd_dummy_obj_ops = {
|
||||
.name = "pd_dummy_obj",
|
||||
.get_pages = pd_dummy_obj_get_pages,
|
||||
.put_pages = pd_dummy_obj_put_pages,
|
||||
};
|
||||
|
||||
static struct i915_page_directory *
|
||||
gen6_alloc_top_pd(struct gen6_ppgtt *ppgtt)
|
||||
{
|
||||
struct i915_ggtt * const ggtt = ppgtt->base.vm.gt->ggtt;
|
||||
struct i915_page_directory *pd;
|
||||
int err;
|
||||
|
||||
pd = __alloc_pd(I915_PDES);
|
||||
if (unlikely(!pd))
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
pd->pt.base = __i915_gem_object_create_internal(ppgtt->base.vm.gt->i915,
|
||||
&pd_dummy_obj_ops,
|
||||
I915_PDES * SZ_4K);
|
||||
if (IS_ERR(pd->pt.base)) {
|
||||
err = PTR_ERR(pd->pt.base);
|
||||
pd->pt.base = NULL;
|
||||
goto err_pd;
|
||||
}
|
||||
|
||||
pd->pt.base->base.resv = i915_vm_resv_get(&ppgtt->base.vm);
|
||||
pd->pt.base->shares_resv_from = &ppgtt->base.vm;
|
||||
|
||||
ppgtt->vma = i915_vma_instance(pd->pt.base, &ggtt->vm, NULL);
|
||||
if (IS_ERR(ppgtt->vma)) {
|
||||
err = PTR_ERR(ppgtt->vma);
|
||||
ppgtt->vma = NULL;
|
||||
goto err_pd;
|
||||
}
|
||||
|
||||
/* The dummy object we create is special, override ops.. */
|
||||
ppgtt->vma->ops = &pd_vma_ops;
|
||||
ppgtt->vma->private = ppgtt;
|
||||
return pd;
|
||||
|
||||
err_pd:
|
||||
free_pd(&ppgtt->base.vm, pd);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
void gen6_ppgtt_unpin(struct i915_ppgtt *base)
|
||||
{
|
||||
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
|
||||
@ -405,17 +431,6 @@ void gen6_ppgtt_unpin(struct i915_ppgtt *base)
|
||||
i915_vma_unpin(ppgtt->vma);
|
||||
}
|
||||
|
||||
void gen6_ppgtt_unpin_all(struct i915_ppgtt *base)
|
||||
{
|
||||
struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(base);
|
||||
|
||||
if (!atomic_read(&ppgtt->pin_count))
|
||||
return;
|
||||
|
||||
i915_vma_unpin(ppgtt->vma);
|
||||
atomic_set(&ppgtt->pin_count, 0);
|
||||
}
|
||||
|
||||
struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
|
||||
{
|
||||
struct i915_ggtt * const ggtt = gt->ggtt;
|
||||
@ -427,7 +442,6 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
mutex_init(&ppgtt->flush);
|
||||
mutex_init(&ppgtt->pin_mutex);
|
||||
|
||||
ppgtt_init(&ppgtt->base, gt, 0);
|
||||
ppgtt->base.vm.pd_shift = ilog2(SZ_4K * SZ_4K / sizeof(gen6_pte_t));
|
||||
@ -442,19 +456,13 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
|
||||
ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma;
|
||||
ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
|
||||
|
||||
ppgtt->base.pd = __alloc_pd(I915_PDES);
|
||||
if (!ppgtt->base.pd) {
|
||||
err = -ENOMEM;
|
||||
goto err_free;
|
||||
}
|
||||
|
||||
err = gen6_ppgtt_init_scratch(ppgtt);
|
||||
if (err)
|
||||
goto err_pd;
|
||||
goto err_free;
|
||||
|
||||
ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE);
|
||||
if (IS_ERR(ppgtt->vma)) {
|
||||
err = PTR_ERR(ppgtt->vma);
|
||||
ppgtt->base.pd = gen6_alloc_top_pd(ppgtt);
|
||||
if (IS_ERR(ppgtt->base.pd)) {
|
||||
err = PTR_ERR(ppgtt->base.pd);
|
||||
goto err_scratch;
|
||||
}
|
||||
|
||||
@ -462,10 +470,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
|
||||
|
||||
err_scratch:
|
||||
free_scratch(&ppgtt->base.vm);
|
||||
err_pd:
|
||||
free_pd(&ppgtt->base.vm, ppgtt->base.pd);
|
||||
err_free:
|
||||
mutex_destroy(&ppgtt->pin_mutex);
|
||||
kfree(ppgtt);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
@ -19,7 +19,6 @@ struct gen6_ppgtt {
|
||||
u32 pp_dir;
|
||||
|
||||
atomic_t pin_count;
|
||||
struct mutex pin_mutex;
|
||||
|
||||
bool scan_for_unused_pt;
|
||||
};
|
||||
@ -71,7 +70,6 @@ static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base)
|
||||
|
||||
int gen6_ppgtt_pin(struct i915_ppgtt *base, struct i915_gem_ww_ctx *ww);
|
||||
void gen6_ppgtt_unpin(struct i915_ppgtt *base);
|
||||
void gen6_ppgtt_unpin_all(struct i915_ppgtt *base);
|
||||
void gen6_ppgtt_enable(struct intel_gt *gt);
|
||||
void gen7_ppgtt_enable(struct intel_gt *gt);
|
||||
struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt);
|
||||
|
@ -42,7 +42,7 @@ int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode)
|
||||
vf_flush_wa = true;
|
||||
|
||||
/* WaForGAMHang:kbl */
|
||||
if (IS_KBL_GT_STEP(rq->engine->i915, 0, STEP_C0))
|
||||
if (IS_KBL_GRAPHICS_STEP(rq->engine->i915, 0, STEP_C0))
|
||||
dc_flush_wa = true;
|
||||
}
|
||||
|
||||
|
@ -18,7 +18,7 @@
|
||||
static u64 gen8_pde_encode(const dma_addr_t addr,
|
||||
const enum i915_cache_level level)
|
||||
{
|
||||
u64 pde = addr | _PAGE_PRESENT | _PAGE_RW;
|
||||
u64 pde = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
|
||||
|
||||
if (level != I915_CACHE_NONE)
|
||||
pde |= PPAT_CACHED_PDE;
|
||||
@ -32,10 +32,10 @@ static u64 gen8_pte_encode(dma_addr_t addr,
|
||||
enum i915_cache_level level,
|
||||
u32 flags)
|
||||
{
|
||||
gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;
|
||||
gen8_pte_t pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
|
||||
|
||||
if (unlikely(flags & PTE_READ_ONLY))
|
||||
pte &= ~_PAGE_RW;
|
||||
pte &= ~GEN8_PAGE_RW;
|
||||
|
||||
if (flags & PTE_LM)
|
||||
pte |= GEN12_PPGTT_PTE_LM;
|
||||
@ -301,7 +301,6 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm,
|
||||
|
||||
pt = stash->pt[!!lvl];
|
||||
__i915_gem_object_pin_pages(pt->base);
|
||||
i915_gem_object_make_unshrinkable(pt->base);
|
||||
|
||||
fill_px(pt, vm->scratch[lvl]->encode);
|
||||
|
||||
@ -652,7 +651,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
|
||||
|
||||
vm->scratch[0]->encode =
|
||||
gen8_pte_encode(px_dma(vm->scratch[0]),
|
||||
I915_CACHE_LLC, pte_flags);
|
||||
I915_CACHE_NONE, pte_flags);
|
||||
|
||||
for (i = 1; i <= vm->top; i++) {
|
||||
struct drm_i915_gem_object *obj;
|
||||
@ -668,7 +667,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
|
||||
}
|
||||
|
||||
fill_px(obj, vm->scratch[i - 1]->encode);
|
||||
obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_LLC);
|
||||
obj->encode = gen8_pde_encode(px_dma(obj), I915_CACHE_NONE);
|
||||
|
||||
vm->scratch[i] = obj;
|
||||
}
|
||||
|
@ -219,7 +219,7 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
|
||||
*/
|
||||
|
||||
err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww);
|
||||
if (!err && ce->ring->vma->obj)
|
||||
if (!err)
|
||||
err = i915_gem_object_lock(ce->ring->vma->obj, ww);
|
||||
if (!err && ce->state)
|
||||
err = i915_gem_object_lock(ce->state->obj, ww);
|
||||
@ -228,17 +228,17 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = i915_active_acquire(&ce->active);
|
||||
err = ce->ops->pre_pin(ce, ww, &vaddr);
|
||||
if (err)
|
||||
goto err_ctx_unpin;
|
||||
|
||||
err = ce->ops->pre_pin(ce, ww, &vaddr);
|
||||
err = i915_active_acquire(&ce->active);
|
||||
if (err)
|
||||
goto err_release;
|
||||
goto err_post_unpin;
|
||||
|
||||
err = mutex_lock_interruptible(&ce->pin_mutex);
|
||||
if (err)
|
||||
goto err_post_unpin;
|
||||
goto err_release;
|
||||
|
||||
intel_engine_pm_might_get(ce->engine);
|
||||
|
||||
@ -273,11 +273,11 @@ int __intel_context_do_pin_ww(struct intel_context *ce,
|
||||
|
||||
err_unlock:
|
||||
mutex_unlock(&ce->pin_mutex);
|
||||
err_release:
|
||||
i915_active_release(&ce->active);
|
||||
err_post_unpin:
|
||||
if (!handoff)
|
||||
ce->ops->post_unpin(ce);
|
||||
err_release:
|
||||
i915_active_release(&ce->active);
|
||||
err_ctx_unpin:
|
||||
intel_context_post_unpin(ce);
|
||||
|
||||
@ -364,7 +364,7 @@ static int __intel_context_active(struct i915_active *active)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __i915_sw_fence_call
|
||||
static int
|
||||
sw_fence_dummy_notify(struct i915_sw_fence *sf,
|
||||
enum i915_sw_fence_notify state)
|
||||
{
|
||||
|
@ -325,6 +325,38 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
|
||||
engine->id = id;
|
||||
engine->legacy_idx = INVALID_ENGINE;
|
||||
engine->mask = BIT(id);
|
||||
if (GRAPHICS_VER(gt->i915) >= 11) {
|
||||
static const u32 engine_reset_domains[] = {
|
||||
[RCS0] = GEN11_GRDOM_RENDER,
|
||||
[BCS0] = GEN11_GRDOM_BLT,
|
||||
[VCS0] = GEN11_GRDOM_MEDIA,
|
||||
[VCS1] = GEN11_GRDOM_MEDIA2,
|
||||
[VCS2] = GEN11_GRDOM_MEDIA3,
|
||||
[VCS3] = GEN11_GRDOM_MEDIA4,
|
||||
[VCS4] = GEN11_GRDOM_MEDIA5,
|
||||
[VCS5] = GEN11_GRDOM_MEDIA6,
|
||||
[VCS6] = GEN11_GRDOM_MEDIA7,
|
||||
[VCS7] = GEN11_GRDOM_MEDIA8,
|
||||
[VECS0] = GEN11_GRDOM_VECS,
|
||||
[VECS1] = GEN11_GRDOM_VECS2,
|
||||
[VECS2] = GEN11_GRDOM_VECS3,
|
||||
[VECS3] = GEN11_GRDOM_VECS4,
|
||||
};
|
||||
GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
|
||||
!engine_reset_domains[id]);
|
||||
engine->reset_domain = engine_reset_domains[id];
|
||||
} else {
|
||||
static const u32 engine_reset_domains[] = {
|
||||
[RCS0] = GEN6_GRDOM_RENDER,
|
||||
[BCS0] = GEN6_GRDOM_BLT,
|
||||
[VCS0] = GEN6_GRDOM_MEDIA,
|
||||
[VCS1] = GEN8_GRDOM_MEDIA2,
|
||||
[VECS0] = GEN6_GRDOM_VECS,
|
||||
};
|
||||
GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
|
||||
!engine_reset_domains[id]);
|
||||
engine->reset_domain = engine_reset_domains[id];
|
||||
}
|
||||
engine->i915 = i915;
|
||||
engine->gt = gt;
|
||||
engine->uncore = gt->uncore;
|
||||
@ -363,7 +395,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
|
||||
DRIVER_CAPS(i915)->has_logical_contexts = true;
|
||||
|
||||
ewma__engine_latency_init(&engine->latency);
|
||||
seqcount_init(&engine->stats.lock);
|
||||
seqcount_init(&engine->stats.execlists.lock);
|
||||
|
||||
ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
|
||||
|
||||
@ -1676,14 +1708,18 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,

static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
{
struct i915_vma_snapshot *vsnap = &rq->batch_snapshot;
void *ring;
int size;

if (!i915_vma_snapshot_present(vsnap))
vsnap = NULL;

drm_printf(m,
"[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
rq->head, rq->postfix, rq->tail,
rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);
vsnap ? upper_32_bits(vsnap->gtt_offset) : ~0u,
vsnap ? lower_32_bits(vsnap->gtt_offset) : ~0u);

size = rq->tail - rq->head;
if (rq->tail < rq->head)
@ -1915,22 +1951,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
intel_engine_print_breadcrumbs(engine, m);
}

static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine,
ktime_t *now)
{
ktime_t total = engine->stats.total;

/*
* If the engine is executing something at the moment
* add it to the total.
*/
*now = ktime_get();
if (READ_ONCE(engine->stats.active))
total = ktime_add(total, ktime_sub(*now, engine->stats.start));

return total;
}

/**
* intel_engine_get_busy_time() - Return current accumulated engine busyness
* @engine: engine to report on
@ -1940,15 +1960,7 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine,
*/
ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now)
{
unsigned int seq;
ktime_t total;

do {
seq = read_seqcount_begin(&engine->stats.lock);
total = __intel_engine_get_busy_time(engine, now);
} while (read_seqcount_retry(&engine->stats.lock, seq));

return total;
return engine->busyness(engine, now);
}

struct intel_context *

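With this hunk intel_engine_get_busy_time() stops open-coding the seqcount sampling and simply defers to an engine->busyness hook, so each submission backend can account busyness its own way (the execlists hook is installed in a later hunk; a GuC flavour is implied by the new intel_engine_guc_stats). A standalone sketch of that dispatch shape, using stand-in names rather than the driver's types:

/* Minimal model of the per-backend busyness hook; names are illustrative. */
#include <stdint.h>
#include <stdio.h>

struct model_engine {
	uint64_t total_ns;	/* backend-specific accounting would live here */
	/* installed by the submission backend at setup time */
	uint64_t (*busyness)(struct model_engine *engine, uint64_t *now_ns);
};

static uint64_t execlists_style_busyness(struct model_engine *engine,
					 uint64_t *now_ns)
{
	*now_ns = 1000;			/* stand-in for ktime_get() */
	return engine->total_ns;	/* accumulated busy time */
}

static uint64_t get_busy_time(struct model_engine *engine, uint64_t *now_ns)
{
	/* mirrors intel_engine_get_busy_time() deferring to the hook */
	return engine->busyness(engine, now_ns);
}

int main(void)
{
	struct model_engine e = {
		.total_ns = 42,
		.busyness = execlists_style_busyness,
	};
	uint64_t now;

	printf("busy=%llu now=%llu\n",
	       (unsigned long long)get_busy_time(&e, &now),
	       (unsigned long long)now);
	return 0;
}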
@ -15,45 +15,46 @@

static inline void intel_engine_context_in(struct intel_engine_cs *engine)
{
struct intel_engine_execlists_stats *stats = &engine->stats.execlists;
unsigned long flags;

if (engine->stats.active) {
engine->stats.active++;
if (stats->active) {
stats->active++;
return;
}

/* The writer is serialised; but the pmu reader may be from hardirq */
local_irq_save(flags);
write_seqcount_begin(&engine->stats.lock);
write_seqcount_begin(&stats->lock);

engine->stats.start = ktime_get();
engine->stats.active++;
stats->start = ktime_get();
stats->active++;

write_seqcount_end(&engine->stats.lock);
write_seqcount_end(&stats->lock);
local_irq_restore(flags);

GEM_BUG_ON(!engine->stats.active);
GEM_BUG_ON(!stats->active);
}

static inline void intel_engine_context_out(struct intel_engine_cs *engine)
{
struct intel_engine_execlists_stats *stats = &engine->stats.execlists;
unsigned long flags;

GEM_BUG_ON(!engine->stats.active);
if (engine->stats.active > 1) {
engine->stats.active--;
GEM_BUG_ON(!stats->active);
if (stats->active > 1) {
stats->active--;
return;
}

local_irq_save(flags);
write_seqcount_begin(&engine->stats.lock);
write_seqcount_begin(&stats->lock);

engine->stats.active--;
engine->stats.total =
ktime_add(engine->stats.total,
ktime_sub(ktime_get(), engine->stats.start));
stats->active--;
stats->total = ktime_add(stats->total,
ktime_sub(ktime_get(), stats->start));

write_seqcount_end(&engine->stats.lock);
write_seqcount_end(&stats->lock);
local_irq_restore(flags);
}

@ -257,6 +257,55 @@ struct intel_engine_execlists {

#define INTEL_ENGINE_CS_MAX_NAME 8

struct intel_engine_execlists_stats {
/**
* @active: Number of contexts currently scheduled in.
*/
unsigned int active;

/**
* @lock: Lock protecting the below fields.
*/
seqcount_t lock;

/**
* @total: Total time this engine was busy.
*
* Accumulated time not counting the most recent block in cases where
* engine is currently busy (active > 0).
*/
ktime_t total;

/**
* @start: Timestamp of the last idle to active transition.
*
* Idle is defined as active == 0, active is active > 0.
*/
ktime_t start;
};

struct intel_engine_guc_stats {
/**
* @running: Active state of the engine when busyness was last sampled.
*/
bool running;

/**
* @prev_total: Previous value of total runtime clock cycles.
*/
u32 prev_total;

/**
* @total_gt_clks: Total gt clock cycles this engine was busy.
*/
u64 total_gt_clks;

/**
* @start_gt_clk: GT clock time of last idle to active transition.
*/
u64 start_gt_clk;
};

struct intel_engine_cs {
struct drm_i915_private *i915;
struct intel_gt *gt;
@ -269,6 +318,7 @@ struct intel_engine_cs {
unsigned int guc_id;

intel_engine_mask_t mask;
u32 reset_domain;
/**
* @logical_mask: logical mask of engine, reported to user space via
* query IOCTL and used to communicate with the GuC in logical space.
@ -439,6 +489,12 @@ struct intel_engine_cs {
void (*add_active_request)(struct i915_request *rq);
void (*remove_active_request)(struct i915_request *rq);

/*
* Get engine busyness and the time at which the busyness was sampled.
*/
ktime_t (*busyness)(struct intel_engine_cs *engine,
ktime_t *now);

struct intel_engine_execlists execlists;

/*
@ -488,30 +544,10 @@ struct intel_engine_cs {
u32 (*get_cmd_length_mask)(u32 cmd_header);

struct {
/**
* @active: Number of contexts currently scheduled in.
*/
unsigned int active;

/**
* @lock: Lock protecting the below fields.
*/
seqcount_t lock;

/**
* @total: Total time this engine was busy.
*
* Accumulated time not counting the most recent block in cases
* where engine is currently busy (active > 0).
*/
ktime_t total;

/**
* @start: Timestamp of the last idle to active transition.
*
* Idle is defined as active == 0, active is active > 0.
*/
ktime_t start;
union {
struct intel_engine_execlists_stats execlists;
struct intel_engine_guc_stats guc;
};

/**
* @rps: Utilisation at last RPS sampling.

@ -2186,7 +2186,8 @@ struct execlists_capture {
static void execlists_capture_work(struct work_struct *work)
{
struct execlists_capture *cap = container_of(work, typeof(*cap), work);
const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
const gfp_t gfp = __GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL |
__GFP_NOWARN;
struct intel_engine_cs *engine = cap->rq->engine;
struct intel_gt_coredump *gt = cap->error->gt;
struct intel_engine_capture_vma *vma;
@ -3293,6 +3294,38 @@ static void execlists_release(struct intel_engine_cs *engine)
lrc_fini_wa_ctx(engine);
}

static ktime_t __execlists_engine_busyness(struct intel_engine_cs *engine,
ktime_t *now)
{
struct intel_engine_execlists_stats *stats = &engine->stats.execlists;
ktime_t total = stats->total;

/*
* If the engine is executing something at the moment
* add it to the total.
*/
*now = ktime_get();
if (READ_ONCE(stats->active))
total = ktime_add(total, ktime_sub(*now, stats->start));

return total;
}

static ktime_t execlists_engine_busyness(struct intel_engine_cs *engine,
ktime_t *now)
{
struct intel_engine_execlists_stats *stats = &engine->stats.execlists;
unsigned int seq;
ktime_t total;

do {
seq = read_seqcount_begin(&stats->lock);
total = __execlists_engine_busyness(engine, now);
} while (read_seqcount_retry(&stats->lock, seq));

return total;
}

static void
logical_ring_default_vfuncs(struct intel_engine_cs *engine)
{
@ -3349,6 +3382,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
engine->emit_bb_start = gen8_emit_bb_start;
else
engine->emit_bb_start = gen8_emit_bb_start_noarb;

engine->busyness = execlists_engine_busyness;
}

static void logical_ring_default_irqs(struct intel_engine_cs *engine)

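The execlists backend keeps its counters in intel_engine_execlists_stats under a seqcount, so a PMU reader can sample total/start/active without taking a lock and simply retries if a writer raced with it. A rough userspace model of that read-retry loop (the kernel's seqcount_t and ktime_t are replaced with plain stand-ins, so this is only a sketch of the idea, not the driver's implementation):

/* Userspace model of seqcount-style busyness sampling; illustrative only. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct stats_model {
	atomic_uint seq;	/* even: stable, odd: writer in progress */
	unsigned int active;	/* contexts currently scheduled in */
	uint64_t total_ns;	/* accumulated busy time */
	uint64_t start_ns;	/* start of the current busy period */
};

static uint64_t busyness_sample(struct stats_model *s, uint64_t now_ns)
{
	unsigned int begin;
	uint64_t total;

	do {
		begin = atomic_load(&s->seq);	/* read_seqcount_begin() */
		total = s->total_ns;
		if (s->active)			/* engine busy right now */
			total += now_ns - s->start_ns;
	} while ((begin & 1) ||			/* writer was mid-update */
		 begin != atomic_load(&s->seq));	/* read_seqcount_retry() */

	return total;
}

int main(void)
{
	struct stats_model s = { .active = 1, .total_ns = 500, .start_ns = 900 };

	printf("busy: %llu ns\n", (unsigned long long)busyness_sample(&s, 1000));
	return 0;
}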
@ -106,7 +106,7 @@ static bool needs_idle_maps(struct drm_i915_private *i915)
|
||||
* Query intel_iommu to see if we need the workaround. Presumably that
|
||||
* was loaded first.
|
||||
*/
|
||||
if (!intel_vtd_active())
|
||||
if (!intel_vtd_active(i915))
|
||||
return false;
|
||||
|
||||
if (GRAPHICS_VER(i915) == 5 && IS_MOBILE(i915))
|
||||
@ -209,7 +209,7 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
|
||||
enum i915_cache_level level,
|
||||
u32 flags)
|
||||
{
|
||||
gen8_pte_t pte = addr | _PAGE_PRESENT;
|
||||
gen8_pte_t pte = addr | GEN8_PAGE_PRESENT;
|
||||
|
||||
if (flags & PTE_LM)
|
||||
pte |= GEN12_GGTT_PTE_LM;
|
||||
@ -1233,7 +1233,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *i915)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (intel_vtd_active())
|
||||
if (intel_vtd_active(i915))
|
||||
drm_info(&i915->drm, "VT-d active for gfx access\n");
|
||||
|
||||
return 0;
|
||||
|
@ -86,6 +86,7 @@ static int __gt_unpark(struct intel_wakeref *wf)
intel_rc6_unpark(&gt->rc6);
intel_rps_unpark(&gt->rps);
i915_pmu_gt_unparked(i915);
intel_guc_busyness_unpark(gt);

intel_gt_unpark_requests(gt);
runtime_begin(gt);
@ -104,6 +105,7 @@ static int __gt_park(struct intel_wakeref *wf)
runtime_end(gt);
intel_gt_park_requests(gt);

intel_guc_busyness_park(gt);
i915_vma_parked(gt);
i915_pmu_gt_parked(i915);
intel_rps_park(&gt->rps);
@ -301,7 +303,7 @@ void intel_gt_suspend_prepare(struct intel_gt *gt)
user_forcewake(gt, true);
wait_for_suspend(gt);

intel_pxp_suspend(&gt->pxp, false);
intel_pxp_suspend_prepare(&gt->pxp);
}

static suspend_state_t pm_suspend_target(void)
@ -326,6 +328,7 @@ void intel_gt_suspend_late(struct intel_gt *gt)
GEM_BUG_ON(gt->awake);

intel_uc_suspend(&gt->uc);
intel_pxp_suspend(&gt->pxp);

/*
* On disabling the device, we want to turn off HW access to memory
@ -353,7 +356,7 @@ void intel_gt_suspend_late(struct intel_gt *gt)

void intel_gt_runtime_suspend(struct intel_gt *gt)
{
intel_pxp_suspend(&gt->pxp, true);
intel_pxp_runtime_suspend(&gt->pxp);
intel_uc_runtime_suspend(&gt->uc);

GT_TRACE(gt, "\n");
@ -371,7 +374,7 @@ int intel_gt_runtime_resume(struct intel_gt *gt)
if (ret)
return ret;

intel_pxp_resume(&gt->pxp);
intel_pxp_runtime_resume(&gt->pxp);

return 0;
}

@ -7,6 +7,8 @@
|
||||
|
||||
#include <linux/fault-inject.h>
|
||||
|
||||
#include <drm/drm_cache.h>
|
||||
|
||||
#include "gem/i915_gem_lmem.h"
|
||||
#include "i915_trace.h"
|
||||
#include "intel_gt.h"
|
||||
@ -273,6 +275,7 @@ static void poison_scratch_page(struct drm_i915_gem_object *scratch)
|
||||
val = POISON_FREE;
|
||||
|
||||
memset(vaddr, val, scratch->base.size);
|
||||
drm_clflush_virt_range(vaddr, scratch->base.size);
|
||||
}
|
||||
|
||||
int setup_scratch_page(struct i915_address_space *vm)
|
||||
|
@ -135,6 +135,9 @@ typedef u64 gen8_pte_t;
|
||||
#define GEN8_PPAT_ELLC_OVERRIDE (0<<2)
|
||||
#define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8))
|
||||
|
||||
#define GEN8_PAGE_PRESENT BIT_ULL(0)
|
||||
#define GEN8_PAGE_RW BIT_ULL(1)
|
||||
|
||||
#define GEN8_PDE_IPS_64K BIT(11)
|
||||
#define GEN8_PDE_PS_2M BIT(7)
|
||||
|
||||
|
@ -1167,6 +1167,11 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
|
||||
cs = gen12_emit_cmd_buf_wa(ce, cs);
|
||||
cs = gen12_emit_restore_scratch(ce, cs);
|
||||
|
||||
/* Wa_16013000631:dg2 */
|
||||
if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) ||
|
||||
IS_DG2_G11(ce->engine->i915))
|
||||
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);
|
||||
|
||||
return cs;
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,6 @@
|
||||
|
||||
struct insert_pte_data {
|
||||
u64 offset;
|
||||
bool is_lmem;
|
||||
};
|
||||
|
||||
#define CHUNK_SZ SZ_8M /* ~1ms at 8GiB/s preemption delay */
|
||||
@ -40,7 +39,7 @@ static void insert_pte(struct i915_address_space *vm,
|
||||
struct insert_pte_data *d = data;
|
||||
|
||||
vm->insert_page(vm, px_dma(pt), d->offset, I915_CACHE_NONE,
|
||||
d->is_lmem ? PTE_LM : 0);
|
||||
i915_gem_object_is_lmem(pt->base) ? PTE_LM : 0);
|
||||
d->offset += PAGE_SIZE;
|
||||
}
|
||||
|
||||
@ -134,8 +133,7 @@ static struct i915_address_space *migrate_vm(struct intel_gt *gt)
|
||||
goto err_vm;
|
||||
|
||||
/* Now allow the GPU to rewrite the PTE via its own ppGTT */
|
||||
d.is_lmem = i915_gem_object_is_lmem(vm->vm.scratch[0]);
|
||||
vm->vm.foreach(&vm->vm, base, base + sz, insert_pte, &d);
|
||||
vm->vm.foreach(&vm->vm, base, d.offset - base, insert_pte, &d);
|
||||
}
|
||||
|
||||
return &vm->vm;
|
||||
@ -281,10 +279,10 @@ static int emit_pte(struct i915_request *rq,
|
||||
GEM_BUG_ON(GRAPHICS_VER(rq->engine->i915) < 8);
|
||||
|
||||
/* Compute the page directory offset for the target address range */
|
||||
offset += (u64)rq->engine->instance << 32;
|
||||
offset >>= 12;
|
||||
offset *= sizeof(u64);
|
||||
offset += 2 * CHUNK_SZ;
|
||||
offset += (u64)rq->engine->instance << 32;
|
||||
|
||||
cs = intel_ring_begin(rq, 6);
|
||||
if (IS_ERR(cs))
|
||||
|
@ -424,7 +424,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915,
|
||||
|
||||
table->unused_entries_index = I915_MOCS_PTE;
|
||||
if (IS_DG2(i915)) {
|
||||
if (IS_DG2_GT_STEP(i915, G10, STEP_A0, STEP_B0)) {
|
||||
if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
|
||||
table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax);
|
||||
table->table = dg2_mocs_table_g10_ax;
|
||||
} else {
|
||||
|
@ -117,10 +117,17 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6)
|
||||
GEN6_RC_CTL_RC6_ENABLE |
|
||||
GEN6_RC_CTL_EI_MODE(1);
|
||||
|
||||
pg_enable =
|
||||
GEN9_RENDER_PG_ENABLE |
|
||||
GEN9_MEDIA_PG_ENABLE |
|
||||
GEN11_MEDIA_SAMPLER_PG_ENABLE;
|
||||
/* Wa_16011777198 - Render powergating must remain disabled */
|
||||
if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
|
||||
IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))
|
||||
pg_enable =
|
||||
GEN9_MEDIA_PG_ENABLE |
|
||||
GEN11_MEDIA_SAMPLER_PG_ENABLE;
|
||||
else
|
||||
pg_enable =
|
||||
GEN9_RENDER_PG_ENABLE |
|
||||
GEN9_MEDIA_PG_ENABLE |
|
||||
GEN11_MEDIA_SAMPLER_PG_ENABLE;
|
||||
|
||||
if (GRAPHICS_VER(gt->i915) >= 12) {
|
||||
for (i = 0; i < I915_MAX_VCS; i++)
|
||||
|
@ -66,12 +66,16 @@ static void release_fake_lmem_bar(struct intel_memory_region *mem)
|
||||
DMA_ATTR_FORCE_CONTIGUOUS);
|
||||
}
|
||||
|
||||
static void
|
||||
static int
|
||||
region_lmem_release(struct intel_memory_region *mem)
|
||||
{
|
||||
intel_region_ttm_fini(mem);
|
||||
int ret;
|
||||
|
||||
ret = intel_region_ttm_fini(mem);
|
||||
io_mapping_fini(&mem->iomap);
|
||||
release_fake_lmem_bar(mem);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
@ -158,7 +162,7 @@ intel_gt_setup_fake_lmem(struct intel_gt *gt)
|
||||
static bool get_legacy_lowmem_region(struct intel_uncore *uncore,
|
||||
u64 *start, u32 *size)
|
||||
{
|
||||
if (!IS_DG1_GT_STEP(uncore->i915, STEP_A0, STEP_C0))
|
||||
if (!IS_DG1_GRAPHICS_STEP(uncore->i915, STEP_A0, STEP_C0))
|
||||
return false;
|
||||
|
||||
*start = 0;
|
||||
@ -231,7 +235,7 @@ static struct intel_memory_region *setup_lmem(struct intel_gt *gt)
|
||||
return mem;
|
||||
|
||||
err_region_put:
|
||||
intel_memory_region_put(mem);
|
||||
intel_memory_region_destroy(mem);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
|
@ -297,13 +297,6 @@ static int gen6_reset_engines(struct intel_gt *gt,
|
||||
intel_engine_mask_t engine_mask,
|
||||
unsigned int retry)
|
||||
{
|
||||
static const u32 hw_engine_mask[] = {
|
||||
[RCS0] = GEN6_GRDOM_RENDER,
|
||||
[BCS0] = GEN6_GRDOM_BLT,
|
||||
[VCS0] = GEN6_GRDOM_MEDIA,
|
||||
[VCS1] = GEN8_GRDOM_MEDIA2,
|
||||
[VECS0] = GEN6_GRDOM_VECS,
|
||||
};
|
||||
struct intel_engine_cs *engine;
|
||||
u32 hw_mask;
|
||||
|
||||
@ -314,8 +307,7 @@ static int gen6_reset_engines(struct intel_gt *gt,
|
||||
|
||||
hw_mask = 0;
|
||||
for_each_engine_masked(engine, gt, engine_mask, tmp) {
|
||||
GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
|
||||
hw_mask |= hw_engine_mask[engine->id];
|
||||
hw_mask |= engine->reset_domain;
|
||||
}
|
||||
}
|
||||
|
||||
@ -492,22 +484,6 @@ static int gen11_reset_engines(struct intel_gt *gt,
|
||||
intel_engine_mask_t engine_mask,
|
||||
unsigned int retry)
|
||||
{
|
||||
static const u32 hw_engine_mask[] = {
|
||||
[RCS0] = GEN11_GRDOM_RENDER,
|
||||
[BCS0] = GEN11_GRDOM_BLT,
|
||||
[VCS0] = GEN11_GRDOM_MEDIA,
|
||||
[VCS1] = GEN11_GRDOM_MEDIA2,
|
||||
[VCS2] = GEN11_GRDOM_MEDIA3,
|
||||
[VCS3] = GEN11_GRDOM_MEDIA4,
|
||||
[VCS4] = GEN11_GRDOM_MEDIA5,
|
||||
[VCS5] = GEN11_GRDOM_MEDIA6,
|
||||
[VCS6] = GEN11_GRDOM_MEDIA7,
|
||||
[VCS7] = GEN11_GRDOM_MEDIA8,
|
||||
[VECS0] = GEN11_GRDOM_VECS,
|
||||
[VECS1] = GEN11_GRDOM_VECS2,
|
||||
[VECS2] = GEN11_GRDOM_VECS3,
|
||||
[VECS3] = GEN11_GRDOM_VECS4,
|
||||
};
|
||||
struct intel_engine_cs *engine;
|
||||
intel_engine_mask_t tmp;
|
||||
u32 reset_mask, unlock_mask = 0;
|
||||
@ -518,8 +494,7 @@ static int gen11_reset_engines(struct intel_gt *gt,
|
||||
} else {
|
||||
reset_mask = 0;
|
||||
for_each_engine_masked(engine, gt, engine_mask, tmp) {
|
||||
GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
|
||||
reset_mask |= hw_engine_mask[engine->id];
|
||||
reset_mask |= engine->reset_domain;
|
||||
ret = gen11_lock_sfc(engine, &reset_mask, &unlock_mask);
|
||||
if (ret)
|
||||
goto sfc_unlock;
|
||||
@ -1367,20 +1342,27 @@ void intel_gt_handle_error(struct intel_gt *gt,
|
||||
/* Make sure i915_reset_trylock() sees the I915_RESET_BACKOFF */
|
||||
synchronize_rcu_expedited();
|
||||
|
||||
/* Prevent any other reset-engine attempt. */
|
||||
for_each_engine(engine, gt, tmp) {
|
||||
while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
|
||||
&gt->reset.flags))
|
||||
wait_on_bit(&gt->reset.flags,
|
||||
I915_RESET_ENGINE + engine->id,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
/*
|
||||
* Prevent any other reset-engine attempt. We don't do this for GuC
|
||||
* submission as the GuC owns the per-engine reset, not the i915.
|
||||
*/
|
||||
if (!intel_uc_uses_guc_submission(&gt->uc)) {
|
||||
for_each_engine(engine, gt, tmp) {
|
||||
while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
|
||||
&gt->reset.flags))
|
||||
wait_on_bit(&gt->reset.flags,
|
||||
I915_RESET_ENGINE + engine->id,
|
||||
TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
}
|
||||
|
||||
intel_gt_reset_global(gt, engine_mask, msg);
|
||||
|
||||
for_each_engine(engine, gt, tmp)
|
||||
clear_bit_unlock(I915_RESET_ENGINE + engine->id,
|
||||
&gt->reset.flags);
|
||||
if (!intel_uc_uses_guc_submission(&gt->uc)) {
|
||||
for_each_engine(engine, gt, tmp)
|
||||
clear_bit_unlock(I915_RESET_ENGINE + engine->id,
|
||||
&gt->reset.flags);
|
||||
}
|
||||
clear_bit_unlock(I915_RESET_BACKOFF, &gt->reset.flags);
|
||||
smp_mb__after_atomic();
|
||||
wake_up_all(&gt->reset.queue);
|
||||
@ -1441,6 +1423,7 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt)
|
||||
BUILD_BUG_ON(I915_RESET_ENGINE + I915_NUM_ENGINES >
|
||||
I915_WEDGED_ON_INIT);
|
||||
intel_gt_set_wedged(gt);
|
||||
i915_disable_error_state(gt->i915, -ENODEV);
|
||||
set_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
|
||||
|
||||
/* Wedged on init is non-recoverable */
|
||||
@ -1450,6 +1433,7 @@ void intel_gt_set_wedged_on_init(struct intel_gt *gt)
|
||||
void intel_gt_set_wedged_on_fini(struct intel_gt *gt)
|
||||
{
|
||||
intel_gt_set_wedged(gt);
|
||||
i915_disable_error_state(gt->i915, -ENODEV);
|
||||
set_bit(I915_WEDGED_ON_FINI, >->reset.flags);
|
||||
intel_gt_retire_requests(gt); /* cleanup any wedged requests */
|
||||
}
|
||||
|
@ -1357,7 +1357,7 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
|
||||
err = i915_gem_object_lock(timeline->hwsp_ggtt->obj, &ww);
|
||||
if (!err && gen7_wa_vma)
|
||||
err = i915_gem_object_lock(gen7_wa_vma->obj, &ww);
|
||||
if (!err && engine->legacy.ring->vma->obj)
|
||||
if (!err)
|
||||
err = i915_gem_object_lock(engine->legacy.ring->vma->obj, &ww);
|
||||
if (!err)
|
||||
err = intel_timeline_pin(timeline, &ww);
|
||||
|
@ -936,8 +936,70 @@ void intel_rps_park(struct intel_rps *rps)
|
||||
GT_TRACE(rps_to_gt(rps), "park:%x\n", rps->cur_freq);
|
||||
}
|
||||
|
||||
u32 intel_rps_get_boost_frequency(struct intel_rps *rps)
|
||||
{
|
||||
struct intel_guc_slpc *slpc;
|
||||
|
||||
if (rps_uses_slpc(rps)) {
|
||||
slpc = rps_to_slpc(rps);
|
||||
|
||||
return slpc->boost_freq;
|
||||
} else {
|
||||
return intel_gpu_freq(rps, rps->boost_freq);
|
||||
}
|
||||
}
|
||||
|
||||
static int rps_set_boost_freq(struct intel_rps *rps, u32 val)
|
||||
{
|
||||
bool boost = false;
|
||||
|
||||
/* Validate against (static) hardware limits */
|
||||
val = intel_freq_opcode(rps, val);
|
||||
if (val < rps->min_freq || val > rps->max_freq)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&rps->lock);
|
||||
if (val != rps->boost_freq) {
|
||||
rps->boost_freq = val;
|
||||
boost = atomic_read(&rps->num_waiters);
|
||||
}
|
||||
mutex_unlock(&rps->lock);
|
||||
if (boost)
|
||||
schedule_work(&rps->work);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq)
|
||||
{
|
||||
struct intel_guc_slpc *slpc;
|
||||
|
||||
if (rps_uses_slpc(rps)) {
|
||||
slpc = rps_to_slpc(rps);
|
||||
|
||||
return intel_guc_slpc_set_boost_freq(slpc, freq);
|
||||
} else {
|
||||
return rps_set_boost_freq(rps, freq);
|
||||
}
|
||||
}
|
||||
|
||||
void intel_rps_dec_waiters(struct intel_rps *rps)
|
||||
{
|
||||
struct intel_guc_slpc *slpc;
|
||||
|
||||
if (rps_uses_slpc(rps)) {
|
||||
slpc = rps_to_slpc(rps);
|
||||
|
||||
intel_guc_slpc_dec_waiters(slpc);
|
||||
} else {
|
||||
atomic_dec(&rps->num_waiters);
|
||||
}
|
||||
}
|
||||
|
||||
void intel_rps_boost(struct i915_request *rq)
|
||||
{
|
||||
struct intel_guc_slpc *slpc;
|
||||
|
||||
if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
|
||||
return;
|
||||
|
||||
@ -945,6 +1007,16 @@ void intel_rps_boost(struct i915_request *rq)
|
||||
if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
|
||||
struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
|
||||
|
||||
if (rps_uses_slpc(rps)) {
|
||||
slpc = rps_to_slpc(rps);
|
||||
|
||||
/* Return if old value is non zero */
|
||||
if (!atomic_fetch_inc(&slpc->num_waiters))
|
||||
schedule_work(&slpc->boost_work);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (atomic_fetch_inc(&rps->num_waiters))
|
||||
return;
|
||||
|
||||
|
@ -23,6 +23,9 @@ void intel_rps_disable(struct intel_rps *rps);
|
||||
void intel_rps_park(struct intel_rps *rps);
|
||||
void intel_rps_unpark(struct intel_rps *rps);
|
||||
void intel_rps_boost(struct i915_request *rq);
|
||||
void intel_rps_dec_waiters(struct intel_rps *rps);
|
||||
u32 intel_rps_get_boost_frequency(struct intel_rps *rps);
|
||||
int intel_rps_set_boost_frequency(struct intel_rps *rps, u32 freq);
|
||||
|
||||
int intel_rps_set(struct intel_rps *rps, u8 val);
|
||||
void intel_rps_mark_interactive(struct intel_rps *rps, bool interactive);
|
||||
|
@ -482,7 +482,7 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
|
||||
gen9_ctx_workarounds_init(engine, wal);
|
||||
|
||||
/* WaToEnableHwFixForPushConstHWBug:kbl */
|
||||
if (IS_KBL_GT_STEP(i915, STEP_C0, STEP_FOREVER))
|
||||
if (IS_KBL_GRAPHICS_STEP(i915, STEP_C0, STEP_FOREVER))
|
||||
wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
|
||||
GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
|
||||
|
||||
@ -558,6 +558,22 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
|
||||
wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
|
||||
}
|
||||
|
||||
/*
|
||||
* These settings aren't actually workarounds, but general tuning settings that
|
||||
* need to be programmed on dg2 platform.
|
||||
*/
|
||||
static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
|
||||
struct i915_wa_list *wal)
|
||||
{
|
||||
wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
|
||||
REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
|
||||
wa_add(wal,
|
||||
FF_MODE2,
|
||||
FF_MODE2_TDS_TIMER_MASK,
|
||||
FF_MODE2_TDS_TIMER_128,
|
||||
0, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* These settings aren't actually workarounds, but general tuning settings that
|
||||
* need to be programmed on several platforms.
|
||||
@ -621,13 +637,6 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
|
||||
FF_MODE2_GS_TIMER_MASK,
|
||||
FF_MODE2_GS_TIMER_224,
|
||||
0, false);
|
||||
|
||||
/*
|
||||
* Wa_14012131227:dg1
|
||||
* Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p
|
||||
*/
|
||||
wa_masked_en(wal, GEN7_COMMON_SLICE_CHICKEN1,
|
||||
GEN9_RHWO_OPTIMIZATION_DISABLE);
|
||||
}
|
||||
|
||||
static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
|
||||
@ -644,6 +653,42 @@ static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
|
||||
DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
|
||||
}
|
||||
|
||||
static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
|
||||
struct i915_wa_list *wal)
|
||||
{
|
||||
dg2_ctx_gt_tuning_init(engine, wal);
|
||||
|
||||
/* Wa_16011186671:dg2_g11 */
|
||||
if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
|
||||
wa_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
|
||||
wa_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
|
||||
}
|
||||
|
||||
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
|
||||
/* Wa_14010469329:dg2_g10 */
|
||||
wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
|
||||
XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
|
||||
|
||||
/*
|
||||
* Wa_22010465075:dg2_g10
|
||||
* Wa_22010613112:dg2_g10
|
||||
* Wa_14010698770:dg2_g10
|
||||
*/
|
||||
wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
|
||||
GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
|
||||
}
|
||||
|
||||
/* Wa_16013271637:dg2 */
|
||||
wa_masked_en(wal, SLICE_COMMON_ECO_CHICKEN1,
|
||||
MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
|
||||
|
||||
/* Wa_22012532006:dg2 */
|
||||
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
|
||||
IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
|
||||
wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
|
||||
DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
|
||||
}
|
||||
|
||||
static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
|
||||
struct i915_wa_list *wal)
|
||||
{
|
||||
@ -730,7 +775,11 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
|
||||
if (engine->class != RENDER_CLASS)
|
||||
goto done;
|
||||
|
||||
if (IS_DG1(i915))
|
||||
if (IS_DG2(i915))
|
||||
dg2_ctx_workarounds_init(engine, wal);
|
||||
else if (IS_XEHPSDV(i915))
|
||||
; /* noop; none at this time */
|
||||
else if (IS_DG1(i915))
|
||||
dg1_ctx_workarounds_init(engine, wal);
|
||||
else if (GRAPHICS_VER(i915) == 12)
|
||||
gen12_ctx_workarounds_init(engine, wal);
|
||||
@ -877,11 +926,52 @@ hsw_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
|
||||
wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
|
||||
}
|
||||
|
||||
static void
|
||||
gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
|
||||
{
|
||||
const struct sseu_dev_info *sseu = &i915->gt.info.sseu;
|
||||
unsigned int slice, subslice;
|
||||
u32 mcr, mcr_mask;
|
||||
|
||||
GEM_BUG_ON(GRAPHICS_VER(i915) != 9);
|
||||
|
||||
/*
|
||||
* WaProgramMgsrForCorrectSliceSpecificMmioReads:gen9,glk,kbl,cml
|
||||
* Before any MMIO read into slice/subslice specific registers, MCR
|
||||
* packet control register needs to be programmed to point to any
|
||||
* enabled s/ss pair. Otherwise, incorrect values will be returned.
|
||||
* This means each subsequent MMIO read will be forwarded to an
|
||||
* specific s/ss combination, but this is OK since these registers
|
||||
* are consistent across s/ss in almost all cases. In the rare
|
||||
* occasions, such as INSTDONE, where this value is dependent
|
||||
* on s/ss combo, the read should be done with read_subslice_reg.
|
||||
*/
|
||||
slice = ffs(sseu->slice_mask) - 1;
|
||||
GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
|
||||
subslice = ffs(intel_sseu_get_subslices(sseu, slice));
|
||||
GEM_BUG_ON(!subslice);
|
||||
subslice--;
|
||||
|
||||
/*
|
||||
* We use GEN8_MCR..() macros to calculate the |mcr| value for
|
||||
* Gen9 to address WaProgramMgsrForCorrectSliceSpecificMmioReads
|
||||
*/
|
||||
mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
|
||||
mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
|
||||
|
||||
drm_dbg(&i915->drm, "MCR slice:%d/subslice:%d = %x\n", slice, subslice, mcr);
|
||||
|
||||
wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
|
||||
}
|
||||
|
||||
static void
|
||||
gen9_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
|
||||
{
|
||||
struct drm_i915_private *i915 = gt->i915;
|
||||
|
||||
/* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml,gen9 */
|
||||
gen9_wa_init_mcr(i915, wal);
|
||||
|
||||
/* WaDisableKillLogic:bxt,skl,kbl */
|
||||
if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
|
||||
wa_write_or(wal,
|
||||
@ -916,7 +1006,7 @@ skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
|
||||
GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);
|
||||
|
||||
/* WaInPlaceDecompressionHang:skl */
|
||||
if (IS_SKL_GT_STEP(gt->i915, STEP_A0, STEP_H0))
|
||||
if (IS_SKL_GRAPHICS_STEP(gt->i915, STEP_A0, STEP_H0))
|
||||
wa_write_or(wal,
|
||||
GEN9_GAMT_ECO_REG_RW_IA,
|
||||
GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
|
||||
@ -928,7 +1018,7 @@ kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
|
||||
gen9_gt_workarounds_init(gt, wal);
|
||||
|
||||
/* WaDisableDynamicCreditSharing:kbl */
|
||||
if (IS_KBL_GT_STEP(gt->i915, 0, STEP_C0))
|
||||
if (IS_KBL_GRAPHICS_STEP(gt->i915, 0, STEP_C0))
|
||||
wa_write_or(wal,
|
||||
GAMT_CHKN_BIT_REG,
|
||||
GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);
|
||||
@ -1134,9 +1224,18 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
|
||||
GAMT_CHKN_BIT_REG,
|
||||
GAMT_CHKN_DISABLE_L3_COH_PIPE);
|
||||
|
||||
/* Wa_1407352427:icl,ehl */
|
||||
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
|
||||
PSDUNIT_CLKGATE_DIS);
|
||||
|
||||
/* Wa_1406680159:icl,ehl */
|
||||
wa_write_or(wal,
|
||||
SUBSLICE_UNIT_LEVEL_CLKGATE,
|
||||
GWUNIT_CLKGATE_DIS);
|
||||
|
||||
/* Wa_1607087056:icl,ehl,jsl */
|
||||
if (IS_ICELAKE(i915) ||
|
||||
IS_JSL_EHL_GT_STEP(i915, STEP_A0, STEP_B0))
|
||||
IS_JSL_EHL_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
|
||||
wa_write_or(wal,
|
||||
SLICE_UNIT_LEVEL_CLKGATE,
|
||||
L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
|
||||
@ -1190,19 +1289,19 @@ tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
|
||||
gen12_gt_workarounds_init(gt, wal);
|
||||
|
||||
/* Wa_1409420604:tgl */
|
||||
if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0))
|
||||
if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
|
||||
wa_write_or(wal,
|
||||
SUBSLICE_UNIT_LEVEL_CLKGATE2,
|
||||
CPSSUNIT_CLKGATE_DIS);
|
||||
|
||||
/* Wa_1607087056:tgl also known as BUG:1409180338 */
|
||||
if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0))
|
||||
if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
|
||||
wa_write_or(wal,
|
||||
SLICE_UNIT_LEVEL_CLKGATE,
|
||||
L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
|
||||
|
||||
/* Wa_1408615072:tgl[a0] */
|
||||
if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0))
|
||||
if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
|
||||
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
|
||||
VSUNIT_CLKGATE_DIS_TGL);
|
||||
}
|
||||
@ -1215,7 +1314,7 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
|
||||
gen12_gt_workarounds_init(gt, wal);
|
||||
|
||||
/* Wa_1607087056:dg1 */
|
||||
if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0))
|
||||
if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
|
||||
wa_write_or(wal,
|
||||
SLICE_UNIT_LEVEL_CLKGATE,
|
||||
L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
|
||||
@ -1236,7 +1335,179 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
|
||||
static void
|
||||
xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
|
||||
{
|
||||
struct drm_i915_private *i915 = gt->i915;
|
||||
|
||||
xehp_init_mcr(gt, wal);
|
||||
|
||||
/* Wa_1409757795:xehpsdv */
|
||||
wa_write_or(wal, SCCGCTL94DC, CG3DDISURB);
|
||||
|
||||
/* Wa_18011725039:xehpsdv */
|
||||
if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
|
||||
wa_masked_dis(wal, MLTICTXCTL, TDONRENDER);
|
||||
wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
|
||||
}
|
||||
|
||||
/* Wa_16011155590:xehpsdv */
|
||||
if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
|
||||
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
|
||||
TSGUNIT_CLKGATE_DIS);
|
||||
|
||||
/* Wa_14011780169:xehpsdv */
|
||||
if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_B0, STEP_FOREVER)) {
|
||||
wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
|
||||
GAMTLBVDBOX7_CLKGATE_DIS |
|
||||
GAMTLBVDBOX6_CLKGATE_DIS |
|
||||
GAMTLBVDBOX5_CLKGATE_DIS |
|
||||
GAMTLBVDBOX4_CLKGATE_DIS |
|
||||
GAMTLBVDBOX3_CLKGATE_DIS |
|
||||
GAMTLBVDBOX2_CLKGATE_DIS |
|
||||
GAMTLBVDBOX1_CLKGATE_DIS |
|
||||
GAMTLBVDBOX0_CLKGATE_DIS |
|
||||
GAMTLBKCR_CLKGATE_DIS |
|
||||
GAMTLBGUC_CLKGATE_DIS |
|
||||
GAMTLBBLT_CLKGATE_DIS);
|
||||
wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
|
||||
GAMTLBGFXA1_CLKGATE_DIS |
|
||||
GAMTLBCOMPA0_CLKGATE_DIS |
|
||||
GAMTLBCOMPA1_CLKGATE_DIS |
|
||||
GAMTLBCOMPB0_CLKGATE_DIS |
|
||||
GAMTLBCOMPB1_CLKGATE_DIS |
|
||||
GAMTLBCOMPC0_CLKGATE_DIS |
|
||||
GAMTLBCOMPC1_CLKGATE_DIS |
|
||||
GAMTLBCOMPD0_CLKGATE_DIS |
|
||||
GAMTLBCOMPD1_CLKGATE_DIS |
|
||||
GAMTLBMERT_CLKGATE_DIS |
|
||||
GAMTLBVEBOX3_CLKGATE_DIS |
|
||||
GAMTLBVEBOX2_CLKGATE_DIS |
|
||||
GAMTLBVEBOX1_CLKGATE_DIS |
|
||||
GAMTLBVEBOX0_CLKGATE_DIS);
|
||||
}
|
||||
|
||||
/* Wa_14012362059:xehpsdv */
|
||||
wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
|
||||
|
||||
/* Wa_16012725990:xehpsdv */
|
||||
if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_FOREVER))
|
||||
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, VFUNIT_CLKGATE_DIS);
|
||||
|
||||
/* Wa_14011060649:xehpsdv */
|
||||
wa_14011060649(gt, wal);
|
||||
|
||||
/* Wa_14014368820:xehpsdv */
|
||||
wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
|
||||
GLOBAL_INVALIDATION_MODE);
|
||||
}
|
||||
|
||||
static void
|
||||
dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
|
||||
{
|
||||
struct intel_engine_cs *engine;
|
||||
int id;
|
||||
|
||||
xehp_init_mcr(gt, wal);
|
||||
|
||||
/* Wa_14011060649:dg2 */
|
||||
wa_14011060649(gt, wal);
|
||||
|
||||
/*
|
||||
* Although there are per-engine instances of these registers,
|
||||
* they technically exist outside the engine itself and are not
|
||||
* impacted by engine resets. Furthermore, they're part of the
|
||||
* GuC blacklist so trying to treat them as engine workarounds
|
||||
* will result in GuC initialization failure and a wedged GPU.
|
||||
*/
|
||||
for_each_engine(engine, gt, id) {
|
||||
if (engine->class != VIDEO_DECODE_CLASS)
|
||||
continue;
|
||||
|
||||
/* Wa_16010515920:dg2_g10 */
|
||||
if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0))
|
||||
wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base),
|
||||
ALNUNIT_CLKGATE_DIS);
|
||||
}
|
||||
|
||||
if (IS_DG2_G10(gt->i915)) {
|
||||
/* Wa_22010523718:dg2 */
|
||||
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
|
||||
CG3DDISCFEG_CLKGATE_DIS);
|
||||
|
||||
/* Wa_14011006942:dg2 */
|
||||
wa_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE,
|
||||
DSS_ROUTER_CLKGATE_DIS);
|
||||
}
|
||||
|
||||
if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
|
||||
/* Wa_14010680813:dg2_g10 */
|
||||
wa_write_or(wal, GEN12_GAMSTLB_CTRL, CONTROL_BLOCK_CLKGATE_DIS |
|
||||
EGRESS_BLOCK_CLKGATE_DIS | TAG_BLOCK_CLKGATE_DIS);
|
||||
|
||||
/* Wa_14010948348:dg2_g10 */
|
||||
wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS);
|
||||
|
||||
/* Wa_14011037102:dg2_g10 */
|
||||
wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS);
|
||||
|
||||
/* Wa_14011371254:dg2_g10 */
|
||||
wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);
|
||||
|
||||
/* Wa_14011431319:dg2_g10 */
|
||||
wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
|
||||
GAMTLBVDBOX7_CLKGATE_DIS |
|
||||
GAMTLBVDBOX6_CLKGATE_DIS |
|
||||
GAMTLBVDBOX5_CLKGATE_DIS |
|
||||
GAMTLBVDBOX4_CLKGATE_DIS |
|
||||
GAMTLBVDBOX3_CLKGATE_DIS |
|
||||
GAMTLBVDBOX2_CLKGATE_DIS |
|
||||
GAMTLBVDBOX1_CLKGATE_DIS |
|
||||
GAMTLBVDBOX0_CLKGATE_DIS |
|
||||
GAMTLBKCR_CLKGATE_DIS |
|
||||
GAMTLBGUC_CLKGATE_DIS |
|
||||
GAMTLBBLT_CLKGATE_DIS);
|
||||
wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS |
|
||||
GAMTLBGFXA1_CLKGATE_DIS |
|
||||
GAMTLBCOMPA0_CLKGATE_DIS |
|
||||
GAMTLBCOMPA1_CLKGATE_DIS |
|
||||
GAMTLBCOMPB0_CLKGATE_DIS |
|
||||
GAMTLBCOMPB1_CLKGATE_DIS |
|
||||
GAMTLBCOMPC0_CLKGATE_DIS |
|
||||
GAMTLBCOMPC1_CLKGATE_DIS |
|
||||
GAMTLBCOMPD0_CLKGATE_DIS |
|
||||
GAMTLBCOMPD1_CLKGATE_DIS |
|
||||
GAMTLBMERT_CLKGATE_DIS |
|
||||
GAMTLBVEBOX3_CLKGATE_DIS |
|
||||
GAMTLBVEBOX2_CLKGATE_DIS |
|
||||
GAMTLBVEBOX1_CLKGATE_DIS |
|
||||
GAMTLBVEBOX0_CLKGATE_DIS);
|
||||
|
||||
/* Wa_14010569222:dg2_g10 */
|
||||
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
|
||||
GAMEDIA_CLKGATE_DIS);
|
||||
|
||||
/* Wa_14011028019:dg2_g10 */
|
||||
wa_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
|
||||
}
|
||||
|
||||
if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) ||
|
||||
IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) {
|
||||
/* Wa_14012362059:dg2 */
|
||||
wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
|
||||
}
|
||||
|
||||
/* Wa_1509235366:dg2 */
|
||||
wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
|
||||
GLOBAL_INVALIDATION_MODE);
|
||||
|
||||
/* Wa_14014830051:dg2 */
|
||||
wa_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
|
||||
|
||||
/*
|
||||
* The following are not actually "workarounds" but rather
|
||||
* recommended tuning settings documented in the bspec's
|
||||
* performance guide section.
|
||||
*/
|
||||
wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
|
||||
wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -1244,7 +1515,9 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
|
||||
{
|
||||
struct drm_i915_private *i915 = gt->i915;
|
||||
|
||||
if (IS_XEHPSDV(i915))
|
||||
if (IS_DG2(i915))
|
||||
dg2_gt_workarounds_init(gt, wal);
|
||||
else if (IS_XEHPSDV(i915))
|
||||
xehpsdv_gt_workarounds_init(gt, wal);
|
||||
else if (IS_DG1(i915))
|
||||
dg1_gt_workarounds_init(gt, wal);
|
||||
@ -1518,7 +1791,7 @@ static void cfl_whitelist_build(struct intel_engine_cs *engine)
|
||||
RING_FORCE_TO_NONPRIV_RANGE_4);
|
||||
}
|
||||
|
||||
static void cml_whitelist_build(struct intel_engine_cs *engine)
|
||||
static void allow_read_ctx_timestamp(struct intel_engine_cs *engine)
|
||||
{
|
||||
struct i915_wa_list *w = &engine->whitelist;
|
||||
|
||||
@ -1526,6 +1799,11 @@ static void cml_whitelist_build(struct intel_engine_cs *engine)
|
||||
whitelist_reg_ext(w,
|
||||
RING_CTX_TIMESTAMP(engine->mmio_base),
|
||||
RING_FORCE_TO_NONPRIV_ACCESS_RD);
|
||||
}
|
||||
|
||||
static void cml_whitelist_build(struct intel_engine_cs *engine)
|
||||
{
|
||||
allow_read_ctx_timestamp(engine);
|
||||
|
||||
cfl_whitelist_build(engine);
|
||||
}
|
||||
@ -1534,6 +1812,8 @@ static void icl_whitelist_build(struct intel_engine_cs *engine)
|
||||
{
|
||||
struct i915_wa_list *w = &engine->whitelist;
|
||||
|
||||
allow_read_ctx_timestamp(engine);
|
||||
|
||||
switch (engine->class) {
|
||||
case RENDER_CLASS:
|
||||
/* WaAllowUMDToModifyHalfSliceChicken7:icl */
|
||||
@ -1569,15 +1849,9 @@ static void icl_whitelist_build(struct intel_engine_cs *engine)
|
||||
/* hucStatus2RegOffset */
|
||||
whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
|
||||
RING_FORCE_TO_NONPRIV_ACCESS_RD);
|
||||
whitelist_reg_ext(w,
|
||||
RING_CTX_TIMESTAMP(engine->mmio_base),
|
||||
RING_FORCE_TO_NONPRIV_ACCESS_RD);
|
||||
break;
|
||||
|
||||
default:
|
||||
whitelist_reg_ext(w,
|
||||
RING_CTX_TIMESTAMP(engine->mmio_base),
|
||||
RING_FORCE_TO_NONPRIV_ACCESS_RD);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1586,6 +1860,8 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
|
||||
{
|
||||
struct i915_wa_list *w = &engine->whitelist;
|
||||
|
||||
allow_read_ctx_timestamp(engine);
|
||||
|
||||
switch (engine->class) {
|
||||
case RENDER_CLASS:
|
||||
/*
|
||||
@ -1602,16 +1878,17 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
|
||||
RING_FORCE_TO_NONPRIV_ACCESS_RD |
|
||||
RING_FORCE_TO_NONPRIV_RANGE_4);
|
||||
|
||||
/* Wa_1808121037:tgl */
|
||||
/*
|
||||
* Wa_1808121037:tgl
|
||||
* Wa_14012131227:dg1
|
||||
* Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p
|
||||
*/
|
||||
whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
|
||||
|
||||
/* Wa_1806527549:tgl */
|
||||
whitelist_reg(w, HIZ_CHICKEN);
|
||||
break;
|
||||
default:
|
||||
whitelist_reg_ext(w,
|
||||
RING_CTX_TIMESTAMP(engine->mmio_base),
|
||||
RING_FORCE_TO_NONPRIV_ACCESS_RD);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -1623,13 +1900,46 @@ static void dg1_whitelist_build(struct intel_engine_cs *engine)
|
||||
tgl_whitelist_build(engine);
|
||||
|
||||
/* GEN:BUG:1409280441:dg1 */
|
||||
if (IS_DG1_GT_STEP(engine->i915, STEP_A0, STEP_B0) &&
|
||||
if (IS_DG1_GRAPHICS_STEP(engine->i915, STEP_A0, STEP_B0) &&
|
||||
(engine->class == RENDER_CLASS ||
|
||||
engine->class == COPY_ENGINE_CLASS))
|
||||
whitelist_reg_ext(w, RING_ID(engine->mmio_base),
|
||||
RING_FORCE_TO_NONPRIV_ACCESS_RD);
|
||||
}
|
||||
|
||||
static void xehpsdv_whitelist_build(struct intel_engine_cs *engine)
|
||||
{
|
||||
allow_read_ctx_timestamp(engine);
|
||||
}
|
||||
|
||||
static void dg2_whitelist_build(struct intel_engine_cs *engine)
|
||||
{
|
||||
struct i915_wa_list *w = &engine->whitelist;
|
||||
|
||||
allow_read_ctx_timestamp(engine);
|
||||
|
||||
switch (engine->class) {
|
||||
case RENDER_CLASS:
|
||||
/*
|
||||
* Wa_1507100340:dg2_g10
|
||||
*
|
||||
* This covers 4 registers which are next to one another:
|
||||
* - PS_INVOCATION_COUNT
|
||||
* - PS_INVOCATION_COUNT_UDW
|
||||
* - PS_DEPTH_COUNT
|
||||
* - PS_DEPTH_COUNT_UDW
|
||||
*/
|
||||
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0))
|
||||
whitelist_reg_ext(w, PS_INVOCATION_COUNT,
|
||||
RING_FORCE_TO_NONPRIV_ACCESS_RD |
|
||||
RING_FORCE_TO_NONPRIV_RANGE_4);
|
||||
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void intel_engine_init_whitelist(struct intel_engine_cs *engine)
|
||||
{
|
||||
struct drm_i915_private *i915 = engine->i915;
|
||||
@ -1637,7 +1947,11 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
|
||||
|
||||
wa_init_start(w, "whitelist", engine->name);
|
||||
|
||||
if (IS_DG1(i915))
|
||||
if (IS_DG2(i915))
|
||||
dg2_whitelist_build(engine);
|
||||
else if (IS_XEHPSDV(i915))
|
||||
xehpsdv_whitelist_build(engine);
|
||||
else if (IS_DG1(i915))
|
||||
dg1_whitelist_build(engine);
|
||||
else if (GRAPHICS_VER(i915) == 12)
|
||||
tgl_whitelist_build(engine);
|
||||
@ -1711,13 +2025,119 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
|
||||
CMD_CCTL_MOCS_OVERRIDE(mocs, mocs));
|
||||
}
|
||||
}
|
||||
|
||||
static bool needs_wa_1308578152(struct intel_engine_cs *engine)
|
||||
{
|
||||
u64 dss_mask = intel_sseu_get_subslices(&engine->gt->info.sseu, 0);
|
||||
|
||||
return (dss_mask & GENMASK(GEN_DSS_PER_GSLICE - 1, 0)) == 0;
|
||||
}
|
||||
|
||||
static void
|
||||
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
|
||||
{
|
||||
struct drm_i915_private *i915 = engine->i915;
|
||||
|
||||
if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) ||
|
||||
IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) {
|
||||
if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
|
||||
/* Wa_14013392000:dg2_g11 */
|
||||
wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
|
||||
|
||||
/* Wa_16011620976:dg2_g11 */
|
||||
wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
|
||||
}
|
||||
|
||||
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) ||
|
||||
IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
|
||||
/* Wa_14012419201:dg2 */
|
||||
wa_masked_en(wal, GEN9_ROW_CHICKEN4,
|
||||
GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
|
||||
}
|
||||
|
||||
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) ||
|
||||
IS_DG2_G11(engine->i915)) {
|
||||
/*
|
||||
* Wa_22012826095:dg2
|
||||
* Wa_22013059131:dg2
|
||||
*/
|
||||
wa_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
|
||||
MAXREQS_PER_BANK,
|
||||
REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
|
||||
|
||||
/* Wa_22013059131:dg2 */
|
||||
wa_write_or(wal, LSC_CHICKEN_BIT_0,
|
||||
FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
|
||||
}
|
||||
|
||||
/* Wa_1308578152:dg2_g10 when first gslice is fused off */
|
||||
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) &&
|
||||
needs_wa_1308578152(engine)) {
|
||||
wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON,
|
||||
GEN12_REPLAY_MODE_GRANULARITY);
|
||||
}
|
||||
|
||||
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
|
||||
IS_DG2_G11(engine->i915)) {
|
||||
/* Wa_22013037850:dg2 */
|
||||
wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
|
||||
DISABLE_128B_EVICTION_COMMAND_UDW);
|
||||
|
||||
/* Wa_22012856258:dg2 */
|
||||
wa_masked_en(wal, GEN7_ROW_CHICKEN2,
|
||||
GEN12_DISABLE_READ_SUPPRESSION);
|
||||
|
||||
/*
|
||||
* Wa_22010960976:dg2
|
||||
* Wa_14013347512:dg2
|
||||
*/
|
||||
wa_masked_dis(wal, GEN12_HDC_CHICKEN0,
|
||||
LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
|
||||
}
|
||||
|
||||
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
|
||||
/*
|
||||
* Wa_1608949956:dg2_g10
|
||||
* Wa_14010198302:dg2_g10
|
||||
*/
|
||||
wa_masked_en(wal, GEN8_ROW_CHICKEN,
|
||||
MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
|
||||
|
||||
/*
|
||||
* Wa_14010918519:dg2_g10
|
||||
*
|
||||
* LSC_CHICKEN_BIT_0 always reads back as 0 in this stepping,
|
||||
* so ignoring verification.
|
||||
*/
|
||||
wa_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
|
||||
FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
|
||||
0, false);
|
||||
}
|
||||
|
||||
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
|
||||
/* Wa_22010430635:dg2 */
|
||||
wa_masked_en(wal,
|
||||
GEN9_ROW_CHICKEN4,
|
||||
GEN12_DISABLE_GRF_CLEAR);
|
||||
|
||||
/* Wa_14010648519:dg2 */
|
||||
wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
|
||||
}
|
||||
|
||||
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
|
||||
IS_DG2_G11(engine->i915)) {
|
||||
/* Wa_22012654132:dg2 */
|
||||
wa_add(wal, GEN10_CACHE_MODE_SS, 0,
|
||||
_MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
|
||||
0 /* write-only, so skip validation */,
|
||||
true);
|
||||
}
|
||||
|
||||
/* Wa_14013202645:dg2 */
|
||||
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_C0) ||
|
||||
IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
|
||||
wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
|
||||
|
||||
if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
|
||||
IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) {
|
||||
/*
|
||||
* Wa_1607138336:tgl[a0],dg1[a0]
|
||||
* Wa_1607063988:tgl[a0],dg1[a0]
|
||||
@ -1727,7 +2147,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
|
||||
GEN12_DISABLE_POSH_BUSY_FF_DOP_CG);
|
||||
}
|
||||
|
||||
if (IS_TGL_UY_GT_STEP(i915, STEP_A0, STEP_B0)) {
|
||||
if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)) {
|
||||
/*
|
||||
* Wa_1606679103:tgl
|
||||
* (see also Wa_1606682166:icl)
|
||||
@ -1762,7 +2182,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
|
||||
}
|
||||
|
||||
if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
|
||||
IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) ||
|
||||
IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
|
||||
IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
|
||||
/* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */
|
||||
wa_masked_en(wal, GEN7_ROW_CHICKEN2,
|
||||
@ -1775,8 +2195,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
|
||||
wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
|
||||
}
|
||||
|
||||
|
||||
if (IS_DG1_GT_STEP(i915, STEP_A0, STEP_B0) ||
|
||||
if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
|
||||
IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
|
||||
/*
|
||||
* Wa_1607030317:tgl
|
||||
@ -1859,15 +2278,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
|
||||
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
|
||||
VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);
|
||||
|
||||
/* Wa_1407352427:icl,ehl */
|
||||
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
|
||||
PSDUNIT_CLKGATE_DIS);
|
||||
|
||||
/* Wa_1406680159:icl,ehl */
|
||||
wa_write_or(wal,
|
||||
SUBSLICE_UNIT_LEVEL_CLKGATE,
|
||||
GWUNIT_CLKGATE_DIS);
|
||||
|
||||
/*
|
||||
* Wa_1408767742:icl[a2..forever],ehl[all]
|
||||
* Wa_1605460711:icl[a0..c0]
|
||||
@ -2138,7 +2548,7 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
|
||||
struct drm_i915_private *i915 = engine->i915;
|
||||
|
||||
/* WaKBLVECSSemaphoreWaitPoll:kbl */
|
||||
if (IS_KBL_GT_STEP(i915, STEP_A0, STEP_F0)) {
|
||||
if (IS_KBL_GRAPHICS_STEP(i915, STEP_A0, STEP_F0)) {
|
||||
wa_write(wal,
|
||||
RING_SEMA_WAIT_POLL(engine->mmio_base),
|
||||
1);
|
||||
|
@ -35,9 +35,31 @@ static void mock_timeline_unpin(struct intel_timeline *tl)
|
||||
atomic_dec(&tl->pin_count);
|
||||
}
|
||||
|
||||
static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size)
|
||||
{
|
||||
struct i915_address_space *vm = &ggtt->vm;
|
||||
struct drm_i915_private *i915 = vm->i915;
|
||||
struct drm_i915_gem_object *obj;
|
||||
struct i915_vma *vma;
|
||||
|
||||
obj = i915_gem_object_create_internal(i915, size);
|
||||
if (IS_ERR(obj))
|
||||
return ERR_CAST(obj);
|
||||
|
||||
vma = i915_vma_instance(obj, vm, NULL);
|
||||
if (IS_ERR(vma))
|
||||
goto err;
|
||||
|
||||
return vma;
|
||||
|
||||
err:
|
||||
i915_gem_object_put(obj);
|
||||
return vma;
|
||||
}
|
||||
|
||||
static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
|
||||
{
|
||||
const unsigned long sz = PAGE_SIZE / 2;
|
||||
const unsigned long sz = PAGE_SIZE;
|
||||
struct intel_ring *ring;
|
||||
|
||||
ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL);
|
||||
@ -50,15 +72,11 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
|
||||
ring->vaddr = (void *)(ring + 1);
|
||||
atomic_set(&ring->pin_count, 1);
|
||||
|
||||
ring->vma = i915_vma_alloc();
|
||||
if (!ring->vma) {
|
||||
ring->vma = create_ring_vma(engine->gt->ggtt, PAGE_SIZE);
|
||||
if (IS_ERR(ring->vma)) {
|
||||
kfree(ring);
|
||||
return NULL;
|
||||
}
|
||||
i915_active_init(&ring->vma->active, NULL, NULL, 0);
|
||||
__set_bit(I915_VMA_GGTT_BIT, __i915_vma_flags(ring->vma));
|
||||
__set_bit(DRM_MM_NODE_ALLOCATED_BIT, &ring->vma->node.flags);
|
||||
ring->vma->node.size = sz;
|
||||
|
||||
intel_ring_update_space(ring);
|
||||
|
||||
@ -67,8 +85,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
|
||||
|
||||
static void mock_ring_free(struct intel_ring *ring)
|
||||
{
|
||||
i915_active_fini(&ring->vma->active);
|
||||
i915_vma_free(ring->vma);
|
||||
i915_vma_put(ring->vma);
|
||||
|
||||
kfree(ring);
|
||||
}
|
||||
@ -125,6 +142,7 @@ static void mock_context_unpin(struct intel_context *ce)
|
||||
|
||||
static void mock_context_post_unpin(struct intel_context *ce)
|
||||
{
|
||||
i915_vma_unpin(ce->ring->vma);
|
||||
}
|
||||
|
||||
static void mock_context_destroy(struct kref *ref)
|
||||
@ -169,7 +187,7 @@ static int mock_context_alloc(struct intel_context *ce)
|
||||
static int mock_context_pre_pin(struct intel_context *ce,
|
||||
struct i915_gem_ww_ctx *ww, void **unused)
|
||||
{
|
||||
return 0;
|
||||
return i915_vma_pin_ww(ce->ring->vma, ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
|
||||
}
|
||||
|
||||
static int mock_context_pin(struct intel_context *ce, void *unused)
|
||||
|
@ -214,6 +214,31 @@ static int live_engine_timestamps(void *arg)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __spin_until_busier(struct intel_engine_cs *engine, ktime_t busyness)
|
||||
{
|
||||
ktime_t start, unused, dt;
|
||||
|
||||
if (!intel_engine_uses_guc(engine))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* In GuC mode of submission, the busyness stats may get updated after
|
||||
* the batch starts running. Poll for a change in busyness and timeout
|
||||
* after 500 us.
|
||||
*/
|
||||
start = ktime_get();
|
||||
while (intel_engine_get_busy_time(engine, &unused) == busyness) {
|
||||
dt = ktime_get() - start;
|
||||
if (dt > 500000) {
|
||||
pr_err("active wait timed out %lld\n", dt);
|
||||
ENGINE_TRACE(engine, "active wait time out %lld\n", dt);
|
||||
return -ETIME;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int live_engine_busy_stats(void *arg)
|
||||
{
|
||||
struct intel_gt *gt = arg;
|
||||
@ -232,6 +257,7 @@ static int live_engine_busy_stats(void *arg)
|
||||
GEM_BUG_ON(intel_gt_pm_is_awake(gt));
|
||||
for_each_engine(engine, gt, id) {
|
||||
struct i915_request *rq;
|
||||
ktime_t busyness, dummy;
|
||||
ktime_t de, dt;
|
||||
ktime_t t[2];
|
||||
|
||||
@ -274,16 +300,23 @@ static int live_engine_busy_stats(void *arg)
|
||||
}
|
||||
i915_request_add(rq);
|
||||
|
||||
busyness = intel_engine_get_busy_time(engine, &dummy);
|
||||
if (!igt_wait_for_spinner(&spin, rq)) {
|
||||
intel_gt_set_wedged(engine->gt);
|
||||
err = -ETIME;
|
||||
goto end;
|
||||
}
|
||||
|
||||
err = __spin_until_busier(engine, busyness);
|
||||
if (err) {
|
||||
GEM_TRACE_DUMP();
|
||||
goto end;
|
||||
}
|
||||
|
||||
ENGINE_TRACE(engine, "measuring busy time\n");
|
||||
preempt_disable();
|
||||
de = intel_engine_get_busy_time(engine, &t[0]);
|
||||
udelay(100);
|
||||
mdelay(10);
|
||||
de = ktime_sub(intel_engine_get_busy_time(engine, &t[1]), de);
|
||||
preempt_enable();
|
||||
dt = ktime_sub(t[1], t[0]);
|
||||
|
@ -471,7 +471,8 @@ static int igt_reset_nop_engine(void *arg)
|
||||
count = 0;
|
||||
|
||||
st_engine_heartbeat_disable(engine);
|
||||
set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
|
||||
GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id,
|
||||
&gt->reset.flags));
|
||||
do {
|
||||
int i;
|
||||
|
||||
@ -528,7 +529,7 @@ static int igt_reset_nop_engine(void *arg)
|
||||
break;
|
||||
}
|
||||
} while (time_before(jiffies, end_time));
|
||||
clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
|
||||
clear_and_wake_up_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
|
||||
st_engine_heartbeat_enable(engine);
|
||||
|
||||
pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
|
||||
@ -582,7 +583,8 @@ static int igt_reset_fail_engine(void *arg)
|
||||
}
|
||||
|
||||
st_engine_heartbeat_disable(engine);
|
||||
set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
|
||||
GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id,
|
||||
&gt->reset.flags));
|
||||
|
||||
force_reset_timeout(engine);
|
||||
err = intel_engine_reset(engine, NULL);
|
||||
@ -679,7 +681,7 @@ static int igt_reset_fail_engine(void *arg)
|
||||
out:
|
||||
pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
|
||||
skip:
|
||||
clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
|
||||
clear_and_wake_up_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
|
||||
st_engine_heartbeat_enable(engine);
|
||||
intel_context_put(ce);
|
||||
|
||||
@ -734,7 +736,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
|
||||
reset_engine_count = i915_reset_engine_count(global, engine);
|
||||
|
||||
st_engine_heartbeat_disable(engine);
|
||||
set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
|
||||
GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id,
|
||||
&gt->reset.flags));
|
||||
count = 0;
|
||||
do {
|
||||
struct i915_request *rq = NULL;
|
||||
@ -824,7 +827,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
|
||||
if (err)
|
||||
break;
|
||||
} while (time_before(jiffies, end_time));
|
||||
clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
|
||||
clear_and_wake_up_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
|
||||
st_engine_heartbeat_enable(engine);
|
||||
pr_info("%s: Completed %lu %s resets\n",
|
||||
engine->name, count, active ? "active" : "idle");
|
||||
@ -1042,7 +1045,8 @@ static int __igt_reset_engines(struct intel_gt *gt,
|
||||
yield(); /* start all threads before we begin */
|
||||
|
||||
st_engine_heartbeat_disable_no_pm(engine);
|
||||
set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
|
||||
GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id,
|
||||
&gt->reset.flags));
|
||||
do {
|
||||
struct i915_request *rq = NULL;
|
||||
struct intel_selftest_saved_policy saved;
|
||||
@ -1165,7 +1169,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
|
||||
if (err)
|
||||
break;
|
||||
} while (time_before(jiffies, end_time));
|
||||
clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
|
||||
clear_and_wake_up_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
|
||||
st_engine_heartbeat_enable_no_pm(engine);
|
||||
|
||||
pr_info("i915_reset_engine(%s:%s): %lu resets\n",
|
||||
|
@ -49,6 +49,7 @@ static int copy(struct intel_migrate *migrate,
if (IS_ERR(src))
return 0;

sz = src->base.size;
dst = i915_gem_object_create_internal(i915, sz);
if (IS_ERR(dst))
goto err_free_src;

@ -144,6 +144,7 @@ enum intel_guc_action {
INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600,
INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601,
INTEL_GUC_ACTION_RESET_CLIENT = 0x5507,
INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
INTEL_GUC_ACTION_LIMIT
};

@ -138,6 +138,8 @@ struct intel_guc {
u32 ads_regset_size;
/** @ads_golden_ctxt_size: size of the golden contexts in the ADS */
u32 ads_golden_ctxt_size;
/** @ads_engine_usage_size: size of engine usage in the ADS */
u32 ads_engine_usage_size;

/** @lrc_desc_pool: object allocated to hold the GuC LRC descriptor pool */
struct i915_vma *lrc_desc_pool;
@ -172,6 +174,34 @@ struct intel_guc {

/** @send_mutex: used to serialize the intel_guc_send actions */
struct mutex send_mutex;

/**
* @timestamp: GT timestamp object that stores a copy of the timestamp
* and adjusts it for overflow using a worker.
*/
struct {
/**
* @lock: Lock protecting the below fields and the engine stats.
*/
spinlock_t lock;

/**
* @gt_stamp: 64 bit extended value of the GT timestamp.
*/
u64 gt_stamp;

/**
* @ping_delay: Period for polling the GT timestamp for
* overflow.
*/
unsigned long ping_delay;

/**
* @work: Periodic work to adjust GT timestamp, engine and
* context usage for overflows.
*/
struct delayed_work work;
} timestamp;
};

static inline struct intel_guc *log_to_guc(struct intel_guc_log *log)

@ -26,6 +26,8 @@
* | guc_policies |
* +---------------------------------------+
* | guc_gt_system_info |
* +---------------------------------------+
* | guc_engine_usage |
* +---------------------------------------+ <== static
* | guc_mmio_reg[countA] (engine 0.0) |
* | guc_mmio_reg[countB] (engine 0.1) |
@ -47,6 +49,7 @@ struct __guc_ads_blob {
struct guc_ads ads;
struct guc_policies policies;
struct guc_gt_system_info system_info;
struct guc_engine_usage engine_usage;
/* From here on, location is dynamic! Refer to above diagram. */
struct guc_mmio_reg regset[0];
} __packed;
@ -628,3 +631,21 @@ void intel_guc_ads_reset(struct intel_guc *guc)

guc_ads_private_data_reset(guc);
}

u32 intel_guc_engine_usage_offset(struct intel_guc *guc)
{
struct __guc_ads_blob *blob = guc->ads_blob;
u32 base = intel_guc_ggtt_offset(guc, guc->ads_vma);
u32 offset = base + ptr_offset(blob, engine_usage);

return offset;
}

struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine)
{
struct intel_guc *guc = &engine->gt->uc.guc;
struct __guc_ads_blob *blob = guc->ads_blob;
u8 guc_class = engine_class_to_guc_class(engine->class);

return &blob->engine_usage.engines[guc_class][ilog2(engine->logical_mask)];
}

@ -6,8 +6,11 @@
#ifndef _INTEL_GUC_ADS_H_
#define _INTEL_GUC_ADS_H_

#include <linux/types.h>

struct intel_guc;
struct drm_printer;
struct intel_engine_cs;

int intel_guc_ads_create(struct intel_guc *guc);
void intel_guc_ads_destroy(struct intel_guc *guc);
@ -15,5 +18,7 @@ void intel_guc_ads_init_late(struct intel_guc *guc);
void intel_guc_ads_reset(struct intel_guc *guc);
void intel_guc_ads_print_policy_info(struct intel_guc *guc,
struct drm_printer *p);
struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine);
u32 intel_guc_engine_usage_offset(struct intel_guc *guc);

#endif

@ -294,6 +294,19 @@ struct guc_ads {
u32 reserved[15];
} __packed;

/* Engine usage stats */
struct guc_engine_usage_record {
u32 current_context_index;
u32 last_switch_in_stamp;
u32 reserved0;
u32 total_runtime;
u32 reserved1[4];
} __packed;

struct guc_engine_usage {
struct guc_engine_usage_record engines[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
} __packed;

/* GuC logging structures */

enum guc_log_buffer_type {

@ -79,29 +79,6 @@ static void slpc_mem_set_disabled(struct slpc_shared_data *data,
|
||||
slpc_mem_set_param(data, enable_id, 0);
|
||||
}
|
||||
|
||||
int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
|
||||
{
|
||||
struct intel_guc *guc = slpc_to_guc(slpc);
|
||||
struct drm_i915_private *i915 = slpc_to_i915(slpc);
|
||||
u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
|
||||
int err;
|
||||
|
||||
GEM_BUG_ON(slpc->vma);
|
||||
|
||||
err = intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr);
|
||||
if (unlikely(err)) {
|
||||
drm_err(&i915->drm,
|
||||
"Failed to allocate SLPC struct (err=%pe)\n",
|
||||
ERR_PTR(err));
|
||||
return err;
|
||||
}
|
||||
|
||||
slpc->max_freq_softlimit = 0;
|
||||
slpc->min_freq_softlimit = 0;
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static u32 slpc_get_state(struct intel_guc_slpc *slpc)
|
||||
{
|
||||
struct slpc_shared_data *data;
|
||||
@ -203,6 +180,86 @@ static int slpc_unset_param(struct intel_guc_slpc *slpc,
|
||||
return guc_action_slpc_unset_param(guc, id);
|
||||
}
|
||||
|
||||
static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
|
||||
{
|
||||
struct drm_i915_private *i915 = slpc_to_i915(slpc);
|
||||
struct intel_guc *guc = slpc_to_guc(slpc);
|
||||
intel_wakeref_t wakeref;
|
||||
int ret = 0;
|
||||
|
||||
lockdep_assert_held(&slpc->lock);
|
||||
|
||||
if (!intel_guc_is_ready(guc))
|
||||
return -ENODEV;
|
||||
|
||||
/*
|
||||
* This function is a little different as compared to
|
||||
* intel_guc_slpc_set_min_freq(). Softlimit will not be updated
|
||||
* here since this is used to temporarily change min freq,
|
||||
* for example, during a waitboost. Caller is responsible for
|
||||
* checking bounds.
|
||||
*/
|
||||
|
||||
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
|
||||
ret = slpc_set_param(slpc,
|
||||
SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
|
||||
freq);
|
||||
if (ret)
|
||||
drm_err(&i915->drm, "Unable to force min freq to %u: %d",
|
||||
freq, ret);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void slpc_boost_work(struct work_struct *work)
|
||||
{
|
||||
struct intel_guc_slpc *slpc = container_of(work, typeof(*slpc), boost_work);
|
||||
|
||||
/*
|
||||
* Raise min freq to boost. It's possible that
|
||||
* this is greater than current max. But it will
|
||||
* certainly be limited by RP0. An error setting
|
||||
* the min param is not fatal.
|
||||
*/
|
||||
mutex_lock(&slpc->lock);
|
||||
if (atomic_read(&slpc->num_waiters)) {
|
||||
slpc_force_min_freq(slpc, slpc->boost_freq);
|
||||
slpc->num_boosts++;
|
||||
}
|
||||
mutex_unlock(&slpc->lock);
|
||||
}
|
||||
|
||||
int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
|
||||
{
|
||||
struct intel_guc *guc = slpc_to_guc(slpc);
|
||||
struct drm_i915_private *i915 = slpc_to_i915(slpc);
|
||||
u32 size = PAGE_ALIGN(sizeof(struct slpc_shared_data));
|
||||
int err;
|
||||
|
||||
GEM_BUG_ON(slpc->vma);
|
||||
|
||||
err = intel_guc_allocate_and_map_vma(guc, size, &slpc->vma, (void **)&slpc->vaddr);
|
||||
if (unlikely(err)) {
|
||||
drm_err(&i915->drm,
|
||||
"Failed to allocate SLPC struct (err=%pe)\n",
|
||||
ERR_PTR(err));
|
||||
return err;
|
||||
}
|
||||
|
||||
slpc->max_freq_softlimit = 0;
|
||||
slpc->min_freq_softlimit = 0;
|
||||
|
||||
slpc->boost_freq = 0;
|
||||
atomic_set(&slpc->num_waiters, 0);
|
||||
slpc->num_boosts = 0;
|
||||
|
||||
mutex_init(&slpc->lock);
|
||||
INIT_WORK(&slpc->boost_work, slpc_boost_work);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static const char *slpc_global_state_to_string(enum slpc_global_state state)
|
||||
{
|
||||
switch (state) {
|
||||
@ -393,7 +450,11 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val)
|
||||
val > slpc->max_freq_softlimit)
|
||||
return -EINVAL;
|
||||
|
||||
/* Need a lock now since waitboost can be modifying min as well */
|
||||
mutex_lock(&slpc->lock);
|
||||
|
||||
with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
|
||||
|
||||
ret = slpc_set_param(slpc,
|
||||
SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
|
||||
val);
|
||||
@ -406,6 +467,8 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val)
|
||||
if (!ret)
|
||||
slpc->min_freq_softlimit = val;
|
||||
|
||||
mutex_unlock(&slpc->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -522,6 +585,9 @@ static void slpc_get_rp_values(struct intel_guc_slpc *slpc)
|
||||
GT_FREQUENCY_MULTIPLIER;
|
||||
slpc->min_freq = REG_FIELD_GET(RPN_CAP_MASK, rp_state_cap) *
|
||||
GT_FREQUENCY_MULTIPLIER;
|
||||
|
||||
if (!slpc->boost_freq)
|
||||
slpc->boost_freq = slpc->rp0_freq;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -588,6 +654,47 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int intel_guc_slpc_set_boost_freq(struct intel_guc_slpc *slpc, u32 val)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (val < slpc->min_freq || val > slpc->rp0_freq)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&slpc->lock);
|
||||
|
||||
if (slpc->boost_freq != val) {
|
||||
/* Apply only if there are active waiters */
|
||||
if (atomic_read(&slpc->num_waiters)) {
|
||||
ret = slpc_force_min_freq(slpc, val);
|
||||
if (ret) {
|
||||
ret = -EIO;
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
slpc->boost_freq = val;
|
||||
}
|
||||
|
||||
done:
|
||||
mutex_unlock(&slpc->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc)
|
||||
{
|
||||
/*
|
||||
* Return min back to the softlimit.
|
||||
* This is called during request retire,
|
||||
* so we don't need to fail that if the
|
||||
* set_param fails.
|
||||
*/
|
||||
mutex_lock(&slpc->lock);
|
||||
if (atomic_dec_and_test(&slpc->num_waiters))
|
||||
slpc_force_min_freq(slpc, slpc->min_freq_softlimit);
|
||||
mutex_unlock(&slpc->lock);
|
||||
}
|
||||
|
||||
int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p)
|
||||
{
|
||||
struct drm_i915_private *i915 = slpc_to_i915(slpc);
|
||||
@ -611,6 +718,8 @@ int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p
|
||||
slpc_decode_max_freq(slpc));
|
||||
drm_printf(p, "\tMin freq: %u MHz\n",
|
||||
slpc_decode_min_freq(slpc));
|
||||
drm_printf(p, "\twaitboosts: %u\n",
|
||||
slpc->num_boosts);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -34,9 +34,12 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc);
|
||||
void intel_guc_slpc_fini(struct intel_guc_slpc *slpc);
|
||||
int intel_guc_slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 val);
|
||||
int intel_guc_slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 val);
|
||||
int intel_guc_slpc_set_boost_freq(struct intel_guc_slpc *slpc, u32 val);
|
||||
int intel_guc_slpc_get_max_freq(struct intel_guc_slpc *slpc, u32 *val);
|
||||
int intel_guc_slpc_get_min_freq(struct intel_guc_slpc *slpc, u32 *val);
|
||||
int intel_guc_slpc_print_info(struct intel_guc_slpc *slpc, struct drm_printer *p);
|
||||
void intel_guc_pm_intrmsk_enable(struct intel_gt *gt);
|
||||
void intel_guc_slpc_boost(struct intel_guc_slpc *slpc);
|
||||
void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc);
|
||||
|
||||
#endif
|
||||
|
@ -6,6 +6,9 @@
|
||||
#ifndef _INTEL_GUC_SLPC_TYPES_H_
|
||||
#define _INTEL_GUC_SLPC_TYPES_H_
|
||||
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#define SLPC_RESET_TIMEOUT_MS 5
|
||||
@ -20,10 +23,20 @@ struct intel_guc_slpc {
|
||||
u32 min_freq;
|
||||
u32 rp0_freq;
|
||||
u32 rp1_freq;
|
||||
u32 boost_freq;
|
||||
|
||||
/* frequency softlimits */
|
||||
u32 min_freq_softlimit;
|
||||
u32 max_freq_softlimit;
|
||||
|
||||
/* Protects set/reset of boost freq
|
||||
* and value of num_waiters
|
||||
*/
|
||||
struct mutex lock;
|
||||
|
||||
struct work_struct boost_work;
|
||||
atomic_t num_waiters;
|
||||
u32 num_boosts;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -13,6 +13,7 @@
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_clock_utils.h"
#include "gt/intel_gt_irq.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_gt_requests.h"
@ -21,6 +22,7 @@
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h"

#include "intel_guc_ads.h"
#include "intel_guc_submission.h"

#include "i915_drv.h"
@ -1077,6 +1079,271 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
xa_unlock_irqrestore(&guc->context_lookup, flags);
}

/*
* GuC stores busyness stats for each engine at context in/out boundaries. A
* context 'in' logs execution start time, 'out' adds in -> out delta to total.
* i915/kmd accesses 'start', 'total' and 'context id' from memory shared with
* GuC.
*
* __i915_pmu_event_read samples engine busyness. When sampling, if context id
* is valid (!= ~0) and start is non-zero, the engine is considered to be
* active. For an active engine total busyness = total + (now - start), where
* 'now' is the time at which the busyness is sampled. For inactive engine,
* total busyness = total.
*
* All times are captured from GUCPMTIMESTAMP reg and are in gt clock domain.
*
* The start and total values provided by GuC are 32 bits and wrap around in a
* few minutes. Since perf pmu provides busyness as 64 bit monotonically
* increasing ns values, there is a need for this implementation to account for
* overflows and extend the GuC provided values to 64 bits before returning
* busyness to the user. In order to do that, a worker runs periodically at
* frequency = 1/8th the time it takes for the timestamp to wrap (i.e. once in
* 27 seconds for a gt clock frequency of 19.2 MHz).
*/

#define WRAP_TIME_CLKS U32_MAX
#define POLL_TIME_CLKS (WRAP_TIME_CLKS >> 3)

static void
__extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start)
{
u32 gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
u32 gt_stamp_last = lower_32_bits(guc->timestamp.gt_stamp);

if (new_start == lower_32_bits(*prev_start))
return;

if (new_start < gt_stamp_last &&
(new_start - gt_stamp_last) <= POLL_TIME_CLKS)
gt_stamp_hi++;

if (new_start > gt_stamp_last &&
(gt_stamp_last - new_start) <= POLL_TIME_CLKS && gt_stamp_hi)
gt_stamp_hi--;

*prev_start = ((u64)gt_stamp_hi << 32) | new_start;
}

static void guc_update_engine_gt_clks(struct intel_engine_cs *engine)
{
struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine);
struct intel_engine_guc_stats *stats = &engine->stats.guc;
struct intel_guc *guc = &engine->gt->uc.guc;
u32 last_switch = rec->last_switch_in_stamp;
u32 ctx_id = rec->current_context_index;
u32 total = rec->total_runtime;

lockdep_assert_held(&guc->timestamp.lock);

stats->running = ctx_id != ~0U && last_switch;
if (stats->running)
__extend_last_switch(guc, &stats->start_gt_clk, last_switch);

/*
* Instead of adjusting the total for overflow, just add the
* difference from previous sample stats->total_gt_clks
*/
if (total && total != ~0U) {
stats->total_gt_clks += (u32)(total - stats->prev_total);
stats->prev_total = total;
}
}

static void guc_update_pm_timestamp(struct intel_guc *guc,
struct intel_engine_cs *engine,
ktime_t *now)
{
u32 gt_stamp_now, gt_stamp_hi;

lockdep_assert_held(&guc->timestamp.lock);

gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp);
gt_stamp_now = intel_uncore_read(engine->uncore,
RING_TIMESTAMP(engine->mmio_base));
*now = ktime_get();

if (gt_stamp_now < lower_32_bits(guc->timestamp.gt_stamp))
gt_stamp_hi++;

guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_now;
}

/*
* Unlike the execlist mode of submission total and active times are in terms of
* gt clocks. The *now parameter is retained to return the cpu time at which the
* busyness was sampled.
*/
static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
{
struct intel_engine_guc_stats stats_saved, *stats = &engine->stats.guc;
struct i915_gpu_error *gpu_error = &engine->i915->gpu_error;
struct intel_gt *gt = engine->gt;
struct intel_guc *guc = &gt->uc.guc;
u64 total, gt_stamp_saved;
unsigned long flags;
u32 reset_count;
bool in_reset;

spin_lock_irqsave(&guc->timestamp.lock, flags);

/*
* If a reset happened, we risk reading partially updated engine
* busyness from GuC, so we just use the driver stored copy of busyness.
* Synchronize with gt reset using reset_count and the
* I915_RESET_BACKOFF flag. Note that reset flow updates the reset_count
* after I915_RESET_BACKOFF flag, so ensure that the reset_count is
* usable by checking the flag afterwards.
*/
reset_count = i915_reset_count(gpu_error);
in_reset = test_bit(I915_RESET_BACKOFF, &gt->reset.flags);

*now = ktime_get();

/*
* The active busyness depends on start_gt_clk and gt_stamp.
* gt_stamp is updated by i915 only when gt is awake and the
* start_gt_clk is derived from GuC state. To get a consistent
* view of activity, we query the GuC state only if gt is awake.
*/
if (intel_gt_pm_get_if_awake(gt) && !in_reset) {
stats_saved = *stats;
gt_stamp_saved = guc->timestamp.gt_stamp;
guc_update_engine_gt_clks(engine);
guc_update_pm_timestamp(guc, engine, now);
intel_gt_pm_put_async(gt);
if (i915_reset_count(gpu_error) != reset_count) {
*stats = stats_saved;
guc->timestamp.gt_stamp = gt_stamp_saved;
}
}

total = intel_gt_clock_interval_to_ns(gt, stats->total_gt_clks);
if (stats->running) {
u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk;

total += intel_gt_clock_interval_to_ns(gt, clk);
}

spin_unlock_irqrestore(&guc->timestamp.lock, flags);

return ns_to_ktime(total);
}

static void __reset_guc_busyness_stats(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
struct intel_engine_cs *engine;
enum intel_engine_id id;
unsigned long flags;
ktime_t unused;

cancel_delayed_work_sync(&guc->timestamp.work);

spin_lock_irqsave(&guc->timestamp.lock, flags);

for_each_engine(engine, gt, id) {
guc_update_pm_timestamp(guc, engine, &unused);
guc_update_engine_gt_clks(engine);
engine->stats.guc.prev_total = 0;
}

spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}

static void __update_guc_busyness_stats(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
struct intel_engine_cs *engine;
enum intel_engine_id id;
unsigned long flags;
ktime_t unused;

spin_lock_irqsave(&guc->timestamp.lock, flags);
for_each_engine(engine, gt, id) {
guc_update_pm_timestamp(guc, engine, &unused);
guc_update_engine_gt_clks(engine);
}
spin_unlock_irqrestore(&guc->timestamp.lock, flags);
}

static void guc_timestamp_ping(struct work_struct *wrk)
{
struct intel_guc *guc = container_of(wrk, typeof(*guc),
timestamp.work.work);
struct intel_uc *uc = container_of(guc, typeof(*uc), guc);
struct intel_gt *gt = guc_to_gt(guc);
intel_wakeref_t wakeref;
int srcu, ret;

/*
* Synchronize with gt reset to make sure the worker does not
* corrupt the engine/guc stats.
*/
ret = intel_gt_reset_trylock(gt, &srcu);
if (ret)
return;

with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
__update_guc_busyness_stats(guc);

intel_gt_reset_unlock(gt, srcu);

mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
guc->timestamp.ping_delay);
}

static int guc_action_enable_usage_stats(struct intel_guc *guc)
{
u32 offset = intel_guc_engine_usage_offset(guc);
u32 action[] = {
INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF,
offset,
0,
};

return intel_guc_send(guc, action, ARRAY_SIZE(action));
}

static void guc_init_engine_stats(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
intel_wakeref_t wakeref;

mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
guc->timestamp.ping_delay);

with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
int ret = guc_action_enable_usage_stats(guc);

if (ret)
drm_err(&gt->i915->drm,
"Failed to enable usage stats: %d!\n", ret);
}
}

void intel_guc_busyness_park(struct intel_gt *gt)
{
struct intel_guc *guc = &gt->uc.guc;

if (!guc_submission_initialized(guc))
return;

cancel_delayed_work(&guc->timestamp.work);
__update_guc_busyness_stats(guc);
}

void intel_guc_busyness_unpark(struct intel_gt *gt)
{
struct intel_guc *guc = &gt->uc.guc;

if (!guc_submission_initialized(guc))
return;

mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
guc->timestamp.ping_delay);
}

static inline bool
submission_disabled(struct intel_guc *guc)
{
@ -1138,6 +1405,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc)
intel_gt_park_heartbeats(guc_to_gt(guc));
disable_submission(guc);
guc->interrupts.disable(guc);
__reset_guc_busyness_stats(guc);

/* Flush IRQ handler */
spin_lock_irq(&guc_to_gt(guc)->irq_lock);
@ -1484,6 +1752,7 @@ static void destroyed_worker_func(struct work_struct *w);
*/
int intel_guc_submission_init(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
int ret;

if (guc->lrc_desc_pool)
@ -1512,6 +1781,10 @@ int intel_guc_submission_init(struct intel_guc *guc)
if (!guc->submission_state.guc_ids_bitmap)
return -ENOMEM;

spin_lock_init(&guc->timestamp.lock);
INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;

return 0;
}

@ -3369,7 +3642,9 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
engine->emit_flush = gen12_emit_flush_xcs;
}
engine->set_default_submission = guc_set_default_submission;
engine->busyness = guc_engine_busyness;

engine->flags |= I915_ENGINE_SUPPORTS_STATS;
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
engine->flags |= I915_ENGINE_HAS_TIMESLICES;

@ -3468,6 +3743,7 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
void intel_guc_submission_enable(struct intel_guc *guc)
{
guc_init_lrc_mapping(guc);
guc_init_engine_stats(guc);
}

void intel_guc_submission_disable(struct intel_guc *guc)
@ -3695,6 +3971,7 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len)
{
struct intel_context *ce;
unsigned long flags;
int desc_idx;

if (unlikely(len != 1)) {
@ -3703,11 +3980,24 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
}

desc_idx = msg[0];

/*
* The context lookup uses the xarray but lookups only require an RCU lock
* not the full spinlock. So take the lock explicitly and keep it until the
* context has been reference count locked to ensure it can't be destroyed
* asynchronously until the reset is done.
*/
xa_lock_irqsave(&guc->context_lookup, flags);
ce = g2h_context_lookup(guc, desc_idx);
if (ce)
intel_context_get(ce);
xa_unlock_irqrestore(&guc->context_lookup, flags);

if (unlikely(!ce))
return -EPROTO;

guc_handle_context_reset(guc, ce);
intel_context_put(ce);

return 0;
}

@ -28,6 +28,8 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc,
void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
struct i915_request *hung_rq,
struct drm_printer *m);
void intel_guc_busyness_park(struct intel_gt *gt);
void intel_guc_busyness_unpark(struct intel_gt *gt);

bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve);

@ -446,17 +446,17 @@ static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
|
||||
|| e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
|
||||
return (e->val64 != 0);
|
||||
else
|
||||
return (e->val64 & _PAGE_PRESENT);
|
||||
return (e->val64 & GEN8_PAGE_PRESENT);
|
||||
}
|
||||
|
||||
static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
|
||||
{
|
||||
e->val64 &= ~_PAGE_PRESENT;
|
||||
e->val64 &= ~GEN8_PAGE_PRESENT;
|
||||
}
|
||||
|
||||
static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e)
|
||||
{
|
||||
e->val64 |= _PAGE_PRESENT;
|
||||
e->val64 |= GEN8_PAGE_PRESENT;
|
||||
}
|
||||
|
||||
static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e)
|
||||
@ -2439,7 +2439,7 @@ static int alloc_scratch_pages(struct intel_vgpu *vgpu,
|
||||
/* The entry parameters like present/writeable/cache type
|
||||
* set to the same as i915's scratch page tree.
|
||||
*/
|
||||
se.val64 |= _PAGE_PRESENT | _PAGE_RW;
|
||||
se.val64 |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
|
||||
if (type == GTT_TYPE_PPGTT_PDE_PT)
|
||||
se.val64 |= PPAT_CACHED;
|
||||
|
||||
@ -2896,7 +2896,7 @@ void intel_gvt_restore_ggtt(struct intel_gvt *gvt)
|
||||
offset = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
|
||||
for (idx = 0; idx < num_low; idx++) {
|
||||
pte = mm->ggtt_mm.host_ggtt_aperture[idx];
|
||||
if (pte & _PAGE_PRESENT)
|
||||
if (pte & GEN8_PAGE_PRESENT)
|
||||
write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
|
||||
}
|
||||
|
||||
@ -2904,7 +2904,7 @@ void intel_gvt_restore_ggtt(struct intel_gvt *gvt)
|
||||
offset = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
|
||||
for (idx = 0; idx < num_hi; idx++) {
|
||||
pte = mm->ggtt_mm.host_ggtt_hidden[idx];
|
||||
if (pte & _PAGE_PRESENT)
|
||||
if (pte & GEN8_PAGE_PRESENT)
|
||||
write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
|
||||
}
|
||||
}
|
||||
|
@ -65,6 +65,7 @@ static int i915_capabilities(struct seq_file *m, void *data)
|
||||
|
||||
intel_device_info_print_static(INTEL_INFO(i915), &p);
|
||||
intel_device_info_print_runtime(RUNTIME_INFO(i915), &p);
|
||||
i915_print_iommu_status(i915, &p);
|
||||
intel_gt_info_print(&i915->gt.info, &p);
|
||||
intel_driver_caps_print(&i915->caps, &p);
|
||||
|
||||
|
@ -418,10 +418,14 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv)
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = intel_uncore_init_mmio(&dev_priv->uncore);
|
||||
ret = intel_uncore_setup_mmio(&dev_priv->uncore);
|
||||
if (ret < 0)
|
||||
goto err_bridge;
|
||||
|
||||
ret = intel_uncore_init_mmio(&dev_priv->uncore);
|
||||
if (ret)
|
||||
goto err_mmio;
|
||||
|
||||
/* Try to make sure MCHBAR is enabled before poking at it */
|
||||
intel_setup_mchbar(dev_priv);
|
||||
intel_device_info_runtime_init(dev_priv);
|
||||
@ -438,6 +442,8 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv)
|
||||
err_uncore:
|
||||
intel_teardown_mchbar(dev_priv);
|
||||
intel_uncore_fini_mmio(&dev_priv->uncore);
|
||||
err_mmio:
|
||||
intel_uncore_cleanup_mmio(&dev_priv->uncore);
|
||||
err_bridge:
|
||||
pci_dev_put(dev_priv->bridge_dev);
|
||||
|
||||
@ -452,6 +458,7 @@ static void i915_driver_mmio_release(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
intel_teardown_mchbar(dev_priv);
|
||||
intel_uncore_fini_mmio(&dev_priv->uncore);
|
||||
intel_uncore_cleanup_mmio(&dev_priv->uncore);
|
||||
pci_dev_put(dev_priv->bridge_dev);
|
||||
}
|
||||
|
||||
@ -734,6 +741,12 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
|
||||
i915_gem_driver_unregister(dev_priv);
|
||||
}
|
||||
|
||||
void
|
||||
i915_print_iommu_status(struct drm_i915_private *i915, struct drm_printer *p)
|
||||
{
|
||||
drm_printf(p, "iommu: %s\n", enableddisabled(intel_vtd_active(i915)));
|
||||
}
|
||||
|
||||
static void i915_welcome_messages(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
if (drm_debug_enabled(DRM_UT_DRIVER)) {
|
||||
@ -749,6 +762,7 @@ static void i915_welcome_messages(struct drm_i915_private *dev_priv)
|
||||
|
||||
intel_device_info_print_static(INTEL_INFO(dev_priv), &p);
|
||||
intel_device_info_print_runtime(RUNTIME_INFO(dev_priv), &p);
|
||||
i915_print_iommu_status(dev_priv, &p);
|
||||
intel_gt_info_print(&dev_priv->gt.info, &p);
|
||||
}
|
||||
|
||||
@ -810,7 +824,7 @@ int i915_driver_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
return PTR_ERR(i915);
|
||||
|
||||
/* Disable nuclear pageflip by default on pre-ILK */
|
||||
if (!i915->params.nuclear_pageflip && match_info->graphics_ver < 5)
|
||||
if (!i915->params.nuclear_pageflip && match_info->graphics.ver < 5)
|
||||
i915->drm.driver_features &= ~DRIVER_ATOMIC;
|
||||
|
||||
/*
|
||||
|
@ -190,8 +190,6 @@ struct i915_hotplug {
|
||||
I915_GEM_DOMAIN_VERTEX)
|
||||
|
||||
struct drm_i915_private;
|
||||
struct i915_mm_struct;
|
||||
struct i915_mmu_object;
|
||||
|
||||
struct drm_i915_file_private {
|
||||
struct drm_i915_private *dev_priv;
|
||||
@ -1324,15 +1322,15 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev)
|
||||
|
||||
#define IP_VER(ver, rel) ((ver) << 8 | (rel))
|
||||
|
||||
#define GRAPHICS_VER(i915) (INTEL_INFO(i915)->graphics_ver)
|
||||
#define GRAPHICS_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->graphics_ver, \
|
||||
INTEL_INFO(i915)->graphics_rel)
|
||||
#define GRAPHICS_VER(i915) (INTEL_INFO(i915)->graphics.ver)
|
||||
#define GRAPHICS_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->graphics.ver, \
|
||||
INTEL_INFO(i915)->graphics.rel)
|
||||
#define IS_GRAPHICS_VER(i915, from, until) \
|
||||
(GRAPHICS_VER(i915) >= (from) && GRAPHICS_VER(i915) <= (until))
|
||||
|
||||
#define MEDIA_VER(i915) (INTEL_INFO(i915)->media_ver)
|
||||
#define MEDIA_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->media_ver, \
|
||||
INTEL_INFO(i915)->media_rel)
|
||||
#define MEDIA_VER(i915) (INTEL_INFO(i915)->media.ver)
|
||||
#define MEDIA_VER_FULL(i915) IP_VER(INTEL_INFO(i915)->media.arch, \
|
||||
INTEL_INFO(i915)->media.rel)
|
||||
#define IS_MEDIA_VER(i915, from, until) \
|
||||
(MEDIA_VER(i915) >= (from) && MEDIA_VER(i915) <= (until))
|
||||
|
||||
@ -1345,15 +1343,20 @@ static inline struct drm_i915_private *pdev_to_i915(struct pci_dev *pdev)
|
||||
#define HAS_DSB(dev_priv) (INTEL_INFO(dev_priv)->display.has_dsb)
|
||||
|
||||
#define INTEL_DISPLAY_STEP(__i915) (RUNTIME_INFO(__i915)->step.display_step)
|
||||
#define INTEL_GT_STEP(__i915) (RUNTIME_INFO(__i915)->step.gt_step)
|
||||
#define INTEL_GRAPHICS_STEP(__i915) (RUNTIME_INFO(__i915)->step.graphics_step)
|
||||
#define INTEL_MEDIA_STEP(__i915) (RUNTIME_INFO(__i915)->step.media_step)
|
||||
|
||||
#define IS_DISPLAY_STEP(__i915, since, until) \
|
||||
(drm_WARN_ON(&(__i915)->drm, INTEL_DISPLAY_STEP(__i915) == STEP_NONE), \
|
||||
INTEL_DISPLAY_STEP(__i915) >= (since) && INTEL_DISPLAY_STEP(__i915) < (until))
|
||||
|
||||
#define IS_GT_STEP(__i915, since, until) \
|
||||
(drm_WARN_ON(&(__i915)->drm, INTEL_GT_STEP(__i915) == STEP_NONE), \
|
||||
INTEL_GT_STEP(__i915) >= (since) && INTEL_GT_STEP(__i915) < (until))
|
||||
#define IS_GRAPHICS_STEP(__i915, since, until) \
|
||||
(drm_WARN_ON(&(__i915)->drm, INTEL_GRAPHICS_STEP(__i915) == STEP_NONE), \
|
||||
INTEL_GRAPHICS_STEP(__i915) >= (since) && INTEL_GRAPHICS_STEP(__i915) < (until))
|
||||
|
||||
#define IS_MEDIA_STEP(__i915, since, until) \
|
||||
(drm_WARN_ON(&(__i915)->drm, INTEL_MEDIA_STEP(__i915) == STEP_NONE), \
|
||||
INTEL_MEDIA_STEP(__i915) >= (since) && INTEL_MEDIA_STEP(__i915) < (until))
|
||||
|
||||
static __always_inline unsigned int
|
||||
__platform_mask_index(const struct intel_runtime_info *info,
|
||||
@ -1526,15 +1529,15 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
|
||||
#define IS_TGL_Y(dev_priv) \
|
||||
IS_SUBPLATFORM(dev_priv, INTEL_TIGERLAKE, INTEL_SUBPLATFORM_ULX)
|
||||
|
||||
#define IS_SKL_GT_STEP(p, since, until) (IS_SKYLAKE(p) && IS_GT_STEP(p, since, until))
|
||||
#define IS_SKL_GRAPHICS_STEP(p, since, until) (IS_SKYLAKE(p) && IS_GRAPHICS_STEP(p, since, until))
|
||||
|
||||
#define IS_KBL_GT_STEP(dev_priv, since, until) \
|
||||
(IS_KABYLAKE(dev_priv) && IS_GT_STEP(dev_priv, since, until))
|
||||
#define IS_KBL_GRAPHICS_STEP(dev_priv, since, until) \
|
||||
(IS_KABYLAKE(dev_priv) && IS_GRAPHICS_STEP(dev_priv, since, until))
|
||||
#define IS_KBL_DISPLAY_STEP(dev_priv, since, until) \
|
||||
(IS_KABYLAKE(dev_priv) && IS_DISPLAY_STEP(dev_priv, since, until))
|
||||
|
||||
#define IS_JSL_EHL_GT_STEP(p, since, until) \
|
||||
(IS_JSL_EHL(p) && IS_GT_STEP(p, since, until))
|
||||
#define IS_JSL_EHL_GRAPHICS_STEP(p, since, until) \
|
||||
(IS_JSL_EHL(p) && IS_GRAPHICS_STEP(p, since, until))
|
||||
#define IS_JSL_EHL_DISPLAY_STEP(p, since, until) \
|
||||
(IS_JSL_EHL(p) && IS_DISPLAY_STEP(p, since, until))
|
||||
|
||||
@ -1542,19 +1545,19 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
|
||||
(IS_TIGERLAKE(__i915) && \
|
||||
IS_DISPLAY_STEP(__i915, since, until))
|
||||
|
||||
#define IS_TGL_UY_GT_STEP(__i915, since, until) \
|
||||
#define IS_TGL_UY_GRAPHICS_STEP(__i915, since, until) \
|
||||
((IS_TGL_U(__i915) || IS_TGL_Y(__i915)) && \
|
||||
IS_GT_STEP(__i915, since, until))
|
||||
IS_GRAPHICS_STEP(__i915, since, until))
|
||||
|
||||
#define IS_TGL_GT_STEP(__i915, since, until) \
|
||||
#define IS_TGL_GRAPHICS_STEP(__i915, since, until) \
|
||||
(IS_TIGERLAKE(__i915) && !(IS_TGL_U(__i915) || IS_TGL_Y(__i915)) && \
|
||||
IS_GT_STEP(__i915, since, until))
|
||||
IS_GRAPHICS_STEP(__i915, since, until))
|
||||
|
||||
#define IS_RKL_DISPLAY_STEP(p, since, until) \
|
||||
(IS_ROCKETLAKE(p) && IS_DISPLAY_STEP(p, since, until))
|
||||
|
||||
#define IS_DG1_GT_STEP(p, since, until) \
|
||||
(IS_DG1(p) && IS_GT_STEP(p, since, until))
|
||||
#define IS_DG1_GRAPHICS_STEP(p, since, until) \
|
||||
(IS_DG1(p) && IS_GRAPHICS_STEP(p, since, until))
|
||||
#define IS_DG1_DISPLAY_STEP(p, since, until) \
|
||||
(IS_DG1(p) && IS_DISPLAY_STEP(p, since, until))
|
||||
|
||||
@ -1562,20 +1565,20 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
|
||||
(IS_ALDERLAKE_S(__i915) && \
|
||||
IS_DISPLAY_STEP(__i915, since, until))
|
||||
|
||||
#define IS_ADLS_GT_STEP(__i915, since, until) \
|
||||
#define IS_ADLS_GRAPHICS_STEP(__i915, since, until) \
|
||||
(IS_ALDERLAKE_S(__i915) && \
|
||||
IS_GT_STEP(__i915, since, until))
|
||||
IS_GRAPHICS_STEP(__i915, since, until))
|
||||
|
||||
#define IS_ADLP_DISPLAY_STEP(__i915, since, until) \
|
||||
(IS_ALDERLAKE_P(__i915) && \
|
||||
IS_DISPLAY_STEP(__i915, since, until))
|
||||
|
||||
#define IS_ADLP_GT_STEP(__i915, since, until) \
|
||||
#define IS_ADLP_GRAPHICS_STEP(__i915, since, until) \
|
||||
(IS_ALDERLAKE_P(__i915) && \
|
||||
IS_GT_STEP(__i915, since, until))
|
||||
IS_GRAPHICS_STEP(__i915, since, until))
|
||||
|
||||
#define IS_XEHPSDV_GT_STEP(__i915, since, until) \
|
||||
(IS_XEHPSDV(__i915) && IS_GT_STEP(__i915, since, until))
|
||||
#define IS_XEHPSDV_GRAPHICS_STEP(__i915, since, until) \
|
||||
(IS_XEHPSDV(__i915) && IS_GRAPHICS_STEP(__i915, since, until))
|
||||
|
||||
/*
|
||||
* DG2 hardware steppings are a bit unusual. The hardware design was forked
|
||||
@ -1591,9 +1594,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
|
||||
* and stepping-specific logic will be applied with a general DG2-wide stepping
|
||||
* number.
|
||||
*/
|
||||
#define IS_DG2_GT_STEP(__i915, variant, since, until) \
|
||||
#define IS_DG2_GRAPHICS_STEP(__i915, variant, since, until) \
|
||||
(IS_SUBPLATFORM(__i915, INTEL_DG2, INTEL_SUBPLATFORM_##variant) && \
|
||||
IS_GT_STEP(__i915, since, until))
|
||||
IS_GRAPHICS_STEP(__i915, since, until))
|
||||
|
||||
#define IS_DG2_DISP_STEP(__i915, since, until) \
|
||||
(IS_DG2(__i915) && \
|
||||
@ -1757,26 +1760,27 @@ static inline bool run_as_guest(void)
|
||||
#define HAS_D12_PLANE_MINIMIZATION(dev_priv) (IS_ROCKETLAKE(dev_priv) || \
|
||||
IS_ALDERLAKE_S(dev_priv))
|
||||
|
||||
static inline bool intel_vtd_active(void)
|
||||
static inline bool intel_vtd_active(struct drm_i915_private *i915)
|
||||
{
|
||||
#ifdef CONFIG_INTEL_IOMMU
|
||||
if (intel_iommu_gfx_mapped)
|
||||
if (device_iommu_mapped(i915->drm.dev))
|
||||
return true;
|
||||
#endif
|
||||
|
||||
/* Running as a guest, we assume the host is enforcing VT'd */
|
||||
return run_as_guest();
|
||||
}
|
||||
|
||||
void
|
||||
i915_print_iommu_status(struct drm_i915_private *i915, struct drm_printer *p);
|
||||
|
||||
static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
return GRAPHICS_VER(dev_priv) >= 6 && intel_vtd_active();
|
||||
return GRAPHICS_VER(dev_priv) >= 6 && intel_vtd_active(dev_priv);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915)
|
||||
{
|
||||
return IS_BROXTON(i915) && intel_vtd_active();
|
||||
return IS_BROXTON(i915) && intel_vtd_active(i915);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
@ -1806,6 +1810,7 @@ static inline void i915_gem_drain_freed_objects(struct drm_i915_private *i915)
|
||||
*/
|
||||
while (atomic_read(&i915->mm.free_count)) {
|
||||
flush_work(&i915->mm.free_work);
|
||||
flush_delayed_work(&i915->bdev.wq);
|
||||
rcu_barrier();
|
||||
}
|
||||
}
|
||||
@ -1920,6 +1925,10 @@ int i915_gem_evict_vm(struct i915_address_space *vm);
|
||||
struct drm_i915_gem_object *
|
||||
i915_gem_object_create_internal(struct drm_i915_private *dev_priv,
|
||||
phys_addr_t size);
|
||||
struct drm_i915_gem_object *
|
||||
__i915_gem_object_create_internal(struct drm_i915_private *dev_priv,
|
||||
const struct drm_i915_gem_object_ops *ops,
|
||||
phys_addr_t size);
|
||||
|
||||
/* i915_gem_tiling.c */
|
||||
static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
|
||||
|
@ -764,7 +764,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
|
||||
* perspective, requiring manual detiling by the client.
|
||||
*/
|
||||
if (!i915_gem_object_has_struct_page(obj) ||
|
||||
cpu_write_needs_clflush(obj))
|
||||
i915_gem_cpu_write_needs_clflush(obj))
|
||||
/* Note that the gtt paths might fail with non-page-backed user
|
||||
* pointers (e.g. gtt mappings when moving data between
|
||||
* textures). Fallback to the shmem path in that case.
|
||||
@ -1005,7 +1005,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
|
||||
obj->ops->adjust_lru(obj);
|
||||
}
|
||||
|
||||
if (i915_gem_object_has_pages(obj)) {
|
||||
if (i915_gem_object_has_pages(obj) ||
|
||||
i915_gem_object_has_self_managed_shrink_list(obj)) {
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&i915->mm.obj_lock, flags);
|
||||
|
@ -48,8 +48,9 @@
|
||||
#include "i915_gpu_error.h"
|
||||
#include "i915_memcpy.h"
|
||||
#include "i915_scatterlist.h"
|
||||
#include "i915_vma_snapshot.h"
|
||||
|
||||
#define ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
|
||||
#define ALLOW_FAIL (__GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
|
||||
#define ATOMIC_MAYFAIL (GFP_ATOMIC | __GFP_NOWARN)
|
||||
|
||||
static void __sg_set_buf(struct scatterlist *sg,
|
||||
@ -275,16 +276,16 @@ static bool compress_start(struct i915_vma_compress *c)
|
||||
static void *compress_next_page(struct i915_vma_compress *c,
|
||||
struct i915_vma_coredump *dst)
|
||||
{
|
||||
void *page;
|
||||
void *page_addr;
|
||||
struct page *page;
|
||||
|
||||
if (dst->page_count >= dst->num_pages)
|
||||
return ERR_PTR(-ENOSPC);
|
||||
|
||||
page = pool_alloc(&c->pool, ALLOW_FAIL);
|
||||
if (!page)
|
||||
page_addr = pool_alloc(&c->pool, ALLOW_FAIL);
|
||||
if (!page_addr)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
return dst->pages[dst->page_count++] = page;
|
||||
page = virt_to_page(page_addr);
|
||||
list_add_tail(&page->lru, &dst->page_list);
|
||||
return page_addr;
|
||||
}
|
||||
|
||||
static int compress_page(struct i915_vma_compress *c,
|
||||
@ -397,7 +398,7 @@ static int compress_page(struct i915_vma_compress *c,
|
||||
|
||||
if (!(wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE)))
|
||||
memcpy(ptr, src, PAGE_SIZE);
|
||||
dst->pages[dst->page_count++] = ptr;
|
||||
list_add_tail(&virt_to_page(ptr)->lru, &dst->page_list);
|
||||
cond_resched();
|
||||
|
||||
return 0;
|
||||
@ -614,7 +615,7 @@ static void print_error_vma(struct drm_i915_error_state_buf *m,
|
||||
const struct i915_vma_coredump *vma)
|
||||
{
|
||||
char out[ASCII85_BUFSZ];
|
||||
int page;
|
||||
struct page *page;
|
||||
|
||||
if (!vma)
|
||||
return;
|
||||
@ -628,16 +629,17 @@ static void print_error_vma(struct drm_i915_error_state_buf *m,
|
||||
err_printf(m, "gtt_page_sizes = 0x%08x\n", vma->gtt_page_sizes);
|
||||
|
||||
err_compression_marker(m);
|
||||
for (page = 0; page < vma->page_count; page++) {
|
||||
list_for_each_entry(page, &vma->page_list, lru) {
|
||||
int i, len;
|
||||
const u32 *addr = page_address(page);
|
||||
|
||||
len = PAGE_SIZE;
|
||||
if (page == vma->page_count - 1)
|
||||
if (page == list_last_entry(&vma->page_list, typeof(*page), lru))
|
||||
len -= vma->unused;
|
||||
len = ascii85_encode_len(len);
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
err_puts(m, ascii85_encode(vma->pages[page][i], out));
|
||||
err_puts(m, ascii85_encode(addr[i], out));
|
||||
}
|
||||
err_puts(m, "\n");
|
||||
}
|
||||
@ -946,10 +948,12 @@ static void i915_vma_coredump_free(struct i915_vma_coredump *vma)
|
||||
{
|
||||
while (vma) {
|
||||
struct i915_vma_coredump *next = vma->next;
|
||||
int page;
|
||||
struct page *page, *n;
|
||||
|
||||
for (page = 0; page < vma->page_count; page++)
|
||||
free_page((unsigned long)vma->pages[page]);
|
||||
list_for_each_entry_safe(page, n, &vma->page_list, lru) {
|
||||
list_del_init(&page->lru);
|
||||
__free_page(page);
|
||||
}
|
||||
|
||||
kfree(vma);
|
||||
vma = next;
|
||||
@ -1009,25 +1013,21 @@ void __i915_gpu_coredump_free(struct kref *error_ref)
|
||||
|
||||
static struct i915_vma_coredump *
|
||||
i915_vma_coredump_create(const struct intel_gt *gt,
|
||||
const struct i915_vma *vma,
|
||||
const char *name,
|
||||
const struct i915_vma_snapshot *vsnap,
|
||||
struct i915_vma_compress *compress)
|
||||
{
|
||||
struct i915_ggtt *ggtt = gt->ggtt;
|
||||
const u64 slot = ggtt->error_capture.start;
|
||||
struct i915_vma_coredump *dst;
|
||||
unsigned long num_pages;
|
||||
struct sgt_iter iter;
|
||||
int ret;
|
||||
|
||||
might_sleep();
|
||||
|
||||
if (!vma || !vma->pages || !compress)
|
||||
if (!vsnap || !vsnap->pages || !compress)
|
||||
return NULL;
|
||||
|
||||
num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
|
||||
num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */
|
||||
dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), ALLOW_FAIL);
|
||||
dst = kmalloc(sizeof(*dst), ALLOW_FAIL);
|
||||
if (!dst)
|
||||
return NULL;
|
||||
|
||||
@ -1036,14 +1036,13 @@ i915_vma_coredump_create(const struct intel_gt *gt,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
strcpy(dst->name, name);
|
||||
INIT_LIST_HEAD(&dst->page_list);
|
||||
strcpy(dst->name, vsnap->name);
|
||||
dst->next = NULL;
|
||||
|
||||
dst->gtt_offset = vma->node.start;
|
||||
dst->gtt_size = vma->node.size;
|
||||
dst->gtt_page_sizes = vma->page_sizes.gtt;
|
||||
dst->num_pages = num_pages;
|
||||
dst->page_count = 0;
|
||||
dst->gtt_offset = vsnap->gtt_offset;
|
||||
dst->gtt_size = vsnap->gtt_size;
|
||||
dst->gtt_page_sizes = vsnap->page_sizes;
|
||||
dst->unused = 0;
|
||||
|
||||
ret = -EINVAL;
|
||||
@ -1051,7 +1050,7 @@ i915_vma_coredump_create(const struct intel_gt *gt,
|
||||
void __iomem *s;
|
||||
dma_addr_t dma;
|
||||
|
||||
for_each_sgt_daddr(dma, iter, vma->pages) {
|
||||
for_each_sgt_daddr(dma, iter, vsnap->pages) {
|
||||
mutex_lock(&ggtt->error_mutex);
|
||||
ggtt->vm.insert_page(&ggtt->vm, dma, slot,
|
||||
I915_CACHE_NONE, 0);
|
||||
@ -1069,11 +1068,11 @@ i915_vma_coredump_create(const struct intel_gt *gt,
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
} else if (__i915_gem_object_is_lmem(vma->obj)) {
|
||||
struct intel_memory_region *mem = vma->obj->mm.region;
|
||||
} else if (vsnap->mr && vsnap->mr->type != INTEL_MEMORY_SYSTEM) {
|
||||
struct intel_memory_region *mem = vsnap->mr;
|
||||
dma_addr_t dma;
|
||||
|
||||
for_each_sgt_daddr(dma, iter, vma->pages) {
|
||||
for_each_sgt_daddr(dma, iter, vsnap->pages) {
|
||||
void __iomem *s;
|
||||
|
||||
s = io_mapping_map_wc(&mem->iomap,
|
||||
@ -1089,7 +1088,7 @@ i915_vma_coredump_create(const struct intel_gt *gt,
|
||||
} else {
|
||||
struct page *page;
|
||||
|
||||
for_each_sgt_page(page, iter, vma->pages) {
|
||||
for_each_sgt_page(page, iter, vsnap->pages) {
|
||||
void *s;
|
||||
|
||||
drm_clflush_pages(&page, 1);
|
||||
@ -1106,8 +1105,13 @@ i915_vma_coredump_create(const struct intel_gt *gt,
|
||||
}
|
||||
|
||||
if (ret || compress_flush(compress, dst)) {
|
||||
while (dst->page_count--)
|
||||
pool_free(&compress->pool, dst->pages[dst->page_count]);
|
||||
struct page *page, *n;
|
||||
|
||||
list_for_each_entry_safe_reverse(page, n, &dst->page_list, lru) {
|
||||
list_del_init(&page->lru);
|
||||
pool_free(&compress->pool, page_address(page));
|
||||
}
|
||||
|
||||
kfree(dst);
|
||||
dst = NULL;
|
||||
}
|
||||
@ -1320,35 +1324,69 @@ static bool record_context(struct i915_gem_context_coredump *e,
|
||||
|
||||
struct intel_engine_capture_vma {
|
||||
struct intel_engine_capture_vma *next;
|
||||
struct i915_vma *vma;
|
||||
struct i915_vma_snapshot *vsnap;
|
||||
char name[16];
|
||||
bool lockdep_cookie;
|
||||
};
|
||||
|
||||
static struct intel_engine_capture_vma *
|
||||
capture_vma_snapshot(struct intel_engine_capture_vma *next,
|
||||
struct i915_vma_snapshot *vsnap,
|
||||
gfp_t gfp)
|
||||
{
|
||||
struct intel_engine_capture_vma *c;
|
||||
|
||||
if (!i915_vma_snapshot_present(vsnap))
|
||||
return next;
|
||||
|
||||
c = kmalloc(sizeof(*c), gfp);
|
||||
if (!c)
|
||||
return next;
|
||||
|
||||
if (!i915_vma_snapshot_resource_pin(vsnap, &c->lockdep_cookie)) {
|
||||
kfree(c);
|
||||
return next;
|
||||
}
|
||||
|
||||
strcpy(c->name, vsnap->name);
|
||||
c->vsnap = vsnap;
|
||||
i915_vma_snapshot_get(vsnap);
|
||||
|
||||
c->next = next;
|
||||
return c;
|
||||
}
|
||||
|
||||
static struct intel_engine_capture_vma *
|
||||
capture_vma(struct intel_engine_capture_vma *next,
|
||||
struct i915_vma *vma,
|
||||
const char *name,
|
||||
gfp_t gfp)
|
||||
{
|
||||
struct intel_engine_capture_vma *c;
|
||||
struct i915_vma_snapshot *vsnap;
|
||||
|
||||
if (!vma)
|
||||
return next;
|
||||
|
||||
c = kmalloc(sizeof(*c), gfp);
|
||||
if (!c)
|
||||
/*
|
||||
* If the vma isn't pinned, then the vma should be snapshotted
|
||||
* to a struct i915_vma_snapshot at command submission time.
|
||||
* Not here.
|
||||
*/
|
||||
GEM_WARN_ON(!i915_vma_is_pinned(vma));
|
||||
if (!i915_vma_is_pinned(vma))
|
||||
return next;
|
||||
|
||||
if (!i915_active_acquire_if_busy(&vma->active)) {
|
||||
kfree(c);
|
||||
vsnap = i915_vma_snapshot_alloc(gfp);
|
||||
if (!vsnap)
|
||||
return next;
|
||||
}
|
||||
|
||||
strcpy(c->name, name);
|
||||
c->vma = vma; /* reference held while active */
|
||||
i915_vma_snapshot_init(vsnap, vma, name);
|
||||
next = capture_vma_snapshot(next, vsnap, gfp);
|
||||
|
||||
c->next = next;
|
||||
return c;
|
||||
/* FIXME: Replace on async unbind. */
|
||||
i915_vma_snapshot_put(vsnap);
|
||||
|
||||
return next;
|
||||
}
|
||||
|
||||
static struct intel_engine_capture_vma *
|
||||
@ -1359,7 +1397,7 @@ capture_user(struct intel_engine_capture_vma *capture,
|
||||
struct i915_capture_list *c;
|
||||
|
||||
for (c = rq->capture_list; c; c = c->next)
|
||||
capture = capture_vma(capture, c->vma, "user", gfp);
|
||||
capture = capture_vma_snapshot(capture, c->vma_snapshot, gfp);
|
||||
|
||||
return capture;
|
||||
}
|
||||
@ -1373,6 +1411,36 @@ static void add_vma(struct intel_engine_coredump *ee,
|
||||
}
|
||||
}
|
||||
|
||||
static struct i915_vma_coredump *
|
||||
create_vma_coredump(const struct intel_gt *gt, struct i915_vma *vma,
|
||||
const char *name, struct i915_vma_compress *compress)
|
||||
{
|
||||
struct i915_vma_coredump *ret = NULL;
|
||||
struct i915_vma_snapshot tmp;
|
||||
bool lockdep_cookie;
|
||||
|
||||
if (!vma)
|
||||
return NULL;
|
||||
|
||||
i915_vma_snapshot_init_onstack(&tmp, vma, name);
|
||||
if (i915_vma_snapshot_resource_pin(&tmp, &lockdep_cookie)) {
|
||||
ret = i915_vma_coredump_create(gt, &tmp, compress);
|
||||
i915_vma_snapshot_resource_unpin(&tmp, lockdep_cookie);
|
||||
}
|
||||
i915_vma_snapshot_put_onstack(&tmp);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void add_vma_coredump(struct intel_engine_coredump *ee,
|
||||
const struct intel_gt *gt,
|
||||
struct i915_vma *vma,
|
||||
const char *name,
|
||||
struct i915_vma_compress *compress)
|
||||
{
|
||||
add_vma(ee, create_vma_coredump(gt, vma, name, compress));
|
||||
}
|
||||
|
||||
struct intel_engine_coredump *
|
||||
intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp)
|
||||
{
|
||||
@ -1406,7 +1474,7 @@ intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
|
||||
* as the simplest method to avoid being overwritten
|
||||
* by userspace.
|
||||
*/
|
||||
vma = capture_vma(vma, rq->batch, "batch", gfp);
|
||||
vma = capture_vma_snapshot(vma, &rq->batch_snapshot, gfp);
|
||||
vma = capture_user(vma, rq, gfp);
|
||||
vma = capture_vma(vma, rq->ring->vma, "ring", gfp);
|
||||
vma = capture_vma(vma, rq->context->state, "HW context", gfp);
|
||||
@ -1427,30 +1495,24 @@ intel_engine_coredump_add_vma(struct intel_engine_coredump *ee,
|
||||
|
||||
while (capture) {
|
||||
struct intel_engine_capture_vma *this = capture;
|
||||
struct i915_vma *vma = this->vma;
|
||||
struct i915_vma_snapshot *vsnap = this->vsnap;
|
||||
|
||||
add_vma(ee,
|
||||
i915_vma_coredump_create(engine->gt,
|
||||
vma, this->name,
|
||||
compress));
|
||||
vsnap, compress));
|
||||
|
||||
i915_active_release(&vma->active);
|
||||
i915_vma_snapshot_resource_unpin(vsnap, this->lockdep_cookie);
|
||||
i915_vma_snapshot_put(vsnap);
|
||||
|
||||
capture = this->next;
|
||||
kfree(this);
|
||||
}
|
||||
|
||||
add_vma(ee,
|
||||
i915_vma_coredump_create(engine->gt,
|
||||
engine->status_page.vma,
|
||||
"HW Status",
|
||||
compress));
|
||||
add_vma_coredump(ee, engine->gt, engine->status_page.vma,
|
||||
"HW Status", compress);
|
||||
|
||||
add_vma(ee,
|
||||
i915_vma_coredump_create(engine->gt,
|
||||
engine->wa_ctx.vma,
|
||||
"WA context",
|
||||
compress));
|
||||
add_vma_coredump(ee, engine->gt, engine->wa_ctx.vma,
|
||||
"WA context", compress);
|
||||
}
|
||||
|
||||
static struct intel_engine_coredump *
|
||||
@ -1486,17 +1548,25 @@ capture_engine(struct intel_engine_cs *engine,
|
||||
}
|
||||
}
|
||||
if (rq)
|
||||
capture = intel_engine_coredump_add_request(ee, rq,
|
||||
ATOMIC_MAYFAIL);
|
||||
rq = i915_request_get_rcu(rq);
|
||||
|
||||
if (!rq)
|
||||
goto no_request_capture;
|
||||
|
||||
capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL);
|
||||
if (!capture) {
|
||||
no_request_capture:
|
||||
kfree(ee);
|
||||
return NULL;
|
||||
i915_request_put(rq);
|
||||
goto no_request_capture;
|
||||
}
|
||||
|
||||
intel_engine_coredump_add_vma(ee, capture, compress);
|
||||
i915_request_put(rq);
|
||||
|
||||
return ee;
|
||||
|
||||
no_request_capture:
|
||||
kfree(ee);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
@ -1550,10 +1620,8 @@ gt_record_uc(struct intel_gt_coredump *gt,
|
||||
*/
|
||||
error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL);
|
||||
error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL);
|
||||
error_uc->guc_log =
|
||||
i915_vma_coredump_create(gt->_gt,
|
||||
uc->guc.log.vma, "GuC log buffer",
|
||||
compress);
|
||||
error_uc->guc_log = create_vma_coredump(gt->_gt, uc->guc.log.vma,
|
||||
"GuC log buffer", compress);
|
||||
|
||||
return error_uc;
|
||||
}
|
||||
@ -1750,10 +1818,7 @@ static void capture_gen(struct i915_gpu_coredump *error)
|
||||
error->wakelock = atomic_read(&i915->runtime_pm.wakeref_count);
|
||||
error->suspended = i915->runtime_pm.suspended;
|
||||
|
||||
error->iommu = -1;
|
||||
#ifdef CONFIG_INTEL_IOMMU
|
||||
error->iommu = intel_iommu_gfx_mapped;
|
||||
#endif
|
||||
error->iommu = intel_vtd_active(i915);
|
||||
error->reset_count = i915_reset_count(&i915->gpu_error);
|
||||
error->suspend_count = i915->suspend_count;
|
||||
|
||||
@ -1839,8 +1904,8 @@ void i915_vma_capture_finish(struct intel_gt_coredump *gt,
|
||||
kfree(compress);
|
||||
}
|
||||
|
||||
struct i915_gpu_coredump *
|
||||
i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
|
||||
static struct i915_gpu_coredump *
|
||||
__i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
|
||||
{
|
||||
struct drm_i915_private *i915 = gt->i915;
|
||||
struct i915_gpu_coredump *error;
|
||||
@ -1881,6 +1946,22 @@ i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
|
||||
return error;
|
||||
}
|
||||
|
||||
struct i915_gpu_coredump *
|
||||
i915_gpu_coredump(struct intel_gt *gt, intel_engine_mask_t engine_mask)
|
||||
{
|
||||
static DEFINE_MUTEX(capture_mutex);
|
||||
int ret = mutex_lock_interruptible(&capture_mutex);
|
||||
struct i915_gpu_coredump *dump;
|
||||
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
dump = __i915_gpu_coredump(gt, engine_mask);
|
||||
mutex_unlock(&capture_mutex);
|
||||
|
||||
return dump;
|
||||
}
|
||||
|
||||
void i915_error_state_store(struct i915_gpu_coredump *error)
|
||||
{
|
||||
struct drm_i915_private *i915;
|
||||
|
@ -39,10 +39,8 @@ struct i915_vma_coredump {
|
||||
u64 gtt_size;
|
||||
u32 gtt_page_sizes;
|
||||
|
||||
int num_pages;
|
||||
int page_count;
|
||||
int unused;
|
||||
u32 *pages[];
|
||||
struct list_head page_list;
|
||||
};
|
||||
|
||||
struct i915_request_coredump {
|
||||
|
@ -2772,7 +2772,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
|
||||
{
|
||||
struct drm_i915_private * const i915 = arg;
|
||||
struct intel_gt *gt = &i915->gt;
|
||||
void __iomem * const regs = i915->uncore.regs;
|
||||
void __iomem * const regs = gt->uncore->regs;
|
||||
u32 master_tile_ctl, master_ctl;
|
||||
u32 gu_misc_iir;
|
||||
|
||||
@ -3173,11 +3173,12 @@ static void gen11_display_irq_reset(struct drm_i915_private *dev_priv)
|
||||
|
||||
static void gen11_irq_reset(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
struct intel_uncore *uncore = &dev_priv->uncore;
|
||||
struct intel_gt *gt = &dev_priv->gt;
|
||||
struct intel_uncore *uncore = gt->uncore;
|
||||
|
||||
gen11_master_intr_disable(dev_priv->uncore.regs);
|
||||
|
||||
gen11_gt_irq_reset(&dev_priv->gt);
|
||||
gen11_gt_irq_reset(gt);
|
||||
gen11_display_irq_reset(dev_priv);
|
||||
|
||||
GEN3_IRQ_RESET(uncore, GEN11_GU_MISC_);
|
||||
@ -3186,11 +3187,12 @@ static void gen11_irq_reset(struct drm_i915_private *dev_priv)
|
||||
|
||||
static void dg1_irq_reset(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
struct intel_uncore *uncore = &dev_priv->uncore;
|
||||
struct intel_gt *gt = &dev_priv->gt;
|
||||
struct intel_uncore *uncore = gt->uncore;
|
||||
|
||||
dg1_master_intr_disable(dev_priv->uncore.regs);
|
||||
|
||||
gen11_gt_irq_reset(&dev_priv->gt);
|
||||
gen11_gt_irq_reset(gt);
|
||||
gen11_display_irq_reset(dev_priv);
|
||||
|
||||
GEN3_IRQ_RESET(uncore, GEN11_GU_MISC_);
|
||||
@ -3869,13 +3871,14 @@ static void gen11_de_irq_postinstall(struct drm_i915_private *dev_priv)
|
||||
|
||||
static void gen11_irq_postinstall(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
struct intel_uncore *uncore = &dev_priv->uncore;
|
||||
struct intel_gt *gt = &dev_priv->gt;
|
||||
struct intel_uncore *uncore = gt->uncore;
|
||||
u32 gu_misc_masked = GEN11_GU_MISC_GSE;
|
||||
|
||||
if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP)
|
||||
icp_irq_postinstall(dev_priv);
|
||||
|
||||
gen11_gt_irq_postinstall(&dev_priv->gt);
|
||||
gen11_gt_irq_postinstall(gt);
|
||||
gen11_de_irq_postinstall(dev_priv);
|
||||
|
||||
GEN3_IRQ_INIT(uncore, GEN11_GU_MISC_, ~gu_misc_masked, gu_misc_masked);
|
||||
@ -3886,10 +3889,11 @@ static void gen11_irq_postinstall(struct drm_i915_private *dev_priv)
|
||||
|
||||
static void dg1_irq_postinstall(struct drm_i915_private *dev_priv)
|
||||
{
|
||||
struct intel_uncore *uncore = &dev_priv->uncore;
|
||||
struct intel_gt *gt = &dev_priv->gt;
|
||||
struct intel_uncore *uncore = gt->uncore;
|
||||
u32 gu_misc_masked = GEN11_GU_MISC_GSE;
|
||||
|
||||
gen11_gt_irq_postinstall(&dev_priv->gt);
|
||||
gen11_gt_irq_postinstall(gt);
|
||||
|
||||
GEN3_IRQ_INIT(uncore, GEN11_GU_MISC_, ~gu_misc_masked, gu_misc_masked);
|
||||
|
||||
@ -3900,8 +3904,8 @@ static void dg1_irq_postinstall(struct drm_i915_private *dev_priv)
|
||||
GEN11_DISPLAY_IRQ_ENABLE);
|
||||
}
|
||||
|
||||
dg1_master_intr_enable(dev_priv->uncore.regs);
|
||||
intel_uncore_posting_read(&dev_priv->uncore, DG1_MSTR_TILE_INTR);
|
||||
dg1_master_intr_enable(uncore->regs);
|
||||
intel_uncore_posting_read(uncore, DG1_MSTR_TILE_INTR);
|
||||
}
|
||||
|
||||
static void cherryview_irq_postinstall(struct drm_i915_private *dev_priv)
|
||||
|
@ -33,8 +33,8 @@
|
||||
|
||||
#define PLATFORM(x) .platform = (x)
|
||||
#define GEN(x) \
|
||||
.graphics_ver = (x), \
|
||||
.media_ver = (x), \
|
||||
.graphics.ver = (x), \
|
||||
.media.ver = (x), \
|
||||
.display.ver = (x)
|
||||
|
||||
#define I845_PIPE_OFFSETS \
|
||||
@ -906,7 +906,7 @@ static const struct intel_device_info rkl_info = {
|
||||
static const struct intel_device_info dg1_info = {
|
||||
GEN12_FEATURES,
|
||||
DGFX_FEATURES,
|
||||
.graphics_rel = 10,
|
||||
.graphics.rel = 10,
|
||||
PLATFORM(INTEL_DG1),
|
||||
.pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D),
|
||||
.require_force_probe = 1,
|
||||
@ -998,8 +998,8 @@ static const struct intel_device_info adl_p_info = {
|
||||
I915_GTT_PAGE_SIZE_2M
|
||||
|
||||
#define XE_HP_FEATURES \
|
||||
.graphics_ver = 12, \
|
||||
.graphics_rel = 50, \
|
||||
.graphics.ver = 12, \
|
||||
.graphics.rel = 50, \
|
||||
XE_HP_PAGE_SIZES, \
|
||||
.dma_mask_size = 46, \
|
||||
.has_64bit_reloc = 1, \
|
||||
@ -1017,8 +1017,8 @@ static const struct intel_device_info adl_p_info = {
|
||||
.ppgtt_type = INTEL_PPGTT_FULL
|
||||
|
||||
#define XE_HPM_FEATURES \
|
||||
.media_ver = 12, \
|
||||
.media_rel = 50
|
||||
.media.ver = 12, \
|
||||
.media.rel = 50
|
||||
|
||||
__maybe_unused
|
||||
static const struct intel_device_info xehpsdv_info = {
|
||||
@ -1042,8 +1042,8 @@ static const struct intel_device_info dg2_info = {
|
||||
XE_HPM_FEATURES,
|
||||
XE_LPD_FEATURES,
|
||||
DGFX_FEATURES,
|
||||
.graphics_rel = 55,
|
||||
.media_rel = 55,
|
||||
.graphics.rel = 55,
|
||||
.media.rel = 55,
|
||||
PLATFORM(INTEL_DG2),
|
||||
.platform_engine_mask =
|
||||
BIT(RCS0) | BIT(BCS0) |
|
||||
|
@ -501,6 +501,18 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
|
||||
#define ECOBITS_PPGTT_CACHE64B (3 << 8)
|
||||
#define ECOBITS_PPGTT_CACHE4B (0 << 8)
|
||||
|
||||
#define GEN12_GAMCNTRL_CTRL _MMIO(0xcf54)
|
||||
#define INVALIDATION_BROADCAST_MODE_DIS REG_BIT(12)
|
||||
#define GLOBAL_INVALIDATION_MODE REG_BIT(2)
|
||||
|
||||
#define GEN12_GAMSTLB_CTRL _MMIO(0xcf4c)
|
||||
#define CONTROL_BLOCK_CLKGATE_DIS REG_BIT(12)
|
||||
#define EGRESS_BLOCK_CLKGATE_DIS REG_BIT(11)
|
||||
#define TAG_BLOCK_CLKGATE_DIS REG_BIT(7)
|
||||
|
||||
#define GEN12_MERT_MOD_CTRL _MMIO(0xcf28)
|
||||
#define FORCE_MISS_FTLB REG_BIT(3)
|
||||
|
||||
#define GAB_CTL _MMIO(0x24000)
|
||||
#define GAB_CTL_CONT_AFTER_PAGEFAULT (1 << 8)
|
||||
|
||||
@ -722,6 +734,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
|
||||
|
||||
#define GEN12_OA_TLB_INV_CR _MMIO(0xceec)
|
||||
|
||||
#define GEN12_SQCM _MMIO(0x8724)
|
||||
#define EN_32B_ACCESS REG_BIT(30)
|
||||
|
||||
/* Gen12 OAR unit */
|
||||
#define GEN12_OAR_OACONTROL _MMIO(0x2960)
|
||||
#define GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1
|
||||
@ -773,6 +788,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
|
||||
#define EU_PERF_CNTL5 _MMIO(0xe55c)
|
||||
#define EU_PERF_CNTL6 _MMIO(0xe65c)
|
||||
|
||||
#define RT_CTRL _MMIO(0xe530)
|
||||
#define DIS_NULL_QUERY REG_BIT(10)
|
||||
|
||||
/*
|
||||
* OA Boolean state
|
||||
*/
|
||||
@ -2665,6 +2683,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
|
||||
#define RING_WAIT (1 << 11) /* gen3+, PRBx_CTL */
|
||||
#define RING_WAIT_SEMAPHORE (1 << 10) /* gen6+ */
|
||||
|
||||
#define GUCPMTIMESTAMP _MMIO(0xC3E8)
|
||||
|
||||
/* There are 16 64-bit CS General Purpose Registers per-engine on Gen8+ */
|
||||
#define GEN8_RING_CS_GPR(base, n) _MMIO((base) + 0x600 + (n) * 8)
|
||||
#define GEN8_RING_CS_GPR_UDW(base, n) _MMIO((base) + 0x600 + (n) * 8 + 4)
|
||||
@ -2775,6 +2795,9 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
|
||||
#define VDBOX_CGCTL3F10(base) _MMIO((base) + 0x3f10)
|
||||
#define IECPUNIT_CLKGATE_DIS REG_BIT(22)
|
||||
|
||||
#define VDBOX_CGCTL3F18(base) _MMIO((base) + 0x3f18)
|
||||
#define ALNUNIT_CLKGATE_DIS REG_BIT(13)
|
||||
|
||||
#define ERROR_GEN6 _MMIO(0x40a0)
|
||||
#define GEN7_ERR_INT _MMIO(0x44040)
|
||||
#define ERR_INT_POISON (1 << 31)
|
||||
@ -2873,6 +2896,15 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
|
||||
#define GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE (1 << 2)
|
||||
#define GEN11_ENABLE_32_PLANE_MODE (1 << 7)
|
||||
|
||||
#define SCCGCTL94DC _MMIO(0x94dc)
|
||||
#define CG3DDISURB REG_BIT(14)
|
||||
|
||||
#define MLTICTXCTL _MMIO(0xb170)
|
||||
#define TDONRENDER REG_BIT(2)
|
||||
|
||||
#define L3SQCREG1_CCS0 _MMIO(0xb200)
|
||||
#define FLUSHALLNONCOH REG_BIT(5)
|
||||
|
||||
/* WaClearTdlStateAckDirtyBits */
|
||||
#define GEN8_STATE_ACK _MMIO(0x20F0)
|
||||
#define GEN9_STATE_ACK_SLICE1 _MMIO(0x20F8)
|
||||
@ -3109,7 +3141,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
|
||||
#define GEN9_RCS_FE_FSM2 _MMIO(0x22a4)
|
||||
|
||||
#define GEN10_CACHE_MODE_SS _MMIO(0xe420)
|
||||
#define FLOAT_BLEND_OPTIMIZATION_ENABLE (1 << 4)
|
||||
#define ENABLE_PREFETCH_INTO_IC REG_BIT(3)
|
||||
#define FLOAT_BLEND_OPTIMIZATION_ENABLE REG_BIT(4)
|
||||
|
||||
/* Fuse readout registers for GT */
|
||||
#define HSW_PAVP_FUSE1 _MMIO(0x911C)
|
||||
@ -4288,21 +4321,62 @@ enum {
|
||||
/*
|
||||
* GEN10 clock gating regs
|
||||
*/
|
||||
|
||||
#define UNSLCGCTL9440 _MMIO(0x9440)
|
||||
#define GAMTLBOACS_CLKGATE_DIS REG_BIT(28)
|
||||
#define GAMTLBVDBOX5_CLKGATE_DIS REG_BIT(27)
|
||||
#define GAMTLBVDBOX6_CLKGATE_DIS REG_BIT(26)
|
||||
#define GAMTLBVDBOX3_CLKGATE_DIS REG_BIT(24)
|
||||
#define GAMTLBVDBOX4_CLKGATE_DIS REG_BIT(23)
|
||||
#define GAMTLBVDBOX7_CLKGATE_DIS REG_BIT(22)
|
||||
#define GAMTLBVDBOX2_CLKGATE_DIS REG_BIT(21)
|
||||
#define GAMTLBVDBOX0_CLKGATE_DIS REG_BIT(17)
|
||||
#define GAMTLBKCR_CLKGATE_DIS REG_BIT(16)
|
||||
#define GAMTLBGUC_CLKGATE_DIS REG_BIT(15)
|
||||
#define GAMTLBBLT_CLKGATE_DIS REG_BIT(14)
|
||||
#define GAMTLBVDBOX1_CLKGATE_DIS REG_BIT(6)
|
||||
|
||||
#define UNSLCGCTL9444 _MMIO(0x9444)
|
||||
#define GAMTLBGFXA0_CLKGATE_DIS REG_BIT(30)
|
||||
#define GAMTLBGFXA1_CLKGATE_DIS REG_BIT(29)
|
||||
#define GAMTLBCOMPA0_CLKGATE_DIS REG_BIT(28)
|
||||
#define GAMTLBCOMPA1_CLKGATE_DIS REG_BIT(27)
|
||||
#define GAMTLBCOMPB0_CLKGATE_DIS REG_BIT(26)
|
||||
#define GAMTLBCOMPB1_CLKGATE_DIS REG_BIT(25)
|
||||
#define GAMTLBCOMPC0_CLKGATE_DIS REG_BIT(24)
|
||||
#define GAMTLBCOMPC1_CLKGATE_DIS REG_BIT(23)
|
||||
#define GAMTLBCOMPD0_CLKGATE_DIS REG_BIT(22)
|
||||
#define GAMTLBCOMPD1_CLKGATE_DIS REG_BIT(21)
|
||||
#define GAMTLBMERT_CLKGATE_DIS REG_BIT(20)
|
||||
#define GAMTLBVEBOX3_CLKGATE_DIS REG_BIT(19)
|
||||
#define GAMTLBVEBOX2_CLKGATE_DIS REG_BIT(18)
|
||||
#define GAMTLBVEBOX1_CLKGATE_DIS REG_BIT(17)
|
||||
#define GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16)
|
||||
#define LTCDD_CLKGATE_DIS REG_BIT(10)
|
||||
|
||||
#define SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4)
|
||||
#define SARBUNIT_CLKGATE_DIS (1 << 5)
|
||||
#define RCCUNIT_CLKGATE_DIS (1 << 7)
|
||||
#define MSCUNIT_CLKGATE_DIS (1 << 10)
|
||||
#define NODEDSS_CLKGATE_DIS REG_BIT(12)
|
||||
#define L3_CLKGATE_DIS REG_BIT(16)
|
||||
#define L3_CR2X_CLKGATE_DIS REG_BIT(17)
|
||||
|
||||
#define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524)
|
||||
#define GWUNIT_CLKGATE_DIS (1 << 16)
|
||||
#define DSS_ROUTER_CLKGATE_DIS REG_BIT(28)
|
||||
#define GWUNIT_CLKGATE_DIS REG_BIT(16)
|
||||
|
||||
#define SUBSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x9528)
|
||||
#define CPSSUNIT_CLKGATE_DIS REG_BIT(9)
|
||||
|
||||
#define SSMCGCTL9530 _MMIO(0x9530)
|
||||
#define RTFUNIT_CLKGATE_DIS REG_BIT(18)
|
||||
|
||||
#define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434)
|
||||
#define VFUNIT_CLKGATE_DIS REG_BIT(20)
|
||||
#define TSGUNIT_CLKGATE_DIS REG_BIT(17) /* XEHPSDV */
|
||||
#define CG3DDISCFEG_CLKGATE_DIS REG_BIT(17) /* DG2 */
|
||||
#define GAMEDIA_CLKGATE_DIS REG_BIT(11)
|
||||
#define HSUNIT_CLKGATE_DIS REG_BIT(8)
|
||||
#define VSUNIT_CLKGATE_DIS REG_BIT(3)
|
||||
|
||||
@ -8367,6 +8441,9 @@ enum {
|
||||
#define GEN9_CTX_PREEMPT_REG _MMIO(0x2248)
|
||||
#define GEN12_DISABLE_POSH_BUSY_FF_DOP_CG REG_BIT(11)
|
||||
|
||||
#define GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON _MMIO(0x20EC)
|
||||
#define GEN12_REPLAY_MODE_GRANULARITY REG_BIT(0)
|
||||
|
||||
#define GEN8_CS_CHICKEN1 _MMIO(0x2580)
|
||||
#define GEN9_PREEMPT_3D_OBJECT_LEVEL (1 << 0)
|
||||
#define GEN9_PREEMPT_GPGPU_LEVEL(hi, lo) (((hi) << 2) | ((lo) << 1))
|
||||
@ -8390,9 +8467,10 @@ enum {
|
||||
#define GEN8_ERRDETBCTRL (1 << 9)
|
||||
|
||||
#define GEN11_COMMON_SLICE_CHICKEN3 _MMIO(0x7304)
|
||||
#define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12)
|
||||
#define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11)
|
||||
#define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9)
|
||||
#define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12)
|
||||
#define XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE REG_BIT(12)
|
||||
#define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11)
|
||||
#define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9)
|
||||
|
||||
#define HIZ_CHICKEN _MMIO(0x7018)
|
||||
# define CHV_HZ_8X8_MODE_IN_1X REG_BIT(15)
|
||||
@ -8446,6 +8524,12 @@ enum {
|
||||
#define GEN8_LQSC_FLUSH_COHERENT_LINES (1 << 21)
|
||||
#define GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE REG_BIT(22)
|
||||
|
||||
#define GEN11_L3SQCREG5 _MMIO(0xb158)
|
||||
#define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0)
|
||||
|
||||
#define XEHP_L3SCQREG7 _MMIO(0xb188)
|
||||
#define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3)
|
||||
|
||||
/* GEN8 chicken */
|
||||
#define HDC_CHICKEN0 _MMIO(0x7300)
|
||||
#define ICL_HDC_MODE _MMIO(0xE5F4)
|
||||
@ -8456,6 +8540,12 @@ enum {
|
||||
#define HDC_FORCE_NON_COHERENT (1 << 4)
|
||||
#define HDC_BARRIER_PERFORMANCE_DISABLE (1 << 10)
|
||||
|
||||
#define GEN12_HDC_CHICKEN0 _MMIO(0xE5F0)
|
||||
#define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11)
|
||||
|
||||
#define SARB_CHICKEN1 _MMIO(0xe90c)
|
||||
#define COMP_CKN_IN REG_GENMASK(30, 29)
|
||||
|
||||
#define GEN8_HDC_CHICKEN1 _MMIO(0x7304)
|
||||
|
||||
/* GEN9 chicken */
|
||||
@ -8486,6 +8576,10 @@ enum {
|
||||
#define PIXEL_ROUNDING_TRUNC_FB_PASSTHRU (1 << 15)
|
||||
#define PER_PIXEL_ALPHA_BYPASS_EN (1 << 7)
|
||||
|
||||
#define VFLSKPD _MMIO(0x62a8)
|
||||
#define DIS_OVER_FETCH_CACHE REG_BIT(1)
|
||||
#define DIS_MULT_MISS_RD_SQUASH REG_BIT(0)
|
||||
|
||||
#define FF_MODE2 _MMIO(0x6604)
|
||||
#define FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24)
|
||||
#define FF_MODE2_GS_TIMER_224 REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
|
||||
@ -9309,6 +9403,9 @@ enum {
|
||||
#define GEN8_SDEUNIT_CLOCK_GATE_DISABLE (1 << 14)
|
||||
#define GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ (1 << 28)
|
||||
|
||||
#define UNSLCGCTL9430 _MMIO(0x9430)
|
||||
#define MSQDUNIT_CLKGATE_DIS REG_BIT(3)
|
||||
|
||||
#define GEN6_GFXPAUSE _MMIO(0xA000)
|
||||
#define GEN6_RPNSWREQ _MMIO(0xA008)
|
||||
#define GEN6_TURBO_DISABLE (1 << 31)
|
||||
@ -9624,24 +9721,39 @@ enum {
|
||||
#define GEN9_CCS_TLB_PREFETCH_ENABLE (1 << 3)
|
||||
|
||||
#define GEN8_ROW_CHICKEN _MMIO(0xe4f0)
|
||||
#define FLOW_CONTROL_ENABLE (1 << 15)
|
||||
#define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE (1 << 8)
|
||||
#define STALL_DOP_GATING_DISABLE (1 << 5)
|
||||
#define THROTTLE_12_5 (7 << 2)
|
||||
#define DISABLE_EARLY_EOT (1 << 1)
|
||||
#define FLOW_CONTROL_ENABLE REG_BIT(15)
|
||||
#define UGM_BACKUP_MODE REG_BIT(13)
|
||||
#define MDQ_ARBITRATION_MODE REG_BIT(12)
|
||||
#define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE REG_BIT(8)
|
||||
#define STALL_DOP_GATING_DISABLE REG_BIT(5)
|
||||
#define THROTTLE_12_5 REG_GENMASK(4, 2)
|
||||
#define DISABLE_EARLY_EOT REG_BIT(1)
|
||||
|
||||
#define GEN7_ROW_CHICKEN2 _MMIO(0xe4f4)
|
||||
#define GEN12_DISABLE_READ_SUPPRESSION REG_BIT(15)
|
||||
#define GEN12_DISABLE_EARLY_READ REG_BIT(14)
|
||||
#define GEN12_ENABLE_LARGE_GRF_MODE REG_BIT(12)
|
||||
#define GEN12_PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8)
|
||||
|
||||
#define LSC_CHICKEN_BIT_0 _MMIO(0xe7c8)
|
||||
#define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15)
|
||||
#define LSC_CHICKEN_BIT_0_UDW _MMIO(0xe7c8 + 4)
|
||||
#define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32)
|
||||
#define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32)
|
||||
#define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32)
|
||||
#define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32)
|
||||
#define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32)
|
||||
|
||||
#define GEN7_ROW_CHICKEN2_GT2 _MMIO(0xf4f4)
|
||||
#define DOP_CLOCK_GATING_DISABLE (1 << 0)
|
||||
#define PUSH_CONSTANT_DEREF_DISABLE (1 << 8)
|
||||
#define GEN11_TDL_CLOCK_GATING_FIX_DISABLE (1 << 1)
|
||||
|
||||
#define GEN9_ROW_CHICKEN4 _MMIO(0xe48c)
|
||||
#define GEN12_DISABLE_TDL_PUSH REG_BIT(9)
|
||||
#define GEN11_DIS_PICK_2ND_EU REG_BIT(7)
|
||||
#define GEN9_ROW_CHICKEN4 _MMIO(0xe48c)
|
||||
#define GEN12_DISABLE_GRF_CLEAR REG_BIT(13)
|
||||
#define GEN12_DISABLE_TDL_PUSH REG_BIT(9)
|
||||
#define GEN11_DIS_PICK_2ND_EU REG_BIT(7)
|
||||
#define GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX REG_BIT(4)
|
||||
|
||||
#define HSW_ROW_CHICKEN3 _MMIO(0xe49c)
|
||||
#define HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6)
|
||||
@ -9656,9 +9768,10 @@ enum {
|
||||
#define GEN8_SAMPLER_POWER_BYPASS_DIS (1 << 1)
|
||||
|
||||
#define GEN9_HALF_SLICE_CHICKEN7 _MMIO(0xe194)
|
||||
#define GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR (1 << 8)
|
||||
#define GEN9_ENABLE_YV12_BUGFIX (1 << 4)
|
||||
#define GEN9_ENABLE_GPGPU_PREEMPTION (1 << 2)
|
||||
#define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15)
|
||||
#define GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR REG_BIT(8)
|
||||
#define GEN9_ENABLE_YV12_BUGFIX REG_BIT(4)
|
||||
#define GEN9_ENABLE_GPGPU_PREEMPTION REG_BIT(2)
|
||||
|
||||
/* Audio */
|
||||
#define G4X_AUD_VID_DID _MMIO(DISPLAY_MMIO_BASE(dev_priv) + 0x62020)
|
||||
@ -12484,11 +12597,19 @@ enum skl_power_gate {
|
||||
#define PMFLUSH_GAPL3UNBLOCK (1 << 21)
|
||||
#define PMFLUSHDONE_LNEBLK (1 << 22)
|
||||
|
||||
#define XEHP_L3NODEARBCFG _MMIO(0xb0b4)
|
||||
#define XEHP_LNESPARE REG_BIT(19)
|
||||
|
||||
#define GEN12_GLOBAL_MOCS(i) _MMIO(0x4000 + (i) * 4) /* Global MOCS regs */
|
||||
|
||||
#define GEN12_GSMBASE _MMIO(0x108100)
|
||||
#define GEN12_DSMBASE _MMIO(0x1080C0)
|
||||
|
||||
#define XEHP_CLOCK_GATE_DIS _MMIO(0x101014)
|
||||
#define SGSI_SIDECLK_DIS REG_BIT(17)
|
||||
#define SGGI_DIS REG_BIT(15)
|
||||
#define SGR_DIS REG_BIT(13)
|
||||
|
||||
/* gamt regs */
|
||||
#define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4)
|
||||
#define GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW 0x67F1427F /* max/min for LRA1/2 */
|
||||
@ -12865,4 +12986,7 @@ enum skl_power_gate {
|
||||
#define CLKGATE_DIS_MISC _MMIO(0x46534)
|
||||
#define CLKGATE_DIS_MISC_DMASC_GATING_DIS REG_BIT(21)
|
||||
|
||||
#define SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731C)
|
||||
#define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14)
|
||||
|
||||
#endif /* _I915_REG_H_ */
|
||||
|
@ -113,6 +113,10 @@ static void i915_fence_release(struct dma_fence *fence)
|
||||
GEM_BUG_ON(rq->guc_prio != GUC_PRIO_INIT &&
|
||||
rq->guc_prio != GUC_PRIO_FINI);
|
||||
|
||||
i915_request_free_capture_list(fetch_and_zero(&rq->capture_list));
|
||||
if (i915_vma_snapshot_present(&rq->batch_snapshot))
|
||||
i915_vma_snapshot_put_onstack(&rq->batch_snapshot);
|
||||
|
||||
/*
|
||||
* The request is put onto a RCU freelist (i.e. the address
|
||||
* is immediately reused), mark the fences as being freed now.
|
||||
@ -186,19 +190,6 @@ void i915_request_notify_execute_cb_imm(struct i915_request *rq)
|
||||
__notify_execute_cb(rq, irq_work_imm);
|
||||
}
|
||||
|
||||
static void free_capture_list(struct i915_request *request)
|
||||
{
|
||||
struct i915_capture_list *capture;
|
||||
|
||||
capture = fetch_and_zero(&request->capture_list);
|
||||
while (capture) {
|
||||
struct i915_capture_list *next = capture->next;
|
||||
|
||||
kfree(capture);
|
||||
capture = next;
|
||||
}
|
||||
}
|
||||
|
||||
static void __i915_request_fill(struct i915_request *rq, u8 val)
|
||||
{
|
||||
void *vaddr = rq->ring->vaddr;
|
||||
@ -303,6 +294,37 @@ static void __rq_cancel_watchdog(struct i915_request *rq)
|
||||
i915_request_put(rq);
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
|
||||
|
||||
/**
|
||||
* i915_request_free_capture_list - Free a capture list
|
||||
* @capture: Pointer to the first list item or NULL
|
||||
*
|
||||
*/
|
||||
void i915_request_free_capture_list(struct i915_capture_list *capture)
|
||||
{
|
||||
while (capture) {
|
||||
struct i915_capture_list *next = capture->next;
|
||||
|
||||
i915_vma_snapshot_put(capture->vma_snapshot);
|
||||
capture = next;
|
||||
}
|
||||
}
|
||||
|
||||
#define assert_capture_list_is_null(_rq) GEM_BUG_ON((_rq)->capture_list)
|
||||
|
||||
#define clear_capture_list(_rq) ((_rq)->capture_list = NULL)
|
||||
|
||||
#else
|
||||
|
||||
#define i915_request_free_capture_list(_a) do {} while (0)
|
||||
|
||||
#define assert_capture_list_is_null(_a) do {} while (0)
|
||||
|
||||
#define clear_capture_list(_rq) do {} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
bool i915_request_retire(struct i915_request *rq)
|
||||
{
|
||||
if (!__i915_request_is_complete(rq))
|
||||
@ -339,7 +361,7 @@ bool i915_request_retire(struct i915_request *rq)
|
||||
}
|
||||
|
||||
if (test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags))
|
||||
atomic_dec(&rq->engine->gt->rps.num_waiters);
|
||||
intel_rps_dec_waiters(&rq->engine->gt->rps);
|
||||
|
||||
/*
|
||||
* We only loosely track inflight requests across preemption,
|
||||
@ -359,7 +381,6 @@ bool i915_request_retire(struct i915_request *rq)
|
||||
intel_context_exit(rq->context);
|
||||
intel_context_unpin(rq->context);
|
||||
|
||||
free_capture_list(rq);
|
||||
i915_sched_node_fini(&rq->sched);
|
||||
i915_request_put(rq);
|
||||
|
||||
@ -719,7 +740,7 @@ void i915_request_cancel(struct i915_request *rq, int error)
|
||||
intel_context_cancel_request(rq->context, rq);
|
||||
}
|
||||
|
||||
static int __i915_sw_fence_call
|
||||
static int
|
||||
submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
|
||||
{
|
||||
struct i915_request *request =
|
||||
@ -755,7 +776,7 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
static int __i915_sw_fence_call
|
||||
static int
|
||||
semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
|
||||
{
|
||||
struct i915_request *rq = container_of(fence, typeof(*rq), semaphore);
|
||||
@ -829,11 +850,18 @@ static void __i915_request_ctor(void *arg)
|
||||
i915_sw_fence_init(&rq->submit, submit_notify);
|
||||
i915_sw_fence_init(&rq->semaphore, semaphore_notify);
|
||||
|
||||
rq->capture_list = NULL;
|
||||
clear_capture_list(rq);
|
||||
rq->batch_snapshot.present = false;
|
||||
|
||||
init_llist_head(&rq->execute_cb);
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
|
||||
#define clear_batch_ptr(_rq) ((_rq)->batch = NULL)
|
||||
#else
|
||||
#define clear_batch_ptr(_a) do {} while (0)
|
||||
#endif
|
||||
|
||||
struct i915_request *
|
||||
__i915_request_create(struct intel_context *ce, gfp_t gfp)
|
||||
{
|
||||
@ -925,10 +953,11 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
|
||||
i915_sched_node_reinit(&rq->sched);
|
||||
|
||||
/* No zalloc, everything must be cleared after use */
|
||||
rq->batch = NULL;
|
||||
clear_batch_ptr(rq);
|
||||
__rq_init_watchdog(rq);
|
||||
GEM_BUG_ON(rq->capture_list);
|
||||
assert_capture_list_is_null(rq);
|
||||
GEM_BUG_ON(!llist_empty(&rq->execute_cb));
|
||||
GEM_BUG_ON(i915_vma_snapshot_present(&rq->batch_snapshot));
|
||||
|
||||
/*
|
||||
* Reserve space in the ring buffer for all the commands required to
|
||||
|
@ -40,6 +40,7 @@
|
||||
#include "i915_scheduler.h"
|
||||
#include "i915_selftest.h"
|
||||
#include "i915_sw_fence.h"
|
||||
#include "i915_vma_snapshot.h"
|
||||
|
||||
#include <uapi/drm/i915_drm.h>
|
||||
|
||||
@ -48,11 +49,17 @@ struct drm_i915_gem_object;
|
||||
struct drm_printer;
|
||||
struct i915_request;
|
||||
|
||||
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
|
||||
struct i915_capture_list {
|
||||
struct i915_vma_snapshot *vma_snapshot;
|
||||
struct i915_capture_list *next;
|
||||
struct i915_vma *vma;
|
||||
};
|
||||
|
||||
void i915_request_free_capture_list(struct i915_capture_list *capture);
|
||||
#else
|
||||
#define i915_request_free_capture_list(_a) do {} while (0)
|
||||
#endif
|
||||
|
||||
#define RQ_TRACE(rq, fmt, ...) do { \
|
||||
const struct i915_request *rq__ = (rq); \
|
||||
ENGINE_TRACE(rq__->engine, "fence %llx:%lld, current %d " fmt, \
|
||||
@ -289,10 +296,12 @@ struct i915_request {
|
||||
/** Preallocate space in the ring for the emitting the request */
|
||||
u32 reserved_space;
|
||||
|
||||
/** Batch buffer related to this request if any (used for
|
||||
* error state dump only).
|
||||
*/
|
||||
struct i915_vma *batch;
|
||||
/** Batch buffer pointer for selftest internal use. */
|
||||
I915_SELFTEST_DECLARE(struct i915_vma *batch);
|
||||
|
||||
struct i915_vma_snapshot batch_snapshot;
|
||||
|
||||
#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
|
||||
/**
|
||||
* Additional buffers requested by userspace to be captured upon
|
||||
* a GPU hang. The vma/obj on this list are protected by their
|
||||
@ -300,6 +309,7 @@ struct i915_request {
|
||||
* on the active_list (of their final request).
|
||||
*/
|
||||
struct i915_capture_list *capture_list;
|
||||
#endif
|
||||
|
||||
/** Time at which this request was emitted, in jiffies. */
|
||||
unsigned long emitted_jiffies;
|
||||
|
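For context, a minimal sketch (not part of this patch) of how a caller might now chain a capture-list entry that carries a vma snapshot reference instead of a raw vma pointer. The helper name, allocation flags and error handling are illustrative only, and real users of the capture list sit behind CONFIG_DRM_I915_CAPTURE_ERROR.

/* Hypothetical helper, assuming CONFIG_DRM_I915_CAPTURE_ERROR=y. */
static int sketch_add_capture(struct i915_request *rq,
                              struct i915_vma_snapshot *vsnap)
{
        struct i915_capture_list *capture;

        capture = kmalloc(sizeof(*capture), GFP_KERNEL);
        if (!capture)
                return -ENOMEM;

        /* The list entry now owns a snapshot reference, not a vma. */
        capture->vma_snapshot = i915_vma_snapshot_get(vsnap);
        capture->next = rq->capture_list;
        rq->capture_list = capture;

        return 0;
}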
@ -41,8 +41,32 @@ bool i915_sg_trim(struct sg_table *orig_st)
|
||||
return true;
|
||||
}
|
||||
|
||||
static void i915_refct_sgt_release(struct kref *ref)
|
||||
{
|
||||
struct i915_refct_sgt *rsgt =
|
||||
container_of(ref, typeof(*rsgt), kref);
|
||||
|
||||
sg_free_table(&rsgt->table);
|
||||
kfree(rsgt);
|
||||
}
|
||||
|
||||
static const struct i915_refct_sgt_ops rsgt_ops = {
|
||||
.release = i915_refct_sgt_release
|
||||
};
|
||||
|
||||
/**
|
||||
* i915_sg_from_mm_node - Create an sg_table from a struct drm_mm_node
|
||||
* i915_refct_sgt_init - Initialize a struct i915_refct_sgt with default ops
|
||||
* @rsgt: The struct i915_refct_sgt to initialize.
|
||||
 * @size: The size of the underlying memory buffer.
|
||||
*/
|
||||
void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size)
|
||||
{
|
||||
__i915_refct_sgt_init(rsgt, size, &rsgt_ops);
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_rsgt_from_mm_node - Create a refcounted sg_table from a struct
|
||||
* drm_mm_node
|
||||
* @node: The drm_mm_node.
|
||||
* @region_start: An offset to add to the dma addresses of the sg list.
|
||||
*
|
||||
@ -50,25 +74,28 @@ bool i915_sg_trim(struct sg_table *orig_st)
|
||||
* taking a maximum segment length into account, splitting into segments
|
||||
* if necessary.
|
||||
*
|
||||
* Return: A pointer to a kmalloced struct sg_table on success, negative
|
||||
* Return: A pointer to a kmalloced struct i915_refct_sgt on success, negative
|
||||
* error code cast to an error pointer on failure.
|
||||
*/
|
||||
struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
|
||||
u64 region_start)
|
||||
struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
|
||||
u64 region_start)
|
||||
{
|
||||
const u64 max_segment = SZ_1G; /* Do we have a limit on this? */
|
||||
u64 segment_pages = max_segment >> PAGE_SHIFT;
|
||||
u64 block_size, offset, prev_end;
|
||||
struct i915_refct_sgt *rsgt;
|
||||
struct sg_table *st;
|
||||
struct scatterlist *sg;
|
||||
|
||||
st = kmalloc(sizeof(*st), GFP_KERNEL);
|
||||
if (!st)
|
||||
rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
|
||||
if (!rsgt)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
i915_refct_sgt_init(rsgt, node->size << PAGE_SHIFT);
|
||||
st = &rsgt->table;
|
||||
if (sg_alloc_table(st, DIV_ROUND_UP(node->size, segment_pages),
|
||||
GFP_KERNEL)) {
|
||||
kfree(st);
|
||||
i915_refct_sgt_put(rsgt);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
@ -104,11 +131,11 @@ struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
|
||||
sg_mark_end(sg);
|
||||
i915_sg_trim(st);
|
||||
|
||||
return st;
|
||||
return rsgt;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_sg_from_buddy_resource - Create an sg_table from a struct
|
||||
* i915_rsgt_from_buddy_resource - Create a refcounted sg_table from a struct
|
||||
* i915_buddy_block list
|
||||
* @res: The struct i915_ttm_buddy_resource.
|
||||
* @region_start: An offset to add to the dma addresses of the sg list.
|
||||
@ -117,11 +144,11 @@ struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
|
||||
* taking a maximum segment length into account, splitting into segments
|
||||
* if necessary.
|
||||
*
|
||||
* Return: A pointer to a kmalloced struct sg_table on success, negative
|
||||
 * Return: A pointer to a kmalloced struct i915_refct_sgt on success, negative
|
||||
* error code cast to an error pointer on failure.
|
||||
*/
|
||||
struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
|
||||
u64 region_start)
|
||||
struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
|
||||
u64 region_start)
|
||||
{
|
||||
struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
|
||||
const u64 size = res->num_pages << PAGE_SHIFT;
|
||||
@ -129,18 +156,21 @@ struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
|
||||
struct i915_buddy_mm *mm = bman_res->mm;
|
||||
struct list_head *blocks = &bman_res->blocks;
|
||||
struct i915_buddy_block *block;
|
||||
struct i915_refct_sgt *rsgt;
|
||||
struct scatterlist *sg;
|
||||
struct sg_table *st;
|
||||
resource_size_t prev_end;
|
||||
|
||||
GEM_BUG_ON(list_empty(blocks));
|
||||
|
||||
st = kmalloc(sizeof(*st), GFP_KERNEL);
|
||||
if (!st)
|
||||
rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
|
||||
if (!rsgt)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
i915_refct_sgt_init(rsgt, size);
|
||||
st = &rsgt->table;
|
||||
if (sg_alloc_table(st, res->num_pages, GFP_KERNEL)) {
|
||||
kfree(st);
|
||||
i915_refct_sgt_put(rsgt);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
@ -181,7 +211,7 @@ struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
|
||||
sg_mark_end(sg);
|
||||
i915_sg_trim(st);
|
||||
|
||||
return st;
|
||||
return rsgt;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
|
||||
|
@ -144,10 +144,78 @@ static inline unsigned int i915_sg_segment_size(void)
|
||||
|
||||
bool i915_sg_trim(struct sg_table *orig_st);
|
||||
|
||||
struct sg_table *i915_sg_from_mm_node(const struct drm_mm_node *node,
|
||||
u64 region_start);
|
||||
/**
|
||||
* struct i915_refct_sgt_ops - Operations structure for struct i915_refct_sgt
|
||||
*/
|
||||
struct i915_refct_sgt_ops {
|
||||
/**
|
||||
* release() - Free the memory of the struct i915_refct_sgt
|
||||
* @ref: struct kref that is embedded in the struct i915_refct_sgt
|
||||
*/
|
||||
void (*release)(struct kref *ref);
|
||||
};
|
||||
|
||||
struct sg_table *i915_sg_from_buddy_resource(struct ttm_resource *res,
|
||||
u64 region_start);
|
||||
/**
|
||||
* struct i915_refct_sgt - A refcounted scatter-gather table
|
||||
* @kref: struct kref for refcounting
|
||||
* @table: struct sg_table holding the scatter-gather table itself. Note that
|
||||
* @table->sgl = NULL can be used to determine whether a scatter-gather table
|
||||
* is present or not.
|
||||
* @size: The size in bytes of the underlying memory buffer
|
||||
* @ops: The operations structure.
|
||||
*/
|
||||
struct i915_refct_sgt {
|
||||
struct kref kref;
|
||||
struct sg_table table;
|
||||
size_t size;
|
||||
const struct i915_refct_sgt_ops *ops;
|
||||
};
|
||||
|
||||
/**
|
||||
* i915_refct_sgt_put - Put a refcounted sg-table
|
||||
 * @rsgt: The struct i915_refct_sgt to put.
|
||||
*/
|
||||
static inline void i915_refct_sgt_put(struct i915_refct_sgt *rsgt)
|
||||
{
|
||||
if (rsgt)
|
||||
kref_put(&rsgt->kref, rsgt->ops->release);
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_refct_sgt_get - Get a refcounted sg-table
|
||||
 * @rsgt: The struct i915_refct_sgt to get.
|
||||
*/
|
||||
static inline struct i915_refct_sgt *
|
||||
i915_refct_sgt_get(struct i915_refct_sgt *rsgt)
|
||||
{
|
||||
kref_get(&rsgt->kref);
|
||||
return rsgt;
|
||||
}
|
||||
|
||||
/**
|
||||
* __i915_refct_sgt_init - Initialize a refcounted sg-list with a custom
|
||||
* operations structure
|
||||
 * @rsgt: The struct i915_refct_sgt to initialize.
|
||||
* @size: Size in bytes of the underlying memory buffer.
|
||||
* @ops: A customized operations structure in case the refcounted sg-list
|
||||
* is embedded into another structure.
|
||||
*/
|
||||
static inline void __i915_refct_sgt_init(struct i915_refct_sgt *rsgt,
|
||||
size_t size,
|
||||
const struct i915_refct_sgt_ops *ops)
|
||||
{
|
||||
kref_init(&rsgt->kref);
|
||||
rsgt->table.sgl = NULL;
|
||||
rsgt->size = size;
|
||||
rsgt->ops = ops;
|
||||
}
|
||||
|
||||
void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size);
|
||||
|
||||
struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
|
||||
u64 region_start);
|
||||
|
||||
struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
|
||||
u64 region_start);
|
||||
|
||||
#endif
|
||||
|
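For reference, a minimal sketch (not from this patch) of embedding a struct i915_refct_sgt in a larger, hypothetical object with a custom release hook, mirroring the default rsgt_ops in i915_scatterlist.c. The "sketch_backing_store" type and its fields are made up.

struct sketch_backing_store {
        struct i915_refct_sgt rsgt;
        void *priv;     /* whatever else the embedding object carries */
};

static void sketch_backing_store_release(struct kref *ref)
{
        struct i915_refct_sgt *rsgt = container_of(ref, typeof(*rsgt), kref);
        struct sketch_backing_store *store =
                container_of(rsgt, typeof(*store), rsgt);

        sg_free_table(&store->rsgt.table);      /* drop the sg mapping first */
        kfree(store);                           /* then the embedding allocation */
}

static const struct i915_refct_sgt_ops sketch_rsgt_ops = {
        .release = sketch_backing_store_release,
};

/*
 * Creation uses the custom ops, and sharing/teardown then reduce to
 * get/put on the embedded member:
 *
 *      __i915_refct_sgt_init(&store->rsgt, size, &sketch_rsgt_ops);
 *      ...
 *      i915_refct_sgt_put(&store->rsgt);
 */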
@ -18,7 +18,9 @@
|
||||
#define I915_SW_FENCE_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_DRM_I915_SW_FENCE_CHECK_DAG
|
||||
static DEFINE_SPINLOCK(i915_sw_fence_lock);
|
||||
#endif
|
||||
|
||||
#define WQ_FLAG_BITS \
|
||||
BITS_PER_TYPE(typeof_member(struct wait_queue_entry, flags))
|
||||
@ -34,7 +36,7 @@ enum {
|
||||
|
||||
static void *i915_sw_fence_debug_hint(void *addr)
|
||||
{
|
||||
return (void *)(((struct i915_sw_fence *)addr)->flags & I915_SW_FENCE_MASK);
|
||||
return (void *)(((struct i915_sw_fence *)addr)->fn);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS
|
||||
@ -126,10 +128,7 @@ static inline void debug_fence_assert(struct i915_sw_fence *fence)
|
||||
static int __i915_sw_fence_notify(struct i915_sw_fence *fence,
|
||||
enum i915_sw_fence_notify state)
|
||||
{
|
||||
i915_sw_fence_notify_t fn;
|
||||
|
||||
fn = (i915_sw_fence_notify_t)(fence->flags & I915_SW_FENCE_MASK);
|
||||
return fn(fence, state);
|
||||
return fence->fn(fence, state);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS
|
||||
@ -242,10 +241,13 @@ void __i915_sw_fence_init(struct i915_sw_fence *fence,
|
||||
const char *name,
|
||||
struct lock_class_key *key)
|
||||
{
|
||||
BUG_ON(!fn || (unsigned long)fn & ~I915_SW_FENCE_MASK);
|
||||
BUG_ON(!fn);
|
||||
|
||||
__init_waitqueue_head(&fence->wait, name, key);
|
||||
fence->flags = (unsigned long)fn;
|
||||
fence->fn = fn;
|
||||
#ifdef CONFIG_DRM_I915_SW_FENCE_CHECK_DAG
|
||||
fence->flags = 0;
|
||||
#endif
|
||||
|
||||
i915_sw_fence_reinit(fence);
|
||||
}
|
||||
@ -257,7 +259,6 @@ void i915_sw_fence_reinit(struct i915_sw_fence *fence)
|
||||
atomic_set(&fence->pending, 1);
|
||||
fence->error = 0;
|
||||
|
||||
I915_SW_FENCE_BUG_ON(!fence->flags);
|
||||
I915_SW_FENCE_BUG_ON(!list_empty(&fence->wait.head));
|
||||
}
|
||||
|
||||
@ -279,6 +280,7 @@ static int i915_sw_fence_wake(wait_queue_entry_t *wq, unsigned mode, int flags,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DRM_I915_SW_FENCE_CHECK_DAG
|
||||
static bool __i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
|
||||
const struct i915_sw_fence * const signaler)
|
||||
{
|
||||
@ -322,9 +324,6 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
|
||||
unsigned long flags;
|
||||
bool err;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG))
|
||||
return false;
|
||||
|
||||
spin_lock_irqsave(&i915_sw_fence_lock, flags);
|
||||
err = __i915_sw_fence_check_if_after(fence, signaler);
|
||||
__i915_sw_fence_clear_checked_bit(fence);
|
||||
@ -332,6 +331,13 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
|
||||
|
||||
return err;
|
||||
}
|
||||
#else
|
||||
static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
|
||||
const struct i915_sw_fence * const signaler)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
|
||||
struct i915_sw_fence *signaler,
|
||||
|
@ -17,17 +17,7 @@
|
||||
|
||||
struct completion;
|
||||
struct dma_resv;
|
||||
|
||||
struct i915_sw_fence {
|
||||
wait_queue_head_t wait;
|
||||
unsigned long flags;
|
||||
atomic_t pending;
|
||||
int error;
|
||||
};
|
||||
|
||||
#define I915_SW_FENCE_CHECKED_BIT 0 /* used internally for DAG checking */
|
||||
#define I915_SW_FENCE_PRIVATE_BIT 1 /* available for use by owner */
|
||||
#define I915_SW_FENCE_MASK (~3)
|
||||
struct i915_sw_fence;
|
||||
|
||||
enum i915_sw_fence_notify {
|
||||
FENCE_COMPLETE,
|
||||
@ -36,7 +26,18 @@ enum i915_sw_fence_notify {
|
||||
|
||||
typedef int (*i915_sw_fence_notify_t)(struct i915_sw_fence *,
|
||||
enum i915_sw_fence_notify state);
|
||||
#define __i915_sw_fence_call __aligned(4)
|
||||
|
||||
struct i915_sw_fence {
|
||||
wait_queue_head_t wait;
|
||||
i915_sw_fence_notify_t fn;
|
||||
#ifdef CONFIG_DRM_I915_SW_FENCE_CHECK_DAG
|
||||
unsigned long flags;
|
||||
#endif
|
||||
atomic_t pending;
|
||||
int error;
|
||||
};
|
||||
|
||||
#define I915_SW_FENCE_CHECKED_BIT 0 /* used internally for DAG checking */
|
||||
|
||||
void __i915_sw_fence_init(struct i915_sw_fence *fence,
|
||||
i915_sw_fence_notify_t fn,
|
||||
|
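A minimal sketch (not from this patch) of what a notify callback looks like now that the function pointer is stored in fence->fn rather than packed into ->flags, so the removed __i915_sw_fence_call alignment annotation is no longer needed. The callback and setup names below are illustrative.

static int sketch_notify(struct i915_sw_fence *fence,
                         enum i915_sw_fence_notify state)
{
        switch (state) {
        case FENCE_COMPLETE:
                /* react to the fence being signaled */
                break;
        case FENCE_FREE:
                /* last chance to drop references held for the fence */
                break;
        }

        return NOTIFY_DONE;
}

static void sketch_fence_setup(struct i915_sw_fence *fence)
{
        i915_sw_fence_init(fence, sketch_notify);       /* fn lands in fence->fn */
}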
@ -23,7 +23,7 @@ static void fence_work(struct work_struct *work)
|
||||
dma_fence_put(&f->dma);
|
||||
}
|
||||
|
||||
static int __i915_sw_fence_call
|
||||
static int
|
||||
fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
|
||||
{
|
||||
struct dma_fence_work *f = container_of(fence, typeof(*f), chain);
|
||||
|
@ -279,7 +279,7 @@ static ssize_t gt_boost_freq_mhz_show(struct device *kdev, struct device_attribu
|
||||
struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
|
||||
struct intel_rps *rps = &i915->gt.rps;
|
||||
|
||||
return sysfs_emit(buf, "%d\n", intel_gpu_freq(rps, rps->boost_freq));
|
||||
return sysfs_emit(buf, "%d\n", intel_rps_get_boost_frequency(rps));
|
||||
}
|
||||
|
||||
static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
|
||||
@ -288,7 +288,6 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
|
||||
{
|
||||
struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
|
||||
struct intel_rps *rps = &dev_priv->gt.rps;
|
||||
bool boost = false;
|
||||
ssize_t ret;
|
||||
u32 val;
|
||||
|
||||
@ -296,21 +295,9 @@ static ssize_t gt_boost_freq_mhz_store(struct device *kdev,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Validate against (static) hardware limits */
|
||||
val = intel_freq_opcode(rps, val);
|
||||
if (val < rps->min_freq || val > rps->max_freq)
|
||||
return -EINVAL;
|
||||
ret = intel_rps_set_boost_frequency(rps, val);
|
||||
|
||||
mutex_lock(&rps->lock);
|
||||
if (val != rps->boost_freq) {
|
||||
rps->boost_freq = val;
|
||||
boost = atomic_read(&rps->num_waiters);
|
||||
}
|
||||
mutex_unlock(&rps->lock);
|
||||
if (boost)
|
||||
schedule_work(&rps->work);
|
||||
|
||||
return count;
|
||||
return ret ?: count;
|
||||
}
|
||||
|
||||
static ssize_t vlv_rpe_freq_mhz_show(struct device *kdev,
|
||||
|
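A sketch of what the new helper is expected to do, reconstructed from the sysfs code it replaces; the real intel_rps_set_boost_frequency() lives in intel_rps.c and may differ in detail, so the name below is deliberately different.

static int sketch_rps_set_boost_frequency(struct intel_rps *rps, u32 val)
{
        bool boost = false;

        /* Validate against (static) hardware limits */
        val = intel_freq_opcode(rps, val);
        if (val < rps->min_freq || val > rps->max_freq)
                return -EINVAL;

        mutex_lock(&rps->lock);
        if (val != rps->boost_freq) {
                rps->boost_freq = val;
                /* Only kick the worker if someone is waiting for a boost */
                boost = atomic_read(&rps->num_waiters);
        }
        mutex_unlock(&rps->lock);

        if (boost)
                schedule_work(&rps->work);

        return 0;
}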
@ -40,12 +40,12 @@
|
||||
|
||||
static struct kmem_cache *slab_vmas;
|
||||
|
||||
struct i915_vma *i915_vma_alloc(void)
|
||||
static struct i915_vma *i915_vma_alloc(void)
|
||||
{
|
||||
return kmem_cache_zalloc(slab_vmas, GFP_KERNEL);
|
||||
}
|
||||
|
||||
void i915_vma_free(struct i915_vma *vma)
|
||||
static void i915_vma_free(struct i915_vma *vma)
|
||||
{
|
||||
return kmem_cache_free(slab_vmas, vma);
|
||||
}
|
||||
@ -113,7 +113,6 @@ vma_create(struct drm_i915_gem_object *obj,
|
||||
vma->vm = i915_vm_get(vm);
|
||||
vma->ops = &vm->vma_ops;
|
||||
vma->obj = obj;
|
||||
vma->resv = obj->base.resv;
|
||||
vma->size = obj->base.size;
|
||||
vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
|
||||
|
||||
@ -346,7 +345,7 @@ int i915_vma_wait_for_bind(struct i915_vma *vma)
|
||||
fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
|
||||
rcu_read_unlock();
|
||||
if (fence) {
|
||||
err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
|
||||
err = dma_fence_wait(fence, true);
|
||||
dma_fence_put(fence);
|
||||
}
|
||||
}
|
||||
@ -354,6 +353,32 @@ int i915_vma_wait_for_bind(struct i915_vma *vma)
|
||||
return err;
|
||||
}
|
||||
|
||||
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
|
||||
static int i915_vma_verify_bind_complete(struct i915_vma *vma)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (i915_active_has_exclusive(&vma->active)) {
|
||||
struct dma_fence *fence =
|
||||
i915_active_fence_get(&vma->active.excl);
|
||||
|
||||
if (!fence)
|
||||
return 0;
|
||||
|
||||
if (dma_fence_is_signaled(fence))
|
||||
err = fence->error;
|
||||
else
|
||||
err = -EBUSY;
|
||||
|
||||
dma_fence_put(fence);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
#else
|
||||
#define i915_vma_verify_bind_complete(_vma) 0
|
||||
#endif
|
||||
|
||||
/**
|
||||
* i915_vma_bind - Sets up PTEs for an VMA in it's corresponding address space.
|
||||
* @vma: VMA to map
|
||||
@ -423,11 +448,16 @@ int i915_vma_bind(struct i915_vma *vma,
|
||||
|
||||
work->base.dma.error = 0; /* enable the queue_work() */
|
||||
|
||||
if (vma->obj) {
|
||||
__i915_gem_object_pin_pages(vma->obj);
|
||||
work->pinned = i915_gem_object_get(vma->obj);
|
||||
}
|
||||
__i915_gem_object_pin_pages(vma->obj);
|
||||
work->pinned = i915_gem_object_get(vma->obj);
|
||||
} else {
|
||||
if (vma->obj) {
|
||||
int ret;
|
||||
|
||||
ret = i915_gem_object_wait_moving_fence(vma->obj, true);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags);
|
||||
}
|
||||
|
||||
@ -449,6 +479,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
|
||||
|
||||
GEM_BUG_ON(!i915_vma_is_ggtt(vma));
|
||||
GEM_BUG_ON(!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND));
|
||||
GEM_BUG_ON(i915_vma_verify_bind_complete(vma));
|
||||
|
||||
ptr = READ_ONCE(vma->iomap);
|
||||
if (ptr == NULL) {
|
||||
@ -667,7 +698,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
|
||||
}
|
||||
|
||||
color = 0;
|
||||
if (vma->obj && i915_vm_has_cache_coloring(vma->vm))
|
||||
if (i915_vm_has_cache_coloring(vma->vm))
|
||||
color = vma->obj->cache_level;
|
||||
|
||||
if (flags & PIN_OFFSET_FIXED) {
|
||||
@ -792,17 +823,14 @@ static bool try_qad_pin(struct i915_vma *vma, unsigned int flags)
|
||||
static int vma_get_pages(struct i915_vma *vma)
|
||||
{
|
||||
int err = 0;
|
||||
bool pinned_pages = false;
|
||||
bool pinned_pages = true;
|
||||
|
||||
if (atomic_add_unless(&vma->pages_count, 1, 0))
|
||||
return 0;
|
||||
|
||||
if (vma->obj) {
|
||||
err = i915_gem_object_pin_pages(vma->obj);
|
||||
if (err)
|
||||
return err;
|
||||
pinned_pages = true;
|
||||
}
|
||||
err = i915_gem_object_pin_pages(vma->obj);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* Allocations ahoy! */
|
||||
if (mutex_lock_interruptible(&vma->pages_mutex)) {
|
||||
@ -835,8 +863,8 @@ static void __vma_put_pages(struct i915_vma *vma, unsigned int count)
|
||||
if (atomic_sub_return(count, &vma->pages_count) == 0) {
|
||||
vma->ops->clear_pages(vma);
|
||||
GEM_BUG_ON(vma->pages);
|
||||
if (vma->obj)
|
||||
i915_gem_object_unpin_pages(vma->obj);
|
||||
|
||||
i915_gem_object_unpin_pages(vma->obj);
|
||||
}
|
||||
mutex_unlock(&vma->pages_mutex);
|
||||
}
|
||||
@ -867,12 +895,13 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
|
||||
u64 size, u64 alignment, u64 flags)
|
||||
{
|
||||
struct i915_vma_work *work = NULL;
|
||||
struct dma_fence *moving = NULL;
|
||||
intel_wakeref_t wakeref = 0;
|
||||
unsigned int bound;
|
||||
int err;
|
||||
|
||||
#ifdef CONFIG_PROVE_LOCKING
|
||||
if (debug_locks && !WARN_ON(!ww) && vma->resv)
|
||||
if (debug_locks && !WARN_ON(!ww))
|
||||
assert_vma_held(vma);
|
||||
#endif
|
||||
|
||||
@ -892,7 +921,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
|
||||
if (flags & PIN_GLOBAL)
|
||||
wakeref = intel_runtime_pm_get(&vma->vm->i915->runtime_pm);
|
||||
|
||||
if (flags & vma->vm->bind_async_flags) {
|
||||
moving = vma->obj ? i915_gem_object_get_moving_fence(vma->obj) : NULL;
|
||||
if (flags & vma->vm->bind_async_flags || moving) {
|
||||
/* lock VM */
|
||||
err = i915_vm_lock_objects(vma->vm, ww);
|
||||
if (err)
|
||||
@ -906,6 +936,8 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
|
||||
|
||||
work->vm = i915_vm_get(vma->vm);
|
||||
|
||||
dma_fence_work_chain(&work->base, moving);
|
||||
|
||||
/* Allocate enough page directories to used PTE */
|
||||
if (vma->vm->allocate_va_range) {
|
||||
err = i915_vm_alloc_pt_stash(vma->vm,
|
||||
@ -980,7 +1012,7 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
|
||||
|
||||
GEM_BUG_ON(!vma->pages);
|
||||
err = i915_vma_bind(vma,
|
||||
vma->obj ? vma->obj->cache_level : 0,
|
||||
vma->obj->cache_level,
|
||||
flags, work);
|
||||
if (err)
|
||||
goto err_remove;
|
||||
@ -1010,7 +1042,10 @@ int i915_vma_pin_ww(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
|
||||
err_rpm:
|
||||
if (wakeref)
|
||||
intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref);
|
||||
if (moving)
|
||||
dma_fence_put(moving);
|
||||
vma_put_pages(vma);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -1034,7 +1069,7 @@ int i915_ggtt_pin(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
|
||||
GEM_BUG_ON(!i915_vma_is_ggtt(vma));
|
||||
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
WARN_ON(!ww && vma->resv && dma_resv_held(vma->resv));
|
||||
WARN_ON(!ww && dma_resv_held(vma->obj->base.resv));
|
||||
#endif
|
||||
|
||||
do {
|
||||
@ -1113,6 +1148,7 @@ void i915_vma_reopen(struct i915_vma *vma)
|
||||
void i915_vma_release(struct kref *ref)
|
||||
{
|
||||
struct i915_vma *vma = container_of(ref, typeof(*vma), ref);
|
||||
struct drm_i915_gem_object *obj = vma->obj;
|
||||
|
||||
if (drm_mm_node_allocated(&vma->node)) {
|
||||
mutex_lock(&vma->vm->mutex);
|
||||
@ -1123,15 +1159,11 @@ void i915_vma_release(struct kref *ref)
|
||||
}
|
||||
GEM_BUG_ON(i915_vma_is_active(vma));
|
||||
|
||||
if (vma->obj) {
|
||||
struct drm_i915_gem_object *obj = vma->obj;
|
||||
|
||||
spin_lock(&obj->vma.lock);
|
||||
list_del(&vma->obj_link);
|
||||
if (!RB_EMPTY_NODE(&vma->obj_node))
|
||||
rb_erase(&vma->obj_node, &obj->vma.tree);
|
||||
spin_unlock(&obj->vma.lock);
|
||||
}
|
||||
spin_lock(&obj->vma.lock);
|
||||
list_del(&vma->obj_link);
|
||||
if (!RB_EMPTY_NODE(&vma->obj_node))
|
||||
rb_erase(&vma->obj_node, &obj->vma.tree);
|
||||
spin_unlock(&obj->vma.lock);
|
||||
|
||||
__i915_vma_remove_closed(vma);
|
||||
i915_vm_put(vma->vm);
|
||||
@ -1256,19 +1288,19 @@ int _i915_vma_move_to_active(struct i915_vma *vma,
|
||||
}
|
||||
|
||||
if (fence) {
|
||||
dma_resv_add_excl_fence(vma->resv, fence);
|
||||
dma_resv_add_excl_fence(vma->obj->base.resv, fence);
|
||||
obj->write_domain = I915_GEM_DOMAIN_RENDER;
|
||||
obj->read_domains = 0;
|
||||
}
|
||||
} else {
|
||||
if (!(flags & __EXEC_OBJECT_NO_RESERVE)) {
|
||||
err = dma_resv_reserve_shared(vma->resv, 1);
|
||||
err = dma_resv_reserve_shared(vma->obj->base.resv, 1);
|
||||
if (unlikely(err))
|
||||
return err;
|
||||
}
|
||||
|
||||
if (fence) {
|
||||
dma_resv_add_shared_fence(vma->resv, fence);
|
||||
dma_resv_add_shared_fence(vma->obj->base.resv, fence);
|
||||
obj->write_domain = 0;
|
||||
}
|
||||
}
|
||||
|
@ -234,16 +234,16 @@ static inline void __i915_vma_put(struct i915_vma *vma)
|
||||
kref_put(&vma->ref, i915_vma_release);
|
||||
}
|
||||
|
||||
#define assert_vma_held(vma) dma_resv_assert_held((vma)->resv)
|
||||
#define assert_vma_held(vma) dma_resv_assert_held((vma)->obj->base.resv)
|
||||
|
||||
static inline void i915_vma_lock(struct i915_vma *vma)
|
||||
{
|
||||
dma_resv_lock(vma->resv, NULL);
|
||||
dma_resv_lock(vma->obj->base.resv, NULL);
|
||||
}
|
||||
|
||||
static inline void i915_vma_unlock(struct i915_vma *vma)
|
||||
{
|
||||
dma_resv_unlock(vma->resv);
|
||||
dma_resv_unlock(vma->obj->base.resv);
|
||||
}
|
||||
|
||||
int __must_check
|
||||
@ -418,9 +418,6 @@ static inline void i915_vma_clear_scanout(struct i915_vma *vma)
|
||||
list_for_each_entry(V, &(OBJ)->vma.list, obj_link) \
|
||||
for_each_until(!i915_vma_is_ggtt(V))
|
||||
|
||||
struct i915_vma *i915_vma_alloc(void);
|
||||
void i915_vma_free(struct i915_vma *vma);
|
||||
|
||||
struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma);
|
||||
void i915_vma_make_shrinkable(struct i915_vma *vma);
|
||||
void i915_vma_make_purgeable(struct i915_vma *vma);
|
||||
|
new file: drivers/gpu/drm/i915/i915_vma_snapshot.c (134 lines)
@ -0,0 +1,134 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
/*
|
||||
* Copyright © 2021 Intel Corporation
|
||||
*/
|
||||
|
||||
#include "i915_vma_snapshot.h"
|
||||
#include "i915_vma_types.h"
|
||||
#include "i915_vma.h"
|
||||
|
||||
/**
|
||||
* i915_vma_snapshot_init - Initialize a struct i915_vma_snapshot from
|
||||
* a struct i915_vma.
|
||||
* @vsnap: The i915_vma_snapshot to init.
|
||||
* @vma: A struct i915_vma used to initialize @vsnap.
|
||||
* @name: Name associated with the snapshot. The character pointer needs to
|
||||
 * stay alive over the lifetime of the snapshot
|
||||
*/
|
||||
void i915_vma_snapshot_init(struct i915_vma_snapshot *vsnap,
|
||||
struct i915_vma *vma,
|
||||
const char *name)
|
||||
{
|
||||
if (!i915_vma_is_pinned(vma))
|
||||
assert_object_held(vma->obj);
|
||||
|
||||
vsnap->name = name;
|
||||
vsnap->size = vma->size;
|
||||
vsnap->obj_size = vma->obj->base.size;
|
||||
vsnap->gtt_offset = vma->node.start;
|
||||
vsnap->gtt_size = vma->node.size;
|
||||
vsnap->page_sizes = vma->page_sizes.gtt;
|
||||
vsnap->pages = vma->pages;
|
||||
vsnap->pages_rsgt = NULL;
|
||||
vsnap->mr = NULL;
|
||||
if (vma->obj->mm.rsgt)
|
||||
vsnap->pages_rsgt = i915_refct_sgt_get(vma->obj->mm.rsgt);
|
||||
vsnap->mr = vma->obj->mm.region;
|
||||
kref_init(&vsnap->kref);
|
||||
vsnap->vma_resource = &vma->active;
|
||||
vsnap->onstack = false;
|
||||
vsnap->present = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_vma_snapshot_init_onstack - Initialize a struct i915_vma_snapshot from
|
||||
* a struct i915_vma, but avoid kfreeing it on last put.
|
||||
* @vsnap: The i915_vma_snapshot to init.
|
||||
* @vma: A struct i915_vma used to initialize @vsnap.
|
||||
* @name: Name associated with the snapshot. The character pointer needs to
|
||||
 * stay alive over the lifetime of the snapshot
|
||||
*/
|
||||
void i915_vma_snapshot_init_onstack(struct i915_vma_snapshot *vsnap,
|
||||
struct i915_vma *vma,
|
||||
const char *name)
|
||||
{
|
||||
i915_vma_snapshot_init(vsnap, vma, name);
|
||||
vsnap->onstack = true;
|
||||
}
|
||||
|
||||
static void vma_snapshot_release(struct kref *ref)
|
||||
{
|
||||
struct i915_vma_snapshot *vsnap =
|
||||
container_of(ref, typeof(*vsnap), kref);
|
||||
|
||||
vsnap->present = false;
|
||||
if (vsnap->pages_rsgt)
|
||||
i915_refct_sgt_put(vsnap->pages_rsgt);
|
||||
if (!vsnap->onstack)
|
||||
kfree(vsnap);
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_vma_snapshot_put - Put an i915_vma_snapshot pointer reference
|
||||
* @vsnap: The pointer reference
|
||||
*/
|
||||
void i915_vma_snapshot_put(struct i915_vma_snapshot *vsnap)
|
||||
{
|
||||
kref_put(&vsnap->kref, vma_snapshot_release);
|
||||
}
|
||||
|
||||
/**
|
||||
 * i915_vma_snapshot_put_onstack - Put an onstack i915_vma_snapshot pointer
|
||||
 * reference and verify that the structure is released
|
||||
* @vsnap: The pointer reference
|
||||
*
|
||||
 * This function is intended to be paired with i915_vma_snapshot_init_onstack()
|
||||
* and should be called before exiting the scope that declared or
|
||||
* freeing the structure that embedded @vsnap to verify that all references
|
||||
* have been released.
|
||||
*/
|
||||
void i915_vma_snapshot_put_onstack(struct i915_vma_snapshot *vsnap)
|
||||
{
|
||||
if (!kref_put(&vsnap->kref, vma_snapshot_release))
|
||||
GEM_BUG_ON(1);
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_vma_snapshot_resource_pin - Temporarily block the memory the
|
||||
* vma snapshot is pointing to from being released.
|
||||
* @vsnap: The vma snapshot.
|
||||
* @lockdep_cookie: Pointer to bool needed for lockdep support. This needs
|
||||
* to be passed to the paired i915_vma_snapshot_resource_unpin.
|
||||
*
|
||||
* This function will temporarily try to hold up a fence or similar structure
|
||||
* and will therefore enter a fence signaling critical section.
|
||||
*
|
||||
* Return: true if we succeeded in blocking the memory from being released,
|
||||
* false otherwise.
|
||||
*/
|
||||
bool i915_vma_snapshot_resource_pin(struct i915_vma_snapshot *vsnap,
|
||||
bool *lockdep_cookie)
|
||||
{
|
||||
bool pinned = i915_active_acquire_if_busy(vsnap->vma_resource);
|
||||
|
||||
if (pinned)
|
||||
*lockdep_cookie = dma_fence_begin_signalling();
|
||||
|
||||
return pinned;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_vma_snapshot_resource_unpin - Unblock vma snapshot memory from
|
||||
* being released.
|
||||
* @vsnap: The vma snapshot.
|
||||
 * @lockdep_cookie: Cookie returned from matching i915_vma_snapshot_resource_pin().
|
||||
*
|
||||
* Might leave a fence signalling critical section and signal a fence.
|
||||
*/
|
||||
void i915_vma_snapshot_resource_unpin(struct i915_vma_snapshot *vsnap,
|
||||
bool lockdep_cookie)
|
||||
{
|
||||
dma_fence_end_signalling(lockdep_cookie);
|
||||
|
||||
return i915_active_release(vsnap->vma_resource);
|
||||
}
|
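For context, a hedged usage sketch (not part of this patch) of the on-stack snapshot API above: take a snapshot of a vma, pin its backing store while it is inspected, then drop the reference. The wrapper name is illustrative; the snapshot must be taken while the vma is pinned or its object lock is held, as asserted in i915_vma_snapshot_init().

static void sketch_capture_batch(struct i915_vma *vma)
{
        struct i915_vma_snapshot vsnap;
        bool lockdep_cookie;

        i915_vma_snapshot_init_onstack(&vsnap, vma, "batch");

        if (i915_vma_snapshot_resource_pin(&vsnap, &lockdep_cookie)) {
                /* vsnap.pages / vsnap.pages_rsgt are safe to walk here */
                i915_vma_snapshot_resource_unpin(&vsnap, lockdep_cookie);
        }

        i915_vma_snapshot_put_onstack(&vsnap);
}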
new file: drivers/gpu/drm/i915/i915_vma_snapshot.h (112 lines)
@ -0,0 +1,112 @@
|
||||
/* SPDX-License-Identifier: MIT */
|
||||
/*
|
||||
* Copyright © 2021 Intel Corporation
|
||||
*/
|
||||
#ifndef _I915_VMA_SNAPSHOT_H_
|
||||
#define _I915_VMA_SNAPSHOT_H_
|
||||
|
||||
#include <linux/kref.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
struct i915_active;
|
||||
struct i915_refct_sgt;
|
||||
struct i915_vma;
|
||||
struct intel_memory_region;
|
||||
struct sg_table;
|
||||
|
||||
/**
|
||||
* DOC: Simple utilities for snapshotting GPU vma metadata, later used for
|
||||
 * error capture. We use a separate header for this to avoid issues due to
|
||||
* recursive header includes.
|
||||
*/
|
||||
|
||||
/**
|
||||
* struct i915_vma_snapshot - Snapshot of vma metadata.
|
||||
 * @name: Name associated with the snapshot.
 * @size: The vma size in bytes.
|
||||
* @obj_size: The size of the underlying object in bytes.
|
||||
* @gtt_offset: The gtt offset the vma is bound to.
|
||||
* @gtt_size: The size in bytes allocated for the vma in the GTT.
|
||||
* @pages: The struct sg_table pointing to the pages bound.
|
||||
* @pages_rsgt: The refcounted sg_table holding the reference for @pages if any.
|
||||
* @mr: The memory region pointed for the pages bound.
|
||||
* @kref: Reference for this structure.
|
||||
* @vma_resource: FIXME: A means to keep the unbind fence from signaling.
|
||||
* Temporarily while we have only sync unbinds, and still use the vma
|
||||
* active, we use that. With async unbinding we need a signaling refcount
|
||||
* for the unbind fence.
|
||||
* @page_sizes: The vma GTT page sizes information.
|
||||
* @onstack: Whether the structure shouldn't be freed on final put.
|
||||
* @present: Whether the structure is present and initialized.
|
||||
*/
|
||||
struct i915_vma_snapshot {
|
||||
const char *name;
|
||||
size_t size;
|
||||
size_t obj_size;
|
||||
size_t gtt_offset;
|
||||
size_t gtt_size;
|
||||
struct sg_table *pages;
|
||||
struct i915_refct_sgt *pages_rsgt;
|
||||
struct intel_memory_region *mr;
|
||||
struct kref kref;
|
||||
struct i915_active *vma_resource;
|
||||
u32 page_sizes;
|
||||
bool onstack:1;
|
||||
bool present:1;
|
||||
};
|
||||
|
||||
void i915_vma_snapshot_init(struct i915_vma_snapshot *vsnap,
|
||||
struct i915_vma *vma,
|
||||
const char *name);
|
||||
|
||||
void i915_vma_snapshot_init_onstack(struct i915_vma_snapshot *vsnap,
|
||||
struct i915_vma *vma,
|
||||
const char *name);
|
||||
|
||||
void i915_vma_snapshot_put(struct i915_vma_snapshot *vsnap);
|
||||
|
||||
void i915_vma_snapshot_put_onstack(struct i915_vma_snapshot *vsnap);
|
||||
|
||||
bool i915_vma_snapshot_resource_pin(struct i915_vma_snapshot *vsnap,
|
||||
bool *lockdep_cookie);
|
||||
|
||||
void i915_vma_snapshot_resource_unpin(struct i915_vma_snapshot *vsnap,
|
||||
bool lockdep_cookie);
|
||||
|
||||
/**
|
||||
* i915_vma_snapshot_alloc - Allocate a struct i915_vma_snapshot
|
||||
* @gfp: Allocation mode.
|
||||
*
|
||||
* Return: A pointer to a struct i915_vma_snapshot if successful.
|
||||
* NULL otherwise.
|
||||
*/
|
||||
static inline struct i915_vma_snapshot *i915_vma_snapshot_alloc(gfp_t gfp)
|
||||
{
|
||||
return kmalloc(sizeof(struct i915_vma_snapshot), gfp);
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_vma_snapshot_get - Take a reference on a struct i915_vma_snapshot
|
||||
*
|
||||
* Return: A pointer to a struct i915_vma_snapshot.
|
||||
*/
|
||||
static inline struct i915_vma_snapshot *
|
||||
i915_vma_snapshot_get(struct i915_vma_snapshot *vsnap)
|
||||
{
|
||||
kref_get(&vsnap->kref);
|
||||
return vsnap;
|
||||
}
|
||||
|
||||
/**
|
||||
* i915_vma_snapshot_present - Whether a struct i915_vma_snapshot is
|
||||
* present and initialized.
|
||||
*
|
||||
* Return: true if present and initialized; false otherwise.
|
||||
*/
|
||||
static inline bool
|
||||
i915_vma_snapshot_present(const struct i915_vma_snapshot *vsnap)
|
||||
{
|
||||
return vsnap && vsnap->present;
|
||||
}
|
||||
|
||||
#endif
|
@ -187,7 +187,6 @@ struct i915_vma {
|
||||
const struct i915_vma_ops *ops;
|
||||
|
||||
struct drm_i915_gem_object *obj;
|
||||
struct dma_resv *resv; /** Alias of obj->resv */
|
||||
|
||||
struct sg_table *pages;
|
||||
void __iomem *iomap;
|
||||
|
@ -83,33 +83,26 @@ const char *intel_platform_name(enum intel_platform platform)
	return platform_names[platform];
}

static const char *iommu_name(void)
{
	const char *msg = "n/a";

#ifdef CONFIG_INTEL_IOMMU
	msg = enableddisabled(intel_iommu_gfx_mapped);
#endif

	return msg;
}

void intel_device_info_print_static(const struct intel_device_info *info,
				    struct drm_printer *p)
{
	if (info->graphics_rel)
		drm_printf(p, "graphics version: %u.%02u\n", info->graphics_ver, info->graphics_rel);
	if (info->graphics.rel)
		drm_printf(p, "graphics version: %u.%02u\n", info->graphics.ver,
			   info->graphics.rel);
	else
		drm_printf(p, "graphics version: %u\n", info->graphics_ver);
		drm_printf(p, "graphics version: %u\n", info->graphics.ver);

	if (info->media_rel)
		drm_printf(p, "media version: %u.%02u\n", info->media_ver, info->media_rel);
	if (info->media.rel)
		drm_printf(p, "media version: %u.%02u\n", info->media.ver, info->media.rel);
	else
		drm_printf(p, "media version: %u\n", info->media_ver);
		drm_printf(p, "media version: %u\n", info->media.ver);

	if (info->display.rel)
		drm_printf(p, "display version: %u.%02u\n", info->display.ver, info->display.rel);
	else
		drm_printf(p, "display version: %u\n", info->display.ver);

	drm_printf(p, "display version: %u\n", info->display.ver);
	drm_printf(p, "gt: %d\n", info->gt);
	drm_printf(p, "iommu: %s\n", iommu_name());
	drm_printf(p, "memory-regions: %x\n", info->memory_regions);
	drm_printf(p, "page-sizes: %x\n", info->page_sizes);
	drm_printf(p, "platform: %s\n", intel_platform_name(info->platform));

@ -369,7 +362,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
		info->display.has_dsc = 0;
	}

	if (GRAPHICS_VER(dev_priv) == 6 && intel_vtd_active()) {
	if (GRAPHICS_VER(dev_priv) == 6 && intel_vtd_active(dev_priv)) {
		drm_info(&dev_priv->drm,
			 "Disabling ppGTT for VT-d support\n");
		info->ppgtt_type = INTEL_PPGTT_NONE;

@ -166,11 +166,14 @@ enum intel_ppgtt_type {
	func(overlay_needs_physical); \
	func(supports_tv);

struct ip_version {
	u8 ver;
	u8 rel;
};

struct intel_device_info {
	u8 graphics_ver;
	u8 graphics_rel;
	u8 media_ver;
	u8 media_rel;
	struct ip_version graphics;
	struct ip_version media;

	intel_engine_mask_t platform_engine_mask; /* Engines supported by the HW */

@ -200,6 +203,7 @@ struct intel_device_info {

	struct {
		u8 ver;
		u8 rel;

#define DEFINE_FLAG(name) u8 name:1
		DEV_INFO_DISPLAY_FOR_EACH_FLAG(DEFINE_FLAG);

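With the version/release pair folded into struct ip_version, a caller can compare full graphics IP versions directly off the nested fields. A hypothetical helper (the name is an assumption, not part of this commit) might look like:

static inline bool example_graphics_ip_at_least(const struct intel_device_info *info,
						u8 ver, u8 rel)
{
	return info->graphics.ver > ver ||
	       (info->graphics.ver == ver && info->graphics.rel >= rel);
}
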
@ -126,7 +126,6 @@ intel_memory_region_create(struct drm_i915_private *i915,
		goto err_free;
	}

	kref_init(&mem->kref);
	return mem;

err_free:

@ -144,28 +143,17 @@ void intel_memory_region_set_name(struct intel_memory_region *mem,
	va_end(ap);
}

static void __intel_memory_region_destroy(struct kref *kref)
void intel_memory_region_destroy(struct intel_memory_region *mem)
{
	struct intel_memory_region *mem =
		container_of(kref, typeof(*mem), kref);
	int ret = 0;

	if (mem->ops->release)
		mem->ops->release(mem);
		ret = mem->ops->release(mem);

	GEM_WARN_ON(!list_empty_careful(&mem->objects.list));
	mutex_destroy(&mem->objects.lock);
	kfree(mem);
}

struct intel_memory_region *
intel_memory_region_get(struct intel_memory_region *mem)
{
	kref_get(&mem->kref);
	return mem;
}

void intel_memory_region_put(struct intel_memory_region *mem)
{
	kref_put(&mem->kref, __intel_memory_region_destroy);
	if (!ret)
		kfree(mem);
}

/* Global memory region registration -- only slight layer inversions! */
@ -234,7 +222,7 @@ void intel_memory_regions_driver_release(struct drm_i915_private *i915)
		fetch_and_zero(&i915->mm.regions[i]);

		if (region)
			intel_memory_region_put(region);
			intel_memory_region_destroy(region);
	}
}

@ -6,7 +6,6 @@
#ifndef __INTEL_MEMORY_REGION_H__
#define __INTEL_MEMORY_REGION_H__

#include <linux/kref.h>
#include <linux/ioport.h>
#include <linux/mutex.h>
#include <linux/io-mapping.h>

@ -51,7 +50,7 @@ struct intel_memory_region_ops {
	unsigned int flags;

	int (*init)(struct intel_memory_region *mem);
	void (*release)(struct intel_memory_region *mem);
	int (*release)(struct intel_memory_region *mem);

	int (*init_object)(struct intel_memory_region *mem,
			   struct drm_i915_gem_object *obj,
@ -71,8 +70,6 @@ struct intel_memory_region {
	/* For fake LMEM */
	struct drm_mm_node fake_mappable;

	struct kref kref;

	resource_size_t io_start;
	resource_size_t min_page_size;
	resource_size_t total;
@ -110,9 +107,7 @@ intel_memory_region_create(struct drm_i915_private *i915,
			   u16 instance,
			   const struct intel_memory_region_ops *ops);

struct intel_memory_region *
intel_memory_region_get(struct intel_memory_region *mem);
void intel_memory_region_put(struct intel_memory_region *mem);
void intel_memory_region_destroy(struct intel_memory_region *mem);

int intel_memory_regions_hw_probe(struct drm_i915_private *i915);
void intel_memory_regions_driver_release(struct drm_i915_private *i915);

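A sketch of the ownership contract implied by these headers (the backend names here are hypothetical, not from the diff): ->release() now returns an int, and intel_memory_region_destroy() is expected to free the region only when the backend reports a clean teardown.

static int example_region_release(struct intel_memory_region *mem)
{
	/* Tear down backend state; return 0 only if nothing is left behind. */
	return 0;
}

static const struct intel_memory_region_ops example_region_ops = {
	.release = example_region_release,
};
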
@ -98,7 +98,7 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
	 * "Plane N strech max must be programmed to 11b (x1)
	 * when Async flips are enabled on that plane."
	 */
	if (!IS_GEMINILAKE(dev_priv) && intel_vtd_active())
	if (!IS_GEMINILAKE(dev_priv) && intel_vtd_active(dev_priv))
		intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe),
				 SKL_PLANE1_STRETCH_MAX_MASK, SKL_PLANE1_STRETCH_MAX_X1);
}

@ -7482,11 +7482,34 @@ static void dg1_init_clock_gating(struct drm_i915_private *dev_priv)
	gen12lp_init_clock_gating(dev_priv);

	/* Wa_1409836686:dg1[a0] */
	if (IS_DG1_GT_STEP(dev_priv, STEP_A0, STEP_B0))
	if (IS_DG1_GRAPHICS_STEP(dev_priv, STEP_A0, STEP_B0))
		intel_uncore_write(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, intel_uncore_read(&dev_priv->uncore, GEN9_CLKGATE_DIS_3) |
				   DPT_GATING_DIS);
}

static void xehpsdv_init_clock_gating(struct drm_i915_private *dev_priv)
{
	/* Wa_22010146351:xehpsdv */
	if (IS_XEHPSDV_GRAPHICS_STEP(dev_priv, STEP_A0, STEP_B0))
		intel_uncore_rmw(&dev_priv->uncore, XEHP_CLOCK_GATE_DIS, 0, SGR_DIS);
}

static void dg2_init_clock_gating(struct drm_i915_private *i915)
{
	/* Wa_22010954014:dg2_g10 */
	if (IS_DG2_G10(i915))
		intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0,
				 SGSI_SIDECLK_DIS);

	/*
	 * Wa_14010733611:dg2_g10
	 * Wa_22010146351:dg2_g10
	 */
	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0))
		intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0,
				 SGR_DIS | SGGI_DIS);
}

static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
{
	if (!HAS_PCH_CNP(dev_priv))

@ -7530,12 +7553,12 @@ static void kbl_init_clock_gating(struct drm_i915_private *dev_priv)
			   FBC_LLC_FULLY_OPEN);

	/* WaDisableSDEUnitClockGating:kbl */
	if (IS_KBL_GT_STEP(dev_priv, 0, STEP_C0))
	if (IS_KBL_GRAPHICS_STEP(dev_priv, 0, STEP_C0))
		intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) |
				   GEN8_SDEUNIT_CLOCK_GATE_DISABLE);

	/* WaDisableGamClockGating:kbl */
	if (IS_KBL_GT_STEP(dev_priv, 0, STEP_C0))
	if (IS_KBL_GRAPHICS_STEP(dev_priv, 0, STEP_C0))
		intel_uncore_write(&dev_priv->uncore, GEN6_UCGCTL1, intel_uncore_read(&dev_priv->uncore, GEN6_UCGCTL1) |
				   GEN6_GAMUNIT_CLOCK_GATE_DISABLE);

@ -7897,6 +7920,8 @@ static const struct drm_i915_clock_gating_funcs platform##_clock_gating_funcs =
	.init_clock_gating = platform##_init_clock_gating, \
}

CG_FUNCS(dg2);
CG_FUNCS(xehpsdv);
CG_FUNCS(adlp);
CG_FUNCS(dg1);
CG_FUNCS(gen12lp);
@ -7933,7 +7958,11 @@ CG_FUNCS(nop);
 */
void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
{
	if (IS_ALDERLAKE_P(dev_priv))
	if (IS_DG2(dev_priv))
		dev_priv->clock_gating_funcs = &dg2_clock_gating_funcs;
	else if (IS_XEHPSDV(dev_priv))
		dev_priv->clock_gating_funcs = &xehpsdv_clock_gating_funcs;
	else if (IS_ALDERLAKE_P(dev_priv))
		dev_priv->clock_gating_funcs = &adlp_clock_gating_funcs;
	else if (IS_DG1(dev_priv))
		dev_priv->clock_gating_funcs = &dg1_clock_gating_funcs;

@ -104,19 +104,50 @@ int intel_region_ttm_init(struct intel_memory_region *mem)
 * memory region, and if it was registered with the TTM device,
 * removes that registration.
 */
void intel_region_ttm_fini(struct intel_memory_region *mem)
int intel_region_ttm_fini(struct intel_memory_region *mem)
{
	int ret;
	struct ttm_resource_manager *man = mem->region_private;
	int ret = -EBUSY;
	int count;

	/*
	 * Put the region's move fences. This releases requests that
	 * may hold on to contexts and vms that may hold on to buffer
	 * objects placed in this region.
	 */
	if (man)
		ttm_resource_manager_cleanup(man);

	/* Flush objects from region. */
	for (count = 0; count < 10; ++count) {
		i915_gem_flush_free_objects(mem->i915);

		mutex_lock(&mem->objects.lock);
		if (list_empty(&mem->objects.list))
			ret = 0;
		mutex_unlock(&mem->objects.lock);
		if (!ret)
			break;

		msleep(20);
		flush_delayed_work(&mem->i915->bdev.wq);
	}

	/* If we leaked objects, Don't free the region causing use after free */
	if (ret || !man)
		return ret;

	ret = i915_ttm_buddy_man_fini(&mem->i915->bdev,
				      intel_region_to_ttm_type(mem));
	GEM_WARN_ON(ret);
	mem->region_private = NULL;

	return ret;
}

/**
 * intel_region_ttm_resource_to_st - Convert an opaque TTM resource manager resource
 * to an sg_table.
 * intel_region_ttm_resource_to_rsgt -
 * Convert an opaque TTM resource manager resource to a refcounted sg_table.
 * @mem: The memory region.
 * @res: The resource manager resource obtained from the TTM resource manager.
 *

@ -126,17 +157,18 @@ void intel_region_ttm_fini(struct intel_memory_region *mem)
 *
 * Return: A malloced sg_table on success, an error pointer on failure.
 */
struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem,
						 struct ttm_resource *res)
struct i915_refct_sgt *
intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
				  struct ttm_resource *res)
{
	if (mem->is_range_manager) {
		struct ttm_range_mgr_node *range_node =
			to_ttm_range_mgr_node(res);

		return i915_sg_from_mm_node(&range_node->mm_nodes[0],
					    mem->region.start);
		return i915_rsgt_from_mm_node(&range_node->mm_nodes[0],
					      mem->region.start);
	} else {
		return i915_sg_from_buddy_resource(res, mem->region.start);
		return i915_rsgt_from_buddy_resource(res, mem->region.start);
	}
}

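A caller-side sketch, under the assumption that error handling follows the usual ERR_PTR() convention stated in the kernel-doc above (the wrapper's internals and its put helper are not shown in this hunk):

static int example_map_resource(struct intel_memory_region *mem,
				struct ttm_resource *res)
{
	struct i915_refct_sgt *rsgt;

	rsgt = intel_region_ttm_resource_to_rsgt(mem, res);
	if (IS_ERR(rsgt))
		return PTR_ERR(rsgt);

	/* Walk the pages here; drop the reference with the wrapper's put helper. */
	return 0;
}
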
@ -20,10 +20,11 @@ void intel_region_ttm_device_fini(struct drm_i915_private *dev_priv);

int intel_region_ttm_init(struct intel_memory_region *mem);

void intel_region_ttm_fini(struct intel_memory_region *mem);
int intel_region_ttm_fini(struct intel_memory_region *mem);

struct sg_table *intel_region_ttm_resource_to_st(struct intel_memory_region *mem,
						 struct ttm_resource *res);
struct i915_refct_sgt *
intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
				  struct ttm_resource *res);

void intel_region_ttm_resource_free(struct intel_memory_region *mem,
				    struct ttm_resource *res);

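A plausible wiring, assumed for illustration rather than taken from this diff: a TTM-backed region whose ->release() simply forwards intel_region_ttm_fini()'s result, so intel_memory_region_destroy() only frees the region once the TTM manager is really gone.

static int example_ttm_region_release(struct intel_memory_region *mem)
{
	return intel_region_ttm_fini(mem);
}
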
@ -23,7 +23,8 @@
 * use a macro to define these to make it easier to identify the platforms
 * where the two steppings can deviate.
 */
#define COMMON_STEP(x) .gt_step = STEP_##x, .display_step = STEP_##x
#define COMMON_STEP(x) .graphics_step = STEP_##x, .display_step = STEP_##x, .media_step = STEP_##x
#define COMMON_GT_MEDIA_STEP(x) .graphics_step = STEP_##x, .media_step = STEP_##x

static const struct intel_step_info skl_revids[] = {
	[0x6] = { COMMON_STEP(G0) },
@ -33,13 +34,13 @@ static const struct intel_step_info skl_revids[] = {
};

static const struct intel_step_info kbl_revids[] = {
	[1] = { .gt_step = STEP_B0, .display_step = STEP_B0 },
	[2] = { .gt_step = STEP_C0, .display_step = STEP_B0 },
	[3] = { .gt_step = STEP_D0, .display_step = STEP_B0 },
	[4] = { .gt_step = STEP_F0, .display_step = STEP_C0 },
	[5] = { .gt_step = STEP_C0, .display_step = STEP_B1 },
	[6] = { .gt_step = STEP_D1, .display_step = STEP_B1 },
	[7] = { .gt_step = STEP_G0, .display_step = STEP_C0 },
	[1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 },
	[2] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B0 },
	[3] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_B0 },
	[4] = { COMMON_GT_MEDIA_STEP(F0), .display_step = STEP_C0 },
	[5] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B1 },
	[6] = { COMMON_GT_MEDIA_STEP(D1), .display_step = STEP_B1 },
	[7] = { COMMON_GT_MEDIA_STEP(G0), .display_step = STEP_C0 },
};

static const struct intel_step_info bxt_revids[] = {

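For reference, the new macro is pure shorthand; expanding the first converted kbl_revids entry with the definitions in this hunk gives:

	[1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 },
		/* expands to */
	[1] = { .graphics_step = STEP_B0, .media_step = STEP_B0, .display_step = STEP_B0 },
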
@ -63,16 +64,16 @@ static const struct intel_step_info jsl_ehl_revids[] = {
};

static const struct intel_step_info tgl_uy_revids[] = {
	[0] = { .gt_step = STEP_A0, .display_step = STEP_A0 },
	[1] = { .gt_step = STEP_B0, .display_step = STEP_C0 },
	[2] = { .gt_step = STEP_B1, .display_step = STEP_C0 },
	[3] = { .gt_step = STEP_C0, .display_step = STEP_D0 },
	[0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 },
	[1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_C0 },
	[2] = { COMMON_GT_MEDIA_STEP(B1), .display_step = STEP_C0 },
	[3] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_D0 },
};

/* Same GT stepping between tgl_uy_revids and tgl_revids don't mean the same HW */
static const struct intel_step_info tgl_revids[] = {
	[0] = { .gt_step = STEP_A0, .display_step = STEP_B0 },
	[1] = { .gt_step = STEP_B0, .display_step = STEP_D0 },
	[0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_B0 },
	[1] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_D0 },
};

static const struct intel_step_info rkl_revids[] = {

@ -87,38 +88,38 @@ static const struct intel_step_info dg1_revids[] = {
};

static const struct intel_step_info adls_revids[] = {
	[0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 },
	[0x1] = { .gt_step = STEP_A0, .display_step = STEP_A2 },
	[0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 },
	[0x8] = { .gt_step = STEP_C0, .display_step = STEP_B0 },
	[0xC] = { .gt_step = STEP_D0, .display_step = STEP_C0 },
	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 },
	[0x1] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A2 },
	[0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 },
	[0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_B0 },
	[0xC] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_C0 },
};

static const struct intel_step_info adlp_revids[] = {
	[0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 },
	[0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 },
	[0x8] = { .gt_step = STEP_C0, .display_step = STEP_C0 },
	[0xC] = { .gt_step = STEP_C0, .display_step = STEP_D0 },
	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 },
	[0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 },
	[0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_C0 },
	[0xC] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_D0 },
};

static const struct intel_step_info xehpsdv_revids[] = {
	[0x0] = { .gt_step = STEP_A0 },
	[0x1] = { .gt_step = STEP_A1 },
	[0x4] = { .gt_step = STEP_B0 },
	[0x8] = { .gt_step = STEP_C0 },
	[0x0] = { COMMON_GT_MEDIA_STEP(A0) },
	[0x1] = { COMMON_GT_MEDIA_STEP(A1) },
	[0x4] = { COMMON_GT_MEDIA_STEP(B0) },
	[0x8] = { COMMON_GT_MEDIA_STEP(C0) },
};

static const struct intel_step_info dg2_g10_revid_step_tbl[] = {
	[0x0] = { .gt_step = STEP_A0, .display_step = STEP_A0 },
	[0x1] = { .gt_step = STEP_A1, .display_step = STEP_A0 },
	[0x4] = { .gt_step = STEP_B0, .display_step = STEP_B0 },
	[0x8] = { .gt_step = STEP_C0, .display_step = STEP_C0 },
	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_A0 },
	[0x1] = { COMMON_GT_MEDIA_STEP(A1), .display_step = STEP_A0 },
	[0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_B0 },
	[0x8] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_C0 },
};

static const struct intel_step_info dg2_g11_revid_step_tbl[] = {
	[0x0] = { .gt_step = STEP_A0, .display_step = STEP_B0 },
	[0x4] = { .gt_step = STEP_B0, .display_step = STEP_C0 },
	[0x5] = { .gt_step = STEP_B1, .display_step = STEP_C0 },
	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_B0 },
	[0x4] = { COMMON_GT_MEDIA_STEP(B0), .display_step = STEP_C0 },
	[0x5] = { COMMON_GT_MEDIA_STEP(B1), .display_step = STEP_C0 },
};

void intel_step_init(struct drm_i915_private *i915)

@ -179,7 +180,7 @@ void intel_step_init(struct drm_i915_private *i915)
	if (!revids)
		return;

	if (revid < size && revids[revid].gt_step != STEP_NONE) {
	if (revid < size && revids[revid].graphics_step != STEP_NONE) {
		step = revids[revid];
	} else {
		drm_warn(&i915->drm, "Unknown revid 0x%02x\n", revid);
@ -192,7 +193,7 @@ void intel_step_init(struct drm_i915_private *i915)
	 * steppings in the array are not monotonically increasing, but
	 * it's better than defaulting to 0.
	 */
	while (revid < size && revids[revid].gt_step == STEP_NONE)
	while (revid < size && revids[revid].graphics_step == STEP_NONE)
		revid++;

	if (revid < size) {
@ -201,12 +202,12 @@ void intel_step_init(struct drm_i915_private *i915)
		step = revids[revid];
	} else {
		drm_dbg(&i915->drm, "Using future steppings\n");
		step.gt_step = STEP_FUTURE;
		step.graphics_step = STEP_FUTURE;
		step.display_step = STEP_FUTURE;
	}
}

	if (drm_WARN_ON(&i915->drm, step.gt_step == STEP_NONE))
	if (drm_WARN_ON(&i915->drm, step.graphics_step == STEP_NONE))
		return;

	RUNTIME_INFO(i915)->step = step;

@ -11,8 +11,9 @@
struct drm_i915_private;

struct intel_step_info {
	u8 gt_step;
	u8 graphics_step;
	u8 display_step;
	u8 media_step;
};

#define STEP_ENUM_VAL(name) STEP_##name,

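A hypothetical workaround gate built on the renamed fields (the helper name is an assumption, not from this commit): checks are now phrased against the per-IP graphics stepping rather than the old combined gt_step.

static bool example_needs_gfx_a_step_wa(struct drm_i915_private *i915)
{
	const struct intel_step_info *step = &RUNTIME_INFO(i915)->step;

	return step->graphics_step >= STEP_A0 && step->graphics_step < STEP_B0;
}
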
Some files were not shown because too many files have changed in this diff.