Couple reverts from Jason getting rid of asynchronous command parsing

and fence error propagation and a GVT fix of shadow ppgtt invalidation
 with proper D3 state tracking from Colin.
 -----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEbSBwaO7dZQkcLOKj+mJfZA7rE8oFAmD5dZ4ACgkQ+mJfZA7r
 E8oOjggAmpH/KcwfiQMoVx8ie28bO5bZzLTX05Cchi1DJ3Av3WuHFalekw+3AAJc
 s58fU2eLp/FKZ796OPNZLtmVgA2984UioDI9TePUn5kQ0PixJh5p7hpuywDBafpH
 C8jgp1dPbMDtTZGbLLt40tJYDNFKnaqQXiTYeDsqXhqlELcD9wfiqU6MwnBkmuoC
 kAdYvHcXrnnavfT7faXW3VFhJWYPB5Ow9tFXG2taeGXlh08Jsb6pek/eukgHKmkP
 10leCkrYVpCbju2LJnUMwX4cV9cxfF4hYPakB/TpuvH8QeMD2tFWuEjljCgxI5BR
 l1spFg7NSiuMAsN5wocLFDvY3Y3hHw==
 =jhBB
 -----END PGP SIGNATURE-----

Merge tag 'drm-intel-fixes-2021-07-22' of git://anongit.freedesktop.org/drm/drm-intel into drm-fixes

Couple reverts from Jason getting rid of asynchronous command parsing
and fence error propagation and a GVT fix of shadow ppgtt invalidation
with proper D3 state tracking from Colin.

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/YPl1sIyruD0U5Orl@intel.com
This commit is contained in:
Dave Airlie 2021-07-23 10:43:37 +10:00
commit 36ebaeb48b
6 changed files with 106 additions and 285 deletions

View File

@ -25,10 +25,8 @@
#include "i915_gem_clflush.h"
#include "i915_gem_context.h"
#include "i915_gem_ioctls.h"
#include "i915_sw_fence_work.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"
#include "i915_memcpy.h"
struct eb_vma {
struct i915_vma *vma;
@ -1456,6 +1454,10 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
int err;
struct intel_engine_cs *engine = eb->engine;
/* If we need to copy for the cmdparser, we will stall anyway */
if (eb_use_cmdparser(eb))
return ERR_PTR(-EWOULDBLOCK);
if (!reloc_can_use_engine(engine)) {
engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
if (!engine)
@ -2372,217 +2374,6 @@ shadow_batch_pin(struct i915_execbuffer *eb,
return vma;
}
struct eb_parse_work {
struct dma_fence_work base;
struct intel_engine_cs *engine;
struct i915_vma *batch;
struct i915_vma *shadow;
struct i915_vma *trampoline;
unsigned long batch_offset;
unsigned long batch_length;
unsigned long *jump_whitelist;
const void *batch_map;
void *shadow_map;
};
static int __eb_parse(struct dma_fence_work *work)
{
struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
int ret;
bool cookie;
cookie = dma_fence_begin_signalling();
ret = intel_engine_cmd_parser(pw->engine,
pw->batch,
pw->batch_offset,
pw->batch_length,
pw->shadow,
pw->jump_whitelist,
pw->shadow_map,
pw->batch_map);
dma_fence_end_signalling(cookie);
return ret;
}
static void __eb_parse_release(struct dma_fence_work *work)
{
struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
if (!IS_ERR_OR_NULL(pw->jump_whitelist))
kfree(pw->jump_whitelist);
if (pw->batch_map)
i915_gem_object_unpin_map(pw->batch->obj);
else
i915_gem_object_unpin_pages(pw->batch->obj);
i915_gem_object_unpin_map(pw->shadow->obj);
if (pw->trampoline)
i915_active_release(&pw->trampoline->active);
i915_active_release(&pw->shadow->active);
i915_active_release(&pw->batch->active);
}
static const struct dma_fence_work_ops eb_parse_ops = {
.name = "eb_parse",
.work = __eb_parse,
.release = __eb_parse_release,
};
static inline int
__parser_mark_active(struct i915_vma *vma,
struct intel_timeline *tl,
struct dma_fence *fence)
{
struct intel_gt_buffer_pool_node *node = vma->private;
return i915_active_ref(&node->active, tl->fence_context, fence);
}
static int
parser_mark_active(struct eb_parse_work *pw, struct intel_timeline *tl)
{
int err;
mutex_lock(&tl->mutex);
err = __parser_mark_active(pw->shadow, tl, &pw->base.dma);
if (err)
goto unlock;
if (pw->trampoline) {
err = __parser_mark_active(pw->trampoline, tl, &pw->base.dma);
if (err)
goto unlock;
}
unlock:
mutex_unlock(&tl->mutex);
return err;
}
static int eb_parse_pipeline(struct i915_execbuffer *eb,
struct i915_vma *shadow,
struct i915_vma *trampoline)
{
struct eb_parse_work *pw;
struct drm_i915_gem_object *batch = eb->batch->vma->obj;
bool needs_clflush;
int err;
GEM_BUG_ON(overflows_type(eb->batch_start_offset, pw->batch_offset));
GEM_BUG_ON(overflows_type(eb->batch_len, pw->batch_length));
pw = kzalloc(sizeof(*pw), GFP_KERNEL);
if (!pw)
return -ENOMEM;
err = i915_active_acquire(&eb->batch->vma->active);
if (err)
goto err_free;
err = i915_active_acquire(&shadow->active);
if (err)
goto err_batch;
if (trampoline) {
err = i915_active_acquire(&trampoline->active);
if (err)
goto err_shadow;
}
pw->shadow_map = i915_gem_object_pin_map(shadow->obj, I915_MAP_WB);
if (IS_ERR(pw->shadow_map)) {
err = PTR_ERR(pw->shadow_map);
goto err_trampoline;
}
needs_clflush =
!(batch->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
pw->batch_map = ERR_PTR(-ENODEV);
if (needs_clflush && i915_has_memcpy_from_wc())
pw->batch_map = i915_gem_object_pin_map(batch, I915_MAP_WC);
if (IS_ERR(pw->batch_map)) {
err = i915_gem_object_pin_pages(batch);
if (err)
goto err_unmap_shadow;
pw->batch_map = NULL;
}
pw->jump_whitelist =
intel_engine_cmd_parser_alloc_jump_whitelist(eb->batch_len,
trampoline);
if (IS_ERR(pw->jump_whitelist)) {
err = PTR_ERR(pw->jump_whitelist);
goto err_unmap_batch;
}
dma_fence_work_init(&pw->base, &eb_parse_ops);
pw->engine = eb->engine;
pw->batch = eb->batch->vma;
pw->batch_offset = eb->batch_start_offset;
pw->batch_length = eb->batch_len;
pw->shadow = shadow;
pw->trampoline = trampoline;
/* Mark active refs early for this worker, in case we get interrupted */
err = parser_mark_active(pw, eb->context->timeline);
if (err)
goto err_commit;
err = dma_resv_reserve_shared(pw->batch->resv, 1);
if (err)
goto err_commit;
err = dma_resv_reserve_shared(shadow->resv, 1);
if (err)
goto err_commit;
/* Wait for all writes (and relocs) into the batch to complete */
err = i915_sw_fence_await_reservation(&pw->base.chain,
pw->batch->resv, NULL, false,
0, I915_FENCE_GFP);
if (err < 0)
goto err_commit;
/* Keep the batch alive and unwritten as we parse */
dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma);
/* Force execution to wait for completion of the parser */
dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
dma_fence_work_commit_imm(&pw->base);
return 0;
err_commit:
i915_sw_fence_set_error_once(&pw->base.chain, err);
dma_fence_work_commit_imm(&pw->base);
return err;
err_unmap_batch:
if (pw->batch_map)
i915_gem_object_unpin_map(batch);
else
i915_gem_object_unpin_pages(batch);
err_unmap_shadow:
i915_gem_object_unpin_map(shadow->obj);
err_trampoline:
if (trampoline)
i915_active_release(&trampoline->active);
err_shadow:
i915_active_release(&shadow->active);
err_batch:
i915_active_release(&eb->batch->vma->active);
err_free:
kfree(pw);
return err;
}
static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
{
/*
@ -2672,7 +2463,15 @@ static int eb_parse(struct i915_execbuffer *eb)
goto err_trampoline;
}
err = eb_parse_pipeline(eb, shadow, trampoline);
err = dma_resv_reserve_shared(shadow->resv, 1);
if (err)
goto err_trampoline;
err = intel_engine_cmd_parser(eb->engine,
eb->batch->vma,
eb->batch_start_offset,
eb->batch_len,
shadow, trampoline);
if (err)
goto err_unpin_batch;

View File

@ -125,6 +125,10 @@ static int igt_gpu_reloc(void *arg)
intel_gt_pm_get(&eb.i915->gt);
for_each_uabi_engine(eb.engine, eb.i915) {
if (intel_engine_requires_cmd_parser(eb.engine) ||
intel_engine_using_cmd_parser(eb.engine))
continue;
reloc_cache_init(&eb.reloc_cache, eb.i915);
memset(map, POISON_INUSE, 4096);

View File

@ -1977,6 +1977,21 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
if (drm_WARN_ON(&i915->drm, !engine))
return -EINVAL;
/*
* Due to d3_entered is used to indicate skipping PPGTT invalidation on
* vGPU reset, it's set on D0->D3 on PCI config write, and cleared after
* vGPU reset if in resuming.
* In S0ix exit, the device power state also transite from D3 to D0 as
* S3 resume, but no vGPU reset (triggered by QEMU devic model). After
* S0ix exit, all engines continue to work. However the d3_entered
* remains set which will break next vGPU reset logic (miss the expected
* PPGTT invalidation).
* Engines can only work in D0. Thus the 1st elsp write gives GVT a
* chance to clear d3_entered.
*/
if (vgpu->d3_entered)
vgpu->d3_entered = false;
execlist = &vgpu->submission.execlist[engine->id];
execlist->elsp_dwords.data[3 - execlist->elsp_dwords.index] = data;

View File

@ -1145,19 +1145,41 @@ find_reg(const struct intel_engine_cs *engine, u32 addr)
static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
struct drm_i915_gem_object *src_obj,
unsigned long offset, unsigned long length,
void *dst, const void *src)
bool *needs_clflush_after)
{
bool needs_clflush =
!(src_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
unsigned int src_needs_clflush;
unsigned int dst_needs_clflush;
void *dst, *src;
int ret;
if (src) {
GEM_BUG_ON(!needs_clflush);
i915_unaligned_memcpy_from_wc(dst, src + offset, length);
} else {
struct scatterlist *sg;
ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
if (ret)
return ERR_PTR(ret);
dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB);
i915_gem_object_finish_access(dst_obj);
if (IS_ERR(dst))
return dst;
ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
if (ret) {
i915_gem_object_unpin_map(dst_obj);
return ERR_PTR(ret);
}
src = ERR_PTR(-ENODEV);
if (src_needs_clflush && i915_has_memcpy_from_wc()) {
src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
if (!IS_ERR(src)) {
i915_unaligned_memcpy_from_wc(dst,
src + offset,
length);
i915_gem_object_unpin_map(src_obj);
}
}
if (IS_ERR(src)) {
unsigned long x, n, remain;
void *ptr;
unsigned int x, sg_ofs;
unsigned long remain;
/*
* We can avoid clflushing partial cachelines before the write
@ -1168,40 +1190,34 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
* validate up to the end of the batch.
*/
remain = length;
if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
if (dst_needs_clflush & CLFLUSH_BEFORE)
remain = round_up(remain,
boot_cpu_data.x86_clflush_size);
ptr = dst;
x = offset_in_page(offset);
sg = i915_gem_object_get_sg(src_obj, offset >> PAGE_SHIFT, &sg_ofs, false);
for (n = offset >> PAGE_SHIFT; remain; n++) {
int len = min(remain, PAGE_SIZE - x);
while (remain) {
unsigned long sg_max = sg->length >> PAGE_SHIFT;
src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
if (src_needs_clflush)
drm_clflush_virt_range(src + x, len);
memcpy(ptr, src + x, len);
kunmap_atomic(src);
for (; remain && sg_ofs < sg_max; sg_ofs++) {
unsigned long len = min(remain, PAGE_SIZE - x);
void *map;
map = kmap_atomic(nth_page(sg_page(sg), sg_ofs));
if (needs_clflush)
drm_clflush_virt_range(map + x, len);
memcpy(ptr, map + x, len);
kunmap_atomic(map);
ptr += len;
remain -= len;
x = 0;
}
sg_ofs = 0;
sg = sg_next(sg);
ptr += len;
remain -= len;
x = 0;
}
}
i915_gem_object_finish_access(src_obj);
memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32));
/* dst_obj is returned with vmap pinned */
*needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
return dst;
}
@ -1360,6 +1376,9 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length,
if (target_cmd_index == offset)
return 0;
if (IS_ERR(jump_whitelist))
return PTR_ERR(jump_whitelist);
if (!test_bit(target_cmd_index, jump_whitelist)) {
DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n",
jump_target);
@ -1369,28 +1388,10 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length,
return 0;
}
/**
* intel_engine_cmd_parser_alloc_jump_whitelist() - preallocate jump whitelist for intel_engine_cmd_parser()
* @batch_length: length of the commands in batch_obj
* @trampoline: Whether jump trampolines are used.
*
* Preallocates a jump whitelist for parsing the cmd buffer in intel_engine_cmd_parser().
* This has to be preallocated, because the command parser runs in signaling context,
* and may not allocate any memory.
*
* Return: NULL or pointer to a jump whitelist, or ERR_PTR() on failure. Use
* IS_ERR() to check for errors. Must bre freed() with kfree().
*
* NULL is a valid value, meaning no allocation was required.
*/
unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length,
bool trampoline)
static unsigned long *alloc_whitelist(u32 batch_length)
{
unsigned long *jmp;
if (trampoline)
return NULL;
/*
* We expect batch_length to be less than 256KiB for known users,
* i.e. we need at most an 8KiB bitmap allocation which should be
@ -1415,9 +1416,7 @@ unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length,
* @batch_offset: byte offset in the batch at which execution starts
* @batch_length: length of the commands in batch_obj
* @shadow: validated copy of the batch buffer in question
* @jump_whitelist: buffer preallocated with intel_engine_cmd_parser_alloc_jump_whitelist()
* @shadow_map: mapping to @shadow vma
* @batch_map: mapping to @batch vma
* @trampoline: true if we need to trampoline into privileged execution
*
* Parses the specified batch buffer looking for privilege violations as
* described in the overview.
@ -1425,21 +1424,21 @@ unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length,
* Return: non-zero if the parser finds violations or otherwise fails; -EACCES
* if the batch appears legal but should use hardware parsing
*/
int intel_engine_cmd_parser(struct intel_engine_cs *engine,
struct i915_vma *batch,
unsigned long batch_offset,
unsigned long batch_length,
struct i915_vma *shadow,
unsigned long *jump_whitelist,
void *shadow_map,
const void *batch_map)
bool trampoline)
{
u32 *cmd, *batch_end, offset = 0;
struct drm_i915_cmd_descriptor default_desc = noop_desc;
const struct drm_i915_cmd_descriptor *desc = &default_desc;
bool needs_clflush_after = false;
unsigned long *jump_whitelist;
u64 batch_addr, shadow_addr;
int ret = 0;
bool trampoline = !jump_whitelist;
GEM_BUG_ON(!IS_ALIGNED(batch_offset, sizeof(*cmd)));
GEM_BUG_ON(!IS_ALIGNED(batch_length, sizeof(*cmd)));
@ -1447,8 +1446,18 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
batch->size));
GEM_BUG_ON(!batch_length);
cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length,
shadow_map, batch_map);
cmd = copy_batch(shadow->obj, batch->obj,
batch_offset, batch_length,
&needs_clflush_after);
if (IS_ERR(cmd)) {
DRM_DEBUG("CMD: Failed to copy batch\n");
return PTR_ERR(cmd);
}
jump_whitelist = NULL;
if (!trampoline)
/* Defer failure until attempted use */
jump_whitelist = alloc_whitelist(batch_length);
shadow_addr = gen8_canonical_addr(shadow->node.start);
batch_addr = gen8_canonical_addr(batch->node.start + batch_offset);
@ -1549,6 +1558,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
i915_gem_object_flush_map(shadow->obj);
if (!IS_ERR_OR_NULL(jump_whitelist))
kfree(jump_whitelist);
i915_gem_object_unpin_map(shadow->obj);
return ret;
}

View File

@ -1906,17 +1906,12 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
int intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine);
unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length,
bool trampoline);
int intel_engine_cmd_parser(struct intel_engine_cs *engine,
struct i915_vma *batch,
unsigned long batch_offset,
unsigned long batch_length,
struct i915_vma *shadow,
unsigned long *jump_whitelist,
void *shadow_map,
const void *batch_map);
bool trampoline);
#define I915_CMD_PARSER_TRAMPOLINE_SIZE 8
/* intel_device_info.c */

View File

@ -1426,10 +1426,8 @@ i915_request_await_execution(struct i915_request *rq,
do {
fence = *child++;
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
i915_sw_fence_set_error_once(&rq->submit, fence->error);
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
continue;
}
if (fence->context == rq->fence.context)
continue;
@ -1527,10 +1525,8 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
do {
fence = *child++;
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
i915_sw_fence_set_error_once(&rq->submit, fence->error);
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
continue;
}
/*
* Requests on the same timeline are explicitly ordered, along