drm/xe: Invalidate TLB also on bind if in scratch page mode
For scratch table mode we need to cover the case where a scratch PTE might
have been pre-fetched and cached and used instead of that of the newly
bound vma. For compute vms, invalidate TLB globally using GuC before
signalling bind complete. For !long-running vms, invalidate TLB at batch
start. Also document how TLB invalidation works.

v2:
- Fix a pointer to the comment about TLB invalidation (Jose Souza).
- Add a bool to the vm indicating whether we want to invalidate TLB at
  batch start.
- Invalidate TLB also on BCS and video engines at batch start where needed.
- Use the BIT() macro instead of an explicit shift.

Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Tested-by: José Roberto de Souza <jose.souza@intel.com> #v1
Reported-by: José Roberto de Souza <jose.souza@intel.com> #v1
Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/291
Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/291
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
commit 85dbfe47d0
parent 5eeb8b4438
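The policy the message describes distills to two mutually exclusive cases.
A minimal standalone sketch, where "struct vm_desc" and both helpers are
hypothetical illustrations, not xe driver API:

#include <stdbool.h>

/*
 * Standalone sketch of the policy described above.  "struct vm_desc"
 * and both helpers are hypothetical, not the xe driver's own code.
 */
struct vm_desc {
	bool scratch_enabled;	/* VM created in scratch page mode */
	bool long_running;	/* compute / long-running VM */
};

/* Invalidate globally via GuC before signalling bind complete? */
static bool invalidate_on_bind(const struct vm_desc *vm)
{
	return vm->scratch_enabled && vm->long_running;
}

/* Invalidate from the ring ops preceding each batch instead? */
static bool invalidate_at_batch_start(const struct vm_desc *vm)
{
	return vm->scratch_enabled && !vm->long_running;
}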
@@ -73,6 +73,7 @@
 #define PIPE_CONTROL_STORE_DATA_INDEX		(1<<21)
 #define PIPE_CONTROL_CS_STALL			(1<<20)
 #define PIPE_CONTROL_GLOBAL_SNAPSHOT_RESET	(1<<19)
+#define PIPE_CONTROL_TLB_INVALIDATE		BIT(18)
 #define PIPE_CONTROL_PSD_SYNC			(1<<17)
 #define PIPE_CONTROL_QW_WRITE			(1<<14)
 #define PIPE_CONTROL_DEPTH_STALL		(1<<13)
@@ -1297,7 +1297,20 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 
 	xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
 
-	if (rebind && !xe_vm_no_dma_fences(vma->vm)) {
+	/*
+	 * If rebind, we have to invalidate TLB on !LR vms to invalidate
+	 * cached PTEs pointing to freed memory. On LR vms this is done
+	 * automatically when the context is re-enabled by the rebind worker,
+	 * or in fault mode it was invalidated on PTE zapping.
+	 *
+	 * If !rebind, and scratch enabled VMs, there is a chance the scratch
+	 * PTE is already cached in the TLB so it needs to be invalidated.
+	 * On !LR VMs this is done in the ring ops preceding a batch, but on
+	 * non-faulting LR, in particular on user-space batch buffer chaining,
+	 * it needs to be done here.
+	 */
+	if ((rebind && !xe_vm_no_dma_fences(vm) && !vm->batch_invalidate_tlb) ||
+	    (!rebind && vm->scratch_bo[tile->id] && xe_vm_in_compute_mode(vm))) {
 		ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
 		if (!ifence)
 			return ERR_PTR(-ENOMEM);
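The new condition folds both cases into one check. A sketch restating it
with the inputs spelled out; this helper and its flattened bool parameters
are hypothetical, since __xe_pt_bind_vma() open-codes the test:

#include <stdbool.h>

/* Hypothetical restatement of the bind-time check above. */
static bool needs_bind_time_invalidation(bool rebind, bool no_dma_fences,
					 bool batch_invalidate_tlb,
					 bool has_scratch_bo, bool compute_mode)
{
	/* Rebind on a dma-fence VM whose ring ops don't already flush. */
	if (rebind && !no_dma_fences && !batch_invalidate_tlb)
		return true;

	/* First bind over a possibly TLB-cached scratch PTE on a compute VM. */
	if (!rebind && has_scratch_bo && compute_mode)
		return true;

	return false;
}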
@@ -1313,7 +1326,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_engine *e,
 		LLIST_HEAD(deferred);
 
 		/* TLB invalidation must be done before signaling rebind */
-		if (rebind && !xe_vm_no_dma_fences(vma->vm)) {
+		if (ifence) {
 			int err = invalidation_fence_init(tile->primary_gt, ifence, fence,
 							  vma);
 			if (err) {
@@ -15,6 +15,7 @@
 #include "xe_macros.h"
 #include "xe_sched_job.h"
+#include "xe_vm_types.h"
 #include "xe_vm.h"
 
 /*
  * 3D-related flags that can't be set on _engines_ that lack access to the 3D
@@ -74,9 +75,11 @@ static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
 	return i;
 }
 
-static int emit_flush_imm_ggtt(u32 addr, u32 value, u32 *dw, int i)
+static int emit_flush_imm_ggtt(u32 addr, u32 value, bool invalidate_tlb,
+			       u32 *dw, int i)
 {
-	dw[i++] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+	dw[i++] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW |
+		(invalidate_tlb ? MI_INVALIDATE_TLB : 0);
 	dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
 	dw[i++] = 0;
 	dw[i++] = value;
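A self-contained demo of the four-dword stream the reworked
emit_flush_imm_ggtt() builds. The MI_* macro values below are stand-in
stubs for illustration only, not the real hardware encodings:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MI_FLUSH_DW		0x26000000u	/* stub value */
#define MI_FLUSH_DW_OP_STOREDW	0x00004000u	/* stub value */
#define MI_INVALIDATE_TLB	0x00040000u	/* stub value */
#define MI_FLUSH_DW_USE_GTT	0x00000004u	/* stub value */

/* Same body as the diffed function, against the stub macros above. */
static int emit_flush_imm_ggtt(uint32_t addr, uint32_t value,
			       bool invalidate_tlb, uint32_t *dw, int i)
{
	dw[i++] = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW |
		(invalidate_tlb ? MI_INVALIDATE_TLB : 0);
	dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
	dw[i++] = 0;
	dw[i++] = value;
	return i;
}

int main(void)
{
	uint32_t dw[8];
	int n = emit_flush_imm_ggtt(0x1000, 42, true, dw, 0);

	for (int j = 0; j < n; j++)
		printf("dw[%d] = 0x%08x\n", j, (unsigned)dw[j]);
	return 0;
}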
@@ -107,7 +110,8 @@ static int emit_flush_invalidate(u32 flag, u32 *dw, int i)
 	return i;
 }
 
-static int emit_pipe_invalidate(u32 mask_flags, u32 *dw, int i)
+static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw,
+				int i)
 {
 	u32 flags = PIPE_CONTROL_CS_STALL |
 		PIPE_CONTROL_COMMAND_CACHE_INVALIDATE |
@@ -119,6 +123,9 @@ static int emit_pipe_invalidate(u32 mask_flags, u32 *dw, int i)
 		PIPE_CONTROL_QW_WRITE |
 		PIPE_CONTROL_STORE_DATA_INDEX;
 
+	if (invalidate_tlb)
+		flags |= PIPE_CONTROL_TLB_INVALIDATE;
+
 	flags &= ~mask_flags;
 
 	dw[i++] = GFX_OP_PIPE_CONTROL(6);
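The ordering here matters: the TLB bit is OR'ed in first and mask_flags is
applied afterwards, so the per-engine mask strips any bits the engine class
cannot accept. A compilable sketch using the PIPE_CONTROL bit positions
from the defines above; the reduced flag set and the demo in main() are
illustrative only:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PIPE_CONTROL_CS_STALL		(1u << 20)
#define PIPE_CONTROL_TLB_INVALIDATE	(1u << 18)
#define PIPE_CONTROL_QW_WRITE		(1u << 14)
#define PIPE_CONTROL_DEPTH_STALL	(1u << 13)

static uint32_t pipe_invalidate_flags(uint32_t mask_flags, bool invalidate_tlb)
{
	/* Reduced, illustrative flag set; the driver sets many more. */
	uint32_t flags = PIPE_CONTROL_CS_STALL |
		PIPE_CONTROL_DEPTH_STALL |
		PIPE_CONTROL_QW_WRITE;

	if (invalidate_tlb)
		flags |= PIPE_CONTROL_TLB_INVALIDATE;

	/* Strip engine-inapplicable bits last, as the driver does. */
	return flags & ~mask_flags;
}

int main(void)
{
	/* e.g. an engine without 3D access masking out the depth stall */
	uint32_t f = pipe_invalidate_flags(PIPE_CONTROL_DEPTH_STALL, true);

	printf("flags = 0x%08x\n", (unsigned)f);
	return 0;
}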
@@ -170,9 +177,17 @@ static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc,
 {
 	u32 dw[MAX_JOB_SIZE_DW], i = 0;
 	u32 ppgtt_flag = get_ppgtt_flag(job);
+	struct xe_vm *vm = job->engine->vm;
 
-	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
-				seqno, dw, i);
+	if (vm->batch_invalidate_tlb) {
+		dw[i++] = preparser_disable(true);
+		i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+					seqno, true, dw, i);
+		dw[i++] = preparser_disable(false);
+	} else {
+		i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+					seqno, dw, i);
+	}
 
 	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
 
@@ -181,7 +196,7 @@ static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc,
 					job->user_fence.value,
 					dw, i);
 
-	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i);
+	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
 
 	i = emit_user_interrupt(dw, i);
 
@@ -210,6 +225,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 	struct xe_gt *gt = job->engine->gt;
 	struct xe_device *xe = gt_to_xe(gt);
 	bool decode = job->engine->class == XE_ENGINE_CLASS_VIDEO_DECODE;
+	struct xe_vm *vm = job->engine->vm;
 
 	dw[i++] = preparser_disable(true);
 
@@ -220,10 +236,16 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 		else
 			i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i);
 	}
 
+	if (vm->batch_invalidate_tlb)
+		i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+					seqno, true, dw, i);
+
 	dw[i++] = preparser_disable(false);
 
-	i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
-				seqno, dw, i);
+	if (!vm->batch_invalidate_tlb)
+		i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
+					seqno, dw, i);
 
 	i = emit_bb_start(batch_addr, ppgtt_flag, dw, i);
 
@@ -232,7 +254,7 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
 					job->user_fence.value,
 					dw, i);
 
-	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, dw, i);
+	i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno, false, dw, i);
 
 	i = emit_user_interrupt(dw, i);
 
@@ -250,6 +272,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 	struct xe_gt *gt = job->engine->gt;
 	struct xe_device *xe = gt_to_xe(gt);
 	bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
+	struct xe_vm *vm = job->engine->vm;
 	u32 mask_flags = 0;
 
 	dw[i++] = preparser_disable(true);
@@ -257,7 +280,9 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
 		mask_flags = PIPE_CONTROL_3D_ARCH_FLAGS;
 	else if (job->engine->class == XE_ENGINE_CLASS_COMPUTE)
 		mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
-	i = emit_pipe_invalidate(mask_flags, dw, i);
+
+	/* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */
+	i = emit_pipe_invalidate(mask_flags, vm->batch_invalidate_tlb, dw, i);
 
 	/* hsdes: 1809175790 */
 	if (has_aux_ccs(xe))
@@ -1237,11 +1237,13 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 			if (err)
 				goto err_scratch_pt;
 		}
+		vm->batch_invalidate_tlb = true;
 	}
 
 	if (flags & DRM_XE_VM_CREATE_COMPUTE_MODE) {
 		INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
 		vm->flags |= XE_VM_FLAG_COMPUTE_MODE;
+		vm->batch_invalidate_tlb = false;
 	}
 
 	if (flags & DRM_XE_VM_CREATE_ASYNC_BIND_OPS) {
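Note the ordering in xe_vm_create(): scratch-enabled VMs default to
invalidating at batch start, but compute mode is evaluated afterwards and
clears the flag, since those VMs invalidate via GuC at bind time instead.
A hypothetical distillation of that precedence:

#include <stdbool.h>

/* Hypothetical distillation of the flag ordering above. */
static bool vm_batch_invalidate_tlb(bool scratch, bool compute_mode)
{
	bool batch_invalidate_tlb = false;

	if (scratch)
		batch_invalidate_tlb = true;	/* flush in ring ops per batch */
	if (compute_mode)
		batch_invalidate_tlb = false;	/* GuC invalidates at bind instead */

	return batch_invalidate_tlb;
}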
@@ -337,6 +337,9 @@ struct xe_vm {
 		/** @capture_once: capture only one error per VM */
 		bool capture_once;
 	} error_capture;
+
+	/** @batch_invalidate_tlb: Always invalidate TLB before batch start */
+	bool batch_invalidate_tlb;
 };
 
 #endif