drm/amdgpu: workaround to avoid SET_Q_MODE packets v2

It turned out that executing the SET_Q_MODE packet on every submission
creates too much overhead. Implement a workaround which allows skipping
the SET_Q_MODE packet if subsequent submissions all use the same
parameters.

v2: add a NULL check for ring_obj

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
commit 8bc75586ea
parent c68cbbfd54
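For context before the diff: the idea is to fingerprint the SET_Q_MODE parameters into a token and, when the token matches the previous submission's, patch the already-emitted packet so the CP skips it. A minimal standalone C sketch of that idea (struct skip_state, emit_state_packet and exec_flag are made up for illustration; the real patch below works on the ring buffer via COND_EXEC):

#include <stdint.h>

/* Hypothetical sketch of the skip logic; names are made up, not driver code. */
struct skip_state {
	uint32_t *last_skip_flag;	/* "execute?" dword of the last packet */
	uint64_t last_token;		/* fingerprint of its parameters */
};

/* Emit a state packet whose gating dword tells the GPU whether to run it. */
static void emit_state_packet(struct skip_state *s, uint32_t *exec_flag,
			      uint64_t param_a, uint64_t param_b)
{
	uint64_t token = param_a ^ param_b;	/* cheap parameter fingerprint */

	/* Same parameters as last time: patch the previous packet so the
	 * GPU skips it instead of redundantly saving/restoring state. */
	if (s->last_skip_flag && s->last_token == token)
		*s->last_skip_flag = 0;

	*exec_flag = 1;		/* the new packet executes by default */
	s->last_skip_flag = exec_flag;
	s->last_token = token;
}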
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -285,6 +285,9 @@ struct amdgpu_ring {
 	unsigned		cond_exe_offs;
 	u64			cond_exe_gpu_addr;
 	volatile u32		*cond_exe_cpu_addr;
+	unsigned int		set_q_mode_offs;
+	volatile u32		*set_q_mode_ptr;
+	u64			set_q_mode_token;
 	unsigned		vm_hub;
 	unsigned		vm_inv_eng;
 	struct dma_fence	*vmid_wait;
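The hunk above adds three fields of skip state. My reading of what they track, restated as a standalone sketch (the struct name and comments are mine, not from the patch):

#include <stdint.h>

/* Annotated restatement of the new amdgpu_ring fields, based on
 * gfx_v11_0_ring_emit_gfx_shadow() further below. */
struct set_q_mode_state {
	/* Ring offset, in dwords, of the NOP payload written last time;
	 * the next submission's COND_EXEC reads it to decide whether its
	 * SET_Q_MODE packet runs. */
	unsigned int offs;
	/* CPU pointer to the dword gating the last postfix SET_Q_MODE
	 * packet; zeroing it makes the CP skip that packet. Reset to NULL
	 * on VM flush. */
	volatile uint32_t *ptr;
	/* shadow_va ^ csa_va ^ gds_va ^ vmid of the last prefix emission. */
	uint64_t token;
};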
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -5461,6 +5461,11 @@ static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
 		amdgpu_ring_write(ring, 0x0);
 	}
+
+	/* Make sure that we can't skip the SET_Q_MODE packets when the VM
+	 * has changed in any way.
+	 */
+	ring->set_q_mode_ptr = NULL;
 }
 
 static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
@@ -5510,29 +5515,6 @@ static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, 0);
 }
 
-static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
-					   u64 shadow_va, u64 csa_va,
-					   u64 gds_va, bool init_shadow,
-					   int vmid)
-{
-	struct amdgpu_device *adev = ring->adev;
-
-	if (!adev->gfx.cp_gfx_shadow)
-		return;
-
-	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
-	amdgpu_ring_write(ring, lower_32_bits(shadow_va));
-	amdgpu_ring_write(ring, upper_32_bits(shadow_va));
-	amdgpu_ring_write(ring, lower_32_bits(gds_va));
-	amdgpu_ring_write(ring, upper_32_bits(gds_va));
-	amdgpu_ring_write(ring, lower_32_bits(csa_va));
-	amdgpu_ring_write(ring, upper_32_bits(csa_va));
-	amdgpu_ring_write(ring, shadow_va ?
-			  PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
-	amdgpu_ring_write(ring, init_shadow ?
-			  PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
-}
-
 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
 						   uint64_t addr)
 {
@@ -5550,6 +5532,97 @@ static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
 	return ret;
 }
 
+static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
+					   u64 shadow_va, u64 csa_va,
+					   u64 gds_va, bool init_shadow,
+					   int vmid)
+{
+	struct amdgpu_device *adev = ring->adev;
+	unsigned int offs, end;
+
+	if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj)
+		return;
+
+	/*
+	 * The logic here isn't easy to understand because we need to keep
+	 * state across multiple executions of the function as well as
+	 * between the CPU and GPU. The general idea is that the newly
+	 * written GPU command has a condition on the previous one and is
+	 * only executed if really necessary.
+	 */
+
+	/*
+	 * The dw in the NOP controls whether the next SET_Q_MODE packet
+	 * should be executed or not. Reserve 64 bits just to be on the
+	 * safe side.
+	 */
+	amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1));
+	offs = ring->wptr & ring->buf_mask;
+
+	/*
+	 * We start with skipping the prefix SET_Q_MODE and always executing
+	 * the postfix SET_Q_MODE packet. This is changed below with a
+	 * WRITE_DATA command when the postfix executes.
+	 */
+	amdgpu_ring_write(ring, shadow_va ? 1 : 0);
+	amdgpu_ring_write(ring, 0);
+
+	if (ring->set_q_mode_offs) {
+		uint64_t addr;
+
+		addr = amdgpu_bo_gpu_offset(ring->ring_obj);
+		addr += ring->set_q_mode_offs << 2;
+		end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr);
+	}
+
+	/*
+	 * When the postfix SET_Q_MODE packet executes we need to make sure
+	 * that the next prefix SET_Q_MODE packet executes as well.
+	 */
+	if (!shadow_va) {
+		uint64_t addr;
+
+		addr = amdgpu_bo_gpu_offset(ring->ring_obj);
+		addr += offs << 2;
+		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+		amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
+		amdgpu_ring_write(ring, lower_32_bits(addr));
+		amdgpu_ring_write(ring, upper_32_bits(addr));
+		amdgpu_ring_write(ring, 0x1);
+	}
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
+	amdgpu_ring_write(ring, lower_32_bits(shadow_va));
+	amdgpu_ring_write(ring, upper_32_bits(shadow_va));
+	amdgpu_ring_write(ring, lower_32_bits(gds_va));
+	amdgpu_ring_write(ring, upper_32_bits(gds_va));
+	amdgpu_ring_write(ring, lower_32_bits(csa_va));
+	amdgpu_ring_write(ring, upper_32_bits(csa_va));
+	amdgpu_ring_write(ring, shadow_va ?
+			  PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
+	amdgpu_ring_write(ring, init_shadow ?
+			  PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
+
+	if (ring->set_q_mode_offs)
+		amdgpu_ring_patch_cond_exec(ring, end);
+
+	if (shadow_va) {
+		uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid;
+
+		/*
+		 * If the tokens match try to skip the last postfix SET_Q_MODE
+		 * packet to avoid saving/restoring the state all the time.
+		 */
+		if (ring->set_q_mode_ptr && ring->set_q_mode_token == token)
+			*ring->set_q_mode_ptr = 0;
+
+		ring->set_q_mode_token = token;
+	} else {
+		ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs];
+	}
+
+	ring->set_q_mode_offs = offs;
+}
+
 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
 {
 	int i, r = 0;
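One detail worth spelling out from the hunk above: ring->wptr counts 32-bit dwords, so masking with ring->buf_mask gives the dword offset within the ring buffer object, and shifting left by 2 converts dwords to bytes when forming the GPU address handed to COND_EXEC and WRITE_DATA. A standalone sketch of that address math (ring_sketch and its fields are illustrative stand-ins, not the driver's types):

#include <stdint.h>

/* Illustrative stand-in for the ring bookkeeping; not the driver's types. */
struct ring_sketch {
	uint64_t gpu_addr;	/* GPU address of the ring buffer object */
	uint64_t wptr;		/* write pointer, counted in 32-bit dwords */
	uint64_t buf_mask;	/* ring size in dwords minus one (power of two) */
};

/* GPU address of the dword at the current write position. */
static uint64_t ring_dword_gpu_addr(const struct ring_sketch *ring)
{
	uint64_t offs = ring->wptr & ring->buf_mask;	/* wrap inside the ring */

	return ring->gpu_addr + (offs << 2);	/* dwords -> bytes */
}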
@@ -6114,7 +6187,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
 	.emit_frame_size = /* totally 247 maximum if 16 IBs */
 		5 + /* update_spm_vmid */
 		5 + /* COND_EXEC */
-		9 + /* SET_Q_PREEMPTION_MODE */
+		22 + /* SET_Q_PREEMPTION_MODE */
 		7 + /* PIPELINE_SYNC */
 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
@@ -6127,6 +6200,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
 		31 + /* DE_META */
 		3 + /* CNTX_CTRL */
 		5 + /* HDP_INVL */
+		22 + /* SET_Q_PREEMPTION_MODE */
 		8 + 8 + /* FENCE x2 */
 		8, /* gfx_v11_0_emit_mem_sync */
 	.emit_ib_size =	4, /* gfx_v11_0_ring_emit_ib_gfx */
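Where the 22 dwords come from: assuming the usual PACKET3 encoding of one header dword plus count+1 payload dwords, the worst case of the new gfx_v11_0_ring_emit_gfx_shadow() is

    3   PACKET3_NOP(1) plus the two skip-control dwords
    5   COND_EXEC from gfx_v11_0_ring_emit_init_cond_exec()
    5   WRITE_DATA(3) re-arming the next prefix packet
    9   SET_Q_PREEMPTION_MODE(7) itself
   --
   22   dwords

which is why the existing reservation grows from 9 (the bare SET_Q_PREEMPTION_MODE packet) to 22, and a second 22-dword entry is added for the postfix emission.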