mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-07 13:43:51 +00:00
Merge tag 'drm-intel-fixes-2023-08-03' of git://anongit.freedesktop.org/drm/drm-intel into drm-fixes
- Fix bug in getting msg length in AUX CH registers handler [gvt] (Yan Zhao) - Gen12 AUX invalidation fixes [gt] (Andi Shyti, Jonathan Cavitt) - Fix premature release of request's reusable memory (Janusz Krzysztofik) - Merge tag 'gvt-fixes-2023-08-02' of https://github.com/intel/gvt-linux into drm-intel-fixes (Tvrtko Ursulin) Signed-off-by: Dave Airlie <airlied@redhat.com> From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/ZMtkxWGuUKpaRMmo@tursulin-desk
This commit is contained in:
commit
1958b0f95a
@ -165,14 +165,60 @@ static u32 preparser_disable(bool state)
|
||||
return MI_ARB_CHECK | 1 << 8 | state;
|
||||
}
|
||||
|
||||
u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg)
|
||||
static i915_reg_t gen12_get_aux_inv_reg(struct intel_engine_cs *engine)
|
||||
{
|
||||
u32 gsi_offset = gt->uncore->gsi_offset;
|
||||
switch (engine->id) {
|
||||
case RCS0:
|
||||
return GEN12_CCS_AUX_INV;
|
||||
case BCS0:
|
||||
return GEN12_BCS0_AUX_INV;
|
||||
case VCS0:
|
||||
return GEN12_VD0_AUX_INV;
|
||||
case VCS2:
|
||||
return GEN12_VD2_AUX_INV;
|
||||
case VECS0:
|
||||
return GEN12_VE0_AUX_INV;
|
||||
case CCS0:
|
||||
return GEN12_CCS0_AUX_INV;
|
||||
default:
|
||||
return INVALID_MMIO_REG;
|
||||
}
|
||||
}
|
||||
|
||||
static bool gen12_needs_ccs_aux_inv(struct intel_engine_cs *engine)
|
||||
{
|
||||
i915_reg_t reg = gen12_get_aux_inv_reg(engine);
|
||||
|
||||
if (IS_PONTEVECCHIO(engine->i915))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* So far platforms supported by i915 having flat ccs do not require
|
||||
* AUX invalidation. Check also whether the engine requires it.
|
||||
*/
|
||||
return i915_mmio_reg_valid(reg) && !HAS_FLAT_CCS(engine->i915);
|
||||
}
|
||||
|
||||
u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
|
||||
{
|
||||
i915_reg_t inv_reg = gen12_get_aux_inv_reg(engine);
|
||||
u32 gsi_offset = engine->gt->uncore->gsi_offset;
|
||||
|
||||
if (!gen12_needs_ccs_aux_inv(engine))
|
||||
return cs;
|
||||
|
||||
*cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
|
||||
*cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
|
||||
*cs++ = AUX_INV;
|
||||
*cs++ = MI_NOOP;
|
||||
|
||||
*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
|
||||
MI_SEMAPHORE_REGISTER_POLL |
|
||||
MI_SEMAPHORE_POLL |
|
||||
MI_SEMAPHORE_SAD_EQ_SDD;
|
||||
*cs++ = 0;
|
||||
*cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
|
||||
*cs++ = 0;
|
||||
*cs++ = 0;
|
||||
|
||||
return cs;
|
||||
}
|
||||
@ -202,8 +248,13 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
|
||||
{
|
||||
struct intel_engine_cs *engine = rq->engine;
|
||||
|
||||
if (mode & EMIT_FLUSH) {
|
||||
u32 flags = 0;
|
||||
/*
|
||||
* On Aux CCS platforms the invalidation of the Aux
|
||||
* table requires quiescing memory traffic beforehand
|
||||
*/
|
||||
if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) {
|
||||
u32 bit_group_0 = 0;
|
||||
u32 bit_group_1 = 0;
|
||||
int err;
|
||||
u32 *cs;
|
||||
|
||||
@ -211,32 +262,40 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
|
||||
flags |= PIPE_CONTROL_FLUSH_L3;
|
||||
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
|
||||
flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
|
||||
bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
|
||||
|
||||
/*
|
||||
* When required, in MTL and beyond platforms we
|
||||
* need to set the CCS_FLUSH bit in the pipe control
|
||||
*/
|
||||
if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
|
||||
bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
|
||||
|
||||
bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
|
||||
bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
|
||||
bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
|
||||
bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
|
||||
/* Wa_1409600907:tgl,adl-p */
|
||||
flags |= PIPE_CONTROL_DEPTH_STALL;
|
||||
flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
|
||||
flags |= PIPE_CONTROL_FLUSH_ENABLE;
|
||||
bit_group_1 |= PIPE_CONTROL_DEPTH_STALL;
|
||||
bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE;
|
||||
bit_group_1 |= PIPE_CONTROL_FLUSH_ENABLE;
|
||||
|
||||
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
|
||||
flags |= PIPE_CONTROL_QW_WRITE;
|
||||
bit_group_1 |= PIPE_CONTROL_STORE_DATA_INDEX;
|
||||
bit_group_1 |= PIPE_CONTROL_QW_WRITE;
|
||||
|
||||
flags |= PIPE_CONTROL_CS_STALL;
|
||||
bit_group_1 |= PIPE_CONTROL_CS_STALL;
|
||||
|
||||
if (!HAS_3D_PIPELINE(engine->i915))
|
||||
flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
|
||||
bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
|
||||
else if (engine->class == COMPUTE_CLASS)
|
||||
flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
|
||||
bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
|
||||
|
||||
cs = intel_ring_begin(rq, 6);
|
||||
if (IS_ERR(cs))
|
||||
return PTR_ERR(cs);
|
||||
|
||||
cs = gen12_emit_pipe_control(cs,
|
||||
PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
|
||||
flags, LRC_PPHWSP_SCRATCH_ADDR);
|
||||
cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1,
|
||||
LRC_PPHWSP_SCRATCH_ADDR);
|
||||
intel_ring_advance(rq, cs);
|
||||
}
|
||||
|
||||
@ -267,10 +326,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
|
||||
else if (engine->class == COMPUTE_CLASS)
|
||||
flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
|
||||
|
||||
if (!HAS_FLAT_CCS(rq->engine->i915))
|
||||
count = 8 + 4;
|
||||
else
|
||||
count = 8;
|
||||
count = 8;
|
||||
if (gen12_needs_ccs_aux_inv(rq->engine))
|
||||
count += 8;
|
||||
|
||||
cs = intel_ring_begin(rq, count);
|
||||
if (IS_ERR(cs))
|
||||
@ -285,11 +343,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
|
||||
|
||||
cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
|
||||
|
||||
if (!HAS_FLAT_CCS(rq->engine->i915)) {
|
||||
/* hsdes: 1809175790 */
|
||||
cs = gen12_emit_aux_table_inv(rq->engine->gt,
|
||||
cs, GEN12_GFX_CCS_AUX_NV);
|
||||
}
|
||||
cs = gen12_emit_aux_table_inv(engine, cs);
|
||||
|
||||
*cs++ = preparser_disable(false);
|
||||
intel_ring_advance(rq, cs);
|
||||
@ -300,21 +354,14 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
|
||||
|
||||
int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
|
||||
{
|
||||
intel_engine_mask_t aux_inv = 0;
|
||||
u32 cmd, *cs;
|
||||
u32 cmd = 4;
|
||||
u32 *cs;
|
||||
|
||||
cmd = 4;
|
||||
if (mode & EMIT_INVALIDATE) {
|
||||
cmd += 2;
|
||||
|
||||
if (!HAS_FLAT_CCS(rq->engine->i915) &&
|
||||
(rq->engine->class == VIDEO_DECODE_CLASS ||
|
||||
rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
|
||||
aux_inv = rq->engine->mask &
|
||||
~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
|
||||
if (aux_inv)
|
||||
cmd += 4;
|
||||
}
|
||||
if (gen12_needs_ccs_aux_inv(rq->engine))
|
||||
cmd += 8;
|
||||
}
|
||||
|
||||
cs = intel_ring_begin(rq, cmd);
|
||||
@ -338,6 +385,10 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
|
||||
cmd |= MI_INVALIDATE_TLB;
|
||||
if (rq->engine->class == VIDEO_DECODE_CLASS)
|
||||
cmd |= MI_INVALIDATE_BSD;
|
||||
|
||||
if (gen12_needs_ccs_aux_inv(rq->engine) &&
|
||||
rq->engine->class == COPY_ENGINE_CLASS)
|
||||
cmd |= MI_FLUSH_DW_CCS;
|
||||
}
|
||||
|
||||
*cs++ = cmd;
|
||||
@ -345,14 +396,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
|
||||
*cs++ = 0; /* upper addr */
|
||||
*cs++ = 0; /* value */
|
||||
|
||||
if (aux_inv) { /* hsdes: 1809175790 */
|
||||
if (rq->engine->class == VIDEO_DECODE_CLASS)
|
||||
cs = gen12_emit_aux_table_inv(rq->engine->gt,
|
||||
cs, GEN12_VD0_AUX_NV);
|
||||
else
|
||||
cs = gen12_emit_aux_table_inv(rq->engine->gt,
|
||||
cs, GEN12_VE0_AUX_NV);
|
||||
}
|
||||
cs = gen12_emit_aux_table_inv(rq->engine, cs);
|
||||
|
||||
if (mode & EMIT_INVALIDATE)
|
||||
*cs++ = preparser_disable(false);
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include "intel_gt_regs.h"
|
||||
#include "intel_gpu_commands.h"
|
||||
|
||||
struct intel_engine_cs;
|
||||
struct intel_gt;
|
||||
struct i915_request;
|
||||
|
||||
@ -46,28 +47,32 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
|
||||
u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
|
||||
u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
|
||||
|
||||
u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg);
|
||||
u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs);
|
||||
|
||||
static inline u32 *
|
||||
__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
|
||||
__gen8_emit_pipe_control(u32 *batch, u32 bit_group_0,
|
||||
u32 bit_group_1, u32 offset)
|
||||
{
|
||||
memset(batch, 0, 6 * sizeof(u32));
|
||||
|
||||
batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0;
|
||||
batch[1] = flags1;
|
||||
batch[0] = GFX_OP_PIPE_CONTROL(6) | bit_group_0;
|
||||
batch[1] = bit_group_1;
|
||||
batch[2] = offset;
|
||||
|
||||
return batch + 6;
|
||||
}
|
||||
|
||||
static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
|
||||
static inline u32 *gen8_emit_pipe_control(u32 *batch,
|
||||
u32 bit_group_1, u32 offset)
|
||||
{
|
||||
return __gen8_emit_pipe_control(batch, 0, flags, offset);
|
||||
return __gen8_emit_pipe_control(batch, 0, bit_group_1, offset);
|
||||
}
|
||||
|
||||
static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
|
||||
static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 bit_group_0,
|
||||
u32 bit_group_1, u32 offset)
|
||||
{
|
||||
return __gen8_emit_pipe_control(batch, flags0, flags1, offset);
|
||||
return __gen8_emit_pipe_control(batch, bit_group_0,
|
||||
bit_group_1, offset);
|
||||
}
|
||||
|
||||
static inline u32 *
|
||||
|
@ -121,6 +121,7 @@
|
||||
#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15)
|
||||
#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */
|
||||
#define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */
|
||||
#define MI_SEMAPHORE_REGISTER_POLL (1 << 16)
|
||||
#define MI_SEMAPHORE_POLL (1 << 15)
|
||||
#define MI_SEMAPHORE_SAD_GT_SDD (0 << 12)
|
||||
#define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12)
|
||||
@ -299,6 +300,7 @@
|
||||
#define PIPE_CONTROL_QW_WRITE (1<<14)
|
||||
#define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14)
|
||||
#define PIPE_CONTROL_DEPTH_STALL (1<<13)
|
||||
#define PIPE_CONTROL_CCS_FLUSH (1<<13) /* MTL+ */
|
||||
#define PIPE_CONTROL_WRITE_FLUSH (1<<12)
|
||||
#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */
|
||||
#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */
|
||||
|
@ -332,9 +332,11 @@
|
||||
#define GEN8_PRIVATE_PAT_HI _MMIO(0x40e0 + 4)
|
||||
#define GEN10_PAT_INDEX(index) _MMIO(0x40e0 + (index) * 4)
|
||||
#define BSD_HWS_PGA_GEN7 _MMIO(0x4180)
|
||||
#define GEN12_GFX_CCS_AUX_NV _MMIO(0x4208)
|
||||
#define GEN12_VD0_AUX_NV _MMIO(0x4218)
|
||||
#define GEN12_VD1_AUX_NV _MMIO(0x4228)
|
||||
|
||||
#define GEN12_CCS_AUX_INV _MMIO(0x4208)
|
||||
#define GEN12_VD0_AUX_INV _MMIO(0x4218)
|
||||
#define GEN12_VE0_AUX_INV _MMIO(0x4238)
|
||||
#define GEN12_BCS0_AUX_INV _MMIO(0x4248)
|
||||
|
||||
#define GEN8_RTCR _MMIO(0x4260)
|
||||
#define GEN8_M1TCR _MMIO(0x4264)
|
||||
@ -342,14 +344,12 @@
|
||||
#define GEN8_BTCR _MMIO(0x426c)
|
||||
#define GEN8_VTCR _MMIO(0x4270)
|
||||
|
||||
#define GEN12_VD2_AUX_NV _MMIO(0x4298)
|
||||
#define GEN12_VD3_AUX_NV _MMIO(0x42a8)
|
||||
#define GEN12_VE0_AUX_NV _MMIO(0x4238)
|
||||
|
||||
#define BLT_HWS_PGA_GEN7 _MMIO(0x4280)
|
||||
|
||||
#define GEN12_VE1_AUX_NV _MMIO(0x42b8)
|
||||
#define GEN12_VD2_AUX_INV _MMIO(0x4298)
|
||||
#define GEN12_CCS0_AUX_INV _MMIO(0x42c8)
|
||||
#define AUX_INV REG_BIT(0)
|
||||
|
||||
#define VEBOX_HWS_PGA_GEN7 _MMIO(0x4380)
|
||||
|
||||
#define GEN12_AUX_ERR_DBG _MMIO(0x43f4)
|
||||
|
@ -1364,10 +1364,7 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
|
||||
IS_DG2_G11(ce->engine->i915))
|
||||
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);
|
||||
|
||||
/* hsdes: 1809175790 */
|
||||
if (!HAS_FLAT_CCS(ce->engine->i915))
|
||||
cs = gen12_emit_aux_table_inv(ce->engine->gt,
|
||||
cs, GEN12_GFX_CCS_AUX_NV);
|
||||
cs = gen12_emit_aux_table_inv(ce->engine, cs);
|
||||
|
||||
/* Wa_16014892111 */
|
||||
if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) ||
|
||||
@ -1392,17 +1389,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
|
||||
PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
|
||||
0);
|
||||
|
||||
/* hsdes: 1809175790 */
|
||||
if (!HAS_FLAT_CCS(ce->engine->i915)) {
|
||||
if (ce->engine->class == VIDEO_DECODE_CLASS)
|
||||
cs = gen12_emit_aux_table_inv(ce->engine->gt,
|
||||
cs, GEN12_VD0_AUX_NV);
|
||||
else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS)
|
||||
cs = gen12_emit_aux_table_inv(ce->engine->gt,
|
||||
cs, GEN12_VE0_AUX_NV);
|
||||
}
|
||||
|
||||
return cs;
|
||||
return gen12_emit_aux_table_inv(ce->engine, cs);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -491,7 +491,7 @@ void intel_gvt_i2c_handle_aux_ch_write(struct intel_vgpu *vgpu,
|
||||
return;
|
||||
}
|
||||
|
||||
msg_length = REG_FIELD_GET(DP_AUX_CH_CTL_MESSAGE_SIZE_MASK, reg);
|
||||
msg_length = REG_FIELD_GET(DP_AUX_CH_CTL_MESSAGE_SIZE_MASK, value);
|
||||
|
||||
// check the msg in DATA register.
|
||||
msg = vgpu_vreg(vgpu, offset + 4);
|
||||
|
@ -449,8 +449,11 @@ int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
|
||||
}
|
||||
} while (unlikely(is_barrier(active)));
|
||||
|
||||
if (!__i915_active_fence_set(active, fence))
|
||||
fence = __i915_active_fence_set(active, fence);
|
||||
if (!fence)
|
||||
__i915_active_acquire(ref);
|
||||
else
|
||||
dma_fence_put(fence);
|
||||
|
||||
out:
|
||||
i915_active_release(ref);
|
||||
@ -469,13 +472,9 @@ __i915_active_set_fence(struct i915_active *ref,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
prev = __i915_active_fence_set(active, fence);
|
||||
if (prev)
|
||||
prev = dma_fence_get_rcu(prev);
|
||||
else
|
||||
if (!prev)
|
||||
__i915_active_acquire(ref);
|
||||
rcu_read_unlock();
|
||||
|
||||
return prev;
|
||||
}
|
||||
@ -1019,10 +1018,11 @@ void i915_request_add_active_barriers(struct i915_request *rq)
|
||||
*
|
||||
* Records the new @fence as the last active fence along its timeline in
|
||||
* this active tracker, moving the tracking callbacks from the previous
|
||||
* fence onto this one. Returns the previous fence (if not already completed),
|
||||
* which the caller must ensure is executed before the new fence. To ensure
|
||||
* that the order of fences within the timeline of the i915_active_fence is
|
||||
* understood, it should be locked by the caller.
|
||||
* fence onto this one. Gets and returns a reference to the previous fence
|
||||
* (if not already completed), which the caller must put after making sure
|
||||
* that it is executed before the new fence. To ensure that the order of
|
||||
* fences within the timeline of the i915_active_fence is understood, it
|
||||
* should be locked by the caller.
|
||||
*/
|
||||
struct dma_fence *
|
||||
__i915_active_fence_set(struct i915_active_fence *active,
|
||||
@ -1031,7 +1031,23 @@ __i915_active_fence_set(struct i915_active_fence *active,
|
||||
struct dma_fence *prev;
|
||||
unsigned long flags;
|
||||
|
||||
if (fence == rcu_access_pointer(active->fence))
|
||||
/*
|
||||
* In case of fences embedded in i915_requests, their memory is
|
||||
* SLAB_FAILSAFE_BY_RCU, then it can be reused right after release
|
||||
* by new requests. Then, there is a risk of passing back a pointer
|
||||
* to a new, completely unrelated fence that reuses the same memory
|
||||
* while tracked under a different active tracker. Combined with i915
|
||||
* perf open/close operations that build await dependencies between
|
||||
* engine kernel context requests and user requests from different
|
||||
* timelines, this can lead to dependency loops and infinite waits.
|
||||
*
|
||||
* As a countermeasure, we try to get a reference to the active->fence
|
||||
* first, so if we succeed and pass it back to our user then it is not
|
||||
* released and potentially reused by an unrelated request before the
|
||||
* user has a chance to set up an await dependency on it.
|
||||
*/
|
||||
prev = i915_active_fence_get(active);
|
||||
if (fence == prev)
|
||||
return fence;
|
||||
|
||||
GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
|
||||
@ -1040,27 +1056,56 @@ __i915_active_fence_set(struct i915_active_fence *active,
|
||||
* Consider that we have two threads arriving (A and B), with
|
||||
* C already resident as the active->fence.
|
||||
*
|
||||
* A does the xchg first, and so it sees C or NULL depending
|
||||
* on the timing of the interrupt handler. If it is NULL, the
|
||||
* previous fence must have been signaled and we know that
|
||||
* we are first on the timeline. If it is still present,
|
||||
* we acquire the lock on that fence and serialise with the interrupt
|
||||
* handler, in the process removing it from any future interrupt
|
||||
* callback. A will then wait on C before executing (if present).
|
||||
*
|
||||
* As B is second, it sees A as the previous fence and so waits for
|
||||
* it to complete its transition and takes over the occupancy for
|
||||
* itself -- remembering that it needs to wait on A before executing.
|
||||
* Both A and B have got a reference to C or NULL, depending on the
|
||||
* timing of the interrupt handler. Let's assume that if A has got C
|
||||
* then it has locked C first (before B).
|
||||
*
|
||||
* Note the strong ordering of the timeline also provides consistent
|
||||
* nesting rules for the fence->lock; the inner lock is always the
|
||||
* older lock.
|
||||
*/
|
||||
spin_lock_irqsave(fence->lock, flags);
|
||||
prev = xchg(__active_fence_slot(active), fence);
|
||||
if (prev) {
|
||||
GEM_BUG_ON(prev == fence);
|
||||
if (prev)
|
||||
spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
|
||||
|
||||
/*
|
||||
* A does the cmpxchg first, and so it sees C or NULL, as before, or
|
||||
* something else, depending on the timing of other threads and/or
|
||||
* interrupt handler. If not the same as before then A unlocks C if
|
||||
* applicable and retries, starting from an attempt to get a new
|
||||
* active->fence. Meanwhile, B follows the same path as A.
|
||||
* Once A succeeds with cmpxch, B fails again, retires, gets A from
|
||||
* active->fence, locks it as soon as A completes, and possibly
|
||||
* succeeds with cmpxchg.
|
||||
*/
|
||||
while (cmpxchg(__active_fence_slot(active), prev, fence) != prev) {
|
||||
if (prev) {
|
||||
spin_unlock(prev->lock);
|
||||
dma_fence_put(prev);
|
||||
}
|
||||
spin_unlock_irqrestore(fence->lock, flags);
|
||||
|
||||
prev = i915_active_fence_get(active);
|
||||
GEM_BUG_ON(prev == fence);
|
||||
|
||||
spin_lock_irqsave(fence->lock, flags);
|
||||
if (prev)
|
||||
spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
|
||||
}
|
||||
|
||||
/*
|
||||
* If prev is NULL then the previous fence must have been signaled
|
||||
* and we know that we are first on the timeline. If it is still
|
||||
* present then, having the lock on that fence already acquired, we
|
||||
* serialise with the interrupt handler, in the process of removing it
|
||||
* from any future interrupt callback. A will then wait on C before
|
||||
* executing (if present).
|
||||
*
|
||||
* As B is second, it sees A as the previous fence and so waits for
|
||||
* it to complete its transition and takes over the occupancy for
|
||||
* itself -- remembering that it needs to wait on A before executing.
|
||||
*/
|
||||
if (prev) {
|
||||
__list_del_entry(&active->cb.node);
|
||||
spin_unlock(prev->lock); /* serialise with prev->cb_list */
|
||||
}
|
||||
@ -1077,11 +1122,7 @@ int i915_active_fence_set(struct i915_active_fence *active,
|
||||
int err = 0;
|
||||
|
||||
/* Must maintain timeline ordering wrt previous active requests */
|
||||
rcu_read_lock();
|
||||
fence = __i915_active_fence_set(active, &rq->fence);
|
||||
if (fence) /* but the previous fence may not belong to that timeline! */
|
||||
fence = dma_fence_get_rcu(fence);
|
||||
rcu_read_unlock();
|
||||
if (fence) {
|
||||
err = i915_request_await_dma_fence(rq, fence);
|
||||
dma_fence_put(fence);
|
||||
|
@ -1661,6 +1661,11 @@ __i915_request_ensure_parallel_ordering(struct i915_request *rq,
|
||||
|
||||
request_to_parent(rq)->parallel.last_rq = i915_request_get(rq);
|
||||
|
||||
/*
|
||||
* Users have to put a reference potentially got by
|
||||
* __i915_active_fence_set() to the returned request
|
||||
* when no longer needed
|
||||
*/
|
||||
return to_request(__i915_active_fence_set(&timeline->last_request,
|
||||
&rq->fence));
|
||||
}
|
||||
@ -1707,6 +1712,10 @@ __i915_request_ensure_ordering(struct i915_request *rq,
|
||||
0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Users have to put the reference to prev potentially got
|
||||
* by __i915_active_fence_set() when no longer needed
|
||||
*/
|
||||
return prev;
|
||||
}
|
||||
|
||||
@ -1760,6 +1769,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
|
||||
prev = __i915_request_ensure_ordering(rq, timeline);
|
||||
else
|
||||
prev = __i915_request_ensure_parallel_ordering(rq, timeline);
|
||||
if (prev)
|
||||
i915_request_put(prev);
|
||||
|
||||
/*
|
||||
* Make sure that no request gazumped us - if it was allocated after
|
||||
|
Loading…
Reference in New Issue
Block a user