Skip to content

Commit 8ce7099

Browse files
ickle authored and rodrigovivi committed
drm/i915/gt: Use the local HWSP offset during submission
We wrap the timeline on construction of the next request, but there may still be requests in flight that have not yet finalized the breadcrumb. (The breadcrumb is delayed as we need engine-local offsets, and for the virtual engine that is not known until execution.) As such, by the time we write to the timeline's HWSP offset it may have changed, and we should use the value we preserved in the request instead.

Though the window is small and infrequent (at full flow we can expect a timeline's seqno to wrap once every 30 minutes), the impact of writing the old seqno into the new HWSP is severe: the old requests are never completed, and the new requests are completed before they are even submitted.

Fixes: ebece75 ("drm/i915: Keep timeline HWSP allocated until idle across the system")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: <stable@vger.kernel.org> # v5.2+
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201022064127.10159-1-chris@chris-wilson.co.uk
(cherry picked from commit c10f6019d0b2dc8a6a62b55459f3ada5bc4e5e1a)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
1 parent 59dd13a commit 8ce7099

3 files changed

Lines changed: 31 additions & 16 deletions

File tree

drivers/gpu/drm/i915/gt/intel_lrc.c

Lines changed: 19 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -3547,6 +3547,19 @@ static const struct intel_context_ops execlists_context_ops = {
35473547
.destroy = execlists_context_destroy,
35483548
};
35493549

3550+
static u32 hwsp_offset(const struct i915_request *rq)
3551+
{
3552+
const struct intel_timeline_cacheline *cl;
3553+
3554+
/* Before the request is executed, the timeline/cacheline is fixed */
3555+
3556+
cl = rcu_dereference_protected(rq->hwsp_cacheline, 1);
3557+
if (cl)
3558+
return cl->ggtt_offset;
3559+
3560+
return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset;
3561+
}
3562+
35503563
static int gen8_emit_init_breadcrumb(struct i915_request *rq)
35513564
{
35523565
u32 *cs;
@@ -3569,7 +3582,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
35693582
*cs++ = MI_NOOP;
35703583

35713584
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3572-
*cs++ = i915_request_timeline(rq)->hwsp_offset;
3585+
*cs++ = hwsp_offset(rq);
35733586
*cs++ = 0;
35743587
*cs++ = rq->fence.seqno - 1;
35753588

@@ -4886,11 +4899,9 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
48864899
return gen8_emit_wa_tail(request, cs);
48874900
}
48884901

4889-
static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
4902+
static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
48904903
{
4891-
u32 addr = i915_request_active_timeline(request)->hwsp_offset;
4892-
4893-
return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
4904+
return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
48944905
}
48954906

48964907
static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
@@ -4909,7 +4920,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
49094920
/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
49104921
cs = gen8_emit_ggtt_write_rcs(cs,
49114922
request->fence.seqno,
4912-
i915_request_active_timeline(request)->hwsp_offset,
4923+
hwsp_offset(request),
49134924
PIPE_CONTROL_FLUSH_ENABLE |
49144925
PIPE_CONTROL_CS_STALL);
49154926

@@ -4921,7 +4932,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
49214932
{
49224933
cs = gen8_emit_ggtt_write_rcs(cs,
49234934
request->fence.seqno,
4924-
i915_request_active_timeline(request)->hwsp_offset,
4935+
hwsp_offset(request),
49254936
PIPE_CONTROL_CS_STALL |
49264937
PIPE_CONTROL_TILE_CACHE_FLUSH |
49274938
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
@@ -4991,7 +5002,7 @@ gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
49915002
{
49925003
cs = gen12_emit_ggtt_write_rcs(cs,
49935004
request->fence.seqno,
4994-
i915_request_active_timeline(request)->hwsp_offset,
5005+
hwsp_offset(request),
49955006
PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
49965007
PIPE_CONTROL_CS_STALL |
49975008
PIPE_CONTROL_TILE_CACHE_FLUSH |

drivers/gpu/drm/i915/gt/intel_timeline.c

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -188,10 +188,14 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
188188
return cl;
189189
}
190190

191-
static void cacheline_acquire(struct intel_timeline_cacheline *cl)
191+
static void cacheline_acquire(struct intel_timeline_cacheline *cl,
192+
u32 ggtt_offset)
192193
{
193-
if (cl)
194-
i915_active_acquire(&cl->active);
194+
if (!cl)
195+
return;
196+
197+
cl->ggtt_offset = ggtt_offset;
198+
i915_active_acquire(&cl->active);
195199
}
196200

197201
static void cacheline_release(struct intel_timeline_cacheline *cl)
@@ -340,7 +344,7 @@ int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
340344
GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
341345
tl->fence_context, tl->hwsp_offset);
342346

343-
cacheline_acquire(tl->hwsp_cacheline);
347+
cacheline_acquire(tl->hwsp_cacheline, tl->hwsp_offset);
344348
if (atomic_fetch_inc(&tl->pin_count)) {
345349
cacheline_release(tl->hwsp_cacheline);
346350
__i915_vma_unpin(tl->hwsp_ggtt);
@@ -515,7 +519,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
515519
GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
516520
tl->fence_context, tl->hwsp_offset);
517521

518-
cacheline_acquire(cl);
522+
cacheline_acquire(cl, tl->hwsp_offset);
519523
tl->hwsp_cacheline = cl;
520524

521525
*seqno = timeline_advance(tl);
@@ -573,9 +577,7 @@ int intel_timeline_read_hwsp(struct i915_request *from,
573577
if (err)
574578
goto out;
575579

576-
*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
577-
ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;
578-
580+
*hwsp = cl->ggtt_offset;
579581
out:
580582
i915_active_release(&cl->active);
581583
return err;

drivers/gpu/drm/i915/gt/intel_timeline_types.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -94,6 +94,8 @@ struct intel_timeline_cacheline {
9494
struct intel_timeline_hwsp *hwsp;
9595
void *vaddr;
9696

97+
u32 ggtt_offset;
98+
9799
struct rcu_head rcu;
98100
};
99101

0 commit comments

Comments
 (0)