Skip to content

Commit 866bc2d

Browse files
committed
Merge tag 'drm-intel-fixes-2020-11-05' of git://anongit.freedesktop.org/drm/drm-intel into drm-fixes
- GVT fixes including vGPU suspend/resume fixes and a workaround for APL guest GPU hang.
- Fix set domain's cache coherency (Chris)
- Fixes around breadcrumbs (Chris)
- Fix encoder lookup during PSR atomic (Imre)
- Hold onto an explicit ref to i915_vma_work.pinned (Chris)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201105173026.GA858446@intel.com
2 parents 53aa37f + 537457a commit 866bc2d

9 files changed

Lines changed: 139 additions & 65 deletions

File tree

drivers/gpu/drm/i915/display/intel_psr.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1754,7 +1754,7 @@ void intel_psr_atomic_check(struct drm_connector *connector,
17541754
return;
17551755

17561756
intel_connector = to_intel_connector(connector);
1757-
dig_port = enc_to_dig_port(intel_attached_encoder(intel_connector));
1757+
dig_port = enc_to_dig_port(to_intel_encoder(new_state->best_encoder));
17581758
if (dev_priv->psr.dp != &dig_port->dp)
17591759
return;
17601760

drivers/gpu/drm/i915/gem/i915_gem_domain.c

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -508,21 +508,6 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
508508
if (!obj)
509509
return -ENOENT;
510510

511-
/*
512-
* Already in the desired write domain? Nothing for us to do!
513-
*
514-
* We apply a little bit of cunning here to catch a broader set of
515-
* no-ops. If obj->write_domain is set, we must be in the same
516-
* obj->read_domains, and only that domain. Therefore, if that
517-
* obj->write_domain matches the request read_domains, we are
518-
* already in the same read/write domain and can skip the operation,
519-
* without having to further check the requested write_domain.
520-
*/
521-
if (READ_ONCE(obj->write_domain) == read_domains) {
522-
err = 0;
523-
goto out;
524-
}
525-
526511
/*
527512
* Try to flush the object off the GPU without holding the lock.
528513
* We will repeat the flush holding the lock in the normal manner
@@ -560,6 +545,19 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
560545
if (err)
561546
goto out;
562547

548+
/*
549+
* Already in the desired write domain? Nothing for us to do!
550+
*
551+
* We apply a little bit of cunning here to catch a broader set of
552+
* no-ops. If obj->write_domain is set, we must be in the same
553+
* obj->read_domains, and only that domain. Therefore, if that
554+
* obj->write_domain matches the request read_domains, we are
555+
* already in the same read/write domain and can skip the operation,
556+
* without having to further check the requested write_domain.
557+
*/
558+
if (READ_ONCE(obj->write_domain) == read_domains)
559+
goto out_unpin;
560+
563561
err = i915_gem_object_lock_interruptible(obj, NULL);
564562
if (err)
565563
goto out_unpin;

drivers/gpu/drm/i915/gt/intel_engine.h

Lines changed: 35 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -245,36 +245,53 @@ static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u
245245
}
246246

247247
static inline u32 *
248-
__gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1)
248+
__gen8_emit_write_rcs(u32 *cs, u32 value, u32 offset, u32 flags0, u32 flags1)
249249
{
250-
/* We're using qword write, offset should be aligned to 8 bytes. */
251-
GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
252-
253-
/* w/a for post sync ops following a GPGPU operation we
254-
* need a prior CS_STALL, which is emitted by the flush
255-
* following the batch.
256-
*/
257250
*cs++ = GFX_OP_PIPE_CONTROL(6) | flags0;
258-
*cs++ = flags1 | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
259-
*cs++ = gtt_offset;
251+
*cs++ = flags1 | PIPE_CONTROL_QW_WRITE;
252+
*cs++ = offset;
260253
*cs++ = 0;
261254
*cs++ = value;
262-
/* We're thrashing one dword of HWS. */
263-
*cs++ = 0;
255+
*cs++ = 0; /* We're thrashing one extra dword. */
264256

265257
return cs;
266258
}
267259

268260
static inline u32*
269261
gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
270262
{
271-
return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, 0, flags);
263+
/* We're using qword write, offset should be aligned to 8 bytes. */
264+
GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
265+
266+
return __gen8_emit_write_rcs(cs,
267+
value,
268+
gtt_offset,
269+
0,
270+
flags | PIPE_CONTROL_GLOBAL_GTT_IVB);
272271
}
273272

274273
static inline u32*
275274
gen12_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1)
276275
{
277-
return __gen8_emit_ggtt_write_rcs(cs, value, gtt_offset, flags0, flags1);
276+
/* We're using qword write, offset should be aligned to 8 bytes. */
277+
GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
278+
279+
return __gen8_emit_write_rcs(cs,
280+
value,
281+
gtt_offset,
282+
flags0,
283+
flags1 | PIPE_CONTROL_GLOBAL_GTT_IVB);
284+
}
285+
286+
static inline u32 *
287+
__gen8_emit_flush_dw(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
288+
{
289+
*cs++ = (MI_FLUSH_DW + 1) | flags;
290+
*cs++ = gtt_offset;
291+
*cs++ = 0;
292+
*cs++ = value;
293+
294+
return cs;
278295
}
279296

280297
static inline u32 *
@@ -285,12 +302,10 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
285302
/* Offset should be aligned to 8 bytes for both (QW/DW) write types */
286303
GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
287304

288-
*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW | flags;
289-
*cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
290-
*cs++ = 0;
291-
*cs++ = value;
292-
293-
return cs;
305+
return __gen8_emit_flush_dw(cs,
306+
value,
307+
gtt_offset | MI_FLUSH_DW_USE_GTT,
308+
flags | MI_FLUSH_DW_OP_STOREDW);
294309
}
295310

296311
static inline void __intel_engine_reset(struct intel_engine_cs *engine,

drivers/gpu/drm/i915/gt/intel_lrc.c

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3547,6 +3547,19 @@ static const struct intel_context_ops execlists_context_ops = {
35473547
.destroy = execlists_context_destroy,
35483548
};
35493549

3550+
static u32 hwsp_offset(const struct i915_request *rq)
3551+
{
3552+
const struct intel_timeline_cacheline *cl;
3553+
3554+
/* Before the request is executed, the timeline/cacheline is fixed */
3555+
3556+
cl = rcu_dereference_protected(rq->hwsp_cacheline, 1);
3557+
if (cl)
3558+
return cl->ggtt_offset;
3559+
3560+
return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset;
3561+
}
3562+
35503563
static int gen8_emit_init_breadcrumb(struct i915_request *rq)
35513564
{
35523565
u32 *cs;
@@ -3569,7 +3582,7 @@ static int gen8_emit_init_breadcrumb(struct i915_request *rq)
35693582
*cs++ = MI_NOOP;
35703583

35713584
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
3572-
*cs++ = i915_request_timeline(rq)->hwsp_offset;
3585+
*cs++ = hwsp_offset(rq);
35733586
*cs++ = 0;
35743587
*cs++ = rq->fence.seqno - 1;
35753588

@@ -4886,11 +4899,9 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
48864899
return gen8_emit_wa_tail(request, cs);
48874900
}
48884901

4889-
static u32 *emit_xcs_breadcrumb(struct i915_request *request, u32 *cs)
4902+
static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
48904903
{
4891-
u32 addr = i915_request_active_timeline(request)->hwsp_offset;
4892-
4893-
return gen8_emit_ggtt_write(cs, request->fence.seqno, addr, 0);
4904+
return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
48944905
}
48954906

48964907
static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
@@ -4909,7 +4920,7 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
49094920
/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
49104921
cs = gen8_emit_ggtt_write_rcs(cs,
49114922
request->fence.seqno,
4912-
i915_request_active_timeline(request)->hwsp_offset,
4923+
hwsp_offset(request),
49134924
PIPE_CONTROL_FLUSH_ENABLE |
49144925
PIPE_CONTROL_CS_STALL);
49154926

@@ -4921,7 +4932,7 @@ gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
49214932
{
49224933
cs = gen8_emit_ggtt_write_rcs(cs,
49234934
request->fence.seqno,
4924-
i915_request_active_timeline(request)->hwsp_offset,
4935+
hwsp_offset(request),
49254936
PIPE_CONTROL_CS_STALL |
49264937
PIPE_CONTROL_TILE_CACHE_FLUSH |
49274938
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
@@ -4983,15 +4994,17 @@ gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs)
49834994

49844995
static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs)
49854996
{
4986-
return gen12_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
4997+
/* XXX Stalling flush before seqno write; post-sync not */
4998+
cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0));
4999+
return gen12_emit_fini_breadcrumb_tail(rq, cs);
49875000
}
49885001

49895002
static u32 *
49905003
gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
49915004
{
49925005
cs = gen12_emit_ggtt_write_rcs(cs,
49935006
request->fence.seqno,
4994-
i915_request_active_timeline(request)->hwsp_offset,
5007+
hwsp_offset(request),
49955008
PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
49965009
PIPE_CONTROL_CS_STALL |
49975010
PIPE_CONTROL_TILE_CACHE_FLUSH |

drivers/gpu/drm/i915/gt/intel_timeline.c

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -188,10 +188,14 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
188188
return cl;
189189
}
190190

191-
static void cacheline_acquire(struct intel_timeline_cacheline *cl)
191+
static void cacheline_acquire(struct intel_timeline_cacheline *cl,
192+
u32 ggtt_offset)
192193
{
193-
if (cl)
194-
i915_active_acquire(&cl->active);
194+
if (!cl)
195+
return;
196+
197+
cl->ggtt_offset = ggtt_offset;
198+
i915_active_acquire(&cl->active);
195199
}
196200

197201
static void cacheline_release(struct intel_timeline_cacheline *cl)
@@ -340,7 +344,7 @@ int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
340344
GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
341345
tl->fence_context, tl->hwsp_offset);
342346

343-
cacheline_acquire(tl->hwsp_cacheline);
347+
cacheline_acquire(tl->hwsp_cacheline, tl->hwsp_offset);
344348
if (atomic_fetch_inc(&tl->pin_count)) {
345349
cacheline_release(tl->hwsp_cacheline);
346350
__i915_vma_unpin(tl->hwsp_ggtt);
@@ -515,7 +519,7 @@ __intel_timeline_get_seqno(struct intel_timeline *tl,
515519
GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
516520
tl->fence_context, tl->hwsp_offset);
517521

518-
cacheline_acquire(cl);
522+
cacheline_acquire(cl, tl->hwsp_offset);
519523
tl->hwsp_cacheline = cl;
520524

521525
*seqno = timeline_advance(tl);
@@ -573,9 +577,7 @@ int intel_timeline_read_hwsp(struct i915_request *from,
573577
if (err)
574578
goto out;
575579

576-
*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
577-
ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;
578-
580+
*hwsp = cl->ggtt_offset;
579581
out:
580582
i915_active_release(&cl->active);
581583
return err;

drivers/gpu/drm/i915/gt/intel_timeline_types.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ struct intel_timeline_cacheline {
9494
struct intel_timeline_hwsp *hwsp;
9595
void *vaddr;
9696

97+
u32 ggtt_offset;
98+
9799
struct rcu_head rcu;
98100
};
99101

drivers/gpu/drm/i915/gvt/handlers.c

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1489,7 +1489,8 @@ static int hws_pga_write(struct intel_vgpu *vgpu, unsigned int offset,
14891489
const struct intel_engine_cs *engine =
14901490
intel_gvt_render_mmio_to_engine(vgpu->gvt, offset);
14911491

1492-
if (!intel_gvt_ggtt_validate_range(vgpu, value, I915_GTT_PAGE_SIZE)) {
1492+
if (value != 0 &&
1493+
!intel_gvt_ggtt_validate_range(vgpu, value, I915_GTT_PAGE_SIZE)) {
14931494
gvt_vgpu_err("write invalid HWSP address, reg:0x%x, value:0x%x\n",
14941495
offset, value);
14951496
return -EINVAL;
@@ -1650,6 +1651,34 @@ static int edp_psr_imr_iir_write(struct intel_vgpu *vgpu,
16501651
return 0;
16511652
}
16521653

1654+
/**
1655+
* FixMe:
1656+
* If the guest fills a non-priv batch buffer on ApolloLake/Broxton as Mesa i965 did in
1657+
* 717e7539124d (i965: Use a WC map and memcpy for the batch instead of pwrite.),
1658+
* the host GPU hangs when executing these MI_BATCH_BUFFERs, because the batch
1659+
* buffer filled by the VM vCPU is not flushed.
1660+
* Temporarily work around this by setting the SNOOP bit for PAT3 used by PPGTT
1661+
* PML4 PTE: PAT(0) PCD(1) PWT(1).
1662+
* Performance is still expected to be low and will need further improvement.
1663+
*/
1664+
static int bxt_ppat_low_write(struct intel_vgpu *vgpu, unsigned int offset,
1665+
void *p_data, unsigned int bytes)
1666+
{
1667+
u64 pat =
1668+
GEN8_PPAT(0, CHV_PPAT_SNOOP) |
1669+
GEN8_PPAT(1, 0) |
1670+
GEN8_PPAT(2, 0) |
1671+
GEN8_PPAT(3, CHV_PPAT_SNOOP) |
1672+
GEN8_PPAT(4, CHV_PPAT_SNOOP) |
1673+
GEN8_PPAT(5, CHV_PPAT_SNOOP) |
1674+
GEN8_PPAT(6, CHV_PPAT_SNOOP) |
1675+
GEN8_PPAT(7, CHV_PPAT_SNOOP);
1676+
1677+
vgpu_vreg(vgpu, offset) = lower_32_bits(pat);
1678+
1679+
return 0;
1680+
}
1681+
16531682
static int guc_status_read(struct intel_vgpu *vgpu,
16541683
unsigned int offset, void *p_data,
16551684
unsigned int bytes)
@@ -2812,7 +2841,7 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt)
28122841

28132842
MMIO_DH(GEN6_PCODE_MAILBOX, D_BDW_PLUS, NULL, mailbox_write);
28142843

2815-
MMIO_D(GEN8_PRIVATE_PAT_LO, D_BDW_PLUS);
2844+
MMIO_D(GEN8_PRIVATE_PAT_LO, D_BDW_PLUS & ~D_BXT);
28162845
MMIO_D(GEN8_PRIVATE_PAT_HI, D_BDW_PLUS);
28172846

28182847
MMIO_D(GAMTARBMODE, D_BDW_PLUS);
@@ -3139,7 +3168,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
31393168
NULL, NULL);
31403169

31413170
MMIO_DFH(GAMT_CHKN_BIT_REG, D_KBL | D_CFL, F_CMD_ACCESS, NULL, NULL);
3142-
MMIO_D(GEN9_CTX_PREEMPT_REG, D_SKL_PLUS);
3171+
MMIO_D(GEN9_CTX_PREEMPT_REG, D_SKL_PLUS & ~D_BXT);
31433172

31443173
return 0;
31453174
}
@@ -3313,9 +3342,21 @@ static int init_bxt_mmio_info(struct intel_gvt *gvt)
33133342
MMIO_D(GEN8_PUSHBUS_SHIFT, D_BXT);
33143343
MMIO_D(GEN6_GFXPAUSE, D_BXT);
33153344
MMIO_DFH(GEN8_L3SQCREG1, D_BXT, F_CMD_ACCESS, NULL, NULL);
3345+
MMIO_DFH(GEN8_L3CNTLREG, D_BXT, F_CMD_ACCESS, NULL, NULL);
3346+
MMIO_DFH(_MMIO(0x20D8), D_BXT, F_CMD_ACCESS, NULL, NULL);
3347+
MMIO_F(GEN8_RING_CS_GPR(RENDER_RING_BASE, 0), 0x40, F_CMD_ACCESS,
3348+
0, 0, D_BXT, NULL, NULL);
3349+
MMIO_F(GEN8_RING_CS_GPR(GEN6_BSD_RING_BASE, 0), 0x40, F_CMD_ACCESS,
3350+
0, 0, D_BXT, NULL, NULL);
3351+
MMIO_F(GEN8_RING_CS_GPR(BLT_RING_BASE, 0), 0x40, F_CMD_ACCESS,
3352+
0, 0, D_BXT, NULL, NULL);
3353+
MMIO_F(GEN8_RING_CS_GPR(VEBOX_RING_BASE, 0), 0x40, F_CMD_ACCESS,
3354+
0, 0, D_BXT, NULL, NULL);
33163355

33173356
MMIO_DFH(GEN9_CTX_PREEMPT_REG, D_BXT, F_CMD_ACCESS, NULL, NULL);
33183357

3358+
MMIO_DH(GEN8_PRIVATE_PAT_LO, D_BXT, NULL, bxt_ppat_low_write);
3359+
33193360
return 0;
33203361
}
33213362

0 commit comments

Comments
 (0)