2 * Copyright © 2017 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
24 #include "genX_boilerplate.h"
25 #include "brw_defines.h"
26 #include "brw_state.h"
29 flags_to_post_sync_op(uint32_t flags
)
31 if (flags
& PIPE_CONTROL_WRITE_IMMEDIATE
)
32 return WriteImmediateData
;
34 if (flags
& PIPE_CONTROL_WRITE_DEPTH_COUNT
)
35 return WritePSDepthCount
;
37 if (flags
& PIPE_CONTROL_WRITE_TIMESTAMP
)
38 return WriteTimestamp
;
44 * Do the given flags have a Post Sync or LRI Post Sync operation?
46 static enum pipe_control_flags
47 get_post_sync_flags(enum pipe_control_flags flags
)
49 flags
&= PIPE_CONTROL_WRITE_IMMEDIATE
|
50 PIPE_CONTROL_WRITE_DEPTH_COUNT
|
51 PIPE_CONTROL_WRITE_TIMESTAMP
|
52 PIPE_CONTROL_LRI_POST_SYNC_OP
;
54 /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with
55 * "LRI Post Sync Operation". So more than one bit set would be illegal.
57 assert(util_bitcount(flags
) <= 1);
/* True when GEN_GEN >= 7 and the context's last_pipeline field equals
 * BRW_COMPUTE_PIPELINE.  Always false on older generations, in which case
 * `brw` is not evaluated at all.
 *
 * The argument is parenthesized so that any pointer expression (for example
 * `ctx + 1`) can be passed safely.
 */
#define IS_COMPUTE_PIPELINE(brw) \
   (GEN_GEN >= 7 && (brw)->last_pipeline == BRW_COMPUTE_PIPELINE)

/* Closed interval - GEN_GEN \in [x, y].
 *
 * Both bounds are parenthesized so that low-precedence operators in an
 * argument cannot re-associate with the comparisons.
 */
#define IS_GEN_BETWEEN(x, y) (GEN_GEN >= (x) && GEN_GEN <= (y))

/* Closed interval - GEN_VERSIONx10 \in [x, y], with the same argument
 * parenthesization as IS_GEN_BETWEEN.
 */
#define IS_GENx10_BETWEEN(x, y) \
   (GEN_VERSIONx10 >= (x) && GEN_VERSIONx10 <= (y))
71 * Emit a series of PIPE_CONTROL commands, taking into account any
72 * workarounds necessary to actually accomplish the caller's request.
74 * Unless otherwise noted, spec quotations in this function come from:
76 * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming
77 * Restrictions for PIPE_CONTROL.
79 * You should not use this function directly. Use the helpers in
80 * brw_pipe_control.c instead, which may split the pipe control further.
83 genX(emit_raw_pipe_control
)(struct brw_context
*brw
, uint32_t flags
,
84 struct brw_bo
*bo
, uint32_t offset
, uint64_t imm
)
86 UNUSED
const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
87 enum pipe_control_flags post_sync_flags
= get_post_sync_flags(flags
);
88 enum pipe_control_flags non_lri_post_sync_flags
=
89 post_sync_flags
& ~PIPE_CONTROL_LRI_POST_SYNC_OP
;
91 /* Recursive PIPE_CONTROL workarounds --------------------------------
92 * (http://knowyourmeme.com/memes/xzibit-yo-dawg)
94 * We do these first because we want to look at the original operation,
95 * rather than any workarounds we set.
97 if (GEN_GEN
== 6 && (flags
& PIPE_CONTROL_RENDER_TARGET_FLUSH
)) {
98 /* Hardware workaround: SNB B-Spec says:
100 * "[Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
101 * Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
104 brw_emit_post_sync_nonzero_flush(brw
);
107 if (GEN_GEN
== 9 && (flags
& PIPE_CONTROL_VF_CACHE_INVALIDATE
)) {
108 /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
109 * lists several workarounds:
111 * "Project: SKL, KBL, BXT
113 * If the VF Cache Invalidation Enable is set to a 1 in a
114 * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
115 * sets to 0, with the VF Cache Invalidation Enable set to 0
116 * needs to be sent prior to the PIPE_CONTROL with VF Cache
117 * Invalidation Enable set to a 1."
119 genX(emit_raw_pipe_control
)(brw
, 0, NULL
, 0, 0);
122 if (GEN_GEN
== 9 && IS_COMPUTE_PIPELINE(brw
) && post_sync_flags
) {
123 /* Project: SKL / Argument: LRI Post Sync Operation [23]
125 * "PIPECONTROL command with “Command Streamer Stall Enable” must be
126 * programmed prior to programming a PIPECONTROL command with "LRI
127 * Post Sync Operation" in GPGPU mode of operation (i.e when
128 * PIPELINE_SELECT command is set to GPGPU mode of operation)."
130 * The same text exists a few rows below for Post Sync Op.
132 genX(emit_raw_pipe_control
)(brw
, PIPE_CONTROL_CS_STALL
, NULL
, 0, 0);
135 /* "Flush Types" workarounds ---------------------------------------------
136 * We do these now because they may add post-sync operations or CS stalls.
139 if (IS_GEN_BETWEEN(8, 10) && (flags
& PIPE_CONTROL_VF_CACHE_INVALIDATE
)) {
140 /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate
142 * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or
143 * 'Write PS Depth Count' or 'Write Timestamp'."
146 flags
|= PIPE_CONTROL_WRITE_IMMEDIATE
;
147 post_sync_flags
|= PIPE_CONTROL_WRITE_IMMEDIATE
;
148 non_lri_post_sync_flags
|= PIPE_CONTROL_WRITE_IMMEDIATE
;
149 bo
= brw
->workaround_bo
;
153 if (GEN_VERSIONx10
< 75 && (flags
& PIPE_CONTROL_DEPTH_STALL
)) {
154 /* Project: PRE-HSW / Argument: Depth Stall
156 * "The following bits must be clear:
157 * - Render Target Cache Flush Enable ([12] of DW1)
158 * - Depth Cache Flush Enable ([0] of DW1)"
160 assert(!(flags
& (PIPE_CONTROL_RENDER_TARGET_FLUSH
|
161 PIPE_CONTROL_DEPTH_CACHE_FLUSH
)));
164 if (GEN_GEN
>= 6 && (flags
& PIPE_CONTROL_DEPTH_STALL
)) {
165 /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable):
167 * "This bit must be DISABLED for operations other than writing
170 * This seems like nonsense. An Ivybridge workaround requires us to
171 * emit a PIPE_CONTROL with a depth stall and write immediate post-sync
172 * operation. Gen8+ requires us to emit depth stalls and depth cache
173 * flushes together. So, it's hard to imagine this means anything other
174 * than "we originally intended this to be used for PS_DEPTH_COUNT".
176 * We ignore the supposed restriction and do nothing.
180 if (GEN_VERSIONx10
< 75 && (flags
& PIPE_CONTROL_DEPTH_CACHE_FLUSH
)) {
181 /* Project: PRE-HSW / Argument: Depth Cache Flush
183 * "Depth Stall must be clear ([13] of DW1)."
185 assert(!(flags
& PIPE_CONTROL_DEPTH_STALL
));
188 if (flags
& (PIPE_CONTROL_RENDER_TARGET_FLUSH
|
189 PIPE_CONTROL_STALL_AT_SCOREBOARD
)) {
190 /* From the PIPE_CONTROL instruction table, bit 12 and bit 1:
192 * "This bit must be DISABLED for End-of-pipe (Read) fences,
193 * PS_DEPTH_COUNT or TIMESTAMP queries."
195 * TODO: Implement end-of-pipe checking.
197 assert(!(post_sync_flags
& (PIPE_CONTROL_WRITE_DEPTH_COUNT
|
198 PIPE_CONTROL_WRITE_TIMESTAMP
)));
201 if (GEN_GEN
< 11 && (flags
& PIPE_CONTROL_STALL_AT_SCOREBOARD
)) {
202 /* From the PIPE_CONTROL instruction table, bit 1:
204 * "This bit is ignored if Depth Stall Enable is set.
205 * Further, the render cache is not flushed even if Write Cache
206 * Flush Enable bit is set."
208 * We assert that the caller doesn't do this combination, to try and
209 * prevent mistakes. It shouldn't hurt the GPU, though.
211 * We skip this check on Gen11+ as the "Stall and Pixel Scoreboard"
212 * and "Render Target Flush" combo is explicitly required for BTI
213 * update workarounds.
215 assert(!(flags
& (PIPE_CONTROL_DEPTH_STALL
|
216 PIPE_CONTROL_RENDER_TARGET_FLUSH
)));
219 /* PIPE_CONTROL page workarounds ------------------------------------- */
221 if (IS_GEN_BETWEEN(7, 8) && (flags
& PIPE_CONTROL_STATE_CACHE_INVALIDATE
)) {
222 /* From the PIPE_CONTROL page itself:
225 * Restriction: Pipe_control with CS-stall bit set must be issued
226 * before a pipe-control command that has the State Cache
227 * Invalidate bit set."
229 flags
|= PIPE_CONTROL_CS_STALL
;
232 if (GEN_IS_HASWELL
) {
233 /* From the PIPE_CONTROL page itself:
235 * "HSW - Programming Note: PIPECONTROL with RO Cache Invalidation:
236 * Prior to programming a PIPECONTROL command with any of the RO
237 * cache invalidation bit set, program a PIPECONTROL flush command
238 * with “CS stall” bit and “HDC Flush” bit set."
240 * TODO: Actually implement this. What's an HDC Flush?
244 if (flags
& PIPE_CONTROL_FLUSH_LLC
) {
245 /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC):
248 * SW must always program Post-Sync Operation to "Write Immediate
249 * Data" when Flush LLC is set."
251 * For now, we just require the caller to do it.
253 assert(flags
& PIPE_CONTROL_WRITE_IMMEDIATE
);
256 /* "Post-Sync Operation" workarounds -------------------------------- */
258 /* Project: All / Argument: Global Snapshot Count Reset [19]
260 * "This bit must not be exercised on any product.
261 * Requires stall bit ([20] of DW1) set."
263 * We don't use this, so we just assert that it isn't used. The
264 * PIPE_CONTROL instruction page indicates that they intended this
265 * as a debug feature and don't think it is useful in production,
266 * but it may actually be usable, should we ever want to.
268 assert((flags
& PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET
) == 0);
270 if (flags
& (PIPE_CONTROL_MEDIA_STATE_CLEAR
|
271 PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE
)) {
272 /* Project: All / Arguments:
274 * - Generic Media State Clear [16]
275 * - Indirect State Pointers Disable [16]
277 * "Requires stall bit ([20] of DW1) set."
279 * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media
282 * "PIPECONTROL command with “Command Streamer Stall Enable” must be
283 * programmed prior to programming a PIPECONTROL command with "Media
284 * State Clear" set in GPGPU mode of operation"
286 * This is implied by the earlier rule, so setting the CS stall bit covers both.
288 flags
|= PIPE_CONTROL_CS_STALL
;
291 if (flags
& PIPE_CONTROL_STORE_DATA_INDEX
) {
292 /* Project: All / Argument: Store Data Index
294 * "Post-Sync Operation ([15:14] of DW1) must be set to something other
297 * For now, we just assert that the caller does this. We might want to
298 * automatically add a write to the workaround BO...
300 assert(non_lri_post_sync_flags
!= 0);
303 if (flags
& PIPE_CONTROL_SYNC_GFDT
) {
304 /* Project: All / Argument: Sync GFDT
306 * "Post-Sync Operation ([15:14] of DW1) must be set to something other
307 * than '0' or 0x2520[13] must be set."
309 * For now, we just assert that the caller does this.
311 assert(non_lri_post_sync_flags
!= 0);
314 if (IS_GENx10_BETWEEN(60, 75) && (flags
& PIPE_CONTROL_TLB_INVALIDATE
)) {
315 /* Project: SNB, IVB, HSW / Argument: TLB inv
317 * "{All SKUs}{All Steppings}: Post-Sync Operation ([15:14] of DW1)
318 * must be set to something other than '0'."
320 * For now, we just assert that the caller does this.
322 assert(non_lri_post_sync_flags
!= 0);
325 if (GEN_GEN
>= 7 && (flags
& PIPE_CONTROL_TLB_INVALIDATE
)) {
326 /* Project: IVB+ / Argument: TLB inv
328 * "Requires stall bit ([20] of DW1) set."
330 * Also, from the PIPE_CONTROL instruction table:
333 * Post Sync Operation or CS stall must be set to ensure a TLB
334 * invalidation occurs. Otherwise no cycle will occur to the TLB
335 * cache to invalidate."
337 * The first rule is the stricter one, so setting the CS stall bit satisfies both.
339 flags
|= PIPE_CONTROL_CS_STALL
;
342 if (GEN_GEN
== 9 && devinfo
->gt
== 4) {
343 /* TODO: The big Skylake GT4 post sync op workaround */
346 /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */
348 if (IS_COMPUTE_PIPELINE(brw
)) {
349 if (GEN_GEN
>= 9 && (flags
& PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE
)) {
350 /* Project: SKL+ / Argument: Tex Invalidate
351 * "Requires stall bit ([20] of DW) set for all GPGPU Workloads."
353 flags
|= PIPE_CONTROL_CS_STALL
;
356 if (GEN_GEN
== 8 && (post_sync_flags
||
357 (flags
& (PIPE_CONTROL_NOTIFY_ENABLE
|
358 PIPE_CONTROL_DEPTH_STALL
|
359 PIPE_CONTROL_RENDER_TARGET_FLUSH
|
360 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
361 PIPE_CONTROL_DATA_CACHE_FLUSH
)))) {
362 /* Project: BDW / Arguments:
364 * - LRI Post Sync Operation [23]
365 * - Post Sync Op [15:14]
368 * - Render Target Cache Flush [12]
369 * - Depth Cache Flush [0]
370 * - DC Flush Enable [5]
372 * "Requires stall bit ([20] of DW) set for all GPGPU and Media
375 * (The docs have separate table rows for each bit, with essentially
376 * the same workaround text. We've combined them here.)
378 flags
|= PIPE_CONTROL_CS_STALL
;
380 /* Also, from the PIPE_CONTROL instruction table, bit 20:
383 * This bit must be always set when PIPE_CONTROL command is
384 * programmed by GPGPU and MEDIA workloads, except for the cases
385 * when only Read Only Cache Invalidation bits are set (State
386 * Cache Invalidation Enable, Instruction cache Invalidation
387 * Enable, Texture Cache Invalidation Enable, Constant Cache
388 * Invalidation Enable). This is to WA FFDOP CG issue, this WA
389 * need not implemented when FF_DOP_CG is disable via "Fixed
390 * Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register."
392 * It sounds like we could avoid CS stalls in some cases, but we
393 * don't currently bother. This list isn't exactly the list above,
399 /* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
401 * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
402 * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
404 * Note that the kernel does CS stalls between batches, so we only need
405 * to count them within a batch. We currently naively count every 4, and
406 * don't skip the ones with only read-cache-invalidate bits set. This
407 * may or may not be a problem...
409 if (GEN_GEN
== 7 && !GEN_IS_HASWELL
) {
410 if (flags
& PIPE_CONTROL_CS_STALL
) {
411 /* If we're doing a CS stall, reset the counter and carry on. */
412 brw
->pipe_controls_since_last_cs_stall
= 0;
415 /* If this is the fourth pipe control without a CS stall, do one now. */
416 if (++brw
->pipe_controls_since_last_cs_stall
== 4) {
417 brw
->pipe_controls_since_last_cs_stall
= 0;
418 flags
|= PIPE_CONTROL_CS_STALL
;
422 /* "Stall" workarounds ----------------------------------------------
423 * These have to come after the earlier ones because we may have added
424 * some additional CS stalls above.
427 if (GEN_GEN
< 9 && (flags
& PIPE_CONTROL_CS_STALL
)) {
428 /* Project: PRE-SKL, VLV, CHV
430 * "[All Stepping][All SKUs]:
432 * One of the following must also be set:
434 * - Render Target Cache Flush Enable ([12] of DW1)
435 * - Depth Cache Flush Enable ([0] of DW1)
436 * - Stall at Pixel Scoreboard ([1] of DW1)
437 * - Depth Stall ([13] of DW1)
438 * - Post-Sync Operation ([13] of DW1)
439 * - DC Flush Enable ([5] of DW1)"
441 * If we don't already have one of those bits set, we choose to add
442 * "Stall at Pixel Scoreboard". Some of the other bits require a
443 * CS stall as a workaround (see above), which would send us into
444 * an infinite recursion of PIPE_CONTROLs. "Stall at Pixel Scoreboard"
445 * appears to be safe, so we choose that.
447 const uint32_t wa_bits
= PIPE_CONTROL_RENDER_TARGET_FLUSH
|
448 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
449 PIPE_CONTROL_WRITE_IMMEDIATE
|
450 PIPE_CONTROL_WRITE_DEPTH_COUNT
|
451 PIPE_CONTROL_WRITE_TIMESTAMP
|
452 PIPE_CONTROL_STALL_AT_SCOREBOARD
|
453 PIPE_CONTROL_DEPTH_STALL
|
454 PIPE_CONTROL_DATA_CACHE_FLUSH
;
455 if (!(flags
& wa_bits
))
456 flags
|= PIPE_CONTROL_STALL_AT_SCOREBOARD
;
459 /* Emit --------------------------------------------------------------- */
461 brw_batch_emit(brw
, GENX(PIPE_CONTROL
), pc
) {
466 pc
.LRIPostSyncOperation
= NoLRIOperation
;
467 pc
.PipeControlFlushEnable
= flags
& PIPE_CONTROL_FLUSH_ENABLE
;
468 pc
.DCFlushEnable
= flags
& PIPE_CONTROL_DATA_CACHE_FLUSH
;
471 pc
.StoreDataIndex
= 0;
472 pc
.CommandStreamerStallEnable
= flags
& PIPE_CONTROL_CS_STALL
;
473 pc
.GlobalSnapshotCountReset
=
474 flags
& PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET
;
475 pc
.TLBInvalidate
= flags
& PIPE_CONTROL_TLB_INVALIDATE
;
476 pc
.GenericMediaStateClear
= flags
& PIPE_CONTROL_MEDIA_STATE_CLEAR
;
477 pc
.StallAtPixelScoreboard
= flags
& PIPE_CONTROL_STALL_AT_SCOREBOARD
;
478 pc
.RenderTargetCacheFlushEnable
=
479 flags
& PIPE_CONTROL_RENDER_TARGET_FLUSH
;
480 pc
.DepthCacheFlushEnable
= flags
& PIPE_CONTROL_DEPTH_CACHE_FLUSH
;
481 pc
.StateCacheInvalidationEnable
=
482 flags
& PIPE_CONTROL_STATE_CACHE_INVALIDATE
;
483 pc
.VFCacheInvalidationEnable
= flags
& PIPE_CONTROL_VF_CACHE_INVALIDATE
;
484 pc
.ConstantCacheInvalidationEnable
=
485 flags
& PIPE_CONTROL_CONST_CACHE_INVALIDATE
;
487 pc
.WriteCacheFlush
= flags
& PIPE_CONTROL_RENDER_TARGET_FLUSH
;
489 pc
.PostSyncOperation
= flags_to_post_sync_op(flags
);
490 pc
.DepthStallEnable
= flags
& PIPE_CONTROL_DEPTH_STALL
;
491 pc
.InstructionCacheInvalidateEnable
=
492 flags
& PIPE_CONTROL_INSTRUCTION_INVALIDATE
;
493 pc
.NotifyEnable
= flags
& PIPE_CONTROL_NOTIFY_ENABLE
;
494 #if GEN_GEN >= 5 || GEN_IS_G4X
495 pc
.IndirectStatePointersDisable
=
496 flags
& PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE
;
499 pc
.TextureCacheInvalidationEnable
=
500 flags
& PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE
;
501 #elif GEN_GEN == 5 || GEN_IS_G4X
502 pc
.TextureCacheFlushEnable
=
503 flags
& PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE
;
505 pc
.Address
= ggtt_bo(bo
, offset
);
506 if (GEN_GEN
< 7 && bo
)
507 pc
.DestinationAddressType
= DAT_GGTT
;
508 pc
.ImmediateData
= imm
;