/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

24 #include "genX_boilerplate.h"
25 #include "brw_defines.h"
26 #include "brw_state.h"
29 * According to the latest documentation, any PIPE_CONTROL with the
30 * "Command Streamer Stall" bit set must also have another bit set,
31 * with five different options:
33 * - Render Target Cache Flush
35 * - Stall at Pixel Scoreboard
36 * - Post-Sync Operation
40 * I chose "Stall at Pixel Scoreboard" since we've used it effectively
41 * in the past, but the choice is fairly arbitrary.
44 gen8_add_cs_stall_workaround_bits(uint32_t *flags
)
46 uint32_t wa_bits
= PIPE_CONTROL_RENDER_TARGET_FLUSH
|
47 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
48 PIPE_CONTROL_WRITE_IMMEDIATE
|
49 PIPE_CONTROL_WRITE_DEPTH_COUNT
|
50 PIPE_CONTROL_WRITE_TIMESTAMP
|
51 PIPE_CONTROL_STALL_AT_SCOREBOARD
|
52 PIPE_CONTROL_DEPTH_STALL
|
53 PIPE_CONTROL_DATA_CACHE_FLUSH
;
55 /* If we're doing a CS stall, and don't already have one of the
56 * workaround bits set, add "Stall at Pixel Scoreboard."
58 if ((*flags
& PIPE_CONTROL_CS_STALL
) != 0 && (*flags
& wa_bits
) == 0)
59 *flags
|= PIPE_CONTROL_STALL_AT_SCOREBOARD
;
62 /* Implement the WaCsStallAtEveryFourthPipecontrol workaround on IVB, BYT:
64 * "Every 4th PIPE_CONTROL command, not counting the PIPE_CONTROL with
65 * only read-cache-invalidate bit(s) set, must have a CS_STALL bit set."
67 * Note that the kernel does CS stalls between batches, so we only need
68 * to count them within a batch.
71 gen7_cs_stall_every_four_pipe_controls(struct brw_context
*brw
, uint32_t flags
)
73 if (GEN_GEN
== 7 && !GEN_IS_HASWELL
) {
74 if (flags
& PIPE_CONTROL_CS_STALL
) {
75 /* If we're doing a CS stall, reset the counter and carry on. */
76 brw
->pipe_controls_since_last_cs_stall
= 0;
80 /* If this is the fourth pipe control without a CS stall, do one now. */
81 if (++brw
->pipe_controls_since_last_cs_stall
== 4) {
82 brw
->pipe_controls_since_last_cs_stall
= 0;
83 return PIPE_CONTROL_CS_STALL
;
89 /* #1130 from gen10 workarounds page in h/w specs:
90 * "Enable Depth Stall on every Post Sync Op if Render target Cache Flush is
91 * not enabled in same PIPE CONTROL and Enable Pixel score board stall if
92 * Render target cache flush is enabled."
94 * Applicable to CNL B0 and C0 steppings only.
97 gen10_add_rcpfe_workaround_bits(uint32_t *flags
)
99 if (*flags
& PIPE_CONTROL_RENDER_TARGET_FLUSH
) {
100 *flags
= *flags
| PIPE_CONTROL_STALL_AT_SCOREBOARD
;
102 (PIPE_CONTROL_WRITE_IMMEDIATE
|
103 PIPE_CONTROL_WRITE_DEPTH_COUNT
|
104 PIPE_CONTROL_WRITE_TIMESTAMP
)) {
105 *flags
= *flags
| PIPE_CONTROL_DEPTH_STALL
;
110 flags_to_post_sync_op(uint32_t flags
)
112 flags
&= PIPE_CONTROL_WRITE_IMMEDIATE
|
113 PIPE_CONTROL_WRITE_DEPTH_COUNT
|
114 PIPE_CONTROL_WRITE_TIMESTAMP
;
116 assert(util_bitcount(flags
) <= 1);
118 if (flags
& PIPE_CONTROL_WRITE_IMMEDIATE
)
119 return WriteImmediateData
;
121 if (flags
& PIPE_CONTROL_WRITE_DEPTH_COUNT
)
122 return WritePSDepthCount
;
124 if (flags
& PIPE_CONTROL_WRITE_TIMESTAMP
)
125 return WriteTimestamp
;
131 genX(emit_raw_pipe_control
)(struct brw_context
*brw
, uint32_t flags
,
132 struct brw_bo
*bo
, uint32_t offset
, uint64_t imm
)
136 gen8_add_cs_stall_workaround_bits(&flags
);
138 if (flags
& PIPE_CONTROL_VF_CACHE_INVALIDATE
) {
140 /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
141 * lists several workarounds:
143 * "Project: SKL, KBL, BXT
145 * If the VF Cache Invalidation Enable is set to a 1 in a
146 * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
147 * sets to 0, with the VF Cache Invalidation Enable set to 0
148 * needs to be sent prior to the PIPE_CONTROL with VF Cache
149 * Invalidation Enable set to a 1."
151 brw_emit_pipe_control_flush(brw
, 0);
155 /* THE PIPE_CONTROL "VF Cache Invalidation Enable" docs continue:
159 * When VF Cache Invalidate is set “Post Sync Operation” must
160 * be enabled to “Write Immediate Data” or “Write PS Depth
161 * Count” or “Write Timestamp”."
163 * If there's a BO, we're already doing some kind of write.
164 * If not, add a write to the workaround BO.
166 * XXX: This causes GPU hangs on Broadwell, so restrict it to
167 * Gen9+ for now...see this bug for more information:
168 * https://bugs.freedesktop.org/show_bug.cgi?id=103787
171 flags
|= PIPE_CONTROL_WRITE_IMMEDIATE
;
172 bo
= brw
->workaround_bo
;
178 gen10_add_rcpfe_workaround_bits(&flags
);
179 } else if (GEN_GEN
>= 6) {
181 (flags
& PIPE_CONTROL_RENDER_TARGET_FLUSH
)) {
182 /* Hardware workaround: SNB B-Spec says:
184 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
185 * Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
188 brw_emit_post_sync_nonzero_flush(brw
);
191 flags
|= gen7_cs_stall_every_four_pipe_controls(brw
, flags
);
194 brw_batch_emit(brw
, GENX(PIPE_CONTROL
), pc
) {
199 pc
.LRIPostSyncOperation
= NoLRIOperation
;
200 pc
.PipeControlFlushEnable
= flags
& PIPE_CONTROL_FLUSH_ENABLE
;
201 pc
.DCFlushEnable
= flags
& PIPE_CONTROL_DATA_CACHE_FLUSH
;
204 pc
.StoreDataIndex
= 0;
205 pc
.CommandStreamerStallEnable
= flags
& PIPE_CONTROL_CS_STALL
;
206 pc
.GlobalSnapshotCountReset
=
207 flags
& PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET
;
208 pc
.TLBInvalidate
= flags
& PIPE_CONTROL_TLB_INVALIDATE
;
209 pc
.GenericMediaStateClear
= flags
& PIPE_CONTROL_MEDIA_STATE_CLEAR
;
210 pc
.StallAtPixelScoreboard
= flags
& PIPE_CONTROL_STALL_AT_SCOREBOARD
;
211 pc
.RenderTargetCacheFlushEnable
=
212 flags
& PIPE_CONTROL_RENDER_TARGET_FLUSH
;
213 pc
.DepthCacheFlushEnable
= flags
& PIPE_CONTROL_DEPTH_CACHE_FLUSH
;
214 pc
.StateCacheInvalidationEnable
=
215 flags
& PIPE_CONTROL_STATE_CACHE_INVALIDATE
;
216 pc
.VFCacheInvalidationEnable
= flags
& PIPE_CONTROL_VF_CACHE_INVALIDATE
;
217 pc
.ConstantCacheInvalidationEnable
=
218 flags
& PIPE_CONTROL_CONST_CACHE_INVALIDATE
;
220 pc
.WriteCacheFlush
= flags
& PIPE_CONTROL_RENDER_TARGET_FLUSH
;
222 pc
.PostSyncOperation
= flags_to_post_sync_op(flags
);
223 pc
.DepthStallEnable
= flags
& PIPE_CONTROL_DEPTH_STALL
;
224 pc
.InstructionCacheInvalidateEnable
=
225 flags
& PIPE_CONTROL_INSTRUCTION_INVALIDATE
;
226 pc
.NotifyEnable
= flags
& PIPE_CONTROL_NOTIFY_ENABLE
;
227 #if GEN_GEN >= 5 || GEN_IS_G4X
228 pc
.IndirectStatePointersDisable
=
229 flags
& PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE
;
232 pc
.TextureCacheInvalidationEnable
=
233 flags
& PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE
;
234 #elif GEN_GEN == 5 || GEN_IS_G4X
235 pc
.TextureCacheFlushEnable
=
236 flags
& PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE
;
238 pc
.Address
= ggtt_bo(bo
, offset
);
239 if (GEN_GEN
< 7 && bo
)
240 pc
.DestinationAddressType
= DAT_GGTT
;
241 pc
.ImmediateData
= imm
;