/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "anv_private.h"

#if (ANV_GEN == 9)
#  include "gen9_pack.h"
#elif (ANV_GEN == 8)
#  include "gen8_pack.h"
#elif (ANV_IS_HASWELL)
#  include "gen75_pack.h"
#else
#  include "gen7_pack.h"
#endif
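
/* This file is compiled once per supported hardware generation.  The
 * genX()/GENX() macros expand to generation-prefixed identifiers (gen8_...,
 * GEN8_..., etc.), so each compile produces its own copy of these functions,
 * packing commands with the matching genN_pack.h definitions. */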

void
genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_bo *scratch_bo = NULL;
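
   /* Scratch space (used for things like shader register spilling) comes
    * from the device's scratch block pool.  If any scratch has been
    * allocated, General State is based at it below so that scratch offsets
    * resolve correctly. */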
   cmd_buffer->state.scratch_size =
      anv_block_pool_size(&device->scratch_block_pool);
   if (cmd_buffer->state.scratch_size > 0)
      scratch_bo = &device->scratch_block_pool.bo;

/* XXX: Do we need this on more than just BDW? */
#if (ANV_GEN >= 8)
   /* Emit a render target cache flush.
    *
    * This isn't documented anywhere in the PRM.  However, it seems to be
    * necessary prior to changing the surface state base address.  Without
    * this, we get GPU hangs when using multi-level command buffers which
    * clear depth, reset state base address, and then go render stuff.
    */
   anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                  .RenderTargetCacheFlushEnable = true);
#endif
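
   /* For reference: anv_batch_emit() reserves cmd_length dwords in the
    * batch and packs a command built from the command's header plus the
    * designated initializers given as arguments.  In essence:
    *
    *    void *dst = anv_batch_emit_dwords(batch, cmd##_length);
    *    struct cmd tmpl = { cmd##_header, __VA_ARGS__ };
    *    cmd##_pack(batch, dst, &tmpl);
    */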

   anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS),
      .GeneralStateBaseAddress = { scratch_bo, 0 },
      .GeneralStateMemoryObjectControlState = GENX(MOCS),
      .GeneralStateBaseAddressModifyEnable = true,

      .SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer),
      .SurfaceStateMemoryObjectControlState = GENX(MOCS),
      .SurfaceStateBaseAddressModifyEnable = true,

      .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 },
      .DynamicStateMemoryObjectControlState = GENX(MOCS),
      .DynamicStateBaseAddressModifyEnable = true,

      .IndirectObjectBaseAddress = { NULL, 0 },
      .IndirectObjectMemoryObjectControlState = GENX(MOCS),
      .IndirectObjectBaseAddressModifyEnable = true,

      .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
      .InstructionMemoryObjectControlState = GENX(MOCS),
      .InstructionBaseAddressModifyEnable = true,

#if (ANV_GEN >= 8)
      /* Broadwell requires that we specify a buffer size for a bunch of
       * these fields.  However, since we will be growing the BOs live, we
       * just set them all to the maximum.
       */
      .GeneralStateBufferSize = 0xfffff,
      .GeneralStateBufferSizeModifyEnable = true,
      .DynamicStateBufferSize = 0xfffff,
      .DynamicStateBufferSizeModifyEnable = true,
      .IndirectObjectBufferSize = 0xfffff,
      .IndirectObjectBufferSizeModifyEnable = true,
      .InstructionBufferSize = 0xfffff,
      .InstructionBuffersizeModifyEnable = true,
#endif
   );

   /* After re-setting the surface state base address, we have to do some
    * cache flushing so that the sampler engine will pick up the new
    * SURFACE_STATE objects and binding tables.  From the Broadwell PRM,
    * Shared Function > 3D Sampler > State > State Caching (page 96):
    *
    *    Coherency with system memory in the state cache, like the texture
    *    cache is handled partially by software. It is expected that the
    *    command stream or shader will issue Cache Flush operation or
    *    Cache_Flush sampler message to ensure that the L1 cache remains
    *    coherent with system memory.
    *
    *    [...]
    *
    *    Whenever the value of the Dynamic_State_Base_Addr,
    *    Surface_State_Base_Addr are altered, the L1 state cache must be
    *    invalidated to ensure the new surface or sampler state is fetched
    *    from system memory.
    *
    * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit
    * which, according to the PIPE_CONTROL instruction documentation in the
    * Broadwell PRM:
    *
    *    Setting this bit is independent of any other bit in this packet.
    *    This bit controls the invalidation of the L1 and L2 state caches
    *    at the top of the pipe i.e. at the parsing time.
    *
    * Unfortunately, experimentation seems to indicate that state cache
    * invalidation through a PIPE_CONTROL does nothing whatsoever in
    * regards to surface state and binding tables.  Instead, it seems that
    * invalidating the texture cache is what is actually needed.
    *
    * XXX: As far as we have been able to determine through
    * experimentation, flushing the texture cache appears to be
    * sufficient.  The theory here is that all of the sampling/rendering
    * units cache the binding table in the texture cache.  However, we have
    * yet to be able to actually confirm this.
    */
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
                  .TextureCacheInvalidationEnable = true);
}

void genX(CmdPipelineBarrier)(
    VkCommandBuffer                             commandBuffer,
    VkPipelineStageFlags                        srcStageMask,
    VkPipelineStageFlags                        destStageMask,
    VkBool32                                    byRegion,
    uint32_t                                    memBarrierCount,
    const void* const*                          ppMemBarriers)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   uint32_t b, *dw;

   struct GENX(PIPE_CONTROL) cmd = {
      GENX(PIPE_CONTROL_header),
      .PostSyncOperation = NoWrite,
   };

   /* XXX: I think waitEvent is a no-op on our HW.  We should verify that. */
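
   /* Each anv_clear_mask() call below clears the given bits out of
    * srcStageMask and returns true if any of them were set, so every block
    * consumes the stage bits it knows how to handle; the anv_assert() at
    * the end then checks that no unknown bits remain. */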
   if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
      /* This is just what PIPE_CONTROL does */
   }
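
   /* All of these stages execute in the 3D pipeline proper, so stalling at
    * the pixel scoreboard (waiting for outstanding pixel work to drain)
    * should be enough to wait for any of them. */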
   if (anv_clear_mask(&srcStageMask,
                      VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
                      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
                      VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
                      VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
                      VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
                      VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
                      VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
                      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
                      VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
                      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
      cmd.StallAtPixelScoreboard = true;
   }
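
   /* Compute and transfer work is presumably not tracked by the pixel
    * scoreboard, so fall back to a full command streamer stall for these
    * stages. */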
   if (anv_clear_mask(&srcStageMask,
                      VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
                      VK_PIPELINE_STAGE_TRANSFER_BIT)) {
      cmd.CommandStreamerStallEnable = true;
   }

   if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) {
      anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT");
   }

   /* On our hardware, all stages will wait for execution as needed. */
   (void)destStageMask;

   /* We checked all known VkPipeEventFlags. */
   anv_assert(srcStageMask == 0);

   /* XXX: Right now, we're really dumb and just flush whatever categories
    * the app asks for.  One of these days we may make this a bit better
    * but right now that's all the hardware allows for in most areas.
    */
   VkMemoryOutputFlags out_flags = 0;
   VkMemoryInputFlags in_flags = 0;
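
   /* Accumulate the union of all the barriers' masks.  Every barrier
    * struct begins with the same sType/pNext header; anv_common gives us
    * that header, and ANV_COMMON_TO_STRUCT casts to the concrete type once
    * sType has been inspected. */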
   for (uint32_t i = 0; i < memBarrierCount; i++) {
      const struct anv_common *common = ppMemBarriers[i];
      switch (common->sType) {
      case VK_STRUCTURE_TYPE_MEMORY_BARRIER: {
         ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common);
         out_flags |= barrier->outputMask;
         in_flags |= barrier->inputMask;
         break;
      }
      case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: {
         ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common);
         out_flags |= barrier->outputMask;
         in_flags |= barrier->inputMask;
         break;
      }
      case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: {
         ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common);
         out_flags |= barrier->outputMask;
         in_flags |= barrier->inputMask;
         break;
      }
      default:
         unreachable("Invalid memory barrier type");
      }
   }
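
   /* Map each write (output) category onto the PIPE_CONTROL flush bits
    * that push the corresponding cache's contents out to memory. */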
   for_each_bit(b, out_flags) {
      switch ((VkMemoryOutputFlags)(1 << b)) {
      case VK_MEMORY_OUTPUT_HOST_WRITE_BIT:
         break; /* FIXME: Little-core systems */
      case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT:
         cmd.DCFlushEnable = true;
         break;
      case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT:
         cmd.RenderTargetCacheFlushEnable = true;
         break;
      case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
         cmd.DepthCacheFlushEnable = true;
         break;
      case VK_MEMORY_OUTPUT_TRANSFER_BIT:
         cmd.RenderTargetCacheFlushEnable = true;
         cmd.DepthCacheFlushEnable = true;
         break;
      default:
         unreachable("Invalid memory output flag");
      }
   }
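
   /* Likewise, map each read (input) category onto the PIPE_CONTROL
    * invalidation bits that force stale cached data to be re-fetched. */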
   for_each_bit(b, in_flags) {
      switch ((VkMemoryInputFlags)(1 << b)) {
      case VK_MEMORY_INPUT_HOST_READ_BIT:
         break; /* FIXME: Little-core systems */
      case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT:
      case VK_MEMORY_INPUT_INDEX_FETCH_BIT:
      case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT:
         cmd.VFCacheInvalidationEnable = true;
         break;
      case VK_MEMORY_INPUT_UNIFORM_READ_BIT:
         cmd.ConstantCacheInvalidationEnable = true;
         break;
      case VK_MEMORY_INPUT_SHADER_READ_BIT:
         cmd.DCFlushEnable = true;
         cmd.TextureCacheInvalidationEnable = true;
         break;
      case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT:
      case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
         break; /* XXX: Hunh? */
      case VK_MEMORY_INPUT_TRANSFER_BIT:
         cmd.TextureCacheInvalidationEnable = true;
         break;
      }
   }
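
   /* All flags are accumulated; emit them as a single PIPE_CONTROL rather
    * than one per flag. */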
   dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length));
   GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &cmd);
}