Merge ../mesa into vulkan
[mesa.git] / src / vulkan / genX_cmd_buffer.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26
27 #include "anv_private.h"
28
29 #if (ANV_GEN == 9)
30 # include "gen9_pack.h"
31 #elif (ANV_GEN == 8)
32 # include "gen8_pack.h"
33 #elif (ANV_IS_HASWELL)
34 # include "gen75_pack.h"
35 #elif (ANV_GEN == 7)
36 # include "gen7_pack.h"
37 #endif
38
39 void
40 genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
41 {
42 struct anv_device *device = cmd_buffer->device;
43 struct anv_bo *scratch_bo = NULL;
44
45 cmd_buffer->state.scratch_size =
46 anv_block_pool_size(&device->scratch_block_pool);
47 if (cmd_buffer->state.scratch_size > 0)
48 scratch_bo = &device->scratch_block_pool.bo;
49
50 /* XXX: Do we need this on more than just BDW? */
51 #if (ANV_GEN == 8)
52 /* Emit a render target cache flush.
53 *
54 * This isn't documented anywhere in the PRM. However, it seems to be
55 * necessary prior to changing the surface state base adress. Without
56 * this, we get GPU hangs when using multi-level command buffers which
57 * clear depth, reset state base address, and then go render stuff.
58 */
59 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
60 .RenderTargetCacheFlushEnable = true);
61 #endif
62
63 anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS),
64 .GeneralStateBaseAddress = { scratch_bo, 0 },
65 .GeneralStateMemoryObjectControlState = GENX(MOCS),
66 .GeneralStateBaseAddressModifyEnable = true,
67
68 .SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer),
69 .SurfaceStateMemoryObjectControlState = GENX(MOCS),
70 .SurfaceStateBaseAddressModifyEnable = true,
71
72 .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 },
73 .DynamicStateMemoryObjectControlState = GENX(MOCS),
74 .DynamicStateBaseAddressModifyEnable = true,
75
76 .IndirectObjectBaseAddress = { NULL, 0 },
77 .IndirectObjectMemoryObjectControlState = GENX(MOCS),
78 .IndirectObjectBaseAddressModifyEnable = true,
79
80 .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
81 .InstructionMemoryObjectControlState = GENX(MOCS),
82 .InstructionBaseAddressModifyEnable = true,
83
84 # if (ANV_GEN >= 8)
85 /* Broadwell requires that we specify a buffer size for a bunch of
86 * these fields. However, since we will be growing the BO's live, we
87 * just set them all to the maximum.
88 */
89 .GeneralStateBufferSize = 0xfffff,
90 .GeneralStateBufferSizeModifyEnable = true,
91 .DynamicStateBufferSize = 0xfffff,
92 .DynamicStateBufferSizeModifyEnable = true,
93 .IndirectObjectBufferSize = 0xfffff,
94 .IndirectObjectBufferSizeModifyEnable = true,
95 .InstructionBufferSize = 0xfffff,
96 .InstructionBuffersizeModifyEnable = true,
97 # endif
98 );
99
100 /* After re-setting the surface state base address, we have to do some
101 * cache flusing so that the sampler engine will pick up the new
102 * SURFACE_STATE objects and binding tables. From the Broadwell PRM,
103 * Shared Function > 3D Sampler > State > State Caching (page 96):
104 *
105 * Coherency with system memory in the state cache, like the texture
106 * cache is handled partially by software. It is expected that the
107 * command stream or shader will issue Cache Flush operation or
108 * Cache_Flush sampler message to ensure that the L1 cache remains
109 * coherent with system memory.
110 *
111 * [...]
112 *
113 * Whenever the value of the Dynamic_State_Base_Addr,
114 * Surface_State_Base_Addr are altered, the L1 state cache must be
115 * invalidated to ensure the new surface or sampler state is fetched
116 * from system memory.
117 *
118 * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit
119 * which, according the PIPE_CONTROL instruction documentation in the
120 * Broadwell PRM:
121 *
122 * Setting this bit is independent of any other bit in this packet.
123 * This bit controls the invalidation of the L1 and L2 state caches
124 * at the top of the pipe i.e. at the parsing time.
125 *
126 * Unfortunately, experimentation seems to indicate that state cache
127 * invalidation through a PIPE_CONTROL does nothing whatsoever in
128 * regards to surface state and binding tables. In stead, it seems that
129 * invalidating the texture cache is what is actually needed.
130 *
131 * XXX: As far as we have been able to determine through
132 * experimentation, shows that flush the texture cache appears to be
133 * sufficient. The theory here is that all of the sampling/rendering
134 * units cache the binding table in the texture cache. However, we have
135 * yet to be able to actually confirm this.
136 */
137 anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL),
138 .TextureCacheInvalidationEnable = true);
139 }
140
141 void genX(CmdPipelineBarrier)(
142 VkCommandBuffer commandBuffer,
143 VkPipelineStageFlags srcStageMask,
144 VkPipelineStageFlags destStageMask,
145 VkBool32 byRegion,
146 uint32_t memBarrierCount,
147 const void* const* ppMemBarriers)
148 {
149 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
150 uint32_t b, *dw;
151
152 struct GENX(PIPE_CONTROL) cmd = {
153 GENX(PIPE_CONTROL_header),
154 .PostSyncOperation = NoWrite,
155 };
156
157 /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */
158
159 if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
160 /* This is just what PIPE_CONTROL does */
161 }
162
163 if (anv_clear_mask(&srcStageMask,
164 VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
165 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
166 VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
167 VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
168 VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
169 VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
170 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
171 VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
172 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
173 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
174 cmd.StallAtPixelScoreboard = true;
175 }
176
177 if (anv_clear_mask(&srcStageMask,
178 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
179 VK_PIPELINE_STAGE_TRANSFER_BIT)) {
180 cmd.CommandStreamerStallEnable = true;
181 }
182
183 if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) {
184 anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT");
185 }
186
187 /* On our hardware, all stages will wait for execution as needed. */
188 (void)destStageMask;
189
190 /* We checked all known VkPipeEventFlags. */
191 anv_assert(srcStageMask == 0);
192
193 /* XXX: Right now, we're really dumb and just flush whatever categories
194 * the app asks for. One of these days we may make this a bit better
195 * but right now that's all the hardware allows for in most areas.
196 */
197 VkAccessFlags src_flags = 0;
198 VkAccessFlags dst_flags = 0;
199
200 for (uint32_t i = 0; i < memBarrierCount; i++) {
201 const struct anv_common *common = ppMemBarriers[i];
202 switch (common->sType) {
203 case VK_STRUCTURE_TYPE_MEMORY_BARRIER: {
204 ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common);
205 src_flags |= barrier->srcAccessMask;
206 dst_flags |= barrier->dstAccessMask;
207 break;
208 }
209 case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: {
210 ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common);
211 src_flags |= barrier->srcAccessMask;
212 dst_flags |= barrier->dstAccessMask;
213 break;
214 }
215 case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: {
216 ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common);
217 src_flags |= barrier->srcAccessMask;
218 dst_flags |= barrier->dstAccessMask;
219 break;
220 }
221 default:
222 unreachable("Invalid memory barrier type");
223 }
224 }
225
226 /* The src flags represent how things were used previously. This is
227 * what we use for doing flushes.
228 */
229 for_each_bit(b, src_flags) {
230 switch ((VkAccessFlagBits)(1 << b)) {
231 case VK_ACCESS_SHADER_WRITE_BIT:
232 cmd.DCFlushEnable = true;
233 break;
234 case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
235 cmd.RenderTargetCacheFlushEnable = true;
236 break;
237 case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
238 cmd.DepthCacheFlushEnable = true;
239 break;
240 case VK_ACCESS_TRANSFER_WRITE_BIT:
241 cmd.RenderTargetCacheFlushEnable = true;
242 cmd.DepthCacheFlushEnable = true;
243 break;
244 default:
245 /* Doesn't require a flush */
246 break;
247 }
248 }
249
250 /* The dst flags represent how things will be used in the fugure. This
251 * is what we use for doing cache invalidations.
252 */
253 for_each_bit(b, dst_flags) {
254 switch ((VkAccessFlagBits)(1 << b)) {
255 case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
256 case VK_ACCESS_INDEX_READ_BIT:
257 case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
258 cmd.VFCacheInvalidationEnable = true;
259 break;
260 case VK_ACCESS_UNIFORM_READ_BIT:
261 cmd.ConstantCacheInvalidationEnable = true;
262 /* fallthrough */
263 case VK_ACCESS_SHADER_READ_BIT:
264 cmd.TextureCacheInvalidationEnable = true;
265 break;
266 case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
267 cmd.TextureCacheInvalidationEnable = true;
268 break;
269 case VK_ACCESS_TRANSFER_READ_BIT:
270 cmd.TextureCacheInvalidationEnable = true;
271 break;
272 case VK_ACCESS_MEMORY_READ_BIT:
273 break; /* XXX: What is this? */
274 default:
275 /* Doesn't require a flush */
276 break;
277 }
278 }
279
280 dw = anv_batch_emit_dwords(&cmd_buffer->batch, GENX(PIPE_CONTROL_length));
281 GENX(PIPE_CONTROL_pack)(&cmd_buffer->batch, dw, &cmd);
282 }