src/vulkan/gen8_cmd_buffer.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "anv_private.h"
31
32 static void
33 gen8_cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer)
34 {
35 static const uint32_t push_constant_opcodes[] = {
36 [VK_SHADER_STAGE_VERTEX] = 21,
37 [VK_SHADER_STAGE_TESS_CONTROL] = 25, /* HS */
38 [VK_SHADER_STAGE_TESS_EVALUATION] = 26, /* DS */
39 [VK_SHADER_STAGE_GEOMETRY] = 22,
40 [VK_SHADER_STAGE_FRAGMENT] = 23,
41 [VK_SHADER_STAGE_COMPUTE] = 0,
42 };
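   /* All of the 3DSTATE_CONSTANT_* packets share the GEN8_3DSTATE_CONSTANT_VS
    * layout and differ only in their 3D command sub-opcode, so the loop below
    * emits the VS template and patches in the per-stage sub-opcode from the
    * table above.  ConstantBuffer0ReadLength is expressed in 256-bit (32-byte)
    * units, hence the DIV_ROUND_UP(alloc_size, 32).
    */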
43
44 VkShaderStage stage;
45 VkShaderStageFlags flushed = 0;
46
47 for_each_bit(stage, cmd_buffer->state.push_constants_dirty) {
48 struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage);
49
50 if (state.offset == 0)
51 continue;
52
53 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CONSTANT_VS,
54 ._3DCommandSubOpcode = push_constant_opcodes[stage],
55 .ConstantBody = {
56 .PointerToConstantBuffer0 = { .offset = state.offset },
57 .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32),
58 });
59
60 flushed |= 1 << stage;
61 }
62
63 cmd_buffer->state.push_constants_dirty &= ~flushed;
64 }
65
66 static void
67 gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
68 {
69 struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
70 uint32_t *p;
71
72 uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used;
73
74 assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);
75
76 if (cmd_buffer->state.current_pipeline != _3D) {
77 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
78 .PipelineSelection = _3D);
79 cmd_buffer->state.current_pipeline = _3D;
80 }
81
82 if (vb_emit) {
83 const uint32_t num_buffers = __builtin_popcount(vb_emit);
84 const uint32_t num_dwords = 1 + num_buffers * 4;
85
86 p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
87 GEN8_3DSTATE_VERTEX_BUFFERS);
88 uint32_t vb, i = 0;
89 for_each_bit(vb, vb_emit) {
90 struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer;
91 uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset;
92
93 struct GEN8_VERTEX_BUFFER_STATE state = {
94 .VertexBufferIndex = vb,
95 .MemoryObjectControlState = GEN8_MOCS,
96 .AddressModifyEnable = true,
97 .BufferPitch = pipeline->binding_stride[vb],
98 .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
99 .BufferSize = buffer->size - offset
100 };
101
102 GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state);
103 i++;
104 }
105 }
106
107 if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) {
108 /* If somebody compiled a pipeline after starting a command buffer the
109 * scratch bo may have grown since we started this cmd buffer (and
110 * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now,
111 * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
112 if (cmd_buffer->state.scratch_size < pipeline->total_scratch)
113 anv_cmd_buffer_emit_state_base_address(cmd_buffer);
114
115 anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
116 }
117
118 if (cmd_buffer->state.descriptors_dirty)
119 anv_flush_descriptor_sets(cmd_buffer);
120
121 if (cmd_buffer->state.push_constants_dirty)
122 gen8_cmd_buffer_flush_push_constants(cmd_buffer);
123
124 if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT)
125 anv_cmd_buffer_emit_viewport(cmd_buffer);
126
127 if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR)
128 anv_cmd_buffer_emit_scissor(cmd_buffer);
129
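   /* For packets that mix pipeline state with dynamic state (SF, RASTER,
    * WM_DEPTH_STENCIL and VF below), the pipeline pre-packs its half of the
    * packet at pipeline-creation time and anv_batch_emit_merge ORs those
    * dwords with the dynamic fields packed here, so neither side needs to
    * know the other's values.
    */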
130 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
131 ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) {
132 uint32_t sf_dw[GEN8_3DSTATE_SF_length];
133 struct GEN8_3DSTATE_SF sf = {
134 GEN8_3DSTATE_SF_header,
135 .LineWidth = cmd_buffer->state.dynamic.line_width,
136 };
137 GEN8_3DSTATE_SF_pack(NULL, sf_dw, &sf);
138 anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen8.sf);
139 }
140
141 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
142 ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)){
143 bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f ||
144 cmd_buffer->state.dynamic.depth_bias.slope_scaled != 0.0f;
145
146 uint32_t raster_dw[GEN8_3DSTATE_RASTER_length];
147 struct GEN8_3DSTATE_RASTER raster = {
148 GEN8_3DSTATE_RASTER_header,
149 .GlobalDepthOffsetEnableSolid = enable_bias,
150 .GlobalDepthOffsetEnableWireframe = enable_bias,
151 .GlobalDepthOffsetEnablePoint = enable_bias,
152 .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias,
153 .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope_scaled,
154 .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp
155 };
156 GEN8_3DSTATE_RASTER_pack(NULL, raster_dw, &raster);
157 anv_batch_emit_merge(&cmd_buffer->batch, raster_dw,
158 pipeline->gen8.raster);
159 }
160
161 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |
162 ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
163 struct anv_state cc_state =
164 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
165 GEN8_COLOR_CALC_STATE_length, 64);
166 struct GEN8_COLOR_CALC_STATE cc = {
167 .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0],
168 .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
169 .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
170 .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
171 .StencilReferenceValue =
172 cmd_buffer->state.dynamic.stencil_reference.front,
173 .BackFaceStencilReferenceValue =
174 cmd_buffer->state.dynamic.stencil_reference.back,
175 };
176 GEN8_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc);
177
178 anv_batch_emit(&cmd_buffer->batch,
179 GEN8_3DSTATE_CC_STATE_POINTERS,
180 .ColorCalcStatePointer = cc_state.offset,
181 .ColorCalcStatePointerValid = true);
182 }
183
184 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
185 ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
186 ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) {
187 uint32_t wm_depth_stencil_dw[GEN8_3DSTATE_WM_DEPTH_STENCIL_length];
188
189 struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = {
190 GEN8_3DSTATE_WM_DEPTH_STENCIL_header,
191
192 /* Is this what we need to do? */
193 .StencilBufferWriteEnable =
194 cmd_buffer->state.dynamic.stencil_write_mask.front != 0,
195
196 .StencilTestMask =
197 cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff,
198 .StencilWriteMask =
199 cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff,
200
201 .BackfaceStencilTestMask =
202 cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff,
203 .BackfaceStencilWriteMask =
204 cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff,
205 };
206 GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, wm_depth_stencil_dw,
207 &wm_depth_stencil);
208
209 anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw,
210 pipeline->gen8.wm_depth_stencil);
211 }
212
213 if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
214 ANV_CMD_DIRTY_INDEX_BUFFER)) {
215 anv_batch_emit_merge(&cmd_buffer->batch,
216 cmd_buffer->state.state_vf, pipeline->gen8.vf);
217 }
218
219 cmd_buffer->state.vb_dirty &= ~vb_emit;
220 cmd_buffer->state.dirty = 0;
221 }
222
223 void gen8_CmdDraw(
224 VkCmdBuffer cmdBuffer,
225 uint32_t vertexCount,
226 uint32_t instanceCount,
227 uint32_t firstVertex,
228 uint32_t firstInstance)
229 {
230 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
231
232 gen8_cmd_buffer_flush_state(cmd_buffer);
233
234 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
235 .VertexAccessType = SEQUENTIAL,
236 .VertexCountPerInstance = vertexCount,
237 .StartVertexLocation = firstVertex,
238 .InstanceCount = instanceCount,
239 .StartInstanceLocation = firstInstance,
240 .BaseVertexLocation = 0);
241 }
242
243 void gen8_CmdDrawIndexed(
244 VkCmdBuffer cmdBuffer,
245 uint32_t indexCount,
246 uint32_t instanceCount,
247 uint32_t firstIndex,
248 int32_t vertexOffset,
249 uint32_t firstInstance)
250 {
251 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
252
253 gen8_cmd_buffer_flush_state(cmd_buffer);
254
255 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
256 .VertexAccessType = RANDOM,
257 .VertexCountPerInstance = indexCount,
258 .StartVertexLocation = firstIndex,
259 .InstanceCount = instanceCount,
260 .StartInstanceLocation = firstInstance,
261 .BaseVertexLocation = vertexOffset);
262 }
263
264 static void
265 emit_lrm(struct anv_batch *batch,
266 uint32_t reg, struct anv_bo *bo, uint32_t offset)
267 {
268 anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
269 .RegisterAddress = reg,
270 .MemoryAddress = { bo, offset });
271 }
272
273 static void
274 emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
275 {
276 anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM,
277 .RegisterOffset = reg,
278 .DataDWord = imm);
279 }
280
281 /* Auto-Draw / Indirect Registers */
282 #define GEN7_3DPRIM_END_OFFSET 0x2420
283 #define GEN7_3DPRIM_START_VERTEX 0x2430
284 #define GEN7_3DPRIM_VERTEX_COUNT 0x2434
285 #define GEN7_3DPRIM_INSTANCE_COUNT 0x2438
286 #define GEN7_3DPRIM_START_INSTANCE 0x243C
287 #define GEN7_3DPRIM_BASE_VERTEX 0x2440
288
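/* The indirect draw paths below load the application-provided parameter
 * buffer into these registers with MI_LOAD_REGISTER_MEM and then emit a
 * 3DPRIMITIVE with IndirectParameterEnable set.  The dword offsets used
 * (0, 4, 8, 12 and, for indexed draws, 16) follow the layout of the Vulkan
 * indirect draw parameters:
 *
 *    draw:         { vertexCount, instanceCount, firstVertex, firstInstance }
 *    indexed draw: { indexCount, instanceCount, firstIndex, vertexOffset,
 *                    firstInstance }
 */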
289 void gen8_CmdDrawIndirect(
290 VkCmdBuffer cmdBuffer,
291 VkBuffer _buffer,
292 VkDeviceSize offset,
293 uint32_t count,
294 uint32_t stride)
295 {
296 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
297 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
298 struct anv_bo *bo = buffer->bo;
299 uint32_t bo_offset = buffer->offset + offset;
300
301 gen8_cmd_buffer_flush_state(cmd_buffer);
302
303 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
304 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
305 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
306 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
307 emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0);
308
309 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
310 .IndirectParameterEnable = true,
311 .VertexAccessType = SEQUENTIAL);
312 }
313
314 void gen8_CmdBindIndexBuffer(
315 VkCmdBuffer cmdBuffer,
316 VkBuffer _buffer,
317 VkDeviceSize offset,
318 VkIndexType indexType)
319 {
320 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
321 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
322
323 static const uint32_t vk_to_gen_index_type[] = {
324 [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
325 [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
326 };
327
328 struct GEN8_3DSTATE_VF vf = {
329 GEN8_3DSTATE_VF_header,
330 .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? UINT16_MAX : UINT32_MAX,
331 };
332 GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf);
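   /* The packed 3DSTATE_VF dwords are stashed in cmd_buffer->state.state_vf
    * rather than emitted here; gen8_cmd_buffer_flush_state merges them with
    * the pipeline's VF state whenever ANV_CMD_DIRTY_INDEX_BUFFER (or the
    * pipeline) is dirty.
    */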
333
334 cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;
335
336 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER,
337 .IndexFormat = vk_to_gen_index_type[indexType],
338 .MemoryObjectControlState = GEN8_MOCS,
339 .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
340 .BufferSize = buffer->size - offset);
341 }
342
343 static VkResult
344 gen8_flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
345 {
346 struct anv_device *device = cmd_buffer->device;
347 struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
348 struct anv_state surfaces = { 0, }, samplers = { 0, };
349 VkResult result;
350
351 result = anv_cmd_buffer_emit_samplers(cmd_buffer,
352 VK_SHADER_STAGE_COMPUTE, &samplers);
353 if (result != VK_SUCCESS)
354 return result;
355 result = anv_cmd_buffer_emit_binding_table(cmd_buffer,
356 VK_SHADER_STAGE_COMPUTE, &surfaces);
357 if (result != VK_SUCCESS)
358 return result;
359
360 struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = {
361 .KernelStartPointer = pipeline->cs_simd,
362 .KernelStartPointerHigh = 0,
363 .BindingTablePointer = surfaces.offset,
364 .BindingTableEntryCount = 0,
365 .SamplerStatePointer = samplers.offset,
366 .SamplerCount = 0,
367 .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
368 };
369
370 uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
371 struct anv_state state =
372 anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);
373
374 GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);
375
376 anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
377 .InterfaceDescriptorTotalLength = size,
378 .InterfaceDescriptorDataStartAddress = state.offset);
379
380 return VK_SUCCESS;
381 }
382
383 static void
384 gen8_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
385 {
386 struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
387 VkResult result;
388
389 assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
390
391 if (cmd_buffer->state.current_pipeline != GPGPU) {
392 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
393 .PipelineSelection = GPGPU);
394 cmd_buffer->state.current_pipeline = GPGPU;
395 }
396
397 if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)
398 anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
399
400 if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
401 (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) {
402 result = gen8_flush_compute_descriptor_set(cmd_buffer);
403 assert(result == VK_SUCCESS);
404          cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
405 }
406
407 cmd_buffer->state.compute_dirty = 0;
408 }
409
410 void gen8_CmdDrawIndexedIndirect(
411 VkCmdBuffer cmdBuffer,
412 VkBuffer _buffer,
413 VkDeviceSize offset,
414 uint32_t count,
415 uint32_t stride)
416 {
417 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
418 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
419 struct anv_bo *bo = buffer->bo;
420 uint32_t bo_offset = buffer->offset + offset;
421
422 gen8_cmd_buffer_flush_state(cmd_buffer);
423
424 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
425 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
426 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
427 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
428 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);
429
430 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
431 .IndirectParameterEnable = true,
432 .VertexAccessType = RANDOM);
433 }
434
435 void gen8_CmdDispatch(
436 VkCmdBuffer cmdBuffer,
437 uint32_t x,
438 uint32_t y,
439 uint32_t z)
440 {
441 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
442 struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
443 struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
444
445 gen8_cmd_buffer_flush_compute_state(cmd_buffer);
446
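   /* prog_data->simd_size is 8, 16 or 32; integer-dividing it by 16 yields
    * the GPGPU_WALKER SIMDSize encoding (0 = SIMD8, 1 = SIMD16, 2 = SIMD32).
    */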
447 anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
448 .SIMDSize = prog_data->simd_size / 16,
449 .ThreadDepthCounterMaximum = 0,
450 .ThreadHeightCounterMaximum = 0,
451 .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
452 .ThreadGroupIDXDimension = x,
453 .ThreadGroupIDYDimension = y,
454 .ThreadGroupIDZDimension = z,
455 .RightExecutionMask = pipeline->cs_right_mask,
456 .BottomExecutionMask = 0xffffffff);
457
458 anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
459 }
460
461 #define GPGPU_DISPATCHDIMX 0x2500
462 #define GPGPU_DISPATCHDIMY 0x2504
463 #define GPGPU_DISPATCHDIMZ 0x2508
464
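/* Indirect dispatch works the same way as indirect draw: the three group
 * counts { x, y, z } are loaded from the application's buffer into the
 * DISPATCHDIM registers and GPGPU_WALKER is emitted with
 * IndirectParameterEnable set.
 */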
465 void gen8_CmdDispatchIndirect(
466 VkCmdBuffer cmdBuffer,
467 VkBuffer _buffer,
468 VkDeviceSize offset)
469 {
470 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
471 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
472 struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
473 struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
474 struct anv_bo *bo = buffer->bo;
475 uint32_t bo_offset = buffer->offset + offset;
476
477 gen8_cmd_buffer_flush_compute_state(cmd_buffer);
478
479 emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
480 emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
481 emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);
482
483 anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
484 .IndirectParameterEnable = true,
485 .SIMDSize = prog_data->simd_size / 16,
486 .ThreadDepthCounterMaximum = 0,
487 .ThreadHeightCounterMaximum = 0,
488 .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
489 .RightExecutionMask = pipeline->cs_right_mask,
490 .BottomExecutionMask = 0xffffffff);
491
492 anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
493 }
494
495 static void
496 gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
497 {
498 const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
499 const struct anv_image_view *iview =
500 anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
501 const struct anv_image *image = iview ? iview->image : NULL;
502 const bool has_depth = iview && iview->format->depth_format;
503 const bool has_stencil = iview && iview->format->has_stencil;
504
505 /* FIXME: Implement the PMA stall W/A */
506 /* FIXME: Width and Height are wrong */
507
508 /* Emit 3DSTATE_DEPTH_BUFFER */
509 if (has_depth) {
510 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
511 .SurfaceType = SURFTYPE_2D,
512 .DepthWriteEnable = iview->format->depth_format,
513 .StencilWriteEnable = has_stencil,
514 .HierarchicalDepthBufferEnable = false,
515 .SurfaceFormat = iview->format->depth_format,
516 .SurfacePitch = image->depth_surface.stride - 1,
517 .SurfaceBaseAddress = {
518 .bo = image->bo,
519 .offset = image->depth_surface.offset,
520 },
521 .Height = fb->height - 1,
522 .Width = fb->width - 1,
523 .LOD = 0,
524 .Depth = 1 - 1,
525 .MinimumArrayElement = 0,
526 .DepthBufferObjectControlState = GEN8_MOCS,
527 .RenderTargetViewExtent = 1 - 1,
528 .SurfaceQPitch = image->depth_surface.qpitch >> 2);
529 } else {
530 /* Even when no depth buffer is present, the hardware requires that
531 * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says:
532 *
533 * If a null depth buffer is bound, the driver must instead bind depth as:
534 * 3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D
535 * 3DSTATE_DEPTH.Width = 1
536 * 3DSTATE_DEPTH.Height = 1
537  *    3DSTATE_DEPTH.SurfaceFormat = D16_UNORM
538 * 3DSTATE_DEPTH.SurfaceBaseAddress = 0
539 * 3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0
540 * 3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0
541 * 3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0
542 *
543 * The PRM is wrong, though. The width and height must be programmed to
544  * the actual framebuffer's width and height, even when neither depth buffer
545 * nor stencil buffer is present.
546 */
547 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
548 .SurfaceType = SURFTYPE_2D,
549 .SurfaceFormat = D16_UNORM,
550 .Width = fb->width - 1,
551 .Height = fb->height - 1,
552 .StencilWriteEnable = has_stencil);
553 }
554
555 /* Emit 3DSTATE_STENCIL_BUFFER */
556 if (has_stencil) {
557 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER,
558 .StencilBufferEnable = true,
559 .StencilBufferObjectControlState = GEN8_MOCS,
560
561 /* Stencil buffers have strange pitch. The PRM says:
562 *
563 * The pitch must be set to 2x the value computed based on width,
564 * as the stencil buffer is stored with two rows interleaved.
565 */
566 .SurfacePitch = 2 * image->stencil_surface.stride - 1,
567
568 .SurfaceBaseAddress = {
569 .bo = image->bo,
570 .offset = image->offset + image->stencil_surface.offset,
571 },
572 .SurfaceQPitch = image->stencil_surface.stride >> 2);
573 } else {
574 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER);
575 }
576
577     /* Disable hierarchical depth buffers. */
578 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER);
579
580 /* Clear the clear params. */
581 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS);
582 }
583
584 void
585 gen8_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
586 struct anv_subpass *subpass)
587 {
588 cmd_buffer->state.subpass = subpass;
589
590 cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
591
592 gen8_cmd_buffer_emit_depth_stencil(cmd_buffer);
593 }
594
595 void gen8_CmdBeginRenderPass(
596 VkCmdBuffer cmdBuffer,
597 const VkRenderPassBeginInfo* pRenderPassBegin,
598 VkRenderPassContents contents)
599 {
600 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
601 ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);
602 ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
603
604 cmd_buffer->state.framebuffer = framebuffer;
605 cmd_buffer->state.pass = pass;
606
607 const VkRect2D *render_area = &pRenderPassBegin->renderArea;
608
609 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE,
610 .ClippedDrawingRectangleYMin = render_area->offset.y,
611 .ClippedDrawingRectangleXMin = render_area->offset.x,
612 .ClippedDrawingRectangleYMax =
613 render_area->offset.y + render_area->extent.height - 1,
614 .ClippedDrawingRectangleXMax =
615 render_area->offset.x + render_area->extent.width - 1,
616 .DrawingRectangleOriginY = 0,
617 .DrawingRectangleOriginX = 0);
618
619 anv_cmd_buffer_clear_attachments(cmd_buffer, pass,
620 pRenderPassBegin->pClearValues);
621
622 gen8_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses);
623 }
624
625 void gen8_CmdNextSubpass(
626 VkCmdBuffer cmdBuffer,
627 VkRenderPassContents contents)
628 {
629 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
630
631 assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY);
632
633 gen8_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1);
634 }
635
636 void gen8_CmdEndRenderPass(
637 VkCmdBuffer cmdBuffer)
638 {
639 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
640
641 /* Emit a flushing pipe control at the end of a pass. This is kind of a
642 * hack but it ensures that render targets always actually get written.
643 * Eventually, we should do flushing based on image format transitions
644 * or something of that nature.
645 */
646 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
647 .PostSyncOperation = NoWrite,
648 .RenderTargetCacheFlushEnable = true,
649 .InstructionCacheInvalidateEnable = true,
650 .DepthCacheFlushEnable = true,
651 .VFCacheInvalidationEnable = true,
652 .TextureCacheInvalidationEnable = true,
653 .CommandStreamerStallEnable = true);
654 }
655
656 static void
657 emit_ps_depth_count(struct anv_batch *batch,
658 struct anv_bo *bo, uint32_t offset)
659 {
660 anv_batch_emit(batch, GEN8_PIPE_CONTROL,
661 .DestinationAddressType = DAT_PPGTT,
662 .PostSyncOperation = WritePSDepthCount,
663 .Address = { bo, offset }); /* FIXME: This is only lower 32 bits */
664 }
665
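/* Occlusion queries are implemented by capturing the PS depth count at
 * vkCmdBeginQuery and again at vkCmdEndQuery.  Each query slot appears to
 * hold the two 64-bit counters back to back (begin at offset 0, end at
 * offset 8), which matches the +8 used in CmdEndQuery; CmdCopyQueryPoolResults
 * later computes end - begin with MI_MATH to produce the sample count.
 */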
666 void gen8_CmdBeginQuery(
667 VkCmdBuffer cmdBuffer,
668 VkQueryPool queryPool,
669 uint32_t slot,
670 VkQueryControlFlags flags)
671 {
672 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
673 ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
674
675 switch (pool->type) {
676 case VK_QUERY_TYPE_OCCLUSION:
677 emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
678 slot * sizeof(struct anv_query_pool_slot));
679 break;
680
681 case VK_QUERY_TYPE_PIPELINE_STATISTICS:
682 default:
683 unreachable("");
684 }
685 }
686
687 void gen8_CmdEndQuery(
688 VkCmdBuffer cmdBuffer,
689 VkQueryPool queryPool,
690 uint32_t slot)
691 {
692 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
693 ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
694
695 switch (pool->type) {
696 case VK_QUERY_TYPE_OCCLUSION:
697 emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
698 slot * sizeof(struct anv_query_pool_slot) + 8);
699 break;
700
701 case VK_QUERY_TYPE_PIPELINE_STATISTICS:
702 default:
703 unreachable("");
704 }
705 }
706
707 #define TIMESTAMP 0x2358
708
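/* VK_TIMESTAMP_TYPE_TOP is handled by reading the TIMESTAMP register pair
 * directly with MI_STORE_REGISTER_MEM (no pipeline synchronization), while
 * VK_TIMESTAMP_TYPE_BOTTOM uses a PIPE_CONTROL post-sync write so the value
 * is only written once preceding work has drained.
 */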
709 void gen8_CmdWriteTimestamp(
710 VkCmdBuffer cmdBuffer,
711 VkTimestampType timestampType,
712 VkBuffer destBuffer,
713 VkDeviceSize destOffset)
714 {
715 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
716 ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
717 struct anv_bo *bo = buffer->bo;
718
719 switch (timestampType) {
720 case VK_TIMESTAMP_TYPE_TOP:
721 anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
722 .RegisterAddress = TIMESTAMP,
723 .MemoryAddress = { bo, buffer->offset + destOffset });
724 anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
725 .RegisterAddress = TIMESTAMP + 4,
726 .MemoryAddress = { bo, buffer->offset + destOffset + 4 });
727 break;
728
729 case VK_TIMESTAMP_TYPE_BOTTOM:
730 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
731 .DestinationAddressType = DAT_PPGTT,
732 .PostSyncOperation = WriteTimestamp,
733 .Address = /* FIXME: This is only lower 32 bits */
734 { bo, buffer->offset + destOffset });
735 break;
736
737 default:
738 break;
739 }
740 }
741
742 #define alu_opcode(v) __gen_field((v), 20, 31)
743 #define alu_operand1(v) __gen_field((v), 10, 19)
744 #define alu_operand2(v) __gen_field((v), 0, 9)
745 #define alu(opcode, operand1, operand2) \
746 alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2)
747
748 #define OPCODE_NOOP 0x000
749 #define OPCODE_LOAD 0x080
750 #define OPCODE_LOADINV 0x480
751 #define OPCODE_LOAD0 0x081
752 #define OPCODE_LOAD1 0x481
753 #define OPCODE_ADD 0x100
754 #define OPCODE_SUB 0x101
755 #define OPCODE_AND 0x102
756 #define OPCODE_OR 0x103
757 #define OPCODE_XOR 0x104
758 #define OPCODE_STORE 0x180
759 #define OPCODE_STOREINV 0x580
760
761 #define OPERAND_R0 0x00
762 #define OPERAND_R1 0x01
763 #define OPERAND_R2 0x02
764 #define OPERAND_R3 0x03
765 #define OPERAND_R4 0x04
766 #define OPERAND_SRCA 0x20
767 #define OPERAND_SRCB 0x21
768 #define OPERAND_ACCU 0x31
769 #define OPERAND_ZF 0x32
770 #define OPERAND_CF 0x33
771
772 #define CS_GPR(n) (0x2600 + (n) * 8)
773
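/* With the helpers above, the 64-bit subtract of two counters loaded into
 * CS GPRs looks like this (the exact sequence emitted in
 * CmdCopyQueryPoolResults below):
 *
 *    dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1);   // SRCA = end
 *    dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0);   // SRCB = begin
 *    dw[3] = alu(OPCODE_SUB, 0, 0);                        // ACCU = SRCA - SRCB
 *    dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU);  // R2 = ACCU
 */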
774 static void
775 emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg,
776 struct anv_bo *bo, uint32_t offset)
777 {
778 anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
779 .RegisterAddress = reg,
780 .MemoryAddress = { bo, offset });
781 anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
782 .RegisterAddress = reg + 4,
783 .MemoryAddress = { bo, offset + 4 });
784 }
785
786 void gen8_CmdCopyQueryPoolResults(
787 VkCmdBuffer cmdBuffer,
788 VkQueryPool queryPool,
789 uint32_t startQuery,
790 uint32_t queryCount,
791 VkBuffer destBuffer,
792 VkDeviceSize destOffset,
793 VkDeviceSize destStride,
794 VkQueryResultFlags flags)
795 {
796 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
797 ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
798 ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
799 uint32_t slot_offset, dst_offset;
800
801 if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
802       /* Where is the availability info supposed to go? */
803 anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT");
804 return;
805 }
806
807 assert(pool->type == VK_QUERY_TYPE_OCCLUSION);
808
809 /* FIXME: If we're not waiting, should we just do this on the CPU? */
810 if (flags & VK_QUERY_RESULT_WAIT_BIT)
811 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
812 .CommandStreamerStallEnable = true,
813 .StallAtPixelScoreboard = true);
814
815 dst_offset = buffer->offset + destOffset;
816 for (uint32_t i = 0; i < queryCount; i++) {
817
818 slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot);
819
820 emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset);
821 emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8);
822
823 /* FIXME: We need to clamp the result for 32 bit. */
824
825 uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GEN8_MI_MATH);
826 dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1);
827 dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0);
828 dw[3] = alu(OPCODE_SUB, 0, 0);
829 dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU);
830
831 anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
832 .RegisterAddress = CS_GPR(2),
833 /* FIXME: This is only lower 32 bits */
834 .MemoryAddress = { buffer->bo, dst_offset });
835
836 if (flags & VK_QUERY_RESULT_64_BIT)
837 anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
838 .RegisterAddress = CS_GPR(2) + 4,
839 /* FIXME: This is only lower 32 bits */
840 .MemoryAddress = { buffer->bo, dst_offset + 4 });
841
842 dst_offset += destStride;
843 }
844 }
845
846 void
847 gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
848 {
849 struct anv_device *device = cmd_buffer->device;
850 struct anv_bo *scratch_bo = NULL;
851
852 cmd_buffer->state.scratch_size =
853 anv_block_pool_size(&device->scratch_block_pool);
854 if (cmd_buffer->state.scratch_size > 0)
855 scratch_bo = &device->scratch_block_pool.bo;
856
857 /* Emit a render target cache flush.
858 *
859 * This isn't documented anywhere in the PRM. However, it seems to be
860  * necessary prior to changing the surface state base address. Without
861 * this, we get GPU hangs when using multi-level command buffers which
862 * clear depth, reset state base address, and then go render stuff.
863 */
864 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
865 .RenderTargetCacheFlushEnable = true);
866
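   /* The *BufferSize fields below appear to be in units of 4 KB pages, so
    * 0xfffff programs the largest possible (4 GB) bound for each base.
    */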
867 anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS,
868 .GeneralStateBaseAddress = { scratch_bo, 0 },
869 .GeneralStateMemoryObjectControlState = GEN8_MOCS,
870 .GeneralStateBaseAddressModifyEnable = true,
871 .GeneralStateBufferSize = 0xfffff,
872 .GeneralStateBufferSizeModifyEnable = true,
873
874 .SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer),
875 .SurfaceStateMemoryObjectControlState = GEN8_MOCS,
876 .SurfaceStateBaseAddressModifyEnable = true,
877
878 .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 },
879 .DynamicStateMemoryObjectControlState = GEN8_MOCS,
880 .DynamicStateBaseAddressModifyEnable = true,
881 .DynamicStateBufferSize = 0xfffff,
882 .DynamicStateBufferSizeModifyEnable = true,
883
884 .IndirectObjectBaseAddress = { NULL, 0 },
885 .IndirectObjectMemoryObjectControlState = GEN8_MOCS,
886 .IndirectObjectBaseAddressModifyEnable = true,
887 .IndirectObjectBufferSize = 0xfffff,
888 .IndirectObjectBufferSizeModifyEnable = true,
889
890 .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
891 .InstructionMemoryObjectControlState = GEN8_MOCS,
892 .InstructionBaseAddressModifyEnable = true,
893 .InstructionBufferSize = 0xfffff,
894 .InstructionBuffersizeModifyEnable = true);
895
896 /* After re-setting the surface state base address, we have to do some
897  * cache flushing so that the sampler engine will pick up the new
898 * SURFACE_STATE objects and binding tables. From the Broadwell PRM,
899 * Shared Function > 3D Sampler > State > State Caching (page 96):
900 *
901 * Coherency with system memory in the state cache, like the texture
902 * cache is handled partially by software. It is expected that the
903 * command stream or shader will issue Cache Flush operation or
904 * Cache_Flush sampler message to ensure that the L1 cache remains
905 * coherent with system memory.
906 *
907 * [...]
908 *
909 * Whenever the value of the Dynamic_State_Base_Addr,
910 * Surface_State_Base_Addr are altered, the L1 state cache must be
911 * invalidated to ensure the new surface or sampler state is fetched
912 * from system memory.
913 *
914 * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit
915  * which, according to the PIPE_CONTROL instruction documentation in the
916 * Broadwell PRM:
917 *
918 * Setting this bit is independent of any other bit in this packet.
919 * This bit controls the invalidation of the L1 and L2 state caches
920 * at the top of the pipe i.e. at the parsing time.
921 *
922 * Unfortunately, experimentation seems to indicate that state cache
923 * invalidation through a PIPE_CONTROL does nothing whatsoever in
924  * regard to surface state and binding tables. Instead, it seems that
925 * invalidating the texture cache is what is actually needed.
926 *
927 * XXX: As far as we have been able to determine through
928  * experimentation, flushing the texture cache appears to be
929 * sufficient. The theory here is that all of the sampling/rendering
930 * units cache the binding table in the texture cache. However, we have
931 * yet to be able to actually confirm this.
932 */
933 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
934 .TextureCacheInvalidationEnable = true);
935 }
936
937 void gen8_CmdPipelineBarrier(
938 VkCmdBuffer cmdBuffer,
939 VkPipelineStageFlags srcStageMask,
940 VkPipelineStageFlags destStageMask,
941 VkBool32 byRegion,
942 uint32_t memBarrierCount,
943 const void* const* ppMemBarriers)
944 {
945 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
946 uint32_t b, *dw;
947
948 struct GEN8_PIPE_CONTROL cmd = {
949 GEN8_PIPE_CONTROL_header,
950 .PostSyncOperation = NoWrite,
951 };
952
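   /* The strategy here is to accumulate every flush/invalidate bit implied by
    * the source stages and by the memory barriers' output/input masks into
    * this one PIPE_CONTROL, then pack and emit it at the end of the function.
    */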
953 /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */
954
955 if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
956 /* This is just what PIPE_CONTROL does */
957 }
958
959 if (anv_clear_mask(&srcStageMask,
960 VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
961 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
962 VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
963 VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT |
964 VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT |
965 VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
966 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
967 VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
968 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
969 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
970 cmd.StallAtPixelScoreboard = true;
971 }
972
973
974 if (anv_clear_mask(&srcStageMask,
975 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
976 VK_PIPELINE_STAGE_TRANSFER_BIT)) {
977 cmd.CommandStreamerStallEnable = true;
978 }
979
980 if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) {
981 anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT");
982 }
983
984 /* On our hardware, all stages will wait for execution as needed. */
985 (void)destStageMask;
986
987 /* We checked all known VkPipeEventFlags. */
988  /* We checked all known VkPipelineStageFlags bits. */
989
990 /* XXX: Right now, we're really dumb and just flush whatever categories
991 * the app asks for. One of these days we may make this a bit better
992 * but right now that's all the hardware allows for in most areas.
993 */
994 VkMemoryOutputFlags out_flags = 0;
995 VkMemoryInputFlags in_flags = 0;
996
997 for (uint32_t i = 0; i < memBarrierCount; i++) {
998 const struct anv_common *common = ppMemBarriers[i];
999 switch (common->sType) {
1000 case VK_STRUCTURE_TYPE_MEMORY_BARRIER: {
1001 ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common);
1002 out_flags |= barrier->outputMask;
1003 in_flags |= barrier->inputMask;
1004 break;
1005 }
1006 case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: {
1007 ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common);
1008 out_flags |= barrier->outputMask;
1009 in_flags |= barrier->inputMask;
1010 break;
1011 }
1012 case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: {
1013 ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common);
1014 out_flags |= barrier->outputMask;
1015 in_flags |= barrier->inputMask;
1016 break;
1017 }
1018 default:
1019 unreachable("Invalid memory barrier type");
1020 }
1021 }
1022
1023 for_each_bit(b, out_flags) {
1024 switch ((VkMemoryOutputFlags)(1 << b)) {
1025 case VK_MEMORY_OUTPUT_HOST_WRITE_BIT:
1026 break; /* FIXME: Little-core systems */
1027 case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT:
1028 cmd.DCFlushEnable = true;
1029 break;
1030 case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT:
1031 cmd.RenderTargetCacheFlushEnable = true;
1032 break;
1033 case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
1034 cmd.DepthCacheFlushEnable = true;
1035 break;
1036 case VK_MEMORY_OUTPUT_TRANSFER_BIT:
1037 cmd.RenderTargetCacheFlushEnable = true;
1038 cmd.DepthCacheFlushEnable = true;
1039 break;
1040 default:
1041 unreachable("Invalid memory output flag");
1042 }
1043 }
1044
1045    for_each_bit(b, in_flags) {
1046 switch ((VkMemoryInputFlags)(1 << b)) {
1047 case VK_MEMORY_INPUT_HOST_READ_BIT:
1048 break; /* FIXME: Little-core systems */
1049 case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT:
1050 case VK_MEMORY_INPUT_INDEX_FETCH_BIT:
1051 case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT:
1052 cmd.VFCacheInvalidationEnable = true;
1053 break;
1054 case VK_MEMORY_INPUT_UNIFORM_READ_BIT:
1055 cmd.ConstantCacheInvalidationEnable = true;
1056 /* fallthrough */
1057 case VK_MEMORY_INPUT_SHADER_READ_BIT:
1058 cmd.DCFlushEnable = true;
1059 cmd.TextureCacheInvalidationEnable = true;
1060 break;
1061 case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT:
1062 case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
1063 break; /* XXX: Hunh? */
1064 case VK_MEMORY_INPUT_TRANSFER_BIT:
1065 cmd.TextureCacheInvalidationEnable = true;
1066 break;
1067 }
1068 }
1069
1070 dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length);
1071 GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd);
1072 }