vk: Remove dummy anv_depth_stencil_view
[mesa.git] / src / vulkan / gen8_cmd_buffer.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "anv_private.h"
31
/* Emit 3DSTATE_CONSTANT_* packets for every graphics stage whose push
 * constants are marked dirty, then clear the dirty bits for the stages
 * that were actually flushed.
 */
static void
gen8_cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer)
{
   uint32_t stage;

   /* GEN8_3DSTATE_CONSTANT_VS is used as the packet template; the
    * per-stage _3DCommandSubOpcode below redirects it to the right stage.
    */
   static const uint32_t push_constant_opcodes[] = {
      [VK_SHADER_STAGE_VERTEX] = 21,
      [VK_SHADER_STAGE_TESS_CONTROL] = 25, /* HS */
      [VK_SHADER_STAGE_TESS_EVALUATION] = 26, /* DS */
      [VK_SHADER_STAGE_GEOMETRY] = 22,
      [VK_SHADER_STAGE_FRAGMENT] = 23,
      [VK_SHADER_STAGE_COMPUTE] = 0,
   };

   uint32_t flushed = 0;

   for_each_bit(stage, cmd_buffer->state.push_constants_dirty) {
      struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage);

      /* An offset of 0 means no push-constant data exists for this stage. */
      if (state.offset == 0)
         continue;

      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CONSTANT_VS,
                     ._3DCommandSubOpcode = push_constant_opcodes[stage],
                     .ConstantBody = {
                        .PointerToConstantBuffer0 = { .offset = state.offset },
                        /* Read length is in 256-bit (32-byte) units. */
                        .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32),
                     });

      flushed |= 1 << stage;
   }

   cmd_buffer->state.push_constants_dirty &= ~flushed;
}
66
/* Flush all dirty 3D state into the batch before a draw: pipeline select,
 * vertex buffers, the pipeline's pre-baked batch, descriptor sets, push
 * constants, viewport/scissor pointers, rasterizer state, depth-stencil
 * state, color-calc state and VF state.  Clears the dirty bits it handled.
 */
static void
gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
   uint32_t *p;

   /* Only re-emit vertex buffers that are both dirty and actually read by
    * the current pipeline. */
   uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used;

   assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);

   if (cmd_buffer->state.current_pipeline != _3D) {
      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
                     .PipelineSelection = _3D);
      cmd_buffer->state.current_pipeline = _3D;
   }

   if (vb_emit) {
      const uint32_t num_buffers = __builtin_popcount(vb_emit);
      /* 1 header dword + 4 dwords of VERTEX_BUFFER_STATE per buffer. */
      const uint32_t num_dwords = 1 + num_buffers * 4;

      p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
                          GEN8_3DSTATE_VERTEX_BUFFERS);
      uint32_t vb, i = 0;
      for_each_bit(vb, vb_emit) {
         struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer;
         uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset;

         struct GEN8_VERTEX_BUFFER_STATE state = {
            .VertexBufferIndex = vb,
            .MemoryObjectControlState = GEN8_MOCS,
            .AddressModifyEnable = true,
            .BufferPitch = pipeline->binding_stride[vb],
            .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
            .BufferSize = buffer->size - offset
         };

         GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state);
         i++;
      }
   }

   if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) {
      /* If somebody compiled a pipeline after starting a command buffer the
       * scratch bo may have grown since we started this cmd buffer (and
       * emitted STATE_BASE_ADDRESS).  If we're binding that pipeline now,
       * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
      if (cmd_buffer->state.scratch_size < pipeline->total_scratch)
         anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
   }

   if (cmd_buffer->state.descriptors_dirty)
      anv_flush_descriptor_sets(cmd_buffer);

   if (cmd_buffer->state.push_constants_dirty)
      gen8_cmd_buffer_flush_push_constants(cmd_buffer);

   if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) {
      struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state;
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS,
                     .ScissorRectPointer = vp_state->scissor.offset);
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
                     .CCViewportPointer = vp_state->cc_vp.offset);
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
                     .SFClipViewportPointer = vp_state->sf_clip_vp.offset);
   }

   /* SF and raster state merge static pipeline state with dynamic RS state,
    * so both dirty sources require a re-emit. */
   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
                                  ANV_CMD_BUFFER_RS_DIRTY)) {
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.rs_state->gen8.sf,
                           pipeline->gen8.sf);
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.rs_state->gen8.raster,
                           pipeline->gen8.raster);
   }

   if (cmd_buffer->state.ds_state &&
       (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
                                   ANV_CMD_BUFFER_DS_DIRTY))) {
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.ds_state->gen8.wm_depth_stencil,
                           pipeline->gen8.wm_depth_stencil);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_CB_DIRTY |
                                  ANV_CMD_BUFFER_DS_DIRTY)) {
      /* COLOR_CALC_STATE combines blend-constant (CB) and stencil-ref (DS)
       * data; either half may be absent, hence the three-way emit below. */
      struct anv_state state;
      if (cmd_buffer->state.ds_state == NULL)
         state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
                                             cmd_buffer->state.cb_state->color_calc_state,
                                             GEN8_COLOR_CALC_STATE_length, 64);
      else if (cmd_buffer->state.cb_state == NULL)
         state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
                                             cmd_buffer->state.ds_state->gen8.color_calc_state,
                                             GEN8_COLOR_CALC_STATE_length, 64);
      else
         state = anv_cmd_buffer_merge_dynamic(cmd_buffer,
                                              cmd_buffer->state.ds_state->gen8.color_calc_state,
                                              cmd_buffer->state.cb_state->color_calc_state,
                                              GEN8_COLOR_CALC_STATE_length, 64);

      anv_batch_emit(&cmd_buffer->batch,
                     GEN8_3DSTATE_CC_STATE_POINTERS,
                     .ColorCalcStatePointer = state.offset,
                     .ColorCalcStatePointerValid = true);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
                                  ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) {
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.state_vf, pipeline->gen8.vf);
   }

   cmd_buffer->state.vb_dirty &= ~vb_emit;
   cmd_buffer->state.dirty = 0;
}
185
186 void gen8_CmdDraw(
187 VkCmdBuffer cmdBuffer,
188 uint32_t firstVertex,
189 uint32_t vertexCount,
190 uint32_t firstInstance,
191 uint32_t instanceCount)
192 {
193 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
194
195 gen8_cmd_buffer_flush_state(cmd_buffer);
196
197 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
198 .VertexAccessType = SEQUENTIAL,
199 .VertexCountPerInstance = vertexCount,
200 .StartVertexLocation = firstVertex,
201 .InstanceCount = instanceCount,
202 .StartInstanceLocation = firstInstance,
203 .BaseVertexLocation = 0);
204 }
205
206 void gen8_CmdDrawIndexed(
207 VkCmdBuffer cmdBuffer,
208 uint32_t firstIndex,
209 uint32_t indexCount,
210 int32_t vertexOffset,
211 uint32_t firstInstance,
212 uint32_t instanceCount)
213 {
214 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
215
216 gen8_cmd_buffer_flush_state(cmd_buffer);
217
218 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
219 .VertexAccessType = RANDOM,
220 .VertexCountPerInstance = indexCount,
221 .StartVertexLocation = firstIndex,
222 .InstanceCount = instanceCount,
223 .StartInstanceLocation = firstInstance,
224 .BaseVertexLocation = vertexOffset);
225 }
226
/* Emit MI_LOAD_REGISTER_MEM: load one dword from bo+offset into the MMIO
 * register at 'reg'. */
static void
emit_lrm(struct anv_batch *batch,
         uint32_t reg, struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
                  .RegisterAddress = reg,
                  .MemoryAddress = { bo, offset });
}
235
/* Emit MI_LOAD_REGISTER_IMM: load the immediate 'imm' into the MMIO
 * register at 'reg'. */
static void
emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
{
   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM,
                  .RegisterOffset = reg,
                  .DataDWord = imm);
}
243
/* Auto-Draw / Indirect Registers
 *
 * MMIO registers that feed 3DPRIMITIVE when IndirectParameterEnable is
 * set; loaded via MI_LOAD_REGISTER_MEM/IMM before the draw.
 */
#define GEN7_3DPRIM_END_OFFSET 0x2420
#define GEN7_3DPRIM_START_VERTEX 0x2430
#define GEN7_3DPRIM_VERTEX_COUNT 0x2434
#define GEN7_3DPRIM_INSTANCE_COUNT 0x2438
#define GEN7_3DPRIM_START_INSTANCE 0x243C
#define GEN7_3DPRIM_BASE_VERTEX 0x2440
251
252 void gen8_CmdDrawIndirect(
253 VkCmdBuffer cmdBuffer,
254 VkBuffer _buffer,
255 VkDeviceSize offset,
256 uint32_t count,
257 uint32_t stride)
258 {
259 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
260 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
261 struct anv_bo *bo = buffer->bo;
262 uint32_t bo_offset = buffer->offset + offset;
263
264 gen8_cmd_buffer_flush_state(cmd_buffer);
265
266 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
267 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
268 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
269 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
270 emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0);
271
272 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
273 .IndirectParameterEnable = true,
274 .VertexAccessType = SEQUENTIAL);
275 }
276
/* Bind an index buffer: emit 3DSTATE_INDEX_BUFFER immediately and record
 * the matching primitive-restart cut index in the saved 3DSTATE_VF dwords
 * (merged with the pipeline's VF state at flush time).
 */
void gen8_CmdBindIndexBuffer(
    VkCmdBuffer cmdBuffer,
    VkBuffer _buffer,
    VkDeviceSize offset,
    VkIndexType indexType)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);

   static const uint32_t vk_to_gen_index_type[] = {
      [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
      [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
   };

   /* The cut index (primitive restart value) is the max value of the
    * index type, so it depends on the bound buffer, not the pipeline. */
   struct GEN8_3DSTATE_VF vf = {
      GEN8_3DSTATE_VF_header,
      .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? UINT16_MAX : UINT32_MAX,
   };
   GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf);

   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY;

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER,
                  .IndexFormat = vk_to_gen_index_type[indexType],
                  .MemoryObjectControlState = GEN8_MOCS,
                  .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
                  .BufferSize = buffer->size - offset);
}
305
/* Build and load the compute INTERFACE_DESCRIPTOR_DATA: emit the sampler
 * and binding tables for the compute stage, pack a descriptor pointing at
 * them plus the CS kernel, and emit MEDIA_INTERFACE_DESCRIPTOR_LOAD.
 *
 * Returns VK_SUCCESS, or the error from emitting samplers/binding table.
 */
static VkResult
gen8_flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct anv_state surfaces = { 0, }, samplers = { 0, };
   VkResult result;

   result = anv_cmd_buffer_emit_samplers(cmd_buffer,
                                         VK_SHADER_STAGE_COMPUTE, &samplers);
   if (result != VK_SUCCESS)
      return result;
   result = anv_cmd_buffer_emit_binding_table(cmd_buffer,
                                              VK_SHADER_STAGE_COMPUTE, &surfaces);
   if (result != VK_SUCCESS)
      return result;

   struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = {
      .KernelStartPointer = pipeline->cs_simd,
      .KernelStartPointerHigh = 0,
      .BindingTablePointer = surfaces.offset,
      .BindingTableEntryCount = 0,
      .SamplerStatePointer = samplers.offset,
      .SamplerCount = 0,
      .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
   };

   uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
   /* The descriptor must live in dynamic state so the GPU can read it via
    * the dynamic state base address. */
   struct anv_state state =
      anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);

   GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);

   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
                  .InterfaceDescriptorTotalLength = size,
                  .InterfaceDescriptorDataStartAddress = state.offset);

   return VK_SUCCESS;
}
345
346 static void
347 gen8_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
348 {
349 struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
350 VkResult result;
351
352 assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
353
354 if (cmd_buffer->state.current_pipeline != GPGPU) {
355 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
356 .PipelineSelection = GPGPU);
357 cmd_buffer->state.current_pipeline = GPGPU;
358 }
359
360 if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
361 anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
362
363 if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
364 (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) {
365 result = gen8_flush_compute_descriptor_set(cmd_buffer);
366 assert(result == VK_SUCCESS);
367 cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE;
368 }
369
370 cmd_buffer->state.compute_dirty = 0;
371 }
372
373 void gen8_CmdDrawIndexedIndirect(
374 VkCmdBuffer cmdBuffer,
375 VkBuffer _buffer,
376 VkDeviceSize offset,
377 uint32_t count,
378 uint32_t stride)
379 {
380 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
381 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
382 struct anv_bo *bo = buffer->bo;
383 uint32_t bo_offset = buffer->offset + offset;
384
385 gen8_cmd_buffer_flush_state(cmd_buffer);
386
387 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
388 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
389 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
390 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
391 emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);
392
393 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
394 .IndirectParameterEnable = true,
395 .VertexAccessType = RANDOM);
396 }
397
/* Dispatch an x*y*z grid of compute thread groups via GPGPU_WALKER,
 * flushing dirty compute state first.
 */
void gen8_CmdDispatch(
    VkCmdBuffer cmdBuffer,
    uint32_t x,
    uint32_t y,
    uint32_t z)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;

   gen8_cmd_buffer_flush_compute_state(cmd_buffer);

   anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
                  /* Hardware encodes SIMD8/16/32 as size/16 (0, 1, 2). */
                  .SIMDSize = prog_data->simd_size / 16,
                  .ThreadDepthCounterMaximum = 0,
                  .ThreadHeightCounterMaximum = 0,
                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
                  .ThreadGroupIDXDimension = x,
                  .ThreadGroupIDYDimension = y,
                  .ThreadGroupIDZDimension = z,
                  .RightExecutionMask = pipeline->cs_right_mask,
                  .BottomExecutionMask = 0xffffffff);

   /* MEDIA_STATE_FLUSH is required after GPGPU_WALKER. */
   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
}
423
/* MMIO registers that feed GPGPU_WALKER when IndirectParameterEnable is
 * set; loaded via MI_LOAD_REGISTER_MEM before the dispatch. */
#define GPGPU_DISPATCHDIMX 0x2500
#define GPGPU_DISPATCHDIMY 0x2504
#define GPGPU_DISPATCHDIMZ 0x2508
427
/* Indirect compute dispatch: load the x/y/z group counts from 'buffer'
 * into the GPGPU_DISPATCHDIM registers, then emit GPGPU_WALKER with
 * IndirectParameterEnable.
 */
void gen8_CmdDispatchIndirect(
    VkCmdBuffer cmdBuffer,
    VkBuffer _buffer,
    VkDeviceSize offset)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   gen8_cmd_buffer_flush_compute_state(cmd_buffer);

   /* VkDispatchIndirectCommand layout: x, y, z (4 bytes each). */
   emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
   emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
   emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);

   anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
                  .IndirectParameterEnable = true,
                  /* Hardware encodes SIMD8/16/32 as size/16 (0, 1, 2). */
                  .SIMDSize = prog_data->simd_size / 16,
                  .ThreadDepthCounterMaximum = 0,
                  .ThreadHeightCounterMaximum = 0,
                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
                  .RightExecutionMask = pipeline->cs_right_mask,
                  .BottomExecutionMask = 0xffffffff);

   /* MEDIA_STATE_FLUSH is required after GPGPU_WALKER. */
   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
}
457
/* Emit 3DSTATE_DEPTH_BUFFER / 3DSTATE_STENCIL_BUFFER for the current
 * subpass's depth-stencil attachment, or the required null-depth-buffer
 * programming when the subpass has none.  Always disables HiZ and clears
 * the clear params.
 */
static void
gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_subpass *subpass = cmd_buffer->state.subpass;
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct anv_depth_stencil_view *view = NULL;

   if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
      const struct anv_attachment_view *aview =
         fb->attachments[subpass->depth_stencil_attachment];
      assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL);
      view = (const struct anv_depth_stencil_view *)aview;
   }

   /* FIXME: Implement the PMA stall W/A */
   /* FIXME: Width and Height are wrong */

   if (view) {
      /* A zero stride is used as the "no depth / no stencil surface"
       * marker throughout this packet. */
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
                     .SurfaceType = SURFTYPE_2D,
                     .DepthWriteEnable = view->depth_stride > 0,
                     .StencilWriteEnable = view->stencil_stride > 0,
                     .HierarchicalDepthBufferEnable = false,
                     .SurfaceFormat = view->depth_format,
                     .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0,
                     .SurfaceBaseAddress = { view->bo, view->depth_offset },
                     .Height = fb->height - 1,
                     .Width = fb->width - 1,
                     .LOD = 0,
                     .Depth = 1 - 1,
                     .MinimumArrayElement = 0,
                     .DepthBufferObjectControlState = GEN8_MOCS,
                     .RenderTargetViewExtent = 1 - 1,
                     /* QPitch field is in units of 4 rows. */
                     .SurfaceQPitch = view->depth_qpitch >> 2);

      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER,
                     .StencilBufferEnable = view->stencil_stride > 0,
                     .StencilBufferObjectControlState = GEN8_MOCS,
                     .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0,
                     .SurfaceBaseAddress = { view->bo, view->stencil_offset },
                     .SurfaceQPitch = view->stencil_qpitch >> 2);
   } else {
      /* Even when no depth buffer is present, the hardware requires that
       * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says:
       *
       *    If a null depth buffer is bound, the driver must instead bind depth as:
       *       3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D
       *       3DSTATE_DEPTH.Width = 1
       *       3DSTATE_DEPTH.Height = 1
       *       3DSTATE_DEPTH.SuraceFormat = D16_UNORM
       *       3DSTATE_DEPTH.SurfaceBaseAddress = 0
       *       3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0
       *       3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0
       *       3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0
       *
       * The PRM is wrong, though. The width and height must be programmed to
       * actual framebuffer's width and height.
       */
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
                     .SurfaceType = SURFTYPE_2D,
                     .SurfaceFormat = D16_UNORM,
                     .Width = fb->width - 1,
                     .Height = fb->height - 1);

      /* Disable the stencil buffer. */
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER);
   }

   /* Disable hierarchial depth buffers. */
   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER);

   /* Clear the clear params. */
   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS);
}
532
533 void
534 gen8_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
535 struct anv_subpass *subpass)
536 {
537 cmd_buffer->state.subpass = subpass;
538
539 cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
540
541 gen8_cmd_buffer_emit_depth_stencil(cmd_buffer);
542 }
543
/* Begin a render pass: record the framebuffer and pass, program the
 * drawing rectangle from the render area, clear the attachments that
 * request it, and enter the first subpass.
 */
void gen8_CmdBeginRenderPass(
    VkCmdBuffer cmdBuffer,
    const VkRenderPassBeginInfo* pRenderPassBegin,
    VkRenderPassContents contents)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);
   ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);

   cmd_buffer->state.framebuffer = framebuffer;
   cmd_buffer->state.pass = pass;

   const VkRect2D *render_area = &pRenderPassBegin->renderArea;

   /* DRAWING_RECTANGLE bounds are inclusive, hence the -1. */
   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE,
                  .ClippedDrawingRectangleYMin = render_area->offset.y,
                  .ClippedDrawingRectangleXMin = render_area->offset.x,
                  .ClippedDrawingRectangleYMax =
                     render_area->offset.y + render_area->extent.height - 1,
                  .ClippedDrawingRectangleXMax =
                     render_area->offset.x + render_area->extent.width - 1,
                  .DrawingRectangleOriginY = 0,
                  .DrawingRectangleOriginX = 0);

   anv_cmd_buffer_clear_attachments(cmd_buffer, pass,
                                    pRenderPassBegin->pAttachmentClearValues);

   gen8_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses);
}
573
/* Advance to the next subpass of the current render pass. */
void gen8_CmdNextSubpass(
    VkCmdBuffer cmdBuffer,
    VkRenderPassContents contents)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY);

   /* Subpasses are stored contiguously in the pass, so the next one is
    * simply state.subpass + 1. */
   gen8_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1);
}
584
/* End the current render pass, emitting a heavyweight PIPE_CONTROL flush. */
void gen8_CmdEndRenderPass(
    VkCmdBuffer cmdBuffer)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   /* Emit a flushing pipe control at the end of a pass.  This is kind of a
    * hack but it ensures that render targets always actually get written.
    * Eventually, we should do flushing based on image format transitions
    * or something of that nature.
    */
   anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                  .PostSyncOperation = NoWrite,
                  .RenderTargetCacheFlushEnable = true,
                  .InstructionCacheInvalidateEnable = true,
                  .DepthCacheFlushEnable = true,
                  .VFCacheInvalidationEnable = true,
                  .TextureCacheInvalidationEnable = true,
                  .CommandStreamerStallEnable = true);
}
604
/* Emit a PIPE_CONTROL that writes the current PS depth count (passing
 * fragment count, used for occlusion queries) to bo+offset. */
static void
emit_ps_depth_count(struct anv_batch *batch,
                    struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GEN8_PIPE_CONTROL,
                  .DestinationAddressType = DAT_PPGTT,
                  .PostSyncOperation = WritePSDepthCount,
                  .Address = { bo, offset });  /* FIXME: This is only lower 32 bits */
}
614
/* Begin a query: for occlusion queries, snapshot the PS depth count into
 * the slot's 'begin' field.  Pipeline statistics are not implemented.
 */
void gen8_CmdBeginQuery(
    VkCmdBuffer cmdBuffer,
    VkQueryPool queryPool,
    uint32_t slot,
    VkQueryControlFlags flags)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);

   switch (pool->type) {
   case VK_QUERY_TYPE_OCCLUSION:
      /* Write to the first (begin) 64-bit field of the slot. */
      emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
                          slot * sizeof(struct anv_query_pool_slot));
      break;

   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
   default:
      unreachable("");
   }
}
635
/* End a query: for occlusion queries, snapshot the PS depth count into
 * the slot's 'end' field (begin/end are differenced at copy time).
 */
void gen8_CmdEndQuery(
    VkCmdBuffer cmdBuffer,
    VkQueryPool queryPool,
    uint32_t slot)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);

   switch (pool->type) {
   case VK_QUERY_TYPE_OCCLUSION:
      /* +8 skips the 64-bit 'begin' field written by CmdBeginQuery. */
      emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
                          slot * sizeof(struct anv_query_pool_slot) + 8);
      break;

   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
   default:
      unreachable("");
   }
}
655
/* MMIO address of the command streamer's 64-bit timestamp register. */
#define TIMESTAMP 0x2358

/* Write the GPU timestamp to destBuffer+destOffset.  TOP uses two
 * MI_STORE_REGISTER_MEM dwords (no pipeline wait); BOTTOM uses a
 * PIPE_CONTROL post-sync write, which completes after prior rendering.
 */
void gen8_CmdWriteTimestamp(
    VkCmdBuffer cmdBuffer,
    VkTimestampType timestampType,
    VkBuffer destBuffer,
    VkDeviceSize destOffset)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
   struct anv_bo *bo = buffer->bo;

   switch (timestampType) {
   case VK_TIMESTAMP_TYPE_TOP:
      /* Store low then high dword of the 64-bit timestamp register. */
      anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
                     .RegisterAddress = TIMESTAMP,
                     .MemoryAddress = { bo, buffer->offset + destOffset });
      anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
                     .RegisterAddress = TIMESTAMP + 4,
                     .MemoryAddress = { bo, buffer->offset + destOffset + 4 });
      break;

   case VK_TIMESTAMP_TYPE_BOTTOM:
      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                     .DestinationAddressType = DAT_PPGTT,
                     .PostSyncOperation = WriteTimestamp,
                     .Address = /* FIXME: This is only lower 32 bits */
                        { bo, buffer->offset + destOffset });
      break;

   default:
      break;
   }
}
690
/* Helpers for building MI_MATH ALU instruction dwords: opcode in bits
 * 31:20, operand1 in 19:10, operand2 in 9:0. */
#define alu_opcode(v) __gen_field((v), 20, 31)
#define alu_operand1(v) __gen_field((v), 10, 19)
#define alu_operand2(v) __gen_field((v), 0, 9)
#define alu(opcode, operand1, operand2) \
   alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2)

/* MI_MATH ALU opcodes. */
#define OPCODE_NOOP 0x000
#define OPCODE_LOAD 0x080
#define OPCODE_LOADINV 0x480
#define OPCODE_LOAD0 0x081
#define OPCODE_LOAD1 0x481
#define OPCODE_ADD 0x100
#define OPCODE_SUB 0x101
#define OPCODE_AND 0x102
#define OPCODE_OR 0x103
#define OPCODE_XOR 0x104
#define OPCODE_STORE 0x180
#define OPCODE_STOREINV 0x580

/* MI_MATH ALU operands: general-purpose registers, the two source
 * operands, the accumulator, and the zero/carry flags. */
#define OPERAND_R0 0x00
#define OPERAND_R1 0x01
#define OPERAND_R2 0x02
#define OPERAND_R3 0x03
#define OPERAND_R4 0x04
#define OPERAND_SRCA 0x20
#define OPERAND_SRCB 0x21
#define OPERAND_ACCU 0x31
#define OPERAND_ZF 0x32
#define OPERAND_CF 0x33

/* MMIO address of 64-bit command streamer GPR n (used by MI_MATH). */
#define CS_GPR(n) (0x2600 + (n) * 8)
722
/* Load a 64-bit value from bo+offset into the CS GPR at 'reg' using two
 * 32-bit MI_LOAD_REGISTER_MEMs (low dword, then high dword). */
static void
emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg,
                      struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
                  .RegisterAddress = reg,
                  .MemoryAddress = { bo, offset });
   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
                  .RegisterAddress = reg + 4,
                  .MemoryAddress = { bo, offset + 4 });
}
734
/* Copy occlusion query results into destBuffer on the GPU: for each slot,
 * load the begin/end PS depth counts into CS GPRs, compute end - begin
 * with MI_MATH, and store the result (32 or 64 bits) at the destination.
 */
void gen8_CmdCopyQueryPoolResults(
    VkCmdBuffer cmdBuffer,
    VkQueryPool queryPool,
    uint32_t startQuery,
    uint32_t queryCount,
    VkBuffer destBuffer,
    VkDeviceSize destOffset,
    VkDeviceSize destStride,
    VkQueryResultFlags flags)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
   ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
   uint32_t slot_offset, dst_offset;

   if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
      /* Where is the availabilty info supposed to go? */
      anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT");
      return;
   }

   assert(pool->type == VK_QUERY_TYPE_OCCLUSION);

   /* FIXME: If we're not waiting, should we just do this on the CPU? */
   if (flags & VK_QUERY_RESULT_WAIT_BIT)
      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                     .CommandStreamerStallEnable = true,
                     .StallAtPixelScoreboard = true);

   dst_offset = buffer->offset + destOffset;
   for (uint32_t i = 0; i < queryCount; i++) {

      slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot);

      /* GPR0 = begin count, GPR1 = end count. */
      emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset);
      emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8);

      /* FIXME: We need to clamp the result for 32 bit. */

      /* MI_MATH: GPR2 = GPR1 - GPR0 (end - begin). */
      uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GEN8_MI_MATH);
      dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1);
      dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0);
      dw[3] = alu(OPCODE_SUB, 0, 0);
      dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU);

      anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
                     .RegisterAddress = CS_GPR(2),
                     /* FIXME: This is only lower 32 bits */
                     .MemoryAddress = { buffer->bo, dst_offset });

      /* For 64-bit results, also store the high dword of GPR2. */
      if (flags & VK_QUERY_RESULT_64_BIT)
         anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
                        .RegisterAddress = CS_GPR(2) + 4,
                        /* FIXME: This is only lower 32 bits */
                        .MemoryAddress = { buffer->bo, dst_offset + 4 });

      dst_offset += destStride;
   }
}
794
/* Emit STATE_BASE_ADDRESS pointing the hardware at the command buffer's
 * current pools (scratch, surface state, dynamic state, instructions),
 * then invalidate the texture cache so stale surface state / binding
 * tables are not used.  Records the scratch size so flush_state can tell
 * when a later pipeline needs a bigger scratch bo.
 */
void
gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_bo *scratch_bo = NULL;

   cmd_buffer->state.scratch_size =
      anv_block_pool_size(&device->scratch_block_pool);
   if (cmd_buffer->state.scratch_size > 0)
      scratch_bo = &device->scratch_block_pool.bo;

   anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS,
                  .GeneralStateBaseAddress = { scratch_bo, 0 },
                  .GeneralStateMemoryObjectControlState = GEN8_MOCS,
                  .GeneralStateBaseAddressModifyEnable = true,
                  .GeneralStateBufferSize = 0xfffff,
                  .GeneralStateBufferSizeModifyEnable = true,

                  .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 },
                  .SurfaceStateMemoryObjectControlState = GEN8_MOCS,
                  .SurfaceStateBaseAddressModifyEnable = true,

                  .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 },
                  .DynamicStateMemoryObjectControlState = GEN8_MOCS,
                  .DynamicStateBaseAddressModifyEnable = true,
                  .DynamicStateBufferSize = 0xfffff,
                  .DynamicStateBufferSizeModifyEnable = true,

                  .IndirectObjectBaseAddress = { NULL, 0 },
                  .IndirectObjectMemoryObjectControlState = GEN8_MOCS,
                  .IndirectObjectBaseAddressModifyEnable = true,
                  .IndirectObjectBufferSize = 0xfffff,
                  .IndirectObjectBufferSizeModifyEnable = true,

                  .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
                  .InstructionMemoryObjectControlState = GEN8_MOCS,
                  .InstructionBaseAddressModifyEnable = true,
                  .InstructionBufferSize = 0xfffff,
                  .InstructionBuffersizeModifyEnable = true);

   /* After re-setting the surface state base address, we have to do some
    * cache flusing so that the sampler engine will pick up the new
    * SURFACE_STATE objects and binding tables.  From the Broadwell PRM,
    * Shared Function > 3D Sampler > State > State Caching (page 96):
    *
    *    Coherency with system memory in the state cache, like the texture
    *    cache is handled partially by software.  It is expected that the
    *    command stream or shader will issue Cache Flush operation or
    *    Cache_Flush sampler message to ensure that the L1 cache remains
    *    coherent with system memory.
    *
    *    [...]
    *
    *    Whenever the value of the Dynamic_State_Base_Addr,
    *    Surface_State_Base_Addr are altered, the L1 state cache must be
    *    invalidated to ensure the new surface or sampler state is fetched
    *    from system memory.
    *
    * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit
    * which, according the PIPE_CONTROL instruction documentation in the
    * Broadwell PRM:
    *
    *    Setting this bit is independent of any other bit in this packet.
    *    This bit controls the invalidation of the L1 and L2 state caches
    *    at the top of the pipe i.e. at the parsing time.
    *
    * Unfortunately, experimentation seems to indicate that state cache
    * invalidation through a PIPE_CONTROL does nothing whatsoever in
    * regards to surface state and binding tables.  In stead, it seems that
    * invalidating the texture cache is what is actually needed.
    *
    * XXX:  As far as we have been able to determine through
    * experimentation, shows that flush the texture cache appears to be
    * sufficient.  The theory here is that all of the sampling/rendering
    * units cache the binding table in the texture cache.  However, we have
    * yet to be able to actually confirm this.
    */
   anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                  .TextureCacheInvalidationEnable = true);
}
875
876 void gen8_CmdPipelineBarrier(
877 VkCmdBuffer cmdBuffer,
878 VkPipelineStageFlags srcStageMask,
879 VkPipelineStageFlags destStageMask,
880 VkBool32 byRegion,
881 uint32_t memBarrierCount,
882 const void* const* ppMemBarriers)
883 {
884 ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
885 uint32_t b, *dw;
886
887 struct GEN8_PIPE_CONTROL cmd = {
888 GEN8_PIPE_CONTROL_header,
889 .PostSyncOperation = NoWrite,
890 };
891
892 /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */
893
894 if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
895 /* This is just what PIPE_CONTROL does */
896 }
897
898 if (anv_clear_mask(&srcStageMask,
899 VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
900 VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
901 VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
902 VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT |
903 VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT |
904 VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
905 VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
906 VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
907 VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
908 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
909 cmd.StallAtPixelScoreboard = true;
910 }
911
912
913 if (anv_clear_mask(&srcStageMask,
914 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
915 VK_PIPELINE_STAGE_TRANSFER_BIT |
916 VK_PIPELINE_STAGE_TRANSITION_BIT)) {
917 cmd.CommandStreamerStallEnable = true;
918 }
919
920 if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) {
921 anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT");
922 }
923
924 /* On our hardware, all stages will wait for execution as needed. */
925 (void)destStageMask;
926
927 /* We checked all known VkPipeEventFlags. */
928 anv_assert(srcStageMask == 0);
929
930 /* XXX: Right now, we're really dumb and just flush whatever categories
931 * the app asks for. One of these days we may make this a bit better
932 * but right now that's all the hardware allows for in most areas.
933 */
934 VkMemoryOutputFlags out_flags = 0;
935 VkMemoryInputFlags in_flags = 0;
936
937 for (uint32_t i = 0; i < memBarrierCount; i++) {
938 const struct anv_common *common = ppMemBarriers[i];
939 switch (common->sType) {
940 case VK_STRUCTURE_TYPE_MEMORY_BARRIER: {
941 ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common);
942 out_flags |= barrier->outputMask;
943 in_flags |= barrier->inputMask;
944 break;
945 }
946 case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: {
947 ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common);
948 out_flags |= barrier->outputMask;
949 in_flags |= barrier->inputMask;
950 break;
951 }
952 case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: {
953 ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common);
954 out_flags |= barrier->outputMask;
955 in_flags |= barrier->inputMask;
956 break;
957 }
958 default:
959 unreachable("Invalid memory barrier type");
960 }
961 }
962
963 for_each_bit(b, out_flags) {
964 switch ((VkMemoryOutputFlags)(1 << b)) {
965 case VK_MEMORY_OUTPUT_HOST_WRITE_BIT:
966 break; /* FIXME: Little-core systems */
967 case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT:
968 cmd.DCFlushEnable = true;
969 break;
970 case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT:
971 cmd.RenderTargetCacheFlushEnable = true;
972 break;
973 case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
974 cmd.DepthCacheFlushEnable = true;
975 break;
976 case VK_MEMORY_OUTPUT_TRANSFER_BIT:
977 cmd.RenderTargetCacheFlushEnable = true;
978 cmd.DepthCacheFlushEnable = true;
979 break;
980 default:
981 unreachable("Invalid memory output flag");
982 }
983 }
984
985 for_each_bit(b, out_flags) {
986 switch ((VkMemoryInputFlags)(1 << b)) {
987 case VK_MEMORY_INPUT_HOST_READ_BIT:
988 break; /* FIXME: Little-core systems */
989 case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT:
990 case VK_MEMORY_INPUT_INDEX_FETCH_BIT:
991 case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT:
992 cmd.VFCacheInvalidationEnable = true;
993 break;
994 case VK_MEMORY_INPUT_UNIFORM_READ_BIT:
995 cmd.ConstantCacheInvalidationEnable = true;
996 /* fallthrough */
997 case VK_MEMORY_INPUT_SHADER_READ_BIT:
998 cmd.DCFlushEnable = true;
999 cmd.TextureCacheInvalidationEnable = true;
1000 break;
1001 case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT:
1002 case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
1003 break; /* XXX: Hunh? */
1004 case VK_MEMORY_INPUT_TRANSFER_BIT:
1005 cmd.TextureCacheInvalidationEnable = true;
1006 break;
1007 }
1008 }
1009
1010 dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length);
1011 GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd);
1012 }