/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>

#include "anv_private.h"

static void
gen8_cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer)
{
   static const uint32_t push_constant_opcodes[] = {
      [VK_SHADER_STAGE_VERTEX]          = 21,
      [VK_SHADER_STAGE_TESS_CONTROL]    = 25, /* HS */
      [VK_SHADER_STAGE_TESS_EVALUATION] = 26, /* DS */
      [VK_SHADER_STAGE_GEOMETRY]        = 22,
      [VK_SHADER_STAGE_FRAGMENT]        = 23,
      [VK_SHADER_STAGE_COMPUTE]         = 0,
   };

   VkShaderStage stage;
   VkShaderStageFlags flushed = 0;

   for_each_bit(stage, cmd_buffer->state.push_constants_dirty) {
      struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage);

      if (state.offset == 0)
         continue;
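
      /* Every 3DSTATE_CONSTANT_* packet shares one layout and differs only
       * in its 3D command sub-opcode, so we emit the VS template and patch
       * _3DCommandSubOpcode per stage.  The read length is expressed in
       * 256-bit (32-byte) units, hence the DIV_ROUND_UP by 32.
       */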
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CONSTANT_VS,
                     ._3DCommandSubOpcode = push_constant_opcodes[stage],
                     .ConstantBody = {
                        .PointerToConstantBuffer0 = { .offset = state.offset },
                        .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32),
                     });

      flushed |= 1 << stage;
   }

   cmd_buffer->state.push_constants_dirty &= ~flushed;
}

static void
gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
   uint32_t *p;

   uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used;

   assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);

   if (cmd_buffer->state.current_pipeline != _3D) {
      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
                     .PipelineSelection = _3D);
      cmd_buffer->state.current_pipeline = _3D;
   }

   if (vb_emit) {
      const uint32_t num_buffers = __builtin_popcount(vb_emit);
      const uint32_t num_dwords = 1 + num_buffers * 4;

      p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
                          GEN8_3DSTATE_VERTEX_BUFFERS);
      uint32_t vb, i = 0;
      for_each_bit(vb, vb_emit) {
         struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer;
         uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset;

         struct GEN8_VERTEX_BUFFER_STATE state = {
            .VertexBufferIndex = vb,
            .MemoryObjectControlState = GEN8_MOCS,
            .AddressModifyEnable = true,
            .BufferPitch = pipeline->binding_stride[vb],
            .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
            .BufferSize = buffer->size - offset
         };
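
         /* Each VERTEX_BUFFER_STATE occupies four dwords and dword 0 of the
          * packet is the header, so the state for buffer i lands at
          * p[1 + i * 4].
          */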
         GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state);
         i++;
      }
   }

   if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) {
      /* If somebody compiled a pipeline after starting a command buffer the
       * scratch bo may have grown since we started this cmd buffer (and
       * emitted STATE_BASE_ADDRESS).  If we're binding that pipeline now,
       * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
      if (cmd_buffer->state.scratch_size < pipeline->total_scratch)
         anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
   }

   if (cmd_buffer->state.descriptors_dirty)
      anv_flush_descriptor_sets(cmd_buffer);

   if (cmd_buffer->state.push_constants_dirty)
      gen8_cmd_buffer_flush_push_constants(cmd_buffer);

   if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT)
      anv_cmd_buffer_emit_viewport(cmd_buffer);

   if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_SCISSOR)
      anv_cmd_buffer_emit_scissor(cmd_buffer);

   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                  ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) {
      uint32_t sf_dw[GEN8_3DSTATE_SF_length];
      struct GEN8_3DSTATE_SF sf = {
         GEN8_3DSTATE_SF_header,
         .LineWidth = cmd_buffer->state.dynamic.line_width,
      };
      GEN8_3DSTATE_SF_pack(NULL, sf_dw, &sf);
      anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen8.sf);
   }
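
   /* anv_batch_emit_merge ORs the freshly packed dynamic dwords with partial
    * dwords that were baked into the pipeline at pipeline-creation time, so
    * static and dynamic state combine without re-packing the whole packet.
    */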

   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                  ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)) {
      bool enable_bias = cmd_buffer->state.dynamic.depth_bias.bias != 0.0f ||
         cmd_buffer->state.dynamic.depth_bias.slope_scaled != 0.0f;

      uint32_t raster_dw[GEN8_3DSTATE_RASTER_length];
      struct GEN8_3DSTATE_RASTER raster = {
         GEN8_3DSTATE_RASTER_header,
         .GlobalDepthOffsetEnableSolid = enable_bias,
         .GlobalDepthOffsetEnableWireframe = enable_bias,
         .GlobalDepthOffsetEnablePoint = enable_bias,
         .GlobalDepthOffsetConstant = cmd_buffer->state.dynamic.depth_bias.bias,
         .GlobalDepthOffsetScale = cmd_buffer->state.dynamic.depth_bias.slope_scaled,
         .GlobalDepthOffsetClamp = cmd_buffer->state.dynamic.depth_bias.clamp,
      };
      GEN8_3DSTATE_RASTER_pack(NULL, raster_dw, &raster);
      anv_batch_emit_merge(&cmd_buffer->batch, raster_dw,
                           pipeline->gen8.raster);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |
                                  ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
      struct anv_state cc_state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            GEN8_COLOR_CALC_STATE_length, 64);
      struct GEN8_COLOR_CALC_STATE cc = {
         .BlendConstantColorRed = cmd_buffer->state.dynamic.blend_constants[0],
         .BlendConstantColorGreen = cmd_buffer->state.dynamic.blend_constants[1],
         .BlendConstantColorBlue = cmd_buffer->state.dynamic.blend_constants[2],
         .BlendConstantColorAlpha = cmd_buffer->state.dynamic.blend_constants[3],
         .StencilReferenceValue =
            cmd_buffer->state.dynamic.stencil_reference.front,
         .BackFaceStencilReferenceValue =
            cmd_buffer->state.dynamic.stencil_reference.back,
      };
      GEN8_COLOR_CALC_STATE_pack(NULL, cc_state.map, &cc);

      anv_batch_emit(&cmd_buffer->batch,
                     GEN8_3DSTATE_CC_STATE_POINTERS,
                     .ColorCalcStatePointer = cc_state.offset,
                     .ColorCalcStatePointerValid = true);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                  ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
                                  ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) {
      uint32_t wm_depth_stencil_dw[GEN8_3DSTATE_WM_DEPTH_STENCIL_length];

      struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = {
         GEN8_3DSTATE_WM_DEPTH_STENCIL_header,

         /* Is this what we need to do? */
         .StencilBufferWriteEnable =
            cmd_buffer->state.dynamic.stencil_write_mask.front != 0,

         .StencilTestMask =
            cmd_buffer->state.dynamic.stencil_compare_mask.front & 0xff,
         .StencilWriteMask =
            cmd_buffer->state.dynamic.stencil_write_mask.front & 0xff,

         .BackfaceStencilTestMask =
            cmd_buffer->state.dynamic.stencil_compare_mask.back & 0xff,
         .BackfaceStencilWriteMask =
            cmd_buffer->state.dynamic.stencil_write_mask.back & 0xff,
      };
      GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, wm_depth_stencil_dw,
                                         &wm_depth_stencil);

      anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw,
                           pipeline->gen8.wm_depth_stencil);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                  ANV_CMD_DIRTY_INDEX_BUFFER)) {
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.state_vf, pipeline->gen8.vf);
   }

   cmd_buffer->state.vb_dirty &= ~vb_emit;
   cmd_buffer->state.dirty = 0;
}

void gen8_CmdDraw(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    vertexCount,
    uint32_t                                    instanceCount,
    uint32_t                                    firstVertex,
    uint32_t                                    firstInstance)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   gen8_cmd_buffer_flush_state(cmd_buffer);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .VertexAccessType = SEQUENTIAL,
                  .VertexCountPerInstance = vertexCount,
                  .StartVertexLocation = firstVertex,
                  .InstanceCount = instanceCount,
                  .StartInstanceLocation = firstInstance,
                  .BaseVertexLocation = 0);
}

void gen8_CmdDrawIndexed(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    indexCount,
    uint32_t                                    instanceCount,
    uint32_t                                    firstIndex,
    int32_t                                     vertexOffset,
    uint32_t                                    firstInstance)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   gen8_cmd_buffer_flush_state(cmd_buffer);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .VertexAccessType = RANDOM,
                  .VertexCountPerInstance = indexCount,
                  .StartVertexLocation = firstIndex,
                  .InstanceCount = instanceCount,
                  .StartInstanceLocation = firstInstance,
                  .BaseVertexLocation = vertexOffset);
}

static void
emit_lrm(struct anv_batch *batch,
         uint32_t reg, struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
                  .RegisterAddress = reg,
                  .MemoryAddress = { bo, offset });
}

static void
emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
{
   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM,
                  .RegisterOffset = reg,
                  .DataDWord = imm);
}

/* Auto-Draw / Indirect Registers */
#define GEN7_3DPRIM_END_OFFSET          0x2420
#define GEN7_3DPRIM_START_VERTEX        0x2430
#define GEN7_3DPRIM_VERTEX_COUNT        0x2434
#define GEN7_3DPRIM_INSTANCE_COUNT      0x2438
#define GEN7_3DPRIM_START_INSTANCE      0x243C
#define GEN7_3DPRIM_BASE_VERTEX         0x2440

void gen8_CmdDrawIndirect(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    uint32_t                                    count,
    uint32_t                                    stride)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   gen8_cmd_buffer_flush_state(cmd_buffer);
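
   /* The draw arguments sit in the indirect buffer with the
    * VkDrawIndirectCommand layout: vertexCount, instanceCount, firstVertex,
    * firstInstance, four bytes each.  Load them straight into the
    * 3DPRIMITIVE argument registers and zero the base vertex.
    */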
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
   emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .IndirectParameterEnable = true,
                  .VertexAccessType = SEQUENTIAL);
}

void gen8_CmdBindIndexBuffer(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    VkIndexType                                 indexType)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);

   static const uint32_t vk_to_gen_index_type[] = {
      [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
      [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
   };
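
   /* When primitive restart is enabled, the cut index has to match the
    * index width: 0xffff for 16-bit indices, 0xffffffff for 32-bit.
    */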
   struct GEN8_3DSTATE_VF vf = {
      GEN8_3DSTATE_VF_header,
      .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? UINT16_MAX : UINT32_MAX,
   };
   GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf);

   cmd_buffer->state.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER,
                  .IndexFormat = vk_to_gen_index_type[indexType],
                  .MemoryObjectControlState = GEN8_MOCS,
                  .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
                  .BufferSize = buffer->size - offset);
}

static VkResult
gen8_flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct anv_state surfaces = { 0, }, samplers = { 0, };
   VkResult result;

   result = anv_cmd_buffer_emit_samplers(cmd_buffer,
                                         VK_SHADER_STAGE_COMPUTE, &samplers);
   if (result != VK_SUCCESS)
      return result;
   result = anv_cmd_buffer_emit_binding_table(cmd_buffer,
                                              VK_SHADER_STAGE_COMPUTE, &surfaces);
   if (result != VK_SUCCESS)
      return result;
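
   /* An INTERFACE_DESCRIPTOR_DATA packet in dynamic state is the GPGPU
    * pipeline's stand-in for the 3DSTATE_*S packets: it points the walker
    * at the kernel, its binding table, and its sampler state.
    */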
   struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = {
      .KernelStartPointer = pipeline->cs_simd,
      .KernelStartPointerHigh = 0,
      .BindingTablePointer = surfaces.offset,
      .BindingTableEntryCount = 0,
      .SamplerStatePointer = samplers.offset,
      .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
   };

   uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
   struct anv_state state =
      anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);

   GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);

   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
                  .InterfaceDescriptorTotalLength = size,
                  .InterfaceDescriptorDataStartAddress = state.offset);

   return VK_SUCCESS;
}

static void
gen8_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   VkResult result;

   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);

   if (cmd_buffer->state.current_pipeline != GPGPU) {
      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
                     .PipelineSelection = GPGPU);
      cmd_buffer->state.current_pipeline = GPGPU;
   }

   if (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)
      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);

   if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
       (cmd_buffer->state.compute_dirty & ANV_CMD_DIRTY_PIPELINE)) {
      result = gen8_flush_compute_descriptor_set(cmd_buffer);
      assert(result == VK_SUCCESS);
      cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
   }

   cmd_buffer->state.compute_dirty = 0;
}

void gen8_CmdDrawIndexedIndirect(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    uint32_t                                    count,
    uint32_t                                    stride)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   gen8_cmd_buffer_flush_state(cmd_buffer);
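
   /* VkDrawIndexedIndirectCommand packs indexCount, instanceCount,
    * firstIndex, vertexOffset, firstInstance at four-byte intervals, which
    * is why BASE_VERTEX comes from offset 12 and START_INSTANCE from 16.
    */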
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .IndirectParameterEnable = true,
                  .VertexAccessType = RANDOM);
}

void gen8_CmdDispatch(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    x,
    uint32_t                                    y,
    uint32_t                                    z)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;

   gen8_cmd_buffer_flush_compute_state(cmd_buffer);
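
   /* The walker's SIMDSize field encodes SIMD8/SIMD16/SIMD32 as 0/1/2;
    * integer-dividing the compiled SIMD width (8, 16, or 32) by 16 produces
    * exactly that encoding.
    */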
   anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
                  .SIMDSize = prog_data->simd_size / 16,
                  .ThreadDepthCounterMaximum = 0,
                  .ThreadHeightCounterMaximum = 0,
                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
                  .ThreadGroupIDXDimension = x,
                  .ThreadGroupIDYDimension = y,
                  .ThreadGroupIDZDimension = z,
                  .RightExecutionMask = pipeline->cs_right_mask,
                  .BottomExecutionMask = 0xffffffff);

   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
}

#define GPGPU_DISPATCHDIMX 0x2500
#define GPGPU_DISPATCHDIMY 0x2504
#define GPGPU_DISPATCHDIMZ 0x2508

void gen8_CmdDispatchIndirect(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   gen8_cmd_buffer_flush_compute_state(cmd_buffer);
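
   /* VkDispatchIndirectCommand is three uint32s (x, y, z); load them into
    * the GPGPU dispatch-dimension registers before the walker reads them.
    */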
   emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
   emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
   emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);

   anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
                  .IndirectParameterEnable = true,
                  .SIMDSize = prog_data->simd_size / 16,
                  .ThreadDepthCounterMaximum = 0,
                  .ThreadHeightCounterMaximum = 0,
                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
                  .RightExecutionMask = pipeline->cs_right_mask,
                  .BottomExecutionMask = 0xffffffff);

   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
}

static void
gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
{
   const struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct anv_image_view *iview =
      anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
   const struct anv_image *image = iview ? iview->image : NULL;
   const bool has_depth = iview && iview->format->depth_format;
   const bool has_stencil = iview && iview->format->has_stencil;

   /* FIXME: Implement the PMA stall W/A */
   /* FIXME: Width and Height are wrong */

   /* Emit 3DSTATE_DEPTH_BUFFER */
   if (has_depth) {
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
                     .SurfaceType = SURFTYPE_2D,
                     .DepthWriteEnable = iview->format->depth_format,
                     .StencilWriteEnable = has_stencil,
                     .HierarchicalDepthBufferEnable = false,
                     .SurfaceFormat = iview->format->depth_format,
                     .SurfacePitch = image->depth_surface.stride - 1,
                     .SurfaceBaseAddress = {
                        .bo = image->bo,
                        .offset = image->depth_surface.offset,
                     },
                     .Height = fb->height - 1,
                     .Width = fb->width - 1,
                     .LOD = 0,
                     .Depth = 1 - 1,
                     .MinimumArrayElement = 0,
                     .DepthBufferObjectControlState = GEN8_MOCS,
                     .RenderTargetViewExtent = 1 - 1,
                     .SurfaceQPitch = image->depth_surface.qpitch >> 2);
   } else {
      /* Even when no depth buffer is present, the hardware requires that
       * 3DSTATE_DEPTH_BUFFER be programmed correctly.  The Broadwell PRM says:
       *
       *    If a null depth buffer is bound, the driver must instead bind depth as:
       *       3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D
       *       3DSTATE_DEPTH.Width = 1
       *       3DSTATE_DEPTH.Height = 1
       *       3DSTATE_DEPTH.SurfaceFormat = D16_UNORM
       *       3DSTATE_DEPTH.SurfaceBaseAddress = 0
       *       3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0
       *       3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0
       *       3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0
       *
       * The PRM is wrong, though.  The width and height must be programmed
       * to the actual framebuffer's width and height, even when neither
       * depth buffer nor stencil buffer is present.
       */
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
                     .SurfaceType = SURFTYPE_2D,
                     .SurfaceFormat = D16_UNORM,
                     .Width = fb->width - 1,
                     .Height = fb->height - 1,
                     .StencilWriteEnable = has_stencil);
   }

   /* Emit 3DSTATE_STENCIL_BUFFER */
   if (has_stencil) {
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER,
                     .StencilBufferEnable = true,
                     .StencilBufferObjectControlState = GEN8_MOCS,

                     /* Stencil buffers have strange pitch.  The PRM says:
                      *
                      *    The pitch must be set to 2x the value computed based
                      *    on width, as the stencil buffer is stored with two
                      *    rows interleaved.
                      */
                     .SurfacePitch = 2 * image->stencil_surface.stride - 1,

                     .SurfaceBaseAddress = {
                        .bo = image->bo,
                        .offset = image->offset + image->stencil_surface.offset,
                     },
                     .SurfaceQPitch = image->stencil_surface.stride >> 2);
   } else {
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER);
   }

   /* Disable hierarchical depth buffers. */
   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER);

   /* Clear the clear params. */
   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS);
}

static void
gen8_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
                              struct anv_subpass *subpass)
{
   cmd_buffer->state.subpass = subpass;

   cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;

   gen8_cmd_buffer_emit_depth_stencil(cmd_buffer);
}

void gen8_CmdBeginRenderPass(
    VkCmdBuffer                                 cmdBuffer,
    const VkRenderPassBeginInfo*                pRenderPassBegin,
    VkRenderPassContents                        contents)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);
   ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);

   cmd_buffer->state.framebuffer = framebuffer;
   cmd_buffer->state.pass = pass;

   const VkRect2D *render_area = &pRenderPassBegin->renderArea;
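
   /* The hardware's drawing-rectangle bounds are inclusive, hence the
    * extent - 1 on the max edges.
    */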
   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE,
                  .ClippedDrawingRectangleYMin = render_area->offset.y,
                  .ClippedDrawingRectangleXMin = render_area->offset.x,
                  .ClippedDrawingRectangleYMax =
                     render_area->offset.y + render_area->extent.height - 1,
                  .ClippedDrawingRectangleXMax =
                     render_area->offset.x + render_area->extent.width - 1,
                  .DrawingRectangleOriginY = 0,
                  .DrawingRectangleOriginX = 0);

   anv_cmd_buffer_clear_attachments(cmd_buffer, pass,
                                    pRenderPassBegin->pClearValues);

   gen8_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses);
}

void gen8_CmdNextSubpass(
    VkCmdBuffer                                 cmdBuffer,
    VkRenderPassContents                        contents)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY);

   gen8_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1);
}

void gen8_CmdEndRenderPass(
    VkCmdBuffer                                 cmdBuffer)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   /* Emit a flushing pipe control at the end of a pass.  This is kind of a
    * hack but it ensures that render targets always actually get written.
    * Eventually, we should do flushing based on image format transitions
    * or something of that nature.
    */
   anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                  .PostSyncOperation = NoWrite,
                  .RenderTargetCacheFlushEnable = true,
                  .InstructionCacheInvalidateEnable = true,
                  .DepthCacheFlushEnable = true,
                  .VFCacheInvalidationEnable = true,
                  .TextureCacheInvalidationEnable = true,
                  .CommandStreamerStallEnable = true);
}

static void
emit_ps_depth_count(struct anv_batch *batch,
                    struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GEN8_PIPE_CONTROL,
                  .DestinationAddressType = DAT_PPGTT,
                  .PostSyncOperation = WritePSDepthCount,
                  .Address = { bo, offset }); /* FIXME: This is only lower 32 bits */
}
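
/* Occlusion queries bracket the workload with two PS_DEPTH_COUNT writes:
 * vkCmdBeginQuery snapshots the counter into the slot's first qword and
 * vkCmdEndQuery into the second; CopyQueryPoolResults later subtracts the
 * two on the GPU.
 */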

void gen8_CmdBeginQuery(
    VkCmdBuffer                                 cmdBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    slot,
    VkQueryControlFlags                         flags)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);

   switch (pool->type) {
   case VK_QUERY_TYPE_OCCLUSION:
      emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
                          slot * sizeof(struct anv_query_pool_slot));
      break;

   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
   default:
      unreachable("");
   }
}

void gen8_CmdEndQuery(
    VkCmdBuffer                                 cmdBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    slot)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);

   switch (pool->type) {
   case VK_QUERY_TYPE_OCCLUSION:
      emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
                          slot * sizeof(struct anv_query_pool_slot) + 8);
      break;

   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
   default:
      unreachable("");
   }
}

#define TIMESTAMP 0x2358
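
/* TIMESTAMP is the render command streamer's free-running 64-bit counter
 * register; MI_STORE_REGISTER_MEM can only copy it 32 bits at a time.
 */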

void gen8_CmdWriteTimestamp(
    VkCmdBuffer                                 cmdBuffer,
    VkTimestampType                             timestampType,
    VkBuffer                                    destBuffer,
    VkDeviceSize                                destOffset)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
   struct anv_bo *bo = buffer->bo;

   switch (timestampType) {
   case VK_TIMESTAMP_TYPE_TOP:
      anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
                     .RegisterAddress = TIMESTAMP,
                     .MemoryAddress = { bo, buffer->offset + destOffset });
      anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
                     .RegisterAddress = TIMESTAMP + 4,
                     .MemoryAddress = { bo, buffer->offset + destOffset + 4 });
      break;

   case VK_TIMESTAMP_TYPE_BOTTOM:
      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                     .DestinationAddressType = DAT_PPGTT,
                     .PostSyncOperation = WriteTimestamp,
                     .Address = /* FIXME: This is only lower 32 bits */
                        { bo, buffer->offset + destOffset });
      break;

   default:
      break;
   }
}

#define alu_opcode(v)   __gen_field((v), 20, 31)
#define alu_operand1(v) __gen_field((v), 10, 19)
#define alu_operand2(v) __gen_field((v), 0, 9)
#define alu(opcode, operand1, operand2) \
   alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2)

#define OPCODE_NOOP     0x000
#define OPCODE_LOAD     0x080
#define OPCODE_LOADINV  0x480
#define OPCODE_LOAD0    0x081
#define OPCODE_LOAD1    0x481
#define OPCODE_ADD      0x100
#define OPCODE_SUB      0x101
#define OPCODE_AND      0x102
#define OPCODE_OR       0x103
#define OPCODE_XOR      0x104
#define OPCODE_STORE    0x180
#define OPCODE_STOREINV 0x580

#define OPERAND_R0   0x00
#define OPERAND_R1   0x01
#define OPERAND_R2   0x02
#define OPERAND_R3   0x03
#define OPERAND_R4   0x04
#define OPERAND_SRCA 0x20
#define OPERAND_SRCB 0x21
#define OPERAND_ACCU 0x31
#define OPERAND_ZF   0x32
#define OPERAND_CF   0x33

#define CS_GPR(n) (0x2600 + (n) * 8)
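
/* The command streamer has a small ALU driven by MI_MATH: each instruction
 * dword encodes an opcode and two operands, the operands name the 64-bit
 * general-purpose registers (CS_GPR) or the special SRCA/SRCB/ACCU slots,
 * and arithmetic results land in the accumulator.
 */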

static void
emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg,
                      struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
                  .RegisterAddress = reg,
                  .MemoryAddress = { bo, offset });
   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
                  .RegisterAddress = reg + 4,
                  .MemoryAddress = { bo, offset + 4 });
}

void gen8_CmdCopyQueryPoolResults(
    VkCmdBuffer                                 cmdBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    startQuery,
    uint32_t                                    queryCount,
    VkBuffer                                    destBuffer,
    VkDeviceSize                                destOffset,
    VkDeviceSize                                destStride,
    VkQueryResultFlags                          flags)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
   ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
   uint32_t slot_offset, dst_offset;

   if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
      /* Where is the availability info supposed to go? */
      anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT");
      return;
   }

   assert(pool->type == VK_QUERY_TYPE_OCCLUSION);

   /* FIXME: If we're not waiting, should we just do this on the CPU? */
   if (flags & VK_QUERY_RESULT_WAIT_BIT)
      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                     .CommandStreamerStallEnable = true,
                     .StallAtPixelScoreboard = true);

   dst_offset = buffer->offset + destOffset;
   for (uint32_t i = 0; i < queryCount; i++) {
      slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot);

      emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset);
      emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8);
825 uint32_t *dw
= anv_batch_emitn(&cmd_buffer
->batch
, 5, GEN8_MI_MATH
);
826 dw
[1] = alu(OPCODE_LOAD
, OPERAND_SRCA
, OPERAND_R1
);
827 dw
[2] = alu(OPCODE_LOAD
, OPERAND_SRCB
, OPERAND_R0
);
828 dw
[3] = alu(OPCODE_SUB
, 0, 0);
829 dw
[4] = alu(OPCODE_STORE
, OPERAND_R2
, OPERAND_ACCU
);
831 anv_batch_emit(&cmd_buffer
->batch
, GEN8_MI_STORE_REGISTER_MEM
,
832 .RegisterAddress
= CS_GPR(2),
833 /* FIXME: This is only lower 32 bits */
834 .MemoryAddress
= { buffer
->bo
, dst_offset
});
836 if (flags
& VK_QUERY_RESULT_64_BIT
)
837 anv_batch_emit(&cmd_buffer
->batch
, GEN8_MI_STORE_REGISTER_MEM
,
838 .RegisterAddress
= CS_GPR(2) + 4,
839 /* FIXME: This is only lower 32 bits */
840 .MemoryAddress
= { buffer
->bo
, dst_offset
+ 4 });
842 dst_offset
+= destStride
;

void
gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_bo *scratch_bo = NULL;

   cmd_buffer->state.scratch_size =
      anv_block_pool_size(&device->scratch_block_pool);
   if (cmd_buffer->state.scratch_size > 0)
      scratch_bo = &device->scratch_block_pool.bo;

   /* Emit a render target cache flush.
    *
    * This isn't documented anywhere in the PRM.  However, it seems to be
    * necessary prior to changing the surface state base address.  Without
    * this, we get GPU hangs when using multi-level command buffers which
    * clear depth, reset state base address, and then go render stuff.
    */
   anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                  .RenderTargetCacheFlushEnable = true);

   anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS,
                  .GeneralStateBaseAddress = { scratch_bo, 0 },
                  .GeneralStateMemoryObjectControlState = GEN8_MOCS,
                  .GeneralStateBaseAddressModifyEnable = true,
                  .GeneralStateBufferSize = 0xfffff,
                  .GeneralStateBufferSizeModifyEnable = true,

                  .SurfaceStateBaseAddress = anv_cmd_buffer_surface_base_address(cmd_buffer),
                  .SurfaceStateMemoryObjectControlState = GEN8_MOCS,
                  .SurfaceStateBaseAddressModifyEnable = true,

                  .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 },
                  .DynamicStateMemoryObjectControlState = GEN8_MOCS,
                  .DynamicStateBaseAddressModifyEnable = true,
                  .DynamicStateBufferSize = 0xfffff,
                  .DynamicStateBufferSizeModifyEnable = true,

                  .IndirectObjectBaseAddress = { NULL, 0 },
                  .IndirectObjectMemoryObjectControlState = GEN8_MOCS,
                  .IndirectObjectBaseAddressModifyEnable = true,
                  .IndirectObjectBufferSize = 0xfffff,
                  .IndirectObjectBufferSizeModifyEnable = true,

                  .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
                  .InstructionMemoryObjectControlState = GEN8_MOCS,
                  .InstructionBaseAddressModifyEnable = true,
                  .InstructionBufferSize = 0xfffff,
                  .InstructionBuffersizeModifyEnable = true);

   /* After re-setting the surface state base address, we have to do some
    * cache flushing so that the sampler engine will pick up the new
    * SURFACE_STATE objects and binding tables.  From the Broadwell PRM,
    * Shared Function > 3D Sampler > State > State Caching (page 96):
    *
    *    Coherency with system memory in the state cache, like the texture
    *    cache is handled partially by software.  It is expected that the
    *    command stream or shader will issue Cache Flush operation or
    *    Cache_Flush sampler message to ensure that the L1 cache remains
    *    coherent with system memory.
    *
    *    [...]
    *
    *    Whenever the value of the Dynamic_State_Base_Addr,
    *    Surface_State_Base_Addr are altered, the L1 state cache must be
    *    invalidated to ensure the new surface or sampler state is fetched
    *    from system memory.
    *
    * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit
    * which, according to the PIPE_CONTROL instruction documentation in the
    * Broadwell PRM:
    *
    *    Setting this bit is independent of any other bit in this packet.
    *    This bit controls the invalidation of the L1 and L2 state caches
    *    at the top of the pipe i.e. at the parsing time.
    *
    * Unfortunately, experimentation seems to indicate that state cache
    * invalidation through a PIPE_CONTROL does nothing whatsoever in
    * regards to surface state and binding tables.  Instead, it seems that
    * invalidating the texture cache is what is actually needed.
    *
    * XXX: As far as we have been able to determine through experimentation,
    * flushing the texture cache appears to be sufficient.  The theory here
    * is that all of the sampling/rendering units cache the binding table in
    * the texture cache.  However, we have yet to be able to actually
    * confirm this.
    */
   anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                  .TextureCacheInvalidationEnable = true);
}

void gen8_CmdPipelineBarrier(
    VkCmdBuffer                                 cmdBuffer,
    VkPipelineStageFlags                        srcStageMask,
    VkPipelineStageFlags                        destStageMask,
    VkBool32                                    byRegion,
    uint32_t                                    memBarrierCount,
    const void* const*                          ppMemBarriers)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   uint32_t b, *dw;

   struct GEN8_PIPE_CONTROL cmd = {
      GEN8_PIPE_CONTROL_header,
      .PostSyncOperation = NoWrite,
   };

   /* XXX: I think waitEvent is a no-op on our HW.  We should verify that. */

   if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
      /* This is just what PIPE_CONTROL does */
   }

   if (anv_clear_mask(&srcStageMask,
                      VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
                      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
                      VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
                      VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT |
                      VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT |
                      VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
                      VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
                      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
                      VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
                      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
      cmd.StallAtPixelScoreboard = true;
   }

   if (anv_clear_mask(&srcStageMask,
                      VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
                      VK_PIPELINE_STAGE_TRANSFER_BIT)) {
      cmd.CommandStreamerStallEnable = true;
   }

   if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) {
      anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT");
   }

   /* On our hardware, all stages will wait for execution as needed. */

   /* We checked all known VkPipeEventFlags. */
   anv_assert(srcStageMask == 0);
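
   /* Memory barriers map onto a single PIPE_CONTROL: output (write) masks
    * become cache flushes and input (read) masks become cache invalidations,
    * all accumulated into `cmd` and emitted once at the end.
    */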

   /* XXX: Right now, we're really dumb and just flush whatever categories
    * the app asks for.  One of these days we may make this a bit better
    * but right now that's all the hardware allows for in most areas.
    */
   VkMemoryOutputFlags out_flags = 0;
   VkMemoryInputFlags in_flags = 0;

   for (uint32_t i = 0; i < memBarrierCount; i++) {
      const struct anv_common *common = ppMemBarriers[i];
      switch (common->sType) {
      case VK_STRUCTURE_TYPE_MEMORY_BARRIER: {
         ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common);
         out_flags |= barrier->outputMask;
         in_flags |= barrier->inputMask;
         break;
      }
      case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: {
         ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common);
         out_flags |= barrier->outputMask;
         in_flags |= barrier->inputMask;
         break;
      }
      case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: {
         ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common);
         out_flags |= barrier->outputMask;
         in_flags |= barrier->inputMask;
         break;
      }
      default:
         unreachable("Invalid memory barrier type");
      }
   }

   for_each_bit(b, out_flags) {
      switch ((VkMemoryOutputFlags)(1 << b)) {
      case VK_MEMORY_OUTPUT_HOST_WRITE_BIT:
         break; /* FIXME: Little-core systems */
      case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT:
         cmd.DCFlushEnable = true;
         break;
      case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT:
         cmd.RenderTargetCacheFlushEnable = true;
         break;
      case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
         cmd.DepthCacheFlushEnable = true;
         break;
      case VK_MEMORY_OUTPUT_TRANSFER_BIT:
         cmd.RenderTargetCacheFlushEnable = true;
         cmd.DepthCacheFlushEnable = true;
         break;
      default:
         unreachable("Invalid memory output flag");
      }
   }

   for_each_bit(b, in_flags) {
      switch ((VkMemoryInputFlags)(1 << b)) {
      case VK_MEMORY_INPUT_HOST_READ_BIT:
         break; /* FIXME: Little-core systems */
      case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT:
      case VK_MEMORY_INPUT_INDEX_FETCH_BIT:
      case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT:
         cmd.VFCacheInvalidationEnable = true;
         break;
      case VK_MEMORY_INPUT_UNIFORM_READ_BIT:
         cmd.ConstantCacheInvalidationEnable = true;
         break;
      case VK_MEMORY_INPUT_SHADER_READ_BIT:
         cmd.DCFlushEnable = true;
         cmd.TextureCacheInvalidationEnable = true;
         break;
      case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT:
      case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
         break; /* XXX: Hunh? */
      case VK_MEMORY_INPUT_TRANSFER_BIT:
         cmd.TextureCacheInvalidationEnable = true;
         break;
      }
   }

   dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length);
   GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd);
}