2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 #include "anv_private.h"
32 #include "gen8_pack.h"
33 #include "gen9_pack.h"
36 cmd_buffer_flush_push_constants(struct anv_cmd_buffer
*cmd_buffer
)
38 static const uint32_t push_constant_opcodes
[] = {
39 [MESA_SHADER_VERTEX
] = 21,
40 [MESA_SHADER_TESS_CTRL
] = 25, /* HS */
41 [MESA_SHADER_TESS_EVAL
] = 26, /* DS */
42 [MESA_SHADER_GEOMETRY
] = 22,
43 [MESA_SHADER_FRAGMENT
] = 23,
44 [MESA_SHADER_COMPUTE
] = 0,
47 VkShaderStageFlags flushed
= 0;
49 anv_foreach_stage(stage
, cmd_buffer
->state
.push_constants_dirty
) {
50 struct anv_state state
= anv_cmd_buffer_push_constants(cmd_buffer
, stage
);
52 if (state
.offset
== 0)
55 anv_batch_emit(&cmd_buffer
->batch
, GENX(3DSTATE_CONSTANT_VS
),
56 ._3DCommandSubOpcode
= push_constant_opcodes
[stage
],
58 .PointerToConstantBuffer0
= { .offset
= state
.offset
},
59 .ConstantBuffer0ReadLength
= DIV_ROUND_UP(state
.alloc_size
, 32),
62 flushed
|= mesa_to_vk_shader_stage(stage
);
65 cmd_buffer
->state
.push_constants_dirty
&= ~flushed
;
70 emit_viewport_state(struct anv_cmd_buffer
*cmd_buffer
,
71 uint32_t count
, const VkViewport
*viewports
)
73 struct anv_state sf_clip_state
=
74 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer
, count
* 64, 64);
75 struct anv_state cc_state
=
76 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer
, count
* 8, 32);
78 for (uint32_t i
= 0; i
< count
; i
++) {
79 const VkViewport
*vp
= &viewports
[i
];
81 /* The gen7 state struct has just the matrix and guardband fields, the
82 * gen8 struct adds the min/max viewport fields. */
83 struct GENX(SF_CLIP_VIEWPORT
) sf_clip_viewport
= {
84 .ViewportMatrixElementm00
= vp
->width
/ 2,
85 .ViewportMatrixElementm11
= vp
->height
/ 2,
86 .ViewportMatrixElementm22
= (vp
->maxDepth
- vp
->minDepth
) / 2,
87 .ViewportMatrixElementm30
= vp
->x
+ vp
->width
/ 2,
88 .ViewportMatrixElementm31
= vp
->y
+ vp
->height
/ 2,
89 .ViewportMatrixElementm32
= (vp
->maxDepth
+ vp
->minDepth
) / 2,
90 .XMinClipGuardband
= -1.0f
,
91 .XMaxClipGuardband
= 1.0f
,
92 .YMinClipGuardband
= -1.0f
,
93 .YMaxClipGuardband
= 1.0f
,
94 .XMinViewPort
= vp
->x
,
95 .XMaxViewPort
= vp
->x
+ vp
->width
- 1,
96 .YMinViewPort
= vp
->y
,
97 .YMaxViewPort
= vp
->y
+ vp
->height
- 1,
100 struct GENX(CC_VIEWPORT
) cc_viewport
= {
101 .MinimumDepth
= vp
->minDepth
,
102 .MaximumDepth
= vp
->maxDepth
105 GENX(SF_CLIP_VIEWPORT_pack
)(NULL
, sf_clip_state
.map
+ i
* 64,
107 GENX(CC_VIEWPORT_pack
)(NULL
, cc_state
.map
+ i
* 32, &cc_viewport
);
110 if (!cmd_buffer
->device
->info
.has_llc
) {
111 anv_state_clflush(sf_clip_state
);
112 anv_state_clflush(cc_state
);
115 anv_batch_emit(&cmd_buffer
->batch
,
116 GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC
),
117 .CCViewportPointer
= cc_state
.offset
);
118 anv_batch_emit(&cmd_buffer
->batch
,
119 GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP
),
120 .SFClipViewportPointer
= sf_clip_state
.offset
);
124 gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer
*cmd_buffer
)
126 if (cmd_buffer
->state
.dynamic
.viewport
.count
> 0) {
127 emit_viewport_state(cmd_buffer
, cmd_buffer
->state
.dynamic
.viewport
.count
,
128 cmd_buffer
->state
.dynamic
.viewport
.viewports
);
130 /* If viewport count is 0, this is taken to mean "use the default" */
131 emit_viewport_state(cmd_buffer
, 1,
135 .width
= cmd_buffer
->state
.framebuffer
->width
,
136 .height
= cmd_buffer
->state
.framebuffer
->height
,
145 cmd_buffer_flush_state(struct anv_cmd_buffer
*cmd_buffer
)
147 struct anv_pipeline
*pipeline
= cmd_buffer
->state
.pipeline
;
150 uint32_t vb_emit
= cmd_buffer
->state
.vb_dirty
& pipeline
->vb_used
;
152 assert((pipeline
->active_stages
& VK_SHADER_STAGE_COMPUTE_BIT
) == 0);
154 if (cmd_buffer
->state
.current_pipeline
!= _3D
) {
155 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPELINE_SELECT
),
159 .PipelineSelection
= _3D
);
160 cmd_buffer
->state
.current_pipeline
= _3D
;
164 const uint32_t num_buffers
= __builtin_popcount(vb_emit
);
165 const uint32_t num_dwords
= 1 + num_buffers
* 4;
167 p
= anv_batch_emitn(&cmd_buffer
->batch
, num_dwords
,
168 GENX(3DSTATE_VERTEX_BUFFERS
));
170 for_each_bit(vb
, vb_emit
) {
171 struct anv_buffer
*buffer
= cmd_buffer
->state
.vertex_bindings
[vb
].buffer
;
172 uint32_t offset
= cmd_buffer
->state
.vertex_bindings
[vb
].offset
;
174 struct GENX(VERTEX_BUFFER_STATE
) state
= {
175 .VertexBufferIndex
= vb
,
176 .MemoryObjectControlState
= GENX(MOCS
),
177 .AddressModifyEnable
= true,
178 .BufferPitch
= pipeline
->binding_stride
[vb
],
179 .BufferStartingAddress
= { buffer
->bo
, buffer
->offset
+ offset
},
180 .BufferSize
= buffer
->size
- offset
183 GENX(VERTEX_BUFFER_STATE_pack
)(&cmd_buffer
->batch
, &p
[1 + i
* 4], &state
);
188 if (cmd_buffer
->state
.dirty
& ANV_CMD_DIRTY_PIPELINE
) {
189 /* If somebody compiled a pipeline after starting a command buffer the
190 * scratch bo may have grown since we started this cmd buffer (and
191 * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now,
192 * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
193 if (cmd_buffer
->state
.scratch_size
< pipeline
->total_scratch
)
194 anv_cmd_buffer_emit_state_base_address(cmd_buffer
);
196 anv_batch_emit_batch(&cmd_buffer
->batch
, &pipeline
->batch
);
200 /* On SKL+ the new constants don't take effect until the next corresponding
201 * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed so we need to ensure
202 * that is sent. As it is, we re-emit binding tables but we could hold on
203 * to the offset of the most recent binding table and only re-emit the
204 * 3DSTATE_BINDING_TABLE_POINTER_* command.
206 cmd_buffer
->state
.descriptors_dirty
|=
207 cmd_buffer
->state
.push_constants_dirty
&
208 cmd_buffer
->state
.pipeline
->active_stages
;
211 if (cmd_buffer
->state
.descriptors_dirty
)
212 gen7_cmd_buffer_flush_descriptor_sets(cmd_buffer
);
214 if (cmd_buffer
->state
.push_constants_dirty
)
215 cmd_buffer_flush_push_constants(cmd_buffer
);
217 if (cmd_buffer
->state
.dirty
& ANV_CMD_DIRTY_DYNAMIC_VIEWPORT
)
218 gen8_cmd_buffer_emit_viewport(cmd_buffer
);
220 if (cmd_buffer
->state
.dirty
& ANV_CMD_DIRTY_DYNAMIC_SCISSOR
)
221 gen7_cmd_buffer_emit_scissor(cmd_buffer
);
223 if (cmd_buffer
->state
.dirty
& (ANV_CMD_DIRTY_PIPELINE
|
224 ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH
)) {
225 uint32_t sf_dw
[GENX(3DSTATE_SF_length
)];
226 struct GENX(3DSTATE_SF
) sf
= {
227 GENX(3DSTATE_SF_header
),
228 .LineWidth
= cmd_buffer
->state
.dynamic
.line_width
,
230 GENX(3DSTATE_SF_pack
)(NULL
, sf_dw
, &sf
);
232 anv_batch_emit_merge(&cmd_buffer
->batch
, sf_dw
, pipeline
->gen8
.sf
);
235 if (cmd_buffer
->state
.dirty
& (ANV_CMD_DIRTY_PIPELINE
|
236 ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS
)){
237 bool enable_bias
= cmd_buffer
->state
.dynamic
.depth_bias
.bias
!= 0.0f
||
238 cmd_buffer
->state
.dynamic
.depth_bias
.slope
!= 0.0f
;
240 uint32_t raster_dw
[GENX(3DSTATE_RASTER_length
)];
241 struct GENX(3DSTATE_RASTER
) raster
= {
242 GENX(3DSTATE_RASTER_header
),
243 .GlobalDepthOffsetEnableSolid
= enable_bias
,
244 .GlobalDepthOffsetEnableWireframe
= enable_bias
,
245 .GlobalDepthOffsetEnablePoint
= enable_bias
,
246 .GlobalDepthOffsetConstant
= cmd_buffer
->state
.dynamic
.depth_bias
.bias
,
247 .GlobalDepthOffsetScale
= cmd_buffer
->state
.dynamic
.depth_bias
.slope
,
248 .GlobalDepthOffsetClamp
= cmd_buffer
->state
.dynamic
.depth_bias
.clamp
250 GENX(3DSTATE_RASTER_pack
)(NULL
, raster_dw
, &raster
);
251 anv_batch_emit_merge(&cmd_buffer
->batch
, raster_dw
,
252 pipeline
->gen8
.raster
);
255 /* Stencil reference values moved from COLOR_CALC_STATE in gen8 to
256 * 3DSTATE_WM_DEPTH_STENCIL in gen9. That means the dirty bits gets split
257 * across different state packets for gen8 and gen9. We handle that by
258 * using a big old #if switch here.
261 if (cmd_buffer
->state
.dirty
& (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS
|
262 ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE
)) {
263 struct anv_state cc_state
=
264 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer
,
265 GEN8_COLOR_CALC_STATE_length
, 64);
266 struct GEN8_COLOR_CALC_STATE cc
= {
267 .BlendConstantColorRed
= cmd_buffer
->state
.dynamic
.blend_constants
[0],
268 .BlendConstantColorGreen
= cmd_buffer
->state
.dynamic
.blend_constants
[1],
269 .BlendConstantColorBlue
= cmd_buffer
->state
.dynamic
.blend_constants
[2],
270 .BlendConstantColorAlpha
= cmd_buffer
->state
.dynamic
.blend_constants
[3],
271 .StencilReferenceValue
=
272 cmd_buffer
->state
.dynamic
.stencil_reference
.front
,
273 .BackFaceStencilReferenceValue
=
274 cmd_buffer
->state
.dynamic
.stencil_reference
.back
,
276 GEN8_COLOR_CALC_STATE_pack(NULL
, cc_state
.map
, &cc
);
278 if (!cmd_buffer
->device
->info
.has_llc
)
279 anv_state_clflush(cc_state
);
281 anv_batch_emit(&cmd_buffer
->batch
,
282 GEN8_3DSTATE_CC_STATE_POINTERS
,
283 .ColorCalcStatePointer
= cc_state
.offset
,
284 .ColorCalcStatePointerValid
= true);
287 if (cmd_buffer
->state
.dirty
& (ANV_CMD_DIRTY_PIPELINE
|
288 ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK
|
289 ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK
)) {
290 uint32_t wm_depth_stencil_dw
[GEN8_3DSTATE_WM_DEPTH_STENCIL_length
];
292 struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil
= {
293 GEN8_3DSTATE_WM_DEPTH_STENCIL_header
,
295 /* Is this what we need to do? */
296 .StencilBufferWriteEnable
=
297 cmd_buffer
->state
.dynamic
.stencil_write_mask
.front
!= 0,
300 cmd_buffer
->state
.dynamic
.stencil_compare_mask
.front
& 0xff,
302 cmd_buffer
->state
.dynamic
.stencil_write_mask
.front
& 0xff,
304 .BackfaceStencilTestMask
=
305 cmd_buffer
->state
.dynamic
.stencil_compare_mask
.back
& 0xff,
306 .BackfaceStencilWriteMask
=
307 cmd_buffer
->state
.dynamic
.stencil_write_mask
.back
& 0xff,
309 GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL
, wm_depth_stencil_dw
,
312 anv_batch_emit_merge(&cmd_buffer
->batch
, wm_depth_stencil_dw
,
313 pipeline
->gen8
.wm_depth_stencil
);
316 if (cmd_buffer
->state
.dirty
& ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS
) {
317 struct anv_state cc_state
=
318 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer
,
319 GEN9_COLOR_CALC_STATE_length
, 64);
320 struct GEN9_COLOR_CALC_STATE cc
= {
321 .BlendConstantColorRed
= cmd_buffer
->state
.dynamic
.blend_constants
[0],
322 .BlendConstantColorGreen
= cmd_buffer
->state
.dynamic
.blend_constants
[1],
323 .BlendConstantColorBlue
= cmd_buffer
->state
.dynamic
.blend_constants
[2],
324 .BlendConstantColorAlpha
= cmd_buffer
->state
.dynamic
.blend_constants
[3],
326 GEN9_COLOR_CALC_STATE_pack(NULL
, cc_state
.map
, &cc
);
328 if (!cmd_buffer
->device
->info
.has_llc
)
329 anv_state_clflush(cc_state
);
331 anv_batch_emit(&cmd_buffer
->batch
,
332 GEN9_3DSTATE_CC_STATE_POINTERS
,
333 .ColorCalcStatePointer
= cc_state
.offset
,
334 .ColorCalcStatePointerValid
= true);
337 if (cmd_buffer
->state
.dirty
& (ANV_CMD_DIRTY_PIPELINE
|
338 ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK
|
339 ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK
|
340 ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE
)) {
341 uint32_t dwords
[GEN9_3DSTATE_WM_DEPTH_STENCIL_length
];
342 struct anv_dynamic_state
*d
= &cmd_buffer
->state
.dynamic
;
343 struct GEN9_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil
= {
344 GEN9_3DSTATE_WM_DEPTH_STENCIL_header
,
346 .StencilBufferWriteEnable
= d
->stencil_write_mask
.front
!= 0,
348 .StencilTestMask
= d
->stencil_compare_mask
.front
& 0xff,
349 .StencilWriteMask
= d
->stencil_write_mask
.front
& 0xff,
351 .BackfaceStencilTestMask
= d
->stencil_compare_mask
.back
& 0xff,
352 .BackfaceStencilWriteMask
= d
->stencil_write_mask
.back
& 0xff,
354 .StencilReferenceValue
= d
->stencil_reference
.front
,
355 .BackfaceStencilReferenceValue
= d
->stencil_reference
.back
357 GEN9_3DSTATE_WM_DEPTH_STENCIL_pack(NULL
, dwords
, &wm_depth_stencil
);
359 anv_batch_emit_merge(&cmd_buffer
->batch
, dwords
,
360 pipeline
->gen9
.wm_depth_stencil
);
364 if (cmd_buffer
->state
.dirty
& (ANV_CMD_DIRTY_PIPELINE
|
365 ANV_CMD_DIRTY_INDEX_BUFFER
)) {
366 anv_batch_emit(&cmd_buffer
->batch
, GENX(3DSTATE_VF
),
367 .IndexedDrawCutIndexEnable
= pipeline
->primitive_restart
,
368 .CutIndex
= cmd_buffer
->state
.restart_index
,
372 cmd_buffer
->state
.vb_dirty
&= ~vb_emit
;
373 cmd_buffer
->state
.dirty
= 0;
377 VkCommandBuffer commandBuffer
,
378 uint32_t vertexCount
,
379 uint32_t instanceCount
,
380 uint32_t firstVertex
,
381 uint32_t firstInstance
)
383 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
385 cmd_buffer_flush_state(cmd_buffer
);
387 anv_batch_emit(&cmd_buffer
->batch
, GENX(3DPRIMITIVE
),
388 .VertexAccessType
= SEQUENTIAL
,
389 .VertexCountPerInstance
= vertexCount
,
390 .StartVertexLocation
= firstVertex
,
391 .InstanceCount
= instanceCount
,
392 .StartInstanceLocation
= firstInstance
,
393 .BaseVertexLocation
= 0);
396 void genX(CmdDrawIndexed
)(
397 VkCommandBuffer commandBuffer
,
399 uint32_t instanceCount
,
401 int32_t vertexOffset
,
402 uint32_t firstInstance
)
404 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
406 cmd_buffer_flush_state(cmd_buffer
);
408 anv_batch_emit(&cmd_buffer
->batch
, GENX(3DPRIMITIVE
),
409 .VertexAccessType
= RANDOM
,
410 .VertexCountPerInstance
= indexCount
,
411 .StartVertexLocation
= firstIndex
,
412 .InstanceCount
= instanceCount
,
413 .StartInstanceLocation
= firstInstance
,
414 .BaseVertexLocation
= vertexOffset
);
418 emit_lrm(struct anv_batch
*batch
,
419 uint32_t reg
, struct anv_bo
*bo
, uint32_t offset
)
421 anv_batch_emit(batch
, GENX(MI_LOAD_REGISTER_MEM
),
422 .RegisterAddress
= reg
,
423 .MemoryAddress
= { bo
, offset
});
427 emit_lri(struct anv_batch
*batch
, uint32_t reg
, uint32_t imm
)
429 anv_batch_emit(batch
, GENX(MI_LOAD_REGISTER_IMM
),
430 .RegisterOffset
= reg
,
/* Auto-Draw / Indirect Registers: MMIO registers the 3DPRIMITIVE command
 * reads when IndirectParameterEnable is set; loaded via MI_LOAD_REGISTER_*
 * in the indirect-draw paths below. */
#define GEN7_3DPRIM_END_OFFSET          0x2420
#define GEN7_3DPRIM_START_VERTEX        0x2430
#define GEN7_3DPRIM_VERTEX_COUNT        0x2434
#define GEN7_3DPRIM_INSTANCE_COUNT      0x2438
#define GEN7_3DPRIM_START_INSTANCE      0x243C
#define GEN7_3DPRIM_BASE_VERTEX         0x2440
442 void genX(CmdDrawIndirect
)(
443 VkCommandBuffer commandBuffer
,
449 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
450 ANV_FROM_HANDLE(anv_buffer
, buffer
, _buffer
);
451 struct anv_bo
*bo
= buffer
->bo
;
452 uint32_t bo_offset
= buffer
->offset
+ offset
;
454 cmd_buffer_flush_state(cmd_buffer
);
456 emit_lrm(&cmd_buffer
->batch
, GEN7_3DPRIM_VERTEX_COUNT
, bo
, bo_offset
);
457 emit_lrm(&cmd_buffer
->batch
, GEN7_3DPRIM_INSTANCE_COUNT
, bo
, bo_offset
+ 4);
458 emit_lrm(&cmd_buffer
->batch
, GEN7_3DPRIM_START_VERTEX
, bo
, bo_offset
+ 8);
459 emit_lrm(&cmd_buffer
->batch
, GEN7_3DPRIM_START_INSTANCE
, bo
, bo_offset
+ 12);
460 emit_lri(&cmd_buffer
->batch
, GEN7_3DPRIM_BASE_VERTEX
, 0);
462 anv_batch_emit(&cmd_buffer
->batch
, GENX(3DPRIMITIVE
),
463 .IndirectParameterEnable
= true,
464 .VertexAccessType
= SEQUENTIAL
);
467 void genX(CmdBindIndexBuffer
)(
468 VkCommandBuffer commandBuffer
,
471 VkIndexType indexType
)
473 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
474 ANV_FROM_HANDLE(anv_buffer
, buffer
, _buffer
);
476 static const uint32_t vk_to_gen_index_type
[] = {
477 [VK_INDEX_TYPE_UINT16
] = INDEX_WORD
,
478 [VK_INDEX_TYPE_UINT32
] = INDEX_DWORD
,
481 static const uint32_t restart_index_for_type
[] = {
482 [VK_INDEX_TYPE_UINT16
] = UINT16_MAX
,
483 [VK_INDEX_TYPE_UINT32
] = UINT32_MAX
,
486 cmd_buffer
->state
.restart_index
= restart_index_for_type
[indexType
];
488 anv_batch_emit(&cmd_buffer
->batch
, GENX(3DSTATE_INDEX_BUFFER
),
489 .IndexFormat
= vk_to_gen_index_type
[indexType
],
490 .MemoryObjectControlState
= GENX(MOCS
),
491 .BufferStartingAddress
= { buffer
->bo
, buffer
->offset
+ offset
},
492 .BufferSize
= buffer
->size
- offset
);
494 cmd_buffer
->state
.dirty
|= ANV_CMD_DIRTY_INDEX_BUFFER
;
498 flush_compute_descriptor_set(struct anv_cmd_buffer
*cmd_buffer
)
500 struct anv_device
*device
= cmd_buffer
->device
;
501 struct anv_pipeline
*pipeline
= cmd_buffer
->state
.compute_pipeline
;
502 struct anv_state surfaces
= { 0, }, samplers
= { 0, };
505 result
= anv_cmd_buffer_emit_samplers(cmd_buffer
,
506 MESA_SHADER_COMPUTE
, &samplers
);
507 if (result
!= VK_SUCCESS
)
509 result
= anv_cmd_buffer_emit_binding_table(cmd_buffer
,
510 MESA_SHADER_COMPUTE
, &surfaces
);
511 if (result
!= VK_SUCCESS
)
514 struct anv_state state
=
515 anv_state_pool_emit(&device
->dynamic_state_pool
,
516 GENX(INTERFACE_DESCRIPTOR_DATA
), 64,
517 .KernelStartPointer
= pipeline
->cs_simd
,
518 .KernelStartPointerHigh
= 0,
519 .BindingTablePointer
= surfaces
.offset
,
520 .BindingTableEntryCount
= 0,
521 .SamplerStatePointer
= samplers
.offset
,
523 .NumberofThreadsinGPGPUThreadGroup
= 0);
525 uint32_t size
= GENX(INTERFACE_DESCRIPTOR_DATA_length
) * sizeof(uint32_t);
526 anv_batch_emit(&cmd_buffer
->batch
, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD
),
527 .InterfaceDescriptorTotalLength
= size
,
528 .InterfaceDescriptorDataStartAddress
= state
.offset
);
534 cmd_buffer_flush_compute_state(struct anv_cmd_buffer
*cmd_buffer
)
536 struct anv_pipeline
*pipeline
= cmd_buffer
->state
.compute_pipeline
;
539 assert(pipeline
->active_stages
== VK_SHADER_STAGE_COMPUTE_BIT
);
541 if (cmd_buffer
->state
.current_pipeline
!= GPGPU
) {
542 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPELINE_SELECT
),
546 .PipelineSelection
= GPGPU
);
547 cmd_buffer
->state
.current_pipeline
= GPGPU
;
550 if (cmd_buffer
->state
.compute_dirty
& ANV_CMD_DIRTY_PIPELINE
)
551 anv_batch_emit_batch(&cmd_buffer
->batch
, &pipeline
->batch
);
553 if ((cmd_buffer
->state
.descriptors_dirty
& VK_SHADER_STAGE_COMPUTE_BIT
) ||
554 (cmd_buffer
->state
.compute_dirty
& ANV_CMD_DIRTY_PIPELINE
)) {
555 result
= flush_compute_descriptor_set(cmd_buffer
);
556 assert(result
== VK_SUCCESS
);
557 cmd_buffer
->state
.descriptors_dirty
&= ~VK_SHADER_STAGE_COMPUTE_BIT
;
560 cmd_buffer
->state
.compute_dirty
= 0;
563 void genX(CmdDrawIndexedIndirect
)(
564 VkCommandBuffer commandBuffer
,
570 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
571 ANV_FROM_HANDLE(anv_buffer
, buffer
, _buffer
);
572 struct anv_bo
*bo
= buffer
->bo
;
573 uint32_t bo_offset
= buffer
->offset
+ offset
;
575 cmd_buffer_flush_state(cmd_buffer
);
577 emit_lrm(&cmd_buffer
->batch
, GEN7_3DPRIM_VERTEX_COUNT
, bo
, bo_offset
);
578 emit_lrm(&cmd_buffer
->batch
, GEN7_3DPRIM_INSTANCE_COUNT
, bo
, bo_offset
+ 4);
579 emit_lrm(&cmd_buffer
->batch
, GEN7_3DPRIM_START_VERTEX
, bo
, bo_offset
+ 8);
580 emit_lrm(&cmd_buffer
->batch
, GEN7_3DPRIM_BASE_VERTEX
, bo
, bo_offset
+ 12);
581 emit_lrm(&cmd_buffer
->batch
, GEN7_3DPRIM_START_INSTANCE
, bo
, bo_offset
+ 16);
583 anv_batch_emit(&cmd_buffer
->batch
, GENX(3DPRIMITIVE
),
584 .IndirectParameterEnable
= true,
585 .VertexAccessType
= RANDOM
);
588 void genX(CmdDispatch
)(
589 VkCommandBuffer commandBuffer
,
594 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
595 struct anv_pipeline
*pipeline
= cmd_buffer
->state
.compute_pipeline
;
596 struct brw_cs_prog_data
*prog_data
= &pipeline
->cs_prog_data
;
598 cmd_buffer_flush_compute_state(cmd_buffer
);
600 anv_batch_emit(&cmd_buffer
->batch
, GENX(GPGPU_WALKER
),
601 .SIMDSize
= prog_data
->simd_size
/ 16,
602 .ThreadDepthCounterMaximum
= 0,
603 .ThreadHeightCounterMaximum
= 0,
604 .ThreadWidthCounterMaximum
= pipeline
->cs_thread_width_max
- 1,
605 .ThreadGroupIDXDimension
= x
,
606 .ThreadGroupIDYDimension
= y
,
607 .ThreadGroupIDZDimension
= z
,
608 .RightExecutionMask
= pipeline
->cs_right_mask
,
609 .BottomExecutionMask
= 0xffffffff);
611 anv_batch_emit(&cmd_buffer
->batch
, GENX(MEDIA_STATE_FLUSH
));
/* MMIO registers GPGPU_WALKER reads for its thread-group counts when
 * IndirectParameterEnable is set (used by CmdDispatchIndirect). */
#define GPGPU_DISPATCHDIMX 0x2500
#define GPGPU_DISPATCHDIMY 0x2504
#define GPGPU_DISPATCHDIMZ 0x2508
618 void genX(CmdDispatchIndirect
)(
619 VkCommandBuffer commandBuffer
,
623 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
624 ANV_FROM_HANDLE(anv_buffer
, buffer
, _buffer
);
625 struct anv_pipeline
*pipeline
= cmd_buffer
->state
.compute_pipeline
;
626 struct brw_cs_prog_data
*prog_data
= &pipeline
->cs_prog_data
;
627 struct anv_bo
*bo
= buffer
->bo
;
628 uint32_t bo_offset
= buffer
->offset
+ offset
;
630 cmd_buffer_flush_compute_state(cmd_buffer
);
632 emit_lrm(&cmd_buffer
->batch
, GPGPU_DISPATCHDIMX
, bo
, bo_offset
);
633 emit_lrm(&cmd_buffer
->batch
, GPGPU_DISPATCHDIMY
, bo
, bo_offset
+ 4);
634 emit_lrm(&cmd_buffer
->batch
, GPGPU_DISPATCHDIMZ
, bo
, bo_offset
+ 8);
636 anv_batch_emit(&cmd_buffer
->batch
, GENX(GPGPU_WALKER
),
637 .IndirectParameterEnable
= true,
638 .SIMDSize
= prog_data
->simd_size
/ 16,
639 .ThreadDepthCounterMaximum
= 0,
640 .ThreadHeightCounterMaximum
= 0,
641 .ThreadWidthCounterMaximum
= pipeline
->cs_thread_width_max
- 1,
642 .RightExecutionMask
= pipeline
->cs_right_mask
,
643 .BottomExecutionMask
= 0xffffffff);
645 anv_batch_emit(&cmd_buffer
->batch
, GENX(MEDIA_STATE_FLUSH
));
649 cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer
*cmd_buffer
)
651 const struct anv_framebuffer
*fb
= cmd_buffer
->state
.framebuffer
;
652 const struct anv_image_view
*iview
=
653 anv_cmd_buffer_get_depth_stencil_view(cmd_buffer
);
654 const struct anv_image
*image
= iview
? iview
->image
: NULL
;
655 const bool has_depth
= iview
&& iview
->format
->depth_format
;
656 const bool has_stencil
= iview
&& iview
->format
->has_stencil
;
658 /* FIXME: Implement the PMA stall W/A */
659 /* FIXME: Width and Height are wrong */
661 /* Emit 3DSTATE_DEPTH_BUFFER */
663 anv_batch_emit(&cmd_buffer
->batch
, GENX(3DSTATE_DEPTH_BUFFER
),
664 .SurfaceType
= SURFTYPE_2D
,
665 .DepthWriteEnable
= iview
->format
->depth_format
,
666 .StencilWriteEnable
= has_stencil
,
667 .HierarchicalDepthBufferEnable
= false,
668 .SurfaceFormat
= iview
->format
->depth_format
,
669 .SurfacePitch
= image
->depth_surface
.isl
.row_pitch
- 1,
670 .SurfaceBaseAddress
= {
672 .offset
= image
->depth_surface
.offset
,
674 .Height
= fb
->height
- 1,
675 .Width
= fb
->width
- 1,
678 .MinimumArrayElement
= 0,
679 .DepthBufferObjectControlState
= GENX(MOCS
),
680 .RenderTargetViewExtent
= 1 - 1,
681 .SurfaceQPitch
= isl_surf_get_array_pitch_el_rows(&image
->depth_surface
.isl
) >> 2);
683 /* Even when no depth buffer is present, the hardware requires that
684 * 3DSTATE_DEPTH_BUFFER be programmed correctly. The Broadwell PRM says:
686 * If a null depth buffer is bound, the driver must instead bind depth as:
687 * 3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D
688 * 3DSTATE_DEPTH.Width = 1
689 * 3DSTATE_DEPTH.Height = 1
690 * 3DSTATE_DEPTH.SuraceFormat = D16_UNORM
691 * 3DSTATE_DEPTH.SurfaceBaseAddress = 0
692 * 3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0
693 * 3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0
694 * 3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0
696 * The PRM is wrong, though. The width and height must be programmed to
697 * actual framebuffer's width and height, even when neither depth buffer
698 * nor stencil buffer is present.
700 anv_batch_emit(&cmd_buffer
->batch
, GENX(3DSTATE_DEPTH_BUFFER
),
701 .SurfaceType
= SURFTYPE_2D
,
702 .SurfaceFormat
= D16_UNORM
,
703 .Width
= fb
->width
- 1,
704 .Height
= fb
->height
- 1,
705 .StencilWriteEnable
= has_stencil
);
708 /* Emit 3DSTATE_STENCIL_BUFFER */
710 anv_batch_emit(&cmd_buffer
->batch
, GENX(3DSTATE_STENCIL_BUFFER
),
711 .StencilBufferEnable
= true,
712 .StencilBufferObjectControlState
= GENX(MOCS
),
714 /* Stencil buffers have strange pitch. The PRM says:
716 * The pitch must be set to 2x the value computed based on width,
717 * as the stencil buffer is stored with two rows interleaved.
719 .SurfacePitch
= 2 * image
->stencil_surface
.isl
.row_pitch
- 1,
721 .SurfaceBaseAddress
= {
723 .offset
= image
->offset
+ image
->stencil_surface
.offset
,
725 .SurfaceQPitch
= isl_surf_get_array_pitch_el_rows(&image
->stencil_surface
.isl
) >> 2);
727 anv_batch_emit(&cmd_buffer
->batch
, GENX(3DSTATE_STENCIL_BUFFER
));
730 /* Disable hierarchial depth buffers. */
731 anv_batch_emit(&cmd_buffer
->batch
, GENX(3DSTATE_HIER_DEPTH_BUFFER
));
733 /* Clear the clear params. */
734 anv_batch_emit(&cmd_buffer
->batch
, GENX(3DSTATE_CLEAR_PARAMS
));
738 genX(cmd_buffer_begin_subpass
)(struct anv_cmd_buffer
*cmd_buffer
,
739 struct anv_subpass
*subpass
)
741 cmd_buffer
->state
.subpass
= subpass
;
743 cmd_buffer
->state
.descriptors_dirty
|= VK_SHADER_STAGE_FRAGMENT_BIT
;
745 cmd_buffer_emit_depth_stencil(cmd_buffer
);
748 void genX(CmdBeginRenderPass
)(
749 VkCommandBuffer commandBuffer
,
750 const VkRenderPassBeginInfo
* pRenderPassBegin
,
751 VkSubpassContents contents
)
753 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
754 ANV_FROM_HANDLE(anv_render_pass
, pass
, pRenderPassBegin
->renderPass
);
755 ANV_FROM_HANDLE(anv_framebuffer
, framebuffer
, pRenderPassBegin
->framebuffer
);
757 cmd_buffer
->state
.framebuffer
= framebuffer
;
758 cmd_buffer
->state
.pass
= pass
;
760 const VkRect2D
*render_area
= &pRenderPassBegin
->renderArea
;
762 anv_batch_emit(&cmd_buffer
->batch
, GENX(3DSTATE_DRAWING_RECTANGLE
),
763 .ClippedDrawingRectangleYMin
= render_area
->offset
.y
,
764 .ClippedDrawingRectangleXMin
= render_area
->offset
.x
,
765 .ClippedDrawingRectangleYMax
=
766 render_area
->offset
.y
+ render_area
->extent
.height
- 1,
767 .ClippedDrawingRectangleXMax
=
768 render_area
->offset
.x
+ render_area
->extent
.width
- 1,
769 .DrawingRectangleOriginY
= 0,
770 .DrawingRectangleOriginX
= 0);
772 anv_cmd_buffer_clear_attachments(cmd_buffer
, pass
,
773 pRenderPassBegin
->pClearValues
);
775 genX(cmd_buffer_begin_subpass
)(cmd_buffer
, pass
->subpasses
);
778 void genX(CmdNextSubpass
)(
779 VkCommandBuffer commandBuffer
,
780 VkSubpassContents contents
)
782 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
784 assert(cmd_buffer
->level
== VK_COMMAND_BUFFER_LEVEL_PRIMARY
);
786 genX(cmd_buffer_begin_subpass
)(cmd_buffer
, cmd_buffer
->state
.subpass
+ 1);
789 void genX(CmdEndRenderPass
)(
790 VkCommandBuffer commandBuffer
)
792 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
794 /* Emit a flushing pipe control at the end of a pass. This is kind of a
795 * hack but it ensures that render targets always actually get written.
796 * Eventually, we should do flushing based on image format transitions
797 * or something of that nature.
799 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPE_CONTROL
),
800 .PostSyncOperation
= NoWrite
,
801 .RenderTargetCacheFlushEnable
= true,
802 .InstructionCacheInvalidateEnable
= true,
803 .DepthCacheFlushEnable
= true,
804 .VFCacheInvalidationEnable
= true,
805 .TextureCacheInvalidationEnable
= true,
806 .CommandStreamerStallEnable
= true);
810 emit_ps_depth_count(struct anv_batch
*batch
,
811 struct anv_bo
*bo
, uint32_t offset
)
813 anv_batch_emit(batch
, GENX(PIPE_CONTROL
),
814 .DestinationAddressType
= DAT_PPGTT
,
815 .PostSyncOperation
= WritePSDepthCount
,
816 .Address
= { bo
, offset
}); /* FIXME: This is only lower 32 bits */
819 void genX(CmdBeginQuery
)(
820 VkCommandBuffer commandBuffer
,
821 VkQueryPool queryPool
,
823 VkQueryControlFlags flags
)
825 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
826 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
828 switch (pool
->type
) {
829 case VK_QUERY_TYPE_OCCLUSION
:
830 emit_ps_depth_count(&cmd_buffer
->batch
, &pool
->bo
,
831 entry
* sizeof(struct anv_query_pool_slot
));
834 case VK_QUERY_TYPE_PIPELINE_STATISTICS
:
840 void genX(CmdEndQuery
)(
841 VkCommandBuffer commandBuffer
,
842 VkQueryPool queryPool
,
845 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
846 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
848 switch (pool
->type
) {
849 case VK_QUERY_TYPE_OCCLUSION
:
850 emit_ps_depth_count(&cmd_buffer
->batch
, &pool
->bo
,
851 entry
* sizeof(struct anv_query_pool_slot
) + 8);
854 case VK_QUERY_TYPE_PIPELINE_STATISTICS
:
860 #define TIMESTAMP 0x2358
862 void genX(CmdWriteTimestamp
)(
863 VkCommandBuffer commandBuffer
,
864 VkPipelineStageFlagBits pipelineStage
,
865 VkQueryPool queryPool
,
868 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
869 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
871 assert(pool
->type
== VK_QUERY_TYPE_TIMESTAMP
);
873 switch (pipelineStage
) {
874 case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT
:
875 anv_batch_emit(&cmd_buffer
->batch
, GENX(MI_STORE_REGISTER_MEM
),
876 .RegisterAddress
= TIMESTAMP
,
877 .MemoryAddress
= { &pool
->bo
, entry
* 8 });
878 anv_batch_emit(&cmd_buffer
->batch
, GENX(MI_STORE_REGISTER_MEM
),
879 .RegisterAddress
= TIMESTAMP
+ 4,
880 .MemoryAddress
= { &pool
->bo
, entry
* 8 + 4 });
884 /* Everything else is bottom-of-pipe */
885 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPE_CONTROL
),
886 .DestinationAddressType
= DAT_PPGTT
,
887 .PostSyncOperation
= WriteTimestamp
,
888 .Address
= /* FIXME: This is only lower 32 bits */
889 { &pool
->bo
, entry
* 8 });
/* Helpers for building MI_MATH ALU instruction dwords: opcode in bits
 * 31:20, operand1 in 19:10, operand2 in 9:0. */
#define alu_opcode(v)   __gen_field((v), 20, 31)
#define alu_operand1(v) __gen_field((v), 10, 19)
#define alu_operand2(v) __gen_field((v), 0, 9)
#define alu(opcode, operand1, operand2) \
   alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2)

/* MI_MATH ALU opcodes. */
#define OPCODE_NOOP      0x000
#define OPCODE_LOAD      0x080
#define OPCODE_LOADINV   0x480
#define OPCODE_LOAD0     0x081
#define OPCODE_LOAD1     0x481
#define OPCODE_ADD       0x100
#define OPCODE_SUB       0x101
#define OPCODE_AND       0x102
#define OPCODE_OR        0x103
#define OPCODE_XOR       0x104
#define OPCODE_STORE     0x180
#define OPCODE_STOREINV  0x580

/* MI_MATH ALU operand encodings: general-purpose registers plus the
 * special source/accumulator/flag operands. */
#define OPERAND_R0   0x00
#define OPERAND_R1   0x01
#define OPERAND_R2   0x02
#define OPERAND_R3   0x03
#define OPERAND_R4   0x04
#define OPERAND_SRCA 0x20
#define OPERAND_SRCB 0x21
#define OPERAND_ACCU 0x31
#define OPERAND_ZF   0x32
#define OPERAND_CF   0x33

/* MMIO address of command-streamer general-purpose register n (64-bit,
 * 8 bytes apart). */
#define CS_GPR(n) (0x2600 + (n) * 8)
927 emit_load_alu_reg_u64(struct anv_batch
*batch
, uint32_t reg
,
928 struct anv_bo
*bo
, uint32_t offset
)
930 anv_batch_emit(batch
, GENX(MI_LOAD_REGISTER_MEM
),
931 .RegisterAddress
= reg
,
932 .MemoryAddress
= { bo
, offset
});
933 anv_batch_emit(batch
, GENX(MI_LOAD_REGISTER_MEM
),
934 .RegisterAddress
= reg
+ 4,
935 .MemoryAddress
= { bo
, offset
+ 4 });
938 void genX(CmdCopyQueryPoolResults
)(
939 VkCommandBuffer commandBuffer
,
940 VkQueryPool queryPool
,
944 VkDeviceSize destOffset
,
945 VkDeviceSize destStride
,
946 VkQueryResultFlags flags
)
948 ANV_FROM_HANDLE(anv_cmd_buffer
, cmd_buffer
, commandBuffer
);
949 ANV_FROM_HANDLE(anv_query_pool
, pool
, queryPool
);
950 ANV_FROM_HANDLE(anv_buffer
, buffer
, destBuffer
);
951 uint32_t slot_offset
, dst_offset
;
953 if (flags
& VK_QUERY_RESULT_WITH_AVAILABILITY_BIT
) {
954 /* Where is the availabilty info supposed to go? */
955 anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT");
959 assert(pool
->type
== VK_QUERY_TYPE_OCCLUSION
);
961 /* FIXME: If we're not waiting, should we just do this on the CPU? */
962 if (flags
& VK_QUERY_RESULT_WAIT_BIT
)
963 anv_batch_emit(&cmd_buffer
->batch
, GENX(PIPE_CONTROL
),
964 .CommandStreamerStallEnable
= true,
965 .StallAtPixelScoreboard
= true);
967 dst_offset
= buffer
->offset
+ destOffset
;
968 for (uint32_t i
= 0; i
< queryCount
; i
++) {
970 slot_offset
= (startQuery
+ i
) * sizeof(struct anv_query_pool_slot
);
972 emit_load_alu_reg_u64(&cmd_buffer
->batch
, CS_GPR(0), &pool
->bo
, slot_offset
);
973 emit_load_alu_reg_u64(&cmd_buffer
->batch
, CS_GPR(1), &pool
->bo
, slot_offset
+ 8);
975 /* FIXME: We need to clamp the result for 32 bit. */
977 uint32_t *dw
= anv_batch_emitn(&cmd_buffer
->batch
, 5, GENX(MI_MATH
));
978 dw
[1] = alu(OPCODE_LOAD
, OPERAND_SRCA
, OPERAND_R1
);
979 dw
[2] = alu(OPCODE_LOAD
, OPERAND_SRCB
, OPERAND_R0
);
980 dw
[3] = alu(OPCODE_SUB
, 0, 0);
981 dw
[4] = alu(OPCODE_STORE
, OPERAND_R2
, OPERAND_ACCU
);
983 anv_batch_emit(&cmd_buffer
->batch
, GENX(MI_STORE_REGISTER_MEM
),
984 .RegisterAddress
= CS_GPR(2),
985 /* FIXME: This is only lower 32 bits */
986 .MemoryAddress
= { buffer
->bo
, dst_offset
});
988 if (flags
& VK_QUERY_RESULT_64_BIT
)
989 anv_batch_emit(&cmd_buffer
->batch
, GENX(MI_STORE_REGISTER_MEM
),
990 .RegisterAddress
= CS_GPR(2) + 4,
991 /* FIXME: This is only lower 32 bits */
992 .MemoryAddress
= { buffer
->bo
, dst_offset
+ 4 });
994 dst_offset
+= destStride
;