/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"
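
/* Flush any dirty push constant blocks to the hardware.  All of the
 * 3DSTATE_CONSTANT_XS packets share the same layout; only the 3D command
 * sub-opcode differs per stage, so we emit a CONSTANT_VS template and patch
 * in the per-stage sub-opcode from the table below.
 */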
static void
gen8_cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer)
{
   uint32_t stage;

   static const uint32_t push_constant_opcodes[] = {
      [VK_SHADER_STAGE_VERTEX]          = 21,
      [VK_SHADER_STAGE_TESS_CONTROL]    = 25, /* HS */
      [VK_SHADER_STAGE_TESS_EVALUATION] = 26, /* DS */
      [VK_SHADER_STAGE_GEOMETRY]        = 22,
      [VK_SHADER_STAGE_FRAGMENT]        = 23,
      [VK_SHADER_STAGE_COMPUTE]         = 0,
   };

   uint32_t flushed = 0;

   for_each_bit(stage, cmd_buffer->state.push_constants_dirty) {
      struct anv_state state = anv_cmd_buffer_push_constants(cmd_buffer, stage);

      if (state.offset == 0)
         continue;

      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CONSTANT_VS,
                     ._3DCommandSubOpcode = push_constant_opcodes[stage],
                     .ConstantBody = {
                        .PointerToConstantBuffer0 = { .offset = state.offset },
                        .ConstantBuffer0ReadLength = DIV_ROUND_UP(state.alloc_size, 32),
                     });

      flushed |= 1 << stage;
   }

   cmd_buffer->state.push_constants_dirty &= ~flushed;
}
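
/* Emit all dirty 3D state (pipeline select, vertex buffers, viewport,
 * rasterizer, depth/stencil and color-calc pointers) into the batch before a
 * draw, then clear the corresponding dirty bits.
 */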
static void
gen8_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
   uint32_t *p;

   uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used;

   assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);

   if (cmd_buffer->state.current_pipeline != _3D) {
      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
                     .PipelineSelection = _3D);
      cmd_buffer->state.current_pipeline = _3D;
   }

   if (vb_emit) {
      const uint32_t num_buffers = __builtin_popcount(vb_emit);
      const uint32_t num_dwords = 1 + num_buffers * 4;

      p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
                          GEN8_3DSTATE_VERTEX_BUFFERS);
      uint32_t vb, i = 0;
      for_each_bit(vb, vb_emit) {
         struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer;
         uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset;

         struct GEN8_VERTEX_BUFFER_STATE state = {
            .VertexBufferIndex = vb,
            .MemoryObjectControlState = GEN8_MOCS,
            .AddressModifyEnable = true,
            .BufferPitch = pipeline->binding_stride[vb],
            .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
            .BufferSize = buffer->size - offset
         };

         GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state);
         i++;
      }
   }

   if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) {
      /* If somebody compiled a pipeline after starting a command buffer the
       * scratch bo may have grown since we started this cmd buffer (and
       * emitted STATE_BASE_ADDRESS).  If we're binding that pipeline now,
       * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
      if (cmd_buffer->state.scratch_size < pipeline->total_scratch)
         anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
   }

   if (cmd_buffer->state.descriptors_dirty)
      anv_flush_descriptor_sets(cmd_buffer);

   if (cmd_buffer->state.push_constants_dirty)
      gen8_cmd_buffer_flush_push_constants(cmd_buffer);

   if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) {
      struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state;
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS,
                     .ScissorRectPointer = vp_state->scissor.offset);
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
                     .CCViewportPointer = vp_state->cc_vp.offset);
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
                     .SFClipViewportPointer = vp_state->sf_clip_vp.offset);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
                                  ANV_CMD_BUFFER_RS_DIRTY)) {
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.rs_state->gen8.sf,
                           pipeline->gen8.sf);
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.rs_state->gen8.raster,
                           pipeline->gen8.raster);
   }

   if (cmd_buffer->state.ds_state &&
       (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
                                   ANV_CMD_BUFFER_DS_DIRTY))) {
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.ds_state->gen8.wm_depth_stencil,
                           pipeline->gen8.wm_depth_stencil);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_CB_DIRTY |
                                  ANV_CMD_BUFFER_DS_DIRTY)) {
      struct anv_state state;
      if (cmd_buffer->state.ds_state == NULL)
         state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
                                             cmd_buffer->state.cb_state->color_calc_state,
                                             GEN8_COLOR_CALC_STATE_length, 64);
      else if (cmd_buffer->state.cb_state == NULL)
         state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
                                             cmd_buffer->state.ds_state->gen8.color_calc_state,
                                             GEN8_COLOR_CALC_STATE_length, 64);
      else
         state = anv_cmd_buffer_merge_dynamic(cmd_buffer,
                                              cmd_buffer->state.ds_state->gen8.color_calc_state,
                                              cmd_buffer->state.cb_state->color_calc_state,
                                              GEN8_COLOR_CALC_STATE_length, 64);

      anv_batch_emit(&cmd_buffer->batch,
                     GEN8_3DSTATE_CC_STATE_POINTERS,
                     .ColorCalcStatePointer = state.offset,
                     .ColorCalcStatePointerValid = true);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
                                  ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) {
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.state_vf, pipeline->gen8.vf);
   }

   cmd_buffer->state.vb_dirty &= ~vb_emit;
   cmd_buffer->state.dirty = 0;
}

void gen8_CmdDraw(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    firstVertex,
    uint32_t                                    vertexCount,
    uint32_t                                    firstInstance,
    uint32_t                                    instanceCount)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   gen8_cmd_buffer_flush_state(cmd_buffer);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .VertexAccessType = SEQUENTIAL,
                  .VertexCountPerInstance = vertexCount,
                  .StartVertexLocation = firstVertex,
                  .InstanceCount = instanceCount,
                  .StartInstanceLocation = firstInstance,
                  .BaseVertexLocation = 0);
}

void gen8_CmdDrawIndexed(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    firstIndex,
    uint32_t                                    indexCount,
    int32_t                                     vertexOffset,
    uint32_t                                    firstInstance,
    uint32_t                                    instanceCount)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   gen8_cmd_buffer_flush_state(cmd_buffer);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .VertexAccessType = RANDOM,
                  .VertexCountPerInstance = indexCount,
                  .StartVertexLocation = firstIndex,
                  .InstanceCount = instanceCount,
                  .StartInstanceLocation = firstInstance,
                  .BaseVertexLocation = vertexOffset);
}

static void
emit_lrm(struct anv_batch *batch,
         uint32_t reg, struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
                  .RegisterAddress = reg,
                  .MemoryAddress = { bo, offset });
}

static void
emit_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
{
   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM,
                  .RegisterOffset = reg,
                  .DataDWord = imm);
}

/* Auto-Draw / Indirect Registers */
#define GEN7_3DPRIM_END_OFFSET      0x2420
#define GEN7_3DPRIM_START_VERTEX    0x2430
#define GEN7_3DPRIM_VERTEX_COUNT    0x2434
#define GEN7_3DPRIM_INSTANCE_COUNT  0x2438
#define GEN7_3DPRIM_START_INSTANCE  0x243C
#define GEN7_3DPRIM_BASE_VERTEX     0x2440
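
/* vkCmdDrawIndirect reads its arguments (vertex count, instance count,
 * first vertex, first instance) straight out of the buffer object with
 * MI_LOAD_REGISTER_MEM into the registers above, so the draw parameters
 * never pass through the CPU.
 */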
void gen8_CmdDrawIndirect(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    uint32_t                                    count,
    uint32_t                                    stride)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   gen8_cmd_buffer_flush_state(cmd_buffer);

   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
   emit_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .IndirectParameterEnable = true,
                  .VertexAccessType = SEQUENTIAL);
}

void gen8_CmdBindIndexBuffer(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    VkIndexType                                 indexType)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);

   static const uint32_t vk_to_gen_index_type[] = {
      [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
      [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
   };

   struct GEN8_3DSTATE_VF vf = {
      GEN8_3DSTATE_VF_header,
      .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? UINT16_MAX : UINT32_MAX,
   };
   GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf);

   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY;

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER,
                  .IndexFormat = vk_to_gen_index_type[indexType],
                  .MemoryObjectControlState = GEN8_MOCS,
                  .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
                  .BufferSize = buffer->size - offset);
}

static VkResult
gen8_flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct anv_state surfaces = { 0, }, samplers = { 0, };
   VkResult result;

   result = anv_cmd_buffer_emit_samplers(cmd_buffer,
                                         VK_SHADER_STAGE_COMPUTE, &samplers);
   if (result != VK_SUCCESS)
      return result;
   result = anv_cmd_buffer_emit_binding_table(cmd_buffer,
                                              VK_SHADER_STAGE_COMPUTE, &surfaces);
   if (result != VK_SUCCESS)
      return result;

   struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = {
      .KernelStartPointer = pipeline->cs_simd,
      .KernelStartPointerHigh = 0,
      .BindingTablePointer = surfaces.offset,
      .BindingTableEntryCount = 0,
      .SamplerStatePointer = samplers.offset,
      .SamplerCount = 0,
      .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
   };

   uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
   struct anv_state state =
      anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);

   GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);

   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
                  .InterfaceDescriptorTotalLength = size,
                  .InterfaceDescriptorDataStartAddress = state.offset);

   return VK_SUCCESS;
}

static void
gen8_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   VkResult result;

   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);

   if (cmd_buffer->state.current_pipeline != GPGPU) {
      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
                     .PipelineSelection = GPGPU);
      cmd_buffer->state.current_pipeline = GPGPU;
   }

   if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);

   if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
       (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) {
      result = gen8_flush_compute_descriptor_set(cmd_buffer);
      assert(result == VK_SUCCESS);
      cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
   }

   cmd_buffer->state.compute_dirty = 0;
}

void gen8_CmdDrawIndexedIndirect(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    uint32_t                                    count,
    uint32_t                                    stride)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   gen8_cmd_buffer_flush_state(cmd_buffer);

   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
   emit_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .IndirectParameterEnable = true,
                  .VertexAccessType = RANDOM);
}

void gen8_CmdDispatch(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    x,
    uint32_t                                    y,
    uint32_t                                    z)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;

   gen8_cmd_buffer_flush_compute_state(cmd_buffer);

   anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
                  .SIMDSize = prog_data->simd_size / 16,
                  .ThreadDepthCounterMaximum = 0,
                  .ThreadHeightCounterMaximum = 0,
                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
                  .ThreadGroupIDXDimension = x,
                  .ThreadGroupIDYDimension = y,
                  .ThreadGroupIDZDimension = z,
                  .RightExecutionMask = pipeline->cs_right_mask,
                  .BottomExecutionMask = 0xffffffff);

   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
}

#define GPGPU_DISPATCHDIMX 0x2500
#define GPGPU_DISPATCHDIMY 0x2504
#define GPGPU_DISPATCHDIMZ 0x2508
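
/* As with indirect draws, an indirect dispatch loads its x/y/z group counts
 * from the buffer into these MMIO registers with MI_LOAD_REGISTER_MEM and
 * lets GPGPU_WALKER consume them directly.
 */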
void gen8_CmdDispatchIndirect(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   gen8_cmd_buffer_flush_compute_state(cmd_buffer);

   emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
   emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
   emit_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);

   anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
                  .IndirectParameterEnable = true,
                  .SIMDSize = prog_data->simd_size / 16,
                  .ThreadDepthCounterMaximum = 0,
                  .ThreadHeightCounterMaximum = 0,
                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
                  .RightExecutionMask = pipeline->cs_right_mask,
                  .BottomExecutionMask = 0xffffffff);

   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
}

static void
gen8_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_subpass *subpass = cmd_buffer->state.subpass;
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct anv_depth_stencil_view *view = NULL;

   if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
      const struct anv_attachment_view *aview =
         fb->attachments[subpass->depth_stencil_attachment];
      assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL);
      view = (const struct anv_depth_stencil_view *)aview;
   }

   /* FIXME: Implement the PMA stall W/A */
   /* FIXME: Width and Height are wrong */

   if (view) {
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
                     .SurfaceType = SURFTYPE_2D,
                     .DepthWriteEnable = view->depth_stride > 0,
                     .StencilWriteEnable = view->stencil_stride > 0,
                     .HierarchicalDepthBufferEnable = false,
                     .SurfaceFormat = view->depth_format,
                     .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0,
                     .SurfaceBaseAddress = { view->bo, view->depth_offset },
                     .Height = fb->height - 1,
                     .Width = fb->width - 1,
                     .LOD = 0,
                     .Depth = 1 - 1,
                     .MinimumArrayElement = 0,
                     .DepthBufferObjectControlState = GEN8_MOCS,
                     .RenderTargetViewExtent = 1 - 1,
                     .SurfaceQPitch = view->depth_qpitch >> 2);

      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER,
                     .StencilBufferEnable = view->stencil_stride > 0,
                     .StencilBufferObjectControlState = GEN8_MOCS,
                     .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0,
                     .SurfaceBaseAddress = { view->bo, view->stencil_offset },
                     .SurfaceQPitch = view->stencil_qpitch >> 2);
   } else {
      /* Even when no depth buffer is present, the hardware requires that
       * 3DSTATE_DEPTH_BUFFER be programmed correctly.  The Broadwell PRM says:
       *
       *    If a null depth buffer is bound, the driver must instead bind depth as:
       *       3DSTATE_DEPTH.SurfaceType = SURFTYPE_2D
       *       3DSTATE_DEPTH.Width = 1
       *       3DSTATE_DEPTH.Height = 1
       *       3DSTATE_DEPTH.SuraceFormat = D16_UNORM
       *       3DSTATE_DEPTH.SurfaceBaseAddress = 0
       *       3DSTATE_DEPTH.HierarchicalDepthBufferEnable = 0
       *       3DSTATE_WM_DEPTH_STENCIL.DepthTestEnable = 0
       *       3DSTATE_WM_DEPTH_STENCIL.DepthBufferWriteEnable = 0
       *
       * The PRM is wrong, though.  The width and height must be programmed to
       * the actual framebuffer's width and height.
       */
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
                     .SurfaceType = SURFTYPE_2D,
                     .SurfaceFormat = D16_UNORM,
                     .Width = fb->width - 1,
                     .Height = fb->height - 1);

      /* Disable the stencil buffer. */
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER);
   }

   /* Disable hierarchical depth buffers. */
   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER);

   /* Clear the clear params. */
   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS);
}

static void
gen8_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
                              struct anv_subpass *subpass)
{
   cmd_buffer->state.subpass = subpass;

   cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;

   gen8_cmd_buffer_emit_depth_stencil(cmd_buffer);
}

void gen8_CmdBeginRenderPass(
    VkCmdBuffer                                 cmdBuffer,
    const VkRenderPassBeginInfo*                pRenderPassBegin,
    VkRenderPassContents                        contents)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);
   ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);

   cmd_buffer->state.framebuffer = framebuffer;
   cmd_buffer->state.pass = pass;

   const VkRect2D *render_area = &pRenderPassBegin->renderArea;

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE,
                  .ClippedDrawingRectangleYMin = render_area->offset.y,
                  .ClippedDrawingRectangleXMin = render_area->offset.x,
                  .ClippedDrawingRectangleYMax =
                     render_area->offset.y + render_area->extent.height - 1,
                  .ClippedDrawingRectangleXMax =
                     render_area->offset.x + render_area->extent.width - 1,
                  .DrawingRectangleOriginY = 0,
                  .DrawingRectangleOriginX = 0);

   anv_cmd_buffer_clear_attachments(cmd_buffer, pass,
                                    pRenderPassBegin->pAttachmentClearValues);

   gen8_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses);
}

void gen8_CmdNextSubpass(
    VkCmdBuffer                                 cmdBuffer,
    VkRenderPassContents                        contents)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY);

   gen8_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1);
}

void gen8_CmdEndRenderPass(
    VkCmdBuffer                                 cmdBuffer)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   /* Emit a flushing pipe control at the end of a pass.  This is kind of a
    * hack but it ensures that render targets always actually get written.
    * Eventually, we should do flushing based on image format transitions
    * or something of that nature.
    */
   anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                  .PostSyncOperation = NoWrite,
                  .RenderTargetCacheFlushEnable = true,
                  .InstructionCacheInvalidateEnable = true,
                  .DepthCacheFlushEnable = true,
                  .VFCacheInvalidationEnable = true,
                  .TextureCacheInvalidationEnable = true,
                  .CommandStreamerStallEnable = true);
}

static void
emit_ps_depth_count(struct anv_batch *batch,
                    struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GEN8_PIPE_CONTROL,
                  .DestinationAddressType = DAT_PPGTT,
                  .PostSyncOperation = WritePSDepthCount,
                  .Address = { bo, offset });  /* FIXME: This is only lower 32 bits */
}
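
/* Occlusion queries work by writing the PS depth count at vkCmdBeginQuery and
 * again (8 bytes further into the slot) at vkCmdEndQuery;
 * vkCmdCopyQueryPoolResults later subtracts the two snapshots on the command
 * streamer to produce the sample count.
 */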
void gen8_CmdBeginQuery(
    VkCmdBuffer                                 cmdBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    slot,
    VkQueryControlFlags                         flags)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);

   switch (pool->type) {
   case VK_QUERY_TYPE_OCCLUSION:
      emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
                          slot * sizeof(struct anv_query_pool_slot));
      break;

   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
   default:
      unreachable("");
   }
}

void gen8_CmdEndQuery(
    VkCmdBuffer                                 cmdBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    slot)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);

   switch (pool->type) {
   case VK_QUERY_TYPE_OCCLUSION:
      emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
                          slot * sizeof(struct anv_query_pool_slot) + 8);
      break;

   case VK_QUERY_TYPE_PIPELINE_STATISTICS:
   default:
      unreachable("");
   }
}
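
/* vkCmdWriteTimestamp: a TOP timestamp is captured immediately by storing the
 * TIMESTAMP register with MI_STORE_REGISTER_MEM (two 32-bit halves), while a
 * BOTTOM timestamp is written through a PIPE_CONTROL post-sync operation so
 * it is ordered after prior rendering.
 */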
#define TIMESTAMP 0x2358

void gen8_CmdWriteTimestamp(
    VkCmdBuffer                                 cmdBuffer,
    VkTimestampType                             timestampType,
    VkBuffer                                    destBuffer,
    VkDeviceSize                                destOffset)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
   struct anv_bo *bo = buffer->bo;

   switch (timestampType) {
   case VK_TIMESTAMP_TYPE_TOP:
      anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
                     .RegisterAddress = TIMESTAMP,
                     .MemoryAddress = { bo, buffer->offset + destOffset });
      anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
                     .RegisterAddress = TIMESTAMP + 4,
                     .MemoryAddress = { bo, buffer->offset + destOffset + 4 });
      break;

   case VK_TIMESTAMP_TYPE_BOTTOM:
      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                     .DestinationAddressType = DAT_PPGTT,
                     .PostSyncOperation = WriteTimestamp,
                     .Address = /* FIXME: This is only lower 32 bits */
                        { bo, buffer->offset + destOffset });
      break;

   default:
      break;
   }
}

#define alu_opcode(v)   __gen_field((v), 20, 31)
#define alu_operand1(v) __gen_field((v), 10, 19)
#define alu_operand2(v) __gen_field((v), 0, 9)
#define alu(opcode, operand1, operand2) \
   alu_opcode(opcode) | alu_operand1(operand1) | alu_operand2(operand2)

#define OPCODE_NOOP      0x000
#define OPCODE_LOAD      0x080
#define OPCODE_LOADINV   0x480
#define OPCODE_LOAD0     0x081
#define OPCODE_LOAD1     0x481
#define OPCODE_ADD       0x100
#define OPCODE_SUB       0x101
#define OPCODE_AND       0x102
#define OPCODE_OR        0x103
#define OPCODE_XOR       0x104
#define OPCODE_STORE     0x180
#define OPCODE_STOREINV  0x580

#define OPERAND_R0   0x00
#define OPERAND_R1   0x01
#define OPERAND_R2   0x02
#define OPERAND_R3   0x03
#define OPERAND_R4   0x04
#define OPERAND_SRCA 0x20
#define OPERAND_SRCB 0x21
#define OPERAND_ACCU 0x31
#define OPERAND_ZF   0x32
#define OPERAND_CF   0x33

#define CS_GPR(n) (0x2600 + (n) * 8)
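
/* Each MI_MATH ALU instruction is a single dword: opcode in bits 31:20,
 * operand 1 in bits 19:10 and operand 2 in bits 9:0 (see the alu_*() helpers
 * above).  CS_GPR(n) is the MMIO offset of command streamer general-purpose
 * register n; each register is 64 bits (8 bytes) wide.
 */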
static void
emit_load_alu_reg_u64(struct anv_batch *batch, uint32_t reg,
                      struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
                  .RegisterAddress = reg,
                  .MemoryAddress = { bo, offset });
   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
                  .RegisterAddress = reg + 4,
                  .MemoryAddress = { bo, offset + 4 });
}

void gen8_CmdCopyQueryPoolResults(
    VkCmdBuffer                                 cmdBuffer,
    VkQueryPool                                 queryPool,
    uint32_t                                    startQuery,
    uint32_t                                    queryCount,
    VkBuffer                                    destBuffer,
    VkDeviceSize                                destOffset,
    VkDeviceSize                                destStride,
    VkQueryResultFlags                          flags)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
   ANV_FROM_HANDLE(anv_buffer, buffer, destBuffer);
   uint32_t slot_offset, dst_offset;

   if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
      /* Where is the availability info supposed to go? */
      anv_finishme("VK_QUERY_RESULT_WITH_AVAILABILITY_BIT");
      return;
   }

   assert(pool->type == VK_QUERY_TYPE_OCCLUSION);

   /* FIXME: If we're not waiting, should we just do this on the CPU? */
   if (flags & VK_QUERY_RESULT_WAIT_BIT)
      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                     .CommandStreamerStallEnable = true,
                     .StallAtPixelScoreboard = true);

   dst_offset = buffer->offset + destOffset;
   for (uint32_t i = 0; i < queryCount; i++) {

      slot_offset = (startQuery + i) * sizeof(struct anv_query_pool_slot);

      emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(0), &pool->bo, slot_offset);
      emit_load_alu_reg_u64(&cmd_buffer->batch, CS_GPR(1), &pool->bo, slot_offset + 8);

      /* FIXME: We need to clamp the result for 32 bit. */

      uint32_t *dw = anv_batch_emitn(&cmd_buffer->batch, 5, GEN8_MI_MATH);
      dw[1] = alu(OPCODE_LOAD, OPERAND_SRCA, OPERAND_R1);
      dw[2] = alu(OPCODE_LOAD, OPERAND_SRCB, OPERAND_R0);
      dw[3] = alu(OPCODE_SUB, 0, 0);
      dw[4] = alu(OPCODE_STORE, OPERAND_R2, OPERAND_ACCU);

      anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
                     .RegisterAddress = CS_GPR(2),
                     /* FIXME: This is only lower 32 bits */
                     .MemoryAddress = { buffer->bo, dst_offset });

      if (flags & VK_QUERY_RESULT_64_BIT)
         anv_batch_emit(&cmd_buffer->batch, GEN8_MI_STORE_REGISTER_MEM,
                        .RegisterAddress = CS_GPR(2) + 4,
                        /* FIXME: This is only lower 32 bits */
                        .MemoryAddress = { buffer->bo, dst_offset + 4 });

      dst_offset += destStride;
   }
}

void
gen8_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_bo *scratch_bo = NULL;

   cmd_buffer->state.scratch_size =
      anv_block_pool_size(&device->scratch_block_pool);
   if (cmd_buffer->state.scratch_size > 0)
      scratch_bo = &device->scratch_block_pool.bo;

   anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS,
                  .GeneralStateBaseAddress = { scratch_bo, 0 },
                  .GeneralStateMemoryObjectControlState = GEN8_MOCS,
                  .GeneralStateBaseAddressModifyEnable = true,
                  .GeneralStateBufferSize = 0xfffff,
                  .GeneralStateBufferSizeModifyEnable = true,

                  .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 },
                  .SurfaceStateMemoryObjectControlState = GEN8_MOCS,
                  .SurfaceStateBaseAddressModifyEnable = true,

                  .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 },
                  .DynamicStateMemoryObjectControlState = GEN8_MOCS,
                  .DynamicStateBaseAddressModifyEnable = true,
                  .DynamicStateBufferSize = 0xfffff,
                  .DynamicStateBufferSizeModifyEnable = true,

                  .IndirectObjectBaseAddress = { NULL, 0 },
                  .IndirectObjectMemoryObjectControlState = GEN8_MOCS,
                  .IndirectObjectBaseAddressModifyEnable = true,
                  .IndirectObjectBufferSize = 0xfffff,
                  .IndirectObjectBufferSizeModifyEnable = true,

                  .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
                  .InstructionMemoryObjectControlState = GEN8_MOCS,
                  .InstructionBaseAddressModifyEnable = true,
                  .InstructionBufferSize = 0xfffff,
                  .InstructionBuffersizeModifyEnable = true);

   /* After re-setting the surface state base address, we have to do some
    * cache flushing so that the sampler engine will pick up the new
    * SURFACE_STATE objects and binding tables.  From the Broadwell PRM,
    * Shared Function > 3D Sampler > State > State Caching (page 96):
    *
    *    Coherency with system memory in the state cache, like the texture
    *    cache is handled partially by software.  It is expected that the
    *    command stream or shader will issue Cache Flush operation or
    *    Cache_Flush sampler message to ensure that the L1 cache remains
    *    coherent with system memory.
    *
    *    [...]
    *
    *    Whenever the value of the Dynamic_State_Base_Addr,
    *    Surface_State_Base_Addr are altered, the L1 state cache must be
    *    invalidated to ensure the new surface or sampler state is fetched
    *    from system memory.
    *
    * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit
    * which, according to the PIPE_CONTROL instruction documentation in the
    * Broadwell PRM:
    *
    *    Setting this bit is independent of any other bit in this packet.
    *    This bit controls the invalidation of the L1 and L2 state caches
    *    at the top of the pipe i.e. at the parsing time.
    *
    * Unfortunately, experimentation seems to indicate that state cache
    * invalidation through a PIPE_CONTROL does nothing whatsoever in
    * regards to surface state and binding tables.  Instead, it seems that
    * invalidating the texture cache is what is actually needed.
    *
    * XXX: As far as we have been able to determine through
    * experimentation, flushing the texture cache appears to be sufficient.
    * The theory here is that all of the sampling/rendering units cache the
    * binding table in the texture cache.  However, we have yet to be able
    * to actually confirm this.
    */
   anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                  .TextureCacheInvalidationEnable = true);
}
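
/* Translate a vkCmdPipelineBarrier into a single PIPE_CONTROL: the source
 * stage mask selects stall bits, and the memory barriers' input/output masks
 * are accumulated into the packet's flush and invalidate bits.
 */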
void gen8_CmdPipelineBarrier(
    VkCmdBuffer                                 cmdBuffer,
    VkPipelineStageFlags                        srcStageMask,
    VkPipelineStageFlags                        destStageMask,
    VkBool32                                    byRegion,
    uint32_t                                    memBarrierCount,
    const void* const*                          ppMemBarriers)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   uint32_t b, *dw;

   struct GEN8_PIPE_CONTROL cmd = {
      GEN8_PIPE_CONTROL_header,
      .PostSyncOperation = NoWrite,
   };

   /* XXX: I think waitEvent is a no-op on our HW.  We should verify that. */

   if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
      /* This is just what PIPE_CONTROL does */
   }

   if (anv_clear_mask(&srcStageMask,
                      VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
                      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
                      VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
                      VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT |
                      VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT |
                      VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
                      VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
                      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
                      VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
                      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
      cmd.StallAtPixelScoreboard = true;
   }

   if (anv_clear_mask(&srcStageMask,
                      VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
                      VK_PIPELINE_STAGE_TRANSFER_BIT |
                      VK_PIPELINE_STAGE_TRANSITION_BIT)) {
      cmd.CommandStreamerStallEnable = true;
   }

   if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) {
      anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT");
   }

   /* On our hardware, all stages will wait for execution as needed. */

   /* We checked all known VkPipeEventFlags. */
   anv_assert(srcStageMask == 0);

   /* XXX: Right now, we're really dumb and just flush whatever categories
    * the app asks for.  One of these days we may make this a bit better
    * but right now that's all the hardware allows for in most areas.
    */
   VkMemoryOutputFlags out_flags = 0;
   VkMemoryInputFlags in_flags = 0;

   for (uint32_t i = 0; i < memBarrierCount; i++) {
      const struct anv_common *common = ppMemBarriers[i];
      switch (common->sType) {
      case VK_STRUCTURE_TYPE_MEMORY_BARRIER: {
         ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common);
         out_flags |= barrier->outputMask;
         in_flags |= barrier->inputMask;
         break;
      }
      case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: {
         ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common);
         out_flags |= barrier->outputMask;
         in_flags |= barrier->inputMask;
         break;
      }
      case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: {
         ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common);
         out_flags |= barrier->outputMask;
         in_flags |= barrier->inputMask;
         break;
      }
      default:
         unreachable("Invalid memory barrier type");
      }
   }

   for_each_bit(b, out_flags) {
      switch ((VkMemoryOutputFlags)(1 << b)) {
      case VK_MEMORY_OUTPUT_HOST_WRITE_BIT:
         break; /* FIXME: Little-core systems */
      case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT:
         cmd.DCFlushEnable = true;
         break;
      case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT:
         cmd.RenderTargetCacheFlushEnable = true;
         break;
      case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
         cmd.DepthCacheFlushEnable = true;
         break;
      case VK_MEMORY_OUTPUT_TRANSFER_BIT:
         cmd.RenderTargetCacheFlushEnable = true;
         cmd.DepthCacheFlushEnable = true;
         break;
      default:
         unreachable("Invalid memory output flag");
      }
   }

   for_each_bit(b, in_flags) {
      switch ((VkMemoryInputFlags)(1 << b)) {
      case VK_MEMORY_INPUT_HOST_READ_BIT:
         break; /* FIXME: Little-core systems */
      case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT:
      case VK_MEMORY_INPUT_INDEX_FETCH_BIT:
      case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT:
         cmd.VFCacheInvalidationEnable = true;
         break;
      case VK_MEMORY_INPUT_UNIFORM_READ_BIT:
         cmd.ConstantCacheInvalidationEnable = true;
         /* fallthrough */
      case VK_MEMORY_INPUT_SHADER_READ_BIT:
         cmd.DCFlushEnable = true;
         cmd.TextureCacheInvalidationEnable = true;
         break;
      case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT:
      case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
         break; /* XXX: Hunh? */
      case VK_MEMORY_INPUT_TRANSFER_BIT:
         cmd.TextureCacheInvalidationEnable = true;
         break;
      }
   }

   dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length);
   GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd);
}