/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <assert.h>
#include <stdbool.h>
#include <string.h>

#include "anv_private.h"
/** \file anv_cmd_buffer.c
 *
 * This file contains everything needed for emitting commands into a command
 * buffer.  This includes implementations of most of the vkCmd*
 * entrypoints.  It is concerned entirely with state emission and not with
 * the command buffer data structure itself.  As far as this file is
 * concerned, most of anv_cmd_buffer is magic.
 */
static void
anv_cmd_state_init(struct anv_cmd_state *state)
{
   state->rs_state = NULL;
   state->vp_state = NULL;
   state->cb_state = NULL;
   state->ds_state = NULL;
   memset(&state->state_vf, 0, sizeof(state->state_vf));
   memset(&state->descriptors, 0, sizeof(state->descriptors));

   state->dirty = 0;
   state->vb_dirty = 0;
   state->descriptors_dirty = 0;
   state->pipeline = NULL;
   state->vp_state = NULL;
   state->rs_state = NULL;
   state->ds_state = NULL;
}

VkResult anv_CreateCommandBuffer(
    VkDevice                                    _device,
    const VkCmdBufferCreateInfo*                pCreateInfo,
    VkCmdBuffer*                                pCmdBuffer)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_cmd_pool, pool, pCreateInfo->cmdPool);
   struct anv_cmd_buffer *cmd_buffer;
   VkResult result;

   cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8,
                                 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
   if (cmd_buffer == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   cmd_buffer->device = device;

   result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer);
   if (result != VK_SUCCESS)
      goto fail;

   anv_state_stream_init(&cmd_buffer->surface_state_stream,
                         &device->surface_state_block_pool);
   anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
                         &device->dynamic_state_block_pool);

   cmd_buffer->level = pCreateInfo->level;
   cmd_buffer->opt_flags = 0;

   anv_cmd_state_init(&cmd_buffer->state);

   if (pool) {
      list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
   } else {
      /* Init the pool_link so we can safely call list_del when we destroy
       * the command buffer
       */
      list_inithead(&cmd_buffer->pool_link);
   }

   *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer);

   return VK_SUCCESS;

 fail:
   anv_device_free(device, cmd_buffer);

   return result;
}

VkResult anv_DestroyCommandBuffer(
    VkDevice                                    _device,
    VkCmdBuffer                                 _cmd_buffer)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer);

   list_del(&cmd_buffer->pool_link);

   anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer);

   anv_state_stream_finish(&cmd_buffer->surface_state_stream);
   anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
   anv_device_free(device, cmd_buffer);

   return VK_SUCCESS;
}

VkResult anv_ResetCommandBuffer(
    VkCmdBuffer                                 cmdBuffer,
    VkCmdBufferResetFlags                       flags)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer);

   anv_cmd_state_init(&cmd_buffer->state);

   return VK_SUCCESS;
}

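/* Emit STATE_BASE_ADDRESS along with the cache flushing it requires.  All
 * of the anv_state offsets emitted later (binding tables, surface state,
 * dynamic state) are relative to the base addresses programmed here, so
 * this must be re-emitted whenever the backing bo for one of those pools
 * changes.
 */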
void
anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_bo *scratch_bo = NULL;

   cmd_buffer->state.scratch_size =
      anv_block_pool_size(&device->scratch_block_pool);
   if (cmd_buffer->state.scratch_size > 0)
      scratch_bo = &device->scratch_block_pool.bo;

   anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS,
      .GeneralStateBaseAddress = { scratch_bo, 0 },
      .GeneralStateMemoryObjectControlState = GEN8_MOCS,
      .GeneralStateBaseAddressModifyEnable = true,
      .GeneralStateBufferSize = 0xfffff,
      .GeneralStateBufferSizeModifyEnable = true,

      .SurfaceStateBaseAddress = { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 },
      .SurfaceStateMemoryObjectControlState = GEN8_MOCS,
      .SurfaceStateBaseAddressModifyEnable = true,

      .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 },
      .DynamicStateMemoryObjectControlState = GEN8_MOCS,
      .DynamicStateBaseAddressModifyEnable = true,
      .DynamicStateBufferSize = 0xfffff,
      .DynamicStateBufferSizeModifyEnable = true,

      .IndirectObjectBaseAddress = { NULL, 0 },
      .IndirectObjectMemoryObjectControlState = GEN8_MOCS,
      .IndirectObjectBaseAddressModifyEnable = true,
      .IndirectObjectBufferSize = 0xfffff,
      .IndirectObjectBufferSizeModifyEnable = true,

      .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
      .InstructionMemoryObjectControlState = GEN8_MOCS,
      .InstructionBaseAddressModifyEnable = true,
      .InstructionBufferSize = 0xfffff,
      .InstructionBuffersizeModifyEnable = true);

   /* After re-setting the surface state base address, we have to do some
    * cache flushing so that the sampler engine will pick up the new
    * SURFACE_STATE objects and binding tables.  From the Broadwell PRM,
    * Shared Function > 3D Sampler > State > State Caching (page 96):
    *
    *    Coherency with system memory in the state cache, like the texture
    *    cache is handled partially by software. It is expected that the
    *    command stream or shader will issue Cache Flush operation or
    *    Cache_Flush sampler message to ensure that the L1 cache remains
    *    coherent with system memory.
    *
    *    [...]
    *
    *    Whenever the value of the Dynamic_State_Base_Addr,
    *    Surface_State_Base_Addr are altered, the L1 state cache must be
    *    invalidated to ensure the new surface or sampler state is fetched
    *    from system memory.
    *
    * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit
    * which, according to the PIPE_CONTROL instruction documentation in the
    * Broadwell PRM:
    *
    *    Setting this bit is independent of any other bit in this packet.
    *    This bit controls the invalidation of the L1 and L2 state caches
    *    at the top of the pipe i.e. at the parsing time.
    *
    * Unfortunately, experimentation seems to indicate that state cache
    * invalidation through a PIPE_CONTROL does nothing whatsoever in
    * regards to surface state and binding tables.  Instead, it seems that
    * invalidating the texture cache is what is actually needed.
    *
    * XXX: As far as we have been able to determine through
    * experimentation, flushing the texture cache appears to be
    * sufficient.  The theory here is that all of the sampling/rendering
    * units cache the binding table in the texture cache.  However, we have
    * yet to be able to actually confirm this.
    */
   anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                  .TextureCacheInvalidationEnable = true);
}

VkResult anv_BeginCommandBuffer(
    VkCmdBuffer                                 cmdBuffer,
    const VkCmdBufferBeginInfo*                 pBeginInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   cmd_buffer->opt_flags = pBeginInfo->flags;

   if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_SECONDARY) {
      cmd_buffer->state.framebuffer =
         anv_framebuffer_from_handle(pBeginInfo->framebuffer);
      cmd_buffer->state.pass =
         anv_render_pass_from_handle(pBeginInfo->renderPass);

      /* FIXME: We shouldn't be starting on the first subpass */
      anv_cmd_buffer_begin_subpass(cmd_buffer,
                                   &cmd_buffer->state.pass->subpasses[0]);
   }

   anv_cmd_buffer_emit_state_base_address(cmd_buffer);
   cmd_buffer->state.current_pipeline = UINT32_MAX;

   return VK_SUCCESS;
}

VkResult anv_EndCommandBuffer(
    VkCmdBuffer                                 cmdBuffer)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   struct anv_device *device = cmd_buffer->device;

   anv_cmd_buffer_end_batch_buffer(cmd_buffer);

   if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) {
      /* The algorithm used to compute the validate list is not threadsafe as
       * it uses the bo->index field.  We have to lock the device around it.
       * Fortunately, the chances for contention here are probably very low.
       */
      pthread_mutex_lock(&device->mutex);
      anv_cmd_buffer_prepare_execbuf(cmd_buffer);
      pthread_mutex_unlock(&device->mutex);
   }

   return VK_SUCCESS;
}

void anv_CmdBindPipeline(
    VkCmdBuffer                                 cmdBuffer,
    VkPipelineBindPoint                         pipelineBindPoint,
    VkPipeline                                  _pipeline)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);

   switch (pipelineBindPoint) {
   case VK_PIPELINE_BIND_POINT_COMPUTE:
      cmd_buffer->state.compute_pipeline = pipeline;
      cmd_buffer->state.compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
      break;

   case VK_PIPELINE_BIND_POINT_GRAPHICS:
      cmd_buffer->state.pipeline = pipeline;
      cmd_buffer->state.vb_dirty |= pipeline->vb_used;
      cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
      break;

   default:
      assert(!"invalid bind point");
      break;
   }
}

void anv_CmdBindDynamicViewportState(
    VkCmdBuffer                                 cmdBuffer,
    VkDynamicViewportState                      dynamicViewportState)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, dynamicViewportState);

   cmd_buffer->state.vp_state = vp_state;
   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_VP_DIRTY;
}

void anv_CmdBindDynamicRasterState(
    VkCmdBuffer                                 cmdBuffer,
    VkDynamicRasterState                        dynamicRasterState)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, dynamicRasterState);

   cmd_buffer->state.rs_state = rs_state;
   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_RS_DIRTY;
}

void anv_CmdBindDynamicColorBlendState(
    VkCmdBuffer                                 cmdBuffer,
    VkDynamicColorBlendState                    dynamicColorBlendState)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, dynamicColorBlendState);

   cmd_buffer->state.cb_state = cb_state;
   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_CB_DIRTY;
}

void anv_CmdBindDynamicDepthStencilState(
    VkCmdBuffer                                 cmdBuffer,
    VkDynamicDepthStencilState                  dynamicDepthStencilState)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, dynamicDepthStencilState);

   cmd_buffer->state.ds_state = ds_state;
   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_DS_DIRTY;
}

void anv_CmdBindDescriptorSets(
    VkCmdBuffer                                 cmdBuffer,
    VkPipelineBindPoint                         pipelineBindPoint,
    VkPipelineLayout                            _layout,
    uint32_t                                    firstSet,
    uint32_t                                    setCount,
    const VkDescriptorSet*                      pDescriptorSets,
    uint32_t                                    dynamicOffsetCount,
    const uint32_t*                             pDynamicOffsets)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout);
   struct anv_descriptor_set_layout *set_layout;

   assert(firstSet + setCount < MAX_SETS);

   uint32_t dynamic_slot = 0;
   for (uint32_t i = 0; i < setCount; i++) {
      ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]);
      set_layout = layout->set[firstSet + i].layout;

      cmd_buffer->state.descriptors[firstSet + i].set = set;

      assert(set_layout->num_dynamic_buffers <
             ARRAY_SIZE(cmd_buffer->state.descriptors[0].dynamic_offsets));
      memcpy(cmd_buffer->state.descriptors[firstSet + i].dynamic_offsets,
             pDynamicOffsets + dynamic_slot,
             set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets));

      cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages;

      dynamic_slot += set_layout->num_dynamic_buffers;
   }
}

void anv_CmdBindIndexBuffer(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    VkIndexType                                 indexType)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);

   static const uint32_t vk_to_gen_index_type[] = {
      [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
      [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
   };

   struct GEN8_3DSTATE_VF vf = {
      GEN8_3DSTATE_VF_header,
      .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? UINT16_MAX : UINT32_MAX,
   };
   GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf);

   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY;

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER,
                  .IndexFormat = vk_to_gen_index_type[indexType],
                  .MemoryObjectControlState = GEN8_MOCS,
                  .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
                  .BufferSize = buffer->size - offset);
}

void anv_CmdBindVertexBuffers(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    startBinding,
    uint32_t                                    bindingCount,
    const VkBuffer*                             pBuffers,
    const VkDeviceSize*                         pOffsets)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings;

   /* We have to defer setting up vertex buffers since we need the buffer
    * stride from the pipeline. */

   assert(startBinding + bindingCount < MAX_VBS);
   for (uint32_t i = 0; i < bindingCount; i++) {
      vb[startBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]);
      vb[startBinding + i].offset = pOffsets[i];
      cmd_buffer->state.vb_dirty |= 1 << (startBinding + i);
   }
}

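/* Build the binding table for a single shader stage: render target
 * surfaces first (fragment stage only), then one SURFACE_STATE per surface
 * slot of each bound descriptor set.  Returns
 * VK_ERROR_OUT_OF_DEVICE_MEMORY when the current surface state bo fills up
 * so the caller can switch to a fresh bo and retry.
 */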
static VkResult
cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                              unsigned stage, struct anv_state *bt_state)
{
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   struct anv_subpass *subpass = cmd_buffer->state.subpass;
   struct anv_pipeline_layout *layout;
   uint32_t attachments, bias, size;

   if (stage == VK_SHADER_STAGE_COMPUTE)
      layout = cmd_buffer->state.compute_pipeline->layout;
   else
      layout = cmd_buffer->state.pipeline->layout;

   if (stage == VK_SHADER_STAGE_FRAGMENT) {
      bias = MAX_RTS;
      attachments = subpass->color_count;
   } else {
      bias = 0;
      attachments = 0;
   }

   /* This is a little awkward: layout can be NULL but we still have to
    * allocate and set a binding table for the PS stage for render
    * targets. */
   uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0;

   if (attachments + surface_count == 0)
      return VK_SUCCESS;

   size = (bias + surface_count) * sizeof(uint32_t);
   *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32);
   uint32_t *bt_map = bt_state->map;

   if (bt_state->map == NULL)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   /* This is highly annoying.  The Vulkan spec puts the depth-stencil
    * attachments in with the color attachments.  Unfortunately, thanks to
    * other aspects of the API, we can't really separate them before this
    * point.  Therefore, we have to walk all of the attachments but only
    * put the color attachments into the binding table.
    */
   for (uint32_t a = 0; a < attachments; a++) {
      const struct anv_attachment_view *attachment =
         fb->attachments[subpass->color_attachments[a]];

      assert(attachment->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR);
      const struct anv_color_attachment_view *view =
         (const struct anv_color_attachment_view *)attachment;

      struct anv_state state =
         anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);

      if (state.map == NULL)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;

      memcpy(state.map, view->view.surface_state.map, 64);

      /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
      *(uint64_t *)(state.map + 8 * 4) =
         anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer),
                            cmd_buffer->device,
                            state.offset + 8 * 4,
                            view->view.bo, view->view.offset);

      bt_map[a] = state.offset;
   }

   if (layout == NULL)
      return VK_SUCCESS;

   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set];
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      struct anv_descriptor_slot *surface_slots =
         set_layout->stage[stage].surface_start;

      uint32_t start = bias + layout->set[set].surface_start[stage];

      for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) {
         struct anv_surface_view *view =
            d->set->descriptors[surface_slots[b].index].view;

         if (!view)
            continue;

         struct anv_state state =
            anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);

         if (state.map == NULL)
            return VK_ERROR_OUT_OF_DEVICE_MEMORY;

         uint32_t offset;
         if (surface_slots[b].dynamic_slot >= 0) {
            uint32_t dynamic_offset =
               d->dynamic_offsets[surface_slots[b].dynamic_slot];

            offset = view->offset + dynamic_offset;
            anv_fill_buffer_surface_state(state.map, view->format, offset,
                                          view->range - dynamic_offset);
         } else {
            offset = view->offset;
            memcpy(state.map, view->surface_state.map, 64);
         }

         /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
         *(uint64_t *)(state.map + 8 * 4) =
            anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer),
                               cmd_buffer->device,
                               state.offset + 8 * 4,
                               view->bo, offset);

         bt_map[start + b] = state.offset;
      }
   }

   return VK_SUCCESS;
}

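/* Gather the SAMPLER_STATE structs for one stage into a single
 * dynamic-state allocation (16 bytes per sampler) suitable for the
 * 3DSTATE_SAMPLER_STATE_POINTERS_* packets.
 */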
static VkResult
cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
                         unsigned stage, struct anv_state *state)
{
   struct anv_pipeline_layout *layout;
   uint32_t sampler_count;

   if (stage == VK_SHADER_STAGE_COMPUTE)
      layout = cmd_buffer->state.compute_pipeline->layout;
   else
      layout = cmd_buffer->state.pipeline->layout;

   sampler_count = layout ? layout->stage[stage].sampler_count : 0;
   if (sampler_count == 0)
      return VK_SUCCESS;

   uint32_t size = sampler_count * 16;
   *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32);

   if (state->map == NULL)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set];
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      struct anv_descriptor_slot *sampler_slots =
         set_layout->stage[stage].sampler_start;

      uint32_t start = layout->set[set].sampler_start[stage];

      for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) {
         struct anv_sampler *sampler =
            d->set->descriptors[sampler_slots[b].index].sampler;

         if (!sampler)
            continue;

         memcpy(state->map + (start + b) * 16,
                sampler->state, sizeof(sampler->state));
      }
   }

   return VK_SUCCESS;
}

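/* Emit the samplers and binding table for one stage and point the hardware
 * at them.  The _VS versions of the pointer packets serve as a template
 * for every stage; only the 3DCommandSubOpcode differs.
 */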
static VkResult
flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage)
{
   struct anv_state surfaces = { 0, }, samplers = { 0, };
   VkResult result;

   result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers);
   if (result != VK_SUCCESS)
      return result;
   result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces);
   if (result != VK_SUCCESS)
      return result;

   static const uint32_t sampler_state_opcodes[] = {
      [VK_SHADER_STAGE_VERTEX]          = 43,
      [VK_SHADER_STAGE_TESS_CONTROL]    = 44, /* HS */
      [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */
      [VK_SHADER_STAGE_GEOMETRY]        = 46,
      [VK_SHADER_STAGE_FRAGMENT]        = 47,
      [VK_SHADER_STAGE_COMPUTE]         = 0,
   };

   static const uint32_t binding_table_opcodes[] = {
      [VK_SHADER_STAGE_VERTEX]          = 38,
      [VK_SHADER_STAGE_TESS_CONTROL]    = 39,
      [VK_SHADER_STAGE_TESS_EVALUATION] = 40,
      [VK_SHADER_STAGE_GEOMETRY]        = 41,
      [VK_SHADER_STAGE_FRAGMENT]        = 42,
      [VK_SHADER_STAGE_COMPUTE]         = 0,
   };

   if (samplers.alloc_size > 0) {
      anv_batch_emit(&cmd_buffer->batch,
                     GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS,
                     ._3DCommandSubOpcode = sampler_state_opcodes[stage],
                     .PointertoVSSamplerState = samplers.offset);
   }

   if (surfaces.alloc_size > 0) {
      anv_batch_emit(&cmd_buffer->batch,
                     GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS,
                     ._3DCommandSubOpcode = binding_table_opcodes[stage],
                     .PointertoVSBindingTable = surfaces.offset);
   }

   return VK_SUCCESS;
}

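/* Flush binding tables and samplers for all dirty active stages.  If a
 * stage runs out of surface state space, grab a fresh surface state bo,
 * re-emit STATE_BASE_ADDRESS, and re-flush every active stage, since the
 * previously emitted binding tables are relative to the old bo.
 */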
static void
flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
{
   uint32_t s, dirty = cmd_buffer->state.descriptors_dirty &
                       cmd_buffer->state.pipeline->active_stages;

   VkResult result = VK_SUCCESS;
   for_each_bit(s, dirty) {
      result = flush_descriptor_set(cmd_buffer, s);
      if (result != VK_SUCCESS)
         break;
   }

   if (result != VK_SUCCESS) {
      assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);

      result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
      assert(result == VK_SUCCESS);

      /* Re-emit state base addresses so we get the new surface state base
       * address before we start emitting binding tables etc.
       */
      anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      /* Re-emit all active binding tables */
      for_each_bit(s, cmd_buffer->state.pipeline->active_stages) {
         result = flush_descriptor_set(cmd_buffer, s);

         /* It had better succeed this time */
         assert(result == VK_SUCCESS);
      }
   }

   cmd_buffer->state.descriptors_dirty &= ~cmd_buffer->state.pipeline->active_stages;
}

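/* anv_cmd_buffer_emit_dynamic copies a pre-packed dword stream into fresh
 * dynamic state; anv_cmd_buffer_merge_dynamic below ORs two such streams
 * together.  This is how partially packed pipeline state is combined with
 * dynamic state objects into one complete hardware packet.
 */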
static struct anv_state
anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
                            uint32_t *a, uint32_t dwords, uint32_t alignment)
{
   struct anv_state state;

   state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                              dwords * 4, alignment);
   memcpy(state.map, a, dwords * 4);

   VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4));

   return state;
}

static struct anv_state
anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
                             uint32_t *a, uint32_t *b,
                             uint32_t dwords, uint32_t alignment)
{
   struct anv_state state;
   uint32_t *p;

   state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                              dwords * 4, alignment);
   p = state.map;
   for (uint32_t i = 0; i < dwords; i++)
      p[i] = a[i] | b[i];

   VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));

   return state;
}

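/* The compute analog of flush_descriptor_set(): emit the samplers and
 * binding table for the compute stage, then point at them through an
 * INTERFACE_DESCRIPTOR_DATA loaded via MEDIA_INTERFACE_DESCRIPTOR_LOAD.
 */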
static VkResult
flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct anv_state surfaces = { 0, }, samplers = { 0, };
   VkResult result;

   result = cmd_buffer_emit_samplers(cmd_buffer,
                                     VK_SHADER_STAGE_COMPUTE, &samplers);
   if (result != VK_SUCCESS)
      return result;
   result = cmd_buffer_emit_binding_table(cmd_buffer,
                                          VK_SHADER_STAGE_COMPUTE, &surfaces);
   if (result != VK_SUCCESS)
      return result;

   struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = {
      .KernelStartPointer = pipeline->cs_simd,
      .KernelStartPointerHigh = 0,
      .BindingTablePointer = surfaces.offset,
      .BindingTableEntryCount = 0,
      .SamplerStatePointer = samplers.offset,
      .SamplerCount = 0,
      .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
   };

   uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
   struct anv_state state =
      anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);

   GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);

   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
                  .InterfaceDescriptorTotalLength = size,
                  .InterfaceDescriptorDataStartAddress = state.offset);

   return VK_SUCCESS;
}

void
anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   VkResult result;

   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);

   if (cmd_buffer->state.current_pipeline != GPGPU) {
      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
                     .PipelineSelection = GPGPU);
      cmd_buffer->state.current_pipeline = GPGPU;
   }

   if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);

   if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
       (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) {
      result = flush_compute_descriptor_set(cmd_buffer);
      assert(result == VK_SUCCESS);
      cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
   }

   cmd_buffer->state.compute_dirty = 0;
}

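/* Flush all dirty 3D state to the batch before a draw: pipeline selection,
 * vertex buffers, the pipeline's own pre-built batch, descriptors,
 * viewport state, and the merged raster/depth-stencil/color-calc/VF
 * packets.
 */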
void
anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
   uint32_t *p;

   uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used;

   assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);

   if (cmd_buffer->state.current_pipeline != _3D) {
      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
                     .PipelineSelection = _3D);
      cmd_buffer->state.current_pipeline = _3D;
   }

   if (vb_emit) {
      const uint32_t num_buffers = __builtin_popcount(vb_emit);
      const uint32_t num_dwords = 1 + num_buffers * 4;

      p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
                          GEN8_3DSTATE_VERTEX_BUFFERS);
      uint32_t vb, i = 0;
      for_each_bit(vb, vb_emit) {
         struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer;
         uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset;

         struct GEN8_VERTEX_BUFFER_STATE state = {
            .VertexBufferIndex = vb,
            .MemoryObjectControlState = GEN8_MOCS,
            .AddressModifyEnable = true,
            .BufferPitch = pipeline->binding_stride[vb],
            .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
            .BufferSize = buffer->size - offset
         };
         GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state);
         i++;
      }
   }

   if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) {
      /* If somebody compiled a pipeline after starting a command buffer the
       * scratch bo may have grown since we started this cmd buffer (and
       * emitted STATE_BASE_ADDRESS).  If we're binding that pipeline now,
       * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
      if (cmd_buffer->state.scratch_size < pipeline->total_scratch)
         anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
   }

   if (cmd_buffer->state.descriptors_dirty)
      flush_descriptor_sets(cmd_buffer);

   if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) {
      struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state;
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS,
                     .ScissorRectPointer = vp_state->scissor.offset);
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
                     .CCViewportPointer = vp_state->cc_vp.offset);
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
                     .SFClipViewportPointer = vp_state->sf_clip_vp.offset);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
                                  ANV_CMD_BUFFER_RS_DIRTY)) {
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.rs_state->state_sf,
                           pipeline->state_sf);
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.rs_state->state_raster,
                           pipeline->state_raster);
   }

   if (cmd_buffer->state.ds_state &&
       (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
                                   ANV_CMD_BUFFER_DS_DIRTY))) {
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.ds_state->state_wm_depth_stencil,
                           pipeline->state_wm_depth_stencil);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_CB_DIRTY |
                                  ANV_CMD_BUFFER_DS_DIRTY)) {
      struct anv_state state;
      if (cmd_buffer->state.ds_state == NULL)
         state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
                                             cmd_buffer->state.cb_state->state_color_calc,
                                             GEN8_COLOR_CALC_STATE_length, 64);
      else if (cmd_buffer->state.cb_state == NULL)
         state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
                                             cmd_buffer->state.ds_state->state_color_calc,
                                             GEN8_COLOR_CALC_STATE_length, 64);
      else
         state = anv_cmd_buffer_merge_dynamic(cmd_buffer,
                                              cmd_buffer->state.ds_state->state_color_calc,
                                              cmd_buffer->state.cb_state->state_color_calc,
                                              GEN8_COLOR_CALC_STATE_length, 64);

      anv_batch_emit(&cmd_buffer->batch,
                     GEN8_3DSTATE_CC_STATE_POINTERS,
                     .ColorCalcStatePointer = state.offset,
                     .ColorCalcStatePointerValid = true);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
                                  ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) {
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.state_vf, pipeline->state_vf);
   }

   cmd_buffer->state.vb_dirty &= ~vb_emit;
   cmd_buffer->state.dirty = 0;
}

void anv_CmdDraw(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    firstVertex,
    uint32_t                                    vertexCount,
    uint32_t                                    firstInstance,
    uint32_t                                    instanceCount)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   anv_cmd_buffer_flush_state(cmd_buffer);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .VertexAccessType = SEQUENTIAL,
                  .VertexCountPerInstance = vertexCount,
                  .StartVertexLocation = firstVertex,
                  .InstanceCount = instanceCount,
                  .StartInstanceLocation = firstInstance,
                  .BaseVertexLocation = 0);
}

void anv_CmdDrawIndexed(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    firstIndex,
    uint32_t                                    indexCount,
    int32_t                                     vertexOffset,
    uint32_t                                    firstInstance,
    uint32_t                                    instanceCount)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   anv_cmd_buffer_flush_state(cmd_buffer);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .VertexAccessType = RANDOM,
                  .VertexCountPerInstance = indexCount,
                  .StartVertexLocation = firstIndex,
                  .InstanceCount = instanceCount,
                  .StartInstanceLocation = firstInstance,
                  .BaseVertexLocation = vertexOffset);
}

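/* Load a GPU register either from a buffer object (MI_LOAD_REGISTER_MEM)
 * or from an immediate (MI_LOAD_REGISTER_IMM).  Indirect draws and
 * dispatches work by loading the 3DPRIM_* and GPGPU dispatch registers
 * below directly from the application's indirect buffer.
 */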
static void
anv_batch_lrm(struct anv_batch *batch,
              uint32_t reg, struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
                  .RegisterAddress = reg,
                  .MemoryAddress = { bo, offset });
}

static void
anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
{
   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM,
                  .RegisterOffset = reg,
                  .DataDWord = imm);
}

/* Auto-Draw / Indirect Registers */
#define GEN7_3DPRIM_END_OFFSET          0x2420
#define GEN7_3DPRIM_START_VERTEX        0x2430
#define GEN7_3DPRIM_VERTEX_COUNT        0x2434
#define GEN7_3DPRIM_INSTANCE_COUNT      0x2438
#define GEN7_3DPRIM_START_INSTANCE      0x243C
#define GEN7_3DPRIM_BASE_VERTEX         0x2440

void anv_CmdDrawIndirect(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    uint32_t                                    count,
    uint32_t                                    stride)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   anv_cmd_buffer_flush_state(cmd_buffer);

   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
   anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .IndirectParameterEnable = true,
                  .VertexAccessType = SEQUENTIAL);
}

void anv_CmdDrawIndexedIndirect(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    uint32_t                                    count,
    uint32_t                                    stride)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   anv_cmd_buffer_flush_state(cmd_buffer);

   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .IndirectParameterEnable = true,
                  .VertexAccessType = RANDOM);
}

void anv_CmdDispatch(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    x,
    uint32_t                                    y,
    uint32_t                                    z)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;

   anv_cmd_buffer_flush_compute_state(cmd_buffer);

   anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
                  .SIMDSize = prog_data->simd_size / 16,
                  .ThreadDepthCounterMaximum = 0,
                  .ThreadHeightCounterMaximum = 0,
                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
                  .ThreadGroupIDXDimension = x,
                  .ThreadGroupIDYDimension = y,
                  .ThreadGroupIDZDimension = z,
                  .RightExecutionMask = pipeline->cs_right_mask,
                  .BottomExecutionMask = 0xffffffff);

   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
}

#define GPGPU_DISPATCHDIMX 0x2500
#define GPGPU_DISPATCHDIMY 0x2504
#define GPGPU_DISPATCHDIMZ 0x2508

void anv_CmdDispatchIndirect(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   anv_cmd_buffer_flush_compute_state(cmd_buffer);

   anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
   anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
   anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);

   anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
                  .IndirectParameterEnable = true,
                  .SIMDSize = prog_data->simd_size / 16,
                  .ThreadDepthCounterMaximum = 0,
                  .ThreadHeightCounterMaximum = 0,
                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
                  .RightExecutionMask = pipeline->cs_right_mask,
                  .BottomExecutionMask = 0xffffffff);

   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
}

void anv_CmdSetEvent(
    VkCmdBuffer                                 cmdBuffer,
    VkEvent                                     event,
    VkPipelineStageFlags                        stageMask)
{
   stub();
}

void anv_CmdResetEvent(
    VkCmdBuffer                                 cmdBuffer,
    VkEvent                                     event,
    VkPipelineStageFlags                        stageMask)
{
   stub();
}

void anv_CmdWaitEvents(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    eventCount,
    const VkEvent*                              pEvents,
    VkPipelineStageFlags                        srcStageMask,
    VkPipelineStageFlags                        destStageMask,
    uint32_t                                    memBarrierCount,
    const void* const*                          ppMemBarriers)
{
   stub();
}

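/* Lower a pipeline barrier to a single PIPE_CONTROL: source stages map to
 * stall bits, memory output flags map to cache flushes, and memory input
 * flags map to cache invalidations.
 */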
void anv_CmdPipelineBarrier(
    VkCmdBuffer                                 cmdBuffer,
    VkPipelineStageFlags                        srcStageMask,
    VkPipelineStageFlags                        destStageMask,
    VkBool32                                    byRegion,
    uint32_t                                    memBarrierCount,
    const void* const*                          ppMemBarriers)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   uint32_t b, *dw;

   struct GEN8_PIPE_CONTROL cmd = {
      GEN8_PIPE_CONTROL_header,
      .PostSyncOperation = NoWrite,
   };

   /* XXX: I think waitEvent is a no-op on our HW.  We should verify that. */

   if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
      /* This is just what PIPE_CONTROL does */
   }

   if (anv_clear_mask(&srcStageMask,
                      VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
                      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
                      VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
                      VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT |
                      VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT |
                      VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
                      VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
                      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
                      VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
                      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
      cmd.StallAtPixelScoreboard = true;
   }

   if (anv_clear_mask(&srcStageMask,
                      VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
                      VK_PIPELINE_STAGE_TRANSFER_BIT |
                      VK_PIPELINE_STAGE_TRANSITION_BIT)) {
      cmd.CommandStreamerStallEnable = true;
   }

   if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) {
      anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT");
   }

   /* On our hardware, all stages will wait for execution as needed. */
   (void)destStageMask;

   /* We checked all known VkPipeEventFlags. */
   anv_assert(srcStageMask == 0);

   /* XXX: Right now, we're really dumb and just flush whatever categories
    * the app asks for.  One of these days we may make this a bit better
    * but right now that's all the hardware allows for in most areas.
    */
   VkMemoryOutputFlags out_flags = 0;
   VkMemoryInputFlags in_flags = 0;

   for (uint32_t i = 0; i < memBarrierCount; i++) {
      const struct anv_common *common = ppMemBarriers[i];
      switch (common->sType) {
      case VK_STRUCTURE_TYPE_MEMORY_BARRIER: {
         ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common);
         out_flags |= barrier->outputMask;
         in_flags |= barrier->inputMask;
         break;
      }
      case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: {
         ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common);
         out_flags |= barrier->outputMask;
         in_flags |= barrier->inputMask;
         break;
      }
      case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: {
         ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common);
         out_flags |= barrier->outputMask;
         in_flags |= barrier->inputMask;
         break;
      }
      default:
         unreachable("Invalid memory barrier type");
      }
   }

   for_each_bit(b, out_flags) {
      switch ((VkMemoryOutputFlags)(1 << b)) {
      case VK_MEMORY_OUTPUT_HOST_WRITE_BIT:
         break; /* FIXME: Little-core systems */
      case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT:
         cmd.DCFlushEnable = true;
         break;
      case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT:
         cmd.RenderTargetCacheFlushEnable = true;
         break;
      case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
         cmd.DepthCacheFlushEnable = true;
         break;
      case VK_MEMORY_OUTPUT_TRANSFER_BIT:
         cmd.RenderTargetCacheFlushEnable = true;
         cmd.DepthCacheFlushEnable = true;
         break;
      default:
         unreachable("Invalid memory output flag");
      }
   }

   for_each_bit(b, in_flags) {
      switch ((VkMemoryInputFlags)(1 << b)) {
      case VK_MEMORY_INPUT_HOST_READ_BIT:
         break; /* FIXME: Little-core systems */
      case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT:
      case VK_MEMORY_INPUT_INDEX_FETCH_BIT:
      case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT:
         cmd.VFCacheInvalidationEnable = true;
         break;
      case VK_MEMORY_INPUT_UNIFORM_READ_BIT:
         cmd.ConstantCacheInvalidationEnable = true;
         break;
      case VK_MEMORY_INPUT_SHADER_READ_BIT:
         cmd.DCFlushEnable = true;
         cmd.TextureCacheInvalidationEnable = true;
         break;
      case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT:
      case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
         break; /* XXX: Hunh? */
      case VK_MEMORY_INPUT_TRANSFER_BIT:
         cmd.TextureCacheInvalidationEnable = true;
         break;
      }
   }

   dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length);
   GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd);
}

void anv_CmdPushConstants(
    VkCmdBuffer                                 cmdBuffer,
    VkPipelineLayout                            layout,
    VkShaderStageFlags                          stageFlags,
    uint32_t                                    start,
    uint32_t                                    length,
    const void*                                 values)
{
   stub();
}

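/* Emit 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER, and friends for the
 * current subpass.  If the subpass has no depth-stencil attachment, a null
 * view (D16_UNORM with zero strides) disables depth and stencil writes.
 */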
static void
anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_subpass *subpass = cmd_buffer->state.subpass;
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct anv_depth_stencil_view *view;

   static const struct anv_depth_stencil_view null_view =
      { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 };

   if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
      const struct anv_attachment_view *aview =
         fb->attachments[subpass->depth_stencil_attachment];
      assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL);
      view = (const struct anv_depth_stencil_view *)aview;
   } else {
      view = &null_view;
   }

   /* FIXME: Implement the PMA stall W/A */
   /* FIXME: Width and Height are wrong */

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
                  .SurfaceType = SURFTYPE_2D,
                  .DepthWriteEnable = view->depth_stride > 0,
                  .StencilWriteEnable = view->stencil_stride > 0,
                  .HierarchicalDepthBufferEnable = false,
                  .SurfaceFormat = view->depth_format,
                  .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0,
                  .SurfaceBaseAddress = { view->bo, view->depth_offset },
                  .Height = cmd_buffer->state.framebuffer->height - 1,
                  .Width = cmd_buffer->state.framebuffer->width - 1,
                  .LOD = 0,
                  .Depth = 1 - 1,
                  .MinimumArrayElement = 0,
                  .DepthBufferObjectControlState = GEN8_MOCS,
                  .RenderTargetViewExtent = 1 - 1,
                  .SurfaceQPitch = view->depth_qpitch >> 2);

   /* Disable hierarchical depth buffers. */
   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER,
                  .StencilBufferEnable = view->stencil_stride > 0,
                  .StencilBufferObjectControlState = GEN8_MOCS,
                  .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0,
                  .SurfaceBaseAddress = { view->bo, view->stencil_offset },
                  .SurfaceQPitch = view->stencil_qpitch >> 2);

   /* Clear the clear params. */
   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS);
}

void
anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
                             struct anv_subpass *subpass)
{
   cmd_buffer->state.subpass = subpass;

   cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;

   anv_cmd_buffer_emit_depth_stencil(cmd_buffer);
}

void anv_CmdBeginRenderPass(
    VkCmdBuffer                                 cmdBuffer,
    const VkRenderPassBeginInfo*                pRenderPassBegin,
    VkRenderPassContents                        contents)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);
   ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);

   cmd_buffer->state.framebuffer = framebuffer;
   cmd_buffer->state.pass = pass;

   const VkRect2D *render_area = &pRenderPassBegin->renderArea;

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE,
                  .ClippedDrawingRectangleYMin = render_area->offset.y,
                  .ClippedDrawingRectangleXMin = render_area->offset.x,
                  .ClippedDrawingRectangleYMax =
                     render_area->offset.y + render_area->extent.height - 1,
                  .ClippedDrawingRectangleXMax =
                     render_area->offset.x + render_area->extent.width - 1,
                  .DrawingRectangleOriginY = 0,
                  .DrawingRectangleOriginX = 0);

   anv_cmd_buffer_clear_attachments(cmd_buffer, pass,
                                    pRenderPassBegin->pAttachmentClearValues);

   anv_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses);
}

void anv_CmdNextSubpass(
    VkCmdBuffer                                 cmdBuffer,
    VkRenderPassContents                        contents)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY);

   anv_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1);
}

void anv_CmdEndRenderPass(
    VkCmdBuffer                                 cmdBuffer)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   /* Emit a flushing pipe control at the end of a pass.  This is kind of a
    * hack but it ensures that render targets always actually get written.
    * Eventually, we should do flushing based on image format transitions
    * or something of that nature.
    */
   anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                  .PostSyncOperation = NoWrite,
                  .RenderTargetCacheFlushEnable = true,
                  .InstructionCacheInvalidateEnable = true,
                  .DepthCacheFlushEnable = true,
                  .VFCacheInvalidationEnable = true,
                  .TextureCacheInvalidationEnable = true,
                  .CommandStreamerStallEnable = true);
}

void anv_CmdExecuteCommands(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    cmdBuffersCount,
    const VkCmdBuffer*                          pCmdBuffers)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, primary, cmdBuffer);

   assert(primary->level == VK_CMD_BUFFER_LEVEL_PRIMARY);

   anv_assert(primary->state.subpass == &primary->state.pass->subpasses[0]);

   for (uint32_t i = 0; i < cmdBuffersCount; i++) {
      ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);

      assert(secondary->level == VK_CMD_BUFFER_LEVEL_SECONDARY);

      anv_cmd_buffer_add_secondary(primary, secondary);
   }
}

VkResult anv_CreateCommandPool(
    VkDevice                                    _device,
    const VkCmdPoolCreateInfo*                  pCreateInfo,
    VkCmdPool*                                  pCmdPool)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_cmd_pool *pool;

   pool = anv_device_alloc(device, sizeof(*pool), 8,
                           VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
   if (pool == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   list_inithead(&pool->cmd_buffers);

   *pCmdPool = anv_cmd_pool_to_handle(pool);

   return VK_SUCCESS;
}

VkResult anv_DestroyCommandPool(
    VkDevice                                    _device,
    VkCmdPool                                   cmdPool)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool);

   anv_ResetCommandPool(_device, cmdPool, 0);

   anv_device_free(device, pool);

   return VK_SUCCESS;
}

VkResult anv_ResetCommandPool(
    VkDevice                                    device,
    VkCmdPool                                   cmdPool,
    VkCmdPoolResetFlags                         flags)
{
   ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool);

   list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer,
                            &pool->cmd_buffers, pool_link) {
      anv_DestroyCommandBuffer(device, anv_cmd_buffer_to_handle(cmd_buffer));
   }

   return VK_SUCCESS;
}