/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

/** \file anv_cmd_buffer.c
 *
 * This file contains all of the stuff for emitting commands into a command
 * buffer.  This includes implementations of most of the vkCmd* entrypoints.
 * This file is concerned entirely with state emission and not with the
 * command buffer data structure itself.  As far as this file is concerned,
 * most of anv_cmd_buffer is magic.
 */

static void
anv_cmd_state_init(struct anv_cmd_state *state)
{
   state->rs_state = NULL;
   state->vp_state = NULL;
   state->cb_state = NULL;
   state->ds_state = NULL;
   memset(&state->state_vf, 0, sizeof(state->state_vf));
   memset(&state->descriptors, 0, sizeof(state->descriptors));

   state->dirty = 0;
   state->vb_dirty = 0;
   state->descriptors_dirty = 0;
   state->pipeline = NULL;
}

VkResult anv_CreateCommandBuffer(
    VkDevice                                    _device,
    const VkCmdBufferCreateInfo*                pCreateInfo,
    VkCmdBuffer*                                pCmdBuffer)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_cmd_pool, pool, pCreateInfo->cmdPool);
   struct anv_cmd_buffer *cmd_buffer;
   VkResult result;

   cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8,
                                 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
   if (cmd_buffer == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   cmd_buffer->device = device;

   result = anv_cmd_buffer_init_batch_bo_chain(cmd_buffer);
   if (result != VK_SUCCESS)
      goto fail;

   anv_state_stream_init(&cmd_buffer->surface_state_stream,
                         &device->surface_state_block_pool);
   anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
                         &device->dynamic_state_block_pool);

   cmd_buffer->level = pCreateInfo->level;
   cmd_buffer->opt_flags = 0;

   anv_cmd_state_init(&cmd_buffer->state);

   list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);

   *pCmdBuffer = anv_cmd_buffer_to_handle(cmd_buffer);

   return VK_SUCCESS;

 fail:
   anv_device_free(device, cmd_buffer);

   return result;
}

VkResult anv_DestroyCommandBuffer(
    VkDevice                                    _device,
    VkCmdBuffer                                 _cmd_buffer)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, _cmd_buffer);

   list_del(&cmd_buffer->pool_link);

   anv_cmd_buffer_fini_batch_bo_chain(cmd_buffer);

   anv_state_stream_finish(&cmd_buffer->surface_state_stream);
   anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
   anv_device_free(device, cmd_buffer);

   return VK_SUCCESS;
}

VkResult anv_ResetCommandBuffer(
    VkCmdBuffer                                 cmdBuffer,
    VkCmdBufferResetFlags                       flags)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   anv_cmd_buffer_reset_batch_bo_chain(cmd_buffer);

   anv_cmd_state_init(&cmd_buffer->state);

   return VK_SUCCESS;
}

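/* STATE_BASE_ADDRESS establishes the base addresses against which all of the
 * later, offset-relative state pointers (binding tables, surface state,
 * dynamic state, instructions) are decoded, so it has to be emitted before
 * any packet that references such an offset.
 */
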
void
anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_bo *scratch_bo = NULL;

   cmd_buffer->state.scratch_size = device->scratch_block_pool.size;
   if (cmd_buffer->state.scratch_size > 0)
      scratch_bo = &device->scratch_block_pool.bo;

   anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS,
                  .GeneralStateBaseAddress = { scratch_bo, 0 },
                  .GeneralStateMemoryObjectControlState = GEN8_MOCS,
                  .GeneralStateBaseAddressModifyEnable = true,
                  .GeneralStateBufferSize = 0xfffff,
                  .GeneralStateBufferSizeModifyEnable = true,

                  .SurfaceStateBaseAddress =
                     { anv_cmd_buffer_current_surface_bo(cmd_buffer), 0 },
                  .SurfaceStateMemoryObjectControlState = GEN8_MOCS,
                  .SurfaceStateBaseAddressModifyEnable = true,

                  .DynamicStateBaseAddress =
                     { &device->dynamic_state_block_pool.bo, 0 },
                  .DynamicStateMemoryObjectControlState = GEN8_MOCS,
                  .DynamicStateBaseAddressModifyEnable = true,
                  .DynamicStateBufferSize = 0xfffff,
                  .DynamicStateBufferSizeModifyEnable = true,

                  .IndirectObjectBaseAddress = { NULL, 0 },
                  .IndirectObjectMemoryObjectControlState = GEN8_MOCS,
                  .IndirectObjectBaseAddressModifyEnable = true,
                  .IndirectObjectBufferSize = 0xfffff,
                  .IndirectObjectBufferSizeModifyEnable = true,

                  .InstructionBaseAddress =
                     { &device->instruction_block_pool.bo, 0 },
                  .InstructionMemoryObjectControlState = GEN8_MOCS,
                  .InstructionBaseAddressModifyEnable = true,
                  .InstructionBufferSize = 0xfffff,
                  .InstructionBuffersizeModifyEnable = true);

   /* After re-setting the surface state base address, we have to do some
    * cache flushing so that the sampler engine will pick up the new
    * SURFACE_STATE objects and binding tables.  From the Broadwell PRM,
    * Shared Function > 3D Sampler > State > State Caching (page 96):
    *
    *    Coherency with system memory in the state cache, like the texture
    *    cache is handled partially by software.  It is expected that the
    *    command stream or shader will issue Cache Flush operation or
    *    Cache_Flush sampler message to ensure that the L1 cache remains
    *    coherent with system memory.
    *
    *    Whenever the value of the Dynamic_State_Base_Addr,
    *    Surface_State_Base_Addr are altered, the L1 state cache must be
    *    invalidated to ensure the new surface or sampler state is fetched
    *    from system memory.
    *
    * The PIPE_CONTROL command has a "State Cache Invalidation Enable" bit
    * which, according to the PIPE_CONTROL instruction documentation in the
    * same PRM:
    *
    *    Setting this bit is independent of any other bit in this packet.
    *    This bit controls the invalidation of the L1 and L2 state caches
    *    at the top of the pipe i.e. at the parsing time.
    *
    * Unfortunately, experimentation seems to indicate that state cache
    * invalidation through a PIPE_CONTROL does nothing whatsoever in
    * regards to surface state and binding tables.  Instead, it seems that
    * invalidating the texture cache is what is actually needed.
    *
    * XXX: As far as we have been able to determine through experimentation,
    * flushing the texture cache appears to be sufficient.  The theory here
    * is that all of the sampling/rendering units cache the binding table in
    * the texture cache.  However, we have yet to actually confirm this.
    */
   anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                  .TextureCacheInvalidationEnable = true);
}

VkResult anv_BeginCommandBuffer(
    VkCmdBuffer                                 cmdBuffer,
    const VkCmdBufferBeginInfo*                 pBeginInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   cmd_buffer->opt_flags = pBeginInfo->flags;

   if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_SECONDARY) {
      cmd_buffer->state.framebuffer =
         anv_framebuffer_from_handle(pBeginInfo->framebuffer);
      cmd_buffer->state.pass =
         anv_render_pass_from_handle(pBeginInfo->renderPass);

      /* FIXME: We shouldn't be starting on the first subpass */
      anv_cmd_buffer_begin_subpass(cmd_buffer,
                                   &cmd_buffer->state.pass->subpasses[0]);
   }

   anv_cmd_buffer_emit_state_base_address(cmd_buffer);
   cmd_buffer->state.current_pipeline = UINT32_MAX;

   return VK_SUCCESS;
}

VkResult anv_EndCommandBuffer(
    VkCmdBuffer                                 cmdBuffer)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   struct anv_device *device = cmd_buffer->device;

   anv_cmd_buffer_end_batch_buffer(cmd_buffer);

   if (cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY) {
      /* The algorithm used to compute the validate list is not threadsafe
       * as it uses the bo->index field.  We have to lock the device around
       * it.  Fortunately, the chances for contention here are probably
       * very low.
       */
      pthread_mutex_lock(&device->mutex);
      anv_cmd_buffer_prepare_execbuf(cmd_buffer);
      pthread_mutex_unlock(&device->mutex);
   }

   return VK_SUCCESS;
}

void anv_CmdBindPipeline(
    VkCmdBuffer                                 cmdBuffer,
    VkPipelineBindPoint                         pipelineBindPoint,
    VkPipeline                                  _pipeline)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);

   switch (pipelineBindPoint) {
   case VK_PIPELINE_BIND_POINT_COMPUTE:
      cmd_buffer->state.compute_pipeline = pipeline;
      cmd_buffer->state.compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
      break;

   case VK_PIPELINE_BIND_POINT_GRAPHICS:
      cmd_buffer->state.pipeline = pipeline;
      cmd_buffer->state.vb_dirty |= pipeline->vb_used;
      cmd_buffer->state.dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
      break;

   default:
      assert(!"invalid bind point");
      break;
   }
}

void anv_CmdBindDynamicViewportState(
    VkCmdBuffer                                 cmdBuffer,
    VkDynamicViewportState                      dynamicViewportState)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, dynamicViewportState);

   cmd_buffer->state.vp_state = vp_state;
   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_VP_DIRTY;
}

void anv_CmdBindDynamicRasterState(
    VkCmdBuffer                                 cmdBuffer,
    VkDynamicRasterState                        dynamicRasterState)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, dynamicRasterState);

   cmd_buffer->state.rs_state = rs_state;
   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_RS_DIRTY;
}

void anv_CmdBindDynamicColorBlendState(
    VkCmdBuffer                                 cmdBuffer,
    VkDynamicColorBlendState                    dynamicColorBlendState)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, dynamicColorBlendState);

   cmd_buffer->state.cb_state = cb_state;
   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_CB_DIRTY;
}

void anv_CmdBindDynamicDepthStencilState(
    VkCmdBuffer                                 cmdBuffer,
    VkDynamicDepthStencilState                  dynamicDepthStencilState)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, dynamicDepthStencilState);

   cmd_buffer->state.ds_state = ds_state;
   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_DS_DIRTY;
}

void anv_CmdBindDescriptorSets(
    VkCmdBuffer                                 cmdBuffer,
    VkPipelineBindPoint                         pipelineBindPoint,
    VkPipelineLayout                            _layout,
    uint32_t                                    firstSet,
    uint32_t                                    setCount,
    const VkDescriptorSet*                      pDescriptorSets,
    uint32_t                                    dynamicOffsetCount,
    const uint32_t*                             pDynamicOffsets)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_pipeline_layout, layout, _layout);
   struct anv_descriptor_set_layout *set_layout;

   assert(firstSet + setCount < MAX_SETS);

   uint32_t dynamic_slot = 0;
   for (uint32_t i = 0; i < setCount; i++) {
      ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]);
      set_layout = layout->set[firstSet + i].layout;

      cmd_buffer->state.descriptors[firstSet + i].set = set;

      assert(set_layout->num_dynamic_buffers <
             ARRAY_SIZE(cmd_buffer->state.descriptors[0].dynamic_offsets));
      memcpy(cmd_buffer->state.descriptors[firstSet + i].dynamic_offsets,
             pDynamicOffsets + dynamic_slot,
             set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets));

      cmd_buffer->state.descriptors_dirty |= set_layout->shader_stages;

      dynamic_slot += set_layout->num_dynamic_buffers;
   }
}

void anv_CmdBindIndexBuffer(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    VkIndexType                                 indexType)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);

   static const uint32_t vk_to_gen_index_type[] = {
      [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
      [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
   };

   struct GEN8_3DSTATE_VF vf = {
      GEN8_3DSTATE_VF_header,
      .CutIndex = (indexType == VK_INDEX_TYPE_UINT16) ? UINT16_MAX : UINT32_MAX,
   };
   GEN8_3DSTATE_VF_pack(NULL, cmd_buffer->state.state_vf, &vf);

   cmd_buffer->state.dirty |= ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY;

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER,
                  .IndexFormat = vk_to_gen_index_type[indexType],
                  .MemoryObjectControlState = GEN8_MOCS,
                  .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
                  .BufferSize = buffer->size - offset);
}

void anv_CmdBindVertexBuffers(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    startBinding,
    uint32_t                                    bindingCount,
    const VkBuffer*                             pBuffers,
    const VkDeviceSize*                         pOffsets)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   struct anv_vertex_binding *vb = cmd_buffer->state.vertex_bindings;

   /* We have to defer setting up vertex buffers since we need the buffer
    * stride from the pipeline.
    */
   assert(startBinding + bindingCount < MAX_VBS);
   for (uint32_t i = 0; i < bindingCount; i++) {
      vb[startBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]);
      vb[startBinding + i].offset = pOffsets[i];
      cmd_buffer->state.vb_dirty |= 1 << (startBinding + i);
   }
}

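/* Binding table layout, as built below: for the fragment stage, the first
 * `bias` entries are reserved for the subpass's color attachments, and the
 * surfaces from the descriptor sets follow starting at
 * bias + layout->set[s].surface_start[stage].
 */
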
static VkResult
cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
                              unsigned stage, struct anv_state *bt_state)
{
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   struct anv_subpass *subpass = cmd_buffer->state.subpass;
   struct anv_pipeline_layout *layout;
   uint32_t attachments, bias, size;

   if (stage == VK_SHADER_STAGE_COMPUTE)
      layout = cmd_buffer->state.compute_pipeline->layout;
   else
      layout = cmd_buffer->state.pipeline->layout;

   if (stage == VK_SHADER_STAGE_FRAGMENT) {
      bias = MAX_RTS;
      attachments = subpass->color_count;
   } else {
      bias = 0;
      attachments = 0;
   }

   /* This is a little awkward: layout can be NULL but we still have to
    * allocate and set a binding table for the PS stage for render
    * targets.
    */
   uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0;

   if (attachments + surface_count == 0)
      return VK_SUCCESS;

   size = (bias + surface_count) * sizeof(uint32_t);
   *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32);
   uint32_t *bt_map = bt_state->map;

   if (bt_state->map == NULL)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   /* This is highly annoying.  The Vulkan spec puts the depth-stencil
    * attachments in with the color attachments.  Unfortunately, thanks to
    * other aspects of the API, we can't really separate them before this
    * point.  Therefore, we have to walk all of the attachments but only
    * put the color attachments into the binding table.
    */
   for (uint32_t a = 0; a < attachments; a++) {
      const struct anv_attachment_view *attachment =
         fb->attachments[subpass->color_attachments[a]];

      assert(attachment->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_COLOR);
      const struct anv_color_attachment_view *view =
         (const struct anv_color_attachment_view *)attachment;

      struct anv_state state =
         anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);

      if (state.map == NULL)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;

      memcpy(state.map, view->view.surface_state.map, 64);

      /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
      *(uint64_t *)(state.map + 8 * 4) =
         anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer),
                            cmd_buffer->device,
                            state.offset + 8 * 4,
                            view->view.bo, view->view.offset);

      bt_map[a] = state.offset;
   }

   if (layout == NULL)
      return VK_SUCCESS;

   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set];
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      struct anv_descriptor_slot *surface_slots =
         set_layout->stage[stage].surface_start;

      uint32_t start = bias + layout->set[set].surface_start[stage];

      for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) {
         struct anv_surface_view *view =
            d->set->descriptors[surface_slots[b].index].view;

         if (!view)
            continue;

         struct anv_state state =
            anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);

         if (state.map == NULL)
            return VK_ERROR_OUT_OF_DEVICE_MEMORY;

         uint32_t offset;
         if (surface_slots[b].dynamic_slot >= 0) {
            uint32_t dynamic_offset =
               d->dynamic_offsets[surface_slots[b].dynamic_slot];

            offset = view->offset + dynamic_offset;
            anv_fill_buffer_surface_state(state.map, view->format, offset,
                                          view->range - dynamic_offset);
         } else {
            offset = view->offset;
            memcpy(state.map, view->surface_state.map, 64);
         }

         /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
         *(uint64_t *)(state.map + 8 * 4) =
            anv_reloc_list_add(anv_cmd_buffer_current_surface_relocs(cmd_buffer),
                               cmd_buffer->device,
                               state.offset + 8 * 4,
                               view->bo, offset);

         bt_map[start + b] = state.offset;
      }
   }

   return VK_SUCCESS;
}

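/* Each GEN8 SAMPLER_STATE is four dwords (16 bytes); the table below is
 * indexed by the per-stage sampler slots laid out by the pipeline layout.
 */
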
static VkResult
cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
                         unsigned stage, struct anv_state *state)
{
   struct anv_pipeline_layout *layout;
   uint32_t sampler_count;

   if (stage == VK_SHADER_STAGE_COMPUTE)
      layout = cmd_buffer->state.compute_pipeline->layout;
   else
      layout = cmd_buffer->state.pipeline->layout;

   sampler_count = layout ? layout->stage[stage].sampler_count : 0;
   if (sampler_count == 0)
      return VK_SUCCESS;

   uint32_t size = sampler_count * 16;
   *state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, size, 32);

   if (state->map == NULL)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_binding *d = &cmd_buffer->state.descriptors[set];
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      struct anv_descriptor_slot *sampler_slots =
         set_layout->stage[stage].sampler_start;

      uint32_t start = layout->set[set].sampler_start[stage];

      for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) {
         struct anv_sampler *sampler =
            d->set->descriptors[sampler_slots[b].index].sampler;

         if (!sampler)
            continue;

         memcpy(state->map + (start + b) * 16,
                sampler->state, sizeof(sampler->state));
      }
   }

   return VK_SUCCESS;
}

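/* The GEN8 binding table and sampler state pointer packets differ only in
 * their 3DCommandSubOpcode, so the helper below emits the VS variant and
 * overrides the sub-opcode to retarget whichever stage is being flushed.
 */
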
static VkResult
flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage)
{
   struct anv_state surfaces = { 0, }, samplers = { 0, };
   VkResult result;

   result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers);
   if (result != VK_SUCCESS)
      return result;
   result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces);
   if (result != VK_SUCCESS)
      return result;

   static const uint32_t sampler_state_opcodes[] = {
      [VK_SHADER_STAGE_VERTEX]          = 43,
      [VK_SHADER_STAGE_TESS_CONTROL]    = 44, /* HS */
      [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */
      [VK_SHADER_STAGE_GEOMETRY]        = 46,
      [VK_SHADER_STAGE_FRAGMENT]        = 47,
      [VK_SHADER_STAGE_COMPUTE]         = 0,
   };

   static const uint32_t binding_table_opcodes[] = {
      [VK_SHADER_STAGE_VERTEX]          = 38,
      [VK_SHADER_STAGE_TESS_CONTROL]    = 39,
      [VK_SHADER_STAGE_TESS_EVALUATION] = 40,
      [VK_SHADER_STAGE_GEOMETRY]        = 41,
      [VK_SHADER_STAGE_FRAGMENT]        = 42,
      [VK_SHADER_STAGE_COMPUTE]         = 0,
   };

   if (samplers.alloc_size > 0) {
      anv_batch_emit(&cmd_buffer->batch,
                     GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS,
                     ._3DCommandSubOpcode = sampler_state_opcodes[stage],
                     .PointertoVSSamplerState = samplers.offset);
   }

   if (surfaces.alloc_size > 0) {
      anv_batch_emit(&cmd_buffer->batch,
                     GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS,
                     ._3DCommandSubOpcode = binding_table_opcodes[stage],
                     .PointertoVSBindingTable = surfaces.offset);
   }

   return VK_SUCCESS;
}

static void
flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
{
   uint32_t s, dirty = cmd_buffer->state.descriptors_dirty &
                       cmd_buffer->state.pipeline->active_stages;

   VkResult result = VK_SUCCESS;
   for_each_bit(s, dirty) {
      result = flush_descriptor_set(cmd_buffer, s);
      if (result != VK_SUCCESS)
         break;
   }

   if (result != VK_SUCCESS) {
      assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);

      result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
      assert(result == VK_SUCCESS);

      /* Re-emit state base addresses so we get the new surface state base
       * address before we start emitting binding tables etc.
       */
      anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      /* Re-emit all active binding tables */
      for_each_bit(s, cmd_buffer->state.pipeline->active_stages) {
         result = flush_descriptor_set(cmd_buffer, s);

         /* It had better succeed this time */
         assert(result == VK_SUCCESS);
      }
   }

   cmd_buffer->state.descriptors_dirty &=
      ~cmd_buffer->state.pipeline->active_stages;
}

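/* The two helpers below snapshot packed dword arrays into the dynamic state
 * stream: the first copies a single pre-packed array, the second ORs two
 * partially-packed arrays together, mirroring what anv_batch_emit_merge does
 * for batch packets.
 */
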
static struct anv_state
anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
                            uint32_t *a, uint32_t dwords, uint32_t alignment)
{
   struct anv_state state;

   state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                              dwords * 4, alignment);
   memcpy(state.map, a, dwords * 4);

   VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4));

   return state;
}

static struct anv_state
anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
                             uint32_t *a, uint32_t *b,
                             uint32_t dwords, uint32_t alignment)
{
   struct anv_state state;
   uint32_t *p;

   state = anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                              dwords * 4, alignment);
   p = state.map;
   for (uint32_t i = 0; i < dwords; i++)
      p[i] = a[i] | b[i];

   VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));

   return state;
}

static VkResult
flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct anv_state surfaces = { 0, }, samplers = { 0, };
   VkResult result;

   result = cmd_buffer_emit_samplers(cmd_buffer,
                                     VK_SHADER_STAGE_COMPUTE, &samplers);
   if (result != VK_SUCCESS)
      return result;
   result = cmd_buffer_emit_binding_table(cmd_buffer,
                                          VK_SHADER_STAGE_COMPUTE, &surfaces);
   if (result != VK_SUCCESS)
      return result;

   struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = {
      .KernelStartPointer = pipeline->cs_simd,
      .KernelStartPointerHigh = 0,
      .BindingTablePointer = surfaces.offset,
      .BindingTableEntryCount = 0,
      .SamplerStatePointer = samplers.offset,
      .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
   };

   uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
   struct anv_state state =
      anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);

   GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);

   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
                  .InterfaceDescriptorTotalLength = size,
                  .InterfaceDescriptorDataStartAddress = state.offset);

   return VK_SUCCESS;
}

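/* Compute state is flushed separately from 3D: a PIPELINE_SELECT to GPGPU if
 * needed, the pipeline's own batch, and a fresh interface descriptor whenever
 * the pipeline or the compute descriptors have changed.
 */
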
void
anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   VkResult result;

   assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);

   if (cmd_buffer->state.current_pipeline != GPGPU) {
      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
                     .PipelineSelection = GPGPU);
      cmd_buffer->state.current_pipeline = GPGPU;
   }

   if (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);

   if ((cmd_buffer->state.descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
       (cmd_buffer->state.compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) {
      result = flush_compute_descriptor_set(cmd_buffer);
      assert(result == VK_SUCCESS);
      cmd_buffer->state.descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
   }

   cmd_buffer->state.compute_dirty = 0;
}

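/* Called before each 3D draw: switches back to the 3D pipeline if needed,
 * then emits only the state that has been marked dirty since the last draw.
 */
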
void
anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
   uint32_t *p;

   uint32_t vb_emit = cmd_buffer->state.vb_dirty & pipeline->vb_used;

   assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);

   if (cmd_buffer->state.current_pipeline != _3D) {
      anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
                     .PipelineSelection = _3D);
      cmd_buffer->state.current_pipeline = _3D;
   }

   if (vb_emit) {
      const uint32_t num_buffers = __builtin_popcount(vb_emit);
      const uint32_t num_dwords = 1 + num_buffers * 4;

      p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
                          GEN8_3DSTATE_VERTEX_BUFFERS);
      uint32_t vb, i = 0;
      for_each_bit(vb, vb_emit) {
         struct anv_buffer *buffer = cmd_buffer->state.vertex_bindings[vb].buffer;
         uint32_t offset = cmd_buffer->state.vertex_bindings[vb].offset;

         struct GEN8_VERTEX_BUFFER_STATE state = {
            .VertexBufferIndex = vb,
            .MemoryObjectControlState = GEN8_MOCS,
            .AddressModifyEnable = true,
            .BufferPitch = pipeline->binding_stride[vb],
            .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
            .BufferSize = buffer->size - offset
         };

         GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state);
         i++;
      }
   }

   if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) {
      /* If somebody compiled a pipeline after starting a command buffer the
       * scratch bo may have grown since we started this cmd buffer (and
       * emitted STATE_BASE_ADDRESS).  If we're binding that pipeline now,
       * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo.
       */
      if (cmd_buffer->state.scratch_size < pipeline->total_scratch)
         anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
   }

   if (cmd_buffer->state.descriptors_dirty)
      flush_descriptor_sets(cmd_buffer);

   if (cmd_buffer->state.dirty & ANV_CMD_BUFFER_VP_DIRTY) {
      struct anv_dynamic_vp_state *vp_state = cmd_buffer->state.vp_state;
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS,
                     .ScissorRectPointer = vp_state->scissor.offset);
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
                     .CCViewportPointer = vp_state->cc_vp.offset);
      anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
                     .SFClipViewportPointer = vp_state->sf_clip_vp.offset);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
                                  ANV_CMD_BUFFER_RS_DIRTY)) {
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.rs_state->state_sf,
                           pipeline->state_sf);
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.rs_state->state_raster,
                           pipeline->state_raster);
   }

   if (cmd_buffer->state.ds_state &&
       (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
                                   ANV_CMD_BUFFER_DS_DIRTY))) {
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.ds_state->state_wm_depth_stencil,
                           pipeline->state_wm_depth_stencil);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_CB_DIRTY |
                                  ANV_CMD_BUFFER_DS_DIRTY)) {
      struct anv_state state;
      if (cmd_buffer->state.ds_state == NULL)
         state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
                                             cmd_buffer->state.cb_state->state_color_calc,
                                             GEN8_COLOR_CALC_STATE_length, 64);
      else if (cmd_buffer->state.cb_state == NULL)
         state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
                                             cmd_buffer->state.ds_state->state_color_calc,
                                             GEN8_COLOR_CALC_STATE_length, 64);
      else
         state = anv_cmd_buffer_merge_dynamic(cmd_buffer,
                                              cmd_buffer->state.ds_state->state_color_calc,
                                              cmd_buffer->state.cb_state->state_color_calc,
                                              GEN8_COLOR_CALC_STATE_length, 64);

      anv_batch_emit(&cmd_buffer->batch,
                     GEN8_3DSTATE_CC_STATE_POINTERS,
                     .ColorCalcStatePointer = state.offset,
                     .ColorCalcStatePointerValid = true);
   }

   if (cmd_buffer->state.dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY |
                                  ANV_CMD_BUFFER_INDEX_BUFFER_DIRTY)) {
      anv_batch_emit_merge(&cmd_buffer->batch,
                           cmd_buffer->state.state_vf, pipeline->state_vf);
   }

   cmd_buffer->state.vb_dirty &= ~vb_emit;
   cmd_buffer->state.dirty = 0;
}

void anv_CmdDraw(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    firstVertex,
    uint32_t                                    vertexCount,
    uint32_t                                    firstInstance,
    uint32_t                                    instanceCount)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   anv_cmd_buffer_flush_state(cmd_buffer);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .VertexAccessType = SEQUENTIAL,
                  .VertexCountPerInstance = vertexCount,
                  .StartVertexLocation = firstVertex,
                  .InstanceCount = instanceCount,
                  .StartInstanceLocation = firstInstance,
                  .BaseVertexLocation = 0);
}

void anv_CmdDrawIndexed(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    firstIndex,
    uint32_t                                    indexCount,
    int32_t                                     vertexOffset,
    uint32_t                                    firstInstance,
    uint32_t                                    instanceCount)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   anv_cmd_buffer_flush_state(cmd_buffer);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .VertexAccessType = RANDOM,
                  .VertexCountPerInstance = indexCount,
                  .StartVertexLocation = firstIndex,
                  .InstanceCount = instanceCount,
                  .StartInstanceLocation = firstInstance,
                  .BaseVertexLocation = vertexOffset);
}

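/* Helpers for loading a GPU register either from buffer memory (LRM) or from
 * an immediate (LRI); indirect draws and dispatches use these to source their
 * parameters directly from the indirect buffer.
 */
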
static void
anv_batch_lrm(struct anv_batch *batch,
              uint32_t reg, struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
                  .RegisterAddress = reg,
                  .MemoryAddress = { bo, offset });
}

static void
anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
{
   anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM,
                  .RegisterOffset = reg,
                  .DataDWord = imm);
}

/* Auto-Draw / Indirect Registers */
#define GEN7_3DPRIM_END_OFFSET          0x2420
#define GEN7_3DPRIM_START_VERTEX        0x2430
#define GEN7_3DPRIM_VERTEX_COUNT        0x2434
#define GEN7_3DPRIM_INSTANCE_COUNT      0x2438
#define GEN7_3DPRIM_START_INSTANCE      0x243C
#define GEN7_3DPRIM_BASE_VERTEX         0x2440

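/* The indirect draw parameters sit in the buffer in memory order (count,
 * instance count, start vertex/index, then base vertex and start instance for
 * the indexed variant), so each dword loads straight into the matching
 * 3DPRIM register above.
 */
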
void anv_CmdDrawIndirect(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    uint32_t                                    count,
    uint32_t                                    stride)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   anv_cmd_buffer_flush_state(cmd_buffer);

   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
   anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .IndirectParameterEnable = true,
                  .VertexAccessType = SEQUENTIAL);
}

void anv_CmdDrawIndexedIndirect(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset,
    uint32_t                                    count,
    uint32_t                                    stride)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   anv_cmd_buffer_flush_state(cmd_buffer);

   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
   anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
                  .IndirectParameterEnable = true,
                  .VertexAccessType = RANDOM);
}

void anv_CmdDispatch(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    x,
    uint32_t                                    y,
    uint32_t                                    z)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;

   anv_cmd_buffer_flush_compute_state(cmd_buffer);

   anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
                  .SIMDSize = prog_data->simd_size / 16,
                  .ThreadDepthCounterMaximum = 0,
                  .ThreadHeightCounterMaximum = 0,
                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
                  .ThreadGroupIDXDimension = x,
                  .ThreadGroupIDYDimension = y,
                  .ThreadGroupIDZDimension = z,
                  .RightExecutionMask = pipeline->cs_right_mask,
                  .BottomExecutionMask = 0xffffffff);

   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
}

#define GPGPU_DISPATCHDIMX 0x2500
#define GPGPU_DISPATCHDIMY 0x2504
#define GPGPU_DISPATCHDIMZ 0x2508

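/* As with indirect draws, the x/y/z group counts load straight from the
 * indirect buffer into the dispatch dimension registers above.
 */
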
void anv_CmdDispatchIndirect(
    VkCmdBuffer                                 cmdBuffer,
    VkBuffer                                    _buffer,
    VkDeviceSize                                offset)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   struct anv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
   struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
   struct anv_bo *bo = buffer->bo;
   uint32_t bo_offset = buffer->offset + offset;

   anv_cmd_buffer_flush_compute_state(cmd_buffer);

   anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
   anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
   anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);

   anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
                  .IndirectParameterEnable = true,
                  .SIMDSize = prog_data->simd_size / 16,
                  .ThreadDepthCounterMaximum = 0,
                  .ThreadHeightCounterMaximum = 0,
                  .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
                  .RightExecutionMask = pipeline->cs_right_mask,
                  .BottomExecutionMask = 0xffffffff);

   anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
}

void anv_CmdSetEvent(
    VkCmdBuffer                                 cmdBuffer,
    VkEvent                                     event,
    VkPipelineStageFlags                        stageMask)
{
   stub();
}

void anv_CmdResetEvent(
    VkCmdBuffer                                 cmdBuffer,
    VkEvent                                     event,
    VkPipelineStageFlags                        stageMask)
{
   stub();
}

void anv_CmdWaitEvents(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    eventCount,
    const VkEvent*                              pEvents,
    VkPipelineStageFlags                        srcStageMask,
    VkPipelineStageFlags                        destStageMask,
    uint32_t                                    memBarrierCount,
    const void* const*                          ppMemBarriers)
{
   stub();
}

void anv_CmdPipelineBarrier(
    VkCmdBuffer                                 cmdBuffer,
    VkPipelineStageFlags                        srcStageMask,
    VkPipelineStageFlags                        destStageMask,
    VkBool32                                    byRegion,
    uint32_t                                    memBarrierCount,
    const void* const*                          ppMemBarriers)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   uint32_t b, *dw;

   struct GEN8_PIPE_CONTROL cmd = {
      GEN8_PIPE_CONTROL_header,
      .PostSyncOperation = NoWrite,
   };

   /* XXX: I think waitEvent is a no-op on our HW.  We should verify that. */

   if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
      /* This is just what PIPE_CONTROL does */
   }

   if (anv_clear_mask(&srcStageMask,
                      VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
                      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
                      VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
                      VK_PIPELINE_STAGE_TESS_CONTROL_SHADER_BIT |
                      VK_PIPELINE_STAGE_TESS_EVALUATION_SHADER_BIT |
                      VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
                      VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
                      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
                      VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
                      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
      cmd.StallAtPixelScoreboard = true;
   }

   if (anv_clear_mask(&srcStageMask,
                      VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
                      VK_PIPELINE_STAGE_TRANSFER_BIT |
                      VK_PIPELINE_STAGE_TRANSITION_BIT)) {
      cmd.CommandStreamerStallEnable = true;
   }

   if (anv_clear_mask(&srcStageMask, VK_PIPELINE_STAGE_HOST_BIT)) {
      anv_finishme("VK_PIPE_EVENT_CPU_SIGNAL_BIT");
   }

   /* On our hardware, all stages will wait for execution as needed. */
   (void)destStageMask;

   /* We checked all known VkPipeEventFlags. */
   anv_assert(srcStageMask == 0);

   /* XXX: Right now, we're really dumb and just flush whatever categories
    * the app asks for.  One of these days we may make this a bit better
    * but right now that's all the hardware allows for in most areas.
    */
   VkMemoryOutputFlags out_flags = 0;
   VkMemoryInputFlags in_flags = 0;

   for (uint32_t i = 0; i < memBarrierCount; i++) {
      const struct anv_common *common = ppMemBarriers[i];
      switch (common->sType) {
      case VK_STRUCTURE_TYPE_MEMORY_BARRIER: {
         ANV_COMMON_TO_STRUCT(VkMemoryBarrier, barrier, common);
         out_flags |= barrier->outputMask;
         in_flags |= barrier->inputMask;
         break;
      }
      case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: {
         ANV_COMMON_TO_STRUCT(VkBufferMemoryBarrier, barrier, common);
         out_flags |= barrier->outputMask;
         in_flags |= barrier->inputMask;
         break;
      }
      case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: {
         ANV_COMMON_TO_STRUCT(VkImageMemoryBarrier, barrier, common);
         out_flags |= barrier->outputMask;
         in_flags |= barrier->inputMask;
         break;
      }
      default:
         unreachable("Invalid memory barrier type");
      }
   }

   for_each_bit(b, out_flags) {
      switch ((VkMemoryOutputFlags)(1 << b)) {
      case VK_MEMORY_OUTPUT_HOST_WRITE_BIT:
         break; /* FIXME: Little-core systems */
      case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT:
         cmd.DCFlushEnable = true;
         break;
      case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT:
         cmd.RenderTargetCacheFlushEnable = true;
         break;
      case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
         cmd.DepthCacheFlushEnable = true;
         break;
      case VK_MEMORY_OUTPUT_TRANSFER_BIT:
         cmd.RenderTargetCacheFlushEnable = true;
         cmd.DepthCacheFlushEnable = true;
         break;
      default:
         unreachable("Invalid memory output flag");
      }
   }

   for_each_bit(b, in_flags) {
      switch ((VkMemoryInputFlags)(1 << b)) {
      case VK_MEMORY_INPUT_HOST_READ_BIT:
         break; /* FIXME: Little-core systems */
      case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT:
      case VK_MEMORY_INPUT_INDEX_FETCH_BIT:
      case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT:
         cmd.VFCacheInvalidationEnable = true;
         break;
      case VK_MEMORY_INPUT_UNIFORM_READ_BIT:
         cmd.ConstantCacheInvalidationEnable = true;
         break;
      case VK_MEMORY_INPUT_SHADER_READ_BIT:
         cmd.DCFlushEnable = true;
         cmd.TextureCacheInvalidationEnable = true;
         break;
      case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT:
      case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
         break; /* XXX: Hunh? */
      case VK_MEMORY_INPUT_TRANSFER_BIT:
         cmd.TextureCacheInvalidationEnable = true;
         break;
      }
   }

   dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length);
   GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd);
}

void anv_CmdPushConstants(
    VkCmdBuffer                                 cmdBuffer,
    VkPipelineLayout                            layout,
    VkShaderStageFlags                          stageFlags,
    uint32_t                                    start,
    uint32_t                                    length,
    const void*                                 values)
{
   stub();
}

static void
anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_subpass *subpass = cmd_buffer->state.subpass;
   struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct anv_depth_stencil_view *view;

   static const struct anv_depth_stencil_view null_view =
      { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 };

   if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
      const struct anv_attachment_view *aview =
         fb->attachments[subpass->depth_stencil_attachment];
      assert(aview->attachment_type == ANV_ATTACHMENT_VIEW_TYPE_DEPTH_STENCIL);
      view = (const struct anv_depth_stencil_view *)aview;
   } else {
      view = &null_view;
   }

   /* FIXME: Implement the PMA stall W/A */
   /* FIXME: Width and Height are wrong */

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
                  .SurfaceType = SURFTYPE_2D,
                  .DepthWriteEnable = view->depth_stride > 0,
                  .StencilWriteEnable = view->stencil_stride > 0,
                  .HierarchicalDepthBufferEnable = false,
                  .SurfaceFormat = view->depth_format,
                  .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0,
                  .SurfaceBaseAddress = { view->bo, view->depth_offset },
                  .Height = cmd_buffer->state.framebuffer->height - 1,
                  .Width = cmd_buffer->state.framebuffer->width - 1,
                  .LOD = 0,
                  .Depth = 1 - 1,
                  .MinimumArrayElement = 0,
                  .DepthBufferObjectControlState = GEN8_MOCS,
                  .RenderTargetViewExtent = 1 - 1,
                  .SurfaceQPitch = view->depth_qpitch >> 2);

   /* Disable hierarchical depth buffers. */
   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER);

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER,
                  .StencilBufferEnable = view->stencil_stride > 0,
                  .StencilBufferObjectControlState = GEN8_MOCS,
                  .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0,
                  .SurfaceBaseAddress = { view->bo, view->stencil_offset },
                  .SurfaceQPitch = view->stencil_qpitch >> 2);

   /* Clear the clear params. */
   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS);
}

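/* Starting a subpass changes the set of render targets, and the fragment
 * binding table embeds render target surface state, so the fragment stage's
 * descriptors are marked dirty here to force a re-flush before the next draw.
 */
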
void
anv_cmd_buffer_begin_subpass(struct anv_cmd_buffer *cmd_buffer,
                             struct anv_subpass *subpass)
{
   cmd_buffer->state.subpass = subpass;

   cmd_buffer->state.descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;

   anv_cmd_buffer_emit_depth_stencil(cmd_buffer);
}

void anv_CmdBeginRenderPass(
    VkCmdBuffer                                 cmdBuffer,
    const VkRenderPassBeginInfo*                pRenderPassBegin,
    VkRenderPassContents                        contents)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);
   ANV_FROM_HANDLE(anv_render_pass, pass, pRenderPassBegin->renderPass);
   ANV_FROM_HANDLE(anv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);

   cmd_buffer->state.framebuffer = framebuffer;
   cmd_buffer->state.pass = pass;

   const VkRect2D *render_area = &pRenderPassBegin->renderArea;

   anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE,
                  .ClippedDrawingRectangleYMin = render_area->offset.y,
                  .ClippedDrawingRectangleXMin = render_area->offset.x,
                  .ClippedDrawingRectangleYMax =
                     render_area->offset.y + render_area->extent.height - 1,
                  .ClippedDrawingRectangleXMax =
                     render_area->offset.x + render_area->extent.width - 1,
                  .DrawingRectangleOriginY = 0,
                  .DrawingRectangleOriginX = 0);

   anv_cmd_buffer_clear_attachments(cmd_buffer, pass,
                                    pRenderPassBegin->pAttachmentClearValues);

   anv_cmd_buffer_begin_subpass(cmd_buffer, pass->subpasses);
}

void anv_CmdNextSubpass(
    VkCmdBuffer                                 cmdBuffer,
    VkRenderPassContents                        contents)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY);

   anv_cmd_buffer_begin_subpass(cmd_buffer, cmd_buffer->state.subpass + 1);
}

void anv_CmdEndRenderPass(
    VkCmdBuffer                                 cmdBuffer)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, cmdBuffer);

   /* Emit a flushing pipe control at the end of a pass.  This is kind of a
    * hack but it ensures that render targets always actually get written.
    * Eventually, we should do flushing based on image format transitions
    * or something of that nature.
    */
   anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
                  .PostSyncOperation = NoWrite,
                  .RenderTargetCacheFlushEnable = true,
                  .InstructionCacheInvalidateEnable = true,
                  .DepthCacheFlushEnable = true,
                  .VFCacheInvalidationEnable = true,
                  .TextureCacheInvalidationEnable = true,
                  .CommandStreamerStallEnable = true);
}

void anv_CmdExecuteCommands(
    VkCmdBuffer                                 cmdBuffer,
    uint32_t                                    cmdBuffersCount,
    const VkCmdBuffer*                          pCmdBuffers)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, primary, cmdBuffer);

   assert(primary->level == VK_CMD_BUFFER_LEVEL_PRIMARY);

   anv_assert(primary->state.subpass == &primary->state.pass->subpasses[0]);

   for (uint32_t i = 0; i < cmdBuffersCount; i++) {
      ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);

      assert(secondary->level == VK_CMD_BUFFER_LEVEL_SECONDARY);

      anv_cmd_buffer_add_secondary(primary, secondary);
   }
}

VkResult anv_CreateCommandPool(
    VkDevice                                    _device,
    const VkCmdPoolCreateInfo*                  pCreateInfo,
    VkCmdPool*                                  pCmdPool)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_cmd_pool *pool;

   pool = anv_device_alloc(device, sizeof(*pool), 8,
                           VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
   if (pool == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   list_inithead(&pool->cmd_buffers);

   *pCmdPool = anv_cmd_pool_to_handle(pool);

   return VK_SUCCESS;
}

VkResult anv_DestroyCommandPool(
    VkDevice                                    _device,
    VkCmdPool                                   cmdPool)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool);

   anv_ResetCommandPool(_device, cmdPool, 0);

   anv_device_free(device, pool);

   return VK_SUCCESS;
}

VkResult anv_ResetCommandPool(
    VkDevice                                    device,
    VkCmdPool                                   cmdPool,
    VkCmdPoolResetFlags                         flags)
{
   ANV_FROM_HANDLE(anv_cmd_pool, pool, cmdPool);

   list_for_each_entry_safe(struct anv_cmd_buffer, cmd_buffer,
                            &pool->cmd_buffers, pool_link) {
      anv_DestroyCommandBuffer(device, anv_cmd_buffer_to_handle(cmd_buffer));
   }

   return VK_SUCCESS;
}