struct radv_image *image,
VkImageLayout src_layout,
VkImageLayout dst_layout,
+ int src_family,
+ int dst_family,
VkImageSubresourceRange range,
VkImageAspectFlags pending_clears);
dest->stencil_reference = src->stencil_reference;
}
+/*
+ * Report whether this command buffer belongs to the compute queue family
+ * on a chip whose class is CIK or later.  Both conditions must hold; the
+ * chip class is only inspected once the queue family check has passed.
+ */
+bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer)
+{
+	/* Any queue family other than compute never qualifies. */
+	if (cmd_buffer->queue_family_index != RADV_QUEUE_COMPUTE)
+		return false;
+
+	return cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK;
+}
+
+/*
+ * Map a queue family index to the matching ring type:
+ *   RADV_QUEUE_GENERAL  -> RING_GFX
+ *   RADV_QUEUE_COMPUTE  -> RING_COMPUTE
+ *   RADV_QUEUE_TRANSFER -> RING_DMA
+ * Any other value is treated as a programming error and hits unreachable().
+ */
+enum ring_type radv_queue_family_to_ring(int f)
+{
+	if (f == RADV_QUEUE_GENERAL)
+		return RING_GFX;
+	if (f == RADV_QUEUE_COMPUTE)
+		return RING_COMPUTE;
+	if (f == RADV_QUEUE_TRANSFER)
+		return RING_DMA;
+
+	/* No known queue family matched. */
+	unreachable("Unknown queue family");
+}
+
static VkResult radv_create_cmd_buffer(
struct radv_device * device,
struct radv_cmd_pool * pool,
{
struct radv_cmd_buffer *cmd_buffer;
VkResult result;
-
+ unsigned ring;
cmd_buffer = vk_alloc(&pool->alloc, sizeof(*cmd_buffer), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (cmd_buffer == NULL)
if (pool) {
list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
+ cmd_buffer->queue_family_index = pool->queue_family_index;
+
} else {
/* Init the pool_link so we can safefly call list_del when we destroy
* the command buffer
*/
list_inithead(&cmd_buffer->pool_link);
+ cmd_buffer->queue_family_index = RADV_QUEUE_GENERAL;
}
- cmd_buffer->cs = device->ws->cs_create(device->ws, RING_GFX);
+ ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index);
+
+ cmd_buffer->cs = device->ws->cs_create(device->ws, ring);
if (!cmd_buffer->cs) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
uint32_t ia_multi_vgt_param;
uint32_t ls_hs_config = 0;
- unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs,
- 4096);
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
+ cmd_buffer->cs, 4096);
if ((cmd_buffer->state.vertex_descriptors_dirty || cmd_buffer->state.vb_dirty) &&
cmd_buffer->state.pipeline->num_vertex_attribs) {
radv_handle_image_transition(cmd_buffer,
view->image,
cmd_buffer->state.attachments[idx].current_layout,
- att.layout, range,
+ att.layout, 0, 0, range,
cmd_buffer->state.attachments[idx].pending_clear_aspects);
cmd_buffer->state.attachments[idx].current_layout = att.layout;
/* setup initial configuration into command buffer */
if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
- /* Flush read caches at the beginning of CS not flushed by the kernel. */
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_ICACHE |
- RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_INV_SMEM_L1 |
- RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER |
- RADV_CMD_FLAG_INV_GLOBAL_L2;
- si_init_config(&cmd_buffer->device->instance->physicalDevice, cmd_buffer);
- radv_set_db_count_control(cmd_buffer);
- si_emit_cache_flush(cmd_buffer);
+ switch (cmd_buffer->queue_family_index) {
+ case RADV_QUEUE_GENERAL:
+ /* Flush read caches at the beginning of CS not flushed by the kernel. */
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_ICACHE |
+ RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_INV_VMEM_L1 |
+ RADV_CMD_FLAG_INV_SMEM_L1 |
+ RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER |
+ RADV_CMD_FLAG_INV_GLOBAL_L2;
+ si_init_config(&cmd_buffer->device->instance->physicalDevice, cmd_buffer);
+ radv_set_db_count_control(cmd_buffer);
+ si_emit_cache_flush(cmd_buffer);
+ break;
+ case RADV_QUEUE_COMPUTE:
+ cmd_buffer->state.flush_bits = RADV_CMD_FLAG_INV_ICACHE |
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_INV_VMEM_L1 |
+ RADV_CMD_FLAG_INV_SMEM_L1 |
+ RADV_CMD_FLAG_INV_GLOBAL_L2;
+ si_init_compute(&cmd_buffer->device->instance->physicalDevice, cmd_buffer);
+ si_emit_cache_flush(cmd_buffer);
+ break;
+ case RADV_QUEUE_TRANSFER:
+ default:
+ break;
+ }
}
if (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
unsigned dyn_idx = 0;
- unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs,
- MAX_SETS * 4 * 6);
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
+ cmd_buffer->cs, MAX_SETS * 4 * 6);
for (unsigned i = 0; i < descriptorSetCount; ++i) {
unsigned idx = i + firstSet;
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- si_emit_cache_flush(cmd_buffer);
+ if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER)
+ si_emit_cache_flush(cmd_buffer);
if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs) ||
cmd_buffer->record_fail)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
ws->cs_add_buffer(cmd_buffer->cs, compute_shader->bo, 8);
- unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 16);
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
+ cmd_buffer->cs, 16);
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B830_COMPUTE_PGM_LO, 2);
radeon_emit(cmd_buffer->cs, va >> 8);
list_inithead(&pool->cmd_buffers);
+ pool->queue_family_index = pCreateInfo->queueFamilyIndex;
+
*pCmdPool = radv_cmd_pool_to_handle(pool);
return VK_SUCCESS;
RADV_FROM_HANDLE(radv_render_pass, pass, pRenderPassBegin->renderPass);
RADV_FROM_HANDLE(radv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
- unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs,
- 2048);
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
+ cmd_buffer->cs, 2048);
cmd_buffer->state.framebuffer = framebuffer;
cmd_buffer->state.pass = pass;
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
radv_cmd_buffer_flush_state(cmd_buffer);
- unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
AC_UD_VS_BASE_VERTEX_START_INSTANCE);
radv_cmd_buffer_flush_state(cmd_buffer);
radv_emit_primitive_reset_index(cmd_buffer);
- unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 14);
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 14);
radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
radeon_emit(cmd_buffer->cs, cmd_buffer->state.index_type);
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
radv_cmd_buffer_flush_state(cmd_buffer);
- unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 14);
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
+ cmd_buffer->cs, 14);
radv_emit_indirect_draw(cmd_buffer, buffer, offset,
countBuffer, countBufferOffset, maxDrawCount, stride, false);
index_va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->state.index_buffer->bo);
index_va += cmd_buffer->state.index_buffer->offset + cmd_buffer->state.index_offset;
- unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 21);
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 21);
radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
radeon_emit(cmd_buffer->cs, cmd_buffer->state.index_type);
radv_flush_compute_state(cmd_buffer);
- unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10);
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10);
struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
radv_flush_compute_state(cmd_buffer);
- unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 25);
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 25);
struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.compute_pipeline,
MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
if (loc->sgpr_idx != -1) {
}
}
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_BASE, 2, 0) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cmd_buffer->cs, 1);
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, va >> 32);
+ if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) |
+ PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, va >> 32);
+ radeon_emit(cmd_buffer->cs, 1);
+ } else {
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_BASE, 2, 0) |
+ PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cmd_buffer->cs, 1);
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, va >> 32);
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, 0) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cmd_buffer->cs, 0);
- radeon_emit(cmd_buffer->cs, 1);
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, 0) |
+ PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cmd_buffer->cs, 0);
+ radeon_emit(cmd_buffer->cs, 1);
+ }
assert(cmd_buffer->cs->cdw <= cdw_max);
}
radv_flush_compute_state(cmd_buffer);
- unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 15);
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 15);
radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
radeon_emit(cmd_buffer->cs,
struct radv_image *image,
VkImageLayout src_layout,
VkImageLayout dst_layout,
+ unsigned src_queue_mask,
+ unsigned dst_queue_mask,
VkImageSubresourceRange range,
VkImageAspectFlags pending_clears)
{
radv_initialise_cmask(cmd_buffer, image, 0xccccccccu);
else
radv_initialise_cmask(cmd_buffer, image, 0xffffffffu);
- } else if (radv_layout_has_cmask(image, src_layout) &&
- !radv_layout_has_cmask(image, dst_layout)) {
+ } else if (radv_layout_has_cmask(image, src_layout, src_queue_mask) &&
+ !radv_layout_has_cmask(image, dst_layout, dst_queue_mask)) {
radv_fast_clear_flush_image_inplace(cmd_buffer, image);
}
}
struct radv_image *image,
VkImageLayout src_layout,
VkImageLayout dst_layout,
+ int src_family,
+ int dst_family,
VkImageSubresourceRange range,
VkImageAspectFlags pending_clears)
{
+ if (image->exclusive && src_family != dst_family) {
+ /* This is an acquire or a release operation and there will be
+ * a corresponding release/acquire. Do the transition in the
+ * most flexible queue. */
+
+ assert(src_family == cmd_buffer->queue_family_index ||
+ dst_family == cmd_buffer->queue_family_index);
+
+ if (cmd_buffer->queue_family_index == RADV_QUEUE_TRANSFER)
+ return;
+
+ if (cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE &&
+ (src_family == RADV_QUEUE_GENERAL ||
+ dst_family == RADV_QUEUE_GENERAL))
+ return;
+ }
+
+ unsigned src_queue_mask = radv_image_queue_family_mask(image, src_family);
+ unsigned dst_queue_mask = radv_image_queue_family_mask(image, dst_family);
+
if (image->htile.size)
radv_handle_depth_image_transition(cmd_buffer, image, src_layout,
dst_layout, range, pending_clears);
if (image->cmask.size)
radv_handle_cmask_image_transition(cmd_buffer, image, src_layout,
- dst_layout, range, pending_clears);
+ dst_layout, src_queue_mask,
+ dst_queue_mask, range,
+ pending_clears);
if (image->surface.dcc_size)
radv_handle_dcc_image_transition(cmd_buffer, image, src_layout,
radv_handle_image_transition(cmd_buffer, image,
pImageMemoryBarriers[i].oldLayout,
pImageMemoryBarriers[i].newLayout,
+ pImageMemoryBarriers[i].srcQueueFamilyIndex,
+ pImageMemoryBarriers[i].dstQueueFamilyIndex,
pImageMemoryBarriers[i].subresourceRange,
0);
}
case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
case VK_ACCESS_INDEX_READ_BIT:
case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
- case VK_ACCESS_UNIFORM_READ_BIT:
flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1;
break;
+ case VK_ACCESS_UNIFORM_READ_BIT:
+ flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
+ break;
case VK_ACCESS_SHADER_READ_BIT:
flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2;
break;
cmd_buffer->device->ws->cs_add_buffer(cs, event->bo, 8);
- unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 12);
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 12);
/* TODO: this is overkill. Probably should figure something out from
* the stage mask. */
cmd_buffer->device->ws->cs_add_buffer(cs, event->bo, 8);
- unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7);
+ MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7);
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
radv_handle_image_transition(cmd_buffer, image,
pImageMemoryBarriers[i].oldLayout,
pImageMemoryBarriers[i].newLayout,
+ pImageMemoryBarriers[i].srcQueueFamilyIndex,
+ pImageMemoryBarriers[i].dstQueueFamilyIndex,
pImageMemoryBarriers[i].subresourceRange,
0);
}