VkImageLayout dst_layout,
uint32_t src_family,
uint32_t dst_family,
- VkImageSubresourceRange range,
+ const VkImageSubresourceRange *range,
VkImageAspectFlags pending_clears);
const struct radv_dynamic_state default_dynamic_state = {
}
static void
-radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer)
+radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer, bool instanced_or_indirect_draw,
+ uint32_t draw_vertex_count)
{
struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
struct radv_device *device = cmd_buffer->device;
if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR))
radv_emit_scissor(cmd_buffer);
+ ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer, instanced_or_indirect_draw, draw_vertex_count);
+ if (cmd_buffer->state.last_ia_multi_vgt_param != ia_multi_vgt_param) {
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK)
+ radeon_set_context_reg_idx(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
+ else
+ radeon_set_context_reg(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
+ cmd_buffer->state.last_ia_multi_vgt_param = ia_multi_vgt_param;
+ }
+
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) {
uint32_t stages = 0;
S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
radeon_set_context_reg(cmd_buffer->cs, R_028B54_VGT_SHADER_STAGES_EN, stages);
- ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer);
if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
- radeon_set_context_reg_idx(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
radeon_set_context_reg_idx(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config);
radeon_set_uconfig_reg_idx(cmd_buffer->cs, R_030908_VGT_PRIMITIVE_TYPE, 1, cmd_buffer->state.pipeline->graphics.prim);
} else {
radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE, cmd_buffer->state.pipeline->graphics.prim);
- radeon_set_context_reg(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
radeon_set_context_reg(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
}
radeon_set_context_reg(cmd_buffer->cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, cmd_buffer->state.pipeline->graphics.gs_out);
}
}
+static void radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
+ VkAccessFlags src_flags)
+{
+ enum radv_cmd_flush_bits flush_bits = 0;
+ uint32_t b;
+ for_each_bit(b, src_flags) {
+ switch ((VkAccessFlagBits)(1 << b)) {
+ case VK_ACCESS_SHADER_WRITE_BIT:
+ flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2;
+ break;
+ case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
+ break;
+ case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+ break;
+ case VK_ACCESS_TRANSFER_WRITE_BIT:
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
+ break;
+ default:
+ break;
+ }
+ }
+ cmd_buffer->state.flush_bits |= flush_bits;
+}
+
+static void radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
+ VkAccessFlags dst_flags)
+{
+ enum radv_cmd_flush_bits flush_bits = 0;
+ uint32_t b;
+ for_each_bit(b, dst_flags) {
+ switch ((VkAccessFlagBits)(1 << b)) {
+ case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
+ case VK_ACCESS_INDEX_READ_BIT:
+ case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
+ flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1;
+ break;
+ case VK_ACCESS_UNIFORM_READ_BIT:
+ flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
+ break;
+ case VK_ACCESS_SHADER_READ_BIT:
+ flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2;
+ break;
+ case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
+ case VK_ACCESS_TRANSFER_READ_BIT:
+ case VK_ACCESS_TRANSFER_WRITE_BIT:
+ case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
+ flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER | RADV_CMD_FLAG_INV_GLOBAL_L2;
+ default:
+ break;
+ }
+ }
+ cmd_buffer->state.flush_bits |= flush_bits;
+}
+
static void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer, const struct radv_subpass_barrier *barrier)
{
+ radv_src_access_flush(cmd_buffer, barrier->src_access_mask);
radv_stage_flush(cmd_buffer, barrier->src_stage_mask);
-
- /* TODO: actual cache flushes */
+ radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask);
}
static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buffer,
radv_handle_image_transition(cmd_buffer,
view->image,
cmd_buffer->state.attachments[idx].current_layout,
- att.layout, 0, 0, range,
+ att.layout, 0, 0, &range,
cmd_buffer->state.attachments[idx].pending_clear_aspects);
cmd_buffer->state.attachments[idx].current_layout = att.layout;
return VK_SUCCESS;
}
+static void emit_gfx_buffer_state(struct radv_cmd_buffer *cmd_buffer)
+{
+ struct radv_device *device = cmd_buffer->device;
+ if (device->gfx_init) {
+ uint64_t va = device->ws->buffer_get_va(device->gfx_init);
+ device->ws->cs_add_buffer(cmd_buffer->cs, device->gfx_init, 8);
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, (va >> 32) & 0xffff);
+ radeon_emit(cmd_buffer->cs, device->gfx_init_size_dw & 0xffff);
+ } else
+ si_init_config(cmd_buffer);
+}
+
VkResult radv_BeginCommandBuffer(
VkCommandBuffer commandBuffer,
const VkCommandBufferBeginInfo *pBeginInfo)
if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
switch (cmd_buffer->queue_family_index) {
case RADV_QUEUE_GENERAL:
- /* Flush read caches at the beginning of CS not flushed by the kernel. */
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_ICACHE |
- RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_INV_SMEM_L1 |
- RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER |
- RADV_CMD_FLAG_INV_GLOBAL_L2;
- si_init_config(cmd_buffer->device->physical_device, cmd_buffer);
+ emit_gfx_buffer_state(cmd_buffer);
radv_set_db_count_control(cmd_buffer);
- si_emit_cache_flush(cmd_buffer);
break;
case RADV_QUEUE_COMPUTE:
- cmd_buffer->state.flush_bits = RADV_CMD_FLAG_INV_ICACHE |
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VMEM_L1 |
- RADV_CMD_FLAG_INV_SMEM_L1 |
- RADV_CMD_FLAG_INV_GLOBAL_L2;
- si_init_compute(cmd_buffer->device->physical_device, cmd_buffer);
- si_emit_cache_flush(cmd_buffer);
+ si_init_compute(cmd_buffer);
break;
case RADV_QUEUE_TRANSFER:
default:
if (cmd_buffer->ring_offsets_idx == -1)
cmd_buffer->ring_offsets_idx = loc->sgpr_idx;
else if (loc->sgpr_idx != -1)
- assert(loc->sgpr_idx != cmd_buffer->ring_offsets_idx);
+ assert(loc->sgpr_idx == cmd_buffer->ring_offsets_idx);
}
break;
default:
uint32_t firstInstance)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_cmd_buffer_flush_state(cmd_buffer);
+
+ radv_cmd_buffer_flush_state(cmd_buffer, (instanceCount > 1), vertexCount);
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10);
uint32_t index_max_size = (cmd_buffer->state.index_buffer->size - cmd_buffer->state.index_offset) / index_size;
uint64_t index_va;
- radv_cmd_buffer_flush_state(cmd_buffer);
+ radv_cmd_buffer_flush_state(cmd_buffer, (instanceCount > 1), indexCount);
radv_emit_primitive_reset_index(cmd_buffer);
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 15);
uint32_t stride)
{
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_cmd_buffer_flush_state(cmd_buffer);
+ radv_cmd_buffer_flush_state(cmd_buffer, true, 0);
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
cmd_buffer->cs, 14);
int index_size = cmd_buffer->state.index_type ? 4 : 2;
uint32_t index_max_size = (cmd_buffer->state.index_buffer->size - cmd_buffer->state.index_offset) / index_size;
uint64_t index_va;
- radv_cmd_buffer_flush_state(cmd_buffer);
+ radv_cmd_buffer_flush_state(cmd_buffer, true, 0);
radv_emit_primitive_reset_index(cmd_buffer);
index_va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->state.index_buffer->bo);
struct radv_image *image,
VkImageLayout src_layout,
VkImageLayout dst_layout,
- VkImageSubresourceRange range,
+ const VkImageSubresourceRange *range,
VkImageAspectFlags pending_clears)
{
if (dst_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL &&
!radv_layout_has_htile(image, dst_layout)) ||
(radv_layout_is_htile_compressed(image, src_layout) &&
!radv_layout_is_htile_compressed(image, dst_layout))) {
+ VkImageSubresourceRange local_range = *range;
+ local_range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+ local_range.baseMipLevel = 0;
+ local_range.levelCount = 1;
- range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
- range.baseMipLevel = 0;
- range.levelCount = 1;
-
- radv_decompress_depth_image_inplace(cmd_buffer, image, &range);
+ radv_decompress_depth_image_inplace(cmd_buffer, image, &local_range);
}
}
VkImageLayout dst_layout,
unsigned src_queue_mask,
unsigned dst_queue_mask,
- VkImageSubresourceRange range,
+ const VkImageSubresourceRange *range,
VkImageAspectFlags pending_clears)
{
if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
radv_initialise_cmask(cmd_buffer, image, 0xffffffffu);
} else if (radv_layout_can_fast_clear(image, src_layout, src_queue_mask) &&
!radv_layout_can_fast_clear(image, dst_layout, dst_queue_mask)) {
- radv_fast_clear_flush_image_inplace(cmd_buffer, image);
+ radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
}
}
VkImageLayout dst_layout,
unsigned src_queue_mask,
unsigned dst_queue_mask,
- VkImageSubresourceRange range,
+ const VkImageSubresourceRange *range,
VkImageAspectFlags pending_clears)
{
if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
radv_initialize_dcc(cmd_buffer, image, 0x20202020u);
} else if (radv_layout_can_fast_clear(image, src_layout, src_queue_mask) &&
!radv_layout_can_fast_clear(image, dst_layout, dst_queue_mask)) {
- radv_fast_clear_flush_image_inplace(cmd_buffer, image);
+ radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
}
}
VkImageLayout dst_layout,
uint32_t src_family,
uint32_t dst_family,
- VkImageSubresourceRange range,
+ const VkImageSubresourceRange *range,
VkImageAspectFlags pending_clears)
{
if (image->exclusive && src_family != dst_family) {
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
VkAccessFlags src_flags = 0;
VkAccessFlags dst_flags = 0;
- uint32_t b;
+
for (uint32_t i = 0; i < memoryBarrierCount; i++) {
src_flags |= pMemoryBarriers[i].srcAccessMask;
dst_flags |= pMemoryBarriers[i].dstAccessMask;
dst_flags |= pImageMemoryBarriers[i].dstAccessMask;
}
- enum radv_cmd_flush_bits flush_bits = 0;
- for_each_bit(b, src_flags) {
- switch ((VkAccessFlagBits)(1 << b)) {
- case VK_ACCESS_SHADER_WRITE_BIT:
- flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2;
- break;
- case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
- break;
- case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
- break;
- case VK_ACCESS_TRANSFER_WRITE_BIT:
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
- break;
- default:
- break;
- }
- }
- cmd_buffer->state.flush_bits |= flush_bits;
+ radv_src_access_flush(cmd_buffer, src_flags);
for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image);
pImageMemoryBarriers[i].newLayout,
pImageMemoryBarriers[i].srcQueueFamilyIndex,
pImageMemoryBarriers[i].dstQueueFamilyIndex,
- pImageMemoryBarriers[i].subresourceRange,
+ &pImageMemoryBarriers[i].subresourceRange,
0);
}
- flush_bits = 0;
-
- for_each_bit(b, dst_flags) {
- switch ((VkAccessFlagBits)(1 << b)) {
- case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
- case VK_ACCESS_INDEX_READ_BIT:
- case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
- flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1;
- break;
- case VK_ACCESS_UNIFORM_READ_BIT:
- flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
- break;
- case VK_ACCESS_SHADER_READ_BIT:
- flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2;
- break;
- case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
- case VK_ACCESS_TRANSFER_READ_BIT:
- case VK_ACCESS_TRANSFER_WRITE_BIT:
- case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
- flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER | RADV_CMD_FLAG_INV_GLOBAL_L2;
- default:
- break;
- }
- }
+ radv_dst_access_flush(cmd_buffer, dst_flags);
- flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+ /* TODO reduce this */
+ enum radv_cmd_flush_bits flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
cmd_buffer->state.flush_bits |= flush_bits;
pImageMemoryBarriers[i].newLayout,
pImageMemoryBarriers[i].srcQueueFamilyIndex,
pImageMemoryBarriers[i].dstQueueFamilyIndex,
- pImageMemoryBarriers[i].subresourceRange,
+ &pImageMemoryBarriers[i].subresourceRange,
0);
}