X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fvulkan%2Fradv_cmd_buffer.c;h=4aa5df69674b6a623dd681583c26e0873d9586ca;hb=8e03250fcf4fc5de31e92ca4919959d932888a69;hp=f281f33dc7363745c0ef11b483db211121156dee;hpb=cda9f3d8ecce663b53c982236557dcd1b70a25b0;p=mesa.git diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index f281f33dc73..4aa5df69674 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -40,7 +40,7 @@ static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, VkImageLayout dst_layout, uint32_t src_family, uint32_t dst_family, - VkImageSubresourceRange range, + const VkImageSubresourceRange *range, VkImageAspectFlags pending_clears); const struct radv_dynamic_state default_dynamic_state = { @@ -1267,7 +1267,8 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, } static void -radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer) +radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer, bool instanced_or_indirect_draw, + uint32_t draw_vertex_count) { struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; struct radv_device *device = cmd_buffer->device; @@ -1331,6 +1332,15 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR)) radv_emit_scissor(cmd_buffer); + ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer, instanced_or_indirect_draw, draw_vertex_count); + if (cmd_buffer->state.last_ia_multi_vgt_param != ia_multi_vgt_param) { + if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) + radeon_set_context_reg_idx(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param); + else + radeon_set_context_reg(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param); + cmd_buffer->state.last_ia_multi_vgt_param = ia_multi_vgt_param; + } + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) { uint32_t stages = 0; @@ -1340,15 +1350,12 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer) S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); radeon_set_context_reg(cmd_buffer->cs, R_028B54_VGT_SHADER_STAGES_EN, stages); - ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer); if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) { - radeon_set_context_reg_idx(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param); radeon_set_context_reg_idx(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config); radeon_set_uconfig_reg_idx(cmd_buffer->cs, R_030908_VGT_PRIMITIVE_TYPE, 1, cmd_buffer->state.pipeline->graphics.prim); } else { radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE, cmd_buffer->state.pipeline->graphics.prim); - radeon_set_context_reg(cmd_buffer->cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param); radeon_set_context_reg(cmd_buffer->cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config); } radeon_set_context_reg(cmd_buffer->cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, cmd_buffer->state.pipeline->graphics.gs_out); @@ -1396,11 +1403,67 @@ static void radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, } } +static void radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, + VkAccessFlags src_flags) +{ + enum radv_cmd_flush_bits flush_bits = 0; + uint32_t b; + for_each_bit(b, src_flags) { + switch ((VkAccessFlagBits)(1 << b)) { + case VK_ACCESS_SHADER_WRITE_BIT: + flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2; + break; + case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; + break; + case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB; + break; + case VK_ACCESS_TRANSFER_WRITE_BIT: + flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; + break; + default: + break; + } + } + cmd_buffer->state.flush_bits |= flush_bits; +} + +static void radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, + VkAccessFlags dst_flags) +{ + enum radv_cmd_flush_bits flush_bits = 0; + uint32_t b; + for_each_bit(b, dst_flags) { + switch ((VkAccessFlagBits)(1 << b)) { + case VK_ACCESS_INDIRECT_COMMAND_READ_BIT: + case VK_ACCESS_INDEX_READ_BIT: + case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: + flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1; + break; + case VK_ACCESS_UNIFORM_READ_BIT: + flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1; + break; + case VK_ACCESS_SHADER_READ_BIT: + flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2; + break; + case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: + case VK_ACCESS_TRANSFER_READ_BIT: + case VK_ACCESS_TRANSFER_WRITE_BIT: + case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT: + flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER | RADV_CMD_FLAG_INV_GLOBAL_L2; + default: + break; + } + } + cmd_buffer->state.flush_bits |= flush_bits; +} + static void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer, const struct radv_subpass_barrier *barrier) { + radv_src_access_flush(cmd_buffer, barrier->src_access_mask); radv_stage_flush(cmd_buffer, barrier->src_stage_mask); - - /* TODO: actual cache flushes */ + radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask); } static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buffer, @@ -1418,7 +1481,7 @@ static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buf radv_handle_image_transition(cmd_buffer, view->image, cmd_buffer->state.attachments[idx].current_layout, - att.layout, 0, 0, range, + att.layout, 0, 0, &range, cmd_buffer->state.attachments[idx].pending_clear_aspects); cmd_buffer->state.attachments[idx].current_layout = att.layout; @@ -1603,6 +1666,20 @@ VkResult radv_ResetCommandBuffer( return VK_SUCCESS; } +static void emit_gfx_buffer_state(struct radv_cmd_buffer *cmd_buffer) +{ + struct radv_device *device = cmd_buffer->device; + if (device->gfx_init) { + uint64_t va = device->ws->buffer_get_va(device->gfx_init); + device->ws->cs_add_buffer(cmd_buffer->cs, device->gfx_init, 8); + radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0)); + radeon_emit(cmd_buffer->cs, va); + radeon_emit(cmd_buffer->cs, (va >> 32) & 0xffff); + radeon_emit(cmd_buffer->cs, device->gfx_init_size_dw & 0xffff); + } else + si_init_config(cmd_buffer); +} + VkResult radv_BeginCommandBuffer( VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo *pBeginInfo) @@ -1616,26 +1693,11 @@ VkResult radv_BeginCommandBuffer( if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { switch (cmd_buffer->queue_family_index) { case RADV_QUEUE_GENERAL: - /* Flush read caches at the beginning of CS not flushed by the kernel. */ - cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_ICACHE | - RADV_CMD_FLAG_PS_PARTIAL_FLUSH | - RADV_CMD_FLAG_CS_PARTIAL_FLUSH | - RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_INV_SMEM_L1 | - RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER | - RADV_CMD_FLAG_INV_GLOBAL_L2; - si_init_config(cmd_buffer->device->physical_device, cmd_buffer); + emit_gfx_buffer_state(cmd_buffer); radv_set_db_count_control(cmd_buffer); - si_emit_cache_flush(cmd_buffer); break; case RADV_QUEUE_COMPUTE: - cmd_buffer->state.flush_bits = RADV_CMD_FLAG_INV_ICACHE | - RADV_CMD_FLAG_CS_PARTIAL_FLUSH | - RADV_CMD_FLAG_INV_VMEM_L1 | - RADV_CMD_FLAG_INV_SMEM_L1 | - RADV_CMD_FLAG_INV_GLOBAL_L2; - si_init_compute(cmd_buffer->device->physical_device, cmd_buffer); - si_emit_cache_flush(cmd_buffer); + si_init_compute(cmd_buffer); break; case RADV_QUEUE_TRANSFER: default: @@ -1877,7 +1939,7 @@ void radv_CmdBindPipeline( if (cmd_buffer->ring_offsets_idx == -1) cmd_buffer->ring_offsets_idx = loc->sgpr_idx; else if (loc->sgpr_idx != -1) - assert(loc->sgpr_idx != cmd_buffer->ring_offsets_idx); + assert(loc->sgpr_idx == cmd_buffer->ring_offsets_idx); } break; default: @@ -2172,7 +2234,8 @@ void radv_CmdDraw( uint32_t firstInstance) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - radv_cmd_buffer_flush_state(cmd_buffer); + + radv_cmd_buffer_flush_state(cmd_buffer, (instanceCount > 1), vertexCount); MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 10); @@ -2223,7 +2286,7 @@ void radv_CmdDrawIndexed( uint32_t index_max_size = (cmd_buffer->state.index_buffer->size - cmd_buffer->state.index_offset) / index_size; uint64_t index_va; - radv_cmd_buffer_flush_state(cmd_buffer); + radv_cmd_buffer_flush_state(cmd_buffer, (instanceCount > 1), indexCount); radv_emit_primitive_reset_index(cmd_buffer); MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 15); @@ -2321,7 +2384,7 @@ radv_cmd_draw_indirect_count(VkCommandBuffer command uint32_t stride) { RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); - radv_cmd_buffer_flush_state(cmd_buffer); + radv_cmd_buffer_flush_state(cmd_buffer, true, 0); MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 14); @@ -2346,7 +2409,7 @@ radv_cmd_draw_indexed_indirect_count( int index_size = cmd_buffer->state.index_type ? 4 : 2; uint32_t index_max_size = (cmd_buffer->state.index_buffer->size - cmd_buffer->state.index_offset) / index_size; uint64_t index_va; - radv_cmd_buffer_flush_state(cmd_buffer); + radv_cmd_buffer_flush_state(cmd_buffer, true, 0); radv_emit_primitive_reset_index(cmd_buffer); index_va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->state.index_buffer->bo); @@ -2616,7 +2679,7 @@ static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffe struct radv_image *image, VkImageLayout src_layout, VkImageLayout dst_layout, - VkImageSubresourceRange range, + const VkImageSubresourceRange *range, VkImageAspectFlags pending_clears) { if (dst_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL && @@ -2637,12 +2700,12 @@ static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffe !radv_layout_has_htile(image, dst_layout)) || (radv_layout_is_htile_compressed(image, src_layout) && !radv_layout_is_htile_compressed(image, dst_layout))) { + VkImageSubresourceRange local_range = *range; + local_range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + local_range.baseMipLevel = 0; + local_range.levelCount = 1; - range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; - range.baseMipLevel = 0; - range.levelCount = 1; - - radv_decompress_depth_image_inplace(cmd_buffer, image, &range); + radv_decompress_depth_image_inplace(cmd_buffer, image, &local_range); } } @@ -2667,7 +2730,7 @@ static void radv_handle_cmask_image_transition(struct radv_cmd_buffer *cmd_buffe VkImageLayout dst_layout, unsigned src_queue_mask, unsigned dst_queue_mask, - VkImageSubresourceRange range, + const VkImageSubresourceRange *range, VkImageAspectFlags pending_clears) { if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) { @@ -2677,7 +2740,7 @@ static void radv_handle_cmask_image_transition(struct radv_cmd_buffer *cmd_buffe radv_initialise_cmask(cmd_buffer, image, 0xffffffffu); } else if (radv_layout_can_fast_clear(image, src_layout, src_queue_mask) && !radv_layout_can_fast_clear(image, dst_layout, dst_queue_mask)) { - radv_fast_clear_flush_image_inplace(cmd_buffer, image); + radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); } } @@ -2704,14 +2767,14 @@ static void radv_handle_dcc_image_transition(struct radv_cmd_buffer *cmd_buffer, VkImageLayout dst_layout, unsigned src_queue_mask, unsigned dst_queue_mask, - VkImageSubresourceRange range, + const VkImageSubresourceRange *range, VkImageAspectFlags pending_clears) { if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) { radv_initialize_dcc(cmd_buffer, image, 0x20202020u); } else if (radv_layout_can_fast_clear(image, src_layout, src_queue_mask) && !radv_layout_can_fast_clear(image, dst_layout, dst_queue_mask)) { - radv_fast_clear_flush_image_inplace(cmd_buffer, image); + radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); } } @@ -2721,7 +2784,7 @@ static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, VkImageLayout dst_layout, uint32_t src_family, uint32_t dst_family, - VkImageSubresourceRange range, + const VkImageSubresourceRange *range, VkImageAspectFlags pending_clears) { if (image->exclusive && src_family != dst_family) { @@ -2776,7 +2839,7 @@ void radv_CmdPipelineBarrier( RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); VkAccessFlags src_flags = 0; VkAccessFlags dst_flags = 0; - uint32_t b; + for (uint32_t i = 0; i < memoryBarrierCount; i++) { src_flags |= pMemoryBarriers[i].srcAccessMask; dst_flags |= pMemoryBarriers[i].dstAccessMask; @@ -2792,26 +2855,7 @@ void radv_CmdPipelineBarrier( dst_flags |= pImageMemoryBarriers[i].dstAccessMask; } - enum radv_cmd_flush_bits flush_bits = 0; - for_each_bit(b, src_flags) { - switch ((VkAccessFlagBits)(1 << b)) { - case VK_ACCESS_SHADER_WRITE_BIT: - flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2; - break; - case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT: - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; - break; - case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB; - break; - case VK_ACCESS_TRANSFER_WRITE_BIT: - flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; - break; - default: - break; - } - } - cmd_buffer->state.flush_bits |= flush_bits; + radv_src_access_flush(cmd_buffer, src_flags); for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image); @@ -2820,36 +2864,14 @@ void radv_CmdPipelineBarrier( pImageMemoryBarriers[i].newLayout, pImageMemoryBarriers[i].srcQueueFamilyIndex, pImageMemoryBarriers[i].dstQueueFamilyIndex, - pImageMemoryBarriers[i].subresourceRange, + &pImageMemoryBarriers[i].subresourceRange, 0); } - flush_bits = 0; - - for_each_bit(b, dst_flags) { - switch ((VkAccessFlagBits)(1 << b)) { - case VK_ACCESS_INDIRECT_COMMAND_READ_BIT: - case VK_ACCESS_INDEX_READ_BIT: - case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT: - flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1; - break; - case VK_ACCESS_UNIFORM_READ_BIT: - flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1; - break; - case VK_ACCESS_SHADER_READ_BIT: - flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2; - break; - case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT: - case VK_ACCESS_TRANSFER_READ_BIT: - case VK_ACCESS_TRANSFER_WRITE_BIT: - case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT: - flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER | RADV_CMD_FLAG_INV_GLOBAL_L2; - default: - break; - } - } + radv_dst_access_flush(cmd_buffer, dst_flags); - flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | + /* TODO reduce this */ + enum radv_cmd_flush_bits flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH; cmd_buffer->state.flush_bits |= flush_bits; @@ -2955,7 +2977,7 @@ void radv_CmdWaitEvents(VkCommandBuffer commandBuffer, pImageMemoryBarriers[i].newLayout, pImageMemoryBarriers[i].srcQueueFamilyIndex, pImageMemoryBarriers[i].dstQueueFamilyIndex, - pImageMemoryBarriers[i].subresourceRange, + &pImageMemoryBarriers[i].subresourceRange, 0); }