From: Dave Airlie Date: Wed, 10 Jan 2018 22:02:52 +0000 (+1000) Subject: radv: don't emit unneeded vertex state. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ad11fc3571e025b22d9feed80a7f499665f7a255;p=mesa.git radv: don't emit unneeded vertex state. If the number of instances hasn't changed and we've already emitted it, don't emit it again. If the vertex shader is the same and the first_instance, vertex_offset haven't changed don't emit them again. This increases the fps in GL_vs_VK -t 1 -m -api vk from around 40 to around 60 here, it may not impact anything else. Dieter also reported smoketest going from 1060->1200 fps. Reviewed-by: Bas Nieuwenhuizen Reviewed-by: Samuel Pitoiset Tested-by: Dieter Nützel Signed-off-by: Dave Airlie --- diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index fb48691ca19..67799a13cc2 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -2312,6 +2312,9 @@ VkResult radv_BeginCommandBuffer( memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state)); cmd_buffer->state.last_primitive_reset_en = -1; cmd_buffer->state.last_index_type = -1; + cmd_buffer->state.last_num_instances = -1; + cmd_buffer->state.last_vertex_offset = -1; + cmd_buffer->state.last_first_instance = -1; cmd_buffer->usage_flags = pBeginInfo->flags; /* setup initial configuration into command buffer */ @@ -2733,6 +2736,10 @@ void radv_CmdBindPipeline( cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE; cmd_buffer->push_constant_stages |= pipeline->active_stages; + /* the new vertex shader might not have the same user regs */ + cmd_buffer->state.last_first_instance = -1; + cmd_buffer->state.last_vertex_offset = -1; + radv_bind_dynamic_state(cmd_buffer, &pipeline->dynamic_state); if (pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed) @@ -3003,6 +3010,21 @@ void radv_CmdExecuteCommands( secondary->state.last_ia_multi_vgt_param; } + if (secondary->state.last_first_instance != -1) { + primary->state.last_first_instance = + secondary->state.last_first_instance; + } + + if (secondary->state.last_num_instances != -1) { + primary->state.last_num_instances = + secondary->state.last_num_instances; + } + + if (secondary->state.last_vertex_offset != -1) { + primary->state.last_vertex_offset = + secondary->state.last_vertex_offset; + } + if (secondary->state.last_index_type != -1) { primary->state.last_index_type = secondary->state.last_index_type; @@ -3207,6 +3229,11 @@ radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t base_reg = cmd_buffer->state.pipeline->graphics.vtx_base_sgpr; assert(base_reg); + /* just reset draw state for vertex data */ + cmd_buffer->state.last_first_instance = -1; + cmd_buffer->state.last_num_instances = -1; + cmd_buffer->state.last_vertex_offset = -1; + if (draw_count == 1 && !count_va && !draw_id_enable) { radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT : PKT3_DRAW_INDIRECT, 3, false)); @@ -3326,15 +3353,25 @@ radv_emit_draw_packets(struct radv_cmd_buffer *cmd_buffer, } } else { assert(state->pipeline->graphics.vtx_base_sgpr); - radeon_set_sh_reg_seq(cs, state->pipeline->graphics.vtx_base_sgpr, - state->pipeline->graphics.vtx_emit_num); - radeon_emit(cs, info->vertex_offset); - radeon_emit(cs, info->first_instance); - if (state->pipeline->graphics.vtx_emit_num == 3) - radeon_emit(cs, 0); - radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, state->predicating)); - radeon_emit(cs, info->instance_count); + if (info->vertex_offset != state->last_vertex_offset || + info->first_instance != state->last_first_instance) { + radeon_set_sh_reg_seq(cs, state->pipeline->graphics.vtx_base_sgpr, + state->pipeline->graphics.vtx_emit_num); + + radeon_emit(cs, info->vertex_offset); + radeon_emit(cs, info->first_instance); + if (state->pipeline->graphics.vtx_emit_num == 3) + radeon_emit(cs, 0); + state->last_first_instance = info->first_instance; + state->last_vertex_offset = info->vertex_offset; + } + + if (state->last_num_instances != info->instance_count) { + radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, state->predicating)); + radeon_emit(cs, info->instance_count); + state->last_num_instances = info->instance_count; + } if (info->indexed) { int index_size = state->index_type ? 4 : 2; diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 7330dc6369a..c39358951de 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -913,6 +913,10 @@ struct radv_cmd_state { uint32_t valid_descriptors; uint32_t trace_id; uint32_t last_ia_multi_vgt_param; + + uint32_t last_num_instances; + uint32_t last_first_instance; + uint32_t last_vertex_offset; }; struct radv_cmd_pool {