radv: Do not redundantly set the RB+ regs on pipeline switch.
author: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Thu, 6 Feb 2020 15:52:52 +0000 (16:52 +0100)
committer: Marge Bot <eric+marge@anholt.net>
Tue, 11 Feb 2020 04:39:42 +0000 (04:39 +0000)
No significant performance changes seen on Bayonetta (the differences
are within the noise on my Raven laptop).

Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3735>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3735>

src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_private.h

index 12d5d60d91aba5f0817422f4f8032eabbb8a4083..d516c26d96c604706a6402c89c9e96d454a6be4f 100644 (file)
@@ -1118,13 +1118,21 @@ radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)
         * breaks dual source blending in SkQP and does not seem to improve
         * performance. */
 
-       /* TODO: avoid redundantly setting context registers */
+       if (sx_ps_downconvert == cmd_buffer->state.last_sx_ps_downconvert &&
+           sx_blend_opt_epsilon == cmd_buffer->state.last_sx_blend_opt_epsilon &&
+           sx_blend_opt_control == cmd_buffer->state.last_sx_blend_opt_control)
+               return;
+
        radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3);
        radeon_emit(cmd_buffer->cs, sx_ps_downconvert);
        radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon);
        radeon_emit(cmd_buffer->cs, sx_blend_opt_control);
 
        cmd_buffer->state.context_roll_without_scissor_emitted = true;
+
+       cmd_buffer->state.last_sx_ps_downconvert = sx_ps_downconvert;
+       cmd_buffer->state.last_sx_blend_opt_epsilon = sx_blend_opt_epsilon;
+       cmd_buffer->state.last_sx_blend_opt_control = sx_blend_opt_control;
 }
 
 static void
@@ -3368,6 +3376,9 @@ VkResult radv_BeginCommandBuffer(
        cmd_buffer->state.last_vertex_offset = -1;
        cmd_buffer->state.last_first_instance = -1;
        cmd_buffer->state.predication_type = -1;
+       cmd_buffer->state.last_sx_ps_downconvert = -1;
+       cmd_buffer->state.last_sx_blend_opt_epsilon = -1;
+       cmd_buffer->state.last_sx_blend_opt_control = -1;
        cmd_buffer->usage_flags = pBeginInfo->flags;
 
        if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
@@ -4178,6 +4189,9 @@ void radv_CmdExecuteCommands(
                primary->state.last_first_instance = secondary->state.last_first_instance;
                primary->state.last_num_instances = secondary->state.last_num_instances;
                primary->state.last_vertex_offset = secondary->state.last_vertex_offset;
+               primary->state.last_sx_ps_downconvert = secondary->state.last_sx_ps_downconvert;
+               primary->state.last_sx_blend_opt_epsilon = secondary->state.last_sx_blend_opt_epsilon;
+               primary->state.last_sx_blend_opt_control = secondary->state.last_sx_blend_opt_control;
 
                if (secondary->state.last_index_type != -1) {
                        primary->state.last_index_type =
index ca7d9a084f51511c2dc644bc71f831059d581ed0..537355f6c9a62026e906d30e7aea1f043ba42d2c 100644 (file)
@@ -1269,6 +1269,10 @@ struct radv_cmd_state {
        uint32_t last_first_instance;
        uint32_t last_vertex_offset;
 
+       uint32_t last_sx_ps_downconvert;
+       uint32_t last_sx_blend_opt_epsilon;
+       uint32_t last_sx_blend_opt_control;
+
        /* Whether CP DMA is busy/idle. */
        bool dma_is_busy;