radv: Enable RB+ where possible.
author Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Mon, 9 Apr 2018 09:23:21 +0000 (11:23 +0200)
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Tue, 10 Apr 2018 23:19:10 +0000 (01:19 +0200)
According to Marek, not enabling it on Stoney has a significant
negative performance impact. (And I guess this might impact
performance on Raven as well)

The register settings are pretty much copied from radeonsi. I did
not put this in the pipeline as that would make the pipeline more
dependent on the format, which means we would need more
pipelines for the meta shaders.

v2: Don't clear the RB+ regs when RB+ is not enabled, as the
    CLEAR_STATE packet already does that.
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/radv_private.h

index 3b1d6aedc81a43b78c1647050cc938bbe08c35ea..f73526b5fc8b5099cf08c00e73f45eaa1e57aa3e 100644 (file)
@@ -678,6 +678,142 @@ radv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer,
        state->prefetch_L2_mask &= ~mask;
 }
 
+static void
+radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)
+{
+       if (!cmd_buffer->device->physical_device->rbplus_allowed)
+               return; /* Nothing is emitted when RB+ is not allowed. */
+       /* Compute SX_PS_DOWNCONVERT, SX_BLEND_OPT_EPSILON and
+        * SX_BLEND_OPT_CONTROL from the bound pipeline's export formats and
+        * write masks plus the color attachments of the current subpass, then
+        * emit all three registers to the command stream.
+        */
+       struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+       struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
+       const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+
+       unsigned sx_ps_downconvert = 0;
+       unsigned sx_blend_opt_epsilon = 0;
+       unsigned sx_blend_opt_control = 0;
+       /* Color attachment i owns the 4-bit field at bit (i * 4) in each of
+        * the three values; unused attachments leave their fields at zero.
+        */
+       for (unsigned i = 0; i < subpass->color_count; ++i) {
+               if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
+                       continue;
+
+               int idx = subpass->color_attachments[i].attachment;
+               struct radv_color_buffer_info *cb = &framebuffer->attachments[idx].cb;
+
+               unsigned format = G_028C70_FORMAT(cb->cb_color_info);
+               unsigned swap = G_028C70_COMP_SWAP(cb->cb_color_info);
+               uint32_t spi_format = (pipeline->graphics.col_format >> (i * 4)) & 0xf; /* shader export format for target i */
+               uint32_t colormask = (pipeline->graphics.cb_target_mask >> (i * 4)) & 0xf; /* write mask for target i */
+
+               bool has_alpha, has_rgb;
+
+               /* Set if RGB and A are present.
+                * Alpha counts as present unless FORCE_DST_ALPHA_1 is set in
+                * cb_color_attrib. COLOR_8, COLOR_16 and COLOR_32 are treated
+                * as holding either alpha or color, never both.
+                */
+               has_alpha = !G_028C74_FORCE_DST_ALPHA_1(cb->cb_color_attrib);
+
+               if (format == V_028C70_COLOR_8 ||
+                   format == V_028C70_COLOR_16 ||
+                   format == V_028C70_COLOR_32)
+                       has_rgb = !has_alpha;
+               else
+                       has_rgb = true;
+
+               /* Check the colormask and export format.
+                * Bits 0-2 of the write mask gate the color channels and
+                * bit 3 gates alpha; a SPI_SHADER_ZERO export format
+                * disables both.
+                */
+               if (!(colormask & 0x7))
+                       has_rgb = false;
+               if (!(colormask & 0x8))
+                       has_alpha = false;
+
+               if (spi_format == V_028714_SPI_SHADER_ZERO) {
+                       has_rgb = false;
+                       has_alpha = false;
+               }
+
+               /* Disable value checking for disabled channels. */
+               if (!has_rgb)
+                       sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
+               if (!has_alpha)
+                       sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
+
+               /* Enable down-conversion for 32bpp and smaller formats.
+                * A field is only set when the shader export format matches
+                * one of the formats tested in the case; otherwise, and for
+                * any format without a case below, it stays zero. The
+                * COLOR_32 and COLOR_16/COLOR_16_16 cases set no epsilon.
+                */
+               switch (format) {
+               case V_028C70_COLOR_8:
+               case V_028C70_COLOR_8_8:
+               case V_028C70_COLOR_8_8_8_8:
+                       /* For 1 and 2-channel formats, use the superset thereof. */
+                       if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
+                           spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
+                           spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
+                               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
+                               sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
+                       }
+                       break;
+
+               case V_028C70_COLOR_5_6_5:
+                       if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+                               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
+                               sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
+                       }
+                       break;
+
+               case V_028C70_COLOR_1_5_5_5:
+                       if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+                               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
+                               sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
+                       }
+                       break;
+
+               case V_028C70_COLOR_4_4_4_4:
+                       if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+                               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
+                               sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
+                       }
+                       break;
+
+               case V_028C70_COLOR_32:
+                       /* The export mode depends on both the component swap
+                        * and the export format; other combinations leave
+                        * the field at zero.
+                        */
+                       if (swap == V_028C70_SWAP_STD &&
+                           spi_format == V_028714_SPI_SHADER_32_R)
+                               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
+                       else if (swap == V_028C70_SWAP_ALT_REV &&
+                                spi_format == V_028714_SPI_SHADER_32_AR)
+                               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
+                       break;
+
+               case V_028C70_COLOR_16:
+               case V_028C70_COLOR_16_16:
+                       /* For 1-channel formats, use the superset thereof.
+                        * SWAP_STD and SWAP_STD_REV select the GR export, any
+                        * other swap selects the AR export.
+                        */
+                       if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
+                           spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
+                           spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
+                           spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
+                               if (swap == V_028C70_SWAP_STD ||
+                                   swap == V_028C70_SWAP_STD_REV)
+                                       sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
+                               else
+                                       sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
+                       }
+                       break;
+
+               case V_028C70_COLOR_10_11_11:
+                       if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+                               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
+                               sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4);
+                       }
+                       break;
+
+               case V_028C70_COLOR_2_10_10_10:
+                       if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+                               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
+                               sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
+                       }
+                       break;
+               }
+       }
+       /* Written as a sequence of three context registers starting at
+        * R_028754_SX_PS_DOWNCONVERT, in the order emitted below.
+        */
+       radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3);
+       radeon_emit(cmd_buffer->cs, sx_ps_downconvert);
+       radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon);
+       radeon_emit(cmd_buffer->cs, sx_blend_opt_control);
+}
+
 static void
 radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
 {
@@ -3005,6 +3141,10 @@ static void
 radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer,
                              const struct radv_draw_info *info)
 {
+       if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) ||
+           cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline)
+               radv_emit_rbplus_state(cmd_buffer);
+
        if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE)
                radv_emit_graphics_pipeline(cmd_buffer);
 
index 08abb9dbc47550050e9fdd5c3f9b38759b7a196c..6735b36846ac06bdca1404782e53e52dcea05204 100644 (file)
@@ -2543,17 +2543,15 @@ radv_pipeline_generate_blend_state(struct radeon_winsys_cs *cs,
 
                radeon_set_context_reg_seq(cs, R_028760_SX_MRT0_BLEND_OPT, 8);
                radeon_emit_array(cs, blend->sx_mrt_blend_opt, 8);
-
-               radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
-               radeon_emit(cs, 0);     /* R_028754_SX_PS_DOWNCONVERT */
-               radeon_emit(cs, 0);     /* R_028758_SX_BLEND_OPT_EPSILON */
-               radeon_emit(cs, 0);     /* R_02875C_SX_BLEND_OPT_CONTROL */
        }
 
        radeon_set_context_reg(cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format);
 
        radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask);
        radeon_set_context_reg(cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask);
+
+       pipeline->graphics.col_format = blend->spi_shader_col_format;
+       pipeline->graphics.cb_target_mask = blend->cb_target_mask;
 }
 
 
@@ -2993,6 +2991,9 @@ radv_compute_db_shader_control(const struct radv_device *device,
        else
                z_order = V_02880C_LATE_Z;
 
+       bool disable_rbplus = device->physical_device->has_rbplus &&
+                             !device->physical_device->rbplus_allowed;
+
        return  S_02880C_Z_EXPORT_ENABLE(ps->info.info.ps.writes_z) |
                S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.info.ps.writes_stencil) |
                S_02880C_KILL_ENABLE(!!ps->info.fs.can_discard) |
@@ -3001,7 +3002,7 @@ radv_compute_db_shader_control(const struct radv_device *device,
                S_02880C_DEPTH_BEFORE_SHADER(ps->info.fs.early_fragment_test) |
                S_02880C_EXEC_ON_HIER_FAIL(ps->info.info.ps.writes_memory) |
                S_02880C_EXEC_ON_NOOP(ps->info.info.ps.writes_memory) |
-               S_02880C_DUAL_QUAD_DISABLE(!!device->physical_device->has_rbplus);
+               S_02880C_DUAL_QUAD_DISABLE(disable_rbplus);
 }
 
 static void
index 31748910ad81b5becb4328fdbb7fb58051a5990b..1bcc3a906ecacec83b996d1e1991aae6fb4696e0 100644 (file)
@@ -1238,6 +1238,10 @@ struct radv_pipeline {
                        bool can_use_guardband;
                        uint32_t needed_dynamic_state;
                        bool disable_out_of_order_rast_for_occlusion;
+
+                       /* Used for rbplus */
+                       uint32_t col_format;
+                       uint32_t cb_target_mask;
                } graphics;
        };