From ed9463815644c85c124c72111d96e256db2986b4 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Mon, 9 Apr 2018 11:23:21 +0200 Subject: [PATCH] radv: Enable RB+ where possible. According to Marek, not enabling it on Stoney has a significant negative performance impact. (And I guess this might impact performance on Raven as well) The register settings are pretty much copied from radeonsi. I did not put this in the pipeline as that would make the pipeline more dependent on the format which mean we would have to have more pipelines for the meta shaders. v2: Don't clear RB+ regs if not enabled as the CLEAR_STATE packet does already. Reviewed-by: Samuel Pitoiset --- src/amd/vulkan/radv_cmd_buffer.c | 140 +++++++++++++++++++++++++++++++ src/amd/vulkan/radv_pipeline.c | 13 +-- src/amd/vulkan/radv_private.h | 4 + 3 files changed, 151 insertions(+), 6 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 3b1d6aedc81..f73526b5fc8 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -678,6 +678,142 @@ radv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer, state->prefetch_L2_mask &= ~mask; } +static void +radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer) +{ + if (!cmd_buffer->device->physical_device->rbplus_allowed) + return; + + struct radv_pipeline *pipeline = cmd_buffer->state.pipeline; + struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer; + const struct radv_subpass *subpass = cmd_buffer->state.subpass; + + unsigned sx_ps_downconvert = 0; + unsigned sx_blend_opt_epsilon = 0; + unsigned sx_blend_opt_control = 0; + + for (unsigned i = 0; i < subpass->color_count; ++i) { + if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) + continue; + + int idx = subpass->color_attachments[i].attachment; + struct radv_color_buffer_info *cb = &framebuffer->attachments[idx].cb; + + unsigned format = G_028C70_FORMAT(cb->cb_color_info); + unsigned swap = G_028C70_COMP_SWAP(cb->cb_color_info); + uint32_t spi_format = (pipeline->graphics.col_format >> (i * 4)) & 0xf; + uint32_t colormask = (pipeline->graphics.cb_target_mask >> (i * 4)) & 0xf; + + bool has_alpha, has_rgb; + + /* Set if RGB and A are present. */ + has_alpha = !G_028C74_FORCE_DST_ALPHA_1(cb->cb_color_attrib); + + if (format == V_028C70_COLOR_8 || + format == V_028C70_COLOR_16 || + format == V_028C70_COLOR_32) + has_rgb = !has_alpha; + else + has_rgb = true; + + /* Check the colormask and export format. */ + if (!(colormask & 0x7)) + has_rgb = false; + if (!(colormask & 0x8)) + has_alpha = false; + + if (spi_format == V_028714_SPI_SHADER_ZERO) { + has_rgb = false; + has_alpha = false; + } + + /* Disable value checking for disabled channels. */ + if (!has_rgb) + sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); + if (!has_alpha) + sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); + + /* Enable down-conversion for 32bpp and smaller formats. */ + switch (format) { + case V_028C70_COLOR_8: + case V_028C70_COLOR_8_8: + case V_028C70_COLOR_8_8_8_8: + /* For 1 and 2-channel formats, use the superset thereof. */ + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || + spi_format == V_028714_SPI_SHADER_UINT16_ABGR || + spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4); + sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4); + } + break; + + case V_028C70_COLOR_5_6_5: + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4); + sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4); + } + break; + + case V_028C70_COLOR_1_5_5_5: + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4); + sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4); + } + break; + + case V_028C70_COLOR_4_4_4_4: + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4); + sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4); + } + break; + + case V_028C70_COLOR_32: + if (swap == V_028C70_SWAP_STD && + spi_format == V_028714_SPI_SHADER_32_R) + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); + else if (swap == V_028C70_SWAP_ALT_REV && + spi_format == V_028714_SPI_SHADER_32_AR) + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4); + break; + + case V_028C70_COLOR_16: + case V_028C70_COLOR_16_16: + /* For 1-channel formats, use the superset thereof. */ + if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || + spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || + spi_format == V_028714_SPI_SHADER_UINT16_ABGR || + spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { + if (swap == V_028C70_SWAP_STD || + swap == V_028C70_SWAP_STD_REV) + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4); + else + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4); + } + break; + + case V_028C70_COLOR_10_11_11: + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4); + sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4); + } + break; + + case V_028C70_COLOR_2_10_10_10: + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4); + sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); + } + break; + } + } + + radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3); + radeon_emit(cmd_buffer->cs, sx_ps_downconvert); + radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon); + radeon_emit(cmd_buffer->cs, sx_blend_opt_control); +} + static void radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer) { @@ -3005,6 +3141,10 @@ static void radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info) { + if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) || + cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline) + radv_emit_rbplus_state(cmd_buffer); + if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) radv_emit_graphics_pipeline(cmd_buffer); diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 08abb9dbc47..6735b36846a 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -2543,17 +2543,15 @@ radv_pipeline_generate_blend_state(struct radeon_winsys_cs *cs, radeon_set_context_reg_seq(cs, R_028760_SX_MRT0_BLEND_OPT, 8); radeon_emit_array(cs, blend->sx_mrt_blend_opt, 8); - - radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3); - radeon_emit(cs, 0); /* R_028754_SX_PS_DOWNCONVERT */ - radeon_emit(cs, 0); /* R_028758_SX_BLEND_OPT_EPSILON */ - radeon_emit(cs, 0); /* R_02875C_SX_BLEND_OPT_CONTROL */ } radeon_set_context_reg(cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format); radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask); radeon_set_context_reg(cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask); + + pipeline->graphics.col_format = blend->spi_shader_col_format; + pipeline->graphics.cb_target_mask = blend->cb_target_mask; } @@ -2993,6 +2991,9 @@ radv_compute_db_shader_control(const struct radv_device *device, else z_order = V_02880C_LATE_Z; + bool disable_rbplus = device->physical_device->has_rbplus && + !device->physical_device->rbplus_allowed; + return S_02880C_Z_EXPORT_ENABLE(ps->info.info.ps.writes_z) | S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.info.ps.writes_stencil) | S_02880C_KILL_ENABLE(!!ps->info.fs.can_discard) | @@ -3001,7 +3002,7 @@ radv_compute_db_shader_control(const struct radv_device *device, S_02880C_DEPTH_BEFORE_SHADER(ps->info.fs.early_fragment_test) | S_02880C_EXEC_ON_HIER_FAIL(ps->info.info.ps.writes_memory) | S_02880C_EXEC_ON_NOOP(ps->info.info.ps.writes_memory) | - S_02880C_DUAL_QUAD_DISABLE(!!device->physical_device->has_rbplus); + S_02880C_DUAL_QUAD_DISABLE(disable_rbplus); } static void diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 31748910ad8..1bcc3a906ec 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1238,6 +1238,10 @@ struct radv_pipeline { bool can_use_guardband; uint32_t needed_dynamic_state; bool disable_out_of_order_rast_for_occlusion; + + /* Used for rbplus */ + uint32_t col_format; + uint32_t cb_target_mask; } graphics; }; -- 2.30.2