From 7a5e6fd25f2e132ef4cacc3a5b714c4e153227b0 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Thu, 11 Jun 2020 21:54:01 +0200 Subject: [PATCH] radv: add support for MRTs compaction to avoid holes SPI_SHADER_COL_FORMAT allocates export memory and CB_SHADER_MASK map them to higher MRTs if necessary. The hardware allows to remap MRTs to avoid holes somehow. For example, if we have a scenario where MRT0 is unused and only MRT1 and MRT2 are used, SPI_SHADER_COL_FORMAT is 0x77 and CB_SHADER_MASK/CB_TARGET_MASK are 0x770 (this assumes SPI_SHADER_UINT16_ABGR is set). This allows us to remove one workaround that was added for fixing GPU hangs with DXVK. I think this is because SPI_SHADER_COL_FORMAT expects contiguous MRTs to be allocated. Signed-off-by: Samuel Pitoiset Reviewed-by: Bas Nieuwenhuizen Part-of: --- .../compiler/aco_instruction_selection.cpp | 18 ++++--- src/amd/vulkan/radv_nir_to_llvm.c | 3 +- src/amd/vulkan/radv_pipeline.c | 51 ++++++++++--------- src/amd/vulkan/radv_shader_info.c | 12 ----- 4 files changed, 41 insertions(+), 43 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index de435ff7834..cd2263963fc 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -9979,15 +9979,15 @@ static bool export_fs_mrt_z(isel_context *ctx) return true; } -static bool export_fs_mrt_color(isel_context *ctx, int slot) +static bool export_fs_mrt_color(isel_context *ctx, int slot, + unsigned write_mask, Temp *outputs) { Builder bld(ctx->program, ctx->block); - unsigned write_mask = ctx->outputs.mask[slot]; Operand values[4]; for (unsigned i = 0; i < 4; ++i) { if (write_mask & (1 << i)) { - values[i] = Operand(ctx->outputs.temps[slot * 4u + i]); + values[i] = Operand(outputs[i]); } else { values[i] = Operand(v1); } @@ -9997,7 +9997,6 @@ static bool export_fs_mrt_color(isel_context *ctx, int slot) unsigned enabled_channels = 0; aco_opcode compr_op = (aco_opcode)0; - slot -= FRAG_RESULT_DATA0; target = V_008DFC_SQ_EXP_MRT + slot; col_format = (ctx->options->key.fs.col_format >> (4 * slot)) & 0xf; @@ -10182,6 +10181,7 @@ static bool export_fs_mrt_color(isel_context *ctx, int slot) static void create_fs_exports(isel_context *ctx) { + unsigned compacted_mrt_index = 0; bool exported = false; /* Export depth, stencil and sample mask. */ @@ -10191,9 +10191,15 @@ static void create_fs_exports(isel_context *ctx) exported |= export_fs_mrt_z(ctx); /* Export all color render targets. */ - for (unsigned i = FRAG_RESULT_DATA0; i < FRAG_RESULT_DATA7 + 1; ++i) + for (unsigned i = FRAG_RESULT_DATA0; i < FRAG_RESULT_DATA7 + 1; ++i) { if (ctx->outputs.mask[i]) - exported |= export_fs_mrt_color(ctx, i); + if (export_fs_mrt_color(ctx, compacted_mrt_index, + ctx->outputs.mask[i], + &ctx->outputs.temps[i * 4u])) { + compacted_mrt_index++; + exported = true; + } + } if (!exported) create_null_export(ctx); diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index 987d1054356..0c81138f63d 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -3586,8 +3586,7 @@ handle_fs_outputs_post(struct radv_shader_context *ctx) values[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j)); - bool ret = si_export_mrt_color(ctx, values, - i - FRAG_RESULT_DATA0, + bool ret = si_export_mrt_color(ctx, values, index, &color_args[index]); if (ret) index++; diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index e5c0723c707..5e7ea7e64ba 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -511,8 +511,10 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline, { RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass; - unsigned col_format = 0, is_int8 = 0, is_int10 = 0; - unsigned num_targets; + unsigned exp_fmt[MAX_RTS] = {0}; + unsigned is_int8[MAX_RTS] = {0}, is_int10[MAX_RTS] = {0}; + unsigned col_format = 0; + unsigned col_format_is_int8 = 0, col_format_is_int10 = 0; for (unsigned i = 0; i < (blend->single_cb_enable ? 1 : subpass->color_count); ++i) { unsigned cf; @@ -529,42 +531,45 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline, blend_enable, blend->need_src_alpha & (1 << i)); - if (format_is_int8(attachment->format)) - is_int8 |= 1 << i; - if (format_is_int10(attachment->format)) - is_int10 |= 1 << i; + is_int8[i] = format_is_int8(attachment->format); + is_int10[i] = format_is_int10(attachment->format); } - col_format |= cf << (4 * i); + exp_fmt[i] = cf; } - if (!(col_format & 0xf) && blend->need_src_alpha & (1 << 0)) { + if (!exp_fmt[0] && blend->need_src_alpha & (1 << 0)) { /* When a subpass doesn't have any color attachments, write the * alpha channel of MRT0 when alpha coverage is enabled because * the depth attachment needs it. */ - col_format |= V_028714_SPI_SHADER_32_AR; - } - - /* If the i-th target format is set, all previous target formats must - * be non-zero to avoid hangs. - */ - num_targets = (util_last_bit(col_format) + 3) / 4; - for (unsigned i = 0; i < num_targets; i++) { - if (!(col_format & (0xf << (i * 4)))) { - col_format |= V_028714_SPI_SHADER_32_R << (i * 4); - } + exp_fmt[0] = V_028714_SPI_SHADER_32_AR; } /* The output for dual source blending should have the same format as * the first output. */ - if (blend->mrt0_is_dual_src) - col_format |= (col_format & 0xf) << 4; + if (blend->mrt0_is_dual_src) { + col_format |= (exp_fmt[0] << 4) | exp_fmt[0]; + col_format_is_int8 |= (is_int8[0] << 1) | is_int8[0]; + col_format_is_int10 |= (is_int10[0] << 1) | is_int10[0]; + } else { + /* Remove holes in SPI_SHADER_COL_FORMAT. */ + unsigned num_color_targets = 0; + for (unsigned i = 0; i < MAX_RTS; i++) { + if (!exp_fmt[i]) + continue; + + col_format |= exp_fmt[i] << (4 * num_color_targets); + col_format_is_int8 |= is_int8[i] << num_color_targets; + col_format_is_int10 |= is_int10[i] << num_color_targets; + num_color_targets++; + } + } blend->spi_shader_col_format = col_format; - blend->col_format_is_int8 = is_int8; - blend->col_format_is_int10 = is_int10; + blend->col_format_is_int8 = col_format_is_int8; + blend->col_format_is_int10 = col_format_is_int10; } /* diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c index eca46c81157..62a5f170f7c 100644 --- a/src/amd/vulkan/radv_shader_info.c +++ b/src/amd/vulkan/radv_shader_info.c @@ -846,18 +846,6 @@ radv_nir_shader_info_pass(const struct nir_shader *nir, info->float_controls_mode = nir->info.float_controls_execution_mode; if (nir->info.stage == MESA_SHADER_FRAGMENT) { - /* If the i-th output is used, all previous outputs must be - * non-zero to match the target format. - * TODO: compact MRT to avoid holes and to remove this - * workaround. - */ - unsigned num_targets = (util_last_bit(info->ps.cb_shader_mask) + 3) / 4; - for (unsigned i = 0; i < num_targets; i++) { - if (!(info->ps.cb_shader_mask & (0xf << (i * 4)))) { - info->ps.cb_shader_mask |= 0xf << (i * 4); - } - } - if (key->fs.is_dual_src) { info->ps.cb_shader_mask |= (info->ps.cb_shader_mask & 0xf) << 4; } -- 2.30.2