From: Marek Olšák Date: Wed, 17 Jun 2020 22:04:53 +0000 (-0400) Subject: radeonsi: compact MRTs to save PS export memory space X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3fec2f67c3d9424c8c0fdfaeef5d906e27453cd6;p=mesa.git radeonsi: compact MRTs to save PS export memory space If there are holes between color outputs (e.g. a shader exports MRT1, but not MRT0), we can remove the holes by moving higher MRTs lower. The hardware will remap the MRTs to their correct locations if we remove holes in SPI_SHADER_COL_FORMAT but not CB_SHADER_MASK. This is a performance optimization, but MRTs with holes are pretty rare, so there is most likely no effect on any app. Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c index d04b28d64cb..c5d2cd56f79 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c @@ -254,7 +254,8 @@ static void si_export_mrt_z(struct si_shader_context *ctx, LLVMValueRef depth, L /* Initialize arguments for the shader export intrinsic */ static void si_llvm_init_ps_export_args(struct si_shader_context *ctx, LLVMValueRef *values, - unsigned target, struct ac_export_args *args) + unsigned cbuf, unsigned compacted_mrt_index, + struct ac_export_args *args) { const struct si_shader_key *key = &ctx->shader->key; unsigned col_formats = key->part.ps.epilog.spi_shader_col_format; @@ -262,7 +263,6 @@ static void si_llvm_init_ps_export_args(struct si_shader_context *ctx, LLVMValue unsigned spi_shader_col_format; unsigned chan; bool is_int8, is_int10; - int cbuf = target - V_008DFC_SQ_EXP_MRT; assert(cbuf >= 0 && cbuf < 8); @@ -280,7 +280,7 @@ static void si_llvm_init_ps_export_args(struct si_shader_context *ctx, LLVMValue args->done = 0; /* Specify the target we are exporting */ - args->target = target; + args->target = V_008DFC_SQ_EXP_MRT + compacted_mrt_index; args->compr = false; args->out[0] = f32undef; @@ -371,8 +371,9 @@ static void si_llvm_init_ps_export_args(struct si_shader_context *ctx, LLVMValue } } -static void si_export_mrt_color(struct si_shader_context *ctx, LLVMValueRef *color, unsigned index, - unsigned samplemask_param, bool is_last, struct si_ps_exports *exp) +static bool si_export_mrt_color(struct si_shader_context *ctx, LLVMValueRef *color, unsigned index, + unsigned compacted_mrt_index, unsigned samplemask_param, + bool is_last, struct si_ps_exports *exp) { int i; @@ -398,12 +399,18 @@ static void si_export_mrt_color(struct si_shader_context *ctx, LLVMValueRef *col struct ac_export_args args[8]; int c, last = -1; + assert(compacted_mrt_index == 0); + /* Get the export arguments, also find out what the last one is. */ for (c = 0; c <= ctx->shader->key.part.ps.epilog.last_cbuf; c++) { - si_llvm_init_ps_export_args(ctx, color, V_008DFC_SQ_EXP_MRT + c, &args[c]); - if (args[c].enabled_channels) + si_llvm_init_ps_export_args(ctx, color, c, compacted_mrt_index, &args[c]); + if (args[c].enabled_channels) { + compacted_mrt_index++; last = c; + } } + if (last == -1) + return false; /* Emit all exports. */ for (c = 0; c <= ctx->shader->key.part.ps.epilog.last_cbuf; c++) { @@ -419,15 +426,16 @@ static void si_export_mrt_color(struct si_shader_context *ctx, LLVMValueRef *col struct ac_export_args args; /* Export */ - si_llvm_init_ps_export_args(ctx, color, V_008DFC_SQ_EXP_MRT + index, &args); + si_llvm_init_ps_export_args(ctx, color, index, compacted_mrt_index, &args); if (is_last) { args.valid_mask = 1; /* whether the EXEC mask is valid */ args.done = 1; /* DONE bit */ } else if (!args.enabled_channels) - return; /* unnecessary NULL export */ + return false; /* unnecessary NULL export */ memcpy(&exp->args[exp->num++], &args, sizeof(args)); } + return true; } static void si_emit_ps_exports(struct si_shader_context *ctx, struct si_ps_exports *exp) @@ -871,14 +879,18 @@ void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part } } + unsigned num_compacted_mrts = 0; while (colors_written) { LLVMValueRef color[4]; - int mrt = u_bit_scan(&colors_written); + int output_index = u_bit_scan(&colors_written); for (i = 0; i < 4; i++) color[i] = LLVMGetParam(ctx->main_fn, vgpr++); - si_export_mrt_color(ctx, color, mrt, ctx->args.arg_count - 1, mrt == last_color_export, &exp); + if (si_export_mrt_color(ctx, color, output_index, num_compacted_mrts, + ctx->args.arg_count - 1, + output_index == last_color_export, &exp)) + num_compacted_mrts++; } /* Process depth, stencil, samplemask. */ diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 520eeada9e9..de5ca569473 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1487,15 +1487,19 @@ static unsigned si_get_ps_num_interp(struct si_shader *ps) static unsigned si_get_spi_shader_col_format(struct si_shader *shader) { - unsigned value = shader->key.part.ps.epilog.spi_shader_col_format; - unsigned i, num_targets = (util_last_bit(value) + 3) / 4; + unsigned spi_shader_col_format = shader->key.part.ps.epilog.spi_shader_col_format; + unsigned value = 0, num_mrts = 0; + unsigned i, num_targets = (util_last_bit(spi_shader_col_format) + 3) / 4; - /* If the i-th target format is set, all previous target formats must - * be non-zero to avoid hangs. - */ - for (i = 0; i < num_targets; i++) - if (!(value & (0xf << (i * 4)))) - value |= V_028714_SPI_SHADER_32_R << (i * 4); + /* Remove holes in spi_shader_col_format. */ + for (i = 0; i < num_targets; i++) { + unsigned spi_format = (spi_shader_col_format >> (i * 4)) & 0xf; + + if (spi_format) { + value |= spi_format << (num_mrts * 4); + num_mrts++; + } + } return value; } @@ -1599,7 +1603,7 @@ static void si_shader_ps(struct si_screen *sscreen, struct si_shader *shader) spi_baryc_cntl |= S_0286E0_POS_FLOAT_ULC(1); spi_shader_col_format = si_get_spi_shader_col_format(shader); - cb_shader_mask = ac_get_cb_shader_mask(spi_shader_col_format); + cb_shader_mask = ac_get_cb_shader_mask(shader->key.part.ps.epilog.spi_shader_col_format); /* Ensure that some export memory is always allocated, for two reasons: *