From 8667a1aea2ac3fee9b1b663132dc1bea8ec21cda Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 11 Jan 2016 23:51:39 +0100 Subject: [PATCH] radeonsi: use SPI_SHADER_COL_FORMAT fields instead of export_16bpc MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This does change the behavior slightly: If a shader writes COLOR[i] and that color buffer isn't bound, the shader will export MRT_NULL instead and discard the IR tree that calculates the output. The only exception is alpha-to-coverage, which requires an alpha export. v2: - update a comment about 16BPC - account for MRTZ when when fixing alpha-test/kill Reviewed-by: Nicolai Hähnle --- src/gallium/drivers/radeon/r600_pipe_common.h | 1 + src/gallium/drivers/radeonsi/si_pipe.h | 2 +- src/gallium/drivers/radeonsi/si_shader.c | 35 ++++++++++---- src/gallium/drivers/radeonsi/si_shader.h | 2 +- src/gallium/drivers/radeonsi/si_state.c | 41 +++++++++------- src/gallium/drivers/radeonsi/si_state.h | 1 + .../drivers/radeonsi/si_state_shaders.c | 47 +++++++++++++++---- 7 files changed, 91 insertions(+), 38 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 27f6e983eea..f3271e2ee6c 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -252,6 +252,7 @@ struct r600_surface { unsigned cb_color_fmask_slice; /* EG and later */ unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */ unsigned cb_color_mask; /* R600 only */ + unsigned spi_shader_col_format; /* SI+ */ unsigned sx_ps_downconvert; /* Stoney only */ unsigned sx_blend_opt_epsilon; /* Stoney only */ struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */ diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index f83cb024f0e..e2009de9870 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -125,7 +125,7 @@ struct si_framebuffer { unsigned log_samples; unsigned cb0_is_integer; unsigned compressed_cb_mask; - unsigned export_16bpc; + unsigned spi_shader_col_format; unsigned dirty_cbufs; bool dirty_zsbuf; }; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 2de7def8dd2..266ef6db633 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1265,7 +1265,7 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, struct lp_build_context *uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; struct lp_build_context *base = &bld_base->base; - unsigned compressed = 0; + unsigned spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR; unsigned chan; /* XXX: This controls which components of the output @@ -1286,17 +1286,29 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, args[3] = lp_build_const_int32(base->gallivm, target); if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) { + unsigned col_formats = + si_shader_ctx->shader->key.ps.spi_shader_col_format; int cbuf = target - V_008DFC_SQ_EXP_MRT; - if (cbuf >= 0 && cbuf < 8) - compressed = (si_shader_ctx->shader->key.ps.export_16bpc >> cbuf) & 0x1; + assert(cbuf >= 0 && cbuf < 8); + spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf; } - /* Set COMPR flag */ - args[4] = compressed ? uint->one : uint->zero; + args[4] = uint->zero; /* COMPR flag */ + args[5] = base->undef; + args[6] = base->undef; + args[7] = base->undef; + args[8] = base->undef; + + switch (spi_shader_col_format) { + case V_028714_SPI_SHADER_ZERO: + args[0] = uint->zero; /* writemask */ + args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL); + break; + + case V_028714_SPI_SHADER_FP16_ABGR: + args[4] = uint->one; /* COMPR flag */ - if (compressed) { - /* Pixel shader needs to pack output values before export */ for (chan = 0; chan < 2; chan++) { LLVMValueRef pack_args[2] = { values[2 * chan], @@ -1314,10 +1326,13 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, packed, LLVMFloatTypeInContext(base->gallivm->context), ""); - args[chan + 7] = base->undef; } - } else + break; + + case V_028714_SPI_SHADER_32_ABGR: memcpy(&args[5], values, sizeof(values[0]) * 4); + break; + } } static void si_alpha_test(struct lp_build_tgsi_context *bld_base, @@ -4031,7 +4046,7 @@ void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f) break; case PIPE_SHADER_FRAGMENT: - fprintf(f, " export_16bpc = 0x%X\n", key->ps.export_16bpc); + fprintf(f, " spi_shader_col_format = 0x%x\n", key->ps.spi_shader_col_format); fprintf(f, " last_cbuf = %u\n", key->ps.last_cbuf); fprintf(f, " color_two_side = %u\n", key->ps.color_two_side); fprintf(f, " alpha_func = %u\n", key->ps.alpha_func); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 1635358d505..a9b76c6d097 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -232,7 +232,7 @@ struct si_shader_selector { union si_shader_key { struct { - unsigned export_16bpc:8; + unsigned spi_shader_col_format; unsigned last_cbuf:3; unsigned color_two_side:1; unsigned alpha_func:3; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index ea441acb1b0..af6759ef19c 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -403,6 +403,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, if (!blend) return NULL; + blend->alpha_to_coverage = state->alpha_to_coverage; blend->alpha_to_one = state->alpha_to_one; blend->dual_src_blend = util_blend_state_is_dual(state, 0); @@ -1883,6 +1884,21 @@ unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool sten * framebuffer handling */ +static void si_choose_spi_color_formats(struct r600_surface *surf, + unsigned format, unsigned swap, + unsigned ntype) +{ + unsigned max_comp_size = si_colorformat_max_comp_size(format); + + surf->spi_shader_col_format = V_028714_SPI_SHADER_32_ABGR; + + if (ntype == V_028C70_NUMBER_SRGB || + ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) && + max_comp_size <= 10) || + (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) + surf->spi_shader_col_format = V_028714_SPI_SHADER_FP16_ABGR; +} + static void si_initialize_color_surface(struct si_context *sctx, struct r600_surface *surf) { @@ -1896,7 +1912,6 @@ static void si_initialize_color_surface(struct si_context *sctx, const struct util_format_description *desc; int i; unsigned blend_clamp = 0, blend_bypass = 0; - unsigned max_comp_size; /* Layered rendering doesn't work with LINEAR_GENERAL. * (LINEAR_ALIGNED and others work) */ @@ -2053,13 +2068,7 @@ static void si_initialize_color_surface(struct si_context *sctx, } /* Determine pixel shader export format */ - max_comp_size = si_colorformat_max_comp_size(format); - if (ntype == V_028C70_NUMBER_SRGB || - ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) && - max_comp_size <= 10) || - (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) { - surf->export_16bpc = true; - } + si_choose_spi_color_formats(surf, format, swap, ntype); if (sctx->b.family == CHIP_STONEY && !(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) { @@ -2286,7 +2295,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, util_copy_framebuffer_state(&sctx->framebuffer.state, state); - sctx->framebuffer.export_16bpc = 0; + sctx->framebuffer.spi_shader_col_format = 0; sctx->framebuffer.compressed_cb_mask = 0; sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); @@ -2307,22 +2316,20 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, si_initialize_color_surface(sctx, surf); } - if (surf->export_16bpc) { - sctx->framebuffer.export_16bpc |= 1 << i; - } + sctx->framebuffer.spi_shader_col_format |= + surf->spi_shader_col_format << (i * 4); if (rtex->fmask.size && rtex->cmask.size) { sctx->framebuffer.compressed_cb_mask |= 1 << i; } r600_context_add_resource_size(ctx, surf->base.texture); } - /* Set the 16BPC export for possible dual-src blending. */ - if (i == 1 && surf && surf->export_16bpc) { - sctx->framebuffer.export_16bpc |= 1 << 1; + /* Set the second SPI format for possible dual-src blending. */ + if (i == 1 && surf) { + sctx->framebuffer.spi_shader_col_format |= + surf->spi_shader_col_format << (i * 4); } - assert(!(sctx->framebuffer.export_16bpc & ~0xff)); - if (state->zsbuf) { surf = (struct r600_surface*)state->zsbuf; diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h index f5ca661f8d7..46ba3c4301f 100644 --- a/src/gallium/drivers/radeonsi/si_state.h +++ b/src/gallium/drivers/radeonsi/si_state.h @@ -39,6 +39,7 @@ struct si_shader; struct si_state_blend { struct si_pm4_state pm4; uint32_t cb_target_mask; + bool alpha_to_coverage; bool alpha_to_one; bool dual_src_blend; }; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 8ff70b44d45..91ba9b76210 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -382,13 +382,28 @@ static void si_shader_vs(struct si_shader *shader) si_set_tesseval_regs(shader, pm4); } +static unsigned si_get_spi_shader_col_format(struct si_shader *shader) +{ + unsigned value = shader->key.ps.spi_shader_col_format; + unsigned i, num_targets = (util_last_bit(value) + 3) / 4; + + /* If the i-th target format is set, all previous target formats must + * be non-zero to avoid hangs. + */ + for (i = 0; i < num_targets; i++) + if (!(value & (0xf << (i * 4)))) + value |= V_028714_SPI_SHADER_32_R << (i * 4); + + return value; +} + static void si_shader_ps(struct si_shader *shader) { struct tgsi_shader_info *info = &shader->selector->info; struct si_pm4_state *pm4; - unsigned i, spi_ps_in_control; - unsigned spi_shader_col_format = 0, cb_shader_mask = 0; - unsigned colors_written, export_16bpc; + unsigned i, spi_ps_in_control, spi_shader_col_format; + unsigned cb_shader_mask = 0; + unsigned colors_written; unsigned num_sgprs, num_user_sgprs; unsigned spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1); uint64_t va; @@ -425,7 +440,6 @@ static void si_shader_ps(struct si_shader *shader) /* Find out what SPI_SHADER_COL_FORMAT and CB_SHADER_MASK should be. */ colors_written = info->colors_written; - export_16bpc = shader->key.ps.export_16bpc; if (info->colors_written == 0x1 && info->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) { @@ -434,13 +448,20 @@ static void si_shader_ps(struct si_shader *shader) while (colors_written) { i = u_bit_scan(&colors_written); - if (export_16bpc & (1 << i)) - spi_shader_col_format |= V_028714_SPI_SHADER_FP16_ABGR << (4 * i); - else - spi_shader_col_format |= V_028714_SPI_SHADER_32_ABGR << (4 * i); cb_shader_mask |= 0xf << (4 * i); } + spi_shader_col_format = si_get_spi_shader_col_format(shader); + + /* This must be non-zero for alpha-test/kill to work. + * The hardware ignores the EXEC mask if no export memory is allocated. + */ + if (!spi_shader_col_format && + !info->writes_z && !info->writes_stencil && !info->writes_samplemask && + (shader->selector->info.uses_kill || + shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)) + spi_shader_col_format = V_028714_SPI_SHADER_32_R; + /* Set interpolation controls. */ has_centroid = G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena) || G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena); @@ -571,12 +592,20 @@ static inline void si_shader_selector_key(struct pipe_context *ctx, break; case PIPE_SHADER_FRAGMENT: { struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; + struct si_state_blend *blend = sctx->queued.named.blend; if (sel->info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] && sel->info.colors_written == 0x1) key->ps.last_cbuf = MAX2(sctx->framebuffer.state.nr_cbufs, 1) - 1; - key->ps.export_16bpc = sctx->framebuffer.export_16bpc; + key->ps.spi_shader_col_format = sctx->framebuffer.spi_shader_col_format; + + /* If alpha-to-coverage is enabled, we have to export alpha + * even if there is no color buffer. + */ + if (!(key->ps.spi_shader_col_format & 0xf) && + blend && blend->alpha_to_coverage) + key->ps.spi_shader_col_format |= V_028710_SPI_SHADER_FP16_ABGR; if (rs) { bool is_poly = (sctx->current_rast_prim >= PIPE_PRIM_TRIANGLES && -- 2.30.2