From b3c55fc669b54589e57a112df75094405e16ff52 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Mon, 28 Sep 2015 23:46:04 +0200 Subject: [PATCH] radeonsi: do force_persample_interp in shaders for non-trivial cases MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Michel Dänzer --- src/gallium/drivers/radeonsi/si_shader.c | 53 ++++++++++++++++++- src/gallium/drivers/radeonsi/si_shader.h | 49 ++++++++++------- .../drivers/radeonsi/si_state_shaders.c | 34 ++++++++++++ 3 files changed, 117 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index a3df64824c6..32a702fcdf5 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -855,6 +855,56 @@ static int lookup_interp_param_index(unsigned interpolate, unsigned location) } } +/* This shouldn't be used by explicit INTERP opcodes. */ +static LLVMValueRef get_interp_param(struct si_shader_context *si_shader_ctx, + unsigned param) +{ + struct gallivm_state *gallivm = &si_shader_ctx->radeon_bld.gallivm; + unsigned sample_param = 0; + LLVMValueRef default_ij, sample_ij, force_sample; + + default_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, param); + + /* If the shader doesn't use center/centroid, just return the parameter. + * + * If the shader only uses one set of (i,j), "si_emit_spi_ps_input" can + * switch between center/centroid and sample without shader changes. + */ + switch (param) { + case SI_PARAM_PERSP_CENTROID: + case SI_PARAM_PERSP_CENTER: + if (!si_shader_ctx->shader->selector->forces_persample_interp_for_persp) + return default_ij; + + sample_param = SI_PARAM_PERSP_SAMPLE; + break; + + case SI_PARAM_LINEAR_CENTROID: + case SI_PARAM_LINEAR_CENTER: + if (!si_shader_ctx->shader->selector->forces_persample_interp_for_linear) + return default_ij; + + sample_param = SI_PARAM_LINEAR_SAMPLE; + break; + + default: + return default_ij; + } + + /* Otherwise, we have to select (i,j) based on a user data SGPR. */ + sample_ij = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, sample_param); + + /* TODO: this can be done more efficiently by switching between + * 2 prologs. + */ + force_sample = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, + SI_PARAM_PS_STATE_BITS); + force_sample = LLVMBuildTrunc(gallivm->builder, force_sample, + LLVMInt1TypeInContext(gallivm->context), ""); + return LLVMBuildSelect(gallivm->builder, force_sample, + sample_ij, default_ij, ""); +} + static void declare_input_fs( struct radeon_llvm_context *radeon_bld, unsigned input_index, @@ -925,7 +975,7 @@ static void declare_input_fs( if (interp_param_idx == -1) return; else if (interp_param_idx) - interp_param = LLVMGetParam(main_fn, interp_param_idx); + interp_param = get_interp_param(si_shader_ctx, interp_param_idx); /* fs.constant returns the param from the middle vertex, so it's not * really useful for flat shading. It's meant to be used for custom @@ -3458,6 +3508,7 @@ static void create_function(struct si_shader_context *si_shader_ctx) case TGSI_PROCESSOR_FRAGMENT: params[SI_PARAM_ALPHA_REF] = f32; + params[SI_PARAM_PS_STATE_BITS] = i32; params[SI_PARAM_PRIM_MASK] = i32; last_sgpr = SI_PARAM_PRIM_MASK; params[SI_PARAM_PERSP_SAMPLE] = v2i32; diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 2305b9988b8..b92fa02a171 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -88,6 +88,7 @@ struct radeon_shader_reloc; #define SI_SGPR_TCS_OUT_LAYOUT 9 /* TCS & TES only */ #define SI_SGPR_TCS_IN_LAYOUT 10 /* TCS only */ #define SI_SGPR_ALPHA_REF 8 /* PS only */ +#define SI_SGPR_PS_STATE_BITS 9 /* PS only */ #define SI_VS_NUM_USER_SGPR 12 #define SI_LS_NUM_USER_SGPR 13 @@ -95,7 +96,7 @@ struct radeon_shader_reloc; #define SI_TES_NUM_USER_SGPR 10 #define SI_GS_NUM_USER_SGPR 8 #define SI_GSCOPY_NUM_USER_SGPR 4 -#define SI_PS_NUM_USER_SGPR 9 +#define SI_PS_NUM_USER_SGPR 10 /* LLVM function parameter indices */ #define SI_PARAM_RW_BUFFERS 0 @@ -148,23 +149,27 @@ struct radeon_shader_reloc; /* PS only parameters */ #define SI_PARAM_ALPHA_REF 4 -#define SI_PARAM_PRIM_MASK 5 -#define SI_PARAM_PERSP_SAMPLE 6 -#define SI_PARAM_PERSP_CENTER 7 -#define SI_PARAM_PERSP_CENTROID 8 -#define SI_PARAM_PERSP_PULL_MODEL 9 -#define SI_PARAM_LINEAR_SAMPLE 10 -#define SI_PARAM_LINEAR_CENTER 11 -#define SI_PARAM_LINEAR_CENTROID 12 -#define SI_PARAM_LINE_STIPPLE_TEX 13 -#define SI_PARAM_POS_X_FLOAT 14 -#define SI_PARAM_POS_Y_FLOAT 15 -#define SI_PARAM_POS_Z_FLOAT 16 -#define SI_PARAM_POS_W_FLOAT 17 -#define SI_PARAM_FRONT_FACE 18 -#define SI_PARAM_ANCILLARY 19 -#define SI_PARAM_SAMPLE_COVERAGE 20 -#define SI_PARAM_POS_FIXED_PT 21 +/* Bits: + * 0: force_persample_interp + */ +#define SI_PARAM_PS_STATE_BITS 5 +#define SI_PARAM_PRIM_MASK 6 +#define SI_PARAM_PERSP_SAMPLE 7 +#define SI_PARAM_PERSP_CENTER 8 +#define SI_PARAM_PERSP_CENTROID 9 +#define SI_PARAM_PERSP_PULL_MODEL 10 +#define SI_PARAM_LINEAR_SAMPLE 11 +#define SI_PARAM_LINEAR_CENTER 12 +#define SI_PARAM_LINEAR_CENTROID 13 +#define SI_PARAM_LINE_STIPPLE_TEX 14 +#define SI_PARAM_POS_X_FLOAT 15 +#define SI_PARAM_POS_Y_FLOAT 16 +#define SI_PARAM_POS_Z_FLOAT 17 +#define SI_PARAM_POS_W_FLOAT 18 +#define SI_PARAM_FRONT_FACE 19 +#define SI_PARAM_ANCILLARY 20 +#define SI_PARAM_SAMPLE_COVERAGE 21 +#define SI_PARAM_POS_FIXED_PT 22 #define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 1) @@ -182,6 +187,14 @@ struct si_shader_selector { /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */ unsigned type; + /* Whether the shader has to use a conditional assignment to + * choose between weights when emulating + * pipe_rasterizer_state::force_persample_interp. + * If false, "si_emit_spi_ps_input" will take care of it instead. + */ + bool forces_persample_interp_for_persp; + bool forces_persample_interp_for_linear; + unsigned gs_output_prim; unsigned gs_max_out_vertices; unsigned gs_num_invocations; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index aeb28797ace..77c585f958e 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -667,6 +667,34 @@ static void *si_create_shader_state(struct pipe_context *ctx, tgsi_scan_shader(state->tokens, &sel->info); p_atomic_inc(&sscreen->b.num_shaders_created); + /* First set which opcode uses which (i,j) pair. */ + if (sel->info.uses_persp_opcode_interp_centroid) + sel->info.uses_persp_centroid = true; + + if (sel->info.uses_linear_opcode_interp_centroid) + sel->info.uses_linear_centroid = true; + + if (sel->info.uses_persp_opcode_interp_offset || + sel->info.uses_persp_opcode_interp_sample) + sel->info.uses_persp_center = true; + + if (sel->info.uses_linear_opcode_interp_offset || + sel->info.uses_linear_opcode_interp_sample) + sel->info.uses_linear_center = true; + + /* Determine if the shader has to use a conditional assignment when + * emulating force_persample_interp. + */ + sel->forces_persample_interp_for_persp = + sel->info.uses_persp_center + + sel->info.uses_persp_centroid + + sel->info.uses_persp_sample >= 2; + + sel->forces_persample_interp_for_linear = + sel->info.uses_linear_center + + sel->info.uses_linear_centroid + + sel->info.uses_linear_sample >= 2; + switch (pipe_shader_type) { case PIPE_SHADER_GEOMETRY: sel->gs_output_prim = @@ -1100,6 +1128,12 @@ static void si_emit_spi_ps_input(struct si_context *sctx, struct r600_atom *atom radeon_set_context_reg_seq(cs, R_0286CC_SPI_PS_INPUT_ENA, 2); radeon_emit(cs, input_ena); radeon_emit(cs, input_ena); + + if (ps->selector->forces_persample_interp_for_persp || + ps->selector->forces_persample_interp_for_linear) + radeon_set_sh_reg(cs, R_00B030_SPI_SHADER_USER_DATA_PS_0 + + SI_SGPR_PS_STATE_BITS * 4, + sctx->force_persample_interp); } /* Initialize state related to ESGS / GSVS ring buffers */ -- 2.30.2