From: Marek Olšák Date: Sun, 4 Jan 2015 16:08:57 +0000 (+0100) Subject: radeonsi: enable LLVM optimizations that assume no NaNs for non-compute shaders X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1829f9c928836940fa13b12a8b073f09c26dc782;p=mesa.git radeonsi: enable LLVM optimizations that assume no NaNs for non-compute shaders v2: complete rewrite Reviewed-by: Alex Deucher Reviewed-by: Tom Stellard --- diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 5d61a549fe6..cf28860a35f 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2369,6 +2369,10 @@ static void create_function(struct si_shader_context *si_shader_ctx) radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, num_params); radeon_llvm_shader_type(si_shader_ctx->radeon_bld.main_fn, si_shader_ctx->type); + if (shader->dx10_clamp_mode) + LLVMAddTargetDependentFunctionAttr(si_shader_ctx->radeon_bld.main_fn, + "enable-no-nans-fp-math", "true"); + for (i = 0; i <= last_sgpr; ++i) { LLVMValueRef P = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, i); @@ -2723,6 +2727,9 @@ int si_shader_create(struct si_screen *sscreen, struct si_shader *shader) radeon_llvm_context_init(&si_shader_ctx.radeon_bld); bld_base = &si_shader_ctx.radeon_bld.soa.bld_base; + if (sel->type != PIPE_SHADER_COMPUTE) + shader->dx10_clamp_mode = true; + if (sel->info.uses_kill) shader->db_shader_control |= S_02880C_KILL_ENABLE(1); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 21692f0ee33..08e344af444 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -160,6 +160,7 @@ struct si_shader { bool uses_instanceid; unsigned nr_pos_exports; bool is_gs_copy_shader; + bool dx10_clamp_mode; /* convert NaNs to 0 */ }; static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx) diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index e51d50eb9a7..817a990ee2e 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -65,7 +65,7 @@ static void si_shader_es(struct si_shader *shader) S_00B328_VGPRS((shader->num_vgprs - 1) / 4) | S_00B328_SGPRS((num_sgprs - 1) / 8) | S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) | - S_00B328_DX10_CLAMP(1)); + S_00B328_DX10_CLAMP(shader->dx10_clamp_mode)); si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES, S_00B32C_USER_SGPR(num_user_sgprs)); } @@ -134,7 +134,7 @@ static void si_shader_gs(struct si_shader *shader) si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS, S_00B228_VGPRS((shader->num_vgprs - 1) / 4) | S_00B228_SGPRS((num_sgprs - 1) / 8) | - S_00B228_DX10_CLAMP(1)); + S_00B228_DX10_CLAMP(shader->dx10_clamp_mode)); si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, S_00B22C_USER_SGPR(num_user_sgprs)); } @@ -209,7 +209,7 @@ static void si_shader_vs(struct si_shader *shader) S_00B128_VGPRS((shader->num_vgprs - 1) / 4) | S_00B128_SGPRS((num_sgprs - 1) / 8) | S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) | - S_00B128_DX10_CLAMP(1)); + S_00B128_DX10_CLAMP(shader->dx10_clamp_mode)); si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS, S_00B12C_USER_SGPR(num_user_sgprs) | S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) | @@ -304,7 +304,7 @@ static void si_shader_ps(struct si_shader *shader) si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS, S_00B028_VGPRS((shader->num_vgprs - 1) / 4) | S_00B028_SGPRS((num_sgprs - 1) / 8) | - S_00B028_DX10_CLAMP(1)); + S_00B028_DX10_CLAMP(shader->dx10_clamp_mode)); si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS, S_00B02C_EXTRA_LDS_SIZE(shader->lds_size) | S_00B02C_USER_SGPR(num_user_sgprs));