From: Pierre-Eric Pelloux-Prayer Date: Tue, 11 Aug 2020 16:52:24 +0000 (+0200) Subject: ac/llvm: add option to clamp division by zero X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=commitdiff_plain;h=32f46a55c8229b2a8d67d895be18651a81f8e6ff;ds=sidebyside ac/llvm: add option to clamp division by zero Replace div(x) with min(div(x), FLT_MAX) to avoid getting a NaN result when x is 0. A cheaper alternative would be to use legacy mult instructions, but they're not exposed by LLVM. Cc: mesa-stable Reviewed-by: Marek Olšák Part-of: --- diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c index 1b6ef264eef..6dc155dd94a 100644 --- a/src/amd/llvm/ac_nir_to_llvm.c +++ b/src/amd/llvm/ac_nir_to_llvm.c @@ -713,6 +713,9 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp", ac_to_float_type(&ctx->ac, def_type), src[0]); } + if (ctx->abi->clamp_div_by_zero) + result = ac_build_fmin(&ctx->ac, result, + LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX)); break; case nir_op_iand: result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], ""); @@ -859,6 +862,9 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) case nir_op_frsq: result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rsq", ac_to_float_type(&ctx->ac, def_type), src[0]); + if (ctx->abi->clamp_div_by_zero) + result = ac_build_fmin(&ctx->ac, result, + LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX)); break; case nir_op_frexp_exp: src[0] = ac_to_float(&ctx->ac, src[0]); @@ -900,7 +906,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr) case nir_op_ffma: /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */ result = emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? 
"llvm.fma" : "llvm.fmuladd", - ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]); + ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]); break; case nir_op_ldexp: src[0] = ac_to_float(&ctx->ac, src[0]); diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h index aa31ff9c52d..80b1554ea3e 100644 --- a/src/amd/llvm/ac_shader_abi.h +++ b/src/amd/llvm/ac_shader_abi.h @@ -192,6 +192,9 @@ struct ac_shader_abi { /* Whether undef values must be converted to zero */ bool convert_undef_to_zero; + + /* Clamp div by 0 (so it won't produce NaN) */ + bool clamp_div_by_zero; }; #endif /* AC_SHADER_ABI_H */