ac/llvm: add option to clamp division by zero

author Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>

Tue, 11 Aug 2020 16:52:24 +0000 (18:52 +0200)

committer Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>

Wed, 2 Sep 2020 09:53:16 +0000 (11:53 +0200)
author Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Tue, 11 Aug 2020 16:52:24 +0000 (18:52 +0200)
committer Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Wed, 2 Sep 2020 09:53:16 +0000 (11:53 +0200)
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c

index 1b6ef264eef3bb7024eab5b7ce59d728e520876e..6dc155dd94ae37b711d6e8d41ce928fab15e1f91 100644 (file)
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -713,6 +713,9 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
                         result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp",
                                                       ac_to_float_type(&ctx->ac, def_type), src[0]);
                 }
                         result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rcp",
                                                       ac_to_float_type(&ctx->ac, def_type), src[0]);
                 }
+               if (ctx->abi->clamp_div_by_zero)
+                       result = ac_build_fmin(&ctx->ac, result,
+                                              LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
                 break;
         case nir_op_iand:
                 result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
                 break;
         case nir_op_iand:
                 result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
@@ -859,6 +862,9 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
         case nir_op_frsq:
                 result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rsq",
                                               ac_to_float_type(&ctx->ac, def_type), src[0]);
         case nir_op_frsq:
                 result = emit_intrin_1f_param(&ctx->ac, "llvm.amdgcn.rsq",
                                               ac_to_float_type(&ctx->ac, def_type), src[0]);
+               if (ctx->abi->clamp_div_by_zero)
+                       result = ac_build_fmin(&ctx->ac, result,
+                                              LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
                 break;
         case nir_op_frexp_exp:
                 src[0] = ac_to_float(&ctx->ac, src[0]);
                 break;
         case nir_op_frexp_exp:
                 src[0] = ac_to_float(&ctx->ac, src[0]);
@@ -900,7 +906,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
         case nir_op_ffma:
                 /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
                 result = emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? "llvm.fma" : "llvm.fmuladd",
         case nir_op_ffma:
                 /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
                 result = emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? "llvm.fma" : "llvm.fmuladd",
-                                             ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
+                                             ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
                 break;
         case nir_op_ldexp:
                 src[0] = ac_to_float(&ctx->ac, src[0]);
                 break;
         case nir_op_ldexp:
                 src[0] = ac_to_float(&ctx->ac, src[0]);
diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h

index aa31ff9c52da07cac63ebe132df2b26cf6f69db9..80b1554ea3e0aad077057eaf7327edf1f089bf67 100644 (file)
--- a/src/amd/llvm/ac_shader_abi.h
+++ b/src/amd/llvm/ac_shader_abi.h
@@ -192,6 +192,9 @@ struct ac_shader_abi {
  
         /* Whether undef values must be converted to zero */
         bool convert_undef_to_zero;
  
         /* Whether undef values must be converted to zero */
         bool convert_undef_to_zero;
+
+       /* Clamp rcp/rsq results to at most FLT_MAX (so div by 0 yields a finite value, not +Inf/NaN) */
+       bool clamp_div_by_zero;
  };
  
  #endif /* AC_SHADER_ABI_H */
  };
  
  #endif /* AC_SHADER_ABI_H */
author	Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
	Tue, 11 Aug 2020 16:52:24 +0000 (18:52 +0200)
committer	Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
	Wed, 2 Sep 2020 09:53:16 +0000 (11:53 +0200)
src/amd/llvm/ac_nir_to_llvm.c		patch | blob | history
src/amd/llvm/ac_shader_abi.h		patch | blob | history