radv: flush f32->f16 conversion denormals to zero. (v2)

author Dave Airlie <airlied@redhat.com>

Fri, 17 Mar 2017 02:11:30 +0000 (12:11 +1000)

committer Dave Airlie <airlied@redhat.com>

Wed, 3 May 2017 02:55:34 +0000 (12:55 +1000)
author Dave Airlie <airlied@redhat.com>
Fri, 17 Mar 2017 02:11:30 +0000 (12:11 +1000)
committer Dave Airlie <airlied@redhat.com>
Wed, 3 May 2017 02:55:34 +0000 (12:55 +1000)
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c

index d9962c783f0864141ebe428d2fed8ebfc7c68ca2..dbb8ebedd9381ed0e4500b3321c1f564e91601eb 100644 (file)
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1325,6 +1325,33 @@ static LLVMValueRef emit_b2f(struct nir_to_llvm_context *ctx,
         return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
  }
  
+static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx,
+                              LLVMValueRef src0)
+{ /* Round-trips src0 through f16 (f32 -> f16 -> f32); on VI+ an f16-subnormal result is replaced by ctx->f32zero. */
+       LLVMValueRef result;
+       LLVMValueRef cond; /* assigned and read only under the chip_class >= VI checks below */
+
+       src0 = to_float(ctx, src0);
+       result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
+
+       /* TODO: add SI/CIK handling; only chip_class >= VI gets the denormal flush below. */
+       if (ctx->options->chip_class >= VI) {
+               LLVMValueRef args[2];
+               /* Class-test the f16 value against the negative|positive subnormal mask; cond drives the select below. */
+               args[0] = result;
+               args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
+               cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE);
+       }
+
+       /* Extend the f16 value back up to f32, which is the type this function returns. */
+       result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
+
+       if (ctx->options->chip_class >= VI)
+               result = LLVMBuildSelect(ctx->builder, cond, ctx->f32zero, result, ""); /* NOTE(review): same f32zero for both signs - presumably +0.0, so a negative subnormal loses its sign; confirm this is intended. */
+
+       return result;
+}
+
  static LLVMValueRef emit_umul_high(struct nir_to_llvm_context *ctx,
                                    LLVMValueRef src0, LLVMValueRef src1)
  {
@@ -1812,10 +1839,7 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
                 result = emit_b2f(ctx, src[0]);
                 break;
         case nir_op_fquantize2f16:
-               src[0] = to_float(ctx, src[0]);
-               result = LLVMBuildFPTrunc(ctx->builder, src[0], ctx->f16, "");
-               /* need to convert back up to f32 */
-               result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
+               result = emit_f2f16(ctx, src[0]);
                 break;
         case nir_op_umul_high:
                 result = emit_umul_high(ctx, src[0], src[1]);
diff --git a/src/amd/common/sid.h b/src/amd/common/sid.h

index 08cdfd77f07dabbe80a0e6141a657b95bb8a24c6..d329ad9493fe9095c43d4285090466976bf3f74e 100644 (file)
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -9095,5 +9095,18 @@
  #define    CIK_SDMA_PACKET_SRBM_WRITE              0xe
  #define    CIK_SDMA_COPY_MAX_SIZE                  0x3fffe0
  
+enum amd_cmp_class_flags { // One bit per floating-point class; flags are OR-ed together to form a class-test mask.
+       S_NAN = 1 << 0,        // Signaling NaN
+       Q_NAN = 1 << 1,        // Quiet NaN
+       N_INFINITY = 1 << 2,   // Negative infinity
+       N_NORMAL = 1 << 3,     // Negative normal
+       N_SUBNORMAL = 1 << 4,  // Negative subnormal (denormal)
+       N_ZERO = 1 << 5,       // Negative zero
+       P_ZERO = 1 << 6,       // Positive zero
+       P_SUBNORMAL = 1 << 7,  // Positive subnormal (denormal)
+       P_NORMAL = 1 << 8,     // Positive normal
+       P_INFINITY = 1 << 9    // Positive infinity
+};
+
  #endif /* _SID_H */
author	Dave Airlie <airlied@redhat.com>
	Fri, 17 Mar 2017 02:11:30 +0000 (12:11 +1000)
committer	Dave Airlie <airlied@redhat.com>
	Wed, 3 May 2017 02:55:34 +0000 (12:55 +1000)
src/amd/common/ac_nir_to_llvm.c		patch \| blob \| history
src/amd/common/sid.h		patch \| blob \| history