radv: fix f32->f16 denorm handling for SI/CIK. (v2)
author: Dave Airlie <airlied@redhat.com>
Thu, 3 Aug 2017 23:17:34 +0000 (00:17 +0100)
committer: Dave Airlie <airlied@redhat.com>
Sun, 6 Aug 2017 23:00:05 +0000 (00:00 +0100)
This just copies the code from the -pro shaders,
and fixes the tests on CIK.

With this CIK passes the same set of conformance
tests as VI.

Fixes: 83e58b03 (radv: flush f32->f16 conversion denormals to zero. (v2))
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/amd/common/ac_nir_to_llvm.c

index 83e111166ca633f0d1a6c130d38c183c8034a893..3abf40102c2d8eb0624fe2b8f608697849ead09c 100644 (file)
@@ -1322,7 +1322,6 @@ static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx,
        src0 = to_float(&ctx->ac, src0);
        result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
 
-       /* TODO SI/CIK options here */
        if (ctx->options->chip_class >= VI) {
                LLVMValueRef args[2];
                /* Check if the result is a denormal - and flush to 0 if so. */
@@ -1336,7 +1335,22 @@ static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx,
 
        if (ctx->options->chip_class >= VI)
                result = LLVMBuildSelect(ctx->builder, cond, ctx->f32zero, result, "");
-
+       else {
+               /* for SI/CIK */
+               /* 0x38800000 is smallest half float value (2^-14) in 32-bit float,
+                * so compare the result and flush to 0 if it's smaller.
+                */
+               LLVMValueRef temp, cond2;
+               temp = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
+                                           ctx->f32, result);
+               cond = LLVMBuildFCmp(ctx->builder, LLVMRealUGT,
+                                    LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""),
+                                    temp, "");
+               cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealUNE,
+                                     temp, ctx->f32zero, "");
+               cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
+               result = LLVMBuildSelect(ctx->builder, cond, ctx->f32zero, result, "");
+       }
        return result;
 }