ac/llvm: force fneg/fabs to flush denorms to zero if requested
author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Mon, 14 Oct 2019 13:39:06 +0000 (15:39 +0200)
committer: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Fri, 18 Oct 2019 14:55:55 +0000 (16:55 +0200)
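LLVM optimizes these instructions into XOR/AND bit operations, and the
sign bit is lost. Work around this by canonicalizing the result of
fneg/fabs when denormals must be flushed to zero.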

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
src/amd/llvm/ac_nir_to_llvm.c

index ab042d360835981a2976c8725041b3ad93f3bb96..cd7091ad16337335afc8951da22980d62156a0de 100644 (file)
@@ -516,6 +516,13 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
        case nir_op_fneg:
                src[0] = ac_to_float(&ctx->ac, src[0]);
                result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
+               if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
+                       /* fneg will be optimized by backend compiler with sign
+                        * bit removed via XOR. This is probably a LLVM bug.
+                        */
+                       result = ac_build_canonicalize(&ctx->ac, result,
+                                                      instr->dest.dest.ssa.bit_size);
+               }
                break;
        case nir_op_ineg:
                result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
@@ -646,6 +653,13 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
        case nir_op_fabs:
                result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
                                              ac_to_float_type(&ctx->ac, def_type), src[0]);
+               if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
+                       /* fabs will be optimized by backend compiler with sign
+                        * bit removed via AND.
+                        */
+                       result = ac_build_canonicalize(&ctx->ac, result,
+                                                      instr->dest.dest.ssa.bit_size);
+               }
                break;
        case nir_op_iabs:
                result = emit_iabs(&ctx->ac, src[0]);