freedreno/ir3: don't lower fsat
author Rob Clark <robdclark@gmail.com>
Sun, 21 Jan 2018 17:31:51 +0000 (12:31 -0500)
committer Rob Clark <robdclark@gmail.com>
Sat, 10 Feb 2018 19:54:58 +0000 (14:54 -0500)
Instead, if possible, fold the (sat) flag into the src instruction; otherwise use:

  (sat)max.f rD, rS, rS

Signed-off-by: Rob Clark <robdclark@gmail.com>
src/gallium/drivers/freedreno/ir3/ir3.h
src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
src/gallium/drivers/freedreno/ir3/ir3_nir.c

index 210df20b812c22fecb08ed07ad44365a315b2f33..84235cc1cd2722ac7fa953849740a7de224746e6 100644 (file)
@@ -616,6 +616,8 @@ static inline bool is_same_type_mov(struct ir3_instruction *instr)
                break;
        case OPC_ABSNEG_F:
        case OPC_ABSNEG_S:
+               if (instr->flags & IR3_INSTR_SAT)
+                       return false;
                break;
        default:
                return false;
index 009dcb30fa57e6157a8551666c085f084fc87d36..72e0f4fe288f5c5bd8e7d2cc57e5e3401879851d 100644 (file)
@@ -964,6 +964,27 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
        case nir_op_fmin:
                dst[0] = ir3_MIN_F(b, src[0], 0, src[1], 0);
                break;
+       case nir_op_fsat:
+               /* if there is just a single use of the src, and it supports
+                * (sat) bit, we can just fold the (sat) flag back to the
+                * src instruction and create a mov.  This is easier for cp
+                * to eliminate.
+                *
+                * TODO probably opc_cat==4 is ok too
+                */
+               if (alu->src[0].src.is_ssa &&
+                               (list_length(&alu->src[0].src.ssa->uses) == 1) &&
+                               ((opc_cat(src[0]->opc) == 2) || (opc_cat(src[0]->opc) == 3))) {
+                       src[0]->flags |= IR3_INSTR_SAT;
+                       dst[0] = ir3_MOV(b, src[0], TYPE_U32);
+               } else {
+                       /* otherwise generate a max.f that saturates.. blob does
+                        * similar (generating a cat2 mov using max.f)
+                        */
+                       dst[0] = ir3_MAX_F(b, src[0], 0, src[0], 0);
+                       dst[0]->flags |= IR3_INSTR_SAT;
+               }
+               break;
        case nir_op_fmul:
                dst[0] = ir3_MUL_F(b, src[0], 0, src[1], 0);
                break;
index 2393306e19cbee956273a0ce263e47fc493cd103..81a46be914e4e80786eb8e39fdf94cc3bd8de1fe 100644 (file)
@@ -37,7 +37,6 @@
 
 static const nir_shader_compiler_options options = {
                .lower_fpow = true,
-               .lower_fsat = true,
                .lower_scmp = true,
                .lower_flrp32 = true,
                .lower_flrp64 = true,