nir/peephole_ffma: Be less aggressive about fusing multiply-adds
author Jason Ekstrand <jason.ekstrand@intel.com>
Mon, 23 Mar 2015 21:55:20 +0000 (14:55 -0700)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Wed, 1 Apr 2015 19:51:04 +0000 (12:51 -0700)
shader-db results for fragment shaders on Haswell:
total instructions in shared programs: 4395688 -> 4389623 (-0.14%)
instructions in affected programs:     355876 -> 349811 (-1.70%)
helped:                                1455
HURT:                                  14
GAINED:                                5
LOST:                                  0

Reviewed-by: Matt Turner <mattst88@gmail.com>
src/glsl/nir/nir_opt_peephole_ffma.c

index 1ba4ac2b19528c04a9793f2821a9e5386a4754b9..bf4dbe16b9e3e80d9dad998ebe160e69b0079e06 100644 (file)
@@ -38,6 +38,41 @@ struct peephole_ffma_state {
    bool progress;
 };
 
+static inline bool
+are_all_uses_fadd(nir_ssa_def *def)
+{
+   if (def->if_uses->entries > 0)
+      return false;
+
+   struct set_entry *use_iter;
+   set_foreach(def->uses, use_iter) {
+      nir_instr *use_instr = (nir_instr *)use_iter->key;
+
+      if (use_instr->type != nir_instr_type_alu)
+         return false;
+
+      nir_alu_instr *use_alu = nir_instr_as_alu(use_instr);
+      switch (use_alu->op) {
+      case nir_op_fadd:
+         break; /* This one's ok */
+
+      case nir_op_imov:
+      case nir_op_fmov:
+      case nir_op_fneg:
+      case nir_op_fabs:
+         assert(use_alu->dest.dest.is_ssa);
+         if (!are_all_uses_fadd(&use_alu->dest.dest.ssa))
+            return false;
+         break;
+
+      default:
+         return false;
+      }
+   }
+
+   return true;
+}
+
 static nir_alu_instr *
 get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)
 {
@@ -66,6 +101,12 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)
       break;
 
    case nir_op_fmul:
+      /* Only absorb a fmul into a ffma if the fmul is only used in fadd
+       * operations.  This prevents us from being too aggressive with our
+       * fusing, which can actually lead to more instructions.
+       */
+      if (!are_all_uses_fadd(&alu->dest.dest.ssa))
+         return NULL;
       break;
 
    default: