i965: Lower min/max after optimization on Gen4/5.
author Matt Turner <mattst88@gmail.com>
Thu, 11 Feb 2016 20:27:02 +0000 (12:27 -0800)
committer Matt Turner <mattst88@gmail.com>
Wed, 17 Feb 2016 20:35:27 +0000 (12:35 -0800)
Gen4/5's SEL instruction cannot use conditional modifiers, so min/max
are implemented as CMP + SEL. Handling that after optimization lets us
CSE more.

On Ironlake:

   total instructions in shared programs: 6426035 -> 6422753 (-0.05%)
   instructions in affected programs: 326604 -> 323322 (-1.00%)
   helped: 1411

   total cycles in shared programs: 129184700 -> 129101586 (-0.06%)
   cycles in affected programs: 18950290 -> 18867176 (-0.44%)
   helped: 2419
   HURT: 328

Reviewed-by: Francisco Jerez <currojerez@riseup.net>
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs.h
src/mesa/drivers/dri/i965/brw_fs_builder.h
src/mesa/drivers/dri/i965/brw_fs_nir.cpp
src/mesa/drivers/dri/i965/brw_vec4.cpp
src/mesa/drivers/dri/i965/brw_vec4.h
src/mesa/drivers/dri/i965/brw_vec4_builder.h
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

index 41b10a45105cae0945d089fe6c06e4d797e606d2..b5060408d9542dc7064846868011e575847eafb7 100644 (file)
@@ -3476,6 +3476,36 @@ fs_visitor::lower_integer_multiplication()
    return progress;
 }
 
+bool
+fs_visitor::lower_minmax()
+{
+   assert(devinfo->gen < 6);
+
+   bool progress = false;
+
+   foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
+      const fs_builder ibld(this, block, inst);
+
+      if (inst->opcode == BRW_OPCODE_SEL &&
+          inst->predicate == BRW_PREDICATE_NONE) {
+         /* FIXME: Using CMP doesn't preserve the NaN propagation semantics of
+          *        the original SEL.L/GE instruction
+          */
+         ibld.CMP(ibld.null_reg_d(), inst->src[0], inst->src[1],
+                  inst->conditional_mod);
+         inst->predicate = BRW_PREDICATE_NORMAL;
+         inst->conditional_mod = BRW_CONDITIONAL_NONE;
+
+         progress = true;
+      }
+   }
+
+   if (progress)
+      invalidate_live_intervals();
+
+   return progress;
+}
+
 static void
 setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key,
                     fs_reg *dst, fs_reg color, unsigned components)
@@ -5138,6 +5168,13 @@ fs_visitor::optimize()
    OPT(opt_combine_constants);
    OPT(lower_integer_multiplication);
 
+   if (devinfo->gen <= 5 && OPT(lower_minmax)) {
+      OPT(opt_cmod_propagation);
+      OPT(opt_cse);
+      OPT(opt_copy_propagate);
+      OPT(dead_code_eliminate);
+   }
+
    lower_uniform_pull_constant_loads();
 
    validate();
index 7c3d85c2688d2eb69d3cb10a5ebb03dcc49a2c6b..7446ca10cd189d7e14515e5b5ab99089e4933081 100644 (file)
@@ -175,6 +175,7 @@ public:
    bool lower_load_payload();
    bool lower_logical_sends();
    bool lower_integer_multiplication();
+   bool lower_minmax();
    bool lower_simd_width();
    bool opt_combine_constants();
 
index dd3c383a17d571f31ce68e373a51961078dcb8ad..bb94c3d7071fcb445bd79a0423d939958846741c 100644 (file)
@@ -375,14 +375,8 @@ namespace brw {
       {
          assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L);
 
-         if (shader->devinfo->gen >= 6) {
-            set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
-                                 fix_unsigned_negate(src1)));
-         } else {
-            CMP(null_reg_d(), src0, src1, mod);
-            set_predicate(BRW_PREDICATE_NORMAL,
-                          SEL(dst, src0, src1));
-         }
+         set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
+                              fix_unsigned_negate(src1)));
       }
 
       /**
index 1d5d316ace9f3a714a337ae238e8f25e9a404adb..cf2e782c630d78ff212d4a0626e8522b75ab2989 100644 (file)
@@ -950,28 +950,16 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
    case nir_op_fmin:
    case nir_op_imin:
    case nir_op_umin:
-      if (devinfo->gen >= 6) {
-         inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]);
-         inst->conditional_mod = BRW_CONDITIONAL_L;
-      } else {
-         bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_L);
-         inst = bld.SEL(result, op[0], op[1]);
-         inst->predicate = BRW_PREDICATE_NORMAL;
-      }
+      inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]);
+      inst->conditional_mod = BRW_CONDITIONAL_L;
       inst->saturate = instr->dest.saturate;
       break;
 
    case nir_op_fmax:
    case nir_op_imax:
    case nir_op_umax:
-      if (devinfo->gen >= 6) {
-         inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]);
-         inst->conditional_mod = BRW_CONDITIONAL_GE;
-      } else {
-         bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_GE);
-         inst = bld.SEL(result, op[0], op[1]);
-         inst->predicate = BRW_PREDICATE_NORMAL;
-      }
+      inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]);
+      inst->conditional_mod = BRW_CONDITIONAL_GE;
       inst->saturate = instr->dest.saturate;
       break;
 
index e6c580eb89948bdda81c81ec7d78b65982cd83d4..a826fce6d540f9624293fe480a0ec189028f7d62 100644 (file)
@@ -26,6 +26,7 @@
 #include "brw_cfg.h"
 #include "brw_vs.h"
 #include "brw_nir.h"
+#include "brw_vec4_builder.h"
 #include "brw_vec4_live_variables.h"
 #include "brw_dead_control_flow.h"
 #include "program/prog_parameter.h"
@@ -1632,6 +1633,36 @@ vec4_vs_visitor::setup_payload(void)
    this->first_non_payload_grf = reg;
 }
 
+bool
+vec4_visitor::lower_minmax()
+{
+   assert(devinfo->gen < 6);
+
+   bool progress = false;
+
+   foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
+      const vec4_builder ibld(this, block, inst);
+
+      if (inst->opcode == BRW_OPCODE_SEL &&
+          inst->predicate == BRW_PREDICATE_NONE) {
+         /* FIXME: Using CMP doesn't preserve the NaN propagation semantics of
+          *        the original SEL.L/GE instruction
+          */
+         ibld.CMP(ibld.null_reg_d(), inst->src[0], inst->src[1],
+                  inst->conditional_mod);
+         inst->predicate = BRW_PREDICATE_NORMAL;
+         inst->conditional_mod = BRW_CONDITIONAL_NONE;
+
+         progress = true;
+      }
+   }
+
+   if (progress)
+      invalidate_live_intervals();
+
+   return progress;
+}
+
 src_reg
 vec4_visitor::get_timestamp()
 {
@@ -1904,6 +1935,13 @@ vec4_visitor::run()
       OPT(dead_code_eliminate);
    }
 
+   if (devinfo->gen <= 5 && OPT(lower_minmax)) {
+      OPT(opt_cmod_propagation);
+      OPT(opt_cse);
+      OPT(opt_copy_propagation);
+      OPT(dead_code_eliminate);
+   }
+
    if (failed)
       return false;
 
index 14a5f0e428f97448b23d58b8e9e16d1e94dee48e..633f13c2c97b64f1e78db29a0a1d7809ed9aecda 100644 (file)
@@ -307,6 +307,8 @@ public:
 
    void resolve_ud_negate(src_reg *reg);
 
+   bool lower_minmax();
+
    src_reg get_timestamp();
 
    void dump_instruction(backend_instruction *inst);
index 5d4b452032266038553156c41bc520970dca76f4..a0b390b0fd51bd3642d3142474479e1781c1ee3d 100644 (file)
@@ -303,14 +303,8 @@ namespace brw {
       emit_minmax(const dst_reg &dst, const src_reg &src0,
                   const src_reg &src1, brw_conditional_mod mod) const
       {
-         if (shader->devinfo->gen >= 6) {
-            set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
-                                 fix_unsigned_negate(src1)));
-         } else {
-            CMP(null_reg_d(), src0, src1, mod);
-            set_predicate(BRW_PREDICATE_NORMAL,
-                          SEL(dst, src0, src1));
-         }
+         set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
+                              fix_unsigned_negate(src1)));
       }
 
       /**
index 6a8c76c4ea06a3b8546729634a93958782812f43..cfd4d9b5d00dabbd23d163b7ebece4b1fe8819a1 100644 (file)
@@ -679,18 +679,8 @@ vec4_instruction *
 vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
                           src_reg src0, src_reg src1)
 {
-   vec4_instruction *inst;
-
-   if (devinfo->gen >= 6) {
-      inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
-      inst->conditional_mod = conditionalmod;
-   } else {
-      emit(CMP(dst, src0, src1, conditionalmod));
-
-      inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
-      inst->predicate = BRW_PREDICATE_NORMAL;
-   }
-
+   vec4_instruction *inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
+   inst->conditional_mod = conditionalmod;
    return inst;
 }