i965/fs: Propagate cmod across flag read if it contains the same value.
author Matt Turner <mattst88@gmail.com>
Sat, 3 Jan 2015 20:18:15 +0000 (12:18 -0800)
committer Matt Turner <mattst88@gmail.com>
Sat, 24 Jan 2015 01:57:40 +0000 (17:57 -0800)
total instructions in shared programs: 5959463 -> 5958900 (-0.01%)
instructions in affected programs:     70031 -> 69468 (-0.80%)

Reviewed-by: Jason Ekstrand <jason.ekstrand@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp

index 9592bf26ac66a547667780c339cb88ef1ed3be66..c8eb3a4f8bec378e322b4fe2e0bdc5ead154a3af 100644 (file)
  * we can do the comparison as part of the ADD instruction directly:
  *
  *    add.ge.f0(8)    g70<1>F    g69<8,8,1>F    4096F
+ *
+ * If there had been a use of the flag register and another CMP using g70
+ *
+ *    add.ge.f0(8)    g70<1>F    g69<8,8,1>F    4096F
+ *    (+f0) sel(8)    g71<1>F    g72<8,8,1>F    g73<8,8,1>F
+ *    cmp.ge.f0(8)    null       g70<8,8,1>F    0F
+ *
+ * we can recognize that the CMP is generating the flag value that already
+ * exists and therefore remove the instruction.
  */
 
 static bool
@@ -57,6 +66,7 @@ opt_cmod_propagation_local(fs_visitor *v, bblock_t *block)
           !inst->src[1].is_zero())
          continue;
 
+      bool read_flag = false;
       foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst,
                                                   block) {
          if (scan_inst->overwrites_reg(inst->src[0])) {
@@ -65,7 +75,7 @@ opt_cmod_propagation_local(fs_visitor *v, bblock_t *block)
                break;
 
             if (scan_inst->can_do_cmod() &&
-                (scan_inst->conditional_mod == BRW_CONDITIONAL_NONE ||
+                ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) ||
                  scan_inst->conditional_mod == inst->conditional_mod)) {
                scan_inst->conditional_mod = inst->conditional_mod;
                inst->remove(block);
@@ -74,8 +84,10 @@ opt_cmod_propagation_local(fs_visitor *v, bblock_t *block)
             break;
          }
 
-         if (scan_inst->reads_flag() || scan_inst->writes_flag())
+         if (scan_inst->writes_flag())
             break;
+
+         read_flag = read_flag || scan_inst->reads_flag();
       }
    }
 
index 3aa05987e4c8715d83f4e98971b77423cb5db3ab..95d335d9543cd80456d49bf19d65f925153affee 100644 (file)
@@ -309,3 +309,44 @@ TEST_F(cmod_propagation_test, intervening_dest_write)
    EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 2)->opcode);
    EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod);
 }
+
+TEST_F(cmod_propagation_test, intervening_flag_read_same_value)
+{
+   fs_reg dest0 = v->vgrf(glsl_type::float_type);
+   fs_reg dest1 = v->vgrf(glsl_type::float_type);
+   fs_reg src0 = v->vgrf(glsl_type::float_type);
+   fs_reg src1 = v->vgrf(glsl_type::float_type);
+   fs_reg src2 = v->vgrf(glsl_type::float_type);
+   fs_reg zero(0.0f);
+   v->emit(BRW_OPCODE_ADD, dest0, src0, src1)
+      ->conditional_mod = BRW_CONDITIONAL_GE;
+   v->emit(BRW_OPCODE_SEL, dest1, src2, zero)
+      ->predicate = BRW_PREDICATE_NORMAL;
+   v->emit(BRW_OPCODE_CMP, v->reg_null_f, dest0, zero)
+      ->conditional_mod = BRW_CONDITIONAL_GE;
+
+   /* = Before =
+    *
+    * 0: add.ge.f0(8)  dest0 src0  src1
+    * 1: (+f0) sel(8)  dest1 src2  0.0f
+    * 2: cmp.ge.f0(8)  null  dest0 0.0f
+    *
+    * = After =
+    * 0: add.ge.f0(8)  dest0 src0  src1
+    * 1: (+f0) sel(8)  dest1 src2  0.0f
+    */
+
+   v->calculate_cfg();
+   bblock_t *block0 = v->cfg->blocks[0];
+
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(2, block0->end_ip);
+
+   EXPECT_TRUE(cmod_propagation(v));
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(1, block0->end_ip);
+   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
+   EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode);
+   EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate);
+}