i965/fs: Handle CMP.nz ... 0 and AND.nz ... 1 similarly in cmod propagation
authorIan Romanick <ian.d.romanick@intel.com>
Tue, 3 Feb 2015 19:12:28 +0000 (21:12 +0200)
committerIan Romanick <ian.d.romanick@intel.com>
Tue, 17 Mar 2015 21:59:43 +0000 (14:59 -0700)
Espically on platforms that do not natively generate 0u and ~0u for
Boolean results, we generate a lot of sequences where a CMP is
followed by an AND with 1.  emit_bool_to_cond_code does this, for
example.  On ILK, this results in a sequence like:

    add(8)          g3<1>F          g8<8,8,1>F      -g4<0,1,0>F
    cmp.l.f0(8)     g3<1>D          g3<8,8,1>F      0F
    and.nz.f0(8)    null            g3<8,8,1>D      1D
    (+f0) iff(8)    Jump: 6

The AND.nz is obviously redundant.  By propagating the cmod, we can
instead generate

    add.l.f0(8)     null            g8<8,8,1>F      -g4<0,1,0>F
    (+f0) iff(8)    Jump: 6

Existing code already handles the propagation from the CMP to the ADD.

Shader-db results:

GM45 (0x2A42):
total instructions in shared programs: 3550829 -> 3550788 (-0.00%)
instructions in affected programs:     10028 -> 9987 (-0.41%)
helped:                                24

Iron Lake (0x0046):
total instructions in shared programs: 4993146 -> 4993105 (-0.00%)
instructions in affected programs:     9675 -> 9634 (-0.42%)
helped:                                24

Ivy Bridge (0x0166):
total instructions in shared programs: 6291870 -> 6291794 (-0.00%)
instructions in affected programs:     17914 -> 17838 (-0.42%)
helped:                                48

Haswell (0x0426):
total instructions in shared programs: 5779256 -> 5779180 (-0.00%)
instructions in affected programs:     16694 -> 16618 (-0.46%)
helped:                                48

Broadwell (0x162E):
total instructions in shared programs: 6823088 -> 6823014 (-0.00%)
instructions in affected programs:     15824 -> 15750 (-0.47%)
helped:                                46

No chage on Sandy Bridge or on any platform when NIR is used.

v2: Add unit tests suggested by Matt.  Remove spurious writes_flag()
check on scan_inst when scan_inst is known to be BRW_OPCODE_CMP (also
suggested by Matt).

v3: Fix some comments and remove some explicit int() casts in fs_reg
constructors in the unit tests.  Both suggested by Matt.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp

index d0ca2f9ce3158fa6488b8d98d6540ac69d3ba482..1935f06df0b0a6e91647cf4081fdc45ce4cc7ab7 100644 (file)
@@ -57,7 +57,8 @@ opt_cmod_propagation_local(bblock_t *block)
    foreach_inst_in_block_reverse_safe(fs_inst, inst, block) {
       ip--;
 
-      if ((inst->opcode != BRW_OPCODE_CMP &&
+      if ((inst->opcode != BRW_OPCODE_AND &&
+           inst->opcode != BRW_OPCODE_CMP &&
            inst->opcode != BRW_OPCODE_MOV) ||
           inst->predicate != BRW_PREDICATE_NONE ||
           !inst->dst.is_null() ||
@@ -65,6 +66,19 @@ opt_cmod_propagation_local(bblock_t *block)
           inst->src[0].abs)
          continue;
 
+      /* Only an AND.NZ can be propagated.  Many AND.Z instructions are
+       * generated (for ir_unop_not in fs_visitor::emit_bool_to_cond_code).
+       * Propagating those would require inverting the condition on the CMP.
+       * This changes both the flag value and the register destination of the
+       * CMP.  That result may be used elsewhere, so we can't change its value
+       * on a whim.
+       */
+      if (inst->opcode == BRW_OPCODE_AND &&
+          !(inst->src[1].is_one() &&
+            inst->conditional_mod == BRW_CONDITIONAL_NZ &&
+            !inst->src[0].negate))
+         continue;
+
       if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero())
          continue;
 
@@ -80,6 +94,21 @@ opt_cmod_propagation_local(bblock_t *block)
                 scan_inst->dst.reg_offset != inst->src[0].reg_offset)
                break;
 
+            /* This must be done before the dst.type check because the result
+             * type of the AND will always be D, but the result of the CMP
+             * could be anything.  The assumption is that the AND is just
+             * figuring out what the result of the previous comparison was
+             * instead of doing a new comparison with a different type.
+             */
+            if (inst->opcode == BRW_OPCODE_AND) {
+               if (scan_inst->opcode == BRW_OPCODE_CMP) {
+                  inst->remove(block);
+                  progress = true;
+               }
+
+               break;
+            }
+
             /* Comparisons operate differently for ints and floats */
             if (scan_inst->dst.type != inst->dst.type)
                break;
index cb92abf43d14b23f21135b87d06b269c6b3b50e1..1ce14f851a177df5f7bebcc52f1e16f6e9eeca7d 100644 (file)
@@ -449,3 +449,108 @@ TEST_F(cmod_propagation_test, different_types_cmod_with_zero)
    EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
    EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
 }
+
+TEST_F(cmod_propagation_test, andnz_one)
+{
+   fs_reg dest = v->vgrf(glsl_type::int_type);
+   fs_reg src0 = v->vgrf(glsl_type::float_type);
+   fs_reg zero(0.0f);
+   fs_reg one(1);
+
+   v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero)
+      ->conditional_mod = BRW_CONDITIONAL_L;
+   v->emit(BRW_OPCODE_AND, v->reg_null_f, dest, one)
+      ->conditional_mod = BRW_CONDITIONAL_NZ;
+
+   /* = Before =
+    * 0: cmp.l.f0(8)     dest:F  src0:F  0F
+    * 1: and.nz.f0(8)    null:D  dest:D  1D
+    *
+    * = After =
+    * 0: cmp.l.f0(8)     dest:F  src0:F  0F
+    */
+
+   v->calculate_cfg();
+   bblock_t *block0 = v->cfg->blocks[0];
+
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(1, block0->end_ip);
+
+   EXPECT_TRUE(cmod_propagation(v));
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(0, block0->end_ip);
+   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
+   EXPECT_TRUE(retype(dest, BRW_REGISTER_TYPE_F)
+               .equals(instruction(block0, 0)->dst));
+}
+
+TEST_F(cmod_propagation_test, andnz_non_one)
+{
+   fs_reg dest = v->vgrf(glsl_type::int_type);
+   fs_reg src0 = v->vgrf(glsl_type::float_type);
+   fs_reg zero(0.0f);
+   fs_reg nonone(38);
+
+   v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero)
+      ->conditional_mod = BRW_CONDITIONAL_L;
+   v->emit(BRW_OPCODE_AND, v->reg_null_f, dest, nonone)
+      ->conditional_mod = BRW_CONDITIONAL_NZ;
+
+   /* = Before =
+    * 0: cmp.l.f0(8)     dest:F  src0:F  0F
+    * 1: and.nz.f0(8)    null:D  dest:D  38D
+    *
+    * = After =
+    * (no changes)
+    */
+
+   v->calculate_cfg();
+   bblock_t *block0 = v->cfg->blocks[0];
+
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(1, block0->end_ip);
+
+   EXPECT_FALSE(cmod_propagation(v));
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(1, block0->end_ip);
+   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
+   EXPECT_EQ(BRW_OPCODE_AND, instruction(block0, 1)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 1)->conditional_mod);
+}
+
+TEST_F(cmod_propagation_test, andz_one)
+{
+   fs_reg dest = v->vgrf(glsl_type::int_type);
+   fs_reg src0 = v->vgrf(glsl_type::float_type);
+   fs_reg zero(0.0f);
+   fs_reg one(1);
+
+   v->emit(BRW_OPCODE_CMP, retype(dest, BRW_REGISTER_TYPE_F), src0, zero)
+      ->conditional_mod = BRW_CONDITIONAL_L;
+   v->emit(BRW_OPCODE_AND, v->reg_null_f, dest, one)
+      ->conditional_mod = BRW_CONDITIONAL_Z;
+
+   /* = Before =
+    * 0: cmp.l.f0(8)     dest:F  src0:F  0F
+    * 1: and.z.f0(8)     null:D  dest:D  1D
+    *
+    * = After =
+    * (no changes)
+    */
+
+   v->calculate_cfg();
+   bblock_t *block0 = v->cfg->blocks[0];
+
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(1, block0->end_ip);
+
+   EXPECT_FALSE(cmod_propagation(v));
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(1, block0->end_ip);
+   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
+   EXPECT_EQ(BRW_OPCODE_AND, instruction(block0, 1)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_EQ, instruction(block0, 1)->conditional_mod);
+}