intel/fs: Allow cmod propagation across reads and writes of different flags
authorIan Romanick <ian.d.romanick@intel.com>
Wed, 22 May 2019 19:32:03 +0000 (12:32 -0700)
committerIan Romanick <ian.d.romanick@intel.com>
Thu, 6 Jun 2019 00:03:45 +0000 (17:03 -0700)
This also helps a later patch (intel/fs: Improve discard_if code
generation) on about 200 shaders.

v2: Document that other instruction sequences are also valid in
subtract_merge_with_compare_intervening_mismatch_flag_write.  Suggested
by Caio.

All Intel platforms had similar results. (Ice Lake shown)
total instructions in shared programs: 17224438 -> 17224434 (<.01%)
instructions in affected programs: 296 -> 292 (-1.35%)
helped: 4
HURT: 0
helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1
helped stats (rel) min: 0.99% max: 1.92% x̄: 1.43% x̃: 1.40%
95% mean confidence interval for instructions value: -1.00 -1.00
95% mean confidence interval for instructions %-change: -2.04% -0.81%
Instructions are helped.

total cycles in shared programs: 361468455 -> 361468458 (<.01%)
cycles in affected programs: 2862 -> 2865 (0.10%)
helped: 2
HURT: 2
helped stats (abs) min: 2 max: 2 x̄: 2.00 x̃: 2
helped stats (rel) min: 0.24% max: 0.39% x̄: 0.31% x̃: 0.31%
HURT stats (abs)   min: 3 max: 4 x̄: 3.50 x̃: 3
HURT stats (rel)   min: 0.32% max: 0.70% x̄: 0.51% x̃: 0.51%
95% mean confidence interval for cycles value: -4.34 5.84
95% mean confidence interval for cycles %-change: -0.70% 0.90%
Inconclusive result (value mean confidence interval includes 0).

Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/intel/compiler/brw_fs_cmod_propagation.cpp
src/intel/compiler/test_fs_cmod_propagation.cpp

index ba4df5924241dafda1d7aa032e7fb04465fc2c16..45ea9206014519de24e67e0e608da8385651615d 100644 (file)
@@ -114,10 +114,11 @@ cmod_propagate_cmp_to_add(const gen_device_info *devinfo, bblock_t *block,
       }
 
    not_match:
-      if (scan_inst->flags_written())
+      if ((scan_inst->flags_written() & flags_written) != 0)
          break;
 
-      read_flag = read_flag || scan_inst->flags_read(devinfo);
+      read_flag = read_flag ||
+                  (scan_inst->flags_read(devinfo) & flags_written) != 0;
    }
 
    return false;
@@ -180,10 +181,11 @@ cmod_propagate_not(const gen_device_info *devinfo, bblock_t *block,
          break;
       }
 
-      if (scan_inst->flags_written())
+      if ((scan_inst->flags_written() & flags_written) != 0)
          break;
 
-      read_flag = read_flag || scan_inst->flags_read(devinfo);
+      read_flag = read_flag ||
+                  (scan_inst->flags_read(devinfo) & flags_written) != 0;
    }
 
    return false;
@@ -423,10 +425,11 @@ opt_cmod_propagation_local(const gen_device_info *devinfo, bblock_t *block)
             break;
          }
 
-         if (scan_inst->flags_written())
+         if ((scan_inst->flags_written() & flags_written) != 0)
             break;
 
-         read_flag = read_flag || scan_inst->flags_read(devinfo);
+         read_flag = read_flag ||
+                     (scan_inst->flags_read(devinfo) & flags_written) != 0;
       }
    }
 
index 89e2684eafb9956867af266d0e6f6393d699e468..218605000f4b0130650a129ab2f0409d78dfba28 100644 (file)
@@ -277,6 +277,47 @@ TEST_F(cmod_propagation_test, intervening_flag_write)
    EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod);
 }
 
+TEST_F(cmod_propagation_test, intervening_mismatch_flag_write)
+{
+   const fs_builder &bld = v->bld;
+   fs_reg dest = v->vgrf(glsl_type::float_type);
+   fs_reg src0 = v->vgrf(glsl_type::float_type);
+   fs_reg src1 = v->vgrf(glsl_type::float_type);
+   fs_reg src2 = v->vgrf(glsl_type::float_type);
+   fs_reg zero(brw_imm_f(0.0f));
+   bld.ADD(dest, src0, src1);
+   bld.CMP(bld.null_reg_f(), src2, zero, BRW_CONDITIONAL_GE)
+      ->flag_subreg = 1;
+   bld.CMP(bld.null_reg_f(), dest, zero, BRW_CONDITIONAL_GE);
+
+   /* = Before =
+    *
+    * 0: add(8)         dest  src0  src1
+    * 1: cmp.ge.f0.1(8) null  src2  0.0f
+    * 2: cmp.ge.f0(8)   null  dest  0.0f
+    *
+    * = After =
+    * 0: add.ge.f0(8)   dest  src0  src1
+    * 1: cmp.ge.f0.1(8) null  src2  0.0f
+    */
+
+   v->calculate_cfg();
+   bblock_t *block0 = v->cfg->blocks[0];
+
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(2, block0->end_ip);
+
+   EXPECT_TRUE(cmod_propagation(v));
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(1, block0->end_ip);
+   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
+   EXPECT_EQ(0, instruction(block0, 0)->flag_subreg);
+   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
+   EXPECT_EQ(1, instruction(block0, 1)->flag_subreg);
+}
+
 TEST_F(cmod_propagation_test, intervening_flag_read)
 {
    const fs_builder &bld = v->bld;
@@ -316,6 +357,48 @@ TEST_F(cmod_propagation_test, intervening_flag_read)
    EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 2)->conditional_mod);
 }
 
+TEST_F(cmod_propagation_test, intervening_mismatch_flag_read)
+{
+   const fs_builder &bld = v->bld;
+   fs_reg dest0 = v->vgrf(glsl_type::float_type);
+   fs_reg dest1 = v->vgrf(glsl_type::float_type);
+   fs_reg src0 = v->vgrf(glsl_type::float_type);
+   fs_reg src1 = v->vgrf(glsl_type::float_type);
+   fs_reg src2 = v->vgrf(glsl_type::float_type);
+   fs_reg zero(brw_imm_f(0.0f));
+   bld.ADD(dest0, src0, src1);
+   set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero))
+      ->flag_subreg = 1;
+   bld.CMP(bld.null_reg_f(), dest0, zero, BRW_CONDITIONAL_GE);
+
+   /* = Before =
+    *
+    * 0: add(8)         dest0 src0  src1
+    * 1: (+f0.1) sel(8) dest1 src2  0.0f
+    * 2: cmp.ge.f0(8)   null  dest0 0.0f
+    *
+    * = After =
+    * 0: add.ge.f0(8)   dest0 src0  src1
+    * 1: (+f0.1) sel(8) dest1 src2  0.0f
+    */
+
+   v->calculate_cfg();
+   bblock_t *block0 = v->cfg->blocks[0];
+
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(2, block0->end_ip);
+
+   EXPECT_TRUE(cmod_propagation(v));
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(1, block0->end_ip);
+   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
+   EXPECT_EQ(0, instruction(block0, 0)->flag_subreg);
+   EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode);
+   EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate);
+   EXPECT_EQ(1, instruction(block0, 1)->flag_subreg);
+}
+
 TEST_F(cmod_propagation_test, intervening_dest_write)
 {
    const fs_builder &bld = v->bld;
@@ -976,6 +1059,96 @@ TEST_F(cmod_propagation_test, subtract_to_mismatch_flag)
    EXPECT_EQ(1, instruction(block0, 1)->flag_subreg);
 }
 
+TEST_F(cmod_propagation_test,
+       subtract_merge_with_compare_intervening_mismatch_flag_write)
+{
+   const fs_builder &bld = v->bld;
+   fs_reg dest0 = v->vgrf(glsl_type::float_type);
+   fs_reg src0 = v->vgrf(glsl_type::float_type);
+   fs_reg src1 = v->vgrf(glsl_type::float_type);
+
+   bld.ADD(dest0, src0, negate(src1));
+   bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L)
+            ->flag_subreg = 1;
+   bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
+
+   /* = Before =
+    * 0: add(8)         dest0:F src0:F  -src1:F
+    * 1: cmp.l.f0.1(8)  null:F  src0:F  src1:F
+    * 2: cmp.l.f0(8)    null:F  src0:F  src1:F
+    *
+    * = After =
+    * 0: add.l.f0(8)    dest0:F src0:F  -src1:F
+    * 1: cmp.l.f0.1(8)  null:F  src0:F  src1:F
+    *
+    * NOTE: Another perfectly valid after sequence would be:
+    *
+    * 0: add.f0.1(8)    dest0:F src0:F  -src1:F
+    * 1: cmp.l.f0(8)    null:F  src0:F  src1:F
+    *
+    * However, the optimization pass starts at the end of the basic block.
+    * Because of this, the cmp.l.f0 will always be chosen.  If the pass
+    * changes its strategy, this test will also need to change.
+    */
+   v->calculate_cfg();
+   bblock_t *block0 = v->cfg->blocks[0];
+
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(2, block0->end_ip);
+
+   EXPECT_TRUE(cmod_propagation(v));
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(1, block0->end_ip);
+   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
+   EXPECT_EQ(0, instruction(block0, 0)->flag_subreg);
+   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 1)->conditional_mod);
+   EXPECT_EQ(1, instruction(block0, 1)->flag_subreg);
+}
+
+TEST_F(cmod_propagation_test,
+       subtract_merge_with_compare_intervening_mismatch_flag_read)
+{
+   const fs_builder &bld = v->bld;
+   fs_reg dest0 = v->vgrf(glsl_type::float_type);
+   fs_reg dest1 = v->vgrf(glsl_type::float_type);
+   fs_reg src0 = v->vgrf(glsl_type::float_type);
+   fs_reg src1 = v->vgrf(glsl_type::float_type);
+   fs_reg src2 = v->vgrf(glsl_type::float_type);
+   fs_reg zero(brw_imm_f(0.0f));
+
+   bld.ADD(dest0, src0, negate(src1));
+   set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero))
+      ->flag_subreg = 1;
+   bld.CMP(bld.null_reg_f(), src0, src1, BRW_CONDITIONAL_L);
+
+   /* = Before =
+    * 0: add(8)         dest0:F src0:F  -src1:F
+    * 1: (+f0.1) sel(8) dest1   src2    0.0f
+    * 2: cmp.l.f0(8)    null:F  src0:F  src1:F
+    *
+    * = After =
+    * 0: add.l.f0(8)    dest0:F src0:F  -src1:F
+    * 1: (+f0.1) sel(8) dest1   src2    0.0f
+    */
+   v->calculate_cfg();
+   bblock_t *block0 = v->cfg->blocks[0];
+
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(2, block0->end_ip);
+
+   EXPECT_TRUE(cmod_propagation(v));
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(1, block0->end_ip);
+   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_L, instruction(block0, 0)->conditional_mod);
+   EXPECT_EQ(0, instruction(block0, 0)->flag_subreg);
+   EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode);
+   EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate);
+   EXPECT_EQ(1, instruction(block0, 1)->flag_subreg);
+}
+
 TEST_F(cmod_propagation_test, subtract_delete_compare_derp)
 {
    const fs_builder &bld = v->bld;
@@ -1844,3 +2017,93 @@ TEST_F(cmod_propagation_test, not_to_or_intervening_flag_read_incompatible_value
    EXPECT_EQ(BRW_OPCODE_NOT, instruction(block0, 2)->opcode);
    EXPECT_EQ(BRW_CONDITIONAL_NZ, instruction(block0, 2)->conditional_mod);
 }
+
+TEST_F(cmod_propagation_test, not_to_or_intervening_mismatch_flag_write)
+{
+   /* Exercise propagation of conditional modifier from a NOT instruction to
+    * another ALU instruction as performed by cmod_propagate_not.
+    */
+   const fs_builder &bld = v->bld;
+   fs_reg dest0 = v->vgrf(glsl_type::uint_type);
+   fs_reg dest1 = v->vgrf(glsl_type::uint_type);
+   fs_reg src0 = v->vgrf(glsl_type::uint_type);
+   fs_reg src1 = v->vgrf(glsl_type::uint_type);
+
+   bld.OR(dest0, src0, src1);
+   set_condmod(BRW_CONDITIONAL_Z, bld.OR(dest1, src0, src1))
+      ->flag_subreg = 1;
+   set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest0));
+
+   /* = Before =
+    *
+    * 0: or(8)          dest0 src0  src1
+    * 1: or.z.f0.1(8)   dest1 src0  src1
+    * 2: not.nz.f0(8)   null  dest0
+    *
+    * = After =
+    * 0: or.z.f0(8)     dest0 src0  src1
+    * 1: or.z.f0.1(8)   dest1 src0  src1
+    */
+
+   v->calculate_cfg();
+   bblock_t *block0 = v->cfg->blocks[0];
+
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(2, block0->end_ip);
+
+   EXPECT_TRUE(cmod_propagation(v));
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(1, block0->end_ip);
+   EXPECT_EQ(BRW_OPCODE_OR, instruction(block0, 0)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_Z, instruction(block0, 0)->conditional_mod);
+   EXPECT_EQ(0, instruction(block0, 0)->flag_subreg);
+   EXPECT_EQ(BRW_OPCODE_OR, instruction(block0, 1)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_Z, instruction(block0, 1)->conditional_mod);
+   EXPECT_EQ(1, instruction(block0, 1)->flag_subreg);
+}
+
+TEST_F(cmod_propagation_test, not_to_or_intervening_mismatch_flag_read)
+{
+   /* Exercise propagation of conditional modifier from a NOT instruction to
+    * another ALU instruction as performed by cmod_propagate_not.
+    */
+   const fs_builder &bld = v->bld;
+   fs_reg dest0 = v->vgrf(glsl_type::uint_type);
+   fs_reg dest1 = v->vgrf(glsl_type::float_type);
+   fs_reg src0 = v->vgrf(glsl_type::uint_type);
+   fs_reg src1 = v->vgrf(glsl_type::uint_type);
+   fs_reg src2 = v->vgrf(glsl_type::float_type);
+   fs_reg zero(brw_imm_f(0.0f));
+
+   bld.OR(dest0, src0, src1);
+   set_predicate(BRW_PREDICATE_NORMAL, bld.SEL(dest1, src2, zero))
+      ->flag_subreg = 1;
+   set_condmod(BRW_CONDITIONAL_NZ, bld.NOT(bld.null_reg_ud(), dest0));
+
+   /* = Before =
+    *
+    * 0: or(8)          dest0 src0  src1
+    * 1: (+f0.1) sel(8) dest1 src2  0.0f
+    * 2: not.nz.f0(8)   null  dest0
+    *
+    * = After =
+    * 0: or.z.f0(8)     dest0 src0  src1
+    * 1: (+f0.1) sel(8) dest1 src2  0.0f
+    */
+
+   v->calculate_cfg();
+   bblock_t *block0 = v->cfg->blocks[0];
+
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(2, block0->end_ip);
+
+   EXPECT_TRUE(cmod_propagation(v));
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(1, block0->end_ip);
+   EXPECT_EQ(BRW_OPCODE_OR, instruction(block0, 0)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_Z, instruction(block0, 0)->conditional_mod);
+   EXPECT_EQ(0, instruction(block0, 0)->flag_subreg);
+   EXPECT_EQ(BRW_OPCODE_SEL, instruction(block0, 1)->opcode);
+   EXPECT_EQ(BRW_PREDICATE_NORMAL, instruction(block0, 1)->predicate);
+   EXPECT_EQ(1, instruction(block0, 1)->flag_subreg);
+}