i965/fs: Consider MOV.SAT to interfere if it has a source modifier.
author: Matt Turner <mattst88@gmail.com>
Wed, 11 Feb 2015 00:25:47 +0000 (16:25 -0800)
committer: Matt Turner <mattst88@gmail.com>
Fri, 20 Feb 2015 05:16:43 +0000 (21:16 -0800)
The saturate propagation pass recognizes that the second instruction
below does not interfere with an attempt to propagate the saturate
modifier from instruction 3 to 1.

 1:  add(8)     dst0   src0  src1
 2:  mov.sat(8) dst1   dst0
 3:  mov.sat(8) dst2   dst0

Unfortunately, we did not consider the case of instruction 2 having a
source modifier on dst0. Take for instance:

 1:  add(8)     dst0   src0  src1
 2:  mov.sat(8) dst1  -dst0
 3:  mov.sat(8) dst2   dst0

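Here, propagating the saturate into instruction 1 would make instruction 2
compute sat(-sat(x)) instead of sat(-x), and these differ: for x = -0.5 the
results are 0.0 and 0.5 respectively. Consider such an instruction to
interfere. This increases instruction counts in Anomaly 2, which could be a
bug fix depending on the values the first instruction produces.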

instructions in affected programs:     53228 -> 53934 (1.33%)
HURT:                                  360

Cc: <mesa-stable@lists.freedesktop.org>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp

index bc516618c3d3cd983e5e8c07258a61f65f87e803..e406c2899e89c954b257364ad1da979f70de42d9 100644 (file)
@@ -81,12 +81,16 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
             break;
          }
          for (int i = 0; i < scan_inst->sources; i++) {
-            if ((scan_inst->opcode != BRW_OPCODE_MOV || !scan_inst->saturate) &&
-                scan_inst->src[i].file == GRF &&
+            if (scan_inst->src[i].file == GRF &&
                 scan_inst->src[i].reg == inst->src[0].reg &&
                 scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
-               interfered = true;
-               break;
+               if (scan_inst->opcode != BRW_OPCODE_MOV ||
+                   !scan_inst->saturate ||
+                   scan_inst->src[0].abs ||
+                   scan_inst->src[0].negate) {
+                  interfered = true;
+                  break;
+               }
             }
          }
 
index f897bddb7aaee2247f257e385732e24da0de5b01..6f762bcc6e00f75c4c04128cd76de16ba211cbc0 100644 (file)
@@ -393,3 +393,47 @@ TEST_F(saturate_propagation_test, intervening_dest_write)
    EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 2)->opcode);
    EXPECT_TRUE(instruction(block0, 2)->saturate);
 }
+
+TEST_F(saturate_propagation_test, mul_neg_mov_sat_mov_sat)
+{
+   fs_reg dst0 = v->vgrf(glsl_type::float_type);
+   fs_reg dst1 = v->vgrf(glsl_type::float_type);
+   fs_reg dst2 = v->vgrf(glsl_type::float_type);
+   fs_reg src0 = v->vgrf(glsl_type::float_type);
+   fs_reg src1 = v->vgrf(glsl_type::float_type);
+   v->emit(BRW_OPCODE_MUL, dst0, src0, src1);
+   dst0.negate = true;  /* the first MOV.SAT reads dst0 negated */
+   v->emit(BRW_OPCODE_MOV, dst1, dst0)
+      ->saturate = true;
+   dst0.negate = false; /* the second MOV.SAT reads dst0 unmodified */
+   v->emit(BRW_OPCODE_MOV, dst2, dst0)
+      ->saturate = true;
+
+   /* A saturating MOV that reads the MUL result through a negate modifier
+    * sits between the MUL and the plain saturating MOV:
+    *
+    * = Before =
+    * 0: mul(8)        dst0  src0  src1
+    * 1: mov.sat(8)    dst1  -dst0
+    * 2: mov.sat(8)    dst2  dst0
+    * = After =
+    * (no changes -- the pass is expected to leave all three untouched)
+    */
+
+   v->calculate_cfg();
+   bblock_t *block0 = v->cfg->blocks[0];
+
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(2, block0->end_ip);
+
+   EXPECT_FALSE(saturate_propagation(v)); /* pass must return false here */
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(2, block0->end_ip);
+   EXPECT_EQ(BRW_OPCODE_MUL, instruction(block0, 0)->opcode);
+   EXPECT_FALSE(instruction(block0, 0)->saturate); /* MUL did not gain .sat */
+   EXPECT_FALSE(instruction(block0, 0)->src[1].negate);
+   EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 1)->opcode);
+   EXPECT_TRUE(instruction(block0, 1)->saturate);
+   EXPECT_TRUE(instruction(block0, 1)->src[0].negate); /* negate kept */
+   EXPECT_EQ(BRW_OPCODE_MOV, instruction(block0, 2)->opcode);
+   EXPECT_TRUE(instruction(block0, 2)->saturate); /* MOV.SAT still present */
+}