i965/fs: Try to avoid generating extra MOVs to do saturates.
author Eric Anholt <eric@anholt.net>
Sat, 10 Mar 2012 21:48:42 +0000 (13:48 -0800)
committer Eric Anholt <eric@anholt.net>
Thu, 12 Apr 2012 01:08:21 +0000 (18:08 -0700)
This change (before the previous two) produced a .23% +/- .11%
performance improvement in Unigine Tropics at 1024x768 on IVB.

Total instructions: 269270 -> 262649
614/2148 programs affected (28.6%)
179386 -> 172765 instructions in affected programs (3.7% reduction)

v2: Move some of the logic of finding the instruction that produced
    the result of an expression tree to a helper.

src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs.h
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp

index 5f3d79d1d094660fa5ba9737bfa1ee1b2db5dc59..f9c1483fef87d700a342d196b6083393a43987d7 100644 (file)
@@ -1717,6 +1717,35 @@ fs_visitor::virtual_grf_interferes(int a, int b)
    return start < end;
 }
 
+/**
+ * Possibly returns the instruction that set up reg.
+ *
+ * When processing an expression/variable dereference tree, the instructions
+ * generated all write temporaries that are dead outside of the tree, so we
+ * are free to rewrite the one producing the result (e.g. retarget its dst).
+ *
+ * Returns end if it differs from start (i.e. something was generated), is
+ * not predicated, force_uncompressed or force_sechalf, and has reg as its
+ * dst; returns NULL otherwise.  Note that this doesn't guarantee that the
+ * returned instruction wrote only reg -- it might be the size=4
+ * destination of a texture instruction.
+ */
+fs_inst *
+fs_visitor::get_instruction_generating_reg(fs_inst *start,
+                                          fs_inst *end,
+                                          fs_reg reg)
+{
+   if (end == start ||
+       end->predicated ||
+       end->force_uncompressed ||
+       end->force_sechalf ||
+       !reg.equals(&end->dst)) {
+      return NULL;
+   } else {
+      return end;
+   }
+}
+
 bool
 fs_visitor::run()
 {
index 7aebffa699d4768620c22a1bf2bbecd06dbd9d8d..d3a1045a604236421b0bd0a0ce6241774db9f10d 100644 (file)
@@ -487,6 +487,9 @@ public:
    }
 
    int type_size(const struct glsl_type *type);
+   fs_inst *get_instruction_generating_reg(fs_inst *start,
+                                          fs_inst *end,
+                                          fs_reg reg);
 
    bool run();
    void setup_paramvalues_refs();
index 00524288f6dad3c517474120db0858e5e2b69190..0cb75f9d5b3cafbeb36eb0f387dfbe562b2881cf 100644 (file)
@@ -172,12 +172,25 @@ fs_visitor::try_emit_saturate(ir_expression *ir)
    if (!sat_val)
       return false;
 
+   fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail();
+
    sat_val->accept(this);
    fs_reg src = this->result;
 
-   this->result = fs_reg(this, ir->type);
-   fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src);
-   inst->saturate = true;
+   fs_inst *last_inst = (fs_inst *) this->instructions.get_tail();
+
+   /* If the last instruction from our accept() didn't generate our
+    * src, generate a saturated MOV
+    */
+   fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
+   if (!modify || modify->regs_written() != 1) {
+      fs_inst *inst = emit(BRW_OPCODE_MOV, this->result, src);
+      inst->saturate = true;
+   } else {
+      modify->saturate = true;
+      this->result = src;
+   }
+
 
    return true;
 }
@@ -591,9 +604,6 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
                                    fs_inst *pre_rhs_inst,
                                    fs_inst *last_rhs_inst)
 {
-   if (pre_rhs_inst == last_rhs_inst)
-      return false; /* No instructions generated to work with. */
-
    /* Only attempt if we're doing a direct assignment. */
    if (ir->condition ||
        !(ir->lhs->type->is_scalar() ||
@@ -602,20 +612,20 @@ fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
       return false;
 
    /* Make sure the last instruction generated our source reg. */
-   if (last_rhs_inst->predicated ||
-       last_rhs_inst->force_uncompressed ||
-       last_rhs_inst->force_sechalf ||
-       !src.equals(&last_rhs_inst->dst))
+   fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst,
+                                                   last_rhs_inst,
+                                                   src);
+   if (!modify)
       return false;
 
    /* If last_rhs_inst wrote a different number of components than our LHS,
     * we can't safely rewrite it.
     */
-   if (ir->lhs->type->vector_elements != last_rhs_inst->regs_written())
+   if (ir->lhs->type->vector_elements != modify->regs_written())
       return false;
 
    /* Success!  Rewrite the instruction. */
-   last_rhs_inst->dst = dst;
+   modify->dst = dst;
 
    return true;
 }