i965/vec4: Emit smarter code for b2f of a comparison
author: Ian Romanick <ian.d.romanick@intel.com>
Wed, 11 Jun 2014 01:07:50 +0000 (18:07 -0700)
committer: Ian Romanick <ian.d.romanick@intel.com>
Wed, 11 Jun 2014 19:00:24 +0000 (12:00 -0700)
Previously we would emit the comparison, emit an AND to mask off extra
bits from the comparison result, then convert the result to float.  Now,
do the comparison, then use a cleverly constructed SEL to pick either
0.0f or 1.0f.

No piglit regressions on Ivybridge.

total instructions in shared programs: 1642311 -> 1639449 (-0.17%)
instructions in affected programs:     136533 -> 133671 (-2.10%)
GAINED:                                0
LOST:                                  0

Programs that are affected appear to save between 1 and 5 instructions
(just by skimming the output from shader-db report.py).

v2: s/b2i/b2f/ in commit subject (noticed by Chris Forbes).  Remove
extraneous fix_3src_operand (suggested by Matt).  The latter change
required swapping the order of the operands and using predicate_inverse.

Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Matt Turner <mattst88@gmail.com>
src/mesa/drivers/dri/i965/brw_vec4.h
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp

index 614ff0b7faa6391facd798e92c2237e6c1628217..20d717a77fbe72dab1fd61c8de6aa3808342ffbb 100644 (file)
@@ -587,6 +587,7 @@ public:
 
    bool try_emit_sat(ir_expression *ir);
    bool try_emit_mad(ir_expression *ir);
+   bool try_emit_b2f_of_compare(ir_expression *ir);
    void resolve_ud_negate(src_reg *reg);
 
    src_reg get_timestamp();
index 249072c64b73ed90a3cbfda35a8dfcce90fcca34..7fd8c2b37619614b770a288f488eeb74eeaf3fc5 100644 (file)
@@ -1126,6 +1126,48 @@ vec4_visitor::try_emit_mad(ir_expression *ir)
    return true;
 }
 
+bool
+vec4_visitor::try_emit_b2f_of_compare(ir_expression *ir)
+{  /* Try to emit ir as a CMP followed by a predicated SEL, for the case
+    * where ir's first operand is itself a comparison expression.
+    *
+    * Returns true when code was emitted; the value is then left in
+    * this->result.  Returns false, without visiting operands or emitting
+    * anything, when as_expression() yields NULL for the operand or when its
+    * operation is not one of the comparisons listed in the switch below.
+    */
+   ir_expression *const cmp = ir->operands[0]->as_expression();
+
+   if (cmp == NULL)
+      return false;
+
+   switch (cmp->operation) {
+   case ir_binop_less:
+   case ir_binop_greater:
+   case ir_binop_lequal:
+   case ir_binop_gequal:
+   case ir_binop_equal:
+   case ir_binop_nequal:
+      break;
+
+   default:
+      return false;
+   }
+
+   cmp->operands[0]->accept(this);
+   const src_reg cmp_src0 = this->result;
+
+   cmp->operands[1]->accept(this);
+   const src_reg cmp_src1 = this->result;
+
+   this->result = src_reg(this, ir->type);
+
+   emit(CMP(dst_reg(this->result), cmp_src0, cmp_src1,
+            brw_conditional_for_comparison(cmp->operation)));
+
+   /* If the comparison is false, this->result will just happen to be zero.
+    *
+    * The SEL below reads the CMP result as its first source and 1.0f as its
+    * second, writes back to the same register, and runs with the predicate
+    * inverted.  The intent is that a failed comparison keeps the zero that
+    * is already in this->result (all-zero bits are also 0.0f), while a
+    * passed comparison selects 1.0f.
+    *
+    * NOTE(review): this depends on CMP storing all-zero bits in its
+    * destination when the comparison fails -- confirm that this holds on
+    * every hardware generation this visitor is used for.
+    */
+   vec4_instruction *const inst = emit(BRW_OPCODE_SEL, dst_reg(this->result),
+                                       this->result, src_reg(1.0f));
+   inst->predicate = BRW_PREDICATE_NORMAL;
+   inst->predicate_inverse = true;
+
+   return true;
+}
+
 void
 vec4_visitor::emit_bool_comparison(unsigned int op,
                                 dst_reg dst, src_reg src0, src_reg src1)
@@ -1202,6 +1244,11 @@ vec4_visitor::visit(ir_expression *ir)
         return;
    }
 
+   if (ir->operation == ir_unop_b2f) {
+      if (try_emit_b2f_of_compare(ir))
+        return;
+   }
+
    for (operand = 0; operand < ir->get_num_operands(); operand++) {
       this->result.file = BAD_FILE;
       ir->operands[operand]->accept(this);