i965/fs: Use the embedded compare in SEL on gen6+.
author: Eric Anholt <eric@anholt.net>
Thu, 12 May 2011 16:03:24 +0000 (09:03 -0700)
committer: Eric Anholt <eric@anholt.net>
Tue, 31 May 2011 19:23:50 +0000 (12:23 -0700)
This avoids the extra CMP and the predication on SEL, so in addition
to emitting one fewer instruction, it makes scheduling less constrained.

Improves glbenchmark Egypt performance 0.6% +/- 0.2% (n=3).  Reduces
FS instruction count across affected shaders in shader-db by 1.3%
without regressing any.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp

index 36040c3e0714511c203841dabeb4f2a902925357..09033aecd7c20348c888bd1e2a16320f352850e1 100644 (file)
@@ -1033,12 +1033,16 @@ fs_visitor::propagate_constants()
                  scan_inst->src[i] = inst->src[0];
                  progress = true;
               } else if (i == 0 && scan_inst->src[1].file != IMM) {
-                 /* Fit this constant in by swapping the operands and
-                  * flipping the predicate
-                  */
                  scan_inst->src[0] = scan_inst->src[1];
                  scan_inst->src[1] = inst->src[0];
-                 scan_inst->predicate_inverse = !scan_inst->predicate_inverse;
+
+                 /* If this was predicated, flipping operands means
+                  * we also need to flip the predicate.
+                  */
+                 if (scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) {
+                    scan_inst->predicate_inverse =
+                       !scan_inst->predicate_inverse;
+                 }
                  progress = true;
               }
               break;
index 6e81256cec5f34cff2b8e958b9f81cf887d5ddb0..b4857871c78b75bdb1fe043d503bf4844a891180 100644 (file)
@@ -386,24 +386,34 @@ fs_visitor::visit(ir_expression *ir)
       break;
 
    case ir_binop_min:
-      /* Unalias the destination */
-      this->result = fs_reg(this, ir->type);
+      if (intel->gen >= 6) {
+        inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
+        inst->conditional_mod = BRW_CONDITIONAL_L;
+      } else {
+        /* Unalias the destination */
+        this->result = fs_reg(this, ir->type);
 
-      inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]);
-      inst->conditional_mod = BRW_CONDITIONAL_L;
+        inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]);
+        inst->conditional_mod = BRW_CONDITIONAL_L;
 
-      inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
-      inst->predicated = true;
+        inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
+        inst->predicated = true;
+      }
       break;
    case ir_binop_max:
-      /* Unalias the destination */
-      this->result = fs_reg(this, ir->type);
+      if (intel->gen >= 6) {
+        inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
+        inst->conditional_mod = BRW_CONDITIONAL_GE;
+      } else {
+        /* Unalias the destination */
+        this->result = fs_reg(this, ir->type);
 
-      inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]);
-      inst->conditional_mod = BRW_CONDITIONAL_G;
+        inst = emit(BRW_OPCODE_CMP, this->result, op[0], op[1]);
+        inst->conditional_mod = BRW_CONDITIONAL_G;
 
-      inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
-      inst->predicated = true;
+        inst = emit(BRW_OPCODE_SEL, this->result, op[0], op[1]);
+        inst->predicated = true;
+      }
       break;
 
    case ir_binop_pow: