i915: Optimize SEQ and SNE when two operands are uniforms

author Ian Romanick <ian.d.romanick@intel.com>

Thu, 22 Aug 2013 02:37:30 +0000 (19:37 -0700)

committer Ian Romanick <ian.d.romanick@intel.com>

Tue, 27 Aug 2013 05:11:26 +0000 (22:11 -0700)
author Ian Romanick <ian.d.romanick@intel.com>
Thu, 22 Aug 2013 02:37:30 +0000 (19:37 -0700)
committer Ian Romanick <ian.d.romanick@intel.com>
Tue, 27 Aug 2013 05:11:26 +0000 (22:11 -0700)
diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c

index 930c2b876bc9cb4ba50bebafe00ff05ec817112c..67eff7665d1eaf6d0e89d2491e36678b625680f2 100644 (file)
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -817,23 +817,52 @@ upload_program(struct i915_fragment_program *p)
          flags = get_result_flags(inst);
          dst = get_result_vector(p, inst);
  
+         /* If both operands are uniforms or constants, we get 5 instructions
+          * like:
+          *
+          *     U[1] = MOV CONST[1]
+          *     U[0].xyz = SGE CONST[0].xxxx, U[1]
+          *     U[1] = MOV CONST[1].-x-y-z-w
+          *     R[0].xyz = SGE CONST[0].-x-x-x-x, U[1]
+          *     R[0].xyz = MUL R[0], U[0]
+          *
+          * This code is stupid.  Instead of having the individual calls to
+          * i915_emit_arith generate the moves to utemps, do it in the caller.
+          * This results in code like:
+          *
+          *     U[1] = MOV CONST[1]
+          *     U[0].xyz = SGE CONST[0].xxxx, U[1]
+          *     R[0].xyz = SGE CONST[0].-x-x-x-x, U[1].-x-y-z-w
+          *     R[0].xyz = MUL R[0], U[0]
+          */
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+
+         if (GET_UREG_TYPE(src0) == REG_TYPE_CONST
+             && GET_UREG_TYPE(src1) == REG_TYPE_CONST) {
+            unsigned tmp = i915_get_utemp(p);
+
+            i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
+                            src1, 0, 0);
+
+            src1 = tmp;
+         }
+
          /* tmp = src0 >= src1 */
          i915_emit_arith(p,
                          A0_SGE,
                          tmp,
                          flags, 0,
-                        src_vector(p, &inst->SrcReg[0], program),
-                        src_vector(p, &inst->SrcReg[1], program),
+                        src0,
+                        src1,
                          0);
          /* dst = src0 <= src1 */
          i915_emit_arith(p,
                          A0_SGE,
                          dst,
                          flags, 0,
-                        negate(src_vector(p, &inst->SrcReg[0], program),
-                               1, 1, 1, 1),
-                        negate(src_vector(p, &inst->SrcReg[1], program),
-                               1, 1, 1, 1),
+                        negate(src0, 1, 1, 1, 1),
+                        negate(src1, 1, 1, 1, 1),
                          0);
          /* dst = tmp && dst */
          i915_emit_arith(p,
@@ -966,23 +995,52 @@ upload_program(struct i915_fragment_program *p)
          flags = get_result_flags(inst);
          dst = get_result_vector(p, inst);
  
+         /* If both operands are uniforms or constants, we get 5 instructions
+          * like:
+          *
+          *     U[1] = MOV CONST[1]
+          *     U[0].xyz = SLT CONST[0].xxxx, U[1]
+          *     U[1] = MOV CONST[1].-x-y-z-w
+          *     R[0].xyz = SLT CONST[0].-x-x-x-x, U[1]
+          *     R[0].xyz = MUL R[0], U[0]
+          *
+          * This code is stupid.  Instead of having the individual calls to
+          * i915_emit_arith generate the moves to utemps, do it in the caller.
+          * This results in code like:
+          *
+          *     U[1] = MOV CONST[1]
+          *     U[0].xyz = SLT CONST[0].xxxx, U[1]
+          *     R[0].xyz = SLT CONST[0].-x-x-x-x, U[1].-x-y-z-w
+          *     R[0].xyz = MUL R[0], U[0]
+          */
+         src0 = src_vector(p, &inst->SrcReg[0], program);
+         src1 = src_vector(p, &inst->SrcReg[1], program);
+
+         if (GET_UREG_TYPE(src0) == REG_TYPE_CONST
+             && GET_UREG_TYPE(src1) == REG_TYPE_CONST) {
+            unsigned tmp = i915_get_utemp(p);
+
+            i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0,
+                            src1, 0, 0);
+
+            src1 = tmp;
+         }
+
          /* tmp = src0 < src1 */
          i915_emit_arith(p,
                          A0_SLT,
                          tmp,
                          flags, 0,
-                        src_vector(p, &inst->SrcReg[0], program),
-                        src_vector(p, &inst->SrcReg[1], program),
+                        src0,
+                        src1,
                          0);
          /* dst = src0 > src1 */
          i915_emit_arith(p,
                          A0_SLT,
                          dst,
                          flags, 0,
-                        negate(src_vector(p, &inst->SrcReg[0], program),
-                               1, 1, 1, 1),
-                        negate(src_vector(p, &inst->SrcReg[1], program),
-                               1, 1, 1, 1),
+                        negate(src0, 1, 1, 1, 1),
+                        negate(src1, 1, 1, 1, 1),
                          0);
          /* dst = tmp || dst */
          i915_emit_arith(p,
author	Ian Romanick <ian.d.romanick@intel.com>
	Thu, 22 Aug 2013 02:37:30 +0000 (19:37 -0700)
committer	Ian Romanick <ian.d.romanick@intel.com>
	Tue, 27 Aug 2013 05:11:26 +0000 (22:11 -0700)