i915g: Don't generate useless swizzles before texture accesses.
author Stéphane Marchesin <marcheu@chromium.org>
Fri, 23 Sep 2011 02:24:07 +0000 (19:24 -0700)
committer Stéphane Marchesin <marcheu@chromium.org>
Fri, 23 Sep 2011 02:26:33 +0000 (19:26 -0700)
That helps reduce the number of texture indirections, which are very limited on i915.

src/gallium/drivers/i915/i915_fpc.h
src/gallium/drivers/i915/i915_fpc_emit.c
src/gallium/drivers/i915/i915_fpc_translate.c

index 41bf5161b0b64a4eef574864e798a2186f9260d4..26cf2fbbe1072470adbea2e19ca48d8be10005eb 100644 (file)
@@ -169,7 +169,10 @@ extern void i915_release_utemps(struct i915_fp_compile *p);
 extern uint i915_emit_texld(struct i915_fp_compile *p,
                               uint dest,
                               uint destmask,
-                              uint sampler, uint coord, uint op);
+                              uint sampler,
+                              uint coord,
+                              uint op,
+                              uint num_coord);
 
 extern uint i915_emit_arith(struct i915_fp_compile *p,
                               uint op,
index c4a42df7882f9bfa0fbbc39ae928f9eaa7106705..d29539623297e3e5ff67088c788a63043c0d3f35 100644 (file)
@@ -216,16 +216,36 @@ i915_emit_arith(struct i915_fp_compile * p,
  * \param opcode  the instruction opcode
  */
 uint i915_emit_texld( struct i915_fp_compile *p,
-                       uint dest,
-                       uint destmask,
-                       uint sampler,
-                       uint coord,
-                       uint opcode )
+                      uint dest,
+                      uint destmask,
+                      uint sampler,
+                      uint coord,
+                      uint opcode,
+                      uint num_coord )
 {
    const uint k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord));
+
    int temp = -1;
+   uint ignore = 0;
+
+   /* Eliminate the useless texture coordinates. Otherwise we end up generating
+    * a swizzle for no reason below. */
+   switch(num_coord) {
+      case 0:
+         /* Ignore x */
+         ignore |= (0xf << UREG_CHANNEL_X_SHIFT);
+      case 1:
+         /* Ignore y */
+         ignore |= (0xf << UREG_CHANNEL_Y_SHIFT);
+      case 2:
+         /* Ignore z */
+         ignore |= (0xf << UREG_CHANNEL_Z_SHIFT);
+      case 3:
+         /* Ignore w */
+         ignore |= (0xf << UREG_CHANNEL_W_SHIFT);
+   }
 
-   if (coord != k) {
+   if ( (coord &~ignore ) != (k & ~ignore) ) {
       /* texcoord is swizzled or negated.  Need to allocate a new temporary
        * register (a utemp / unpreserved temp) won't do.
        */
@@ -248,7 +268,7 @@ uint i915_emit_texld( struct i915_fp_compile *p,
    if (destmask != A0_DEST_CHANNEL_ALL) {
       /* if not writing to XYZW... */
       uint tmp = i915_get_utemp(p);
-      i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode );
+      i915_emit_texld( p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode, num_coord );
       i915_emit_arith( p, A0_MOV, dest, destmask, 0, tmp, 0, 0 );
       /* XXX release utemp here? */
    }
index 641ab3c81077024669af7bcf539e1fc9413a67f8..b383a7476b8164fe7e2a0e23b638394fc1553c6d 100644 (file)
@@ -372,6 +372,33 @@ translate_tex_src_target(struct i915_fp_compile *p, uint tex)
    }
 }
 
+/**
+ * Return the coordinate count (1, 2 or 3) for a TGSI_TEXTURE_x target; unknown
+ * targets call i915_program_error() and fall back to 2. */
+static uint
+texture_num_coords(struct i915_fp_compile *p, uint tex)
+{
+   switch (tex) {
+   case TGSI_TEXTURE_SHADOW1D: /* NOTE(review): SHADOW* targets presumably also read a compare coordinate that is not counted here -- confirm */
+   case TGSI_TEXTURE_1D:
+      return 1;
+
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_SHADOWRECT:
+   case TGSI_TEXTURE_RECT:
+      return 2;
+
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_CUBE:
+      return 3;
+
+   default:
+      /* Unrecognized target: flag the error, then return a 2D-style count. */
+      i915_program_error(p, "Num coords");
+      return 2;
+   }
+}
+
 
 /**
  * Generate texel lookup instruction.
@@ -393,7 +420,8 @@ emit_tex(struct i915_fp_compile *p,
                     get_result_flags( inst ),
                     sampler,
                     coord,
-                    opcode);
+                    opcode,
+                    texture_num_coords(p, texture) );
 }
 
 
@@ -622,7 +650,8 @@ i915_translate_instruction(struct i915_fp_compile *p,
                       A0_DEST_CHANNEL_ALL,   /* dest writemask */
                       0,                     /* sampler */
                       src0,                  /* coord*/
-                      T0_TEXKILL);           /* opcode */
+                      T0_TEXKILL,            /* opcode */
+                      1);                    /* num_coord */
       break;
 
    case TGSI_OPCODE_KILP: