gallivm: minor rho calculation optimization for 1 or 3 coords
author Roland Scheidegger <sroland@vmware.com>
Wed, 3 Apr 2013 00:49:56 +0000 (02:49 +0200)
committer Roland Scheidegger <sroland@vmware.com>
Wed, 3 Apr 2013 23:03:42 +0000 (01:03 +0200)
Using a different packing for the single coord case should save a shuffle.
Plus some minor style fixes.

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
src/gallium/auxiliary/gallivm/lp_bld_quad.c
src/gallium/auxiliary/gallivm/lp_bld_sample.c

index 1955add88835179cd3eb855817d53c8a2b1002ac..f2a762aec2adca501be1e15066558a9f680c633d 100644 (file)
@@ -81,7 +81,8 @@ lp_build_ddy(struct lp_build_context *bld,
 /*
  * Helper for building packed ddx/ddy vector for one coord (scalar per quad
  * values). The vector will look like this (8-wide):
- * dr1dx dr1dy _____ _____ dr2dx dr2dy _____ _____
+ * dr1dx _____ -dr1dy _____ dr2dx _____ -dr2dy _____
+ * This only requires one shuffle instead of two for more straightforward packing.
  */
 LLVMValueRef
 lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
@@ -91,19 +92,15 @@ lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef vec1, vec2;
 
-   /* same packing as _twocoord, but can use aos swizzle helper */
+   /* use aos swizzle helper */
 
-   /*
-    * XXX could make swizzle1 a noop swizzle by using right top/bottom
-    * pair for ddy
-    */
-   static const unsigned char swizzle1[] = {
-      LP_BLD_QUAD_TOP_LEFT, LP_BLD_QUAD_TOP_LEFT,
-      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+   static const unsigned char swizzle1[] = { /* no-op swizzle */
+      LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE,
+      LP_BLD_QUAD_BOTTOM_LEFT, LP_BLD_SWIZZLE_DONTCARE
    };
    static const unsigned char swizzle2[] = {
-      LP_BLD_QUAD_TOP_RIGHT, LP_BLD_QUAD_BOTTOM_LEFT,
-      LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+      LP_BLD_QUAD_TOP_RIGHT, LP_BLD_SWIZZLE_DONTCARE,
+      LP_BLD_QUAD_TOP_LEFT, LP_BLD_SWIZZLE_DONTCARE
    };
 
    vec1 = lp_build_swizzle_aos(bld, a, swizzle1);
@@ -120,6 +117,7 @@ lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld,
  * Helper for building packed ddx/ddy vector for one coord (scalar per quad
  * values). The vector will look like this (8-wide):
  * ds1dx ds1dy dt1dx dt1dy ds2dx ds2dy dt2dx dt2dy
+ * This only needs 2 (v)shufps.
  */
 LLVMValueRef
 lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld,
index fc8bae70152bc2bdd257262392222cba047988f6..9a008971c04f2c54e9d9e41c512f1d81565d1f28 100644 (file)
@@ -226,7 +226,6 @@ lp_build_rho(struct lp_build_sample_context *bld,
    LLVMValueRef int_size, float_size;
    LLVMValueRef rho;
    LLVMValueRef first_level, first_level_vec;
-   LLVMValueRef abs_ddx_ddy[2];
    unsigned length = coord_bld->type.length;
    unsigned num_quads = length / 4;
    unsigned i;
@@ -279,32 +278,28 @@ lp_build_rho(struct lp_build_sample_context *bld,
          ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s);
       }
       else if (dims >= 2) {
-         ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld,
-                                                       s, t);
+         ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
          if (dims > 2) {
             ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r);
          }
       }
 
-      abs_ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
+      ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
       if (dims > 2) {
-         abs_ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
-      }
-      else {
-         abs_ddx_ddy[1] = NULL;
+         ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
       }
 
-      if (dims == 1) {
-         static const unsigned char swizzle1[] = {
+      if (dims < 2) {
+         static const unsigned char swizzle1[] = { /* no-op swizzle */
             0, LP_BLD_SWIZZLE_DONTCARE,
             LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
          };
          static const unsigned char swizzle2[] = {
-            1, LP_BLD_SWIZZLE_DONTCARE,
+            2, LP_BLD_SWIZZLE_DONTCARE,
             LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
          };
-         rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1);
-         rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2);
+         rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle1);
+         rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle2);
       }
       else if (dims == 2) {
          static const unsigned char swizzle1[] = {
@@ -315,8 +310,8 @@ lp_build_rho(struct lp_build_sample_context *bld,
             1, 3,
             LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
          };
-         rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1);
-         rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2);
+         rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle1);
+         rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle2);
       }
       else {
          LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
@@ -329,12 +324,12 @@ lp_build_rho(struct lp_build_sample_context *bld,
             shuffles1[4*i + 3] = i32undef;
             shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1);
             shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3);
-            shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 1);
+            shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 2);
             shuffles2[4*i + 3] = i32undef;
          }
-         rho_xvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1],
+         rho_xvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
                                            LLVMConstVector(shuffles1, length), "");
-         rho_yvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1],
+         rho_yvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
                                            LLVMConstVector(shuffles2, length), "");
       }