gallivm: Support extended swizzles in lp_build_swizzle1_aos().
author José Fonseca <jfonseca@vmware.com>
Thu, 1 Jul 2010 11:16:09 +0000 (12:16 +0100)
committer José Fonseca <jfonseca@vmware.com>
Thu, 1 Jul 2010 14:02:12 +0000 (15:02 +0100)
And rename to lp_build_swizzle_aos().

src/gallium/auxiliary/gallivm/lp_bld_quad.c
src/gallium/auxiliary/gallivm/lp_bld_swizzle.c
src/gallium/auxiliary/gallivm/lp_bld_swizzle.h

index 38fd5a39efa5c26b526aaceba313f3321180e4fd..ca36046d222a508ef8db024ec3ba0018630b9767 100644 (file)
@@ -61,8 +61,8 @@ LLVMValueRef
 lp_build_ddx(struct lp_build_context *bld,
              LLVMValueRef a)
 {
-   LLVMValueRef a_left  = lp_build_swizzle1_aos(bld, a, swizzle_left);
-   LLVMValueRef a_right = lp_build_swizzle1_aos(bld, a, swizzle_right);
+   LLVMValueRef a_left  = lp_build_swizzle_aos(bld, a, swizzle_left);
+   LLVMValueRef a_right = lp_build_swizzle_aos(bld, a, swizzle_right);
    return lp_build_sub(bld, a_right, a_left);
 }
 
@@ -71,8 +71,8 @@ LLVMValueRef
 lp_build_ddy(struct lp_build_context *bld,
              LLVMValueRef a)
 {
-   LLVMValueRef a_top    = lp_build_swizzle1_aos(bld, a, swizzle_top);
-   LLVMValueRef a_bottom = lp_build_swizzle1_aos(bld, a, swizzle_bottom);
+   LLVMValueRef a_top    = lp_build_swizzle_aos(bld, a, swizzle_top);
+   LLVMValueRef a_bottom = lp_build_swizzle_aos(bld, a, swizzle_bottom);
    return lp_build_sub(bld, a_bottom, a_top);
 }
 
index 53705fa0822987c97052e9245503de1afae61141..20cf96ca66991ed28303aecf1fa47cb1dcd4da65 100644 (file)
@@ -110,7 +110,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
    /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
     * using shuffles here actually causes worst results. More investigation is
     * needed. */
-   if (n <= 4) {
+   if (type.width >= 16) {
       /*
        * Shuffle.
        */
@@ -132,7 +132,7 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
        *   YY00 YY00 .... YY00
        *   YYYY YYYY .... YYYY  <= output
        */
-      struct lp_type type4 = type;
+      struct lp_type type4;
       const char shifts[4][2] = {
          { 1,  2},
          {-1,  2},
@@ -147,6 +147,13 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
 
       a = LLVMBuildAnd(bld->builder, a, lp_build_const_mask_aos(type, cond), "");
 
+      /*
+       * Build a type where each element is an integer that covers the four
+       * channels.
+       */
+
+      type4 = type;
+      type4.floating = FALSE;
       type4.width *= 4;
       type4.length /= 4;
 
@@ -176,31 +183,170 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
 
 
 LLVMValueRef
-lp_build_swizzle1_aos(struct lp_build_context *bld,
-                      LLVMValueRef a,
-                      const unsigned char swizzle[4])
+lp_build_swizzle_aos(struct lp_build_context *bld,
+                     LLVMValueRef a,
+                     const unsigned char swizzles[4])
 {
-   const unsigned n = bld->type.length;
+   const struct lp_type type = bld->type;
+   const unsigned n = type.length;
    unsigned i, j;
 
-   if(a == bld->undef || a == bld->zero || a == bld->one)
+   if (swizzles[0] == PIPE_SWIZZLE_RED &&
+       swizzles[1] == PIPE_SWIZZLE_GREEN &&
+       swizzles[2] == PIPE_SWIZZLE_BLUE &&
+       swizzles[3] == PIPE_SWIZZLE_ALPHA) {
       return a;
+   }
 
-   if(swizzle[0] == swizzle[1] && swizzle[1] == swizzle[2] && swizzle[2] == swizzle[3])
-      return lp_build_broadcast_aos(bld, a, swizzle[0]);
+   if (swizzles[0] == swizzles[1] &&
+       swizzles[1] == swizzles[2] &&
+       swizzles[2] == swizzles[3]) {
+      switch (swizzles[0]) {
+      case PIPE_SWIZZLE_RED:
+      case PIPE_SWIZZLE_GREEN:
+      case PIPE_SWIZZLE_BLUE:
+      case PIPE_SWIZZLE_ALPHA:
+         return lp_build_broadcast_aos(bld, a, swizzles[0]);
+      case PIPE_SWIZZLE_ZERO:
+         return bld->zero;
+      case PIPE_SWIZZLE_ONE:
+         return bld->one;
+      default:
+         assert(0);
+         return bld->undef;
+      }
+   }
 
-   {
+   if (type.width >= 16) {
       /*
        * Shuffle.
        */
-      LLVMTypeRef elem_type = LLVMInt32Type();
+      LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(type));
+      LLVMTypeRef i32t = LLVMInt32Type();
       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+      LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];
+
+      memset(aux, 0, sizeof aux);
+
+      for(j = 0; j < n; j += 4) {
+         for(i = 0; i < 4; ++i) {
+            unsigned shuffle;
+            switch (swizzles[i]) {
+            default:
+               assert(0);
+               /* fall through */
+            case PIPE_SWIZZLE_RED:
+            case PIPE_SWIZZLE_GREEN:
+            case PIPE_SWIZZLE_BLUE:
+            case PIPE_SWIZZLE_ALPHA:
+               shuffle = j + swizzles[i];
+               break;
+            case PIPE_SWIZZLE_ZERO:
+               shuffle = type.length + 0;
+               if (!aux[0]) {
+                  aux[0] = lp_build_const_elem(type, 0.0);
+               }
+               break;
+            case PIPE_SWIZZLE_ONE:
+               shuffle = type.length + 1;
+               if (!aux[1]) {
+                  aux[1] = lp_build_const_elem(type, 1.0);
+               }
+               break;
+            }
+            shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
+         }
+      }
 
-      for(j = 0; j < n; j += 4)
-         for(i = 0; i < 4; ++i)
-            shuffles[j + i] = LLVMConstInt(elem_type, j + swizzle[i], 0);
+      for (i = 0; i < n; ++i) {
+         if (!aux[i]) {
+            aux[i] = undef;
+         }
+      }
 
-      return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
+      return LLVMBuildShuffleVector(bld->builder, a,
+                                    LLVMConstVector(aux, n),
+                                    LLVMConstVector(shuffles, n), "");
+   } else {
+      /*
+       * Bit mask and shifts.
+       *
+       * For example, this will convert BGRA to RGBA by doing
+       *
+       *   rgba = (bgra & 0x00ff0000) >> 16
+       *        | (bgra & 0xff00ff00)
+       *        | (bgra & 0x000000ff) << 16
+       *
+       * This is necessary not only because it is faster, but also because the
+       * X86 backend will refuse shuffles of <4 x i8> vectors.
+       */
+      LLVMValueRef res;
+      struct lp_type type4;
+      boolean cond[4];
+      unsigned chan;
+      int shift;
+
+      /*
+       * Start with a mixture of 1 and 0.
+       */
+      for (chan = 0; chan < 4; ++chan) {
+         cond[chan] = swizzles[chan] == PIPE_SWIZZLE_ONE ? TRUE : FALSE;
+      }
+      res = lp_build_select_aos(bld, bld->one, bld->zero, cond);
+
+      /*
+       * Build a type where each element is an integer that covers the four
+       * channels.
+       */
+      type4 = type;
+      type4.floating = FALSE;
+      type4.width *= 4;
+      type4.length /= 4;
+
+      a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), "");
+      res = LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type4), "");
+
+      /*
+       * Mask and shift the channels, trying to group as many channels in the
+       * same shift as possible
+       */
+      for (shift = -3; shift <= 3; ++shift) {
+         unsigned long long mask = 0;
+
+         assert(type4.width <= sizeof(mask)*8);
+
+         for (chan = 0; chan < 4; ++chan) {
+            /* FIXME: big endian */
+            if (swizzles[chan] < 4 &&
+                chan - swizzles[chan] == shift) {
+               mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
+            }
+         }
+
+         if (mask) {
+            LLVMValueRef masked;
+            LLVMValueRef shifted;
+
+            if (0)
+               debug_printf("shift = %i, mask = 0x%08llx\n", shift, mask);
+
+            masked = LLVMBuildAnd(bld->builder, a,
+                                  lp_build_const_int_vec(type4, mask), "");
+            if (shift > 0) {
+               shifted = LLVMBuildShl(bld->builder, masked,
+                                      lp_build_const_int_vec(type4, shift*type.width), "");
+            } else if (shift < 0) {
+               shifted = LLVMBuildLShr(bld->builder, masked,
+                                       lp_build_const_int_vec(type4, -shift*type.width), "");
+            } else {
+               shifted = masked;
+            }
+
+            res = LLVMBuildOr(bld->builder, res, shifted, "");
+         }
+      }
+
+      return LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type), "");
    }
 }
 
index 509e97c0ae6168b8733a250e094f526376c24286..315e1bcb548b0a00b1dafe77bda4e8651b215b9d 100644 (file)
@@ -68,12 +68,12 @@ lp_build_broadcast_aos(struct lp_build_context *bld,
 /**
  * Swizzle a vector consisting of an array of XYZW structs.
  *
- * @param swizzle is the in [0,4[ range.
+ * @param swizzles holds four PIPE_SWIZZLE_* values (channel selectors or ZERO/ONE).
  */
 LLVMValueRef
-lp_build_swizzle1_aos(struct lp_build_context *bld,
-                      LLVMValueRef a,
-                      const unsigned char swizzle[4]);
+lp_build_swizzle_aos(struct lp_build_context *bld,
+                     LLVMValueRef a,
+                     const unsigned char swizzles[4]);
 
 
 LLVMValueRef