llvmpipe: Optimize blend swizzles by using bitmasks instead of shuffles for ubytes.
authorJosé Fonseca <jfonseca@vmware.com>
Tue, 4 Aug 2009 11:09:52 +0000 (12:09 +0100)
committerJosé Fonseca <jfonseca@vmware.com>
Sat, 29 Aug 2009 08:21:23 +0000 (09:21 +0100)
src/gallium/drivers/llvmpipe/SConscript
src/gallium/drivers/llvmpipe/lp_bld_blend.c
src/gallium/drivers/llvmpipe/lp_bld_const.c
src/gallium/drivers/llvmpipe/lp_bld_const.h
src/gallium/drivers/llvmpipe/lp_bld_swizzle.c [new file with mode: 0644]
src/gallium/drivers/llvmpipe/lp_bld_swizzle.h [new file with mode: 0644]

index 71c55a93ab9960b5f1de6ac9b699d98c0d21c0f0..85d0a737faac5d8061f153eee46ba0c45836ab86 100644 (file)
@@ -19,6 +19,7 @@ llvmpipe = env.ConvenienceLibrary(
                'lp_bld_loop.c',
                'lp_bld_logicop.c',
                'lp_bld_blend.c',
+               'lp_bld_swizzle.c',
                'lp_bld_type.c',
                'lp_clear.c',
                'lp_context.c',
index 90afe2e6b670dd3c1d7f3a3b3fb871b33dc92206..e070aac378df334049fe4ec2565746ff95e32235 100644 (file)
@@ -43,6 +43,7 @@
 #include "lp_bld_type.h"
 #include "lp_bld_const.h"
 #include "lp_bld_arit.h"
+#include "lp_bld_swizzle.h"
 
 
 /**
@@ -179,67 +180,30 @@ lp_build_blend_swizzle(struct lp_build_blend_context *bld,
                        enum lp_build_blend_swizzle rgb_swizzle,
                        unsigned alpha_swizzle)
 {
-   const unsigned n = bld->base.type.length;
-   LLVMValueRef swizzles[LP_MAX_VECTOR_LENGTH];
-   unsigned i, j;
-
    if(rgb == alpha) {
       if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA)
          return rgb;
-
-      alpha = bld->base.undef;
-   }
-
-   if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA &&
-      !bld->base.type.floating) {
-#if 0
-      /* Use a select */
-      /* FIXME: Unfortunetaly select of vectors do not work */
-
-      for(j = 0; j < n; j += 4)
-         for(i = 0; i < 4; ++i)
-            swizzles[j + i] = LLVMConstInt(LLVMInt1Type(), i == alpha_swizzle ? 0 : 1, 0);
-
-      return LLVMBuildSelect(bld->base.builder, LLVMConstVector(swizzles, n), rgb, alpha, "");
-#else
-      /* XXX: Use a bitmask, as byte shuffles often end up being translated
-       * into many PEXTRB. Ideally LLVM X86 code generation should pick this
-       * automatically for us. */
-
-      for(j = 0; j < n; j += 4)
-         for(i = 0; i < 4; ++i)
-            swizzles[j + i] = LLVMConstInt(LLVMIntType(bld->base.type.width), i == alpha_swizzle ? 0 : ~0, 0);
-
-      /* TODO: Unfortunately constant propagation prevents from using PANDN. And
-       * on SSE4 we have even better -- PBLENDVB */
-      return LLVMBuildOr(bld->base.builder,
-                         LLVMBuildAnd(bld->base.builder, rgb,   LLVMConstVector(swizzles, n), ""),
-                         LLVMBuildAnd(bld->base.builder, alpha, LLVMBuildNot(bld->base.builder, LLVMConstVector(swizzles, n), ""), ""),
-                         "");
-#endif
+      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA)
+         return lp_build_broadcast_aos(&bld->base, rgb, alpha_swizzle);
    }
-
-   for(j = 0; j < n; j += 4) {
-      for(i = 0; i < 4; ++i) {
-         unsigned swizzle;
-
-         if(i == alpha_swizzle && alpha != bld->base.undef) {
-            /* Take the alpha from the second shuffle argument */
-            swizzle = n + j + alpha_swizzle;
-         }
-         else if (rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) {
-            /* Take the alpha from the first shuffle argument */
-            swizzle = j + alpha_swizzle;
-         }
-         else {
-            swizzle = j + i;
-         }
-
-         swizzles[j + i] = LLVMConstInt(LLVMInt32Type(), swizzle, 0);
+   else {
+      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) {
+         boolean cond[4] = {0, 0, 0, 0};
+         cond[alpha_swizzle] = 1;
+         return lp_build_select_aos(&bld->base, alpha, rgb, cond);
+      }
+      if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_AAAA) {
+         unsigned char swizzle[4];
+         swizzle[0] = alpha_swizzle;
+         swizzle[1] = alpha_swizzle;
+         swizzle[2] = alpha_swizzle;
+         swizzle[3] = alpha_swizzle;
+         swizzle[alpha_swizzle] += 4;
+         return lp_build_swizzle2_aos(&bld->base, rgb, alpha, swizzle);
       }
    }
-
-   return LLVMBuildShuffleVector(bld->base.builder, rgb, alpha, LLVMConstVector(swizzles, n), "");
+   assert(0);
+   return bld->base.undef;
 }
 
 
index 44fcc467f4026443e0c3ede323f7782313b6512b..fe1c627eeebadfb7fe0a88268676202c6707154e 100644 (file)
@@ -143,3 +143,38 @@ lp_build_const_aos(union lp_type type,
 
    return LLVMConstVector(elems, type.length);
 }
+
+
+/**
+ * Build a constant integer vector whose elements all hold the same value.
+ *
+ * The elements are integers of type.width bits and the vector has
+ * type.length elements; the swizzle code uses the result as the shift
+ * amount operand of vector shifts.
+ */
+LLVMValueRef
+lp_build_const_shift(union lp_type type,
+                     int c)
+{
+   const unsigned count = type.length;
+   LLVMValueRef lanes[LP_MAX_VECTOR_LENGTH];
+   LLVMValueRef amount;
+   unsigned lane;
+
+   assert(count <= LP_MAX_VECTOR_LENGTH);
+
+   /* Every lane carries the same constant, so build it once and replicate. */
+   amount = LLVMConstInt(LLVMIntType(type.width), c, 0);
+   for(lane = 0; lane < count; ++lane) {
+      lanes[lane] = amount;
+   }
+
+   return LLVMConstVector(lanes, count);
+}
+
+
+/**
+ * Build a constant per-channel bit mask for a vector of XYZW structures.
+ *
+ * Within every group of four elements, element i is all ones when cond[i]
+ * is true and zero otherwise. Elements are integers of type.width bits.
+ *
+ * type.length must be a multiple of four: the mask is always written in
+ * whole XYZW groups, so any other length would be overrun.
+ */
+LLVMValueRef
+lp_build_const_mask_aos(union lp_type type,
+                        boolean cond[4])
+{
+   LLVMTypeRef elem_type = LLVMIntType(type.width);
+   LLVMValueRef masks[LP_MAX_VECTOR_LENGTH];
+   unsigned i, j;
+
+   assert(type.length <= LP_MAX_VECTOR_LENGTH);
+   /* The loop below fills four elements at a time. */
+   assert(type.length % 4 == 0);
+
+   for(j = 0; j < type.length; j += 4)
+      for(i = 0; i < 4; ++i)
+         masks[j + i] = LLVMConstInt(elem_type, cond[i] ? ~(unsigned long long)0 : 0, 0);
+
+   return LLVMConstVector(masks, type.length);
+}
index f2e5deca944cde1d48c57b7d10dabc5cff2a9eaf..98ed8911a554b0c960a99da0092a918a50193f49 100644 (file)
@@ -61,4 +61,14 @@ lp_build_const_aos(union lp_type type,
                    const unsigned char *swizzle);
 
 
+LLVMValueRef
+lp_build_const_shift(union lp_type type,
+                     int c);
+
+
+LLVMValueRef
+lp_build_const_mask_aos(union lp_type type,
+                        boolean cond[4]);
+
+
 #endif /* !LP_BLD_CONST_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.c
new file mode 100644 (file)
index 0000000..0205d17
--- /dev/null
@@ -0,0 +1,264 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "util/u_debug.h"
+
+#include "lp_bld_type.h"
+#include "lp_bld_const.h"
+#include "lp_bld_swizzle.h"
+
+
+/**
+ * Broadcast one channel of each XYZW group of a vector into all four
+ * channels of that group.
+ *
+ * @param bld      build context; supplies the vector type and the builder
+ * @param a        vector holding an array of XYZW structures
+ * @param channel  channel to replicate, in the [0,4[ range
+ *
+ * The context's undef, zero and one values are returned unchanged.
+ */
+LLVMValueRef
+lp_build_broadcast_aos(struct lp_build_context *bld,
+                       LLVMValueRef a,
+                       unsigned channel)
+{
+   const union lp_type type = bld->type;
+   const unsigned n = type.length;
+   unsigned i, j;
+
+   /* channel indexes both cond[] and shifts[] below. */
+   assert(channel < 4);
+
+   if(a == bld->undef || a == bld->zero || a == bld->one)
+      return a;
+
+   if (n <= 4) {
+      /*
+       * Shuffle.
+       */
+      LLVMTypeRef elem_type = LLVMInt32Type();
+      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+
+      for(j = 0; j < n; j += 4)
+         for(i = 0; i < 4; ++i)
+            shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
+
+      return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
+   }
+   else {
+      /*
+       * Bit mask and recursive shifts
+       *
+       *   XYZW XYZW .... XYZW
+       *   _Y__ _Y__ .... _Y__
+       *   YY__ YY__ .... YY__
+       *   YYYY YYYY .... YYYY
+       */
+      union lp_type type4 = type;
+      /*
+       * Shift distances, in channels, for the two shift-and-OR steps. A
+       * positive entry is a logical right shift and a negative entry a left
+       * shift (the sign is flipped on little-endian targets below).
+       *
+       * The element type is explicitly signed: plain char may be unsigned,
+       * which would turn the negative entries into large positive shifts.
+       */
+      const signed char shifts[4][2] = {
+         { 1,  2},
+         {-1,  2},
+         { 1, -2},
+         {-1, -2}
+      };
+      boolean cond[4] = {0, 0, 0, 0};
+
+      /* Keep only the requested channel of every group. */
+      cond[channel] = 1;
+
+      a = LLVMBuildAnd(bld->builder, a, lp_build_const_mask_aos(type, cond), "");
+
+      /* View each XYZW group as a single integer four times as wide, so one
+       * shift moves a channel across the whole group. */
+      type4.width *= 4;
+      type4.length /= 4;
+
+      a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), "");
+
+      for(i = 0; i < 2; ++i) {
+         LLVMValueRef tmp = NULL;
+         int shift = shifts[channel][i];
+
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+         shift = -shift;
+#endif
+
+         if(shift > 0)
+            tmp = LLVMBuildLShr(bld->builder, a, lp_build_const_shift(type4, shift*type.width), "");
+         if(shift < 0)
+            tmp = LLVMBuildShl(bld->builder, a, lp_build_const_shift(type4, -shift*type.width), "");
+
+         /* No table entry is zero, so a shift is always emitted. */
+         assert(tmp);
+         if(tmp)
+            a = LLVMBuildOr(bld->builder, a, tmp, "");
+      }
+
+      return LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type), "");
+   }
+}
+
+
+/**
+ * Per-channel select between two vectors of XYZW structures.
+ *
+ * For each channel i the result takes that channel from a when cond[i] is
+ * true and from b otherwise. If either input is the context's undef value
+ * (and the selection is not trivial) the result is undef.
+ */
+LLVMValueRef
+lp_build_select_aos(struct lp_build_context *bld,
+                    LLVMValueRef a,
+                    LLVMValueRef b,
+                    boolean cond[4])
+{
+   const union lp_type type = bld->type;
+   const unsigned length = type.length;
+   const boolean take_all_a = cond[0] && cond[1] && cond[2] && cond[3];
+   const boolean take_all_b = !(cond[0] || cond[1] || cond[2] || cond[3]);
+
+   /* Trivial selections need no code at all. */
+   if(a == b || take_all_a)
+      return a;
+   if(take_all_b)
+      return b;
+   if(a == bld->undef || b == bld->undef)
+      return bld->undef;
+
+   /*
+    * A per-channel select can be emitted as a shuffle, as a vector select,
+    * or as a bit mask. Vector select is not usable yet, so only the other
+    * two are implemented. The vector length at which we switch from one to
+    * the other was chosen empirically and might need to be adjusted.
+    */
+   if (length > 4) {
+      /*
+       * Bit mask: (a & mask) | (b & ~mask).
+       *
+       * TODO: On SSE4 this could be a single instruction -- PBLENDVB.
+       */
+      LLVMValueRef mask = lp_build_const_mask_aos(type, cond);
+      LLVMValueRef from_a;
+      LLVMValueRef not_mask;
+      LLVMValueRef from_b;
+
+      from_a = LLVMBuildAnd(bld->builder, a, mask, "");
+
+      /* The AND with the inverted mask often becomes PANDN, but sometimes
+       * the inverted mask is precomputed into another constant. Which is
+       * better depends on register pressure, so the choice is left to LLVM.
+       */
+      not_mask = LLVMBuildNot(bld->builder, mask, "");
+      from_b = LLVMBuildAnd(bld->builder, b, not_mask, "");
+
+      return LLVMBuildOr(bld->builder, from_a, from_b, "");
+   }
+   else {
+      /*
+       * Shuffle: indices below the vector length pick from a, the rest
+       * pick from b.
+       */
+      LLVMTypeRef index_type = LLVMInt32Type();
+      LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
+      unsigned base, chan;
+
+      for(base = 0; base < length; base += 4) {
+         for(chan = 0; chan < 4; ++chan) {
+            const unsigned source = cond[chan] ? 0 : length;
+            indices[base + chan] = LLVMConstInt(index_type, source + base + chan, 0);
+         }
+      }
+
+      return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(indices, length), "");
+   }
+}
+
+
+/**
+ * Swizzle the channels inside every XYZW group of a single vector.
+ *
+ * Destination channel i of each group is read from channel swizzle[i] of
+ * the same group. The context's undef, zero and one values are returned
+ * unchanged, and a swizzle naming the same channel four times is handed to
+ * lp_build_broadcast_aos().
+ */
+LLVMValueRef
+lp_build_swizzle1_aos(struct lp_build_context *bld,
+                      LLVMValueRef a,
+                      unsigned char swizzle[4])
+{
+   const unsigned length = bld->type.length;
+   LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
+   LLVMTypeRef index_type;
+   unsigned base, chan;
+
+   if(a == bld->undef || a == bld->zero || a == bld->one)
+      return a;
+
+   if(swizzle[0] == swizzle[1] &&
+      swizzle[0] == swizzle[2] &&
+      swizzle[0] == swizzle[3]) {
+      return lp_build_broadcast_aos(bld, a, swizzle[0]);
+   }
+
+   /* General case: a shuffle whose indices stay within each group. */
+   index_type = LLVMInt32Type();
+   for(base = 0; base < length; base += 4) {
+      for(chan = 0; chan < 4; ++chan) {
+         indices[base + chan] = LLVMConstInt(index_type, base + swizzle[chan], 0);
+      }
+   }
+
+   return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(indices, length), "");
+}
+
+
+/**
+ * Swizzle the channels of two vectors of XYZW structures into one vector.
+ *
+ * @param a        first source vector
+ * @param b        second source vector
+ * @param swizzle  source of each destination channel, in the [0,8[ range:
+ *                 values in [0,4[ pick that channel of a, values in [4,8[
+ *                 pick channel (value - 4) of b. The array is not modified.
+ */
+LLVMValueRef
+lp_build_swizzle2_aos(struct lp_build_context *bld,
+                      LLVMValueRef a,
+                      LLVMValueRef b,
+                      unsigned char swizzle[4])
+{
+   const unsigned n = bld->type.length;
+   unsigned i, j;
+
+   assert(swizzle[0] < 8 && swizzle[1] < 8 && swizzle[2] < 8 && swizzle[3] < 8);
+
+   /* Only a is referenced: this is a single-vector swizzle. */
+   if(swizzle[0] < 4 && swizzle[1] < 4 && swizzle[2] < 4 && swizzle[3] < 4)
+      return lp_build_swizzle1_aos(bld, a, swizzle);
+
+   if(a == b) {
+      /* Both sources are the same value, so fold the indices onto a. Work on
+       * a copy so the caller's array is left untouched. */
+      unsigned char swizzle1[4];
+
+      for(i = 0; i < 4; ++i)
+         swizzle1[i] = swizzle[i] % 4;
+
+      return lp_build_swizzle1_aos(bld, a, swizzle1);
+   }
+
+   if(swizzle[0] % 4 == 0 &&
+      swizzle[1] % 4 == 1 &&
+      swizzle[2] % 4 == 2 &&
+      swizzle[3] % 4 == 3) {
+      /* Every channel stays in place and is merely picked from a or b, which
+       * is a select. lp_build_select_aos() takes a where cond is true, so
+       * cond must be true for the indices that refer to a, i.e. those < 4. */
+      boolean cond[4];
+
+      for(i = 0; i < 4; ++i)
+         cond[i] = swizzle[i] < 4;
+
+      return lp_build_select_aos(bld, a, b, cond);
+   }
+
+   {
+      /*
+       * Shuffle. Indices in [0,n[ address a and indices in [n,2n[ address b.
+       */
+      LLVMTypeRef elem_type = LLVMInt32Type();
+      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+
+      for(j = 0; j < n; j += 4)
+         for(i = 0; i < 4; ++i)
+            shuffles[j + i] = LLVMConstInt(elem_type, j + (swizzle[i] % 4) + (swizzle[i] / 4 * n), 0);
+
+      return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
+   }
+}
+
+
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h b/src/gallium/drivers/llvmpipe/lp_bld_swizzle.h
new file mode 100644 (file)
index 0000000..aeb4f42
--- /dev/null
@@ -0,0 +1,87 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * @file
+ * Helper functions for swizzling, broadcasting and selecting vector channels.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef LP_BLD_SWIZZLE_H
+#define LP_BLD_SWIZZLE_H
+
+
+#include <llvm-c/Core.h>  
+
+
+/* Forward declarations only: naming an object here (e.g. "union lp_type type;")
+ * would define a global variable in every file that includes this header. */
+union lp_type;
+struct lp_build_context;
+
+
+/**
+ * Broadcast one channel of a vector composed of arrays of XYZW structures into
+ * all four channels.
+ */
+LLVMValueRef
+lp_build_broadcast_aos(struct lp_build_context *bld,
+                       LLVMValueRef a,
+                       unsigned channel);
+
+
+LLVMValueRef
+lp_build_select_aos(struct lp_build_context *bld,
+                    LLVMValueRef a,
+                    LLVMValueRef b,
+                    boolean cond[4]);
+
+
+/**
+ * Swizzle a vector consisting of an array of XYZW structs.
+ *
+ * @param swizzle is in the [0,4[ range.
+ */
+LLVMValueRef
+lp_build_swizzle1_aos(struct lp_build_context *bld,
+                      LLVMValueRef a,
+                      unsigned char swizzle[4]);
+
+
+/**
+ * Swizzle two vectors, each consisting of an array of XYZW structs.
+ *
+ * @param swizzle is in the [0,8[ range. Values in the [4,8[ range refer to b.
+ */
+LLVMValueRef
+lp_build_swizzle2_aos(struct lp_build_context *bld,
+                      LLVMValueRef a,
+                      LLVMValueRef b,
+                      unsigned char swizzle[4]);
+
+
+#endif /* !LP_BLD_SWIZZLE_H */