gallivm: fix indirect addressing of constant buffer

[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_soa.c
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c

index c5de5ce72d9c3c564f52770bebc816e7975a2475..9f405921b0a51f355b48fad74114e14c295ce22b 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -36,7 +36,7 @@
  #include "lp_bld_const.h"
  #include "lp_bld_conv.h"
  #include "lp_bld_swizzle.h"
-#include "lp_bld_sample.h" /* for lp_build_gather */
+#include "lp_bld_gather.h"
  #include "lp_bld_format.h"
  
  
@@ -44,7 +44,7 @@ void
  lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
                              struct lp_build_context *bld,
                              const LLVMValueRef *unswizzled,
-                            LLVMValueRef *swizzled)
+                            LLVMValueRef swizzled_out[4])
  {
     assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
     assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
@@ -59,14 +59,14 @@ lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
         */
        enum util_format_swizzle swizzle = format_desc->swizzle[0];
        LLVMValueRef depth = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
-      swizzled[2] = swizzled[1] = swizzled[0] = depth;
-      swizzled[3] = bld->one;
+      swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth;
+      swizzled_out[3] = bld->one;
     }
     else {
        unsigned chan;
        for (chan = 0; chan < 4; ++chan) {
           enum util_format_swizzle swizzle = format_desc->swizzle[chan];
-         swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
+         swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
        }
     }
  }
@@ -89,13 +89,18 @@ lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
   * It requires that a packed pixel fits into an element of the output
   * channels. The common case is when converting pixel with a depth of 32 bit or
   * less into floats.
+ *
+ * \param format_desc  the format of the 'packed' incoming pixel vector
+ * \param type  the desired type for rgba_out (type.length = n, above)
+ * \param packed  the incoming vector of packed pixels
+ * \param rgba_out  returns the SoA R,G,B,A vectors
   */
  void
  lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
                           const struct util_format_description *format_desc,
                           struct lp_type type,
                           LLVMValueRef packed,
-                         LLVMValueRef *rgba)
+                         LLVMValueRef rgba_out[4])
  {
     struct lp_build_context bld;
     LLVMValueRef inputs[4];
@@ -115,8 +120,8 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
     /* Decode the input vector components */
     start = 0;
     for (chan = 0; chan < format_desc->nr_channels; ++chan) {
-      unsigned width = format_desc->channel[chan].size;
-      unsigned stop = start + width;
+      const unsigned width = format_desc->channel[chan].size;
+      const unsigned stop = start + width;
        LLVMValueRef input;
  
        input = packed;
@@ -242,14 +247,59 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
        start = stop;
     }
  
-   lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba);
+   lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
+}
+
+
+void
+lp_build_rgba8_to_f32_soa(LLVMBuilderRef builder,
+                          struct lp_type dst_type,
+                          LLVMValueRef packed,
+                          LLVMValueRef *rgba)
+{
+   LLVMValueRef mask = lp_build_const_int_vec(dst_type, 0xff);
+   unsigned chan;
+
+   packed = LLVMBuildBitCast(builder, packed,
+                             lp_build_int_vec_type(dst_type), "");
+
+   /* Decode the input vector components */
+   for (chan = 0; chan < 4; ++chan) {
+      unsigned start = chan*8;
+      unsigned stop = start + 8;
+      LLVMValueRef input;
+
+      input = packed;
+
+      if (start)
+         input = LLVMBuildLShr(builder, input,
+                               lp_build_const_int_vec(dst_type, start), "");
+
+      if (stop < 32)
+         input = LLVMBuildAnd(builder, input, mask, "");
+
+      input = lp_build_unsigned_norm_to_float(builder, 8, dst_type, input);
+
+      rgba[chan] = input;
+   }
  }
  
  
+
  /**
- * Fetch a pixel into a SoA.
+ * Fetch texels from a texture, returning them in SoA layout.
+ *
+ * \param type  the desired return type for 'rgba'.  The vector length
+ *              is the number of texels to fetch
+ *
+ * \param base_ptr  points to start of the texture image block.  For non-
+ *                  compressed formats, this simply points to the texel.
+ *                  For compressed formats, it points to the start of the
+ *                  compressed data block.
   *
- * i and j are the sub-block pixel coordinates.
+ * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
+ *              these will always be (0,0).  For compressed formats, i will
+ *              be in [0, block_width-1] and j will be in [0, block_height-1].
   */
  void
  lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
@@ -259,7 +309,7 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
                          LLVMValueRef offset,
                          LLVMValueRef i,
                          LLVMValueRef j,
-                        LLVMValueRef *rgba)
+                        LLVMValueRef rgba_out[4])
  {
  
     if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
@@ -273,7 +323,7 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
     {
        /*
         * The packed pixel fits into an element of the destination format. Put
-       * the packed pixels into a vector and estract each component for all
+       * the packed pixels into a vector and extract each component for all
         * vector elements in parallel.
         */
  
@@ -281,6 +331,7 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
  
        /*
         * gather the texels from the texture
+       * Ex: packed = {BGRA, BGRA, BGRA, BGRA}.
         */
        packed = lp_build_gather(builder,
                                 type.length,
@@ -294,49 +345,81 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
        lp_build_unpack_rgba_soa(builder,
                                 format_desc,
                                 type,
-                               packed, rgba);
+                               packed, rgba_out);
+      return;
     }
-   else {
-      /*
-       * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
-       *
-       * This is not the most efficient way of fetching pixels, as
-       * we miss some opportunities to do vectorization, but this it is a
-       * convenient for formats or scenarios for which there was no opportunity
-       * or incentive to optimize.
-       */
  
+   /*
+    * Try fetching all pixels with a single lp_build_fetch_rgba_aos call.
+    */
+
+   if (util_format_fits_8unorm(format_desc) &&
+       type.floating && type.width == 32 && type.length == 4) {
+      struct lp_type tmp_type;
+      LLVMValueRef tmp;
+
+      memset(&tmp_type, 0, sizeof tmp_type);
+      tmp_type.width = 8;
+      tmp_type.length = type.length * 4;
+      tmp_type.norm = TRUE;
+
+      tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type,
+                                    base_ptr, offset, i, j);
+
+      lp_build_rgba8_to_f32_soa(builder,
+                                type,
+                                tmp,
+                                rgba_out);
+
+      return;
+   }
+
+   /*
+    * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
+    *
+    * This is not the most efficient way of fetching pixels, as we
+    * miss some opportunities to do vectorization, but this is
+    * convenient for formats or scenarios for which there was no
+    * opportunity or incentive to optimize.
+    */
+
+   {
        unsigned k, chan;
+      struct lp_type tmp_type;
  
-      assert(type.floating);
+      tmp_type = type;
+      tmp_type.length = 4;
  
        for (chan = 0; chan < 4; ++chan) {
-         rgba[chan] = lp_build_undef(type);
+         rgba_out[chan] = lp_build_undef(type);
        }
  
+      /* loop over number of pixels */
        for(k = 0; k < type.length; ++k) {
           LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
           LLVMValueRef offset_elem;
-         LLVMValueRef ptr;
           LLVMValueRef i_elem, j_elem;
           LLVMValueRef tmp;
  
           offset_elem = LLVMBuildExtractElement(builder, offset, index, "");
-         ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, "");
  
           i_elem = LLVMBuildExtractElement(builder, i, index, "");
           j_elem = LLVMBuildExtractElement(builder, j, index, "");
  
-         tmp = lp_build_fetch_rgba_aos(builder, format_desc, ptr, i_elem, j_elem);
+         /* Get a single float[4]={R,G,B,A} pixel */
+         tmp = lp_build_fetch_rgba_aos(builder, format_desc, tmp_type,
+                                       base_ptr, offset_elem,
+                                       i_elem, j_elem);
  
           /*
-          * AoS to SoA
+          * Insert the AoS tmp value channels into the SoA result vectors at
+          * position = 'index'.
            */
-
           for (chan = 0; chan < 4; ++chan) {
              LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0),
              tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
-            rgba[chan] = LLVMBuildInsertElement(builder, rgba[chan], tmp_chan, index, "");
+            rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
+                                                    tmp_chan, index, "");
           }
        }
     }