util: add mutex lock in u_debug_memory.c code
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_format_soa.c
index 433134b809041db8a1fe17a9728f324ff1785ff0..0a57b3ce794c6a9680829f1208ac0144afafaab8 100644 (file)
@@ -26,6 +26,8 @@
  **************************************************************************/
 
 
+#include "pipe/p_defines.h"
+
 #include "util/u_format.h"
 #include "util/u_memory.h"
 #include "util/u_string.h"
 #include "lp_bld_type.h"
 #include "lp_bld_const.h"
 #include "lp_bld_conv.h"
-#include "lp_bld_sample.h" /* for lp_build_gather */
-#include "lp_bld_init.h"
+#include "lp_bld_swizzle.h"
+#include "lp_bld_gather.h"
+#include "lp_bld_debug.h"
 #include "lp_bld_format.h"
 
 
-static LLVMValueRef
-lp_build_format_swizzle_chan_soa(struct lp_type type,
-                                 const LLVMValueRef *unswizzled,
-                                 enum util_format_swizzle swizzle)
-{
-   switch (swizzle) {
-   case UTIL_FORMAT_SWIZZLE_X:
-   case UTIL_FORMAT_SWIZZLE_Y:
-   case UTIL_FORMAT_SWIZZLE_Z:
-   case UTIL_FORMAT_SWIZZLE_W:
-      return unswizzled[swizzle];
-   case UTIL_FORMAT_SWIZZLE_0:
-      return lp_build_zero(type);
-   case UTIL_FORMAT_SWIZZLE_1:
-      return lp_build_one(type);
-   case UTIL_FORMAT_SWIZZLE_NONE:
-      return lp_build_undef(type);
-   default:
-      assert(0);
-      return lp_build_undef(type);
-   }
-}
-
-
 void
 lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
-                            struct lp_type type,
+                            struct lp_build_context *bld,
                             const LLVMValueRef *unswizzled,
-                            LLVMValueRef *swizzled)
+                            LLVMValueRef swizzled_out[4])
 {
-   if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+   assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
+   assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
+
+   if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+      /*
+       * Return zzz1 for depth-stencil formats.
+       *
+       * XXX: Allow to control the depth swizzle with an additional parameter,
+       * as the caller may wish another depth swizzle, or retain the stencil
+       * value.
+       */
       enum util_format_swizzle swizzle = format_desc->swizzle[0];
-      LLVMValueRef depth = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
-      swizzled[2] = swizzled[1] = swizzled[0] = depth;
-      swizzled[3] = lp_build_one(type);
+      LLVMValueRef depth = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
+      swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth;
+      swizzled_out[3] = bld->one;
    }
    else {
       unsigned chan;
       for (chan = 0; chan < 4; ++chan) {
          enum util_format_swizzle swizzle = format_desc->swizzle[chan];
-         swizzled[chan] = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
+         swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
       }
    }
 }
@@ -101,19 +90,26 @@ lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
  * It requires that a packed pixel fits into an element of the output
  * channels. The common case is when converting pixel with a depth of 32 bit or
  * less into floats.
+ *
+ * \param format_desc  the format of the 'packed' incoming pixel vector
+ * \param type  the desired type for rgba_out (type.length = n, above)
+ * \param packed  the incoming vector of packed pixels
+ * \param rgba_out  returns the SoA R,G,B,A vectors
  */
 void
-lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
+lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
                          const struct util_format_description *format_desc,
                          struct lp_type type,
                          LLVMValueRef packed,
-                         LLVMValueRef *rgba)
+                         LLVMValueRef rgba_out[4])
 {
+   LLVMBuilderRef builder = gallivm->builder;
+   struct lp_build_context bld;
    LLVMValueRef inputs[4];
    unsigned start;
    unsigned chan;
 
-   /* FIXME: Support more pixel formats */
+   assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
    assert(format_desc->block.width == 1);
    assert(format_desc->block.height == 1);
    assert(format_desc->block.bits <= type.width);
@@ -121,18 +117,20 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
    assert(type.floating);
    assert(type.width == 32);
 
+   lp_build_context_init(&bld, gallivm, type);
+
    /* Decode the input vector components */
    start = 0;
    for (chan = 0; chan < format_desc->nr_channels; ++chan) {
-      unsigned width = format_desc->channel[chan].size;
-      unsigned stop = start + width;
+      const unsigned width = format_desc->channel[chan].size;
+      const unsigned stop = start + width;
       LLVMValueRef input;
 
       input = packed;
 
       switch(format_desc->channel[chan].type) {
       case UTIL_FORMAT_TYPE_VOID:
-         input = lp_build_undef(type);
+         input = lp_build_undef(gallivm, type);
          break;
 
       case UTIL_FORMAT_TYPE_UNSIGNED:
@@ -141,7 +139,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
           */
 
          if (start) {
-            input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(type, start), "");
+            input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
          }
 
          /*
@@ -150,7 +148,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
 
          if (stop < format_desc->block.bits) {
             unsigned mask = ((unsigned long long)1 << width) - 1;
-            input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(type, mask), "");
+            input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
          }
 
          /*
@@ -159,14 +157,15 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
 
          if (type.floating) {
             if(format_desc->channel[chan].normalized)
-               input = lp_build_unsigned_norm_to_float(builder, width, type, input);
+               input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
             else
-               input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
+               input = LLVMBuildSIToFP(builder, input,
+                                       lp_build_vec_type(gallivm, type), "");
          }
          else {
             /* FIXME */
             assert(0);
-            input = lp_build_undef(type);
+            input = lp_build_undef(gallivm, type);
          }
 
          break;
@@ -178,7 +177,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
 
          if (stop < type.width) {
             unsigned bits = type.width - stop;
-            LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
+            LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
             input = LLVMBuildShl(builder, input, bits_val, "");
          }
 
@@ -188,7 +187,7 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
 
          if (format_desc->channel[chan].size < type.width) {
             unsigned bits = type.width - format_desc->channel[chan].size;
-            LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
+            LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
             input = LLVMBuildAShr(builder, input, bits_val, "");
          }
 
@@ -197,17 +196,17 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
           */
 
          if (type.floating) {
-            input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
+            input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
             if (format_desc->channel[chan].normalized) {
                double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
-               LLVMValueRef scale_val = lp_build_const_vec(type, scale);
-               input = LLVMBuildMul(builder, input, scale_val, "");
+               LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
+               input = LLVMBuildFMul(builder, input, scale_val, "");
             }
          }
          else {
             /* FIXME */
             assert(0);
-            input = lp_build_undef(type);
+            input = lp_build_undef(gallivm, type);
          }
 
          break;
@@ -217,23 +216,32 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
             assert(start == 0);
             assert(stop == 32);
             assert(type.width == 32);
-            input = LLVMBuildBitCast(builder, input, lp_build_vec_type(type), "");
+            input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
          }
          else {
             /* FIXME */
             assert(0);
-            input = lp_build_undef(type);
+            input = lp_build_undef(gallivm, type);
          }
          break;
 
       case UTIL_FORMAT_TYPE_FIXED:
-         assert(0);
-         input = lp_build_undef(type);
+         if (type.floating) {
+            double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
+            LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
+            input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
+            input = LLVMBuildFMul(builder, input, scale_val, "");
+         }
+         else {
+            /* FIXME */
+            assert(0);
+            input = lp_build_undef(gallivm, type);
+         }
          break;
 
       default:
          assert(0);
-         input = lp_build_undef(type);
+         input = lp_build_undef(gallivm, type);
          break;
       }
 
@@ -242,33 +250,85 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
       start = stop;
    }
 
-   lp_build_format_swizzle_soa(format_desc, type, inputs, rgba);
+   lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
 }
 
 
+void
+lp_build_rgba8_to_f32_soa(struct gallivm_state *gallivm,
+                          struct lp_type dst_type,
+                          LLVMValueRef packed,
+                          LLVMValueRef *rgba)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
+   unsigned chan;
+
+   packed = LLVMBuildBitCast(builder, packed,
+                             lp_build_int_vec_type(gallivm, dst_type), "");
+
+   /* Decode the input vector components */
+   for (chan = 0; chan < 4; ++chan) {
+      unsigned start = chan*8;
+      unsigned stop = start + 8;
+      LLVMValueRef input;
+
+      input = packed;
+
+      if (start)
+         input = LLVMBuildLShr(builder, input,
+                               lp_build_const_int_vec(gallivm, dst_type, start), "");
+
+      if (stop < 32)
+         input = LLVMBuildAnd(builder, input, mask, "");
+
+      input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
+
+      rgba[chan] = input;
+   }
+}
+
+
+
 /**
- * Fetch a pixel into a SoA.
+ * Fetch a texels from a texture, returning them in SoA layout.
+ *
+ * \param type  the desired return type for 'rgba'.  The vector length
+ *              is the number of texels to fetch
  *
- * i and j are the sub-block pixel coordinates.
+ * \param base_ptr  points to start of the texture image block.  For non-
+ *                  compressed formats, this simply points to the texel.
+ *                  For compressed formats, it points to the start of the
+ *                  compressed data block.
+ *
+ * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
+ *              these will always be (0,0).  For compressed formats, i will
+ *              be in [0, block_width-1] and j will be in [0, block_height-1].
  */
 void
-lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
+lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
                         const struct util_format_description *format_desc,
                         struct lp_type type,
                         LLVMValueRef base_ptr,
                         LLVMValueRef offset,
                         LLVMValueRef i,
                         LLVMValueRef j,
-                        LLVMValueRef *rgba)
+                        LLVMValueRef rgba_out[4])
 {
+   LLVMBuilderRef builder = gallivm->builder;
 
-   if (format_desc->block.width == 1 &&
+   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
+       (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
+        format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
+       format_desc->block.width == 1 &&
        format_desc->block.height == 1 &&
-       format_desc->block.bits <= type.width)
+       format_desc->block.bits <= type.width &&
+       (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
+        format_desc->channel[0].size == 32))
    {
       /*
        * The packed pixel fits into an element of the destination format. Put
-       * the packed pixels into a vector and estract each component for all
+       * the packed pixels into a vector and extract each component for all
        * vector elements in parallel.
        */
 
@@ -276,8 +336,9 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
 
       /*
        * gather the texels from the texture
+       * Ex: packed = {BGRA, BGRA, BGRA, BGRA}.
        */
-      packed = lp_build_gather(builder,
+      packed = lp_build_gather(gallivm,
                                type.length,
                                format_desc->block.bits,
                                type.width,
@@ -286,96 +347,90 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
       /*
        * convert texels to float rgba
        */
-      lp_build_unpack_rgba_soa(builder,
+      lp_build_unpack_rgba_soa(gallivm,
                                format_desc,
                                type,
-                               packed, rgba);
+                               packed, rgba_out);
+      return;
    }
-   else {
-      /*
-       * Fallback to calling util_format_description::fetch_float for each
-       * pixel.
-       *
-       * This is definitely not the most efficient way of fetching pixels, as
-       * we miss the opportunity to do vectorization, but this it is a
-       * convenient for formats or scenarios for which there was no opportunity
-       * or incentive to optimize.
-       */
 
-      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
-      char name[256];
-      LLVMValueRef function;
-      LLVMValueRef tmp;
-      unsigned k, chan;
+   /*
+    * Try calling lp_build_fetch_rgba_aos for all pixels.
+    */
 
-      assert(type.floating);
+   if (util_format_fits_8unorm(format_desc) &&
+       type.floating && type.width == 32 && type.length == 4) {
+      struct lp_type tmp_type;
+      LLVMValueRef tmp;
 
-      util_snprintf(name, sizeof name, "util_format_%s_fetch_float", format_desc->short_name);
+      memset(&tmp_type, 0, sizeof tmp_type);
+      tmp_type.width = 8;
+      tmp_type.length = type.length * 4;
+      tmp_type.norm = TRUE;
 
-      /*
-       * Declare and bind format_desc->fetch_float().
-       */
+      tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
+                                    base_ptr, offset, i, j);
 
-      function = LLVMGetNamedFunction(module, name);
-      if (!function) {
-         LLVMTypeRef ret_type;
-         LLVMTypeRef arg_types[4];
-         LLVMTypeRef function_type;
+      lp_build_rgba8_to_f32_soa(gallivm,
+                                type,
+                                tmp,
+                                rgba_out);
 
-         ret_type = LLVMVoidType();
-         arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
-         arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
-         arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
-         function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
-         function = LLVMAddFunction(module, name, function_type);
+      return;
+   }
 
-         LLVMSetFunctionCallConv(function, LLVMCCallConv);
-         LLVMSetLinkage(function, LLVMExternalLinkage);
+   /*
+    * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
+    *
+    * This is not the most efficient way of fetching pixels, as we
+    * miss some opportunities to do vectorization, but this is
+    * convenient for formats or scenarios for which there was no
+    * opportunity or incentive to optimize.
+    */
 
-         assert(LLVMIsDeclaration(function));
+   {
+      unsigned k, chan;
+      struct lp_type tmp_type;
 
-         LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_float);
+      if (gallivm_debug & GALLIVM_DEBUG_PERF) {
+         debug_printf("%s: scalar unpacking of %s\n",
+                      __FUNCTION__, format_desc->short_name);
       }
 
+      tmp_type = type;
+      tmp_type.length = 4;
+
       for (chan = 0; chan < 4; ++chan) {
-         rgba[chan] = lp_build_undef(type);
+         rgba_out[chan] = lp_build_undef(gallivm, type);
       }
 
-      tmp = LLVMBuildArrayAlloca(builder,
-                                 LLVMFloatType(),
-                                 LLVMConstInt(LLVMInt32Type(), 4, 0),
-                                 "");
-
-      /*
-       * Invoke format_desc->fetch_float() for each pixel and insert the result
-       * in the SoA vectors.
-       */
-
+      /* loop over number of pixels */
       for(k = 0; k < type.length; ++k) {
-         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
+         LLVMValueRef index = lp_build_const_int32(gallivm, k);
          LLVMValueRef offset_elem;
-         LLVMValueRef ptr;
          LLVMValueRef i_elem, j_elem;
-         LLVMValueRef args[4];
+         LLVMValueRef tmp;
 
-         offset_elem = LLVMBuildExtractElement(builder, offset, index, "");
-         ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, "");
+         offset_elem = LLVMBuildExtractElement(builder, offset,
+                                               index, "");
 
          i_elem = LLVMBuildExtractElement(builder, i, index, "");
          j_elem = LLVMBuildExtractElement(builder, j, index, "");
 
-         args[0] = tmp;
-         args[1] = ptr;
-         args[2] = i_elem;
-         args[3] = j_elem;
-
-         LLVMBuildCall(builder, function, args, 4, "");
+         /* Get a single float[4]={R,G,B,A} pixel */
+         tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
+                                       base_ptr, offset_elem,
+                                       i_elem, j_elem);
 
+         /*
+          * Insert the AoS tmp value channels into the SoA result vectors at
+          * position = 'index'.
+          */
          for (chan = 0; chan < 4; ++chan) {
-            LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0),
-            tmp_chan = LLVMBuildGEP(builder, tmp, &chan_val, 1, "");
-            tmp_chan = LLVMBuildLoad(builder, tmp_chan, "");
-            rgba[chan] = LLVMBuildInsertElement(builder, rgba[chan], tmp_chan, index, "");
+            LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
+            tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
+            rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
+                                                    tmp_chan, index, "");
          }
       }
    }