gallivm: Fallback to calling util_format_description::fetch_float for any format...
authorJosé Fonseca <jfonseca@vmware.com>
Tue, 6 Apr 2010 21:49:33 +0000 (22:49 +0100)
committerJosé Fonseca <jfonseca@vmware.com>
Tue, 6 Apr 2010 21:52:33 +0000 (22:52 +0100)
src/gallium/auxiliary/gallivm/lp_bld_format.h
src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
src/gallium/auxiliary/gallivm/lp_bld_sample.c
src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c

index 73ab6de3f21f7b8e45a4b194e6c1600fea201958..bb1298ed3f43e39482411a9b5ad9f831aefde80e 100644 (file)
@@ -80,4 +80,15 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
                          LLVMValueRef *rgba);
 
 
+void
+lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
+                        const struct util_format_description *format_desc,
+                        struct lp_type type,
+                        LLVMValueRef base_ptr,
+                        LLVMValueRef offsets,
+                        LLVMValueRef i,
+                        LLVMValueRef j,
+                        LLVMValueRef *rgba);
+
+
 #endif /* !LP_BLD_FORMAT_H */
index 9f242844e5a454c5026b0c5efb0e660bfb4ac6f9..433134b809041db8a1fe17a9728f324ff1785ff0 100644 (file)
 
 
 #include "util/u_format.h"
+#include "util/u_memory.h"
+#include "util/u_string.h"
 
 #include "lp_bld_type.h"
 #include "lp_bld_const.h"
 #include "lp_bld_conv.h"
+#include "lp_bld_sample.h" /* for lp_build_gather */
+#include "lp_bld_init.h"
 #include "lp_bld_format.h"
 
 
@@ -240,3 +244,139 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
 
    lp_build_format_swizzle_soa(format_desc, type, inputs, rgba);
 }
+
+
+/**
+ * Fetch a pixel into a SoA.
+ *
+ * i and j are the sub-block pixel coordinates.
+ */
+void
+lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
+                        const struct util_format_description *format_desc,
+                        struct lp_type type,
+                        LLVMValueRef base_ptr,
+                        LLVMValueRef offset,
+                        LLVMValueRef i,
+                        LLVMValueRef j,
+                        LLVMValueRef *rgba)
+{
+
+   if (format_desc->block.width == 1 &&
+       format_desc->block.height == 1 &&
+       format_desc->block.bits <= type.width)
+   {
+      /*
+       * The packed pixel fits into an element of the destination format. Put
+       * the packed pixels into a vector and estract each component for all
+       * vector elements in parallel.
+       */
+
+      LLVMValueRef packed;
+
+      /*
+       * gather the texels from the texture
+       */
+      packed = lp_build_gather(builder,
+                               type.length,
+                               format_desc->block.bits,
+                               type.width,
+                               base_ptr, offset);
+
+      /*
+       * convert texels to float rgba
+       */
+      lp_build_unpack_rgba_soa(builder,
+                               format_desc,
+                               type,
+                               packed, rgba);
+   }
+   else {
+      /*
+       * Fallback to calling util_format_description::fetch_float for each
+       * pixel.
+       *
+       * This is definitely not the most efficient way of fetching pixels, as
+       * we miss the opportunity to do vectorization, but this it is a
+       * convenient for formats or scenarios for which there was no opportunity
+       * or incentive to optimize.
+       */
+
+      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
+      char name[256];
+      LLVMValueRef function;
+      LLVMValueRef tmp;
+      unsigned k, chan;
+
+      assert(type.floating);
+
+      util_snprintf(name, sizeof name, "util_format_%s_fetch_float", format_desc->short_name);
+
+      /*
+       * Declare and bind format_desc->fetch_float().
+       */
+
+      function = LLVMGetNamedFunction(module, name);
+      if (!function) {
+         LLVMTypeRef ret_type;
+         LLVMTypeRef arg_types[4];
+         LLVMTypeRef function_type;
+
+         ret_type = LLVMVoidType();
+         arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
+         arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
+         arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
+         function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
+         function = LLVMAddFunction(module, name, function_type);
+
+         LLVMSetFunctionCallConv(function, LLVMCCallConv);
+         LLVMSetLinkage(function, LLVMExternalLinkage);
+
+         assert(LLVMIsDeclaration(function));
+
+         LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_float);
+      }
+
+      for (chan = 0; chan < 4; ++chan) {
+         rgba[chan] = lp_build_undef(type);
+      }
+
+      tmp = LLVMBuildArrayAlloca(builder,
+                                 LLVMFloatType(),
+                                 LLVMConstInt(LLVMInt32Type(), 4, 0),
+                                 "");
+
+      /*
+       * Invoke format_desc->fetch_float() for each pixel and insert the result
+       * in the SoA vectors.
+       */
+
+      for(k = 0; k < type.length; ++k) {
+         LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
+         LLVMValueRef offset_elem;
+         LLVMValueRef ptr;
+         LLVMValueRef i_elem, j_elem;
+         LLVMValueRef args[4];
+
+         offset_elem = LLVMBuildExtractElement(builder, offset, index, "");
+         ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, "");
+
+         i_elem = LLVMBuildExtractElement(builder, i, index, "");
+         j_elem = LLVMBuildExtractElement(builder, j, index, "");
+
+         args[0] = tmp;
+         args[1] = ptr;
+         args[2] = i_elem;
+         args[3] = j_elem;
+
+         LLVMBuildCall(builder, function, args, 4, "");
+
+         for (chan = 0; chan < 4; ++chan) {
+            LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0),
+            tmp_chan = LLVMBuildGEP(builder, tmp, &chan_val, 1, "");
+            tmp_chan = LLVMBuildLoad(builder, tmp_chan, "");
+            rgba[chan] = LLVMBuildInsertElement(builder, rgba[chan], tmp_chan, index, "");
+         }
+      }
+   }
+}
index bb76ad4c6bfcd1a1088059a9e55211688ee1ea93..0744be6b41f5549f48eafa3ccf0d9b5e893faaae 100644 (file)
@@ -157,9 +157,10 @@ lp_build_gather(LLVMBuilderRef builder,
 
 
 /**
- * Compute the offset of a pixel.
+ * Compute the offset of a pixel block.
  *
- * x, y, z, y_stride, z_stride are vectors
+ * x, y, z, y_stride, z_stride are vectors, and they refer to pixel blocks, as
+ * per format description, and not individual pixels.
  */
 LLVMValueRef
 lp_build_sample_offset(struct lp_build_context *bld,
index 5b56f2cf3b7b6cfd31642ca9738c0546b845a6d9..395eaaba2692b585cff2036ff2d8109dae041a28 100644 (file)
@@ -211,7 +211,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
    const int dims = texture_dims(bld->static_state->target);
    struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
    LLVMValueRef offset;
-   LLVMValueRef packed;
+   LLVMValueRef i, j;
    LLVMValueRef use_border = NULL;
 
    /* use_border = x < 0 || x >= width || y < 0 || y >= height */
@@ -248,6 +248,43 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
       }
    }
 
+   /*
+    * Describe the coordinates in terms of pixel blocks.
+    *
+    * TODO: pixel blocks are power of two. LLVM should convert rem/div to
+    * bit arithmetic. Verify this.
+    */
+
+   if (bld->format_desc->block.width == 1) {
+      i = bld->uint_coord_bld.zero;
+   }
+   else {
+      LLVMValueRef block_width = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.width);
+      i = LLVMBuildURem(bld->builder, x, block_width, "");
+      x = LLVMBuildUDiv(bld->builder, x, block_width, "");
+   }
+
+   if (bld->format_desc->block.height == 1) {
+      j = bld->uint_coord_bld.zero;
+   }
+   else {
+      LLVMValueRef block_height = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.height);
+      j = LLVMBuildURem(bld->builder, y, block_height, "");
+      y = LLVMBuildUDiv(bld->builder, y, block_height, "");
+   }
+
+   /* convert x,y,z coords to linear offset from start of texture, in bytes */
+   offset = lp_build_sample_offset(&bld->uint_coord_bld,
+                                   bld->format_desc,
+                                   x, y, z, y_stride, z_stride);
+
+   lp_build_fetch_rgba_soa(bld->builder,
+                           bld->format_desc,
+                           bld->texel_type,
+                           data_ptr, offset,
+                           i, j,
+                           texel);
+
    /*
     * Note: if we find an app which frequently samples the texture border
     * we might want to implement a true conditional here to avoid sampling
@@ -263,30 +300,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
     * the texel color results with the border color.
     */
 
-   /* convert x,y,z coords to linear offset from start of texture, in bytes */
-   offset = lp_build_sample_offset(&bld->uint_coord_bld,
-                                   bld->format_desc,
-                                   x, y, z, y_stride, z_stride);
-
-   assert(bld->format_desc->block.width == 1);
-   assert(bld->format_desc->block.height == 1);
-   assert(bld->format_desc->block.bits <= bld->texel_type.width);
-
-   /* gather the texels from the texture */
-   packed = lp_build_gather(bld->builder,
-                            bld->texel_type.length,
-                            bld->format_desc->block.bits,
-                            bld->texel_type.width,
-                            data_ptr, offset);
-
-   texel[0] = texel[1] = texel[2] = texel[3] = NULL;
-
-   /* convert texels to float rgba */
-   lp_build_unpack_rgba_soa(bld->builder,
-                            bld->format_desc,
-                            bld->texel_type,
-                            packed, texel);
-
    if (use_border) {
       /* select texel color or border color depending on use_border */
       int chan;