gallivm: Universal format support on lp_build_fetch_rgba_aos via util_format_descript...
author José Fonseca <jfonseca@vmware.com>
Tue, 20 Apr 2010 14:21:08 +0000 (16:21 +0200)
committer José Fonseca <jfonseca@vmware.com>
Tue, 20 Apr 2010 14:21:08 +0000 (16:21 +0200)
This therefore adds support for half-float vertex buffers.

src/gallium/auxiliary/draw/draw_llvm_translate.c
src/gallium/auxiliary/gallivm/lp_bld_format.h
src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
src/gallium/drivers/llvmpipe/lp_test_format.c

index d1c7fa44e12eca6dc27619c82781cc48991a8c89..d7da7ed357d0ef47e7e3f66e07d65b8a4d63983b 100644 (file)
@@ -464,6 +464,7 @@ draw_llvm_translate_from(LLVMBuilderRef builder,
                          enum pipe_format from_format)
 {
    const struct util_format_description *format_desc;
+   LLVMValueRef zero;
    int i;
 
    /*
@@ -491,5 +492,6 @@ draw_llvm_translate_from(LLVMBuilderRef builder,
     */
 
    format_desc = util_format_description(from_format);
-   return lp_build_fetch_rgba_aos(builder, format_desc, vbuffer);
+   zero = LLVMConstNull(LLVMInt32Type());
+   return lp_build_fetch_rgba_aos(builder, format_desc, vbuffer, zero, zero);
 }
index ecf2cfd62c02c593a84a01846fd31a468177a9be..085937588ff260b4a314f36ea3b01e33948afe13 100644 (file)
@@ -59,7 +59,9 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
 LLVMValueRef
 lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
                         const struct util_format_description *format_desc,
-                        LLVMValueRef ptr);
+                        LLVMValueRef ptr,
+                        LLVMValueRef i,
+                        LLVMValueRef j);
 
 
 /*
index 191562d460d84738933d4796330ee1cf85e832e0..5cd5b93bdf6ad0ddf9ec863dab6354ea21c41275 100644 (file)
 
 
 #include "util/u_format.h"
+#include "util/u_memory.h"
 #include "util/u_math.h"
+#include "util/u_string.h"
 
+#include "lp_bld_init.h"
 #include "lp_bld_type.h"
 #include "lp_bld_const.h"
 #include "lp_bld_swizzle.h"
@@ -295,12 +298,17 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder,
 
 /**
  * Fetch a pixel into a 4 float AoS.
+ *
+ * i and j are the sub-block pixel coordinates.
  */
 LLVMValueRef
 lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
                         const struct util_format_description *format_desc,
-                        LLVMValueRef ptr)
+                        LLVMValueRef ptr,
+                        LLVMValueRef i,
+                        LLVMValueRef j)
 {
+
    if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
        (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
         format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
@@ -309,7 +317,9 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
        util_is_pot(format_desc->block.bits) &&
        format_desc->block.bits <= 32 &&
        format_desc->is_bitmask &&
-       !format_desc->is_mixed)
+       !format_desc->is_mixed &&
+       (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
+        format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED))
    {
       LLVMValueRef packed;
 
@@ -321,6 +331,71 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
 
       return lp_build_unpack_rgba_aos(builder, format_desc, packed);
    }
+   else if (format_desc->fetch_rgba_float) {
+      /*
+       * Fallback to calling util_format_description::fetch_rgba_float.
+       *
+       * This is definitely not the most efficient way of fetching pixels, as
+       * we miss the opportunity to do vectorization, but it is a convenient
+       * fallback for formats or scenarios for which there was no opportunity
+       * or incentive to optimize.
+       */
+
+      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
+      char name[256];
+      LLVMValueRef function;
+      LLVMValueRef tmp;
+      LLVMValueRef args[4];
+
+      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
+                    format_desc->short_name);
+
+      /*
+       * Declare and bind format_desc->fetch_rgba_float().
+       */
+
+      function = LLVMGetNamedFunction(module, name);
+      if (!function) {
+         LLVMTypeRef ret_type;
+         LLVMTypeRef arg_types[4];
+         LLVMTypeRef function_type;
+
+         ret_type = LLVMVoidType();
+         arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
+         arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
+         arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
+         function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
+         function = LLVMAddFunction(module, name, function_type);
+
+         LLVMSetFunctionCallConv(function, LLVMCCallConv);
+         LLVMSetLinkage(function, LLVMExternalLinkage);
+
+         assert(LLVMIsDeclaration(function));
+
+         LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float);
+      }
+
+      /*
+       * XXX: this alloca would be better placed in the first block of the function
+       */
+
+      tmp = LLVMBuildAlloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
+
+      /*
+       * Invoke format_desc->fetch_rgba_float() for this pixel, writing the result
+       * into the temporary, which is then loaded as a 4 float AoS vector.
+       */
+
+      args[0] = LLVMBuildBitCast(builder, tmp,
+                                 LLVMPointerType(LLVMFloatType(), 0), "");
+      args[1] = ptr;
+      args[2] = i;
+      args[3] = j;
+
+      LLVMBuildCall(builder, function, args, 4, "");
+
+      return LLVMBuildLoad(builder, tmp, "");
+   }
    else {
       assert(0);
       return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
index 2b66162eb40fdb0fd29a15bd0b56b0b9b3324eb3..c7b20f42012884557aab40cc24a1020edf5fbeff 100644 (file)
@@ -307,70 +307,28 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
    }
    else {
       /*
-       * Fallback to calling util_format_description::fetch_rgba_float for each
-       * pixel.
+       * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
        *
-       * This is definitely not the most efficient way of fetching pixels, as
-       * we miss the opportunity to do vectorization, but this it is a
+       * This is not the most efficient way of fetching pixels, as
+       * we miss some opportunities to do vectorization, but it is
        * convenient for formats or scenarios for which there was no opportunity
        * or incentive to optimize.
        */
 
-      LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
-      char name[256];
-      LLVMValueRef function;
-      LLVMValueRef tmp;
       unsigned k, chan;
 
       assert(type.floating);
 
-      util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", format_desc->short_name);
-
-      /*
-       * Declare and bind format_desc->fetch_rgba_float().
-       */
-
-      function = LLVMGetNamedFunction(module, name);
-      if (!function) {
-         LLVMTypeRef ret_type;
-         LLVMTypeRef arg_types[4];
-         LLVMTypeRef function_type;
-
-         ret_type = LLVMVoidType();
-         arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
-         arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
-         arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
-         function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
-         function = LLVMAddFunction(module, name, function_type);
-
-         LLVMSetFunctionCallConv(function, LLVMCCallConv);
-         LLVMSetLinkage(function, LLVMExternalLinkage);
-
-         assert(LLVMIsDeclaration(function));
-
-         LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float);
-      }
-
       for (chan = 0; chan < 4; ++chan) {
          rgba[chan] = lp_build_undef(type);
       }
 
-      tmp = LLVMBuildArrayAlloca(builder,
-                                 LLVMFloatType(),
-                                 LLVMConstInt(LLVMInt32Type(), 4, 0),
-                                 "");
-
-      /*
-       * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
-       * in the SoA vectors.
-       */
-
       for(k = 0; k < type.length; ++k) {
          LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
          LLVMValueRef offset_elem;
          LLVMValueRef ptr;
          LLVMValueRef i_elem, j_elem;
-         LLVMValueRef args[4];
+         LLVMValueRef tmp;
 
          offset_elem = LLVMBuildExtractElement(builder, offset, index, "");
          ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, "");
@@ -378,17 +336,15 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
          i_elem = LLVMBuildExtractElement(builder, i, index, "");
          j_elem = LLVMBuildExtractElement(builder, j, index, "");
 
-         args[0] = tmp;
-         args[1] = ptr;
-         args[2] = i_elem;
-         args[3] = j_elem;
+         tmp = lp_build_fetch_rgba_aos(builder, format_desc, ptr, i_elem, j_elem);
 
-         LLVMBuildCall(builder, function, args, 4, "");
+         /*
+          * AoS to SoA
+          */
 
          for (chan = 0; chan < 4; ++chan) {
             LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0),
-            tmp_chan = LLVMBuildGEP(builder, tmp, &chan_val, 1, "");
-            tmp_chan = LLVMBuildLoad(builder, tmp_chan, "");
+            tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
             rgba[chan] = LLVMBuildInsertElement(builder, rgba[chan], tmp_chan, index, "");
          }
       }
index 13c3c3572d6cb5a42ce3b20fc0a14b06cb069317..fbac815d107326512bd2c726577e6cc876e7e971 100644 (file)
 
 #include <stdlib.h>
 #include <stdio.h>
+#include <float.h>
 
 #include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_init.h"
 #include <llvm-c/Analysis.h>
-#include <llvm-c/ExecutionEngine.h>
 #include <llvm-c/Target.h>
 #include <llvm-c/Transforms/Scalar.h>
 
-#include "util/u_cpu_detect.h"
+#include "util/u_memory.h"
 #include "util/u_format.h"
 #include "util/u_format_tests.h"
 #include "util/u_format_s3tc.h"
@@ -68,34 +69,41 @@ write_tsv_row(FILE *fp,
 }
 
 
-typedef void (*fetch_ptr_t)(const void *packed, float *);
+typedef void
+(*fetch_ptr_t)(float *, const void *packed,
+               unsigned i, unsigned j);
 
 
 static LLVMValueRef
-add_fetch_rgba_test(LLVMModuleRef module,
+add_fetch_rgba_test(LLVMModuleRef lp_build_module,
                     const struct util_format_description *desc)
 {
-   LLVMTypeRef args[2];
+   LLVMTypeRef args[4];
    LLVMValueRef func;
    LLVMValueRef packed_ptr;
    LLVMValueRef rgba_ptr;
+   LLVMValueRef i;
+   LLVMValueRef j;
    LLVMBasicBlockRef block;
    LLVMBuilderRef builder;
    LLVMValueRef rgba;
 
-   args[0] = LLVMPointerType(LLVMInt8Type(), 0);
-   args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);
+   args[0] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);
+   args[1] = LLVMPointerType(LLVMInt8Type(), 0);
+   args[3] = args[2] = LLVMInt32Type();
 
-   func = LLVMAddFunction(module, "fetch", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
+   func = LLVMAddFunction(lp_build_module, "fetch", LLVMFunctionType(LLVMVoidType(), args, Elements(args), 0));
    LLVMSetFunctionCallConv(func, LLVMCCallConv);
-   packed_ptr = LLVMGetParam(func, 0);
-   rgba_ptr = LLVMGetParam(func, 1);
+   rgba_ptr = LLVMGetParam(func, 0);
+   packed_ptr = LLVMGetParam(func, 1);
+   i = LLVMGetParam(func, 2);
+   j = LLVMGetParam(func, 3);
 
    block = LLVMAppendBasicBlock(func, "entry");
    builder = LLVMCreateBuilder();
    LLVMPositionBuilderAtEnd(builder, block);
 
-   rgba = lp_build_fetch_rgba_aos(builder, desc, packed_ptr);
+   rgba = lp_build_fetch_rgba_aos(builder, desc, packed_ptr, i, j);
 
    LLVMBuildStore(builder, rgba, rgba_ptr);
 
@@ -112,37 +120,23 @@ test_format(unsigned verbose, FILE *fp,
             const struct util_format_description *desc,
             const struct util_format_test_case *test)
 {
-   LLVMModuleRef module = NULL;
    LLVMValueRef fetch = NULL;
-   LLVMExecutionEngineRef engine = NULL;
-   LLVMModuleProviderRef provider = NULL;
    LLVMPassManagerRef pass = NULL;
-   char *error = NULL;
    fetch_ptr_t fetch_ptr;
    float unpacked[4];
    boolean success;
    unsigned i;
 
-   module = LLVMModuleCreateWithName("test");
+   fetch = add_fetch_rgba_test(lp_build_module, desc);
 
-   fetch = add_fetch_rgba_test(module, desc);
-
-   if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
-      LLVMDumpModule(module);
-      abort();
-   }
-   LLVMDisposeMessage(error);
-
-   provider = LLVMCreateModuleProviderForExistingModule(module);
-   if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
-      fprintf(stderr, "%s\n", error);
-      LLVMDisposeMessage(error);
+   if (LLVMVerifyFunction(fetch, LLVMPrintMessageAction)) {
+      LLVMDumpValue(fetch);
       abort();
    }
 
 #if 0
    pass = LLVMCreatePassManager();
-   LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
+   LLVMAddTargetData(LLVMGetExecutionEngineTargetData(lp_build_engine), pass);
    /* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
     * but there are more on SVN. */
    LLVMAddConstantPropagationPass(pass);
@@ -150,20 +144,20 @@ test_format(unsigned verbose, FILE *fp,
    LLVMAddPromoteMemoryToRegisterPass(pass);
    LLVMAddGVNPass(pass);
    LLVMAddCFGSimplificationPass(pass);
-   LLVMRunPassManager(pass, module);
+   LLVMRunPassManager(pass, lp_build_module);
 #else
    (void)pass;
 #endif
 
-   fetch_ptr  = (fetch_ptr_t) LLVMGetPointerToGlobal(engine, fetch);
+   fetch_ptr = (fetch_ptr_t) LLVMGetPointerToGlobal(lp_build_engine, fetch);
 
    memset(unpacked, 0, sizeof unpacked);
 
-   fetch_ptr(test->packed, unpacked);
+   fetch_ptr(unpacked, test->packed, 0, 0);
 
    success = TRUE;
    for(i = 0; i < 4; ++i)
-      if(test->unpacked[0][0][i] != unpacked[i])
+      if (fabs((float)test->unpacked[0][0][i] - unpacked[i]) > FLT_EPSILON)
          success = FALSE;
 
    if (!success) {
@@ -177,12 +171,12 @@ test_format(unsigned verbose, FILE *fp,
              test->unpacked[0][0][1],
              test->unpacked[0][0][2],
              test->unpacked[0][0][3]);
-      LLVMDumpModule(module);
+      LLVMDumpValue(fetch);
    }
 
-   LLVMFreeMachineCodeForFunction(engine, fetch);
+   LLVMFreeMachineCodeForFunction(lp_build_engine, fetch);
+   LLVMDeleteFunction(fetch);
 
-   LLVMDisposeExecutionEngine(engine);
    if(pass)
       LLVMDisposePassManager(pass);
 
@@ -235,20 +229,12 @@ test_all(unsigned verbose, FILE *fp)
       }
 
       /*
-       * XXX: copied from lp_build_fetch_rgba_aos()
        * TODO: test more
        */
 
-      if (!(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
-            format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
-            format_desc->block.width == 1 &&
-            format_desc->block.height == 1 &&
-            util_is_pot(format_desc->block.bits) &&
-            format_desc->block.bits <= 32 &&
-            format_desc->is_bitmask &&
-            !format_desc->is_mixed &&
-            (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
-             format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED))) {
+      if (format_desc->block.width != 1 ||
+          format_desc->block.height != 1 ||
+          format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
          continue;
       }