From ec8d9523d465554e3ffaa1aeef46bfff868281d3 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jos=C3=A9=20Fonseca?= Date: Tue, 20 Apr 2010 16:21:08 +0200 Subject: [PATCH 1/1] gallivm: Universal format support on lp_build_fetch_rgba_aos via util_format_description::fetch_rgba_float This therefore adds support for half float vertex buffers. --- .../auxiliary/draw/draw_llvm_translate.c | 4 +- src/gallium/auxiliary/gallivm/lp_bld_format.h | 4 +- .../auxiliary/gallivm/lp_bld_format_aos.c | 79 +++++++++++++++++- .../auxiliary/gallivm/lp_bld_format_soa.c | 62 +++----------- src/gallium/drivers/llvmpipe/lp_test_format.c | 80 ++++++++----------- 5 files changed, 125 insertions(+), 104 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm_translate.c b/src/gallium/auxiliary/draw/draw_llvm_translate.c index d1c7fa44e12..d7da7ed357d 100644 --- a/src/gallium/auxiliary/draw/draw_llvm_translate.c +++ b/src/gallium/auxiliary/draw/draw_llvm_translate.c @@ -464,6 +464,7 @@ draw_llvm_translate_from(LLVMBuilderRef builder, enum pipe_format from_format) { const struct util_format_description *format_desc; + LLVMValueRef zero; int i; /* @@ -491,5 +492,6 @@ draw_llvm_translate_from(LLVMBuilderRef builder, */ format_desc = util_format_description(from_format); - return lp_build_fetch_rgba_aos(builder, format_desc, vbuffer); + zero = LLVMConstNull(LLVMInt32Type()); + return lp_build_fetch_rgba_aos(builder, format_desc, vbuffer, zero, zero); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h index ecf2cfd62c0..085937588ff 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -59,7 +59,9 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, LLVMValueRef lp_build_fetch_rgba_aos(LLVMBuilderRef builder, const struct util_format_description *format_desc, - LLVMValueRef ptr); + LLVMValueRef ptr, + LLVMValueRef i, + LLVMValueRef j); /* diff --git 
a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index 191562d460d..5cd5b93bdf6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -34,8 +34,11 @@ #include "util/u_format.h" +#include "util/u_memory.h" #include "util/u_math.h" +#include "util/u_string.h" +#include "lp_bld_init.h" #include "lp_bld_type.h" #include "lp_bld_const.h" #include "lp_bld_swizzle.h" @@ -295,12 +298,17 @@ lp_build_pack_rgba_aos(LLVMBuilderRef builder, /** * Fetch a pixel into a 4 float AoS. + * + * i and j are the sub-block pixel coordinates. */ LLVMValueRef lp_build_fetch_rgba_aos(LLVMBuilderRef builder, const struct util_format_description *format_desc, - LLVMValueRef ptr) + LLVMValueRef ptr, + LLVMValueRef i, + LLVMValueRef j) { + if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && @@ -309,7 +317,9 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, util_is_pot(format_desc->block.bits) && format_desc->block.bits <= 32 && format_desc->is_bitmask && - !format_desc->is_mixed) + !format_desc->is_mixed && + (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED || + format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) { LLVMValueRef packed; @@ -321,6 +331,71 @@ lp_build_fetch_rgba_aos(LLVMBuilderRef builder, return lp_build_unpack_rgba_aos(builder, format_desc, packed); } + else if (format_desc->fetch_rgba_float) { + /* + * Fallback to calling util_format_description::fetch_rgba_float. + * + * This is definitely not the most efficient way of fetching pixels, as + * we miss the opportunity to do vectorization, but it is + * convenient for formats or scenarios for which there was no opportunity + * or incentive to optimize. 
+ */ + + LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); + char name[256]; + LLVMValueRef function; + LLVMValueRef tmp; + LLVMValueRef args[4]; + + util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", + format_desc->short_name); + + /* + * Declare and bind format_desc->fetch_rgba_float(). + */ + + function = LLVMGetNamedFunction(module, name); + if (!function) { + LLVMTypeRef ret_type; + LLVMTypeRef arg_types[4]; + LLVMTypeRef function_type; + + ret_type = LLVMVoidType(); + arg_types[0] = LLVMPointerType(LLVMFloatType(), 0); + arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0); + arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8); + function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); + function = LLVMAddFunction(module, name, function_type); + + LLVMSetFunctionCallConv(function, LLVMCCallConv); + LLVMSetLinkage(function, LLVMExternalLinkage); + + assert(LLVMIsDeclaration(function)); + + LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float); + } + + /* + * XXX: this should better go to the first block in the function + */ + + tmp = LLVMBuildAlloca(builder, LLVMVectorType(LLVMFloatType(), 4), ""); + + /* + * Invoke format_desc->fetch_rgba_float() for the pixel at (i, j) and load + * the result as a 4 x float AoS vector. 
+ */ + + args[0] = LLVMBuildBitCast(builder, tmp, + LLVMPointerType(LLVMFloatType(), 0), ""); + args[1] = ptr; + args[2] = i; + args[3] = j; + + LLVMBuildCall(builder, function, args, 4, ""); + + return LLVMBuildLoad(builder, tmp, ""); + } else { assert(0); return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4)); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index 2b66162eb40..c7b20f42012 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -307,70 +307,28 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, } else { /* - * Fallback to calling util_format_description::fetch_rgba_float for each - * pixel. + * Fallback to calling lp_build_fetch_rgba_aos for each pixel. * - * This is definitely not the most efficient way of fetching pixels, as - * we miss the opportunity to do vectorization, but this it is a + * This is not the most efficient way of fetching pixels, as + * we miss some opportunities to do vectorization, but it is * convenient for formats or scenarios for which there was no opportunity * or incentive to optimize. */ - LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder))); - char name[256]; - LLVMValueRef function; - LLVMValueRef tmp; unsigned k, chan; assert(type.floating); - util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", format_desc->short_name); - - /* - * Declare and bind format_desc->fetch_rgba_float(). 
- */ - - function = LLVMGetNamedFunction(module, name); - if (!function) { - LLVMTypeRef ret_type; - LLVMTypeRef arg_types[4]; - LLVMTypeRef function_type; - - ret_type = LLVMVoidType(); - arg_types[0] = LLVMPointerType(LLVMFloatType(), 0); - arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0); - arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8); - function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0); - function = LLVMAddFunction(module, name, function_type); - - LLVMSetFunctionCallConv(function, LLVMCCallConv); - LLVMSetLinkage(function, LLVMExternalLinkage); - - assert(LLVMIsDeclaration(function)); - - LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float); - } - for (chan = 0; chan < 4; ++chan) { rgba[chan] = lp_build_undef(type); } - tmp = LLVMBuildArrayAlloca(builder, - LLVMFloatType(), - LLVMConstInt(LLVMInt32Type(), 4, 0), - ""); - - /* - * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result - * in the SoA vectors. 
- */ - for(k = 0; k < type.length; ++k) { LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0); LLVMValueRef offset_elem; LLVMValueRef ptr; LLVMValueRef i_elem, j_elem; - LLVMValueRef args[4]; + LLVMValueRef tmp; offset_elem = LLVMBuildExtractElement(builder, offset, index, ""); ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, ""); @@ -378,17 +336,15 @@ lp_build_fetch_rgba_soa(LLVMBuilderRef builder, i_elem = LLVMBuildExtractElement(builder, i, index, ""); j_elem = LLVMBuildExtractElement(builder, j, index, ""); - args[0] = tmp; - args[1] = ptr; - args[2] = i_elem; - args[3] = j_elem; + tmp = lp_build_fetch_rgba_aos(builder, format_desc, ptr, i_elem, j_elem); - LLVMBuildCall(builder, function, args, 4, ""); + /* + * AoS to SoA + */ for (chan = 0; chan < 4; ++chan) { LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0), - tmp_chan = LLVMBuildGEP(builder, tmp, &chan_val, 1, ""); - tmp_chan = LLVMBuildLoad(builder, tmp_chan, ""); + tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, ""); rgba[chan] = LLVMBuildInsertElement(builder, rgba[chan], tmp_chan, index, ""); } } diff --git a/src/gallium/drivers/llvmpipe/lp_test_format.c b/src/gallium/drivers/llvmpipe/lp_test_format.c index 13c3c3572d6..fbac815d107 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_format.c +++ b/src/gallium/drivers/llvmpipe/lp_test_format.c @@ -28,14 +28,15 @@ #include #include +#include #include "gallivm/lp_bld.h" +#include "gallivm/lp_bld_init.h" #include -#include #include #include -#include "util/u_cpu_detect.h" +#include "util/u_memory.h" #include "util/u_format.h" #include "util/u_format_tests.h" #include "util/u_format_s3tc.h" @@ -68,34 +69,41 @@ write_tsv_row(FILE *fp, } -typedef void (*fetch_ptr_t)(const void *packed, float *); +typedef void +(*fetch_ptr_t)(float *, const void *packed, + unsigned i, unsigned j); static LLVMValueRef -add_fetch_rgba_test(LLVMModuleRef module, +add_fetch_rgba_test(LLVMModuleRef lp_build_module, const struct 
util_format_description *desc) { - LLVMTypeRef args[2]; + LLVMTypeRef args[4]; LLVMValueRef func; LLVMValueRef packed_ptr; LLVMValueRef rgba_ptr; + LLVMValueRef i; + LLVMValueRef j; LLVMBasicBlockRef block; LLVMBuilderRef builder; LLVMValueRef rgba; - args[0] = LLVMPointerType(LLVMInt8Type(), 0); - args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); + args[0] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0); + args[1] = LLVMPointerType(LLVMInt8Type(), 0); + args[3] = args[2] = LLVMInt32Type(); - func = LLVMAddFunction(module, "fetch", LLVMFunctionType(LLVMVoidType(), args, 2, 0)); + func = LLVMAddFunction(lp_build_module, "fetch", LLVMFunctionType(LLVMVoidType(), args, Elements(args), 0)); LLVMSetFunctionCallConv(func, LLVMCCallConv); - packed_ptr = LLVMGetParam(func, 0); - rgba_ptr = LLVMGetParam(func, 1); + rgba_ptr = LLVMGetParam(func, 0); + packed_ptr = LLVMGetParam(func, 1); + i = LLVMGetParam(func, 2); + j = LLVMGetParam(func, 3); block = LLVMAppendBasicBlock(func, "entry"); builder = LLVMCreateBuilder(); LLVMPositionBuilderAtEnd(builder, block); - rgba = lp_build_fetch_rgba_aos(builder, desc, packed_ptr); + rgba = lp_build_fetch_rgba_aos(builder, desc, packed_ptr, i, j); LLVMBuildStore(builder, rgba, rgba_ptr); @@ -112,37 +120,23 @@ test_format(unsigned verbose, FILE *fp, const struct util_format_description *desc, const struct util_format_test_case *test) { - LLVMModuleRef module = NULL; LLVMValueRef fetch = NULL; - LLVMExecutionEngineRef engine = NULL; - LLVMModuleProviderRef provider = NULL; LLVMPassManagerRef pass = NULL; - char *error = NULL; fetch_ptr_t fetch_ptr; float unpacked[4]; boolean success; unsigned i; - module = LLVMModuleCreateWithName("test"); + fetch = add_fetch_rgba_test(lp_build_module, desc); - fetch = add_fetch_rgba_test(module, desc); - - if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) { - LLVMDumpModule(module); - abort(); - } - LLVMDisposeMessage(error); - - provider = 
LLVMCreateModuleProviderForExistingModule(module); - if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) { - fprintf(stderr, "%s\n", error); - LLVMDisposeMessage(error); + if (LLVMVerifyFunction(fetch, LLVMPrintMessageAction)) { + LLVMDumpValue(fetch); abort(); } #if 0 pass = LLVMCreatePassManager(); - LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass); + LLVMAddTargetData(LLVMGetExecutionEngineTargetData(lp_build_engine), pass); /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. */ LLVMAddConstantPropagationPass(pass); @@ -150,20 +144,20 @@ test_format(unsigned verbose, FILE *fp, LLVMAddPromoteMemoryToRegisterPass(pass); LLVMAddGVNPass(pass); LLVMAddCFGSimplificationPass(pass); - LLVMRunPassManager(pass, module); + LLVMRunPassManager(pass, lp_build_module); #else (void)pass; #endif - fetch_ptr = (fetch_ptr_t) LLVMGetPointerToGlobal(engine, fetch); + fetch_ptr = (fetch_ptr_t) LLVMGetPointerToGlobal(lp_build_engine, fetch); memset(unpacked, 0, sizeof unpacked); - fetch_ptr(test->packed, unpacked); + fetch_ptr(unpacked, test->packed, 0, 0); success = TRUE; for(i = 0; i < 4; ++i) - if(test->unpacked[0][0][i] != unpacked[i]) + if (fabs((float)test->unpacked[0][0][i] - unpacked[i]) > FLT_EPSILON) success = FALSE; if (!success) { @@ -177,12 +171,12 @@ test_format(unsigned verbose, FILE *fp, test->unpacked[0][0][1], test->unpacked[0][0][2], test->unpacked[0][0][3]); - LLVMDumpModule(module); + LLVMDumpValue(fetch); } - LLVMFreeMachineCodeForFunction(engine, fetch); + LLVMFreeMachineCodeForFunction(lp_build_engine, fetch); + LLVMDeleteFunction(fetch); - LLVMDisposeExecutionEngine(engine); if(pass) LLVMDisposePassManager(pass); @@ -235,20 +229,12 @@ test_all(unsigned verbose, FILE *fp) } /* - * XXX: copied from lp_build_fetch_rgba_aos() * TODO: test more */ - if (!(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && - format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB && - format_desc->block.width == 1 
&& - format_desc->block.height == 1 && - util_is_pot(format_desc->block.bits) && - format_desc->block.bits <= 32 && - format_desc->is_bitmask && - !format_desc->is_mixed && - (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED || - format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED))) { + if (format_desc->block.width != 1 || + format_desc->block.height != 1 || + format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { continue; } -- 2.30.2