This therefore adds support for half-float vertex buffers.
enum pipe_format from_format)
{
const struct util_format_description *format_desc;
+ LLVMValueRef zero;
int i;
/*
*/
format_desc = util_format_description(from_format);
- return lp_build_fetch_rgba_aos(builder, format_desc, vbuffer);
+ zero = LLVMConstNull(LLVMInt32Type());
+ return lp_build_fetch_rgba_aos(builder, format_desc, vbuffer, zero, zero);
}
LLVMValueRef
lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
const struct util_format_description *format_desc,
- LLVMValueRef ptr);
+ LLVMValueRef ptr,
+ LLVMValueRef i,
+ LLVMValueRef j);
/*
#include "util/u_format.h"
+#include "util/u_memory.h"
#include "util/u_math.h"
+#include "util/u_string.h"
+#include "lp_bld_init.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_swizzle.h"
/**
* Fetch a pixel into a 4 float AoS.
+ *
+ * i and j are the sub-block pixel coordinates.
*/
LLVMValueRef
lp_build_fetch_rgba_aos(LLVMBuilderRef builder,
const struct util_format_description *format_desc,
- LLVMValueRef ptr)
+ LLVMValueRef ptr,
+ LLVMValueRef i,
+ LLVMValueRef j)
{
+
if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
util_is_pot(format_desc->block.bits) &&
format_desc->block.bits <= 32 &&
format_desc->is_bitmask &&
- !format_desc->is_mixed)
+ !format_desc->is_mixed &&
+ (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
+ format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED))
{
LLVMValueRef packed;
return lp_build_unpack_rgba_aos(builder, format_desc, packed);
}
+ else if (format_desc->fetch_rgba_float) {
+ /*
+ * Fallback to calling util_format_description::fetch_rgba_float.
+ *
+ * This is definitely not the most efficient way of fetching pixels, as
+ * we miss the opportunity to do vectorization, but this is
+ * convenient for formats or scenarios for which there was no opportunity
+ * or incentive to optimize.
+ */
+
+ LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
+ char name[256];
+ LLVMValueRef function;
+ LLVMValueRef tmp;
+ LLVMValueRef args[4];
+
+ util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float",
+ format_desc->short_name);
+
+ /*
+ * Declare and bind format_desc->fetch_rgba_float().
+ */
+
+ function = LLVMGetNamedFunction(module, name);
+ if (!function) {
+ LLVMTypeRef ret_type;
+ LLVMTypeRef arg_types[4];
+ LLVMTypeRef function_type;
+
+ ret_type = LLVMVoidType();
+ arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
+ arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
+ arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
+ function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
+ function = LLVMAddFunction(module, name, function_type);
+
+ LLVMSetFunctionCallConv(function, LLVMCCallConv);
+ LLVMSetLinkage(function, LLVMExternalLinkage);
+
+ assert(LLVMIsDeclaration(function));
+
+ LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float);
+ }
+
+ /*
+ * XXX: this alloca would be better placed in the first block of the function
+ */
+
+ tmp = LLVMBuildAlloca(builder, LLVMVectorType(LLVMFloatType(), 4), "");
+
+ /*
+ * Invoke format_desc->fetch_rgba_float() for this pixel and load the result
+ * back as a 4 x float AoS vector.
+ */
+
+ args[0] = LLVMBuildBitCast(builder, tmp,
+ LLVMPointerType(LLVMFloatType(), 0), "");
+ args[1] = ptr;
+ args[2] = i;
+ args[3] = j;
+
+ LLVMBuildCall(builder, function, args, 4, "");
+
+ return LLVMBuildLoad(builder, tmp, "");
+ }
else {
assert(0);
return LLVMGetUndef(LLVMVectorType(LLVMFloatType(), 4));
}
else {
/*
- * Fallback to calling util_format_description::fetch_rgba_float for each
- * pixel.
+ * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
*
- * This is definitely not the most efficient way of fetching pixels, as
- * we miss the opportunity to do vectorization, but this it is a
+ * This is not the most efficient way of fetching pixels, as
+ * we miss some opportunities to do vectorization, but this is
* convenient for formats or scenarios for which there was no opportunity
* or incentive to optimize.
*/
- LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
- char name[256];
- LLVMValueRef function;
- LLVMValueRef tmp;
unsigned k, chan;
assert(type.floating);
- util_snprintf(name, sizeof name, "util_format_%s_fetch_rgba_float", format_desc->short_name);
-
- /*
- * Declare and bind format_desc->fetch_rgba_float().
- */
-
- function = LLVMGetNamedFunction(module, name);
- if (!function) {
- LLVMTypeRef ret_type;
- LLVMTypeRef arg_types[4];
- LLVMTypeRef function_type;
-
- ret_type = LLVMVoidType();
- arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
- arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
- arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
- function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
- function = LLVMAddFunction(module, name, function_type);
-
- LLVMSetFunctionCallConv(function, LLVMCCallConv);
- LLVMSetLinkage(function, LLVMExternalLinkage);
-
- assert(LLVMIsDeclaration(function));
-
- LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_rgba_float);
- }
-
for (chan = 0; chan < 4; ++chan) {
rgba[chan] = lp_build_undef(type);
}
- tmp = LLVMBuildArrayAlloca(builder,
- LLVMFloatType(),
- LLVMConstInt(LLVMInt32Type(), 4, 0),
- "");
-
- /*
- * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result
- * in the SoA vectors.
- */
-
for(k = 0; k < type.length; ++k) {
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
LLVMValueRef offset_elem;
LLVMValueRef ptr;
LLVMValueRef i_elem, j_elem;
- LLVMValueRef args[4];
+ LLVMValueRef tmp;
offset_elem = LLVMBuildExtractElement(builder, offset, index, "");
ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, "");
i_elem = LLVMBuildExtractElement(builder, i, index, "");
j_elem = LLVMBuildExtractElement(builder, j, index, "");
- args[0] = tmp;
- args[1] = ptr;
- args[2] = i_elem;
- args[3] = j_elem;
+ tmp = lp_build_fetch_rgba_aos(builder, format_desc, ptr, i_elem, j_elem);
- LLVMBuildCall(builder, function, args, 4, "");
+ /*
+ * AoS to SoA
+ */
for (chan = 0; chan < 4; ++chan) {
LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0),
- tmp_chan = LLVMBuildGEP(builder, tmp, &chan_val, 1, "");
- tmp_chan = LLVMBuildLoad(builder, tmp_chan, "");
+ tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
rgba[chan] = LLVMBuildInsertElement(builder, rgba[chan], tmp_chan, index, "");
}
}
#include <stdlib.h>
#include <stdio.h>
+#include <float.h>
#include "gallivm/lp_bld.h"
+#include "gallivm/lp_bld_init.h"
#include <llvm-c/Analysis.h>
-#include <llvm-c/ExecutionEngine.h>
#include <llvm-c/Target.h>
#include <llvm-c/Transforms/Scalar.h>
-#include "util/u_cpu_detect.h"
+#include "util/u_memory.h"
#include "util/u_format.h"
#include "util/u_format_tests.h"
#include "util/u_format_s3tc.h"
}
-typedef void (*fetch_ptr_t)(const void *packed, float *);
+typedef void
+(*fetch_ptr_t)(float *, const void *packed,
+ unsigned i, unsigned j);
static LLVMValueRef
-add_fetch_rgba_test(LLVMModuleRef module,
+add_fetch_rgba_test(LLVMModuleRef lp_build_module,
const struct util_format_description *desc)
{
- LLVMTypeRef args[2];
+ LLVMTypeRef args[4];
LLVMValueRef func;
LLVMValueRef packed_ptr;
LLVMValueRef rgba_ptr;
+ LLVMValueRef i;
+ LLVMValueRef j;
LLVMBasicBlockRef block;
LLVMBuilderRef builder;
LLVMValueRef rgba;
- args[0] = LLVMPointerType(LLVMInt8Type(), 0);
- args[1] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);
+ args[0] = LLVMPointerType(LLVMVectorType(LLVMFloatType(), 4), 0);
+ args[1] = LLVMPointerType(LLVMInt8Type(), 0);
+ args[3] = args[2] = LLVMInt32Type();
- func = LLVMAddFunction(module, "fetch", LLVMFunctionType(LLVMVoidType(), args, 2, 0));
+ func = LLVMAddFunction(lp_build_module, "fetch", LLVMFunctionType(LLVMVoidType(), args, Elements(args), 0));
LLVMSetFunctionCallConv(func, LLVMCCallConv);
- packed_ptr = LLVMGetParam(func, 0);
- rgba_ptr = LLVMGetParam(func, 1);
+ rgba_ptr = LLVMGetParam(func, 0);
+ packed_ptr = LLVMGetParam(func, 1);
+ i = LLVMGetParam(func, 2);
+ j = LLVMGetParam(func, 3);
block = LLVMAppendBasicBlock(func, "entry");
builder = LLVMCreateBuilder();
LLVMPositionBuilderAtEnd(builder, block);
- rgba = lp_build_fetch_rgba_aos(builder, desc, packed_ptr);
+ rgba = lp_build_fetch_rgba_aos(builder, desc, packed_ptr, i, j);
LLVMBuildStore(builder, rgba, rgba_ptr);
const struct util_format_description *desc,
const struct util_format_test_case *test)
{
- LLVMModuleRef module = NULL;
LLVMValueRef fetch = NULL;
- LLVMExecutionEngineRef engine = NULL;
- LLVMModuleProviderRef provider = NULL;
LLVMPassManagerRef pass = NULL;
- char *error = NULL;
fetch_ptr_t fetch_ptr;
float unpacked[4];
boolean success;
unsigned i;
- module = LLVMModuleCreateWithName("test");
+ fetch = add_fetch_rgba_test(lp_build_module, desc);
- fetch = add_fetch_rgba_test(module, desc);
-
- if(LLVMVerifyModule(module, LLVMPrintMessageAction, &error)) {
- LLVMDumpModule(module);
- abort();
- }
- LLVMDisposeMessage(error);
-
- provider = LLVMCreateModuleProviderForExistingModule(module);
- if (LLVMCreateJITCompiler(&engine, provider, 1, &error)) {
- fprintf(stderr, "%s\n", error);
- LLVMDisposeMessage(error);
+ if (LLVMVerifyFunction(fetch, LLVMPrintMessageAction)) {
+ LLVMDumpValue(fetch);
abort();
}
#if 0
pass = LLVMCreatePassManager();
- LLVMAddTargetData(LLVMGetExecutionEngineTargetData(engine), pass);
+ LLVMAddTargetData(LLVMGetExecutionEngineTargetData(lp_build_engine), pass);
/* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
* but there are more on SVN. */
LLVMAddConstantPropagationPass(pass);
LLVMAddPromoteMemoryToRegisterPass(pass);
LLVMAddGVNPass(pass);
LLVMAddCFGSimplificationPass(pass);
- LLVMRunPassManager(pass, module);
+ LLVMRunPassManager(pass, lp_build_module);
#else
(void)pass;
#endif
- fetch_ptr = (fetch_ptr_t) LLVMGetPointerToGlobal(engine, fetch);
+ fetch_ptr = (fetch_ptr_t) LLVMGetPointerToGlobal(lp_build_engine, fetch);
memset(unpacked, 0, sizeof unpacked);
- fetch_ptr(test->packed, unpacked);
+ fetch_ptr(unpacked, test->packed, 0, 0);
success = TRUE;
for(i = 0; i < 4; ++i)
- if(test->unpacked[0][0][i] != unpacked[i])
+ if (fabs((float)test->unpacked[0][0][i] - unpacked[i]) > FLT_EPSILON)
success = FALSE;
if (!success) {
test->unpacked[0][0][1],
test->unpacked[0][0][2],
test->unpacked[0][0][3]);
- LLVMDumpModule(module);
+ LLVMDumpValue(fetch);
}
- LLVMFreeMachineCodeForFunction(engine, fetch);
+ LLVMFreeMachineCodeForFunction(lp_build_engine, fetch);
+ LLVMDeleteFunction(fetch);
- LLVMDisposeExecutionEngine(engine);
if(pass)
LLVMDisposePassManager(pass);
}
/*
- * XXX: copied from lp_build_fetch_rgba_aos()
* TODO: test more
*/
- if (!(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
- format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
- format_desc->block.width == 1 &&
- format_desc->block.height == 1 &&
- util_is_pot(format_desc->block.bits) &&
- format_desc->block.bits <= 32 &&
- format_desc->is_bitmask &&
- !format_desc->is_mixed &&
- (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED ||
- format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED))) {
+ if (format_desc->block.width != 1 ||
+ format_desc->block.height != 1 ||
+ format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
continue;
}