#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_swizzle.h"
-#include "lp_bld_sample.h" /* for lp_build_gather */
+#include "lp_bld_gather.h"
+#include "lp_bld_debug.h"
#include "lp_bld_format.h"
* \param rgba_out returns the SoA R,G,B,A vectors
*/
void
-lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
+lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
const struct util_format_description *format_desc,
struct lp_type type,
LLVMValueRef packed,
LLVMValueRef rgba_out[4])
{
+ LLVMBuilderRef builder = gallivm->builder;
struct lp_build_context bld;
LLVMValueRef inputs[4];
unsigned start;
assert(type.floating);
assert(type.width == 32);
- lp_build_context_init(&bld, builder, type);
+ lp_build_context_init(&bld, gallivm, type);
/* Decode the input vector components */
start = 0;
switch(format_desc->channel[chan].type) {
case UTIL_FORMAT_TYPE_VOID:
- input = lp_build_undef(type);
+ input = lp_build_undef(gallivm, type);
break;
case UTIL_FORMAT_TYPE_UNSIGNED:
*/
if (start) {
- input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(type, start), "");
+ input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
}
/*
if (stop < format_desc->block.bits) {
unsigned mask = ((unsigned long long)1 << width) - 1;
- input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(type, mask), "");
+ input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
}
/*
if (type.floating) {
if(format_desc->channel[chan].normalized)
- input = lp_build_unsigned_norm_to_float(builder, width, type, input);
+ input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
else
- input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
+ input = LLVMBuildSIToFP(builder, input,
+ lp_build_vec_type(gallivm, type), "");
}
else {
/* FIXME */
assert(0);
- input = lp_build_undef(type);
+ input = lp_build_undef(gallivm, type);
}
break;
if (stop < type.width) {
unsigned bits = type.width - stop;
- LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
+ LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
input = LLVMBuildShl(builder, input, bits_val, "");
}
if (format_desc->channel[chan].size < type.width) {
unsigned bits = type.width - format_desc->channel[chan].size;
- LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
+ LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
input = LLVMBuildAShr(builder, input, bits_val, "");
}
*/
if (type.floating) {
- input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
+ input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
if (format_desc->channel[chan].normalized) {
double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
- LLVMValueRef scale_val = lp_build_const_vec(type, scale);
- input = LLVMBuildMul(builder, input, scale_val, "");
+ LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
+ input = LLVMBuildFMul(builder, input, scale_val, "");
}
}
else {
/* FIXME */
assert(0);
- input = lp_build_undef(type);
+ input = lp_build_undef(gallivm, type);
}
break;
assert(start == 0);
assert(stop == 32);
assert(type.width == 32);
- input = LLVMBuildBitCast(builder, input, lp_build_vec_type(type), "");
+ input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
}
else {
/* FIXME */
assert(0);
- input = lp_build_undef(type);
+ input = lp_build_undef(gallivm, type);
}
break;
case UTIL_FORMAT_TYPE_FIXED:
if (type.floating) {
double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
- LLVMValueRef scale_val = lp_build_const_vec(type, scale);
- input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
- input = LLVMBuildMul(builder, input, scale_val, "");
+ LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
+ input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
+ input = LLVMBuildFMul(builder, input, scale_val, "");
}
else {
/* FIXME */
assert(0);
- input = lp_build_undef(type);
+ input = lp_build_undef(gallivm, type);
}
break;
default:
assert(0);
- input = lp_build_undef(type);
+ input = lp_build_undef(gallivm, type);
break;
}
}
+void
+lp_build_rgba8_to_f32_soa(struct gallivm_state *gallivm,
+ struct lp_type dst_type,
+ LLVMValueRef packed,
+ LLVMValueRef *rgba)
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
+ unsigned chan;
+
+ packed = LLVMBuildBitCast(builder, packed,
+ lp_build_int_vec_type(gallivm, dst_type), "");
+
+ /* Decode the input vector components */
+ for (chan = 0; chan < 4; ++chan) {
+ unsigned start = chan*8;
+ unsigned stop = start + 8;
+ LLVMValueRef input;
+
+ input = packed;
+
+ if (start)
+ input = LLVMBuildLShr(builder, input,
+ lp_build_const_int_vec(gallivm, dst_type, start), "");
+
+ if (stop < 32)
+ input = LLVMBuildAnd(builder, input, mask, "");
+
+ input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
+
+ rgba[chan] = input;
+ }
+}
+
+
+
/**
* Fetch a texels from a texture, returning them in SoA layout.
*
* be in [0, block_width-1] and j will be in [0, block_height-1].
*/
void
-lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
+lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
const struct util_format_description *format_desc,
struct lp_type type,
LLVMValueRef base_ptr,
LLVMValueRef j,
LLVMValueRef rgba_out[4])
{
+ LLVMBuilderRef builder = gallivm->builder;
if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
* gather the texels from the texture
* Ex: packed = {BGRA, BGRA, BGRA, BGRA}.
*/
- packed = lp_build_gather(builder,
+ packed = lp_build_gather(gallivm,
type.length,
format_desc->block.bits,
type.width,
/*
* convert texels to float rgba
*/
- lp_build_unpack_rgba_soa(builder,
+ lp_build_unpack_rgba_soa(gallivm,
format_desc,
type,
packed, rgba_out);
+ return;
}
- else {
- /*
- * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
- *
- * This is not the most efficient way of fetching pixels, as we
- * miss some opportunities to do vectorization, but this is
- * convenient for formats or scenarios for which there was no
- * opportunity or incentive to optimize.
- */
+ /*
+ * Try calling lp_build_fetch_rgba_aos for all pixels.
+ */
+
+ if (util_format_fits_8unorm(format_desc) &&
+ type.floating && type.width == 32 && type.length == 4) {
+ struct lp_type tmp_type;
+ LLVMValueRef tmp;
+
+ memset(&tmp_type, 0, sizeof tmp_type);
+ tmp_type.width = 8;
+ tmp_type.length = type.length * 4;
+ tmp_type.norm = TRUE;
+
+ tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
+ base_ptr, offset, i, j);
+
+ lp_build_rgba8_to_f32_soa(gallivm,
+ type,
+ tmp,
+ rgba_out);
+
+ return;
+ }
+
+ /*
+ * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
+ *
+ * This is not the most efficient way of fetching pixels, as we
+ * miss some opportunities to do vectorization, but this is
+ * convenient for formats or scenarios for which there was no
+ * opportunity or incentive to optimize.
+ */
+
+ {
unsigned k, chan;
+ struct lp_type tmp_type;
+
+ if (gallivm_debug & GALLIVM_DEBUG_PERF) {
+ debug_printf("%s: scalar unpacking of %s\n",
+ __FUNCTION__, format_desc->short_name);
+ }
- assert(type.floating);
+ tmp_type = type;
+ tmp_type.length = 4;
for (chan = 0; chan < 4; ++chan) {
- rgba_out[chan] = lp_build_undef(type);
+ rgba_out[chan] = lp_build_undef(gallivm, type);
}
/* loop over number of pixels */
for(k = 0; k < type.length; ++k) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
+ LLVMValueRef index = lp_build_const_int32(gallivm, k);
LLVMValueRef offset_elem;
- LLVMValueRef ptr;
LLVMValueRef i_elem, j_elem;
LLVMValueRef tmp;
- offset_elem = LLVMBuildExtractElement(builder, offset, index, "");
- ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, "");
+ offset_elem = LLVMBuildExtractElement(builder, offset,
+ index, "");
i_elem = LLVMBuildExtractElement(builder, i, index, "");
j_elem = LLVMBuildExtractElement(builder, j, index, "");
/* Get a single float[4]={R,G,B,A} pixel */
- tmp = lp_build_fetch_rgba_aos(builder, format_desc, ptr,
+ tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
+ base_ptr, offset_elem,
i_elem, j_elem);
/*
* position = 'index'.
*/
for (chan = 0; chan < 4; ++chan) {
- LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0),
+ LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
tmp_chan, index, "");