**************************************************************************/
+#include "pipe/p_defines.h"
+
#include "util/u_format.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
-#include "lp_bld_sample.h" /* for lp_build_gather */
+#include "lp_bld_swizzle.h"
+#include "lp_bld_gather.h"
+#include "lp_bld_debug.h"
#include "lp_bld_format.h"
-
-
-static LLVMValueRef
-lp_build_format_swizzle_chan_soa(struct lp_type type,
- const LLVMValueRef *unswizzled,
- enum util_format_swizzle swizzle)
-{
- switch (swizzle) {
- case UTIL_FORMAT_SWIZZLE_X:
- case UTIL_FORMAT_SWIZZLE_Y:
- case UTIL_FORMAT_SWIZZLE_Z:
- case UTIL_FORMAT_SWIZZLE_W:
- return unswizzled[swizzle];
- case UTIL_FORMAT_SWIZZLE_0:
- return lp_build_zero(type);
- case UTIL_FORMAT_SWIZZLE_1:
- return lp_build_one(type);
- case UTIL_FORMAT_SWIZZLE_NONE:
- return lp_build_undef(type);
- default:
- assert(0);
- return lp_build_undef(type);
- }
-}
+#include "lp_bld_arit.h"
void
lp_build_format_swizzle_soa(const struct util_format_description *format_desc,
- struct lp_type type,
+ struct lp_build_context *bld,
const LLVMValueRef *unswizzled,
- LLVMValueRef *swizzled)
+ LLVMValueRef swizzled_out[4])
{
- if(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
- enum util_format_swizzle swizzle = format_desc->swizzle[0];
- LLVMValueRef depth = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
- swizzled[2] = swizzled[1] = swizzled[0] = depth;
- swizzled[3] = lp_build_one(type);
+ assert(UTIL_FORMAT_SWIZZLE_0 == PIPE_SWIZZLE_ZERO);
+ assert(UTIL_FORMAT_SWIZZLE_1 == PIPE_SWIZZLE_ONE);
+
+ if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ enum util_format_swizzle swizzle;
+ LLVMValueRef depth_or_stencil;
+
+ if (util_format_has_stencil(format_desc) &&
+ !util_format_has_depth(format_desc)) {
+ assert(!bld->type.floating);
+ swizzle = format_desc->swizzle[1];
+ }
+ else {
+ assert(bld->type.floating);
+ swizzle = format_desc->swizzle[0];
+ }
+ /*
+ * Return zzz1 or sss1 for depth-stencil formats here: the single
+ * channel selected above is replicated into the first three outputs
+ * and the fourth output is set to bld->one.
+ * Stencil-only formats (stencil but no depth) select swizzle[1] and
+ * assert a non-floating bld->type; every other ZS format selects
+ * swizzle[0] and asserts a floating bld->type.
+ * Correct swizzling will be handled by apply_sampler_swizzle() later.
+ */
+ depth_or_stencil = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
+
+ swizzled_out[2] = swizzled_out[1] = swizzled_out[0] = depth_or_stencil;
+ swizzled_out[3] = bld->one;
}
else {
unsigned chan;
for (chan = 0; chan < 4; ++chan) {
enum util_format_swizzle swizzle = format_desc->swizzle[chan];
- swizzled[chan] = lp_build_format_swizzle_chan_soa(type, unswizzled, swizzle);
+ swizzled_out[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, swizzle);
}
}
}
* It requires that a packed pixel fits into an element of the output
* channels. The common case is when converting pixel with a depth of 32 bit or
* less into floats.
+ *
+ * \param gallivm the gallivm state; its builder is used to emit the code
+ * \param format_desc the format of the 'packed' incoming pixel vector
+ * \param type the desired type for rgba_out (type.length = n, above)
+ * \param packed the incoming vector of packed pixels
+ * \param rgba_out returns the SoA R,G,B,A vectors
*/
void
-lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
+lp_build_unpack_rgba_soa(struct gallivm_state *gallivm,
const struct util_format_description *format_desc,
struct lp_type type,
LLVMValueRef packed,
- LLVMValueRef *rgba)
+ LLVMValueRef rgba_out[4])
{
+ LLVMBuilderRef builder = gallivm->builder;
+ struct lp_build_context bld;
LLVMValueRef inputs[4];
- unsigned start;
unsigned chan;
assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
assert(format_desc->block.height == 1);
assert(format_desc->block.bits <= type.width);
/* FIXME: Support more output types */
- assert(type.floating);
assert(type.width == 32);
+ lp_build_context_init(&bld, gallivm, type);
+
/* Decode the input vector components */
- start = 0;
for (chan = 0; chan < format_desc->nr_channels; ++chan) {
- unsigned width = format_desc->channel[chan].size;
- unsigned stop = start + width;
+ const unsigned width = format_desc->channel[chan].size;
+ const unsigned start = format_desc->channel[chan].shift;
+ const unsigned stop = start + width;
LLVMValueRef input;
input = packed;
switch(format_desc->channel[chan].type) {
case UTIL_FORMAT_TYPE_VOID:
- input = lp_build_undef(type);
+ input = lp_build_undef(gallivm, type);
break;
case UTIL_FORMAT_TYPE_UNSIGNED:
*/
if (start) {
- input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(type, start), "");
+ input = LLVMBuildLShr(builder, input, lp_build_const_int_vec(gallivm, type, start), "");
}
/*
if (stop < format_desc->block.bits) {
unsigned mask = ((unsigned long long)1 << width) - 1;
- input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(type, mask), "");
+ input = LLVMBuildAnd(builder, input, lp_build_const_int_vec(gallivm, type, mask), "");
}
/*
*/
if (type.floating) {
- if(format_desc->channel[chan].normalized)
- input = lp_build_unsigned_norm_to_float(builder, width, type, input);
- else
- input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
+ if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
+ if (format_desc->swizzle[3] == chan) {
+ input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
+ }
+ else {
+ struct lp_type conv_type = lp_uint_type(type);
+ input = lp_build_srgb_to_linear(gallivm, conv_type, width, input);
+ }
+ }
+ else {
+ if(format_desc->channel[chan].normalized)
+ input = lp_build_unsigned_norm_to_float(gallivm, width, type, input);
+ else
+ input = LLVMBuildSIToFP(builder, input,
+ lp_build_vec_type(gallivm, type), "");
+ }
}
- else {
- /* FIXME */
- assert(0);
- input = lp_build_undef(type);
+ else if (format_desc->channel[chan].pure_integer) {
+ /* Nothing to do: a pure integer channel keeps the integer value
+ * extracted above, with no conversion. */
+ } else {
+ /* FIXME: non-float output for a channel that is not pure integer
+ * is not implemented; this path only asserts. */
+ assert(0);
}
break;
if (stop < type.width) {
unsigned bits = type.width - stop;
- LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
+ LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
input = LLVMBuildShl(builder, input, bits_val, "");
}
if (format_desc->channel[chan].size < type.width) {
unsigned bits = type.width - format_desc->channel[chan].size;
- LLVMValueRef bits_val = lp_build_const_int_vec(type, bits);
+ LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits);
input = LLVMBuildAShr(builder, input, bits_val, "");
}
*/
if (type.floating) {
- input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
+ input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
if (format_desc->channel[chan].normalized) {
double scale = 1.0 / ((1 << (format_desc->channel[chan].size - 1)) - 1);
- LLVMValueRef scale_val = lp_build_const_vec(type, scale);
- input = LLVMBuildMul(builder, input, scale_val, "");
+ LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
+ input = LLVMBuildFMul(builder, input, scale_val, "");
+ /* The scale above maps the most negative input to a value slightly
+ * below -1.0. Everything seems happy with that, hence the clamp to
+ * -1.0 below is disabled with if (0) for now. */
+ if (0)
+ input = lp_build_max(&bld, input,
+ lp_build_const_vec(gallivm, type, -1.0f));
}
}
- else {
- /* FIXME */
- assert(0);
- input = lp_build_undef(type);
+ else if (format_desc->channel[chan].pure_integer) {
+ /* Nothing to do: a pure integer channel keeps the integer value
+ * produced above, with no conversion. */
+ } else {
+ /* FIXME: non-float output for a channel that is not pure integer
+ * is not implemented; this path only asserts. */
+ assert(0);
}
break;
assert(start == 0);
assert(stop == 32);
assert(type.width == 32);
- input = LLVMBuildBitCast(builder, input, lp_build_vec_type(type), "");
+ input = LLVMBuildBitCast(builder, input, lp_build_vec_type(gallivm, type), "");
}
else {
/* FIXME */
assert(0);
- input = lp_build_undef(type);
+ input = lp_build_undef(gallivm, type);
}
break;
case UTIL_FORMAT_TYPE_FIXED:
if (type.floating) {
double scale = 1.0 / ((1 << (format_desc->channel[chan].size/2)) - 1);
- LLVMValueRef scale_val = lp_build_const_vec(type, scale);
- input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(type), "");
- input = LLVMBuildMul(builder, input, scale_val, "");
+ LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
+ input = LLVMBuildSIToFP(builder, input, lp_build_vec_type(gallivm, type), "");
+ input = LLVMBuildFMul(builder, input, scale_val, "");
}
else {
/* FIXME */
assert(0);
- input = lp_build_undef(type);
+ input = lp_build_undef(gallivm, type);
}
break;
default:
assert(0);
- input = lp_build_undef(type);
+ input = lp_build_undef(gallivm, type);
break;
}
inputs[chan] = input;
-
- start = stop;
}
- lp_build_format_swizzle_soa(format_desc, type, inputs, rgba);
+ lp_build_format_swizzle_soa(format_desc, &bld, inputs, rgba_out);
+}
+
+
+/**
+ * Convert a vector of rgba8 values into 32bit wide SoA vectors.
+ *
+ * Channel 'chan' is taken from bits [chan*8, chan*8 + 8) of each packed
+ * element when PIPE_ARCH_LITTLE_ENDIAN is defined, and from bits
+ * [(3-chan)*8, (3-chan)*8 + 8) otherwise.
+ *
+ * \param gallivm The gallivm state; its builder is used to emit the code.
+ *
+ * \param dst_type The desired return type. For pure integer formats
+ * this should be a 32bit wide int or uint vector type,
+ * otherwise a float vector type.
+ *
+ * \param packed The rgba8 values to unpack.
+ *
+ * \param rgba The 4 SoA return vectors.
+ */
+void
+lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
+ struct lp_type dst_type,
+ LLVMValueRef packed,
+ LLVMValueRef *rgba)
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef mask = lp_build_const_int_vec(gallivm, dst_type, 0xff);
+ unsigned chan;
+
+ /* Reinterpret the packed pixels as the integer vector type derived from
+ * dst_type, so the shifts and masks below operate on integers.
+ * XXX technically shouldn't use that for uint dst_type */
+ packed = LLVMBuildBitCast(builder, packed,
+ lp_build_int_vec_type(gallivm, dst_type), "");
+
+ /* Decode the input vector components: shift each channel's byte down to
+ * bit 0, mask it to 8 bits (skipped for the byte ending at bit 32, where
+ * the logical shift already cleared the upper bits), and convert to
+ * float only when dst_type is floating. */
+ for (chan = 0; chan < 4; ++chan) {
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+ unsigned start = chan*8;
+#else
+ unsigned start = (3-chan)*8;
+#endif
+ unsigned stop = start + 8;
+ LLVMValueRef input;
+
+ input = packed;
+
+ if (start)
+ input = LLVMBuildLShr(builder, input,
+ lp_build_const_int_vec(gallivm, dst_type, start), "");
+
+ if (stop < 32)
+ input = LLVMBuildAnd(builder, input, mask, "");
+
+ if (dst_type.floating)
+ input = lp_build_unsigned_norm_to_float(gallivm, 8, dst_type, input);
+
+ rgba[chan] = input;
+ }
}
+
/**
- * Fetch a pixel into a SoA.
+ * Fetch texels from a texture, returning them in SoA layout.
*
- * i and j are the sub-block pixel coordinates.
+ * \param gallivm the gallivm state; its builder is used to emit the code
+ * \param format_desc describes the texture format being fetched
+ *
+ * \param type the desired return type for 'rgba_out'. The vector length
+ * is the number of texels to fetch
+ *
+ * \param base_ptr points to the base of the texture mip tree.
+ * \param offset offset to start of the texture image block. For non-
+ * compressed formats, this simply is an offset to the texel.
+ * For compressed formats, it is an offset to the start of the
+ * compressed data block.
+ *
+ * \param i, j the sub-block pixel coordinates. For non-compressed formats
+ * these will always be (0,0). For compressed formats, i will
+ * be in [0, block_width-1] and j will be in [0, block_height-1].
+ *
+ * \param rgba_out returns the four SoA channel vectors
*/
void
-lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
+lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
const struct util_format_description *format_desc,
struct lp_type type,
LLVMValueRef base_ptr,
LLVMValueRef offset,
LLVMValueRef i,
LLVMValueRef j,
- LLVMValueRef *rgba)
+ LLVMValueRef rgba_out[4])
{
+ LLVMBuilderRef builder = gallivm->builder;
if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
+ format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB ||
format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) &&
format_desc->block.width == 1 &&
format_desc->block.height == 1 &&
{
/*
* The packed pixel fits into an element of the destination format. Put
- * the packed pixels into a vector and estract each component for all
+ * the packed pixels into a vector and extract each component for all
* vector elements in parallel.
*/
/*
* gather the texels from the texture
+ * Ex: packed = {XYZW, XYZW, XYZW, XYZW}
*/
- packed = lp_build_gather(builder,
+ assert(format_desc->block.bits <= type.width);
+ packed = lp_build_gather(gallivm,
type.length,
format_desc->block.bits,
type.width,
- base_ptr, offset);
+ TRUE,
+ base_ptr, offset, FALSE);
/*
* convert texels to float rgba
*/
- lp_build_unpack_rgba_soa(builder,
+ lp_build_unpack_rgba_soa(gallivm,
format_desc,
type,
- packed, rgba);
+ packed, rgba_out);
+ return;
}
- else {
+
+ if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
+ format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
/*
- * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
- *
- * This is not the most efficient way of fetching pixels, as
- * we miss some opportunities to do vectorization, but this it is a
- * convenient for formats or scenarios for which there was no opportunity
- * or incentive to optimize.
+ * similar conceptually to above but requiring special
+ * AoS packed -> SoA float conversion code.
*/
+ LLVMValueRef packed;
+ assert(type.floating);
+ assert(type.width == 32);
+
+ packed = lp_build_gather(gallivm, type.length,
+ format_desc->block.bits,
+ type.width, TRUE,
+ base_ptr, offset, FALSE);
+ if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
+ lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
+ }
+ else {
+ lp_build_rgb9e5_to_float(gallivm, packed, rgba_out);
+ }
+ return;
+ }
+
+ if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS &&
+ format_desc->block.bits == 64) {
+ /*
+ * Special case: the format is 64 bits per block but only 32 bits
+ * (depth) or 8 bits (stencil) of each block are required, so a
+ * 32bit gather per texel is enough.
+ */
+ LLVMValueRef packed;
+
+ if (format_desc->format == PIPE_FORMAT_X32_S8X24_UINT) {
+ /*
+ * For stencil simply fix up offsets - could in fact change
+ * base_ptr instead even outside the shader. The stencil value
+ * is gathered at offset + 4 and masked to its low 8 bits.
+ */
+ unsigned mask = (1 << 8) - 1;
+ LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4);
+ offset = LLVMBuildAdd(builder, offset, s_offset, "");
+ packed = lp_build_gather(gallivm, type.length, 32, type.width,
+ TRUE, base_ptr, offset, FALSE);
+ packed = LLVMBuildAnd(builder, packed,
+ lp_build_const_int_vec(gallivm, type, mask), "");
+ }
+ else {
+ assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
+ packed = lp_build_gather(gallivm, type.length, 32, type.width,
+ TRUE, base_ptr, offset, TRUE);
+ packed = LLVMBuildBitCast(builder, packed,
+ lp_build_vec_type(gallivm, type), "");
+ }
+ /* for consistency with lp_build_unpack_rgba_soa() return sss1 or zzz1 */
+ rgba_out[0] = rgba_out[1] = rgba_out[2] = packed;
+ rgba_out[3] = lp_build_const_vec(gallivm, type, 1.0f);
+ return;
+ }
+
+ /*
+ * Try calling lp_build_fetch_rgba_aos for all pixels at once: when the
+ * format fits into 8bit unorm, 32bit float output is wanted and the
+ * vector length is 1 or a multiple of 4, fetch type.length * 4 unorm8
+ * values in AoS layout and expand them to SoA with
+ * lp_build_rgba8_to_fi32_soa().
+ */
+
+ if (util_format_fits_8unorm(format_desc) &&
+ type.floating && type.width == 32 &&
+ (type.length == 1 || (type.length % 4 == 0))) {
+ struct lp_type tmp_type;
+ LLVMValueRef tmp;
+
+ memset(&tmp_type, 0, sizeof tmp_type);
+ tmp_type.width = 8;
+ tmp_type.length = type.length * 4;
+ tmp_type.norm = TRUE;
+
+ tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
+ TRUE, base_ptr, offset, i, j);
+
+ lp_build_rgba8_to_fi32_soa(gallivm,
+ type,
+ tmp,
+ rgba_out);
+
+ return;
+ }
+
+ /*
+ * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
+ *
+ * This is not the most efficient way of fetching pixels, as we
+ * miss some opportunities to do vectorization, but this is
+ * convenient for formats or scenarios for which there was no
+ * opportunity or incentive to optimize.
+ */
+
+ {
unsigned k, chan;
+ struct lp_type tmp_type;
- assert(type.floating);
+ if (gallivm_debug & GALLIVM_DEBUG_PERF) {
+ debug_printf("%s: scalar unpacking of %s\n",
+ __FUNCTION__, format_desc->short_name);
+ }
+
+ tmp_type = type;
+ tmp_type.length = 4;
for (chan = 0; chan < 4; ++chan) {
- rgba[chan] = lp_build_undef(type);
+ rgba_out[chan] = lp_build_undef(gallivm, type);
}
+ /* loop over number of pixels (one iteration per element of 'type') */
for(k = 0; k < type.length; ++k) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
+ LLVMValueRef index = lp_build_const_int32(gallivm, k);
LLVMValueRef offset_elem;
- LLVMValueRef ptr;
LLVMValueRef i_elem, j_elem;
LLVMValueRef tmp;
- offset_elem = LLVMBuildExtractElement(builder, offset, index, "");
- ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, "");
+ offset_elem = LLVMBuildExtractElement(builder, offset,
+ index, "");
i_elem = LLVMBuildExtractElement(builder, i, index, "");
j_elem = LLVMBuildExtractElement(builder, j, index, "");
- tmp = lp_build_fetch_rgba_aos(builder, format_desc, ptr, i_elem, j_elem);
+ /* Get a single 4-element {R,G,B,A} pixel; tmp_type is 'type' with
+ * its length set to 4, so the elements are not necessarily float */
+ tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
+ TRUE, base_ptr, offset_elem,
+ i_elem, j_elem);
/*
- * AoS to SoA
+ * Insert the AoS tmp value channels into the SoA result vectors at
+ * position = 'index'.
*/
-
for (chan = 0; chan < 4; ++chan) {
- LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0),
+ LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan),
tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, "");
- rgba[chan] = LLVMBuildInsertElement(builder, rgba[chan], tmp_chan, index, "");
+ rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan],
+ tmp_chan, index, "");
}
}
}