* @author Jose Fonseca <jfonseca@vmware.com>
*/
-
+#include <inttypes.h> /* for PRIx64 macro */
#include "util/u_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
+#include "lp_bld_init.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
+#include "lp_bld_pack.h"
LLVMValueRef
-lp_build_broadcast(LLVMBuilderRef builder,
+lp_build_broadcast(struct gallivm_state *gallivm,
LLVMTypeRef vec_type,
LLVMValueRef scalar)
{
- const unsigned n = LLVMGetVectorSize(vec_type);
LLVMValueRef res;
- unsigned i;
- res = LLVMGetUndef(vec_type);
- for(i = 0; i < n; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- res = LLVMBuildInsertElement(builder, res, scalar, index, "");
+ if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
+ /* scalar */
+ assert(vec_type == LLVMTypeOf(scalar));
+ res = scalar;
+ } else {
+ LLVMBuilderRef builder = gallivm->builder;
+ const unsigned length = LLVMGetVectorSize(vec_type);
+ LLVMValueRef undef = LLVMGetUndef(vec_type);
+ /* The shuffle vector is always made of int32 elements */
+ LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);
+ LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
+
+ assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));
+
+ res = LLVMBuildInsertElement(builder, undef, scalar, LLVMConstNull(i32_type), "");
+ res = LLVMBuildShuffleVector(builder, res, undef, LLVMConstNull(i32_vec_type), "");
}
return res;
lp_build_broadcast_scalar(struct lp_build_context *bld,
LLVMValueRef scalar)
{
- const struct lp_type type = bld->type;
+ assert(lp_check_elem_type(bld->type, LLVMTypeOf(scalar)));
- assert(lp_check_elem_type(type, LLVMTypeOf(scalar)));
-
- if (type.length == 1) {
- return scalar;
- }
- else {
- LLVMValueRef res;
-
-#if HAVE_LLVM >= 0x207
- /* The shuffle vector is always made of int32 elements */
- struct lp_type i32_vec_type = lp_type_int_vec(32);
- i32_vec_type.length = type.length;
-
- res = LLVMBuildInsertElement(bld->builder, bld->undef, scalar,
- LLVMConstInt(LLVMInt32Type(), 0, 0), "");
- res = LLVMBuildShuffleVector(bld->builder, res, bld->undef,
- lp_build_const_int_vec(i32_vec_type, 0), "");
-#else
- /* XXX: The above path provokes a bug in LLVM 2.6 */
- unsigned i;
- res = bld->undef;
- for(i = 0; i < type.length; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- res = LLVMBuildInsertElement(bld->builder, res, scalar, index, "");
- }
-#endif
- return res;
- }
+ return lp_build_broadcast(bld->gallivm, bld->vec_type, scalar);
}
/**
- * Combined extract and broadcast (or a mere shuffle when the two types match)
+ * Combined extract and broadcast (mere shuffle in most cases)
*/
LLVMValueRef
-lp_build_extract_broadcast(LLVMBuilderRef builder,
+lp_build_extract_broadcast(struct gallivm_state *gallivm,
struct lp_type src_type,
struct lp_type dst_type,
LLVMValueRef vector,
LLVMValueRef index)
{
- LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
LLVMValueRef res;
assert(src_type.floating == dst_type.floating);
* Broadcast scalar -> vector.
*/
- res = lp_build_broadcast(builder,
- lp_build_vec_type(dst_type),
+ res = lp_build_broadcast(gallivm,
+ lp_build_vec_type(gallivm, dst_type),
vector);
}
}
else {
- if (dst_type.length == src_type.length) {
+ if (dst_type.length > 1) {
/*
- * Special shuffle of the same size.
+ * shuffle - result can be of different length.
*/
LLVMValueRef shuffle;
- shuffle = lp_build_broadcast(builder,
+ shuffle = lp_build_broadcast(gallivm,
LLVMVectorType(i32t, dst_type.length),
index);
- res = LLVMBuildShuffleVector(builder, vector,
- LLVMGetUndef(lp_build_vec_type(dst_type)),
+ res = LLVMBuildShuffleVector(gallivm->builder, vector,
+ LLVMGetUndef(lp_build_vec_type(gallivm, src_type)),
shuffle, "");
}
else {
- LLVMValueRef scalar;
- scalar = LLVMBuildExtractElement(builder, vector, index, "");
- if (dst_type.length == 1) {
- /*
- * Trivial extract scalar from vector.
- */
-
- res = scalar;
- }
- else {
- /*
- * General case of different sized vectors.
- */
-
- res = lp_build_broadcast(builder,
- lp_build_vec_type(dst_type),
- vector);
- }
+ /*
+ * Trivial extract scalar from vector.
+ */
+ res = LLVMBuildExtractElement(gallivm->builder, vector, index, "");
}
}
/**
- * Swizzle one channel into all other three channels.
+ * Swizzle one channel into other channels.
*/
LLVMValueRef
lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
LLVMValueRef a,
- unsigned channel)
+ unsigned channel,
+ unsigned num_channels)
{
+ LLVMBuilderRef builder = bld->gallivm->builder;
const struct lp_type type = bld->type;
const unsigned n = type.length;
unsigned i, j;
- if(a == bld->undef || a == bld->zero || a == bld->one)
+ if(a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1)
return a;
+ assert(num_channels == 2 || num_channels == 4);
+
/* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
* using shuffles here actually causes worst results. More investigation is
* needed. */
- if (type.width >= 16) {
+ if (LLVMIsConstant(a) ||
+ type.width >= 16) {
/*
* Shuffle.
*/
- LLVMTypeRef elem_type = LLVMInt32Type();
+ LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
- for(j = 0; j < n; j += 4)
- for(i = 0; i < 4; ++i)
+ for(j = 0; j < n; j += num_channels)
+ for(i = 0; i < num_channels; ++i)
shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
- return LLVMBuildShuffleVector(bld->builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
+ return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
+ }
+ else if (num_channels == 2) {
+ /*
+ * Bit mask and shifts
+ *
+ * XY XY .... XY <= input
+ * 0Y 0Y .... 0Y
+ * YY YY .... YY
+ * YY YY .... YY <= output
+ */
+ struct lp_type type2;
+ LLVMValueRef tmp = NULL;
+ int shift;
+
+ a = LLVMBuildAnd(builder, a,
+ lp_build_const_mask_aos(bld->gallivm,
+ type, 1 << channel, num_channels), "");
+
+ type2 = type;
+ type2.floating = FALSE;
+ type2.width *= 2;
+ type2.length /= 2;
+
+ a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), "");
+
+ /*
+ * Vector element 0 is always channel X.
+ *
+ * 76 54 32 10 (array numbering)
+ * Little endian reg in: YX YX YX YX
+ * Little endian reg out: YY YY YY YY if shift right (shift == -1)
+ * XX XX XX XX if shift left (shift == 1)
+ *
+ * 01 23 45 67 (array numbering)
+ * Big endian reg in: XY XY XY XY
+ * Big endian reg out: YY YY YY YY if shift left (shift == 1)
+ * XX XX XX XX if shift right (shift == -1)
+ *
+ */
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+ shift = channel == 0 ? 1 : -1;
+#else
+ shift = channel == 0 ? -1 : 1;
+#endif
+
+ if (shift > 0) {
+ tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), "");
+ } else if (shift < 0) {
+ tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), "");
+ }
+
+ assert(tmp);
+ if (tmp) {
+ a = LLVMBuildOr(builder, a, tmp, "");
+ }
+
+ return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
}
else {
/*
* Bit mask and recursive shifts
*
+ * Little-endian registers:
+ *
+ * 7654 3210
+ * WZYX WZYX .... WZYX <= input
+ * 00Y0 00Y0 .... 00Y0 <= mask
+ * 00YY 00YY .... 00YY <= shift right 1 (shift amount -1)
+ * YYYY YYYY .... YYYY <= shift left 2 (shift amount 2)
+ *
+ * Big-endian registers:
+ *
+ * 0123 4567
* XYZW XYZW .... XYZW <= input
- * 0Y00 0Y00 .... 0Y00
- * YY00 YY00 .... YY00
- * YYYY YYYY .... YYYY <= output
+ * 0Y00 0Y00 .... 0Y00 <= mask
+ * YY00 YY00 .... YY00 <= shift left 1 (shift amount 1)
+ * YYYY YYYY .... YYYY <= shift right 2 (shift amount -2)
+ *
+ * shifts[] gives little-endian shift amounts; we need to negate for big-endian.
*/
struct lp_type type4;
- const char shifts[4][2] = {
+ const int shifts[4][2] = {
{ 1, 2},
{-1, 2},
{ 1, -2},
};
unsigned i;
- a = LLVMBuildAnd(bld->builder, a,
- lp_build_const_mask_aos(type, 1 << channel), "");
+ a = LLVMBuildAnd(builder, a,
+ lp_build_const_mask_aos(bld->gallivm,
+ type, 1 << channel, 4), "");
/*
* Build a type where each element is an integer that cover the four
type4.width *= 4;
type4.length /= 4;
- a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), "");
+ a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
for(i = 0; i < 2; ++i) {
LLVMValueRef tmp = NULL;
int shift = shifts[channel][i];
-#ifdef PIPE_ARCH_LITTLE_ENDIAN
+ /* See endianness diagram above */
+#ifdef PIPE_ARCH_BIG_ENDIAN
shift = -shift;
#endif
if(shift > 0)
- tmp = LLVMBuildLShr(bld->builder, a, lp_build_const_int_vec(type4, shift*type.width), "");
+ tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
if(shift < 0)
- tmp = LLVMBuildShl(bld->builder, a, lp_build_const_int_vec(type4, -shift*type.width), "");
+ tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
assert(tmp);
if(tmp)
- a = LLVMBuildOr(bld->builder, a, tmp, "");
+ a = LLVMBuildOr(builder, a, tmp, "");
}
- return LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type), "");
+ return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
}
}
+/**
+ * Swizzle a vector consisting of an array of XYZW structs.
+ *
+ * This fills a vector of dst_len length with the swizzled channels from src.
+ *
+ * e.g. with swizzles = { 2, 1, 0 } and swizzle_count = 6 results in
+ * RGBA RGBA = BGR BGR BG
+ *
+ * @param swizzles the swizzle array
+ * @param num_swizzles the number of elements in swizzles
+ * @param dst_len the length of the result
+ */
+LLVMValueRef
+lp_build_swizzle_aos_n(struct gallivm_state* gallivm,
+ LLVMValueRef src,
+ const unsigned char* swizzles,
+ unsigned num_swizzles,
+ unsigned dst_len)
+{
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH];
+ unsigned i;
+
+ assert(dst_len < LP_MAX_VECTOR_WIDTH);
+
+ for (i = 0; i < dst_len; ++i) {
+ int swizzle = swizzles[i % num_swizzles];
+
+ if (swizzle == LP_BLD_SWIZZLE_DONTCARE) {
+ shuffles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
+ } else {
+ shuffles[i] = lp_build_const_int32(gallivm, swizzle);
+ }
+ }
+
+ return LLVMBuildShuffleVector(builder, src, LLVMGetUndef(LLVMTypeOf(src)), LLVMConstVector(shuffles, dst_len), "");
+}
+
+
LLVMValueRef
lp_build_swizzle_aos(struct lp_build_context *bld,
LLVMValueRef a,
const unsigned char swizzles[4])
{
+ LLVMBuilderRef builder = bld->gallivm->builder;
const struct lp_type type = bld->type;
const unsigned n = type.length;
unsigned i, j;
case PIPE_SWIZZLE_GREEN:
case PIPE_SWIZZLE_BLUE:
case PIPE_SWIZZLE_ALPHA:
- return lp_build_swizzle_scalar_aos(bld, a, swizzles[0]);
+ return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4);
case PIPE_SWIZZLE_ZERO:
return bld->zero;
case PIPE_SWIZZLE_ONE:
return bld->one;
+ case LP_BLD_SWIZZLE_DONTCARE:
+ return bld->undef;
default:
assert(0);
return bld->undef;
}
}
- if (type.width >= 16) {
+ if (LLVMIsConstant(a) ||
+ type.width >= 16) {
/*
* Shuffle.
*/
- LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(type));
- LLVMTypeRef i32t = LLVMInt32Type();
+ LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type));
+ LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];
case PIPE_SWIZZLE_BLUE:
case PIPE_SWIZZLE_ALPHA:
shuffle = j + swizzles[i];
+ shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
break;
case PIPE_SWIZZLE_ZERO:
shuffle = type.length + 0;
+ shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
if (!aux[0]) {
- aux[0] = lp_build_const_elem(type, 0.0);
+ aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0);
}
break;
case PIPE_SWIZZLE_ONE:
shuffle = type.length + 1;
+ shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
if (!aux[1]) {
- aux[1] = lp_build_const_elem(type, 1.0);
+ aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0);
}
break;
+ case LP_BLD_SWIZZLE_DONTCARE:
+ shuffles[j + i] = LLVMGetUndef(i32t);
+ break;
}
- shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
}
}
}
}
- return LLVMBuildShuffleVector(bld->builder, a,
+ return LLVMBuildShuffleVector(builder, a,
LLVMConstVector(aux, n),
LLVMConstVector(shuffles, n), "");
} else {
*
* For example, this will convert BGRA to RGBA by doing
*
+ * Little endian:
* rgba = (bgra & 0x00ff0000) >> 16
* | (bgra & 0xff00ff00)
* | (bgra & 0x000000ff) << 16
*
+ * Big endian:A
+ * rgba = (bgra & 0x0000ff00) << 16
+ * | (bgra & 0x00ff00ff)
+ * | (bgra & 0xff000000) >> 16
+ *
* This is necessary not only for faster cause, but because X86 backend
* will refuse shuffles of <4 x i8> vectors
*/
cond |= 1 << chan;
}
}
- res = lp_build_select_aos(bld, cond, bld->one, bld->zero);
+ res = lp_build_select_aos(bld, cond, bld->one, bld->zero, 4);
/*
* Build a type where each element is an integer that cover the four
type4.width *= 4;
type4.length /= 4;
- a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(type4), "");
- res = LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type4), "");
+ a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
+ res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), "");
/*
* Mask and shift the channels, trying to group as many channels in the
- * same shift as possible
+ * same shift as possible. The shift amount is positive for shifts left
+ * and negative for shifts right.
*/
for (shift = -3; shift <= 3; ++shift) {
- unsigned long long mask = 0;
+ uint64_t mask = 0;
assert(type4.width <= sizeof(mask)*8);
+ /*
+ * Vector element numbers follow the XYZW order, so 0 is always X, etc.
+ * After widening 4 times we have:
+ *
+ * 3210
+ * Little-endian register layout: WZYX
+ *
+ * 0123
+ * Big-endian register layout: XYZW
+ *
+ * For little-endian, higher-numbered channels are obtained by a shift right
+ * (negative shift amount) and lower-numbered channels by a shift left
+ * (positive shift amount). The opposite is true for big-endian.
+ */
for (chan = 0; chan < 4; ++chan) {
- /* FIXME: big endian */
- if (swizzles[chan] < 4 &&
- chan - swizzles[chan] == shift) {
- mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
+ if (swizzles[chan] < 4) {
+ /* We need to move channel swizzles[chan] into channel chan */
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+ if (swizzles[chan] - chan == -shift) {
+ mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
+ }
+#else
+ if (swizzles[chan] - chan == shift) {
+ mask |= ((1ULL << type.width) - 1) << (type4.width - type.width) >> (swizzles[chan] * type.width);
+ }
+#endif
}
}
if (mask) {
LLVMValueRef masked;
LLVMValueRef shifted;
-
if (0)
- debug_printf("shift = %i, mask = 0x%08llx\n", shift, mask);
+ debug_printf("shift = %i, mask = %" PRIx64 "\n", shift, mask);
- masked = LLVMBuildAnd(bld->builder, a,
- lp_build_const_int_vec(type4, mask), "");
+ masked = LLVMBuildAnd(builder, a,
+ lp_build_const_int_vec(bld->gallivm, type4, mask), "");
if (shift > 0) {
- shifted = LLVMBuildShl(bld->builder, masked,
- lp_build_const_int_vec(type4, shift*type.width), "");
+ shifted = LLVMBuildShl(builder, masked,
+ lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
} else if (shift < 0) {
- shifted = LLVMBuildLShr(bld->builder, masked,
- lp_build_const_int_vec(type4, -shift*type.width), "");
+ shifted = LLVMBuildLShr(builder, masked,
+ lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
} else {
shifted = masked;
}
- res = LLVMBuildOr(bld->builder, res, shifted, "");
+ res = LLVMBuildOr(builder, res, shifted, "");
}
}
- return LLVMBuildBitCast(bld->builder, res, lp_build_vec_type(type), "");
+ return LLVMBuildBitCast(builder, res,
+ lp_build_vec_type(bld->gallivm, type), "");
}
}
lp_build_swizzle_soa(bld, unswizzled, swizzles, values);
}
+
+
+/**
+ * Transpose from AOS <-> SOA
+ *
+ * @param single_type_lp type of pixels
+ * @param src the 4 * n pixel input
+ * @param dst the 4 * n pixel output
+ */
+void
+lp_build_transpose_aos(struct gallivm_state *gallivm,
+ struct lp_type single_type_lp,
+ const LLVMValueRef src[4],
+ LLVMValueRef dst[4])
+{
+ struct lp_type double_type_lp = single_type_lp;
+ LLVMTypeRef single_type;
+ LLVMTypeRef double_type;
+ LLVMValueRef t0, t1, t2, t3;
+
+ double_type_lp.length >>= 1;
+ double_type_lp.width <<= 1;
+
+ double_type = lp_build_vec_type(gallivm, double_type_lp);
+ single_type = lp_build_vec_type(gallivm, single_type_lp);
+
+ /* Interleave x, y, z, w -> xy and zw */
+ t0 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 0);
+ t1 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 0);
+ t2 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 1);
+ t3 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 1);
+
+ /* Cast to double width type for second interleave */
+ t0 = LLVMBuildBitCast(gallivm->builder, t0, double_type, "t0");
+ t1 = LLVMBuildBitCast(gallivm->builder, t1, double_type, "t1");
+ t2 = LLVMBuildBitCast(gallivm->builder, t2, double_type, "t2");
+ t3 = LLVMBuildBitCast(gallivm->builder, t3, double_type, "t3");
+
+ /* Interleave xy, zw -> xyzw */
+ dst[0] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 0);
+ dst[1] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 1);
+ dst[2] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 0);
+ dst[3] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 1);
+
+ /* Cast back to original single width type */
+ dst[0] = LLVMBuildBitCast(gallivm->builder, dst[0], single_type, "dst0");
+ dst[1] = LLVMBuildBitCast(gallivm->builder, dst[1], single_type, "dst1");
+ dst[2] = LLVMBuildBitCast(gallivm->builder, dst[2], single_type, "dst2");
+ dst[3] = LLVMBuildBitCast(gallivm->builder, dst[3], single_type, "dst3");
+}
+
+
+/**
+ * Transpose from AOS <-> SOA for num_srcs
+ */
+void
+lp_build_transpose_aos_n(struct gallivm_state *gallivm,
+ struct lp_type type,
+ const LLVMValueRef* src,
+ unsigned num_srcs,
+ LLVMValueRef* dst)
+{
+ switch (num_srcs) {
+ case 1:
+ dst[0] = src[0];
+ break;
+
+ case 2:
+ {
+ /* Note: we must use a temporary incase src == dst */
+ LLVMValueRef lo, hi;
+
+ lo = lp_build_interleave2_half(gallivm, type, src[0], src[1], 0);
+ hi = lp_build_interleave2_half(gallivm, type, src[0], src[1], 1);
+
+ dst[0] = lo;
+ dst[1] = hi;
+ break;
+ }
+
+ case 4:
+ lp_build_transpose_aos(gallivm, type, src, dst);
+ break;
+
+ default:
+ assert(0);
+ };
+}
+
+
+/**
+ * Pack n-th element of aos values,
+ * pad out to destination size.
+ * i.e. x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _
+ */
+LLVMValueRef
+lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ const LLVMValueRef src,
+ unsigned channel)
+{
+ LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
+ LLVMValueRef undef = LLVMGetUndef(i32t);
+ LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+ unsigned num_src = src_type.length / 4;
+ unsigned num_dst = dst_type.length;
+ unsigned i;
+
+ assert(num_src <= num_dst);
+
+ for (i = 0; i < num_src; i++) {
+ shuffles[i] = LLVMConstInt(i32t, i * 4 + channel, 0);
+ }
+ for (i = num_src; i < num_dst; i++) {
+ shuffles[i] = undef;
+ }
+
+ if (num_dst == 1) {
+ return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], "");
+ }
+ else {
+ return LLVMBuildShuffleVector(gallivm->builder, src, src,
+ LLVMConstVector(shuffles, num_dst), "");
+ }
+}
+
+
+/**
+ * Unpack and broadcast packed aos values consisting of only the
+ * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2
+ */
+LLVMValueRef
+lp_build_unpack_broadcast_aos_scalars(struct gallivm_state *gallivm,
+ struct lp_type src_type,
+ struct lp_type dst_type,
+ const LLVMValueRef src)
+{
+ LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
+ LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
+ unsigned num_dst = dst_type.length;
+ unsigned num_src = dst_type.length / 4;
+ unsigned i;
+
+ assert(num_dst / 4 <= src_type.length);
+
+ for (i = 0; i < num_src; i++) {
+ shuffles[i*4] = LLVMConstInt(i32t, i, 0);
+ shuffles[i*4+1] = LLVMConstInt(i32t, i, 0);
+ shuffles[i*4+2] = LLVMConstInt(i32t, i, 0);
+ shuffles[i*4+3] = LLVMConstInt(i32t, i, 0);
+ }
+
+ if (num_src == 1) {
+ return lp_build_extract_broadcast(gallivm, src_type, dst_type,
+ src, shuffles[0]);
+ }
+ else {
+ return LLVMBuildShuffleVector(gallivm->builder, src, src,
+ LLVMConstVector(shuffles, num_dst), "");
+ }
+}
+