*
**************************************************************************/
+/**
+ * @file
+ * Helper functions for swizzling/shuffling.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
#include "util/u_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
+#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
+/**
+ * Build a vector of the given LLVM vector type with the same scalar inserted
+ * at every element position.
+ *
+ * @param builder   LLVM builder passed to each LLVMBuildInsertElement call
+ * @param vec_type  LLVM vector type of the result; its size, as reported by
+ *                  LLVMGetVectorSize, is the number of insertions performed
+ * @param scalar    value inserted at every index (presumably of vec_type's
+ *                  element type -- not checked here)
+ * @return the value produced by the last insertion, or the undef value of
+ *         vec_type if the reported vector size is zero
+ */
+LLVMValueRef
+lp_build_broadcast(LLVMBuilderRef builder,
+ LLVMTypeRef vec_type,
+ LLVMValueRef scalar)
+{
+ const unsigned n = LLVMGetVectorSize(vec_type);
+ LLVMValueRef res;
+ unsigned i;
+
+ /* Start from an undefined vector and overwrite each element in turn. */
+ res = LLVMGetUndef(vec_type);
+ for(i = 0; i < n; ++i) {
+ /* Element position as a 32-bit integer constant. */
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ res = LLVMBuildInsertElement(builder, res, scalar, index, "");
+ }
+
+ return res;
+}
+
+
+/**
+ * Build a vector with the same scalar inserted at every element position,
+ * taking the builder, element count and starting value from the build
+ * context.
+ *
+ * @param bld     build context; bld->builder receives the insertions,
+ *                bld->type.length is the number of elements written and
+ *                bld->undef is the starting value (presumably an undefined
+ *                vector of the context's type -- confirm against the context
+ *                setup code)
+ * @param scalar  value inserted at every index (presumably of the context's
+ *                element type -- not checked here)
+ * @return the value produced by the last insertion, or bld->undef if
+ *         bld->type.length is zero
+ */
+LLVMValueRef
+lp_build_broadcast_scalar(struct lp_build_context *bld,
+ LLVMValueRef scalar)
+{
+ const struct lp_type type = bld->type;
+ LLVMValueRef res;
+ unsigned i;
+
+ /* Begin with the context's undef value and overwrite each element. */
+ res = bld->undef;
+ for(i = 0; i < type.length; ++i) {
+ /* Element position as a 32-bit integer constant. */
+ LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
+ res = LLVMBuildInsertElement(bld->builder, res, scalar, index, "");
+ }
+
+ return res;
+}
+
+
LLVMValueRef
lp_build_broadcast_aos(struct lp_build_context *bld,
LLVMValueRef a,
unsigned channel)
{
- const union lp_type type = bld->type;
+ const struct lp_type type = bld->type;
const unsigned n = type.length;
unsigned i, j;
if(a == bld->undef || a == bld->zero || a == bld->one)
return a;
+ /* XXX: SSSE3 has PSHUFB which should be better than bitmasks, but forcing
+ * the use of shuffles here actually causes worse results. More investigation
+ * is needed. */
if (n <= 4) {
/*
* Shuffle.
/*
* Bit mask and recursive shifts
*
- * XYZW XYZW .... XYZW
- * _Y__ _Y__ .... _Y__
- * YY_ YY__ .... YY__
- * YYYY YYYY .... YYYY
+ * XYZW XYZW .... XYZW <= input
+ * 0Y00 0Y00 .... 0Y00
+ * YY00 YY00 .... YY00
+ * YYYY YYYY .... YYYY <= output
*/
- union lp_type type4 = type;
+ struct lp_type type4 = type;
const char shifts[4][2] = {
{ 1, 2},
{-1, 2},
#endif
if(shift > 0)
- tmp = LLVMBuildLShr(bld->builder, a, lp_build_const_shift(type4, shift*type.width), "");
+ tmp = LLVMBuildLShr(bld->builder, a, lp_build_int_const_scalar(type4, shift*type.width), "");
if(shift < 0)
- tmp = LLVMBuildShl(bld->builder, a, lp_build_const_shift(type4, -shift*type.width), "");
+ tmp = LLVMBuildShl(bld->builder, a, lp_build_int_const_scalar(type4, -shift*type.width), "");
assert(tmp);
if(tmp)
}
-LLVMValueRef
-lp_build_select_aos(struct lp_build_context *bld,
- LLVMValueRef a,
- LLVMValueRef b,
- boolean cond[4])
-{
- const union lp_type type = bld->type;
- const unsigned n = type.length;
- unsigned i, j;
-
- if(a == b)
- return a;
- if(cond[0] && cond[1] && cond[2] && cond[3])
- return a;
- if(!cond[0] && !cond[1] && !cond[2] && !cond[3])
- return b;
- if(a == bld->undef || b == bld->undef)
- return bld->undef;
-
- /*
- * There are three major ways of accomplishing this:
- * - with a shuffle,
- * - with a select,
- * - or with a bit mask.
- *
- * Select isn't supported for vector types yet.
- * The flip between these is empirical and might need to be.
- */
- if (n <= 4) {
- /*
- * Shuffle.
- */
- LLVMTypeRef elem_type = LLVMInt32Type();
- LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
-
- for(j = 0; j < n; j += 4)
- for(i = 0; i < 4; ++i)
- shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0);
-
- return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
- }
-#if 0
- else if(0) {
- /* FIXME: Unfortunately select of vectors do not work */
- /* Use a select */
- LLVMTypeRef elem_type = LLVMInt1Type();
- LLVMValueRef cond[LP_MAX_VECTOR_LENGTH];
-
- for(j = 0; j < n; j += 4)
- for(i = 0; i < 4; ++i)
- cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0);
-
- return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, "");
- }
-#endif
- else {
- LLVMValueRef mask = lp_build_const_mask_aos(type, cond);
-
- /* TODO: On SSE4 we could do this with a single instruction -- PBLENDVB */
-
- a = LLVMBuildAnd(bld->builder, a, mask, "");
-
- /* This often gets translated to PANDN, but sometimes the NOT is
- * pre-computed and stored in another constant. The best strategy depends
- * on available registers, so it is not a big deal -- hopefully LLVM does
- * the right decision attending the rest of the program.
- */
- b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
-
- return LLVMBuildOr(bld->builder, a, b, "");
- }
-}
-
-
LLVMValueRef
lp_build_swizzle1_aos(struct lp_build_context *bld,
LLVMValueRef a,
- unsigned char swizzle[4])
+ const unsigned char swizzle[4])
{
const unsigned n = bld->type.length;
unsigned i, j;
lp_build_swizzle2_aos(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b,
- unsigned char swizzle[4])
+ const unsigned char swizzle[4])
{
const unsigned n = bld->type.length;
unsigned i, j;
return lp_build_swizzle1_aos(bld, a, swizzle);
if(a == b) {
- swizzle[0] %= 4;
- swizzle[1] %= 4;
- swizzle[2] %= 4;
- swizzle[3] %= 4;
- return lp_build_swizzle1_aos(bld, a, swizzle);
+ unsigned char swizzle1[4];
+ swizzle1[0] = swizzle[0] % 4;
+ swizzle1[1] = swizzle[1] % 4;
+ swizzle1[2] = swizzle[2] % 4;
+ swizzle1[3] = swizzle[3] % 4;
+ return lp_build_swizzle1_aos(bld, a, swizzle1);
}
if(swizzle[0] % 4 == 0 &&