#include "lp_bld_type.h"
#include "lp_bld_const.h"
+#include "lp_bld_init.h"
#include "lp_bld_intr.h"
#include "lp_bld_debug.h"
#include "lp_bld_logic.h"
*
* select <4 x i1> %C, %A, %B
*
- * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not
- * supported on any backend.
+ * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only
+ * supported on some backends (x86) starting with llvm 3.1.
*
* Expanding the boolean vector to full SIMD register width, as in
*
/**
* Build code to compare two values 'a' and 'b' of 'type' using the given func.
* \param func one of PIPE_FUNC_x
+ * If the ordered argument is true the function will use LLVM's ordered
+ * comparisons, otherwise unordered comparisons will be used.
* The result values will be 0 for false or ~0 for true.
*/
-LLVMValueRef
-lp_build_compare(LLVMBuilderRef builder,
- const struct lp_type type,
- unsigned func,
- LLVMValueRef a,
- LLVMValueRef b)
+static LLVMValueRef
+lp_build_compare_ext(struct gallivm_state *gallivm,
+ const struct lp_type type,
+ unsigned func,
+ LLVMValueRef a,
+ LLVMValueRef b,
+ boolean ordered)
{
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+ LLVMBuilderRef builder = gallivm->builder;
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
LLVMValueRef zeros = LLVMConstNull(int_vec_type);
LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
LLVMValueRef cond;
if(func == PIPE_FUNC_ALWAYS)
return ones;
- /* TODO: optimize the constant case */
-
- /* XXX: It is not clear if we should use the ordered or unordered operators */
-
-#if HAVE_LLVM < 0x0207
-#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
- if(type.width * type.length == 128) {
- if(type.floating && util_cpu_caps.has_sse) {
- /* float[4] comparison */
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- LLVMValueRef args[3];
- unsigned cc;
- boolean swap;
-
- swap = FALSE;
- switch(func) {
- case PIPE_FUNC_EQUAL:
- cc = 0;
- break;
- case PIPE_FUNC_NOTEQUAL:
- cc = 4;
- break;
- case PIPE_FUNC_LESS:
- cc = 1;
- break;
- case PIPE_FUNC_LEQUAL:
- cc = 2;
- break;
- case PIPE_FUNC_GREATER:
- cc = 1;
- swap = TRUE;
- break;
- case PIPE_FUNC_GEQUAL:
- cc = 2;
- swap = TRUE;
- break;
- default:
- assert(0);
- return lp_build_undef(type);
- }
-
- if(swap) {
- args[0] = b;
- args[1] = a;
- }
- else {
- args[0] = a;
- args[1] = b;
- }
-
- args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0);
- res = lp_build_intrinsic(builder,
- "llvm.x86.sse.cmp.ps",
- vec_type,
- args, 3);
- res = LLVMBuildBitCast(builder, res, int_vec_type, "");
- return res;
- }
- else if(util_cpu_caps.has_sse2) {
- /* int[4] comparison */
- static const struct {
- unsigned swap:1;
- unsigned eq:1;
- unsigned gt:1;
- unsigned not:1;
- } table[] = {
- {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */
- {1, 0, 1, 0}, /* PIPE_FUNC_LESS */
- {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */
- {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */
- {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */
- {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */
- {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */
- {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */
- };
- const char *pcmpeq;
- const char *pcmpgt;
- LLVMValueRef args[2];
- LLVMValueRef res;
- LLVMTypeRef vec_type = lp_build_vec_type(type);
-
- switch (type.width) {
- case 8:
- pcmpeq = "llvm.x86.sse2.pcmpeq.b";
- pcmpgt = "llvm.x86.sse2.pcmpgt.b";
- break;
- case 16:
- pcmpeq = "llvm.x86.sse2.pcmpeq.w";
- pcmpgt = "llvm.x86.sse2.pcmpgt.w";
- break;
- case 32:
- pcmpeq = "llvm.x86.sse2.pcmpeq.d";
- pcmpgt = "llvm.x86.sse2.pcmpgt.d";
- break;
- default:
- assert(0);
- return lp_build_undef(type);
- }
-
- /* There are no unsigned comparison instructions. So flip the sign bit
- * so that the results match.
- */
- if (table[func].gt && !type.sign) {
- LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
- a = LLVMBuildXor(builder, a, msb, "");
- b = LLVMBuildXor(builder, b, msb, "");
- }
-
- if(table[func].swap) {
- args[0] = b;
- args[1] = a;
- }
- else {
- args[0] = a;
- args[1] = b;
- }
-
- if(table[func].eq)
- res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2);
- else if (table[func].gt)
- res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2);
- else
- res = LLVMConstNull(vec_type);
-
- if(table[func].not)
- res = LLVMBuildNot(builder, res, "");
-
- return res;
- }
- } /* if (type.width * type.length == 128) */
-#endif
-#endif /* HAVE_LLVM < 0x0207 */
-
if(type.floating) {
LLVMRealPredicate op;
switch(func) {
- case PIPE_FUNC_NEVER:
- op = LLVMRealPredicateFalse;
- break;
- case PIPE_FUNC_ALWAYS:
- op = LLVMRealPredicateTrue;
- break;
case PIPE_FUNC_EQUAL:
- op = LLVMRealUEQ;
+ op = ordered ? LLVMRealOEQ : LLVMRealUEQ;
break;
case PIPE_FUNC_NOTEQUAL:
- op = LLVMRealUNE;
+ op = ordered ? LLVMRealONE : LLVMRealUNE;
break;
case PIPE_FUNC_LESS:
- op = LLVMRealULT;
+ op = ordered ? LLVMRealOLT : LLVMRealULT;
break;
case PIPE_FUNC_LEQUAL:
- op = LLVMRealULE;
+ op = ordered ? LLVMRealOLE : LLVMRealULE;
break;
case PIPE_FUNC_GREATER:
- op = LLVMRealUGT;
+ op = ordered ? LLVMRealOGT : LLVMRealUGT;
break;
case PIPE_FUNC_GEQUAL:
- op = LLVMRealUGE;
+ op = ordered ? LLVMRealOGE : LLVMRealUGE;
break;
default:
assert(0);
- return lp_build_undef(type);
+ return lp_build_undef(gallivm, type);
}
-#if HAVE_LLVM >= 0x0207
cond = LLVMBuildFCmp(builder, op, a, b, "");
res = LLVMBuildSExt(builder, cond, int_vec_type, "");
-#else
- if (type.length == 1) {
- cond = LLVMBuildFCmp(builder, op, a, b, "");
- res = LLVMBuildSExt(builder, cond, int_vec_type, "");
- }
- else {
- unsigned i;
-
- res = LLVMGetUndef(int_vec_type);
-
- debug_printf("%s: warning: using slow element-wise float"
- " vector comparison\n", __FUNCTION__);
- for (i = 0; i < type.length; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- cond = LLVMBuildFCmp(builder, op,
- LLVMBuildExtractElement(builder, a, index, ""),
- LLVMBuildExtractElement(builder, b, index, ""),
- "");
- cond = LLVMBuildSelect(builder, cond,
- LLVMConstExtractElement(ones, index),
- LLVMConstExtractElement(zeros, index),
- "");
- res = LLVMBuildInsertElement(builder, res, cond, index, "");
- }
- }
-#endif
}
else {
LLVMIntPredicate op;
break;
default:
assert(0);
- return lp_build_undef(type);
+ return lp_build_undef(gallivm, type);
}
-#if HAVE_LLVM >= 0x0207
cond = LLVMBuildICmp(builder, op, a, b, "");
res = LLVMBuildSExt(builder, cond, int_vec_type, "");
-#else
- if (type.length == 1) {
- cond = LLVMBuildICmp(builder, op, a, b, "");
- res = LLVMBuildSExt(builder, cond, int_vec_type, "");
- }
- else {
- unsigned i;
+ }
- res = LLVMGetUndef(int_vec_type);
+ return res;
+}
- if (gallivm_debug & GALLIVM_DEBUG_PERF) {
- debug_printf("%s: using slow element-wise int"
- " vector comparison\n", __FUNCTION__);
- }
+/**
+ * Build code to compare two values 'a' and 'b' of 'type' using the given func.
+ *
+ * PIPE_FUNC_NEVER and PIPE_FUNC_ALWAYS are answered directly with constant
+ * all-zeros / all-ones integer vectors. Every other function is forwarded
+ * to lp_build_compare_ext() with ordered == FALSE, i.e. floating point
+ * operands are compared with LLVM's unordered predicates.
+ *
+ * \param gallivm state the comparison code is built with
+ * \param type type of 'a' and 'b' (both are asserted with lp_check_value())
+ * \param func one of PIPE_FUNC_x
+ * \param a first operand
+ * \param b second operand
+ * \return the comparison result.
+ * The result values will be 0 for false or ~0 for true.
+ */
+LLVMValueRef
+lp_build_compare(struct gallivm_state *gallivm,
+ const struct lp_type type,
+ unsigned func,
+ LLVMValueRef a,
+ LLVMValueRef b)
+{
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
+ LLVMValueRef zeros = LLVMConstNull(int_vec_type);
+ LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
- for(i = 0; i < type.length; ++i) {
- LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
- cond = LLVMBuildICmp(builder, op,
- LLVMBuildExtractElement(builder, a, index, ""),
- LLVMBuildExtractElement(builder, b, index, ""),
- "");
- cond = LLVMBuildSelect(builder, cond,
- LLVMConstExtractElement(ones, index),
- LLVMConstExtractElement(zeros, index),
- "");
- res = LLVMBuildInsertElement(builder, res, cond, index, "");
- }
- }
-#endif
+ assert(func >= PIPE_FUNC_NEVER);
+ assert(func <= PIPE_FUNC_ALWAYS);
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+
+ if(func == PIPE_FUNC_NEVER)
+ return zeros;
+ if(func == PIPE_FUNC_ALWAYS)
+ return ones;
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+ /*
+ * There are no unsigned integer comparison instructions in SSE.
+ *
+ * This block only reports that case, and only when GALLIVM_DEBUG_PERF is
+ * set in gallivm_debug; it does not alter the code that gets generated.
+ * The message is limited to 128-bit unsigned integer vectors on CPUs
+ * reporting SSE2, and to the LESS/LEQUAL/GREATER/GEQUAL functions.
+ */
+
+ if (!type.floating && !type.sign &&
+ type.width * type.length == 128 &&
+ util_cpu_caps.has_sse2 &&
+ (func == PIPE_FUNC_LESS ||
+ func == PIPE_FUNC_LEQUAL ||
+ func == PIPE_FUNC_GREATER ||
+ func == PIPE_FUNC_GEQUAL) &&
+ (gallivm_debug & GALLIVM_DEBUG_PERF)) {
+ debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
+ __FUNCTION__, type.length, type.width);
}
+#endif
- return res;
+ return lp_build_compare_ext(gallivm, type, func, a, b, FALSE);
}
-
+/**
+ * Build code to compare two values 'a' and 'b' using the given func.
+ *
+ * Thin wrapper that forwards to lp_build_compare_ext() with the gallivm
+ * state and type taken from 'bld' and with ordered == TRUE.
+ *
+ * \param bld build context supplying bld->gallivm and bld->type
+ * \param func one of PIPE_FUNC_x
+ * \param a first operand
+ * \param b second operand
+ * \return the comparison result.
+ * If the operands are floating point numbers, the function will use
+ * ordered comparison which means that it will return true if both
+ * operands are not a NaN and the specified condition evaluates to true.
+ * The result values will be 0 for false or ~0 for true.
+ */
+LLVMValueRef
+lp_build_cmp_ordered(struct lp_build_context *bld,
+ unsigned func,
+ LLVMValueRef a,
+ LLVMValueRef b)
+{
+ return lp_build_compare_ext(bld->gallivm, bld->type, func, a, b, TRUE);
+}
/**
* Build code to compare two values 'a' and 'b' using the given func.
* \param func one of PIPE_FUNC_x
+ * If the operands are floating point numbers, the function will use
+ * unordered comparison which means that it will return true if either
+ * operand is a NaN or the specified condition evaluates to true.
* The result values will be 0 for false or ~0 for true.
*/
LLVMValueRef
LLVMValueRef a,
LLVMValueRef b)
{
- return lp_build_compare(bld->builder, bld->type, func, a, b);
+ return lp_build_compare(bld->gallivm, bld->type, func, a, b);
}
LLVMValueRef a,
LLVMValueRef b)
{
+ LLVMBuilderRef builder = bld->gallivm->builder;
struct lp_type type = bld->type;
LLVMValueRef res;
}
if(type.floating) {
- LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
- a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
- b = LLVMBuildBitCast(bld->builder, b, int_vec_type, "");
+ LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
+ a = LLVMBuildBitCast(builder, a, int_vec_type, "");
+ b = LLVMBuildBitCast(builder, b, int_vec_type, "");
}
- a = LLVMBuildAnd(bld->builder, a, mask, "");
+ a = LLVMBuildAnd(builder, a, mask, "");
/* This often gets translated to PANDN, but sometimes the NOT is
* pre-computed and stored in another constant. The best strategy depends
* on available registers, so it is not a big deal -- hopefully LLVM does
* the right decision attending the rest of the program.
*/
- b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), "");
+ b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), "");
- res = LLVMBuildOr(bld->builder, a, b, "");
+ res = LLVMBuildOr(builder, a, b, "");
if(type.floating) {
- LLVMTypeRef vec_type = lp_build_vec_type(type);
- res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+ LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type);
+ res = LLVMBuildBitCast(builder, res, vec_type, "");
}
return res;
LLVMValueRef a,
LLVMValueRef b)
{
+ LLVMBuilderRef builder = bld->gallivm->builder;
+ LLVMContextRef lc = bld->gallivm->context;
struct lp_type type = bld->type;
LLVMValueRef res;
return a;
if (type.length == 1) {
- mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), "");
- res = LLVMBuildSelect(bld->builder, mask, a, b, "");
+ mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
+ res = LLVMBuildSelect(builder, mask, a, b, "");
+ }
+ else if (0) {
+ /* Generate a vector select.
+ *
+ * XXX: Using vector selects would avoid emitting intrinsics, but they aren't
+ * properly supported yet.
+ *
+ * LLVM 3.1 supports it, but it yields buggy code (e.g. lp_blend_test).
+ *
+ * LLVM 3.0 includes experimental support provided the -promote-elements
+ * option is passed to LLVM's command line (e.g., via
+ * llvm::cl::ParseCommandLineOptions), but resulting code quality is much
+ * worse, probably because some optimization passes don't know how to
+ * handle vector selects.
+ *
+ * See also:
+ * - http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-October/043659.html
+ */
+
+ /* Convert the mask to a vector of booleans.
+ * XXX: There are two ways to do this. Decide what's best.
+ */
+ if (1) {
+ LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
+ mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");
+ } else {
+ mask = LLVMBuildICmp(builder, LLVMIntNE, mask, LLVMConstNull(bld->int_vec_type), "");
+ }
+ res = LLVMBuildSelect(builder, mask, a, b, "");
}
- else if (util_cpu_caps.has_sse4_1 &&
- type.width * type.length == 128 &&
+ else if (((util_cpu_caps.has_sse4_1 &&
+ type.width * type.length == 128) ||
+ (util_cpu_caps.has_avx &&
+ type.width * type.length == 256 && type.width >= 32)) &&
!LLVMIsConstant(a) &&
!LLVMIsConstant(b) &&
!LLVMIsConstant(mask)) {
LLVMTypeRef arg_type;
LLVMValueRef args[3];
- if (type.width == 64) {
+ /*
+ * There's only float blend in AVX but can just cast i32/i64
+ * to float.
+ */
+ if (type.width * type.length == 256) {
+ if (type.width == 64) {
+ intrinsic = "llvm.x86.avx.blendv.pd.256";
+ arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4);
+ }
+ else {
+ intrinsic = "llvm.x86.avx.blendv.ps.256";
+ arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
+ }
+ }
+ else if (type.floating &&
+ type.width == 64) {
intrinsic = "llvm.x86.sse41.blendvpd";
- arg_type = LLVMVectorType(LLVMDoubleType(), 2);
- } else if (type.width == 32) {
+ arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2);
+ } else if (type.floating &&
+ type.width == 32) {
intrinsic = "llvm.x86.sse41.blendvps";
- arg_type = LLVMVectorType(LLVMFloatType(), 4);
+ arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4);
} else {
intrinsic = "llvm.x86.sse41.pblendvb";
- arg_type = LLVMVectorType(LLVMInt8Type(), 16);
+ arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16);
}
if (arg_type != bld->int_vec_type) {
- mask = LLVMBuildBitCast(bld->builder, mask, arg_type, "");
+ mask = LLVMBuildBitCast(builder, mask, arg_type, "");
}
if (arg_type != bld->vec_type) {
- a = LLVMBuildBitCast(bld->builder, a, arg_type, "");
- b = LLVMBuildBitCast(bld->builder, b, arg_type, "");
+ a = LLVMBuildBitCast(builder, a, arg_type, "");
+ b = LLVMBuildBitCast(builder, b, arg_type, "");
}
args[0] = b;
args[1] = a;
args[2] = mask;
- res = lp_build_intrinsic(bld->builder, intrinsic,
+ res = lp_build_intrinsic(builder, intrinsic,
arg_type, args, Elements(args));
if (arg_type != bld->vec_type) {
- res = LLVMBuildBitCast(bld->builder, res, bld->vec_type, "");
+ res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
}
}
else {
lp_build_select_aos(struct lp_build_context *bld,
unsigned mask,
LLVMValueRef a,
- LLVMValueRef b)
+ LLVMValueRef b,
+ unsigned num_channels)
{
+ LLVMBuilderRef builder = bld->gallivm->builder;
const struct lp_type type = bld->type;
const unsigned n = type.length;
unsigned i, j;
return bld->undef;
/*
- * There are three major ways of accomplishing this:
- * - with a shuffle,
- * - with a select,
- * - or with a bit mask.
+ * There are two major ways of accomplishing this:
+ * - with a shuffle
+ * - with a select
*
- * Select isn't supported for vector types yet.
- * The flip between these is empirical and might need to be.
+ * The flip between these is empirical and might need to be adjusted.
*/
if (n <= 4) {
/*
* Shuffle.
*/
- LLVMTypeRef elem_type = LLVMInt32Type();
+ LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
- for(j = 0; j < n; j += 4)
- for(i = 0; i < 4; ++i)
+ for(j = 0; j < n; j += num_channels)
+ for(i = 0; i < num_channels; ++i)
shuffles[j + i] = LLVMConstInt(elem_type,
(mask & (1 << i) ? 0 : n) + j + i,
0);
- return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), "");
+ return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), "");
}
else {
-#if 0
- /* XXX: Unfortunately select of vectors do not work */
- /* Use a select */
- LLVMTypeRef elem_type = LLVMInt1Type();
- LLVMValueRef cond_vec[LP_MAX_VECTOR_LENGTH];
-
- for(j = 0; j < n; j += 4)
- for(i = 0; i < 4; ++i)
- cond_vec[j + i] = LLVMConstInt(elem_type,
- mask & (1 << i) ? 1 : 0, 0);
-
- return LLVMBuildSelect(bld->builder, LLVMConstVector(cond_vec, n), a, b, "");
-#else
- LLVMValueRef mask_vec = lp_build_const_mask_aos(type, mask);
+ LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, num_channels);
return lp_build_select(bld, mask_vec, a, b);
-#endif
}
}
+
+
+/**
+ * Return (scalar-cast)val ? true : false;
+ *
+ * Build code that tests whether any bit is set in the first part of 'val'.
+ * 'val' is bitcast to a single integer of
+ * bld->type.width * bld->type.length bits. When real_length is smaller than
+ * bld->type.length, that integer is truncated to
+ * bld->type.width * real_length bits (the low-order bits are kept), and the
+ * result is compared against zero with an integer "not equal" comparison.
+ *
+ * \param bld build context supplying the builder, LLVM context and type
+ * \param real_length number of elements that hold meaningful data; asserted
+ * to be <= bld->type.length
+ * \param val value to test (presumably a vector of bld->type -- TODO
+ * confirm against callers)
+ * \return the result of the "not equal to zero" integer comparison
+ *
+ * NOTE(review): real_length == 0 is not rejected by the assert and would
+ * request a zero-width integer type -- confirm callers never pass 0.
+ */
+LLVMValueRef
+lp_build_any_true_range(struct lp_build_context *bld,
+ unsigned real_length,
+ LLVMValueRef val)
+{
+ LLVMBuilderRef builder = bld->gallivm->builder;
+ LLVMTypeRef scalar_type;
+ LLVMTypeRef true_type;
+
+ assert(real_length <= bld->type.length);
+
+ true_type = LLVMIntTypeInContext(bld->gallivm->context,
+ bld->type.width * real_length);
+ scalar_type = LLVMIntTypeInContext(bld->gallivm->context,
+ bld->type.width * bld->type.length);
+ val = LLVMBuildBitCast(builder, val, scalar_type, "");
+ /*
+ * We're using always native types so we can use intrinsics.
+ * However, if we don't do per-element calculations, we must ensure
+ * the excess elements aren't used since they may contain garbage.
+ */
+ if (real_length < bld->type.length) {
+ val = LLVMBuildTrunc(builder, val, true_type, "");
+ }
+ return LLVMBuildICmp(builder, LLVMIntNE,
+ val, LLVMConstNull(true_type), "");
+}