X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Fgallivm%2Flp_bld_logic.c;h=19d30d0d63c845f900923409e2f45a394046c513;hb=114cc18b98b6e016ab1986577aa3df12acc22cca;hp=a3b697011626daba23662bd98a41f695d6ce3c17;hpb=75b8c4a8f869f63991c774caa7e1cec7e988c5ec;p=mesa.git diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index a3b69701162..19d30d0d63c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -34,11 +34,14 @@ #include "util/u_cpu_detect.h" +#include "util/u_memory.h" #include "util/u_debug.h" #include "lp_bld_type.h" #include "lp_bld_const.h" +#include "lp_bld_init.h" #include "lp_bld_intr.h" +#include "lp_bld_debug.h" #include "lp_bld_logic.h" @@ -49,8 +52,8 @@ * * select <4 x i1> %C, %A, %B * - * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not - * supported on any backend. + * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only + * supported on some backends (x86) starting with llvm 3.1. * * Expanding the boolean vector to full SIMD register width, as in * @@ -65,16 +68,20 @@ /** * Build code to compare two values 'a' and 'b' of 'type' using the given func. * \param func one of PIPE_FUNC_x + * If the ordered argument is true the function will use LLVM's ordered + * comparisons, otherwise unordered comparisons will be used. * The result values will be 0 for false or ~0 for true. */ -LLVMValueRef -lp_build_compare(LLVMBuilderRef builder, - const struct lp_type type, - unsigned func, - LLVMValueRef a, - LLVMValueRef b) +static LLVMValueRef +lp_build_compare_ext(struct gallivm_state *gallivm, + const struct lp_type type, + unsigned func, + LLVMValueRef a, + LLVMValueRef b, + boolean ordered) { - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); + LLVMBuilderRef builder = gallivm->builder; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type); LLVMValueRef zeros = LLVMConstNull(int_vec_type); LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); LLVMValueRef cond; @@ -82,208 +89,42 @@ lp_build_compare(LLVMBuilderRef builder, assert(func >= PIPE_FUNC_NEVER); assert(func <= PIPE_FUNC_ALWAYS); + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); if(func == PIPE_FUNC_NEVER) return zeros; if(func == PIPE_FUNC_ALWAYS) return ones; - /* TODO: optimize the constant case */ - - /* XXX: It is not clear if we should use the ordered or unordered operators */ - -#if HAVE_LLVM < 0x0207 -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width * type.length == 128) { - if(type.floating && util_cpu_caps.has_sse) { - /* float[4] comparison */ - LLVMTypeRef vec_type = lp_build_vec_type(type); - LLVMValueRef args[3]; - unsigned cc; - boolean swap; - - swap = FALSE; - switch(func) { - case PIPE_FUNC_EQUAL: - cc = 0; - break; - case PIPE_FUNC_NOTEQUAL: - cc = 4; - break; - case PIPE_FUNC_LESS: - cc = 1; - break; - case PIPE_FUNC_LEQUAL: - cc = 2; - break; - case PIPE_FUNC_GREATER: - cc = 1; - swap = TRUE; - break; - case PIPE_FUNC_GEQUAL: - cc = 2; - swap = TRUE; - break; - default: - assert(0); - return lp_build_undef(type); - } - - if(swap) { - args[0] = b; - args[1] = a; - } - else { - args[0] = a; - args[1] = b; - } - - args[2] = LLVMConstInt(LLVMInt8Type(), cc, 0); - res = lp_build_intrinsic(builder, - "llvm.x86.sse.cmp.ps", - vec_type, - args, 3); - res = LLVMBuildBitCast(builder, res, int_vec_type, ""); - return res; - } - else if(util_cpu_caps.has_sse2) { - /* int[4] comparison */ - static const struct { - unsigned swap:1; - unsigned eq:1; - unsigned gt:1; - unsigned not:1; - } table[] = { - {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */ - {1, 0, 1, 0}, /* PIPE_FUNC_LESS */ - {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */ - {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */ - {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */ - {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */ - {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */ - {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */ - }; - const char *pcmpeq; - const char *pcmpgt; - LLVMValueRef args[2]; - LLVMValueRef res; - LLVMTypeRef vec_type = lp_build_vec_type(type); - - switch (type.width) { - case 8: - pcmpeq = "llvm.x86.sse2.pcmpeq.b"; - pcmpgt = "llvm.x86.sse2.pcmpgt.b"; - break; - case 16: - pcmpeq = "llvm.x86.sse2.pcmpeq.w"; - pcmpgt = "llvm.x86.sse2.pcmpgt.w"; - break; - case 32: - pcmpeq = "llvm.x86.sse2.pcmpeq.d"; - pcmpgt = "llvm.x86.sse2.pcmpgt.d"; - break; - default: - assert(0); - return lp_build_undef(type); - } - - /* There are no signed byte and unsigned word/dword comparison - * instructions. So flip the sign bit so that the results match. - */ - if(table[func].gt && - ((type.width == 8 && type.sign) || - (type.width != 8 && !type.sign))) { - LLVMValueRef msb = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1)); - a = LLVMBuildXor(builder, a, msb, ""); - b = LLVMBuildXor(builder, b, msb, ""); - } - - if(table[func].swap) { - args[0] = b; - args[1] = a; - } - else { - args[0] = a; - args[1] = b; - } - - if(table[func].eq) - res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); - else if (table[func].gt) - res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); - else - res = LLVMConstNull(vec_type); - - if(table[func].not) - res = LLVMBuildNot(builder, res, ""); - - return res; - } - } /* if (type.width * type.length == 128) */ -#endif -#endif /* HAVE_LLVM < 0x0207 */ - if(type.floating) { LLVMRealPredicate op; switch(func) { - case PIPE_FUNC_NEVER: - op = LLVMRealPredicateFalse; - break; - case PIPE_FUNC_ALWAYS: - op = LLVMRealPredicateTrue; - break; case PIPE_FUNC_EQUAL: - op = LLVMRealUEQ; + op = ordered ? LLVMRealOEQ : LLVMRealUEQ; break; case PIPE_FUNC_NOTEQUAL: - op = LLVMRealUNE; + op = ordered ? LLVMRealONE : LLVMRealUNE; break; case PIPE_FUNC_LESS: - op = LLVMRealULT; + op = ordered ? LLVMRealOLT : LLVMRealULT; break; case PIPE_FUNC_LEQUAL: - op = LLVMRealULE; + op = ordered ? LLVMRealOLE : LLVMRealULE; break; case PIPE_FUNC_GREATER: - op = LLVMRealUGT; + op = ordered ? LLVMRealOGT : LLVMRealUGT; break; case PIPE_FUNC_GEQUAL: - op = LLVMRealUGE; + op = ordered ? LLVMRealOGE : LLVMRealUGE; break; default: assert(0); - return lp_build_undef(type); + return lp_build_undef(gallivm, type); } -#if HAVE_LLVM >= 0x0207 cond = LLVMBuildFCmp(builder, op, a, b, ""); res = LLVMBuildSExt(builder, cond, int_vec_type, ""); -#else - if (type.length == 1) { - cond = LLVMBuildFCmp(builder, op, a, b, ""); - res = LLVMBuildSExt(builder, cond, int_vec_type, ""); - } - else { - unsigned i; - - res = LLVMGetUndef(int_vec_type); - - debug_printf("%s: warning: using slow element-wise float" - " vector comparison\n", __FUNCTION__); - for (i = 0; i < type.length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - cond = LLVMBuildFCmp(builder, op, - LLVMBuildExtractElement(builder, a, index, ""), - LLVMBuildExtractElement(builder, b, index, ""), - ""); - cond = LLVMBuildSelect(builder, cond, - LLVMConstExtractElement(ones, index), - LLVMConstExtractElement(zeros, index), - ""); - res = LLVMBuildInsertElement(builder, res, cond, index, ""); - } - } -#endif } else { LLVMIntPredicate op; @@ -308,49 +149,86 @@ lp_build_compare(LLVMBuilderRef builder, break; default: assert(0); - return lp_build_undef(type); + return lp_build_undef(gallivm, type); } -#if HAVE_LLVM >= 0x0207 cond = LLVMBuildICmp(builder, op, a, b, ""); res = LLVMBuildSExt(builder, cond, int_vec_type, ""); -#else - if (type.length == 1) { - cond = LLVMBuildICmp(builder, op, a, b, ""); - res = LLVMBuildSExt(builder, cond, int_vec_type, ""); - } - else { - unsigned i; - - res = LLVMGetUndef(int_vec_type); - - debug_printf("%s: warning: using slow element-wise int" - " vector comparison\n", __FUNCTION__); - - for(i = 0; i < type.length; ++i) { - LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0); - cond = LLVMBuildICmp(builder, op, - LLVMBuildExtractElement(builder, a, index, ""), - LLVMBuildExtractElement(builder, b, index, ""), - ""); - cond = LLVMBuildSelect(builder, cond, - LLVMConstExtractElement(ones, index), - LLVMConstExtractElement(zeros, index), - ""); - res = LLVMBuildInsertElement(builder, res, cond, index, ""); - } - } -#endif } return res; } +/** + * Build code to compare two values 'a' and 'b' of 'type' using the given func. + * \param func one of PIPE_FUNC_x + * The result values will be 0 for false or ~0 for true. + */ +LLVMValueRef +lp_build_compare(struct gallivm_state *gallivm, + const struct lp_type type, + unsigned func, + LLVMValueRef a, + LLVMValueRef b) +{ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type); + LLVMValueRef zeros = LLVMConstNull(int_vec_type); + LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); + assert(func >= PIPE_FUNC_NEVER); + assert(func <= PIPE_FUNC_ALWAYS); + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + + if(func == PIPE_FUNC_NEVER) + return zeros; + if(func == PIPE_FUNC_ALWAYS) + return ones; + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + /* + * There are no unsigned integer comparison instructions in SSE. + */ + + if (!type.floating && !type.sign && + type.width * type.length == 128 && + util_cpu_caps.has_sse2 && + (func == PIPE_FUNC_LESS || + func == PIPE_FUNC_LEQUAL || + func == PIPE_FUNC_GREATER || + func == PIPE_FUNC_GEQUAL) && + (gallivm_debug & GALLIVM_DEBUG_PERF)) { + debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n", + __FUNCTION__, type.length, type.width); + } +#endif + + return lp_build_compare_ext(gallivm, type, func, a, b, FALSE); +} /** * Build code to compare two values 'a' and 'b' using the given func. * \param func one of PIPE_FUNC_x + * If the operands are floating point numbers, the function will use + * ordered comparison which means that it will return true if both + * operands are not a NaN and the specified condition evaluates to true. + * The result values will be 0 for false or ~0 for true. + */ +LLVMValueRef +lp_build_cmp_ordered(struct lp_build_context *bld, + unsigned func, + LLVMValueRef a, + LLVMValueRef b) +{ + return lp_build_compare_ext(bld->gallivm, bld->type, func, a, b, TRUE); +} + +/** + * Build code to compare two values 'a' and 'b' using the given func. + * \param func one of PIPE_FUNC_x + * If the operands are floating point numbers, the function will use + * unordered comparison which means that it will return true if either + * operand is a NaN or the specified condition evaluates to true. * The result values will be 0 for false or ~0 for true. */ LLVMValueRef @@ -359,14 +237,61 @@ lp_build_cmp(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - return lp_build_compare(bld->builder, bld->type, func, a, b); + return lp_build_compare(bld->gallivm, bld->type, func, a, b); +} + + +/** + * Return (mask & a) | (~mask & b); + */ +LLVMValueRef +lp_build_select_bitwise(struct lp_build_context *bld, + LLVMValueRef mask, + LLVMValueRef a, + LLVMValueRef b) +{ + LLVMBuilderRef builder = bld->gallivm->builder; + struct lp_type type = bld->type; + LLVMValueRef res; + + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + + if (a == b) { + return a; + } + + if(type.floating) { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); + a = LLVMBuildBitCast(builder, a, int_vec_type, ""); + b = LLVMBuildBitCast(builder, b, int_vec_type, ""); + } + + a = LLVMBuildAnd(builder, a, mask, ""); + + /* This often gets translated to PANDN, but sometimes the NOT is + * pre-computed and stored in another constant. The best strategy depends + * on available registers, so it is not a big deal -- hopefully LLVM does + * the right decision attending the rest of the program. + */ + b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), ""); + + res = LLVMBuildOr(builder, a, b, ""); + + if(type.floating) { + LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); + res = LLVMBuildBitCast(builder, res, vec_type, ""); + } + + return res; } /** * Return mask ? a : b; * - * mask is a bitwise mask, composed of 0 or ~0 for each element. + * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value + * will yield unpredictable results. */ LLVMValueRef lp_build_select(struct lp_build_context *bld, @@ -374,122 +299,202 @@ lp_build_select(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { + LLVMBuilderRef builder = bld->gallivm->builder; + LLVMContextRef lc = bld->gallivm->context; struct lp_type type = bld->type; LLVMValueRef res; + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if(a == b) return a; if (type.length == 1) { - mask = LLVMBuildTrunc(bld->builder, mask, LLVMInt1Type(), ""); - res = LLVMBuildSelect(bld->builder, mask, a, b, ""); + mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), ""); + res = LLVMBuildSelect(builder, mask, a, b, ""); } - else { - if(type.floating) { - LLVMTypeRef int_vec_type = lp_build_int_vec_type(type); - a = LLVMBuildBitCast(bld->builder, a, int_vec_type, ""); - b = LLVMBuildBitCast(bld->builder, b, int_vec_type, ""); - } + else if (0) { + /* Generate a vector select. + * + * XXX: Using vector selects would avoid emitting intrinsics, but they aren't + * properly supported yet. + * + * LLVM 3.1 supports it, but it yields buggy code (e.g. lp_blend_test). + * + * LLVM 3.0 includes experimental support provided the -promote-elements + * options is passed to LLVM's command line (e.g., via + * llvm::cl::ParseCommandLineOptions), but resulting code quality is much + * worse, probably because some optimization passes don't know how to + * handle vector selects. + * + * See also: + * - http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-October/043659.html + */ - a = LLVMBuildAnd(bld->builder, a, mask, ""); + /* Convert the mask to a vector of booleans. + * XXX: There are two ways to do this. Decide what's best. + */ + if (1) { + LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length); + mask = LLVMBuildTrunc(builder, mask, bool_vec_type, ""); + } else { + mask = LLVMBuildICmp(builder, LLVMIntNE, mask, LLVMConstNull(bld->int_vec_type), ""); + } + res = LLVMBuildSelect(builder, mask, a, b, ""); + } + else if (((util_cpu_caps.has_sse4_1 && + type.width * type.length == 128) || + (util_cpu_caps.has_avx && + type.width * type.length == 256 && type.width >= 32)) && + !LLVMIsConstant(a) && + !LLVMIsConstant(b) && + !LLVMIsConstant(mask)) { + const char *intrinsic; + LLVMTypeRef arg_type; + LLVMValueRef args[3]; - /* This often gets translated to PANDN, but sometimes the NOT is - * pre-computed and stored in another constant. The best strategy depends - * on available registers, so it is not a big deal -- hopefully LLVM does - * the right decision attending the rest of the program. + /* + * There's only float blend in AVX but can just cast i32/i64 + * to float. */ - b = LLVMBuildAnd(bld->builder, b, LLVMBuildNot(bld->builder, mask, ""), ""); + if (type.width * type.length == 256) { + if (type.width == 64) { + intrinsic = "llvm.x86.avx.blendv.pd.256"; + arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4); + } + else { + intrinsic = "llvm.x86.avx.blendv.ps.256"; + arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8); + } + } + else if (type.floating && + type.width == 64) { + intrinsic = "llvm.x86.sse41.blendvpd"; + arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2); + } else if (type.floating && + type.width == 32) { + intrinsic = "llvm.x86.sse41.blendvps"; + arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4); + } else { + intrinsic = "llvm.x86.sse41.pblendvb"; + arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16); + } - res = LLVMBuildOr(bld->builder, a, b, ""); + if (arg_type != bld->int_vec_type) { + mask = LLVMBuildBitCast(builder, mask, arg_type, ""); + } + + if (arg_type != bld->vec_type) { + a = LLVMBuildBitCast(builder, a, arg_type, ""); + b = LLVMBuildBitCast(builder, b, arg_type, ""); + } + + args[0] = b; + args[1] = a; + args[2] = mask; - if(type.floating) { - LLVMTypeRef vec_type = lp_build_vec_type(type); - res = LLVMBuildBitCast(bld->builder, res, vec_type, ""); + res = lp_build_intrinsic(builder, intrinsic, + arg_type, args, Elements(args), 0); + + if (arg_type != bld->vec_type) { + res = LLVMBuildBitCast(builder, res, bld->vec_type, ""); } } + else { + res = lp_build_select_bitwise(bld, mask, a, b); + } return res; } +/** + * Return mask ? a : b; + * + * mask is a TGSI_WRITEMASK_xxx. + */ LLVMValueRef lp_build_select_aos(struct lp_build_context *bld, + unsigned mask, LLVMValueRef a, LLVMValueRef b, - const boolean cond[4]) + unsigned num_channels) { + LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; const unsigned n = type.length; unsigned i, j; + assert((mask & ~0xf) == 0); + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + if(a == b) return a; - if(cond[0] && cond[1] && cond[2] && cond[3]) + if((mask & 0xf) == 0xf) return a; - if(!cond[0] && !cond[1] && !cond[2] && !cond[3]) + if((mask & 0xf) == 0x0) return b; if(a == bld->undef || b == bld->undef) return bld->undef; /* - * There are three major ways of accomplishing this: - * - with a shuffle, - * - with a select, - * - or with a bit mask. + * There are two major ways of accomplishing this: + * - with a shuffle + * - with a select * - * Select isn't supported for vector types yet. - * The flip between these is empirical and might need to be. + * The flip between these is empirical and might need to be adjusted. */ if (n <= 4) { /* * Shuffle. */ - LLVMTypeRef elem_type = LLVMInt32Type(); + LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; - for(j = 0; j < n; j += 4) - for(i = 0; i < 4; ++i) - shuffles[j + i] = LLVMConstInt(elem_type, (cond[i] ? 0 : n) + j + i, 0); + for(j = 0; j < n; j += num_channels) + for(i = 0; i < num_channels; ++i) + shuffles[j + i] = LLVMConstInt(elem_type, + (mask & (1 << i) ? 0 : n) + j + i, + 0); - return LLVMBuildShuffleVector(bld->builder, a, b, LLVMConstVector(shuffles, n), ""); + return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), ""); } else { -#if 0 - /* XXX: Unfortunately select of vectors do not work */ - /* Use a select */ - LLVMTypeRef elem_type = LLVMInt1Type(); - LLVMValueRef cond[LP_MAX_VECTOR_LENGTH]; - - for(j = 0; j < n; j += 4) - for(i = 0; i < 4; ++i) - cond[j + i] = LLVMConstInt(elem_type, cond[i] ? 1 : 0, 0); - - return LLVMBuildSelect(bld->builder, LLVMConstVector(cond, n), a, b, ""); -#else - LLVMValueRef mask = lp_build_const_mask_aos(type, cond); - return lp_build_select(bld, mask, a, b); -#endif + LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, num_channels); + return lp_build_select(bld, mask_vec, a, b); } } + +/** + * Return (scalar-cast)val ? true : false; + */ LLVMValueRef -lp_build_alloca(struct lp_build_context *bld) +lp_build_any_true_range(struct lp_build_context *bld, + unsigned real_length, + LLVMValueRef val) { - const struct lp_type type = bld->type; + LLVMBuilderRef builder = bld->gallivm->builder; + LLVMTypeRef scalar_type; + LLVMTypeRef true_type; - if (type.length > 1) { /*vector*/ - return LLVMBuildAlloca(bld->builder, lp_build_vec_type(type), ""); - } else { /*scalar*/ - return LLVMBuildAlloca(bld->builder, lp_build_elem_type(type), ""); - } -} + assert(real_length <= bld->type.length); - -/** Return (a & ~b) */ -LLVMValueRef -lp_build_andc(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) -{ - b = LLVMBuildNot(bld->builder, b, ""); - b = LLVMBuildAnd(bld->builder, a, b, ""); - return b; + true_type = LLVMIntTypeInContext(bld->gallivm->context, + bld->type.width * real_length); + scalar_type = LLVMIntTypeInContext(bld->gallivm->context, + bld->type.width * bld->type.length); + val = LLVMBuildBitCast(builder, val, scalar_type, ""); + /* + * We're using always native types so we can use intrinsics. + * However, if we don't do per-element calculations, we must ensure + * the excess elements aren't used since they may contain garbage. + */ + if (real_length < bld->type.length) { + val = LLVMBuildTrunc(builder, val, true_type, ""); + } + return LLVMBuildICmp(builder, LLVMIntNE, + val, LLVMConstNull(true_type), ""); }