X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Fgallivm%2Flp_bld_logic.c;h=315977ae7450e619d6d6ff9d2c0ddc200f7adcb2;hb=9a2df304edbe7976c5cd7c522e06dc66b8b08b6e;hp=f56b61bf24891212624e4943a8cdef9838c5cec1;hpb=fa1b481c09b14e01eca1b3db8e0854033f6dee3d;p=mesa.git diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index f56b61bf248..315977ae745 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -32,6 +32,7 @@ * @author Jose Fonseca */ +#include #include "util/u_cpu_detect.h" #include "util/u_memory.h" @@ -39,6 +40,7 @@ #include "lp_bld_type.h" #include "lp_bld_const.h" +#include "lp_bld_swizzle.h" #include "lp_bld_init.h" #include "lp_bld_intr.h" #include "lp_bld_debug.h" @@ -68,14 +70,17 @@ /** * Build code to compare two values 'a' and 'b' of 'type' using the given func. * \param func one of PIPE_FUNC_x + * If the ordered argument is true the function will use LLVM's ordered + * comparisons, otherwise unordered comparisons will be used. * The result values will be 0 for false or ~0 for true. */ -LLVMValueRef -lp_build_compare(struct gallivm_state *gallivm, - const struct lp_type type, - unsigned func, - LLVMValueRef a, - LLVMValueRef b) +static LLVMValueRef +lp_build_compare_ext(struct gallivm_state *gallivm, + const struct lp_type type, + unsigned func, + LLVMValueRef a, + LLVMValueRef b, + boolean ordered) { LLVMBuilderRef builder = gallivm->builder; LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type); @@ -84,8 +89,6 @@ lp_build_compare(struct gallivm_state *gallivm, LLVMValueRef cond; LLVMValueRef res; - assert(func >= PIPE_FUNC_NEVER); - assert(func <= PIPE_FUNC_ALWAYS); assert(lp_check_value(type, a)); assert(lp_check_value(type, b)); @@ -94,216 +97,37 @@ lp_build_compare(struct gallivm_state *gallivm, if(func == PIPE_FUNC_ALWAYS) return ones; -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - /* - * There are no unsigned integer comparison instructions in SSE. - */ - - if (!type.floating && !type.sign && - type.width * type.length == 128 && - util_cpu_caps.has_sse2 && - (func == PIPE_FUNC_LESS || - func == PIPE_FUNC_LEQUAL || - func == PIPE_FUNC_GREATER || - func == PIPE_FUNC_GEQUAL) && - (gallivm_debug & GALLIVM_DEBUG_PERF)) { - debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n", - __FUNCTION__, type.length, type.width); - } -#endif - -#if HAVE_LLVM < 0x0207 -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) - if(type.width * type.length == 128) { - if(type.floating && util_cpu_caps.has_sse) { - /* float[4] comparison */ - LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type); - LLVMValueRef args[3]; - unsigned cc; - boolean swap; - - swap = FALSE; - switch(func) { - case PIPE_FUNC_EQUAL: - cc = 0; - break; - case PIPE_FUNC_NOTEQUAL: - cc = 4; - break; - case PIPE_FUNC_LESS: - cc = 1; - break; - case PIPE_FUNC_LEQUAL: - cc = 2; - break; - case PIPE_FUNC_GREATER: - cc = 1; - swap = TRUE; - break; - case PIPE_FUNC_GEQUAL: - cc = 2; - swap = TRUE; - break; - default: - assert(0); - return lp_build_undef(gallivm, type); - } - - if(swap) { - args[0] = b; - args[1] = a; - } - else { - args[0] = a; - args[1] = b; - } - - args[2] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), cc, 0); - res = lp_build_intrinsic(builder, - "llvm.x86.sse.cmp.ps", - vec_type, - args, 3); - res = LLVMBuildBitCast(builder, res, int_vec_type, ""); - return res; - } - else if(util_cpu_caps.has_sse2) { - /* int[4] comparison */ - static const struct { - unsigned swap:1; - unsigned eq:1; - unsigned gt:1; - unsigned not:1; - } table[] = { - {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */ - {1, 0, 1, 0}, /* PIPE_FUNC_LESS */ - {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */ - {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */ - {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */ - {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */ - {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */ - {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */ - }; - const char *pcmpeq; - const char *pcmpgt; - LLVMValueRef args[2]; - LLVMValueRef res; - LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type); - - switch (type.width) { - case 8: - pcmpeq = "llvm.x86.sse2.pcmpeq.b"; - pcmpgt = "llvm.x86.sse2.pcmpgt.b"; - break; - case 16: - pcmpeq = "llvm.x86.sse2.pcmpeq.w"; - pcmpgt = "llvm.x86.sse2.pcmpgt.w"; - break; - case 32: - pcmpeq = "llvm.x86.sse2.pcmpeq.d"; - pcmpgt = "llvm.x86.sse2.pcmpgt.d"; - break; - default: - assert(0); - return lp_build_undef(gallivm, type); - } - - /* There are no unsigned comparison instructions. So flip the sign bit - * so that the results match. - */ - if (table[func].gt && !type.sign) { - LLVMValueRef msb = lp_build_const_int_vec(gallivm, type, (unsigned long long)1 << (type.width - 1)); - a = LLVMBuildXor(builder, a, msb, ""); - b = LLVMBuildXor(builder, b, msb, ""); - } - - if(table[func].swap) { - args[0] = b; - args[1] = a; - } - else { - args[0] = a; - args[1] = b; - } - - if(table[func].eq) - res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); - else if (table[func].gt) - res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); - else - res = LLVMConstNull(vec_type); - - if(table[func].not) - res = LLVMBuildNot(builder, res, ""); - - return res; - } - } /* if (type.width * type.length == 128) */ -#endif -#endif /* HAVE_LLVM < 0x0207 */ - - /* XXX: It is not clear if we should use the ordered or unordered operators */ + assert(func > PIPE_FUNC_NEVER); + assert(func < PIPE_FUNC_ALWAYS); if(type.floating) { LLVMRealPredicate op; switch(func) { - case PIPE_FUNC_NEVER: - op = LLVMRealPredicateFalse; - break; - case PIPE_FUNC_ALWAYS: - op = LLVMRealPredicateTrue; - break; case PIPE_FUNC_EQUAL: - op = LLVMRealUEQ; + op = ordered ? LLVMRealOEQ : LLVMRealUEQ; break; case PIPE_FUNC_NOTEQUAL: - op = LLVMRealUNE; + op = ordered ? LLVMRealONE : LLVMRealUNE; break; case PIPE_FUNC_LESS: - op = LLVMRealULT; + op = ordered ? LLVMRealOLT : LLVMRealULT; break; case PIPE_FUNC_LEQUAL: - op = LLVMRealULE; + op = ordered ? LLVMRealOLE : LLVMRealULE; break; case PIPE_FUNC_GREATER: - op = LLVMRealUGT; + op = ordered ? LLVMRealOGT : LLVMRealUGT; break; case PIPE_FUNC_GEQUAL: - op = LLVMRealUGE; + op = ordered ? LLVMRealOGE : LLVMRealUGE; break; default: assert(0); return lp_build_undef(gallivm, type); } -#if HAVE_LLVM >= 0x0207 cond = LLVMBuildFCmp(builder, op, a, b, ""); res = LLVMBuildSExt(builder, cond, int_vec_type, ""); -#else - if (type.length == 1) { - cond = LLVMBuildFCmp(builder, op, a, b, ""); - res = LLVMBuildSExt(builder, cond, int_vec_type, ""); - } - else { - unsigned i; - - res = LLVMGetUndef(int_vec_type); - - debug_printf("%s: warning: using slow element-wise float" - " vector comparison\n", __FUNCTION__); - for (i = 0; i < type.length; ++i) { - LLVMValueRef index = lp_build_const_int32(gallivm, i); - cond = LLVMBuildFCmp(builder, op, - LLVMBuildExtractElement(builder, a, index, ""), - LLVMBuildExtractElement(builder, b, index, ""), - ""); - cond = LLVMBuildSelect(builder, cond, - LLVMConstExtractElement(ones, index), - LLVMConstExtractElement(zeros, index), - ""); - res = LLVMBuildInsertElement(builder, res, cond, index, ""); - } - } -#endif } else { LLVMIntPredicate op; @@ -331,48 +155,84 @@ lp_build_compare(struct gallivm_state *gallivm, return lp_build_undef(gallivm, type); } -#if HAVE_LLVM >= 0x0207 cond = LLVMBuildICmp(builder, op, a, b, ""); res = LLVMBuildSExt(builder, cond, int_vec_type, ""); -#else - if (type.length == 1) { - cond = LLVMBuildICmp(builder, op, a, b, ""); - res = LLVMBuildSExt(builder, cond, int_vec_type, ""); - } - else { - unsigned i; + } - res = LLVMGetUndef(int_vec_type); + return res; +} - if (gallivm_debug & GALLIVM_DEBUG_PERF) { - debug_printf("%s: using slow element-wise int" - " vector comparison\n", __FUNCTION__); - } +/** + * Build code to compare two values 'a' and 'b' of 'type' using the given func. + * \param func one of PIPE_FUNC_x + * The result values will be 0 for false or ~0 for true. + */ +LLVMValueRef +lp_build_compare(struct gallivm_state *gallivm, + const struct lp_type type, + unsigned func, + LLVMValueRef a, + LLVMValueRef b) +{ + LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type); + LLVMValueRef zeros = LLVMConstNull(int_vec_type); + LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); - for(i = 0; i < type.length; ++i) { - LLVMValueRef index = lp_build_const_int32(gallivm, i); - cond = LLVMBuildICmp(builder, op, - LLVMBuildExtractElement(builder, a, index, ""), - LLVMBuildExtractElement(builder, b, index, ""), - ""); - cond = LLVMBuildSelect(builder, cond, - LLVMConstExtractElement(ones, index), - LLVMConstExtractElement(zeros, index), - ""); - res = LLVMBuildInsertElement(builder, res, cond, index, ""); - } - } -#endif + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + + if(func == PIPE_FUNC_NEVER) + return zeros; + if(func == PIPE_FUNC_ALWAYS) + return ones; + + assert(func > PIPE_FUNC_NEVER); + assert(func < PIPE_FUNC_ALWAYS); + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + /* + * There are no unsigned integer comparison instructions in SSE. + */ + + if (!type.floating && !type.sign && + type.width * type.length == 128 && + util_cpu_caps.has_sse2 && + (func == PIPE_FUNC_LESS || + func == PIPE_FUNC_LEQUAL || + func == PIPE_FUNC_GREATER || + func == PIPE_FUNC_GEQUAL) && + (gallivm_debug & GALLIVM_DEBUG_PERF)) { + debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n", + __FUNCTION__, type.length, type.width); } +#endif - return res; + return lp_build_compare_ext(gallivm, type, func, a, b, FALSE); } - +/** + * Build code to compare two values 'a' and 'b' using the given func. + * \param func one of PIPE_FUNC_x + * If the operands are floating point numbers, the function will use + * ordered comparison which means that it will return true if both + * operands are not a NaN and the specified condition evaluates to true. + * The result values will be 0 for false or ~0 for true. + */ +LLVMValueRef +lp_build_cmp_ordered(struct lp_build_context *bld, + unsigned func, + LLVMValueRef a, + LLVMValueRef b) +{ + return lp_build_compare_ext(bld->gallivm, bld->type, func, a, b, TRUE); +} /** * Build code to compare two values 'a' and 'b' using the given func. * \param func one of PIPE_FUNC_x + * If the operands are floating point numbers, the function will use + * unordered comparison which means that it will return true if either + * operand is a NaN or the specified condition evaluates to true. * The result values will be 0 for false or ~0 for true. */ LLVMValueRef @@ -397,6 +257,7 @@ lp_build_select_bitwise(struct lp_build_context *bld, LLVMBuilderRef builder = bld->gallivm->builder; struct lp_type type = bld->type; LLVMValueRef res; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); assert(lp_check_value(type, a)); assert(lp_check_value(type, b)); @@ -406,11 +267,12 @@ lp_build_select_bitwise(struct lp_build_context *bld, } if(type.floating) { - LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); a = LLVMBuildBitCast(builder, a, int_vec_type, ""); b = LLVMBuildBitCast(builder, b, int_vec_type, ""); } + if (type.width > 32) + mask = LLVMBuildSExt(builder, mask, int_vec_type, ""); a = LLVMBuildAnd(builder, a, mask, ""); /* This often gets translated to PANDN, but sometimes the NOT is @@ -458,37 +320,40 @@ lp_build_select(struct lp_build_context *bld, mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), ""); res = LLVMBuildSelect(builder, mask, a, b, ""); } - else if (0) { + else if (LLVMIsConstant(mask) || + LLVMGetInstructionOpcode(mask) == LLVMSExt) { /* Generate a vector select. * - * XXX: Using vector selects would avoid emitting intrinsics, but they aren't - * properly supported yet. - * - * LLVM 3.0 includes experimental support provided the -promote-elements - * options is passed to LLVM's command line (e.g., via - * llvm::cl::ParseCommandLineOptions), but resulting code quality is much - * worse, probably because some optimization passes don't know how to - * handle vector selects. - * - * See also: - * - http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-October/043659.html + * Using vector selects should avoid emitting intrinsics hence avoid + * hindering optimization passes, but vector selects weren't properly + * supported yet for a long time, and LLVM will generate poor code when + * the mask is not the result of a comparison. + * XXX: Even if the instruction was an SExt, this may still produce + * terrible code. Try piglit stencil-twoside. */ /* Convert the mask to a vector of booleans. - * XXX: There are two ways to do this. Decide what's best. + * + * XXX: In x86 the mask is controlled by the MSB, so if we shifted the + * mask by `type.width - 1`, LLVM should realize the mask is ready. Alas + * what really happens is that LLVM will emit two shifts back to back. */ - if (1) { - LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length); - mask = LLVMBuildTrunc(builder, mask, bool_vec_type, ""); - } else { - mask = LLVMBuildICmp(builder, LLVMIntNE, mask, LLVMConstNull(bld->int_vec_type), ""); + if (0) { + LLVMValueRef shift = LLVMConstInt(bld->int_elem_type, bld->type.width - 1, 0); + shift = lp_build_broadcast(bld->gallivm, bld->int_vec_type, shift); + mask = LLVMBuildLShr(builder, mask, shift, ""); } + LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length); + mask = LLVMBuildTrunc(builder, mask, bool_vec_type, ""); + res = LLVMBuildSelect(builder, mask, a, b, ""); } else if (((util_cpu_caps.has_sse4_1 && type.width * type.length == 128) || (util_cpu_caps.has_avx && - type.width * type.length == 256 && type.width >= 32)) && + type.width * type.length == 256 && type.width >= 32) || + (util_cpu_caps.has_avx2 && + type.width * type.length == 256)) && !LLVMIsConstant(a) && !LLVMIsConstant(b) && !LLVMIsConstant(mask)) { @@ -496,6 +361,11 @@ lp_build_select(struct lp_build_context *bld, LLVMTypeRef arg_type; LLVMValueRef args[3]; + LLVMTypeRef mask_type = LLVMGetElementType(LLVMTypeOf(mask)); + if (LLVMGetIntTypeWidth(mask_type) != type.width) { + LLVMTypeRef int_vec_type = LLVMVectorType(LLVMIntTypeInContext(lc, type.width), type.length); + mask = LLVMBuildSExt(builder, mask, int_vec_type, ""); + } /* * There's only float blend in AVX but can just cast i32/i64 * to float. @@ -505,9 +375,13 @@ lp_build_select(struct lp_build_context *bld, intrinsic = "llvm.x86.avx.blendv.pd.256"; arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4); } - else { + else if (type.width == 32) { intrinsic = "llvm.x86.avx.blendv.ps.256"; arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8); + } else { + assert(util_cpu_caps.has_avx2); + intrinsic = "llvm.x86.avx2.pblendvb"; + arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 32); } } else if (type.floating && @@ -537,7 +411,7 @@ lp_build_select(struct lp_build_context *bld, args[2] = mask; res = lp_build_intrinsic(builder, intrinsic, - arg_type, args, Elements(args)); + arg_type, args, ARRAY_SIZE(args), 0); if (arg_type != bld->vec_type) { res = LLVMBuildBitCast(builder, res, bld->vec_type, "");