X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Fgallivm%2Flp_bld_logic.c;h=fc7a7289707bba4f4d1c95f2db764be38c09816f;hb=0ac316470813b4f2e825ff4befbbf2135cccce94;hp=f7e6fbaff1a2aaad405167986db40fb0bcd1610a;hpb=6299f241e9fdd86e705d144a42d9b1979c13f9ad;p=mesa.git diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index f7e6fbaff1a..fc7a7289707 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -52,8 +52,8 @@ * * select <4 x i1> %C, %A, %B * - * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is not - * supported on any backend. + * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only + * supported on some backends (x86) starting with llvm 3.1. * * Expanding the boolean vector to full SIMD register width, as in * @@ -65,6 +65,157 @@ */ +/** + * Build code to compare two values 'a' and 'b' of 'type' using the given func. + * \param func one of PIPE_FUNC_x + * If the ordered argument is true the function will use LLVM's ordered + * comparisons, otherwise unordered comparisons will be used. + * The result values will be 0 for false or ~0 for true. + */ +static LLVMValueRef +lp_build_compare_ext(struct gallivm_state *gallivm, + const struct lp_type type, + unsigned func, + LLVMValueRef a, + LLVMValueRef b, + boolean ordered) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type); + LLVMValueRef zeros = LLVMConstNull(int_vec_type); + LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); + LLVMValueRef cond; + LLVMValueRef res; + + assert(func >= PIPE_FUNC_NEVER); + assert(func <= PIPE_FUNC_ALWAYS); + assert(lp_check_value(type, a)); + assert(lp_check_value(type, b)); + + if(func == PIPE_FUNC_NEVER) + return zeros; + if(func == PIPE_FUNC_ALWAYS) + return ones; + + if(type.floating) { + LLVMRealPredicate op; + switch(func) { + case PIPE_FUNC_EQUAL: + op = ordered ? LLVMRealOEQ : LLVMRealUEQ; + break; + case PIPE_FUNC_NOTEQUAL: + op = ordered ? LLVMRealONE : LLVMRealUNE; + break; + case PIPE_FUNC_LESS: + op = ordered ? LLVMRealOLT : LLVMRealULT; + break; + case PIPE_FUNC_LEQUAL: + op = ordered ? LLVMRealOLE : LLVMRealULE; + break; + case PIPE_FUNC_GREATER: + op = ordered ? LLVMRealOGT : LLVMRealUGT; + break; + case PIPE_FUNC_GEQUAL: + op = ordered ? LLVMRealOGE : LLVMRealUGE; + break; + default: + assert(0); + return lp_build_undef(gallivm, type); + } + +#if HAVE_LLVM >= 0x0207 + cond = LLVMBuildFCmp(builder, op, a, b, ""); + res = LLVMBuildSExt(builder, cond, int_vec_type, ""); +#else + if (type.length == 1) { + cond = LLVMBuildFCmp(builder, op, a, b, ""); + res = LLVMBuildSExt(builder, cond, int_vec_type, ""); + } + else { + unsigned i; + + res = LLVMGetUndef(int_vec_type); + + debug_printf("%s: warning: using slow element-wise float" + " vector comparison\n", __FUNCTION__); + for (i = 0; i < type.length; ++i) { + LLVMValueRef index = lp_build_const_int32(gallivm, i); + cond = LLVMBuildFCmp(builder, op, + LLVMBuildExtractElement(builder, a, index, ""), + LLVMBuildExtractElement(builder, b, index, ""), + ""); + cond = LLVMBuildSelect(builder, cond, + LLVMConstExtractElement(ones, index), + LLVMConstExtractElement(zeros, index), + ""); + res = LLVMBuildInsertElement(builder, res, cond, index, ""); + } + } +#endif + } + else { + LLVMIntPredicate op; + switch(func) { + case PIPE_FUNC_EQUAL: + op = LLVMIntEQ; + break; + case PIPE_FUNC_NOTEQUAL: + op = LLVMIntNE; + break; + case PIPE_FUNC_LESS: + op = type.sign ? LLVMIntSLT : LLVMIntULT; + break; + case PIPE_FUNC_LEQUAL: + op = type.sign ? LLVMIntSLE : LLVMIntULE; + break; + case PIPE_FUNC_GREATER: + op = type.sign ? LLVMIntSGT : LLVMIntUGT; + break; + case PIPE_FUNC_GEQUAL: + op = type.sign ? LLVMIntSGE : LLVMIntUGE; + break; + default: + assert(0); + return lp_build_undef(gallivm, type); + } + +#if HAVE_LLVM >= 0x0207 + cond = LLVMBuildICmp(builder, op, a, b, ""); + res = LLVMBuildSExt(builder, cond, int_vec_type, ""); +#else + if (type.length == 1) { + cond = LLVMBuildICmp(builder, op, a, b, ""); + res = LLVMBuildSExt(builder, cond, int_vec_type, ""); + } + else { + unsigned i; + + res = LLVMGetUndef(int_vec_type); + + if (gallivm_debug & GALLIVM_DEBUG_PERF) { + debug_printf("%s: using slow element-wise int" + " vector comparison\n", __FUNCTION__); + } + + for(i = 0; i < type.length; ++i) { + LLVMValueRef index = lp_build_const_int32(gallivm, i); + cond = LLVMBuildICmp(builder, op, + LLVMBuildExtractElement(builder, a, index, ""), + LLVMBuildExtractElement(builder, b, index, ""), + ""); + cond = LLVMBuildSelect(builder, cond, + LLVMConstExtractElement(ones, index), + LLVMConstExtractElement(zeros, index), + ""); + res = LLVMBuildInsertElement(builder, res, cond, index, ""); + } + } +#endif + } + + return res; +} + /** * Build code to compare two values 'a' and 'b' of 'type' using the given func. * \param func one of PIPE_FUNC_x @@ -77,12 +228,9 @@ lp_build_compare(struct gallivm_state *gallivm, LLVMValueRef a, LLVMValueRef b) { - LLVMBuilderRef builder = gallivm->builder; LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type); LLVMValueRef zeros = LLVMConstNull(int_vec_type); LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); - LLVMValueRef cond; - LLVMValueRef res; assert(func >= PIPE_FUNC_NEVER); assert(func <= PIPE_FUNC_ALWAYS); @@ -115,6 +263,9 @@ lp_build_compare(struct gallivm_state *gallivm, #if HAVE_LLVM < 0x0207 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if(type.width * type.length == 128) { + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef cond; + LLVMValueRef res; if(type.floating && util_cpu_caps.has_sse) { /* float[4] comparison */ LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type); @@ -241,138 +392,32 @@ lp_build_compare(struct gallivm_state *gallivm, #endif #endif /* HAVE_LLVM < 0x0207 */ - /* XXX: It is not clear if we should use the ordered or unordered operators */ - - if(type.floating) { - LLVMRealPredicate op; - switch(func) { - case PIPE_FUNC_NEVER: - op = LLVMRealPredicateFalse; - break; - case PIPE_FUNC_ALWAYS: - op = LLVMRealPredicateTrue; - break; - case PIPE_FUNC_EQUAL: - op = LLVMRealUEQ; - break; - case PIPE_FUNC_NOTEQUAL: - op = LLVMRealUNE; - break; - case PIPE_FUNC_LESS: - op = LLVMRealULT; - break; - case PIPE_FUNC_LEQUAL: - op = LLVMRealULE; - break; - case PIPE_FUNC_GREATER: - op = LLVMRealUGT; - break; - case PIPE_FUNC_GEQUAL: - op = LLVMRealUGE; - break; - default: - assert(0); - return lp_build_undef(gallivm, type); - } - -#if HAVE_LLVM >= 0x0207 - cond = LLVMBuildFCmp(builder, op, a, b, ""); - res = LLVMBuildSExt(builder, cond, int_vec_type, ""); -#else - if (type.length == 1) { - cond = LLVMBuildFCmp(builder, op, a, b, ""); - res = LLVMBuildSExt(builder, cond, int_vec_type, ""); - } - else { - unsigned i; - - res = LLVMGetUndef(int_vec_type); - - debug_printf("%s: warning: using slow element-wise float" - " vector comparison\n", __FUNCTION__); - for (i = 0; i < type.length; ++i) { - LLVMValueRef index = lp_build_const_int32(gallivm, i); - cond = LLVMBuildFCmp(builder, op, - LLVMBuildExtractElement(builder, a, index, ""), - LLVMBuildExtractElement(builder, b, index, ""), - ""); - cond = LLVMBuildSelect(builder, cond, - LLVMConstExtractElement(ones, index), - LLVMConstExtractElement(zeros, index), - ""); - res = LLVMBuildInsertElement(builder, res, cond, index, ""); - } - } -#endif - } - else { - LLVMIntPredicate op; - switch(func) { - case PIPE_FUNC_EQUAL: - op = LLVMIntEQ; - break; - case PIPE_FUNC_NOTEQUAL: - op = LLVMIntNE; - break; - case PIPE_FUNC_LESS: - op = type.sign ? LLVMIntSLT : LLVMIntULT; - break; - case PIPE_FUNC_LEQUAL: - op = type.sign ? LLVMIntSLE : LLVMIntULE; - break; - case PIPE_FUNC_GREATER: - op = type.sign ? LLVMIntSGT : LLVMIntUGT; - break; - case PIPE_FUNC_GEQUAL: - op = type.sign ? LLVMIntSGE : LLVMIntUGE; - break; - default: - assert(0); - return lp_build_undef(gallivm, type); - } - -#if HAVE_LLVM >= 0x0207 - cond = LLVMBuildICmp(builder, op, a, b, ""); - res = LLVMBuildSExt(builder, cond, int_vec_type, ""); -#else - if (type.length == 1) { - cond = LLVMBuildICmp(builder, op, a, b, ""); - res = LLVMBuildSExt(builder, cond, int_vec_type, ""); - } - else { - unsigned i; - - res = LLVMGetUndef(int_vec_type); - - if (gallivm_debug & GALLIVM_DEBUG_PERF) { - debug_printf("%s: using slow element-wise int" - " vector comparison\n", __FUNCTION__); - } - - for(i = 0; i < type.length; ++i) { - LLVMValueRef index = lp_build_const_int32(gallivm, i); - cond = LLVMBuildICmp(builder, op, - LLVMBuildExtractElement(builder, a, index, ""), - LLVMBuildExtractElement(builder, b, index, ""), - ""); - cond = LLVMBuildSelect(builder, cond, - LLVMConstExtractElement(ones, index), - LLVMConstExtractElement(zeros, index), - ""); - res = LLVMBuildInsertElement(builder, res, cond, index, ""); - } - } -#endif - } - - return res; + return lp_build_compare_ext(gallivm, type, func, a, b, FALSE); } - +/** + * Build code to compare two values 'a' and 'b' using the given func. + * \param func one of PIPE_FUNC_x + * If the operands are floating point numbers, the function will use + * ordered comparison which means that it will return true if both + * operands are not a NaN and the specified condition evaluates to true. + * The result values will be 0 for false or ~0 for true. + */ +LLVMValueRef +lp_build_cmp_ordered(struct lp_build_context *bld, + unsigned func, + LLVMValueRef a, + LLVMValueRef b) +{ + return lp_build_compare_ext(bld->gallivm, bld->type, func, a, b, TRUE); +} /** * Build code to compare two values 'a' and 'b' using the given func. * \param func one of PIPE_FUNC_x + * If the operands are floating point numbers, the function will use + * unordered comparison which means that it will return true if either + * operand is a NaN or the specified condition evaluates to true. * The result values will be 0 for false or ~0 for true. */ LLVMValueRef @@ -458,8 +503,39 @@ lp_build_select(struct lp_build_context *bld, mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), ""); res = LLVMBuildSelect(builder, mask, a, b, ""); } - else if (util_cpu_caps.has_sse4_1 && - type.width * type.length == 128 && + else if (0) { + /* Generate a vector select. + * + * XXX: Using vector selects would avoid emitting intrinsics, but they aren't + * properly supported yet. + * + * LLVM 3.1 supports it, but it yields buggy code (e.g. lp_blend_test). + * + * LLVM 3.0 includes experimental support provided the -promote-elements + * options is passed to LLVM's command line (e.g., via + * llvm::cl::ParseCommandLineOptions), but resulting code quality is much + * worse, probably because some optimization passes don't know how to + * handle vector selects. + * + * See also: + * - http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-October/043659.html + */ + + /* Convert the mask to a vector of booleans. + * XXX: There are two ways to do this. Decide what's best. + */ + if (1) { + LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length); + mask = LLVMBuildTrunc(builder, mask, bool_vec_type, ""); + } else { + mask = LLVMBuildICmp(builder, LLVMIntNE, mask, LLVMConstNull(bld->int_vec_type), ""); + } + res = LLVMBuildSelect(builder, mask, a, b, ""); + } + else if (((util_cpu_caps.has_sse4_1 && + type.width * type.length == 128) || + (util_cpu_caps.has_avx && + type.width * type.length == 256 && type.width >= 32)) && !LLVMIsConstant(a) && !LLVMIsConstant(b) && !LLVMIsConstant(mask)) { @@ -467,8 +543,22 @@ lp_build_select(struct lp_build_context *bld, LLVMTypeRef arg_type; LLVMValueRef args[3]; - if (type.floating && - type.width == 64) { + /* + * There's only float blend in AVX but can just cast i32/i64 + * to float. + */ + if (type.width * type.length == 256) { + if (type.width == 64) { + intrinsic = "llvm.x86.avx.blendv.pd.256"; + arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4); + } + else { + intrinsic = "llvm.x86.avx.blendv.ps.256"; + arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8); + } + } + else if (type.floating && + type.width == 64) { intrinsic = "llvm.x86.sse41.blendvpd"; arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2); } else if (type.floating && @@ -517,7 +607,8 @@ LLVMValueRef lp_build_select_aos(struct lp_build_context *bld, unsigned mask, LLVMValueRef a, - LLVMValueRef b) + LLVMValueRef b, + unsigned num_channels) { LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; @@ -538,13 +629,11 @@ lp_build_select_aos(struct lp_build_context *bld, return bld->undef; /* - * There are three major ways of accomplishing this: - * - with a shuffle, - * - with a select, - * - or with a bit mask. + * There are two major ways of accomplishing this: + * - with a shuffle + * - with a select * - * Select isn't supported for vector types yet. - * The flip between these is empirical and might need to be. + * The flip between these is empirical and might need to be adjusted. */ if (n <= 4) { /* @@ -553,8 +642,8 @@ lp_build_select_aos(struct lp_build_context *bld, LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; - for(j = 0; j < n; j += 4) - for(i = 0; i < 4; ++i) + for(j = 0; j < n; j += num_channels) + for(i = 0; i < num_channels; ++i) shuffles[j + i] = LLVMConstInt(elem_type, (mask & (1 << i) ? 0 : n) + j + i, 0); @@ -562,21 +651,39 @@ lp_build_select_aos(struct lp_build_context *bld, return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), ""); } else { -#if 0 - /* XXX: Unfortunately select of vectors do not work */ - /* Use a select */ - LLVMTypeRef elem_type = LLVMInt1Type(); - LLVMValueRef cond_vec[LP_MAX_VECTOR_LENGTH]; - - for(j = 0; j < n; j += 4) - for(i = 0; i < 4; ++i) - cond_vec[j + i] = LLVMConstInt(elem_type, - mask & (1 << i) ? 1 : 0, 0); - - return LLVMBuildSelect(builder, LLVMConstVector(cond_vec, n), a, b, ""); -#else - LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask); + LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, num_channels); return lp_build_select(bld, mask_vec, a, b); -#endif } } + + +/** + * Return (scalar-cast)val ? true : false; + */ +LLVMValueRef +lp_build_any_true_range(struct lp_build_context *bld, + unsigned real_length, + LLVMValueRef val) +{ + LLVMBuilderRef builder = bld->gallivm->builder; + LLVMTypeRef scalar_type; + LLVMTypeRef true_type; + + assert(real_length <= bld->type.length); + + true_type = LLVMIntTypeInContext(bld->gallivm->context, + bld->type.width * real_length); + scalar_type = LLVMIntTypeInContext(bld->gallivm->context, + bld->type.width * bld->type.length); + val = LLVMBuildBitCast(builder, val, scalar_type, ""); + /* + * We're using always native types so we can use intrinsics. + * However, if we don't do per-element calculations, we must ensure + * the excess elements aren't used since they may contain garbage. + */ + if (real_length < bld->type.length) { + val = LLVMBuildTrunc(builder, val, true_type, ""); + } + return LLVMBuildICmp(builder, LLVMIntNE, + val, LLVMConstNull(true_type), ""); +}