From b284f1f7f9afa560d6f4c0863681a7e36913955c Mon Sep 17 00:00:00 2001 From: Jose Fonseca Date: Sat, 2 Apr 2016 15:13:38 +0100 Subject: [PATCH] gallivm: Fix performance regressions due to vector selects. LLVM often can't determine the mask elements are all ones/zeros, and there doesn't seem to be a good way to hint that. Thanks to Roland Scheidegger for spotting and analyzing the issue. Reviewed-by: Roland Scheidegger --- src/gallium/auxiliary/gallivm/lp_bld_logic.c | 40 +++++++++----------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index 5b0b6c6b234..91f316c4565 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -39,6 +39,7 @@ #include "lp_bld_type.h" #include "lp_bld_const.h" +#include "lp_bld_swizzle.h" #include "lp_bld_init.h" #include "lp_bld_intr.h" #include "lp_bld_debug.h" @@ -314,35 +315,30 @@ lp_build_select(struct lp_build_context *bld, mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), ""); res = LLVMBuildSelect(builder, mask, a, b, ""); } - else if (HAVE_LLVM >= 0x0303) { + else if (LLVMIsConstant(mask) || + LLVMGetInstructionOpcode(mask) == LLVMSExt) { /* Generate a vector select. * - * Using vector selects would avoid emitting intrinsics, but they weren't - * properly supported yet for a long time. - * - * LLVM 3.3 appears to reliably support it. - * - * LLVM 3.1 supports it, but it yields buggy code (e.g. lp_blend_test). - * - * LLVM 3.0 includes experimental support provided the -promote-elements - * options is passed to LLVM's command line (e.g., via - * llvm::cl::ParseCommandLineOptions), but resulting code quality is much - * worse, probably because some optimization passes don't know how to - * handle vector selects. - * - * See also: - * - http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-October/043659.html + * Using vector selects should avoid emitting intrinsics hence avoid + * hidering optimization passes, but vector selects weren't properly + * supported yet for a long time, and LLVM will generate poor code when + * the mask is not the result of a comparison. */ /* Convert the mask to a vector of booleans. - * XXX: There are two ways to do this. Decide what's best. + * + * XXX: In x86 the mask is controlled by the MSB, so if we shifted the + * mask by `type.width - 1`, LLVM should realize the mask is ready. Alas + * what really happens is that LLVM will emit two shifts back to back. */ - if (1) { - LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length); - mask = LLVMBuildTrunc(builder, mask, bool_vec_type, ""); - } else { - mask = LLVMBuildICmp(builder, LLVMIntNE, mask, LLVMConstNull(bld->int_vec_type), ""); + if (0) { + LLVMValueRef shift = LLVMConstInt(bld->int_elem_type, bld->type.width - 1, 0); + shift = lp_build_broadcast(bld->gallivm, bld->int_vec_type, shift); + mask = LLVMBuildLShr(builder, mask, shift, ""); } + LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length); + mask = LLVMBuildTrunc(builder, mask, bool_vec_type, ""); + res = LLVMBuildSelect(builder, mask, a, b, ""); } else if (((util_cpu_caps.has_sse4_1 && -- 2.30.2