From cb438d8b3e1e32faf714f22b308c8f9c5506209b Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Wed, 13 Apr 2016 05:00:03 +0200 Subject: [PATCH] gallivm: use llvm.nearbyint instead of llvm.round. We used to use sse roundps intrinsic directly, but switched to use the llvm intrinsics for rounding with e4f01da15d8c6ce3e8c77ff3ff3d2ce2574a3f7b. However, llvm semantics follow the standard math lib round function, which is specced to do roundNearestAwayFromZero, but we really want roundNearestEven (moreover, using round generates atrocious code since the cpu can't do it directly and it results in scalar calls to libm __roundf). So, use llvm.nearbyint instead, which does exactly the right thing, and even has the advantage of being available with llvm 3.3 too. (I've verified it actually generates a roundps instruction with llvm 3.3.) This fixes https://bugs.freedesktop.org/show_bug.cgi?id=94909 Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 99 +-------------------- 1 file changed, 1 insertion(+), 98 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 0c43617d531..9cb745e4cfe 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1663,99 +1663,6 @@ enum lp_build_round_mode LP_BUILD_ROUND_TRUNCATE = 3 }; -/** - * Helper for SSE4.1's ROUNDxx instructions. - * - * NOTE: In the SSE4.1's nearest mode, if two values are equally close, the - * result is the even value. That is, rounding 2.5 will be 2.0, and not 3.0. 
- */ -static inline LLVMValueRef -lp_build_nearest_sse41(struct lp_build_context *bld, - LLVMValueRef a) -{ - LLVMBuilderRef builder = bld->gallivm->builder; - const struct lp_type type = bld->type; - LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); - LLVMValueRef mode = LLVMConstNull(i32t); - const char *intrinsic; - LLVMValueRef res; - - assert(type.floating); - - assert(lp_check_value(type, a)); - assert(util_cpu_caps.has_sse4_1); - - if (type.length == 1) { - LLVMTypeRef vec_type; - LLVMValueRef undef; - LLVMValueRef args[3]; - LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0); - - switch(type.width) { - case 32: - intrinsic = "llvm.x86.sse41.round.ss"; - break; - case 64: - intrinsic = "llvm.x86.sse41.round.sd"; - break; - default: - assert(0); - return bld->undef; - } - - vec_type = LLVMVectorType(bld->elem_type, 4); - - undef = LLVMGetUndef(vec_type); - - args[0] = undef; - args[1] = LLVMBuildInsertElement(builder, undef, a, index0, ""); - args[2] = mode; - - res = lp_build_intrinsic(builder, intrinsic, - vec_type, args, Elements(args), 0); - - res = LLVMBuildExtractElement(builder, res, index0, ""); - } - else { - if (type.width * type.length == 128) { - switch(type.width) { - case 32: - intrinsic = "llvm.x86.sse41.round.ps"; - break; - case 64: - intrinsic = "llvm.x86.sse41.round.pd"; - break; - default: - assert(0); - return bld->undef; - } - } - else { - assert(type.width * type.length == 256); - assert(util_cpu_caps.has_avx); - - switch(type.width) { - case 32: - intrinsic = "llvm.x86.avx.round.ps.256"; - break; - case 64: - intrinsic = "llvm.x86.avx.round.pd.256"; - break; - default: - assert(0); - return bld->undef; - } - } - - res = lp_build_intrinsic_binary(builder, intrinsic, - bld->vec_type, a, - mode); - } - - return res; -} - - static inline LLVMValueRef lp_build_iround_nearest_sse2(struct lp_build_context *bld, LLVMValueRef a) @@ -1863,11 +1770,7 @@ lp_build_round_arch(struct lp_build_context *bld, switch (mode) { case 
LP_BUILD_ROUND_NEAREST: - if (HAVE_LLVM >= 0x0304) { - intrinsic_root = "llvm.round"; - } else { - return lp_build_nearest_sse41(bld, a); - } + intrinsic_root = "llvm.nearbyint"; break; case LP_BUILD_ROUND_FLOOR: intrinsic_root = "llvm.floor"; -- 2.30.2