gallivm: use llvm.nearbyint instead of llvm.round.
author: Roland Scheidegger <sroland@vmware.com>
Wed, 13 Apr 2016 03:00:03 +0000 (05:00 +0200)
committer: Jose Fonseca <jfonseca@vmware.com>
Wed, 13 Apr 2016 10:13:03 +0000 (11:13 +0100)
We used to use sse roundps intrinsic directly, but switched to use the llvm
intrinsics for rounding with e4f01da15d8c6ce3e8c77ff3ff3d2ce2574a3f7b.
However, the semantics of llvm.round follow the standard math lib round
function, which is specced to do roundNearestAwayFromZero, but we really want
roundNearestEven (moreover, using round generates atrocious code since the cpu
can't do it directly and it results in scalar calls to libm __roundf).
So, use llvm.nearbyint instead, which does exactly the right thing, and even
has the advantage of being available with llvm 3.3 too. (I've verified it
actually generates a roundps instruction with llvm 3.3.)

This fixes https://bugs.freedesktop.org/show_bug.cgi?id=94909

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
src/gallium/auxiliary/gallivm/lp_bld_arit.c

index 0c43617d5315c6c356d1c6409fe53c55caf600e5..9cb745e4cfe8779cc2fbc271478f79cc5c78b5b6 100644 (file)
@@ -1663,99 +1663,6 @@ enum lp_build_round_mode
    LP_BUILD_ROUND_TRUNCATE = 3
 };
 
-/**
- * Helper for SSE4.1's ROUNDxx instructions.
- *
- * NOTE: In the SSE4.1's nearest mode, if two values are equally close, the
- * result is the even value.  That is, rounding 2.5 will be 2.0, and not 3.0.
- */
-static inline LLVMValueRef
-lp_build_nearest_sse41(struct lp_build_context *bld,
-                       LLVMValueRef a)
-{
-   LLVMBuilderRef builder = bld->gallivm->builder;
-   const struct lp_type type = bld->type;
-   LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
-   LLVMValueRef mode = LLVMConstNull(i32t);
-   const char *intrinsic;
-   LLVMValueRef res;
-
-   assert(type.floating);
-
-   assert(lp_check_value(type, a));
-   assert(util_cpu_caps.has_sse4_1);
-
-   if (type.length == 1) {
-      LLVMTypeRef vec_type;
-      LLVMValueRef undef;
-      LLVMValueRef args[3];
-      LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
-
-      switch(type.width) {
-      case 32:
-         intrinsic = "llvm.x86.sse41.round.ss";
-         break;
-      case 64:
-         intrinsic = "llvm.x86.sse41.round.sd";
-         break;
-      default:
-         assert(0);
-         return bld->undef;
-      }
-
-      vec_type = LLVMVectorType(bld->elem_type, 4);
-
-      undef = LLVMGetUndef(vec_type);
-
-      args[0] = undef;
-      args[1] = LLVMBuildInsertElement(builder, undef, a, index0, "");
-      args[2] = mode;
-
-      res = lp_build_intrinsic(builder, intrinsic,
-                               vec_type, args, Elements(args), 0);
-
-      res = LLVMBuildExtractElement(builder, res, index0, "");
-   }
-   else {
-      if (type.width * type.length == 128) {
-         switch(type.width) {
-         case 32:
-            intrinsic = "llvm.x86.sse41.round.ps";
-            break;
-         case 64:
-            intrinsic = "llvm.x86.sse41.round.pd";
-            break;
-         default:
-            assert(0);
-            return bld->undef;
-         }
-      }
-      else {
-         assert(type.width * type.length == 256);
-         assert(util_cpu_caps.has_avx);
-
-         switch(type.width) {
-         case 32:
-            intrinsic = "llvm.x86.avx.round.ps.256";
-            break;
-         case 64:
-            intrinsic = "llvm.x86.avx.round.pd.256";
-            break;
-         default:
-            assert(0);
-            return bld->undef;
-         }
-      }
-
-      res = lp_build_intrinsic_binary(builder, intrinsic,
-                                      bld->vec_type, a,
-                                      mode);
-   }
-
-   return res;
-}
-
-
 static inline LLVMValueRef
 lp_build_iround_nearest_sse2(struct lp_build_context *bld,
                              LLVMValueRef a)
@@ -1863,11 +1770,7 @@ lp_build_round_arch(struct lp_build_context *bld,
 
       switch (mode) {
       case LP_BUILD_ROUND_NEAREST:
-         if (HAVE_LLVM >= 0x0304) {
-            intrinsic_root = "llvm.round";
-         } else {
-            return lp_build_nearest_sse41(bld, a);
-         }
+         intrinsic_root = "llvm.nearbyint";
          break;
       case LP_BUILD_ROUND_FLOOR:
          intrinsic_root = "llvm.floor";