gallivm: Prefer backend agnostic intrinsic for rounding.
authorJose Fonseca <jfonseca@vmware.com>
Fri, 1 Apr 2016 10:50:28 +0000 (11:50 +0100)
committerJose Fonseca <jfonseca@vmware.com>
Sun, 3 Apr 2016 21:09:07 +0000 (22:09 +0100)
We could unconditionally use these instrinsics, but performance with SSE2
would suck, as LLVM falls back to calling libm.

lp_test_arit.

Reviewed-by: Roland Scheidegger <sroland@vmware.com>
src/gallium/auxiliary/gallivm/lp_bld_arit.c

index 01c6ba96e3f284d8a3f7641e6098c9735ec55b89..12777436d7c9908f1d482b33bef1f195e8bfb08a 100644 (file)
@@ -1675,13 +1675,13 @@ enum lp_build_round_mode
  * result is the even value.  That is, rounding 2.5 will be 2.0, and not 3.0.
  */
 static inline LLVMValueRef
-lp_build_round_sse41(struct lp_build_context *bld,
-                     LLVMValueRef a,
-                     enum lp_build_round_mode mode)
+lp_build_nearest_sse41(struct lp_build_context *bld,
+                       LLVMValueRef a)
 {
    LLVMBuilderRef builder = bld->gallivm->builder;
    const struct lp_type type = bld->type;
    LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
+   LLVMValueRef mode = LLVMConstNull(i32t);
    const char *intrinsic;
    LLVMValueRef res;
 
@@ -1714,7 +1714,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
 
       args[0] = undef;
       args[1] = LLVMBuildInsertElement(builder, undef, a, index0, "");
-      args[2] = LLVMConstInt(i32t, mode, 0);
+      args[2] = mode;
 
       res = lp_build_intrinsic(builder, intrinsic,
                                vec_type, args, Elements(args), 0);
@@ -1754,7 +1754,7 @@ lp_build_round_sse41(struct lp_build_context *bld,
 
       res = lp_build_intrinsic_binary(builder, intrinsic,
                                       bld->vec_type, a,
-                                      LLVMConstInt(i32t, mode, 0));
+                                      mode);
    }
 
    return res;
@@ -1856,8 +1856,40 @@ lp_build_round_arch(struct lp_build_context *bld,
                     LLVMValueRef a,
                     enum lp_build_round_mode mode)
 {
-   if (util_cpu_caps.has_sse4_1)
-     return lp_build_round_sse41(bld, a, mode);
+   if (util_cpu_caps.has_sse4_1) {
+      LLVMBuilderRef builder = bld->gallivm->builder;
+      const struct lp_type type = bld->type;
+      const char *intrinsic_root;
+      char intrinsic[32];
+
+      assert(type.floating);
+      assert(lp_check_value(type, a));
+      (void)type;
+
+      switch (mode) {
+      case LP_BUILD_ROUND_NEAREST:
+         if (HAVE_LLVM >= 0x0304) {
+            intrinsic_root = "llvm.round";
+         } else {
+            return lp_build_nearest_sse41(bld, a);
+         }
+         break;
+      case LP_BUILD_ROUND_FLOOR:
+         intrinsic_root = "llvm.floor";
+         break;
+      case LP_BUILD_ROUND_CEIL:
+         intrinsic_root = "llvm.ceil";
+         break;
+      case LP_BUILD_ROUND_TRUNCATE:
+         intrinsic_root = "llvm.trunc";
+         break;
+      }
+
+      util_snprintf(intrinsic, sizeof intrinsic, "%s.v%uf%u",
+                    intrinsic_root, type.length, type.width);
+
+      return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a);
+   }
    else /* (util_cpu_caps.has_altivec) */
      return lp_build_round_altivec(bld, a, mode);
 }