iris: Don't enable smooth points when point sprites are enabled

[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_arit.c
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c

index a1edd349f1f1897d0323c1215b4a3d4d300914ef..057c50ed278ce50b45dbe570373a643a942a0fb1 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -541,39 +541,43 @@ lp_build_add(struct lp_build_context *bld,
     assert(lp_check_value(type, a));
     assert(lp_check_value(type, b));
  
-   if(a == bld->zero)
+   if (a == bld->zero)
        return b;
-   if(b == bld->zero)
+   if (b == bld->zero)
        return a;
-   if(a == bld->undef || b == bld->undef)
+   if (a == bld->undef || b == bld->undef)
        return bld->undef;
  
-   if(bld->type.norm) {
+   if (type.norm) {
        const char *intrinsic = NULL;
  
-      if(a == bld->one || b == bld->one)
+      if (!type.sign && (a == bld->one || b == bld->one))
          return bld->one;
  
        if (!type.floating && !type.fixed) {
           if (type.width * type.length == 128) {
-            if(util_cpu_caps.has_sse2) {
-              if(type.width == 8)
-                intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
-              if(type.width == 16)
-                intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
+            if (util_cpu_caps.has_sse2) {
+               if (type.width == 8)
+                 intrinsic = type.sign ? "llvm.x86.sse2.padds.b" :
+                                         HAVE_LLVM < 0x0800 ? "llvm.x86.sse2.paddus.b" : NULL;
+               if (type.width == 16)
+                 intrinsic = type.sign ? "llvm.x86.sse2.padds.w" :
+                                         HAVE_LLVM < 0x0800 ? "llvm.x86.sse2.paddus.w" : NULL;
              } else if (util_cpu_caps.has_altivec) {
-              if(type.width == 8)
-                 intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : "llvm.ppc.altivec.vaddubs";
-              if(type.width == 16)
-                 intrinsic = type.sign ? "llvm.ppc.altivec.vaddshs" : "llvm.ppc.altivec.vadduhs";
+               if (type.width == 8)
+                  intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : "llvm.ppc.altivec.vaddubs";
+               if (type.width == 16)
+                  intrinsic = type.sign ? "llvm.ppc.altivec.vaddshs" : "llvm.ppc.altivec.vadduhs";
              }
           }
           if (type.width * type.length == 256) {
-            if(util_cpu_caps.has_avx2) {
-              if(type.width == 8)
-                intrinsic = type.sign ? "llvm.x86.avx2.padds.b" : "llvm.x86.avx2.paddus.b";
-              if(type.width == 16)
-                intrinsic = type.sign ? "llvm.x86.avx2.padds.w" : "llvm.x86.avx2.paddus.w";
+            if (util_cpu_caps.has_avx2) {
+               if (type.width == 8)
+                  intrinsic = type.sign ? "llvm.x86.avx2.padds.b" :
+                                          HAVE_LLVM < 0x0800 ? "llvm.x86.avx2.paddus.b" : NULL;
+               if (type.width == 16)
+                  intrinsic = type.sign ? "llvm.x86.avx2.padds.w" :
+                                          HAVE_LLVM < 0x0800 ? "llvm.x86.avx2.paddus.w" : NULL;
              }
           }
        }
@@ -592,8 +596,6 @@ lp_build_add(struct lp_build_context *bld,
           LLVMValueRef a_clamp_max = lp_build_min_simple(bld, a, LLVMBuildSub(builder, max_val, b, ""), GALLIVM_NAN_BEHAVIOR_UNDEFINED);
           LLVMValueRef a_clamp_min = lp_build_max_simple(bld, a, LLVMBuildSub(builder, min_val, b, ""), GALLIVM_NAN_BEHAVIOR_UNDEFINED);
           a = lp_build_select(bld, lp_build_cmp(bld, PIPE_FUNC_GREATER, b, bld->zero), a_clamp_max, a_clamp_min);
-      } else {
-         a = lp_build_min_simple(bld, a, lp_build_comp(bld, b), GALLIVM_NAN_BEHAVIOR_UNDEFINED);
        }
     }
  
@@ -612,6 +614,24 @@ lp_build_add(struct lp_build_context *bld,
     if(bld->type.norm && (bld->type.floating || bld->type.fixed))
        res = lp_build_min_simple(bld, res, bld->one, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
  
+   if (type.norm && !type.floating && !type.fixed) {
+      if (!type.sign) {
+         /*
+          * Newer LLVM versions no longer support the intrinsics, but they do
+          * recognize the pattern. Since auto-upgrade of intrinsics doesn't
+          * work for JIT code, it is important that we match the pattern LLVM
+          * uses (and pray LLVM doesn't change it - and hope they decide on
+          * the same pattern for all backends supporting it...).
+          * NOTE: cmp/select does a sext/trunc of the mask. This does not seem
+          * to interfere with LLVM's ability to recognize the pattern, but it
+          * seems a bit brittle.
+          */
+         LLVMValueRef overflowed = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, res);
+         res = lp_build_select(bld, overflowed,
+                               LLVMConstAllOnes(bld->int_vec_type), res);
+      }
+   }
+
     /* XXX clamp to floor of -1 or 0??? */
  
     return res;
@@ -842,39 +862,43 @@ lp_build_sub(struct lp_build_context *bld,
     assert(lp_check_value(type, a));
     assert(lp_check_value(type, b));
  
-   if(b == bld->zero)
+   if (b == bld->zero)
        return a;
-   if(a == bld->undef || b == bld->undef)
+   if (a == bld->undef || b == bld->undef)
        return bld->undef;
-   if(a == b)
+   if (a == b)
        return bld->zero;
  
-   if(bld->type.norm) {
+   if (type.norm) {
        const char *intrinsic = NULL;
  
-      if(b == bld->one)
+      if (!type.sign && b == bld->one)
          return bld->zero;
  
        if (!type.floating && !type.fixed) {
           if (type.width * type.length == 128) {
              if (util_cpu_caps.has_sse2) {
-              if(type.width == 8)
-                 intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
-              if(type.width == 16)
-                 intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
+               if (type.width == 8)
+                  intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" :
+                                          HAVE_LLVM < 0x0800 ? "llvm.x86.sse2.psubus.b" : NULL;
+               if (type.width == 16)
+                  intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" :
+                                          HAVE_LLVM < 0x0800 ? "llvm.x86.sse2.psubus.w" : NULL;
              } else if (util_cpu_caps.has_altivec) {
-              if(type.width == 8)
-                 intrinsic = type.sign ? "llvm.ppc.altivec.vsubsbs" : "llvm.ppc.altivec.vsububs";
-              if(type.width == 16)
-                 intrinsic = type.sign ? "llvm.ppc.altivec.vsubshs" : "llvm.ppc.altivec.vsubuhs";
+               if (type.width == 8)
+                  intrinsic = type.sign ? "llvm.ppc.altivec.vsubsbs" : "llvm.ppc.altivec.vsububs";
+               if (type.width == 16)
+                  intrinsic = type.sign ? "llvm.ppc.altivec.vsubshs" : "llvm.ppc.altivec.vsubuhs";
              }
           }
           if (type.width * type.length == 256) {
              if (util_cpu_caps.has_avx2) {
-              if(type.width == 8)
-                 intrinsic = type.sign ? "llvm.x86.avx2.psubs.b" : "llvm.x86.avx2.psubus.b";
-              if(type.width == 16)
-                 intrinsic = type.sign ? "llvm.x86.avx2.psubs.w" : "llvm.x86.avx2.psubus.w";
+               if (type.width == 8)
+                  intrinsic = type.sign ? "llvm.x86.avx2.psubs.b" :
+                                          HAVE_LLVM < 0x0800 ? "llvm.x86.avx2.psubus.b" : NULL;
+               if (type.width == 16)
+                  intrinsic = type.sign ? "llvm.x86.avx2.psubs.w" :
+                                          HAVE_LLVM < 0x0800 ? "llvm.x86.avx2.psubus.w" : NULL;
              }
           }
        }
@@ -894,7 +918,16 @@ lp_build_sub(struct lp_build_context *bld,
           LLVMValueRef a_clamp_min = lp_build_max_simple(bld, a, LLVMBuildAdd(builder, min_val, b, ""), GALLIVM_NAN_BEHAVIOR_UNDEFINED);
           a = lp_build_select(bld, lp_build_cmp(bld, PIPE_FUNC_GREATER, b, bld->zero), a_clamp_min, a_clamp_max);
        } else {
-         a = lp_build_max_simple(bld, a, b, GALLIVM_NAN_BEHAVIOR_UNDEFINED);
+         /*
+          * This must match LLVM's pattern for saturated unsigned sub.
+          * (lp_build_max_simple actually does the job with its current
+          * definition, but we do it explicitly here.)
+          * NOTE: cmp/select does a sext/trunc of the mask. This does not seem
+          * to interfere with LLVM's ability to recognize the pattern, but it
+          * seems a bit brittle.
+          */
+         LLVMValueRef no_ov = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+         a = lp_build_select(bld, no_ov, a, b);
        }
     }
  
@@ -963,7 +996,7 @@ lp_build_sub(struct lp_build_context *bld,
   * @sa Michael Herf, The "double blend trick", May 2000, 
   *     http://www.stereopsis.com/doubleblend.html
   */
-static LLVMValueRef
+LLVMValueRef
  lp_build_mul_norm(struct gallivm_state *gallivm,
                    struct lp_type wide_type,
                    LLVMValueRef a, LLVMValueRef b)
@@ -1307,7 +1340,7 @@ lp_build_mul_imm(struct lp_build_context *bld,
     if(b == 2 && bld->type.floating)
        return lp_build_add(bld, a, a);
  
-   if(util_is_power_of_two(b)) {
+   if(util_is_power_of_two_or_zero(b)) {
        unsigned shift = ffs(b) - 1;
  
        if(bld->type.floating) {
@@ -1959,6 +1992,8 @@ arch_rounding_available(const struct lp_type type)
     else if ((util_cpu_caps.has_altivec &&
              (type.width == 32 && type.length == 4)))
        return TRUE;
+   else if (util_cpu_caps.has_neon)
+      return TRUE;
  
     return FALSE;
  }
@@ -2066,7 +2101,7 @@ lp_build_round_arch(struct lp_build_context *bld,
                      LLVMValueRef a,
                      enum lp_build_round_mode mode)
  {
-   if (util_cpu_caps.has_sse4_1) {
+   if (util_cpu_caps.has_sse4_1 || util_cpu_caps.has_neon) {
        LLVMBuilderRef builder = bld->gallivm->builder;
        const struct lp_type type = bld->type;
        const char *intrinsic_root;
@@ -2444,7 +2479,7 @@ lp_build_iround(struct lp_build_context *bld,
     else {
        LLVMValueRef half;
  
-      half = lp_build_const_vec(bld->gallivm, type, 0.5);
+      half = lp_build_const_vec(bld->gallivm, type, nextafterf(0.5, 0.0));
  
        if (type.sign) {
           LLVMTypeRef vec_type = bld->vec_type;