draw: hack around weird primitive id input in gs

[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_arit.c
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c

index 09107ff71382b22537ca7a4ab6e2b204740f8ae9..3d341442708ae0a1e3140a048cb7ae9226def9ed 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -64,6 +64,17 @@
  #include "lp_bld_arit.h"
  #include "lp_bld_flow.h"
  
+#if defined(PIPE_ARCH_SSE)
+#include <xmmintrin.h>
+#endif
+/* Fallback MXCSR bit masks, used when no included header defines them. */
+#ifndef _MM_DENORMALS_ZERO_MASK
+#define _MM_DENORMALS_ZERO_MASK 0x0040
+#endif
+
+#ifndef _MM_FLUSH_ZERO_MASK
+#define _MM_FLUSH_ZERO_MASK 0x8000
+#endif
  
  #define EXP_POLY_DEGREE 5
  
@@ -123,8 +134,10 @@ lp_build_min_simple(struct lp_build_context *bld,
        }
     }
     else if (type.floating && util_cpu_caps.has_altivec) {
-      debug_printf("%s: altivec doesn't support nan behavior modes\n",
-                   __FUNCTION__);
+      if (nan_behavior == GALLIVM_NAN_RETURN_NAN) {
+         debug_printf("%s: altivec doesn't support nan return nan behavior\n",
+                      __FUNCTION__);
+      }
        if (type.width == 32 && type.length == 4) {
           intrinsic = "llvm.ppc.altivec.vminfp";
           intr_size = 128;
@@ -159,8 +172,6 @@ lp_build_min_simple(struct lp_build_context *bld,
        }
     } else if (util_cpu_caps.has_altivec) {
        intr_size = 128;
-      debug_printf("%s: altivec doesn't support nan behavior modes\n",
-                   __FUNCTION__);
        if (type.width == 8) {
           if (!type.sign) {
              intrinsic = "llvm.ppc.altivec.vminub";
@@ -191,7 +202,7 @@ lp_build_min_simple(struct lp_build_context *bld,
         */
        if (util_cpu_caps.has_sse && type.floating &&
            nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
-          nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
+          nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) {
           LLVMValueRef isnan, max;
           max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
                                                     type,
@@ -227,7 +238,7 @@ lp_build_min_simple(struct lp_build_context *bld,
           return lp_build_select(bld, cond, a, b);
        }
           break;
-      case GALLIVM_NAN_RETURN_SECOND:
+      case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN:
           cond = lp_build_cmp_ordered(bld, PIPE_FUNC_LESS, a, b);
           return lp_build_select(bld, cond, a, b);
        case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
@@ -299,8 +310,10 @@ lp_build_max_simple(struct lp_build_context *bld,
        }
     }
     else if (type.floating && util_cpu_caps.has_altivec) {
-      debug_printf("%s: altivec doesn't support nan behavior modes\n",
-                   __FUNCTION__);
+      if (nan_behavior == GALLIVM_NAN_RETURN_NAN) {
+         debug_printf("%s: altivec doesn't support nan return nan behavior\n",
+                      __FUNCTION__);
+      }
        if (type.width == 32 || type.length == 4) {
           intrinsic = "llvm.ppc.altivec.vmaxfp";
           intr_size = 128;
@@ -336,8 +349,6 @@ lp_build_max_simple(struct lp_build_context *bld,
        }
     } else if (util_cpu_caps.has_altivec) {
       intr_size = 128;
-     debug_printf("%s: altivec doesn't support nan behavior modes\n",
-                  __FUNCTION__);
       if (type.width == 8) {
         if (!type.sign) {
           intrinsic = "llvm.ppc.altivec.vmaxub";
@@ -362,7 +373,7 @@ lp_build_max_simple(struct lp_build_context *bld,
     if(intrinsic) {
        if (util_cpu_caps.has_sse && type.floating &&
            nan_behavior != GALLIVM_NAN_BEHAVIOR_UNDEFINED &&
-          nan_behavior != GALLIVM_NAN_RETURN_SECOND) {
+          nan_behavior != GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN) {
           LLVMValueRef isnan, min;
           min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic,
                                                     type,
@@ -398,7 +409,7 @@ lp_build_max_simple(struct lp_build_context *bld,
           return lp_build_select(bld, cond, a, b);
        }
           break;
-      case GALLIVM_NAN_RETURN_SECOND:
+      case GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN:
           cond = lp_build_cmp_ordered(bld, PIPE_FUNC_GREATER, a, b);
           return lp_build_select(bld, cond, a, b);
        case GALLIVM_NAN_BEHAVIOR_UNDEFINED:
@@ -1399,6 +1410,7 @@ lp_build_max_ext(struct lp_build_context *bld,
  
  /**
   * Generate clamp(a, min, max)
+ * NaN behavior (for any of a, min, max) is undefined.
   * Do checks for special cases.
   */
  LLVMValueRef
@@ -1417,6 +1429,20 @@ lp_build_clamp(struct lp_build_context *bld,
  }
  
  
+/**
+ * Clamp a to [0, 1]; a NaN input is converted to zero.
+ * The lower bound is applied first, then the upper bound.
+ */
+LLVMValueRef
+lp_build_clamp_zero_one_nanzero(struct lp_build_context *bld,
+                                LLVMValueRef a)
+{
+   LLVMValueRef floored = lp_build_max_ext(bld, a, bld->zero,
+                             GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+   return lp_build_min(bld, floored, bld->one);
+}
+
+
  /**
   * Generate abs(a)
   */
@@ -1473,11 +1499,9 @@ lp_build_negate(struct lp_build_context *bld,
  
     assert(lp_check_value(bld->type, a));
  
-#if HAVE_LLVM >= 0x0207
     if (bld->type.floating)
        a = LLVMBuildFNeg(builder, a, "");
     else
-#endif
        a = LLVMBuildNeg(builder, a, "");
  
     return a;
@@ -3001,12 +3025,9 @@ const double lp_build_exp2_polynomial[] = {
  };
  
  
-void
-lp_build_exp2_approx(struct lp_build_context *bld,
-                     LLVMValueRef x,
-                     LLVMValueRef *p_exp2_int_part,
-                     LLVMValueRef *p_frac_part,
-                     LLVMValueRef *p_exp2)
+LLVMValueRef
+lp_build_exp2(struct lp_build_context *bld,
+              LLVMValueRef x)
  {
     LLVMBuilderRef builder = bld->gallivm->builder;
     const struct lp_type type = bld->type;
@@ -3019,65 +3040,47 @@ lp_build_exp2_approx(struct lp_build_context *bld,
  
     assert(lp_check_value(bld->type, x));
  
-   if(p_exp2_int_part || p_frac_part || p_exp2) {
-      /* TODO: optimize the constant case */
-      if (gallivm_debug & GALLIVM_DEBUG_PERF &&
-          LLVMIsConstant(x)) {
-         debug_printf("%s: inefficient/imprecise constant arithmetic\n",
-                      __FUNCTION__);
-      }
  
-      assert(type.floating && type.width == 32);
+   /* TODO: optimize the constant case */
+   if (gallivm_debug & GALLIVM_DEBUG_PERF &&
+       LLVMIsConstant(x)) {
+      debug_printf("%s: inefficient/imprecise constant arithmetic\n",
+                   __FUNCTION__);
+   }
  
-      /* We want to preserve NaN and make sure than for exp2 if x > 128,
-       * the result is INF  and if it's smaller than -126.9 the result is 0 */
-      x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type,  128.0), x,
-                           GALLIVM_NAN_RETURN_SECOND);
-      x = lp_build_max_ext(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), x,
-                           GALLIVM_NAN_RETURN_SECOND);
+   assert(type.floating && type.width == 32);
  
-      /* ipart = floor(x) */
-      /* fpart = x - ipart */
-      lp_build_ifloor_fract(bld, x, &ipart, &fpart);
-   }
+   /* We want to preserve NaN and make sure that for exp2 if x > 128,
+    * the result is INF and if it's smaller than -126.9 the result is 0 */
+   x = lp_build_min_ext(bld, lp_build_const_vec(bld->gallivm, type,  128.0), x,
+                        GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
+   x = lp_build_max(bld, lp_build_const_vec(bld->gallivm, type, -126.99999), x);
  
-   if(p_exp2_int_part || p_exp2) {
-      /* expipart = (float) (1 << ipart) */
-      expipart = LLVMBuildAdd(builder, ipart,
-                              lp_build_const_int_vec(bld->gallivm, type, 127), "");
-      expipart = LLVMBuildShl(builder, expipart,
-                              lp_build_const_int_vec(bld->gallivm, type, 23), "");
-      expipart = LLVMBuildBitCast(builder, expipart, vec_type, "");
-   }
+   /* ipart = floor(x) */
+   /* fpart = x - ipart */
+   lp_build_ifloor_fract(bld, x, &ipart, &fpart);
  
-   if(p_exp2) {
-      expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
-                                     Elements(lp_build_exp2_polynomial));
  
-      res = LLVMBuildFMul(builder, expipart, expfpart, "");
-   }
  
-   if(p_exp2_int_part)
-      *p_exp2_int_part = expipart;
+   /* expipart = (float) (1 << ipart) */
+   expipart = LLVMBuildAdd(builder, ipart,
+                           lp_build_const_int_vec(bld->gallivm, type, 127), "");
+   expipart = LLVMBuildShl(builder, expipart,
+                           lp_build_const_int_vec(bld->gallivm, type, 23), "");
+   expipart = LLVMBuildBitCast(builder, expipart, vec_type, "");
  
-   if(p_frac_part)
-      *p_frac_part = fpart;
  
-   if(p_exp2)
-      *p_exp2 = res;
-}
+   expfpart = lp_build_polynomial(bld, fpart, lp_build_exp2_polynomial,
+                                  Elements(lp_build_exp2_polynomial));
+
+   res = LLVMBuildFMul(builder, expipart, expfpart, "");
  
  
-LLVMValueRef
-lp_build_exp2(struct lp_build_context *bld,
-              LLVMValueRef x)
-{
-   LLVMValueRef res;
-   lp_build_exp2_approx(bld, x, NULL, NULL, &res);
     return res;
  }
  
  
+
  /**
   * Extract the exponent of a IEEE-754 floating point value.
   *
@@ -3495,3 +3498,68 @@ lp_build_is_inf_or_nan(struct gallivm_state *gallivm,
     return ret;
  }
  
+
+/**
+ * Emit llvm.x86.sse.stmxcsr, saving MXCSR into a 32-bit lp_build_alloca slot.
+ * Returns the slot pointer, or NULL when util_cpu_caps.has_sse is not set.
+ */
+LLVMValueRef
+lp_build_fpstate_get(struct gallivm_state *gallivm)
+{
+   if (util_cpu_caps.has_sse) {
+      LLVMBuilderRef builder = gallivm->builder;
+      LLVMValueRef mxcsr_ptr = lp_build_alloca(gallivm,
+         LLVMInt32TypeInContext(gallivm->context), "mxcsr_ptr");
+      LLVMValueRef mxcsr_ptr8 = LLVMBuildPointerCast(builder, mxcsr_ptr,
+          LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
+      lp_build_intrinsic(builder, "llvm.x86.sse.stmxcsr",
+         LLVMVoidTypeInContext(gallivm->context), &mxcsr_ptr8, 1);
+      return mxcsr_ptr;
+   }
+   return NULL;
+}
+
+/**
+ * Emit code that sets (zero true) or clears (zero false) the MXCSR FTZ bit,
+ * and also the DAZ bit when util_cpu_caps.has_daz. Does nothing without SSE.
+ */
+void
+lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm,
+                                  boolean zero)
+{
+   if (util_cpu_caps.has_sse) {
+      /* FTZ (0x8000) always; unsigned so ~daz_ftz is not sign-extended. */
+      unsigned daz_ftz = _MM_FLUSH_ZERO_MASK;
+      LLVMBuilderRef builder = gallivm->builder;
+      LLVMValueRef mxcsr_ptr = lp_build_fpstate_get(gallivm);
+      LLVMValueRef mxcsr = LLVMBuildLoad(builder, mxcsr_ptr, "mxcsr");
+      if (util_cpu_caps.has_daz) {
+         daz_ftz |= _MM_DENORMALS_ZERO_MASK;   /* DAZ (0x0040) */
+      }
+      if (zero) {
+         mxcsr = LLVMBuildOr(builder, mxcsr,
+                             LLVMConstInt(LLVMTypeOf(mxcsr), daz_ftz, 0), "");
+      } else {
+         mxcsr = LLVMBuildAnd(builder, mxcsr,
+                              LLVMConstInt(LLVMTypeOf(mxcsr), ~daz_ftz, 0), "");
+      }
+      /* Store the new value to the slot, then ldmxcsr from it. */
+      LLVMBuildStore(builder, mxcsr, mxcsr_ptr);
+      lp_build_fpstate_set(gallivm, mxcsr_ptr);
+   }
+}
+
+/** Emit llvm.x86.sse.ldmxcsr, loading MXCSR from mxcsr_ptr (SSE only). */
+void
+lp_build_fpstate_set(struct gallivm_state *gallivm,
+                     LLVMValueRef mxcsr_ptr)
+{
+   if (util_cpu_caps.has_sse) {
+      LLVMBuilderRef builder = gallivm->builder;
+      LLVMTypeRef i8_ptr_type =
+         LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
+      mxcsr_ptr = LLVMBuildPointerCast(builder, mxcsr_ptr, i8_ptr_type, "");
+      lp_build_intrinsic(builder, "llvm.x86.sse.ldmxcsr",
+         LLVMVoidTypeInContext(gallivm->context), &mxcsr_ptr, 1);
+   }
+}