Merge branch 'mesa_7_6_branch'

[mesa.git] / src / gallium / drivers / llvmpipe / lp_bld_arit.c
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c

index aec3e297f4a2167f596a04a85923c66cea1d3067..0b115fc9b0779eb74239dc2e204929180b02be6e 100644 (file)
--- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c
@@ -52,15 +52,20 @@
  #include "lp_bld_type.h"
  #include "lp_bld_const.h"
  #include "lp_bld_intr.h"
+#include "lp_bld_logic.h"
  #include "lp_bld_arit.h"
  
  
+/**
+ * Generate min(a, b)
+ * No checks for special case values of a or b = 1 or 0 are done.
+ */
  static LLVMValueRef
  lp_build_min_simple(struct lp_build_context *bld,
                      LLVMValueRef a,
                      LLVMValueRef b)
  {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
     const char *intrinsic = NULL;
     LLVMValueRef cond;
  
@@ -94,20 +99,21 @@ lp_build_min_simple(struct lp_build_context *bld,
     if(intrinsic)
        return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
  
-   if(type.floating)
-      cond = LLVMBuildFCmp(bld->builder, LLVMRealULT, a, b, "");
-   else
-      cond = LLVMBuildICmp(bld->builder, type.sign ? LLVMIntSLT : LLVMIntULT, a, b, "");
-   return LLVMBuildSelect(bld->builder, cond, a, b, "");
+   cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b);
+   return lp_build_select(bld, cond, a, b);
  }
  
  
+/**
+ * Generate max(a, b)
+ * No checks for special case values of a or b = 1 or 0 are done.
+ */
  static LLVMValueRef
  lp_build_max_simple(struct lp_build_context *bld,
                      LLVMValueRef a,
                      LLVMValueRef b)
  {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
     const char *intrinsic = NULL;
     LLVMValueRef cond;
  
@@ -141,19 +147,19 @@ lp_build_max_simple(struct lp_build_context *bld,
     if(intrinsic)
        return lp_build_intrinsic_binary(bld->builder, intrinsic, lp_build_vec_type(bld->type), a, b);
  
-   if(type.floating)
-      cond = LLVMBuildFCmp(bld->builder, LLVMRealULT, a, b, "");
-   else
-      cond = LLVMBuildICmp(bld->builder, type.sign ? LLVMIntSLT : LLVMIntULT, a, b, "");
-   return LLVMBuildSelect(bld->builder, cond, b, a, "");
+   cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b);
+   return lp_build_select(bld, cond, a, b);
  }
  
  
+/**
+ * Generate 1 - a, or ~a depending on bld->type.
+ */
  LLVMValueRef
  lp_build_comp(struct lp_build_context *bld,
                LLVMValueRef a)
  {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
  
     if(a == bld->one)
        return bld->zero;
@@ -174,12 +180,15 @@ lp_build_comp(struct lp_build_context *bld,
  }
  
  
+/**
+ * Generate a + b
+ */
  LLVMValueRef
  lp_build_add(struct lp_build_context *bld,
               LLVMValueRef a,
               LLVMValueRef b)
  {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
     LLVMValueRef res;
  
     if(a == bld->zero)
@@ -214,19 +223,25 @@ lp_build_add(struct lp_build_context *bld,
     else
        res = LLVMBuildAdd(bld->builder, a, b, "");
  
+   /* clamp to ceiling of 1.0 */
     if(bld->type.norm && (bld->type.floating || bld->type.fixed))
        res = lp_build_min_simple(bld, res, bld->one);
  
+   /* XXX clamp to floor of -1 or 0??? */
+
     return res;
  }
  
  
+/**
+ * Generate a - b
+ */
  LLVMValueRef
  lp_build_sub(struct lp_build_context *bld,
               LLVMValueRef a,
               LLVMValueRef b)
  {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
     LLVMValueRef res;
  
     if(b == bld->zero)
@@ -289,6 +304,9 @@ lp_build_unpack_shuffle(unsigned n, unsigned lo_hi)
  }
  
  
+/**
+ * Build constant int vector of width 'n' and value 'c'.
+ */
  static LLVMValueRef 
  lp_build_const_vec(LLVMTypeRef type, unsigned n, long long c)
  {
@@ -379,12 +397,15 @@ lp_build_mul_u8n(LLVMBuilderRef builder,
  }
  
  
+/**
+ * Generate a * b
+ */
  LLVMValueRef
  lp_build_mul(struct lp_build_context *bld,
               LLVMValueRef a,
               LLVMValueRef b)
  {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
  
     if(a == bld->zero)
        return bld->zero;
@@ -448,12 +469,15 @@ lp_build_mul(struct lp_build_context *bld,
  }
  
  
+/**
+ * Generate a / b
+ */
  LLVMValueRef
  lp_build_div(struct lp_build_context *bld,
               LLVMValueRef a,
               LLVMValueRef b)
  {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
  
     if(a == bld->zero)
        return bld->zero;
@@ -478,6 +502,35 @@ lp_build_div(struct lp_build_context *bld,
  }
  
  
+/**
+ * Generate the linear interpolation v0 + x * (v1 - v0).
+ * Built from lp_build_sub(), lp_build_mul() and lp_build_add(), in that order.
+ */
+LLVMValueRef
+lp_build_lerp(struct lp_build_context *bld,
+              LLVMValueRef x,
+              LLVMValueRef v0,
+              LLVMValueRef v1)
+{
+   LLVMValueRef delta = lp_build_sub(bld, v1, v0);
+   LLVMValueRef scaled = lp_build_mul(bld, x, delta);
+
+   return lp_build_add(bld, v0, scaled);
+}
+
+
+/**
+ * Generate a 2D (bilinear) interpolation.
+ * v00/v01 and v10/v11 are first interpolated with weight x, then the two
+ * intermediate results are interpolated with weight y.
+ */
+LLVMValueRef
+lp_build_lerp_2d(struct lp_build_context *bld,
+                 LLVMValueRef x,
+                 LLVMValueRef y,
+                 LLVMValueRef v00,
+                 LLVMValueRef v01,
+                 LLVMValueRef v10,
+                 LLVMValueRef v11)
+{
+   LLVMValueRef row0;
+   LLVMValueRef row1;
+
+   /* Keep the two x-interpolations as separate statements so that the
+    * instructions are emitted in a well-defined order. */
+   row0 = lp_build_lerp(bld, x, v00, v01);
+   row1 = lp_build_lerp(bld, x, v10, v11);
+
+   return lp_build_lerp(bld, y, row0, row1);
+}
+
+
+/**
+ * Generate min(a, b)
+ * Do checks for special cases.
+ */
  LLVMValueRef
  lp_build_min(struct lp_build_context *bld,
               LLVMValueRef a,
@@ -502,6 +555,10 @@ lp_build_min(struct lp_build_context *bld,
  }
  
  
+/**
+ * Generate max(a, b)
+ * Do checks for special cases.
+ */
  LLVMValueRef
  lp_build_max(struct lp_build_context *bld,
               LLVMValueRef a,
@@ -526,25 +583,39 @@ lp_build_max(struct lp_build_context *bld,
  }
  
  
+/**
+ * Generate abs(a)
+ */
  LLVMValueRef
  lp_build_abs(struct lp_build_context *bld,
               LLVMValueRef a)
  {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
+   LLVMTypeRef vec_type = lp_build_vec_type(type);
  
     if(!type.sign)
        return a;
  
-   /* XXX: is this really necessary? */
+   if(type.floating) {
+      /* Mask out the sign bit */
+      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+      LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long)1 << type.width) - 1);
+      a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+      a = LLVMBuildAnd(bld->builder, a, mask, "");
+      a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
+      return a;
+   }
+
  #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
-   if(!type.floating && type.width*type.length == 128) {
-      LLVMTypeRef vec_type = lp_build_vec_type(type);
-      if(type.width == 8)
+   if(type.width*type.length == 128) {
+      switch(type.width) {
+      case 8:
           return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.b.128", vec_type, a);
-      if(type.width == 16)
+      case 16:
           return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.w.128", vec_type, a);
-      if(type.width == 32)
+      case 32:
           return lp_build_intrinsic_unary(bld->builder, "llvm.x86.ssse3.pabs.d.128", vec_type, a);
+      }
     }
  #endif
  
@@ -552,11 +623,189 @@ lp_build_abs(struct lp_build_context *bld,
  }
  
  
+/**
+ * Generate sign(a): zero where a equals zero, otherwise one or minus one
+ * according to the sign of a (always one for unsigned types).
+ */
+LLVMValueRef
+lp_build_sgn(struct lp_build_context *bld,
+             LLVMValueRef a)
+{
+   const struct lp_type type = bld->type;
+   LLVMTypeRef vec_type = lp_build_vec_type(type);
+   LLVMValueRef cond;
+   LLVMValueRef res;
+
+   /* Handle non-zero case */
+   if(!type.sign) {
+      /* if not zero then sign must be positive */
+      res = bld->one;
+   }
+   else if(type.floating) {
+      /* Take the sign bit of a and OR it into the bit pattern of constant one */
+      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+      LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
+      LLVMValueRef sign;
+      LLVMValueRef one;
+      sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+      sign = LLVMBuildAnd(bld->builder, sign, mask, "");
+      one = LLVMConstBitCast(bld->one, int_vec_type);
+      res = LLVMBuildOr(bld->builder, sign, one, "");
+      res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
+   }
+   else
+   {
+      /* Signed non-float: pick one or minus one by comparing against zero */
+      LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0);
+      cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero);
+      res = lp_build_select(bld, cond, bld->one, minus_one);
+   }
+
+   /* Handle zero: where a != 0 keep the sign computed above (selecting
+    * bld->one here would throw that result away and never yield -1). */
+   cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero);
+   res = lp_build_select(bld, cond, bld->zero, res);
+
+   return res;
+}
+
+
+/**
+ * Rounding modes accepted by lp_build_round_sse41().
+ * The numeric value is passed unchanged as the 32-bit integer constant
+ * operand of the llvm.x86.sse41.round.* intrinsics.
+ * NOTE(review): the explicit values presumably mirror the rounding-control
+ * encoding of the SSE4.1 round instructions -- confirm before renumbering.
+ */
+enum lp_build_round_sse41_mode
+{
+   LP_BUILD_ROUND_SSE41_NEAREST = 0,
+   LP_BUILD_ROUND_SSE41_FLOOR = 1,
+   LP_BUILD_ROUND_SSE41_CEIL = 2,
+   LP_BUILD_ROUND_SSE41_TRUNCATE = 3
+};
+
+
+/**
+ * Emit a call to the SSE4.1 packed rounding intrinsic for a.
+ * Requires a floating point type whose vector is 128 bits wide; element
+ * widths of 32 and 64 bits are supported, anything else asserts and
+ * returns bld->undef.  'mode' is passed as the intrinsic's i32 operand.
+ */
+static INLINE LLVMValueRef
+lp_build_round_sse41(struct lp_build_context *bld,
+                     LLVMValueRef a,
+                     enum lp_build_round_sse41_mode mode)
+{
+   const struct lp_type type = bld->type;
+   LLVMTypeRef result_type = lp_build_vec_type(type);
+   const char *name;
+
+   assert(type.floating);
+   assert(type.width*type.length == 128);
+
+   if(type.width == 32) {
+      name = "llvm.x86.sse41.round.ps";
+   }
+   else if(type.width == 64) {
+      name = "llvm.x86.sse41.round.pd";
+   }
+   else {
+      assert(0);
+      return bld->undef;
+   }
+
+   return lp_build_intrinsic_binary(bld->builder, name, result_type, a,
+                                    LLVMConstInt(LLVMInt32Type(), mode, 0));
+}
+
+
+/**
+ * Generate round(a) using the LP_BUILD_ROUND_SSE41_NEAREST mode.
+ * Only implemented for x86/x86-64 builds, via lp_build_round_sse41();
+ * on other architectures this asserts and returns bld->undef.
+ */
+LLVMValueRef
+lp_build_round(struct lp_build_context *bld,
+               LLVMValueRef a)
+{
+   assert(bld->type.floating);
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_NEAREST);
+#else
+   /* FIXME */
+   assert(0);
+   return bld->undef;
+#endif
+}
+
+
+/**
+ * Generate floor(a) using the LP_BUILD_ROUND_SSE41_FLOOR mode.
+ * Only implemented for x86/x86-64 builds, via lp_build_round_sse41();
+ * on other architectures this asserts and returns bld->undef.
+ */
+LLVMValueRef
+lp_build_floor(struct lp_build_context *bld,
+               LLVMValueRef a)
+{
+   assert(bld->type.floating);
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
+#else
+   /* FIXME */
+   assert(0);
+   return bld->undef;
+#endif
+}
+
+
+/**
+ * Generate ceil(a) using the LP_BUILD_ROUND_SSE41_CEIL mode.
+ * Only implemented for x86/x86-64 builds, via lp_build_round_sse41();
+ * on other architectures this asserts and returns bld->undef.
+ */
+LLVMValueRef
+lp_build_ceil(struct lp_build_context *bld,
+              LLVMValueRef a)
+{
+   assert(bld->type.floating);
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_CEIL);
+#else
+   /* FIXME */
+   assert(0);
+   return bld->undef;
+#endif
+}
+
+
+/**
+ * Generate trunc(a) using the LP_BUILD_ROUND_SSE41_TRUNCATE mode.
+ * Only implemented for x86/x86-64 builds, via lp_build_round_sse41();
+ * on other architectures this asserts and returns bld->undef.
+ */
+LLVMValueRef
+lp_build_trunc(struct lp_build_context *bld,
+               LLVMValueRef a)
+{
+   assert(bld->type.floating);
+
+#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
+   return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_TRUNCATE);
+#else
+   /* FIXME */
+   assert(0);
+   return bld->undef;
+#endif
+}
+
+
+/**
+ * Convert a floating point value to integer using whichever rounding
+ * method is fastest; callers should not depend on a specific rounding.
+ * Currently emits a float-to-signed-int conversion (LLVMBuildFPToSI),
+ * which typically truncates towards zero.
+ */
+LLVMValueRef
+lp_build_int(struct lp_build_context *bld,
+             LLVMValueRef a)
+{
+   LLVMTypeRef dst_type = lp_build_int_vec_type(bld->type);
+
+   assert(bld->type.floating);
+
+   return LLVMBuildFPToSI(bld->builder, a, dst_type, "");
+}
+
+
+/**
+ * Generate floor(a) converted to integer: lp_build_floor() followed by
+ * lp_build_int().
+ */
+LLVMValueRef
+lp_build_ifloor(struct lp_build_context *bld,
+                LLVMValueRef a)
+{
+   LLVMValueRef floored = lp_build_floor(bld, a);
+
+   return lp_build_int(bld, floored);
+}
+
+
  LLVMValueRef
  lp_build_sqrt(struct lp_build_context *bld,
                LLVMValueRef a)
  {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
     LLVMTypeRef vec_type = lp_build_vec_type(type);
     char intrinsic[32];
  
@@ -574,7 +823,7 @@ LLVMValueRef
  lp_build_rcp(struct lp_build_context *bld,
               LLVMValueRef a)
  {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
  
     if(a == bld->zero)
        return bld->undef;
@@ -598,11 +847,14 @@ lp_build_rcp(struct lp_build_context *bld,
  }
  
  
+/**
+ * Generate 1/sqrt(a)
+ */
  LLVMValueRef
  lp_build_rsqrt(struct lp_build_context *bld,
                 LLVMValueRef a)
  {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
  
     assert(type.floating);
  
@@ -616,11 +868,14 @@ lp_build_rsqrt(struct lp_build_context *bld,
  }
  
  
+/**
+ * Generate cos(a)
+ */
  LLVMValueRef
  lp_build_cos(struct lp_build_context *bld,
                LLVMValueRef a)
  {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
     LLVMTypeRef vec_type = lp_build_vec_type(type);
     char intrinsic[32];
  
@@ -633,11 +888,14 @@ lp_build_cos(struct lp_build_context *bld,
  }
  
  
+/**
+ * Generate sin(a)
+ */
  LLVMValueRef
  lp_build_sin(struct lp_build_context *bld,
                LLVMValueRef a)
  {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
     LLVMTypeRef vec_type = lp_build_vec_type(type);
     char intrinsic[32];
  
@@ -650,6 +908,9 @@ lp_build_sin(struct lp_build_context *bld,
  }
  
  
+/**
+ * Generate pow(x, y)
+ */
  LLVMValueRef
  lp_build_pow(struct lp_build_context *bld,
               LLVMValueRef x,
@@ -663,23 +924,29 @@ lp_build_pow(struct lp_build_context *bld,
  }
  
  
+/**
+ * Generate exp(x), computed as exp2(x * log2(e)).
+ */
  LLVMValueRef
  lp_build_exp(struct lp_build_context *bld,
               LLVMValueRef x)
  {
     /* log2(e) = 1/log(2) */
-   LLVMValueRef log2e = lp_build_const_uni(bld->type, 1.4426950408889634);
+   LLVMValueRef log2e = lp_build_const_scalar(bld->type, 1.4426950408889634);
  
-   return lp_build_mul(bld, log2e, lp_build_exp2(bld, x));
+   /* e^x == 2^(x * log2(e)): the scale factor applies to the argument of
+    * exp2, not to its result. */
+   return lp_build_exp2(bld, lp_build_mul(bld, log2e, x));
  }
  
  
+/**
+ * Generate log(x) (natural logarithm), computed as log(2) * log2(x).
+ */
  LLVMValueRef
  lp_build_log(struct lp_build_context *bld,
               LLVMValueRef x)
  {
     /* log(2) */
-   LLVMValueRef log2 = lp_build_const_uni(bld->type, 1.4426950408889634);
+   LLVMValueRef log2 = lp_build_const_scalar(bld->type, 0.69314718055994529);
  
-   return lp_build_mul(bld, log2, lp_build_exp2(bld, x));
+   /* ln(x) == ln(2) * log2(x).
+    * NOTE(review): assumes lp_build_log2() is declared in lp_bld_arit.h
+    * alongside lp_build_exp2() -- confirm. */
+   return lp_build_mul(bld, log2, lp_build_log2(bld, x));
  }
@@ -689,13 +956,17 @@ lp_build_log(struct lp_build_context *bld,
  #define LOG_POLY_DEGREE 5
  
  
+/**
+ * Generate polynomial.
+ * Ex:  coeffs[0] + x * coeffs[1] + x^2 * coeffs[2].
+ */
  static LLVMValueRef
  lp_build_polynomial(struct lp_build_context *bld,
                      LLVMValueRef x,
                      const double *coeffs,
                      unsigned num_coeffs)
  {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
     LLVMValueRef res = NULL;
     unsigned i;
  
@@ -704,7 +975,7 @@ lp_build_polynomial(struct lp_build_context *bld,
        debug_printf("%s: inefficient/imprecise constant arithmetic\n");
  
     for (i = num_coeffs; i--; ) {
-      LLVMValueRef coeff = lp_build_const_uni(type, coeffs[i]);
+      LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]);
        if(res)
           res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res));
        else
@@ -743,11 +1014,14 @@ lp_build_exp2_approx(struct lp_build_context *bld,
                       LLVMValueRef *p_frac_part,
                       LLVMValueRef *p_exp2)
  {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
     LLVMTypeRef vec_type = lp_build_vec_type(type);
     LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
-   LLVMValueRef ipart;
-   LLVMValueRef fpart, expipart, expfpart, res;
+   LLVMValueRef ipart = NULL;
+   LLVMValueRef fpart = NULL;
+   LLVMValueRef expipart = NULL;
+   LLVMValueRef expfpart = NULL;
+   LLVMValueRef res = NULL;
  
     if(p_exp2_int_part || p_frac_part || p_exp2) {
        /* TODO: optimize the constant case */
@@ -756,11 +1030,11 @@ lp_build_exp2_approx(struct lp_build_context *bld,
  
        assert(type.floating && type.width == 32);
  
-      x = lp_build_min(bld, x, lp_build_const_uni(type,  129.0));
-      x = lp_build_max(bld, x, lp_build_const_uni(type, -126.99999));
+      x = lp_build_min(bld, x, lp_build_const_scalar(type,  129.0));
+      x = lp_build_max(bld, x, lp_build_const_scalar(type, -126.99999));
  
        /* ipart = int(x - 0.5) */
-      ipart = LLVMBuildSub(bld->builder, x, lp_build_const_uni(type, 0.5f), "");
+      ipart = LLVMBuildSub(bld->builder, x, lp_build_const_scalar(type, 0.5f), "");
        ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, "");
  
        /* fpart = x - ipart */
@@ -770,8 +1044,8 @@ lp_build_exp2_approx(struct lp_build_context *bld,
  
     if(p_exp2_int_part || p_exp2) {
        /* expipart = (float) (1 << ipart) */
-      expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_uni(type, 127), "");
-      expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_uni(type, 23), "");
+      expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_scalar(type, 127), "");
+      expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_scalar(type, 23), "");
        expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, "");
     }
  
@@ -833,20 +1107,20 @@ lp_build_log2_approx(struct lp_build_context *bld,
                       LLVMValueRef *p_floor_log2,
                       LLVMValueRef *p_log2)
  {
-   const union lp_type type = bld->type;
+   const struct lp_type type = bld->type;
     LLVMTypeRef vec_type = lp_build_vec_type(type);
     LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
  
-   LLVMValueRef expmask = lp_build_int_const_uni(type, 0x7f800000);
-   LLVMValueRef mantmask = lp_build_int_const_uni(type, 0x007fffff);
+   LLVMValueRef expmask = lp_build_int_const_scalar(type, 0x7f800000);
+   LLVMValueRef mantmask = lp_build_int_const_scalar(type, 0x007fffff);
     LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type);
  
-   LLVMValueRef i;
-   LLVMValueRef exp;
-   LLVMValueRef mant;
-   LLVMValueRef logexp;
-   LLVMValueRef logmant;
-   LLVMValueRef res;
+   LLVMValueRef i = NULL;
+   LLVMValueRef exp = NULL;
+   LLVMValueRef mant = NULL;
+   LLVMValueRef logexp = NULL;
+   LLVMValueRef logmant = NULL;
+   LLVMValueRef res = NULL;
  
     if(p_exp || p_floor_log2 || p_log2) {
        /* TODO: optimize the constant case */
@@ -862,8 +1136,8 @@ lp_build_log2_approx(struct lp_build_context *bld,
     }
  
     if(p_floor_log2 || p_log2) {
-      logexp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_uni(type, 23), "");
-      logexp = LLVMBuildSub(bld->builder, logexp, lp_build_int_const_uni(type, 127), "");
+      logexp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_scalar(type, 23), "");
+      logexp = LLVMBuildSub(bld->builder, logexp, lp_build_int_const_scalar(type, 127), "");
        logexp = LLVMBuildSIToFP(bld->builder, logexp, vec_type, "");
     }