Merge commit 'origin/7.8'

[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_arit.c
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c

index 32f9e5201c54fa566c0e02eb67f450290594d0f2..8e8fcccf564ead77eae89145de1df7bbd7946622 100644 (file)
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -232,6 +232,37 @@ lp_build_add(struct lp_build_context *bld,
  }
  
  
+/** Return the sum of the elements of a */
+LLVMValueRef
+lp_build_sum_vector(struct lp_build_context *bld,
+                    LLVMValueRef a)
+{
+   const struct lp_type type = bld->type;
+   LLVMValueRef index, res;
+   int i;
+
+   if (a == bld->zero)
+      return bld->zero;
+   if (a == bld->undef)
+      return bld->undef;
+   assert(type.length > 1);
+
+   assert(!bld->type.norm);
+
+   index = LLVMConstInt(LLVMInt32Type(), 0, 0);
+   res = LLVMBuildExtractElement(bld->builder, a, index, "");
+
+   for (i = 1; i < type.length; i++) {
+      index = LLVMConstInt(LLVMInt32Type(), i, 0);
+      res = LLVMBuildAdd(bld->builder, res,
+                         LLVMBuildExtractElement(bld->builder, a, index, ""),
+                         "");
+   }
+
+   return res;
+}
+
+
  /**
   * Generate a - b
   */
@@ -330,12 +361,12 @@ lp_build_mul_u8n(LLVMBuilderRef builder,
     LLVMValueRef c8;
     LLVMValueRef ab;
  
-   c8 = lp_build_int_const_scalar(i16_type, 8);
+   c8 = lp_build_const_int_vec(i16_type, 8);
     
  #if 0
     
     /* a*b/255 ~= (a*(b + 1)) >> 256 */
-   b = LLVMBuildAdd(builder, b, lp_build_int_const_scalar(i16_type, 1), "");
+   b = LLVMBuildAdd(builder, b, lp_build_const_int_vec(i16_type, 1), "");
     ab = LLVMBuildMul(builder, a, b, "");
  
  #else
@@ -343,7 +374,7 @@ lp_build_mul_u8n(LLVMBuilderRef builder,
     /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */
     ab = LLVMBuildMul(builder, a, b, "");
     ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), "");
-   ab = LLVMBuildAdd(builder, ab, lp_build_int_const_scalar(i16_type, 0x80), "");
+   ab = LLVMBuildAdd(builder, ab, lp_build_const_int_vec(i16_type, 0x80), "");
  
  #endif
     
@@ -398,7 +429,7 @@ lp_build_mul(struct lp_build_context *bld,
     }
  
     if(type.fixed)
-      shift = lp_build_int_const_scalar(type, type.width/2);
+      shift = lp_build_const_int_vec(type, type.width/2);
     else
        shift = NULL;
  
@@ -460,7 +491,7 @@ lp_build_mul_imm(struct lp_build_context *bld,
            * for Inf and NaN.
            */
           unsigned mantissa = lp_mantissa(bld->type);
-         factor = lp_build_int_const_scalar(bld->type, (unsigned long long)shift << mantissa);
+         factor = lp_build_const_int_vec(bld->type, (unsigned long long)shift << mantissa);
           a = LLVMBuildBitCast(bld->builder, a, lp_build_int_vec_type(bld->type), "");
           a = LLVMBuildAdd(bld->builder, a, factor, "");
           a = LLVMBuildBitCast(bld->builder, a, lp_build_vec_type(bld->type), "");
@@ -468,12 +499,12 @@ lp_build_mul_imm(struct lp_build_context *bld,
  #endif
        }
        else {
-         factor = lp_build_const_scalar(bld->type, shift);
+         factor = lp_build_const_vec(bld->type, shift);
           return LLVMBuildShl(bld->builder, a, factor, "");
        }
     }
  
-   factor = lp_build_const_scalar(bld->type, (double)b);
+   factor = lp_build_const_vec(bld->type, (double)b);
     return lp_build_mul(bld, a, factor);
  }
  
@@ -536,7 +567,7 @@ lp_build_lerp(struct lp_build_context *bld,
         * but it will be wrong for other uses. Basically we need a more
         * powerful lp_type, capable of further distinguishing the values
         * interpretation from the value storage. */
-      res = LLVMBuildAnd(bld->builder, res, lp_build_int_const_scalar(bld->type, (1 << bld->type.width/2) - 1), "");
+      res = LLVMBuildAnd(bld->builder, res, lp_build_const_int_vec(bld->type, (1 << bld->type.width/2) - 1), "");
  
     return res;
  }
@@ -644,13 +675,26 @@ lp_build_abs(struct lp_build_context *bld,
  
     if(type.floating) {
        /* Mask out the sign bit */
-      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
-      unsigned long long absMask = ~(1ULL << (type.width - 1));
-      LLVMValueRef mask = lp_build_int_const_scalar(type, ((unsigned long long) absMask));
-      a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
-      a = LLVMBuildAnd(bld->builder, a, mask, "");
-      a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
-      return a;
+      if (type.length == 1) {
+         LLVMTypeRef int_type = LLVMIntType(type.width);
+         LLVMTypeRef float_type = LLVMFloatType();
+         unsigned long long absMask = ~(1ULL << (type.width - 1));
+         LLVMValueRef mask = LLVMConstInt(int_type, absMask, 0);
+         a = LLVMBuildBitCast(bld->builder, a, int_type, "");
+         a = LLVMBuildAnd(bld->builder, a, mask, "");
+         a = LLVMBuildBitCast(bld->builder, a, float_type, "");
+         return a;
+      }
+      else {
+         /* vector of floats */
+         LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+         unsigned long long absMask = ~(1ULL << (type.width - 1));
+         LLVMValueRef mask = lp_build_const_int_vec(type, ((unsigned long long) absMask));
+         a = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+         a = LLVMBuildAnd(bld->builder, a, mask, "");
+         a = LLVMBuildBitCast(bld->builder, a, vec_type, "");
+         return a;
+      }
     }
  
     if(type.width*type.length == 128 && util_cpu_caps.has_ssse3) {
@@ -676,12 +720,12 @@ lp_build_negate(struct lp_build_context *bld,
  }
  
  
+/** Return -1, 0 or +1 depending on the sign of a */
  LLVMValueRef
  lp_build_sgn(struct lp_build_context *bld,
               LLVMValueRef a)
  {
     const struct lp_type type = bld->type;
-   LLVMTypeRef vec_type = lp_build_vec_type(type);
     LLVMValueRef cond;
     LLVMValueRef res;
  
@@ -691,27 +735,42 @@ lp_build_sgn(struct lp_build_context *bld,
        res = bld->one;
     }
     else if(type.floating) {
-      /* Take the sign bit and add it to 1 constant */
-      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
-      LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
+      LLVMTypeRef vec_type;
+      LLVMTypeRef int_type;
+      LLVMValueRef mask;
        LLVMValueRef sign;
        LLVMValueRef one;
-      sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
+      unsigned long long maskBit = (unsigned long long)1 << (type.width - 1);
+
+      if (type.length == 1) {
+         int_type = lp_build_int_elem_type(type);
+         vec_type = lp_build_elem_type(type);
+         mask = LLVMConstInt(int_type, maskBit, 0);
+      }
+      else {
+         /* vector */
+         int_type = lp_build_int_vec_type(type);
+         vec_type = lp_build_vec_type(type);
+         mask = lp_build_const_int_vec(type, maskBit);
+      }
+
+      /* Take the sign bit and add it to 1 constant */
+      sign = LLVMBuildBitCast(bld->builder, a, int_type, "");
        sign = LLVMBuildAnd(bld->builder, sign, mask, "");
-      one = LLVMConstBitCast(bld->one, int_vec_type);
+      one = LLVMConstBitCast(bld->one, int_type);
        res = LLVMBuildOr(bld->builder, sign, one, "");
        res = LLVMBuildBitCast(bld->builder, res, vec_type, "");
     }
     else
     {
-      LLVMValueRef minus_one = lp_build_const_scalar(type, -1.0);
+      LLVMValueRef minus_one = lp_build_const_vec(type, -1.0);
        cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, bld->zero);
        res = lp_build_select(bld, cond, bld->one, minus_one);
     }
  
     /* Handle zero */
     cond = lp_build_cmp(bld, PIPE_FUNC_EQUAL, a, bld->zero);
-   res = lp_build_select(bld, cond, bld->zero, bld->one);
+   res = lp_build_select(bld, cond, bld->zero, res);
  
     return res;
  }
@@ -730,8 +789,8 @@ lp_build_set_sign(struct lp_build_context *bld,
     const struct lp_type type = bld->type;
     LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
     LLVMTypeRef vec_type = lp_build_vec_type(type);
-   LLVMValueRef shift = lp_build_int_const_scalar(type, type.width - 1);
-   LLVMValueRef mask = lp_build_int_const_scalar(type,
+   LLVMValueRef shift = lp_build_const_int_vec(type, type.width - 1);
+   LLVMValueRef mask = lp_build_const_int_vec(type,
                               ~((unsigned long long) 1 << (type.width - 1)));
     LLVMValueRef val, res;
  
@@ -753,7 +812,7 @@ lp_build_set_sign(struct lp_build_context *bld,
  
  
  /**
- * Convert vector of int to vector of float.
+ * Convert vector of (or scalar) int to vector of (or scalar) float.
   */
  LLVMValueRef
  lp_build_int_to_float(struct lp_build_context *bld,
@@ -764,7 +823,11 @@ lp_build_int_to_float(struct lp_build_context *bld,
     assert(type.floating);
     /*assert(lp_check_value(type, a));*/
  
-   {
+   if (type.length == 1) {
+      LLVMTypeRef float_type = LLVMFloatType();
+      return LLVMBuildSIToFP(bld->builder, a, float_type, "");
+   }
+   else {
        LLVMTypeRef vec_type = lp_build_vec_type(type);
        /*LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);*/
        LLVMValueRef res;
@@ -866,6 +929,13 @@ lp_build_floor(struct lp_build_context *bld,
  
     assert(type.floating);
  
+   if (type.length == 1) {
+      LLVMValueRef res;
+      res = lp_build_ifloor(bld, a);
+      res = LLVMBuildSIToFP(bld->builder, res, LLVMFloatType(), "");
+      return res;
+   }
+
     if(util_cpu_caps.has_sse4_1)
        return lp_build_round_sse41(bld, a, LP_BUILD_ROUND_SSE41_FLOOR);
     else {
@@ -921,15 +991,24 @@ lp_build_itrunc(struct lp_build_context *bld,
                  LLVMValueRef a)
  {
     const struct lp_type type = bld->type;
-   LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
  
     assert(type.floating);
-   assert(lp_check_value(type, a));
  
-   return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+   if (type.length == 1) {
+      LLVMTypeRef int_type = LLVMIntType(type.width);
+      return LLVMBuildFPToSI(bld->builder, a, int_type, "");
+   }
+   else {
+      LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
+      assert(lp_check_value(type, a));
+      return LLVMBuildFPToSI(bld->builder, a, int_vec_type, "");
+   }
  }
  
  
+/**
+ * Convert float[] to int[] with round().
+ */
  LLVMValueRef
  lp_build_iround(struct lp_build_context *bld,
                  LLVMValueRef a)
@@ -939,6 +1018,15 @@ lp_build_iround(struct lp_build_context *bld,
     LLVMValueRef res;
  
     assert(type.floating);
+
+   if (type.length == 1) {
+      /* scalar float to int */
+      LLVMTypeRef int_type = LLVMIntType(type.width);
+      /* XXX we want rounding here! */
+      res = LLVMBuildFPToSI(bld->builder, a, int_type, "");
+      return res;
+   }
+
     assert(lp_check_value(type, a));
  
     if(util_cpu_caps.has_sse4_1) {
@@ -946,7 +1034,7 @@ lp_build_iround(struct lp_build_context *bld,
     }
     else {
        LLVMTypeRef vec_type = lp_build_vec_type(type);
-      LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
+      LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
        LLVMValueRef sign;
        LLVMValueRef half;
  
@@ -955,7 +1043,7 @@ lp_build_iround(struct lp_build_context *bld,
        sign = LLVMBuildAnd(bld->builder, sign, mask, "");
  
        /* sign * 0.5 */
-      half = lp_build_const_scalar(type, 0.5);
+      half = lp_build_const_vec(type, 0.5);
        half = LLVMBuildBitCast(bld->builder, half, int_vec_type, "");
        half = LLVMBuildOr(bld->builder, sign, half, "");
        half = LLVMBuildBitCast(bld->builder, half, vec_type, "");
@@ -981,6 +1069,14 @@ lp_build_ifloor(struct lp_build_context *bld,
     LLVMValueRef res;
  
     assert(type.floating);
+
+   if (type.length == 1) {
+      /* scalar float to int */
+      LLVMTypeRef int_type = LLVMIntType(type.width);
+      res = LLVMBuildFPToSI(bld->builder, a, int_type, "");
+      return res;
+   }
+
     assert(lp_check_value(type, a));
  
     if(util_cpu_caps.has_sse4_1) {
@@ -990,18 +1086,18 @@ lp_build_ifloor(struct lp_build_context *bld,
        /* Take the sign bit and add it to 1 constant */
        LLVMTypeRef vec_type = lp_build_vec_type(type);
        unsigned mantissa = lp_mantissa(type);
-      LLVMValueRef mask = lp_build_int_const_scalar(type, (unsigned long long)1 << (type.width - 1));
+      LLVMValueRef mask = lp_build_const_int_vec(type, (unsigned long long)1 << (type.width - 1));
        LLVMValueRef sign;
        LLVMValueRef offset;
  
        /* sign = a < 0 ? ~0 : 0 */
        sign = LLVMBuildBitCast(bld->builder, a, int_vec_type, "");
        sign = LLVMBuildAnd(bld->builder, sign, mask, "");
-      sign = LLVMBuildAShr(bld->builder, sign, lp_build_int_const_scalar(type, type.width - 1), "");
+      sign = LLVMBuildAShr(bld->builder, sign, lp_build_const_int_vec(type, type.width - 1), "");
        lp_build_name(sign, "floor.sign");
  
        /* offset = -0.99999(9)f */
-      offset = lp_build_const_scalar(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa));
+      offset = lp_build_const_vec(type, -(double)(((unsigned long long)1 << mantissa) - 1)/((unsigned long long)1 << mantissa));
        offset = LLVMConstBitCast(offset, int_vec_type);
  
        /* offset = a < 0 ? -0.99999(9)f : 0.0f */
@@ -1172,7 +1268,7 @@ lp_build_exp(struct lp_build_context *bld,
               LLVMValueRef x)
  {
     /* log2(e) = 1/log(2) */
-   LLVMValueRef log2e = lp_build_const_scalar(bld->type, 1.4426950408889634);
+   LLVMValueRef log2e = lp_build_const_vec(bld->type, 1.4426950408889634);
  
     return lp_build_mul(bld, log2e, lp_build_exp2(bld, x));
  }
@@ -1186,7 +1282,7 @@ lp_build_log(struct lp_build_context *bld,
               LLVMValueRef x)
  {
     /* log(2) */
-   LLVMValueRef log2 = lp_build_const_scalar(bld->type, 0.69314718055994529);
+   LLVMValueRef log2 = lp_build_const_vec(bld->type, 0.69314718055994529);
  
     return lp_build_mul(bld, log2, lp_build_exp2(bld, x));
  }
@@ -1207,6 +1303,7 @@ lp_build_polynomial(struct lp_build_context *bld,
                      unsigned num_coeffs)
  {
     const struct lp_type type = bld->type;
+   LLVMTypeRef float_type = LLVMFloatType();
     LLVMValueRef res = NULL;
     unsigned i;
  
@@ -1216,7 +1313,13 @@ lp_build_polynomial(struct lp_build_context *bld,
                     __FUNCTION__);
  
     for (i = num_coeffs; i--; ) {
-      LLVMValueRef coeff = lp_build_const_scalar(type, coeffs[i]);
+      LLVMValueRef coeff;
+
+      if (type.length == 1)
+         coeff = LLVMConstReal(float_type, coeffs[i]);
+      else
+         coeff = lp_build_const_vec(type, coeffs[i]);
+
        if(res)
           res = lp_build_add(bld, coeff, lp_build_mul(bld, x, res));
        else
@@ -1272,11 +1375,11 @@ lp_build_exp2_approx(struct lp_build_context *bld,
  
        assert(type.floating && type.width == 32);
  
-      x = lp_build_min(bld, x, lp_build_const_scalar(type,  129.0));
-      x = lp_build_max(bld, x, lp_build_const_scalar(type, -126.99999));
+      x = lp_build_min(bld, x, lp_build_const_vec(type,  129.0));
+      x = lp_build_max(bld, x, lp_build_const_vec(type, -126.99999));
  
        /* ipart = int(x - 0.5) */
-      ipart = LLVMBuildSub(bld->builder, x, lp_build_const_scalar(type, 0.5f), "");
+      ipart = LLVMBuildSub(bld->builder, x, lp_build_const_vec(type, 0.5f), "");
        ipart = LLVMBuildFPToSI(bld->builder, ipart, int_vec_type, "");
  
        /* fpart = x - ipart */
@@ -1286,8 +1389,8 @@ lp_build_exp2_approx(struct lp_build_context *bld,
  
     if(p_exp2_int_part || p_exp2) {
        /* expipart = (float) (1 << ipart) */
-      expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_int_const_scalar(type, 127), "");
-      expipart = LLVMBuildShl(bld->builder, expipart, lp_build_int_const_scalar(type, 23), "");
+      expipart = LLVMBuildAdd(bld->builder, ipart, lp_build_const_int_vec(type, 127), "");
+      expipart = LLVMBuildShl(bld->builder, expipart, lp_build_const_int_vec(type, 23), "");
        expipart = LLVMBuildBitCast(bld->builder, expipart, vec_type, "");
     }
  
@@ -1353,8 +1456,8 @@ lp_build_log2_approx(struct lp_build_context *bld,
     LLVMTypeRef vec_type = lp_build_vec_type(type);
     LLVMTypeRef int_vec_type = lp_build_int_vec_type(type);
  
-   LLVMValueRef expmask = lp_build_int_const_scalar(type, 0x7f800000);
-   LLVMValueRef mantmask = lp_build_int_const_scalar(type, 0x007fffff);
+   LLVMValueRef expmask = lp_build_const_int_vec(type, 0x7f800000);
+   LLVMValueRef mantmask = lp_build_const_int_vec(type, 0x007fffff);
     LLVMValueRef one = LLVMConstBitCast(bld->one, int_vec_type);
  
     LLVMValueRef i = NULL;
@@ -1379,8 +1482,8 @@ lp_build_log2_approx(struct lp_build_context *bld,
     }
  
     if(p_floor_log2 || p_log2) {
-      logexp = LLVMBuildLShr(bld->builder, exp, lp_build_int_const_scalar(type, 23), "");
-      logexp = LLVMBuildSub(bld->builder, logexp, lp_build_int_const_scalar(type, 127), "");
+      logexp = LLVMBuildLShr(bld->builder, exp, lp_build_const_int_vec(type, 23), "");
+      logexp = LLVMBuildSub(bld->builder, logexp, lp_build_const_int_vec(type, 127), "");
        logexp = LLVMBuildSIToFP(bld->builder, logexp, vec_type, "");
     }
  
@@ -1410,11 +1513,87 @@ lp_build_log2_approx(struct lp_build_context *bld,
  }
  
  
+/** scalar version of above function */
+static void
+lp_build_float_log2_approx(struct lp_build_context *bld,
+                           LLVMValueRef x,
+                           LLVMValueRef *p_exp,
+                           LLVMValueRef *p_floor_log2,
+                           LLVMValueRef *p_log2)
+{
+   const struct lp_type type = bld->type;
+   LLVMTypeRef float_type = LLVMFloatType();
+   LLVMTypeRef int_type = LLVMIntType(type.width);
+
+   LLVMValueRef expmask = LLVMConstInt(int_type, 0x7f800000, 0);
+   LLVMValueRef mantmask = LLVMConstInt(int_type, 0x007fffff, 0);
+   LLVMValueRef one = LLVMConstBitCast(bld->one, int_type);
+
+   LLVMValueRef i = NULL;
+   LLVMValueRef exp = NULL;
+   LLVMValueRef mant = NULL;
+   LLVMValueRef logexp = NULL;
+   LLVMValueRef logmant = NULL;
+   LLVMValueRef res = NULL;
+
+   if(p_exp || p_floor_log2 || p_log2) {
+      /* TODO: optimize the constant case */
+      if(LLVMIsConstant(x))
+         debug_printf("%s: inefficient/imprecise constant arithmetic\n",
+                      __FUNCTION__);
+
+      assert(type.floating && type.width == 32);
+
+      i = LLVMBuildBitCast(bld->builder, x, int_type, "");
+
+      /* exp = (float) exponent(x) */
+      exp = LLVMBuildAnd(bld->builder, i, expmask, "");
+   }
+
+   if(p_floor_log2 || p_log2) {
+      LLVMValueRef c23 = LLVMConstInt(int_type, 23, 0);
+      LLVMValueRef c127 = LLVMConstInt(int_type, 127, 0);
+      logexp = LLVMBuildLShr(bld->builder, exp, c23, "");
+      logexp = LLVMBuildSub(bld->builder, logexp, c127, "");
+      logexp = LLVMBuildSIToFP(bld->builder, logexp, float_type, "");
+   }
+
+   if(p_log2) {
+      /* mant = (float) mantissa(x) */
+      mant = LLVMBuildAnd(bld->builder, i, mantmask, "");
+      mant = LLVMBuildOr(bld->builder, mant, one, "");
+      mant = LLVMBuildBitCast(bld->builder, mant, float_type, "");
+
+      logmant = lp_build_polynomial(bld, mant, lp_build_log2_polynomial,
+                                    Elements(lp_build_log2_polynomial));
+
+      /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/
+      logmant = LLVMBuildMul(bld->builder, logmant, LLVMBuildSub(bld->builder, mant, bld->one, ""), "");
+
+      res = LLVMBuildAdd(bld->builder, logmant, logexp, "");
+   }
+
+   if(p_exp)
+      *p_exp = exp;
+
+   if(p_floor_log2)
+      *p_floor_log2 = logexp;
+
+   if(p_log2)
+      *p_log2 = res;
+}
+
+
  LLVMValueRef
  lp_build_log2(struct lp_build_context *bld,
                LLVMValueRef x)
  {
     LLVMValueRef res;
-   lp_build_log2_approx(bld, x, NULL, NULL, &res);
+   if (bld->type.length == 1) {
+      lp_build_float_log2_approx(bld, x, NULL, NULL, &res);
+   }
+   else {
+      lp_build_log2_approx(bld, x, NULL, NULL, &res);
+   }
     return res;
  }