glsl: Add utility function to round and pack int64_t value

[mesa.git] / src / compiler / glsl / float64.glsl
diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl

index 88209fc99f0c792626e5883c12d25da974786780..71ebbb8d907ece76dbe5caaa2b951db7020cd49a 100644 (file)
--- a/src/compiler/glsl/float64.glsl
+++ b/src/compiler/glsl/float64.glsl
@@ -451,6 +451,74 @@ __roundAndPackFloat64(uint zSign,
     return __packFloat64(zSign, zExp, zFrac0, zFrac1);
  }
  
+/* Rounds a fixed-point magnitude to an integer according to
+ * FLOAT_ROUNDING_MODE and returns it as an unsigned 64-bit value.
+ *
+ * `zFrac0' and `zFrac1' are the high and low 32-bit words of the integer
+ * part; `zFrac2' holds the bits below the binary point (0x80000000 is
+ * exactly one half).  `zSign' is non-zero for a negative input.  A negative
+ * input whose rounded magnitude is non-zero is not representable and yields
+ * 0xFFFFFFFFFFFFFFFF.
+ *
+ * NOTE(review): `increment' used to be read uninitialized, so in practice
+ * the fraction was probably just dropped.  Callers that want truncation
+ * regardless of FLOAT_ROUNDING_MODE should be re-checked.
+ */
+uint64_t
+__roundAndPackUInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)
+{
+   bool roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;
+   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;
+
+   /* Round-to-nearest: step to the next integer when the discarded
+    * fraction is at least one half.
+    */
+   bool increment = zFrac2 >= 0x80000000u;
+
+   if (!roundNearestEven) {
+      /* Directed modes step away from zero only when the discarded
+       * fraction is non-zero and the mode points away from zero for this
+       * sign.  Round-to-zero never increments.
+       */
+      if (zSign != 0u) {
+         increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&
+            (zFrac2 != 0u);
+      } else {
+         increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
+            (zFrac2 != 0u);
+      }
+   }
+
+   if (increment) {
+      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);
+      /* An exact tie in round-to-nearest-even goes to the even integer. */
+      if (roundNearestEven && zFrac2 == 0x80000000u)
+         zFrac1 &= ~1u;
+   }
+   return mix(packUint2x32(uvec2(zFrac1, zFrac0)), default_nan,
+              (zSign !=0u && (zFrac0 | zFrac1) != 0u));
+}
+
+/* Rounds a fixed-point magnitude to an integer according to
+ * FLOAT_ROUNDING_MODE, applies the sign and returns it as a signed 64-bit
+ * value.
+ *
+ * `zFrac0' and `zFrac1' are the high and low 32-bit words of the integer
+ * part; `zFrac2' holds the bits below the binary point (0x80000000 is
+ * exactly one half).  `zSign' is non-zero for a negative input.  When the
+ * signed result is non-zero and its sign disagrees with `zSign', one of the
+ * two sentinel constants below is returned instead.
+ *
+ * NOTE(review): `increment' used to be read uninitialized, so in practice
+ * the fraction was probably just dropped.  Callers that want truncation
+ * regardless of FLOAT_ROUNDING_MODE should be re-checked.
+ */
+int64_t
+__roundAndPackInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)
+{
+   bool roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;
+   int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL;
+   int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL;
+
+   /* Round-to-nearest: step to the next integer when the discarded
+    * fraction is at least one half.
+    */
+   bool increment = zFrac2 >= 0x80000000u;
+
+   if (!roundNearestEven) {
+      /* Directed modes step away from zero only when the discarded
+       * fraction is non-zero and the mode points away from zero for this
+       * sign.  Round-to-zero never increments.
+       */
+      if (zSign != 0u) {
+         increment = ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&
+            (zFrac2 != 0u));
+      } else {
+         increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
+            (zFrac2 != 0u);
+      }
+   }
+
+   if (increment) {
+      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);
+      /* An exact tie in round-to-nearest-even goes to the even integer. */
+      if (roundNearestEven && zFrac2 == 0x80000000u)
+         zFrac1 &= ~1u;
+   }
+
+   /* Reinterpret the rounded magnitude as signed and negate if needed. */
+   int64_t magnitude = int64_t(packUint2x32(uvec2(zFrac1, zFrac0)));
+   int64_t absZ = mix(magnitude, -magnitude, (zSign != 0u));
+   int64_t nan = mix(default_PosNaN, default_NegNaN, bool(zSign));
+   return mix(absZ, nan, bool(zSign ^ uint(absZ < 0)) && bool(absZ));
+}
+
  /* Returns the number of leading 0 bits before the most-significant 1 bit of
   * `a'.  If `a' is zero, 32 is returned.
   */
@@ -798,6 +866,12 @@ __fmul64(uint64_t a, uint64_t b)
     return __roundAndPackFloat64(zSign, zExp, zFrac0, zFrac1, zFrac2);
  }
  
+/* Computes a * b + c by feeding the result of __fmul64() into __fadd64().
+ * The product is whatever __fmul64() returns, so the two steps are performed
+ * one after the other rather than as a single fused operation.
+ */
+uint64_t
+__ffma64(uint64_t a, uint64_t b, uint64_t c)
+{
+   uint64_t product = __fmul64(a, b);
+
+   return __fadd64(product, c);
+}
+
  /* Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
   * number of bits given in `count'.  Any bits shifted off are lost.  The value
   * of `count' can be arbitrarily large; in particular, if `count' is greater
@@ -881,6 +955,69 @@ __uint_to_fp64(uint a)
     return __packFloat64(0u, 0x432 - shiftDist, aHigh, aLow);
  }
  
+/* Converts the unsigned 64-bit integer `a' to a double-precision bit
+ * pattern.  Zero maps to the all-zero pattern.  Otherwise the two 32-bit
+ * words of `a' are handed to __normalizeRoundAndPackFloat64(), unless the top
+ * bit of the value returned by __extractFloat64FracHi() is set, in which case
+ * the value is shifted right by one (with jamming) and rounded directly.
+ *
+ * NOTE(review): whether the top-bit branch can ever be taken depends on what
+ * __extractFloat64FracHi() returns for bit 31 -- confirm against its
+ * definition.
+ */
+uint64_t
+__uint64_to_fp64(uint64_t a)
+{
+   if (a == 0u)
+      return 0ul;
+
+   uint hiFrac = __extractFloat64FracHi(a);
+
+   if ((hiFrac & 0x80000000u) == 0u) {
+      uvec2 words = unpackUint2x32(a);
+      return __normalizeRoundAndPackFloat64(0, 0x432, words.y, words.x);
+   }
+
+   uint loFrac = __extractFloat64FracLo(a);
+   __shift64RightJamming(hiFrac, loFrac, 1, hiFrac, loFrac);
+   return __roundAndPackFloat64(0, 0x433, hiFrac, loFrac, 0u);
+}
+
+/* Converts the double-precision bit pattern `a' to an unsigned 64-bit
+ * integer.  The significand (with the implicit bit restored for non-zero
+ * exponents) is shifted so that its binary point sits below bit 0, and the
+ * result is rounded and range-checked by __roundAndPackUInt64().
+ *
+ * When the exponent field is 0x7FF: a non-zero fraction is forwarded to
+ * __propagateFloat64NaN(); otherwise (infinity) a positive input returns `a'
+ * unchanged and a negative input returns 0xFFFFFFFFFFFFFFFF.
+ */
+uint64_t
+__fp64_to_uint64(uint64_t a)
+{
+   uint aFracLo = __extractFloat64FracLo(a);
+   uint aFracHi = __extractFloat64FracHi(a);
+   int aExp = __extractFloat64Exp(a);
+   uint aSign = __extractFloat64Sign(a);
+   uint zFrac2 = 0u;
+   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;
+
+   /* Sample the fraction before the implicit bit is OR-ed in below;
+    * afterwards it is never zero for aExp == 0x7FF and infinity would be
+    * mistaken for NaN.  Assumes __extractFloat64FracHi() returns fraction
+    * bits only -- TODO confirm.
+    */
+   bool fracIsNonZero = (aFracHi | aFracLo) != 0u;
+
+   aFracHi = mix(aFracHi, aFracHi | 0x00100000u, aExp != 0);
+   int shiftCount = 0x433 - aExp;
+
+   if ( shiftCount <= 0 ) {
+      if (shiftCount < -11 && aExp == 0x7FF) {
+         if (fracIsNonZero)
+            return __propagateFloat64NaN(a, a);
+         return mix(default_nan, a, aSign == 0u);
+      }
+      /* NOTE(review): finite inputs with shiftCount < -11 (magnitude of
+       * 2^64 or more) also reach this shift; confirm that
+       * __shortShift64Left() tolerates such counts.
+       */
+      __shortShift64Left(aFracHi, aFracLo, -shiftCount, aFracHi, aFracLo);
+   } else {
+      __shift64ExtraRightJamming(aFracHi, aFracLo, zFrac2, shiftCount,
+                                 aFracHi, aFracLo, zFrac2);
+   }
+   return __roundAndPackUInt64(aSign, aFracHi, aFracLo, zFrac2);
+}
+
+/* Converts the signed 64-bit integer `a' to a double-precision bit pattern.
+ * Zero maps to the all-zero pattern.  Otherwise the magnitude of `a' is
+ * split into two 32-bit words and normalized, rounded and packed together
+ * with the sign by __normalizeRoundAndPackFloat64().
+ */
+uint64_t
+__int64_to_fp64(int64_t a)
+{
+   if (a==0)
+      return 0ul;
+
+   uint64_t absA = mix(uint64_t(a), uint64_t(-a), a < 0);
+   uint aFracHi = __extractFloat64FracHi(absA);
+   uvec2 aFrac = unpackUint2x32(absA);
+   uint zSign = uint(a < 0);
+
+   if ((aFracHi & 0x80000000u) != 0u) {
+      /* The magnitude has bit 63 set, which for a signed input only happens
+       * for the most negative value, -2^63.  With a zero fraction that is
+       * biased exponent 63 + 1023 = 0x43E (0x434 would encode 2^53).
+       */
+      return mix(0ul, __packFloat64(1, 0x43E, 0u, 0u), a < 0);
+   }
+
+   return __normalizeRoundAndPackFloat64(zSign, 0x432, aFrac.y, aFrac.x);
+}
+
  /* Returns the result of converting the double-precision floating-point value
   * `a' to the 32-bit two's complement integer format.  The conversion is
   * performed according to the IEEE Standard for Floating-Point Arithmetic---
@@ -945,6 +1082,18 @@ __int_to_fp64(int a)
     return __packFloat64(zSign, 0x412 - shiftCount, zFrac0, zFrac1);
  }
  
+/* Converts the double-precision bit pattern `a' to a boolean: the result is
+ * false when __feq64_nonnan() reports that the absolute value of `a' equals
+ * the all-zero pattern, and true otherwise.
+ */
+bool
+__fp64_to_bool(uint64_t a)
+{
+   uint64_t magnitude = __fabs64(a);
+   bool isZero = __feq64_nonnan(magnitude, 0ul);
+
+   return !isZero;
+}
+
+/* Converts the boolean `a' to a double-precision bit pattern by first
+ * converting it to an int and then calling __int_to_fp64().
+ */
+uint64_t
+__bool_to_fp64(bool a)
+{
+   int asInt = int(a);
+
+   return __int_to_fp64(asInt);
+}
+
  /* Packs the sign `zSign', exponent `zExp', and significand `zFrac' into a
   * single-precision floating-point value, returning the result.  After being
   * shifted into the proper positions, the three fields are simply added
@@ -1046,6 +1195,48 @@ __fp64_to_fp32(uint64_t __a)
     return __roundAndPackFloat32(aSign, aExp - 0x381, zFrac);
  }
  
+/* Converts the unsigned 64-bit integer `__a' to a single-precision value.
+ *
+ * `shiftCount' starts out as (number of leading zeros of the 64-bit value)
+ * minus 40.  When it is non-negative the value is shifted left and packed
+ * directly with no rounding step; zero takes this path and returns 0.
+ * Otherwise the value is aligned for __roundAndPackFloat32(), shifting right
+ * with jamming when bits must be dropped and left when there is still room.
+ */
+float
+__uint64_to_fp32(uint64_t __a)
+{
+   uint zFrac = 0u;
+   uvec2 aFrac = unpackUint2x32(__a);
+   int shiftCount = __countLeadingZeros32(mix(aFrac.y, aFrac.x, aFrac.y == 0u));
+   shiftCount -= mix(40, 8, aFrac.y == 0u);
+
+   if (0 <= shiftCount) {
+      __shortShift64Left(aFrac.y, aFrac.x, shiftCount, aFrac.y, aFrac.x);
+      bool is_zero = (aFrac.y | aFrac.x) == 0u;
+      return mix(__packFloat32(0u, 0x95 - shiftCount, aFrac.x), 0, is_zero);
+   }
+
+   shiftCount += 7;
+   if (shiftCount < 0) {
+      /* Bits have to be dropped: shift right, folding them into bit 0. */
+      __shift64RightJamming(aFrac.y, aFrac.x, -shiftCount, aFrac.y, aFrac.x);
+      zFrac = aFrac.x;
+   } else {
+      /* The high word is zero here and the low word still has headroom, so
+       * a plain left shift suffices.  The right-shift helper must not be
+       * called with a negative count.
+       */
+      zFrac = aFrac.x << shiftCount;
+   }
+   return __roundAndPackFloat32(0u, 0x9C - shiftCount, zFrac);
+}
+
+/* Converts the signed 64-bit integer `__a' to a single-precision value.
+ *
+ * The sign is split off and the magnitude is handled as an unsigned 64-bit
+ * value.  `shiftCount' starts out as (number of leading zeros of the
+ * magnitude) minus 40.  When it is non-negative the magnitude is shifted
+ * left and packed directly with no rounding step; zero takes this path and
+ * returns 0.  Otherwise the magnitude is aligned for
+ * __roundAndPackFloat32(), shifting right with jamming when bits must be
+ * dropped and left when there is still room.
+ */
+float
+__int64_to_fp32(int64_t __a)
+{
+   uint zFrac = 0u;
+   uint aSign = uint(__a < 0);
+   uint64_t absA = mix(uint64_t(__a), uint64_t(-__a), __a < 0);
+   uvec2 aFrac = unpackUint2x32(absA);
+   int shiftCount = __countLeadingZeros32(mix(aFrac.y, aFrac.x, aFrac.y == 0u));
+   shiftCount -= mix(40, 8, aFrac.y == 0u);
+
+   if (0 <= shiftCount) {
+      __shortShift64Left(aFrac.y, aFrac.x, shiftCount, aFrac.y, aFrac.x);
+      return mix(__packFloat32(aSign, 0x95 - shiftCount, aFrac.x), 0, absA == 0u);
+   }
+
+   shiftCount += 7;
+   if (shiftCount < 0) {
+      /* Bits have to be dropped: shift right, folding them into bit 0. */
+      __shift64RightJamming(aFrac.y, aFrac.x, -shiftCount, aFrac.y, aFrac.x);
+      zFrac = aFrac.x;
+   } else {
+      /* The high word is zero here and the low word still has headroom, so
+       * a plain left shift suffices.  The right-shift helper must not be
+       * called with a negative count.
+       */
+      zFrac = aFrac.x << shiftCount;
+   }
+   return __roundAndPackFloat32(aSign, 0x9C - shiftCount, zFrac);
+}
+
  /* Returns the result of converting the single-precision floating-point value
   * `a' to the double-precision floating-point format.
   */
@@ -1377,3 +1568,84 @@ __ftrunc64(uint64_t __a)
     zHi = mix(zHi, a.y, unbiasedExp > 52);
     return packUint2x32(uvec2(zLo, zHi));
  }
+
+/* Returns floor(a) for the double-precision bit pattern `a'.  The truncated
+ * value is returned as-is when `a' compares greater than or equal to zero or
+ * already equals its truncation; otherwise 1.0 is subtracted from the
+ * truncated value.
+ */
+uint64_t
+__ffloor64(uint64_t a)
+{
+   uint64_t truncated = __ftrunc64(a);
+
+   if (__fge64(a, 0ul))
+      return truncated;
+
+   if (__feq64(truncated, a))
+      return truncated;
+
+   return __fadd64(truncated, 0xbff0000000000000ul /* -1.0 */);
+}
+
+/* Rounds the double-precision bit pattern `__a' to an integral value by
+ * operating directly on its high and low 32-bit words.
+ *
+ * The unbiased exponent decides where the fraction bits live:
+ *   - below 0:  |a| < 1, the result is a signed zero or +/-1.0;
+ *   - 0..19:    all fraction bits that matter are in the high word;
+ *   - 20..51:   the remaining fraction bits are in the low word;
+ *   - above 51: the value is returned unchanged.
+ * For exponents of 0 and above, one half is added to the magnitude and the
+ * fraction bits are then cleared.
+ */
+uint64_t
+__fround64(uint64_t __a)
+{
+   uvec2 a = unpackUint2x32(__a);
+   int unbiasedExp = __extractFloat64Exp(__a) - 1023;
+   uint aHi = a.y;
+   uint aLo = a.x;
+
+   if (unbiasedExp < 20) {
+      if (unbiasedExp < 0) {
+         /* 0.5 <= |a| < 1 has unbiasedExp == -1.  Anything strictly above
+          * one half must round to +/-1.0, so the whole 52-bit fraction has
+          * to be examined, not just its low word (0.75 has a zero low
+          * word).  Exactly +/-0.5 keeps returning a signed zero.
+          */
+         bool aboveHalf = (unbiasedExp == -1) &&
+            (((aHi & 0x000FFFFFu) | aLo) != 0u);
+
+         aHi &= 0x80000000u;
+         if (aboveHalf)
+            aHi |= (1023u << 20);
+         aLo = 0u;
+      } else {
+         uint maskExp = 0x000FFFFFu >> unbiasedExp;
+         /* a is an integral value */
+         if (((aHi & maskExp) == 0u) && (aLo == 0u))
+            return __a;
+
+         /* Add one half, then drop the fraction bits. */
+         aHi += 0x00080000u >> unbiasedExp;
+         aHi &= ~maskExp;
+         aLo = 0u;
+      }
+   } else if (unbiasedExp > 51 || unbiasedExp == 1024) {
+      return __a;
+   } else {
+      uint maskExp = 0xFFFFFFFFu >> (unbiasedExp - 20);
+      /* a is an integral value */
+      if ((aLo & maskExp) == 0u)
+         return __a;
+      /* Add one half in the low word and carry into the high word. */
+      uint tmp = aLo + (1u << (51 - unbiasedExp));
+      if(tmp < aLo)
+         aHi += 1u;
+      aLo = tmp;
+      aLo &= ~maskExp;
+   }
+
+   a.x = aLo;
+   a.y = aHi;
+   return packUint2x32(a);
+}
+
+/* Returns the smaller of the double-precision bit patterns `a' and `b'.
+ * If `a' is NaN, `b' is returned; otherwise if `b' is NaN, `a' is returned.
+ * When neither is NaN, `a' is returned only if it compares strictly less
+ * than `b'.
+ */
+uint64_t
+__fmin64(uint64_t a, uint64_t b)
+{
+   if (__is_nan(a))
+      return b;
+
+   if (__is_nan(b))
+      return a;
+
+   return __flt64_nonnan(a, b) ? a : b;
+}
+
+/* Returns the larger of the double-precision bit patterns `a' and `b'.
+ * If `a' is NaN, `b' is returned; otherwise if `b' is NaN, `a' is returned.
+ * When neither is NaN, `b' is returned only if `a' compares strictly less
+ * than `b'.
+ */
+uint64_t
+__fmax64(uint64_t a, uint64_t b)
+{
+   if (__is_nan(a))
+      return b;
+
+   if (__is_nan(b))
+      return a;
+
+   return __flt64_nonnan(a, b) ? b : a;
+}
+
+/* Returns the fractional part of the double-precision bit pattern `a',
+ * computed as a + (-floor(a)) using __ffloor64(), __fneg64() and __fadd64().
+ */
+uint64_t
+__ffract64(uint64_t a)
+{
+   uint64_t floored = __ffloor64(a);
+   uint64_t negFloored = __fneg64(floored);
+
+   return __fadd64(a, negFloored);
+}