glsl: Add utility function to round and pack int64_t value

[mesa.git] / src / compiler / glsl / float64.glsl
diff --git a/src/compiler/glsl/float64.glsl b/src/compiler/glsl/float64.glsl

index 1808fec07277290ed3d78fd33cf881a34cf88f6f..71ebbb8d907ece76dbe5caaa2b951db7020cd49a 100644 (file)
--- a/src/compiler/glsl/float64.glsl
+++ b/src/compiler/glsl/float64.glsl
@@ -451,6 +451,74 @@ __roundAndPackFloat64(uint zSign,
     return __packFloat64(zSign, zExp, zFrac0, zFrac1);
  }
  
+uint64_t
+__roundAndPackUInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)
+{
+   bool roundNearestEven;
+   bool increment;
+   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;
+
+   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;
+
+   if (zFrac2 >= 0x80000000u)
+      increment = false;
+
+   if (!roundNearestEven) {
+      if (zSign != 0u) {
+         if ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) && (zFrac2 != 0u)) {
+            increment = false;
+         }
+      } else {
+         increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
+            (zFrac2 != 0u);
+      }
+   }
+
+   if (increment) {
+      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);
+      if ((zFrac0 | zFrac1) != 0u)
+         zFrac1 &= ~(1u) + uint(zFrac2 == 0u) & uint(roundNearestEven);
+   }
+   return mix(packUint2x32(uvec2(zFrac1, zFrac0)), default_nan,
+              (zSign !=0u && (zFrac0 | zFrac1) != 0u));
+}
+
+int64_t
+__roundAndPackInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)
+{
+   bool roundNearestEven;
+   bool increment;
+   int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL;
+   int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL;
+
+   roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;
+
+   if (zFrac2 >= 0x80000000u)
+      increment = false;
+
+   if (!roundNearestEven) {
+      if (zSign != 0u) {
+         increment = ((FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&
+            (zFrac2 != 0u));
+      } else {
+         increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
+            (zFrac2 != 0u);
+      }
+   }
+
+   if (increment) {
+      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);
+      if ((zFrac0 | zFrac1) != 0u)
+         zFrac1 &= ~(1u) + uint(zFrac2 == 0u) & uint(roundNearestEven);
+   }
+
+   int64_t absZ = mix(int64_t(packUint2x32(uvec2(zFrac1, zFrac0))),
+                      -int64_t(packUint2x32(uvec2(zFrac1, zFrac0))),
+                      (zSign != 0u));
+   int64_t nan = mix(default_PosNaN, default_NegNaN, bool(zSign));
+   return mix(absZ, nan, bool(zSign ^ uint(absZ < 0)) && bool(absZ));
+}
+
  /* Returns the number of leading 0 bits before the most-significant 1 bit of
   * `a'.  If `a' is zero, 32 is returned.
   */
@@ -887,6 +955,69 @@ __uint_to_fp64(uint a)
     return __packFloat64(0u, 0x432 - shiftDist, aHigh, aLow);
  }
  
+uint64_t
+__uint64_to_fp64(uint64_t a)
+{
+   if (a == 0u)
+      return 0ul;
+
+   uvec2 aFrac = unpackUint2x32(a);
+   uint aFracLo = __extractFloat64FracLo(a);
+   uint aFracHi = __extractFloat64FracHi(a);
+
+   if ((aFracHi & 0x80000000u) != 0u) {
+      __shift64RightJamming(aFracHi, aFracLo, 1, aFracHi, aFracLo);
+      return __roundAndPackFloat64(0, 0x433, aFracHi, aFracLo, 0u);
+   } else {
+      return __normalizeRoundAndPackFloat64(0, 0x432, aFrac.y, aFrac.x);
+   }
+}
+
+uint64_t
+__fp64_to_uint64(uint64_t a)
+{
+   uint aFracLo = __extractFloat64FracLo(a);
+   uint aFracHi = __extractFloat64FracHi(a);
+   int aExp = __extractFloat64Exp(a);
+   uint aSign = __extractFloat64Sign(a);
+   uint zFrac2 = 0u;
+   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;
+
+   aFracHi = mix(aFracHi, aFracHi | 0x00100000u, aExp != 0);
+   int shiftCount = 0x433 - aExp;
+
+   if ( shiftCount <= 0 ) {
+      if (shiftCount < -11 && aExp == 0x7FF) {
+         if ((aFracHi | aFracLo) != 0u)
+            return __propagateFloat64NaN(a, a);
+         return mix(default_nan, a, aSign == 0u);
+      }
+      __shortShift64Left(aFracHi, aFracLo, -shiftCount, aFracHi, aFracLo);
+   } else {
+      __shift64ExtraRightJamming(aFracHi, aFracLo, zFrac2, shiftCount,
+                                 aFracHi, aFracLo, zFrac2);
+   }
+   return __roundAndPackUInt64(aSign, aFracHi, aFracLo, zFrac2);
+}
+
+uint64_t
+__int64_to_fp64(int64_t a)
+{
+   if (a==0)
+      return 0ul;
+
+   uint64_t absA = mix(uint64_t(a), uint64_t(-a), a < 0);
+   uint aFracHi = __extractFloat64FracHi(absA);
+   uvec2 aFrac = unpackUint2x32(absA);
+   uint zSign = uint(a < 0);
+
+   if ((aFracHi & 0x80000000u) != 0u) {
+      return mix(0ul, __packFloat64(1, 0x434, 0u, 0u), a < 0);
+   }
+
+   return __normalizeRoundAndPackFloat64(zSign, 0x432, aFrac.y, aFrac.x);
+}
+
  /* Returns the result of converting the double-precision floating-point value
   * `a' to the 32-bit two's complement integer format.  The conversion is
   * performed according to the IEEE Standard for Floating-Point Arithmetic---
@@ -1064,6 +1195,48 @@ __fp64_to_fp32(uint64_t __a)
     return __roundAndPackFloat32(aSign, aExp - 0x381, zFrac);
  }
  
+float
+__uint64_to_fp32(uint64_t __a)
+{
+   uint zFrac = 0u;
+   uvec2 aFrac = unpackUint2x32(__a);
+   int shiftCount = __countLeadingZeros32(mix(aFrac.y, aFrac.x, aFrac.y == 0u));
+   shiftCount -= mix(40, 8, aFrac.y == 0u);
+
+   if (0 <= shiftCount) {
+      __shortShift64Left(aFrac.y, aFrac.x, shiftCount, aFrac.y, aFrac.x);
+      bool is_zero = (aFrac.y | aFrac.x) == 0u;
+      return mix(__packFloat32(0u, 0x95 - shiftCount, aFrac.x), 0, is_zero);
+   }
+
+   shiftCount += 7;
+   __shift64RightJamming(aFrac.y, aFrac.x, -shiftCount, aFrac.y, aFrac.x);
+   zFrac = mix(aFrac.x<<shiftCount, aFrac.x, shiftCount < 0);
+   return __roundAndPackFloat32(0u, 0x9C - shiftCount, zFrac);
+}
+
+float
+__int64_to_fp32(int64_t __a)
+{
+   uint zFrac = 0u;
+   uint aSign = uint(__a < 0);
+   uint64_t absA = mix(uint64_t(__a), uint64_t(-__a), __a < 0);
+   uvec2 aFrac = unpackUint2x32(absA);
+   int shiftCount = __countLeadingZeros32(mix(aFrac.y, aFrac.x, aFrac.y == 0u));
+   shiftCount -= mix(40, 8, aFrac.y == 0u);
+
+   if (0 <= shiftCount) {
+      __shortShift64Left(aFrac.y, aFrac.x, shiftCount, aFrac.y, aFrac.x);
+      bool is_zero = (aFrac.y | aFrac.x) == 0u;
+      return mix(__packFloat32(aSign, 0x95 - shiftCount, aFrac.x), 0, absA == 0u);
+   }
+
+   shiftCount += 7;
+   __shift64RightJamming(aFrac.y, aFrac.x, -shiftCount, aFrac.y, aFrac.x);
+   zFrac = mix(aFrac.x<<shiftCount, aFrac.x, shiftCount < 0);
+   return __roundAndPackFloat32(aSign, 0x9C - shiftCount, zFrac);
+}
+
  /* Returns the result of converting the single-precision floating-point value
   * `a' to the double-precision floating-point format.
   */