return __packFloat64(zSign, zExp, zFrac0, zFrac1);
}
+/* Packs the 64-bit magnitude held in `zFrac0' (high word) and `zFrac1' (low
+ * word) into an unsigned 64-bit integer.  `zFrac2' carries the bits that lie
+ * below the integer point and is only consulted to decide whether the
+ * magnitude has to be bumped by one.
+ *
+ * The magnitude is incremented only when the rounding mode is
+ * FLOAT_ROUND_UP, the value is non-negative and `zFrac2' is non-zero.  In
+ * every other case it is returned unchanged, which matches the paths on
+ * which the previous code assigned `increment' a defined value.
+ *
+ * A non-zero magnitude with `zSign' set cannot be represented; in that case
+ * the all-ones pattern is returned.
+ */
+uint64_t
+__roundAndPackUInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)
+{
+   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;
+   bool roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;
+
+   /* `increment' used to be left uninitialised on most paths and was then
+    * read.  Give it a defined value up front; `false' is the value the
+    * old code stored on every path where it stored anything other than the
+    * round-up result below.
+    */
+   bool increment = false;
+
+   if (!roundNearestEven && zSign == 0u) {
+      increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
+                  (zFrac2 != 0u);
+   }
+
+   if (increment) {
+      /* The old code followed the add with
+       *    zFrac1 &= ~(1u) + uint(zFrac2 == 0u) & uint(roundNearestEven);
+       * which, because `+' binds tighter than `&', cleared the whole low
+       * word.  The increment is only taken when round-to-nearest-even is
+       * not selected, so the ties-to-even adjustment that expression was
+       * meant to perform never applies here and is omitted.
+       *
+       * NOTE(review): a carry out of the 64-bit add is not reported; the
+       * value presumably wraps to zero -- confirm against __add64.
+       */
+      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);
+   }
+
+   bool negativeNonZero = (zSign != 0u) && ((zFrac0 | zFrac1) != 0u);
+   return mix(packUint2x32(uvec2(zFrac1, zFrac0)), default_nan,
+              negativeNonZero);
+}
+
+/* Packs the sign `zSign' and the 64-bit magnitude held in `zFrac0' (high
+ * word) and `zFrac1' (low word) into a signed 64-bit integer.  `zFrac2'
+ * carries the bits below the integer point and is only consulted to decide
+ * whether the magnitude has to be bumped by one.
+ *
+ * The magnitude is incremented only for the directed rounding modes:
+ * FLOAT_ROUND_DOWN for negative values and FLOAT_ROUND_UP for non-negative
+ * values, and only when `zFrac2' is non-zero.  Otherwise the magnitude is
+ * used unchanged.
+ *
+ * When the signed result is non-zero and its sign disagrees with `zSign',
+ * one of the two sentinel values below is returned instead.
+ * NOTE(review): the sentinel constants are kept exactly as found; confirm
+ * they are the intended out-of-range results.
+ */
+int64_t
+__roundAndPackInt64(uint zSign, uint zFrac0, uint zFrac1, uint zFrac2)
+{
+   int64_t default_NegNaN = -0x7FFFFFFFFFFFFFFEL;
+   int64_t default_PosNaN = 0xFFFFFFFFFFFFFFFFL;
+   bool roundNearestEven = FLOAT_ROUNDING_MODE == FLOAT_ROUND_NEAREST_EVEN;
+
+   /* `increment' used to be read without having been written whenever
+    * round-to-nearest-even was selected and `zFrac2' was below 0x80000000.
+    * Start from `false', the only value the old code ever stored for the
+    * round-to-nearest-even case.
+    */
+   bool increment = false;
+
+   if (!roundNearestEven) {
+      if (zSign != 0u) {
+         increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_DOWN) &&
+                     (zFrac2 != 0u);
+      } else {
+         increment = (FLOAT_ROUNDING_MODE == FLOAT_ROUND_UP) &&
+                     (zFrac2 != 0u);
+      }
+   }
+
+   if (increment) {
+      /* The old code followed the add with
+       *    zFrac1 &= ~(1u) + uint(zFrac2 == 0u) & uint(roundNearestEven);
+       * which, because `+' binds tighter than `&', cleared the whole low
+       * word.  The increment is only taken when round-to-nearest-even is
+       * not selected, so the ties-to-even adjustment that expression was
+       * meant to perform never applies here and is omitted.
+       */
+      __add64(zFrac0, zFrac1, 0u, 1u, zFrac0, zFrac1);
+   }
+
+   int64_t magnitude = int64_t(packUint2x32(uvec2(zFrac1, zFrac0)));
+   int64_t absZ = mix(magnitude, -magnitude, (zSign != 0u));
+   int64_t nan = mix(default_PosNaN, default_NegNaN, bool(zSign));
+
+   /* Overflow shows up as a non-zero result whose sign bit does not match
+    * the requested sign.
+    */
+   return mix(absZ, nan, bool(zSign ^ uint(absZ < 0)) && bool(absZ));
+}
+
/* Returns the number of leading 0 bits before the most-significant 1 bit of
* `a'. If `a' is zero, 32 is returned.
*/
return __roundAndPackFloat64(zSign, zExp, zFrac0, zFrac1, zFrac2);
}
+/* Computes `a' * `b' + `c' as a multiplication followed by a separate
+ * addition.
+ * NOTE(review): each helper presumably rounds its own result, so this is
+ * not a single-rounding fused operation -- confirm if that matters.
+ */
+uint64_t
+__ffma64(uint64_t a, uint64_t b, uint64_t c)
+{
+   uint64_t product = __fmul64(a, b);
+
+   return __fadd64(product, c);
+}
+
/* Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the
* number of bits given in `count'. Any bits shifted off are lost. The value
* of `count' can be arbitrarily large; in particular, if `count' is greater
return __packFloat64(0u, 0x432 - shiftDist, aHigh, aLow);
}
+/* Converts the unsigned 64-bit integer `a' to a double-precision bit
+ * pattern.  Zero maps to the all-zero pattern; every other value is handed
+ * to one of the round-and-pack helpers.
+ *
+ * NOTE(review): the top-bit test below is applied to the value returned by
+ * __extractFloat64FracHi().  If that helper masks the high word down to
+ * the fraction field, the test can never succeed and the normalize path
+ * handles every input -- confirm against the helper's definition.
+ */
+uint64_t
+__uint64_to_fp64(uint64_t a)
+{
+   if (a == 0u)
+      return 0ul;
+
+   uvec2 words = unpackUint2x32(a);
+   uint fracLo = __extractFloat64FracLo(a);
+   uint fracHi = __extractFloat64FracHi(a);
+
+   /* Common case: bit 31 of the extracted high part is clear. */
+   if ((fracHi & 0x80000000u) == 0u)
+      return __normalizeRoundAndPackFloat64(0, 0x432, words.y, words.x);
+
+   /* Otherwise shift right by one (jamming) before packing. */
+   __shift64RightJamming(fracHi, fracLo, 1, fracHi, fracLo);
+   return __roundAndPackFloat64(0, 0x433, fracHi, fracLo, 0u);
+}
+
+/* Converts the double-precision bit pattern `a' to an unsigned 64-bit
+ * integer.
+ *
+ * Inputs whose exponent field is 0x7FF are handled specially:
+ *   - a non-zero fraction is passed through __propagateFloat64NaN();
+ *   - a zero fraction returns `a' itself when the sign is clear and the
+ *     all-ones pattern when the sign is set.
+ * Everything else is shifted into integer position and passed to
+ * __roundAndPackUInt64().
+ *
+ * Assumes __extractFloat64FracHi()/__extractFloat64FracLo() return only
+ * fraction bits -- TODO confirm against their definitions.
+ *
+ * NOTE(review): finite inputs whose exponent exceeds 0x43E reach
+ * __shortShift64Left() with a count greater than 11; confirm the helper
+ * tolerates that or add an explicit overflow result.
+ */
+uint64_t
+__fp64_to_uint64(uint64_t a)
+{
+   uint aFracLo = __extractFloat64FracLo(a);
+   uint aFracHi = __extractFloat64FracHi(a);
+   int aExp = __extractFloat64Exp(a);
+   uint aSign = __extractFloat64Sign(a);
+   uint zFrac2 = 0u;
+   uint64_t default_nan = 0xFFFFFFFFFFFFFFFFUL;
+
+   /* Sample the fraction before the implicit bit is OR-ed in.  The old
+    * code tested (aFracHi | aFracLo) afterwards, which is always non-zero
+    * for a non-zero exponent, so the zero-fraction branch below could
+    * never be reached.
+    */
+   bool fracIsNonZero = (aFracHi | aFracLo) != 0u;
+
+   aFracHi = mix(aFracHi, aFracHi | 0x00100000u, aExp != 0);
+   int shiftCount = 0x433 - aExp;
+
+   if (shiftCount <= 0) {
+      /* aExp == 0x7FF already implies shiftCount < -11. */
+      if (aExp == 0x7FF) {
+         if (fracIsNonZero)
+            return __propagateFloat64NaN(a, a);
+         return mix(default_nan, a, aSign == 0u);
+      }
+      __shortShift64Left(aFracHi, aFracLo, -shiftCount, aFracHi, aFracLo);
+   } else {
+      __shift64ExtraRightJamming(aFracHi, aFracLo, zFrac2, shiftCount,
+                                 aFracHi, aFracLo, zFrac2);
+   }
+   return __roundAndPackUInt64(aSign, aFracHi, aFracLo, zFrac2);
+}
+
+/* Converts the signed 64-bit integer `a' to a double-precision bit pattern.
+ * Zero maps to the all-zero pattern; otherwise the magnitude of `a' is
+ * normalized, rounded and packed together with its sign.
+ *
+ * NOTE(review): the early-out below tests bit 31 of the value returned by
+ * __extractFloat64FracHi().  If that helper masks the high word down to the
+ * fraction field the branch is never taken.  If it is taken, the exponent
+ * passed is 0x434 -- confirm that this is the intended encoding for the
+ * most negative input.
+ */
+uint64_t
+__int64_to_fp64(int64_t a)
+{
+   if (a == 0)
+      return 0ul;
+
+   bool isNegative = a < 0;
+   uint64_t magnitude = mix(uint64_t(a), uint64_t(-a), isNegative);
+   uint hiPart = __extractFloat64FracHi(magnitude);
+   uvec2 words = unpackUint2x32(magnitude);
+
+   if ((hiPart & 0x80000000u) != 0u)
+      return mix(0ul, __packFloat64(1, 0x434, 0u, 0u), isNegative);
+
+   return __normalizeRoundAndPackFloat64(uint(isNegative), 0x432,
+                                         words.y, words.x);
+}
+
/* Returns the result of converting the double-precision floating-point value
* `a' to the 32-bit two's complement integer format. The conversion is
* performed according to the IEEE Standard for Floating-Point Arithmetic---
return __packFloat64(zSign, 0x412 - shiftCount, zFrac0, zFrac1);
}
+/* Returns true unless the absolute value of `a' compares equal to the
+ * all-zero pattern under __feq64_nonnan().
+ */
+bool
+__fp64_to_bool(uint64_t a)
+{
+   uint64_t magnitude = __fabs64(a);
+   bool isZero = __feq64_nonnan(magnitude, 0ul);
+
+   return !isZero;
+}
+
+/* Converts the boolean `a' to a double-precision bit pattern by first
+ * converting it to an int and then reusing the int-to-fp64 conversion.
+ */
+uint64_t
+__bool_to_fp64(bool a)
+{
+   int asInt = int(a);
+
+   return __int_to_fp64(asInt);
+}
+
/* Packs the sign `zSign', exponent `zExp', and significand `zFrac' into a
* single-precision floating-point value, returning the result. After being
* shifted into the proper positions, the three fields are simply added
return __roundAndPackFloat32(aSign, aExp - 0x381, zFrac);
}
+/* Converts the unsigned 64-bit integer `__a' to single precision.
+ *
+ * `shiftCount' starts out as (number of leading zero bits of the 64-bit
+ * value) - 40.  When it is non-negative the value is shifted left and
+ * packed directly, with zero mapped to 0.  Otherwise the value is aligned
+ * (shifted right with jamming, or left) and passed to
+ * __roundAndPackFloat32().
+ */
+float
+__uint64_to_fp32(uint64_t __a)
+{
+   uvec2 aFrac = unpackUint2x32(__a);
+   int shiftCount = __countLeadingZeros32(mix(aFrac.y, aFrac.x, aFrac.y == 0u));
+   shiftCount -= mix(40, 8, aFrac.y == 0u);
+
+   if (0 <= shiftCount) {
+      __shortShift64Left(aFrac.y, aFrac.x, shiftCount, aFrac.y, aFrac.x);
+      bool is_zero = (aFrac.y | aFrac.x) == 0u;
+      return mix(__packFloat32(0u, 0x95 - shiftCount, aFrac.x), 0, is_zero);
+   }
+
+   shiftCount += 7;
+
+   /* Only shift right when the adjusted count is negative.  The old code
+    * always called __shift64RightJamming() with -shiftCount, i.e. with a
+    * zero or negative count when a left shift was needed, and then
+    * left-shifted whatever that call had written back.
+    */
+   uint zFrac;
+   if (shiftCount < 0) {
+      __shift64RightJamming(aFrac.y, aFrac.x, -shiftCount, aFrac.y, aFrac.x);
+      zFrac = aFrac.x;
+   } else {
+      /* The high word is zero on this path, so the low word holds the
+       * whole value.
+       */
+      zFrac = aFrac.x << shiftCount;
+   }
+   return __roundAndPackFloat32(0u, 0x9C - shiftCount, zFrac);
+}
+
+/* Converts the signed 64-bit integer `__a' to single precision.
+ *
+ * The magnitude of `__a' is converted in the same way as in the unsigned
+ * variant and the sign (1 for negative inputs, 0 otherwise) is passed on to
+ * the packing helpers.  `shiftCount' starts out as (number of leading zero
+ * bits of the 64-bit magnitude) - 40.
+ */
+float
+__int64_to_fp32(int64_t __a)
+{
+   uint aSign = uint(__a < 0);
+   uint64_t absA = mix(uint64_t(__a), uint64_t(-__a), __a < 0);
+   uvec2 aFrac = unpackUint2x32(absA);
+   int shiftCount = __countLeadingZeros32(mix(aFrac.y, aFrac.x, aFrac.y == 0u));
+   shiftCount -= mix(40, 8, aFrac.y == 0u);
+
+   if (0 <= shiftCount) {
+      __shortShift64Left(aFrac.y, aFrac.x, shiftCount, aFrac.y, aFrac.x);
+      return mix(__packFloat32(aSign, 0x95 - shiftCount, aFrac.x), 0, absA == 0u);
+   }
+
+   shiftCount += 7;
+
+   /* Only shift right when the adjusted count is negative.  The old code
+    * always called __shift64RightJamming() with -shiftCount, i.e. with a
+    * zero or negative count when a left shift was needed, and then
+    * left-shifted whatever that call had written back.
+    */
+   uint zFrac;
+   if (shiftCount < 0) {
+      __shift64RightJamming(aFrac.y, aFrac.x, -shiftCount, aFrac.y, aFrac.x);
+      zFrac = aFrac.x;
+   } else {
+      /* The high word is zero on this path, so the low word holds the
+       * whole magnitude.
+       */
+      zFrac = aFrac.x << shiftCount;
+   }
+   return __roundAndPackFloat32(aSign, 0x9C - shiftCount, zFrac);
+}
+
/* Returns the result of converting the single-precision floating-point value
* `a' to the double-precision floating-point format.
*/
zHi = mix(zHi, a.y, unbiasedExp > 52);
return packUint2x32(uvec2(zLo, zHi));
}
+
+/* Returns floor(`a').  The truncated value is returned as-is when `a'
+ * compares greater than or equal to zero or is already equal to its
+ * truncation; otherwise 1.0 is subtracted from the truncated value.
+ */
+uint64_t
+__ffloor64(uint64_t a)
+{
+   bool nonNegative = __fge64(a, 0ul);
+   uint64_t truncated = __ftrunc64(a);
+
+   /* Negative with a fractional part: step down to the next integer. */
+   if (!nonNegative && !__feq64(truncated, a))
+      return __fadd64(truncated, 0xbff0000000000000ul /* -1.0 */);
+
+   return truncated;
+}
+
+/* Rounds the double-precision value `__a' to an integral value, working
+ * directly on the two 32-bit halves of its bit pattern.
+ *
+ *   - Exponent below the bias (magnitude under 1.0): the result is a signed
+ *     zero, except that values strictly between 0.5 and 1.0 in magnitude
+ *     become +/-1.0.  Exactly +/-0.5 yields a signed zero, as before.
+ *   - Unbiased exponent 0..51: half of the last integer bit is added to the
+ *     fraction and the fractional bits are cleared, so ties move away from
+ *     zero.  Values that are already integral are returned unchanged.
+ *   - Unbiased exponent above 51 (this includes the all-ones exponent):
+ *     `__a' is returned unchanged.
+ */
+uint64_t
+__fround64(uint64_t __a)
+{
+   uvec2 a = unpackUint2x32(__a);
+   int unbiasedExp = __extractFloat64Exp(__a) - 1023;
+   uint aHi = a.y;
+   uint aLo = a.x;
+
+   if (unbiasedExp < 20) {
+      if (unbiasedExp < 0) {
+         /* Look at the whole fraction, not just the low word.  The old
+          * test (aLo != 0u) sent values such as 0.75, whose fraction bits
+          * all live in the high word, to zero.
+          */
+         bool fracNonZero = ((aHi & 0x000FFFFFu) | aLo) != 0u;
+
+         aHi &= 0x80000000u;
+         if (unbiasedExp == -1 && fracNonZero)
+            aHi |= (1023u << 20);
+         aLo = 0u;
+      } else {
+         uint maskExp = 0x000FFFFFu >> unbiasedExp;
+         /* a is an integral value */
+         if (((aHi & maskExp) == 0u) && (aLo == 0u))
+            return __a;
+
+         aHi += 0x00080000u >> unbiasedExp;
+         aHi &= ~maskExp;
+         aLo = 0u;
+      }
+   } else if (unbiasedExp > 51 || unbiasedExp == 1024) {
+      return __a;
+   } else {
+      uint maskExp = 0xFFFFFFFFu >> (unbiasedExp - 20);
+      if ((aLo & maskExp) == 0u)
+         return __a;
+      uint tmp = aLo + (1u << (51 - unbiasedExp));
+      /* Propagate the carry out of the low word. */
+      if (tmp < aLo)
+         aHi += 1u;
+      aLo = tmp;
+      aLo &= ~maskExp;
+   }
+
+   a.x = aLo;
+   a.y = aHi;
+   return packUint2x32(a);
+}
+
+/* Returns the smaller of `a' and `b'.  If `a' is a NaN, `b' is returned;
+ * otherwise, if `b' is a NaN, `a' is returned.
+ */
+uint64_t
+__fmin64(uint64_t a, uint64_t b)
+{
+   if (__is_nan(a))
+      return b;
+
+   if (__is_nan(b))
+      return a;
+
+   return __flt64_nonnan(a, b) ? a : b;
+}
+
+/* Returns the larger of `a' and `b'.  If `a' is a NaN, `b' is returned;
+ * otherwise, if `b' is a NaN, `a' is returned.
+ */
+uint64_t
+__fmax64(uint64_t a, uint64_t b)
+{
+   if (__is_nan(a))
+      return b;
+
+   if (__is_nan(b))
+      return a;
+
+   return __flt64_nonnan(a, b) ? b : a;
+}
+
+/* Returns the fractional part of `a', computed as a - floor(a). */
+uint64_t
+__ffract64(uint64_t a)
+{
+   uint64_t floored = __ffloor64(a);
+   uint64_t negFloored = __fneg64(floored);
+
+   return __fadd64(a, negFloored);
+}