return val.fp;
}
+template <class fpType> // Single-operand result fixup: rewrites NaN results and (when fpscr.fz is set) subnormal results of val.
+static inline fpType
+fixDest(FPSCR fpscr, fpType val, fpType op1) // val: computed result; op1: the one source operand; returns the adjusted result
+{
+ int fpClass = std::fpclassify(val);
+ fpType junk = 0.0; // value unused here; presumably only selects the bitsToFp overload/return type - confirm against bitsToFp
+ if (fpClass == FP_NAN) {
+ const bool single = (sizeof(val) == sizeof(float));
+ const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000); // exponent all ones plus top fraction bit, for the chosen width
+ const bool nan = std::isnan(op1);
+ if (!nan || (fpscr.dn == 1)) { // result is NaN but op1 is not, or fpscr.dn is set: use the fixed qnan pattern
+ val = bitsToFp(qnan, junk);
+ } else if (nan) { // always true when reached; keeps op1's bits and ORs the qnan bits in
+ val = bitsToFp(fpToBits(op1) | qnan, junk);
+ }
+ } else if (fpClass == FP_SUBNORMAL && fpscr.fz == 1) {
+ // Turn val into a zero with the correct sign;
+ uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1); // only the most significant (sign) bit of fpType
+ val = bitsToFp(fpToBits(val) & bitMask, junk);
+ feraiseexcept(FeUnderflow); // the flush of a subnormal result is signalled as underflow
+ }
+ return val;
+}
+
template <class fpType>
static inline fpType
fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2)
return val;
}
+template <class fpType> // Multiply-specific fixup: runs the two-operand fixDest, then re-checks underflow when val is +/- the smallest normal.
+static inline fpType
+fixMultDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) // val: op1 * op2 as computed by the caller; returns fixDest's result
+{
+ fpType mid = fixDest(fpscr, val, op1, op2);
+ const bool single = (sizeof(fpType) == sizeof(float));
+ const fpType junk = 0.0; // value unused; presumably only selects the bitsToFp overload/return type - confirm
+ if ((single && (val == bitsToFp(0x00800000, junk) || // note: tests the raw val, not mid; patterns are exponent field 1, fraction 0
+ val == bitsToFp(0x80800000, junk))) ||
+ (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
+ val == bitsToFp(ULL(0x8010000000000000), junk)))
+ ) {
+ __asm__ __volatile__("" : "=m" (op1) : "m" (op1)); // empty asm with memory operands; presumably keeps the compiler from moving the multiply across fesetround - confirm
+ fesetround(FeRoundZero); // rounding mode is not restored in this function; NOTE(review): callers appear to restore state via setVfpFpscr - confirm
+ fpType temp = 0.0;
+ __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
+ temp = op1 * op2; // redo the product under the rounding mode set above
+ if (!std::isnormal(temp)) { // recomputed product is not a normal number: signal underflow
+ feraiseexcept(FeUnderflow);
+ }
+ __asm__ __volatile__("" :: "m" (temp)); // temp as an asm input; presumably prevents the recomputation being optimised away - confirm
+ }
+ return mid; // the recomputation only affects exception flags, never the returned value
+}
+
+template <class fpType> // Divide-specific fixup: runs the two-operand fixDest, then re-checks underflow when val is +/- the smallest normal.
+static inline fpType
+fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2) // val: op1 / op2 as computed by the caller; returns fixDest's result
+{
+ fpType mid = fixDest(fpscr, val, op1, op2);
+ const bool single = (sizeof(fpType) == sizeof(float));
+ const fpType junk = 0.0; // value unused; presumably only selects the bitsToFp overload/return type - confirm
+ if ((single && (val == bitsToFp(0x00800000, junk) || // note: tests the raw val, not mid; patterns are exponent field 1, fraction 0
+ val == bitsToFp(0x80800000, junk))) ||
+ (!single && (val == bitsToFp(ULL(0x0010000000000000), junk) ||
+ val == bitsToFp(ULL(0x8010000000000000), junk)))
+ ) {
+ __asm__ __volatile__("" : "=m" (op1) : "m" (op1)); // empty asm with memory operands; presumably keeps the compiler from moving the divide across fesetround - confirm
+ fesetround(FeRoundZero); // rounding mode is not restored in this function; NOTE(review): callers appear to restore state via setVfpFpscr - confirm
+ fpType temp = 0.0;
+ __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
+ temp = op1 / op2; // redo the quotient under the rounding mode set above
+ if (!std::isnormal(temp)) { // recomputed quotient is not a normal number: signal underflow
+ feraiseexcept(FeUnderflow);
+ }
+ __asm__ __volatile__("" :: "m" (temp)); // temp as an asm input; presumably prevents the recomputation being optimised away - confirm
+ }
+ return mid; // the recomputation only affects exception flags, never the returned value
+}
+
+static inline float // Fixup for a double-to-float conversion result: NaN/subnormal handling via fixDest plus an underflow re-check.
+fixFpDFpSDest(FPSCR fpscr, double val) // val: the double being converted; returns the fixed-up float
+{
+ const float junk = 0.0; // value unused; presumably only selects the bitsToFp overload/return type - confirm
+ float op1 = 0.0; // float stand-in for the operand handed to fixDest; stays 0.0 unless val is NaN
+ if (std::isnan(val)) {
+ uint64_t valBits = fpToBits(val);
+ uint32_t op1Bits = bits(valBits, 50, 29) | // presumably bits 50..29 of the double's fraction, moved to the low bits - confirm bits() semantics
+ (mask(9) << 22) | // presumably nine set bits at positions 30..22 - confirm mask() semantics
+ (bits(valBits, 63) << 31); // bit 63 of the double placed at bit 31 of the float pattern
+ op1 = bitsToFp(op1Bits, junk);
+ }
+ float mid = fixDest(fpscr, (float)val, op1); // the narrowing cast performs the actual conversion
+ if (mid == bitsToFp(0x00800000, junk) || // here the fixed-up result mid is tested; patterns are exponent field 1, fraction 0
+ mid == bitsToFp(0x80800000, junk)) {
+ __asm__ __volatile__("" : "=m" (val) : "m" (val)); // empty asm with memory operands; presumably keeps the compiler from moving the conversion across fesetround - confirm
+ fesetround(FeRoundZero); // rounding mode is not restored in this function; NOTE(review): callers appear to restore state via setVfpFpscr - confirm
+ float temp = 0.0;
+ __asm__ __volatile__("" : "=m" (temp) : "m" (temp));
+ temp = val; // redo the double-to-float conversion under the rounding mode set above
+ if (!std::isnormal(temp)) { // reconverted value is not a normal number: signal underflow
+ feraiseexcept(FeUnderflow);
+ }
+ __asm__ __volatile__("" :: "m" (temp)); // temp as an asm input; presumably prevents the reconversion being optimised away - confirm
+ }
+ return mid; // the reconversion only affects exception flags, never the returned value
+}
+
static inline uint64_t
vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm)
{
}
static inline float
-vfpUFixedToFpS(uint32_t val, bool half, uint8_t imm)
+vfpUFixedToFpS(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
fesetround(FeRoundNearest);
if (half)
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
feclearexcept(FeAllExceptions);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
- return val / scale;
+ return fixDivDest(fpscr, val / scale, (float)val, scale);
}
static inline float
-vfpSFixedToFpS(int32_t val, bool half, uint8_t imm)
+vfpSFixedToFpS(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
fesetround(FeRoundNearest);
if (half)
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
feclearexcept(FeAllExceptions);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
- return val / scale;
+ return fixDivDest(fpscr, val / scale, (float)val, scale);
}
static inline uint64_t
}
static inline double
-vfpUFixedToFpD(uint32_t val, bool half, uint8_t imm)
+vfpUFixedToFpD(FPSCR fpscr, uint32_t val, bool half, uint8_t imm)
{
fesetround(FeRoundNearest);
if (half)
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
feclearexcept(FeAllExceptions);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
- return val / scale;
+ return fixDivDest(fpscr, val / scale, (double)val, scale);
}
static inline double
-vfpSFixedToFpD(int32_t val, bool half, uint8_t imm)
+vfpSFixedToFpD(FPSCR fpscr, int32_t val, bool half, uint8_t imm)
{
fesetround(FeRoundNearest);
if (half)
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
feclearexcept(FeAllExceptions);
__asm__ __volatile__("" : "=m" (scale) : "m" (scale));
- return val / scale;
+ return fixDivDest(fpscr, val / scale, (double)val, scale);
}
typedef int VfpSavedState;
vfpFlushToZero(Fpscr, FpOp1, FpOp2);
VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
- FpDest = fixDest(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2);
+ FpDest = fixMultDest(Fpscr, FpOp1 * FpOp2, FpOp1, FpOp2);
__asm__ __volatile__("" :: "m" (FpDest));
Fpscr = setVfpFpscr(Fpscr, state);
if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp);
VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
- cDest.fp = fixDest(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp);
+ cDest.fp = fixMultDest(Fpscr, cOp1.fp * cOp2.fp, cOp1.fp, cOp2.fp);
__asm__ __volatile__("" :: "m" (cDest.fp));
Fpscr = setVfpFpscr(Fpscr, state);
if ((isinf(cOp1.fp) && cOp2.fp == 0) ||
mid = NAN;
}
vfpFlushToZero(Fpscr, FpDest, mid);
- FpDest = fixDest(Fpscr, FpDest - mid, FpDest, mid);
+ FpDest = fixDest(Fpscr, FpDest - mid, FpDest, -mid);
__asm__ __volatile__("" :: "m" (FpDest));
Fpscr = setVfpFpscr(Fpscr, state);
'''
(isinf(cOp2.fp) && cOp1.fp == 0)) {
mid = NAN;
}
- cDest.fp = fixDest(Fpscr, cDest.fp - mid, cDest.fp, mid);
+ cDest.fp = fixDest(Fpscr, cDest.fp - mid, cDest.fp, -mid);
vfpFlushToZero(Fpscr, cDest.fp, mid);
__asm__ __volatile__("" :: "m" (cDest.fp));
Fpscr = setVfpFpscr(Fpscr, state);
mid = NAN;
}
vfpFlushToZero(Fpscr, FpDest, mid);
- FpDest = fixDest(Fpscr, -FpDest - mid, FpDest, mid);
+ FpDest = fixDest(Fpscr, -FpDest - mid, -FpDest, -mid);
__asm__ __volatile__("" :: "m" (FpDest));
Fpscr = setVfpFpscr(Fpscr, state);
'''
mid = NAN;
}
vfpFlushToZero(Fpscr, cDest.fp, mid);
- cDest.fp = fixDest(Fpscr, -cDest.fp - mid, cDest.fp, mid);
+ cDest.fp = fixDest(Fpscr, -cDest.fp - mid, -cDest.fp, -mid);
__asm__ __volatile__("" :: "m" (cDest.fp));
Fpscr = setVfpFpscr(Fpscr, state);
FpDestP0.uw = cDest.bits;
mid = NAN;
}
vfpFlushToZero(Fpscr, FpDest, mid);
- FpDest = fixDest(Fpscr, -FpDest + mid, FpDest, mid);
+ FpDest = fixDest(Fpscr, -FpDest + mid, -FpDest, mid);
__asm__ __volatile__("" :: "m" (FpDest));
Fpscr = setVfpFpscr(Fpscr, state);
'''
mid = NAN;
}
vfpFlushToZero(Fpscr, cDest.fp, mid);
- cDest.fp = fixDest(Fpscr, -cDest.fp + mid, cDest.fp, mid);
+ cDest.fp = fixDest(Fpscr, -cDest.fp + mid, -cDest.fp, mid);
__asm__ __volatile__("" :: "m" (cDest.fp));
Fpscr = setVfpFpscr(Fpscr, state);
FpDestP0.uw = cDest.bits;
vfpFlushToZero(Fpscr, cOp1.fp);
VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
- FpDest = cOp1.fp;
+ FpDest = fixFpDFpSDest(Fpscr, cOp1.fp);
__asm__ __volatile__("" :: "m" (FpDest));
Fpscr = setVfpFpscr(Fpscr, state);
'''
vcvtSFixedFpSCode = '''
VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1.sw) : "m" (FpOp1.sw));
- FpDest = vfpSFixedToFpS(FpOp1.sw, false, imm);
+ FpDest = vfpSFixedToFpS(Fpscr, FpOp1.sw, false, imm);
__asm__ __volatile__("" :: "m" (FpDest));
Fpscr = setVfpFpscr(Fpscr, state);
'''
uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (mid) : "m" (mid));
- cDest.fp = vfpSFixedToFpD(mid, false, imm);
+ cDest.fp = vfpSFixedToFpD(Fpscr, mid, false, imm);
__asm__ __volatile__("" :: "m" (cDest.fp));
Fpscr = setVfpFpscr(Fpscr, state);
FpDestP0.uw = cDest.bits;
vcvtUFixedFpSCode = '''
VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1.uw) : "m" (FpOp1.uw));
- FpDest = vfpUFixedToFpS(FpOp1.uw, false, imm);
+ FpDest = vfpUFixedToFpS(Fpscr, FpOp1.uw, false, imm);
__asm__ __volatile__("" :: "m" (FpDest));
Fpscr = setVfpFpscr(Fpscr, state);
'''
uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (mid) : "m" (mid));
- cDest.fp = vfpUFixedToFpD(mid, false, imm);
+ cDest.fp = vfpUFixedToFpD(Fpscr, mid, false, imm);
__asm__ __volatile__("" :: "m" (cDest.fp));
Fpscr = setVfpFpscr(Fpscr, state);
FpDestP0.uw = cDest.bits;
vcvtSHFixedFpSCode = '''
VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1.sh) : "m" (FpOp1.sh));
- FpDest = vfpSFixedToFpS(FpOp1.sh, true, imm);
+ FpDest = vfpSFixedToFpS(Fpscr, FpOp1.sh, true, imm);
__asm__ __volatile__("" :: "m" (FpDest));
Fpscr = setVfpFpscr(Fpscr, state);
'''
uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (mid) : "m" (mid));
- cDest.fp = vfpSFixedToFpD(mid, true, imm);
+ cDest.fp = vfpSFixedToFpD(Fpscr, mid, true, imm);
__asm__ __volatile__("" :: "m" (cDest.fp));
Fpscr = setVfpFpscr(Fpscr, state);
FpDestP0.uw = cDest.bits;
vcvtUHFixedFpSCode = '''
VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (FpOp1.uh) : "m" (FpOp1.uh));
- FpDest = vfpUFixedToFpS(FpOp1.uh, true, imm);
+ FpDest = vfpUFixedToFpS(Fpscr, FpOp1.uh, true, imm);
__asm__ __volatile__("" :: "m" (FpDest));
Fpscr = setVfpFpscr(Fpscr, state);
'''
uint64_t mid = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
VfpSavedState state = prepVfpFpscr(Fpscr);
__asm__ __volatile__("" : "=m" (mid) : "m" (mid));
- cDest.fp = vfpUFixedToFpD(mid, true, imm);
+ cDest.fp = vfpUFixedToFpD(Fpscr, mid, true, imm);
__asm__ __volatile__("" :: "m" (cDest.fp));
Fpscr = setVfpFpscr(Fpscr, state);
FpDestP0.uw = cDest.bits;