2 * Copyright 2016-2017 Jacob Lifshay
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in all
12 * copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 // https://github.com/programmerjake/javascript-tasklets/blob/master/javascript_tasklets/soft_float.h
27 #ifndef UTIL_SOFT_FLOAT_H_
28 #define UTIL_SOFT_FLOAT_H_
33 #include "bit_intrinsics.h"
45 constexpr UInt128(std::uint64_t high
, std::uint64_t low
) noexcept
: low(low
), high(high
)
48 constexpr explicit UInt128(std::uint64_t low
= 0) noexcept
: low(low
), high(0)
51 static constexpr bool addCarries(std::uint64_t a
, std::uint64_t b
) noexcept
53 return static_cast<std::uint64_t>(a
+ b
) < a
;
55 static constexpr bool subBorrows(std::uint64_t a
, std::uint64_t b
) noexcept
57 return static_cast<std::uint64_t>(a
- b
) > a
;
59 friend constexpr UInt128
operator+(UInt128 a
, UInt128 b
) noexcept
61 return UInt128(a
.high
+ b
.high
+ addCarries(a
.low
, b
.low
), a
.low
+ b
.low
);
63 constexpr UInt128
&operator+=(UInt128 v
) noexcept
65 return *this = *this + v
;
67 friend constexpr UInt128
operator-(UInt128 a
, UInt128 b
) noexcept
69 return UInt128(a
.high
- b
.high
- subBorrows(a
.low
, b
.low
), a
.low
- b
.low
);
71 constexpr UInt128
&operator-=(UInt128 v
) noexcept
73 return *this = *this - v
;
77 static constexpr std::uint64_t multiplyHighHelper2(std::uint64_t h
,
80 std::uint64_t l
) noexcept
82 return (UInt128(h
, l
) + UInt128(m1
>> 32, m1
<< 32) + UInt128(m2
>> 32, m2
<< 32)).high
;
84 static constexpr std::uint64_t multiplyHighHelper1(std::uint32_t ah
,
87 std::uint32_t bl
) noexcept
89 return multiplyHighHelper2(static_cast<std::uint64_t>(ah
) * bh
,
90 static_cast<std::uint64_t>(ah
) * bl
,
91 static_cast<std::uint64_t>(al
) * bh
,
92 static_cast<std::uint64_t>(al
) * bl
);
96 static constexpr std::uint64_t multiplyHigh(std::uint64_t a
, std::uint64_t b
) noexcept
98 return multiplyHighHelper1(a
>> 32, a
, b
>> 32, b
);
100 friend constexpr UInt128
operator*(UInt128 a
, UInt128 b
) noexcept
102 return UInt128(a
.high
* b
.low
+ a
.low
* b
.high
+ multiplyHigh(a
.low
, b
.low
), a
.low
* b
.low
);
104 constexpr UInt128
&operator*=(UInt128 v
) noexcept
106 return *this = *this * v
;
109 static constexpr DivModResult
divmod(UInt128 a
, UInt128 b
) noexcept
;
110 static constexpr UInt128
div(UInt128 a
, UInt128 b
) noexcept
;
111 static constexpr UInt128
mod(UInt128 a
, UInt128 b
) noexcept
;
112 friend constexpr UInt128
operator/(UInt128 a
, UInt128 b
) noexcept
116 friend constexpr UInt128
operator%(UInt128 a
, UInt128 b
) noexcept
120 constexpr UInt128
&operator/=(UInt128 v
) noexcept
122 return *this = *this / v
;
124 constexpr UInt128
&operator%=(UInt128 v
) noexcept
126 return *this = *this % v
;
128 friend constexpr UInt128
operator&(UInt128 a
, UInt128 b
) noexcept
130 return UInt128(a
.high
& b
.high
, a
.low
& b
.low
);
132 constexpr UInt128
&operator&=(UInt128 v
) noexcept
134 return *this = *this & v
;
136 friend constexpr UInt128
operator|(UInt128 a
, UInt128 b
) noexcept
138 return UInt128(a
.high
| b
.high
, a
.low
| b
.low
);
140 constexpr UInt128
&operator|=(UInt128 v
) noexcept
142 return *this = *this | v
;
144 friend constexpr UInt128
operator^(UInt128 a
, UInt128 b
) noexcept
146 return UInt128(a
.high
^ b
.high
, a
.low
^ b
.low
);
148 constexpr UInt128
&operator^=(UInt128 v
) noexcept
150 return *this = *this ^ v
;
152 friend constexpr UInt128
operator<<(UInt128 v
, unsigned shiftAmount
) noexcept
154 assert(shiftAmount
< 128);
155 return shiftAmount
== 0 ? v
: shiftAmount
< 64 ?
156 UInt128((v
.high
<< shiftAmount
) | (v
.low
>> (64 - shiftAmount
)),
157 v
.low
<< shiftAmount
) :
158 shiftAmount
== 64 ? UInt128(v
.low
, 0) :
159 UInt128(v
.low
<< (shiftAmount
- 64), 0);
161 constexpr UInt128
&operator<<=(unsigned shiftAmount
) noexcept
163 return *this = *this << shiftAmount
;
165 friend constexpr UInt128
operator>>(UInt128 v
, unsigned shiftAmount
) noexcept
167 assert(shiftAmount
< 128);
168 return shiftAmount
== 0 ? v
: shiftAmount
< 64 ?
169 UInt128(v
.high
>> shiftAmount
,
170 (v
.low
>> shiftAmount
) | (v
.high
<< (64 - shiftAmount
))) :
171 shiftAmount
== 64 ? UInt128(0, v
.high
) :
172 UInt128(0, v
.high
>> (shiftAmount
- 64));
174 constexpr UInt128
&operator>>=(unsigned shiftAmount
) noexcept
176 return *this = *this >> shiftAmount
;
178 constexpr UInt128
operator+() noexcept
182 constexpr UInt128
operator~() noexcept
184 return UInt128(~high
, ~low
);
186 constexpr UInt128
operator-() noexcept
188 return low
!= 0 ? UInt128(~high
, -low
) : UInt128(-high
, 0);
190 friend constexpr bool operator==(UInt128 a
, UInt128 b
) noexcept
192 return a
.high
== b
.high
&& a
.low
== b
.low
;
194 friend constexpr bool operator!=(UInt128 a
, UInt128 b
) noexcept
196 return a
.high
!= b
.high
|| a
.low
!= b
.low
;
198 friend constexpr bool operator<(UInt128 a
, UInt128 b
) noexcept
200 return a
.high
< b
.high
|| (a
.high
== b
.high
&& a
.low
< b
.low
);
202 friend constexpr bool operator<=(UInt128 a
, UInt128 b
) noexcept
204 return a
.high
< b
.high
|| (a
.high
== b
.high
&& a
.low
<= b
.low
);
206 friend constexpr bool operator>(UInt128 a
, UInt128 b
) noexcept
208 return a
.high
> b
.high
|| (a
.high
== b
.high
&& a
.low
> b
.low
);
210 friend constexpr bool operator>=(UInt128 a
, UInt128 b
) noexcept
212 return a
.high
> b
.high
|| (a
.high
== b
.high
&& a
.low
>= b
.low
);
214 friend constexpr unsigned clz128(UInt128 v
) noexcept
216 return v
.high
== 0 ? 64 + clz64(v
.low
) : clz64(v
.high
);
218 friend constexpr unsigned ctz128(UInt128 v
) noexcept
220 return v
.low
== 0 ? 64 + ctz64(v
.high
) : ctz64(v
.low
);
224 struct UInt128::DivModResult final
228 constexpr DivModResult(UInt128 divResult
, UInt128 modResult
) noexcept
: divResult(divResult
),
234 constexpr UInt128::DivModResult
UInt128::divmod(UInt128 a
, UInt128 b
) noexcept
236 constexpr std::size_t NumberSizes
= 4;
237 typedef std::uint32_t Digit
;
238 typedef std::uint64_t DoubleDigit
;
239 constexpr unsigned DigitBitCount
= 32;
240 struct DigitCLZFn final
242 constexpr unsigned operator()(Digit v
) const noexcept
247 constexpr Digit DigitMax
= (static_cast<DoubleDigit
>(1) << DigitBitCount
) - 1;
248 const Digit numerator
[NumberSizes
] = {
249 static_cast<Digit
>(a
.high
>> DigitBitCount
),
250 static_cast<Digit
>(a
.high
& DigitMax
),
251 static_cast<Digit
>(a
.low
>> DigitBitCount
),
252 static_cast<Digit
>(a
.low
& DigitMax
),
254 const Digit denominator
[NumberSizes
] = {
255 static_cast<Digit
>(b
.high
>> DigitBitCount
),
256 static_cast<Digit
>(b
.high
& DigitMax
),
257 static_cast<Digit
>(b
.low
>> DigitBitCount
),
258 static_cast<Digit
>(b
.low
& DigitMax
),
260 Digit quotient
[NumberSizes
]{};
261 Digit remainder
[NumberSizes
]{};
262 std::size_t m
= NumberSizes
;
263 for(std::size_t i
= 0; i
< NumberSizes
; i
++)
265 if(denominator
[i
] != 0)
271 const std::size_t n
= NumberSizes
- m
;
274 assert(denominator
[NumberSizes
- 1] != 0);
275 for(std::size_t i
= 0; i
< NumberSizes
- 1; i
++)
279 Digit currentRemainder
= 0;
280 for(std::size_t i
= 0; i
< NumberSizes
; i
++)
282 DoubleDigit n
= currentRemainder
;
285 quotient
[i
] = n
/ denominator
[NumberSizes
- 1];
286 currentRemainder
= n
% denominator
[NumberSizes
- 1];
288 remainder
[NumberSizes
- 1] = currentRemainder
;
292 // from algorithm D, section 4.3.1 in Art of Computer Programming volume 2 by Knuth.
293 unsigned log2D
= DigitCLZFn()(denominator
[m
]);
294 Digit u
[NumberSizes
+ 1]{};
295 u
[NumberSizes
] = (numerator
[NumberSizes
- 1] << log2D
) & DigitMax
;
296 u
[0] = ((static_cast<DoubleDigit
>(numerator
[0]) << log2D
) >> DigitBitCount
) & DigitMax
;
297 for(std::size_t i
= 1; i
< NumberSizes
; i
++)
299 DoubleDigit value
= numerator
[i
- 1];
300 value
<<= DigitBitCount
;
301 value
|= numerator
[i
];
303 u
[i
] = (value
>> DigitBitCount
) & DigitMax
;
305 Digit v
[NumberSizes
+ 1] = {};
306 v
[n
] = (denominator
[NumberSizes
- 1] << log2D
) & DigitMax
;
307 for(std::size_t i
= 1; i
< n
; i
++)
309 DoubleDigit value
= denominator
[m
+ i
- 1];
310 value
<<= DigitBitCount
;
311 value
|= denominator
[m
+ i
];
313 v
[i
] = (value
>> DigitBitCount
) & DigitMax
;
316 for(std::size_t j
= 0; j
<= m
; j
++)
325 qHat
= ((static_cast<DoubleDigit
>(u
[j
]) << DigitBitCount
) | u
[j
+ 1]) / v
[1];
328 DoubleDigit lhs
= v
[2] * qHat
;
329 DoubleDigit rhsHigh
=
330 ((static_cast<DoubleDigit
>(u
[j
]) << DigitBitCount
) | u
[j
+ 1]) - qHat
* v
[1];
331 Digit rhsLow
= u
[j
+ 2];
332 if(rhsHigh
< static_cast<DoubleDigit
>(1) << DigitBitCount
333 && lhs
> ((rhsHigh
<< DigitBitCount
) | rhsLow
))
338 if(rhsHigh
< static_cast<DoubleDigit
>(1) << DigitBitCount
339 && lhs
> ((rhsHigh
<< DigitBitCount
) | rhsLow
))
348 for(std::size_t i
= n
; i
> 0; i
--)
350 assert(i
<= NumberSizes
);
351 DoubleDigit product
= qHat
* v
[i
] + mulCarry
;
352 mulCarry
= product
>> DigitBitCount
;
354 bool prevBorrow
= borrow
;
355 DoubleDigit digit
= u
[j
+ i
] - product
- prevBorrow
;
356 borrow
= digit
!= (digit
& DigitMax
);
360 bool prevBorrow
= borrow
;
361 DoubleDigit digit
= u
[j
] - mulCarry
- prevBorrow
;
362 borrow
= digit
!= (digit
& DigitMax
);
371 for(std::size_t i
= n
; i
> 0; i
--)
373 bool prevCarry
= carry
;
374 assert(i
+ j
<= NumberSizes
);
375 DoubleDigit digit
= u
[j
+ i
] + v
[i
] + prevCarry
;
376 carry
= digit
!= (digit
& DigitMax
);
380 u
[j
] = (u
[j
] + carry
) & DigitMax
;
382 quotient
[j
+ n
- 1] = qj
;
384 for(std::size_t i
= 0; i
< NumberSizes
; i
++)
386 DoubleDigit value
= u
[i
];
387 value
<<= DigitBitCount
;
389 remainder
[i
] = value
>> log2D
;
393 UInt128((static_cast<DoubleDigit
>(quotient
[0]) << DigitBitCount
) | quotient
[1],
394 (static_cast<DoubleDigit
>(quotient
[2]) << DigitBitCount
) | quotient
[3]),
395 UInt128((static_cast<DoubleDigit
>(remainder
[0]) << DigitBitCount
) | remainder
[1],
396 (static_cast<DoubleDigit
>(remainder
[2]) << DigitBitCount
) | remainder
[3]));
399 constexpr UInt128
UInt128::div(UInt128 a
, UInt128 b
) noexcept
401 return divmod(a
, b
).divResult
;
404 constexpr UInt128
UInt128::mod(UInt128 a
, UInt128 b
) noexcept
406 return divmod(a
, b
).modResult
;
409 struct ExtendedFloat final
// modeled after IEEE754 standard
411 std::uint64_t mantissa
;
412 std::uint16_t exponent
;
414 static constexpr std::uint16_t infinityNaNExponent() noexcept
418 static constexpr std::uint16_t exponentBias() noexcept
422 static constexpr std::uint64_t normalizedMantissaMax() noexcept
424 return 0xFFFFFFFFFFFFFFFFULL
;
426 static constexpr std::uint64_t normalizedMantissaMin() noexcept
428 return 0x8000000000000000ULL
;
430 struct NormalizedTag final
433 static constexpr ExtendedFloat
normalizeHelper(const ExtendedFloat
&v
,
434 unsigned shiftAmount
) noexcept
436 return shiftAmount
> 0 && v
.exponent
>= shiftAmount
?
437 ExtendedFloat(NormalizedTag
{},
438 v
.mantissa
<< shiftAmount
,
439 v
.exponent
- shiftAmount
,
443 static constexpr ExtendedFloat
normalizeHelper(UInt128 mantissa
,
444 std::int32_t exponent
,
446 int shiftAmount
) noexcept
448 return shiftAmount
> 0 && exponent
>= shiftAmount
?
449 ExtendedFloat(NormalizedTag
{},
450 (mantissa
<< shiftAmount
).high
,
451 exponent
- shiftAmount
,
453 ExtendedFloat(NormalizedTag
{}, mantissa
.high
, exponent
, sign
);
455 static constexpr ExtendedFloat
normalize(const ExtendedFloat
&v
) noexcept
457 return v
.exponent
== infinityNaNExponent() ? v
: v
.mantissa
== 0 ?
459 normalizeHelper(v
, clz64(v
.mantissa
));
461 static constexpr ExtendedFloat
normalize(UInt128 mantissa
,
462 std::uint16_t exponent
,
465 return exponent
== infinityNaNExponent() ?
467 NormalizedTag
{}, mantissa
!= UInt128(0), infinityNaNExponent(), sign
) :
468 mantissa
== UInt128(0) ?
470 normalizeHelper(mantissa
, exponent
, sign
, clz128(mantissa
));
472 constexpr ExtendedFloat() noexcept
: mantissa(0), exponent(0), sign(false)
475 constexpr ExtendedFloat(NormalizedTag
,
476 std::uint64_t mantissa
,
477 std::uint16_t exponent
,
478 bool sign
= false) noexcept
: mantissa(mantissa
),
483 explicit constexpr ExtendedFloat(std::uint64_t mantissa
,
484 std::uint16_t exponent
= exponentBias() + 63,
485 bool sign
= false) noexcept
486 : ExtendedFloat(normalize(ExtendedFloat(NormalizedTag
{}, mantissa
, exponent
, sign
)))
489 explicit constexpr ExtendedFloat(UInt128 mantissa
,
490 std::uint16_t exponent
= exponentBias() + 127,
491 bool sign
= false) noexcept
492 : ExtendedFloat(normalize(mantissa
, exponent
, sign
))
495 explicit constexpr ExtendedFloat(std::int64_t mantissa
) noexcept
496 : ExtendedFloat(mantissa
< 0 ? -static_cast<std::uint64_t>(mantissa
) :
497 static_cast<std::uint64_t>(mantissa
),
502 explicit ExtendedFloat(double value
) noexcept
: mantissa(0),
504 sign(std::signbit(value
))
506 value
= std::fabs(value
);
507 if(std::isnan(value
))
510 exponent
= infinityNaNExponent();
513 if(std::isinf(value
))
515 exponent
= infinityNaNExponent();
525 int log2Value
= std::ilogb(value
);
526 if(log2Value
<= -static_cast<int>(exponentBias()))
529 exponent
= log2Value
+ exponentBias();
530 value
= std::scalbn(value
, 63 - static_cast<long>(exponent
) + exponentBias());
533 explicit ExtendedFloat(long double value
) noexcept
: mantissa(0),
535 sign(std::signbit(value
))
537 value
= std::fabs(value
);
538 if(std::isnan(value
))
541 exponent
= infinityNaNExponent();
544 if(std::isinf(value
))
546 exponent
= infinityNaNExponent();
556 int log2Value
= std::ilogb(value
);
557 if(log2Value
<= -static_cast<int>(exponentBias()))
560 exponent
= log2Value
+ exponentBias();
561 value
= std::scalbn(value
, 63 - static_cast<long>(exponent
) + exponentBias());
564 explicit operator long double() const noexcept
566 if(exponent
== infinityNaNExponent())
568 double retval
= std::numeric_limits
<double>::infinity();
570 retval
= std::numeric_limits
<double>::quiet_NaN();
581 long double value
= std::scalbln(static_cast<long double>(mantissa
),
582 static_cast<long>(exponent
) - exponentBias() - 63);
587 explicit operator double() const noexcept
589 if(exponent
== infinityNaNExponent())
591 double retval
= std::numeric_limits
<double>::infinity();
593 retval
= std::numeric_limits
<double>::quiet_NaN();
604 double value
= std::scalbln(static_cast<double>(mantissa
),
605 static_cast<long>(exponent
) - exponentBias() - 63);
610 constexpr bool isNaN() const noexcept
612 return exponent
== infinityNaNExponent() && mantissa
!= 0;
614 constexpr bool isInfinite() const noexcept
616 return exponent
== infinityNaNExponent() && mantissa
== 0;
618 constexpr bool isFinite() const noexcept
620 return exponent
!= infinityNaNExponent();
622 constexpr bool isNormal() const noexcept
624 return exponent
!= infinityNaNExponent() && exponent
!= 0;
626 constexpr bool isDenormal() const noexcept
628 return exponent
== 0 && mantissa
!= 0;
630 constexpr bool isZero() const noexcept
632 return exponent
== 0 && mantissa
== 0;
634 constexpr bool signBit() const noexcept
638 static constexpr ExtendedFloat
NaN() noexcept
640 return ExtendedFloat(NormalizedTag
{}, 1, infinityNaNExponent());
642 static constexpr ExtendedFloat
One() noexcept
644 return ExtendedFloat(NormalizedTag
{}, 0x8000000000000000ULL
, exponentBias());
646 static constexpr ExtendedFloat
TwoToThe64() noexcept
648 return ExtendedFloat(NormalizedTag
{}, 0x8000000000000000ULL
, exponentBias() + 64);
650 static constexpr ExtendedFloat
Infinity(bool sign
= false) noexcept
652 return ExtendedFloat(NormalizedTag
{}, 0, infinityNaNExponent(), sign
);
654 static constexpr ExtendedFloat
Zero(bool sign
= false) noexcept
656 return ExtendedFloat(NormalizedTag
{}, 0, 0, sign
);
658 constexpr ExtendedFloat
operator+() const noexcept
662 constexpr ExtendedFloat
operator-() const noexcept
664 return ExtendedFloat(NormalizedTag
{}, mantissa
, exponent
, !sign
);
666 static constexpr UInt128
shiftHelper(std::uint64_t a
, unsigned shift
) noexcept
668 return shift
>= 128 ? UInt128(0) : UInt128(a
, 0) >> shift
;
670 static constexpr UInt128
finalRoundHelper(UInt128 v
) noexcept
672 return v
.low
== 0x8000000000000000ULL
&& (v
.high
& 1) == 0 ?
674 ((v
>> 1) + UInt128(0x4000000000000000ULL
)) >> 63;
676 static constexpr ExtendedFloat
subtractHelper6(UInt128 mantissa
,
677 std::uint16_t exponent
,
681 return ExtendedFloat(finalRoundHelper(mantissa
<< shift
), exponent
- shift
+ 64, sign
);
683 static constexpr ExtendedFloat
subtractHelper5(UInt128 mantissa
,
684 std::uint16_t exponent
,
688 return subtractHelper6(mantissa
, exponent
, sign
, shift
> exponent
? exponent
: shift
);
690 static constexpr ExtendedFloat
subtractHelper4(UInt128 mantissa
,
691 std::uint16_t exponent
,
694 return subtractHelper5(mantissa
, exponent
, sign
, clz128(mantissa
));
696 static constexpr ExtendedFloat
subtractHelper3(UInt128 aMantissa
,
698 std::uint16_t exponent
) noexcept
700 return aMantissa
== bMantissa
? Zero() : aMantissa
< bMantissa
?
701 subtractHelper4(bMantissa
- aMantissa
, exponent
, true) :
702 subtractHelper4(aMantissa
- bMantissa
, exponent
, false);
704 static constexpr ExtendedFloat
subtractHelper2(std::uint64_t aMantissa
,
705 std::uint16_t aExponent
,
706 std::uint64_t bMantissa
,
707 std::uint16_t bExponent
,
708 std::uint16_t maxExponent
) noexcept
710 return subtractHelper3(shiftHelper(aMantissa
, maxExponent
- aExponent
),
711 shiftHelper(bMantissa
, maxExponent
- bExponent
),
714 static constexpr ExtendedFloat
subtractHelper(std::uint64_t aMantissa
,
715 std::uint16_t aExponent
,
716 std::uint64_t bMantissa
,
717 std::uint16_t bExponent
) noexcept
719 return subtractHelper2(aMantissa
,
723 aExponent
< bExponent
? bExponent
: aExponent
);
725 static constexpr ExtendedFloat
addHelper3(UInt128 mantissa
,
726 std::uint16_t exponent
,
729 return mantissa
>= UInt128(0x8000000000000000ULL
, 0) ?
730 (exponent
+ 1 == infinityNaNExponent() ?
732 ExtendedFloat(finalRoundHelper(mantissa
), exponent
+ 65, sign
)) :
733 ExtendedFloat(finalRoundHelper(mantissa
<< 1), exponent
+ 64, sign
);
735 static constexpr ExtendedFloat
addHelper2(std::uint64_t aMantissa
,
736 std::uint16_t aExponent
,
737 std::uint64_t bMantissa
,
738 std::uint16_t bExponent
,
739 std::uint16_t maxExponent
,
742 return addHelper3(shiftHelper(aMantissa
, maxExponent
- aExponent
+ 1)
743 + shiftHelper(bMantissa
, maxExponent
- bExponent
+ 1),
747 static constexpr ExtendedFloat
addHelper(std::uint64_t aMantissa
,
748 std::uint16_t aExponent
,
749 std::uint64_t bMantissa
,
750 std::uint16_t bExponent
,
753 return addHelper2(aMantissa
,
757 aExponent
< bExponent
? bExponent
: aExponent
,
760 constexpr friend ExtendedFloat
operator+(const ExtendedFloat
&a
,
761 const ExtendedFloat
&b
) noexcept
763 return a
.isNaN() ? a
: b
.isNaN() ?
766 (b
.isInfinite() ? (a
.sign
== b
.sign
? a
: NaN()) : a
) :
770 (b
.isZero() ? Zero(a
.sign
&& b
.sign
) : b
) :
774 addHelper(a
.mantissa
, a
.exponent
, b
.mantissa
, b
.exponent
, a
.sign
) :
775 a
.sign
? subtractHelper(b
.mantissa
, b
.exponent
, a
.mantissa
, a
.exponent
) :
776 subtractHelper(a
.mantissa
, a
.exponent
, b
.mantissa
, b
.exponent
);
778 constexpr friend ExtendedFloat
operator-(const ExtendedFloat
&a
,
779 const ExtendedFloat
&b
) noexcept
781 return a
+ b
.operator-();
783 constexpr ExtendedFloat
&operator+=(const ExtendedFloat
&v
) noexcept
785 return *this = *this + v
;
787 constexpr ExtendedFloat
&operator-=(const ExtendedFloat
&v
) noexcept
789 return *this = *this - v
;
791 friend constexpr bool operator==(const ExtendedFloat
&a
, const ExtendedFloat
&b
) noexcept
793 return a
.isNaN() ? false : b
.isNaN() ? false : a
.isZero() ?
795 a
.exponent
== b
.exponent
&& a
.mantissa
== b
.mantissa
;
797 friend constexpr bool operator!=(const ExtendedFloat
&a
, const ExtendedFloat
&b
) noexcept
801 static constexpr int compareHelper(const ExtendedFloat
&a
, const ExtendedFloat
&b
) noexcept
803 return a
.isZero() ? (b
.isZero() ? 0 : (b
.sign
? 1 : -1)) : a
.sign
!= b
.sign
?
805 a
.exponent
!= b
.exponent
?
806 ((a
.exponent
< b
.exponent
) != a
.sign
? -1 : 1) :
807 a
.mantissa
== b
.mantissa
? 0 :
808 (a
.mantissa
< b
.mantissa
) != a
.sign
? -1 : 1;
810 friend constexpr bool operator<(const ExtendedFloat
&a
, const ExtendedFloat
&b
) noexcept
812 return a
.isNaN() ? false : b
.isNaN() ? false : compareHelper(a
, b
) < 0;
814 friend constexpr bool operator<=(const ExtendedFloat
&a
, const ExtendedFloat
&b
) noexcept
816 return a
.isNaN() ? false : b
.isNaN() ? false : compareHelper(a
, b
) <= 0;
818 friend constexpr bool operator>(const ExtendedFloat
&a
, const ExtendedFloat
&b
) noexcept
820 return a
.isNaN() ? false : b
.isNaN() ? false : compareHelper(a
, b
) > 0;
822 friend constexpr bool operator>=(const ExtendedFloat
&a
, const ExtendedFloat
&b
) noexcept
824 return a
.isNaN() ? false : b
.isNaN() ? false : compareHelper(a
, b
) >= 0;
826 static constexpr ExtendedFloat
mulHelper4(UInt128 mantissa
,
827 std::int32_t exponent
,
830 return exponent
>= infinityNaNExponent() ?
834 exponent
< 0 ? ExtendedFloat(finalRoundHelper(mantissa
>> -exponent
), 64, sign
) :
835 ExtendedFloat(finalRoundHelper(mantissa
), exponent
+ 64, sign
);
837 static constexpr ExtendedFloat
mulHelper3(UInt128 mantissa
,
838 std::int32_t exponent
,
840 unsigned shift
) noexcept
842 return mulHelper4(mantissa
<< shift
, exponent
- shift
, sign
);
844 static constexpr ExtendedFloat
mulHelper2(UInt128 mantissa
,
845 std::int32_t exponent
,
848 return mantissa
== UInt128(0) ? Zero(sign
) :
849 mulHelper3(mantissa
, exponent
, sign
, clz128(mantissa
));
851 static constexpr ExtendedFloat
mulHelper(std::uint64_t aMantissa
,
852 std::int32_t aExponent
,
853 std::uint64_t bMantissa
,
854 std::int32_t bExponent
,
857 return mulHelper2(UInt128(aMantissa
) * UInt128(bMantissa
),
858 aExponent
+ bExponent
- exponentBias() + 1,
861 constexpr friend ExtendedFloat
operator*(const ExtendedFloat
&a
,
862 const ExtendedFloat
&b
) noexcept
864 return a
.isNaN() ? a
: b
.isNaN() ?
867 (b
.isZero() ? NaN() : Infinity(a
.sign
!= b
.sign
)) :
869 (a
.isZero() ? NaN() : Infinity(a
.sign
!= b
.sign
)) :
871 a
.mantissa
, a
.exponent
, b
.mantissa
, b
.exponent
, a
.sign
!= b
.sign
);
873 constexpr ExtendedFloat
&operator*=(const ExtendedFloat
&v
) noexcept
875 return *this = *this * v
;
877 static constexpr int compareU128(UInt128 a
, UInt128 b
) noexcept
879 return a
== b
? 0 : a
< b
? -1 : 1;
881 static constexpr ExtendedFloat
divHelper6(UInt128 mantissa
,
882 std::int32_t exponent
,
885 return exponent
>= infinityNaNExponent() ?
889 exponent
< 0 ? ExtendedFloat(finalRoundHelper(mantissa
>> -exponent
), 64, sign
) :
890 ExtendedFloat(finalRoundHelper(mantissa
), exponent
+ 64, sign
);
892 static constexpr ExtendedFloat
divHelper5(UInt128 quotient
,
894 int roundExtraBitsCompareValue
,
895 std::int32_t exponent
,
899 ((quotient
<< 2) | UInt128(static_cast<std::uint64_t>(2 - roundExtraBitsCompareValue
)))
901 exponent
- shift
+ 64,
904 static constexpr ExtendedFloat
divHelper4(UInt128::DivModResult mantissa
,
905 std::uint64_t bMantissa
,
906 std::int32_t exponent
,
909 return divHelper5(mantissa
.divResult
,
910 clz128(mantissa
.divResult
),
911 compareU128(UInt128(bMantissa
), mantissa
.modResult
<< 1),
915 static constexpr ExtendedFloat
divHelper3(std::uint64_t aMantissa
,
916 std::uint64_t bMantissa
,
917 std::int32_t exponent
,
921 UInt128::divmod(UInt128(aMantissa
, 0), UInt128(bMantissa
)), bMantissa
, exponent
, sign
);
923 static constexpr ExtendedFloat
divHelper2(std::uint64_t aMantissa
,
924 std::int32_t aExponent
,
926 std::uint64_t bMantissa
,
927 std::int32_t bExponent
,
931 return divHelper3(aMantissa
<< aShift
,
933 aExponent
- aShift
- (bExponent
- bShift
) + exponentBias() - 1,
936 static constexpr ExtendedFloat
divHelper(std::uint64_t aMantissa
,
937 std::int32_t aExponent
,
938 std::uint64_t bMantissa
,
939 std::int32_t bExponent
,
943 aMantissa
, aExponent
, clz64(aMantissa
), bMantissa
, bExponent
, clz64(bMantissa
), sign
);
945 friend constexpr ExtendedFloat
operator/(const ExtendedFloat
&a
,
946 const ExtendedFloat
&b
) noexcept
948 return a
.isNaN() ? a
: b
.isNaN() ?
951 (b
.isInfinite() ? NaN() : Infinity(a
.sign
!= b
.sign
)) :
953 (a
.isZero() ? NaN() : Infinity(a
.sign
!= b
.sign
)) :
954 b
.isInfinite() || a
.isZero() ?
955 Zero(a
.sign
!= b
.sign
) :
957 a
.mantissa
, a
.exponent
, b
.mantissa
, b
.exponent
, a
.sign
!= b
.sign
);
959 constexpr ExtendedFloat
&operator/=(const ExtendedFloat
&v
) noexcept
961 return *this = *this / v
;
963 static constexpr ExtendedFloat
floorCeilHelper2(std::uint64_t mantissa
,
964 std::int32_t exponent
) noexcept
966 return exponent
>= infinityNaNExponent() ?
971 ExtendedFloat(finalRoundHelper(UInt128(mantissa
, 0) >> -exponent
), 64) :
972 ExtendedFloat(finalRoundHelper(UInt128(mantissa
, 0)), exponent
+ 64);
974 static constexpr ExtendedFloat
floorCeilHelper(UInt128 mantissa
, std::int32_t exponent
) noexcept
976 return mantissa
.high
!= 0 ? floorCeilHelper2((mantissa
>> 1).low
, exponent
+ 1) :
977 floorCeilHelper2(mantissa
.low
, exponent
);
979 static constexpr ExtendedFloat
ceilHelper2(UInt128 mantissa
) noexcept
981 return mantissa
.low
!= 0 ? (mantissa
.high
== ~static_cast<std::uint64_t>(0) ?
983 ExtendedFloat(mantissa
.high
+ 1)) :
984 ExtendedFloat(mantissa
.high
);
986 static constexpr ExtendedFloat
ceilHelper(std::uint64_t mantissa
,
987 std::int32_t exponent
) noexcept
989 return exponent
< exponentBias() ?
991 exponent
>= exponentBias() + 63 ?
992 ExtendedFloat(NormalizedTag
{}, mantissa
, exponent
) :
993 ceilHelper2(UInt128(mantissa
, 0) >> (exponentBias() - exponent
+ 63));
995 static constexpr ExtendedFloat
floorHelper2(UInt128 mantissa
) noexcept
997 return ExtendedFloat(mantissa
.high
);
999 static constexpr ExtendedFloat
floorHelper(std::uint64_t mantissa
,
1000 std::int32_t exponent
) noexcept
1002 return exponent
< exponentBias() ?
1004 exponent
>= exponentBias() + 63 ?
1005 ExtendedFloat(NormalizedTag
{}, mantissa
, exponent
) :
1006 floorHelper2(UInt128(mantissa
, 0) >> (exponentBias() - exponent
+ 63));
1008 constexpr friend ExtendedFloat
floor(const ExtendedFloat
&v
) noexcept
1010 return !v
.isFinite() || v
.isZero() ? v
: v
.sign
? -ceilHelper(v
.mantissa
, v
.exponent
) :
1011 floorHelper(v
.mantissa
, v
.exponent
);
1013 constexpr friend ExtendedFloat
trunc(const ExtendedFloat
&v
) noexcept
1015 return !v
.isFinite() || v
.isZero() ? v
: v
.sign
? -floorHelper(v
.mantissa
, v
.exponent
) :
1016 floorHelper(v
.mantissa
, v
.exponent
);
1018 constexpr friend ExtendedFloat
ceil(const ExtendedFloat
&v
) noexcept
1020 return !v
.isFinite() || v
.isZero() ? v
: v
.sign
? -floorHelper(v
.mantissa
, v
.exponent
) :
1021 ceilHelper(v
.mantissa
, v
.exponent
);
1023 static constexpr ExtendedFloat
roundHelper(std::uint64_t mantissa
,
1024 std::int32_t exponent
) noexcept
1026 return exponent
< exponentBias() - 2 ?
1028 exponent
>= exponentBias() + 63 ?
1029 ExtendedFloat(NormalizedTag
{}, mantissa
, exponent
) :
1030 ExtendedFloat(((UInt128(mantissa
, 0) >> (exponentBias() - exponent
+ 64))
1031 + UInt128(0x4000000000000000ULL
))
1034 constexpr friend ExtendedFloat
round(const ExtendedFloat
&v
) noexcept
1036 return !v
.isFinite() || v
.isZero() ? v
: v
.sign
? -roundHelper(v
.mantissa
, v
.exponent
) :
1037 roundHelper(v
.mantissa
, v
.exponent
);
1039 explicit constexpr operator std::uint64_t() const noexcept
1041 return isNaN() ? 0 : isInfinite() ?
1042 (sign
? 0 : ~static_cast<std::uint64_t>(0)) :
1043 exponent
< exponentBias() || sign
?
1045 *this >= TwoToThe64() ?
1046 ~static_cast<std::uint64_t>(0) :
1047 (UInt128(mantissa
, 0) >> (exponentBias() - exponent
+ 63)).high
;
1049 static constexpr std::int64_t toInt64Helper(bool sign
, std::uint64_t uint64Value
) noexcept
1051 return sign
? (uint64Value
> 0x8000000000000000ULL
?
1052 -static_cast<std::int64_t>(0x7FFFFFFFFFFFFFFFULL
) - 1 :
1053 -static_cast<std::int64_t>(uint64Value
)) :
1054 uint64Value
>= 0x8000000000000000ULL
?
1055 static_cast<std::int64_t>(0x7FFFFFFFFFFFFFFFULL
) :
1056 static_cast<std::int64_t>(uint64Value
);
1058 explicit constexpr operator std::int64_t() const noexcept
1060 return isNaN() ? 0 : sign
? toInt64Helper(true, static_cast<std::uint64_t>(operator-())) :
1061 toInt64Helper(false, static_cast<std::uint64_t>(*this));
1063 static constexpr ExtendedFloat
powHelper(const ExtendedFloat
&base
,
1064 const ExtendedFloat
¤tValue
,
1065 std::uint64_t exponent
) noexcept
1067 return exponent
== 0 ? currentValue
: exponent
== 1 ?
1068 currentValue
* base
:
1070 currentValue
* (base
* base
) :
1072 powHelper(base
* base
, currentValue
* base
, exponent
>> 1) :
1073 powHelper(base
* base
, currentValue
, exponent
>> 1);
1075 constexpr friend ExtendedFloat
pow(const ExtendedFloat
&base
, std::uint64_t exponent
) noexcept
1077 return powHelper(base
, One(), exponent
);
1079 friend ExtendedFloat
pow(const ExtendedFloat
&base
, std::int64_t exponent
) noexcept
1081 return exponent
< 0 ? powHelper(One() / base
, One(), -exponent
) :
1082 powHelper(base
, One(), exponent
);
1084 constexpr friend int ilogb(const ExtendedFloat
&v
) noexcept
1086 return v
.isNaN() ? FP_ILOGBNAN
: v
.isZero() ? FP_ILOGB0
: v
.isInfinite() ?
1087 std::numeric_limits
<int>::max() :
1088 static_cast<std::int32_t>(v
.exponent
)
1090 - clz64(v
.mantissa
);
1092 static constexpr ExtendedFloat
scalbnHelper(std::uint64_t mantissa
,
1093 std::int64_t exponent
,
1096 return exponent
>= infinityNaNExponent() ?
1101 ExtendedFloat(finalRoundHelper(UInt128(mantissa
, 0) >> -exponent
), 64, sign
) :
1102 ExtendedFloat(finalRoundHelper(UInt128(mantissa
, 0)), exponent
+ 64, sign
);
1104 constexpr friend ExtendedFloat
scalbn(const ExtendedFloat
&v
, std::int64_t exponent
) noexcept
1106 return !v
.isFinite() || v
.isZero() ? v
: scalbnHelper(
1107 v
.mantissa
, v
.exponent
+ exponent
, v
.sign
);
1109 static constexpr std::uint64_t log2Helper4(UInt128 mantissa
) noexcept
1111 return ~mantissa
.high
== 0
1112 || ((mantissa
.high
& 1) == 0 && mantissa
.low
== 0x8000000000000000ULL
) ?
1114 (mantissa
+ UInt128(0x8000000000000000ULL
)).high
;
1116 static constexpr UInt128
log2Helper3(UInt128 mantissa
, unsigned bitsLeft
) noexcept
1118 return (bitsLeft
> 0 ?
1120 log2Helper4(mantissa
<< (mantissa
.high
& 0x8000000000000000ULL
? 0 : 1)),
1124 | UInt128(mantissa
.high
& 0x8000000000000000ULL
, 0);
1126 static constexpr UInt128
log2Helper2(std::uint64_t mantissa
, unsigned bitsLeft
) noexcept
1128 return log2Helper3(UInt128(mantissa
) * UInt128(mantissa
), bitsLeft
);
1130 static constexpr ExtendedFloat
log2Helper(const ExtendedFloat
&v
, unsigned shift
) noexcept
1132 return ExtendedFloat(finalRoundHelper(log2Helper2(v
.mantissa
<< shift
, 67)),
1133 exponentBias() - 1 + 64,
1135 + ExtendedFloat(static_cast<std::int64_t>(v
.exponent
) - exponentBias() - shift
);
1137 constexpr friend ExtendedFloat
log2(const ExtendedFloat
&v
) noexcept
1139 return v
.isNaN() ? v
: v
.isZero() ? Infinity(true) : v
.sign
?
1141 v
.isInfinite() ? v
: log2Helper(v
, clz64(v
.mantissa
));
1143 static constexpr ExtendedFloat
Log10Of2() noexcept
1145 return ExtendedFloat(NormalizedTag
{}, 0x9A209A84FBCFF799ULL
, exponentBias() - 2);
1147 static constexpr ExtendedFloat
LogOf2() noexcept
1149 return ExtendedFloat(NormalizedTag
{}, 0xB17217F7D1CF79ACULL
, exponentBias() - 1);
1151 constexpr friend ExtendedFloat
log10(const ExtendedFloat
&v
) noexcept
1153 return log2(v
) * Log10Of2();
1155 constexpr friend ExtendedFloat
log(const ExtendedFloat
&v
) noexcept
1157 return log2(v
) * LogOf2();
1164 #endif /* UTIL_SOFT_FLOAT_H_ */