working on util::filesystem
[kazan.git] / src / util / soft_float.h
1 /*
2 * Copyright 2016-2017 Jacob Lifshay
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in all
12 * copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 *
22 */
23
24 // derived from
25 // https://github.com/programmerjake/javascript-tasklets/blob/master/javascript_tasklets/soft_float.h
26
27 #ifndef UTIL_SOFT_FLOAT_H_
28 #define UTIL_SOFT_FLOAT_H_
29
30 #include <cstdint>
31 #include <cmath>
32 #include <cassert>
33 #include "bit_intrinsics.h"
34
35 namespace vulkan_cpu
36 {
37 namespace util
38 {
39 namespace soft_float
40 {
41 struct UInt128 final
42 {
43 std::uint64_t low;
44 std::uint64_t high;
45 constexpr UInt128(std::uint64_t high, std::uint64_t low) noexcept : low(low), high(high)
46 {
47 }
48 constexpr explicit UInt128(std::uint64_t low = 0) noexcept : low(low), high(0)
49 {
50 }
51 static constexpr bool addCarries(std::uint64_t a, std::uint64_t b) noexcept
52 {
53 return static_cast<std::uint64_t>(a + b) < a;
54 }
55 static constexpr bool subBorrows(std::uint64_t a, std::uint64_t b) noexcept
56 {
57 return static_cast<std::uint64_t>(a - b) > a;
58 }
59 friend constexpr UInt128 operator+(UInt128 a, UInt128 b) noexcept
60 {
61 return UInt128(a.high + b.high + addCarries(a.low, b.low), a.low + b.low);
62 }
63 constexpr UInt128 &operator+=(UInt128 v) noexcept
64 {
65 return *this = *this + v;
66 }
67 friend constexpr UInt128 operator-(UInt128 a, UInt128 b) noexcept
68 {
69 return UInt128(a.high - b.high - subBorrows(a.low, b.low), a.low - b.low);
70 }
71 constexpr UInt128 &operator-=(UInt128 v) noexcept
72 {
73 return *this = *this - v;
74 }
75
76 private:
77 static constexpr std::uint64_t multiplyHighHelper2(std::uint64_t h,
78 std::uint64_t m1,
79 std::uint64_t m2,
80 std::uint64_t l) noexcept
81 {
82 return (UInt128(h, l) + UInt128(m1 >> 32, m1 << 32) + UInt128(m2 >> 32, m2 << 32)).high;
83 }
84 static constexpr std::uint64_t multiplyHighHelper1(std::uint32_t ah,
85 std::uint32_t al,
86 std::uint32_t bh,
87 std::uint32_t bl) noexcept
88 {
89 return multiplyHighHelper2(static_cast<std::uint64_t>(ah) * bh,
90 static_cast<std::uint64_t>(ah) * bl,
91 static_cast<std::uint64_t>(al) * bh,
92 static_cast<std::uint64_t>(al) * bl);
93 }
94
95 public:
96 static constexpr std::uint64_t multiplyHigh(std::uint64_t a, std::uint64_t b) noexcept
97 {
98 return multiplyHighHelper1(a >> 32, a, b >> 32, b);
99 }
100 friend constexpr UInt128 operator*(UInt128 a, UInt128 b) noexcept
101 {
102 return UInt128(a.high * b.low + a.low * b.high + multiplyHigh(a.low, b.low), a.low * b.low);
103 }
104 constexpr UInt128 &operator*=(UInt128 v) noexcept
105 {
106 return *this = *this * v;
107 }
108 struct DivModResult;
109 static constexpr DivModResult divmod(UInt128 a, UInt128 b) noexcept;
110 static constexpr UInt128 div(UInt128 a, UInt128 b) noexcept;
111 static constexpr UInt128 mod(UInt128 a, UInt128 b) noexcept;
112 friend constexpr UInt128 operator/(UInt128 a, UInt128 b) noexcept
113 {
114 return div(a, b);
115 }
116 friend constexpr UInt128 operator%(UInt128 a, UInt128 b) noexcept
117 {
118 return mod(a, b);
119 }
120 constexpr UInt128 &operator/=(UInt128 v) noexcept
121 {
122 return *this = *this / v;
123 }
124 constexpr UInt128 &operator%=(UInt128 v) noexcept
125 {
126 return *this = *this % v;
127 }
128 friend constexpr UInt128 operator&(UInt128 a, UInt128 b) noexcept
129 {
130 return UInt128(a.high & b.high, a.low & b.low);
131 }
132 constexpr UInt128 &operator&=(UInt128 v) noexcept
133 {
134 return *this = *this & v;
135 }
136 friend constexpr UInt128 operator|(UInt128 a, UInt128 b) noexcept
137 {
138 return UInt128(a.high | b.high, a.low | b.low);
139 }
140 constexpr UInt128 &operator|=(UInt128 v) noexcept
141 {
142 return *this = *this | v;
143 }
144 friend constexpr UInt128 operator^(UInt128 a, UInt128 b) noexcept
145 {
146 return UInt128(a.high ^ b.high, a.low ^ b.low);
147 }
148 constexpr UInt128 &operator^=(UInt128 v) noexcept
149 {
150 return *this = *this ^ v;
151 }
152 friend constexpr UInt128 operator<<(UInt128 v, unsigned shiftAmount) noexcept
153 {
154 assert(shiftAmount < 128);
155 return shiftAmount == 0 ? v : shiftAmount < 64 ?
156 UInt128((v.high << shiftAmount) | (v.low >> (64 - shiftAmount)),
157 v.low << shiftAmount) :
158 shiftAmount == 64 ? UInt128(v.low, 0) :
159 UInt128(v.low << (shiftAmount - 64), 0);
160 }
161 constexpr UInt128 &operator<<=(unsigned shiftAmount) noexcept
162 {
163 return *this = *this << shiftAmount;
164 }
165 friend constexpr UInt128 operator>>(UInt128 v, unsigned shiftAmount) noexcept
166 {
167 assert(shiftAmount < 128);
168 return shiftAmount == 0 ? v : shiftAmount < 64 ?
169 UInt128(v.high >> shiftAmount,
170 (v.low >> shiftAmount) | (v.high << (64 - shiftAmount))) :
171 shiftAmount == 64 ? UInt128(0, v.high) :
172 UInt128(0, v.high >> (shiftAmount - 64));
173 }
174 constexpr UInt128 &operator>>=(unsigned shiftAmount) noexcept
175 {
176 return *this = *this >> shiftAmount;
177 }
178 constexpr UInt128 operator+() noexcept
179 {
180 return *this;
181 }
182 constexpr UInt128 operator~() noexcept
183 {
184 return UInt128(~high, ~low);
185 }
186 constexpr UInt128 operator-() noexcept
187 {
188 return low != 0 ? UInt128(~high, -low) : UInt128(-high, 0);
189 }
190 friend constexpr bool operator==(UInt128 a, UInt128 b) noexcept
191 {
192 return a.high == b.high && a.low == b.low;
193 }
194 friend constexpr bool operator!=(UInt128 a, UInt128 b) noexcept
195 {
196 return a.high != b.high || a.low != b.low;
197 }
198 friend constexpr bool operator<(UInt128 a, UInt128 b) noexcept
199 {
200 return a.high < b.high || (a.high == b.high && a.low < b.low);
201 }
202 friend constexpr bool operator<=(UInt128 a, UInt128 b) noexcept
203 {
204 return a.high < b.high || (a.high == b.high && a.low <= b.low);
205 }
206 friend constexpr bool operator>(UInt128 a, UInt128 b) noexcept
207 {
208 return a.high > b.high || (a.high == b.high && a.low > b.low);
209 }
210 friend constexpr bool operator>=(UInt128 a, UInt128 b) noexcept
211 {
212 return a.high > b.high || (a.high == b.high && a.low >= b.low);
213 }
214 friend constexpr unsigned clz128(UInt128 v) noexcept
215 {
216 return v.high == 0 ? 64 + clz64(v.low) : clz64(v.high);
217 }
218 friend constexpr unsigned ctz128(UInt128 v) noexcept
219 {
220 return v.low == 0 ? 64 + ctz64(v.high) : ctz64(v.low);
221 }
222 };
223
224 struct UInt128::DivModResult final
225 {
226 UInt128 divResult;
227 UInt128 modResult;
228 constexpr DivModResult(UInt128 divResult, UInt128 modResult) noexcept : divResult(divResult),
229 modResult(modResult)
230 {
231 }
232 };
233
234 constexpr UInt128::DivModResult UInt128::divmod(UInt128 a, UInt128 b) noexcept
235 {
236 constexpr std::size_t NumberSizes = 4;
237 typedef std::uint32_t Digit;
238 typedef std::uint64_t DoubleDigit;
239 constexpr unsigned DigitBitCount = 32;
240 struct DigitCLZFn final
241 {
242 constexpr unsigned operator()(Digit v) const noexcept
243 {
244 return clz32(v);
245 }
246 };
247 constexpr Digit DigitMax = (static_cast<DoubleDigit>(1) << DigitBitCount) - 1;
248 const Digit numerator[NumberSizes] = {
249 static_cast<Digit>(a.high >> DigitBitCount),
250 static_cast<Digit>(a.high & DigitMax),
251 static_cast<Digit>(a.low >> DigitBitCount),
252 static_cast<Digit>(a.low & DigitMax),
253 };
254 const Digit denominator[NumberSizes] = {
255 static_cast<Digit>(b.high >> DigitBitCount),
256 static_cast<Digit>(b.high & DigitMax),
257 static_cast<Digit>(b.low >> DigitBitCount),
258 static_cast<Digit>(b.low & DigitMax),
259 };
260 Digit quotient[NumberSizes]{};
261 Digit remainder[NumberSizes]{};
262 std::size_t m = NumberSizes;
263 for(std::size_t i = 0; i < NumberSizes; i++)
264 {
265 if(denominator[i] != 0)
266 {
267 m = i;
268 break;
269 }
270 }
271 const std::size_t n = NumberSizes - m;
272 if(n <= 1)
273 {
274 assert(denominator[NumberSizes - 1] != 0);
275 for(std::size_t i = 0; i < NumberSizes - 1; i++)
276 {
277 remainder[i] = 0;
278 }
279 Digit currentRemainder = 0;
280 for(std::size_t i = 0; i < NumberSizes; i++)
281 {
282 DoubleDigit n = currentRemainder;
283 n <<= DigitBitCount;
284 n |= numerator[i];
285 quotient[i] = n / denominator[NumberSizes - 1];
286 currentRemainder = n % denominator[NumberSizes - 1];
287 }
288 remainder[NumberSizes - 1] = currentRemainder;
289 }
290 else
291 {
292 // from algorithm D, section 4.3.1 in Art of Computer Programming volume 2 by Knuth.
293 unsigned log2D = DigitCLZFn()(denominator[m]);
294 Digit u[NumberSizes + 1]{};
295 u[NumberSizes] = (numerator[NumberSizes - 1] << log2D) & DigitMax;
296 u[0] = ((static_cast<DoubleDigit>(numerator[0]) << log2D) >> DigitBitCount) & DigitMax;
297 for(std::size_t i = 1; i < NumberSizes; i++)
298 {
299 DoubleDigit value = numerator[i - 1];
300 value <<= DigitBitCount;
301 value |= numerator[i];
302 value <<= log2D;
303 u[i] = (value >> DigitBitCount) & DigitMax;
304 }
305 Digit v[NumberSizes + 1] = {};
306 v[n] = (denominator[NumberSizes - 1] << log2D) & DigitMax;
307 for(std::size_t i = 1; i < n; i++)
308 {
309 DoubleDigit value = denominator[m + i - 1];
310 value <<= DigitBitCount;
311 value |= denominator[m + i];
312 value <<= log2D;
313 v[i] = (value >> DigitBitCount) & DigitMax;
314 quotient[i - 1] = 0;
315 }
316 for(std::size_t j = 0; j <= m; j++)
317 {
318 DoubleDigit qHat{};
319 if(u[j] == v[1])
320 {
321 qHat = DigitMax;
322 }
323 else
324 {
325 qHat = ((static_cast<DoubleDigit>(u[j]) << DigitBitCount) | u[j + 1]) / v[1];
326 }
327 {
328 DoubleDigit lhs = v[2] * qHat;
329 DoubleDigit rhsHigh =
330 ((static_cast<DoubleDigit>(u[j]) << DigitBitCount) | u[j + 1]) - qHat * v[1];
331 Digit rhsLow = u[j + 2];
332 if(rhsHigh < static_cast<DoubleDigit>(1) << DigitBitCount
333 && lhs > ((rhsHigh << DigitBitCount) | rhsLow))
334 {
335 qHat--;
336 lhs -= v[2];
337 rhsHigh += v[1];
338 if(rhsHigh < static_cast<DoubleDigit>(1) << DigitBitCount
339 && lhs > ((rhsHigh << DigitBitCount) | rhsLow))
340 {
341 qHat--;
342 }
343 }
344 }
345 bool borrow = false;
346 {
347 Digit mulCarry = 0;
348 for(std::size_t i = n; i > 0; i--)
349 {
350 assert(i <= NumberSizes);
351 DoubleDigit product = qHat * v[i] + mulCarry;
352 mulCarry = product >> DigitBitCount;
353 product &= DigitMax;
354 bool prevBorrow = borrow;
355 DoubleDigit digit = u[j + i] - product - prevBorrow;
356 borrow = digit != (digit & DigitMax);
357 digit &= DigitMax;
358 u[j + i] = digit;
359 }
360 bool prevBorrow = borrow;
361 DoubleDigit digit = u[j] - mulCarry - prevBorrow;
362 borrow = digit != (digit & DigitMax);
363 digit &= DigitMax;
364 u[j] = digit;
365 }
366 Digit qj = qHat;
367 if(borrow)
368 {
369 qj--;
370 bool carry = false;
371 for(std::size_t i = n; i > 0; i--)
372 {
373 bool prevCarry = carry;
374 assert(i + j <= NumberSizes);
375 DoubleDigit digit = u[j + i] + v[i] + prevCarry;
376 carry = digit != (digit & DigitMax);
377 digit &= DigitMax;
378 u[j + i] = digit;
379 }
380 u[j] = (u[j] + carry) & DigitMax;
381 }
382 quotient[j + n - 1] = qj;
383 }
384 for(std::size_t i = 0; i < NumberSizes; i++)
385 {
386 DoubleDigit value = u[i];
387 value <<= DigitBitCount;
388 value |= u[i + 1];
389 remainder[i] = value >> log2D;
390 }
391 }
392 return DivModResult(
393 UInt128((static_cast<DoubleDigit>(quotient[0]) << DigitBitCount) | quotient[1],
394 (static_cast<DoubleDigit>(quotient[2]) << DigitBitCount) | quotient[3]),
395 UInt128((static_cast<DoubleDigit>(remainder[0]) << DigitBitCount) | remainder[1],
396 (static_cast<DoubleDigit>(remainder[2]) << DigitBitCount) | remainder[3]));
397 }
398
399 constexpr UInt128 UInt128::div(UInt128 a, UInt128 b) noexcept
400 {
401 return divmod(a, b).divResult;
402 }
403
404 constexpr UInt128 UInt128::mod(UInt128 a, UInt128 b) noexcept
405 {
406 return divmod(a, b).modResult;
407 }
408
409 struct ExtendedFloat final // modeled after IEEE754 standard
410 {
411 std::uint64_t mantissa;
412 std::uint16_t exponent;
413 bool sign;
414 static constexpr std::uint16_t infinityNaNExponent() noexcept
415 {
416 return 0xFFFFU;
417 }
418 static constexpr std::uint16_t exponentBias() noexcept
419 {
420 return 0x7FFFU;
421 }
422 static constexpr std::uint64_t normalizedMantissaMax() noexcept
423 {
424 return 0xFFFFFFFFFFFFFFFFULL;
425 }
426 static constexpr std::uint64_t normalizedMantissaMin() noexcept
427 {
428 return 0x8000000000000000ULL;
429 }
430 struct NormalizedTag final
431 {
432 };
433 static constexpr ExtendedFloat normalizeHelper(const ExtendedFloat &v,
434 unsigned shiftAmount) noexcept
435 {
436 return shiftAmount > 0 && v.exponent >= shiftAmount ?
437 ExtendedFloat(NormalizedTag{},
438 v.mantissa << shiftAmount,
439 v.exponent - shiftAmount,
440 v.sign) :
441 v;
442 }
443 static constexpr ExtendedFloat normalizeHelper(UInt128 mantissa,
444 std::int32_t exponent,
445 bool sign,
446 int shiftAmount) noexcept
447 {
448 return shiftAmount > 0 && exponent >= shiftAmount ?
449 ExtendedFloat(NormalizedTag{},
450 (mantissa << shiftAmount).high,
451 exponent - shiftAmount,
452 sign) :
453 ExtendedFloat(NormalizedTag{}, mantissa.high, exponent, sign);
454 }
455 static constexpr ExtendedFloat normalize(const ExtendedFloat &v) noexcept
456 {
457 return v.exponent == infinityNaNExponent() ? v : v.mantissa == 0 ?
458 Zero(v.sign) :
459 normalizeHelper(v, clz64(v.mantissa));
460 }
461 static constexpr ExtendedFloat normalize(UInt128 mantissa,
462 std::uint16_t exponent,
463 bool sign) noexcept
464 {
465 return exponent == infinityNaNExponent() ?
466 ExtendedFloat(
467 NormalizedTag{}, mantissa != UInt128(0), infinityNaNExponent(), sign) :
468 mantissa == UInt128(0) ?
469 Zero(sign) :
470 normalizeHelper(mantissa, exponent, sign, clz128(mantissa));
471 }
472 constexpr ExtendedFloat() noexcept : mantissa(0), exponent(0), sign(false)
473 {
474 }
475 constexpr ExtendedFloat(NormalizedTag,
476 std::uint64_t mantissa,
477 std::uint16_t exponent,
478 bool sign = false) noexcept : mantissa(mantissa),
479 exponent(exponent),
480 sign(sign)
481 {
482 }
483 explicit constexpr ExtendedFloat(std::uint64_t mantissa,
484 std::uint16_t exponent = exponentBias() + 63,
485 bool sign = false) noexcept
486 : ExtendedFloat(normalize(ExtendedFloat(NormalizedTag{}, mantissa, exponent, sign)))
487 {
488 }
489 explicit constexpr ExtendedFloat(UInt128 mantissa,
490 std::uint16_t exponent = exponentBias() + 127,
491 bool sign = false) noexcept
492 : ExtendedFloat(normalize(mantissa, exponent, sign))
493 {
494 }
495 explicit constexpr ExtendedFloat(std::int64_t mantissa) noexcept
496 : ExtendedFloat(mantissa < 0 ? -static_cast<std::uint64_t>(mantissa) :
497 static_cast<std::uint64_t>(mantissa),
498 exponentBias() + 63,
499 mantissa < 0)
500 {
501 }
502 explicit ExtendedFloat(double value) noexcept : mantissa(0),
503 exponent(0),
504 sign(std::signbit(value))
505 {
506 value = std::fabs(value);
507 if(std::isnan(value))
508 {
509 mantissa = 1;
510 exponent = infinityNaNExponent();
511 return;
512 }
513 if(std::isinf(value))
514 {
515 exponent = infinityNaNExponent();
516 mantissa = 0;
517 return;
518 }
519 if(value == 0)
520 {
521 exponent = 0;
522 mantissa = 0;
523 return;
524 }
525 int log2Value = std::ilogb(value);
526 if(log2Value <= -static_cast<int>(exponentBias()))
527 exponent = 0;
528 else
529 exponent = log2Value + exponentBias();
530 value = std::scalbn(value, 63 - static_cast<long>(exponent) + exponentBias());
531 mantissa = value;
532 }
533 explicit ExtendedFloat(long double value) noexcept : mantissa(0),
534 exponent(0),
535 sign(std::signbit(value))
536 {
537 value = std::fabs(value);
538 if(std::isnan(value))
539 {
540 mantissa = 1;
541 exponent = infinityNaNExponent();
542 return;
543 }
544 if(std::isinf(value))
545 {
546 exponent = infinityNaNExponent();
547 mantissa = 0;
548 return;
549 }
550 if(value == 0)
551 {
552 exponent = 0;
553 mantissa = 0;
554 return;
555 }
556 int log2Value = std::ilogb(value);
557 if(log2Value <= -static_cast<int>(exponentBias()))
558 exponent = 0;
559 else
560 exponent = log2Value + exponentBias();
561 value = std::scalbn(value, 63 - static_cast<long>(exponent) + exponentBias());
562 mantissa = value;
563 }
564 explicit operator long double() const noexcept
565 {
566 if(exponent == infinityNaNExponent())
567 {
568 double retval = std::numeric_limits<double>::infinity();
569 if(mantissa)
570 retval = std::numeric_limits<double>::quiet_NaN();
571 if(sign)
572 return -retval;
573 return retval;
574 }
575 if(isZero())
576 {
577 if(sign)
578 return -0.0;
579 return 0;
580 }
581 long double value = std::scalbln(static_cast<long double>(mantissa),
582 static_cast<long>(exponent) - exponentBias() - 63);
583 if(sign)
584 return -value;
585 return value;
586 }
587 explicit operator double() const noexcept
588 {
589 if(exponent == infinityNaNExponent())
590 {
591 double retval = std::numeric_limits<double>::infinity();
592 if(mantissa)
593 retval = std::numeric_limits<double>::quiet_NaN();
594 if(sign)
595 return -retval;
596 return retval;
597 }
598 if(isZero())
599 {
600 if(sign)
601 return -0.0;
602 return 0;
603 }
604 double value = std::scalbln(static_cast<double>(mantissa),
605 static_cast<long>(exponent) - exponentBias() - 63);
606 if(sign)
607 return -value;
608 return value;
609 }
610 constexpr bool isNaN() const noexcept
611 {
612 return exponent == infinityNaNExponent() && mantissa != 0;
613 }
614 constexpr bool isInfinite() const noexcept
615 {
616 return exponent == infinityNaNExponent() && mantissa == 0;
617 }
618 constexpr bool isFinite() const noexcept
619 {
620 return exponent != infinityNaNExponent();
621 }
622 constexpr bool isNormal() const noexcept
623 {
624 return exponent != infinityNaNExponent() && exponent != 0;
625 }
626 constexpr bool isDenormal() const noexcept
627 {
628 return exponent == 0 && mantissa != 0;
629 }
630 constexpr bool isZero() const noexcept
631 {
632 return exponent == 0 && mantissa == 0;
633 }
634 constexpr bool signBit() const noexcept
635 {
636 return sign;
637 }
638 static constexpr ExtendedFloat NaN() noexcept
639 {
640 return ExtendedFloat(NormalizedTag{}, 1, infinityNaNExponent());
641 }
642 static constexpr ExtendedFloat One() noexcept
643 {
644 return ExtendedFloat(NormalizedTag{}, 0x8000000000000000ULL, exponentBias());
645 }
646 static constexpr ExtendedFloat TwoToThe64() noexcept
647 {
648 return ExtendedFloat(NormalizedTag{}, 0x8000000000000000ULL, exponentBias() + 64);
649 }
650 static constexpr ExtendedFloat Infinity(bool sign = false) noexcept
651 {
652 return ExtendedFloat(NormalizedTag{}, 0, infinityNaNExponent(), sign);
653 }
654 static constexpr ExtendedFloat Zero(bool sign = false) noexcept
655 {
656 return ExtendedFloat(NormalizedTag{}, 0, 0, sign);
657 }
658 constexpr ExtendedFloat operator+() const noexcept
659 {
660 return *this;
661 }
662 constexpr ExtendedFloat operator-() const noexcept
663 {
664 return ExtendedFloat(NormalizedTag{}, mantissa, exponent, !sign);
665 }
666 static constexpr UInt128 shiftHelper(std::uint64_t a, unsigned shift) noexcept
667 {
668 return shift >= 128 ? UInt128(0) : UInt128(a, 0) >> shift;
669 }
670 static constexpr UInt128 finalRoundHelper(UInt128 v) noexcept
671 {
672 return v.low == 0x8000000000000000ULL && (v.high & 1) == 0 ?
673 UInt128(v.high) :
674 ((v >> 1) + UInt128(0x4000000000000000ULL)) >> 63;
675 }
676 static constexpr ExtendedFloat subtractHelper6(UInt128 mantissa,
677 std::uint16_t exponent,
678 bool sign,
679 unsigned shift)
680 {
681 return ExtendedFloat(finalRoundHelper(mantissa << shift), exponent - shift + 64, sign);
682 }
683 static constexpr ExtendedFloat subtractHelper5(UInt128 mantissa,
684 std::uint16_t exponent,
685 bool sign,
686 unsigned shift)
687 {
688 return subtractHelper6(mantissa, exponent, sign, shift > exponent ? exponent : shift);
689 }
690 static constexpr ExtendedFloat subtractHelper4(UInt128 mantissa,
691 std::uint16_t exponent,
692 bool sign)
693 {
694 return subtractHelper5(mantissa, exponent, sign, clz128(mantissa));
695 }
696 static constexpr ExtendedFloat subtractHelper3(UInt128 aMantissa,
697 UInt128 bMantissa,
698 std::uint16_t exponent) noexcept
699 {
700 return aMantissa == bMantissa ? Zero() : aMantissa < bMantissa ?
701 subtractHelper4(bMantissa - aMantissa, exponent, true) :
702 subtractHelper4(aMantissa - bMantissa, exponent, false);
703 }
704 static constexpr ExtendedFloat subtractHelper2(std::uint64_t aMantissa,
705 std::uint16_t aExponent,
706 std::uint64_t bMantissa,
707 std::uint16_t bExponent,
708 std::uint16_t maxExponent) noexcept
709 {
710 return subtractHelper3(shiftHelper(aMantissa, maxExponent - aExponent),
711 shiftHelper(bMantissa, maxExponent - bExponent),
712 maxExponent);
713 }
714 static constexpr ExtendedFloat subtractHelper(std::uint64_t aMantissa,
715 std::uint16_t aExponent,
716 std::uint64_t bMantissa,
717 std::uint16_t bExponent) noexcept
718 {
719 return subtractHelper2(aMantissa,
720 aExponent,
721 bMantissa,
722 bExponent,
723 aExponent < bExponent ? bExponent : aExponent);
724 }
725 static constexpr ExtendedFloat addHelper3(UInt128 mantissa,
726 std::uint16_t exponent,
727 bool sign) noexcept
728 {
729 return mantissa >= UInt128(0x8000000000000000ULL, 0) ?
730 (exponent + 1 == infinityNaNExponent() ?
731 Infinity(sign) :
732 ExtendedFloat(finalRoundHelper(mantissa), exponent + 65, sign)) :
733 ExtendedFloat(finalRoundHelper(mantissa << 1), exponent + 64, sign);
734 }
735 static constexpr ExtendedFloat addHelper2(std::uint64_t aMantissa,
736 std::uint16_t aExponent,
737 std::uint64_t bMantissa,
738 std::uint16_t bExponent,
739 std::uint16_t maxExponent,
740 bool sign) noexcept
741 {
742 return addHelper3(shiftHelper(aMantissa, maxExponent - aExponent + 1)
743 + shiftHelper(bMantissa, maxExponent - bExponent + 1),
744 maxExponent,
745 sign);
746 }
747 static constexpr ExtendedFloat addHelper(std::uint64_t aMantissa,
748 std::uint16_t aExponent,
749 std::uint64_t bMantissa,
750 std::uint16_t bExponent,
751 bool sign) noexcept
752 {
753 return addHelper2(aMantissa,
754 aExponent,
755 bMantissa,
756 bExponent,
757 aExponent < bExponent ? bExponent : aExponent,
758 sign);
759 }
760 constexpr friend ExtendedFloat operator+(const ExtendedFloat &a,
761 const ExtendedFloat &b) noexcept
762 {
763 return a.isNaN() ? a : b.isNaN() ?
764 b :
765 a.isInfinite() ?
766 (b.isInfinite() ? (a.sign == b.sign ? a : NaN()) : a) :
767 b.isInfinite() ?
768 b :
769 a.isZero() ?
770 (b.isZero() ? Zero(a.sign && b.sign) : b) :
771 b.isZero() ?
772 a :
773 a.sign == b.sign ?
774 addHelper(a.mantissa, a.exponent, b.mantissa, b.exponent, a.sign) :
775 a.sign ? subtractHelper(b.mantissa, b.exponent, a.mantissa, a.exponent) :
776 subtractHelper(a.mantissa, a.exponent, b.mantissa, b.exponent);
777 }
778 constexpr friend ExtendedFloat operator-(const ExtendedFloat &a,
779 const ExtendedFloat &b) noexcept
780 {
781 return a + b.operator-();
782 }
783 constexpr ExtendedFloat &operator+=(const ExtendedFloat &v) noexcept
784 {
785 return *this = *this + v;
786 }
787 constexpr ExtendedFloat &operator-=(const ExtendedFloat &v) noexcept
788 {
789 return *this = *this - v;
790 }
791 friend constexpr bool operator==(const ExtendedFloat &a, const ExtendedFloat &b) noexcept
792 {
793 return a.isNaN() ? false : b.isNaN() ? false : a.isZero() ?
794 b.isZero() :
795 a.exponent == b.exponent && a.mantissa == b.mantissa;
796 }
797 friend constexpr bool operator!=(const ExtendedFloat &a, const ExtendedFloat &b) noexcept
798 {
799 return !(a == b);
800 }
801 static constexpr int compareHelper(const ExtendedFloat &a, const ExtendedFloat &b) noexcept
802 {
803 return a.isZero() ? (b.isZero() ? 0 : (b.sign ? 1 : -1)) : a.sign != b.sign ?
804 (a.sign ? -1 : 1) :
805 a.exponent != b.exponent ?
806 ((a.exponent < b.exponent) != a.sign ? -1 : 1) :
807 a.mantissa == b.mantissa ? 0 :
808 (a.mantissa < b.mantissa) != a.sign ? -1 : 1;
809 }
810 friend constexpr bool operator<(const ExtendedFloat &a, const ExtendedFloat &b) noexcept
811 {
812 return a.isNaN() ? false : b.isNaN() ? false : compareHelper(a, b) < 0;
813 }
814 friend constexpr bool operator<=(const ExtendedFloat &a, const ExtendedFloat &b) noexcept
815 {
816 return a.isNaN() ? false : b.isNaN() ? false : compareHelper(a, b) <= 0;
817 }
818 friend constexpr bool operator>(const ExtendedFloat &a, const ExtendedFloat &b) noexcept
819 {
820 return a.isNaN() ? false : b.isNaN() ? false : compareHelper(a, b) > 0;
821 }
822 friend constexpr bool operator>=(const ExtendedFloat &a, const ExtendedFloat &b) noexcept
823 {
824 return a.isNaN() ? false : b.isNaN() ? false : compareHelper(a, b) >= 0;
825 }
826 static constexpr ExtendedFloat mulHelper4(UInt128 mantissa,
827 std::int32_t exponent,
828 bool sign) noexcept
829 {
830 return exponent >= infinityNaNExponent() ?
831 Infinity(sign) :
832 exponent <= -128 ?
833 Zero(sign) :
834 exponent < 0 ? ExtendedFloat(finalRoundHelper(mantissa >> -exponent), 64, sign) :
835 ExtendedFloat(finalRoundHelper(mantissa), exponent + 64, sign);
836 }
837 static constexpr ExtendedFloat mulHelper3(UInt128 mantissa,
838 std::int32_t exponent,
839 bool sign,
840 unsigned shift) noexcept
841 {
842 return mulHelper4(mantissa << shift, exponent - shift, sign);
843 }
844 static constexpr ExtendedFloat mulHelper2(UInt128 mantissa,
845 std::int32_t exponent,
846 bool sign) noexcept
847 {
848 return mantissa == UInt128(0) ? Zero(sign) :
849 mulHelper3(mantissa, exponent, sign, clz128(mantissa));
850 }
851 static constexpr ExtendedFloat mulHelper(std::uint64_t aMantissa,
852 std::int32_t aExponent,
853 std::uint64_t bMantissa,
854 std::int32_t bExponent,
855 bool sign) noexcept
856 {
857 return mulHelper2(UInt128(aMantissa) * UInt128(bMantissa),
858 aExponent + bExponent - exponentBias() + 1,
859 sign);
860 }
861 constexpr friend ExtendedFloat operator*(const ExtendedFloat &a,
862 const ExtendedFloat &b) noexcept
863 {
864 return a.isNaN() ? a : b.isNaN() ?
865 b :
866 a.isInfinite() ?
867 (b.isZero() ? NaN() : Infinity(a.sign != b.sign)) :
868 b.isInfinite() ?
869 (a.isZero() ? NaN() : Infinity(a.sign != b.sign)) :
870 mulHelper(
871 a.mantissa, a.exponent, b.mantissa, b.exponent, a.sign != b.sign);
872 }
873 constexpr ExtendedFloat &operator*=(const ExtendedFloat &v) noexcept
874 {
875 return *this = *this * v;
876 }
877 static constexpr int compareU128(UInt128 a, UInt128 b) noexcept
878 {
879 return a == b ? 0 : a < b ? -1 : 1;
880 }
881 static constexpr ExtendedFloat divHelper6(UInt128 mantissa,
882 std::int32_t exponent,
883 bool sign) noexcept
884 {
885 return exponent >= infinityNaNExponent() ?
886 Infinity(sign) :
887 exponent <= -128 ?
888 Zero(sign) :
889 exponent < 0 ? ExtendedFloat(finalRoundHelper(mantissa >> -exponent), 64, sign) :
890 ExtendedFloat(finalRoundHelper(mantissa), exponent + 64, sign);
891 }
892 static constexpr ExtendedFloat divHelper5(UInt128 quotient,
893 unsigned shift,
894 int roundExtraBitsCompareValue,
895 std::int32_t exponent,
896 bool sign) noexcept
897 {
898 return divHelper6(
899 ((quotient << 2) | UInt128(static_cast<std::uint64_t>(2 - roundExtraBitsCompareValue)))
900 << (shift - 2),
901 exponent - shift + 64,
902 sign);
903 }
904 static constexpr ExtendedFloat divHelper4(UInt128::DivModResult mantissa,
905 std::uint64_t bMantissa,
906 std::int32_t exponent,
907 bool sign) noexcept
908 {
909 return divHelper5(mantissa.divResult,
910 clz128(mantissa.divResult),
911 compareU128(UInt128(bMantissa), mantissa.modResult << 1),
912 exponent,
913 sign);
914 }
915 static constexpr ExtendedFloat divHelper3(std::uint64_t aMantissa,
916 std::uint64_t bMantissa,
917 std::int32_t exponent,
918 bool sign) noexcept
919 {
920 return divHelper4(
921 UInt128::divmod(UInt128(aMantissa, 0), UInt128(bMantissa)), bMantissa, exponent, sign);
922 }
923 static constexpr ExtendedFloat divHelper2(std::uint64_t aMantissa,
924 std::int32_t aExponent,
925 unsigned aShift,
926 std::uint64_t bMantissa,
927 std::int32_t bExponent,
928 unsigned bShift,
929 bool sign) noexcept
930 {
931 return divHelper3(aMantissa << aShift,
932 bMantissa << bShift,
933 aExponent - aShift - (bExponent - bShift) + exponentBias() - 1,
934 sign);
935 }
936 static constexpr ExtendedFloat divHelper(std::uint64_t aMantissa,
937 std::int32_t aExponent,
938 std::uint64_t bMantissa,
939 std::int32_t bExponent,
940 bool sign) noexcept
941 {
942 return divHelper2(
943 aMantissa, aExponent, clz64(aMantissa), bMantissa, bExponent, clz64(bMantissa), sign);
944 }
945 friend constexpr ExtendedFloat operator/(const ExtendedFloat &a,
946 const ExtendedFloat &b) noexcept
947 {
948 return a.isNaN() ? a : b.isNaN() ?
949 b :
950 a.isInfinite() ?
951 (b.isInfinite() ? NaN() : Infinity(a.sign != b.sign)) :
952 b.isZero() ?
953 (a.isZero() ? NaN() : Infinity(a.sign != b.sign)) :
954 b.isInfinite() || a.isZero() ?
955 Zero(a.sign != b.sign) :
956 divHelper(
957 a.mantissa, a.exponent, b.mantissa, b.exponent, a.sign != b.sign);
958 }
959 constexpr ExtendedFloat &operator/=(const ExtendedFloat &v) noexcept
960 {
961 return *this = *this / v;
962 }
963 static constexpr ExtendedFloat floorCeilHelper2(std::uint64_t mantissa,
964 std::int32_t exponent) noexcept
965 {
966 return exponent >= infinityNaNExponent() ?
967 Infinity() :
968 exponent <= -128 ?
969 Zero() :
970 exponent < 0 ?
971 ExtendedFloat(finalRoundHelper(UInt128(mantissa, 0) >> -exponent), 64) :
972 ExtendedFloat(finalRoundHelper(UInt128(mantissa, 0)), exponent + 64);
973 }
974 static constexpr ExtendedFloat floorCeilHelper(UInt128 mantissa, std::int32_t exponent) noexcept
975 {
976 return mantissa.high != 0 ? floorCeilHelper2((mantissa >> 1).low, exponent + 1) :
977 floorCeilHelper2(mantissa.low, exponent);
978 }
979 static constexpr ExtendedFloat ceilHelper2(UInt128 mantissa) noexcept
980 {
981 return mantissa.low != 0 ? (mantissa.high == ~static_cast<std::uint64_t>(0) ?
982 TwoToThe64() :
983 ExtendedFloat(mantissa.high + 1)) :
984 ExtendedFloat(mantissa.high);
985 }
986 static constexpr ExtendedFloat ceilHelper(std::uint64_t mantissa,
987 std::int32_t exponent) noexcept
988 {
989 return exponent < exponentBias() ?
990 One() :
991 exponent >= exponentBias() + 63 ?
992 ExtendedFloat(NormalizedTag{}, mantissa, exponent) :
993 ceilHelper2(UInt128(mantissa, 0) >> (exponentBias() - exponent + 63));
994 }
995 static constexpr ExtendedFloat floorHelper2(UInt128 mantissa) noexcept
996 {
997 return ExtendedFloat(mantissa.high);
998 }
999 static constexpr ExtendedFloat floorHelper(std::uint64_t mantissa,
1000 std::int32_t exponent) noexcept
1001 {
1002 return exponent < exponentBias() ?
1003 Zero() :
1004 exponent >= exponentBias() + 63 ?
1005 ExtendedFloat(NormalizedTag{}, mantissa, exponent) :
1006 floorHelper2(UInt128(mantissa, 0) >> (exponentBias() - exponent + 63));
1007 }
1008 constexpr friend ExtendedFloat floor(const ExtendedFloat &v) noexcept
1009 {
1010 return !v.isFinite() || v.isZero() ? v : v.sign ? -ceilHelper(v.mantissa, v.exponent) :
1011 floorHelper(v.mantissa, v.exponent);
1012 }
1013 constexpr friend ExtendedFloat trunc(const ExtendedFloat &v) noexcept
1014 {
1015 return !v.isFinite() || v.isZero() ? v : v.sign ? -floorHelper(v.mantissa, v.exponent) :
1016 floorHelper(v.mantissa, v.exponent);
1017 }
1018 constexpr friend ExtendedFloat ceil(const ExtendedFloat &v) noexcept
1019 {
1020 return !v.isFinite() || v.isZero() ? v : v.sign ? -floorHelper(v.mantissa, v.exponent) :
1021 ceilHelper(v.mantissa, v.exponent);
1022 }
1023 static constexpr ExtendedFloat roundHelper(std::uint64_t mantissa,
1024 std::int32_t exponent) noexcept
1025 {
1026 return exponent < exponentBias() - 2 ?
1027 Zero() :
1028 exponent >= exponentBias() + 63 ?
1029 ExtendedFloat(NormalizedTag{}, mantissa, exponent) :
1030 ExtendedFloat(((UInt128(mantissa, 0) >> (exponentBias() - exponent + 64))
1031 + UInt128(0x4000000000000000ULL))
1032 >> 63);
1033 }
1034 constexpr friend ExtendedFloat round(const ExtendedFloat &v) noexcept
1035 {
1036 return !v.isFinite() || v.isZero() ? v : v.sign ? -roundHelper(v.mantissa, v.exponent) :
1037 roundHelper(v.mantissa, v.exponent);
1038 }
1039 explicit constexpr operator std::uint64_t() const noexcept
1040 {
1041 return isNaN() ? 0 : isInfinite() ?
1042 (sign ? 0 : ~static_cast<std::uint64_t>(0)) :
1043 exponent < exponentBias() || sign ?
1044 0 :
1045 *this >= TwoToThe64() ?
1046 ~static_cast<std::uint64_t>(0) :
1047 (UInt128(mantissa, 0) >> (exponentBias() - exponent + 63)).high;
1048 }
1049 static constexpr std::int64_t toInt64Helper(bool sign, std::uint64_t uint64Value) noexcept
1050 {
1051 return sign ? (uint64Value > 0x8000000000000000ULL ?
1052 -static_cast<std::int64_t>(0x7FFFFFFFFFFFFFFFULL) - 1 :
1053 -static_cast<std::int64_t>(uint64Value)) :
1054 uint64Value >= 0x8000000000000000ULL ?
1055 static_cast<std::int64_t>(0x7FFFFFFFFFFFFFFFULL) :
1056 static_cast<std::int64_t>(uint64Value);
1057 }
1058 explicit constexpr operator std::int64_t() const noexcept
1059 {
1060 return isNaN() ? 0 : sign ? toInt64Helper(true, static_cast<std::uint64_t>(operator-())) :
1061 toInt64Helper(false, static_cast<std::uint64_t>(*this));
1062 }
1063 static constexpr ExtendedFloat powHelper(const ExtendedFloat &base,
1064 const ExtendedFloat &currentValue,
1065 std::uint64_t exponent) noexcept
1066 {
1067 return exponent == 0 ? currentValue : exponent == 1 ?
1068 currentValue * base :
1069 exponent == 2 ?
1070 currentValue * (base * base) :
1071 exponent & 1 ?
1072 powHelper(base * base, currentValue * base, exponent >> 1) :
1073 powHelper(base * base, currentValue, exponent >> 1);
1074 }
1075 constexpr friend ExtendedFloat pow(const ExtendedFloat &base, std::uint64_t exponent) noexcept
1076 {
1077 return powHelper(base, One(), exponent);
1078 }
1079 friend ExtendedFloat pow(const ExtendedFloat &base, std::int64_t exponent) noexcept
1080 {
1081 return exponent < 0 ? powHelper(One() / base, One(), -exponent) :
1082 powHelper(base, One(), exponent);
1083 }
1084 constexpr friend int ilogb(const ExtendedFloat &v) noexcept
1085 {
1086 return v.isNaN() ? FP_ILOGBNAN : v.isZero() ? FP_ILOGB0 : v.isInfinite() ?
1087 std::numeric_limits<int>::max() :
1088 static_cast<std::int32_t>(v.exponent)
1089 - exponentBias()
1090 - clz64(v.mantissa);
1091 }
1092 static constexpr ExtendedFloat scalbnHelper(std::uint64_t mantissa,
1093 std::int64_t exponent,
1094 bool sign) noexcept
1095 {
1096 return exponent >= infinityNaNExponent() ?
1097 Infinity(sign) :
1098 exponent <= -128 ?
1099 Zero(sign) :
1100 exponent < 0 ?
1101 ExtendedFloat(finalRoundHelper(UInt128(mantissa, 0) >> -exponent), 64, sign) :
1102 ExtendedFloat(finalRoundHelper(UInt128(mantissa, 0)), exponent + 64, sign);
1103 }
1104 constexpr friend ExtendedFloat scalbn(const ExtendedFloat &v, std::int64_t exponent) noexcept
1105 {
1106 return !v.isFinite() || v.isZero() ? v : scalbnHelper(
1107 v.mantissa, v.exponent + exponent, v.sign);
1108 }
1109 static constexpr std::uint64_t log2Helper4(UInt128 mantissa) noexcept
1110 {
1111 return ~mantissa.high == 0
1112 || ((mantissa.high & 1) == 0 && mantissa.low == 0x8000000000000000ULL) ?
1113 mantissa.high :
1114 (mantissa + UInt128(0x8000000000000000ULL)).high;
1115 }
1116 static constexpr UInt128 log2Helper3(UInt128 mantissa, unsigned bitsLeft) noexcept
1117 {
1118 return (bitsLeft > 0 ?
1119 log2Helper2(
1120 log2Helper4(mantissa << (mantissa.high & 0x8000000000000000ULL ? 0 : 1)),
1121 bitsLeft - 1)
1122 >> 1 :
1123 UInt128(0))
1124 | UInt128(mantissa.high & 0x8000000000000000ULL, 0);
1125 }
1126 static constexpr UInt128 log2Helper2(std::uint64_t mantissa, unsigned bitsLeft) noexcept
1127 {
1128 return log2Helper3(UInt128(mantissa) * UInt128(mantissa), bitsLeft);
1129 }
1130 static constexpr ExtendedFloat log2Helper(const ExtendedFloat &v, unsigned shift) noexcept
1131 {
1132 return ExtendedFloat(finalRoundHelper(log2Helper2(v.mantissa << shift, 67)),
1133 exponentBias() - 1 + 64,
1134 0)
1135 + ExtendedFloat(static_cast<std::int64_t>(v.exponent) - exponentBias() - shift);
1136 }
1137 constexpr friend ExtendedFloat log2(const ExtendedFloat &v) noexcept
1138 {
1139 return v.isNaN() ? v : v.isZero() ? Infinity(true) : v.sign ?
1140 NaN() :
1141 v.isInfinite() ? v : log2Helper(v, clz64(v.mantissa));
1142 }
1143 static constexpr ExtendedFloat Log10Of2() noexcept
1144 {
1145 return ExtendedFloat(NormalizedTag{}, 0x9A209A84FBCFF799ULL, exponentBias() - 2);
1146 }
1147 static constexpr ExtendedFloat LogOf2() noexcept
1148 {
1149 return ExtendedFloat(NormalizedTag{}, 0xB17217F7D1CF79ACULL, exponentBias() - 1);
1150 }
1151 constexpr friend ExtendedFloat log10(const ExtendedFloat &v) noexcept
1152 {
1153 return log2(v) * Log10Of2();
1154 }
1155 constexpr friend ExtendedFloat log(const ExtendedFloat &v) noexcept
1156 {
1157 return log2(v) * LogOf2();
1158 }
1159 };
1160 }
1161 }
1162 }
1163
1164 #endif /* UTIL_SOFT_FLOAT_H_ */