2 * Copyright 2016-2017 Jacob Lifshay
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in all
12 * copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 // https://github.com/programmerjake/javascript-tasklets/blob/master/javascript_tasklets/soft_float.cpp
28 #include "soft_float.h"
37 using namespace vulkan_cpu::util::soft_float
;
38 std::string
hexValue(const ExtendedFloat
&v
)
50 std::ostringstream ss
;
51 ss
<< std::hex
<< std::uppercase
;
58 std::int32_t exponent
= v
.exponent
;
59 exponent
-= ExtendedFloat::exponentBias();
62 std::uint64_t mantissa
= v
.mantissa
;
63 unsigned firstDigitBits
= 1 + (exponent
& 3);
64 ss
<< (mantissa
>> (64 - firstDigitBits
));
65 mantissa
<<= firstDigitBits
;
71 ss
<< std::dec
<< std::showpos
;
75 std::string
hexValue(long double v
)
87 const std::size_t strSize
= 64;
89 std::snprintf(str
, sizeof(str
), "%+1.16LA", v
);
101 std::string
hexValue(std::int64_t v
)
103 std::ostringstream ss
;
104 ss
<< std::hex
<< std::uppercase
;
113 ss
<< -static_cast<std::uint64_t>(v
);
115 ss
<< static_cast<std::uint64_t>(v
);
118 std::string
hexValue(std::uint64_t v
)
120 std::ostringstream ss
;
121 ss
<< std::hex
<< std::uppercase
;
125 ss
<< static_cast<std::uint64_t>(v
);
128 bool sameValue(long double a
, long double b
)
131 return std::isnan(b
);
134 return b
== 0 && std::signbit(a
) == std::signbit(b
);
141 template <typename Arg
, typename
... Args
>
142 void writeArgs(Arg arg
, Args
... args
)
144 std::cout
<< " " << hexValue(arg
);
147 constexpr bool displayPassedTests
= true;
148 template <typename TestFn1
, typename TestFn2
, typename
... Args
>
149 void testCase(const char *name
, TestFn1
&&testFn1
, TestFn2
&&testFn2
, Args
... args
)
151 long double result1
= static_cast<long double>(testFn1(args
...));
152 long double result2
= static_cast<long double>(testFn2(args
...));
153 if(!sameValue(result1
, result2
))
158 std::cout
<< hexValue(result1
) << " != " << hexValue(result2
) << std::endl
;
160 else if(displayPassedTests
)
165 std::cout
<< hexValue(result1
) << std::endl
;
168 template <typename TestFn1
, typename TestFn2
, typename
... Args
>
169 void testCaseI(const char *name
, TestFn1
&&testFn1
, TestFn2
&&testFn2
, Args
... args
)
171 auto result1
= testFn1(args
...);
172 auto result2
= testFn2(args
...);
173 if(result1
!= result2
)
178 std::cout
<< hexValue(result1
) << " != " << hexValue(result2
) << std::endl
;
180 else if(displayPassedTests
)
185 std::cout
<< hexValue(result1
) << std::endl
;
188 template <typename TestFn1
, typename TestFn2
, typename
... Args
>
189 void roundTestCases(const char *name
, TestFn1
&&testFn1
, TestFn2
&&testFn2
)
191 const long double NaN
= std::numeric_limits
<long double>::quiet_NaN();
192 const long double Infinity
= std::numeric_limits
<long double>::infinity();
193 auto testBothSigns
= [&](long double value
)
195 testCase(name
, testFn1
, testFn2
, value
);
196 testCase(name
, testFn1
, testFn2
, -value
);
198 testCase(name
, testFn1
, testFn2
, NaN
);
200 testBothSigns(Infinity
);
202 testBothSigns(0x1.0p
-1L);
203 testBothSigns(0x1.8p0L
);
204 testBothSigns(0x1.Fp0L
);
205 testBothSigns(0x1.Fp
-30L);
206 testBothSigns(0x1.Fp30L
);
207 testBothSigns(0x1.Fp62L
);
208 testBothSigns(0x1.Fp63L
);
209 testBothSigns(0x1.Fp64L
);
210 testBothSigns(0x1.Fp65L
);
211 testBothSigns(0x1.Fp62L
+ 0.5L);
212 testBothSigns(0x1.Fp63L
+ 0.5L);
213 testBothSigns(0x1.Fp64L
+ 0.5L);
214 testBothSigns(0x1.Fp65L
+ 0.5L);
215 testBothSigns(0x1.Fp62L
+ 1);
216 testBothSigns(0x1.Fp63L
+ 1);
217 testBothSigns(0x1.Fp64L
+ 1);
218 testBothSigns(0x1.Fp65L
+ 1);
220 template <typename TestFn1
, typename TestFn2
, typename
... Args
>
221 void toIntTestCases(const char *name
, TestFn1
&&testFn1
, TestFn2
&&testFn2
)
223 const long double NaN
= std::numeric_limits
<long double>::quiet_NaN();
224 const long double Infinity
= std::numeric_limits
<long double>::infinity();
225 auto testBothSigns
= [&](long double value
)
227 testCaseI(name
, testFn1
, testFn2
, value
);
228 testCaseI(name
, testFn1
, testFn2
, -value
);
230 testCaseI(name
, testFn1
, testFn2
, NaN
);
232 testBothSigns(Infinity
);
234 testBothSigns(0x1.0p
-1L);
235 testBothSigns(0x1.8p0L
);
236 testBothSigns(0x1.Fp0L
);
237 testBothSigns(0x1.Fp
-30L);
238 testBothSigns(0x1.Fp30L
);
239 testBothSigns(0x1.Fp62L
);
240 testBothSigns(0x1.Fp63L
);
241 testBothSigns(0x1.Fp64L
);
242 testBothSigns(0x1.Fp65L
);
243 testBothSigns(0x1.Fp62L
+ 0.5L);
244 testBothSigns(0x1.Fp63L
+ 0.5L);
245 testBothSigns(0x1.Fp64L
+ 0.5L);
246 testBothSigns(0x1.Fp65L
+ 0.5L);
247 testBothSigns(0x1.Fp62L
+ 1);
248 testBothSigns(0x1.Fp63L
+ 1);
249 testBothSigns(0x1.Fp64L
+ 1);
250 testBothSigns(0x1.Fp65L
+ 1);
254 auto add1
= [](long double a
, long double b
) -> long double
258 auto add2
= [](long double a
, long double b
) -> ExtendedFloat
260 return ExtendedFloat(a
) + ExtendedFloat(b
);
262 auto mul1
= [](long double a
, long double b
) -> long double
266 auto mul2
= [](long double a
, long double b
) -> ExtendedFloat
268 return ExtendedFloat(a
) * ExtendedFloat(b
);
270 auto div1
= [](long double a
, long double b
) -> long double
274 auto div2
= [](long double a
, long double b
) -> ExtendedFloat
276 return ExtendedFloat(a
) / ExtendedFloat(b
);
278 auto floor1
= [](long double a
) -> long double
280 return std::floor(a
);
282 auto floor2
= [](long double a
) -> ExtendedFloat
284 return floor(ExtendedFloat(a
));
286 auto ceil1
= [](long double a
) -> long double
290 auto ceil2
= [](long double a
) -> ExtendedFloat
292 return ceil(ExtendedFloat(a
));
294 auto round1
= [](long double a
) -> long double
296 return std::round(a
);
298 auto round2
= [](long double a
) -> ExtendedFloat
300 return round(ExtendedFloat(a
));
302 auto trunc1
= [](long double a
) -> long double
304 return std::trunc(a
);
306 auto trunc2
= [](long double a
) -> ExtendedFloat
308 return trunc(ExtendedFloat(a
));
310 auto toUInt1
= [](long double a
) -> std::uint64_t
314 if(a
< std::numeric_limits
<std::uint64_t>::min())
315 return std::numeric_limits
<std::uint64_t>::min();
316 if(a
> std::numeric_limits
<std::uint64_t>::max())
317 return std::numeric_limits
<std::uint64_t>::max();
318 return static_cast<std::uint64_t>(a
);
320 auto toUInt2
= [](long double a
) -> std::uint64_t
322 return static_cast<std::uint64_t>(ExtendedFloat(a
));
324 auto toInt1
= [](long double a
) -> std::int64_t
328 if(a
< std::numeric_limits
<std::int64_t>::min())
329 return std::numeric_limits
<std::int64_t>::min();
330 if(a
> std::numeric_limits
<std::int64_t>::max())
331 return std::numeric_limits
<std::int64_t>::max();
332 return static_cast<std::int64_t>(a
);
334 auto toInt2
= [](long double a
) -> std::int64_t
336 return static_cast<std::int64_t>(ExtendedFloat(a
));
338 auto pow1
= [](long double base
, int exponent
) -> long double
343 exponent
= -exponent
;
345 else if(exponent
== 0)
347 long double retval
= 1;
352 else if(exponent
== 1)
353 return retval
* base
;
362 auto pow2
= [](long double base
, int exponent
) -> ExtendedFloat
364 return pow(ExtendedFloat(base
), static_cast<std::int64_t>(exponent
));
366 auto scalbn1
= [](long double a
, std::int64_t exponent
) -> long double
368 return std::scalbln(a
, static_cast<long>(exponent
));
370 auto scalbn2
= [](long double a
, std::int64_t exponent
) -> ExtendedFloat
372 return scalbn(ExtendedFloat(a
), exponent
);
374 auto log2_1
= [](long double a
) -> long double
378 auto log2_2
= [](long double a
) -> ExtendedFloat
380 return log2(ExtendedFloat(a
));
382 auto log10_1
= [](long double a
) -> long double
384 return std::log10(a
);
386 auto log10_2
= [](long double a
) -> ExtendedFloat
388 return log10(ExtendedFloat(a
));
390 const long double NaN
= std::numeric_limits
<long double>::quiet_NaN();
391 const long double Infinity
= std::numeric_limits
<long double>::infinity();
392 testCase("add", add1
, add2
, +0.0L, +0.0L);
393 testCase("add", add1
, add2
, +0.0L, -0.0L);
394 testCase("add", add1
, add2
, -0.0L, +0.0L);
395 testCase("add", add1
, add2
, -0.0L, -0.0L);
396 testCase("add", add1
, add2
, 0.0L, NaN
);
397 testCase("add", add1
, add2
, NaN
, 0.0L);
398 testCase("add", add1
, add2
, NaN
, NaN
);
399 testCase("add", add1
, add2
, +Infinity
, +Infinity
);
400 testCase("add", add1
, add2
, +Infinity
, -Infinity
);
401 testCase("add", add1
, add2
, -Infinity
, +Infinity
);
402 testCase("add", add1
, add2
, -Infinity
, -Infinity
);
403 testCase("add", add1
, add2
, 0x1.0000000000000002p0L
, -0x1.0p
-64L);
404 testCase("add", add1
, add2
, 0x1.p0L
, -0x1.0p
-65L);
405 testCase("add", add1
, add2
, 0x1.p0L
, -0x0.Fp
-65L);
406 testCase("add", add1
, add2
, 0x1.p0L
, -0x1.1p
-65L);
407 testCase("add", add1
, add2
, 0x1.0000000000000002p0L
, -0x2.0p
-65L);
408 testCase("add", add1
, add2
, 0x1.0000000000000002p0L
, -0x1.Fp
-65L);
409 testCase("add", add1
, add2
, 0x1.0000000000000002p0L
, -0x2.1p
-65L);
410 testCase("add", add1
, add2
, 0x1p
-16445L, 0x1p
-16445L);
411 testCase("add", add1
, add2
, 0x1p
+16383L, 0x1p
+16383L);
412 testCase("mul", mul1
, mul2
, +0.0L, +0.0L);
413 testCase("mul", mul1
, mul2
, +0.0L, -0.0L);
414 testCase("mul", mul1
, mul2
, -0.0L, +0.0L);
415 testCase("mul", mul1
, mul2
, -0.0L, -0.0L);
416 testCase("mul", mul1
, mul2
, 0.0L, NaN
);
417 testCase("mul", mul1
, mul2
, NaN
, 0.0L);
418 testCase("mul", mul1
, mul2
, NaN
, NaN
);
419 testCase("mul", mul1
, mul2
, +Infinity
, +Infinity
);
420 testCase("mul", mul1
, mul2
, +Infinity
, -Infinity
);
421 testCase("mul", mul1
, mul2
, -Infinity
, +Infinity
);
422 testCase("mul", mul1
, mul2
, -Infinity
, -Infinity
);
423 testCase("mul", mul1
, mul2
, 0x1p
0L, 0x1p
0L);
424 testCase("mul", mul1
, mul2
, 0x1p
16000L, 0x1p
383L);
425 testCase("mul", mul1
, mul2
, 0x1p
16000L, 0x1p
384L);
426 testCase("mul", mul1
, mul2
, 0x1p
-16000L, 0x1p
-445L);
427 testCase("mul", mul1
, mul2
, 0x1p
-16000L, 0x1p
-446L);
428 testCase("mul", mul1
, mul2
, 0x1.0000001p0L
, 0x1.000000001p0L
);
429 testCase("mul", mul1
, mul2
, 0x1.0000001p0L
, 0x1.0000000018p0L
);
430 testCase("mul", mul1
, mul2
, 0x1.0000001p0L
, 0x1.000000002p0L
);
431 testCase("mul", mul1
, mul2
, 0x1.0000001p0L
, 0x1.0000000028p0L
);
432 testCase("mul", mul1
, mul2
, 0x1.0000001p0L
, 0x1.000000003p0L
);
433 testCase("mul", mul1
, mul2
, 0x1.0000001p0L
, 0x1.0000000038p0L
);
434 testCase("mul", mul1
, mul2
, 0x1.0000001p0L
, 0x1.000000004p0L
);
435 testCase("mul", mul1
, mul2
, 0x1.0000001p0L
, 0x1.0000000048p0L
);
436 testCase("mul", mul1
, mul2
, 0x1.0000001p0L
, 0x1.000000005p0L
);
437 testCase("mul", mul1
, mul2
, 0x1.0000001p0L
, 0x1.0000000058p0L
);
438 testCase("mul", mul1
, mul2
, 0x1.0000001p0L
, 0x1.000000006p0L
);
439 testCase("mul", mul1
, mul2
, 0x1.0000001p0L
, 0x1.0000000068p0L
);
440 testCase("mul", mul1
, mul2
, 0x1.0000001p0L
, 0x1.000000007p0L
);
441 testCase("mul", mul1
, mul2
, 0x1.0000001p0L
, 0x1.0000000078p0L
);
445 3.1415926535897932384626433832795L,
446 0.318309886183790671537767526745028724L);
450 2.718281828459045235360287471352662497757L,
451 0.3678794411714423215955237701614608674458L);
452 testCase("div", div1
, div2
, +0.0L, +0.0L);
453 testCase("div", div1
, div2
, +1.0L, +0.0L);
454 testCase("div", div1
, div2
, +1.0L, -0.0L);
455 testCase("div", div1
, div2
, -1.0L, +0.0L);
456 testCase("div", div1
, div2
, -1.0L, -0.0L);
457 testCase("div", div1
, div2
, +0.0L, +1.0L);
458 testCase("div", div1
, div2
, +0.0L, -1.0L);
459 testCase("div", div1
, div2
, -0.0L, +1.0L);
460 testCase("div", div1
, div2
, -0.0L, -1.0L);
461 testCase("div", div1
, div2
, 0.0L, NaN
);
462 testCase("div", div1
, div2
, NaN
, 0.0L);
463 testCase("div", div1
, div2
, NaN
, NaN
);
464 testCase("div", div1
, div2
, +Infinity
, +Infinity
);
465 testCase("div", div1
, div2
, +1.0L, +Infinity
);
466 testCase("div", div1
, div2
, +1.0L, -Infinity
);
467 testCase("div", div1
, div2
, -1.0L, +Infinity
);
468 testCase("div", div1
, div2
, -1.0L, -Infinity
);
469 testCase("div", div1
, div2
, 1.0L, 3.0L);
470 testCase("div", div1
, div2
, 1.0L, 5.0L);
471 testCase("div", div1
, div2
, 1.0L, 7.0L);
472 testCase("div", div1
, div2
, 1.0L, 9.0L);
473 testCase("div", div1
, div2
, 1.0L, 11.0L);
474 testCase("div", div1
, div2
, 1.0L, 3.1415926535897932384626433832795L);
475 testCase("div", div1
, div2
, 1.0L, 2.718281828459045235360287471352662497757L);
476 testCase("div", div1
, div2
, 0x1p
16000L, 0x1p
-383L);
477 testCase("div", div1
, div2
, 0x1p
16000L, 0x1p
-384L);
478 testCase("div", div1
, div2
, 0x1p
-16000L, 0x1p
445L);
479 testCase("div", div1
, div2
, 0x1p
-16000L, 0x1p
446L);
480 roundTestCases("floor", floor1
, floor2
);
481 roundTestCases("round", round1
, round2
);
482 roundTestCases("ceil", ceil1
, ceil2
);
483 roundTestCases("trunc", trunc1
, trunc2
);
484 toIntTestCases("uint64", toUInt1
, toUInt2
);
485 toIntTestCases("int64", toInt1
, toInt2
);
486 testCase("pow", pow1
, pow2
, 1.0L, static_cast<std::int64_t>(0));
487 testCase("pow", pow1
, pow2
, 1.0L, static_cast<std::int64_t>(5000));
488 testCase("pow", pow1
, pow2
, 1.0L, static_cast<std::int64_t>(-5000));
489 testCase("pow", pow1
, pow2
, 2.0L, static_cast<std::int64_t>(3000));
490 testCase("pow", pow1
, pow2
, 2.0L, static_cast<std::int64_t>(-3000));
491 testCase("pow", pow1
, pow2
, 3.0L, static_cast<std::int64_t>(3000));
492 testCase("pow", pow1
, pow2
, 3.0L, static_cast<std::int64_t>(-3000));
493 testCase("pow", pow1
, pow2
, 10.0L, static_cast<std::int64_t>(3000));
494 testCase("pow", pow1
, pow2
, 10.0L, static_cast<std::int64_t>(-3000));
495 testCase("pow", pow1
, pow2
, 36.0L, static_cast<std::int64_t>(3000));
496 testCase("pow", pow1
, pow2
, 36.0L, static_cast<std::int64_t>(-3000));
497 testCase("scalbn", scalbn1
, scalbn2
, 1.0L, static_cast<std::int64_t>(16384));
498 testCase("scalbn", scalbn1
, scalbn2
, 1.0L, static_cast<std::int64_t>(16383));
499 testCase("scalbn", scalbn1
, scalbn2
, 1.0L, static_cast<std::int64_t>(3000));
500 testCase("scalbn", scalbn1
, scalbn2
, 1.0L, static_cast<std::int64_t>(-3000));
501 testCase("scalbn", scalbn1
, scalbn2
, 1.0L, static_cast<std::int64_t>(-16383));
502 testCase("scalbn", scalbn1
, scalbn2
, 1.0L, static_cast<std::int64_t>(-16384));
503 testCase("scalbn", scalbn1
, scalbn2
, 1.0L, static_cast<std::int64_t>(-16445));
504 testCase("scalbn", scalbn1
, scalbn2
, 1.0L, static_cast<std::int64_t>(-16446));
505 testCase("log2", log2_1
, log2_2
, NaN
);
506 testCase("log2", log2_1
, log2_2
, Infinity
);
507 testCase("log2", log2_1
, log2_2
, -Infinity
);
508 testCase("log2", log2_1
, log2_2
, 0.0L);
509 testCase("log2", log2_1
, log2_2
, -0.0L);
510 testCase("log2", log2_1
, log2_2
, -1.0L);
511 testCase("log2", log2_1
, log2_2
, 1.0L);
512 testCase("log2", log2_1
, log2_2
, 2.0L);
513 testCase("log2", log2_1
, log2_2
, 0x1.0p
-16445L);
514 testCase("log2", log2_1
, log2_2
, 0x1.0p16383L
);
515 testCase("log2", log2_1
, log2_2
, 3.0L);
516 testCase("log2", log2_1
, log2_2
, 5.0L);
517 testCase("log2", log2_1
, log2_2
, 7.0L);
518 testCase("log2", log2_1
, log2_2
, 9.0L);
519 testCase("log2", log2_1
, log2_2
, 11.0L);
520 testCase("log2", log2_1
, log2_2
, 1e100L
);
521 testCase("log2", log2_1
, log2_2
, 1e-1L);
522 testCase("log2", log2_1
, log2_2
, 1e-2L);
523 testCase("log2", log2_1
, log2_2
, 1.5L);
524 testCase("log2", log2_1
, log2_2
, 0.693147180559945309417232121458176568L);
525 testCase("log2", log2_1
, log2_2
, static_cast<long double>(ExtendedFloat::Log10Of2()));
526 testCase("log2", log2_1
, log2_2
, static_cast<long double>(ExtendedFloat::LogOf2()));
527 testCase("log10", log10_1
, log10_2
, 1e1001L
);
528 testCase("log10", log10_1
, log10_2
, 1.5L);
551 unsigned clz8(std::uint8_t v
)
553 return __builtin_clz(v
) - __builtin_clz(0x80U
);
555 unsigned ctz8(std::uint8_t v
)
557 return v
== 0 ? 8 : __builtin_ctz(v
);
563 explicit UInt16(std::uint8_t low
= 0) : high(0), low(low
)
566 UInt16(std::uint8_t high
, std::uint8_t low
) : high(high
), low(low
)
569 friend unsigned clz16(UInt16 v
)
571 return v
.high
== 0 ? 8 + clz8(v
.low
) : clz8(v
.high
);
573 friend unsigned ctz16(UInt16 v
)
575 return v
.low
== 0 ? 8 + ctz8(v
.high
) : ctz8(v
.low
);
577 static UInt16
mul8x8(std::uint8_t a
, std::uint8_t b
)
581 return UInt16(v
>> 8, v
& 0xFFU
);
583 static bool addCarry(std::uint8_t a
, std::uint8_t b
)
585 return static_cast<std::uint16_t>(a
) + b
> 0xFFU
;
587 static bool addCarry(std::uint8_t a
, std::uint8_t b
, bool carry
)
589 return static_cast<unsigned>(a
) + b
+ carry
> 0xFFU
;
591 static bool subBorrow(std::uint8_t a
, std::uint8_t b
)
595 static bool subBorrow(std::uint8_t a
, std::uint8_t b
, bool borrow
)
597 return a
< b
|| (a
== b
&& borrow
);
599 friend UInt16
operator+(UInt16 a
, UInt16 b
)
601 return UInt16(a
.high
+ b
.high
+ addCarry(a
.low
, b
.low
), a
.low
+ b
.low
);
603 friend UInt16
operator-(UInt16 a
, UInt16 b
)
605 return UInt16(a
.high
- b
.high
- subBorrow(a
.low
, b
.low
), a
.low
- b
.low
);
607 friend UInt16
operator<<(UInt16 v
, unsigned shiftAmount
)
609 return shiftAmount
== 0 ? v
: shiftAmount
< 8 ?
610 UInt16((v
.high
<< shiftAmount
) | (v
.low
>> (8 - shiftAmount
)),
611 v
.low
<< shiftAmount
) :
612 UInt16(v
.low
<< (shiftAmount
- 8), 0);
614 friend UInt16
operator>>(UInt16 v
, unsigned shiftAmount
)
616 return shiftAmount
== 0 ? v
: shiftAmount
< 8 ?
617 UInt16(v
.high
>> shiftAmount
,
618 (v
.low
>> shiftAmount
) | (v
.high
<< (8 - shiftAmount
))) :
619 UInt16(v
.high
>> (shiftAmount
- 8));
621 struct DivModResult8 final
623 std::uint8_t divResult
;
624 std::uint8_t modResult
;
625 DivModResult8(std::uint8_t divResult
, std::uint8_t modResult
)
626 : divResult(divResult
), modResult(modResult
)
630 static DivModResult8
divMod16x8(UInt16 n
, std::uint8_t d
)
633 std::uint16_t v
= n
.high
;
636 std::uint16_t divResult
= v
/ d
;
637 std::uint16_t modResult
= v
% d
;
638 assert(divResult
<= 0xFFU
);
639 assert(modResult
<= 0xFFU
);
640 return DivModResult8(divResult
, modResult
);
643 static DivModResult
divMod(UInt16 uIn
, UInt16 vIn
);
644 static DivModResult
divMod2(UInt16 n
, UInt16 d
);
645 friend bool operator==(UInt16 a
, UInt16 b
) noexcept
647 return a
.high
== b
.high
&& a
.low
== b
.low
;
649 friend bool operator!=(UInt16 a
, UInt16 b
) noexcept
651 return a
.high
!= b
.high
|| a
.low
!= b
.low
;
653 friend bool operator<(UInt16 a
, UInt16 b
) noexcept
655 return a
.high
< b
.high
|| (a
.high
== b
.high
&& a
.low
< b
.low
);
657 friend bool operator<=(UInt16 a
, UInt16 b
) noexcept
659 return a
.high
< b
.high
|| (a
.high
== b
.high
&& a
.low
<= b
.low
);
661 friend bool operator>(UInt16 a
, UInt16 b
) noexcept
663 return a
.high
> b
.high
|| (a
.high
== b
.high
&& a
.low
> b
.low
);
665 friend bool operator>=(UInt16 a
, UInt16 b
) noexcept
667 return a
.high
> b
.high
|| (a
.high
== b
.high
&& a
.low
>= b
.low
);
670 struct UInt16::DivModResult final
674 DivModResult(UInt16 divResult
, UInt16 modResult
) : divResult(divResult
), modResult(modResult
)
678 UInt16::DivModResult
UInt16::divMod2(UInt16 n
, UInt16 d
)
680 std::uint16_t nv
= n
.high
;
683 std::uint16_t dv
= d
.high
;
686 std::uint16_t qv
= nv
/ dv
;
687 std::uint16_t rv
= nv
% dv
;
688 return DivModResult(UInt16(qv
>> 8, qv
& 0xFF), UInt16(rv
>> 8, rv
& 0xFF));
690 template <std::size_t NumberSizes
,
692 typename DoubleDigit
,
693 unsigned DigitBitCount
,
695 void divMod(const Digit(&numerator
)[NumberSizes
],
696 const Digit(&denominator
)[NumberSizes
],
697 Digit("ient
)[NumberSizes
],
698 Digit(&remainder
)[NumberSizes
])
700 constexpr Digit DigitMax
= (static_cast<DoubleDigit
>(1) << DigitBitCount
) - 1;
701 static_assert(NumberSizes
!= 0, "bad size");
702 std::size_t m
= NumberSizes
;
703 for(std::size_t i
= 0; i
< NumberSizes
; i
++)
705 if(denominator
[i
] != 0)
711 const std::size_t n
= NumberSizes
- m
;
714 assert(denominator
[NumberSizes
- 1] != 0);
715 for(std::size_t i
= 0; i
< NumberSizes
- 1; i
++)
719 Digit currentRemainder
= 0;
720 for(std::size_t i
= 0; i
< NumberSizes
; i
++)
722 DoubleDigit n
= currentRemainder
;
725 quotient
[i
] = n
/ denominator
[NumberSizes
- 1];
726 currentRemainder
= n
% denominator
[NumberSizes
- 1];
728 remainder
[NumberSizes
- 1] = currentRemainder
;
731 // from algorithm D, section 4.3.1 in Art of Computer Programming volume 2 by Knuth.
732 unsigned log2D
= DigitCLZFn()(denominator
[m
]);
733 Digit u
[NumberSizes
+ 1];
734 u
[NumberSizes
] = (numerator
[NumberSizes
- 1] << log2D
) & DigitMax
;
735 u
[0] = ((static_cast<DoubleDigit
>(numerator
[0]) << log2D
) >> DigitBitCount
) & DigitMax
;
736 for(std::size_t i
= 1; i
< NumberSizes
; i
++)
738 DoubleDigit value
= numerator
[i
- 1];
739 value
<<= DigitBitCount
;
740 value
|= numerator
[i
];
742 u
[i
] = (value
>> DigitBitCount
) & DigitMax
;
744 Digit v
[NumberSizes
+ 1] = {};
745 v
[n
] = (denominator
[NumberSizes
- 1] << log2D
) & DigitMax
;
746 for(std::size_t i
= 1; i
< n
; i
++)
748 DoubleDigit value
= denominator
[m
+ i
- 1];
749 value
<<= DigitBitCount
;
750 value
|= denominator
[m
+ i
];
752 v
[i
] = (value
>> DigitBitCount
) & DigitMax
;
755 for(std::size_t j
= 0; j
<= m
; j
++)
764 qHat
= ((static_cast<DoubleDigit
>(u
[j
]) << DigitBitCount
) | u
[j
+ 1]) / v
[1];
767 DoubleDigit lhs
= v
[2] * qHat
;
768 DoubleDigit rhsHigh
=
769 ((static_cast<DoubleDigit
>(u
[j
]) << DigitBitCount
) | u
[j
+ 1]) - qHat
* v
[1];
770 Digit rhsLow
= u
[j
+ 2];
771 if(rhsHigh
< static_cast<DoubleDigit
>(1) << DigitBitCount
772 && lhs
> ((rhsHigh
<< DigitBitCount
) | rhsLow
))
777 if(rhsHigh
< static_cast<DoubleDigit
>(1) << DigitBitCount
778 && lhs
> ((rhsHigh
<< DigitBitCount
) | rhsLow
))
787 for(std::size_t i
= n
; i
> 0; i
--)
789 assert(i
<= NumberSizes
);
790 DoubleDigit product
= qHat
* v
[i
] + mulCarry
;
791 mulCarry
= product
>> DigitBitCount
;
793 bool prevBorrow
= borrow
;
794 DoubleDigit digit
= u
[j
+ i
] - product
- prevBorrow
;
795 borrow
= digit
!= (digit
& DigitMax
);
799 bool prevBorrow
= borrow
;
800 DoubleDigit digit
= u
[j
] - mulCarry
- prevBorrow
;
801 borrow
= digit
!= (digit
& DigitMax
);
810 for(std::size_t i
= n
; i
> 0; i
--)
812 bool prevCarry
= carry
;
813 assert(i
+ j
<= NumberSizes
);
814 DoubleDigit digit
= u
[j
+ i
] + v
[i
] + prevCarry
;
815 carry
= digit
!= (digit
& DigitMax
);
819 u
[j
] = (u
[j
] + carry
) & DigitMax
;
821 quotient
[j
+ n
- 1] = qj
;
823 for(std::size_t i
= 0; i
< NumberSizes
; i
++)
825 DoubleDigit value
= u
[i
];
826 value
<<= DigitBitCount
;
828 remainder
[i
] = value
>> log2D
;
833 constexpr unsigned operator()(std::uint16_t value
) const noexcept
835 return __builtin_clz(value
) - (__builtin_clz(0) - 4);
838 UInt16::DivModResult
UInt16::divMod(UInt16 uIn
, UInt16 vIn
)
840 constexpr std::size_t NumberSizes
= 4;
841 typedef std::uint16_t Digit
;
842 typedef unsigned DoubleDigit
;
843 constexpr unsigned DigitBitCount
= 4;
844 Digit numerator
[NumberSizes
], denominator
[NumberSizes
], quotient
[NumberSizes
],
845 remainder
[NumberSizes
];
846 numerator
[0] = uIn
.high
>> 4;
847 numerator
[1] = uIn
.high
& 0xF;
848 numerator
[2] = uIn
.low
>> 4;
849 numerator
[3] = uIn
.low
& 0xF;
850 denominator
[0] = vIn
.high
>> 4;
851 denominator
[1] = vIn
.high
& 0xF;
852 denominator
[2] = vIn
.low
>> 4;
853 denominator
[3] = vIn
.low
& 0xF;
854 ::divMod
<NumberSizes
, Digit
, DoubleDigit
, DigitBitCount
, OpClz4
>(
855 numerator
, denominator
, quotient
, remainder
);
857 UInt16((quotient
[0] << 4) | quotient
[1], (quotient
[2] << 4) | quotient
[3]),
858 UInt16((remainder
[0] << 4) | remainder
[1], (remainder
[2] << 4) | remainder
[3]));
860 void mainFn(std::uint8_t start
, std::uint8_t end
)
862 for(unsigned dHigh
= start
; dHigh
<= end
; dHigh
++)
866 std::ostringstream ss
;
867 ss
<< dHigh
* 100 / (end
+ 1) << "%\n";
868 std::cout
<< ss
.str() << std::flush
;
870 for(unsigned dLow
= 0; dLow
< 0x100U
; dLow
++)
872 UInt16
d(dHigh
, dLow
);
879 for(unsigned nHigh
= 0; nHigh
< 0x100U
; nHigh
++)
881 for(unsigned nLow
= 0; nLow
< 0x100U
; nLow
++)
883 UInt16
n(nHigh
, nLow
);
884 auto result
= UInt16::divMod(n
, d
);
885 auto result2
= UInt16::divMod2(n
, d
);
886 if(result
.divResult
!= result2
.divResult
887 || result
.modResult
!= result2
.modResult
)
889 std::ostringstream ss
;
890 ss
<< std::hex
<< std::uppercase
;
893 ss
<< static_cast<unsigned>(n
.high
);
895 ss
<< static_cast<unsigned>(n
.low
);
898 ss
<< static_cast<unsigned>(d
.high
);
900 ss
<< static_cast<unsigned>(d
.low
);
903 ss
<< static_cast<unsigned>(result
.divResult
.high
);
905 ss
<< static_cast<unsigned>(result
.divResult
.low
);
908 ss
<< static_cast<unsigned>(result2
.divResult
.high
);
910 ss
<< static_cast<unsigned>(result2
.divResult
.low
);
913 ss
<< static_cast<unsigned>(n
.high
);
915 ss
<< static_cast<unsigned>(n
.low
);
918 ss
<< static_cast<unsigned>(d
.high
);
920 ss
<< static_cast<unsigned>(d
.low
);
923 ss
<< static_cast<unsigned>(result
.modResult
.high
);
925 ss
<< static_cast<unsigned>(result
.modResult
.low
);
928 ss
<< static_cast<unsigned>(result2
.modResult
.high
);
930 ss
<< static_cast<unsigned>(result2
.modResult
.low
);
931 std::cout
<< ss
.str() << std::endl
;
943 const std::size_t splitCount
= 6;
944 std::list
<std::thread
> threads
;
945 for(std::size_t i
= 0; i
< splitCount
; i
++)
947 auto start
= i
* 0x100 / splitCount
;
948 auto end
= (i
+ 1) * 0x100 / splitCount
- 1;
949 threads
.push_back(std::thread([=]()
954 for(std::thread
&thread
: threads
)