working on util::filesystem
[kazan.git] / src / util / soft_float.cpp
1 /*
2 * Copyright 2016-2017 Jacob Lifshay
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in all
12 * copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
21 *
22 */
23
24 // derived from
25 // https://github.com/programmerjake/javascript-tasklets/blob/master/javascript_tasklets/soft_float.cpp
26
27 #if 1
28 #include "soft_float.h"
29 #if 0
30 #include <iostream>
31 #include <cstdlib>
32 #include <string>
33 #include <sstream>
34 #include <cstdio>
35 namespace
36 {
37 using namespace vulkan_cpu::util::soft_float;
38 std::string hexValue(const ExtendedFloat &v)
39 {
40 if(v.isNaN())
41 {
42 return "NaN";
43 }
44 if(v.isInfinite())
45 {
46 if(v.sign)
47 return "-Infinity";
48 return "+Infinity";
49 }
50 std::ostringstream ss;
51 ss << std::hex << std::uppercase;
52 ss.fill('0');
53 if(v.sign)
54 ss << "-";
55 else
56 ss << "+";
57 ss << "0x";
58 std::int32_t exponent = v.exponent;
59 exponent -= ExtendedFloat::exponentBias();
60 if(v.isZero())
61 exponent = 0;
62 std::uint64_t mantissa = v.mantissa;
63 unsigned firstDigitBits = 1 + (exponent & 3);
64 ss << (mantissa >> (64 - firstDigitBits));
65 mantissa <<= firstDigitBits;
66 exponent &= ~3;
67 ss << ".";
68 ss.width(16);
69 ss << mantissa;
70 ss << "p";
71 ss << std::dec << std::showpos;
72 ss << exponent;
73 return ss.str();
74 }
75 std::string hexValue(long double v)
76 {
77 if(std::isnan(v))
78 {
79 return "NaN";
80 }
81 if(std::isinf(v))
82 {
83 if(v < 0)
84 return "-Infinity";
85 return "+Infinity";
86 }
87 const std::size_t strSize = 64;
88 char str[strSize];
89 std::snprintf(str, sizeof(str), "%+1.16LA", v);
90 for(char &ch : str)
91 {
92 if(ch == '\0')
93 break;
94 if(ch == 'X')
95 ch = 'x';
96 else if(ch == 'P')
97 ch = 'p';
98 }
99 return str;
100 }
101 std::string hexValue(std::int64_t v)
102 {
103 std::ostringstream ss;
104 ss << std::hex << std::uppercase;
105 ss.fill('0');
106 if(v < 0)
107 ss << "-";
108 else
109 ss << "+";
110 ss << "0x";
111 ss.width(16);
112 if(v < 0)
113 ss << -static_cast<std::uint64_t>(v);
114 else
115 ss << static_cast<std::uint64_t>(v);
116 return ss.str();
117 }
118 std::string hexValue(std::uint64_t v)
119 {
120 std::ostringstream ss;
121 ss << std::hex << std::uppercase;
122 ss.fill('0');
123 ss << "0x";
124 ss.width(16);
125 ss << static_cast<std::uint64_t>(v);
126 return ss.str();
127 }
128 bool sameValue(long double a, long double b)
129 {
130 if(std::isnan(a))
131 return std::isnan(b);
132 if(a == 0)
133 {
134 return b == 0 && std::signbit(a) == std::signbit(b);
135 }
136 return a == b;
137 }
138 void writeArgs()
139 {
140 }
141 template <typename Arg, typename... Args>
142 void writeArgs(Arg arg, Args... args)
143 {
144 std::cout << " " << hexValue(arg);
145 writeArgs(args...);
146 }
147 constexpr bool displayPassedTests = true;
148 template <typename TestFn1, typename TestFn2, typename... Args>
149 void testCase(const char *name, TestFn1 &&testFn1, TestFn2 &&testFn2, Args... args)
150 {
151 long double result1 = static_cast<long double>(testFn1(args...));
152 long double result2 = static_cast<long double>(testFn2(args...));
153 if(!sameValue(result1, result2))
154 {
155 std::cout << name;
156 writeArgs(args...);
157 std::cout << " -> ";
158 std::cout << hexValue(result1) << " != " << hexValue(result2) << std::endl;
159 }
160 else if(displayPassedTests)
161 {
162 std::cout << name;
163 writeArgs(args...);
164 std::cout << " -> ";
165 std::cout << hexValue(result1) << std::endl;
166 }
167 }
168 template <typename TestFn1, typename TestFn2, typename... Args>
169 void testCaseI(const char *name, TestFn1 &&testFn1, TestFn2 &&testFn2, Args... args)
170 {
171 auto result1 = testFn1(args...);
172 auto result2 = testFn2(args...);
173 if(result1 != result2)
174 {
175 std::cout << name;
176 writeArgs(args...);
177 std::cout << " -> ";
178 std::cout << hexValue(result1) << " != " << hexValue(result2) << std::endl;
179 }
180 else if(displayPassedTests)
181 {
182 std::cout << name;
183 writeArgs(args...);
184 std::cout << " -> ";
185 std::cout << hexValue(result1) << std::endl;
186 }
187 }
188 template <typename TestFn1, typename TestFn2, typename... Args>
189 void roundTestCases(const char *name, TestFn1 &&testFn1, TestFn2 &&testFn2)
190 {
191 const long double NaN = std::numeric_limits<long double>::quiet_NaN();
192 const long double Infinity = std::numeric_limits<long double>::infinity();
193 auto testBothSigns = [&](long double value)
194 {
195 testCase(name, testFn1, testFn2, value);
196 testCase(name, testFn1, testFn2, -value);
197 };
198 testCase(name, testFn1, testFn2, NaN);
199 testBothSigns(0.0L);
200 testBothSigns(Infinity);
201 testBothSigns(1.0L);
202 testBothSigns(0x1.0p-1L);
203 testBothSigns(0x1.8p0L);
204 testBothSigns(0x1.Fp0L);
205 testBothSigns(0x1.Fp-30L);
206 testBothSigns(0x1.Fp30L);
207 testBothSigns(0x1.Fp62L);
208 testBothSigns(0x1.Fp63L);
209 testBothSigns(0x1.Fp64L);
210 testBothSigns(0x1.Fp65L);
211 testBothSigns(0x1.Fp62L + 0.5L);
212 testBothSigns(0x1.Fp63L + 0.5L);
213 testBothSigns(0x1.Fp64L + 0.5L);
214 testBothSigns(0x1.Fp65L + 0.5L);
215 testBothSigns(0x1.Fp62L + 1);
216 testBothSigns(0x1.Fp63L + 1);
217 testBothSigns(0x1.Fp64L + 1);
218 testBothSigns(0x1.Fp65L + 1);
219 }
220 template <typename TestFn1, typename TestFn2, typename... Args>
221 void toIntTestCases(const char *name, TestFn1 &&testFn1, TestFn2 &&testFn2)
222 {
223 const long double NaN = std::numeric_limits<long double>::quiet_NaN();
224 const long double Infinity = std::numeric_limits<long double>::infinity();
225 auto testBothSigns = [&](long double value)
226 {
227 testCaseI(name, testFn1, testFn2, value);
228 testCaseI(name, testFn1, testFn2, -value);
229 };
230 testCaseI(name, testFn1, testFn2, NaN);
231 testBothSigns(0.0L);
232 testBothSigns(Infinity);
233 testBothSigns(1.0L);
234 testBothSigns(0x1.0p-1L);
235 testBothSigns(0x1.8p0L);
236 testBothSigns(0x1.Fp0L);
237 testBothSigns(0x1.Fp-30L);
238 testBothSigns(0x1.Fp30L);
239 testBothSigns(0x1.Fp62L);
240 testBothSigns(0x1.Fp63L);
241 testBothSigns(0x1.Fp64L);
242 testBothSigns(0x1.Fp65L);
243 testBothSigns(0x1.Fp62L + 0.5L);
244 testBothSigns(0x1.Fp63L + 0.5L);
245 testBothSigns(0x1.Fp64L + 0.5L);
246 testBothSigns(0x1.Fp65L + 0.5L);
247 testBothSigns(0x1.Fp62L + 1);
248 testBothSigns(0x1.Fp63L + 1);
249 testBothSigns(0x1.Fp64L + 1);
250 testBothSigns(0x1.Fp65L + 1);
251 }
252 void mainFn()
253 {
254 auto add1 = [](long double a, long double b) -> long double
255 {
256 return a + b;
257 };
258 auto add2 = [](long double a, long double b) -> ExtendedFloat
259 {
260 return ExtendedFloat(a) + ExtendedFloat(b);
261 };
262 auto mul1 = [](long double a, long double b) -> long double
263 {
264 return a * b;
265 };
266 auto mul2 = [](long double a, long double b) -> ExtendedFloat
267 {
268 return ExtendedFloat(a) * ExtendedFloat(b);
269 };
270 auto div1 = [](long double a, long double b) -> long double
271 {
272 return a / b;
273 };
274 auto div2 = [](long double a, long double b) -> ExtendedFloat
275 {
276 return ExtendedFloat(a) / ExtendedFloat(b);
277 };
278 auto floor1 = [](long double a) -> long double
279 {
280 return std::floor(a);
281 };
282 auto floor2 = [](long double a) -> ExtendedFloat
283 {
284 return floor(ExtendedFloat(a));
285 };
286 auto ceil1 = [](long double a) -> long double
287 {
288 return std::ceil(a);
289 };
290 auto ceil2 = [](long double a) -> ExtendedFloat
291 {
292 return ceil(ExtendedFloat(a));
293 };
294 auto round1 = [](long double a) -> long double
295 {
296 return std::round(a);
297 };
298 auto round2 = [](long double a) -> ExtendedFloat
299 {
300 return round(ExtendedFloat(a));
301 };
302 auto trunc1 = [](long double a) -> long double
303 {
304 return std::trunc(a);
305 };
306 auto trunc2 = [](long double a) -> ExtendedFloat
307 {
308 return trunc(ExtendedFloat(a));
309 };
310 auto toUInt1 = [](long double a) -> std::uint64_t
311 {
312 if(std::isnan(a))
313 return 0;
314 if(a < std::numeric_limits<std::uint64_t>::min())
315 return std::numeric_limits<std::uint64_t>::min();
316 if(a > std::numeric_limits<std::uint64_t>::max())
317 return std::numeric_limits<std::uint64_t>::max();
318 return static_cast<std::uint64_t>(a);
319 };
320 auto toUInt2 = [](long double a) -> std::uint64_t
321 {
322 return static_cast<std::uint64_t>(ExtendedFloat(a));
323 };
324 auto toInt1 = [](long double a) -> std::int64_t
325 {
326 if(std::isnan(a))
327 return 0;
328 if(a < std::numeric_limits<std::int64_t>::min())
329 return std::numeric_limits<std::int64_t>::min();
330 if(a > std::numeric_limits<std::int64_t>::max())
331 return std::numeric_limits<std::int64_t>::max();
332 return static_cast<std::int64_t>(a);
333 };
334 auto toInt2 = [](long double a) -> std::int64_t
335 {
336 return static_cast<std::int64_t>(ExtendedFloat(a));
337 };
338 auto pow1 = [](long double base, int exponent) -> long double
339 {
340 if(exponent < 0)
341 {
342 base = 1 / base;
343 exponent = -exponent;
344 }
345 else if(exponent == 0)
346 return 1;
347 long double retval = 1;
348 for(;;)
349 {
350 if(exponent == 0)
351 return retval;
352 else if(exponent == 1)
353 return retval * base;
354 if(exponent & 1)
355 {
356 retval *= base;
357 }
358 base *= base;
359 exponent >>= 1;
360 }
361 };
362 auto pow2 = [](long double base, int exponent) -> ExtendedFloat
363 {
364 return pow(ExtendedFloat(base), static_cast<std::int64_t>(exponent));
365 };
366 auto scalbn1 = [](long double a, std::int64_t exponent) -> long double
367 {
368 return std::scalbln(a, static_cast<long>(exponent));
369 };
370 auto scalbn2 = [](long double a, std::int64_t exponent) -> ExtendedFloat
371 {
372 return scalbn(ExtendedFloat(a), exponent);
373 };
374 auto log2_1 = [](long double a) -> long double
375 {
376 return std::log2(a);
377 };
378 auto log2_2 = [](long double a) -> ExtendedFloat
379 {
380 return log2(ExtendedFloat(a));
381 };
382 auto log10_1 = [](long double a) -> long double
383 {
384 return std::log10(a);
385 };
386 auto log10_2 = [](long double a) -> ExtendedFloat
387 {
388 return log10(ExtendedFloat(a));
389 };
390 const long double NaN = std::numeric_limits<long double>::quiet_NaN();
391 const long double Infinity = std::numeric_limits<long double>::infinity();
392 testCase("add", add1, add2, +0.0L, +0.0L);
393 testCase("add", add1, add2, +0.0L, -0.0L);
394 testCase("add", add1, add2, -0.0L, +0.0L);
395 testCase("add", add1, add2, -0.0L, -0.0L);
396 testCase("add", add1, add2, 0.0L, NaN);
397 testCase("add", add1, add2, NaN, 0.0L);
398 testCase("add", add1, add2, NaN, NaN);
399 testCase("add", add1, add2, +Infinity, +Infinity);
400 testCase("add", add1, add2, +Infinity, -Infinity);
401 testCase("add", add1, add2, -Infinity, +Infinity);
402 testCase("add", add1, add2, -Infinity, -Infinity);
403 testCase("add", add1, add2, 0x1.0000000000000002p0L, -0x1.0p-64L);
404 testCase("add", add1, add2, 0x1.p0L, -0x1.0p-65L);
405 testCase("add", add1, add2, 0x1.p0L, -0x0.Fp-65L);
406 testCase("add", add1, add2, 0x1.p0L, -0x1.1p-65L);
407 testCase("add", add1, add2, 0x1.0000000000000002p0L, -0x2.0p-65L);
408 testCase("add", add1, add2, 0x1.0000000000000002p0L, -0x1.Fp-65L);
409 testCase("add", add1, add2, 0x1.0000000000000002p0L, -0x2.1p-65L);
410 testCase("add", add1, add2, 0x1p-16445L, 0x1p-16445L);
411 testCase("add", add1, add2, 0x1p+16383L, 0x1p+16383L);
412 testCase("mul", mul1, mul2, +0.0L, +0.0L);
413 testCase("mul", mul1, mul2, +0.0L, -0.0L);
414 testCase("mul", mul1, mul2, -0.0L, +0.0L);
415 testCase("mul", mul1, mul2, -0.0L, -0.0L);
416 testCase("mul", mul1, mul2, 0.0L, NaN);
417 testCase("mul", mul1, mul2, NaN, 0.0L);
418 testCase("mul", mul1, mul2, NaN, NaN);
419 testCase("mul", mul1, mul2, +Infinity, +Infinity);
420 testCase("mul", mul1, mul2, +Infinity, -Infinity);
421 testCase("mul", mul1, mul2, -Infinity, +Infinity);
422 testCase("mul", mul1, mul2, -Infinity, -Infinity);
423 testCase("mul", mul1, mul2, 0x1p0L, 0x1p0L);
424 testCase("mul", mul1, mul2, 0x1p16000L, 0x1p383L);
425 testCase("mul", mul1, mul2, 0x1p16000L, 0x1p384L);
426 testCase("mul", mul1, mul2, 0x1p-16000L, 0x1p-445L);
427 testCase("mul", mul1, mul2, 0x1p-16000L, 0x1p-446L);
428 testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.000000001p0L);
429 testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000018p0L);
430 testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.000000002p0L);
431 testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000028p0L);
432 testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.000000003p0L);
433 testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000038p0L);
434 testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.000000004p0L);
435 testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000048p0L);
436 testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.000000005p0L);
437 testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000058p0L);
438 testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.000000006p0L);
439 testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000068p0L);
440 testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.000000007p0L);
441 testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000078p0L);
442 testCase("mul",
443 mul1,
444 mul2,
445 3.1415926535897932384626433832795L,
446 0.318309886183790671537767526745028724L);
447 testCase("mul",
448 mul1,
449 mul2,
450 2.718281828459045235360287471352662497757L,
451 0.3678794411714423215955237701614608674458L);
452 testCase("div", div1, div2, +0.0L, +0.0L);
453 testCase("div", div1, div2, +1.0L, +0.0L);
454 testCase("div", div1, div2, +1.0L, -0.0L);
455 testCase("div", div1, div2, -1.0L, +0.0L);
456 testCase("div", div1, div2, -1.0L, -0.0L);
457 testCase("div", div1, div2, +0.0L, +1.0L);
458 testCase("div", div1, div2, +0.0L, -1.0L);
459 testCase("div", div1, div2, -0.0L, +1.0L);
460 testCase("div", div1, div2, -0.0L, -1.0L);
461 testCase("div", div1, div2, 0.0L, NaN);
462 testCase("div", div1, div2, NaN, 0.0L);
463 testCase("div", div1, div2, NaN, NaN);
464 testCase("div", div1, div2, +Infinity, +Infinity);
465 testCase("div", div1, div2, +1.0L, +Infinity);
466 testCase("div", div1, div2, +1.0L, -Infinity);
467 testCase("div", div1, div2, -1.0L, +Infinity);
468 testCase("div", div1, div2, -1.0L, -Infinity);
469 testCase("div", div1, div2, 1.0L, 3.0L);
470 testCase("div", div1, div2, 1.0L, 5.0L);
471 testCase("div", div1, div2, 1.0L, 7.0L);
472 testCase("div", div1, div2, 1.0L, 9.0L);
473 testCase("div", div1, div2, 1.0L, 11.0L);
474 testCase("div", div1, div2, 1.0L, 3.1415926535897932384626433832795L);
475 testCase("div", div1, div2, 1.0L, 2.718281828459045235360287471352662497757L);
476 testCase("div", div1, div2, 0x1p16000L, 0x1p-383L);
477 testCase("div", div1, div2, 0x1p16000L, 0x1p-384L);
478 testCase("div", div1, div2, 0x1p-16000L, 0x1p445L);
479 testCase("div", div1, div2, 0x1p-16000L, 0x1p446L);
480 roundTestCases("floor", floor1, floor2);
481 roundTestCases("round", round1, round2);
482 roundTestCases("ceil", ceil1, ceil2);
483 roundTestCases("trunc", trunc1, trunc2);
484 toIntTestCases("uint64", toUInt1, toUInt2);
485 toIntTestCases("int64", toInt1, toInt2);
486 testCase("pow", pow1, pow2, 1.0L, static_cast<std::int64_t>(0));
487 testCase("pow", pow1, pow2, 1.0L, static_cast<std::int64_t>(5000));
488 testCase("pow", pow1, pow2, 1.0L, static_cast<std::int64_t>(-5000));
489 testCase("pow", pow1, pow2, 2.0L, static_cast<std::int64_t>(3000));
490 testCase("pow", pow1, pow2, 2.0L, static_cast<std::int64_t>(-3000));
491 testCase("pow", pow1, pow2, 3.0L, static_cast<std::int64_t>(3000));
492 testCase("pow", pow1, pow2, 3.0L, static_cast<std::int64_t>(-3000));
493 testCase("pow", pow1, pow2, 10.0L, static_cast<std::int64_t>(3000));
494 testCase("pow", pow1, pow2, 10.0L, static_cast<std::int64_t>(-3000));
495 testCase("pow", pow1, pow2, 36.0L, static_cast<std::int64_t>(3000));
496 testCase("pow", pow1, pow2, 36.0L, static_cast<std::int64_t>(-3000));
497 testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast<std::int64_t>(16384));
498 testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast<std::int64_t>(16383));
499 testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast<std::int64_t>(3000));
500 testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast<std::int64_t>(-3000));
501 testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast<std::int64_t>(-16383));
502 testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast<std::int64_t>(-16384));
503 testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast<std::int64_t>(-16445));
504 testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast<std::int64_t>(-16446));
505 testCase("log2", log2_1, log2_2, NaN);
506 testCase("log2", log2_1, log2_2, Infinity);
507 testCase("log2", log2_1, log2_2, -Infinity);
508 testCase("log2", log2_1, log2_2, 0.0L);
509 testCase("log2", log2_1, log2_2, -0.0L);
510 testCase("log2", log2_1, log2_2, -1.0L);
511 testCase("log2", log2_1, log2_2, 1.0L);
512 testCase("log2", log2_1, log2_2, 2.0L);
513 testCase("log2", log2_1, log2_2, 0x1.0p-16445L);
514 testCase("log2", log2_1, log2_2, 0x1.0p16383L);
515 testCase("log2", log2_1, log2_2, 3.0L);
516 testCase("log2", log2_1, log2_2, 5.0L);
517 testCase("log2", log2_1, log2_2, 7.0L);
518 testCase("log2", log2_1, log2_2, 9.0L);
519 testCase("log2", log2_1, log2_2, 11.0L);
520 testCase("log2", log2_1, log2_2, 1e100L);
521 testCase("log2", log2_1, log2_2, 1e-1L);
522 testCase("log2", log2_1, log2_2, 1e-2L);
523 testCase("log2", log2_1, log2_2, 1.5L);
524 testCase("log2", log2_1, log2_2, 0.693147180559945309417232121458176568L);
525 testCase("log2", log2_1, log2_2, static_cast<long double>(ExtendedFloat::Log10Of2()));
526 testCase("log2", log2_1, log2_2, static_cast<long double>(ExtendedFloat::LogOf2()));
527 testCase("log10", log10_1, log10_2, 1e1001L);
528 testCase("log10", log10_1, log10_2, 1.5L);
529 }
530 struct Init
531 {
532 Init()
533 {
534 mainFn();
535 std::exit(0);
536 }
537 };
538 Init init;
539 }
540 #endif
541 #else
542 #include <cstdint>
543 #include <cstdlib>
544 #include <iostream>
545 #include <thread>
546 #include <list>
547 #include <sstream>
548 #include <cassert>
549 namespace
550 {
551 unsigned clz8(std::uint8_t v)
552 {
553 return __builtin_clz(v) - __builtin_clz(0x80U);
554 }
555 unsigned ctz8(std::uint8_t v)
556 {
557 return v == 0 ? 8 : __builtin_ctz(v);
558 }
559 struct UInt16 final
560 {
561 std::uint8_t high;
562 std::uint8_t low;
563 explicit UInt16(std::uint8_t low = 0) : high(0), low(low)
564 {
565 }
566 UInt16(std::uint8_t high, std::uint8_t low) : high(high), low(low)
567 {
568 }
569 friend unsigned clz16(UInt16 v)
570 {
571 return v.high == 0 ? 8 + clz8(v.low) : clz8(v.high);
572 }
573 friend unsigned ctz16(UInt16 v)
574 {
575 return v.low == 0 ? 8 + ctz8(v.high) : ctz8(v.low);
576 }
577 static UInt16 mul8x8(std::uint8_t a, std::uint8_t b)
578 {
579 unsigned v = a;
580 v *= b;
581 return UInt16(v >> 8, v & 0xFFU);
582 }
583 static bool addCarry(std::uint8_t a, std::uint8_t b)
584 {
585 return static_cast<std::uint16_t>(a) + b > 0xFFU;
586 }
587 static bool addCarry(std::uint8_t a, std::uint8_t b, bool carry)
588 {
589 return static_cast<unsigned>(a) + b + carry > 0xFFU;
590 }
591 static bool subBorrow(std::uint8_t a, std::uint8_t b)
592 {
593 return a < b;
594 }
595 static bool subBorrow(std::uint8_t a, std::uint8_t b, bool borrow)
596 {
597 return a < b || (a == b && borrow);
598 }
599 friend UInt16 operator+(UInt16 a, UInt16 b)
600 {
601 return UInt16(a.high + b.high + addCarry(a.low, b.low), a.low + b.low);
602 }
603 friend UInt16 operator-(UInt16 a, UInt16 b)
604 {
605 return UInt16(a.high - b.high - subBorrow(a.low, b.low), a.low - b.low);
606 }
607 friend UInt16 operator<<(UInt16 v, unsigned shiftAmount)
608 {
609 return shiftAmount == 0 ? v : shiftAmount < 8 ?
610 UInt16((v.high << shiftAmount) | (v.low >> (8 - shiftAmount)),
611 v.low << shiftAmount) :
612 UInt16(v.low << (shiftAmount - 8), 0);
613 }
614 friend UInt16 operator>>(UInt16 v, unsigned shiftAmount)
615 {
616 return shiftAmount == 0 ? v : shiftAmount < 8 ?
617 UInt16(v.high >> shiftAmount,
618 (v.low >> shiftAmount) | (v.high << (8 - shiftAmount))) :
619 UInt16(v.high >> (shiftAmount - 8));
620 }
621 struct DivModResult8 final
622 {
623 std::uint8_t divResult;
624 std::uint8_t modResult;
625 DivModResult8(std::uint8_t divResult, std::uint8_t modResult)
626 : divResult(divResult), modResult(modResult)
627 {
628 }
629 };
630 static DivModResult8 divMod16x8(UInt16 n, std::uint8_t d)
631 {
632 assert(d != 0);
633 std::uint16_t v = n.high;
634 v <<= 8;
635 v |= n.low;
636 std::uint16_t divResult = v / d;
637 std::uint16_t modResult = v % d;
638 assert(divResult <= 0xFFU);
639 assert(modResult <= 0xFFU);
640 return DivModResult8(divResult, modResult);
641 }
642 struct DivModResult;
643 static DivModResult divMod(UInt16 uIn, UInt16 vIn);
644 static DivModResult divMod2(UInt16 n, UInt16 d);
645 friend bool operator==(UInt16 a, UInt16 b) noexcept
646 {
647 return a.high == b.high && a.low == b.low;
648 }
649 friend bool operator!=(UInt16 a, UInt16 b) noexcept
650 {
651 return a.high != b.high || a.low != b.low;
652 }
653 friend bool operator<(UInt16 a, UInt16 b) noexcept
654 {
655 return a.high < b.high || (a.high == b.high && a.low < b.low);
656 }
657 friend bool operator<=(UInt16 a, UInt16 b) noexcept
658 {
659 return a.high < b.high || (a.high == b.high && a.low <= b.low);
660 }
661 friend bool operator>(UInt16 a, UInt16 b) noexcept
662 {
663 return a.high > b.high || (a.high == b.high && a.low > b.low);
664 }
665 friend bool operator>=(UInt16 a, UInt16 b) noexcept
666 {
667 return a.high > b.high || (a.high == b.high && a.low >= b.low);
668 }
669 };
670 struct UInt16::DivModResult final
671 {
672 UInt16 divResult;
673 UInt16 modResult;
674 DivModResult(UInt16 divResult, UInt16 modResult) : divResult(divResult), modResult(modResult)
675 {
676 }
677 };
678 UInt16::DivModResult UInt16::divMod2(UInt16 n, UInt16 d)
679 {
680 std::uint16_t nv = n.high;
681 nv <<= 8;
682 nv |= n.low;
683 std::uint16_t dv = d.high;
684 dv <<= 8;
685 dv |= d.low;
686 std::uint16_t qv = nv / dv;
687 std::uint16_t rv = nv % dv;
688 return DivModResult(UInt16(qv >> 8, qv & 0xFF), UInt16(rv >> 8, rv & 0xFF));
689 }
690 template <std::size_t NumberSizes,
691 typename Digit,
692 typename DoubleDigit,
693 unsigned DigitBitCount,
694 typename DigitCLZFn>
695 void divMod(const Digit(&numerator)[NumberSizes],
696 const Digit(&denominator)[NumberSizes],
697 Digit(&quotient)[NumberSizes],
698 Digit(&remainder)[NumberSizes])
699 {
700 constexpr Digit DigitMax = (static_cast<DoubleDigit>(1) << DigitBitCount) - 1;
701 static_assert(NumberSizes != 0, "bad size");
702 std::size_t m = NumberSizes;
703 for(std::size_t i = 0; i < NumberSizes; i++)
704 {
705 if(denominator[i] != 0)
706 {
707 m = i;
708 break;
709 }
710 }
711 const std::size_t n = NumberSizes - m;
712 if(n <= 1)
713 {
714 assert(denominator[NumberSizes - 1] != 0);
715 for(std::size_t i = 0; i < NumberSizes - 1; i++)
716 {
717 remainder[i] = 0;
718 }
719 Digit currentRemainder = 0;
720 for(std::size_t i = 0; i < NumberSizes; i++)
721 {
722 DoubleDigit n = currentRemainder;
723 n <<= DigitBitCount;
724 n |= numerator[i];
725 quotient[i] = n / denominator[NumberSizes - 1];
726 currentRemainder = n % denominator[NumberSizes - 1];
727 }
728 remainder[NumberSizes - 1] = currentRemainder;
729 return;
730 }
731 // from algorithm D, section 4.3.1 in Art of Computer Programming volume 2 by Knuth.
732 unsigned log2D = DigitCLZFn()(denominator[m]);
733 Digit u[NumberSizes + 1];
734 u[NumberSizes] = (numerator[NumberSizes - 1] << log2D) & DigitMax;
735 u[0] = ((static_cast<DoubleDigit>(numerator[0]) << log2D) >> DigitBitCount) & DigitMax;
736 for(std::size_t i = 1; i < NumberSizes; i++)
737 {
738 DoubleDigit value = numerator[i - 1];
739 value <<= DigitBitCount;
740 value |= numerator[i];
741 value <<= log2D;
742 u[i] = (value >> DigitBitCount) & DigitMax;
743 }
744 Digit v[NumberSizes + 1] = {};
745 v[n] = (denominator[NumberSizes - 1] << log2D) & DigitMax;
746 for(std::size_t i = 1; i < n; i++)
747 {
748 DoubleDigit value = denominator[m + i - 1];
749 value <<= DigitBitCount;
750 value |= denominator[m + i];
751 value <<= log2D;
752 v[i] = (value >> DigitBitCount) & DigitMax;
753 quotient[i - 1] = 0;
754 }
755 for(std::size_t j = 0; j <= m; j++)
756 {
757 DoubleDigit qHat;
758 if(u[j] == v[1])
759 {
760 qHat = DigitMax;
761 }
762 else
763 {
764 qHat = ((static_cast<DoubleDigit>(u[j]) << DigitBitCount) | u[j + 1]) / v[1];
765 }
766 {
767 DoubleDigit lhs = v[2] * qHat;
768 DoubleDigit rhsHigh =
769 ((static_cast<DoubleDigit>(u[j]) << DigitBitCount) | u[j + 1]) - qHat * v[1];
770 Digit rhsLow = u[j + 2];
771 if(rhsHigh < static_cast<DoubleDigit>(1) << DigitBitCount
772 && lhs > ((rhsHigh << DigitBitCount) | rhsLow))
773 {
774 qHat--;
775 lhs -= v[2];
776 rhsHigh += v[1];
777 if(rhsHigh < static_cast<DoubleDigit>(1) << DigitBitCount
778 && lhs > ((rhsHigh << DigitBitCount) | rhsLow))
779 {
780 qHat--;
781 }
782 }
783 }
784 bool borrow = false;
785 {
786 Digit mulCarry = 0;
787 for(std::size_t i = n; i > 0; i--)
788 {
789 assert(i <= NumberSizes);
790 DoubleDigit product = qHat * v[i] + mulCarry;
791 mulCarry = product >> DigitBitCount;
792 product &= DigitMax;
793 bool prevBorrow = borrow;
794 DoubleDigit digit = u[j + i] - product - prevBorrow;
795 borrow = digit != (digit & DigitMax);
796 digit &= DigitMax;
797 u[j + i] = digit;
798 }
799 bool prevBorrow = borrow;
800 DoubleDigit digit = u[j] - mulCarry - prevBorrow;
801 borrow = digit != (digit & DigitMax);
802 digit &= DigitMax;
803 u[j] = digit;
804 }
805 Digit qj = qHat;
806 if(borrow)
807 {
808 qj--;
809 bool carry = false;
810 for(std::size_t i = n; i > 0; i--)
811 {
812 bool prevCarry = carry;
813 assert(i + j <= NumberSizes);
814 DoubleDigit digit = u[j + i] + v[i] + prevCarry;
815 carry = digit != (digit & DigitMax);
816 digit &= DigitMax;
817 u[j + i] = digit;
818 }
819 u[j] = (u[j] + carry) & DigitMax;
820 }
821 quotient[j + n - 1] = qj;
822 }
823 for(std::size_t i = 0; i < NumberSizes; i++)
824 {
825 DoubleDigit value = u[i];
826 value <<= DigitBitCount;
827 value |= u[i + 1];
828 remainder[i] = value >> log2D;
829 }
830 }
831 struct OpClz4 final
832 {
833 constexpr unsigned operator()(std::uint16_t value) const noexcept
834 {
835 return __builtin_clz(value) - (__builtin_clz(0) - 4);
836 }
837 };
838 UInt16::DivModResult UInt16::divMod(UInt16 uIn, UInt16 vIn)
839 {
840 constexpr std::size_t NumberSizes = 4;
841 typedef std::uint16_t Digit;
842 typedef unsigned DoubleDigit;
843 constexpr unsigned DigitBitCount = 4;
844 Digit numerator[NumberSizes], denominator[NumberSizes], quotient[NumberSizes],
845 remainder[NumberSizes];
846 numerator[0] = uIn.high >> 4;
847 numerator[1] = uIn.high & 0xF;
848 numerator[2] = uIn.low >> 4;
849 numerator[3] = uIn.low & 0xF;
850 denominator[0] = vIn.high >> 4;
851 denominator[1] = vIn.high & 0xF;
852 denominator[2] = vIn.low >> 4;
853 denominator[3] = vIn.low & 0xF;
854 ::divMod<NumberSizes, Digit, DoubleDigit, DigitBitCount, OpClz4>(
855 numerator, denominator, quotient, remainder);
856 return DivModResult(
857 UInt16((quotient[0] << 4) | quotient[1], (quotient[2] << 4) | quotient[3]),
858 UInt16((remainder[0] << 4) | remainder[1], (remainder[2] << 4) | remainder[3]));
859 }
860 void mainFn(std::uint8_t start, std::uint8_t end)
861 {
862 for(unsigned dHigh = start; dHigh <= end; dHigh++)
863 {
864 if(start == 0)
865 {
866 std::ostringstream ss;
867 ss << dHigh * 100 / (end + 1) << "%\n";
868 std::cout << ss.str() << std::flush;
869 }
870 for(unsigned dLow = 0; dLow < 0x100U; dLow++)
871 {
872 UInt16 d(dHigh, dLow);
873 if(d == UInt16(0))
874 continue;
875 #if 0
876 if(d < UInt16(2, 0))
877 continue;
878 #endif
879 for(unsigned nHigh = 0; nHigh < 0x100U; nHigh++)
880 {
881 for(unsigned nLow = 0; nLow < 0x100U; nLow++)
882 {
883 UInt16 n(nHigh, nLow);
884 auto result = UInt16::divMod(n, d);
885 auto result2 = UInt16::divMod2(n, d);
886 if(result.divResult != result2.divResult
887 || result.modResult != result2.modResult)
888 {
889 std::ostringstream ss;
890 ss << std::hex << std::uppercase;
891 ss.fill('0');
892 ss.width(2);
893 ss << static_cast<unsigned>(n.high);
894 ss.width(2);
895 ss << static_cast<unsigned>(n.low);
896 ss << " / ";
897 ss.width(2);
898 ss << static_cast<unsigned>(d.high);
899 ss.width(2);
900 ss << static_cast<unsigned>(d.low);
901 ss << " == ";
902 ss.width(2);
903 ss << static_cast<unsigned>(result.divResult.high);
904 ss.width(2);
905 ss << static_cast<unsigned>(result.divResult.low);
906 ss << ", ";
907 ss.width(2);
908 ss << static_cast<unsigned>(result2.divResult.high);
909 ss.width(2);
910 ss << static_cast<unsigned>(result2.divResult.low);
911 ss << std::endl;
912 ss.width(2);
913 ss << static_cast<unsigned>(n.high);
914 ss.width(2);
915 ss << static_cast<unsigned>(n.low);
916 ss << " % ";
917 ss.width(2);
918 ss << static_cast<unsigned>(d.high);
919 ss.width(2);
920 ss << static_cast<unsigned>(d.low);
921 ss << " == ";
922 ss.width(2);
923 ss << static_cast<unsigned>(result.modResult.high);
924 ss.width(2);
925 ss << static_cast<unsigned>(result.modResult.low);
926 ss << ", ";
927 ss.width(2);
928 ss << static_cast<unsigned>(result2.modResult.high);
929 ss.width(2);
930 ss << static_cast<unsigned>(result2.modResult.low);
931 std::cout << ss.str() << std::endl;
932 return;
933 }
934 }
935 }
936 }
937 }
938 }
939 struct Init
940 {
941 Init()
942 {
943 const std::size_t splitCount = 6;
944 std::list<std::thread> threads;
945 for(std::size_t i = 0; i < splitCount; i++)
946 {
947 auto start = i * 0x100 / splitCount;
948 auto end = (i + 1) * 0x100 / splitCount - 1;
949 threads.push_back(std::thread([=]()
950 {
951 mainFn(start, end);
952 }));
953 }
954 for(std::thread &thread : threads)
955 thread.join();
956 std::exit(0);
957 }
958 };
959 Init init;
960 }
961 #endif