src/util/soft_float.cpp

   1 /*
   2  * Copyright 2016-2017 Jacob Lifshay
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a copy
   5  * of this software and associated documentation files (the "Software"), to deal
   6  * in the Software without restriction, including without limitation the rights
   7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   8  * copies of the Software, and to permit persons to whom the Software is
   9  * furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in all
  12  * copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  20  * SOFTWARE.
  21  *
  22  */
  23
  24 // derived from
  25 // https://github.com/programmerjake/javascript-tasklets/blob/master/javascript_tasklets/soft_float.cpp
  26
  27 #if 1
  28 #include "soft_float.h"
  29 #if 0
  30 #include <iostream>
  31 #include <cstdlib>
  32 #include <string>
  33 #include <sstream>
  34 #include <cstdio>
  35 namespace
  36 {
  37 using namespace vulkan_cpu::util::soft_float;
  38 std::string hexValue(const ExtendedFloat &v)
  39 {
  40     if(v.isNaN())
  41     {
  42         return "NaN";
  43     }
  44     if(v.isInfinite())
  45     {
  46         if(v.sign)
  47             return "-Infinity";
  48         return "+Infinity";
  49     }
  50     std::ostringstream ss;
  51     ss << std::hex << std::uppercase;
  52     ss.fill('0');
  53     if(v.sign)
  54         ss << "-";
  55     else
  56         ss << "+";
  57     ss << "0x";
  58     std::int32_t exponent = v.exponent;
  59     exponent -= ExtendedFloat::exponentBias();
  60     if(v.isZero())
  61         exponent = 0;
  62     std::uint64_t mantissa = v.mantissa;
  63     unsigned firstDigitBits = 1 + (exponent & 3);
  64     ss << (mantissa >> (64 - firstDigitBits));
  65     mantissa <<= firstDigitBits;
  66     exponent &= ~3;
  67     ss << ".";
  68     ss.width(16);
  69     ss << mantissa;
  70     ss << "p";
  71     ss << std::dec << std::showpos;
  72     ss << exponent;
  73     return ss.str();
  74 }
  75 std::string hexValue(long double v)
  76 {
  77     if(std::isnan(v))
  78     {
  79         return "NaN";
  80     }
  81     if(std::isinf(v))
  82     {
  83         if(v < 0)
  84             return "-Infinity";
  85         return "+Infinity";
  86     }
  87     const std::size_t strSize = 64;
  88     char str[strSize];
  89     std::snprintf(str, sizeof(str), "%+1.16LA", v);
  90     for(char &ch : str)
  91     {
  92         if(ch == '\0')
  93             break;
  94         if(ch == 'X')
  95             ch = 'x';
  96         else if(ch == 'P')
  97             ch = 'p';
  98     }
  99     return str;
 100 }
 101 std::string hexValue(std::int64_t v)
 102 {
 103     std::ostringstream ss;
 104     ss << std::hex << std::uppercase;
 105     ss.fill('0');
 106     if(v < 0)
 107         ss << "-";
 108     else
 109         ss << "+";
 110     ss << "0x";
 111     ss.width(16);
 112     if(v < 0)
 113         ss << -static_cast<std::uint64_t>(v);
 114     else
 115         ss << static_cast<std::uint64_t>(v);
 116     return ss.str();
 117 }
 118 std::string hexValue(std::uint64_t v)
 119 {
 120     std::ostringstream ss;
 121     ss << std::hex << std::uppercase;
 122     ss.fill('0');
 123     ss << "0x";
 124     ss.width(16);
 125     ss << static_cast<std::uint64_t>(v);
 126     return ss.str();
 127 }
 128 bool sameValue(long double a, long double b)
 129 {
 130     if(std::isnan(a))
 131         return std::isnan(b);
 132     if(a == 0)
 133     {
 134         return b == 0 && std::signbit(a) == std::signbit(b);
 135     }
 136     return a == b;
 137 }
 138 void writeArgs()
 139 {
 140 }
 141 template <typename Arg, typename... Args>
 142 void writeArgs(Arg arg, Args... args)
 143 {
 144     std::cout << " " << hexValue(arg);
 145     writeArgs(args...);
 146 }
 147 constexpr bool displayPassedTests = true;
 148 template <typename TestFn1, typename TestFn2, typename... Args>
 149 void testCase(const char *name, TestFn1 &&testFn1, TestFn2 &&testFn2, Args... args)
 150 {
 151     long double result1 = static_cast<long double>(testFn1(args...));
 152     long double result2 = static_cast<long double>(testFn2(args...));
 153     if(!sameValue(result1, result2))
 154     {
 155         std::cout << name;
 156         writeArgs(args...);
 157         std::cout << " -> ";
 158         std::cout << hexValue(result1) << " != " << hexValue(result2) << std::endl;
 159     }
 160     else if(displayPassedTests)
 161     {
 162         std::cout << name;
 163         writeArgs(args...);
 164         std::cout << " -> ";
 165         std::cout << hexValue(result1) << std::endl;
 166     }
 167 }
 168 template <typename TestFn1, typename TestFn2, typename... Args>
 169 void testCaseI(const char *name, TestFn1 &&testFn1, TestFn2 &&testFn2, Args... args)
 170 {
 171     auto result1 = testFn1(args...);
 172     auto result2 = testFn2(args...);
 173     if(result1 != result2)
 174     {
 175         std::cout << name;
 176         writeArgs(args...);
 177         std::cout << " -> ";
 178         std::cout << hexValue(result1) << " != " << hexValue(result2) << std::endl;
 179     }
 180     else if(displayPassedTests)
 181     {
 182         std::cout << name;
 183         writeArgs(args...);
 184         std::cout << " -> ";
 185         std::cout << hexValue(result1) << std::endl;
 186     }
 187 }
 188 template <typename TestFn1, typename TestFn2, typename... Args>
 189 void roundTestCases(const char *name, TestFn1 &&testFn1, TestFn2 &&testFn2)
 190 {
 191     const long double NaN = std::numeric_limits<long double>::quiet_NaN();
 192     const long double Infinity = std::numeric_limits<long double>::infinity();
 193     auto testBothSigns = [&](long double value)
 194     {
 195         testCase(name, testFn1, testFn2, value);
 196         testCase(name, testFn1, testFn2, -value);
 197     };
 198     testCase(name, testFn1, testFn2, NaN);
 199     testBothSigns(0.0L);
 200     testBothSigns(Infinity);
 201     testBothSigns(1.0L);
 202     testBothSigns(0x1.0p-1L);
 203     testBothSigns(0x1.8p0L);
 204     testBothSigns(0x1.Fp0L);
 205     testBothSigns(0x1.Fp-30L);
 206     testBothSigns(0x1.Fp30L);
 207     testBothSigns(0x1.Fp62L);
 208     testBothSigns(0x1.Fp63L);
 209     testBothSigns(0x1.Fp64L);
 210     testBothSigns(0x1.Fp65L);
 211     testBothSigns(0x1.Fp62L + 0.5L);
 212     testBothSigns(0x1.Fp63L + 0.5L);
 213     testBothSigns(0x1.Fp64L + 0.5L);
 214     testBothSigns(0x1.Fp65L + 0.5L);
 215     testBothSigns(0x1.Fp62L + 1);
 216     testBothSigns(0x1.Fp63L + 1);
 217     testBothSigns(0x1.Fp64L + 1);
 218     testBothSigns(0x1.Fp65L + 1);
 219 }
 220 template <typename TestFn1, typename TestFn2, typename... Args>
 221 void toIntTestCases(const char *name, TestFn1 &&testFn1, TestFn2 &&testFn2)
 222 {
 223     const long double NaN = std::numeric_limits<long double>::quiet_NaN();
 224     const long double Infinity = std::numeric_limits<long double>::infinity();
 225     auto testBothSigns = [&](long double value)
 226     {
 227         testCaseI(name, testFn1, testFn2, value);
 228         testCaseI(name, testFn1, testFn2, -value);
 229     };
 230     testCaseI(name, testFn1, testFn2, NaN);
 231     testBothSigns(0.0L);
 232     testBothSigns(Infinity);
 233     testBothSigns(1.0L);
 234     testBothSigns(0x1.0p-1L);
 235     testBothSigns(0x1.8p0L);
 236     testBothSigns(0x1.Fp0L);
 237     testBothSigns(0x1.Fp-30L);
 238     testBothSigns(0x1.Fp30L);
 239     testBothSigns(0x1.Fp62L);
 240     testBothSigns(0x1.Fp63L);
 241     testBothSigns(0x1.Fp64L);
 242     testBothSigns(0x1.Fp65L);
 243     testBothSigns(0x1.Fp62L + 0.5L);
 244     testBothSigns(0x1.Fp63L + 0.5L);
 245     testBothSigns(0x1.Fp64L + 0.5L);
 246     testBothSigns(0x1.Fp65L + 0.5L);
 247     testBothSigns(0x1.Fp62L + 1);
 248     testBothSigns(0x1.Fp63L + 1);
 249     testBothSigns(0x1.Fp64L + 1);
 250     testBothSigns(0x1.Fp65L + 1);
 251 }
 252 void mainFn()
 253 {
 254     auto add1 = [](long double a, long double b) -> long double
 255     {
 256         return a + b;
 257     };
 258     auto add2 = [](long double a, long double b) -> ExtendedFloat
 259     {
 260         return ExtendedFloat(a) + ExtendedFloat(b);
 261     };
 262     auto mul1 = [](long double a, long double b) -> long double
 263     {
 264         return a * b;
 265     };
 266     auto mul2 = [](long double a, long double b) -> ExtendedFloat
 267     {
 268         return ExtendedFloat(a) * ExtendedFloat(b);
 269     };
 270     auto div1 = [](long double a, long double b) -> long double
 271     {
 272         return a / b;
 273     };
 274     auto div2 = [](long double a, long double b) -> ExtendedFloat
 275     {
 276         return ExtendedFloat(a) / ExtendedFloat(b);
 277     };
 278     auto floor1 = [](long double a) -> long double
 279     {
 280         return std::floor(a);
 281     };
 282     auto floor2 = [](long double a) -> ExtendedFloat
 283     {
 284         return floor(ExtendedFloat(a));
 285     };
 286     auto ceil1 = [](long double a) -> long double
 287     {
 288         return std::ceil(a);
 289     };
 290     auto ceil2 = [](long double a) -> ExtendedFloat
 291     {
 292         return ceil(ExtendedFloat(a));
 293     };
 294     auto round1 = [](long double a) -> long double
 295     {
 296         return std::round(a);
 297     };
 298     auto round2 = [](long double a) -> ExtendedFloat
 299     {
 300         return round(ExtendedFloat(a));
 301     };
 302     auto trunc1 = [](long double a) -> long double
 303     {
 304         return std::trunc(a);
 305     };
 306     auto trunc2 = [](long double a) -> ExtendedFloat
 307     {
 308         return trunc(ExtendedFloat(a));
 309     };
 310     auto toUInt1 = [](long double a) -> std::uint64_t
 311     {
 312         if(std::isnan(a))
 313             return 0;
 314         if(a < std::numeric_limits<std::uint64_t>::min())
 315             return std::numeric_limits<std::uint64_t>::min();
 316         if(a > std::numeric_limits<std::uint64_t>::max())
 317             return std::numeric_limits<std::uint64_t>::max();
 318         return static_cast<std::uint64_t>(a);
 319     };
 320     auto toUInt2 = [](long double a) -> std::uint64_t
 321     {
 322         return static_cast<std::uint64_t>(ExtendedFloat(a));
 323     };
 324     auto toInt1 = [](long double a) -> std::int64_t
 325     {
 326         if(std::isnan(a))
 327             return 0;
 328         if(a < std::numeric_limits<std::int64_t>::min())
 329             return std::numeric_limits<std::int64_t>::min();
 330         if(a > std::numeric_limits<std::int64_t>::max())
 331             return std::numeric_limits<std::int64_t>::max();
 332         return static_cast<std::int64_t>(a);
 333     };
 334     auto toInt2 = [](long double a) -> std::int64_t
 335     {
 336         return static_cast<std::int64_t>(ExtendedFloat(a));
 337     };
 338     auto pow1 = [](long double base, int exponent) -> long double
 339     {
 340         if(exponent < 0)
 341         {
 342             base = 1 / base;
 343             exponent = -exponent;
 344         }
 345         else if(exponent == 0)
 346             return 1;
 347         long double retval = 1;
 348         for(;;)
 349         {
 350             if(exponent == 0)
 351                 return retval;
 352             else if(exponent == 1)
 353                 return retval * base;
 354             if(exponent & 1)
 355             {
 356                 retval *= base;
 357             }
 358             base *= base;
 359             exponent >>= 1;
 360         }
 361     };
 362     auto pow2 = [](long double base, int exponent) -> ExtendedFloat
 363     {
 364         return pow(ExtendedFloat(base), static_cast<std::int64_t>(exponent));
 365     };
 366     auto scalbn1 = [](long double a, std::int64_t exponent) -> long double
 367     {
 368         return std::scalbln(a, static_cast<long>(exponent));
 369     };
 370     auto scalbn2 = [](long double a, std::int64_t exponent) -> ExtendedFloat
 371     {
 372         return scalbn(ExtendedFloat(a), exponent);
 373     };
 374     auto log2_1 = [](long double a) -> long double
 375     {
 376         return std::log2(a);
 377     };
 378     auto log2_2 = [](long double a) -> ExtendedFloat
 379     {
 380         return log2(ExtendedFloat(a));
 381     };
 382     auto log10_1 = [](long double a) -> long double
 383     {
 384         return std::log10(a);
 385     };
 386     auto log10_2 = [](long double a) -> ExtendedFloat
 387     {
 388         return log10(ExtendedFloat(a));
 389     };
 390     const long double NaN = std::numeric_limits<long double>::quiet_NaN();
 391     const long double Infinity = std::numeric_limits<long double>::infinity();
 392     testCase("add", add1, add2, +0.0L, +0.0L);
 393     testCase("add", add1, add2, +0.0L, -0.0L);
 394     testCase("add", add1, add2, -0.0L, +0.0L);
 395     testCase("add", add1, add2, -0.0L, -0.0L);
 396     testCase("add", add1, add2, 0.0L, NaN);
 397     testCase("add", add1, add2, NaN, 0.0L);
 398     testCase("add", add1, add2, NaN, NaN);
 399     testCase("add", add1, add2, +Infinity, +Infinity);
 400     testCase("add", add1, add2, +Infinity, -Infinity);
 401     testCase("add", add1, add2, -Infinity, +Infinity);
 402     testCase("add", add1, add2, -Infinity, -Infinity);
 403     testCase("add", add1, add2, 0x1.0000000000000002p0L, -0x1.0p-64L);
 404     testCase("add", add1, add2, 0x1.p0L, -0x1.0p-65L);
 405     testCase("add", add1, add2, 0x1.p0L, -0x0.Fp-65L);
 406     testCase("add", add1, add2, 0x1.p0L, -0x1.1p-65L);
 407     testCase("add", add1, add2, 0x1.0000000000000002p0L, -0x2.0p-65L);
 408     testCase("add", add1, add2, 0x1.0000000000000002p0L, -0x1.Fp-65L);
 409     testCase("add", add1, add2, 0x1.0000000000000002p0L, -0x2.1p-65L);
 410     testCase("add", add1, add2, 0x1p-16445L, 0x1p-16445L);
 411     testCase("add", add1, add2, 0x1p+16383L, 0x1p+16383L);
 412     testCase("mul", mul1, mul2, +0.0L, +0.0L);
 413     testCase("mul", mul1, mul2, +0.0L, -0.0L);
 414     testCase("mul", mul1, mul2, -0.0L, +0.0L);
 415     testCase("mul", mul1, mul2, -0.0L, -0.0L);
 416     testCase("mul", mul1, mul2, 0.0L, NaN);
 417     testCase("mul", mul1, mul2, NaN, 0.0L);
 418     testCase("mul", mul1, mul2, NaN, NaN);
 419     testCase("mul", mul1, mul2, +Infinity, +Infinity);
 420     testCase("mul", mul1, mul2, +Infinity, -Infinity);
 421     testCase("mul", mul1, mul2, -Infinity, +Infinity);
 422     testCase("mul", mul1, mul2, -Infinity, -Infinity);
 423     testCase("mul", mul1, mul2, 0x1p0L, 0x1p0L);
 424     testCase("mul", mul1, mul2, 0x1p16000L, 0x1p383L);
 425     testCase("mul", mul1, mul2, 0x1p16000L, 0x1p384L);
 426     testCase("mul", mul1, mul2, 0x1p-16000L, 0x1p-445L);
 427     testCase("mul", mul1, mul2, 0x1p-16000L, 0x1p-446L);
 428     testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.000000001p0L);
 429     testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000018p0L);
 430     testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.000000002p0L);
 431     testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000028p0L);
 432     testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.000000003p0L);
 433     testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000038p0L);
 434     testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.000000004p0L);
 435     testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000048p0L);
 436     testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.000000005p0L);
 437     testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000058p0L);
 438     testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.000000006p0L);
 439     testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000068p0L);
 440     testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.000000007p0L);
 441     testCase("mul", mul1, mul2, 0x1.0000001p0L, 0x1.0000000078p0L);
 442     testCase("mul",
 443              mul1,
 444              mul2,
 445              3.1415926535897932384626433832795L,
 446              0.318309886183790671537767526745028724L);
 447     testCase("mul",
 448              mul1,
 449              mul2,
 450              2.718281828459045235360287471352662497757L,
 451              0.3678794411714423215955237701614608674458L);
 452     testCase("div", div1, div2, +0.0L, +0.0L);
 453     testCase("div", div1, div2, +1.0L, +0.0L);
 454     testCase("div", div1, div2, +1.0L, -0.0L);
 455     testCase("div", div1, div2, -1.0L, +0.0L);
 456     testCase("div", div1, div2, -1.0L, -0.0L);
 457     testCase("div", div1, div2, +0.0L, +1.0L);
 458     testCase("div", div1, div2, +0.0L, -1.0L);
 459     testCase("div", div1, div2, -0.0L, +1.0L);
 460     testCase("div", div1, div2, -0.0L, -1.0L);
 461     testCase("div", div1, div2, 0.0L, NaN);
 462     testCase("div", div1, div2, NaN, 0.0L);
 463     testCase("div", div1, div2, NaN, NaN);
 464     testCase("div", div1, div2, +Infinity, +Infinity);
 465     testCase("div", div1, div2, +1.0L, +Infinity);
 466     testCase("div", div1, div2, +1.0L, -Infinity);
 467     testCase("div", div1, div2, -1.0L, +Infinity);
 468     testCase("div", div1, div2, -1.0L, -Infinity);
 469     testCase("div", div1, div2, 1.0L, 3.0L);
 470     testCase("div", div1, div2, 1.0L, 5.0L);
 471     testCase("div", div1, div2, 1.0L, 7.0L);
 472     testCase("div", div1, div2, 1.0L, 9.0L);
 473     testCase("div", div1, div2, 1.0L, 11.0L);
 474     testCase("div", div1, div2, 1.0L, 3.1415926535897932384626433832795L);
 475     testCase("div", div1, div2, 1.0L, 2.718281828459045235360287471352662497757L);
 476     testCase("div", div1, div2, 0x1p16000L, 0x1p-383L);
 477     testCase("div", div1, div2, 0x1p16000L, 0x1p-384L);
 478     testCase("div", div1, div2, 0x1p-16000L, 0x1p445L);
 479     testCase("div", div1, div2, 0x1p-16000L, 0x1p446L);
 480     roundTestCases("floor", floor1, floor2);
 481     roundTestCases("round", round1, round2);
 482     roundTestCases("ceil", ceil1, ceil2);
 483     roundTestCases("trunc", trunc1, trunc2);
 484     toIntTestCases("uint64", toUInt1, toUInt2);
 485     toIntTestCases("int64", toInt1, toInt2);
 486     testCase("pow", pow1, pow2, 1.0L, static_cast<std::int64_t>(0));
 487     testCase("pow", pow1, pow2, 1.0L, static_cast<std::int64_t>(5000));
 488     testCase("pow", pow1, pow2, 1.0L, static_cast<std::int64_t>(-5000));
 489     testCase("pow", pow1, pow2, 2.0L, static_cast<std::int64_t>(3000));
 490     testCase("pow", pow1, pow2, 2.0L, static_cast<std::int64_t>(-3000));
 491     testCase("pow", pow1, pow2, 3.0L, static_cast<std::int64_t>(3000));
 492     testCase("pow", pow1, pow2, 3.0L, static_cast<std::int64_t>(-3000));
 493     testCase("pow", pow1, pow2, 10.0L, static_cast<std::int64_t>(3000));
 494     testCase("pow", pow1, pow2, 10.0L, static_cast<std::int64_t>(-3000));
 495     testCase("pow", pow1, pow2, 36.0L, static_cast<std::int64_t>(3000));
 496     testCase("pow", pow1, pow2, 36.0L, static_cast<std::int64_t>(-3000));
 497     testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast<std::int64_t>(16384));
 498     testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast<std::int64_t>(16383));
 499     testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast<std::int64_t>(3000));
 500     testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast<std::int64_t>(-3000));
 501     testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast<std::int64_t>(-16383));
 502     testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast<std::int64_t>(-16384));
 503     testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast<std::int64_t>(-16445));
 504     testCase("scalbn", scalbn1, scalbn2, 1.0L, static_cast<std::int64_t>(-16446));
 505     testCase("log2", log2_1, log2_2, NaN);
 506     testCase("log2", log2_1, log2_2, Infinity);
 507     testCase("log2", log2_1, log2_2, -Infinity);
 508     testCase("log2", log2_1, log2_2, 0.0L);
 509     testCase("log2", log2_1, log2_2, -0.0L);
 510     testCase("log2", log2_1, log2_2, -1.0L);
 511     testCase("log2", log2_1, log2_2, 1.0L);
 512     testCase("log2", log2_1, log2_2, 2.0L);
 513     testCase("log2", log2_1, log2_2, 0x1.0p-16445L);
 514     testCase("log2", log2_1, log2_2, 0x1.0p16383L);
 515     testCase("log2", log2_1, log2_2, 3.0L);
 516     testCase("log2", log2_1, log2_2, 5.0L);
 517     testCase("log2", log2_1, log2_2, 7.0L);
 518     testCase("log2", log2_1, log2_2, 9.0L);
 519     testCase("log2", log2_1, log2_2, 11.0L);
 520     testCase("log2", log2_1, log2_2, 1e100L);
 521     testCase("log2", log2_1, log2_2, 1e-1L);
 522     testCase("log2", log2_1, log2_2, 1e-2L);
 523     testCase("log2", log2_1, log2_2, 1.5L);
 524     testCase("log2", log2_1, log2_2, 0.693147180559945309417232121458176568L);
 525     testCase("log2", log2_1, log2_2, static_cast<long double>(ExtendedFloat::Log10Of2()));
 526     testCase("log2", log2_1, log2_2, static_cast<long double>(ExtendedFloat::LogOf2()));
 527     testCase("log10", log10_1, log10_2, 1e1001L);
 528     testCase("log10", log10_1, log10_2, 1.5L);
 529 }
 530 struct Init
 531 {
 532     Init()
 533     {
 534         mainFn();
 535         std::exit(0);
 536     }
 537 };
 538 Init init;
 539 }
 540 #endif
 541 #else
 542 #include <cstdint>
 543 #include <cstdlib>
 544 #include <iostream>
 545 #include <thread>
 546 #include <list>
 547 #include <sstream>
 548 #include <cassert>
 549 namespace
 550 {
 551 unsigned clz8(std::uint8_t v)
 552 {
 553     return __builtin_clz(v) - __builtin_clz(0x80U);
 554 }
 555 unsigned ctz8(std::uint8_t v)
 556 {
 557     return v == 0 ? 8 : __builtin_ctz(v);
 558 }
 559 struct UInt16 final
 560 {
 561     std::uint8_t high;
 562     std::uint8_t low;
 563     explicit UInt16(std::uint8_t low = 0) : high(0), low(low)
 564     {
 565     }
 566     UInt16(std::uint8_t high, std::uint8_t low) : high(high), low(low)
 567     {
 568     }
 569     friend unsigned clz16(UInt16 v)
 570     {
 571         return v.high == 0 ? 8 + clz8(v.low) : clz8(v.high);
 572     }
 573     friend unsigned ctz16(UInt16 v)
 574     {
 575         return v.low == 0 ? 8 + ctz8(v.high) : ctz8(v.low);
 576     }
 577     static UInt16 mul8x8(std::uint8_t a, std::uint8_t b)
 578     {
 579         unsigned v = a;
 580         v *= b;
 581         return UInt16(v >> 8, v & 0xFFU);
 582     }
 583     static bool addCarry(std::uint8_t a, std::uint8_t b)
 584     {
 585         return static_cast<std::uint16_t>(a) + b > 0xFFU;
 586     }
 587     static bool addCarry(std::uint8_t a, std::uint8_t b, bool carry)
 588     {
 589         return static_cast<unsigned>(a) + b + carry > 0xFFU;
 590     }
 591     static bool subBorrow(std::uint8_t a, std::uint8_t b)
 592     {
 593         return a < b;
 594     }
 595     static bool subBorrow(std::uint8_t a, std::uint8_t b, bool borrow)
 596     {
 597         return a < b || (a == b && borrow);
 598     }
 599     friend UInt16 operator+(UInt16 a, UInt16 b)
 600     {
 601         return UInt16(a.high + b.high + addCarry(a.low, b.low), a.low + b.low);
 602     }
 603     friend UInt16 operator-(UInt16 a, UInt16 b)
 604     {
 605         return UInt16(a.high - b.high - subBorrow(a.low, b.low), a.low - b.low);
 606     }
 607     friend UInt16 operator<<(UInt16 v, unsigned shiftAmount)
 608     {
 609         return shiftAmount == 0 ? v : shiftAmount < 8 ?
 610                                   UInt16((v.high << shiftAmount) | (v.low >> (8 - shiftAmount)),
 611                                          v.low << shiftAmount) :
 612                                   UInt16(v.low << (shiftAmount - 8), 0);
 613     }
 614     friend UInt16 operator>>(UInt16 v, unsigned shiftAmount)
 615     {
 616         return shiftAmount == 0 ? v : shiftAmount < 8 ?
 617                                   UInt16(v.high >> shiftAmount,
 618                                          (v.low >> shiftAmount) | (v.high << (8 - shiftAmount))) :
 619                                   UInt16(v.high >> (shiftAmount - 8));
 620     }
 621     struct DivModResult8 final
 622     {
 623         std::uint8_t divResult;
 624         std::uint8_t modResult;
 625         DivModResult8(std::uint8_t divResult, std::uint8_t modResult)
 626             : divResult(divResult), modResult(modResult)
 627         {
 628         }
 629     };
 630     static DivModResult8 divMod16x8(UInt16 n, std::uint8_t d)
 631     {
 632         assert(d != 0);
 633         std::uint16_t v = n.high;
 634         v <<= 8;
 635         v |= n.low;
 636         std::uint16_t divResult = v / d;
 637         std::uint16_t modResult = v % d;
 638         assert(divResult <= 0xFFU);
 639         assert(modResult <= 0xFFU);
 640         return DivModResult8(divResult, modResult);
 641     }
 642     struct DivModResult;
 643     static DivModResult divMod(UInt16 uIn, UInt16 vIn);
 644     static DivModResult divMod2(UInt16 n, UInt16 d);
 645     friend bool operator==(UInt16 a, UInt16 b) noexcept
 646     {
 647         return a.high == b.high && a.low == b.low;
 648     }
 649     friend bool operator!=(UInt16 a, UInt16 b) noexcept
 650     {
 651         return a.high != b.high || a.low != b.low;
 652     }
 653     friend bool operator<(UInt16 a, UInt16 b) noexcept
 654     {
 655         return a.high < b.high || (a.high == b.high && a.low < b.low);
 656     }
 657     friend bool operator<=(UInt16 a, UInt16 b) noexcept
 658     {
 659         return a.high < b.high || (a.high == b.high && a.low <= b.low);
 660     }
 661     friend bool operator>(UInt16 a, UInt16 b) noexcept
 662     {
 663         return a.high > b.high || (a.high == b.high && a.low > b.low);
 664     }
 665     friend bool operator>=(UInt16 a, UInt16 b) noexcept
 666     {
 667         return a.high > b.high || (a.high == b.high && a.low >= b.low);
 668     }
 669 };
 670 struct UInt16::DivModResult final
 671 {
 672     UInt16 divResult;
 673     UInt16 modResult;
 674     DivModResult(UInt16 divResult, UInt16 modResult) : divResult(divResult), modResult(modResult)
 675     {
 676     }
 677 };
 678 UInt16::DivModResult UInt16::divMod2(UInt16 n, UInt16 d)
 679 {
 680     std::uint16_t nv = n.high;
 681     nv <<= 8;
 682     nv |= n.low;
 683     std::uint16_t dv = d.high;
 684     dv <<= 8;
 685     dv |= d.low;
 686     std::uint16_t qv = nv / dv;
 687     std::uint16_t rv = nv % dv;
 688     return DivModResult(UInt16(qv >> 8, qv & 0xFF), UInt16(rv >> 8, rv & 0xFF));
 689 }
 690 template <std::size_t NumberSizes,
 691           typename Digit,
 692           typename DoubleDigit,
 693           unsigned DigitBitCount,
 694           typename DigitCLZFn>
 695 void divMod(const Digit(&numerator)[NumberSizes],
 696             const Digit(&denominator)[NumberSizes],
 697             Digit(&quotient)[NumberSizes],
 698             Digit(&remainder)[NumberSizes])
 699 {
 700     constexpr Digit DigitMax = (static_cast<DoubleDigit>(1) << DigitBitCount) - 1;
 701     static_assert(NumberSizes != 0, "bad size");
 702     std::size_t m = NumberSizes;
 703     for(std::size_t i = 0; i < NumberSizes; i++)
 704     {
 705         if(denominator[i] != 0)
 706         {
 707             m = i;
 708             break;
 709         }
 710     }
 711     const std::size_t n = NumberSizes - m;
 712     if(n <= 1)
 713     {
 714         assert(denominator[NumberSizes - 1] != 0);
 715         for(std::size_t i = 0; i < NumberSizes - 1; i++)
 716         {
 717             remainder[i] = 0;
 718         }
 719         Digit currentRemainder = 0;
 720         for(std::size_t i = 0; i < NumberSizes; i++)
 721         {
 722             DoubleDigit n = currentRemainder;
 723             n <<= DigitBitCount;
 724             n |= numerator[i];
 725             quotient[i] = n / denominator[NumberSizes - 1];
 726             currentRemainder = n % denominator[NumberSizes - 1];
 727         }
 728         remainder[NumberSizes - 1] = currentRemainder;
 729         return;
 730     }
 731     // from algorithm D, section 4.3.1 in Art of Computer Programming volume 2 by Knuth.
 732     unsigned log2D = DigitCLZFn()(denominator[m]);
 733     Digit u[NumberSizes + 1];
 734     u[NumberSizes] = (numerator[NumberSizes - 1] << log2D) & DigitMax;
 735     u[0] = ((static_cast<DoubleDigit>(numerator[0]) << log2D) >> DigitBitCount) & DigitMax;
 736     for(std::size_t i = 1; i < NumberSizes; i++)
 737     {
 738         DoubleDigit value = numerator[i - 1];
 739         value <<= DigitBitCount;
 740         value |= numerator[i];
 741         value <<= log2D;
 742         u[i] = (value >> DigitBitCount) & DigitMax;
 743     }
 744     Digit v[NumberSizes + 1] = {};
 745     v[n] = (denominator[NumberSizes - 1] << log2D) & DigitMax;
 746     for(std::size_t i = 1; i < n; i++)
 747     {
 748         DoubleDigit value = denominator[m + i - 1];
 749         value <<= DigitBitCount;
 750         value |= denominator[m + i];
 751         value <<= log2D;
 752         v[i] = (value >> DigitBitCount) & DigitMax;
 753         quotient[i - 1] = 0;
 754     }
 755     for(std::size_t j = 0; j <= m; j++)
 756     {
 757         DoubleDigit qHat;
 758         if(u[j] == v[1])
 759         {
 760             qHat = DigitMax;
 761         }
 762         else
 763         {
 764             qHat = ((static_cast<DoubleDigit>(u[j]) << DigitBitCount) | u[j + 1]) / v[1];
 765         }
 766         {
 767             DoubleDigit lhs = v[2] * qHat;
 768             DoubleDigit rhsHigh =
 769                 ((static_cast<DoubleDigit>(u[j]) << DigitBitCount) | u[j + 1]) - qHat * v[1];
 770             Digit rhsLow = u[j + 2];
 771             if(rhsHigh < static_cast<DoubleDigit>(1) << DigitBitCount
 772                && lhs > ((rhsHigh << DigitBitCount) | rhsLow))
 773             {
 774                 qHat--;
 775                 lhs -= v[2];
 776                 rhsHigh += v[1];
 777                 if(rhsHigh < static_cast<DoubleDigit>(1) << DigitBitCount
 778                    && lhs > ((rhsHigh << DigitBitCount) | rhsLow))
 779                 {
 780                     qHat--;
 781                 }
 782             }
 783         }
 784         bool borrow = false;
 785         {
 786             Digit mulCarry = 0;
 787             for(std::size_t i = n; i > 0; i--)
 788             {
 789                 assert(i <= NumberSizes);
 790                 DoubleDigit product = qHat * v[i] + mulCarry;
 791                 mulCarry = product >> DigitBitCount;
 792                 product &= DigitMax;
 793                 bool prevBorrow = borrow;
 794                 DoubleDigit digit = u[j + i] - product - prevBorrow;
 795                 borrow = digit != (digit & DigitMax);
 796                 digit &= DigitMax;
 797                 u[j + i] = digit;
 798             }
 799             bool prevBorrow = borrow;
 800             DoubleDigit digit = u[j] - mulCarry - prevBorrow;
 801             borrow = digit != (digit & DigitMax);
 802             digit &= DigitMax;
 803             u[j] = digit;
 804         }
 805         Digit qj = qHat;
 806         if(borrow)
 807         {
 808             qj--;
 809             bool carry = false;
 810             for(std::size_t i = n; i > 0; i--)
 811             {
 812                 bool prevCarry = carry;
 813                 assert(i + j <= NumberSizes);
 814                 DoubleDigit digit = u[j + i] + v[i] + prevCarry;
 815                 carry = digit != (digit & DigitMax);
 816                 digit &= DigitMax;
 817                 u[j + i] = digit;
 818             }
 819             u[j] = (u[j] + carry) & DigitMax;
 820         }
 821         quotient[j + n - 1] = qj;
 822     }
 823     for(std::size_t i = 0; i < NumberSizes; i++)
 824     {
 825         DoubleDigit value = u[i];
 826         value <<= DigitBitCount;
 827         value |= u[i + 1];
 828         remainder[i] = value >> log2D;
 829     }
 830 }
 831 struct OpClz4 final
 832 {
 833     constexpr unsigned operator()(std::uint16_t value) const noexcept
 834     {
 835         return __builtin_clz(value) - (__builtin_clz(0) - 4);
 836     }
 837 };
 838 UInt16::DivModResult UInt16::divMod(UInt16 uIn, UInt16 vIn)
 839 {
 840     constexpr std::size_t NumberSizes = 4;
 841     typedef std::uint16_t Digit;
 842     typedef unsigned DoubleDigit;
 843     constexpr unsigned DigitBitCount = 4;
 844     Digit numerator[NumberSizes], denominator[NumberSizes], quotient[NumberSizes],
 845         remainder[NumberSizes];
 846     numerator[0] = uIn.high >> 4;
 847     numerator[1] = uIn.high & 0xF;
 848     numerator[2] = uIn.low >> 4;
 849     numerator[3] = uIn.low & 0xF;
 850     denominator[0] = vIn.high >> 4;
 851     denominator[1] = vIn.high & 0xF;
 852     denominator[2] = vIn.low >> 4;
 853     denominator[3] = vIn.low & 0xF;
 854     ::divMod<NumberSizes, Digit, DoubleDigit, DigitBitCount, OpClz4>(
 855         numerator, denominator, quotient, remainder);
 856     return DivModResult(
 857         UInt16((quotient[0] << 4) | quotient[1], (quotient[2] << 4) | quotient[3]),
 858         UInt16((remainder[0] << 4) | remainder[1], (remainder[2] << 4) | remainder[3]));
 859 }
 860 void mainFn(std::uint8_t start, std::uint8_t end)
 861 {
 862     for(unsigned dHigh = start; dHigh <= end; dHigh++)
 863     {
 864         if(start == 0)
 865         {
 866             std::ostringstream ss;
 867             ss << dHigh * 100 / (end + 1) << "%\n";
 868             std::cout << ss.str() << std::flush;
 869         }
 870         for(unsigned dLow = 0; dLow < 0x100U; dLow++)
 871         {
 872             UInt16 d(dHigh, dLow);
 873             if(d == UInt16(0))
 874                 continue;
 875 #if 0
 876             if(d < UInt16(2, 0))
 877                 continue;
 878 #endif
 879             for(unsigned nHigh = 0; nHigh < 0x100U; nHigh++)
 880             {
 881                 for(unsigned nLow = 0; nLow < 0x100U; nLow++)
 882                 {
 883                     UInt16 n(nHigh, nLow);
 884                     auto result = UInt16::divMod(n, d);
 885                     auto result2 = UInt16::divMod2(n, d);
 886                     if(result.divResult != result2.divResult
 887                        || result.modResult != result2.modResult)
 888                     {
 889                         std::ostringstream ss;
 890                         ss << std::hex << std::uppercase;
 891                         ss.fill('0');
 892                         ss.width(2);
 893                         ss << static_cast<unsigned>(n.high);
 894                         ss.width(2);
 895                         ss << static_cast<unsigned>(n.low);
 896                         ss << " / ";
 897                         ss.width(2);
 898                         ss << static_cast<unsigned>(d.high);
 899                         ss.width(2);
 900                         ss << static_cast<unsigned>(d.low);
 901                         ss << " == ";
 902                         ss.width(2);
 903                         ss << static_cast<unsigned>(result.divResult.high);
 904                         ss.width(2);
 905                         ss << static_cast<unsigned>(result.divResult.low);
 906                         ss << ", ";
 907                         ss.width(2);
 908                         ss << static_cast<unsigned>(result2.divResult.high);
 909                         ss.width(2);
 910                         ss << static_cast<unsigned>(result2.divResult.low);
 911                         ss << std::endl;
 912                         ss.width(2);
 913                         ss << static_cast<unsigned>(n.high);
 914                         ss.width(2);
 915                         ss << static_cast<unsigned>(n.low);
 916                         ss << " % ";
 917                         ss.width(2);
 918                         ss << static_cast<unsigned>(d.high);
 919                         ss.width(2);
 920                         ss << static_cast<unsigned>(d.low);
 921                         ss << " == ";
 922                         ss.width(2);
 923                         ss << static_cast<unsigned>(result.modResult.high);
 924                         ss.width(2);
 925                         ss << static_cast<unsigned>(result.modResult.low);
 926                         ss << ", ";
 927                         ss.width(2);
 928                         ss << static_cast<unsigned>(result2.modResult.high);
 929                         ss.width(2);
 930                         ss << static_cast<unsigned>(result2.modResult.low);
 931                         std::cout << ss.str() << std::endl;
 932                         return;
 933                     }
 934                 }
 935             }
 936         }
 937     }
 938 }
 939 struct Init
 940 {
 941     Init()
 942     {
 943         const std::size_t splitCount = 6;
 944         std::list<std::thread> threads;
 945         for(std::size_t i = 0; i < splitCount; i++)
 946         {
 947             auto start = i * 0x100 / splitCount;
 948             auto end = (i + 1) * 0x100 / splitCount - 1;
 949             threads.push_back(std::thread([=]()
 950                                           {
 951                                               mainFn(start, end);
 952                                           }));
 953         }
 954         for(std::thread &thread : threads)
 955             thread.join();
 956         std::exit(0);
 957     }
 958 };
 959 Init init;
 960 }
 961 #endif