src/arch/arm/insts/fplib.cc
1 /*
2 * Copyright (c) 2012-2013 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Edmund Grimley Evans
38 * Thomas Grocutt
39 */
40
41 #include <stdint.h>
42
43 #include <cassert>
44
45 #include "fplib.hh"
46
47 namespace ArmISA
48 {
49
50 #define FPLIB_RN 0
51 #define FPLIB_RP 1
52 #define FPLIB_RM 2
53 #define FPLIB_RZ 3
54 #define FPLIB_FZ 4
55 #define FPLIB_DN 8
56 #define FPLIB_AHP 16
57
58 #define FPLIB_IDC 128 // Input Denormal
59 #define FPLIB_IXC 16 // Inexact
60 #define FPLIB_UFC 8 // Underflow
61 #define FPLIB_OFC 4 // Overflow
62 #define FPLIB_DZC 2 // Division by Zero
63 #define FPLIB_IOC 1 // Invalid Operation
64
65 static inline uint16_t
66 lsl16(uint16_t x, uint32_t shift)
67 {
68 return shift < 16 ? x << shift : 0;
69 }
70
71 static inline uint16_t
72 lsr16(uint16_t x, uint32_t shift)
73 {
74 return shift < 16 ? x >> shift : 0;
75 }
76
77 static inline uint32_t
78 lsl32(uint32_t x, uint32_t shift)
79 {
80 return shift < 32 ? x << shift : 0;
81 }
82
83 static inline uint32_t
84 lsr32(uint32_t x, uint32_t shift)
85 {
86 return shift < 32 ? x >> shift : 0;
87 }
88
89 static inline uint64_t
90 lsl64(uint64_t x, uint32_t shift)
91 {
92 return shift < 64 ? x << shift : 0;
93 }
94
95 static inline uint64_t
96 lsr64(uint64_t x, uint32_t shift)
97 {
98 return shift < 64 ? x >> shift : 0;
99 }
100
101 static inline void
102 lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
103 {
104 if (shift == 0) {
105 *r1 = x1;
106 *r0 = x0;
107 } else if (shift < 64) {
108 *r1 = x1 << shift | x0 >> (64 - shift);
109 *r0 = x0 << shift;
110 } else if (shift < 128) {
111 *r1 = x0 << (shift - 64);
112 *r0 = 0;
113 } else {
114 *r1 = 0;
115 *r0 = 0;
116 }
117 }
118
119 static inline void
120 lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
121 {
122 if (shift == 0) {
123 *r1 = x1;
124 *r0 = x0;
125 } else if (shift < 64) {
126 *r0 = x0 >> shift | x1 << (64 - shift);
127 *r1 = x1 >> shift;
128 } else if (shift < 128) {
129 *r0 = x1 >> (shift - 64);
130 *r1 = 0;
131 } else {
132 *r0 = 0;
133 *r1 = 0;
134 }
135 }
136
137 static inline void
138 mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
139 {
140 uint32_t mask = ((uint32_t)1 << 31) - 1;
141 uint64_t a0 = a & mask;
142 uint64_t a1 = a >> 31 & mask;
143 uint64_t b0 = b & mask;
144 uint64_t b1 = b >> 31 & mask;
145 uint64_t p0 = a0 * b0;
146 uint64_t p2 = a1 * b1;
147 uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
148 uint64_t s0 = p0;
149 uint64_t s1 = (s0 >> 31) + p1;
150 uint64_t s2 = (s1 >> 31) + p2;
151 *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
152 *x1 = s2 >> 2;
153 }
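// mul62x62 forms the full product of two operands of up to 62 significant
// bits by splitting each into two 31-bit limbs and using the Karatsuba
// identity a0*b1 + a1*b0 = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1, so only
// three 64-bit multiplications are needed. Keeping the limbs at 31 bits
// leaves headroom so the limb sums and the carries folded into s1 and s2
// cannot overflow a uint64_t. The low 64 bits of the product are returned
// in *x0 and the remaining high bits in *x1.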
154
155 static inline void
156 mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
157 {
158 uint64_t t0 = (uint64_t)(uint32_t)a * b;
159 uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
160 *x0 = t1 << 32 | (uint32_t)t0;
161 *x1 = t1 >> 32;
162 }
163
164 static inline void
165 add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
166 uint64_t b1)
167 {
168 *x0 = a0 + b0;
169 *x1 = a1 + b1 + (*x0 < a0);
170 }
171
172 static inline void
173 sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
174 uint64_t b1)
175 {
176 *x0 = a0 - b0;
177 *x1 = a1 - b1 - (*x0 > a0);
178 }
179
180 static inline int
181 cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
182 {
183 return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
184 }
185
186 static inline uint16_t
187 fp16_normalise(uint16_t mnt, int *exp)
188 {
189 int shift;
190
191 if (!mnt) {
192 return 0;
193 }
194
195 for (shift = 8; shift; shift >>= 1) {
196 if (!(mnt >> (16 - shift))) {
197 mnt <<= shift;
198 *exp -= shift;
199 }
200 }
201 return mnt;
202 }
203
204 static inline uint32_t
205 fp32_normalise(uint32_t mnt, int *exp)
206 {
207 int shift;
208
209 if (!mnt) {
210 return 0;
211 }
212
213 for (shift = 16; shift; shift >>= 1) {
214 if (!(mnt >> (32 - shift))) {
215 mnt <<= shift;
216 *exp -= shift;
217 }
218 }
219 return mnt;
220 }
221
222 static inline uint64_t
223 fp64_normalise(uint64_t mnt, int *exp)
224 {
225 int shift;
226
227 if (!mnt) {
228 return 0;
229 }
230
231 for (shift = 32; shift; shift >>= 1) {
232 if (!(mnt >> (64 - shift))) {
233 mnt <<= shift;
234 *exp -= shift;
235 }
236 }
237 return mnt;
238 }
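// The normalise helpers shift a non-zero mantissa left until its most
// significant bit is set and decrement *exp by the same amount. The loop is
// a binary search over shift amounts (32, 16, 8, 4, 2, 1 in the 64-bit
// case), i.e. a portable count-leading-zeros. For example,
// fp64_normalise(1, &exp) returns 1ULL << 63 and subtracts 63 from exp.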
239
240 static inline void
241 fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
242 {
243 uint64_t x0 = *mnt0;
244 uint64_t x1 = *mnt1;
245 int shift;
246
247 if (!x0 && !x1) {
248 return;
249 }
250
251 if (!x1) {
252 x1 = x0;
253 x0 = 0;
254 *exp -= 64;
255 }
256
257 for (shift = 32; shift; shift >>= 1) {
258 if (!(x1 >> (64 - shift))) {
259 x1 = x1 << shift | x0 >> (64 - shift);
260 x0 <<= shift;
261 *exp -= shift;
262 }
263 }
264
265 *mnt0 = x0;
266 *mnt1 = x1;
267 }
268
269 static inline uint16_t
270 fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
271 {
272 return sgn << 15 | exp << 10 | (mnt & (((uint16_t)1 << 10) - 1));
273 }
274
275 static inline uint32_t
276 fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
277 {
278 return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1));
279 }
280
281 static inline uint64_t
282 fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
283 {
284 return (uint64_t)sgn << 63 | exp << 52 | (mnt & (((uint64_t)1 << 52) - 1));
285 }
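// The pack helpers assemble sign, biased exponent and fraction fields into
// an IEEE-754 encoding. The mantissa argument is masked down to the fraction
// field, so callers may pass a value that still contains the implicit
// leading one (or pass -1 to set every fraction bit, as the fp*_max_normal
// helpers below do). For example, fp32_pack(0, 127, 0) gives 0x3f800000,
// i.e. 1.0f.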
286
287 static inline uint16_t
288 fp16_zero(int sgn)
289 {
290 return fp16_pack(sgn, 0, 0);
291 }
292
293 static inline uint32_t
294 fp32_zero(int sgn)
295 {
296 return fp32_pack(sgn, 0, 0);
297 }
298
299 static inline uint64_t
300 fp64_zero(int sgn)
301 {
302 return fp64_pack(sgn, 0, 0);
303 }
304
305 static inline uint16_t
306 fp16_max_normal(int sgn)
307 {
308 return fp16_pack(sgn, 30, -1);
309 }
310
311 static inline uint32_t
312 fp32_max_normal(int sgn)
313 {
314 return fp32_pack(sgn, 254, -1);
315 }
316
317 static inline uint64_t
318 fp64_max_normal(int sgn)
319 {
320 return fp64_pack(sgn, 2046, -1);
321 }
322
323 static inline uint16_t
324 fp16_infinity(int sgn)
325 {
326 return fp16_pack(sgn, 31, 0);
327 }
328
329 static inline uint32_t
330 fp32_infinity(int sgn)
331 {
332 return fp32_pack(sgn, 255, 0);
333 }
334
335 static inline uint64_t
336 fp64_infinity(int sgn)
337 {
338 return fp64_pack(sgn, 2047, 0);
339 }
340
341 static inline uint16_t
342 fp16_defaultNaN()
343 {
344 return fp16_pack(0, 31, (uint16_t)1 << 9);
345 }
346
347 static inline uint32_t
348 fp32_defaultNaN()
349 {
350 return fp32_pack(0, 255, (uint32_t)1 << 22);
351 }
352
353 static inline uint64_t
354 fp64_defaultNaN()
355 {
356 return fp64_pack(0, 2047, (uint64_t)1 << 51);
357 }
358
359 static inline void
360 fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
361 int *flags)
362 {
363 *sgn = x >> 15;
364 *exp = x >> 10 & 31;
365 *mnt = x & (((uint16_t)1 << 10) - 1);
366
367 // Handle subnormals:
368 if (*exp) {
369 *mnt |= (uint16_t)1 << 10;
370 } else {
371 ++*exp;
372 // There is no flush to zero in this case!
373 }
374 }
375
376 static inline void
377 fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
378 int *flags)
379 {
380 *sgn = x >> 31;
381 *exp = x >> 23 & 255;
382 *mnt = x & (((uint32_t)1 << 23) - 1);
383
384 // Handle subnormals:
385 if (*exp) {
386 *mnt |= (uint32_t)1 << 23;
387 } else {
388 ++*exp;
389 if ((mode & FPLIB_FZ) && *mnt) {
390 *flags |= FPLIB_IDC;
391 *mnt = 0;
392 }
393 }
394 }
395
396 static inline void
397 fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
398 int *flags)
399 {
400 *sgn = x >> 63;
401 *exp = x >> 52 & 2047;
402 *mnt = x & (((uint64_t)1 << 52) - 1);
403
404 // Handle subnormals:
405 if (*exp) {
406 *mnt |= (uint64_t)1 << 52;
407 } else {
408 ++*exp;
409 if ((mode & FPLIB_FZ) && *mnt) {
410 *flags |= FPLIB_IDC;
411 *mnt = 0;
412 }
413 }
414 }
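// The unpack helpers split an encoding into sign, exponent and significand,
// making the implicit leading one explicit for normal numbers. For a zero or
// subnormal encoding the exponent is bumped from 0 to 1 so that the
// significand carries the same weight as the smallest normals; with FPLIB_FZ
// set, a non-zero single- or double-precision subnormal is flushed to zero
// and the Input Denormal flag (FPLIB_IDC) is raised. Half precision is never
// flushed, as noted in fp16_unpack above.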
415
416 static inline uint32_t
417 fp32_process_NaN(uint32_t a, int mode, int *flags)
418 {
419 if (!(a >> 22 & 1)) {
420 *flags |= FPLIB_IOC;
421 a |= (uint32_t)1 << 22;
422 }
423 return mode & FPLIB_DN ? fp32_defaultNaN() : a;
424 }
425
426 static inline uint64_t
427 fp64_process_NaN(uint64_t a, int mode, int *flags)
428 {
429 if (!(a >> 51 & 1)) {
430 *flags |= FPLIB_IOC;
431 a |= (uint64_t)1 << 51;
432 }
433 return mode & FPLIB_DN ? fp64_defaultNaN() : a;
434 }
435
436 static uint32_t
437 fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
438 {
439 int a_exp = a >> 23 & 255;
440 uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
441 int b_exp = b >> 23 & 255;
442 uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);
443
444 // Handle signalling NaNs:
445 if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
446 return fp32_process_NaN(a, mode, flags);
447 if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
448 return fp32_process_NaN(b, mode, flags);
449
450 // Handle quiet NaNs:
451 if (a_exp == 255 && a_mnt)
452 return fp32_process_NaN(a, mode, flags);
453 if (b_exp == 255 && b_mnt)
454 return fp32_process_NaN(b, mode, flags);
455
456 return 0;
457 }
458
459 static uint64_t
460 fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
461 {
462 int a_exp = a >> 52 & 2047;
463 uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
464 int b_exp = b >> 52 & 2047;
465 uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);
466
467 // Handle signalling NaNs:
468 if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
469 return fp64_process_NaN(a, mode, flags);
470 if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
471 return fp64_process_NaN(b, mode, flags);
472
473 // Handle quiet NaNs:
474 if (a_exp == 2047 && a_mnt)
475 return fp64_process_NaN(a, mode, flags);
476 if (b_exp == 2047 && b_mnt)
477 return fp64_process_NaN(b, mode, flags);
478
479 return 0;
480 }
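// fp32/fp64_process_NaNs follow the ARM pseudocode's priority order: a
// signalling NaN in the first operand wins, then one in the second, then a
// quiet NaN in the first, then the second. fp*_process_NaN then quietens the
// chosen NaN (setting the top fraction bit and raising Invalid Operation if
// it was signalling), or substitutes the default NaN when the DN bit is set.
// A return value of 0 means neither operand was a NaN.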
481
482 static uint32_t
483 fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
484 {
485 int a_exp = a >> 23 & 255;
486 uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
487 int b_exp = b >> 23 & 255;
488 uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);
489 int c_exp = c >> 23 & 255;
490 uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1);
491
492 // Handle signalling NaNs:
493 if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
494 return fp32_process_NaN(a, mode, flags);
495 if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
496 return fp32_process_NaN(b, mode, flags);
497 if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1))
498 return fp32_process_NaN(c, mode, flags);
499
500 // Handle quiet NaNs:
501 if (a_exp == 255 && a_mnt)
502 return fp32_process_NaN(a, mode, flags);
503 if (b_exp == 255 && b_mnt)
504 return fp32_process_NaN(b, mode, flags);
505 if (c_exp == 255 && c_mnt)
506 return fp32_process_NaN(c, mode, flags);
507
508 return 0;
509 }
510
511 static uint64_t
512 fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
513 {
514 int a_exp = a >> 52 & 2047;
515 uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
516 int b_exp = b >> 52 & 2047;
517 uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);
518 int c_exp = c >> 52 & 2047;
519 uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1);
520
521 // Handle signalling NaNs:
522 if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
523 return fp64_process_NaN(a, mode, flags);
524 if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
525 return fp64_process_NaN(b, mode, flags);
526 if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1))
527 return fp64_process_NaN(c, mode, flags);
528
529 // Handle quiet NaNs:
530 if (a_exp == 2047 && a_mnt)
531 return fp64_process_NaN(a, mode, flags);
532 if (b_exp == 2047 && b_mnt)
533 return fp64_process_NaN(b, mode, flags);
534 if (c_exp == 2047 && c_mnt)
535 return fp64_process_NaN(c, mode, flags);
536
537 return 0;
538 }
539
540 static uint16_t
541 fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
542 {
543 int biased_exp; // non-negative exponent value for result
544 uint16_t int_mant; // mantissa for result, less than (1 << 11)
545 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
546
547 assert(rm != FPRounding_TIEAWAY);
548
549 // There is no flush to zero in this case!
550
551 // The bottom 5 bits of mnt are ORed together:
552 mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0);
553
554 if (exp > 0) {
555 biased_exp = exp;
556 int_mant = mnt >> 2;
557 error = mnt & 3;
558 } else {
559 biased_exp = 0;
560 int_mant = lsr16(mnt, 3 - exp);
561 error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
562 }
563
564 if (!biased_exp && error) { // xx should also check fpscr_val<11>
565 *flags |= FPLIB_UFC;
566 }
567
568 // Round up:
569 if ((rm == FPLIB_RN && (error == 3 ||
570 (error == 2 && (int_mant & 1)))) ||
571 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
572 ++int_mant;
573 if (int_mant == (uint32_t)1 << 10) {
574 // Rounded up from denormalized to normalized
575 biased_exp = 1;
576 }
577 if (int_mant == (uint32_t)1 << 11) {
578 // Rounded up to next exponent
579 ++biased_exp;
580 int_mant >>= 1;
581 }
582 }
583
584 // Handle rounding to odd aka Von Neumann rounding:
585 if (error && rm == FPRounding_ODD)
586 int_mant |= 1;
587
588 // Handle overflow:
589 if (!(mode & FPLIB_AHP)) {
590 if (biased_exp >= 31) {
591 *flags |= FPLIB_OFC | FPLIB_IXC;
592 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
593 (rm == FPLIB_RM && sgn)) {
594 return fp16_infinity(sgn);
595 } else {
596 return fp16_max_normal(sgn);
597 }
598 }
599 } else {
600 if (biased_exp >= 32) {
601 *flags |= FPLIB_IOC;
602 return fp16_pack(sgn, 31, -1);
603 }
604 }
605
606 if (error) {
607 *flags |= FPLIB_IXC;
608 }
609
610 return fp16_pack(sgn, biased_exp, int_mant);
611 }
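// In the round helpers, `error` acts as a guard bit plus a sticky bit: bit 1
// is the first bit discarded below int_mant (worth half a ULP) and bit 0 is
// the OR of everything below that, so error == 2 means the discarded part is
// exactly half a ULP and error == 3 means more than half. FPLIB_RN rounds up
// on error == 3, or on error == 2 when the kept LSB is odd (round to
// nearest, ties to even); FPLIB_RP and FPLIB_RM round up whenever error is
// non-zero and the sign matches; FPLIB_RZ never rounds up.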
612
613 static uint32_t
614 fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
615 {
616 int biased_exp; // non-negative exponent value for result
617 uint32_t int_mant; // mantissa for result, less than (1 << 24)
618 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
619
620 assert(rm != FPRounding_TIEAWAY);
621
622 // Flush to zero:
623 if ((mode & FPLIB_FZ) && exp < 1) {
624 *flags |= FPLIB_UFC;
625 return fp32_zero(sgn);
626 }
627
628 // The bottom 8 bits of mnt are ORed together:
629 mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0);
630
631 if (exp > 0) {
632 biased_exp = exp;
633 int_mant = mnt >> 2;
634 error = mnt & 3;
635 } else {
636 biased_exp = 0;
637 int_mant = lsr32(mnt, 3 - exp);
638 error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
639 }
640
641 if (!biased_exp && error) { // xx should also check fpscr_val<11>
642 *flags |= FPLIB_UFC;
643 }
644
645 // Round up:
646 if ((rm == FPLIB_RN && (error == 3 ||
647 (error == 2 && (int_mant & 1)))) ||
648 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
649 ++int_mant;
650 if (int_mant == (uint32_t)1 << 23) {
651 // Rounded up from denormalized to normalized
652 biased_exp = 1;
653 }
654 if (int_mant == (uint32_t)1 << 24) {
655 // Rounded up to next exponent
656 ++biased_exp;
657 int_mant >>= 1;
658 }
659 }
660
661 // Handle rounding to odd aka Von Neumann rounding:
662 if (error && rm == FPRounding_ODD)
663 int_mant |= 1;
664
665 // Handle overflow:
666 if (biased_exp >= 255) {
667 *flags |= FPLIB_OFC | FPLIB_IXC;
668 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
669 (rm == FPLIB_RM && sgn)) {
670 return fp32_infinity(sgn);
671 } else {
672 return fp32_max_normal(sgn);
673 }
674 }
675
676 if (error) {
677 *flags |= FPLIB_IXC;
678 }
679
680 return fp32_pack(sgn, biased_exp, int_mant);
681 }
682
683 static uint32_t
684 fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
685 {
686 return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
687 }
688
689 static uint64_t
690 fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
691 {
692 int biased_exp; // non-negative exponent value for result
693 uint64_t int_mant; // mantissa for result, less than (1 << 52)
694 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
695
696 assert(rm != FPRounding_TIEAWAY);
697
698 // Flush to zero:
699 if ((mode & FPLIB_FZ) && exp < 1) {
700 *flags |= FPLIB_UFC;
701 return fp64_zero(sgn);
702 }
703
704 // The bottom 11 bits of mnt are ORed together:
705 mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x3ff) != 0);
706
707 if (exp > 0) {
708 biased_exp = exp;
709 int_mant = mnt >> 2;
710 error = mnt & 3;
711 } else {
712 biased_exp = 0;
713 int_mant = lsr64(mnt, 3 - exp);
714 error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
715 }
716
717 if (!biased_exp && error) { // xx should also check fpscr_val<11>
718 *flags |= FPLIB_UFC;
719 }
720
721 // Round up:
722 if ((rm == FPLIB_RN && (error == 3 ||
723 (error == 2 && (int_mant & 1)))) ||
724 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
725 ++int_mant;
726 if (int_mant == (uint64_t)1 << 52) {
727 // Rounded up from denormalized to normalized
728 biased_exp = 1;
729 }
730 if (int_mant == (uint64_t)1 << 53) {
731 // Rounded up to next exponent
732 ++biased_exp;
733 int_mant >>= 1;
734 }
735 }
736
737 // Handle rounding to odd aka Von Neumann rounding:
738 if (error && rm == FPRounding_ODD)
739 int_mant |= 1;
740
741 // Handle overflow:
742 if (biased_exp >= 2047) {
743 *flags |= FPLIB_OFC | FPLIB_IXC;
744 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
745 (rm == FPLIB_RM && sgn)) {
746 return fp64_infinity(sgn);
747 } else {
748 return fp64_max_normal(sgn);
749 }
750 }
751
752 if (error) {
753 *flags |= FPLIB_IXC;
754 }
755
756 return fp64_pack(sgn, biased_exp, int_mant);
757 }
758
759 static uint64_t
760 fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
761 {
762 return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
763 }
764
765 static int
766 fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
767 {
768 int a_sgn, a_exp, b_sgn, b_exp;
769 uint32_t a_mnt, b_mnt;
770
771 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
772 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
773
774 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
775 (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
776 if ((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) ||
777 (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1)))
778 *flags |= FPLIB_IOC;
779 return 0;
780 }
781 return a == b || (!a_mnt && !b_mnt);
782 }
783
784 static int
785 fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
786 {
787 int a_sgn, a_exp, b_sgn, b_exp;
788 uint32_t a_mnt, b_mnt;
789
790 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
791 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
792
793 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
794 (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
795 *flags |= FPLIB_IOC;
796 return 0;
797 }
798 if (!a_mnt && !b_mnt)
799 return 1;
800 if (a_sgn != b_sgn)
801 return b_sgn;
802 if (a_exp != b_exp)
803 return a_sgn ^ (a_exp > b_exp);
804 if (a_mnt != b_mnt)
805 return a_sgn ^ (a_mnt > b_mnt);
806 return 1;
807 }
808
809 static int
810 fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
811 {
812 int a_sgn, a_exp, b_sgn, b_exp;
813 uint32_t a_mnt, b_mnt;
814
815 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
816 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
817
818 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
819 (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
820 *flags |= FPLIB_IOC;
821 return 0;
822 }
823 if (!a_mnt && !b_mnt)
824 return 0;
825 if (a_sgn != b_sgn)
826 return b_sgn;
827 if (a_exp != b_exp)
828 return a_sgn ^ (a_exp > b_exp);
829 if (a_mnt != b_mnt)
830 return a_sgn ^ (a_mnt > b_mnt);
831 return 0;
832 }
833
834 static int
835 fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
836 {
837 int a_sgn, a_exp, b_sgn, b_exp;
838 uint64_t a_mnt, b_mnt;
839
840 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
841 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
842
843 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
844 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
845 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) ||
846 (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b >> 51 & 1)))
847 *flags |= FPLIB_IOC;
848 return 0;
849 }
850 return a == b || (!a_mnt && !b_mnt);
851 }
852
853 static int
854 fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
855 {
856 int a_sgn, a_exp, b_sgn, b_exp;
857 uint64_t a_mnt, b_mnt;
858
859 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
860 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
861
862 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
863 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
864 *flags |= FPLIB_IOC;
865 return 0;
866 }
867 if (!a_mnt && !b_mnt)
868 return 1;
869 if (a_sgn != b_sgn)
870 return b_sgn;
871 if (a_exp != b_exp)
872 return a_sgn ^ (a_exp > b_exp);
873 if (a_mnt != b_mnt)
874 return a_sgn ^ (a_mnt > b_mnt);
875 return 1;
876 }
877
878 static int
879 fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
880 {
881 int a_sgn, a_exp, b_sgn, b_exp;
882 uint64_t a_mnt, b_mnt;
883
884 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
885 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
886
887 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
888 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
889 *flags |= FPLIB_IOC;
890 return 0;
891 }
892 if (!a_mnt && !b_mnt)
893 return 0;
894 if (a_sgn != b_sgn)
895 return b_sgn;
896 if (a_exp != b_exp)
897 return a_sgn ^ (a_exp > b_exp);
898 if (a_mnt != b_mnt)
899 return a_sgn ^ (a_mnt > b_mnt);
900 return 0;
901 }
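// Once NaNs and the +0/-0 pair have been dealt with, the compare_eq/ge/gt
// helpers above order values directly from the unpacked fields: differing
// signs decide first, then the exponent, then the significand, with the
// sense inverted for negative operands via the `a_sgn ^ ...` terms. Any NaN
// operand makes the comparison unordered: ge/gt return 0 and always raise
// Invalid Operation, while compare_eq raises it only for signalling NaNs.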
902
903 static uint32_t
904 fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
905 {
906 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
907 uint32_t a_mnt, b_mnt, x, x_mnt;
908
909 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
910 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
911
912 if ((x = fp32_process_NaNs(a, b, mode, flags))) {
913 return x;
914 }
915
916 b_sgn ^= neg;
917
918 // Handle infinities and zeroes:
919 if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) {
920 *flags |= FPLIB_IOC;
921 return fp32_defaultNaN();
922 } else if (a_exp == 255) {
923 return fp32_infinity(a_sgn);
924 } else if (b_exp == 255) {
925 return fp32_infinity(b_sgn);
926 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
927 return fp32_zero(a_sgn);
928 }
929
930 a_mnt <<= 3;
931 b_mnt <<= 3;
932 if (a_exp >= b_exp) {
933 b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
934 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
935 b_exp = a_exp;
936 } else {
937 a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
938 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
939 a_exp = b_exp;
940 }
941 x_sgn = a_sgn;
942 x_exp = a_exp;
943 if (a_sgn == b_sgn) {
944 x_mnt = a_mnt + b_mnt;
945 } else if (a_mnt >= b_mnt) {
946 x_mnt = a_mnt - b_mnt;
947 } else {
948 x_sgn ^= 1;
949 x_mnt = b_mnt - a_mnt;
950 }
951
952 if (!x_mnt) {
953 // Sign of exact zero result depends on rounding mode
954 return fp32_zero((mode & 3) == 2);
955 }
956
957 x_mnt = fp32_normalise(x_mnt, &x_exp);
958
959 return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags);
960 }
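// In the addition path the significands are pre-shifted left by 3 to make
// room for guard bits, the smaller operand is aligned to the larger exponent
// with the shifted-out bits collapsed into a sticky bit, and the aligned
// significands are then added or subtracted. When subtraction cancels to
// exactly zero, the sign of the zero follows the rounding mode:
// (mode & 3) == 2 is FPLIB_RM, which yields -0; every other mode gives +0.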
961
962 static uint64_t
963 fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
964 {
965 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
966 uint64_t a_mnt, b_mnt, x, x_mnt;
967
968 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
969 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
970
971 if ((x = fp64_process_NaNs(a, b, mode, flags))) {
972 return x;
973 }
974
975 b_sgn ^= neg;
976
977 // Handle infinities and zeroes:
978 if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) {
979 *flags |= FPLIB_IOC;
980 return fp64_defaultNaN();
981 } else if (a_exp == 2047) {
982 return fp64_infinity(a_sgn);
983 } else if (b_exp == 2047) {
984 return fp64_infinity(b_sgn);
985 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
986 return fp64_zero(a_sgn);
987 }
988
989 a_mnt <<= 3;
990 b_mnt <<= 3;
991 if (a_exp >= b_exp) {
992 b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
993 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
994 b_exp = a_exp;
995 } else {
996 a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
997 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
998 a_exp = b_exp;
999 }
1000 x_sgn = a_sgn;
1001 x_exp = a_exp;
1002 if (a_sgn == b_sgn) {
1003 x_mnt = a_mnt + b_mnt;
1004 } else if (a_mnt >= b_mnt) {
1005 x_mnt = a_mnt - b_mnt;
1006 } else {
1007 x_sgn ^= 1;
1008 x_mnt = b_mnt - a_mnt;
1009 }
1010
1011 if (!x_mnt) {
1012 // Sign of exact zero result depends on rounding mode
1013 return fp64_zero((mode & 3) == 2);
1014 }
1015
1016 x_mnt = fp64_normalise(x_mnt, &x_exp);
1017
1018 return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags);
1019 }
1020
1021 static uint32_t
1022 fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
1023 {
1024 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1025 uint32_t a_mnt, b_mnt, x;
1026 uint64_t x_mnt;
1027
1028 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1029 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1030
1031 if ((x = fp32_process_NaNs(a, b, mode, flags))) {
1032 return x;
1033 }
1034
1035 // Handle infinities and zeroes:
1036 if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) {
1037 *flags |= FPLIB_IOC;
1038 return fp32_defaultNaN();
1039 } else if (a_exp == 255 || b_exp == 255) {
1040 return fp32_infinity(a_sgn ^ b_sgn);
1041 } else if (!a_mnt || !b_mnt) {
1042 return fp32_zero(a_sgn ^ b_sgn);
1043 }
1044
1045 // Multiply and normalise:
1046 x_sgn = a_sgn ^ b_sgn;
1047 x_exp = a_exp + b_exp - 110;
1048 x_mnt = (uint64_t)a_mnt * b_mnt;
1049 x_mnt = fp64_normalise(x_mnt, &x_exp);
1050
1051 // Convert to 32 bits, collapsing error into bottom bit:
1052 x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33);
1053
1054 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1055 }
1056
1057 static uint64_t
1058 fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
1059 {
1060 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1061 uint64_t a_mnt, b_mnt, x;
1062 uint64_t x0_mnt, x1_mnt;
1063
1064 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1065 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1066
1067 if ((x = fp64_process_NaNs(a, b, mode, flags))) {
1068 return x;
1069 }
1070
1071 // Handle infinities and zeroes:
1072 if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) {
1073 *flags |= FPLIB_IOC;
1074 return fp64_defaultNaN();
1075 } else if (a_exp == 2047 || b_exp == 2047) {
1076 return fp64_infinity(a_sgn ^ b_sgn);
1077 } else if (!a_mnt || !b_mnt) {
1078 return fp64_zero(a_sgn ^ b_sgn);
1079 }
1080
1081 // Multiply and normalise:
1082 x_sgn = a_sgn ^ b_sgn;
1083 x_exp = a_exp + b_exp - 1000;
1084 mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
1085 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
1086
1087 // Convert to 64 bits, collapsing error into bottom bit:
1088 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1089
1090 return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
1091 }
1092
1093 static uint32_t
1094 fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
1095 int mode, int *flags)
1096 {
1097 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1098 uint32_t a_mnt, b_mnt, c_mnt, x;
1099 uint64_t x_mnt, y_mnt;
1100
1101 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1102 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1103 fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1104
1105 x = fp32_process_NaNs3(a, b, c, mode, flags);
1106
1107 // Quiet NaN added to product of zero and infinity:
1108 if (a_exp == 255 && (a_mnt >> 22 & 1) &&
1109 ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) ||
1110 (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) {
1111 x = fp32_defaultNaN();
1112 *flags |= FPLIB_IOC;
1113 }
1114
1115 if (x) {
1116 return x;
1117 }
1118
1119 // Handle infinities and zeroes:
1120 if ((b_exp == 255 && !c_mnt) ||
1121 (c_exp == 255 && !b_mnt) ||
1122 (a_exp == 255 && (b_exp == 255 || c_exp == 255) &&
1123 (a_sgn != (b_sgn ^ c_sgn)))) {
1124 *flags |= FPLIB_IOC;
1125 return fp32_defaultNaN();
1126 }
1127 if (a_exp == 255)
1128 return fp32_infinity(a_sgn);
1129 if (b_exp == 255 || c_exp == 255)
1130 return fp32_infinity(b_sgn ^ c_sgn);
1131 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1132 return fp32_zero(a_sgn);
1133
1134 x_sgn = a_sgn;
1135 x_exp = a_exp + 13;
1136 x_mnt = (uint64_t)a_mnt << 27;
1137
1138 // Multiply:
1139 y_sgn = b_sgn ^ c_sgn;
1140 y_exp = b_exp + c_exp - 113;
1141 y_mnt = (uint64_t)b_mnt * c_mnt << 3;
1142 if (!y_mnt) {
1143 y_exp = x_exp;
1144 }
1145
1146 // Add:
1147 if (x_exp >= y_exp) {
1148 y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
1149 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
1150 y_exp = x_exp;
1151 } else {
1152 x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
1153 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
1154 x_exp = y_exp;
1155 }
1156 if (x_sgn == y_sgn) {
1157 x_mnt = x_mnt + y_mnt;
1158 } else if (x_mnt >= y_mnt) {
1159 x_mnt = x_mnt - y_mnt;
1160 } else {
1161 x_sgn ^= 1;
1162 x_mnt = y_mnt - x_mnt;
1163 }
1164
1165 if (!x_mnt) {
1166 // Sign of exact zero result depends on rounding mode
1167 return fp32_zero((mode & 3) == 2);
1168 }
1169
1170 // Normalise and convert to 32 bits, collapsing error into bottom bit:
1171 x_mnt = fp64_normalise(x_mnt, &x_exp);
1172 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);
1173
1174 return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
1175 }
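// fp32_muladd is a true fused multiply-add: the 48-bit product of the two
// 24-bit significands is kept in full, aligned against the widened addend,
// and only the final sum is rounded, so there is no intermediate rounding of
// b * c. The special case above that raises Invalid Operation for a quiet
// NaN addend combined with a zero-times-infinity product mirrors the ARM
// FPMulAdd pseudocode.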
1176
1177 static uint64_t
1178 fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
1179 int mode, int *flags)
1180 {
1181 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1182 uint64_t a_mnt, b_mnt, c_mnt, x;
1183 uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;
1184
1185 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1186 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1187 fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1188
1189 x = fp64_process_NaNs3(a, b, c, mode, flags);
1190
1191 // Quiet NaN added to product of zero and infinity:
1192 if (a_exp == 2047 && (a_mnt >> 51 & 1) &&
1193 ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) ||
1194 (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) {
1195 x = fp64_defaultNaN();
1196 *flags |= FPLIB_IOC;
1197 }
1198
1199 if (x) {
1200 return x;
1201 }
1202
1203 // Handle infinities and zeroes:
1204 if ((b_exp == 2047 && !c_mnt) ||
1205 (c_exp == 2047 && !b_mnt) ||
1206 (a_exp == 2047 && (b_exp == 2047 || c_exp == 2047) &&
1207 (a_sgn != (b_sgn ^ c_sgn)))) {
1208 *flags |= FPLIB_IOC;
1209 return fp64_defaultNaN();
1210 }
1211 if (a_exp == 2047)
1212 return fp64_infinity(a_sgn);
1213 if (b_exp == 2047 || c_exp == 2047)
1214 return fp64_infinity(b_sgn ^ c_sgn);
1215 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1216 return fp64_zero(a_sgn);
1217
1218 x_sgn = a_sgn;
1219 x_exp = a_exp + 11;
1220 x0_mnt = 0;
1221 x1_mnt = a_mnt;
1222
1223 // Multiply:
1224 y_sgn = b_sgn ^ c_sgn;
1225 y_exp = b_exp + c_exp - 1003;
1226 mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
1227 if (!y0_mnt && !y1_mnt) {
1228 y_exp = x_exp;
1229 }
1230
1231 // Add:
1232 if (x_exp >= y_exp) {
1233 uint64_t t0, t1;
1234 lsl128(&t0, &t1, y0_mnt, y1_mnt,
1235 x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
1236 lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
1237 y0_mnt |= !!(t0 | t1);
1238 y_exp = x_exp;
1239 } else {
1240 uint64_t t0, t1;
1241 lsl128(&t0, &t1, x0_mnt, x1_mnt,
1242 y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
1243 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
1244 x0_mnt |= !!(t0 | t1);
1245 x_exp = y_exp;
1246 }
1247 if (x_sgn == y_sgn) {
1248 add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1249 } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
1250 sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1251 } else {
1252 x_sgn ^= 1;
1253 sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
1254 }
1255
1256 if (!x0_mnt && !x1_mnt) {
1257 // Sign of exact zero result depends on rounding mode
1258 return fp64_zero((mode & 3) == 2);
1259 }
1260
1261 // Normalise and convert to 64 bits, collapsing error into bottom bit:
1262 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
1263 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1264
1265 return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
1266 }
1267
1268 static uint32_t
1269 fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
1270 {
1271 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1272 uint32_t a_mnt, b_mnt, x;
1273 uint64_t x_mnt;
1274
1275 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1276 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1277
1278 if ((x = fp32_process_NaNs(a, b, mode, flags)))
1279 return x;
1280
1281 // Handle infinities and zeroes:
1282 if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) {
1283 *flags |= FPLIB_IOC;
1284 return fp32_defaultNaN();
1285 }
1286 if (a_exp == 255 || !b_mnt) {
1287 if (a_exp != 255)
1288 *flags |= FPLIB_DZC;
1289 return fp32_infinity(a_sgn ^ b_sgn);
1290 }
1291 if (!a_mnt || b_exp == 255)
1292 return fp32_zero(a_sgn ^ b_sgn);
1293
1294 // Divide, setting bottom bit if inexact:
1295 a_mnt = fp32_normalise(a_mnt, &a_exp);
1296 x_sgn = a_sgn ^ b_sgn;
1297 x_exp = a_exp - b_exp + 172;
1298 x_mnt = ((uint64_t)a_mnt << 18) / b_mnt;
1299 x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18);
1300
1301 // Normalise and convert to 32 bits, collapsing error into bottom bit:
1302 x_mnt = fp64_normalise(x_mnt, &x_exp);
1303 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);
1304
1305 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1306 }
1307
1308 static uint64_t
1309 fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
1310 {
1311 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
1312 uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;
1313
1314 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1315 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1316
1317 if ((x = fp64_process_NaNs(a, b, mode, flags)))
1318 return x;
1319
1320 // Handle infinities and zeroes:
1321 if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) {
1322 *flags |= FPLIB_IOC;
1323 return fp64_defaultNaN();
1324 }
1325 if (a_exp == 2047 || !b_mnt) {
1326 if (a_exp != 2047)
1327 *flags |= FPLIB_DZC;
1328 return fp64_infinity(a_sgn ^ b_sgn);
1329 }
1330 if (!a_mnt || b_exp == 2047)
1331 return fp64_zero(a_sgn ^ b_sgn);
1332
1333 // Find reciprocal of divisor with Newton-Raphson:
1334 a_mnt = fp64_normalise(a_mnt, &a_exp);
1335 b_mnt = fp64_normalise(b_mnt, &b_exp);
1336 x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
1337 mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
1338 sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
1339 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
1340 mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
1341 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);
1342
1343 // Multiply by dividend:
1344 x_sgn = a_sgn ^ b_sgn;
1345 x_exp = a_exp - b_exp + 1031;
1346 mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // xx 62x62 is enough
1347 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
1348 x_mnt = x1_mnt;
1349
1350 // This is an underestimate, so try adding one:
1351 mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // xx 62x62 is enough
1352 c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
1353 if (c <= 0) {
1354 ++x_mnt;
1355 }
1356
1357 x_mnt = fp64_normalise(x_mnt, &x_exp);
1358
1359 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
1360 }
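// fp64_div forms the quotient via a reciprocal: an initial estimate of 1/b,
// good to roughly 32 bits, comes from dividing ~0ULL by the top bits of the
// normalised divisor; one Newton-Raphson step r' = r * (2 - b*r), carried
// out in fixed point with the 128-bit helpers, roughly doubles that
// precision, and multiplying by the dividend gives a quotient that may still
// be one ULP too small. The final multiply-back and cmp128 decide whether to
// add that one, and the comparison result is also ORed in as the sticky bit
// passed to fp64_round.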
1361
1362 static void
1363 set_fpscr0(FPSCR &fpscr, int flags)
1364 {
1365 if (flags & FPLIB_IDC) {
1366 fpscr.idc = 1;
1367 }
1368 if (flags & FPLIB_IOC) {
1369 fpscr.ioc = 1;
1370 }
1371 if (flags & FPLIB_DZC) {
1372 fpscr.dzc = 1;
1373 }
1374 if (flags & FPLIB_OFC) {
1375 fpscr.ofc = 1;
1376 }
1377 if (flags & FPLIB_UFC) {
1378 fpscr.ufc = 1;
1379 }
1380 if (flags & FPLIB_IXC) {
1381 fpscr.ixc = 1;
1382 }
1383 }
1384
1385 static uint32_t
1386 fp32_sqrt(uint32_t a, int mode, int *flags)
1387 {
1388 int a_sgn, a_exp, x_sgn, x_exp;
1389 uint32_t a_mnt, x, x_mnt;
1390 uint64_t t0, t1;
1391
1392 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1393
1394 // Handle NaNs:
1395 if (a_exp == 255 && (uint32_t)(a_mnt << 9))
1396 return fp32_process_NaN(a, mode, flags);
1397
1398 // Handle infinities and zeroes:
1399 if (!a_mnt) {
1400 return fp32_zero(a_sgn);
1401 }
1402 if (a_exp == 255 && !a_sgn) {
1403 return fp32_infinity(a_sgn);
1404 }
1405 if (a_sgn) {
1406 *flags |= FPLIB_IOC;
1407 return fp32_defaultNaN();
1408 }
1409
1410 a_mnt = fp32_normalise(a_mnt, &a_exp);
1411 if (!(a_exp & 1)) {
1412 ++a_exp;
1413 a_mnt >>= 1;
1414 }
1415
1416 // x = (a * 3 + 5) / 8
1417 x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28);
1418
1419 // x = (a / x + x) / 2; // 16-bit accuracy
1420 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
1421
1422 // x = (a / x + x) / 2; // 16-bit accuracy
1423 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
1424
1425 // x = (a / x + x) / 2; // 32-bit accuracy
1426 x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);
1427
1428 x_sgn = 0;
1429 x_exp = (a_exp + 147) >> 1;
1430 x_mnt = ((x - (1 << 5)) >> 6) + 1;
1431 t1 = (uint64_t)x_mnt * x_mnt;
1432 t0 = (uint64_t)a_mnt << 19;
1433 if (t1 > t0) {
1434 --x_mnt;
1435 }
1436
1437 x_mnt = fp32_normalise(x_mnt, &x_exp);
1438
1439 return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
1440 }
1441
1442 static uint64_t
1443 fp64_sqrt(uint64_t a, int mode, int *flags)
1444 {
1445 int a_sgn, a_exp, x_sgn, x_exp, c;
1446 uint64_t a_mnt, x_mnt, r, x0, x1;
1447 uint32_t x;
1448
1449 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1450
1451 // Handle NaNs:
1452 if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) {
1453 return fp64_process_NaN(a, mode, flags);
1454 }
1455
1456 // Handle infinities and zeroes:
1457 if (!a_mnt)
1458 return fp64_zero(a_sgn);
1459 if (a_exp == 2047 && !a_sgn)
1460 return fp64_infinity(a_sgn);
1461 if (a_sgn) {
1462 *flags |= FPLIB_IOC;
1463 return fp64_defaultNaN();
1464 }
1465
1466 a_mnt = fp64_normalise(a_mnt, &a_exp);
1467 if (a_exp & 1) {
1468 ++a_exp;
1469 a_mnt >>= 1;
1470 }
1471
1472 // x = (a * 3 + 5) / 8
1473 x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28);
1474
1475 // x = (a / x + x) / 2; // 16-bit accuracy
1476 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
1477
1478 // x = (a / x + x) / 2; // 16-bit accuracy
1479 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
1480
1481 // x = (a / x + x) / 2; // 32-bit accuracy
1482 x = ((a_mnt / x) >> 2) + (x >> 1);
1483
1484 // r = 1 / x; // 32-bit accuracy
1485 r = ((uint64_t)1 << 62) / x;
1486
1487 // r = r * (2 - x * r); // 64-bit accuracy
1488 mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
1489 lsr128(&x0, &x1, x0, x1, 31);
1490
1491 // x = (x + a * r) / 2; // 64-bit accuracy
1492 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
1493 lsl128(&x0, &x1, x0, x1, 5);
1494 lsr128(&x0, &x1, x0, x1, 56);
1495
1496 x0 = ((uint64_t)x << 31) + (x0 >> 1);
1497
1498 x_sgn = 0;
1499 x_exp = (a_exp + 1053) >> 1;
1500 x_mnt = x0;
1501 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
1502 mul62x62(&x0, &x1, x_mnt, x_mnt);
1503 lsl128(&x0, &x1, x0, x1, 19);
1504 c = cmp128(x0, x1, 0, a_mnt);
1505 if (c > 0)
1506 --x_mnt;
1507
1508 x_mnt = fp64_normalise(x_mnt, &x_exp);
1509
1510 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
1511 }
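// Both square-root routines first force the exponent to the right parity so
// it can be halved exactly, then refine an initial estimate of sqrt(a) with
// Heron's iteration x' = (a/x + x)/2 (the steps commented as 16- and 32-bit
// accuracy). fp64_sqrt extends the result to 64 bits through a reciprocal
// refinement, and both routines square the candidate and compare it against
// the input so the result can be corrected by one and the comparison reused
// as the sticky bit.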
1512
1513 static int
1514 modeConv(FPSCR fpscr)
1515 {
1516 return (((int) fpscr) >> 22) & 0xF;
1517 }
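// modeConv extracts FPSCR bits 25:22 so that the rounding mode (bits 23:22)
// lands in the two low bits, matching FPLIB_RN/RP/RM/RZ; FZ (bit 24) becomes
// FPLIB_FZ and DN (bit 25) becomes FPLIB_DN. The AHP bit is not part of the
// nibble; fplibConvert ORs fpscr.ahp in separately as FPLIB_AHP.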
1518
1519 static void
1520 set_fpscr(FPSCR &fpscr, int flags)
1521 {
1522 // translate back to FPSCR
1523 bool underflow = false;
1524 if (flags & FPLIB_IDC) {
1525 fpscr.idc = 1;
1526 }
1527 if (flags & FPLIB_IOC) {
1528 fpscr.ioc = 1;
1529 }
1530 if (flags & FPLIB_DZC) {
1531 fpscr.dzc = 1;
1532 }
1533 if (flags & FPLIB_OFC) {
1534 fpscr.ofc = 1;
1535 }
1536 if (flags & FPLIB_UFC) {
1537 underflow = true; //xx Why is this required?
1538 fpscr.ufc = 1;
1539 }
1540 if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
1541 fpscr.ixc = 1;
1542 }
1543 }
1544
1545 template <>
1546 bool
1547 fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
1548 {
1549 int flags = 0;
1550 int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
1551 set_fpscr(fpscr, flags);
1552 return x;
1553 }
1554
1555 template <>
1556 bool
1557 fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
1558 {
1559 int flags = 0;
1560 int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
1561 set_fpscr(fpscr, flags);
1562 return x;
1563 }
1564
1565 template <>
1566 bool
1567 fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
1568 {
1569 int flags = 0;
1570 int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
1571 set_fpscr(fpscr, flags);
1572 return x;
1573 }
1574
1575 template <>
1576 bool
1577 fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
1578 {
1579 int flags = 0;
1580 int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
1581 set_fpscr(fpscr, flags);
1582 return x;
1583 }
1584
1585 template <>
1586 bool
1587 fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
1588 {
1589 int flags = 0;
1590 int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
1591 set_fpscr(fpscr, flags);
1592 return x;
1593 }
1594
1595 template <>
1596 bool
1597 fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
1598 {
1599 int flags = 0;
1600 int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
1601 set_fpscr(fpscr, flags);
1602 return x;
1603 }
1604
1605 template <>
1606 uint32_t
1607 fplibAbs(uint32_t op)
1608 {
1609 return op & ~((uint32_t)1 << 31);
1610 }
1611
1612 template <>
1613 uint64_t
1614 fplibAbs(uint64_t op)
1615 {
1616 return op & ~((uint64_t)1 << 63);
1617 }
1618
1619 template <>
1620 uint32_t
1621 fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
1622 {
1623 int flags = 0;
1624 uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
1625 set_fpscr0(fpscr, flags);
1626 return result;
1627 }
1628
1629 template <>
1630 uint64_t
1631 fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
1632 {
1633 int flags = 0;
1634 uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
1635 set_fpscr0(fpscr, flags);
1636 return result;
1637 }
1638
1639 template <>
1640 int
1641 fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
1642 {
1643 int mode = modeConv(fpscr);
1644 int flags = 0;
1645 int sgn1, exp1, sgn2, exp2, result;
1646 uint32_t mnt1, mnt2;
1647
1648 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
1649 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
1650
1651 if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) ||
1652 (exp2 == 255 && (uint32_t)(mnt2 << 9))) {
1653 result = 3;
1654 if ((exp1 == 255 && (uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) ||
1655 (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) ||
1656 signal_nans)
1657 flags |= FPLIB_IOC;
1658 } else {
1659 if (op1 == op2 || (!mnt1 && !mnt2)) {
1660 result = 6;
1661 } else if (sgn1 != sgn2) {
1662 result = sgn1 ? 8 : 2;
1663 } else if (exp1 != exp2) {
1664 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
1665 } else {
1666 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
1667 }
1668 }
1669
1670 set_fpscr0(fpscr, flags);
1671
1672 return result;
1673 }
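// The result of fplibCompare is the ARM NZCV nibble that the FP compare
// instructions write: 8 (N) for less than, 2 (C) for greater than, 6 (Z|C)
// for equal and 3 (C|V) for an unordered comparison involving a NaN.
// signal_nans selects the signalling (FCMPE/VCMPE-style) behaviour, raising
// Invalid Operation even for quiet NaNs.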
1674
1675 template <>
1676 int
1677 fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
1678 {
1679 int mode = modeConv(fpscr);
1680 int flags = 0;
1681 int sgn1, exp1, sgn2, exp2, result;
1682 uint64_t mnt1, mnt2;
1683
1684 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
1685 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
1686
1687 if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) ||
1688 (exp2 == 2047 && (uint64_t)(mnt2 << 12))) {
1689 result = 3;
1690 if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) ||
1691 (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) ||
1692 signal_nans)
1693 flags |= FPLIB_IOC;
1694 } else {
1695 if (op1 == op2 || (!mnt1 && !mnt2)) {
1696 result = 6;
1697 } else if (sgn1 != sgn2) {
1698 result = sgn1 ? 8 : 2;
1699 } else if (exp1 != exp2) {
1700 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
1701 } else {
1702 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
1703 }
1704 }
1705
1706 set_fpscr0(fpscr, flags);
1707
1708 return result;
1709 }
1710
1711 static uint16_t
1712 fp16_FPConvertNaN_32(uint32_t op)
1713 {
1714 return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13);
1715 }
1716
1717 static uint16_t
1718 fp16_FPConvertNaN_64(uint64_t op)
1719 {
1720 return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42);
1721 }
1722
1723 static uint32_t
1724 fp32_FPConvertNaN_16(uint16_t op)
1725 {
1726 return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13);
1727 }
1728
1729 static uint32_t
1730 fp32_FPConvertNaN_64(uint64_t op)
1731 {
1732 return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29);
1733 }
1734
1735 static uint64_t
1736 fp64_FPConvertNaN_16(uint16_t op)
1737 {
1738 return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42);
1739 }
1740
1741 static uint64_t
1742 fp64_FPConvertNaN_32(uint32_t op)
1743 {
1744 return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29);
1745 }
1746
1747 static uint32_t
1748 fp32_FPOnePointFive(int sgn)
1749 {
1750 return fp32_pack(sgn, 127, (uint64_t)1 << 22);
1751 }
1752
1753 static uint64_t
1754 fp64_FPOnePointFive(int sgn)
1755 {
1756 return fp64_pack(sgn, 1023, (uint64_t)1 << 51);
1757 }
1758
1759 static uint32_t
1760 fp32_FPThree(int sgn)
1761 {
1762 return fp32_pack(sgn, 128, (uint64_t)1 << 22);
1763 }
1764
1765 static uint64_t
1766 fp64_FPThree(int sgn)
1767 {
1768 return fp64_pack(sgn, 1024, (uint64_t)1 << 51);
1769 }
1770
1771 static uint32_t
1772 fp32_FPTwo(int sgn)
1773 {
1774 return fp32_pack(sgn, 128, 0);
1775 }
1776
1777 static uint64_t
1778 fp64_FPTwo(int sgn)
1779 {
1780 return fp64_pack(sgn, 1024, 0);
1781 }
1782
1783 template <>
1784 uint16_t
1785 fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
1786 {
1787 int mode = modeConv(fpscr);
1788 int flags = 0;
1789 int sgn, exp;
1790 uint32_t mnt;
1791 uint16_t result;
1792
1793 // Unpack floating-point operand optionally with flush-to-zero:
1794 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1795
1796 bool alt_hp = fpscr.ahp;
1797
1798 if (exp == 255 && (uint32_t)(mnt << 9)) {
1799 if (alt_hp) {
1800 result = fp16_zero(sgn);
1801 } else if (fpscr.dn) {
1802 result = fp16_defaultNaN();
1803 } else {
1804 result = fp16_FPConvertNaN_32(op);
1805 }
1806 if (!(mnt >> 22 & 1) || alt_hp) {
1807 flags |= FPLIB_IOC;
1808 }
1809 } else if (exp == 255) {
1810 if (alt_hp) {
1811 result = sgn << 15 | (uint16_t)0x7fff;
1812 flags |= FPLIB_IOC;
1813 } else {
1814 result = fp16_infinity(sgn);
1815 }
1816 } else if (!mnt) {
1817 result = fp16_zero(sgn);
1818 } else {
1819 result = fp16_round_(sgn, exp - 127 + 15,
1820 mnt >> 7 | !!(uint32_t)(mnt << 25),
1821 rounding, mode | alt_hp << 4, &flags);
1822 }
1823
1824 set_fpscr0(fpscr, flags);
1825
1826 return result;
1827 }
1828
1829 template <>
1830 uint16_t
1831 fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
1832 {
1833 int mode = modeConv(fpscr);
1834 int flags = 0;
1835 int sgn, exp;
1836 uint64_t mnt;
1837 uint16_t result;
1838
1839 // Unpack floating-point operand optionally with flush-to-zero:
1840 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1841
1842 bool alt_hp = fpscr.ahp;
1843
1844 if (exp == 2047 && (uint64_t)(mnt << 12)) {
1845 if (alt_hp) {
1846 result = fp16_zero(sgn);
1847 } else if (fpscr.dn) {
1848 result = fp16_defaultNaN();
1849 } else {
1850 result = fp16_FPConvertNaN_64(op);
1851 }
1852 if (!(mnt >> 51 & 1) || alt_hp) {
1853 flags |= FPLIB_IOC;
1854 }
1855 } else if (exp == 2047) {
1856 if (alt_hp) {
1857 result = sgn << 15 | (uint16_t)0x7fff;
1858 flags |= FPLIB_IOC;
1859 } else {
1860 result = fp16_infinity(sgn);
1861 }
1862 } else if (!mnt) {
1863 result = fp16_zero(sgn);
1864 } else {
1865 result = fp16_round_(sgn, exp - 1023 + 15,
1866 mnt >> 36 | !!(uint64_t)(mnt << 28),
1867 rounding, mode | alt_hp << 4, &flags);
1868 }
1869
1870 set_fpscr0(fpscr, flags);
1871
1872 return result;
1873 }
1874
1875 template <>
1876 uint32_t
1877 fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
1878 {
1879 int mode = modeConv(fpscr);
1880 int flags = 0;
1881 int sgn, exp;
1882 uint16_t mnt;
1883 uint32_t result;
1884
1885 // Unpack floating-point operand optionally with flush-to-zero:
1886 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1887
1888 if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
1889 if (fpscr.dn) {
1890 result = fp32_defaultNaN();
1891 } else {
1892 result = fp32_FPConvertNaN_16(op);
1893 }
1894 if (!(mnt >> 9 & 1)) {
1895 flags |= FPLIB_IOC;
1896 }
1897 } else if (exp == 31 && !fpscr.ahp) {
1898 result = fp32_infinity(sgn);
1899 } else if (!mnt) {
1900 result = fp32_zero(sgn);
1901 } else {
1902 mnt = fp16_normalise(mnt, &exp);
1903 result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8);
1904 }
1905
1906 set_fpscr0(fpscr, flags);
1907
1908 return result;
1909 }
1910
1911 template <>
1912 uint32_t
1913 fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
1914 {
1915 int mode = modeConv(fpscr);
1916 int flags = 0;
1917 int sgn, exp;
1918 uint64_t mnt;
1919 uint32_t result;
1920
1921 // Unpack floating-point operand optionally with flush-to-zero:
1922 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1923
1924 if (exp == 2047 && (uint64_t)(mnt << 12)) {
1925 if (fpscr.dn) {
1926 result = fp32_defaultNaN();
1927 } else {
1928 result = fp32_FPConvertNaN_64(op);
1929 }
1930 if (!(mnt >> 51 & 1)) {
1931 flags |= FPLIB_IOC;
1932 }
1933 } else if (exp == 2047) {
1934 result = fp32_infinity(sgn);
1935 } else if (!mnt) {
1936 result = fp32_zero(sgn);
1937 } else {
1938 result = fp32_round_(sgn, exp - 1023 + 127,
1939 mnt >> 20 | !!(uint64_t)(mnt << 44),
1940 rounding, mode, &flags);
1941 }
1942
1943 set_fpscr0(fpscr, flags);
1944
1945 return result;
1946 }
1947
1948 template <>
1949 uint64_t
1950 fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
1951 {
1952 int mode = modeConv(fpscr);
1953 int flags = 0;
1954 int sgn, exp;
1955 uint16_t mnt;
1956 uint64_t result;
1957
1958 // Unpack floating-point operand optionally with flush-to-zero:
1959 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1960
1961 if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
1962 if (fpscr.dn) {
1963 result = fp64_defaultNaN();
1964 } else {
1965 result = fp64_FPConvertNaN_16(op);
1966 }
1967 if (!(mnt >> 9 & 1)) {
1968 flags |= FPLIB_IOC;
1969 }
1970 } else if (exp == 31 && !fpscr.ahp) {
1971 result = fp64_infinity(sgn);
1972 } else if (!mnt) {
1973 result = fp64_zero(sgn);
1974 } else {
1975 mnt = fp16_normalise(mnt, &exp);
1976 result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37);
1977 }
1978
1979 set_fpscr0(fpscr, flags);
1980
1981 return result;
1982 }
1983
1984 template <>
1985 uint64_t
1986 fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
1987 {
1988 int mode = modeConv(fpscr);
1989 int flags = 0;
1990 int sgn, exp;
1991 uint32_t mnt;
1992 uint64_t result;
1993
1994 // Unpack floating-point operand optionally with flush-to-zero:
1995 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1996
1997 if (exp == 255 && (uint32_t)(mnt << 9)) {
1998 if (fpscr.dn) {
1999 result = fp64_defaultNaN();
2000 } else {
2001 result = fp64_FPConvertNaN_32(op);
2002 }
2003 if (!(mnt >> 22 & 1)) {
2004 flags |= FPLIB_IOC;
2005 }
2006 } else if (exp == 255) {
2007 result = fp64_infinity(sgn);
2008 } else if (!mnt) {
2009 result = fp64_zero(sgn);
2010 } else {
2011 mnt = fp32_normalise(mnt, &exp);
2012 result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21);
2013 }
2014
2015 set_fpscr0(fpscr, flags);
2016
2017 return result;
2018 }
2019
2020 template <>
2021 uint32_t
2022 fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2023 {
2024 int flags = 0;
2025 uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2026 set_fpscr0(fpscr, flags);
2027 return result;
2028 }
2029
2030 template <>
2031 uint64_t
2032 fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2033 {
2034 int flags = 0;
2035 uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2036 set_fpscr0(fpscr, flags);
2037 return result;
2038 }
2039
2040 template <>
2041 uint32_t
2042 fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2043 {
2044 int flags = 0;
2045 uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
2046 set_fpscr0(fpscr, flags);
2047 return result;
2048 }
2049
2050 template <>
2051 uint64_t
2052 fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2053 {
2054 int flags = 0;
2055 uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
2056 set_fpscr0(fpscr, flags);
2057 return result;
2058 }
2059
2060 static uint32_t
2061 fp32_repack(int sgn, int exp, uint32_t mnt)
2062 {
2063 return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt);
2064 }
2065
2066 static uint64_t
2067 fp64_repack(int sgn, int exp, uint64_t mnt)
2068 {
2069 return fp64_pack(sgn, mnt >> 52 ? exp : 0, mnt);
2070 }
2071
2072 static void
2073 fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
2074 {
2075 // Treat a single quiet-NaN as +Infinity/-Infinity
2076 if (!((uint32_t)~(*op1 << 1) >> 23) && (uint32_t)~(*op2 << 1) >> 23)
2077 *op1 = fp32_infinity(sgn);
2078 if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23)
2079 *op2 = fp32_infinity(sgn);
2080 }
2081
2082 static void
2083 fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
2084 {
2085 // Treat a single quiet-NaN as +Infinity/-Infinity
2086 if (!((uint64_t)~(*op1 << 1) >> 52) && (uint64_t)~(*op2 << 1) >> 52)
2087 *op1 = fp64_infinity(sgn);
2088 if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52)
2089 *op2 = fp64_infinity(sgn);
2090 }
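// The test !((uint32_t)~(x << 1) >> 23) (and its >> 52 double-precision
// counterpart) is true exactly when the exponent field is all ones and the
// top fraction bit is set, i.e. when x is a quiet NaN of either sign. For
// FMAXNM/FMINNM semantics a single quiet-NaN operand is replaced with
// -infinity (MaxNum) or +infinity (MinNum) so that the ordinary Max/Min
// below selects the numeric operand; signalling NaNs are left alone and
// still propagate through fp*_process_NaNs.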
2091
2092 template <>
2093 uint32_t
2094 fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2095 {
2096 int mode = modeConv(fpscr);
2097 int flags = 0;
2098 int sgn1, exp1, sgn2, exp2;
2099 uint32_t mnt1, mnt2, x, result;
2100
2101 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2102 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2103
2104 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
2105 result = x;
2106 } else {
2107 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
2108 fp32_repack(sgn1, exp1, mnt1) :
2109 fp32_repack(sgn2, exp2, mnt2));
2110 }
2111 set_fpscr0(fpscr, flags);
2112 return result;
2113 }
2114
2115 template <>
2116 uint64_t
2117 fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2118 {
2119 int mode = modeConv(fpscr);
2120 int flags = 0;
2121 int sgn1, exp1, sgn2, exp2;
2122 uint64_t mnt1, mnt2, x, result;
2123
2124 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2125 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2126
2127 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
2128 result = x;
2129 } else {
2130 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
2131 fp64_repack(sgn1, exp1, mnt1) :
2132 fp64_repack(sgn2, exp2, mnt2));
2133 }
2134 set_fpscr0(fpscr, flags);
2135 return result;
2136 }
2137
2138 template <>
2139 uint32_t
2140 fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2141 {
2142 fp32_minmaxnum(&op1, &op2, 1);
2143 return fplibMax<uint32_t>(op1, op2, fpscr);
2144 }
2145
2146 template <>
2147 uint64_t
2148 fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2149 {
2150 fp64_minmaxnum(&op1, &op2, 1);
2151 return fplibMax<uint64_t>(op1, op2, fpscr);
2152 }
2153
2154 template <>
2155 uint32_t
2156 fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2157 {
2158 int mode = modeConv(fpscr);
2159 int flags = 0;
2160 int sgn1, exp1, sgn2, exp2;
2161 uint32_t mnt1, mnt2, x, result;
2162
2163 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2164 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2165
2166 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
2167 result = x;
2168 } else {
2169 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
2170 fp32_repack(sgn1, exp1, mnt1) :
2171 fp32_repack(sgn2, exp2, mnt2));
2172 }
2173 set_fpscr0(fpscr, flags);
2174 return result;
2175 }
2176
2177 template <>
2178 uint64_t
2179 fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2180 {
2181 int mode = modeConv(fpscr);
2182 int flags = 0;
2183 int sgn1, exp1, sgn2, exp2;
2184 uint64_t mnt1, mnt2, x, result;
2185
2186 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2187 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2188
2189 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
2190 result = x;
2191 } else {
2192 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
2193 fp64_repack(sgn1, exp1, mnt1) :
2194 fp64_repack(sgn2, exp2, mnt2));
2195 }
2196 set_fpscr0(fpscr, flags);
2197 return result;
2198 }
2199
2200 template <>
2201 uint32_t
2202 fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2203 {
2204 fp32_minmaxnum(&op1, &op2, 0);
2205 return fplibMin<uint32_t>(op1, op2, fpscr);
2206 }
2207
2208 template <>
2209 uint64_t
2210 fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2211 {
2212 fp64_minmaxnum(&op1, &op2, 0);
2213 return fplibMin<uint64_t>(op1, op2, fpscr);
2214 }
2215
2216 template <>
2217 uint32_t
2218 fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2219 {
2220 int flags = 0;
2221 uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
2222 set_fpscr0(fpscr, flags);
2223 return result;
2224 }
2225
2226 template <>
2227 uint64_t
2228 fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2229 {
2230 int flags = 0;
2231 uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
2232 set_fpscr0(fpscr, flags);
2233 return result;
2234 }
2235
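// FMULX behaves like an ordinary multiply except that the invalid case
// 0 * infinity (in either order) returns 2.0 with the product's sign instead
// of the default NaN, e.g. FMULX(+0.0, -Inf) == -2.0.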
2236 template <>
2237 uint32_t
2238 fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2239 {
2240 int mode = modeConv(fpscr);
2241 int flags = 0;
2242 int sgn1, exp1, sgn2, exp2;
2243 uint32_t mnt1, mnt2, result;
2244
2245 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2246 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2247
2248 result = fp32_process_NaNs(op1, op2, mode, &flags);
2249 if (!result) {
2250 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
2251 result = fp32_FPTwo(sgn1 ^ sgn2);
2252 } else if (exp1 == 255 || exp2 == 255) {
2253 result = fp32_infinity(sgn1 ^ sgn2);
2254 } else if (!mnt1 || !mnt2) {
2255 result = fp32_zero(sgn1 ^ sgn2);
2256 } else {
2257 result = fp32_mul(op1, op2, mode, &flags);
2258 }
2259 }
2260
2261 set_fpscr0(fpscr, flags);
2262
2263 return result;
2264 }
2265
2266 template <>
2267 uint64_t
2268 fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2269 {
2270 int mode = modeConv(fpscr);
2271 int flags = 0;
2272 int sgn1, exp1, sgn2, exp2;
2273 uint64_t mnt1, mnt2, result;
2274
2275 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2276 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2277
2278 result = fp64_process_NaNs(op1, op2, mode, &flags);
2279 if (!result) {
2280 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
2281 result = fp64_FPTwo(sgn1 ^ sgn2);
2282 } else if (exp1 == 2047 || exp2 == 2047) {
2283 result = fp64_infinity(sgn1 ^ sgn2);
2284 } else if (!mnt1 || !mnt2) {
2285 result = fp64_zero(sgn1 ^ sgn2);
2286 } else {
2287 result = fp64_mul(op1, op2, mode, &flags);
2288 }
2289 }
2290
2291 set_fpscr0(fpscr, flags);
2292
2293 return result;
2294 }
2295
2296 template <>
2297 uint32_t
2298 fplibNeg(uint32_t op)
2299 {
2300 return op ^ (uint32_t)1 << 31;
2301 }
2302
2303 template <>
2304 uint64_t
2305 fplibNeg(uint64_t op)
2306 {
2307 return op ^ (uint64_t)1 << 63;
2308 }
2309
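// 256-entry estimate table for FRSQRTE. The index combines one bit of
// exponent parity (bit 7) with the top seven fraction bits of the normalised
// significand; the 8-bit entry becomes the leading fraction bits of the
// result (see fplibRSqrtEstimate below).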
2310 static const uint8_t recip_sqrt_estimate[256] = {
2311 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
2312 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
2313 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
2314 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
2315 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
2316 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
2317 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
2318 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
2319 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
2320 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
2321 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
2322 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
2323 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
2324 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
2325 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
2326 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
2327 };
2328
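// FRSQRTE special cases: NaNs are propagated, +/-0 returns an infinity of the
// same sign and raises Divide-by-Zero, negative inputs return the default NaN
// with Invalid Operation, and +Inf returns +0. Otherwise the significand is
// normalised, the table above supplies an 8-bit estimate of 1/sqrt(x), and
// the result exponent is roughly half of the negated, re-biased input
// exponent ((380 - exp) >> 1 for single precision).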
2329 template <>
2330 uint32_t
2331 fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
2332 {
2333 int mode = modeConv(fpscr);
2334 int flags = 0;
2335 int sgn, exp;
2336 uint32_t mnt, result;
2337
2338 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2339
2340 if (exp == 255 && (uint32_t)(mnt << 9)) {
2341 result = fp32_process_NaN(op, mode, &flags);
2342 } else if (!mnt) {
2343 result = fp32_infinity(sgn);
2344 flags |= FPLIB_DZC;
2345 } else if (sgn) {
2346 result = fp32_defaultNaN();
2347 flags |= FPLIB_IOC;
2348 } else if (exp == 255) {
2349 result = fp32_zero(0);
2350 } else {
2351 exp += 8;
2352 mnt = fp32_normalise(mnt, &exp);
2353 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)];
2354 result = fp32_pack(0, (380 - exp) >> 1, mnt << 15);
2355 }
2356
2357 set_fpscr0(fpscr, flags);
2358
2359 return result;
2360 }
2361
2362 template <>
2363 uint64_t
2364 fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
2365 {
2366 int mode = modeConv(fpscr);
2367 int flags = 0;
2368 int sgn, exp;
2369 uint64_t mnt, result;
2370
2371 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2372
2373 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2374 result = fp64_process_NaN(op, mode, &flags);
2375 } else if (!mnt) {
2376 result = fp64_infinity(sgn);
2377 flags |= FPLIB_DZC;
2378 } else if (sgn) {
2379 result = fp64_defaultNaN();
2380 flags |= FPLIB_IOC;
2381 } else if (exp == 2047) {
2382 result = fp64_zero(0);
2383 } else {
2384 exp += 11;
2385 mnt = fp64_normalise(mnt, &exp);
2386 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)];
2387 result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44);
2388 }
2389
2390 set_fpscr0(fpscr, flags);
2391
2392 return result;
2393 }
2394
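// FRSQRTS step: computes (3 - op1 * op2) / 2 with a single fused operation.
// op1 is negated up front and, on the assumption that the trailing scale
// argument of fp32_muladd/fp64_muladd multiplies the fused result by
// 2^scale, the -1 supplies the final halving. The special case 0 * Inf
// yields 1.5.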
2395 template <>
2396 uint32_t
2397 fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2398 {
2399 int mode = modeConv(fpscr);
2400 int flags = 0;
2401 int sgn1, exp1, sgn2, exp2;
2402 uint32_t mnt1, mnt2, result;
2403
2404 op1 = fplibNeg<uint32_t>(op1);
2405 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2406 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2407
2408 result = fp32_process_NaNs(op1, op2, mode, &flags);
2409 if (!result) {
2410 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
2411 result = fp32_FPOnePointFive(0);
2412 } else if (exp1 == 255 || exp2 == 255) {
2413 result = fp32_infinity(sgn1 ^ sgn2);
2414 } else {
2415 result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
2416 }
2417 }
2418
2419 set_fpscr0(fpscr, flags);
2420
2421 return result;
2422 }
2423
2424 template <>
2425 uint64_t
2426 fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2427 {
2428 int mode = modeConv(fpscr);
2429 int flags = 0;
2430 int sgn1, exp1, sgn2, exp2;
2431 uint64_t mnt1, mnt2, result;
2432
2433 op1 = fplibNeg<uint64_t>(op1);
2434 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2435 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2436
2437 result = fp64_process_NaNs(op1, op2, mode, &flags);
2438 if (!result) {
2439 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
2440 result = fp64_FPOnePointFive(0);
2441 } else if (exp1 == 2047 || exp2 == 2047) {
2442 result = fp64_infinity(sgn1 ^ sgn2);
2443 } else {
2444 result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
2445 }
2446 }
2447
2448 set_fpscr0(fpscr, flags);
2449
2450 return result;
2451 }
2452
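// FRECPS step: computes 2 - op1 * op2 via fused multiply-add (op1 is negated
// first, then added to 2), with 0 * Inf treated as 0 so the step returns 2.0.
// For example, op1 = 1.0 and op2 = 0.5 gives 2 - 0.5 = 1.5.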
2453 template <>
2454 uint32_t
2455 fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2456 {
2457 int mode = modeConv(fpscr);
2458 int flags = 0;
2459 int sgn1, exp1, sgn2, exp2;
2460 uint32_t mnt1, mnt2, result;
2461
2462 op1 = fplibNeg<uint32_t>(op1);
2463 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2464 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2465
2466 result = fp32_process_NaNs(op1, op2, mode, &flags);
2467 if (!result) {
2468 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
2469 result = fp32_FPTwo(0);
2470 } else if (exp1 == 255 || exp2 == 255) {
2471 result = fp32_infinity(sgn1 ^ sgn2);
2472 } else {
2473 result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
2474 }
2475 }
2476
2477 set_fpscr0(fpscr, flags);
2478
2479 return result;
2480 }
2481
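// FRECPE: NaNs are propagated, infinities return a signed zero, zeros return
// a signed infinity with Divide-by-Zero, and inputs so small that the
// reciprocal would overflow return either infinity or the largest normal
// depending on the rounding direction (Overflow and Inexact are raised).
// With flush-to-zero set, very large inputs underflow to a signed zero.
// Otherwise the estimate fraction is a truncating division,
// (2^19 / (top ten significand bits) + 1) >> 1, placed in the leading
// fraction bits, and the result exponent roughly mirrors the input exponent
// about the bias, with denormal results shifted down by one or two places.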
2482 template <>
2483 uint32_t
2484 fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
2485 {
2486 int mode = modeConv(fpscr);
2487 int flags = 0;
2488 int sgn, exp;
2489 uint32_t mnt, result;
2490
2491 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2492
2493 if (exp == 255 && (uint32_t)(mnt << 9)) {
2494 result = fp32_process_NaN(op, mode, &flags);
2495 } else if (exp == 255) {
2496 result = fp32_zero(sgn);
2497 } else if (!mnt) {
2498 result = fp32_infinity(sgn);
2499 flags |= FPLIB_DZC;
2500 } else if (!((uint32_t)(op << 1) >> 22)) {
2501 bool overflow_to_inf = false;
2502 switch (FPCRRounding(fpscr)) {
2503 case FPRounding_TIEEVEN:
2504 overflow_to_inf = true;
2505 break;
2506 case FPRounding_POSINF:
2507 overflow_to_inf = !sgn;
2508 break;
2509 case FPRounding_NEGINF:
2510 overflow_to_inf = sgn;
2511 break;
2512 case FPRounding_ZERO:
2513 overflow_to_inf = false;
2514 break;
2515 default:
2516 assert(0);
2517 }
2518 result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
2519 flags |= FPLIB_OFC | FPLIB_IXC;
2520 } else if (fpscr.fz && exp >= 253) {
2521 result = fp32_zero(sgn);
2522 flags |= FPLIB_UFC;
2523 } else {
2524 exp += 8;
2525 mnt = fp32_normalise(mnt, &exp);
2526 int result_exp = 253 - exp;
2527 uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1;
2528 fraction <<= 15;
2529 if (result_exp == 0) {
2530 fraction >>= 1;
2531 } else if (result_exp == -1) {
2532 fraction >>= 2;
2533 result_exp = 0;
2534 }
2535 result = fp32_pack(sgn, result_exp, fraction);
2536 }
2537
2538 set_fpscr0(fpscr, flags);
2539
2540 return result;
2541 }
2542
2543 template <>
2544 uint64_t
2545 fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
2546 {
2547 int mode = modeConv(fpscr);
2548 int flags = 0;
2549 int sgn, exp;
2550 uint64_t mnt, result;
2551
2552 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2553
2554 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2555 result = fp64_process_NaN(op, mode, &flags);
2556 } else if (exp == 2047) {
2557 result = fp64_zero(sgn);
2558 } else if (!mnt) {
2559 result = fp64_infinity(sgn);
2560 flags |= FPLIB_DZC;
2561 } else if (!((uint64_t)(op << 1) >> 51)) {
2562 bool overflow_to_inf = false;
2563 switch (FPCRRounding(fpscr)) {
2564 case FPRounding_TIEEVEN:
2565 overflow_to_inf = true;
2566 break;
2567 case FPRounding_POSINF:
2568 overflow_to_inf = !sgn;
2569 break;
2570 case FPRounding_NEGINF:
2571 overflow_to_inf = sgn;
2572 break;
2573 case FPRounding_ZERO:
2574 overflow_to_inf = false;
2575 break;
2576 default:
2577 assert(0);
2578 }
2579 result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
2580 flags |= FPLIB_OFC | FPLIB_IXC;
2581 } else if (fpscr.fz && exp >= 2045) {
2582 result = fp64_zero(sgn);
2583 flags |= FPLIB_UFC;
2584 } else {
2585 exp += 11;
2586 mnt = fp64_normalise(mnt, &exp);
2587 int result_exp = 2045 - exp;
2588 uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1;
2589 fraction <<= 44;
2590 if (result_exp == 0) {
2591 fraction >>= 1;
2592 } else if (result_exp == -1) {
2593 fraction >>= 2;
2594 result_exp = 0;
2595 }
2596 result = fp64_pack(sgn, result_exp, fraction);
2597 }
2598
2599 set_fpscr0(fpscr, flags);
2600
2601 return result;
2602 }
2603
2604 template <>
2605 uint64_t
2606 fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2607 {
2608 int mode = modeConv(fpscr);
2609 int flags = 0;
2610 int sgn1, exp1, sgn2, exp2;
2611 uint64_t mnt1, mnt2, result;
2612
2613 op1 = fplibNeg<uint64_t>(op1);
2614 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2615 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2616
2617 result = fp64_process_NaNs(op1, op2, mode, &flags);
2618 if (!result) {
2619 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
2620 result = fp64_FPTwo(0);
2621 } else if (exp1 == 2047 || exp2 == 2047) {
2622 result = fp64_infinity(sgn1 ^ sgn2);
2623 } else {
2624 result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
2625 }
2626 }
2627
2628 set_fpscr0(fpscr, flags);
2629
2630 return result;
2631 }
2632
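// FRECPX: the fraction is cleared and the exponent field is replaced by its
// bitwise complement, giving a power-of-two scaling value; zeros and flushed
// denormals get the maximum finite exponent, while an input infinity maps to
// a signed zero. NaNs are processed as usual and no other exceptions are
// raised.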
2633 template <>
2634 uint32_t
2635 fplibRecpX(uint32_t op, FPSCR &fpscr)
2636 {
2637 int mode = modeConv(fpscr);
2638 int flags = 0;
2639 int sgn, exp;
2640 uint32_t mnt, result;
2641
2642 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2643
2644 if (exp == 255 && (uint32_t)(mnt << 9)) {
2645 result = fp32_process_NaN(op, mode, &flags);
2646 }
2647 else {
2648 if (!mnt) { // Zero and denormals
2649 result = fp32_pack(sgn, 254, 0);
2650 } else { // Infinities and normals
2651 result = fp32_pack(sgn, exp ^ 255, 0);
2652 }
2653 }
2654
2655 set_fpscr0(fpscr, flags);
2656
2657 return result;
2658 }
2659
2660 template <>
2661 uint64_t
2662 fplibRecpX(uint64_t op, FPSCR &fpscr)
2663 {
2664 int mode = modeConv(fpscr);
2665 int flags = 0;
2666 int sgn, exp;
2667 uint64_t mnt, result;
2668
2669 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2670
2671 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2672 result = fp64_process_NaN(op, mode, &flags);
2673 }
2674 else {
2675 if (!mnt) { // Zero and denormals
2676 result = fp64_pack(sgn, 2046, 0);
2677 } else { // Infinities and normals
2678 result = fp64_pack(sgn, exp ^ 2047, 0);
2679 }
2680 }
2681
2682 set_fpscr0(fpscr, flags);
2683
2684 return result;
2685 }
2686
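// Round to integral in floating-point format. For exponents below 150
// (single) / 1075 (double) the fraction is shifted out and "err" encodes its
// value relative to half a unit: 0 exact, 1 below a half, 2 exactly a half,
// 3 above a half. The rounding mode then decides whether to increment the
// truncated integer, and Inexact is raised only when "exact" is requested.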
2687 template <>
2688 uint32_t
2689 fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
2690 {
2691 int mode = modeConv(fpscr);
2692 int flags = 0;
2693 int sgn, exp;
2694 uint32_t mnt, result;
2695
2696 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2697 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2698
2699 // Handle NaNs, infinities and zeroes:
2700 if (exp == 255 && (uint32_t)(mnt << 9)) {
2701 result = fp32_process_NaN(op, mode, &flags);
2702 } else if (exp == 255) {
2703 result = fp32_infinity(sgn);
2704 } else if (!mnt) {
2705 result = fp32_zero(sgn);
2706 } else if (exp >= 150) {
2707 // There are no fractional bits
2708 result = op;
2709 } else {
2710 // Truncate towards zero:
2711 uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp);
2712 int err = exp < 118 ? 1 :
2713 (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0);
2714 switch (rounding) {
2715 case FPRounding_TIEEVEN:
2716 x += (err == 3 || (err == 2 && (x & 1)));
2717 break;
2718 case FPRounding_POSINF:
2719 x += err && !sgn;
2720 break;
2721 case FPRounding_NEGINF:
2722 x += err && sgn;
2723 break;
2724 case FPRounding_ZERO:
2725 break;
2726 case FPRounding_TIEAWAY:
2727 x += err >> 1;
2728 break;
2729 default:
2730 assert(0);
2731 }
2732
2733 if (x == 0) {
2734 result = fp32_zero(sgn);
2735 } else {
2736 exp = 150;
2737 mnt = fp32_normalise(x, &exp);
2738 result = fp32_pack(sgn, exp + 8, mnt >> 8);
2739 }
2740
2741 if (err && exact)
2742 flags |= FPLIB_IXC;
2743 }
2744
2745 set_fpscr0(fpscr, flags);
2746
2747 return result;
2748 }
2749
2750 template <>
2751 uint64_t
2752 fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
2753 {
2754 int mode = modeConv(fpscr);
2755 int flags = 0;
2756 int sgn, exp;
2757 uint64_t mnt, result;
2758
2759 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2760 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2761
2762 // Handle NaNs, infinities and zeroes:
2763 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2764 result = fp64_process_NaN(op, mode, &flags);
2765 } else if (exp == 2047) {
2766 result = fp64_infinity(sgn);
2767 } else if (!mnt) {
2768 result = fp64_zero(sgn);
2769 } else if (exp >= 1075) {
2770 // There are no fractional bits
2771 result = op;
2772 } else {
2773 // Truncate towards zero:
2774 uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp);
2775 int err = exp < 1011 ? 1 :
2776 (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0);
2777 switch (rounding) {
2778 case FPRounding_TIEEVEN:
2779 x += (err == 3 || (err == 2 && (x & 1)));
2780 break;
2781 case FPRounding_POSINF:
2782 x += err && !sgn;
2783 break;
2784 case FPRounding_NEGINF:
2785 x += err && sgn;
2786 break;
2787 case FPRounding_ZERO:
2788 break;
2789 case FPRounding_TIEAWAY:
2790 x += err >> 1;
2791 break;
2792 default:
2793 assert(0);
2794 }
2795
2796 if (x == 0) {
2797 result = fp64_zero(sgn);
2798 } else {
2799 exp = 1075;
2800 mnt = fp64_normalise(x, &exp);
2801 result = fp64_pack(sgn, exp + 11, mnt >> 11);
2802 }
2803
2804 if (err && exact)
2805 flags |= FPLIB_IXC;
2806 }
2807
2808 set_fpscr0(fpscr, flags);
2809
2810 return result;
2811 }
2812
2813 template <>
2814 uint32_t
2815 fplibSqrt(uint32_t op, FPSCR &fpscr)
2816 {
2817 int flags = 0;
2818 uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
2819 set_fpscr0(fpscr, flags);
2820 return result;
2821 }
2822
2823 template <>
2824 uint64_t
2825 fplibSqrt(uint64_t op, FPSCR &fpscr)
2826 {
2827 int flags = 0;
2828 uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
2829 set_fpscr0(fpscr, flags);
2830 return result;
2831 }
2832
2833 template <>
2834 uint32_t
2835 fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2836 {
2837 int flags = 0;
2838 uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
2839 set_fpscr0(fpscr, flags);
2840 return result;
2841 }
2842
2843 template <>
2844 uint64_t
2845 fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2846 {
2847 int flags = 0;
2848 uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
2849 set_fpscr0(fpscr, flags);
2850 return result;
2851 }
2852
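// Convert an unpacked value (double-precision biased exponent, significand
// aligned at bit 52) to a 64-bit fixed-point integer. Values that cannot be
// represented saturate to the destination type's minimum or maximum and raise
// Invalid Operation; anything rounded away raises Inexact. FPToFixed_32
// reuses this and then saturates again to the 32-bit range. The
// single-precision callers below re-bias the exponent (exp + 1023 - 127) and
// shift the significand up by 52 - 23 bits before calling in.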
2853 static uint64_t
2854 FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
2855 int *flags)
2856 {
2857 uint64_t x;
2858 int err;
2859
2860 if (exp > 1023 + 63) {
2861 *flags = FPLIB_IOC;
2862 return ((uint64_t)!u << 63) - !sgn;
2863 }
2864
2865 x = lsr64(mnt << 11, 1023 + 63 - exp);
2866 err = (exp > 1023 + 63 - 2 ? 0 :
2867 (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) |
2868 !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1)));
2869
2870 switch (rounding) {
2871 case FPRounding_TIEEVEN:
2872 x += (err == 3 || (err == 2 && (x & 1)));
2873 break;
2874 case FPRounding_POSINF:
2875 x += err && !sgn;
2876 break;
2877 case FPRounding_NEGINF:
2878 x += err && sgn;
2879 break;
2880 case FPRounding_ZERO:
2881 break;
2882 case FPRounding_TIEAWAY:
2883 x += err >> 1;
2884 break;
2885 default:
2886 assert(0);
2887 }
2888
2889 if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) {
2890 *flags = FPLIB_IOC;
2891 return ((uint64_t)!u << 63) - !sgn;
2892 }
2893
2894 if (err) {
2895 *flags = FPLIB_IXC;
2896 }
2897
2898 return sgn ? -x : x;
2899 }
2900
2901 static uint32_t
2902 FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
2903 int *flags)
2904 {
2905 uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
2906 if (u ? x >= (uint64_t)1 << 32 :
2907 !(x < (uint64_t)1 << 31 ||
2908 (uint64_t)-x <= (uint64_t)1 << 31)) {
2909 *flags = FPLIB_IOC;
2910 x = ((uint32_t)!u << 31) - !sgn;
2911 }
2912 return x;
2913 }
2914
2915 template <>
2916 uint32_t
2917 fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2918 {
2919 int flags = 0;
2920 int sgn, exp;
2921 uint32_t mnt, result;
2922
2923 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2924 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
2925
2926 // If NaN, set cumulative flag or take exception:
2927 if (exp == 255 && (uint32_t)(mnt << 9)) {
2928 flags = FPLIB_IOC;
2929 result = 0;
2930 } else {
2931 result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits,
2932 (uint64_t)mnt << (52 - 23), u, rounding, &flags);
2933 }
2934
2935 set_fpscr0(fpscr, flags);
2936
2937 return result;
2938 }
2939
2940 template <>
2941 uint32_t
2942 fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2943 {
2944 int flags = 0;
2945 int sgn, exp;
2946 uint64_t mnt;
2947 uint32_t result;
2948
2949 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2950 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
2951
2952 // If NaN, set cumulative flag or take exception:
2953 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2954 flags = FPLIB_IOC;
2955 result = 0;
2956 } else {
2957 result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
2958 }
2959
2960 set_fpscr0(fpscr, flags);
2961
2962 return result;
2963 }
2964
2965 template <>
2966 uint64_t
2967 fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2968 {
2969 int flags = 0;
2970 int sgn, exp;
2971 uint32_t mnt;
2972 uint64_t result;
2973
2974 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2975 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
2976
2977 // If NaN, set cumulative flag or take exception:
2978 if (exp == 255 && (uint32_t)(mnt << 9)) {
2979 flags = FPLIB_IOC;
2980 result = 0;
2981 } else {
2982 result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits,
2983 (uint64_t)mnt << (52 - 23), u, rounding, &flags);
2984 }
2985
2986 set_fpscr0(fpscr, flags);
2987
2988 return result;
2989 }
2990
2991 template <>
2992 uint64_t
2993 fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2994 {
2995 int flags = 0;
2996 int sgn, exp;
2997 uint64_t mnt, result;
2998
2999 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
3000 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
3001
3002 // If NaN, set cumulative flag or take exception:
3003 if (exp == 2047 && (uint64_t)(mnt << 12)) {
3004 flags = FPLIB_IOC;
3005 result = 0;
3006 } else {
3007 result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
3008 }
3009
3010 set_fpscr0(fpscr, flags);
3011
3012 return result;
3013 }
3014
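// Fixed-point to floating-point conversion: "a" is taken as signed unless u
// is set, the magnitude is normalised so its leading bit reaches bit 63, the
// starting exponent (190 - fbits for single, 1024 + 62 - fbits for double)
// drops once per normalisation shift, and the discarded low bits are folded
// into a sticky bit before rounding. The fplibFixedToFP wrappers pass the
// explicit rounding mode and pick up only the FZ and DN bits from the FPSCR
// ((uint32_t)fpscr >> 22 & 12).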
3015 static uint32_t
3016 fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
3017 {
3018 int x_sgn = !u && a >> 63;
3019 int x_exp = 190 - fbits;
3020 uint64_t x_mnt = x_sgn ? -a : a;
3021
3022 // Handle zero:
3023 if (!x_mnt) {
3024 return fp32_zero(0);
3025 }
3026
3027 // Normalise and convert to 32 bits, collapsing error into bottom bit:
3028 x_mnt = fp64_normalise(x_mnt, &x_exp);
3029 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);
3030
3031 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
3032 }
3033
3034 static uint64_t
3035 fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
3036 {
3037 int x_sgn = !u && a >> 63;
3038 int x_exp = 1024 + 62 - fbits;
3039 uint64_t x_mnt = x_sgn ? -a : a;
3040
3041 // Handle zero:
3042 if (!x_mnt) {
3043 return fp64_zero(0);
3044 }
3045
3046 x_mnt = fp64_normalise(x_mnt, &x_exp);
3047
3048 return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
3049 }
3050
3051 template <>
3052 uint32_t
3053 fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
3054 {
3055 int flags = 0;
3056 uint32_t res = fp32_cvtf(op, fbits, u,
3057 (int)rounding | ((uint32_t)fpscr >> 22 & 12),
3058 &flags);
3059 set_fpscr0(fpscr, flags);
3060 return res;
3061 }
3062
3063 template <>
3064 uint64_t
3065 fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
3066 {
3067 int flags = 0;
3068 uint64_t res = fp64_cvtf(op, fbits, u,
3069 (int)rounding | ((uint32_t)fpscr >> 22 & 12),
3070 &flags);
3071 set_fpscr0(fpscr, flags);
3072 return res;
3073 }
3074
3075 }