2 * Copyright (c) 2010-2013 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 #ifndef __ARCH_ARM_INSTS_VFP_HH__
41 #define __ARCH_ARM_INSTS_VFP_HH__
47 #include "arch/arm/insts/misc.hh"
48 #include "arch/arm/miscregs.hh"
// Set the micro-op StaticInst flags on a VFP instruction according to its
// role (standalone micro-op, first, or last micro-op of a macro-op).
// NOTE(review): elided view — the template header, switch statement, and
// braces around these assignments are not visible here.
62 setVfpMicroFlags(VfpMicroMode mode, T &flags)
// Plain micro-op: only IsMicroop is set.
66 flags[StaticInst::IsMicroop] = true;
// First micro-op of a macro-op sequence.
69 flags[StaticInst::IsMicroop] =
70 flags[StaticInst::IsFirstMicroop] = true;
// Last micro-op of a macro-op sequence.
73 flags[StaticInst::IsMicroop] =
74 flags[StaticInst::IsLastMicroop] = true;
// Every micro-op except the last must delay commit until the whole
// macro-op is known to complete.
79 if (mode == VfpMicroop || mode == VfpFirstMicroop) {
80 flags[StaticInst::IsDelayedCommit] = true;
// Host floating-point exception bits, aliased to the <fenv.h> macros so the
// values can be passed straight to feraiseexcept()/fetestexcept().
86 FeDivByZero = FE_DIVBYZERO,
87 FeInexact = FE_INEXACT,
88 FeInvalid = FE_INVALID,
89 FeOverflow = FE_OVERFLOW,
90 FeUnderflow = FE_UNDERFLOW,
91 FeAllExceptions = FE_ALL_EXCEPT
// Host rounding modes, aliased to the <fenv.h> macros for fesetround().
96 FeRoundDown = FE_DOWNWARD,
97 FeRoundNearest = FE_TONEAREST,
98 FeRoundZero = FE_TOWARDZERO,
99 FeRoundUpward = FE_UPWARD
// Reinterpret a raw bit pattern as a float/double and vice versa.  The
// unused float/double parameter exists only to select the overload.
111 static inline float bitsToFp(uint64_t, float);
112 static inline double bitsToFp(uint64_t, double);
113 static inline uint32_t fpToBits(float);
114 static inline uint64_t fpToBits(double);
// Flush a subnormal operand to a signed zero: keep only the sign bit of
// its representation.  NOTE(review): elided view — the return type,
// braces, and the declaration of 'junk' are not visible here.
116 template <class fpType>
118 flushToZero(fpType &op)
121 if (std::fpclassify(op) == FP_SUBNORMAL) {
// Mask that isolates the sign bit (top bit of the representation).
122 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
123 op = bitsToFp(fpToBits(op) & bitMask, junk);
// Two-operand flush-to-zero.  Each operand is flushed in its own
// statement so the second is processed even when the first already was
// (no short-circuit); returns whether either operand was flushed.
129 template <class fpType>
131 flushToZero(fpType &op1, fpType &op2)
133 bool flush1 = flushToZero(op1);
134 bool flush2 = flushToZero(op2);
135 return flush1 || flush2;
// Flush-to-zero gated by the guest FPSCR.FZ bit.  The elided body
// presumably records the flush in a cumulative FPSCR flag — confirm
// against the full source.
138 template <class fpType>
140 vfpFlushToZero(FPSCR &fpscr, fpType &op)
142 if (fpscr.fz == 1 && flushToZero(op)) {
// Two-operand form: apply the FPSCR-gated flush to each operand in turn.
147 template <class fpType>
149 vfpFlushToZero(FPSCR &fpscr, fpType &op1, fpType &op2)
151 vfpFlushToZero(fpscr, op1);
152 vfpFlushToZero(fpscr, op2);
// Definitions of the bit-pattern conversion helpers declared above
// (bodies elided in this view).
155 static inline uint32_t
167 static inline uint64_t
180 bitsToFp(uint64_t bits, float junk)
192 bitsToFp(uint64_t bits, double junk)
// True when val is a signaling NaN: a NaN whose quiet bit (top bit of the
// significand) is clear.
203 template <class fpType>
207 const bool single = (sizeof(fpType) == sizeof(float));
// Quiet-NaN pattern: all-ones exponent plus the quiet bit set.
208 const uint64_t qnan =
209 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
210 return std::isnan(val) && ((fpToBits(val) & qnan) != qnan);
// Saved host floating-point environment state (rounding mode).
213 typedef int VfpSavedState;
// Prepare the host FP state for emulating a guest operation, and restore
// it afterwards while merging host exceptions into the guest FPSCR
// (masked by 'mask').
215 VfpSavedState prepFpState(uint32_t rMode);
216 void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask = FpscrExcMask);
// Fix up a host-computed result to match VFP semantics (NaN handling,
// etc.) using the operands — bodies defined elsewhere.
218 template <class fpType>
219 fpType fixDest(FPSCR fpscr, fpType val, fpType op1);
221 template <class fpType>
222 fpType fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2);
224 template <class fpType>
225 fpType fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2);
// Cross-precision fix-ups: double source -> single dest and vice versa.
227 float fixFpDFpSDest(FPSCR fpscr, double val);
228 double fixFpSFpDDest(FPSCR fpscr, float val);
// Half-precision conversions; 'ahp' presumably selects the ARM
// alternative half-precision format and 'defaultNan' forces default NaN
// results — confirm against the implementation.
230 uint16_t vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
231 uint32_t rMode, bool ahp, float op);
232 uint16_t vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan,
233 uint32_t rMode, bool ahp, double op);
235 float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op);
236 double vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op);
// Assemble a double from its two 32-bit register halves ('low' is the low
// 32 bits of the representation) and split one back apart.
239 makeDouble(uint32_t low, uint32_t high)
242 return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
// Low half: the uint32_t return truncates fpToBits() to its low 32 bits.
245 static inline uint32_t
246 lowFromDouble(double val)
248 return fpToBits(val);
// High half: shift the upper 32 bits down before truncation.
251 static inline uint32_t
252 highFromDouble(double val)
254 return fpToBits(val) >> 32;
// Replace the host FP exception flags with exactly 'exceptions'.
258 setFPExceptions(int exceptions) {
259 feclearexcept(FeAllExceptions);
260 feraiseexcept(exceptions);
// Convert a floating-point value to a fixed-point integer of 'width' bits
// with 'imm' fraction bits, saturating on overflow and accumulating host
// FP exception flags.  NOTE(review): this view is heavily elided — the
// switch scaffolding, several branches, and variable declarations
// (origVal, finalVal, rmode) are not visible here.
263 template <typename T>
265 vfpFpToFixed(T val, bool isSigned, uint8_t width, uint8_t imm, bool
266 useRmode = true, VfpRoundingMode roundMode = VfpRoundZero,
267 bool aarch64 = false)
270 bool roundAwayFix = false;
// Default to the current host rounding mode when no explicit mode is used.
273 rmode = fegetround();
// Map the VFP rounding mode onto the host <fenv.h> equivalent.
277 case VfpRoundNearest:
278 rmode = FeRoundNearest;
281 rmode = FeRoundUpward;
290 // There is no equivalent rounding mode, use round down and we'll
296 panic("Unsupported roundMode %d\n", roundMode);
// Empty asm with memory operands: compiler barrier that forces the value
// through memory so the FP environment changes take effect in order.
299 __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
300 fesetround(FeRoundNearest);
// Scale by 2^imm to shift the fraction bits into the integer part.
301 val = val * pow(2.0, imm);
302 __asm__ __volatile__("" : "=m" (val) : "m" (val));
304 feclearexcept(FeAllExceptions);
305 __asm__ __volatile__("" : "=m" (val) : "m" (val));
308 __asm__ __volatile__("" : "=m" (val) : "m" (val));
310 int exceptions = fetestexcept(FeAllExceptions);
312 int fpType = std::fpclassify(val);
313 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
// A NaN input to a convert is an Invalid Operation.
314 if (fpType == FP_NAN) {
315 exceptions |= FeInvalid;
318 } else if (origVal != val) {
321 if (origVal - val > 0.5)
323 else if (val - origVal > 0.5)
328 // The ordering on the subtraction looks a bit odd in that we
329 // don't do the obvious origVal - val, instead we do
330 // -(val - origVal). This is required to get the correct bit
331 // exact behaviour when very close to the 0.5 threshold.
332 volatile T error = val;
335 if ( (error > 0.5) ||
336 ((error == 0.5) && (val >= 0)) )
// Rounding changed the value, so the conversion is inexact.
348 exceptions |= FeInexact;
351 __asm__ __volatile__("" : "=m" (val) : "m" (val));
354 bool outOfRange = false;
355 int64_t result = (int64_t) val;
// Truncate the result to the requested signed width.
360 finalVal = (int16_t)val;
361 } else if (width == 32) {
362 finalVal =(int32_t)val;
363 } else if (width == 64) {
366 panic("Unsupported width %d\n", width);
369 // check if value is in range
370 int64_t minVal = ~mask(width-1);
371 if ((double)val < minVal) {
375 int64_t maxVal = mask(width-1);
376 if ((double)val > maxVal) {
381 bool isNeg = val < 0;
382 finalVal = result & mask(width);
383 // If the result is supposed to be less than 64 bits check that the
384 // upper bits that got thrown away are just sign extension bits
386 outOfRange = ((uint64_t) result >> (width - 1)) !=
387 (isNeg ? mask(64-width+1) : 0);
389 // If the original floating point value doesn't match the
390 // integer version, we are also out of range. So create a saturated
// Negative overflow saturates to the most negative representable value.
393 outOfRange |= val < result;
395 finalVal = 1LL << (width-1);
// Positive overflow saturates to the most positive representable value.
398 outOfRange |= val > result;
400 finalVal = mask(width-1);
405 // Raise an exception if the value was out of range
// Saturation is Invalid, not Inexact, so drop the inexact flag.
407 exceptions |= FeInvalid;
408 exceptions &= ~FeInexact;
410 setFPExceptions(exceptions);
// Unsigned destination: a negative source is out of range (Invalid).
413 if ((double)val < 0) {
414 exceptions |= FeInvalid;
415 exceptions &= ~FeInexact;
416 setFPExceptions(exceptions);
420 uint64_t result = ((uint64_t) val) & mask(width);
422 exceptions |= FeInvalid;
423 exceptions &= ~FeInexact;
424 setFPExceptions(exceptions);
428 setFPExceptions(exceptions);
// Fixed-point to floating-point conversions (unsigned/signed source,
// single/double destination).  'imm' is presumably the number of
// fraction bits — confirm against the implementation.
434 float vfpUFixedToFpS(bool flush, bool defaultNan,
435 uint64_t val, uint8_t width, uint8_t imm);
436 float vfpSFixedToFpS(bool flush, bool defaultNan,
437 int64_t val, uint8_t width, uint8_t imm);
439 double vfpUFixedToFpD(bool flush, bool defaultNan,
440 uint64_t val, uint8_t width, uint8_t imm);
441 double vfpSFixedToFpD(bool flush, bool defaultNan,
442 int64_t val, uint8_t width, uint8_t imm);
// Reciprocal / reciprocal-square-root estimate helpers (float and raw
// unsigned forms) — bodies defined elsewhere.
444 float fprSqrtEstimate(FPSCR &fpscr, float op);
445 uint32_t unsignedRSqrtEstimate(uint32_t op);
447 float fpRecipEstimate(FPSCR &fpscr, float op);
448 uint32_t unsignedRecipEstimate(uint32_t op);
// Base class for VFP macro-ops (vector-mode operations expanded into
// micro-ops).  NOTE(review): member declarations and method bodies are
// elided from this view.
450 class VfpMacroOp : public PredMacroOp
// A register index is in the scalar bank when it falls in the first
// bank of 8 (idx % 32 < 8).
454 inScalarBank(IntRegIndex idx)
456 return (idx % 32) < 8;
462 VfpMacroOp(const char *mnem, ExtMachInst _machInst,
463 OpClass __opClass, bool _wide) :
464 PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
// Step register indices to the next element of a vector operation.
467 IntRegIndex addStride(IntRegIndex idx, unsigned stride);
468 void nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2);
469 void nextIdxs(IntRegIndex &dest, IntRegIndex &op1);
470 void nextIdxs(IntRegIndex &dest);
// Scalar arithmetic helpers (add/sub/div in single and double precision),
// usable as the function-pointer argument of the generic op wrappers.
// NOTE(review): template headers at 473/480 and all bodies are elided.
473 template <typename T>
480 template <typename T>
488 fpAddS(float a, float b)
494 fpAddD(double a, double b)
500 fpSubS(float a, float b)
506 fpSubD(double a, double b)
512 fpDivS(float a, float b)
518 fpDivD(double a, double b)
// Multiply helper with explicit special-case handling.  NOTE(review):
// heavily elided — the visible code captures an operand's raw bits into
// 'opData' (presumably to preserve a sign for the special-case result)
// and detects the infinity-times-zero case; the result construction is
// not visible.  Looks like the FMULX-style helper — confirm.
523 template <typename T>
530 template <typename T>
537 const bool single = (sizeof(T) == sizeof(float));
539 opData = (fpToBits(a));
541 opData = (fpToBits(b));
544 opData = (fpToBits(a));
546 opData = (fpToBits(b));
// Classify operands to catch the inf * 0 / 0 * inf combination.
549 bool inf1 = (std::fpclassify(a) == FP_INFINITE);
550 bool inf2 = (std::fpclassify(b) == FP_INFINITE);
551 bool zero1 = (std::fpclassify(a) == FP_ZERO);
552 bool zero2 = (std::fpclassify(b) == FP_ZERO);
553 if ((inf1 && zero2) || (zero1 && inf2)) {
// Multiply helpers in single and double precision (template header and
// bodies elided in this view).
564 template <typename T>
572 fpMulS(float a, float b)
578 fpMulD(double a, double b)
// Fused multiply-add: result = op1 * op2 + addend, computed via the C99
// fma()/fmaf() so there is a single rounding.
583 template <typename T>
585 // @todo remove this when all calls to it have been replaced with the new fplib implementation
586 fpMulAdd(T op1, T op2, T addend)
// Pick the precision-matching libm entry point.
590 if (sizeof(T) == sizeof(float))
591 result = fmaf(op1, op2, addend);
593 result = fma(op1, op2, addend);
595 // ARM doesn't generate negative NaNs from this operation, so clear the
// sign bit when a NaN was produced from entirely non-NaN inputs.
596 if (std::isnan(result) && !std::isnan(op1) &&
597 !std::isnan(op2) && !std::isnan(addend))
599 uint64_t bitMask = ULL(0x1) << ((sizeof(T) * 8) - 1);
600 result = bitsToFp(fpToBits(result) & ~bitMask, op1);
// Round to an integral floating-point value; the visible test raises a
// flag (presumably Inexact in fpscr) when rounding actually changed a
// non-NaN value.  Body largely elided — confirm against full source.
605 template <typename T>
607 fpRIntX(T a, FPSCR &fpscr)
612 if (rVal != a && !std::isnan(a))
// maxNum-style maximum: a quiet-NaN operand yields the other operand,
// and +0 is ordered above -0.
617 template <typename T>
621 const bool single = (sizeof(T) == sizeof(float));
622 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
// If one side is a quiet NaN, return the other side.
625 return ((fpToBits(a) & qnan) == qnan) ? b : a;
627 return ((fpToBits(b) & qnan) == qnan) ? a : b;
628 // Handle comparisons of +0 and -0.
629 if (!std::signbit(a) && std::signbit(b))
// Maximum variant that falls back to fpMaxNum for the common case
// (its own special-case handling is elided from this view).
634 template <typename T>
642 return fpMaxNum<T>(a, b);
// minNum-style minimum: a quiet-NaN operand yields the other operand,
// and -0 is ordered below +0.  Mirror image of fpMaxNum above.
645 template <typename T>
649 const bool single = (sizeof(T) == sizeof(float));
650 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
// If one side is a quiet NaN, return the other side.
653 return ((fpToBits(a) & qnan) == qnan) ? b : a;
655 return ((fpToBits(b) & qnan) == qnan) ? a : b;
656 // Handle comparisons of +0 and -0.
657 if (std::signbit(a) && !std::signbit(b))
// Minimum variant that falls back to fpMinNum for the common case
// (its own special-case handling is elided from this view).
662 template <typename T>
670 return fpMinNum<T>(a, b);
// Reciprocal square-root step: computes (3 - a*b) / 2, with explicit
// handling of the 0 * inf case and an Underflow flag when the product
// is subnormal.
673 template <typename T>
677 int fpClassA = std::fpclassify(a);
678 int fpClassB = std::fpclassify(b);
// 0 * inf would be NaN; handled specially (result elided in this view).
682 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
683 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
687 fpClassAxB = std::fpclassify(aXb);
688 if (fpClassAxB == FP_SUBNORMAL) {
// Subnormal intermediate product: report underflow to the caller.
689 feraiseexcept(FeUnderflow);
692 return (3.0 - (a * b)) / 2.0;
// Reciprocal step: computes 2 - a*b, with explicit handling of the
// 0 * inf case and an Underflow flag when the product is subnormal.
695 template <typename T>
699 int fpClassA = std::fpclassify(a);
700 int fpClassB = std::fpclassify(b);
// 0 * inf would be NaN; handled specially (result elided in this view).
704 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
705 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
709 fpClassAxB = std::fpclassify(aXb);
710 if (fpClassAxB == FP_SUBNORMAL) {
// Subnormal intermediate product: report underflow to the caller.
711 feraiseexcept(FeUnderflow);
714 return 2.0 - (a * b);
// Single-precision reciprocal square-root step; same algorithm as the
// template fpRSqrts above: (3 - a*b) / 2 with 0*inf and underflow
// handling.
719 fpRSqrtsS(float a, float b)
721 int fpClassA = std::fpclassify(a);
722 int fpClassB = std::fpclassify(b);
// 0 * inf would be NaN; handled specially (result elided in this view).
726 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
727 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
731 fpClassAxB = std::fpclassify(aXb);
732 if (fpClassAxB == FP_SUBNORMAL) {
733 feraiseexcept(FeUnderflow);
736 return (3.0 - (a * b)) / 2.0;
// Single-precision reciprocal step; same algorithm as the template
// fpRecps above: 2 - a*b with 0*inf and underflow handling.
740 fpRecpsS(float a, float b)
742 int fpClassA = std::fpclassify(a);
743 int fpClassB = std::fpclassify(b);
// 0 * inf would be NaN; handled specially (result elided in this view).
747 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
748 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
752 fpClassAxB = std::fpclassify(aXb);
753 if (fpClassAxB == FP_SUBNORMAL) {
754 feraiseexcept(FeUnderflow);
757 return 2.0 - (a * b);
// Tie-breaking adjustment for round-to-nearest: on an exact .5 remainder,
// step 'val' toward the even neighbour.  NOTE(review): function name,
// signature, and the computation of 'val' are elided from this view.
760 template <typename T>
766 if (a - val == 0.5) {
767 if ( (((int) a) & 1) == 0 ) val += 1.0;
769 else if (a - val == -0.5) {
770 if ( (((int) a) & 1) == 0 ) val -= 1.0;
// Base class for VFP/NEON floating-point instructions.  Provides generic
// unary/binary/ternary operation wrappers, NaN processing, and micro-op
// aware PC advancement.  NOTE(review): many member bodies are elided.
777 class FpOp : public PredOp
780 FpOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) :
781 PredOp(mnem, _machInst, __opClass)
// doOp overloads are meant to be provided by subclasses; the base
// versions panic if ever reached.
785 doOp(float op1, float op2) const
787 panic("Unimplemented version of doOp called.\n");
791 doOp(float op1) const
793 panic("Unimplemented version of doOp called.\n");
797 doOp(double op1, double op2) const
799 panic("Unimplemented version of doOp called.\n");
803 doOp(double op1) const
805 panic("Unimplemented version of doOp called.\n");
// Pack a double from two 32-bit register halves and split one back
// apart (duplicates the free helpers earlier in this file).
809 dbl(uint32_t low, uint32_t high) const
812 return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
816 dblLow(double val) const
818 return fpToBits(val);
822 dblHi(double val) const
824 return fpToBits(val) >> 32;
827 template <class fpType>
// NaN processing for two-operand ops; presumably sets 'done' when the
// NaN path produced the result — confirm against the implementation.
829 processNans(FPSCR &fpscr, bool &done, bool defaultNan,
830 fpType op1, fpType op2) const;
// Generic wrappers: run 'func' on the operands under the guest rounding
// mode / flush-to-zero settings and fold host FP exceptions into fpscr.
832 template <class fpType>
834 ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3,
835 fpType (*func)(fpType, fpType, fpType),
836 bool flush, bool defaultNan, uint32_t rMode) const;
838 template <class fpType>
840 binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
841 fpType (*func)(fpType, fpType),
842 bool flush, bool defaultNan, uint32_t rMode) const;
844 template <class fpType>
846 unaryOp(FPSCR &fpscr, fpType op1,
847 fpType (*func)(fpType),
848 bool flush, uint32_t rMode) const;
// PC advancement that is aware of micro-op boundaries: the last
// micro-op ends the macro-op; intermediate micro-ops take a different
// path (bodies elided).
851 advancePC(PCState &pcState) const
853 if (flags[IsLastMicroop]) {
855 } else if (flags[IsMicroop]) {
// Square root via the host libm, routed through unaryOp so rounding
// mode and exception state are handled like every other operation.
863 fpSqrt (FPSCR fpscr,float x) const
866 return unaryOp(fpscr,x,sqrtf,fpscr.fz,fpscr.rMode);
871 fpSqrt (FPSCR fpscr,double x) const
874 return unaryOp(fpscr,x,sqrt,fpscr.fz,fpscr.rMode);
// Base for FP conditional-compare instructions: two source registers, a
// condition, and a default condition-code value (presumably used when
// the condition fails, FCCMP-style — confirm against the decoder).
// NOTE(review): the 'defCc' member declaration is elided from this view.
879 class FpCondCompRegOp : public FpOp
882 IntRegIndex op1, op2;
883 ConditionCode condCode;
886 FpCondCompRegOp(const char *mnem, ExtMachInst _machInst,
887 OpClass __opClass, IntRegIndex _op1, IntRegIndex _op2,
888 ConditionCode _condCode, uint8_t _defCc) :
889 FpOp(mnem, _machInst, __opClass),
890 op1(_op1), op2(_op2), condCode(_condCode), defCc(_defCc)
893 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
// Base for FP conditional-select instructions: chooses between op1 and
// op2 based on condCode (FCSEL-style — confirm against the decoder).
896 class FpCondSelOp : public FpOp
899 IntRegIndex dest, op1, op2;
900 ConditionCode condCode;
902 FpCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
903 IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
904 ConditionCode _condCode) :
905 FpOp(mnem, _machInst, __opClass),
906 dest(_dest), op1(_op1), op2(_op2), condCode(_condCode)
909 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
// Base for two-register FP instructions (dest, op1).  The constructor
// applies the micro-op flags for the given VfpMicroMode.
// NOTE(review): member declarations are elided from this view.
912 class FpRegRegOp : public FpOp
918 FpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
919 IntRegIndex _dest, IntRegIndex _op1,
920 VfpMicroMode mode = VfpNotAMicroop) :
921 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1)
923 setVfpMicroFlags(mode, flags);
926 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
// Base for register + immediate FP instructions (dest, imm).
// NOTE(review): member declarations are elided from this view.
929 class FpRegImmOp : public FpOp
935 FpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
936 IntRegIndex _dest, uint64_t _imm,
937 VfpMicroMode mode = VfpNotAMicroop) :
938 FpOp(mnem, _machInst, __opClass), dest(_dest), imm(_imm)
940 setVfpMicroFlags(mode, flags);
943 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
// Base for two-register + immediate FP instructions (dest, op1, imm).
// NOTE(review): member declarations are elided from this view.
946 class FpRegRegImmOp : public FpOp
953 FpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
954 IntRegIndex _dest, IntRegIndex _op1,
955 uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
956 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), imm(_imm)
958 setVfpMicroFlags(mode, flags);
961 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
// Base for three-register FP instructions (dest, op1, op2).
// NOTE(review): member declarations are elided from this view.
964 class FpRegRegRegOp : public FpOp
971 FpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
972 IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
973 VfpMicroMode mode = VfpNotAMicroop) :
974 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2)
976 setVfpMicroFlags(mode, flags);
979 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
// Base for four-register FP instructions (dest, op1, op2, op3), e.g.
// fused multiply-accumulate forms.  NOTE(review): member declarations
// and the op3 initializer line are elided from this view.
982 class FpRegRegRegRegOp : public FpOp
990 FpRegRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
991 IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
992 IntRegIndex _op3, VfpMicroMode mode = VfpNotAMicroop) :
993 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2),
996 setVfpMicroFlags(mode, flags);
999 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
// Base for three-register + immediate FP instructions (dest, op1, op2,
// imm).  NOTE(review): member declarations are elided from this view.
1002 class FpRegRegRegImmOp : public FpOp
1010 FpRegRegRegImmOp(const char *mnem, ExtMachInst _machInst,
1011 OpClass __opClass, IntRegIndex _dest,
1012 IntRegIndex _op1, IntRegIndex _op2,
1013 uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
1014 FpOp(mnem, _machInst, __opClass),
1015 dest(_dest), op1(_op1), op2(_op2), imm(_imm)
1017 setVfpMicroFlags(mode, flags);
1020 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
1025 #endif //__ARCH_ARM_INSTS_VFP_HH__