2 * Copyright (c) 2010-2013 ARM Limited
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40 #ifndef __ARCH_ARM_INSTS_VFP_HH__
41 #define __ARCH_ARM_INSTS_VFP_HH__
47 #include "arch/arm/insts/misc.hh"
48 #include "arch/arm/miscregs.hh"
// Set the micro-op StaticInst flags on a VFP instruction according to its
// role (standalone micro-op, first, or last micro-op of a macro-op).
// NOTE(review): elided view — the template header, switch statement, and
// braces around these assignments are not visible here.
62 setVfpMicroFlags(VfpMicroMode mode, T &flags)
// Plain micro-op: only IsMicroop is set.
66 flags[StaticInst::IsMicroop] = true;
// First micro-op of a macro-op sequence.
69 flags[StaticInst::IsMicroop] =
70 flags[StaticInst::IsFirstMicroop] = true;
// Last micro-op of a macro-op sequence.
73 flags[StaticInst::IsMicroop] =
74 flags[StaticInst::IsLastMicroop] = true;
// Every micro-op except the last must delay commit until the whole
// macro-op is known to complete.
79 if (mode == VfpMicroop || mode == VfpFirstMicroop) {
80 flags[StaticInst::IsDelayedCommit] = true;
// Host floating-point exception bits, aliased to the <fenv.h> macros so the
// values can be passed straight to feraiseexcept()/fetestexcept().
86 FeDivByZero = FE_DIVBYZERO,
87 FeInexact = FE_INEXACT,
88 FeInvalid = FE_INVALID,
89 FeOverflow = FE_OVERFLOW,
90 FeUnderflow = FE_UNDERFLOW,
91 FeAllExceptions = FE_ALL_EXCEPT
// Host rounding modes, aliased to the <fenv.h> macros for fesetround().
96 FeRoundDown = FE_DOWNWARD,
97 FeRoundNearest = FE_TONEAREST,
98 FeRoundZero = FE_TOWARDZERO,
99 FeRoundUpward = FE_UPWARD
// Reinterpret a raw bit pattern as a float/double and vice versa.  The
// unused float/double parameter exists only to select the overload.
111 static inline float bitsToFp(uint64_t, float);
112 static inline double bitsToFp(uint64_t, double);
113 static inline uint32_t fpToBits(float);
114 static inline uint64_t fpToBits(double);
// Flush a subnormal operand to a signed zero: keep only the sign bit of
// its representation.  NOTE(review): elided view — the return type,
// braces, and the declaration of 'junk' are not visible here.
116 template <class fpType>
118 flushToZero(fpType &op)
121 if (std::fpclassify(op) == FP_SUBNORMAL) {
// Mask that isolates the sign bit (top bit of the representation).
122 uint64_t bitMask = ULL(0x1) << (sizeof(fpType) * 8 - 1);
123 op = bitsToFp(fpToBits(op) & bitMask, junk);
// Two-operand flush-to-zero.  Each operand is flushed in its own
// statement so the second is processed even when the first already was
// (no short-circuit); returns whether either operand was flushed.
129 template <class fpType>
131 flushToZero(fpType &op1, fpType &op2)
133 bool flush1 = flushToZero(op1);
134 bool flush2 = flushToZero(op2);
135 return flush1 || flush2;
// Flush-to-zero gated by the guest FPSCR.FZ bit.  The elided body
// presumably records the flush in a cumulative FPSCR flag — confirm
// against the full source.
138 template <class fpType>
140 vfpFlushToZero(FPSCR &fpscr, fpType &op)
142 if (fpscr.fz == 1 && flushToZero(op)) {
// Two-operand form: apply the FPSCR-gated flush to each operand in turn.
147 template <class fpType>
149 vfpFlushToZero(FPSCR &fpscr, fpType &op1, fpType &op2)
151 vfpFlushToZero(fpscr, op1);
152 vfpFlushToZero(fpscr, op2);
// Definitions of the bit-pattern conversion helpers declared above
// (bodies elided in this view).
155 static inline uint32_t
167 static inline uint64_t
180 bitsToFp(uint64_t bits, float junk)
192 bitsToFp(uint64_t bits, double junk)
// True when val is a signaling NaN: a NaN whose quiet bit (top bit of the
// significand) is clear.
203 template <class fpType>
207 const bool single = (sizeof(fpType) == sizeof(float));
// Quiet-NaN pattern: all-ones exponent plus the quiet bit set.
208 const uint64_t qnan =
209 single ? 0x7fc00000 : ULL(0x7ff8000000000000);
210 return std::isnan(val) && ((fpToBits(val) & qnan) != qnan);
// Saved host floating-point environment state (rounding mode).
213 typedef int VfpSavedState;
// Prepare the host FP state for emulating a guest operation, and restore
// it afterwards while merging host exceptions into the guest FPSCR
// (masked by 'mask').
215 VfpSavedState prepFpState(uint32_t rMode);
216 void finishVfp(FPSCR &fpscr, VfpSavedState state, bool flush, FPSCR mask = FpscrExcMask);
// Fix up a host-computed result to match VFP semantics (NaN handling,
// etc.) using the operands — bodies defined elsewhere.
218 template <class fpType>
219 fpType fixDest(FPSCR fpscr, fpType val, fpType op1);
221 template <class fpType>
222 fpType fixDest(FPSCR fpscr, fpType val, fpType op1, fpType op2);
224 template <class fpType>
225 fpType fixDivDest(FPSCR fpscr, fpType val, fpType op1, fpType op2);
// Cross-precision fix-ups: double source -> single dest and vice versa.
227 float fixFpDFpSDest(FPSCR fpscr, double val);
228 double fixFpSFpDDest(FPSCR fpscr, float val);
// Half-precision conversions; 'ahp' presumably selects the ARM
// alternative half-precision format and 'defaultNan' forces default NaN
// results — confirm against the implementation.
230 uint16_t vcvtFpSFpH(FPSCR &fpscr, bool flush, bool defaultNan,
231 uint32_t rMode, bool ahp, float op);
232 uint16_t vcvtFpDFpH(FPSCR &fpscr, bool flush, bool defaultNan,
233 uint32_t rMode, bool ahp, double op);
235 float vcvtFpHFpS(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op);
236 double vcvtFpHFpD(FPSCR &fpscr, bool defaultNan, bool ahp, uint16_t op);
// Assemble a double from its two 32-bit register halves ('low' is the low
// 32 bits of the representation) and split one back apart.
239 makeDouble(uint32_t low, uint32_t high)
242 return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
// Low half: the uint32_t return truncates fpToBits() to its low 32 bits.
245 static inline uint32_t
246 lowFromDouble(double val)
248 return fpToBits(val);
// High half: shift the upper 32 bits down before truncation.
251 static inline uint32_t
252 highFromDouble(double val)
254 return fpToBits(val) >> 32;
// Replace the host FP exception flags with exactly 'exceptions'.
258 setFPExceptions(int exceptions) {
259 feclearexcept(FeAllExceptions);
260 feraiseexcept(exceptions);
// Convert a floating-point value to a fixed-point integer of 'width' bits
// with 'imm' fraction bits, saturating on overflow and accumulating host
// FP exception flags.  NOTE(review): this view is heavily elided — the
// switch scaffolding, several branches, and variable declarations
// (origVal, finalVal, rmode) are not visible here.
263 template <typename T>
265 vfpFpToFixed(T val, bool isSigned, uint8_t width, uint8_t imm, bool
266 useRmode = true, VfpRoundingMode roundMode = VfpRoundZero,
267 bool aarch64 = false)
270 bool roundAwayFix = false;
// Default to the current host rounding mode when no explicit mode is used.
273 rmode = fegetround();
// Map the VFP rounding mode onto the host <fenv.h> equivalent.
277 case VfpRoundNearest:
278 rmode = FeRoundNearest;
281 rmode = FeRoundUpward;
290 // There is no equivalent rounding mode, use round down and we'll
296 panic("Unsupported roundMode %d\n", roundMode);
// Empty asm with memory operands: compiler barrier that forces the value
// through memory so the FP environment changes take effect in order.
299 __asm__ __volatile__("" : "=m" (rmode) : "m" (rmode));
300 fesetround(FeRoundNearest);
// Scale by 2^imm to shift the fraction bits into the integer part.
301 val = val * pow(2.0, imm);
302 __asm__ __volatile__("" : "=m" (val) : "m" (val));
304 feclearexcept(FeAllExceptions);
305 __asm__ __volatile__("" : "=m" (val) : "m" (val));
308 __asm__ __volatile__("" : "=m" (val) : "m" (val));
310 int exceptions = fetestexcept(FeAllExceptions);
312 int fpType = std::fpclassify(val);
313 if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
// A NaN input to a convert is an Invalid Operation.
314 if (fpType == FP_NAN) {
315 exceptions |= FeInvalid;
318 } else if (origVal != val) {
321 if (origVal - val > 0.5)
323 else if (val - origVal > 0.5)
328 // The ordering on the subtraction looks a bit odd in that we
329 // don't do the obvious origVal - val, instead we do
330 // -(val - origVal). This is required to get the correct bit
331 // exact behaviour when very close to the 0.5 threshold.
332 volatile T error = val;
335 if ( (error > 0.5) ||
336 ((error == 0.5) && (val >= 0)) )
// Rounding changed the value, so the conversion is inexact.
348 exceptions |= FeInexact;
351 __asm__ __volatile__("" : "=m" (val) : "m" (val));
354 bool outOfRange = false;
355 int64_t result = (int64_t) val;
// Truncate the result to the requested signed width.
360 finalVal = (int16_t)val;
361 } else if (width == 32) {
362 finalVal =(int32_t)val;
363 } else if (width == 64) {
366 panic("Unsupported width %d\n", width);
369 // check if value is in range
370 int64_t minVal = ~mask(width-1);
371 if ((double)val < minVal) {
375 int64_t maxVal = mask(width-1);
376 if ((double)val > maxVal) {
381 bool isNeg = val < 0;
382 finalVal = result & mask(width);
383 // If the result is supposed to be less than 64 bits check that the
384 // upper bits that got thrown away are just sign extension bits
386 outOfRange = ((uint64_t) result >> (width - 1)) !=
387 (isNeg ? mask(64-width+1) : 0);
389 // If the original floating point value doesn't match the
390 // integer version, we are also out of range. So create a saturated
// Negative overflow saturates to the most negative representable value.
393 outOfRange |= val < result;
395 finalVal = 1LL << (width-1);
// Positive overflow saturates to the most positive representable value.
398 outOfRange |= val > result;
400 finalVal = mask(width-1);
405 // Raise an exception if the value was out of range
// Saturation is Invalid, not Inexact, so drop the inexact flag.
407 exceptions |= FeInvalid;
408 exceptions &= ~FeInexact;
410 setFPExceptions(exceptions);
// Unsigned destination: a negative source is out of range (Invalid).
413 if ((double)val < 0) {
414 exceptions |= FeInvalid;
415 exceptions &= ~FeInexact;
416 setFPExceptions(exceptions);
420 uint64_t result = ((uint64_t) val) & mask(width);
422 exceptions |= FeInvalid;
423 exceptions &= ~FeInexact;
424 setFPExceptions(exceptions);
428 setFPExceptions(exceptions);
// Fixed-point to floating-point conversions (unsigned/signed source,
// single/double destination).  'imm' is presumably the number of
// fraction bits — confirm against the implementation.
434 float vfpUFixedToFpS(bool flush, bool defaultNan,
435 uint64_t val, uint8_t width, uint8_t imm);
436 float vfpSFixedToFpS(bool flush, bool defaultNan,
437 int64_t val, uint8_t width, uint8_t imm);
439 double vfpUFixedToFpD(bool flush, bool defaultNan,
440 uint64_t val, uint8_t width, uint8_t imm);
441 double vfpSFixedToFpD(bool flush, bool defaultNan,
442 int64_t val, uint8_t width, uint8_t imm);
// Reciprocal / reciprocal-square-root estimate helpers (float and raw
// unsigned forms) — bodies defined elsewhere.
444 float fprSqrtEstimate(FPSCR &fpscr, float op);
445 uint32_t unsignedRSqrtEstimate(uint32_t op);
447 float fpRecipEstimate(FPSCR &fpscr, float op);
448 uint32_t unsignedRecipEstimate(uint32_t op);
// Base class for VFP macro-ops (vector-mode operations expanded into
// micro-ops).  NOTE(review): member declarations and method bodies are
// elided from this view.
450 class VfpMacroOp : public PredMacroOp
// A register index is in the scalar bank when it falls in the first
// bank of 8 (idx % 32 < 8).
454 inScalarBank(IntRegIndex idx)
456 return (idx % 32) < 8;
462 VfpMacroOp(const char *mnem, ExtMachInst _machInst,
463 OpClass __opClass, bool _wide) :
464 PredMacroOp(mnem, _machInst, __opClass), wide(_wide)
// Step register indices to the next element of a vector operation.
467 IntRegIndex addStride(IntRegIndex idx, unsigned stride);
468 void nextIdxs(IntRegIndex &dest, IntRegIndex &op1, IntRegIndex &op2);
469 void nextIdxs(IntRegIndex &dest, IntRegIndex &op1);
470 void nextIdxs(IntRegIndex &dest);
// Scalar arithmetic helpers (add/sub/div in single and double precision),
// usable as the function-pointer argument of the generic op wrappers.
// NOTE(review): template headers at 473/480 and all bodies are elided.
473 template <typename T>
480 template <typename T>
488 fpAddS(float a, float b)
494 fpAddD(double a, double b)
500 fpSubS(float a, float b)
506 fpSubD(double a, double b)
512 fpDivS(float a, float b)
518 fpDivD(double a, double b)
// Multiply helper with explicit special-case handling.  NOTE(review):
// heavily elided — the visible code captures an operand's raw bits into
// 'opData' (presumably to preserve a sign for the special-case result)
// and detects the infinity-times-zero case; the result construction is
// not visible.  Looks like the FMULX-style helper — confirm.
523 template <typename T>
530 template <typename T>
537 const bool single = (sizeof(T) == sizeof(float));
539 opData = (fpToBits(a));
541 opData = (fpToBits(b));
544 opData = (fpToBits(a));
546 opData = (fpToBits(b));
// Classify operands to catch the inf * 0 / 0 * inf combination.
549 bool inf1 = (std::fpclassify(a) == FP_INFINITE);
550 bool inf2 = (std::fpclassify(b) == FP_INFINITE);
551 bool zero1 = (std::fpclassify(a) == FP_ZERO);
552 bool zero2 = (std::fpclassify(b) == FP_ZERO);
553 if ((inf1 && zero2) || (zero1 && inf2)) {
// Multiply helpers in single and double precision (template header and
// bodies elided in this view).
564 template <typename T>
572 fpMulS(float a, float b)
578 fpMulD(double a, double b)
// Fused multiply-add: result = op1 * op2 + addend, computed via the C99
// fma()/fmaf() so there is a single rounding.
583 template <typename T>
585 // @todo remove this when all calls to it have been replaced with the new fplib implementation
586 fpMulAdd(T op1, T op2, T addend)
// Pick the precision-matching libm entry point.
590 if (sizeof(T) == sizeof(float))
591 result = fmaf(op1, op2, addend);
593 result = fma(op1, op2, addend);
595 // ARM doesn't generate negative NaNs from this operation, so clear the
// sign bit when a NaN was produced from entirely non-NaN inputs.
596 if (std::isnan(result) && !std::isnan(op1) &&
597 !std::isnan(op2) && !std::isnan(addend))
599 uint64_t bitMask = ULL(0x1) << ((sizeof(T) * 8) - 1);
600 result = bitsToFp(fpToBits(result) & ~bitMask, op1);
// Round to an integral floating-point value; the visible test raises a
// flag (presumably Inexact in fpscr) when rounding actually changed a
// non-NaN value.  Body largely elided — confirm against full source.
605 template <typename T>
607 fpRIntX(T a, FPSCR &fpscr)
612 if (rVal != a && !std::isnan(a))
// maxNum-style maximum: a quiet-NaN operand yields the other operand,
// and +0 is ordered above -0.
617 template <typename T>
621 const bool single = (sizeof(T) == sizeof(float));
622 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
// If one side is a quiet NaN, return the other side.
625 return ((fpToBits(a) & qnan) == qnan) ? b : a;
627 return ((fpToBits(b) & qnan) == qnan) ? a : b;
628 // Handle comparisons of +0 and -0.
629 if (!std::signbit(a) && std::signbit(b))
// Maximum variant that falls back to fpMaxNum for the common case
// (its own special-case handling is elided from this view).
634 template <typename T>
642 return fpMaxNum<T>(a, b);
// minNum-style minimum: a quiet-NaN operand yields the other operand,
// and -0 is ordered below +0.  Mirror image of fpMaxNum above.
645 template <typename T>
649 const bool single = (sizeof(T) == sizeof(float));
650 const uint64_t qnan = single ? 0x7fc00000 : ULL(0x7ff8000000000000);
// If one side is a quiet NaN, return the other side.
653 return ((fpToBits(a) & qnan) == qnan) ? b : a;
655 return ((fpToBits(b) & qnan) == qnan) ? a : b;
656 // Handle comparisons of +0 and -0.
657 if (std::signbit(a) && !std::signbit(b))
// Minimum variant that falls back to fpMinNum for the common case
// (its own special-case handling is elided from this view).
662 template <typename T>
670 return fpMinNum<T>(a, b);
// Reciprocal square-root step: computes (3 - a*b) / 2, with explicit
// handling of the 0 * inf case and an Underflow flag when the product
// is subnormal.
673 template <typename T>
677 int fpClassA = std::fpclassify(a);
678 int fpClassB = std::fpclassify(b);
// 0 * inf would be NaN; handled specially (result elided in this view).
682 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
683 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
687 fpClassAxB = std::fpclassify(aXb);
688 if (fpClassAxB == FP_SUBNORMAL) {
// Subnormal intermediate product: report underflow to the caller.
689 feraiseexcept(FeUnderflow);
692 return (3.0 - (a * b)) / 2.0;
// Reciprocal step: computes 2 - a*b, with explicit handling of the
// 0 * inf case and an Underflow flag when the product is subnormal.
695 template <typename T>
699 int fpClassA = std::fpclassify(a);
700 int fpClassB = std::fpclassify(b);
// 0 * inf would be NaN; handled specially (result elided in this view).
704 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
705 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
709 fpClassAxB = std::fpclassify(aXb);
710 if (fpClassAxB == FP_SUBNORMAL) {
// Subnormal intermediate product: report underflow to the caller.
711 feraiseexcept(FeUnderflow);
714 return 2.0 - (a * b);
// Single-precision reciprocal square-root step; same algorithm as the
// template fpRSqrts above: (3 - a*b) / 2 with 0*inf and underflow
// handling.
719 fpRSqrtsS(float a, float b)
721 int fpClassA = std::fpclassify(a);
722 int fpClassB = std::fpclassify(b);
// 0 * inf would be NaN; handled specially (result elided in this view).
726 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
727 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
731 fpClassAxB = std::fpclassify(aXb);
732 if (fpClassAxB == FP_SUBNORMAL) {
733 feraiseexcept(FeUnderflow);
736 return (3.0 - (a * b)) / 2.0;
// Single-precision reciprocal step; same algorithm as the template
// fpRecps above: 2 - a*b with 0*inf and underflow handling.
740 fpRecpsS(float a, float b)
742 int fpClassA = std::fpclassify(a);
743 int fpClassB = std::fpclassify(b);
// 0 * inf would be NaN; handled specially (result elided in this view).
747 if ((fpClassA == FP_ZERO && fpClassB == FP_INFINITE) ||
748 (fpClassA == FP_INFINITE && fpClassB == FP_ZERO)) {
752 fpClassAxB = std::fpclassify(aXb);
753 if (fpClassAxB == FP_SUBNORMAL) {
754 feraiseexcept(FeUnderflow);
757 return 2.0 - (a * b);
// Tie-breaking adjustment for round-to-nearest: on an exact .5 remainder,
// step 'val' toward the even neighbour.  NOTE(review): function name,
// signature, and the computation of 'val' are elided from this view.
760 template <typename T>
766 if (a - val == 0.5) {
767 if ( (((int) a) & 1) == 0 ) val += 1.0;
769 else if (a - val == -0.5) {
770 if ( (((int) a) & 1) == 0 ) val -= 1.0;
// Base class for VFP/NEON floating-point instructions.  Provides generic
// unary/binary/ternary operation wrappers, NaN processing, and micro-op
// aware PC advancement.  NOTE(review): many member bodies are elided.
777 class FpOp : public PredOp
780 FpOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass) :
781 PredOp(mnem, _machInst, __opClass)
// doOp overloads are meant to be provided by subclasses; the base
// versions panic if ever reached.
785 doOp(float op1, float op2) const
787 panic("Unimplemented version of doOp called.\n");
791 doOp(float op1) const
793 panic("Unimplemented version of doOp called.\n");
797 doOp(double op1, double op2) const
799 panic("Unimplemented version of doOp called.\n");
803 doOp(double op1) const
805 panic("Unimplemented version of doOp called.\n");
// Pack a double from two 32-bit register halves and split one back
// apart (duplicates the free helpers earlier in this file).
809 dbl(uint32_t low, uint32_t high) const
812 return bitsToFp((uint64_t)low | ((uint64_t)high << 32), junk);
816 dblLow(double val) const
818 return fpToBits(val);
822 dblHi(double val) const
824 return fpToBits(val) >> 32;
827 template <class fpType>
// NaN processing for two-operand ops; presumably sets 'done' when the
// NaN path produced the result — confirm against the implementation.
829 processNans(FPSCR &fpscr, bool &done, bool defaultNan,
830 fpType op1, fpType op2) const;
// Generic wrappers: run 'func' on the operands under the guest rounding
// mode / flush-to-zero settings and fold host FP exceptions into fpscr.
832 template <class fpType>
834 ternaryOp(FPSCR &fpscr, fpType op1, fpType op2, fpType op3,
835 fpType (*func)(fpType, fpType, fpType),
836 bool flush, bool defaultNan, uint32_t rMode) const;
838 template <class fpType>
840 binaryOp(FPSCR &fpscr, fpType op1, fpType op2,
841 fpType (*func)(fpType, fpType),
842 bool flush, bool defaultNan, uint32_t rMode) const;
844 template <class fpType>
846 unaryOp(FPSCR &fpscr, fpType op1,
847 fpType (*func)(fpType),
848 bool flush, uint32_t rMode) const;
// PC advancement that is aware of micro-op boundaries: the last
// micro-op ends the macro-op; intermediate micro-ops take a different
// path (bodies elided).
851 advancePC(PCState &pcState) const
853 if (flags[IsLastMicroop]) {
855 } else if (flags[IsMicroop]) {
// Square root via the host libm, routed through unaryOp so rounding
// mode and exception state are handled like every other operation.
863 fpSqrt (FPSCR fpscr,float x) const
866 return unaryOp(fpscr,x,sqrtf,fpscr.fz,fpscr.rMode);
871 fpSqrt (FPSCR fpscr,double x) const
874 return unaryOp(fpscr,x,sqrt,fpscr.fz,fpscr.rMode);
// Base for FP conditional-compare instructions: two source registers, a
// condition, and a default condition-code value (presumably used when
// the condition fails, FCCMP-style — confirm against the decoder).
// NOTE(review): the 'defCc' member declaration is elided from this view.
879 class FpCondCompRegOp : public FpOp
882 IntRegIndex op1, op2;
883 ConditionCode condCode;
886 FpCondCompRegOp(const char *mnem, ExtMachInst _machInst,
887 OpClass __opClass, IntRegIndex _op1, IntRegIndex _op2,
888 ConditionCode _condCode, uint8_t _defCc) :
889 FpOp(mnem, _machInst, __opClass),
890 op1(_op1), op2(_op2), condCode(_condCode), defCc(_defCc)
893 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
// Base for FP conditional-select instructions: chooses between op1 and
// op2 based on condCode (FCSEL-style — confirm against the decoder).
896 class FpCondSelOp : public FpOp
899 IntRegIndex dest, op1, op2;
900 ConditionCode condCode;
902 FpCondSelOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
903 IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
904 ConditionCode _condCode) :
905 FpOp(mnem, _machInst, __opClass),
906 dest(_dest), op1(_op1), op2(_op2), condCode(_condCode)
909 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
// Base for two-register FP instructions (dest, op1).  The constructor
// applies the micro-op flags for the given VfpMicroMode.
// NOTE(review): member declarations are elided from this view.
912 class FpRegRegOp : public FpOp
918 FpRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
919 IntRegIndex _dest, IntRegIndex _op1,
920 VfpMicroMode mode = VfpNotAMicroop) :
921 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1)
923 setVfpMicroFlags(mode, flags);
926 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
// Base for register + immediate FP instructions (dest, imm).
// NOTE(review): member declarations are elided from this view.
929 class FpRegImmOp : public FpOp
935 FpRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
936 IntRegIndex _dest, uint64_t _imm,
937 VfpMicroMode mode = VfpNotAMicroop) :
938 FpOp(mnem, _machInst, __opClass), dest(_dest), imm(_imm)
940 setVfpMicroFlags(mode, flags);
943 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
// Base for two-register + immediate FP instructions (dest, op1, imm).
// NOTE(review): member declarations are elided from this view.
946 class FpRegRegImmOp : public FpOp
953 FpRegRegImmOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
954 IntRegIndex _dest, IntRegIndex _op1,
955 uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
956 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), imm(_imm)
958 setVfpMicroFlags(mode, flags);
961 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
// Base for three-register FP instructions (dest, op1, op2).
// NOTE(review): member declarations are elided from this view.
964 class FpRegRegRegOp : public FpOp
971 FpRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
972 IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
973 VfpMicroMode mode = VfpNotAMicroop) :
974 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2)
976 setVfpMicroFlags(mode, flags);
979 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
// Base for four-register FP instructions (dest, op1, op2, op3), e.g.
// fused multiply-accumulate forms.  NOTE(review): member declarations
// and the op3 initializer line are elided from this view.
982 class FpRegRegRegRegOp : public FpOp
990 FpRegRegRegRegOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass,
991 IntRegIndex _dest, IntRegIndex _op1, IntRegIndex _op2,
992 IntRegIndex _op3, VfpMicroMode mode = VfpNotAMicroop) :
993 FpOp(mnem, _machInst, __opClass), dest(_dest), op1(_op1), op2(_op2),
996 setVfpMicroFlags(mode, flags);
999 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
// Base for three-register + immediate FP instructions (dest, op1, op2,
// imm).  NOTE(review): member declarations are elided from this view.
1002 class FpRegRegRegImmOp : public FpOp
1010 FpRegRegRegImmOp(const char *mnem, ExtMachInst _machInst,
1011 OpClass __opClass, IntRegIndex _dest,
1012 IntRegIndex _op1, IntRegIndex _op2,
1013 uint64_t _imm, VfpMicroMode mode = VfpNotAMicroop) :
1014 FpOp(mnem, _machInst, __opClass),
1015 dest(_dest), op1(_op1), op2(_op2), imm(_imm)
1017 setVfpMicroFlags(mode, flags);
1020 std::string generateDisassembly(Addr pc, const SymbolTable *symtab) const;
1025 #endif //__ARCH_ARM_INSTS_VFP_HH__