ARM: Implement flush to zero mode for VFP, and clean up some corner cases.

author Gabe Black <gblack@eecs.umich.edu>

Wed, 2 Jun 2010 17:58:15 +0000 (12:58 -0500)

committer Gabe Black <gblack@eecs.umich.edu>

Wed, 2 Jun 2010 17:58:15 +0000 (12:58 -0500)
author Gabe Black <gblack@eecs.umich.edu>
Wed, 2 Jun 2010 17:58:15 +0000 (12:58 -0500)
committer Gabe Black <gblack@eecs.umich.edu>
Wed, 2 Jun 2010 17:58:15 +0000 (12:58 -0500)
diff --git a/src/arch/arm/insts/vfp.hh b/src/arch/arm/insts/vfp.hh

index 5a0ecf828d72547c2bcb5daf7c36789c0cf67656..b0fc8b6dc030af005047d8f06bef5e725cfe2dbf 100644 (file)
--- a/src/arch/arm/insts/vfp.hh
+++ b/src/arch/arm/insts/vfp.hh
@@ -43,6 +43,7 @@
  #include "arch/arm/insts/misc.hh"
  #include "arch/arm/miscregs.hh"
  #include <fenv.h>
+#include <cmath>
  
  enum VfpMicroMode {
      VfpNotAMicroop,
@@ -101,6 +102,26 @@ enum VfpRoundingMode
      VfpRoundZero = 3
  };
  
+/**
+ * Apply VFP flush-to-zero handling to one input operand.
+ *
+ * When the fz bit of the supplied FPSCR value is set and op is subnormal,
+ * op is replaced by a zero carrying the same sign and the idc bit is set
+ * in _fpscr. In every other case both arguments are left unchanged.
+ *
+ * @param _fpscr Raw FPSCR value; updated in place when an operand is flushed.
+ * @param op Floating point operand; overwritten with a signed zero if flushed.
+ */
+template <class fpType>
+static inline void
+vfpFlushToZero(uint32_t &_fpscr, fpType &op)
+{
+    FPSCR fpscr = _fpscr;
+    if (fpscr.fz == 1 && (std::fpclassify(op) == FP_SUBNORMAL)) {
+        fpscr.idc = 1;
+        // Keep the sign of the flushed operand: a negative denormal has to
+        // behave as -0.0, otherwise results such as 1.0 / op or sqrt(op)
+        // come out with the wrong sign.
+        op = std::copysign((fpType)0.0, op);
+    }
+    _fpscr = fpscr;
+}
+
+/**
+ * Two operand form: applies the single operand flush to op1 and then to
+ * op2, accumulating any flag updates in the same FPSCR value.
+ *
+ * @param fpscr Raw FPSCR value; updated in place when an operand is flushed.
+ * @param op1 First operand, flushed in place if subnormal.
+ * @param op2 Second operand, flushed in place if subnormal.
+ */
+template <class fpType>
+static inline void
+vfpFlushToZero(uint32_t &fpscr, fpType &op1, fpType &op2)
+{
+    vfpFlushToZero(fpscr, op1);
+    vfpFlushToZero(fpscr, op2);
+}
+
  static inline uint64_t
  vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm)
  {
@@ -108,24 +129,41 @@ vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm)
      val = val * powf(2.0, imm);
      __asm__ __volatile__("" : "=m" (val) : "m" (val));
      feclearexcept(FeAllExceptions);
+    __asm__ __volatile__("" : "=m" (val) : "m" (val));
+    float origVal = val;
+    val = rintf(val);
+    int fpType = std::fpclassify(val);
+    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
+        if (fpType == FP_NAN) {
+            feraiseexcept(FeInvalid);
+        }
+        val = 0.0;
+    } else if (origVal != val) {
+        feraiseexcept(FeInexact);
+    }
+
      if (isSigned) {
          if (half) {
              if ((double)val < (int16_t)(1 << 15)) {
                  feraiseexcept(FeInvalid);
+                feclearexcept(FeInexact);
                  return (int16_t)(1 << 15);
              }
              if ((double)val > (int16_t)mask(15)) {
                  feraiseexcept(FeInvalid);
+                feclearexcept(FeInexact);
                  return (int16_t)mask(15);
              }
              return (int16_t)val;
          } else {
              if ((double)val < (int32_t)(1 << 31)) {
                  feraiseexcept(FeInvalid);
+                feclearexcept(FeInexact);
                  return (int32_t)(1 << 31);
              }
              if ((double)val > (int32_t)mask(31)) {
                  feraiseexcept(FeInvalid);
+                feclearexcept(FeInexact);
                  return (int32_t)mask(31);
              }
              return (int32_t)val;
@@ -134,20 +172,24 @@ vfpFpSToFixed(float val, bool isSigned, bool half, uint8_t imm)
          if (half) {
              if ((double)val < 0) {
                  feraiseexcept(FeInvalid);
+                feclearexcept(FeInexact);
                  return 0;
              }
              if ((double)val > (mask(16))) {
                  feraiseexcept(FeInvalid);
+                feclearexcept(FeInexact);
                  return mask(16);
              }
              return (uint16_t)val;
          } else {
              if ((double)val < 0) {
                  feraiseexcept(FeInvalid);
+                feclearexcept(FeInexact);
                  return 0;
              }
              if ((double)val > (mask(32))) {
                  feraiseexcept(FeInvalid);
+                feclearexcept(FeInexact);
                  return mask(32);
              }
              return (uint32_t)val;
@@ -161,7 +203,11 @@ vfpUFixedToFpS(uint32_t val, bool half, uint8_t imm)
      fesetround(FeRoundNearest);
      if (half)
          val = (uint16_t)val;
-    return val / powf(2.0, imm);
+    float scale = powf(2.0, imm);
+    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
+    feclearexcept(FeAllExceptions);
+    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
+    return val / scale;
  }
  
  static inline float
@@ -170,34 +216,55 @@ vfpSFixedToFpS(int32_t val, bool half, uint8_t imm)
      fesetround(FeRoundNearest);
      if (half)
          val = sext<16>(val & mask(16));
-    return val / powf(2.0, imm);
+    float scale = powf(2.0, imm);
+    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
+    feclearexcept(FeAllExceptions);
+    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
+    return val / scale;
  }
  
  static inline uint64_t
  vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm)
  {
-    fesetround(FeRoundZero);
+    fesetround(FeRoundNearest);
      val = val * pow(2.0, imm);
      __asm__ __volatile__("" : "=m" (val) : "m" (val));
+    fesetround(FeRoundZero);
      feclearexcept(FeAllExceptions);
+    __asm__ __volatile__("" : "=m" (val) : "m" (val));
+    double origVal = val;
+    val = rint(val);
+    int fpType = std::fpclassify(val);
+    if (fpType == FP_SUBNORMAL || fpType == FP_NAN) {
+        if (fpType == FP_NAN) {
+            feraiseexcept(FeInvalid);
+        }
+        val = 0.0;
+    } else if (origVal != val) {
+        feraiseexcept(FeInexact);
+    }
      if (isSigned) {
          if (half) {
              if (val < (int16_t)(1 << 15)) {
                  feraiseexcept(FeInvalid);
+                feclearexcept(FeInexact);
                  return (int16_t)(1 << 15);
              }
              if (val > (int16_t)mask(15)) {
                  feraiseexcept(FeInvalid);
+                feclearexcept(FeInexact);
                  return (int16_t)mask(15);
              }
              return (int16_t)val;
          } else {
              if (val < (int32_t)(1 << 31)) {
                  feraiseexcept(FeInvalid);
+                feclearexcept(FeInexact);
                  return (int32_t)(1 << 31);
              }
              if (val > (int32_t)mask(31)) {
                  feraiseexcept(FeInvalid);
+                feclearexcept(FeInexact);
                  return (int32_t)mask(31);
              }
              return (int32_t)val;
@@ -206,20 +273,24 @@ vfpFpDToFixed(double val, bool isSigned, bool half, uint8_t imm)
          if (half) {
              if (val < 0) {
                  feraiseexcept(FeInvalid);
+                feclearexcept(FeInexact);
                  return 0;
              }
              if (val > mask(16)) {
                  feraiseexcept(FeInvalid);
+                feclearexcept(FeInexact);
                  return mask(16);
              }
              return (uint16_t)val;
          } else {
              if (val < 0) {
                  feraiseexcept(FeInvalid);
+                feclearexcept(FeInexact);
                  return 0;
              }
              if (val > mask(32)) {
                  feraiseexcept(FeInvalid);
+                feclearexcept(FeInexact);
                  return mask(32);
              }
              return (uint32_t)val;
@@ -233,7 +304,11 @@ vfpUFixedToFpD(uint32_t val, bool half, uint8_t imm)
      fesetround(FeRoundNearest);
      if (half)
          val = (uint16_t)val;
-    return val / pow(2.0, imm);
+    double scale = pow(2.0, imm);
+    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
+    feclearexcept(FeAllExceptions);
+    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
+    return val / scale;
  }
  
  static inline double
@@ -242,7 +317,11 @@ vfpSFixedToFpD(int32_t val, bool half, uint8_t imm)
      fesetround(FeRoundNearest);
      if (half)
          val = sext<16>(val & mask(16));
-    return val / pow(2.0, imm);
+    double scale = pow(2.0, imm);
+    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
+    feclearexcept(FeAllExceptions);
+    __asm__ __volatile__("" : "=m" (scale) : "m" (scale));
+    return val / scale;
  }
  
  typedef int VfpSavedState;
diff --git a/src/arch/arm/isa/insts/fp.isa b/src/arch/arm/isa/insts/fp.isa

index 6503e05f1652862d1b0b28705d03584b34ecde62..cd1ddc498f60eea072cd63b65d8fa2bf744d7aa6 100644 (file)
--- a/src/arch/arm/isa/insts/fp.isa
+++ b/src/arch/arm/isa/insts/fp.isa
@@ -383,6 +383,7 @@ let {{
      exec_output = ""
  
      vmulSCode = '''
+        vfpFlushToZero(Fpscr, FpOp1, FpOp2);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
          FpDest = FpOp1 * FpOp2;
@@ -403,6 +404,7 @@ let {{
          IntDoubleUnion cOp1, cOp2, cDest;
          cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
          cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
+        vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
          cDest.fp = cOp1.fp * cOp2.fp;
@@ -471,6 +473,7 @@ let {{
      exec_output += PredOpExecute.subst(vabsDIop);
  
      vaddSCode = '''
+        vfpFlushToZero(Fpscr, FpOp1, FpOp2);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
          FpDest = FpOp1 + FpOp2;
@@ -488,6 +491,7 @@ let {{
          IntDoubleUnion cOp1, cOp2, cDest;
          cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
          cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
+        vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
          cDest.fp = cOp1.fp + cOp2.fp;
@@ -504,6 +508,7 @@ let {{
      exec_output += PredOpExecute.subst(vaddDIop);
  
      vsubSCode = '''
+        vfpFlushToZero(Fpscr, FpOp1, FpOp2);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
          FpDest = FpOp1 - FpOp2;
@@ -521,6 +526,7 @@ let {{
          IntDoubleUnion cOp1, cOp2, cDest;
          cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
          cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
+        vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
          cDest.fp = cOp1.fp - cOp2.fp;
@@ -537,6 +543,7 @@ let {{
      exec_output += PredOpExecute.subst(vsubDIop);
  
      vdivSCode = '''
+        vfpFlushToZero(Fpscr, FpOp1, FpOp2);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
          FpDest = FpOp1 / FpOp2;
@@ -554,6 +561,7 @@ let {{
          IntDoubleUnion cOp1, cOp2, cDest;
          cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
          cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
+        vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cDest.fp));
          cDest.fp = cOp1.fp / cOp2.fp;
@@ -570,6 +578,7 @@ let {{
      exec_output += PredOpExecute.subst(vdivDIop);
  
      vsqrtSCode = '''
+        vfpFlushToZero(Fpscr, FpOp1);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
          FpDest = sqrtf(FpOp1);
@@ -589,6 +598,7 @@ let {{
      vsqrtDCode = '''
          IntDoubleUnion cOp1, cDest;
          cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        vfpFlushToZero(Fpscr, cOp1.fp);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cDest.fp));
          cDest.fp = sqrt(cOp1.fp);
@@ -615,12 +625,14 @@ let {{
      exec_output = ""
  
      vmlaSCode = '''
+        vfpFlushToZero(Fpscr, FpOp1, FpOp2);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
          float mid = FpOp1 * FpOp2;
          if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
              mid = NAN;
          }
+        vfpFlushToZero(Fpscr, FpDest, mid);
          FpDest = FpDest + mid;
          __asm__ __volatile__("" :: "m" (FpDest));
          Fpscr = setVfpFpscr(Fpscr, state);
@@ -637,6 +649,7 @@ let {{
          cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
          cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
          cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+        vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
          double mid = cOp1.fp * cOp2.fp;
@@ -644,6 +657,7 @@ let {{
                  (isinf(cOp2.fp) && cOp1.fp == 0)) {
              mid = NAN;
          }
+        vfpFlushToZero(Fpscr, cDest.fp, mid);
          cDest.fp = cDest.fp + mid;
          __asm__ __volatile__("" :: "m" (cDest.fp));
          Fpscr = setVfpFpscr(Fpscr, state);
@@ -658,12 +672,14 @@ let {{
      exec_output += PredOpExecute.subst(vmlaDIop);
  
      vmlsSCode = '''
+        vfpFlushToZero(Fpscr, FpOp1, FpOp2);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
          float mid = FpOp1 * FpOp2;
          if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
              mid = NAN;
          }
+        vfpFlushToZero(Fpscr, FpDest, mid);
          FpDest = FpDest - mid;
          __asm__ __volatile__("" :: "m" (FpDest));
          Fpscr = setVfpFpscr(Fpscr, state);
@@ -680,6 +696,7 @@ let {{
          cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
          cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
          cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+        vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
          double mid = cOp1.fp * cOp2.fp;
@@ -688,6 +705,7 @@ let {{
              mid = NAN;
          }
          cDest.fp = cDest.fp - mid;
+        vfpFlushToZero(Fpscr, cDest.fp, mid);
          __asm__ __volatile__("" :: "m" (cDest.fp));
          Fpscr = setVfpFpscr(Fpscr, state);
          FpDestP0.uw = cDest.bits;
@@ -701,12 +719,14 @@ let {{
      exec_output += PredOpExecute.subst(vmlsDIop);
  
      vnmlaSCode = '''
+        vfpFlushToZero(Fpscr, FpOp1, FpOp2);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
          float mid = FpOp1 * FpOp2;
          if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
              mid = NAN;
          }
+        vfpFlushToZero(Fpscr, FpDest, mid);
          FpDest = -FpDest - mid;
          __asm__ __volatile__("" :: "m" (FpDest));
          Fpscr = setVfpFpscr(Fpscr, state);
@@ -723,6 +743,7 @@ let {{
          cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
          cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
          cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+        vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
          double mid = cOp1.fp * cOp2.fp;
@@ -730,6 +751,7 @@ let {{
                  (isinf(cOp2.fp) && cOp1.fp == 0)) {
              mid = NAN;
          }
+        vfpFlushToZero(Fpscr, cDest.fp, mid);
          cDest.fp = -cDest.fp - mid;
          __asm__ __volatile__("" :: "m" (cDest.fp));
          Fpscr = setVfpFpscr(Fpscr, state);
@@ -744,12 +766,14 @@ let {{
      exec_output += PredOpExecute.subst(vnmlaDIop);
  
      vnmlsSCode = '''
+        vfpFlushToZero(Fpscr, FpOp1, FpOp2);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
          float mid = FpOp1 * FpOp2;
          if ((isinf(FpOp1) && FpOp2 == 0) || (isinf(FpOp2) && FpOp1 == 0)) {
              mid = NAN;
          }
+        vfpFlushToZero(Fpscr, FpDest, mid);
          FpDest = -FpDest + mid;
          __asm__ __volatile__("" :: "m" (FpDest));
          Fpscr = setVfpFpscr(Fpscr, state);
@@ -766,6 +790,7 @@ let {{
          cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
          cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
          cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+        vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
          double mid = cOp1.fp * cOp2.fp;
@@ -773,6 +798,7 @@ let {{
                  (isinf(cOp2.fp) && cOp1.fp == 0)) {
              mid = NAN;
          }
+        vfpFlushToZero(Fpscr, cDest.fp, mid);
          cDest.fp = -cDest.fp + mid;
          __asm__ __volatile__("" :: "m" (cDest.fp));
          Fpscr = setVfpFpscr(Fpscr, state);
@@ -787,6 +813,7 @@ let {{
      exec_output += PredOpExecute.subst(vnmlsDIop);
  
      vnmulSCode = '''
+        vfpFlushToZero(Fpscr, FpOp1, FpOp2);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
          float mid = FpOp1 * FpOp2;
@@ -809,6 +836,7 @@ let {{
          cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
          cOp2.bits = ((uint64_t)FpOp2P0.uw | ((uint64_t)FpOp2P1.uw << 32));
          cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+        vfpFlushToZero(Fpscr, cOp1.fp, cOp2.fp);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
          double mid = cOp1.fp * cOp2.fp;
@@ -899,6 +927,7 @@ let {{
      exec_output += PredOpExecute.subst(vcvtSIntFpDIop);
  
      vcvtFpUIntSRCode = '''
+        vfpFlushToZero(Fpscr, FpOp1);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
          FpDest.uw = FpOp1;
@@ -915,6 +944,7 @@ let {{
      vcvtFpUIntDRCode = '''
          IntDoubleUnion cOp1;
          cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        vfpFlushToZero(Fpscr, cOp1.fp);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
          uint64_t result = cOp1.fp;
@@ -930,6 +960,7 @@ let {{
      exec_output += PredOpExecute.subst(vcvtFpUIntDRIop);
  
      vcvtFpSIntSRCode = '''
+        vfpFlushToZero(Fpscr, FpOp1);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
          FpDest.sw = FpOp1;
@@ -946,6 +977,7 @@ let {{
      vcvtFpSIntDRCode = '''
          IntDoubleUnion cOp1;
          cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        vfpFlushToZero(Fpscr, cOp1.fp);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
          int64_t result = cOp1.fp;
@@ -961,6 +993,7 @@ let {{
      exec_output += PredOpExecute.subst(vcvtFpSIntDRIop);
  
      vcvtFpUIntSCode = '''
+        vfpFlushToZero(Fpscr, FpOp1);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          fesetround(FeRoundZero);
          __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
@@ -978,6 +1011,7 @@ let {{
      vcvtFpUIntDCode = '''
          IntDoubleUnion cOp1;
          cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        vfpFlushToZero(Fpscr, cOp1.fp);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          fesetround(FeRoundZero);
          __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
@@ -994,6 +1028,7 @@ let {{
      exec_output += PredOpExecute.subst(vcvtFpUIntDIop);
  
      vcvtFpSIntSCode = '''
+        vfpFlushToZero(Fpscr, FpOp1);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          fesetround(FeRoundZero);
          __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
@@ -1011,6 +1046,7 @@ let {{
      vcvtFpSIntDCode = '''
          IntDoubleUnion cOp1;
          cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        vfpFlushToZero(Fpscr, cOp1.fp);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          fesetround(FeRoundZero);
          __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
@@ -1028,6 +1064,7 @@ let {{
  
      vcvtFpSFpDCode = '''
          IntDoubleUnion cDest;
+        vfpFlushToZero(Fpscr, FpOp1);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
          cDest.fp = FpOp1;
@@ -1046,6 +1083,7 @@ let {{
      vcvtFpDFpSCode = '''
          IntDoubleUnion cOp1;
          cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        vfpFlushToZero(Fpscr, cOp1.fp);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
          FpDest = cOp1.fp;
@@ -1061,6 +1099,7 @@ let {{
  
      vcmpSCode = '''
          FPSCR fpscr = Fpscr;
+        vfpFlushToZero(Fpscr, FpDest, FpOp1);
          if (FpDest == FpOp1) {
              fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0;
          } else if (FpDest < FpOp1) {
@@ -1083,6 +1122,7 @@ let {{
          IntDoubleUnion cOp1, cDest;
          cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
          cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        vfpFlushToZero(Fpscr, cDest.fp, cOp1.fp);
          FPSCR fpscr = Fpscr;
          if (cDest.fp == cOp1.fp) {
              fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0;
@@ -1104,6 +1144,7 @@ let {{
  
      vcmpZeroSCode = '''
          FPSCR fpscr = Fpscr;
+        vfpFlushToZero(Fpscr, FpDest);
          if (FpDest == imm) {
              fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0;
          } else if (FpDest < imm) {
@@ -1125,6 +1166,7 @@ let {{
      vcmpZeroDCode = '''
          IntDoubleUnion cDest;
          cDest.bits = ((uint64_t)FpDestP0.uw | ((uint64_t)FpDestP1.uw << 32));
+        vfpFlushToZero(Fpscr, cDest.fp);
          FPSCR fpscr = Fpscr;
          if (cDest.fp == imm) {
              fpscr.n = 0; fpscr.z = 1; fpscr.c = 1; fpscr.v = 0;
@@ -1152,6 +1194,7 @@ let {{
      exec_output = ""
  
      vcvtFpSFixedSCode = '''
+        vfpFlushToZero(Fpscr, FpOp1);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
          FpDest.sw = vfpFpSToFixed(FpOp1, true, false, imm);
@@ -1168,6 +1211,7 @@ let {{
      vcvtFpSFixedDCode = '''
          IntDoubleUnion cOp1;
          cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        vfpFlushToZero(Fpscr, cOp1.fp);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
          uint64_t mid = vfpFpDToFixed(cOp1.fp, true, false, imm);
@@ -1184,6 +1228,7 @@ let {{
      exec_output += PredOpExecute.subst(vcvtFpSFixedDIop);
  
      vcvtFpUFixedSCode = '''
+        vfpFlushToZero(Fpscr, FpOp1);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
          FpDest.uw = vfpFpSToFixed(FpOp1, false, false, imm);
@@ -1200,6 +1245,7 @@ let {{
      vcvtFpUFixedDCode = '''
          IntDoubleUnion cOp1;
          cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        vfpFlushToZero(Fpscr, cOp1.fp);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
          uint64_t mid = vfpFpDToFixed(cOp1.fp, false, false, imm);
@@ -1280,6 +1326,7 @@ let {{
      exec_output += PredOpExecute.subst(vcvtUFixedFpDIop);
  
      vcvtFpSHFixedSCode = '''
+        vfpFlushToZero(Fpscr, FpOp1);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
          FpDest.sh = vfpFpSToFixed(FpOp1, true, true, imm);
@@ -1297,6 +1344,7 @@ let {{
      vcvtFpSHFixedDCode = '''
          IntDoubleUnion cOp1;
          cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        vfpFlushToZero(Fpscr, cOp1.fp);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
          uint64_t result = vfpFpDToFixed(cOp1.fp, true, true, imm);
@@ -1314,6 +1362,7 @@ let {{
      exec_output += PredOpExecute.subst(vcvtFpSHFixedDIop);
  
      vcvtFpUHFixedSCode = '''
+        vfpFlushToZero(Fpscr, FpOp1);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (FpOp1) : "m" (FpOp1));
          FpDest.uh = vfpFpSToFixed(FpOp1, false, true, imm);
@@ -1331,6 +1380,7 @@ let {{
      vcvtFpUHFixedDCode = '''
          IntDoubleUnion cOp1;
          cOp1.bits = ((uint64_t)FpOp1P0.uw | ((uint64_t)FpOp1P1.uw << 32));
+        vfpFlushToZero(Fpscr, cOp1.fp);
          VfpSavedState state = prepVfpFpscr(Fpscr);
          __asm__ __volatile__("" : "=m" (cOp1.fp) : "m" (cOp1.fp));
          uint64_t mid = vfpFpDToFixed(cOp1.fp, false, true, imm);
author	Gabe Black <gblack@eecs.umich.edu>
	Wed, 2 Jun 2010 17:58:15 +0000 (12:58 -0500)
committer	Gabe Black <gblack@eecs.umich.edu>
	Wed, 2 Jun 2010 17:58:15 +0000 (12:58 -0500)
src/arch/arm/insts/vfp.hh		patch \| blob \| history
src/arch/arm/isa/insts/fp.isa		patch \| blob \| history