FloatingPoint: Add utility functions for largest and smallest normal. (#5174)
authorAina Niemetz <aina.niemetz@gmail.com>
Thu, 1 Oct 2020 18:29:39 +0000 (11:29 -0700)
committerGitHub <noreply@github.com>
Thu, 1 Oct 2020 18:29:39 +0000 (13:29 -0500)
src/util/floatingpoint.cpp
src/util/floatingpoint.h.in
test/unit/util/floatingpoint_black.h

index 7693d37cb04abeefe02cc9c8335264e6a2e1eadd..c84779be06d7d60638749bc821762a02811916d3 100644 (file)
@@ -695,6 +695,25 @@ FloatingPoint FloatingPoint::makeMaxSubnormal(const FloatingPointSize& size,
   return FloatingPoint(size, bvsign.concat(bvexp).concat(bvsig));
 }
 
+FloatingPoint FloatingPoint::makeMinNormal(const FloatingPointSize& size,
+                                           bool sign)
+{
+  BitVector bvsign = sign ? BitVector::mkOne(1) : BitVector::mkZero(1);
+  BitVector bvexp = BitVector::mkOne(size.packedExponentWidth());
+  BitVector bvsig = BitVector::mkZero(size.packedSignificandWidth());
+  return FloatingPoint(size, bvsign.concat(bvexp).concat(bvsig));
+}
+
+FloatingPoint FloatingPoint::makeMaxNormal(const FloatingPointSize& size,
+                                           bool sign)
+{
+  BitVector bvsign = sign ? BitVector::mkOne(1) : BitVector::mkZero(1);
+  BitVector bvexp =
+      BitVector::mkOnes(size.packedExponentWidth()).setBit(0, false);
+  BitVector bvsig = BitVector::mkOnes(size.packedSignificandWidth());
+  return FloatingPoint(size, bvsign.concat(bvexp).concat(bvsig));
+}
+
   /* Operations implemented using symfpu */
   FloatingPoint FloatingPoint::absolute (void) const {
 #ifdef CVC4_USE_SYMFPU
index 637f01104b5f2f54eaa27e53a372b03848e4f43f..9e78c20c07496bf91aac0b7f60bc8d769c6d7bed 100644 (file)
@@ -368,6 +368,20 @@ namespace CVC4 {
      */
     static FloatingPoint makeMaxSubnormal(const FloatingPointSize& size,
                                           bool sign);
+    /**
+     * Create the smallest normal FP value of given size.
+     * t:    The FP size (format).
+     * sign: True for positive sign, false otherwise.
+     */
+    static FloatingPoint makeMinNormal(const FloatingPointSize& size,
+                                       bool sign);
+    /**
+     * Create the largest normal FP value of given size.
+     * t:    The FP size (format).
+     * sign: True for positive sign, false otherwise.
+     */
+    static FloatingPoint makeMaxNormal(const FloatingPointSize& size,
+                                          bool sign);
 
     const FloatingPointLiteral& getLiteral(void) const { return this->fpl; }
 
index 07b87eac3b8f59d022b3a847391faff220f3d2bf..4517bcafc7b49bfae2b9854378b1986f608ce233 100644 (file)
@@ -24,6 +24,8 @@ class FloatingPointBlack : public CxxTest::TestSuite
  public:
   void testMakeMinSubnormal();
   void testMakeMaxSubnormal();
+  void testMakeMinNormal();
+  void testMakeMaxNormal();
 };
 
 void FloatingPointBlack::testMakeMinSubnormal()
@@ -81,3 +83,59 @@ void FloatingPointBlack::testMakeMaxSubnormal()
   FloatingPoint mfp128 = FloatingPoint::makeMaxSubnormal(size128, false);
   TS_ASSERT(mfp128.isSubnormal());
 }
+
+void FloatingPointBlack::testMakeMinNormal()
+{
+  FloatingPointSize size16(5, 11);
+  FloatingPointSize size32(8, 24);
+  FloatingPointSize size64(11, 53);
+  FloatingPointSize size128(15, 113);
+
+  FloatingPoint fp16 = FloatingPoint::makeMinNormal(size16, true);
+  TS_ASSERT(fp16.isNormal());
+  FloatingPoint mfp16 = FloatingPoint::makeMinNormal(size16, false);
+  TS_ASSERT(mfp16.isNormal());
+
+  FloatingPoint fp32 = FloatingPoint::makeMinNormal(size32, true);
+  TS_ASSERT(fp32.isNormal());
+  FloatingPoint mfp32 = FloatingPoint::makeMinNormal(size32, false);
+  TS_ASSERT(mfp32.isNormal());
+
+  FloatingPoint fp64 = FloatingPoint::makeMinNormal(size64, true);
+  TS_ASSERT(fp64.isNormal());
+  FloatingPoint mfp64 = FloatingPoint::makeMinNormal(size64, false);
+  TS_ASSERT(mfp64.isNormal());
+
+  FloatingPoint fp128 = FloatingPoint::makeMinNormal(size128, true);
+  TS_ASSERT(fp128.isNormal());
+  FloatingPoint mfp128 = FloatingPoint::makeMinNormal(size128, false);
+  TS_ASSERT(mfp128.isNormal());
+}
+
+void FloatingPointBlack::testMakeMaxNormal()
+{
+  FloatingPointSize size16(5, 11);
+  FloatingPointSize size32(8, 24);
+  FloatingPointSize size64(11, 53);
+  FloatingPointSize size128(15, 113);
+
+  FloatingPoint fp16 = FloatingPoint::makeMaxNormal(size16, true);
+  TS_ASSERT(fp16.isNormal());
+  FloatingPoint mfp16 = FloatingPoint::makeMaxNormal(size16, false);
+  TS_ASSERT(mfp16.isNormal());
+
+  FloatingPoint fp32 = FloatingPoint::makeMaxNormal(size32, true);
+  TS_ASSERT(fp32.isNormal());
+  FloatingPoint mfp32 = FloatingPoint::makeMaxNormal(size32, false);
+  TS_ASSERT(mfp32.isNormal());
+
+  FloatingPoint fp64 = FloatingPoint::makeMaxNormal(size64, true);
+  TS_ASSERT(fp64.isNormal());
+  FloatingPoint mfp64 = FloatingPoint::makeMaxNormal(size64, false);
+  TS_ASSERT(mfp64.isNormal());
+
+  FloatingPoint fp128 = FloatingPoint::makeMaxNormal(size128, true);
+  TS_ASSERT(fp128.isNormal());
+  FloatingPoint mfp128 = FloatingPoint::makeMaxNormal(size128, false);
+  TS_ASSERT(mfp128.isNormal());
+}