From 4c2c2de951c52ef48704dbffe7ec5848917f1398 Mon Sep 17 00:00:00 2001 From: Aina Niemetz Date: Thu, 1 Oct 2020 11:29:39 -0700 Subject: [PATCH] FloatingPoint: Add utility functions for largest and smallest normal. (#5174) --- src/util/floatingpoint.cpp | 19 +++++++++ src/util/floatingpoint.h.in | 14 +++++++ test/unit/util/floatingpoint_black.h | 58 ++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+) diff --git a/src/util/floatingpoint.cpp b/src/util/floatingpoint.cpp index 7693d37cb..c84779be0 100644 --- a/src/util/floatingpoint.cpp +++ b/src/util/floatingpoint.cpp @@ -695,6 +695,25 @@ FloatingPoint FloatingPoint::makeMaxSubnormal(const FloatingPointSize& size, return FloatingPoint(size, bvsign.concat(bvexp).concat(bvsig)); } +FloatingPoint FloatingPoint::makeMinNormal(const FloatingPointSize& size, + bool sign) +{ + BitVector bvsign = sign ? BitVector::mkOne(1) : BitVector::mkZero(1); + BitVector bvexp = BitVector::mkOne(size.packedExponentWidth()); + BitVector bvsig = BitVector::mkZero(size.packedSignificandWidth()); + return FloatingPoint(size, bvsign.concat(bvexp).concat(bvsig)); +} + +FloatingPoint FloatingPoint::makeMaxNormal(const FloatingPointSize& size, + bool sign) +{ + BitVector bvsign = sign ? BitVector::mkOne(1) : BitVector::mkZero(1); + BitVector bvexp = + BitVector::mkOnes(size.packedExponentWidth()).setBit(0, false); + BitVector bvsig = BitVector::mkOnes(size.packedSignificandWidth()); + return FloatingPoint(size, bvsign.concat(bvexp).concat(bvsig)); +} + /* Operations implemented using symfpu */ FloatingPoint FloatingPoint::absolute (void) const { #ifdef CVC4_USE_SYMFPU diff --git a/src/util/floatingpoint.h.in b/src/util/floatingpoint.h.in index 637f01104..9e78c20c0 100644 --- a/src/util/floatingpoint.h.in +++ b/src/util/floatingpoint.h.in @@ -368,6 +368,20 @@ namespace CVC4 { */ static FloatingPoint makeMaxSubnormal(const FloatingPointSize& size, bool sign); + /** + * Create the smallest normal FP value of given size. + * t: The FP size (format). + * sign: True for positive sign, false otherwise. + */ + static FloatingPoint makeMinNormal(const FloatingPointSize& size, + bool sign); + /** + * Create the largest normal FP value of given size. + * t: The FP size (format). + * sign: True for positive sign, false otherwise. + */ + static FloatingPoint makeMaxNormal(const FloatingPointSize& size, + bool sign); const FloatingPointLiteral& getLiteral(void) const { return this->fpl; } diff --git a/test/unit/util/floatingpoint_black.h b/test/unit/util/floatingpoint_black.h index 07b87eac3..4517bcafc 100644 --- a/test/unit/util/floatingpoint_black.h +++ b/test/unit/util/floatingpoint_black.h @@ -24,6 +24,8 @@ class FloatingPointBlack : public CxxTest::TestSuite public: void testMakeMinSubnormal(); void testMakeMaxSubnormal(); + void testMakeMinNormal(); + void testMakeMaxNormal(); }; void FloatingPointBlack::testMakeMinSubnormal() @@ -81,3 +83,59 @@ void FloatingPointBlack::testMakeMaxSubnormal() FloatingPoint mfp128 = FloatingPoint::makeMaxSubnormal(size128, false); TS_ASSERT(mfp128.isSubnormal()); } + +void FloatingPointBlack::testMakeMinNormal() +{ + FloatingPointSize size16(5, 11); + FloatingPointSize size32(8, 24); + FloatingPointSize size64(11, 53); + FloatingPointSize size128(15, 113); + + FloatingPoint fp16 = FloatingPoint::makeMinNormal(size16, true); + TS_ASSERT(fp16.isNormal()); + FloatingPoint mfp16 = FloatingPoint::makeMinNormal(size16, false); + TS_ASSERT(mfp16.isNormal()); + + FloatingPoint fp32 = FloatingPoint::makeMinNormal(size32, true); + TS_ASSERT(fp32.isNormal()); + FloatingPoint mfp32 = FloatingPoint::makeMinNormal(size32, false); + TS_ASSERT(mfp32.isNormal()); + + FloatingPoint fp64 = FloatingPoint::makeMinNormal(size64, true); + TS_ASSERT(fp64.isNormal()); + FloatingPoint mfp64 = FloatingPoint::makeMinNormal(size64, false); + TS_ASSERT(mfp64.isNormal()); + + FloatingPoint fp128 = FloatingPoint::makeMinNormal(size128, true); + TS_ASSERT(fp128.isNormal()); + FloatingPoint mfp128 = FloatingPoint::makeMinNormal(size128, false); + TS_ASSERT(mfp128.isNormal()); +} + +void FloatingPointBlack::testMakeMaxNormal() +{ + FloatingPointSize size16(5, 11); + FloatingPointSize size32(8, 24); + FloatingPointSize size64(11, 53); + FloatingPointSize size128(15, 113); + + FloatingPoint fp16 = FloatingPoint::makeMaxNormal(size16, true); + TS_ASSERT(fp16.isNormal()); + FloatingPoint mfp16 = FloatingPoint::makeMaxNormal(size16, false); + TS_ASSERT(mfp16.isNormal()); + + FloatingPoint fp32 = FloatingPoint::makeMaxNormal(size32, true); + TS_ASSERT(fp32.isNormal()); + FloatingPoint mfp32 = FloatingPoint::makeMaxNormal(size32, false); + TS_ASSERT(mfp32.isNormal()); + + FloatingPoint fp64 = FloatingPoint::makeMaxNormal(size64, true); + TS_ASSERT(fp64.isNormal()); + FloatingPoint mfp64 = FloatingPoint::makeMaxNormal(size64, false); + TS_ASSERT(mfp64.isNormal()); + + FloatingPoint fp128 = FloatingPoint::makeMaxNormal(size128, true); + TS_ASSERT(fp128.isNormal()); + FloatingPoint mfp128 = FloatingPoint::makeMaxNormal(size128, false); + TS_ASSERT(mfp128.isNormal()); +} -- 2.30.2