From: Aina Niemetz Date: Thu, 1 Oct 2020 18:29:39 +0000 (-0700) Subject: FloatingPoint: Add utility functions for largest and smallest normal. (#5174) X-Git-Tag: cvc5-1.0.0~2773 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4c2c2de951c52ef48704dbffe7ec5848917f1398;p=cvc5.git FloatingPoint: Add utility functions for largest and smallest normal. (#5174) --- diff --git a/src/util/floatingpoint.cpp b/src/util/floatingpoint.cpp index 7693d37cb..c84779be0 100644 --- a/src/util/floatingpoint.cpp +++ b/src/util/floatingpoint.cpp @@ -695,6 +695,25 @@ FloatingPoint FloatingPoint::makeMaxSubnormal(const FloatingPointSize& size, return FloatingPoint(size, bvsign.concat(bvexp).concat(bvsig)); } +FloatingPoint FloatingPoint::makeMinNormal(const FloatingPointSize& size, + bool sign) +{ + BitVector bvsign = sign ? BitVector::mkOne(1) : BitVector::mkZero(1); + BitVector bvexp = BitVector::mkOne(size.packedExponentWidth()); + BitVector bvsig = BitVector::mkZero(size.packedSignificandWidth()); + return FloatingPoint(size, bvsign.concat(bvexp).concat(bvsig)); +} + +FloatingPoint FloatingPoint::makeMaxNormal(const FloatingPointSize& size, + bool sign) +{ + BitVector bvsign = sign ? BitVector::mkOne(1) : BitVector::mkZero(1); + BitVector bvexp = + BitVector::mkOnes(size.packedExponentWidth()).setBit(0, false); + BitVector bvsig = BitVector::mkOnes(size.packedSignificandWidth()); + return FloatingPoint(size, bvsign.concat(bvexp).concat(bvsig)); +} + /* Operations implemented using symfpu */ FloatingPoint FloatingPoint::absolute (void) const { #ifdef CVC4_USE_SYMFPU diff --git a/src/util/floatingpoint.h.in b/src/util/floatingpoint.h.in index 637f01104..9e78c20c0 100644 --- a/src/util/floatingpoint.h.in +++ b/src/util/floatingpoint.h.in @@ -368,6 +368,20 @@ namespace CVC4 { */ static FloatingPoint makeMaxSubnormal(const FloatingPointSize& size, bool sign); + /** + * Create the smallest normal FP value of given size. + * t: The FP size (format). + * sign: True for positive sign, false otherwise. + */ + static FloatingPoint makeMinNormal(const FloatingPointSize& size, + bool sign); + /** + * Create the largest normal FP value of given size. + * t: The FP size (format). + * sign: True for positive sign, false otherwise. + */ + static FloatingPoint makeMaxNormal(const FloatingPointSize& size, + bool sign); const FloatingPointLiteral& getLiteral(void) const { return this->fpl; } diff --git a/test/unit/util/floatingpoint_black.h b/test/unit/util/floatingpoint_black.h index 07b87eac3..4517bcafc 100644 --- a/test/unit/util/floatingpoint_black.h +++ b/test/unit/util/floatingpoint_black.h @@ -24,6 +24,8 @@ class FloatingPointBlack : public CxxTest::TestSuite public: void testMakeMinSubnormal(); void testMakeMaxSubnormal(); + void testMakeMinNormal(); + void testMakeMaxNormal(); }; void FloatingPointBlack::testMakeMinSubnormal() @@ -81,3 +83,59 @@ void FloatingPointBlack::testMakeMaxSubnormal() FloatingPoint mfp128 = FloatingPoint::makeMaxSubnormal(size128, false); TS_ASSERT(mfp128.isSubnormal()); } + +void FloatingPointBlack::testMakeMinNormal() +{ + FloatingPointSize size16(5, 11); + FloatingPointSize size32(8, 24); + FloatingPointSize size64(11, 53); + FloatingPointSize size128(15, 113); + + FloatingPoint fp16 = FloatingPoint::makeMinNormal(size16, true); + TS_ASSERT(fp16.isNormal()); + FloatingPoint mfp16 = FloatingPoint::makeMinNormal(size16, false); + TS_ASSERT(mfp16.isNormal()); + + FloatingPoint fp32 = FloatingPoint::makeMinNormal(size32, true); + TS_ASSERT(fp32.isNormal()); + FloatingPoint mfp32 = FloatingPoint::makeMinNormal(size32, false); + TS_ASSERT(mfp32.isNormal()); + + FloatingPoint fp64 = FloatingPoint::makeMinNormal(size64, true); + TS_ASSERT(fp64.isNormal()); + FloatingPoint mfp64 = FloatingPoint::makeMinNormal(size64, false); + TS_ASSERT(mfp64.isNormal()); + + FloatingPoint fp128 = FloatingPoint::makeMinNormal(size128, true); + TS_ASSERT(fp128.isNormal()); + FloatingPoint mfp128 = FloatingPoint::makeMinNormal(size128, false); + TS_ASSERT(mfp128.isNormal()); +} + +void FloatingPointBlack::testMakeMaxNormal() +{ + FloatingPointSize size16(5, 11); + FloatingPointSize size32(8, 24); + FloatingPointSize size64(11, 53); + FloatingPointSize size128(15, 113); + + FloatingPoint fp16 = FloatingPoint::makeMaxNormal(size16, true); + TS_ASSERT(fp16.isNormal()); + FloatingPoint mfp16 = FloatingPoint::makeMaxNormal(size16, false); + TS_ASSERT(mfp16.isNormal()); + + FloatingPoint fp32 = FloatingPoint::makeMaxNormal(size32, true); + TS_ASSERT(fp32.isNormal()); + FloatingPoint mfp32 = FloatingPoint::makeMaxNormal(size32, false); + TS_ASSERT(mfp32.isNormal()); + + FloatingPoint fp64 = FloatingPoint::makeMaxNormal(size64, true); + TS_ASSERT(fp64.isNormal()); + FloatingPoint mfp64 = FloatingPoint::makeMaxNormal(size64, false); + TS_ASSERT(mfp64.isNormal()); + + FloatingPoint fp128 = FloatingPoint::makeMaxNormal(size128, true); + TS_ASSERT(fp128.isNormal()); + FloatingPoint mfp128 = FloatingPoint::makeMaxNormal(size128, false); + TS_ASSERT(mfp128.isNormal()); +}