From aa65c60968d0b8c0a7cd47adb2e9e1a684c0332a Mon Sep 17 00:00:00 2001 From: Aina Niemetz Date: Wed, 30 Sep 2020 14:05:29 -0700 Subject: [PATCH] FloatingPoint: Add utility functions for largest and smallest subnormal. (#5166) --- src/util/bitvector.cpp | 12 +++ src/util/bitvector.h | 6 ++ src/util/floatingpoint.cpp | 129 +++++++++++++++++---------- src/util/floatingpoint.h.in | 106 +++++++++++++++------- test/unit/util/CMakeLists.txt | 1 + test/unit/util/floatingpoint_black.h | 83 +++++++++++++++++ 6 files changed, 258 insertions(+), 79 deletions(-) create mode 100644 test/unit/util/floatingpoint_black.h diff --git a/src/util/bitvector.cpp b/src/util/bitvector.cpp index 13710244e..f5e8c9b16 100644 --- a/src/util/bitvector.cpp +++ b/src/util/bitvector.cpp @@ -341,6 +341,18 @@ BitVector BitVector::arithRightShift(const BitVector& y) const ** Static helpers. * ----------------------------------------------------------------------- */ +BitVector BitVector::mkZero(unsigned size) +{ + CheckArgument(size > 0, size); + return BitVector(size); +} + +BitVector BitVector::mkOne(unsigned size) +{ + CheckArgument(size > 0, size); + return BitVector(size, 1u); +} + BitVector BitVector::mkOnes(unsigned size) { CheckArgument(size > 0, size); diff --git a/src/util/bitvector.h b/src/util/bitvector.h index 49f80d2d6..9d4f778cd 100644 --- a/src/util/bitvector.h +++ b/src/util/bitvector.h @@ -240,6 +240,12 @@ class CVC4_PUBLIC BitVector ** Static helpers. * ----------------------------------------------------------------------- */ + /* Create zero bit-vector of given size. */ + static BitVector mkZero(unsigned size); + + /* Create bit-vector representing value 1 of given size. */ + static BitVector mkOne(unsigned size); + /* Create bit-vector of ones of given size. */ static BitVector mkOnes(unsigned size); diff --git a/src/util/floatingpoint.cpp b/src/util/floatingpoint.cpp index 631f8241c..7693d37cb 100644 --- a/src/util/floatingpoint.cpp +++ b/src/util/floatingpoint.cpp @@ -2,7 +2,7 @@ /*! \file floatingpoint.cpp ** \verbatim ** Top contributors (to current version): - ** Martin Brain, Haniel Barbosa, Mathias Preiner + ** Martin Brain, Aina Niemetz, Haniel Barbosa ** Copyright (c) 2013 University of Oxford ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS @@ -10,8 +10,10 @@ ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** - ** \brief [[ Implementations of the utility functions for working with floating point theories. ]] + ** \brief A floating-point value. ** + ** This file contains the data structures used by the constant and parametric + ** types of the floating point theory. **/ #include "util/floatingpoint.h" @@ -475,10 +477,21 @@ FloatingPoint::FloatingPoint(unsigned e, unsigned s, const BitVector &bv) { } +FloatingPoint::FloatingPoint(const FloatingPointSize& size, const BitVector& bv) + : +#ifdef CVC4_USE_SYMFPU + fpl(symfpu::unpack(size, bv)), +#else + fpl(size.exponent(), size.significand(), 0.0), +#endif + t(size) +{ +} + static FloatingPointLiteral constructorHelperBitVector( - const FloatingPointSize &ct, - const RoundingMode &rm, - const BitVector &bv, + const FloatingPointSize& size, + const RoundingMode& rm, + const BitVector& bv, bool signedBV) { #ifdef CVC4_USE_SYMFPU @@ -486,7 +499,7 @@ static FloatingPointLiteral constructorHelperBitVector( { return FloatingPointLiteral( symfpu::convertSBVToFloat( - symfpuLiteral::fpt(ct), + symfpuLiteral::fpt(size), symfpuLiteral::rm(rm), symfpuLiteral::sbv(bv))); } @@ -494,7 +507,7 @@ static FloatingPointLiteral constructorHelperBitVector( { return FloatingPointLiteral( symfpu::convertUBVToFloat( - symfpuLiteral::fpt(ct), + symfpuLiteral::fpt(size), symfpuLiteral::rm(rm), symfpuLiteral::ubv(bv))); } @@ -502,21 +515,27 @@ static FloatingPointLiteral constructorHelperBitVector( return FloatingPointLiteral(2, 2, 0.0); #endif Unreachable() << "Constructor helper broken"; - } - - FloatingPoint::FloatingPoint (const FloatingPointSize &ct, const RoundingMode &rm, const BitVector &bv, bool signedBV) : - fpl(constructorHelperBitVector(ct, rm, bv, signedBV)), - t(ct) {} +} - - static FloatingPointLiteral constructorHelperRational (const FloatingPointSize &ct, const RoundingMode &rm, const Rational &ri) { - Rational r(ri); - Rational two(2,1); - - if (r.isZero()) { +FloatingPoint::FloatingPoint(const FloatingPointSize& size, + const RoundingMode& rm, + const BitVector& bv, + bool signedBV) + : fpl(constructorHelperBitVector(size, rm, bv, signedBV)), t(size) +{ +} + +static FloatingPointLiteral constructorHelperRational( + const FloatingPointSize& size, const RoundingMode& rm, const Rational& ri) +{ + Rational r(ri); + Rational two(2, 1); + + if (r.isZero()) + { #ifdef CVC4_USE_SYMFPU - return FloatingPointLiteral::makeZero( - ct, false); // In keeping with the SMT-LIB standard + return FloatingPointLiteral::makeZero( + size, false); // In keeping with the SMT-LIB standard #else return FloatingPointLiteral(2,2,0.0); #endif @@ -572,10 +591,8 @@ static FloatingPointLiteral constructorHelperBitVector( BitVector exactExp(expBits, exp); - - // Compute the significand. - unsigned sigBits = ct.significandWidth() + 2; // guard and sticky bits + unsigned sigBits = size.significandWidth() + 2; // guard and sticky bits BitVector sig(sigBits, 0U); BitVector one(sigBits, 1U); Rational workingSig(0,1); @@ -614,47 +631,69 @@ static FloatingPointLiteral constructorHelperBitVector( // Then cast... FloatingPointLiteral rounded( - symfpu::convertFloatToFloat(exactFormat, ct, rm, exactFloat)); + symfpu::convertFloatToFloat(exactFormat, size, rm, exactFloat)); return rounded; #else Unreachable() << "no concrete implementation of FloatingPointLiteral"; #endif } Unreachable() << "Constructor helper broken"; - } - - FloatingPoint::FloatingPoint (const FloatingPointSize &ct, const RoundingMode &rm, const Rational &r) : - fpl(constructorHelperRational(ct, rm, r)), - t(ct) {} +} - - FloatingPoint FloatingPoint::makeNaN (const FloatingPointSize &t) { +FloatingPoint::FloatingPoint(const FloatingPointSize& size, + const RoundingMode& rm, + const Rational& r) + : fpl(constructorHelperRational(size, rm, r)), t(size) +{ +} + +FloatingPoint FloatingPoint::makeNaN(const FloatingPointSize& size) +{ #ifdef CVC4_USE_SYMFPU - return FloatingPoint( - t, symfpu::unpackedFloat::makeNaN(t)); + return FloatingPoint( + size, symfpu::unpackedFloat::makeNaN(size)); #else - return FloatingPoint(2, 2, BitVector(4U,0U)); + return FloatingPoint(2, 2, BitVector(4U, 0U)); #endif - } +} - FloatingPoint FloatingPoint::makeInf (const FloatingPointSize &t, bool sign) { +FloatingPoint FloatingPoint::makeInf(const FloatingPointSize& size, bool sign) +{ #ifdef CVC4_USE_SYMFPU - return FloatingPoint( - t, symfpu::unpackedFloat::makeInf(t, sign)); + return FloatingPoint( + size, symfpu::unpackedFloat::makeInf(size, sign)); #else - return FloatingPoint(2, 2, BitVector(4U,0U)); + return FloatingPoint(2, 2, BitVector(4U, 0U)); #endif - } +} - FloatingPoint FloatingPoint::makeZero (const FloatingPointSize &t, bool sign) { +FloatingPoint FloatingPoint::makeZero(const FloatingPointSize& size, bool sign) +{ #ifdef CVC4_USE_SYMFPU - return FloatingPoint( - t, symfpu::unpackedFloat::makeZero(t, sign)); + return FloatingPoint( + size, symfpu::unpackedFloat::makeZero(size, sign)); #else - return FloatingPoint(2, 2, BitVector(4U,0U)); + return FloatingPoint(2, 2, BitVector(4U, 0U)); #endif - } +} +FloatingPoint FloatingPoint::makeMinSubnormal(const FloatingPointSize& size, + bool sign) +{ + BitVector bvsign = sign ? BitVector::mkOne(1) : BitVector::mkZero(1); + BitVector bvexp = BitVector::mkZero(size.packedExponentWidth()); + BitVector bvsig = BitVector::mkOne(size.packedSignificandWidth()); + return FloatingPoint(size, bvsign.concat(bvexp).concat(bvsig)); +} + +FloatingPoint FloatingPoint::makeMaxSubnormal(const FloatingPointSize& size, + bool sign) +{ + BitVector bvsign = sign ? BitVector::mkOne(1) : BitVector::mkZero(1); + BitVector bvexp = BitVector::mkZero(size.packedExponentWidth()); + BitVector bvsig = BitVector::mkOnes(size.packedSignificandWidth()); + return FloatingPoint(size, bvsign.concat(bvexp).concat(bvsig)); +} /* Operations implemented using symfpu */ FloatingPoint FloatingPoint::absolute (void) const { diff --git a/src/util/floatingpoint.h.in b/src/util/floatingpoint.h.in index 602fa2eec..637f01104 100644 --- a/src/util/floatingpoint.h.in +++ b/src/util/floatingpoint.h.in @@ -2,7 +2,7 @@ /*! \file floatingpoint.h.in ** \verbatim ** Top contributors (to current version): - ** Martin Brain, Haniel Barbosa, Tim King + ** Martin Brain, Aina Niemetz, Haniel Barbosa ** Copyright (c) 2013 University of Oxford ** This file is part of the CVC4 project. ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS @@ -10,10 +10,10 @@ ** All rights reserved. See the file COPYING in the top-level source ** directory for licensing information.\endverbatim ** - ** \brief [[ Utility functions for working with floating point theories. ]] + ** \brief A floating-point value. ** - ** [[ This file contains the data structures used by the constant and - ** parametric types of the floating point theory. ]] + ** This file contains the data structures used by the constant and parametric + ** types of the floating point theory. **/ #include "cvc4_public.h" @@ -89,9 +89,9 @@ namespace CVC4 { return (((X) << (N)) | ((X) >> (8*sizeof((X)) - (N)) )); } - inline size_t operator() (const FloatingPointSize& fpt) const { - return size_t(ROLL(fpt.exponent(), 4*sizeof(unsigned)) | - fpt.significand()); + inline size_t operator() (const FloatingPointSize& t) const { + return size_t(ROLL(t.exponent(), 4*sizeof(unsigned)) | + t.significand()); } }; /* struct FloatingPointSizeHashFunction */ @@ -132,12 +132,12 @@ namespace CVC4 { { public: // The six key types that symfpu uses. - typedef unsigned bwt; - typedef bool prop; - typedef ::CVC4::RoundingMode rm; - typedef ::CVC4::FloatingPointSize fpt; - typedef wrappedBitVector sbv; - typedef wrappedBitVector ubv; + using bwt = uint32_t; + using prop = bool; + using rm = ::CVC4::RoundingMode; + using fpt = ::CVC4::FloatingPointSize; + using sbv = wrappedBitVector; + using ubv = wrappedBitVector; // Give concrete instances of each rounding mode, mainly for comparisons. static rm RNE(void); @@ -153,12 +153,12 @@ namespace CVC4 { }; // Use the same type names as symfpu. - typedef traits::bwt bwt; - typedef traits::prop prop; - typedef traits::rm rm; - typedef traits::fpt fpt; - typedef traits::ubv ubv; - typedef traits::sbv sbv; + using bwt = traits::bwt; + using prop = traits::prop; + using rm = traits::rm; + using fpt = traits::fpt; + using ubv = traits::ubv; + using sbv = traits::sbv; // Type function for mapping between types template @@ -167,12 +167,12 @@ namespace CVC4 { template <> struct signedToLiteralType { - typedef int literalType; + using literalType = int32_t; }; template <> struct signedToLiteralType { - typedef unsigned int literalType; + using literalType = uint32_t; }; // This is an additional interface for CVC4::BitVector. @@ -182,7 +182,7 @@ namespace CVC4 { class wrappedBitVector : public BitVector { protected: - typedef typename signedToLiteralType::literalType literalType; + using literalType = typename signedToLiteralType::literalType; friend wrappedBitVector; // To allow conversion between the // types @@ -288,7 +288,7 @@ namespace CVC4 { * A concrete floating point number */ #if @CVC4_USE_SYMFPU@ - typedef ::symfpu::unpackedFloat FloatingPointLiteral; + using FloatingPointLiteral = ::symfpu::unpackedFloat; #else class CVC4_PUBLIC FloatingPointLiteral { public : @@ -318,20 +318,58 @@ namespace CVC4 { public : FloatingPointSize t; - FloatingPoint (unsigned e, unsigned s, const BitVector &bv); - FloatingPoint (const FloatingPointSize &oldt, const FloatingPointLiteral &oldfpl) : fpl(oldfpl), t(oldt) {} - FloatingPoint (const FloatingPoint &fp) : fpl(fp.fpl), t(fp.t) {} - FloatingPoint (const FloatingPointSize &ct, const RoundingMode &rm, const BitVector &bv, bool signedBV); - FloatingPoint (const FloatingPointSize &ct, const RoundingMode &rm, const Rational &r); + /** Constructors. */ + FloatingPoint(unsigned e, unsigned s, const BitVector& bv); + FloatingPoint(const FloatingPointSize& size, const BitVector& bv); + FloatingPoint(const FloatingPointSize& oldt, + const FloatingPointLiteral& oldfpl) + : fpl(oldfpl), t(oldt) + { + } - static FloatingPoint makeNaN (const FloatingPointSize &t); - static FloatingPoint makeInf (const FloatingPointSize &t, bool sign); - static FloatingPoint makeZero (const FloatingPointSize &t, bool sign); + FloatingPoint(const FloatingPoint& fp) : fpl(fp.fpl), t(fp.t) {} + FloatingPoint(const FloatingPointSize& size, + const RoundingMode& rm, + const BitVector& bv, + bool signedBV); + FloatingPoint(const FloatingPointSize& size, + const RoundingMode& rm, + const Rational& r); + + /** + * Create a FP NaN value of given size. + * t: The FP size (format). + */ + static FloatingPoint makeNaN (const FloatingPointSize &size); + /** + * Create a FP infinity value of given size. + * t: The FP size (format). + * sign: True for +oo, false otherwise. + */ + static FloatingPoint makeInf(const FloatingPointSize& size, bool sign); + /** + * Create a FP zero value of given size. + * t: The FP size (format). + * sign: True for +zero, false otherwise. + */ + static FloatingPoint makeZero(const FloatingPointSize& size, bool sign); + /** + * Create the smallest subnormal FP value of given size. + * t: The FP size (format). + * sign: True for positive sign, false otherwise. + */ + static FloatingPoint makeMinSubnormal(const FloatingPointSize& size, + bool sign); + /** + * Create the largest subnormal FP value of given size. + * t: The FP size (format). + * sign: True for positive sign, false otherwise. + */ + static FloatingPoint makeMaxSubnormal(const FloatingPointSize& size, + bool sign); - const FloatingPointLiteral & getLiteral (void) const { - return this->fpl; - } + const FloatingPointLiteral& getLiteral(void) const { return this->fpl; } /* Return string representation. * diff --git a/test/unit/util/CMakeLists.txt b/test/unit/util/CMakeLists.txt index 5312342a2..c5753e732 100644 --- a/test/unit/util/CMakeLists.txt +++ b/test/unit/util/CMakeLists.txt @@ -21,6 +21,7 @@ cvc4_add_unit_test_white(check_white util) cvc4_add_unit_test_black(configuration_black util) cvc4_add_unit_test_black(datatype_black util) cvc4_add_unit_test_black(exception_black util) +cvc4_add_unit_test_black(floatingpoint_black util) cvc4_add_unit_test_black(integer_black util) cvc4_add_unit_test_white(integer_white util) cvc4_add_unit_test_black(output_black util) diff --git a/test/unit/util/floatingpoint_black.h b/test/unit/util/floatingpoint_black.h new file mode 100644 index 000000000..07b87eac3 --- /dev/null +++ b/test/unit/util/floatingpoint_black.h @@ -0,0 +1,83 @@ +/********************* */ +/*! \file floatingpoint_black.h + ** \verbatim + ** Top contributors (to current version): + ** Aina Niemetz + ** This file is part of the CVC4 project. + ** Copyright (c) 2009-2020 by the authors listed in the file AUTHORS + ** in the top-level source directory and their institutional affiliations. + ** All rights reserved. See the file COPYING in the top-level source + ** directory for licensing information.\endverbatim + ** + ** \brief Black box testing of CVC4::FloatingPoint. + ** + ** Black box testing of CVC4::FloatingPoint. + **/ +#include + +#include "util/floatingpoint.h" + +using namespace CVC4; + +class FloatingPointBlack : public CxxTest::TestSuite +{ + public: + void testMakeMinSubnormal(); + void testMakeMaxSubnormal(); +}; + +void FloatingPointBlack::testMakeMinSubnormal() +{ + FloatingPointSize size16(5, 11); + FloatingPointSize size32(8, 24); + FloatingPointSize size64(11, 53); + FloatingPointSize size128(15, 113); + + FloatingPoint fp16 = FloatingPoint::makeMinSubnormal(size16, true); + TS_ASSERT(fp16.isSubnormal()); + FloatingPoint mfp16 = FloatingPoint::makeMinSubnormal(size16, false); + TS_ASSERT(mfp16.isSubnormal()); + + FloatingPoint fp32 = FloatingPoint::makeMinSubnormal(size32, true); + TS_ASSERT(fp32.isSubnormal()); + FloatingPoint mfp32 = FloatingPoint::makeMinSubnormal(size32, false); + TS_ASSERT(mfp32.isSubnormal()); + + FloatingPoint fp64 = FloatingPoint::makeMinSubnormal(size64, true); + TS_ASSERT(fp64.isSubnormal()); + FloatingPoint mfp64 = FloatingPoint::makeMinSubnormal(size64, false); + TS_ASSERT(mfp64.isSubnormal()); + + FloatingPoint fp128 = FloatingPoint::makeMinSubnormal(size128, true); + TS_ASSERT(fp128.isSubnormal()); + FloatingPoint mfp128 = FloatingPoint::makeMinSubnormal(size128, false); + TS_ASSERT(mfp128.isSubnormal()); +} + +void FloatingPointBlack::testMakeMaxSubnormal() +{ + FloatingPointSize size16(5, 11); + FloatingPointSize size32(8, 24); + FloatingPointSize size64(11, 53); + FloatingPointSize size128(15, 113); + + FloatingPoint fp16 = FloatingPoint::makeMaxSubnormal(size16, true); + TS_ASSERT(fp16.isSubnormal()); + FloatingPoint mfp16 = FloatingPoint::makeMaxSubnormal(size16, false); + TS_ASSERT(mfp16.isSubnormal()); + + FloatingPoint fp32 = FloatingPoint::makeMaxSubnormal(size32, true); + TS_ASSERT(fp32.isSubnormal()); + FloatingPoint mfp32 = FloatingPoint::makeMaxSubnormal(size32, false); + TS_ASSERT(mfp32.isSubnormal()); + + FloatingPoint fp64 = FloatingPoint::makeMaxSubnormal(size64, true); + TS_ASSERT(fp64.isSubnormal()); + FloatingPoint mfp64 = FloatingPoint::makeMaxSubnormal(size64, false); + TS_ASSERT(mfp64.isSubnormal()); + + FloatingPoint fp128 = FloatingPoint::makeMaxSubnormal(size128, true); + TS_ASSERT(fp128.isSubnormal()); + FloatingPoint mfp128 = FloatingPoint::makeMaxSubnormal(size128, false); + TS_ASSERT(mfp128.isSubnormal()); +} -- 2.30.2