--- /dev/null
+\r
+/*============================================================================\r
+\r
+*** FIX.\r
+\r
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point\r
+Arithmetic Package, Release 2b.\r
+\r
+Written by John R. Hauser. This work was made possible in part by the\r
+International Computer Science Institute, located at Suite 600, 1947 Center\r
+Street, Berkeley, California 94704. Funding was partially provided by the\r
+National Science Foundation under grant MIP-9311980. The original version\r
+of this code was written as part of a project to build a fixed-point vector\r
+processor in collaboration with the University of California at Berkeley,\r
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information\r
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/\r
+arithmetic/SoftFloat.html'.\r
+\r
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has\r
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES\r
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS\r
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,\r
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE\r
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE\r
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR\r
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.\r
+\r
+Derivative works are acceptable, even for commercial purposes, so long as\r
+(1) the source code for the derivative work includes prominent notice that\r
+the work is derivative, and (2) the source code includes prominent notice with\r
+these four paragraphs for those parts of this code that are retained.\r
+\r
+=============================================================================*/\r
+\r
+/*----------------------------------------------------------------------------\r
+| Underflow tininess-detection mode, statically initialized to the default\r
+| value `float_tininess_afterRounding'.\r
+| (The declaration in `softfloat.h' must match the `bool' type used here.)\r
+*----------------------------------------------------------------------------*/\r
+bool float_detectTininess = float_tininess_afterRounding;\r
+\r
--- /dev/null
+\r
+/*============================================================================\r
+\r
+*** FIX.\r
+\r
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point\r
+Arithmetic Package, Release 2b.\r
+\r
+Written by John R. Hauser. This work was made possible in part by the\r
+International Computer Science Institute, located at Suite 600, 1947 Center\r
+Street, Berkeley, California 94704. Funding was partially provided by the\r
+National Science Foundation under grant MIP-9311980. The original version\r
+of this code was written as part of a project to build a fixed-point vector\r
+processor in collaboration with the University of California at Berkeley,\r
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information\r
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/\r
+arithmetic/SoftFloat.html'.\r
+\r
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has\r
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES\r
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS\r
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,\r
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE\r
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE\r
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR\r
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.\r
+\r
+Derivative works are acceptable, even for commercial purposes, so long as\r
+(1) the source code for the derivative work includes prominent notice that\r
+the work is derivative, and (2) the source code includes prominent notice with\r
+these four paragraphs for those parts of this code that are retained.\r
+\r
+=============================================================================*/\r
+\r
+/*----------------------------------------------------------------------------\r
+| Internal canonical NaN format.\r
+*----------------------------------------------------------------------------*/\r
+*** COMMON\r
+/* Format-independent record used to carry a NaN from one format to another. */\r
+typedef struct {\r
+ flag sign; /* sign of the NaN */\r
+ /* NOTE(review): the converters later in this file still store into\r
+ `z.high' and `z.low', which are not members of this struct -- confirm\r
+ whether `bits' is meant to replace that pair. */\r
+ uint128_t bits;\r
+} commonNaNT;\r
+\r
+/*----------------------------------------------------------------------------\r
+| The bit pattern for a default generated single-precision NaN: the sign bit,\r
+| all eight exponent bits, and the most-significant fraction bit are set.\r
+*----------------------------------------------------------------------------*/\r
+#define float32Bits_defaultNaN 0xFFC00000\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns 1 if the single-precision floating-point value `a' is a NaN;\r
+| otherwise, returns 0.\r
+*----------------------------------------------------------------------------*/\r
+*** COMMON\r
+/* Shifting left by one discards the sign bit; a result above 0xFF000000 has\r
+   an all-ones exponent and a nonzero fraction. */\r
+#define softfloat_isNaNFloat32Bits( a ) ( ( (uint32_t) ( a )<<1 ) > 0xFF000000 )\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns 1 if the single-precision floating-point value `a' is a signaling\r
+| NaN; otherwise, returns 0.\r
+*----------------------------------------------------------------------------*/\r
+/* Declared `static inline' so that every translation unit including this\r
+   header gets its own definition; a plain C99 `inline' definition supplies\r
+   no external definition and can fail to link when a call is not inlined. */\r
+static inline bool softfloat_isSigNaNFloat32Bits( uint32_t a )\r
+{\r
+    /* Bits 30..22 must read 0x1FE: exponent all ones, quiet bit (22) clear;\r
+       the remaining fraction bits must not all be zero. */\r
+    return ( ( a>>22 & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );\r
+}\r
+\r
+/*----------------------------------------------------------------------------\r
+| Single-precision NaN helpers operating on raw 32-bit patterns and on\r
+| `commonNaNT'; they are only declared here.\r
+| NOTE(review): judging by the names, these convert a NaN to and from the\r
+| canonical format and select the NaN result for two operands -- confirm\r
+| against the definitions.\r
+*----------------------------------------------------------------------------*/\r
+commonNaNT softfloat_NaNFromFloat32Bits( uint32_t );\r
+uint32_t softfloat_float32BitsFromNaN( commonNaNT );\r
+uint32_t softfloat_propNaNFloat32Bits( uint32_t, uint32_t );\r
+\r
+/*----------------------------------------------------------------------------\r
+| The bit pattern for a default generated double-precision NaN: the sign bit,\r
+| all eleven exponent bits, and the most-significant fraction bit are set.\r
+*----------------------------------------------------------------------------*/\r
+#define float64Bits_defaultNaN 0xFFF8000000000000\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns 1 if the double-precision floating-point value `a' is a NaN;\r
+| otherwise, returns 0.\r
+*----------------------------------------------------------------------------*/\r
+*** COMMON\r
+/* Shifting left by one discards the sign bit; a result above\r
+   0xFFE0000000000000 has an all-ones exponent and a nonzero fraction. */\r
+#define softfloat_isNaNFloat64Bits( a ) ( ( (uint64_t) ( a )<<1 ) > 0xFFE0000000000000 )\r
+\r
+\r
+\r
+\r
+\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns 1 if the double-precision floating-point value `a' is a signaling\r
+| NaN; otherwise, returns 0.\r
+*----------------------------------------------------------------------------*/\r
+\r
+flag float64_is_signaling_nan( float64 a )\r
+{\r
+    /* Bits 62..51 must read 0xFFE: exponent all ones, quiet bit clear. */\r
+    if ( ( ( a>>51 ) & 0xFFF ) != 0xFFE ) return 0;\r
+    /* With the rest of the fraction zero the value would be an infinity. */\r
+    return ( a & LIT64( 0x0007FFFFFFFFFFFF ) ) != 0;\r
+}\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns the result of converting the double-precision floating-point NaN\r
+| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid\r
+| exception is raised.\r
+*----------------------------------------------------------------------------*/\r
+\r
+static commonNaNT float64ToCommonNaN( float64 a )\r
+{\r
+    commonNaNT canonical;\r
+\r
+    if ( float64_is_signaling_nan( a ) ) {\r
+        float_raise( float_flag_invalid );\r
+    }\r
+    /* Shifting by 12 drops the sign and exponent and left-aligns the\r
+       fraction in the upper word. */\r
+    canonical.high = a<<12;\r
+    canonical.low = 0;\r
+    canonical.sign = a>>63;\r
+    return canonical;\r
+}\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns the result of converting the canonical NaN `a' to the double-\r
+| precision floating-point format.\r
+*----------------------------------------------------------------------------*/\r
+\r
+static float64 commonNaNToFloat64( commonNaNT a )\r
+{\r
+    bits64 signBit = ( (bits64) a.sign )<<63;\r
+    bits64 payload = a.high>>12;\r
+\r
+    /* Exponent all ones plus the quiet bit, combined with sign and payload. */\r
+    return signBit | LIT64( 0x7FF8000000000000 ) | payload;\r
+}\r
+\r
+/*----------------------------------------------------------------------------\r
+| Takes two double-precision floating-point values `a' and `b', one of which\r
+| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a\r
+| signaling NaN, the invalid exception is raised.\r
+*----------------------------------------------------------------------------*/\r
+\r
+static float64 propagateFloat64NaN( float64 a, float64 b )\r
+{\r
+ flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;\r
+\r
+ /* Classify both operands before they are modified below. */\r
+ aIsNaN = float64_is_nan( a );\r
+ aIsSignalingNaN = float64_is_signaling_nan( a );\r
+ bIsNaN = float64_is_nan( b );\r
+ bIsSignalingNaN = float64_is_signaling_nan( b );\r
+ /* Set bit 51 (the bit that must be clear for a signaling NaN) in both\r
+ operands, so whichever one is returned is no longer signaling. */\r
+ a |= LIT64( 0x0008000000000000 );\r
+ b |= LIT64( 0x0008000000000000 );\r
+ if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );\r
+ if ( aIsSignalingNaN ) {\r
+ /* Both signaling: decide by significand size. */\r
+ if ( bIsSignalingNaN ) goto returnLargerSignificand;\r
+ /* Only `a' was signaling: return `b' if it is a NaN, otherwise `a'. */\r
+ return bIsNaN ? b : a;\r
+ }\r
+ else if ( aIsNaN ) {\r
+ /* `a' is a quiet NaN; return it unless `b' is a quiet NaN as well. */\r
+ if ( bIsSignalingNaN | ! bIsNaN ) return a;\r
+ returnLargerSignificand:\r
+ /* Compare with the sign bit shifted out; the larger operand is returned. */\r
+ if ( (bits64) ( a<<1 ) < (bits64) ( b<<1 ) ) return b;\r
+ if ( (bits64) ( b<<1 ) < (bits64) ( a<<1 ) ) return a;\r
+ /* Equal apart from the sign: return the smaller full bit pattern. */\r
+ return ( a < b ) ? a : b;\r
+ }\r
+ else {\r
+ /* `a' is not a NaN, so `b' is returned. */\r
+ return b;\r
+ }\r
+\r
+}\r
+\r
+#ifdef FLOATX80\r
+\r
+/*----------------------------------------------------------------------------\r
+| The pattern for a default generated extended double-precision NaN. The\r
+| `high' and `low' values hold the most- and least-significant bits,\r
+| respectively. All sixteen bits of `high' are set, and only the two\r
+| most-significant bits of `low' are set.\r
+*----------------------------------------------------------------------------*/\r
+#define floatx80_default_nan_high 0xFFFF\r
+#define floatx80_default_nan_low LIT64( 0xC000000000000000 )\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns 1 if the extended double-precision floating-point value `a' is a\r
+| NaN; otherwise, returns 0.\r
+*----------------------------------------------------------------------------*/\r
+\r
+flag floatx80_is_nan( floatx80 a )\r
+{\r
+    /* The low 15 bits of `high' must all be set. */\r
+    if ( ( a.high & 0x7FFF ) != 0x7FFF ) return 0;\r
+    /* Ignoring bit 63 of `low', some other bit must be set. */\r
+    return (bits64) ( a.low<<1 ) != 0;\r
+}\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns 1 if the extended double-precision floating-point value `a' is a\r
+| signaling NaN; otherwise, returns 0.\r
+*----------------------------------------------------------------------------*/\r
+\r
+flag floatx80_is_signaling_nan( floatx80 a )\r
+{\r
+    /* `low' with bit 62 forced to zero. */\r
+    bits64 lowSansBit62 = a.low & ~ LIT64( 0x4000000000000000 );\r
+\r
+    /* The low 15 bits of `high' must all be set. */\r
+    if ( ( a.high & 0x7FFF ) != 0x7FFF ) return 0;\r
+    /* Bit 62 of `low' must already be clear. */\r
+    if ( a.low != lowSansBit62 ) return 0;\r
+    /* Ignoring bit 63, some remaining bit of `low' must be set. */\r
+    return (bits64) ( lowSansBit62<<1 ) != 0;\r
+}\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns the result of converting the extended double-precision floating-\r
+| point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the\r
+| invalid exception is raised.\r
+*----------------------------------------------------------------------------*/\r
+\r
+static commonNaNT floatx80ToCommonNaN( floatx80 a )\r
+{\r
+    commonNaNT canonical;\r
+\r
+    if ( floatx80_is_signaling_nan( a ) ) {\r
+        float_raise( float_flag_invalid );\r
+    }\r
+    /* Drop bit 63 of `low' so the remaining bits are left-aligned. */\r
+    canonical.high = a.low<<1;\r
+    canonical.low = 0;\r
+    canonical.sign = a.high>>15;\r
+    return canonical;\r
+}\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns the result of converting the canonical NaN `a' to the extended\r
+| double-precision floating-point format.\r
+*----------------------------------------------------------------------------*/\r
+\r
+static floatx80 commonNaNToFloatx80( commonNaNT a )\r
+{\r
+    floatx80 result;\r
+\r
+    /* Sign in bit 15, with the low 15 bits of `high' all set. */\r
+    result.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;\r
+    /* Top two bits of `low' set, payload shifted in below bit 63. */\r
+    result.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );\r
+    return result;\r
+}\r
+\r
+/*----------------------------------------------------------------------------\r
+| Takes two extended double-precision floating-point values `a' and `b', one\r
+| of which is a NaN, and returns the appropriate NaN result. If either `a' or\r
+| `b' is a signaling NaN, the invalid exception is raised.\r
+*----------------------------------------------------------------------------*/\r
+\r
+static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b )\r
+{\r
+ flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;\r
+\r
+ /* Classify both operands before they are modified below. */\r
+ aIsNaN = floatx80_is_nan( a );\r
+ aIsSignalingNaN = floatx80_is_signaling_nan( a );\r
+ bIsNaN = floatx80_is_nan( b );\r
+ bIsSignalingNaN = floatx80_is_signaling_nan( b );\r
+ /* Set bits 63 and 62 of `low' in both operands; bit 62 must be clear for\r
+ a signaling NaN, so whichever one is returned is no longer signaling. */\r
+ a.low |= LIT64( 0xC000000000000000 );\r
+ b.low |= LIT64( 0xC000000000000000 );\r
+ if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );\r
+ if ( aIsSignalingNaN ) {\r
+ /* Both signaling: decide by significand size. */\r
+ if ( bIsSignalingNaN ) goto returnLargerSignificand;\r
+ /* Only `a' was signaling: return `b' if it is a NaN, otherwise `a'. */\r
+ return bIsNaN ? b : a;\r
+ }\r
+ else if ( aIsNaN ) {\r
+ /* `a' is a quiet NaN; return it unless `b' is a quiet NaN as well. */\r
+ if ( bIsSignalingNaN | ! bIsNaN ) return a;\r
+ returnLargerSignificand:\r
+ /* The operand with the larger `low' word is returned. */\r
+ if ( a.low < b.low ) return b;\r
+ if ( b.low < a.low ) return a;\r
+ /* Equal `low' words: return the operand with the smaller `high' word. */\r
+ return ( a.high < b.high ) ? a : b;\r
+ }\r
+ else {\r
+ /* `a' is not a NaN, so `b' is returned. */\r
+ return b;\r
+ }\r
+\r
+}\r
+\r
+#endif\r
+\r
+#ifdef FLOAT128\r
+\r
+/*----------------------------------------------------------------------------\r
+| The pattern for a default generated quadruple-precision NaN. The `high' and\r
+| `low' values hold the most- and least-significant bits, respectively. The\r
+| seventeen most-significant bits of `high' are set; every other bit is zero.\r
+*----------------------------------------------------------------------------*/\r
+#define float128_default_nan_high LIT64( 0xFFFF800000000000 )\r
+#define float128_default_nan_low LIT64( 0x0000000000000000 )\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns 1 if the quadruple-precision floating-point value `a' is a NaN;\r
+| otherwise, returns 0.\r
+*----------------------------------------------------------------------------*/\r
+\r
+flag float128_is_nan( float128 a )\r
+{\r
+    /* With the sign bit shifted out, the top 15 bits must all be set. */\r
+    if ( (bits64) ( a.high<<1 ) < LIT64( 0xFFFE000000000000 ) ) return 0;\r
+    /* Any nonzero bit in `low' or in the low 48 bits of `high' qualifies. */\r
+    if ( a.low ) return 1;\r
+    return ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) != 0;\r
+}\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns 1 if the quadruple-precision floating-point value `a' is a\r
+| signaling NaN; otherwise, returns 0.\r
+*----------------------------------------------------------------------------*/\r
+\r
+flag float128_is_signaling_nan( float128 a )\r
+{\r
+    /* Bits 62..47 of `high' must read 0xFFFE: fifteen ones, then bit 47\r
+       clear. */\r
+    if ( ( ( a.high>>47 ) & 0xFFFF ) != 0xFFFE ) return 0;\r
+    /* Some bit below bit 47 of `high', or any bit of `low', must be set. */\r
+    if ( a.low ) return 1;\r
+    return ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) != 0;\r
+}\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns the result of converting the quadruple-precision floating-point NaN\r
+| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid\r
+| exception is raised.\r
+*----------------------------------------------------------------------------*/\r
+\r
+static commonNaNT float128ToCommonNaN( float128 a )\r
+{\r
+    commonNaNT canonical;\r
+\r
+    if ( float128_is_signaling_nan( a ) ) {\r
+        float_raise( float_flag_invalid );\r
+    }\r
+    /* Shift the 128-bit value left by 16 into the canonical high/low pair. */\r
+    shortShift128Left( a.high, a.low, 16, &canonical.high, &canonical.low );\r
+    canonical.sign = a.high>>63;\r
+    return canonical;\r
+}\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns the result of converting the canonical NaN `a' to the quadruple-\r
+| precision floating-point format.\r
+*----------------------------------------------------------------------------*/\r
+\r
+static float128 commonNaNToFloat128( commonNaNT a )\r
+{\r
+    float128 result;\r
+    /* Sign in bit 63, then bits 62..47 of the high word all set. */\r
+    bits64 topBits = ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 );\r
+\r
+    /* Shift the canonical pair right by 16, then merge in the top bits. */\r
+    shift128Right( a.high, a.low, 16, &result.high, &result.low );\r
+    result.high = result.high | topBits;\r
+    return result;\r
+}\r
+\r
+/*----------------------------------------------------------------------------\r
+| Takes two quadruple-precision floating-point values `a' and `b', one of\r
+| which is a NaN, and returns the appropriate NaN result. If either `a' or\r
+| `b' is a signaling NaN, the invalid exception is raised.\r
+*----------------------------------------------------------------------------*/\r
+\r
+static float128 propagateFloat128NaN( float128 a, float128 b )\r
+{\r
+ flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;\r
+\r
+ /* Classify both operands before they are modified below. */\r
+ aIsNaN = float128_is_nan( a );\r
+ aIsSignalingNaN = float128_is_signaling_nan( a );\r
+ bIsNaN = float128_is_nan( b );\r
+ bIsSignalingNaN = float128_is_signaling_nan( b );\r
+ /* Set bit 47 of `high' (the bit that must be clear for a signaling NaN)\r
+ in both operands, so whichever one is returned is no longer signaling. */\r
+ a.high |= LIT64( 0x0000800000000000 );\r
+ b.high |= LIT64( 0x0000800000000000 );\r
+ if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );\r
+ if ( aIsSignalingNaN ) {\r
+ /* Both signaling: decide by significand size. */\r
+ if ( bIsSignalingNaN ) goto returnLargerSignificand;\r
+ /* Only `a' was signaling: return `b' if it is a NaN, otherwise `a'. */\r
+ return bIsNaN ? b : a;\r
+ }\r
+ else if ( aIsNaN ) {\r
+ /* `a' is a quiet NaN; return it unless `b' is a quiet NaN as well. */\r
+ if ( bIsSignalingNaN | ! bIsNaN ) return a;\r
+ returnLargerSignificand:\r
+ /* Compare as 128-bit values with the sign bit shifted out of `high';\r
+ the larger operand is returned. */\r
+ if ( lt128( a.high<<1, a.low, b.high<<1, b.low ) ) return b;\r
+ if ( lt128( b.high<<1, b.low, a.high<<1, a.low ) ) return a;\r
+ /* Otherwise return the operand with the smaller `high' word. */\r
+ return ( a.high < b.high ) ? a : b;\r
+ }\r
+ else {\r
+ /* `a' is not a NaN, so `b' is returned. */\r
+ return b;\r
+ }\r
+\r
+}\r
+\r
+#endif\r
+\r
--- /dev/null
+
+/*============================================================================
+
+*** FIX.
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser. This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704. Funding was partially provided by the
+National Science Foundation under grant MIP-9311980. The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+| Platform configuration: defines the `LITTLEENDIAN' macro (with no value).
+| NOTE(review): presumably tested by code that depends on the target's byte
+| or word order -- no use is visible here; confirm.
+*----------------------------------------------------------------------------*/
+#define LITTLEENDIAN
+
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "specialize.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns the result of converting the canonical NaN `a' to the single-\r
+| precision floating-point format.\r
+*----------------------------------------------------------------------------*/\r
+\r
+uint_fast32_t softfloat_commonNaNToF32UI( struct commonNaN a )\r
+{\r
+    /* Sign goes to bit 31; the top 23 bits of `v64' become the fraction. */\r
+    uint_fast32_t signBit = (uint_fast32_t) a.sign<<31;\r
+    uint_fast32_t payload = a.v64>>41;\r
+\r
+    /* 0x7FC00000 supplies the all-ones exponent and the quiet bit. */\r
+    return signBit | 0x7FC00000 | payload;\r
+}\r
+\r
--- /dev/null
+
+#include <stdint.h>
+#include "platform.h"
+#include "specialize.h"
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the double-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+uint_fast64_t softfloat_commonNaNToF64UI( struct commonNaN a )
+{
+    /* Sign goes to bit 63; the top 52 bits of `v64' become the fraction. */
+    uint_fast64_t signBit = (uint_fast64_t) a.sign<<63;
+    uint_fast64_t payload = a.v64>>12;
+
+    /* The constant supplies the all-ones exponent and the quiet bit. */
+    return signBit | UINT64_C( 0x7FF8000000000000 ) | payload;
+}
+
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "specialize.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns the result of converting the single-precision floating-point NaN\r
+| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid\r
+| exception is raised.\r
+*----------------------------------------------------------------------------*/\r
+struct commonNaN softfloat_f32UIToCommonNaN( uint_fast32_t uiA )\r
+{\r
+    struct commonNaN canonical;\r
+\r
+    if ( softfloat_isSigNaNF32UI( uiA ) ) {\r
+        softfloat_raiseFlags( softfloat_flag_invalid );\r
+    }\r
+    /* Shifting by 41 moves fraction bit 22 up to bit 63 of `v64'. */\r
+    canonical.v64 = (uint_fast64_t) uiA<<41;\r
+    canonical.v0 = 0;\r
+    canonical.sign = uiA>>31;\r
+    return canonical;\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "specialize.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns the result of converting the double-precision floating-point NaN\r
+| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid\r
+| exception is raised.\r
+*----------------------------------------------------------------------------*/\r
+struct commonNaN softfloat_f64UIToCommonNaN( uint_fast64_t uiA )\r
+{\r
+    struct commonNaN canonical;\r
+\r
+    if ( softfloat_isSigNaNF64UI( uiA ) ) {\r
+        softfloat_raiseFlags( softfloat_flag_invalid );\r
+    }\r
+    /* Shifting by 12 moves fraction bit 51 up to bit 63 of `v64'. */\r
+    canonical.v64 = uiA<<12;\r
+    canonical.v0 = 0;\r
+    canonical.sign = uiA>>63;\r
+    return canonical;\r
+}\r
+\r
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "specialize.h"
+
+/* Returns true if the single-precision bit pattern `ui' is a signaling NaN. */
+bool softfloat_isSigNaNF32UI( uint_fast32_t ui )
+{
+    /* Bits 30..22 must read 0x1FE: exponent all ones, quiet bit clear. */
+    if ( ( ui>>22 & 0x1FF ) != 0x1FE ) return false;
+    /* With the rest of the fraction zero the value would be an infinity. */
+    return ( ui & 0x003FFFFF ) != 0;
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "specialize.h"
+
+/* Returns true if the double-precision bit pattern `ui' is a signaling NaN. */
+bool softfloat_isSigNaNF64UI( uint_fast64_t ui )
+{
+    /* Bits 62..51 must read 0xFFE: exponent all ones, quiet bit clear. */
+    if ( ( ui>>51 & 0xFFF ) != 0xFFE ) return false;
+    /* With the rest of the fraction zero the value would be an infinity. */
+    return ( ui & UINT64_C( 0x0007FFFFFFFFFFFF ) ) != 0;
+}
+
--- /dev/null
+\r
+/*** UPDATE COMMENTS. ***/\r
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "internals.h"\r
+#include "specialize.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Takes two single-precision floating-point values `a' and `b', one of which\r
+| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a\r
+| signaling NaN, the invalid exception is raised.\r
+*----------------------------------------------------------------------------*/\r
+\r
+uint_fast32_t\r
+ softfloat_propagateNaNF32UI( uint_fast32_t uiA, uint_fast32_t uiB )\r
+{\r
+    bool isNaNA, isSigNaNA, isNaNB, isSigNaNB;\r
+    uint_fast32_t uiMagA, uiMagB;\r
+\r
+    /*------------------------------------------------------------------------\r
+    | Classify both operands before they are modified below.\r
+    *------------------------------------------------------------------------*/\r
+    isNaNA = isNaNF32UI( uiA );\r
+    isSigNaNA = softfloat_isSigNaNF32UI( uiA );\r
+    isNaNB = isNaNF32UI( uiB );\r
+    isSigNaNB = softfloat_isSigNaNF32UI( uiB );\r
+    /*------------------------------------------------------------------------\r
+    | Make NaNs non-signaling.\r
+    *------------------------------------------------------------------------*/\r
+    uiA |= 0x00400000;\r
+    uiB |= 0x00400000;\r
+    /*------------------------------------------------------------------------\r
+    | Raise invalid if either operand was signaling, then pick the result.\r
+    *------------------------------------------------------------------------*/\r
+    if ( isSigNaNA | isSigNaNB ) {\r
+        softfloat_raiseFlags( softfloat_flag_invalid );\r
+    }\r
+    if ( isSigNaNA ) {\r
+        /* Both signaling: decide by significand size. */\r
+        if ( isSigNaNB ) goto returnLargerSignificand;\r
+        /* Only A was signaling: return B if it is a NaN, otherwise A. */\r
+        return isNaNB ? uiB : uiA;\r
+    } else if ( isNaNA ) {\r
+        /* A is a quiet NaN; return it unless B is a quiet NaN as well. */\r
+        if ( isSigNaNB || ! isNaNB ) return uiA;\r
+ returnLargerSignificand:\r
+        /*--------------------------------------------------------------------\r
+        | Compare with the sign bit removed.  The sign is masked off rather\r
+        | than shifted out because `uint_fast32_t' may be wider than 32 bits,\r
+        | in which case `uiA<<1' would keep the sign bit and let it decide\r
+        | the comparison.  This matches the double-precision routine.\r
+        *--------------------------------------------------------------------*/\r
+        uiMagA = uiA & 0x7FFFFFFF;\r
+        uiMagB = uiB & 0x7FFFFFFF;\r
+        if ( uiMagA < uiMagB ) return uiB;\r
+        if ( uiMagB < uiMagA ) return uiA;\r
+        /* Equal apart from the sign: return the smaller full bit pattern. */\r
+        return ( uiA < uiB ) ? uiA : uiB;\r
+    } else {\r
+        /* A is not a NaN, so B is returned. */\r
+        return uiB;\r
+    }\r
+\r
+}\r
+\r
--- /dev/null
+
+/*** UPDATE COMMENTS. ***/
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Takes two double-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+uint_fast64_t
+ softfloat_propagateNaNF64UI( uint_fast64_t uiA, uint_fast64_t uiB )
+{
+ bool isNaNA, isSigNaNA, isNaNB, isSigNaNB;
+ uint_fast64_t uiMagA, uiMagB;
+
+ /*------------------------------------------------------------------------
+ | Classify both operands before they are modified below.
+ *------------------------------------------------------------------------*/
+ isNaNA = isNaNF64UI( uiA );
+ isSigNaNA = softfloat_isSigNaNF64UI( uiA );
+ isNaNB = isNaNF64UI( uiB );
+ isSigNaNB = softfloat_isSigNaNF64UI( uiB );
+ /*------------------------------------------------------------------------
+ | Make NaNs non-signaling.
+ *------------------------------------------------------------------------*/
+ uiA |= UINT64_C( 0x0008000000000000 );
+ uiB |= UINT64_C( 0x0008000000000000 );
+ /*------------------------------------------------------------------------
+ | Raise invalid if either operand was signaling, then pick the result.
+ *------------------------------------------------------------------------*/
+ if ( isSigNaNA | isSigNaNB ) {
+ softfloat_raiseFlags( softfloat_flag_invalid );
+ }
+ if ( isSigNaNA ) {
+ /* Both signaling: decide by significand size. */
+ if ( isSigNaNB ) goto returnLargerSignificand;
+ /* Only A was signaling: return B if it is a NaN, otherwise A. */
+ return isNaNB ? uiB : uiA;
+ } else if ( isNaNA ) {
+ /* A is a quiet NaN; return it unless B is a quiet NaN as well. */
+ if ( isSigNaNB || ! isNaNB ) return uiA;
+ returnLargerSignificand:
+ /* Compare with the sign bit masked off; the larger operand is returned. */
+ uiMagA = uiA & UINT64_C( 0x7FFFFFFFFFFFFFFF );
+ uiMagB = uiB & UINT64_C( 0x7FFFFFFFFFFFFFFF );
+ if ( uiMagA < uiMagB ) return uiB;
+ if ( uiMagB < uiMagA ) return uiA;
+ /* Equal apart from the sign: return the smaller full bit pattern. */
+ return ( uiA < uiB ) ? uiA : uiB;
+ } else {
+ /* A is not a NaN, so B is returned. */
+ return uiB;
+ }
+
+}
+
--- /dev/null
+
+/*============================================================================
+
+*** FIX.
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser. This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704. Funding was partially provided by the
+National Science Foundation under grant MIP-9311980. The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+#include "platform.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Raises the exceptions specified by `flags' by OR-ing them into
+| `softfloat_exceptionFlags'.  Floating-point traps can be defined here if
+| desired.  It is currently not possible for such a trap to substitute a
+| result value.  If traps are not implemented, this routine should be simply
+| `softfloat_exceptionFlags |= flags;'.
+*----------------------------------------------------------------------------*/
+
+void softfloat_raiseFlags( int_fast8_t flags )
+{
+
+ softfloat_exceptionFlags |= flags;
+
+}
+
--- /dev/null
+
+#ifndef softfloat_types_h
+#define softfloat_types_h
+
+/*** COMMENTS. ***/
+
+#include <stdbool.h>
+#include <stdint.h>
+
+/* Floating-point value containers: each struct holds only raw storage, as
+   unsigned integer fields. */
+typedef struct { uint32_t v; } float32_t; /* one 32-bit word */
+typedef struct { uint64_t v; } float64_t; /* one 64-bit word */
+/* A 64-bit `v' plus a 16-bit `x'.  NOTE(review): presumably the significand
+   and the sign/exponent respectively -- confirm. */
+typedef struct { uint64_t v; uint16_t x; } floatx80_t;
+/* Two 64-bit words.  NOTE(review): which element is the more significant
+   word is not stated here -- confirm. */
+typedef struct { uint64_t v[ 2 ]; } float128_t;
+
+#endif
+
--- /dev/null
+\r
+/*============================================================================\r
+\r
+*** FIX.\r
+\r
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point\r
+Arithmetic Package, Release 2b.\r
+\r
+Written by John R. Hauser. This work was made possible in part by the\r
+International Computer Science Institute, located at Suite 600, 1947 Center\r
+Street, Berkeley, California 94704. Funding was partially provided by the\r
+National Science Foundation under grant MIP-9311980. The original version\r
+of this code was written as part of a project to build a fixed-point vector\r
+processor in collaboration with the University of California at Berkeley,\r
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information\r
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/\r
+arithmetic/SoftFloat.html'.\r
+\r
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has\r
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES\r
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS\r
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,\r
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE\r
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE\r
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR\r
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.\r
+\r
+Derivative works are acceptable, even for commercial purposes, so long as\r
+(1) the source code for the derivative work includes prominent notice that\r
+the work is derivative, and (2) the source code includes prominent notice with\r
+these four paragraphs for those parts of this code that are retained.\r
+\r
+=============================================================================*/\r
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+\r
+/*----------------------------------------------------------------------------\r
+| Initial value for the tininess-detection mode.  The macro expands to a bare\r
+| expression with no trailing semicolon, so it can be used in an initializer\r
+| or any other expression context; the user supplies the terminating `;'.\r
+*----------------------------------------------------------------------------*/\r
+#define init_detectTininess softfloat_tininess_afterRounding\r
+\r
+/*----------------------------------------------------------------------------\r
+| Structure used to transfer NaN representations from one format to another.\r
+| `sign' carries the NaN's sign bit; `v64' and `v0' carry the payload bits.\r
+| NOTE(review): the single- and double-precision converters left-align the\r
+| fraction in `v64' and set `v0' to zero, so presumably `v64' is the upper and\r
+| `v0' the lower half of a 128-bit payload -- confirm.\r
+*----------------------------------------------------------------------------*/\r
+struct commonNaN {\r
+ bool sign;\r
+ uint64_t v64, v0;\r
+};\r
+\r
+/*----------------------------------------------------------------------------\r
+| The bit pattern for a default generated single-precision NaN: the sign bit,\r
+| all eight exponent bits, and the most-significant fraction bit are set.\r
+*----------------------------------------------------------------------------*/\r
+#define defaultNaNF32UI 0xFFC00000\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns 1 if the single-precision floating-point value `a' is a signaling\r
+| NaN; otherwise, returns 0.\r
+*----------------------------------------------------------------------------*/\r
+#if defined( INLINE_LEVEL ) && ( INLINE_LEVEL >= 1 )\r
+INLINE bool softfloat_isSigNaNF32UI( uint_fast32_t ui )\r
+{\r
+    /* Bits 30..22 must read 0x1FE: exponent all ones, quiet bit clear. */\r
+    if ( ( ui>>22 & 0x1FF ) != 0x1FE ) return false;\r
+    /* With the rest of the fraction zero the value would be an infinity. */\r
+    return ( ui & 0x003FFFFF ) != 0;\r
+}\r
+#else\r
+bool softfloat_isSigNaNF32UI( uint_fast32_t );\r
+#endif\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converters between a single-precision NaN bit pattern and the common NaN\r
+| structure.  `softfloat_commonNaNToF32UI' assembles its result from the sign\r
+| of `a' in bit 31, the quiet-NaN pattern 0x7FC00000, and `a.v64' shifted\r
+| right by 41 bits.  `softfloat_f32UIToCommonNaN' is only declared here\r
+| (presumably the reverse conversion).\r
+*----------------------------------------------------------------------------*/\r
+struct commonNaN softfloat_f32UIToCommonNaN( uint_fast32_t );\r
+#if defined INLINE_LEVEL && ( 1 <= INLINE_LEVEL )\r
+INLINE uint_fast32_t softfloat_commonNaNToF32UI( struct commonNaN a )\r
+{\r
+    uint_fast32_t signBit = (uint_fast32_t) a.sign<<31;\r
+\r
+    return signBit | 0x7FC00000 | a.v64>>41;\r
+}\r
+#else\r
+uint_fast32_t softfloat_commonNaNToF32UI( struct commonNaN );\r
+#endif\r
+\r
+/*----------------------------------------------------------------------------\r
+| Takes two single-precision floating-point values `a' and `b', one of which\r
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a\r
+| signaling NaN, the invalid exception is raised.  (Declaration only; the two\r
+| unnamed parameters correspond to `a' and `b', in that order.)\r
+*----------------------------------------------------------------------------*/\r
+uint_fast32_t softfloat_propagateNaNF32UI( uint_fast32_t, uint_fast32_t );\r
+\r
+/*----------------------------------------------------------------------------\r
+| The pattern for a default generated double-precision NaN: sign bit set,\r
+| exponent field all ones, and only the most-significant fraction bit set.\r
+*----------------------------------------------------------------------------*/\r
+#define defaultNaNF64UI UINT64_C(0xFFF8000000000000)\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns true if the 64-bit pattern `ui', read as a double-precision\r
+| floating-point value, is a signaling NaN; otherwise returns false.  The\r
+| test requires bits 62..51 to equal 0xFFE (exponent field all ones, top\r
+| fraction bit clear) and at least one of the low 51 fraction bits to be set.\r
+*----------------------------------------------------------------------------*/\r
+#if defined INLINE_LEVEL && ( 1 <= INLINE_LEVEL )\r
+INLINE bool softfloat_isSigNaNF64UI( uint_fast64_t ui )\r
+{\r
+    if ( ( ( ui >> 51 ) & 0xFFF ) != 0xFFE ) return false;\r
+    return ( ui & UINT64_C( 0x0007FFFFFFFFFFFF ) ) != 0;\r
+}\r
+#else\r
+bool softfloat_isSigNaNF64UI( uint_fast64_t );\r
+#endif\r
+\r
+/*----------------------------------------------------------------------------\r
+| Declarations of the converters between a double-precision NaN bit pattern\r
+| and the common NaN structure (purpose inferred from the names and\r
+| signatures; the definitions are not visible here).\r
+*----------------------------------------------------------------------------*/\r
+/*** MIGHT BE INLINE'D. ***/\r
+struct commonNaN softfloat_f64UIToCommonNaN( uint_fast64_t );\r
+uint_fast64_t softfloat_commonNaNToF64UI( struct commonNaN );\r
+\r
+/*----------------------------------------------------------------------------\r
+| Takes two double-precision floating-point values `a' and `b', one of which\r
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a\r
+| signaling NaN, the invalid exception is raised.  (Declaration only; the two\r
+| unnamed parameters correspond to `a' and `b', in that order.)\r
+*----------------------------------------------------------------------------*/\r
+uint_fast64_t softfloat_propagateNaNF64UI( uint_fast64_t, uint_fast64_t );\r
+\r
--- /dev/null
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser. This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704. Funding was partially provided by the
+National Science Foundation under grant MIP-9311980. The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+#include "milieu.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Primitive arithmetic functions, including multi-word arithmetic, and
+| division and square root approximations. (Can be specialized to target if
+| desired.)
+*----------------------------------------------------------------------------*/
+#include "softfloat-macros"
+
+/*----------------------------------------------------------------------------
+| Functions and definitions to determine: (1) whether tininess for underflow
+| is detected before or after rounding by default, (2) what (if anything)
+| happens when exceptions are raised, (3) how signaling NaNs are distinguished
+| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
+| are propagated from function inputs to output. These details are target-
+| specific.
+*----------------------------------------------------------------------------*/
+#include "softfloat-specialize"
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Extracts the significand of the extended double-precision floating-point
+| value `a', i.e. the contents of its `low' member.
+*----------------------------------------------------------------------------*/
+
+INLINE bits64 extractFloatx80Frac( floatx80 a )
+{
+    bits64 frac = a.low;
+
+    return frac;
+
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the exponent field of the extended double-precision floating-point
+| value `a': the low 15 bits of its `high' member.
+*----------------------------------------------------------------------------*/
+
+INLINE int32 extractFloatx80Exp( floatx80 a )
+{
+    int32 expField = a.high & 0x7FFF;
+
+    return expField;
+
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the sign of the extended double-precision floating-point value
+| `a': its `high' member shifted right by 15 bits.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloatx80Sign( floatx80 a )
+{
+    flag signBit = a.high>>15;
+
+    return signBit;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the denormalized significand `aSig' of a subnormal extended
+| double-precision value.  The significand is shifted left by its number of
+| leading zeros and stored through `zSigPtr'; the matching exponent, 1 minus
+| that shift distance, is stored through `zExpPtr'.
+*----------------------------------------------------------------------------*/
+
+static void
+ normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
+{
+    int8 leadingZeros = countLeadingZeros64( aSig );
+
+    *zExpPtr = 1 - leadingZeros;
+    *zSigPtr = aSig<<leadingZeros;
+
+}
+
+/*----------------------------------------------------------------------------
+| Builds an extended double-precision floating-point value from the sign
+| `zSign', the exponent `zExp', and the significand `zSig'.  The sign is
+| placed in bit 15 of the `high' member with `zExp' added below it; `zSig'
+| becomes the `low' member.
+*----------------------------------------------------------------------------*/
+
+INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
+{
+    floatx80 result;
+
+    result.high = ( ( (bits16) zSign )<<15 ) + zExp;
+    result.low = zSig;
+    return result;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input. Ordinarily, the abstract value is
+| rounded and packed into the extended double-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly. However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned. If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal extended
+| double-precision floating-point number.
+| If `roundingPrecision' is 32 or 64, the result is rounded to the same
+| number of bits as single or double precision, respectively. Otherwise, the
+| result is rounded to the full precision of the extended double-precision
+| format.
+| The input significand must be normalized or smaller. If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding. The
+| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80
+ roundAndPackFloatx80(
+ int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+ )
+{
+ int8 roundingMode;
+ flag roundNearestEven, increment, isTiny;
+ int64 roundIncrement, roundMask, roundBits;
+
+ roundingMode = float_rounding_mode;
+ roundNearestEven = ( roundingMode == float_round_nearest_even );
+ /* Precision 80, and any value other than 32 or 64, takes the
+ full-precision path at `precision80'. */
+ if ( roundingPrecision == 80 ) goto precision80;
+ /* Reduced precision: `roundMask' selects the low bits of `zSig0' that are
+ discarded (11 bits for 64, 40 bits for 32) and `roundIncrement' is half
+ of the lowest retained bit. */
+ if ( roundingPrecision == 64 ) {
+ roundIncrement = LIT64( 0x0000000000000400 );
+ roundMask = LIT64( 0x00000000000007FF );
+ }
+ else if ( roundingPrecision == 32 ) {
+ roundIncrement = LIT64( 0x0000008000000000 );
+ roundMask = LIT64( 0x000000FFFFFFFFFF );
+ }
+ else {
+ goto precision80;
+ }
+ /* Fold any nonzero `zSig1' into the lowest bit of `zSig0' as a sticky
+ bit. */
+ zSig0 |= ( zSig1 != 0 );
+ /* Directed rounding: add nothing (truncate) or the whole mask (round away
+ from zero), depending on the mode and the sign. */
+ if ( ! roundNearestEven ) {
+ if ( roundingMode == float_round_to_zero ) {
+ roundIncrement = 0;
+ }
+ else {
+ roundIncrement = roundMask;
+ if ( zSign ) {
+ if ( roundingMode == float_round_up ) roundIncrement = 0;
+ }
+ else {
+ if ( roundingMode == float_round_down ) roundIncrement = 0;
+ }
+ }
+ }
+ roundBits = zSig0 & roundMask;
+ /* One comparison catches both ends of the exponent range: presumably
+ `bits32' is unsigned, so `zExp - 1' wraps to a large value when
+ `zExp' <= 0. */
+ if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
+ /* Overflow: exponent above the largest finite one, or at the largest
+ one with the rounding increment carrying out of `zSig0'. */
+ if ( ( 0x7FFE < zExp )
+ || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
+ ) {
+ goto overflow;
+ }
+ if ( zExp <= 0 ) {
+ /* Subnormal result.  It is tiny unless tininess is detected after
+ rounding, `zExp' is 0, and the increment carries out of `zSig0'. */
+ isTiny =
+ ( float_detect_tininess == float_tininess_before_rounding )
+ || ( zExp < 0 )
+ || ( zSig0 <= zSig0 + roundIncrement );
+ shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
+ zExp = 0;
+ roundBits = zSig0 & roundMask;
+ if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+ if ( roundBits ) float_exception_flags |= float_flag_inexact;
+ zSig0 += roundIncrement;
+ /* Rounding reached bit 63: the result uses exponent 1. */
+ if ( (sbits64) zSig0 < 0 ) zExp = 1;
+ roundIncrement = roundMask + 1;
+ /* Exact tie under round-to-nearest-even: also clear the lowest
+ retained bit. */
+ if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
+ roundMask |= roundIncrement;
+ }
+ zSig0 &= ~ roundMask;
+ return packFloatx80( zSign, zExp, zSig0 );
+ }
+ }
+ if ( roundBits ) float_exception_flags |= float_flag_inexact;
+ zSig0 += roundIncrement;
+ /* The addition wrapped: bump the exponent and set the significand to
+ just its top bit. */
+ if ( zSig0 < roundIncrement ) {
+ ++zExp;
+ zSig0 = LIT64( 0x8000000000000000 );
+ }
+ roundIncrement = roundMask + 1;
+ /* Exact tie under round-to-nearest-even: also clear the lowest retained
+ bit. */
+ if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
+ roundMask |= roundIncrement;
+ }
+ zSig0 &= ~ roundMask;
+ if ( zSig0 == 0 ) zExp = 0;
+ return packFloatx80( zSign, zExp, zSig0 );
+ /* Full precision: `zSig1' holds the bits below the result significand. */
+ precision80:
+ /* Round-to-nearest: increment when the top bit of `zSig1' is set. */
+ increment = ( (sbits64) zSig1 < 0 );
+ if ( ! roundNearestEven ) {
+ if ( roundingMode == float_round_to_zero ) {
+ increment = 0;
+ }
+ else {
+ if ( zSign ) {
+ increment = ( roundingMode == float_round_down ) && zSig1;
+ }
+ else {
+ increment = ( roundingMode == float_round_up ) && zSig1;
+ }
+ }
+ }
+ if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
+ if ( ( 0x7FFE < zExp )
+ || ( ( zExp == 0x7FFE )
+ && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
+ && increment
+ )
+ ) {
+ /* With a zero mask, `~ roundMask' below is an all-ones
+ significand. */
+ roundMask = 0;
+ /* Also the target of the reduced-precision path above, which arrives
+ with its own `roundMask'. */
+ overflow:
+ float_raise( float_flag_overflow | float_flag_inexact );
+ /* Modes that round toward zero for this sign return exponent 0x7FFE
+ with significand `~ roundMask'; all others return exponent 0x7FFF
+ with only the top significand bit set. */
+ if ( ( roundingMode == float_round_to_zero )
+ || ( zSign && ( roundingMode == float_round_up ) )
+ || ( ! zSign && ( roundingMode == float_round_down ) )
+ ) {
+ return packFloatx80( zSign, 0x7FFE, ~ roundMask );
+ }
+ return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ if ( zExp <= 0 ) {
+ /* Subnormal result.  It is tiny unless tininess is detected after
+ rounding, `zExp' is 0, and an increment of an all-ones `zSig0'
+ is pending. */
+ isTiny =
+ ( float_detect_tininess == float_tininess_before_rounding )
+ || ( zExp < 0 )
+ || ! increment
+ || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
+ shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
+ zExp = 0;
+ if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
+ if ( zSig1 ) float_exception_flags |= float_flag_inexact;
+ /* Recompute the increment from the shifted `zSig1'. */
+ if ( roundNearestEven ) {
+ increment = ( (sbits64) zSig1 < 0 );
+ }
+ else {
+ if ( zSign ) {
+ increment = ( roundingMode == float_round_down ) && zSig1;
+ }
+ else {
+ increment = ( roundingMode == float_round_up ) && zSig1;
+ }
+ }
+ if ( increment ) {
+ ++zSig0;
+ /* Exact tie (`zSig1<<1' is 0) under round-to-nearest-even: clear
+ bit 0. */
+ zSig0 &=
+ ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
+ if ( (sbits64) zSig0 < 0 ) zExp = 1;
+ }
+ return packFloatx80( zSign, zExp, zSig0 );
+ }
+ }
+ if ( zSig1 ) float_exception_flags |= float_flag_inexact;
+ if ( increment ) {
+ ++zSig0;
+ /* The increment wrapped `zSig0' to 0: bump the exponent and set the
+ significand to just its top bit. */
+ if ( zSig0 == 0 ) {
+ ++zExp;
+ zSig0 = LIT64( 0x8000000000000000 );
+ }
+ else {
+ /* Exact tie under round-to-nearest-even: clear bit 0. */
+ zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
+ }
+ }
+ else {
+ if ( zSig0 == 0 ) zExp = 0;
+ }
+ return packFloatx80( zSign, zExp, zSig0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Variant of `roundAndPackFloatx80' for an input significand (the
+| concatenation of `zSig0' and `zSig1') that need not be normalized.  The
+| significand is shifted left until the top bit of `zSig0' is set, `zExp' is
+| reduced by the shift distance, and the result is handed to
+| `roundAndPackFloatx80' together with `roundingPrecision' and `zSign'.
+*----------------------------------------------------------------------------*/
+
+static floatx80
+ normalizeRoundAndPackFloatx80(
+     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+ )
+{
+    int8 leadingZeros;
+
+    /* Empty high word: promote the low word and account for 64 bits. */
+    if ( ! zSig0 ) {
+        zExp -= 64;
+        zSig0 = zSig1;
+        zSig1 = 0;
+    }
+    leadingZeros = countLeadingZeros64( zSig0 );
+    zExp -= leadingZeros;
+    shortShift128Left( zSig0, zSig1, leadingZeros, &zSig0, &zSig1 );
+    return
+        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Extracts the least-significant 64 fraction bits of the quadruple-precision
+| floating-point value `a', i.e. the contents of its `low' member.
+*----------------------------------------------------------------------------*/
+
+INLINE bits64 extractFloat128Frac1( float128 a )
+{
+    bits64 lowFrac = a.low;
+
+    return lowFrac;
+
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the most-significant 48 fraction bits of the quadruple-precision
+| floating-point value `a': the low 48 bits of its `high' member.
+*----------------------------------------------------------------------------*/
+
+INLINE bits64 extractFloat128Frac0( float128 a )
+{
+    bits64 highFrac = a.high & LIT64( 0x0000FFFFFFFFFFFF );
+
+    return highFrac;
+
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the exponent field of the quadruple-precision floating-point value
+| `a': the 15 bits of its `high' member starting at bit 48.
+*----------------------------------------------------------------------------*/
+
+INLINE int32 extractFloat128Exp( float128 a )
+{
+    int32 expField = ( a.high>>48 ) & 0x7FFF;
+
+    return expField;
+
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the sign of the quadruple-precision floating-point value `a': its
+| `high' member shifted right by 63 bits.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloat128Sign( float128 a )
+{
+    flag signBit = a.high>>63;
+
+    return signBit;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the denormalized significand of a subnormal quadruple-precision
+| value, given as the concatenation of `aSig0' (high word) and `aSig1' (low
+| word).  The significand is shifted by its leading-zero count minus 15; the
+| upper part of the result is stored through `zSig0Ptr', the lower 64 bits
+| through `zSig1Ptr', and the matching exponent through `zExpPtr'.
+*----------------------------------------------------------------------------*/
+
+static void
+ normalizeFloat128Subnormal(
+     bits64 aSig0,
+     bits64 aSig1,
+     int32 *zExpPtr,
+     bits64 *zSig0Ptr,
+     bits64 *zSig1Ptr
+ )
+{
+    int8 shift;
+
+    /* Nonzero high word: one short left shift of the whole 128 bits. */
+    if ( aSig0 != 0 ) {
+        shift = countLeadingZeros64( aSig0 ) - 15;
+        shortShift128Left( aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr );
+        *zExpPtr = 1 - shift;
+        return;
+    }
+    /* Only the low word is populated; a negative `shift' means it has to
+       be split across both output words. */
+    shift = countLeadingZeros64( aSig1 ) - 15;
+    if ( 0 <= shift ) {
+        *zSig0Ptr = aSig1<<shift;
+        *zSig1Ptr = 0;
+    }
+    else {
+        *zSig0Ptr = aSig1>>( - shift );
+        *zSig1Ptr = aSig1<<( shift & 63 );
+    }
+    *zExpPtr = - shift - 63;
+
+}
+
+/*----------------------------------------------------------------------------
+| Builds a quadruple-precision floating-point value from the sign `zSign',
+| the exponent `zExp', and the significand formed by the concatenation of
+| `zSig0' and `zSig1'.  `zSig1' becomes the low 64 bits of the result.  The
+| upper 64 bits are the plain sum of `zSign' shifted to bit 63, `zExp' shifted
+| to bit 48, and `zSig0', so any integer portion of `zSig0' carries into the
+| exponent field.  Since a properly normalized significand has an integer
+| portion equal to 1, `zExp' should be 1 less than the desired result
+| exponent whenever `zSig0' and `zSig1' concatenated form a complete,
+| normalized significand.
+*----------------------------------------------------------------------------*/
+
+INLINE float128
+ packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
+{
+    bits64 signField = ( (bits64) zSign )<<63;
+    bits64 expField = ( (bits64) zExp )<<48;
+    float128 result;
+
+    result.high = signField + expField + zSig0;
+    result.low = zSig1;
+    return result;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0', `zSig1',
+| and `zSig2', and returns the proper quadruple-precision floating-point value
+| corresponding to the abstract input. Ordinarily, the abstract value is
+| simply rounded and packed into the quadruple-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly. However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned. If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal quadruple-
+| precision floating-point number.
+| The input significand must be normalized or smaller. If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding. In the
+| usual case that the input significand is normalized, `zExp' must be 1 less
+| than the ``true'' floating-point exponent. The handling of underflow and
+| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128
+ roundAndPackFloat128(
+ flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
+{
+ int8 roundingMode;
+ flag roundNearestEven, increment, isTiny;
+
+ roundingMode = float_rounding_mode;
+ roundNearestEven = ( roundingMode == float_round_nearest_even );
+ /* Round-to-nearest: increment when the top bit of `zSig2' is set.  The
+ directed modes below override this based on the sign and on whether
+ `zSig2' is nonzero. */
+ increment = ( (sbits64) zSig2 < 0 );
+ if ( ! roundNearestEven ) {
+ if ( roundingMode == float_round_to_zero ) {
+ increment = 0;
+ }
+ else {
+ if ( zSign ) {
+ increment = ( roundingMode == float_round_down ) && zSig2;
+ }
+ else {
+ increment = ( roundingMode == float_round_up ) && zSig2;
+ }
+ }
+ }
+ /* One comparison catches both ends of the exponent range: presumably
+ `bits32' is unsigned, so a negative `zExp' converts to a large value. */
+ if ( 0x7FFD <= (bits32) zExp ) {
+ /* Overflow: exponent above 0x7FFD, or equal to it with an all-ones
+ significand that is about to be incremented. */
+ if ( ( 0x7FFD < zExp )
+ || ( ( zExp == 0x7FFD )
+ && eq128(
+ LIT64( 0x0001FFFFFFFFFFFF ),
+ LIT64( 0xFFFFFFFFFFFFFFFF ),
+ zSig0,
+ zSig1
+ )
+ && increment
+ )
+ ) {
+ float_raise( float_flag_overflow | float_flag_inexact );
+ /* Modes that round toward zero for this sign return exponent 0x7FFE
+ with an all-ones fraction; all others return exponent 0x7FFF
+ with a zero fraction. */
+ if ( ( roundingMode == float_round_to_zero )
+ || ( zSign && ( roundingMode == float_round_up ) )
+ || ( ! zSign && ( roundingMode == float_round_down ) )
+ ) {
+ return
+ packFloat128(
+ zSign,
+ 0x7FFE,
+ LIT64( 0x0000FFFFFFFFFFFF ),
+ LIT64( 0xFFFFFFFFFFFFFFFF )
+ );
+ }
+ return packFloat128( zSign, 0x7FFF, 0, 0 );
+ }
+ if ( zExp < 0 ) {
+ /* Subnormal result.  It is tiny unless tininess is detected after
+ rounding, `zExp' is -1, and an increment of an all-ones
+ significand is pending. */
+ isTiny =
+ ( float_detect_tininess == float_tininess_before_rounding )
+ || ( zExp < -1 )
+ || ! increment
+ || lt128(
+ zSig0,
+ zSig1,
+ LIT64( 0x0001FFFFFFFFFFFF ),
+ LIT64( 0xFFFFFFFFFFFFFFFF )
+ );
+ shift128ExtraRightJamming(
+ zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
+ zExp = 0;
+ if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
+ /* Recompute the increment from the shifted `zSig2'. */
+ if ( roundNearestEven ) {
+ increment = ( (sbits64) zSig2 < 0 );
+ }
+ else {
+ if ( zSign ) {
+ increment = ( roundingMode == float_round_down ) && zSig2;
+ }
+ else {
+ increment = ( roundingMode == float_round_up ) && zSig2;
+ }
+ }
+ }
+ }
+ if ( zSig2 ) float_exception_flags |= float_flag_inexact;
+ if ( increment ) {
+ add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
+ /* Exact tie (`zSig2 + zSig2' is 0) under round-to-nearest-even: clear
+ bit 0 of the low word. */
+ zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
+ }
+ else {
+ /* An all-zero significand is packed with exponent 0. */
+ if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
+ }
+ return packFloat128( zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Variant of `roundAndPackFloat128' for an input significand (the
+| concatenation of `zSig0' and `zSig1') that has fewer bits and need not be
+| normalized.  The significand is shifted by the leading-zero count of `zSig0'
+| minus 15 (left when non-negative, otherwise right with the shifted-out bits
+| collected in a third word), `zExp' is adjusted by the same amount, and the
+| result is handed to `roundAndPackFloat128'.  In all cases, `zExp' must be 1
+| less than the ``true'' floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+static float128
+ normalizeRoundAndPackFloat128(
+     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
+{
+    int8 shift;
+    bits64 zSig2;
+
+    /* Empty high word: promote the low word and account for 64 bits. */
+    if ( ! zSig0 ) {
+        zExp -= 64;
+        zSig0 = zSig1;
+        zSig1 = 0;
+    }
+    shift = countLeadingZeros64( zSig0 ) - 15;
+    if ( shift < 0 ) {
+        shift128ExtraRightJamming(
+            zSig0, zSig1, 0, - shift, &zSig0, &zSig1, &zSig2 );
+    }
+    else {
+        shortShift128Left( zSig0, zSig1, shift, &zSig0, &zSig1 );
+        zSig2 = 0;
+    }
+    zExp -= shift;
+    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+#endif
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 int32_to_floatx80( int32 a )
+{
+    flag zSign;
+    uint32 absA;
+    int8 shiftCount;
+    bits64 zSig;
+
+    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
+    zSign = ( a < 0 );
+    /* Take the magnitude in unsigned arithmetic: negating the signed value
+       overflows (undefined behaviour) for the most negative integer. */
+    absA = zSign ? - (uint32) a : (uint32) a;
+    /* Shift the leading 1 of the magnitude up to bit 63 of the significand. */
+    shiftCount = countLeadingZeros32( absA ) + 32;
+    zSig = absA;
+    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a' to
+| the quadruple-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 int32_to_float128( int32 a )
+{
+    flag zSign;
+    uint32 absA;
+    int8 shiftCount;
+    bits64 zSig0;
+
+    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
+    zSign = ( a < 0 );
+    /* Take the magnitude in unsigned arithmetic: negating the signed value
+       overflows (undefined behaviour) for the most negative integer. */
+    absA = zSign ? - (uint32) a : (uint32) a;
+    /* Shift the leading 1 of the magnitude up to bit 48 of the high word. */
+    shiftCount = countLeadingZeros32( absA ) + 17;
+    zSig0 = absA;
+    return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
+
+}
+
+#endif
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 int64_to_floatx80( int64 a )
+{
+    flag zSign;
+    uint64 absA;
+    int8 shiftCount;
+
+    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
+    zSign = ( a < 0 );
+    /* Take the magnitude in unsigned arithmetic: negating the signed value
+       overflows (undefined behaviour) for the most negative integer. */
+    absA = zSign ? - (uint64) a : (uint64) a;
+    /* Shift the leading 1 of the magnitude up to bit 63 of the significand. */
+    shiftCount = countLeadingZeros64( absA );
+    return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a' to
+| the quadruple-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 int64_to_float128( int64 a )
+{
+    flag zSign;
+    uint64 absA;
+    int8 shiftCount;
+    int32 zExp;
+    bits64 zSig0, zSig1;
+
+    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
+    zSign = ( a < 0 );
+    /* Take the magnitude in unsigned arithmetic: negating the signed value
+       overflows (undefined behaviour) for the most negative integer. */
+    absA = zSign ? - (uint64) a : (uint64) a;
+    shiftCount = countLeadingZeros64( absA ) + 49;
+    zExp = 0x406E - shiftCount;
+    /* A shift of 64 or more is done by starting with the magnitude in the
+       high word; the remaining distance is then below 64. */
+    if ( 64 <= shiftCount ) {
+        zSig1 = 0;
+        zSig0 = absA;
+        shiftCount -= 64;
+    }
+    else {
+        zSig1 = absA;
+        zSig0 = 0;
+    }
+    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+    return packFloat128( zSign, zExp, zSig0, zSig1 );
+
+}
+
+#endif
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Converts the single-precision floating-point value `a' to the extended
+| double-precision floating-point format and returns the result.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float32_to_floatx80( float32 a )
+{
+    bits32 aSig;
+    int16 aExp;
+    flag aSign;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    /* Exponent all ones: infinity when the fraction is zero, else a NaN. */
+    if ( aExp == 0xFF ) {
+        if ( aSig == 0 ) {
+            return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+        }
+        return commonNaNToFloatx80( float32ToCommonNaN( a ) );
+    }
+    /* Exponent zero: a signed zero, or a subnormal that needs normalizing. */
+    if ( aExp == 0 ) {
+        if ( ! aSig ) return packFloatx80( aSign, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+    }
+    /* Set bit 23 (the integer bit) and move it up to bit 63. */
+    aSig |= 0x00800000;
+    return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Converts the single-precision floating-point value `a' to the quadruple-
+| precision floating-point format and returns the result.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float32_to_float128( float32 a )
+{
+    bits32 aSig;
+    int16 aExp;
+    flag aSign;
+
+    aSig = extractFloat32Frac( a );
+    aExp = extractFloat32Exp( a );
+    aSign = extractFloat32Sign( a );
+    /* Exponent all ones: infinity when the fraction is zero, else a NaN. */
+    if ( aExp == 0xFF ) {
+        if ( aSig == 0 ) return packFloat128( aSign, 0x7FFF, 0, 0 );
+        return commonNaNToFloat128( float32ToCommonNaN( a ) );
+    }
+    /* Exponent zero: a signed zero, or a subnormal that needs normalizing. */
+    if ( aExp == 0 ) {
+        if ( ! aSig ) return packFloat128( aSign, 0, 0, 0 );
+        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+        --aExp;
+    }
+    return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
+
+}
+
+#endif
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Converts the double-precision floating-point value `a' to the extended
+| double-precision floating-point format and returns the result.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float64_to_floatx80( float64 a )
+{
+    bits64 aSig, zSig;
+    int16 aExp;
+    flag aSign;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    /* Exponent all ones: infinity when the fraction is zero, else a NaN. */
+    if ( aExp == 0x7FF ) {
+        if ( aSig == 0 ) {
+            return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+        }
+        return commonNaNToFloatx80( float64ToCommonNaN( a ) );
+    }
+    /* Exponent zero: a signed zero, or a subnormal that needs normalizing. */
+    if ( aExp == 0 ) {
+        if ( ! aSig ) return packFloatx80( aSign, 0, 0 );
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+    }
+    /* Set bit 52 (the integer bit) and move it up to bit 63. */
+    zSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
+    return packFloatx80( aSign, aExp + 0x3C00, zSig );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Converts the double-precision floating-point value `a' to the quadruple-
+| precision floating-point format and returns the result.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float64_to_float128( float64 a )
+{
+    bits64 aSig, zSig0, zSig1;
+    int16 aExp;
+    flag aSign;
+
+    aSig = extractFloat64Frac( a );
+    aExp = extractFloat64Exp( a );
+    aSign = extractFloat64Sign( a );
+    /* Exponent all ones: infinity when the fraction is zero, else a NaN. */
+    if ( aExp == 0x7FF ) {
+        if ( aSig == 0 ) return packFloat128( aSign, 0x7FFF, 0, 0 );
+        return commonNaNToFloat128( float64ToCommonNaN( a ) );
+    }
+    /* Exponent zero: a signed zero, or a subnormal that needs normalizing. */
+    if ( aExp == 0 ) {
+        if ( ! aSig ) return packFloat128( aSign, 0, 0, 0 );
+        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+        --aExp;
+    }
+    /* Spread the fraction over two words, shifted right by 4 bits. */
+    shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
+    return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
+
+}
+
+#endif
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Converts the extended double-precision floating-point value `a' to the
+| 32-bit two's complement integer format and returns the result.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic---which means in particular that the conversion
+| is rounded according to the current rounding mode.  If `a' is a NaN, the
+| largest positive integer is returned.  Otherwise, if the conversion
+| overflows, the largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 floatx80_to_int32( floatx80 a )
+{
+    bits64 aSig;
+    int32 aExp, shiftCount;
+    flag aSign;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    /* A NaN (exponent all ones, nonzero fraction) is treated as positive. */
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig<<1 ) ) aSign = 0;
+    }
+    /* Never shift by less than one bit. */
+    shiftCount = 0x4037 - aExp;
+    if ( shiftCount < 1 ) shiftCount = 1;
+    shift64RightJamming( aSig, shiftCount, &aSig );
+    return roundAndPackInt32( aSign, aSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero.  If `a' is a NaN, the largest positive integer is returned.
+| Otherwise, if the conversion overflows, the largest integer with the same
+| sign as `a' is returned.  The invalid exception is raised on NaN and on
+| overflow; the inexact exception flag is set when nonzero bits are
+| discarded.
+*----------------------------------------------------------------------------*/
+
+int32 floatx80_to_int32_round_to_zero( floatx80 a )
+{
+    flag aSign;
+    int32 aExp, shiftCount;
+    bits64 aSig, truncatedSig;
+    int32 z;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    if ( 0x401E < aExp ) {
+        /* Exponent too large for any 32-bit result (this also covers
+           infinities and NaNs); a NaN is reported as positive overflow. */
+        if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
+        goto invalid;
+    }
+    if ( aExp < 0x3FFF ) {
+        /* Exponent below the bias: the result truncates to zero, inexactly
+           unless the operand itself is zero. */
+        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+        return 0;
+    }
+    shiftCount = 0x403E - aExp;
+    truncatedSig = aSig>>shiftCount;
+    /* Apply the sign in unsigned arithmetic.  Negating the int32 directly is
+       undefined when the magnitude is 0x80000000 (the operand -2^31), whereas
+       unsigned subtraction wraps and converts to the intended bit pattern. */
+    z = (sbits32) ( aSign ? ( 0 - truncatedSig ) : truncatedSig );
+    /* The sign of the integer must agree with the sign of the operand;
+       a mismatch means the magnitude did not fit. */
+    if ( ( z < 0 ) ^ aSign ) goto invalid;
+    if ( ( truncatedSig<<shiftCount ) != aSig ) {
+        float_exception_flags |= float_flag_inexact;
+    }
+    return z;
+
+ invalid:
+    float_raise( float_flag_invalid );
+    return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Converts the extended double-precision floating-point value `a' to the
+| 64-bit two's complement integer format.  The conversion follows the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic, so the result is rounded
+| according to the current rounding mode.  A NaN yields the largest positive
+| integer; any other input that overflows yields the largest integer with the
+| same sign as `a'.
+*----------------------------------------------------------------------------*/
+
+int64 floatx80_to_int64( floatx80 a )
+{
+    bits64 sig = extractFloatx80Frac( a );
+    int32 biasedExp = extractFloatx80Exp( a );
+    flag sign = extractFloatx80Sign( a );
+    int32 rightShift = 0x403E - biasedExp;
+    bits64 sigExtra;
+
+    if ( rightShift < 0 ) {
+        /* Exponent beyond the 64-bit range: always invalid.  Only a negative
+           operand that is not a NaN saturates to the most negative integer;
+           with the maximum exponent, anything other than the infinity
+           significand is treated as a NaN here. */
+        float_raise( float_flag_invalid );
+        if ( sign
+             && ! ( ( biasedExp == 0x7FFF )
+                    && ( sig != LIT64( 0x8000000000000000 ) ) )
+           ) {
+            return (sbits64) LIT64( 0x8000000000000000 );
+        }
+        return LIT64( 0x7FFFFFFFFFFFFFFF );
+    }
+    if ( rightShift == 0 ) {
+        /* The significand is already aligned; nothing is shifted out. */
+        sigExtra = 0;
+    }
+    else {
+        shift64ExtraRightJamming( sig, 0, rightShift, &sig, &sigExtra );
+    }
+    return roundAndPackInt64( sign, sig, sigExtra );
+
+}
+
+/*----------------------------------------------------------------------------
+| Converts the extended double-precision floating-point value `a' to the
+| 64-bit two's complement integer format, always rounding toward zero but
+| otherwise following the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.  A NaN yields the largest positive integer; any other input
+| that overflows yields the largest integer with the same sign as `a'.
+*----------------------------------------------------------------------------*/
+
+int64 floatx80_to_int64_round_to_zero( floatx80 a )
+{
+    bits64 sig = extractFloatx80Frac( a );
+    int32 biasedExp = extractFloatx80Exp( a );
+    flag sign = extractFloatx80Sign( a );
+    int32 excessExp = biasedExp - 0x403E;
+    int64 result;
+
+    if ( excessExp >= 0 ) {
+        /* Drop the explicit integer bit so only the fraction remains. */
+        sig &= LIT64( 0x7FFFFFFFFFFFFFFF );
+        if ( ( a.high == 0xC03E ) && ( sig == 0 ) ) {
+            /* Sign set, exponent 0x403E, zero fraction: the most negative
+               integer is returned without raising any exception. */
+            return (sbits64) LIT64( 0x8000000000000000 );
+        }
+        float_raise( float_flag_invalid );
+        if ( sign && ! ( ( biasedExp == 0x7FFF ) && sig ) ) {
+            return (sbits64) LIT64( 0x8000000000000000 );
+        }
+        return LIT64( 0x7FFFFFFFFFFFFFFF );
+    }
+    if ( biasedExp < 0x3FFF ) {
+        /* Exponent below the bias: truncates to zero. */
+        if ( biasedExp | sig ) float_exception_flags |= float_flag_inexact;
+        return 0;
+    }
+    result = sig>>( - excessExp );
+    /* Any bit shifted out on the right makes the conversion inexact. */
+    if ( (bits64) ( sig<<( excessExp & 63 ) ) ) {
+        float_exception_flags |= float_flag_inexact;
+    }
+    return sign ? - result : result;
+
+}
+
+/*----------------------------------------------------------------------------
+| Converts the extended double-precision floating-point value `a' to the
+| single-precision floating-point format, following the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 floatx80_to_float32( floatx80 a )
+{
+    bits64 sig = extractFloatx80Frac( a );
+    int32 biasedExp = extractFloatx80Exp( a );
+    flag sign = extractFloatx80Sign( a );
+
+    if ( biasedExp == 0x7FFF ) {
+        /* Infinity maps to infinity; anything else here is a NaN. */
+        if ( (bits64) ( sig<<1 ) == 0 ) return packFloat32( sign, 0xFF, 0 );
+        return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
+    }
+    /* Narrow the significand, folding the discarded bits into a sticky bit. */
+    shift64RightJamming( sig, 33, &sig );
+    /* Rebias the exponent unless the operand is a true zero. */
+    if ( ( biasedExp != 0 ) || ( sig != 0 ) ) biasedExp -= 0x3F81;
+    return roundAndPackFloat32( sign, biasedExp, sig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Converts the extended double-precision floating-point value `a' to the
+| double-precision floating-point format, following the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 floatx80_to_float64( floatx80 a )
+{
+    bits64 sig = extractFloatx80Frac( a );
+    int32 biasedExp = extractFloatx80Exp( a );
+    flag sign = extractFloatx80Sign( a );
+    bits64 narrowedSig;
+
+    if ( biasedExp == 0x7FFF ) {
+        /* Infinity maps to infinity; anything else here is a NaN. */
+        if ( (bits64) ( sig<<1 ) == 0 ) return packFloat64( sign, 0x7FF, 0 );
+        return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
+    }
+    shift64RightJamming( sig, 1, &narrowedSig );
+    /* Rebias the exponent unless the operand is a true zero; the test uses
+       the unshifted significand. */
+    if ( ( biasedExp != 0 ) || ( sig != 0 ) ) biasedExp -= 0x3C01;
+    return roundAndPackFloat64( sign, biasedExp, narrowedSig );
+
+}
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Converts the extended double-precision floating-point value `a' to the
+| quadruple-precision floating-point format, following the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 floatx80_to_float128( floatx80 a )
+{
+    bits64 sig = extractFloatx80Frac( a );
+    int16 biasedExp = extractFloatx80Exp( a );
+    flag sign = extractFloatx80Sign( a );
+    bits64 fracHigh, fracLow;
+
+    if ( biasedExp == 0x7FFF ) {
+        if ( (bits64) ( sig<<1 ) ) {
+            return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
+        }
+    }
+    /* Discard the explicit integer bit, then move the fraction into the
+       position expected by packFloat128.  The exponent field is passed
+       through unchanged. */
+    shift128Right( sig<<1, 0, 16, &fracHigh, &fracLow );
+    return packFloat128( sign, biasedExp, fracHigh, fracLow );
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the extended double-precision floating-point value `a' to an integer,
+| and returns the result as an extended double-precision floating-point
+| value.  The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic, using the current rounding mode.  The
+| inexact exception flag is set whenever the result differs from `a'.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_round_to_int( floatx80 a )
+{
+    flag sign;
+    int32 biasedExp;
+    bits64 unitBit, fractionMask;
+    int8 mode;
+    floatx80 result;
+
+    biasedExp = extractFloatx80Exp( a );
+    if ( biasedExp >= 0x403E ) {
+        /* No fraction bits remain at this magnitude: only NaNs need any
+           processing, everything else is returned as is. */
+        if ( ( biasedExp == 0x7FFF )
+             && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
+            return propagateFloatx80NaN( a, a );
+        }
+        return a;
+    }
+    if ( biasedExp < 0x3FFF ) {
+        /* Exponent below the bias: the result is zero or +/-1. */
+        if ( ( biasedExp == 0 )
+             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
+            return a;
+        }
+        float_exception_flags |= float_flag_inexact;
+        sign = extractFloatx80Sign( a );
+        if ( float_rounding_mode == float_round_nearest_even ) {
+            /* Exponent 0x3FFE with a nonzero fraction rounds away from
+               zero to +/-1; everything else in this mode becomes zero. */
+            if ( ( biasedExp == 0x3FFE )
+                 && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
+                return
+                    packFloatx80( sign, 0x3FFF, LIT64( 0x8000000000000000 ) );
+            }
+        }
+        else if ( float_rounding_mode == float_round_down ) {
+            if ( sign ) {
+                return packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) );
+            }
+            return packFloatx80( 0, 0, 0 );
+        }
+        else if ( float_rounding_mode == float_round_up ) {
+            if ( sign ) return packFloatx80( 1, 0, 0 );
+            return packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
+        }
+        return packFloatx80( sign, 0, 0 );
+    }
+    /* unitBit is the significand bit with weight one; the bits below it
+       form the fraction that has to be rounded away. */
+    unitBit = ( (bits64) 1 )<<( 0x403E - biasedExp );
+    fractionMask = unitBit - 1;
+    result = a;
+    mode = float_rounding_mode;
+    if ( mode == float_round_nearest_even ) {
+        result.low += unitBit>>1;
+        /* Exact tie: clear the unit bit so the result is even. */
+        if ( ( result.low & fractionMask ) == 0 ) result.low &= ~ unitBit;
+    }
+    else if ( mode != float_round_to_zero ) {
+        /* Directed rounding bumps the magnitude only when rounding away
+           from zero: negative operands when rounding down, positive
+           operands when rounding up. */
+        if ( extractFloatx80Sign( result ) ^ ( mode == float_round_up ) ) {
+            result.low += fractionMask;
+        }
+    }
+    result.low &= ~ fractionMask;
+    if ( result.low == 0 ) {
+        /* The significand wrapped to zero: carry into the exponent and
+           restore the explicit integer bit. */
+        ++result.high;
+        result.low = LIT64( 0x8000000000000000 );
+    }
+    if ( result.low != a.low ) float_exception_flags |= float_flag_inexact;
+    return result;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the extended double-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
+| negated before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+|
+| When both operands have a zero exponent field, a sum of zero is returned
+| directly as a signed zero, and a carry out of the 64-bit significand sum
+| (possible only when an operand has its explicit integer bit set) is folded
+| back into the result instead of being dropped.
+*----------------------------------------------------------------------------*/
+
+static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+{
+    int32 aExp, bExp, zExp;
+    bits64 aSig, bSig, zSig0, zSig1;
+    int32 expDiff;
+
+    aSig = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    expDiff = aExp - bExp;
+    if ( 0 < expDiff ) {
+        /* `a' has the larger exponent: align `b' to it. */
+        if ( aExp == 0x7FFF ) {
+            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+            return a;
+        }
+        /* An operand with a zero exponent field is aligned as if its
+           exponent were 1, so the shift distance is one less. */
+        if ( bExp == 0 ) --expDiff;
+        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+        zExp = aExp;
+    }
+    else if ( expDiff < 0 ) {
+        /* `b' has the larger exponent: align `a' to it. */
+        if ( bExp == 0x7FFF ) {
+            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+        }
+        if ( aExp == 0 ) ++expDiff;
+        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+        zExp = bExp;
+    }
+    else {
+        /* Equal exponents. */
+        if ( aExp == 0x7FFF ) {
+            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+                return propagateFloatx80NaN( a, b );
+            }
+            return a;
+        }
+        zSig1 = 0;
+        zSig0 = aSig + bSig;
+        if ( aExp == 0 ) {
+            if ( zSig0 < aSig ) {
+                /* The 64-bit sum wrapped around.  Treat the operands as
+                   having exponent 1 and let the common path shift the
+                   carry back in. */
+                zExp = 1;
+                goto shiftRight1;
+            }
+            /* A zero sum has no leading one to normalize to; every other
+               call to normalizeFloatx80Subnormal in this file is likewise
+               guarded by a zero test. */
+            if ( zSig0 == 0 ) return packFloatx80( zSign, 0, 0 );
+            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
+            goto roundAndPack;
+        }
+        zExp = aExp;
+        goto shiftRight1;
+    }
+    zSig0 = aSig + bSig;
+    /* If the top bit is still set there was no carry out of the sum. */
+    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
+ shiftRight1:
+    /* Renormalize after a carry: shift right one bit, reinstate the
+       integer bit, and bump the exponent. */
+    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
+    zSig0 |= LIT64( 0x8000000000000000 );
+    ++zExp;
+ roundAndPack:
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the extended
+| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
+| difference is negated before being returned. `zSign' is ignored if the
+| result is a NaN. The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+|
+| Subtracting two infinities raises the invalid exception and returns the
+| default NaN. An exactly zero difference is returned as a zero whose sign
+| is negative only when the rounding mode is float_round_down.
+*----------------------------------------------------------------------------*/
+
+static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+{
+ int32 aExp, bExp, zExp;
+ bits64 aSig, bSig, zSig0, zSig1;
+ int32 expDiff;
+ floatx80 z;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ bSig = extractFloatx80Frac( b );
+ bExp = extractFloatx80Exp( b );
+ expDiff = aExp - bExp;
+ if ( 0 < expDiff ) goto aExpBigger;
+ if ( expDiff < 0 ) goto bExpBigger;
+ /* Equal exponents from here down to the bExpBigger label. */
+ if ( aExp == 0x7FFF ) {
+ if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
+ return propagateFloatx80NaN( a, b );
+ }
+ /* Infinity minus infinity. */
+ float_raise( float_flag_invalid );
+ z.low = floatx80_default_nan_low;
+ z.high = floatx80_default_nan_high;
+ return z;
+ }
+ /* With a zero exponent field both operands are processed with an */
+ /* exponent of 1. */
+ if ( aExp == 0 ) {
+ aExp = 1;
+ bExp = 1;
+ }
+ zSig1 = 0;
+ if ( bSig < aSig ) goto aBigger;
+ if ( aSig < bSig ) goto bBigger;
+ /* Identical magnitudes: the difference is an exact zero. */
+ return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
+ bExpBigger:
+ if ( bExp == 0x7FFF ) {
+ if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+ /* Finite minus infinity: infinity with the opposite sign. */
+ return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ /* expDiff is negative here; an `a' with a zero exponent field is */
+ /* aligned as if its exponent were 1, so it is shifted one place less. */
+ if ( aExp == 0 ) ++expDiff;
+ shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+ bBigger:
+ /* |b| > |a|: compute b - a and flip the sign of the result. */
+ sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
+ zExp = bExp;
+ zSign ^= 1;
+ goto normalizeRoundAndPack;
+ aExpBigger:
+ if ( aExp == 0x7FFF ) {
+ if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+ return a;
+ }
+ /* Mirror of the adjustment above, for a `b' with a zero exponent field. */
+ if ( bExp == 0 ) --expDiff;
+ shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+ aBigger:
+ /* |a| > |b|: compute a - b; the sign stays zSign. */
+ sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
+ zExp = aExp;
+ normalizeRoundAndPack:
+ return
+ normalizeRoundAndPackFloatx80(
+ floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Adds the extended double-precision floating-point values `a' and `b' and
+| returns the sum, following the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_add( floatx80 a, floatx80 b )
+{
+    flag signA = extractFloatx80Sign( a );
+    flag signB = extractFloatx80Sign( b );
+
+    /* Operands of opposite sign: the sum is a difference of magnitudes. */
+    if ( signA != signB ) return subFloatx80Sigs( a, b, signA );
+    return addFloatx80Sigs( a, b, signA );
+
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the extended double-precision floating-point value `b' from `a'
+| and returns the difference, following the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_sub( floatx80 a, floatx80 b )
+{
+    flag signA = extractFloatx80Sign( a );
+    flag signB = extractFloatx80Sign( b );
+
+    /* Operands of opposite sign: the difference is a sum of magnitudes. */
+    if ( signA != signB ) return addFloatx80Sigs( a, b, signA );
+    return subFloatx80Sigs( a, b, signA );
+
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies the extended double-precision floating-point values `a' and `b'
+| and returns the product, following the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.  Multiplying an infinity by a zero raises the
+| invalid exception and returns the default NaN.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_mul( floatx80 a, floatx80 b )
+{
+    flag signA, signB, signZ;
+    int32 expA, expB, expZ;
+    bits64 sigA, sigB, prodHigh, prodLow;
+    floatx80 nan;
+
+    sigA = extractFloatx80Frac( a );
+    expA = extractFloatx80Exp( a );
+    signA = extractFloatx80Sign( a );
+    sigB = extractFloatx80Frac( b );
+    expB = extractFloatx80Exp( b );
+    signB = extractFloatx80Sign( b );
+    signZ = signA ^ signB;
+    if ( expA == 0x7FFF ) {
+        /* `a' is an infinity or a NaN. */
+        if ( (bits64) ( sigA<<1 )
+             || ( ( expB == 0x7FFF ) && (bits64) ( sigB<<1 ) ) ) {
+            return propagateFloatx80NaN( a, b );
+        }
+        if ( ( expB | sigB ) == 0 ) goto invalid;
+        return packFloatx80( signZ, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( expB == 0x7FFF ) {
+        /* `b' is an infinity or a NaN; `a' is finite. */
+        if ( (bits64) ( sigB<<1 ) ) return propagateFloatx80NaN( a, b );
+        if ( ( expA | sigA ) == 0 ) goto invalid;
+        return packFloatx80( signZ, 0x7FFF, LIT64( 0x8000000000000000 ) );
+    }
+    if ( expA == 0 ) {
+        if ( sigA == 0 ) return packFloatx80( signZ, 0, 0 );
+        normalizeFloatx80Subnormal( sigA, &expA, &sigA );
+    }
+    if ( expB == 0 ) {
+        if ( sigB == 0 ) return packFloatx80( signZ, 0, 0 );
+        normalizeFloatx80Subnormal( sigB, &expB, &sigB );
+    }
+    expZ = expA + expB - 0x3FFE;
+    mul64To128( sigA, sigB, &prodHigh, &prodLow );
+    /* If the top bit of the 128-bit product is clear, shift left one place
+       and lower the exponent to compensate. */
+    if ( (sbits64) prodHigh > 0 ) {
+        shortShift128Left( prodHigh, prodLow, 1, &prodHigh, &prodLow );
+        --expZ;
+    }
+    return
+        roundAndPackFloatx80(
+            floatx80_rounding_precision, signZ, expZ, prodHigh, prodLow );
+
+ invalid:
+    /* Infinity times zero. */
+    float_raise( float_flag_invalid );
+    nan.low = floatx80_default_nan_low;
+    nan.high = floatx80_default_nan_high;
+    return nan;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the extended double-precision floating-point
+| value `a' by the corresponding value `b'. The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+|
+| Infinity divided by infinity and zero divided by zero raise the invalid
+| exception and return the default NaN. A nonzero finite value divided by
+| zero raises the divide-by-zero exception and returns an infinity whose
+| sign is the exclusive OR of the operand signs.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_div( floatx80 a, floatx80 b )
+{
+ flag aSign, bSign, zSign;
+ int32 aExp, bExp, zExp;
+ bits64 aSig, bSig, zSig0, zSig1;
+ bits64 rem0, rem1, rem2, term0, term1, term2;
+ floatx80 z;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ bSig = extractFloatx80Frac( b );
+ bExp = extractFloatx80Exp( b );
+ bSign = extractFloatx80Sign( b );
+ zSign = aSign ^ bSign;
+ /* `a' is an infinity or a NaN. */
+ if ( aExp == 0x7FFF ) {
+ if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
+ if ( bExp == 0x7FFF ) {
+ if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+ goto invalid;
+ }
+ return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ /* `b' is an infinity or a NaN; finite / infinity is a signed zero. */
+ if ( bExp == 0x7FFF ) {
+ if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+ return packFloatx80( zSign, 0, 0 );
+ }
+ if ( bExp == 0 ) {
+ if ( bSig == 0 ) {
+ /* Division by zero: 0/0 is invalid, anything else is div-by-zero. */
+ if ( ( aExp | aSig ) == 0 ) {
+ invalid:
+ float_raise( float_flag_invalid );
+ z.low = floatx80_default_nan_low;
+ z.high = floatx80_default_nan_high;
+ return z;
+ }
+ float_raise( float_flag_divbyzero );
+ return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
+ normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+ }
+ zExp = aExp - bExp + 0x3FFE;
+ rem1 = 0;
+ /* Halve the dividend when it is not smaller than the divisor, so that */
+ /* the 128-bit dividend (aSig, rem1) is below bSig * 2^64. */
+ if ( bSig <= aSig ) {
+ shift128Right( aSig, 0, 1, &aSig, &rem1 );
+ ++zExp;
+ }
+ /* High 64 quotient bits: take an estimate, then step it down while the */
+ /* remainder it implies is negative. */
+ zSig0 = estimateDiv128To64( aSig, rem1, bSig );
+ mul64To128( bSig, zSig0, &term0, &term1 );
+ sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
+ while ( (sbits64) rem0 < 0 ) {
+ --zSig0;
+ add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+ }
+ /* Low 64 quotient bits, estimated from the remainder. */
+ zSig1 = estimateDiv128To64( rem1, 0, bSig );
+ /* The exact remainder is computed only when the estimate, ignoring its */
+ /* top bit, is very small; the sticky bit then records whether any */
+ /* remainder is left. NOTE(review): presumably larger estimates cannot */
+ /* change the rounding decision -- confirm against the error bound of */
+ /* estimateDiv128To64. */
+ if ( (bits64) ( zSig1<<1 ) <= 8 ) {
+ mul64To128( bSig, zSig1, &term1, &term2 );
+ sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+ while ( (sbits64) rem1 < 0 ) {
+ --zSig1;
+ add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
+ }
+ zSig1 |= ( ( rem1 | rem2 ) != 0 );
+ }
+ return
+ roundAndPackFloatx80(
+ floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the extended double-precision floating-point value
+| `a' with respect to the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+|
+| The invalid exception is raised, and the default NaN returned, when `a' is
+| an infinity or `b' is zero.  If `b' is an infinity, `a' is returned
+| unchanged.  The sign of `b' does not take part in the computation.  The
+| result is always rounded at 80-bit precision, regardless of
+| floatx80_rounding_precision.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_rem( floatx80 a, floatx80 b )
+{
+    flag aSign, zSign;
+    int32 aExp, bExp, expDiff;
+    bits64 aSig0, aSig1, bSig;
+    bits64 q, term0, term1, alternateASig0, alternateASig1;
+    floatx80 z;
+
+    aSig0 = extractFloatx80Frac( a );
+    aExp = extractFloatx80Exp( a );
+    aSign = extractFloatx80Sign( a );
+    bSig = extractFloatx80Frac( b );
+    bExp = extractFloatx80Exp( b );
+    if ( aExp == 0x7FFF ) {
+        if ( (bits64) ( aSig0<<1 )
+             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
+            return propagateFloatx80NaN( a, b );
+        }
+        /* Remainder of an infinity. */
+        goto invalid;
+    }
+    if ( bExp == 0x7FFF ) {
+        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
+        return a;
+    }
+    if ( bExp == 0 ) {
+        if ( bSig == 0 ) {
+ invalid:
+            float_raise( float_flag_invalid );
+            z.low = floatx80_default_nan_low;
+            z.high = floatx80_default_nan_high;
+            return z;
+        }
+        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+    }
+    if ( aExp == 0 ) {
+        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
+        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+    }
+    bSig |= LIT64( 0x8000000000000000 );
+    zSign = aSign;
+    expDiff = aExp - bExp;
+    aSig1 = 0;
+    if ( expDiff < 0 ) {
+        /* An exponent more than one below that of `b' means `a' is
+           already the remainder.  Exactly one below: halve `a' so both
+           operands share an exponent. */
+        if ( expDiff < -1 ) return a;
+        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
+        expDiff = 0;
+    }
+    /* First quotient bit. */
+    q = ( bSig <= aSig0 );
+    if ( q ) aSig0 -= bSig;
+    expDiff -= 64;
+    /* Reduce the remaining exponent difference 62 bits at a time.  The
+       estimate is lowered by 2 (floored at 0) before use; NOTE(review):
+       presumably so it never exceeds the true partial quotient -- confirm
+       against the error bound of estimateDiv128To64. */
+    while ( 0 < expDiff ) {
+        q = estimateDiv128To64( aSig0, aSig1, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;
+        mul64To128( bSig, q, &term0, &term1 );
+        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
+        expDiff -= 62;
+    }
+    expDiff += 64;
+    if ( 0 < expDiff ) {
+        /* Final partial step for the leftover expDiff bits, followed by
+           exact correction of the under-estimated quotient. */
+        q = estimateDiv128To64( aSig0, aSig1, bSig );
+        q = ( 2 < q ) ? q - 2 : 0;
+        q >>= 64 - expDiff;
+        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
+        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
+        while ( le128( term0, term1, aSig0, aSig1 ) ) {
+            ++q;
+            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+        }
+    }
+    else {
+        term1 = 0;
+        term0 = bSig;
+    }
+    /* Pick whichever of r and (divisor - r) is smaller; on a tie prefer
+       the one that makes the quotient even.  Taking the alternate flips
+       the sign of the result. */
+    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
+    if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
+         || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
+              && ( q & 1 ) )
+       ) {
+        aSig0 = alternateASig0;
+        aSig1 = alternateASig1;
+        zSign = ! zSign;
+    }
+    return
+        normalizeRoundAndPackFloatx80(
+            80, zSign, bExp + expDiff, aSig0, aSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the extended double-precision floating-point
+| value `a'. The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+|
+| A negative operand other than a zero raises the invalid exception and
+| returns the default NaN. A negative zero is returned unchanged.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_sqrt( floatx80 a )
+{
+ flag aSign;
+ int32 aExp, zExp;
+ bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
+ bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+ floatx80 z;
+
+ aSig0 = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ /* NaN propagates; +infinity is its own root; -infinity is invalid. */
+ if ( aExp == 0x7FFF ) {
+ if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
+ if ( ! aSign ) return a;
+ goto invalid;
+ }
+ if ( aSign ) {
+ /* Negative zero passes through; any other negative value is invalid. */
+ if ( ( aExp | aSig0 ) == 0 ) return a;
+ invalid:
+ float_raise( float_flag_invalid );
+ z.low = floatx80_default_nan_low;
+ z.high = floatx80_default_nan_high;
+ return z;
+ }
+ if ( aExp == 0 ) {
+ if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
+ normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+ }
+ /* Halve the unbiased exponent and re-apply the bias. */
+ zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
+ /* Initial estimate from the exponent and the top 32 significand bits. */
+ zSig0 = estimateSqrt32( aExp, aSig0>>32 );
+ /* Shift the operand right by 2 or 3 bits depending on exponent parity. */
+ shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
+ /* Refine the estimate to 64 bits with one division step. */
+ zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
+ doubleZSig0 = zSig0<<1;
+ /* Step the root estimate down while its square exceeds the operand. */
+ mul64To128( zSig0, zSig0, &term0, &term1 );
+ sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
+ while ( (sbits64) rem0 < 0 ) {
+ --zSig0;
+ doubleZSig0 -= 2;
+ add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
+ }
+ /* Next 64 result bits, estimated from the remainder. */
+ zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
+ /* The exact remainder is computed only when the low bits of the */
+ /* estimate are very small; the sticky bit then records whether any */
+ /* remainder is left. NOTE(review): presumably this is the only case in */
+ /* which the estimate's error could affect rounding -- confirm. */
+ if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
+ if ( zSig1 == 0 ) zSig1 = 1;
+ mul64To128( doubleZSig0, zSig1, &term1, &term2 );
+ sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+ mul64To128( zSig1, zSig1, &term2, &term3 );
+ sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+ while ( (sbits64) rem1 < 0 ) {
+ --zSig1;
+ shortShift128Left( 0, zSig1, 1, &term2, &term3 );
+ term3 |= 1;
+ term2 |= doubleZSig0;
+ add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
+ }
+ zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+ }
+ /* Combine both halves into the 128-bit significand passed to rounding. */
+ shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
+ zSig0 |= doubleZSig0;
+ return
+ roundAndPackFloatx80(
+ floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' equals
+| the corresponding value `b', and 0 otherwise.  The comparison follows the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic: a NaN operand makes
+| the result 0, and the invalid exception is raised only if an operand is a
+| signaling NaN.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_eq( floatx80 a, floatx80 b )
+{
+    flag aIsNaN, bIsNaN;
+
+    aIsNaN =
+        ( extractFloatx80Exp( a ) == 0x7FFF )
+        && (bits64) ( extractFloatx80Frac( a )<<1 );
+    bIsNaN =
+        ( extractFloatx80Exp( b ) == 0x7FFF )
+        && (bits64) ( extractFloatx80Frac( b )<<1 );
+    if ( aIsNaN || bIsNaN ) {
+        if ( floatx80_is_signaling_nan( a )
+             || floatx80_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    if ( a.low != b.low ) return 0;
+    if ( a.high == b.high ) return 1;
+    /* Different sign/exponent words compare equal only for zeros of
+       opposite sign. */
+    return ( a.low == 0 ) && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is less
+| than or equal to the corresponding value `b', and 0 otherwise.  The
+| comparison follows the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic; any NaN operand raises the invalid exception and gives 0.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_le( floatx80 a, floatx80 b )
+{
+    flag aIsNaN, bIsNaN, signA, signB;
+
+    aIsNaN =
+        ( extractFloatx80Exp( a ) == 0x7FFF )
+        && (bits64) ( extractFloatx80Frac( a )<<1 );
+    bIsNaN =
+        ( extractFloatx80Exp( b ) == 0x7FFF )
+        && (bits64) ( extractFloatx80Frac( b )<<1 );
+    if ( aIsNaN || bIsNaN ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    signA = extractFloatx80Sign( a );
+    signB = extractFloatx80Sign( b );
+    if ( signA == signB ) {
+        /* Same sign: compare the raw encodings, reversed for negatives. */
+        if ( signA ) return le128( b.high, b.low, a.high, a.low );
+        return le128( a.high, a.low, b.high, b.low );
+    }
+    /* Opposite signs: a negative `a' is never greater; a positive `a' is
+       only <= `b' when both operands are zero. */
+    if ( signA ) return 1;
+    return
+        ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is less
+| than the corresponding value `b', and 0 otherwise.  The comparison follows
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic; any NaN operand
+| raises the invalid exception and gives 0.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_lt( floatx80 a, floatx80 b )
+{
+    flag aIsNaN, bIsNaN, signA, signB;
+
+    aIsNaN =
+        ( extractFloatx80Exp( a ) == 0x7FFF )
+        && (bits64) ( extractFloatx80Frac( a )<<1 );
+    bIsNaN =
+        ( extractFloatx80Exp( b ) == 0x7FFF )
+        && (bits64) ( extractFloatx80Frac( b )<<1 );
+    if ( aIsNaN || bIsNaN ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    signA = extractFloatx80Sign( a );
+    signB = extractFloatx80Sign( b );
+    if ( signA == signB ) {
+        /* Same sign: compare the raw encodings, reversed for negatives. */
+        if ( signA ) return lt128( b.high, b.low, a.high, a.low );
+        return lt128( a.high, a.low, b.high, b.low );
+    }
+    /* Opposite signs: `a' is smaller only if it is the negative one and
+       the operands are not both zero. */
+    if ( ! signA ) return 0;
+    return
+        ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) != 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' equals
+| the corresponding value `b', and 0 otherwise.  The invalid exception is
+| raised if either operand is a NaN, quiet or signaling.  Otherwise the
+| comparison follows the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_eq_signaling( floatx80 a, floatx80 b )
+{
+    flag aIsNaN, bIsNaN;
+
+    aIsNaN =
+        ( extractFloatx80Exp( a ) == 0x7FFF )
+        && (bits64) ( extractFloatx80Frac( a )<<1 );
+    bIsNaN =
+        ( extractFloatx80Exp( b ) == 0x7FFF )
+        && (bits64) ( extractFloatx80Frac( b )<<1 );
+    if ( aIsNaN || bIsNaN ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    if ( a.low != b.low ) return 0;
+    if ( a.high == b.high ) return 1;
+    /* Different sign/exponent words compare equal only for zeros of
+       opposite sign. */
+    return ( a.low == 0 ) && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is less
+| than or equal to the corresponding value `b', and 0 otherwise.  A NaN
+| operand gives 0; only a signaling NaN raises the invalid exception.
+| Otherwise the comparison follows the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_le_quiet( floatx80 a, floatx80 b )
+{
+    flag aIsNaN, bIsNaN, signA, signB;
+
+    aIsNaN =
+        ( extractFloatx80Exp( a ) == 0x7FFF )
+        && (bits64) ( extractFloatx80Frac( a )<<1 );
+    bIsNaN =
+        ( extractFloatx80Exp( b ) == 0x7FFF )
+        && (bits64) ( extractFloatx80Frac( b )<<1 );
+    if ( aIsNaN || bIsNaN ) {
+        if ( floatx80_is_signaling_nan( a )
+             || floatx80_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    signA = extractFloatx80Sign( a );
+    signB = extractFloatx80Sign( b );
+    if ( signA == signB ) {
+        /* Same sign: compare the raw encodings, reversed for negatives. */
+        if ( signA ) return le128( b.high, b.low, a.high, a.low );
+        return le128( a.high, a.low, b.high, b.low );
+    }
+    /* Opposite signs: a negative `a' is never greater; a positive `a' is
+       only <= `b' when both operands are zero. */
+    if ( signA ) return 1;
+    return
+        ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is less
+| than the corresponding value `b', and 0 otherwise.  A NaN operand gives 0;
+| only a signaling NaN raises the invalid exception.  Otherwise the
+| comparison follows the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag floatx80_lt_quiet( floatx80 a, floatx80 b )
+{
+    flag aIsNaN, bIsNaN, signA, signB;
+
+    aIsNaN =
+        ( extractFloatx80Exp( a ) == 0x7FFF )
+        && (bits64) ( extractFloatx80Frac( a )<<1 );
+    bIsNaN =
+        ( extractFloatx80Exp( b ) == 0x7FFF )
+        && (bits64) ( extractFloatx80Frac( b )<<1 );
+    if ( aIsNaN || bIsNaN ) {
+        if ( floatx80_is_signaling_nan( a )
+             || floatx80_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    signA = extractFloatx80Sign( a );
+    signB = extractFloatx80Sign( b );
+    if ( signA == signB ) {
+        /* Same sign: compare the raw encodings, reversed for negatives. */
+        if ( signA ) return lt128( b.high, b.low, a.high, a.low );
+        return lt128( a.high, a.low, b.high, b.low );
+    }
+    /* Opposite signs: `a' is smaller only if it is the negative one and
+       the operands are not both zero. */
+    if ( ! signA ) return 0;
+    return
+        ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) != 0 );
+
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Converts the quadruple-precision floating-point value `a' to the 32-bit
+| two's complement integer format.  The conversion follows the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic, so the result is rounded
+| according to the current rounding mode.  A NaN yields the largest positive
+| integer; any other input that overflows yields the largest integer with the
+| same sign as `a'.
+*----------------------------------------------------------------------------*/
+
+int32 float128_to_int32( float128 a )
+{
+    bits64 sigLow = extractFloat128Frac1( a );
+    bits64 sigHigh = extractFloat128Frac0( a );
+    int32 biasedExp = extractFloat128Exp( a );
+    flag sign = extractFloat128Sign( a );
+    int32 rightShift = 0x4028 - biasedExp;
+
+    /* A NaN is converted as though it were positive. */
+    if ( ( biasedExp == 0x7FFF ) && ( sigHigh | sigLow ) ) sign = 0;
+    /* Set the leading significand bit for a nonzero exponent field. */
+    if ( biasedExp != 0 ) sigHigh |= LIT64( 0x0001000000000000 );
+    /* Collapse the low fraction word into a single sticky bit. */
+    if ( sigLow != 0 ) sigHigh |= 1;
+    if ( rightShift > 0 ) {
+        shift64RightJamming( sigHigh, rightShift, &sigHigh );
+    }
+    return roundAndPackInt32( sign, sigHigh );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 32-bit two's complement integer format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero. If
+| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
+| conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int32 float128_to_int32_round_to_zero( float128 a )
+{ /* Truncating conversion: range-checks the exponent, shifts the significand down, then verifies the sign of the narrowed result. */
+ flag aSign;
+ int32 aExp, shiftCount;
+ bits64 aSig0, aSig1, savedASig;
+ int32 z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ aSig0 |= ( aSig1 != 0 ); /* low word collapsed into a sticky bit in bit 0 */
+ if ( 0x401E < aExp ) { /* exponent more than 31 above the 0x3FFF bias: |a| >= 2^32, or infinity/NaN */
+ if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0; /* NaN: reported as positive overflow */
+ goto invalid;
+ }
+ else if ( aExp < 0x3FFF ) { /* |a| < 1 truncates to 0 */
+ if ( aExp || aSig0 ) float_exception_flags |= float_flag_inexact; /* inexact unless `a' is a zero */
+ return 0;
+ }
+ aSig0 |= LIT64( 0x0001000000000000 ); /* leading 1 at bit 48 (aExp is nonzero on this path) */
+ shiftCount = 0x402F - aExp; /* 48 minus the unbiased exponent: number of fraction bits to discard */
+ savedASig = aSig0;
+ aSig0 >>= shiftCount; /* truncation toward zero */
+ z = aSig0; /* narrowing from bits64 to int32 */
+ if ( aSign ) z = - z; /* NOTE(review): if z already holds the most negative int32 (a == -2^31) this negation overflows; assumes int32 is exactly 32 bits and wraps -- TODO confirm */
+ if ( ( z < 0 ) ^ aSign ) { /* sign of z disagrees with the operand: the magnitude did not fit */
+ invalid:
+ float_raise( float_flag_invalid );
+ return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; /* saturate toward the sign of `a' */
+ }
+ if ( ( aSig0<<shiftCount ) != savedASig ) { /* shifting back loses bits: something was truncated */
+ float_exception_flags |= float_flag_inexact;
+ }
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 64-bit two's complement integer format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN, the largest
+| positive integer is returned. Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 float128_to_int64( float128 a )
+{ /* Aligns the 128-bit significand so aSig0 holds the integer part and aSig1 the discarded bits, then rounds via roundAndPackInt64(). */
+ flag aSign;
+ int32 aExp, shiftCount;
+ bits64 aSig0, aSig1;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); /* nonzero exponent: set the leading 1 at bit 48 */
+ shiftCount = 0x402F - aExp; /* 48 minus the unbiased exponent; <= 0 means aSig0 must move left (or stay) */
+ if ( shiftCount <= 0 ) {
+ if ( 0x403E < aExp ) { /* unbiased exponent above 63: |a| >= 2^64, or infinity/NaN */
+ float_raise( float_flag_invalid );
+ if ( ! aSign
+ || ( ( aExp == 0x7FFF )
+ && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) ) /* aSig0 carries the leading 1 here, so this tests for a nonzero fraction, i.e. NaN */
+ )
+ ) {
+ return LIT64( 0x7FFFFFFFFFFFFFFF ); /* positive overflow or NaN */
+ }
+ return (sbits64) LIT64( 0x8000000000000000 ); /* negative overflow, including -infinity */
+ }
+ shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 ); /* left shift by 0..15 bits */
+ }
+ else {
+ shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 ); /* NOTE(review): presumably leaves the shifted-out bits in aSig1 for rounding -- confirm against the helper */
+ }
+ return roundAndPackInt64( aSign, aSig0, aSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 64-bit two's complement integer format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int64 float128_to_int64_round_to_zero( float128 a )
+{ /* Truncating conversion done inline; the only value with exponent 0x403E or more that is accepted is one that truncates to -2^63. */
+ flag aSign;
+ int32 aExp, shiftCount;
+ bits64 aSig0, aSig1;
+ int64 z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); /* nonzero exponent: set the leading 1 at bit 48 */
+ shiftCount = aExp - 0x402F; /* unbiased exponent minus 48; positive means shift left (opposite sign convention to float128_to_int64) */
+ if ( 0 < shiftCount ) {
+ if ( 0x403E <= aExp ) { /* |a| >= 2^63, or infinity/NaN */
+ aSig0 &= LIT64( 0x0000FFFFFFFFFFFF ); /* strip the leading 1 again so the NaN test below sees only the fraction */
+ if ( ( a.high == LIT64( 0xC03E000000000000 ) ) /* negative, exponent 0x403E, high fraction word zero */
+ && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) { /* only bits below 2^49 set in the low word: -2^63 plus a fraction smaller than 1 */
+ if ( aSig1 ) float_exception_flags |= float_flag_inexact; /* a fractional part was discarded */
+ }
+ else {
+ float_raise( float_flag_invalid );
+ if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) { /* positive overflow or NaN */
+ return LIT64( 0x7FFFFFFFFFFFFFFF );
+ }
+ }
+ return (sbits64) LIT64( 0x8000000000000000 ); /* reached both for the valid -2^63 case and for negative overflow */
+ }
+ z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) ); /* high word moved up, top shiftCount bits of the low word brought in */
+ if ( (bits64) ( aSig1<<shiftCount ) ) { /* low-word bits that stayed below the integer part */
+ float_exception_flags |= float_flag_inexact;
+ }
+ }
+ else {
+ if ( aExp < 0x3FFF ) { /* |a| < 1 truncates to 0 */
+ if ( aExp | aSig0 | aSig1 ) { /* inexact unless `a' is a zero */
+ float_exception_flags |= float_flag_inexact;
+ }
+ return 0;
+ }
+ z = aSig0>>( - shiftCount ); /* right shift by 0..48 bits */
+ if ( aSig1
+ || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) { /* the shiftCount test avoids a 64-bit shift; the left shift exposes the bits dropped from aSig0 */
+ float_exception_flags |= float_flag_inexact;
+ }
+ }
+ if ( aSign ) z = - z;
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the single-precision floating-point format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float128_to_float32( float128 a )
+{
+    bits64 fracLo = extractFloat128Frac1( a );
+    bits64 fracHi = extractFloat128Frac0( a );
+    int32 exp = extractFloat128Exp( a );
+    flag sign = extractFloat128Sign( a );
+    bits32 sig32;
+
+    if ( exp == 0x7FFF ) {
+        /* All-ones exponent: infinity if the fraction is zero, else NaN. */
+        if ( ( fracHi | fracLo ) == 0 ) return packFloat32( sign, 0xFF, 0 );
+        return commonNaNToFloat32( float128ToCommonNaN( a ) );
+    }
+    /* Fold the low word into bit 0 as a sticky bit, then shift the high */
+    /* word right by 18 (jamming) before narrowing it to bits32.         */
+    if ( fracLo != 0 ) fracHi |= 1;
+    shift64RightJamming( fracHi, 18, &fracHi );
+    sig32 = fracHi;
+    if ( ( exp != 0 ) || ( sig32 != 0 ) ) {
+        /* Exponent or significand nonzero: set bit 30 and rebias the */
+        /* exponent for the single-precision rounding routine.        */
+        sig32 |= 0x40000000;
+        exp -= 0x3F81;
+    }
+    return roundAndPackFloat32( sign, exp, sig32 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the double-precision floating-point format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float128_to_float64( float128 a )
+{
+    bits64 fracLo = extractFloat128Frac1( a );
+    bits64 fracHi = extractFloat128Frac0( a );
+    int32 exp = extractFloat128Exp( a );
+    flag sign = extractFloat128Sign( a );
+
+    if ( exp == 0x7FFF ) {
+        /* All-ones exponent: infinity if the fraction is zero, else NaN. */
+        if ( ( fracHi | fracLo ) == 0 ) return packFloat64( sign, 0x7FF, 0 );
+        return commonNaNToFloat64( float128ToCommonNaN( a ) );
+    }
+    /* Move the fraction up 14 bits within the 128-bit pair; whatever is */
+    /* still left in the low word afterwards becomes a sticky bit.       */
+    shortShift128Left( fracHi, fracLo, 14, &fracHi, &fracLo );
+    if ( fracLo != 0 ) fracHi |= 1;
+    if ( ( exp != 0 ) || ( fracHi != 0 ) ) {
+        /* Exponent or significand nonzero: set bit 62 and rebias the */
+        /* exponent for the double-precision rounding routine.        */
+        fracHi |= LIT64( 0x4000000000000000 );
+        exp -= 0x3C01;
+    }
+    return roundAndPackFloat64( sign, exp, fracHi );
+
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the extended double-precision floating-point format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float128_to_floatx80( float128 a )
+{ /* Handles infinity/NaN and zero directly; everything else is normalized, shifted up 15 bits and rounded by roundAndPackFloatx80(). */
+ flag aSign;
+ int32 aExp;
+ bits64 aSig0, aSig1;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ if ( aExp == 0x7FFF ) { /* infinity or NaN */
+ if ( aSig0 | aSig1 ) { /* nonzero fraction: NaN */
+ return commonNaNToFloatx80( float128ToCommonNaN( a ) );
+ }
+ return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); /* infinity: only bit 63 of the significand set */
+ }
+ if ( aExp == 0 ) { /* zero or subnormal */
+ if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 ); /* signed zero */
+ normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); /* rewrites aExp and the significand pair in place */
+ }
+ else {
+ aSig0 |= LIT64( 0x0001000000000000 ); /* normal number: set the leading 1 at bit 48 */
+ }
+ shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 ); /* for a normal input this moves the leading 1 from bit 48 to bit 63 */
+ return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 ); /* NOTE(review): the leading 80 is presumably the rounding precision -- confirm against roundAndPackFloatx80 */
+
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Rounds the quadruple-precision floating-point value `a' to an integer, and
+| returns the result as a quadruple-precision floating-point value. The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_round_to_int( float128 a )
+{ /* Rounds directly on the encoded words: lastBitMask marks the units bit, roundBitsMask the fraction bits below it. */
+ flag aSign;
+ int32 aExp;
+ bits64 lastBitMask, roundBitsMask;
+ int8 roundingMode;
+ float128 z;
+
+ aExp = extractFloat128Exp( a );
+ if ( 0x402F <= aExp ) { /* unbiased exponent >= 48: any fraction bits lie entirely in the low word */
+ if ( 0x406F <= aExp ) { /* unbiased exponent >= 112: already an integer, or infinity/NaN */
+ if ( ( aExp == 0x7FFF )
+ && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
+ ) {
+ return propagateFloat128NaN( a, a );
+ }
+ return a;
+ }
+ lastBitMask = 1;
+ lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1; /* two shifts so that a total of 64 (aExp == 0x402F) yields 0 rather than an out-of-range shift */
+ roundBitsMask = lastBitMask - 1; /* all ones when lastBitMask is 0 */
+ z = a;
+ roundingMode = float_rounding_mode;
+ if ( roundingMode == float_round_nearest_even ) {
+ if ( lastBitMask ) { /* units bit is inside z.low */
+ add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); /* add one half of the units bit */
+ if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; /* exact tie: clear the units bit to land on even */
+ }
+ else { /* units bit is bit 0 of z.high; the half bit is the top bit of z.low */
+ if ( (sbits64) z.low < 0 ) { /* half bit set: round up */
+ ++z.high;
+ if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1; /* nothing below the half bit: tie, force even */
+ }
+ }
+ }
+ else if ( roundingMode != float_round_to_zero ) {
+ if ( extractFloat128Sign( z )
+ ^ ( roundingMode == float_round_up ) ) { /* true for round-down of a negative value or round-up of a positive one: magnitude grows */
+ add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
+ }
+ }
+ z.low &= ~ roundBitsMask; /* drop the fraction bits */
+ }
+ else {
+ if ( aExp < 0x3FFF ) { /* |a| < 1 */
+ if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a; /* +0 or -0 is returned unchanged */
+ float_exception_flags |= float_flag_inexact;
+ aSign = extractFloat128Sign( a );
+ switch ( float_rounding_mode ) {
+ case float_round_nearest_even:
+ if ( ( aExp == 0x3FFE )
+ && ( extractFloat128Frac0( a )
+ | extractFloat128Frac1( a ) )
+ ) { /* 0.5 < |a| < 1 rounds to +/-1; exactly 0.5 falls through to zero */
+ return packFloat128( aSign, 0x3FFF, 0, 0 );
+ }
+ break;
+ case float_round_down:
+ return
+ aSign ? packFloat128( 1, 0x3FFF, 0, 0 ) /* negative: -1 */
+ : packFloat128( 0, 0, 0, 0 ); /* positive: +0 */
+ case float_round_up:
+ return
+ aSign ? packFloat128( 1, 0, 0, 0 ) /* negative: -0 */
+ : packFloat128( 0, 0x3FFF, 0, 0 ); /* positive: +1 */
+ }
+ return packFloat128( aSign, 0, 0, 0 ); /* remaining cases give a zero with the sign of `a' */
+ }
+ lastBitMask = 1;
+ lastBitMask <<= 0x402F - aExp; /* units bit is inside the high word (shift of 1..48) */
+ roundBitsMask = lastBitMask - 1;
+ z.low = 0; /* the whole low word is fraction */
+ z.high = a.high;
+ roundingMode = float_rounding_mode;
+ if ( roundingMode == float_round_nearest_even ) {
+ z.high += lastBitMask>>1;
+ if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { /* a tie only if the low word is zero as well */
+ z.high &= ~ lastBitMask;
+ }
+ }
+ else if ( roundingMode != float_round_to_zero ) {
+ if ( extractFloat128Sign( z )
+ ^ ( roundingMode == float_round_up ) ) { /* rounding away from zero */
+ z.high |= ( a.low != 0 ); /* sticky bit so a fraction held only in the low word still rounds up */
+ z.high += roundBitsMask;
+ }
+ }
+ z.high &= ~ roundBitsMask; /* drop the fraction bits */
+ }
+ if ( ( z.low != a.low ) || ( z.high != a.high ) ) { /* the value changed, so the rounding was inexact */
+ float_exception_flags |= float_flag_inexact;
+ }
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the quadruple-precision
+| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
+| before being returned. `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
+{ /* Aligns the operand with the smaller exponent, adds the significands, and hands a three-word result to roundAndPackFloat128(). */
+ int32 aExp, bExp, zExp;
+ bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+ int32 expDiff;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ bSig1 = extractFloat128Frac1( b );
+ bSig0 = extractFloat128Frac0( b );
+ bExp = extractFloat128Exp( b );
+ expDiff = aExp - bExp;
+ if ( 0 < expDiff ) { /* `a' has the larger exponent: align `b' to it */
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
+ return a; /* infinity plus a finite value */
+ }
+ if ( bExp == 0 ) {
+ --expDiff; /* `b' is zero/subnormal: no leading 1, and one bit less of alignment */
+ }
+ else {
+ bSig0 |= LIT64( 0x0001000000000000 ); /* leading 1 of `b' at bit 48 */
+ }
+ shift128ExtraRightJamming(
+ bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); /* zSig2 is the third (extra) output word of the shift */
+ zExp = aExp;
+ }
+ else if ( expDiff < 0 ) { /* `b' has the larger exponent: mirror image of the branch above */
+ if ( bExp == 0x7FFF ) {
+ if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+ return packFloat128( zSign, 0x7FFF, 0, 0 ); /* infinity carrying the requested result sign */
+ }
+ if ( aExp == 0 ) {
+ ++expDiff;
+ }
+ else {
+ aSig0 |= LIT64( 0x0001000000000000 );
+ }
+ shift128ExtraRightJamming(
+ aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
+ zExp = bExp;
+ }
+ else { /* equal exponents */
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
+ return propagateFloat128NaN( a, b );
+ }
+ return a; /* both operands are infinities */
+ }
+ add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+ if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 ); /* both zero/subnormal: the fraction sum is packed as is, no rounding */
+ zSig2 = 0;
+ zSig0 |= LIT64( 0x0002000000000000 ); /* the two leading 1s (bit 48 each) sum to bit 49 */
+ zExp = aExp;
+ goto shiftRight1; /* bit 49 is set, so the one-bit right shift is always needed */
+ }
+ aSig0 |= LIT64( 0x0001000000000000 ); /* leading 1 of the larger operand; NOTE(review): on the b-larger path aSig0 is the shifted `a', so this appears to inject the leading 1 of `b' into the sum -- confirm */
+ add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+ --zExp; /* exponent passed to roundAndPackFloat128 when no carry occurred */
+ if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack; /* sum stayed below bit 49: no carry */
+ ++zExp; /* carry: undo the decrement and renormalize below */
+ shiftRight1:
+ shift128ExtraRightJamming(
+ zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+ roundAndPack:
+ return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the quadruple-
+| precision floating-point values `a' and `b'. If `zSign' is 1, the
+| difference is negated before being returned. `zSign' is ignored if the
+| result is a NaN. The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
+{ /* Pre-shifts both significands left by 14, subtracts the smaller magnitude from the larger, and normalizes via normalizeRoundAndPackFloat128(). */
+ int32 aExp, bExp, zExp;
+ bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
+ int32 expDiff;
+ float128 z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ bSig1 = extractFloat128Frac1( b );
+ bSig0 = extractFloat128Frac0( b );
+ bExp = extractFloat128Exp( b );
+ expDiff = aExp - bExp;
+ shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); /* after this shift the leading-1 position is bit 62 (48 + 14) */
+ shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
+ if ( 0 < expDiff ) goto aExpBigger;
+ if ( expDiff < 0 ) goto bExpBigger;
+ if ( aExp == 0x7FFF ) { /* equal exponents, both infinity or NaN */
+ if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
+ return propagateFloat128NaN( a, b );
+ }
+ float_raise( float_flag_invalid ); /* infinity minus infinity */
+ z.low = float128_default_nan_low;
+ z.high = float128_default_nan_high;
+ return z;
+ }
+ if ( aExp == 0 ) { /* both zero/subnormal: exponent value 1 is used for the result computation */
+ aExp = 1;
+ bExp = 1;
+ }
+ if ( bSig0 < aSig0 ) goto aBigger; /* equal exponents: order by significand, high word first */
+ if ( aSig0 < bSig0 ) goto bBigger;
+ if ( bSig1 < aSig1 ) goto aBigger;
+ if ( aSig1 < bSig1 ) goto bBigger;
+ return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 ); /* exact zero difference: negative zero only when rounding down */
+ bExpBigger:
+ if ( bExp == 0x7FFF ) {
+ if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+ return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 ); /* finite minus infinity: infinity with the opposite sign */
+ }
+ if ( aExp == 0 ) {
+ ++expDiff; /* `a' is zero/subnormal: no leading 1, one bit less of alignment */
+ }
+ else {
+ aSig0 |= LIT64( 0x4000000000000000 ); /* leading 1 of `a' at bit 62 */
+ }
+ shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+ bSig0 |= LIT64( 0x4000000000000000 ); /* leading 1 of `b' (bExp is nonzero on this path) */
+ bBigger:
+ sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
+ zExp = bExp;
+ zSign ^= 1; /* |b| > |a|: the difference takes the opposite sign */
+ goto normalizeRoundAndPack;
+ aExpBigger:
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
+ return a; /* infinity minus a finite value */
+ }
+ if ( bExp == 0 ) {
+ --expDiff;
+ }
+ else {
+ bSig0 |= LIT64( 0x4000000000000000 );
+ }
+ shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
+ aSig0 |= LIT64( 0x4000000000000000 );
+ aBigger:
+ sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+ zExp = aExp;
+ normalizeRoundAndPack:
+ --zExp;
+ return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 ); /* the - 14 compensates for the 14-bit pre-shift of the significands */
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the quadruple-precision floating-point values
+| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_add( float128 a, float128 b )
+{
+    flag signA = extractFloat128Sign( a );
+    flag signB = extractFloat128Sign( b );
+
+    /*
+     * Like signs: the magnitudes are summed.  Unlike signs: the magnitude
+     * of `b' is subtracted from that of `a'.  Either helper receives the
+     * sign of `a' as its `zSign' argument.
+     */
+    return ( signA != signB ) ? subFloat128Sigs( a, b, signA )
+         : addFloat128Sigs( a, b, signA );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the quadruple-precision floating-point
+| values `a' and `b'. The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_sub( float128 a, float128 b )
+{
+    flag signA = extractFloat128Sign( a );
+    flag signB = extractFloat128Sign( b );
+
+    /*
+     * a - b with like signs is a difference of magnitudes; with unlike
+     * signs it is a sum of magnitudes.  Either helper receives the sign
+     * of `a' as its `zSign' argument.
+     */
+    return ( signA != signB ) ? addFloat128Sigs( a, b, signA )
+         : subFloat128Sigs( a, b, signA );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the quadruple-precision floating-point
+| values `a' and `b'. The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_mul( float128 a, float128 b )
+{ /* Special cases first, then a 128x128 -> 256-bit significand product reduced to three words for roundAndPackFloat128(). */
+ flag aSign, bSign, zSign;
+ int32 aExp, bExp, zExp;
+ bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
+ float128 z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ bSig1 = extractFloat128Frac1( b );
+ bSig0 = extractFloat128Frac0( b );
+ bExp = extractFloat128Exp( b );
+ bSign = extractFloat128Sign( b );
+ zSign = aSign ^ bSign; /* sign of a product */
+ if ( aExp == 0x7FFF ) { /* `a' is infinity or NaN */
+ if ( ( aSig0 | aSig1 )
+ || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { /* either operand is a NaN */
+ return propagateFloat128NaN( a, b );
+ }
+ if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; /* infinity times zero */
+ return packFloat128( zSign, 0x7FFF, 0, 0 );
+ }
+ if ( bExp == 0x7FFF ) { /* `b' is infinity or NaN, `a' is finite */
+ if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+ if ( ( aExp | aSig0 | aSig1 ) == 0 ) { /* zero times infinity */
+ invalid:
+ float_raise( float_flag_invalid );
+ z.low = float128_default_nan_low;
+ z.high = float128_default_nan_high;
+ return z;
+ }
+ return packFloat128( zSign, 0x7FFF, 0, 0 );
+ }
+ if ( aExp == 0 ) {
+ if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); /* zero times finite: signed zero */
+ normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+ }
+ if ( bExp == 0 ) {
+ if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+ normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+ }
+ zExp = aExp + bExp - 0x4000;
+ aSig0 |= LIT64( 0x0001000000000000 ); /* leading 1 of `a' at bit 48 */
+ shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 ); /* `b' is shifted up 16; its leading 1 is not set here */
+ mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
+ add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); /* adds aSig into the top 128 bits; NOTE(review): this appears to supply the product term for the leading 1 of `b', which would sit at bit 64 after the 16-bit shift -- confirm */
+ zSig2 |= ( zSig3 != 0 ); /* lowest product word collapsed into a sticky bit */
+ if ( LIT64( 0x0002000000000000 ) <= zSig0 ) { /* product reached bit 49: shift right one bit and bump the exponent */
+ shift128ExtraRightJamming(
+ zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+ ++zExp;
+ }
+ return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the quadruple-precision floating-point value
+| `a' by the corresponding value `b'. The operation is performed according to
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_div( float128 a, float128 b )
+{ /* Special cases first, then two 64-bit quotient digits from estimateDiv128To64(), each corrected against the true remainder. */
+ flag aSign, bSign, zSign;
+ int32 aExp, bExp, zExp;
+ bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+ bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+ float128 z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ bSig1 = extractFloat128Frac1( b );
+ bSig0 = extractFloat128Frac0( b );
+ bExp = extractFloat128Exp( b );
+ bSign = extractFloat128Sign( b );
+ zSign = aSign ^ bSign; /* sign of a quotient */
+ if ( aExp == 0x7FFF ) { /* `a' is infinity or NaN */
+ if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
+ if ( bExp == 0x7FFF ) {
+ if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+ goto invalid; /* infinity divided by infinity */
+ }
+ return packFloat128( zSign, 0x7FFF, 0, 0 ); /* infinity divided by finite */
+ }
+ if ( bExp == 0x7FFF ) {
+ if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+ return packFloat128( zSign, 0, 0, 0 ); /* finite divided by infinity: signed zero */
+ }
+ if ( bExp == 0 ) {
+ if ( ( bSig0 | bSig1 ) == 0 ) { /* divisor is zero */
+ if ( ( aExp | aSig0 | aSig1 ) == 0 ) { /* zero divided by zero */
+ invalid:
+ float_raise( float_flag_invalid );
+ z.low = float128_default_nan_low;
+ z.high = float128_default_nan_high;
+ return z;
+ }
+ float_raise( float_flag_divbyzero );
+ return packFloat128( zSign, 0x7FFF, 0, 0 ); /* nonzero divided by zero: signed infinity */
+ }
+ normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+ }
+ if ( aExp == 0 ) {
+ if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+ normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+ }
+ zExp = aExp - bExp + 0x3FFD;
+ shortShift128Left(
+ aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 ); /* leading 1 set at bit 48, then moved to bit 63 */
+ shortShift128Left(
+ bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
+ if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) { /* b <= a: halve the dividend so that it is below the divisor */
+ shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
+ ++zExp;
+ }
+ zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 ); /* first 64-bit quotient digit (an estimate) */
+ mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
+ sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
+ while ( (sbits64) rem0 < 0 ) { /* negative remainder: the estimate was too large, step it down */
+ --zSig0;
+ add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
+ }
+ zSig1 = estimateDiv128To64( rem1, rem2, bSig0 ); /* second quotient digit */
+ if ( ( zSig1 & 0x3FFF ) <= 4 ) { /* exact correction only when the low 14 bits are 4 or less; NOTE(review): presumably the only case where estimate error could alter rounding -- confirm */
+ mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
+ sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
+ while ( (sbits64) rem1 < 0 ) {
+ --zSig1;
+ add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
+ }
+ zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); /* nonzero remainder recorded as a sticky bit */
+ }
+ shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 ); /* shift the quotient down 15 bits, extra word in zSig2 */
+ return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the quadruple-precision floating-point value `a'
+| with respect to the corresponding value `b'. The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_rem( float128 a, float128 b )
+{ /* Special cases first, then a quotient reduction in 61-bit steps, then a final choice between the two remainders nearest zero. */
+ flag aSign, bSign, zSign;
+ int32 aExp, bExp, expDiff;
+ bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
+ bits64 allZero, alternateASig0, alternateASig1, sigMean1;
+ sbits64 sigMean0;
+ float128 z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ bSig1 = extractFloat128Frac1( b );
+ bSig0 = extractFloat128Frac0( b );
+ bExp = extractFloat128Exp( b );
+ bSign = extractFloat128Sign( b );
+ if ( aExp == 0x7FFF ) { /* `a' is infinity or NaN */
+ if ( ( aSig0 | aSig1 )
+ || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { /* either operand is a NaN */
+ return propagateFloat128NaN( a, b );
+ }
+ goto invalid; /* remainder of an infinity */
+ }
+ if ( bExp == 0x7FFF ) {
+ if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
+ return a; /* finite rem infinity is `a' itself */
+ }
+ if ( bExp == 0 ) {
+ if ( ( bSig0 | bSig1 ) == 0 ) { /* remainder by zero */
+ invalid:
+ float_raise( float_flag_invalid );
+ z.low = float128_default_nan_low;
+ z.high = float128_default_nan_high;
+ return z;
+ }
+ normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+ }
+ if ( aExp == 0 ) {
+ if ( ( aSig0 | aSig1 ) == 0 ) return a; /* zero dividend is returned unchanged */
+ normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+ }
+ expDiff = aExp - bExp;
+ if ( expDiff < -1 ) return a; /* exponent at least 2 below that of `b': `a' is already the remainder */
+ shortShift128Left(
+ aSig0 | LIT64( 0x0001000000000000 ),
+ aSig1,
+ 15 - ( expDiff < 0 ), /* one bit less when expDiff is -1 */
+ &aSig0,
+ &aSig1
+ );
+ shortShift128Left(
+ bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
+ q = le128( bSig0, bSig1, aSig0, aSig1 ); /* first quotient bit: 1 when b <= a */
+ if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
+ expDiff -= 64;
+ while ( 0 < expDiff ) { /* each pass consumes 61 bits of exponent difference */
+ q = estimateDiv128To64( aSig0, aSig1, bSig0 );
+ q = ( 4 < q ) ? q - 4 : 0; /* lower the estimate by 4 (floor at 0); NOTE(review): presumably keeps the partial remainder non-negative -- confirm */
+ mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
+ shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
+ shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
+ sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
+ expDiff -= 61;
+ }
+ if ( -64 < expDiff ) { /* a final partial quotient digit remains */
+ q = estimateDiv128To64( aSig0, aSig1, bSig0 );
+ q = ( 4 < q ) ? q - 4 : 0;
+ q >>= - expDiff; /* keep only the quotient bits that are still needed */
+ shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
+ expDiff += 52;
+ if ( expDiff < 0 ) {
+ shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+ }
+ else {
+ shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
+ }
+ mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
+ sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
+ }
+ else {
+ shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
+ shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
+ }
+ do { /* subtract `b' until the remainder goes negative, remembering the last non-negative value */
+ alternateASig0 = aSig0;
+ alternateASig1 = aSig1;
+ ++q;
+ sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
+ } while ( 0 <= (sbits64) aSig0 );
+ add128( /* sigMean0 is an sbits64, so its address is cast to the bits64 * that the other add128 calls pass */
+ aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *) &sigMean0, &sigMean1 );
+ if ( ( sigMean0 < 0 )
+ || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { /* the non-negative candidate is closer to zero, or it is a tie and q is odd */
+ aSig0 = alternateASig0;
+ aSig1 = alternateASig1;
+ }
+ zSign = ( (sbits64) aSig0 < 0 );
+ if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); /* negative remainder: take its magnitude and fold the sign into the result */
+ return
+ normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the quadruple-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_sqrt( float128 a )
+{ /* Special cases first, then a 32-bit root estimate refined into two 64-bit result digits, each corrected against the remainder. */
+ flag aSign;
+ int32 aExp, zExp;
+ bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
+ bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+ float128 z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
+ if ( ! aSign ) return a; /* square root of +infinity is +infinity */
+ goto invalid; /* -infinity */
+ }
+ if ( aSign ) {
+ if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; /* -0 is returned unchanged */
+ invalid:
+ float_raise( float_flag_invalid ); /* negative nonzero operand */
+ z.low = float128_default_nan_low;
+ z.high = float128_default_nan_high;
+ return z;
+ }
+ if ( aExp == 0 ) {
+ if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 ); /* +0 */
+ normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+ }
+ zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE; /* unbiased exponent halved, then rebiased */
+ aSig0 |= LIT64( 0x0001000000000000 ); /* leading 1 at bit 48 */
+ zSig0 = estimateSqrt32( aExp, aSig0>>17 ); /* initial estimate from the top 32 bits of the significand */
+ shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 ); /* shift of 13 or 12 depending on the parity of the exponent */
+ zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
+ doubleZSig0 = zSig0<<1;
+ mul64To128( zSig0, zSig0, &term0, &term1 );
+ sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); /* remainder a - zSig0^2 */
+ while ( (sbits64) rem0 < 0 ) { /* negative remainder: the estimate was too large, step it down */
+ --zSig0;
+ doubleZSig0 -= 2;
+ add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); /* adds back 2*zSig0 + 1 for the new zSig0 */
+ }
+ zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); /* second result digit */
+ if ( ( zSig1 & 0x1FFF ) <= 5 ) { /* exact correction only when the low 13 bits are 5 or less; NOTE(review): presumably the only case where estimate error could alter rounding -- confirm */
+ if ( zSig1 == 0 ) zSig1 = 1;
+ mul64To128( doubleZSig0, zSig1, &term1, &term2 );
+ sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+ mul64To128( zSig1, zSig1, &term2, &term3 );
+ sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+ while ( (sbits64) rem1 < 0 ) {
+ --zSig1;
+ shortShift128Left( 0, zSig1, 1, &term2, &term3 );
+ term3 |= 1;
+ term2 |= doubleZSig0;
+ add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
+ }
+ zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); /* nonzero remainder recorded as a sticky bit */
+ }
+ shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 ); /* shift the root down 14 bits, extra word in zSig2 */
+ return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 ); /* result sign is always 0 on this path */
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise. The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_eq( float128 a, float128 b )
+{
+    flag aIsNaN, bIsNaN;
+
+    /* A NaN has an all-ones exponent (0x7FFF) and a nonzero fraction. */
+    aIsNaN = ( extractFloat128Exp( a ) == 0x7FFF )
+        && ( ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) != 0 );
+    bIsNaN = ( extractFloat128Exp( b ) == 0x7FFF )
+        && ( ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) != 0 );
+    if ( aIsNaN || bIsNaN ) {
+        /* Unordered, hence unequal; only a signaling NaN raises invalid. */
+        if ( float128_is_signaling_nan( a ) || float128_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    if ( a.low != b.low ) return 0;
+    if ( a.high == b.high ) return 1;
+    /* The encodings differ: equal only if both are zeros of opposite sign. */
+    return ( a.low == 0 ) && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise. The comparison
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_le( float128 a, float128 b )
+{
+    flag aIsNaN, bIsNaN, aNeg, bNeg;
+
+    /* NaN (exponent 0x7FFF, nonzero fraction) in either operand is invalid. */
+    aIsNaN = ( extractFloat128Exp( a ) == 0x7FFF )
+        && ( ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) != 0 );
+    bIsNaN = ( extractFloat128Exp( b ) == 0x7FFF )
+        && ( ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) != 0 );
+    if ( aIsNaN || bIsNaN ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aNeg = extractFloat128Sign( a );
+    bNeg = extractFloat128Sign( b );
+    if ( aNeg == bNeg ) {
+        /* Same sign: compare the raw encodings, swapped for negatives. */
+        if ( aNeg ) return le128( b.high, b.low, a.high, a.low );
+        return le128( a.high, a.low, b.high, b.low );
+    }
+    /* Opposite signs: a <= b if a is the negative one or both are zero. */
+    if ( aNeg ) return 1;
+    return ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) == 0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise. The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_lt( float128 a, float128 b )
+{
+    flag aIsNaN, bIsNaN, aNeg, bNeg;
+
+    /* NaN (exponent 0x7FFF, nonzero fraction) in either operand is invalid. */
+    aIsNaN = ( extractFloat128Exp( a ) == 0x7FFF )
+        && ( ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) != 0 );
+    bIsNaN = ( extractFloat128Exp( b ) == 0x7FFF )
+        && ( ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) != 0 );
+    if ( aIsNaN || bIsNaN ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    aNeg = extractFloat128Sign( a );
+    bNeg = extractFloat128Sign( b );
+    if ( aNeg == bNeg ) {
+        /* Same sign: compare the raw encodings, swapped for negatives. */
+        if ( aNeg ) return lt128( b.high, b.low, a.high, a.low );
+        return lt128( a.high, a.low, b.high, b.low );
+    }
+    /* Opposite signs: a < b only if a is negative and not both are zero. */
+    if ( ! aNeg ) return 0;
+    return ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) != 0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise. The invalid exception is
+| raised if either operand is a NaN. Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_eq_signaling( float128 a, float128 b )
+{
+    flag aIsNaN, bIsNaN;
+
+    /* NaN (exponent 0x7FFF, nonzero fraction) in either operand is invalid. */
+    aIsNaN = ( extractFloat128Exp( a ) == 0x7FFF )
+        && ( ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) != 0 );
+    bIsNaN = ( extractFloat128Exp( b ) == 0x7FFF )
+        && ( ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) != 0 );
+    if ( aIsNaN || bIsNaN ) {
+        float_raise( float_flag_invalid );
+        return 0;
+    }
+    if ( a.low != b.low ) return 0;
+    if ( a.high == b.high ) return 1;
+    /* The encodings differ: equal only if both are zeros of opposite sign. */
+    return ( a.low == 0 ) && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Quiet "less than or equal" test for quadruple-precision values: yields 1
+| when `a' is less than or equal to `b' and 0 otherwise.  A NaN operand gives
+| 0; the invalid exception is raised only when `float128_is_signaling_nan'
+| reports one of the operands as signaling.  Otherwise the comparison follows
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_le_quiet( float128 a, float128 b )
+{
+    flag signOfA, signOfB, nanSeen;
+    bits64 magnitudeBits;
+
+    nanSeen =
+        ( extractFloat128Exp( a ) == 0x7FFF )
+            && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) );
+    if ( ! nanSeen ) {
+        nanSeen =
+            ( extractFloat128Exp( b ) == 0x7FFF )
+                && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) );
+    }
+    if ( nanSeen ) {
+        if ( float128_is_signaling_nan( a )
+                 || float128_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    signOfA = extractFloat128Sign( a );
+    signOfB = extractFloat128Sign( b );
+    if ( signOfA == signOfB ) {
+        /* Same sign: compare the raw 128-bit patterns, with the operands
+           swapped when both are negative. */
+        if ( signOfA ) return le128( b.high, b.low, a.high, a.low );
+        return le128( a.high, a.low, b.high, b.low );
+    }
+    /* Opposite signs: true when `a' is the negative operand, or when no bit
+       other than the sign bits is set in either operand (both zeros). */
+    magnitudeBits = ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low;
+    return signOfA || ( magnitudeBits == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Quiet "less than" test for quadruple-precision values: yields 1 when `a' is
+| strictly less than `b' and 0 otherwise.  A NaN operand gives 0; the invalid
+| exception is raised only when `float128_is_signaling_nan' reports one of
+| the operands as signaling.  Otherwise the comparison follows the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+flag float128_lt_quiet( float128 a, float128 b )
+{
+    flag signOfA, signOfB, nanSeen;
+    bits64 magnitudeBits;
+
+    nanSeen =
+        ( extractFloat128Exp( a ) == 0x7FFF )
+            && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) );
+    if ( ! nanSeen ) {
+        nanSeen =
+            ( extractFloat128Exp( b ) == 0x7FFF )
+                && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) );
+    }
+    if ( nanSeen ) {
+        if ( float128_is_signaling_nan( a )
+                 || float128_is_signaling_nan( b ) ) {
+            float_raise( float_flag_invalid );
+        }
+        return 0;
+    }
+    signOfA = extractFloat128Sign( a );
+    signOfB = extractFloat128Sign( b );
+    if ( signOfA == signOfB ) {
+        /* Same sign: compare the raw 128-bit patterns, with the operands
+           swapped when both are negative. */
+        if ( signOfA ) return lt128( b.high, b.low, a.high, a.low );
+        return lt128( a.high, a.low, b.high, b.low );
+    }
+    /* Opposite signs: true only when `a' is the negative operand and some
+       bit other than the sign bits is set (not both zeros). */
+    magnitudeBits = ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low;
+    return signOfA && ( magnitudeBits != 0 );
+
+}
+
+#endif
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Adds `a' and `b' by dispatching on the operand signs: when the sign bits
+| match, the work is handed to `softfloat_addMagsF32'; when they differ, to
+| `softfloat_subMagsF32'.  Either helper receives the raw bit patterns of
+| both operands together with the sign of `a'.
+*----------------------------------------------------------------------------*/
+float32_t f32_add( float32_t a, float32_t b )
+{
+    union ui32_f32 bitsA, bitsB;
+    uint_fast32_t rawA, rawB;
+    bool negA, negB;
+
+    bitsA.f = a;
+    rawA = bitsA.ui;
+    bitsB.f = b;
+    rawB = bitsB.ui;
+    negA = signF32UI( rawA );
+    negB = signF32UI( rawB );
+    if ( negA == negB ) {
+        return softfloat_addMagsF32( rawA, rawB, negA );
+    }
+    return softfloat_subMagsF32( rawA, rawB, negA );
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Returns the quotient `a' / `b' as a `float32_t'.
+|
+| Special operands are dispatched to shared exit labels before the main
+| division:
+|   propagateNaN - an operand has exponent field 0xFF and a nonzero fraction;
+|                  the result comes from `softfloat_propagateNaNF32UI'.
+|   invalid      - both operands have exponent 0xFF with zero fraction, or
+|                  both are zero; raises `softfloat_flag_invalid' and returns
+|                  `defaultNaNF32UI'.
+|   infinity     - `a' has exponent 0xFF (zero fraction) and `b' does not, or
+|                  `b' is zero and `a' is not (the latter also raises
+|                  `softfloat_flag_infinity'); the result has exponent 0xFF
+|                  and zero fraction.
+|   zero         - `b' has exponent 0xFF (zero fraction), or `a' is zero; the
+|                  result has zero exponent and fraction.
+| For `infinity' and `zero' the result sign is the XOR of the operand signs.
+| Every other result is produced by `softfloat_roundPackToF32'.
+*----------------------------------------------------------------------------*/
+float32_t f32_div( float32_t a, float32_t b )
+{
+ union ui32_f32 uA;
+ uint_fast32_t uiA;
+ bool signA;
+ int_fast16_t expA;
+ uint_fast32_t sigA;
+ union ui32_f32 uB;
+ uint_fast32_t uiB;
+ bool signB;
+ int_fast16_t expB;
+ uint_fast32_t sigB;
+ bool signZ;
+ struct exp16_sig32 normExpSig;
+ int_fast16_t expZ;
+ uint_fast32_t sigZ;
+ uint_fast32_t uiZ;
+ union ui32_f32 uZ;
+
+ /* Split both operands into sign, exponent field and fraction field. */
+ uA.f = a;
+ uiA = uA.ui;
+ signA = signF32UI( uiA );
+ expA = expF32UI( uiA );
+ sigA = fracF32UI( uiA );
+ uB.f = b;
+ uiB = uB.ui;
+ signB = signF32UI( uiB );
+ expB = expF32UI( uiB );
+ sigB = fracF32UI( uiB );
+ signZ = signA ^ signB;
+ /* Operands whose exponent field is all ones. */
+ if ( expA == 0xFF ) {
+ if ( sigA ) goto propagateNaN;
+ if ( expB == 0xFF ) {
+ if ( sigB ) goto propagateNaN;
+ goto invalid;
+ }
+ goto infinity;
+ }
+ if ( expB == 0xFF ) {
+ if ( sigB ) goto propagateNaN;
+ goto zero;
+ }
+ /* Zero exponent field in `b': either a zero divisor or a value that must
+    be normalized first via `softfloat_normSubnormalF32Sig'. */
+ if ( ! expB ) {
+ if ( ! sigB ) {
+ if ( ! ( expA | sigA ) ) goto invalid;
+ softfloat_raiseFlags( softfloat_flag_infinity );
+ goto infinity;
+ }
+ normExpSig = softfloat_normSubnormalF32Sig( sigB );
+ expB = normExpSig.exp;
+ sigB = normExpSig.sig;
+ }
+ /* Same treatment for a zero exponent field in `a'. */
+ if ( ! expA ) {
+ if ( ! sigA ) goto zero;
+ normExpSig = softfloat_normSubnormalF32Sig( sigA );
+ expA = normExpSig.exp;
+ sigA = normExpSig.sig;
+ }
+ expZ = expA - expB + 0x7D;
+ /* Set bit 23 in each significand (presumably the implicit leading one)
+    and shift so the divisor sits one bit higher than the dividend. */
+ sigA = ( sigA | 0x00800000 )<<7;
+ sigB = ( sigB | 0x00800000 )<<8;
+ /* If the divisor does not exceed twice the dividend, halve the dividend
+    and compensate in the exponent. */
+ if ( sigB <= ( sigA + sigA ) ) {
+ ++expZ;
+ sigA >>= 1;
+ }
+ /* 64-bit by 32-bit integer division of the dividend shifted up by 32. */
+ sigZ = ( (uint_fast64_t) sigA<<32 ) / sigB;
+ /* Only when the low six quotient bits are all zero: set the lowest bit if
+    the division left a remainder (quotient times divisor does not
+    reproduce the dividend). */
+ if ( ! ( sigZ & 0x3F ) ) {
+ sigZ |= ( (uint_fast64_t) sigB * sigZ != (uint_fast64_t) sigA<<32 );
+ }
+ return softfloat_roundPackToF32( signZ, expZ, sigZ );
+ propagateNaN:
+ uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
+ goto uiZ;
+ invalid:
+ softfloat_raiseFlags( softfloat_flag_invalid );
+ uiZ = defaultNaNF32UI;
+ goto uiZ;
+ infinity:
+ uiZ = packToF32UI( signZ, 0xFF, 0 );
+ goto uiZ;
+ zero:
+ uiZ = packToF32UI( signZ, 0, 0 );
+ /* Common exit: reinterpret the assembled bit pattern as a float32_t. */
+ uiZ:
+ uZ.ui = uiZ;
+ return uZ.f;
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Quiet equality test.  Returns true when `a' and `b' compare equal.  If
+| either operand is a NaN (exponent field 0xFF with a nonzero fraction) the
+| result is false, and the invalid flag is raised only when
+| `softfloat_isSigNaNF32UI' reports one of the operands as signaling.
+*----------------------------------------------------------------------------*/
+bool f32_eq( float32_t a, float32_t b )
+{
+    union ui32_f32 bitsA, bitsB;
+    uint_fast32_t rawA, rawB;
+    bool nanSeen;
+
+    bitsA.f = a;
+    rawA = bitsA.ui;
+    bitsB.f = b;
+    rawB = bitsB.ui;
+    nanSeen = ( expF32UI( rawA ) == 0xFF ) && fracF32UI( rawA );
+    if ( ! nanSeen ) {
+        nanSeen = ( expF32UI( rawB ) == 0xFF ) && fracF32UI( rawB );
+    }
+    if ( nanSeen ) {
+        if (
+            softfloat_isSigNaNF32UI( rawA ) || softfloat_isSigNaNF32UI( rawB )
+        ) {
+            softfloat_raiseFlags( softfloat_flag_invalid );
+        }
+        return false;
+    }
+    if ( rawA == rawB ) return true;
+    /* Distinct patterns are still equal when neither has any bit set other
+       than the top (sign) bit, i.e. both are zeros. */
+    return (uint32_t) ( ( rawA | rawB )<<1 ) == 0;
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Signaling equality test.  Returns true when `a' and `b' compare equal.  Any
+| NaN operand (exponent field 0xFF with a nonzero fraction) raises the
+| invalid flag and makes the result false.
+*----------------------------------------------------------------------------*/
+bool f32_eq_signaling( float32_t a, float32_t b )
+{
+    union ui32_f32 bitsA, bitsB;
+    uint_fast32_t rawA, rawB;
+    bool nanSeen;
+
+    bitsA.f = a;
+    rawA = bitsA.ui;
+    bitsB.f = b;
+    rawB = bitsB.ui;
+    nanSeen = ( expF32UI( rawA ) == 0xFF ) && fracF32UI( rawA );
+    if ( ! nanSeen ) {
+        nanSeen = ( expF32UI( rawB ) == 0xFF ) && fracF32UI( rawB );
+    }
+    if ( nanSeen ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return false;
+    }
+    if ( rawA == rawB ) return true;
+    /* Distinct patterns are still equal when neither has any bit set other
+       than the top (sign) bit, i.e. both are zeros. */
+    return (uint32_t) ( ( rawA | rawB )<<1 ) == 0;
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Reports whether `a' is a signaling NaN, as decided by
+| `softfloat_isSigNaNF32UI' applied to the raw bit pattern of `a'.
+*----------------------------------------------------------------------------*/
+bool f32_isSignalingNaN( float32_t a )
+{
+    union ui32_f32 bits;
+
+    bits.f = a;
+    return softfloat_isSigNaNF32UI( bits.ui );
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Signaling "less than or equal" test.  Returns true when `a' <= `b'.  Any
+| NaN operand (exponent field 0xFF with a nonzero fraction) raises the
+| invalid flag and makes the result false.
+*----------------------------------------------------------------------------*/
+bool f32_le( float32_t a, float32_t b )
+{
+    union ui32_f32 bitsA, bitsB;
+    uint_fast32_t rawA, rawB;
+    bool negA, negB, nanSeen;
+
+    bitsA.f = a;
+    rawA = bitsA.ui;
+    bitsB.f = b;
+    rawB = bitsB.ui;
+    nanSeen = ( expF32UI( rawA ) == 0xFF ) && fracF32UI( rawA );
+    if ( ! nanSeen ) {
+        nanSeen = ( expF32UI( rawB ) == 0xFF ) && fracF32UI( rawB );
+    }
+    if ( nanSeen ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return false;
+    }
+    negA = signF32UI( rawA );
+    negB = signF32UI( rawB );
+    if ( negA != negB ) {
+        /* Opposite signs: true if `a' is the negative one, or if both
+           operands are zeros (no bit set below the sign bit). */
+        if ( negA ) return true;
+        return (uint32_t) ( ( rawA | rawB )<<1 ) == 0;
+    }
+    if ( rawA == rawB ) return true;
+    /* Same sign: the unsigned ordering of the patterns is reversed for
+       negative operands. */
+    return negA != ( rawA < rawB );
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Quiet "less than or equal" test.  Returns true when `a' <= `b'.  If either
+| operand is a NaN (exponent field 0xFF with a nonzero fraction) the result
+| is false, and the invalid flag is raised only when
+| `softfloat_isSigNaNF32UI' reports one of the operands as signaling.
+*----------------------------------------------------------------------------*/
+bool f32_le_quiet( float32_t a, float32_t b )
+{
+    union ui32_f32 bitsA, bitsB;
+    uint_fast32_t rawA, rawB;
+    bool negA, negB, nanSeen;
+
+    bitsA.f = a;
+    rawA = bitsA.ui;
+    bitsB.f = b;
+    rawB = bitsB.ui;
+    nanSeen = ( expF32UI( rawA ) == 0xFF ) && fracF32UI( rawA );
+    if ( ! nanSeen ) {
+        nanSeen = ( expF32UI( rawB ) == 0xFF ) && fracF32UI( rawB );
+    }
+    if ( nanSeen ) {
+        if (
+            softfloat_isSigNaNF32UI( rawA ) || softfloat_isSigNaNF32UI( rawB )
+        ) {
+            softfloat_raiseFlags( softfloat_flag_invalid );
+        }
+        return false;
+    }
+    negA = signF32UI( rawA );
+    negB = signF32UI( rawB );
+    if ( negA != negB ) {
+        /* Opposite signs: true if `a' is the negative one, or if both
+           operands are zeros (no bit set below the sign bit). */
+        if ( negA ) return true;
+        return (uint32_t) ( ( rawA | rawB )<<1 ) == 0;
+    }
+    if ( rawA == rawB ) return true;
+    /* Same sign: the unsigned ordering of the patterns is reversed for
+       negative operands. */
+    return negA != ( rawA < rawB );
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Signaling "less than" test.  Returns true when `a' < `b'.  Any NaN operand
+| (exponent field 0xFF with a nonzero fraction) raises the invalid flag and
+| makes the result false.
+*----------------------------------------------------------------------------*/
+bool f32_lt( float32_t a, float32_t b )
+{
+    union ui32_f32 bitsA, bitsB;
+    uint_fast32_t rawA, rawB;
+    bool negA, negB, nanSeen;
+
+    bitsA.f = a;
+    rawA = bitsA.ui;
+    bitsB.f = b;
+    rawB = bitsB.ui;
+    nanSeen = ( expF32UI( rawA ) == 0xFF ) && fracF32UI( rawA );
+    if ( ! nanSeen ) {
+        nanSeen = ( expF32UI( rawB ) == 0xFF ) && fracF32UI( rawB );
+    }
+    if ( nanSeen ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return false;
+    }
+    negA = signF32UI( rawA );
+    negB = signF32UI( rawB );
+    if ( negA != negB ) {
+        /* Opposite signs: true only if `a' is the negative one and the
+           operands are not both zeros. */
+        if ( ! negA ) return false;
+        return (uint32_t) ( ( rawA | rawB )<<1 ) != 0;
+    }
+    if ( rawA == rawB ) return false;
+    /* Same sign: the unsigned ordering of the patterns is reversed for
+       negative operands. */
+    return negA != ( rawA < rawB );
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Quiet "less than" test.  Returns true when `a' < `b'.  If either operand is
+| a NaN (exponent field 0xFF with a nonzero fraction) the result is false,
+| and the invalid flag is raised only when `softfloat_isSigNaNF32UI' reports
+| one of the operands as signaling.
+*----------------------------------------------------------------------------*/
+bool f32_lt_quiet( float32_t a, float32_t b )
+{
+    union ui32_f32 bitsA, bitsB;
+    uint_fast32_t rawA, rawB;
+    bool negA, negB, nanSeen;
+
+    bitsA.f = a;
+    rawA = bitsA.ui;
+    bitsB.f = b;
+    rawB = bitsB.ui;
+    nanSeen = ( expF32UI( rawA ) == 0xFF ) && fracF32UI( rawA );
+    if ( ! nanSeen ) {
+        nanSeen = ( expF32UI( rawB ) == 0xFF ) && fracF32UI( rawB );
+    }
+    if ( nanSeen ) {
+        if (
+            softfloat_isSigNaNF32UI( rawA ) || softfloat_isSigNaNF32UI( rawB )
+        ) {
+            softfloat_raiseFlags( softfloat_flag_invalid );
+        }
+        return false;
+    }
+    negA = signF32UI( rawA );
+    negB = signF32UI( rawB );
+    if ( negA != negB ) {
+        /* Opposite signs: true only if `a' is the negative one and the
+           operands are not both zeros. */
+        if ( ! negA ) return false;
+        return (uint32_t) ( ( rawA | rawB )<<1 ) != 0;
+    }
+    if ( rawA == rawB ) return false;
+    /* Same sign: the unsigned ordering of the patterns is reversed for
+       negative operands. */
+    return negA != ( rawA < rawB );
+
+}
+
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "specialize.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns the product of `a' and `b' as a `float32_t'.\r
+|\r
+| Exit paths for special operands:\r
+|   propagateNaN - an operand has exponent field 0xFF and a nonzero\r
+|                  fraction; result from `softfloat_propagateNaNF32UI'.\r
+|   infArg       - an operand has exponent 0xFF and zero fraction.  If the\r
+|                  other operand is zero (`magBits' == 0) the invalid flag\r
+|                  is raised and `defaultNaNF32UI' is returned; otherwise\r
+|                  the result has exponent 0xFF and zero fraction.\r
+|   zero         - an operand is zero; the result has zero exponent and\r
+|                  fraction.\r
+| Signed special results use the XOR of the operand signs.  All remaining\r
+| cases are finished by `softfloat_roundPackToF32'.\r
+*----------------------------------------------------------------------------*/\r
+float32_t f32_mul( float32_t a, float32_t b )\r
+{\r
+ union ui32_f32 uA;\r
+ uint_fast32_t uiA;\r
+ bool signA;\r
+ int_fast16_t expA;\r
+ uint_fast32_t sigA;\r
+ union ui32_f32 uB;\r
+ uint_fast32_t uiB;\r
+ bool signB;\r
+ int_fast16_t expB;\r
+ uint_fast32_t sigB;\r
+ bool signZ;\r
+ uint_fast32_t magBits;\r
+ struct exp16_sig32 normExpSig;\r
+ int_fast16_t expZ;\r
+ uint_fast32_t sigZ, uiZ;\r
+ union ui32_f32 uZ;\r
+\r
+ /* Split both operands into sign, exponent field and fraction field. */\r
+ uA.f = a;\r
+ uiA = uA.ui;\r
+ signA = signF32UI( uiA );\r
+ expA = expF32UI( uiA );\r
+ sigA = fracF32UI( uiA );\r
+ uB.f = b;\r
+ uiB = uB.ui;\r
+ signB = signF32UI( uiB );\r
+ expB = expF32UI( uiB );\r
+ sigB = fracF32UI( uiB );\r
+ signZ = signA ^ signB;\r
+ /* Operands whose exponent field is all ones.  `magBits' records whether\r
+    the *other* operand has any exponent or fraction bit set. */\r
+ if ( expA == 0xFF ) {\r
+ if ( sigA || ( ( expB == 0xFF ) && sigB ) ) goto propagateNaN;\r
+ magBits = expB | sigB;\r
+ goto infArg;\r
+ }\r
+ if ( expB == 0xFF ) {\r
+ if ( sigB ) goto propagateNaN;\r
+ magBits = expA | sigA;\r
+ goto infArg;\r
+ }\r
+ /* Zero exponent field: either a zero operand or a value normalized via\r
+    `softfloat_normSubnormalF32Sig' before multiplying. */\r
+ if ( ! expA ) {\r
+ if ( ! sigA ) goto zero;\r
+ normExpSig = softfloat_normSubnormalF32Sig( sigA );\r
+ expA = normExpSig.exp;\r
+ sigA = normExpSig.sig;\r
+ }\r
+ if ( ! expB ) {\r
+ if ( ! sigB ) goto zero;\r
+ normExpSig = softfloat_normSubnormalF32Sig( sigB );\r
+ expB = normExpSig.exp;\r
+ sigB = normExpSig.sig;\r
+ }\r
+ expZ = expA + expB - 0x7F;\r
+ /* Set bit 23 in each significand (presumably the implicit leading one)\r
+    and pre-shift them by 7 and 8 bits. */\r
+ sigA = ( sigA | 0x00800000 )<<7;\r
+ sigB = ( sigB | 0x00800000 )<<8;\r
+ /* 64-bit product reduced by 32 bits; NOTE(review): the helper's name\r
+    suggests the discarded bits are folded into the lowest result bit --\r
+    confirm against its definition. */\r
+ sigZ = softfloat_shortShift64RightJam( (uint_fast64_t) sigA * sigB, 32 );\r
+ /* If bit 30 is not set, shift left once and lower the exponent to match. */\r
+ if ( sigZ < 0x40000000 ) {\r
+ --expZ;\r
+ sigZ <<= 1;\r
+ }\r
+ return softfloat_roundPackToF32( signZ, expZ, sigZ );\r
+ propagateNaN:\r
+ uiZ = softfloat_propagateNaNF32UI( uiA, uiB );\r
+ goto uiZ;\r
+ infArg:\r
+ if ( ! magBits ) {\r
+ softfloat_raiseFlags( softfloat_flag_invalid );\r
+ uiZ = defaultNaNF32UI;\r
+ } else {\r
+ uiZ = packToF32UI( signZ, 0xFF, 0 );\r
+ }\r
+ goto uiZ;\r
+ zero:\r
+ uiZ = packToF32UI( signZ, 0, 0 );\r
+ /* Common exit: reinterpret the assembled bit pattern as a float32_t. */\r
+ uiZ:\r
+ uZ.ui = uiZ;\r
+ return uZ.f;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Multiply-add entry point (presumably computing (a * b) + c): extracts the\r
+| raw bit patterns of `a', `b' and `c' and forwards them to\r
+| `softfloat_mulAddF32' with a leading argument of 0.  All arithmetic is\r
+| performed by that helper.\r
+*----------------------------------------------------------------------------*/\r
+float32_t f32_mulAdd( float32_t a, float32_t b, float32_t c )\r
+{\r
+    union ui32_f32 bitsA, bitsB, bitsC;\r
+    uint_fast32_t rawA, rawB, rawC;\r
+\r
+    bitsA.f = a;\r
+    bitsB.f = b;\r
+    bitsC.f = c;\r
+    rawA = bitsA.ui;\r
+    rawB = bitsB.ui;\r
+    rawC = bitsC.ui;\r
+    return softfloat_mulAddF32( 0, rawA, rawB, rawC );\r
+\r
+}\r
+\r
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "primitives.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of `a' with respect to `b' as a `float32_t'.
+|
+| Special cases handled before the main computation:
+|   - an operand with exponent field 0xFF and nonzero fraction: result from
+|     `softfloat_propagateNaNF32UI';
+|   - `a' with exponent 0xFF and zero fraction, or `b' equal to zero: the
+|     invalid flag is raised and `defaultNaNF32UI' is returned;
+|   - `b' with exponent 0xFF and zero fraction, `a' equal to zero, or the
+|     exponent of `a' more than one below that of `b': `a' is returned
+|     unchanged.
+| Otherwise a quotient `q' and a reduced significand are computed in integer
+| arithmetic and the result is packed by `softfloat_normRoundPackToF32' with
+| the exponent of `b'.
+| NOTE(review): the final selection step looks like the IEEE remainder rule
+| (quotient rounded to nearest, ties to even) -- confirm against the spec.
+*----------------------------------------------------------------------------*/
+float32_t f32_rem( float32_t a, float32_t b )
+{
+ union ui32_f32 uA;
+ uint_fast32_t uiA;
+ bool signA;
+ int_fast16_t expA;
+ uint_fast32_t sigA;
+ union ui32_f32 uB;
+ uint_fast32_t uiB;
+ bool signB;
+ int_fast16_t expB;
+ uint_fast32_t sigB;
+ struct exp16_sig32 normExpSig;
+ int_fast16_t expDiff;
+ uint_fast32_t q;
+ uint_fast64_t sigA64, sigB64, q64;
+ uint_fast32_t alternateSigA;
+ uint32_t sigMean;
+ bool signZ;
+ uint_fast32_t uiZ;
+ union ui32_f32 uZ;
+
+ /* Split both operands into sign, exponent field and fraction field. */
+ uA.f = a;
+ uiA = uA.ui;
+ signA = signF32UI( uiA );
+ expA = expF32UI( uiA );
+ sigA = fracF32UI( uiA );
+ uB.f = b;
+ uiB = uB.ui;
+ signB = signF32UI( uiB );
+ expB = expF32UI( uiB );
+ sigB = fracF32UI( uiB );
+ if ( expA == 0xFF ) {
+ if ( sigA || ( ( expB == 0xFF ) && sigB ) ) goto propagateNaN;
+ goto invalid;
+ }
+ if ( expB == 0xFF ) {
+ if ( sigB ) goto propagateNaN;
+ return a;
+ }
+ /* Zero exponent field in `b': zero divisor is invalid, otherwise
+    normalize via `softfloat_normSubnormalF32Sig'. */
+ if ( ! expB ) {
+ if ( ! sigB ) goto invalid;
+ normExpSig = softfloat_normSubnormalF32Sig( sigB );
+ expB = normExpSig.exp;
+ sigB = normExpSig.sig;
+ }
+ if ( ! expA ) {
+ if ( ! sigA ) return a;
+ normExpSig = softfloat_normSubnormalF32Sig( sigA );
+ expA = normExpSig.exp;
+ sigA = normExpSig.sig;
+ }
+ expDiff = expA - expB;
+ /* Set bit 23 in each significand (presumably the implicit leading one). */
+ sigA |= 0x00800000;
+ sigB |= 0x00800000;
+ if ( expDiff < 32 ) {
+ /* Exponent gap below 32: the quotient is obtained with one 64-bit
+    integer division. */
+ sigA <<= 8;
+ sigB <<= 8;
+ if ( expDiff < 0 ) {
+ if ( expDiff < -1 ) return a;
+ sigA >>= 1;
+ }
+ /* First quotient bit: subtract `sigB' once if it fits. */
+ q = ( sigB <= sigA );
+ if ( q ) sigA -= sigB;
+ if ( 0 < expDiff ) {
+ q = ( (uint_fast64_t) sigA<<32 ) / sigB;
+ q >>= 32 - expDiff;
+ sigB >>= 2;
+ sigA = ( ( sigA>>1 )<<( expDiff - 1 ) ) - sigB * q;
+ } else {
+ sigA >>= 2;
+ sigB >>= 2;
+ }
+ } else {
+ /* Exponent gap of 32 or more: reduce in 64-bit arithmetic, consuming 62
+    bits of exponent difference per loop iteration. */
+ if ( sigB <= sigA ) sigA -= sigB;
+ sigA64 = (uint_fast64_t) sigA<<40;
+ sigB64 = (uint_fast64_t) sigB<<40;
+ expDiff -= 64;
+ while ( 0 < expDiff ) {
+ q64 = softfloat_estimateDiv128To64( sigA64, 0, sigB64 );
+ /* Lower the estimate by 2, clamping at 0 (presumably so that it never
+    exceeds the true quotient -- TODO confirm against the helper). */
+ q64 = ( 2 < q64 ) ? q64 - 2 : 0;
+ sigA64 = - ( ( sigB * q64 )<<38 );
+ expDiff -= 62;
+ }
+ expDiff += 64;
+ q64 = softfloat_estimateDiv128To64( sigA64, 0, sigB64 );
+ q64 = ( 2 < q64 ) ? q64 - 2 : 0;
+ q = q64>>( 64 - expDiff );
+ sigB <<= 6;
+ sigA = ( ( sigA64>>33 )<<( expDiff - 1 ) ) - sigB * q;
+ }
+ /* Keep subtracting `sigB' (counting in `q') until `sigA' reaches or passes
+    0x80000000, i.e. the unsigned subtraction has wrapped below zero.
+    `alternateSigA' holds the value from just before the last subtraction. */
+ do {
+ alternateSigA = sigA;
+ ++q;
+ sigA -= sigB;
+ } while ( sigA < 0x80000000 );
+ /* Choose between the last two candidates: `sigMean' is their 32-bit sum;
+    the earlier candidate is taken when that sum is at or above 0x80000000,
+    or when it is exactly zero and `q' is odd. */
+ sigMean = sigA + alternateSigA;
+ if ( ( 0x80000000 <= sigMean ) || ( ! sigMean && ( q & 1 ) ) ) {
+ sigA = alternateSigA;
+ }
+ /* A value at or above 0x80000000 is treated as negative: negate it and
+    flip the result sign relative to `a'. */
+ signZ = ( 0x80000000 <= sigA );
+ if ( signZ ) sigA = - sigA;
+ return softfloat_normRoundPackToF32( signA ^ signZ, expB, sigA );
+ propagateNaN:
+ uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
+ goto uiZ;
+ invalid:
+ softfloat_raiseFlags( softfloat_flag_invalid );
+ uiZ = defaultNaNF32UI;
+ /* Common exit: reinterpret the assembled bit pattern as a float32_t. */
+ uiZ:
+ uZ.ui = uiZ;
+ return uZ.f;
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Rounds `a' to an integral value, still in `float32_t' format, according to
+| `roundingMode' (one of the `softfloat_round_*' constants tested below).
+| When `exact' is true and the result differs from `a',
+| `softfloat_flag_inexact' is set in `softfloat_exceptionFlags'.
+|
+| Inputs with exponent field >= 0x96 are returned unchanged, except that an
+| exponent of 0xFF with a nonzero fraction is routed through
+| `softfloat_propagateNaNF32UI'.  Inputs with exponent field <= 0x7E produce
+| either a zero or a value with exponent field 0x7F and zero fraction, keeping
+| the sign of `a'.
+*----------------------------------------------------------------------------*/
+float32_t f32_roundToInt( float32_t a, int_fast8_t roundingMode, bool exact )
+{
+ union ui32_f32 uA;
+ uint_fast32_t uiA;
+ int_fast16_t expA;
+ uint_fast32_t uiZ;
+ bool signA;
+ uint_fast32_t lastBitMask, roundBitsMask;
+ union ui32_f32 uZ;
+
+ uA.f = a;
+ uiA = uA.ui;
+ expA = expF32UI( uiA );
+ /* Large exponent: nothing to round (presumably such values have no
+    fractional bits -- TODO confirm); only NaNs need processing. */
+ if ( 0x96 <= expA ) {
+ if ( ( expA == 0xFF ) && fracF32UI( uiA ) ) {
+ uiZ = softfloat_propagateNaNF32UI( uiA, 0 );
+ goto uiZ;
+ }
+ return a;
+ }
+ /* Small exponent (presumably magnitude below 1): the result is chosen
+    directly per rounding mode. */
+ if ( expA <= 0x7E ) {
+ /* All bits other than the sign are zero: return the zero unchanged. */
+ if ( ! (uint32_t) ( uiA<<1 ) ) return a;
+ if ( exact ) softfloat_exceptionFlags |= softfloat_flag_inexact;
+ signA = signF32UI( uiA );
+ switch ( roundingMode ) {
+ case softfloat_round_nearest_even:
+ /* Exponent 0x7E with a nonzero fraction goes to exponent 0x7F; an
+    exact 0x7E/zero-fraction input falls through to the zero result. */
+ if ( ( expA == 0x7E ) && fracF32UI( uiA ) ) {
+ uiZ = packToF32UI( signA, 0x7F, 0 );
+ goto uiZ;
+ }
+ break;
+ case softfloat_round_min:
+ /* 0xBF800000 is sign 1, exponent 0x7F, zero fraction (presumably -1). */
+ uiZ = signA ? 0xBF800000 : 0;
+ goto uiZ;
+ case softfloat_round_max:
+ /* 0x80000000 is only the sign bit; 0x3F800000 is exponent 0x7F with
+    zero fraction (presumably -0 and +1). */
+ uiZ = signA ? 0x80000000 : 0x3F800000;
+ goto uiZ;
+ case softfloat_round_nearest_maxMag:
+ if ( expA == 0x7E ) {
+ uiZ = packToF32UI( signA, 0x7F, 0 );
+ goto uiZ;
+ }
+ break;
+ }
+ /* Remaining cases produce a zero carrying the sign of `a'. */
+ uiZ = packToF32UI( signA, 0, 0 );
+ goto uiZ;
+ }
+ /* General case: `lastBitMask' selects the lowest bit kept in the result
+    and `roundBitsMask' all bits below it. */
+ lastBitMask = (uint_fast32_t) 1<<( 0x96 - expA );
+ roundBitsMask = lastBitMask - 1;
+ uiZ = uiA;
+ if ( roundingMode == softfloat_round_nearest_maxMag ) {
+ /* Add half of the last kept bit; truncation below completes rounding. */
+ uiZ += lastBitMask>>1;
+ } else if ( roundingMode == softfloat_round_nearest_even ) {
+ uiZ += lastBitMask>>1;
+ /* If the discarded bits are now all zero, the input was exactly halfway:
+    clear the last kept bit to make the result even. */
+ if ( ! ( uiZ & roundBitsMask ) ) uiZ &= ~ lastBitMask;
+ } else if ( roundingMode != softfloat_round_minMag ) {
+ /* Directed rounding: bump the magnitude when the sign bit differs from
+    "rounding toward max". */
+ if ( signF32UI( uiZ ) ^ ( roundingMode == softfloat_round_max ) ) {
+ uiZ += roundBitsMask;
+ }
+ }
+ /* Drop the bits below the last kept bit. */
+ uiZ &= ~ roundBitsMask;
+ if ( exact && ( uiZ != uiA ) ) {
+ softfloat_exceptionFlags |= softfloat_flag_inexact;
+ }
+ /* Common exit: reinterpret the assembled bit pattern as a float32_t. */
+ uiZ:
+ uZ.ui = uiZ;
+ return uZ.f;
+
+}
+
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "specialize.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns the square root of `a' as a `float32_t'.\r
+|\r
+| Special cases:\r
+|   - exponent field 0xFF with nonzero fraction: result from\r
+|     `softfloat_propagateNaNF32UI';\r
+|   - exponent field 0xFF, zero fraction, sign clear: `a' is returned;\r
+|   - zero of either sign: `a' is returned unchanged;\r
+|   - any other input with the sign bit set: the invalid flag is raised and\r
+|     `defaultNaNF32UI' is returned.\r
+| Otherwise an estimate from `softfloat_estimateSqrt32' is refined when\r
+| needed and the result is packed by `softfloat_roundPackToF32' with sign 0.\r
+*----------------------------------------------------------------------------*/\r
+float32_t f32_sqrt( float32_t a )\r
+{\r
+ union ui32_f32 uA;\r
+ uint_fast32_t uiA;\r
+ bool signA;\r
+ int_fast16_t expA;\r
+ uint_fast32_t sigA, uiZ;\r
+ struct exp16_sig32 normExpSig;\r
+ int_fast16_t expZ;\r
+ uint_fast32_t sigZ;\r
+ uint_fast64_t term, rem;\r
+ union ui32_f32 uZ;\r
+\r
+ uA.f = a;\r
+ uiA = uA.ui;\r
+ signA = signF32UI( uiA );\r
+ expA = expF32UI( uiA );\r
+ sigA = fracF32UI( uiA );\r
+ if ( expA == 0xFF ) {\r
+ if ( sigA ) {\r
+ uiZ = softfloat_propagateNaNF32UI( uiA, 0 );\r
+ goto uiZ;\r
+ }\r
+ if ( ! signA ) return a;\r
+ goto invalid;\r
+ }\r
+ if ( signA ) {\r
+ /* Sign set: only a zero (no exponent or fraction bits) is acceptable. */\r
+ if ( ! ( expA | sigA ) ) return a;\r
+ goto invalid;\r
+ }\r
+ /* Zero exponent field: return a zero as is, otherwise normalize via\r
+    `softfloat_normSubnormalF32Sig'. */\r
+ if ( ! expA ) {\r
+ if ( ! sigA ) return a;\r
+ normExpSig = softfloat_normSubnormalF32Sig( sigA );\r
+ expA = normExpSig.exp;\r
+ sigA = normExpSig.sig;\r
+ }\r
+ /* Halve the exponent after removing the 0x7F offset, then re-offset. */\r
+ expZ = ( ( expA - 0x7F )>>1 ) + 0x7E;\r
+ /* Set bit 23 (presumably the implicit leading one) and left-align. */\r
+ sigA = ( sigA | 0x00800000 )<<8;\r
+ sigZ = softfloat_estimateSqrt32( expA, sigA ) + 2;\r
+ /* Only when the low seven bits of the estimate are 5 or less is the\r
+    estimate corrected against the exact remainder. */\r
+ if ( ( sigZ & 0x7F ) <= 5 ) {\r
+ /* A value below 2 here is replaced by 0x7FFFFFFF (presumably the\r
+    "+ 2" above wrapped around -- TODO confirm). */\r
+ if ( sigZ < 2 ) {\r
+ sigZ = 0x7FFFFFFF;\r
+ goto roundPack;\r
+ }\r
+ sigA >>= expA & 1;\r
+ term = (uint_fast64_t) sigZ * sigZ;\r
+ rem = ( (uint_fast64_t) sigA<<32 ) - term;\r
+ /* While the remainder has its top bit set (negative when viewed as\r
+    signed), lower the root by one and restore 2*sigZ + 1. */\r
+ while ( UINT64_C( 0x8000000000000000 ) <= rem ) {\r
+ --sigZ;\r
+ rem += ( (uint_fast64_t) sigZ<<1 ) | 1;\r
+ }\r
+ /* Record a nonzero remainder in the lowest bit. */\r
+ sigZ |= ( rem != 0 );\r
+ }\r
+ sigZ = softfloat_shortShift32Right1Jam( sigZ );\r
+ roundPack:\r
+ return softfloat_roundPackToF32( 0, expZ, sigZ );\r
+ invalid:\r
+ softfloat_raiseFlags( softfloat_flag_invalid );\r
+ uiZ = defaultNaNF32UI;\r
+ /* Common exit: reinterpret the assembled bit pattern as a float32_t. */\r
+ uiZ:\r
+ uZ.ui = uiZ;\r
+ return uZ.f;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Subtracts `b' from `a' by dispatching on the operand signs: when the sign\r
+| bits match, the work is handed to `softfloat_subMagsF32'; when they differ,\r
+| to `softfloat_addMagsF32'.  In both cases the helper receives the raw\r
+| pattern of `a', the raw pattern of `b' with bit 31 flipped, and the sign\r
+| of `a'.\r
+*----------------------------------------------------------------------------*/\r
+float32_t f32_sub( float32_t a, float32_t b )\r
+{\r
+    union ui32_f32 bitsA, bitsB;\r
+    uint_fast32_t rawA, rawB;\r
+    bool negA, negB;\r
+\r
+    bitsA.f = a;\r
+    rawA = bitsA.ui;\r
+    bitsB.f = b;\r
+    rawB = bitsB.ui;\r
+    negA = signF32UI( rawA );\r
+    negB = signF32UI( rawB );\r
+    if ( negA == negB ) {\r
+        return softfloat_subMagsF32( rawA, rawB ^ 0x80000000, negA );\r
+    }\r
+    return softfloat_addMagsF32( rawA, rawB ^ 0x80000000, negA );\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "internals.h"\r
+#include "specialize.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts `a' to `float64_t'.  An exponent field of 0xFF maps to exponent\r
+| 0x7FF: with a nonzero fraction the result comes from\r
+| `softfloat_commonNaNToF64UI' applied to `softfloat_f32UIToCommonNaN', and\r
+| with a zero fraction the result keeps the sign with a zero fraction.  A\r
+| zero stays a zero of the same sign.  Inputs with a zero exponent field and\r
+| nonzero fraction are first normalized via `softfloat_normSubnormalF32Sig'.\r
+*----------------------------------------------------------------------------*/\r
+float64_t f32_to_f64( float32_t a )\r
+{\r
+    union ui32_f32 inBits;\r
+    union ui64_f64 outBits;\r
+    uint_fast32_t raw, frac;\r
+    uint_fast64_t packed;\r
+    int_fast16_t expField;\r
+    bool negative;\r
+    struct exp16_sig32 normalized;\r
+\r
+    inBits.f = a;\r
+    raw = inBits.ui;\r
+    negative = signF32UI( raw );\r
+    expField = expF32UI( raw );\r
+    frac = fracF32UI( raw );\r
+    if ( expField == 0xFF ) {\r
+        if ( frac ) {\r
+            packed =\r
+                softfloat_commonNaNToF64UI(\r
+                    softfloat_f32UIToCommonNaN( raw ) );\r
+        } else {\r
+            packed = packToF64UI( negative, 0x7FF, 0 );\r
+        }\r
+    } else if ( ! expField && ! frac ) {\r
+        packed = packToF64UI( negative, 0, 0 );\r
+    } else {\r
+        if ( ! expField ) {\r
+            normalized = softfloat_normSubnormalF32Sig( frac );\r
+            expField = normalized.exp - 1;\r
+            frac = normalized.sig;\r
+        }\r
+        /* Re-bias the exponent by 0x380 and move the fraction up 29 bits. */\r
+        packed =\r
+            packToF64UI(\r
+                negative, expField + 0x380, (uint_fast64_t) frac<<29 );\r
+    }\r
+    outBits.ui = packed;\r
+    return outBits.f;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts `a' to a signed 32-bit integer.  The significand is widened to 64\r
+| bits, shifted right by (0xAF - exponent) when that count is positive, and\r
+| handed to `softfloat_roundPackToI32' together with `roundingMode' and\r
+| `exact'; rounding and range handling are done by that helper.  A NaN input\r
+| (exponent 0xFF, nonzero fraction) is passed on with its sign cleared.\r
+*----------------------------------------------------------------------------*/\r
+int_fast32_t f32_to_i32( float32_t a, int_fast8_t roundingMode, bool exact )\r
+{\r
+    union ui32_f32 bits;\r
+    uint_fast32_t raw, frac;\r
+    uint_fast64_t wideSig;\r
+    int_fast16_t expField, rightShift;\r
+    bool negative;\r
+\r
+    bits.f = a;\r
+    raw = bits.ui;\r
+    negative = signF32UI( raw );\r
+    expField = expF32UI( raw );\r
+    frac = fracF32UI( raw );\r
+    if ( ( expField == 0xFF ) && frac ) negative = 0;\r
+    /* A nonzero exponent field gets bit 23 set in the significand. */\r
+    if ( expField ) frac |= 0x00800000;\r
+    wideSig = (uint_fast64_t) frac<<32;\r
+    rightShift = 0xAF - expField;\r
+    if ( 0 < rightShift ) {\r
+        wideSig = softfloat_shift64RightJam( wideSig, rightShift );\r
+    }\r
+    return softfloat_roundPackToI32( negative, wideSig, roundingMode, exact );\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts `a' to a signed 32-bit integer by truncation (discarding the\r
+| fractional bits).  Inputs with exponent field below 0x7F give 0.  Inputs\r
+| with exponent field 0x9E or above raise the invalid flag and return\r
+| 0x7FFFFFFF (non-negative inputs and NaNs) or -0x7FFFFFFF - 1 (negative\r
+| inputs); the single bit pattern sign=1, exponent=0x9E, fraction=0 returns\r
+| -0x7FFFFFFF - 1 without raising a flag.  When `exact' is true and nonzero\r
+| bits are discarded, `softfloat_flag_inexact' is set.\r
+*----------------------------------------------------------------------------*/\r
+int_fast32_t f32_to_i32_r_minMag( float32_t a, bool exact )\r
+{\r
+    union ui32_f32 bits;\r
+    uint_fast32_t raw, frac;\r
+    int_fast16_t expField, rightShift;\r
+    int_fast32_t magnitude;\r
+    bool negative;\r
+\r
+    bits.f = a;\r
+    raw = bits.ui;\r
+    expField = expF32UI( raw );\r
+    frac = fracF32UI( raw );\r
+    if ( expField < 0x7F ) {\r
+        if ( exact && ( expField | frac ) ) {\r
+            softfloat_exceptionFlags |= softfloat_flag_inexact;\r
+        }\r
+        return 0;\r
+    }\r
+    negative = signF32UI( raw );\r
+    rightShift = 0x9E - expField;\r
+    if ( rightShift <= 0 ) {\r
+        /* Only one pattern in this range is representable. */\r
+        if ( raw == packToF32UI( 1, 0x9E, 0 ) ) return -0x7FFFFFFF - 1;\r
+        softfloat_raiseFlags( softfloat_flag_invalid );\r
+        if ( ! negative || ( ( expField == 0xFF ) && frac ) ) {\r
+            return 0x7FFFFFFF;\r
+        }\r
+        return -0x7FFFFFFF - 1;\r
+    }\r
+    frac = ( frac | 0x00800000 )<<8;\r
+    magnitude = frac>>rightShift;\r
+    /* Shifting left by the complementary count exposes exactly the bits that\r
+       the right shift discarded. */\r
+    if ( exact && (uint32_t) ( frac<<( ( - rightShift ) & 31 ) ) ) {\r
+        softfloat_exceptionFlags |= softfloat_flag_inexact;\r
+    }\r
+    return negative ? - magnitude : magnitude;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts `a' to a signed 64-bit integer.  Inputs with exponent field above\r
+| 0xBE raise the invalid flag and return the largest positive value\r
+| (non-negative inputs and NaNs) or the most negative value (negative\r
+| inputs).  Otherwise the significand is positioned with\r
+| `softfloat_shift64ExtraRightJam' and rounding is delegated to\r
+| `softfloat_roundPackToI64' with `roundingMode' and `exact'.\r
+*----------------------------------------------------------------------------*/\r
+int_fast64_t f32_to_i64( float32_t a, int_fast8_t roundingMode, bool exact )\r
+{\r
+    union ui32_f32 bits;\r
+    uint_fast32_t raw, frac;\r
+    uint_fast64_t wideSig, extraBits;\r
+    int_fast16_t expField, rightShift;\r
+    bool negative;\r
+    struct uint64_extra shifted;\r
+\r
+    bits.f = a;\r
+    raw = bits.ui;\r
+    negative = signF32UI( raw );\r
+    expField = expF32UI( raw );\r
+    frac = fracF32UI( raw );\r
+    rightShift = 0xBE - expField;\r
+    if ( rightShift < 0 ) {\r
+        softfloat_raiseFlags( softfloat_flag_invalid );\r
+        if ( ! negative || ( ( expField == 0xFF ) && frac ) ) {\r
+            return INT64_C( 0x7FFFFFFFFFFFFFFF );\r
+        }\r
+        return - INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1;\r
+    }\r
+    /* A nonzero exponent field gets bit 23 set in the significand. */\r
+    if ( expField ) frac |= 0x00800000;\r
+    wideSig = (uint_fast64_t) frac<<40;\r
+    extraBits = 0;\r
+    if ( rightShift ) {\r
+        shifted = softfloat_shift64ExtraRightJam( wideSig, 0, rightShift );\r
+        wideSig = shifted.v;\r
+        extraBits = shifted.extra;\r
+    }\r
+    return\r
+        softfloat_roundPackToI64(\r
+            negative, wideSig, extraBits, roundingMode, exact );\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts `a' to a signed 64-bit integer by truncation (discarding the\r
+| fractional bits).  Inputs with exponent field below 0x7F give 0.  Inputs\r
+| with exponent field 0xBE or above raise the invalid flag and return the\r
+| largest positive value (non-negative inputs and NaNs) or the most negative\r
+| value (negative inputs); the single bit pattern sign=1, exponent=0xBE,\r
+| fraction=0 returns the most negative value without raising a flag.  When\r
+| `exact' is true and nonzero bits are discarded, `softfloat_flag_inexact'\r
+| is set.\r
+*----------------------------------------------------------------------------*/\r
+int_fast64_t f32_to_i64_r_minMag( float32_t a, bool exact )\r
+{\r
+    union ui32_f32 bits;\r
+    uint_fast32_t raw, frac;\r
+    uint_fast64_t wideSig;\r
+    int_fast16_t expField, rightShift, lostShift;\r
+    int_fast64_t magnitude;\r
+    bool negative;\r
+\r
+    bits.f = a;\r
+    raw = bits.ui;\r
+    expField = expF32UI( raw );\r
+    frac = fracF32UI( raw );\r
+    if ( expField < 0x7F ) {\r
+        if ( exact && ( expField | frac ) ) {\r
+            softfloat_exceptionFlags |= softfloat_flag_inexact;\r
+        }\r
+        return 0;\r
+    }\r
+    negative = signF32UI( raw );\r
+    rightShift = 0xBE - expField;\r
+    if ( rightShift <= 0 ) {\r
+        /* Only one pattern in this range is representable. */\r
+        if ( raw == packToF32UI( 1, 0xBE, 0 ) ) {\r
+            return - INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1;\r
+        }\r
+        softfloat_raiseFlags( softfloat_flag_invalid );\r
+        if ( ! negative || ( ( expField == 0xFF ) && frac ) ) {\r
+            return INT64_C( 0x7FFFFFFFFFFFFFFF );\r
+        }\r
+        return - INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1;\r
+    }\r
+    frac |= 0x00800000;\r
+    wideSig = (uint_fast64_t) frac<<40;\r
+    magnitude = wideSig>>rightShift;\r
+    /* Bits are lost only when the right shift exceeds the 40-bit left\r
+       shift; the masked left shift then exposes exactly those bits. */\r
+    lostShift = 40 - rightShift;\r
+    if (\r
+        exact && ( lostShift < 0 ) && (uint32_t) ( frac<<( lostShift & 31 ) )\r
+    ) {\r
+        softfloat_exceptionFlags |= softfloat_flag_inexact;\r
+    }\r
+    return negative ? - magnitude : magnitude;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts `a' to an unsigned 32-bit integer.  The significand is widened to\r
+| 64 bits, shifted right by (0xAF - exponent) when that count is positive,\r
+| and handed to `softfloat_roundPackToUI32' together with the sign,\r
+| `roundingMode' and `exact'; rounding and range handling are done by that\r
+| helper.\r
+*----------------------------------------------------------------------------*/\r
+uint_fast32_t f32_to_ui32( float32_t a, int_fast8_t roundingMode, bool exact )\r
+{\r
+    union ui32_f32 bits;\r
+    uint_fast32_t raw, frac;\r
+    uint_fast64_t wideSig;\r
+    int_fast16_t expField, rightShift;\r
+    bool negative;\r
+\r
+    bits.f = a;\r
+    raw = bits.ui;\r
+    negative = signF32UI( raw );\r
+    expField = expF32UI( raw );\r
+    frac = fracF32UI( raw );\r
+    /* A nonzero exponent field gets bit 23 set in the significand. */\r
+    if ( expField ) frac |= 0x00800000;\r
+    wideSig = (uint_fast64_t) frac<<32;\r
+    rightShift = 0xAF - expField;\r
+    if ( 0 < rightShift ) {\r
+        wideSig = softfloat_shift64RightJam( wideSig, rightShift );\r
+    }\r
+    return softfloat_roundPackToUI32( negative, wideSig, roundingMode, exact );\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts `a' to an unsigned 32-bit integer by truncation (discarding the\r
+| fractional bits).  Inputs with exponent field below 0x7F give 0.  Inputs\r
+| with the sign bit set, or with exponent field above 0x9E, raise the\r
+| invalid flag and return 0xFFFFFFFF.  When `exact' is true and nonzero bits\r
+| are discarded, `softfloat_flag_inexact' is set.\r
+*----------------------------------------------------------------------------*/\r
+uint_fast32_t f32_to_ui32_r_minMag( float32_t a, bool exact )\r
+{\r
+    union ui32_f32 bits;\r
+    uint_fast32_t raw, frac, result;\r
+    int_fast16_t expField, rightShift;\r
+\r
+    bits.f = a;\r
+    raw = bits.ui;\r
+    expField = expF32UI( raw );\r
+    frac = fracF32UI( raw );\r
+    if ( expField < 0x7F ) {\r
+        if ( exact && ( expField | frac ) ) {\r
+            softfloat_exceptionFlags |= softfloat_flag_inexact;\r
+        }\r
+        return 0;\r
+    }\r
+    rightShift = 0x9E - expField;\r
+    if ( signF32UI( raw ) || ( rightShift < 0 ) ) {\r
+        softfloat_raiseFlags( softfloat_flag_invalid );\r
+        return 0xFFFFFFFF;\r
+    }\r
+    frac = ( frac | 0x00800000 )<<8;\r
+    result = frac>>rightShift;\r
+    /* Any set bit below the shift point means the truncation lost data. */\r
+    if ( exact && ( frac & ( ( (uint_fast32_t) 1<<rightShift ) - 1 ) ) ) {\r
+        softfloat_exceptionFlags |= softfloat_flag_inexact;\r
+    }\r
+    return result;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts `a' to an unsigned 64-bit integer.  Inputs with exponent field\r
+| above 0xBE raise the invalid flag and return 0xFFFFFFFFFFFFFFFF.\r
+| Otherwise the significand is positioned with\r
+| `softfloat_shift64ExtraRightJam' and rounding is delegated to\r
+| `softfloat_roundPackToUI64' with the sign, `roundingMode' and `exact'.\r
+*----------------------------------------------------------------------------*/\r
+uint_fast64_t f32_to_ui64( float32_t a, int_fast8_t roundingMode, bool exact )\r
+{\r
+    union ui32_f32 bits;\r
+    uint_fast32_t raw, frac;\r
+    uint_fast64_t wideSig, extraBits;\r
+    int_fast16_t expField, rightShift;\r
+    bool negative;\r
+    struct uint64_extra shifted;\r
+\r
+    bits.f = a;\r
+    raw = bits.ui;\r
+    negative = signF32UI( raw );\r
+    expField = expF32UI( raw );\r
+    frac = fracF32UI( raw );\r
+    rightShift = 0xBE - expField;\r
+    if ( rightShift < 0 ) {\r
+        softfloat_raiseFlags( softfloat_flag_invalid );\r
+        return UINT64_C( 0xFFFFFFFFFFFFFFFF );\r
+    }\r
+    /* A nonzero exponent field gets bit 23 set in the significand. */\r
+    if ( expField ) frac |= 0x00800000;\r
+    wideSig = (uint_fast64_t) frac<<40;\r
+    extraBits = 0;\r
+    if ( rightShift ) {\r
+        shifted = softfloat_shift64ExtraRightJam( wideSig, 0, rightShift );\r
+        wideSig = shifted.v;\r
+        extraBits = shifted.extra;\r
+    }\r
+    return\r
+        softfloat_roundPackToUI64(\r
+            negative, wideSig, extraBits, roundingMode, exact );\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts `a' to an unsigned 64-bit integer by truncation (discarding the\r
+| fractional bits).  Inputs with exponent field below 0x7F give 0.  Inputs\r
+| with the sign bit set, or with exponent field above 0xBE, raise the\r
+| invalid flag and return 0xFFFFFFFFFFFFFFFF.  When `exact' is true and\r
+| nonzero bits are discarded, `softfloat_flag_inexact' is set.\r
+*----------------------------------------------------------------------------*/\r
+uint_fast64_t f32_to_ui64_r_minMag( float32_t a, bool exact )\r
+{\r
+    union ui32_f32 bits;\r
+    uint_fast32_t raw, frac;\r
+    uint_fast64_t wideSig, result;\r
+    int_fast16_t expField, rightShift, lostShift;\r
+\r
+    bits.f = a;\r
+    raw = bits.ui;\r
+    expField = expF32UI( raw );\r
+    frac = fracF32UI( raw );\r
+    if ( expField < 0x7F ) {\r
+        if ( exact && ( expField | frac ) ) {\r
+            softfloat_exceptionFlags |= softfloat_flag_inexact;\r
+        }\r
+        return 0;\r
+    }\r
+    rightShift = 0xBE - expField;\r
+    if ( signF32UI( raw ) || ( rightShift < 0 ) ) {\r
+        softfloat_raiseFlags( softfloat_flag_invalid );\r
+        return UINT64_C( 0xFFFFFFFFFFFFFFFF );\r
+    }\r
+    frac |= 0x00800000;\r
+    wideSig = (uint_fast64_t) frac<<40;\r
+    result = wideSig>>rightShift;\r
+    /* Bits are lost only when the right shift exceeds the 40-bit left\r
+       shift; the masked left shift then exposes exactly those bits. */\r
+    lostShift = 40 - rightShift;\r
+    if (\r
+        exact && ( lostShift < 0 ) && (uint32_t) ( frac<<( lostShift & 31 ) )\r
+    ) {\r
+        softfloat_exceptionFlags |= softfloat_flag_inexact;\r
+    }\r
+    return result;\r
+\r
+}\r
+\r
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Adds `a' and `b' by dispatching on the operand signs: when the sign bits
+| match, the work is handed to `softfloat_addMagsF64'; when they differ, to
+| `softfloat_subMagsF64'.  Either helper receives the raw bit patterns of
+| both operands together with the sign of `a'.
+*----------------------------------------------------------------------------*/
+float64_t f64_add( float64_t a, float64_t b )
+{
+    union ui64_f64 bitsA, bitsB;
+    uint_fast64_t rawA, rawB;
+    bool negA, negB;
+
+    bitsA.f = a;
+    rawA = bitsA.ui;
+    bitsB.f = b;
+    rawB = bitsB.ui;
+    negA = signF64UI( rawA );
+    negB = signF64UI( rawB );
+    if ( negA == negB ) {
+        return softfloat_addMagsF64( rawA, rawB, negA );
+    }
+    return softfloat_subMagsF64( rawA, rawB, negA );
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "primitives.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Returns the quotient `a' / `b' as a `float64_t'.
+|
+| Special operands are dispatched to shared exit labels before the main
+| division:
+|   propagateNaN - an operand has exponent field 0x7FF and a nonzero
+|                  fraction; the result comes from
+|                  `softfloat_propagateNaNF64UI'.
+|   invalid      - both operands have exponent 0x7FF with zero fraction, or
+|                  both are zero; raises `softfloat_flag_invalid' and returns
+|                  `defaultNaNF64UI'.
+|   infinity     - `a' has exponent 0x7FF (zero fraction) and `b' does not,
+|                  or `b' is zero and `a' is not (the latter also raises
+|                  `softfloat_flag_infinity'); the result has exponent 0x7FF
+|                  and zero fraction.
+|   zero         - `b' has exponent 0x7FF (zero fraction), or `a' is zero;
+|                  the result has zero exponent and fraction.
+| For `infinity' and `zero' the result sign is the XOR of the operand signs.
+| Every other result is produced by `softfloat_roundPackToF64'.
+*----------------------------------------------------------------------------*/
+float64_t f64_div( float64_t a, float64_t b )
+{
+ union ui64_f64 uA;
+ uint_fast64_t uiA;
+ bool signA;
+ int_fast16_t expA;
+ uint_fast64_t sigA;
+ union ui64_f64 uB;
+ uint_fast64_t uiB;
+ bool signB;
+ int_fast16_t expB;
+ uint_fast64_t sigB;
+ bool signZ;
+ struct exp16_sig64 normExpSig;
+ int_fast16_t expZ;
+ uint_fast64_t sigZ;
+ struct uint128 term, rem;
+ uint_fast64_t uiZ;
+ union ui64_f64 uZ;
+
+ /* Split both operands into sign, exponent field and fraction field. */
+ uA.f = a;
+ uiA = uA.ui;
+ signA = signF64UI( uiA );
+ expA = expF64UI( uiA );
+ sigA = fracF64UI( uiA );
+ uB.f = b;
+ uiB = uB.ui;
+ signB = signF64UI( uiB );
+ expB = expF64UI( uiB );
+ sigB = fracF64UI( uiB );
+ signZ = signA ^ signB;
+ /* Operands whose exponent field is all ones. */
+ if ( expA == 0x7FF ) {
+ if ( sigA ) goto propagateNaN;
+ if ( expB == 0x7FF ) {
+ if ( sigB ) goto propagateNaN;
+ goto invalid;
+ }
+ goto infinity;
+ }
+ if ( expB == 0x7FF ) {
+ if ( sigB ) goto propagateNaN;
+ goto zero;
+ }
+ /* Zero exponent field in `b': either a zero divisor or a value that must
+    be normalized first via `softfloat_normSubnormalF64Sig'. */
+ if ( ! expB ) {
+ if ( ! sigB ) {
+ if ( ! ( expA | sigA ) ) goto invalid;
+ softfloat_raiseFlags( softfloat_flag_infinity );
+ goto infinity;
+ }
+ normExpSig = softfloat_normSubnormalF64Sig( sigB );
+ expB = normExpSig.exp;
+ sigB = normExpSig.sig;
+ }
+ /* Same treatment for a zero exponent field in `a'. */
+ if ( ! expA ) {
+ if ( ! sigA ) goto zero;
+ normExpSig = softfloat_normSubnormalF64Sig( sigA );
+ expA = normExpSig.exp;
+ sigA = normExpSig.sig;
+ }
+ expZ = expA - expB + 0x3FD;
+ /* Set bit 52 in each significand (presumably the implicit leading one)
+    and shift so the divisor sits one bit higher than the dividend. */
+ sigA = ( sigA | UINT64_C( 0x0010000000000000 ) )<<10;
+ sigB = ( sigB | UINT64_C( 0x0010000000000000 ) )<<11;
+ /* If the divisor does not exceed twice the dividend, halve the dividend
+    and compensate in the exponent. */
+ if ( sigB <= ( sigA + sigA ) ) {
+ ++expZ;
+ sigA >>= 1;
+ }
+ sigZ = softfloat_estimateDiv128To64( sigA, 0, sigB );
+ /* Only when the low nine bits of the estimate are 2 or less is the
+    estimate corrected against the exact 128-bit remainder. */
+ if ( ( sigZ & 0x1FF ) <= 2 ) {
+ term = softfloat_mul64To128( sigB, sigZ );
+ rem = softfloat_sub128( sigA, 0, term.v64, term.v0 );
+ /* While the remainder's top bit is set (negative when viewed as signed),
+    lower the quotient by one and add the divisor back. */
+ while ( UINT64_C( 0x8000000000000000 ) <= rem.v64 ) {
+ --sigZ;
+ rem = softfloat_add128( rem.v64, rem.v0, 0, sigB );
+ }
+ /* Record a nonzero low remainder word in the lowest quotient bit. */
+ sigZ |= ( rem.v0 != 0 );
+ }
+ return softfloat_roundPackToF64( signZ, expZ, sigZ );
+ propagateNaN:
+ uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
+ goto uiZ;
+ invalid:
+ softfloat_raiseFlags( softfloat_flag_invalid );
+ uiZ = defaultNaNF64UI;
+ goto uiZ;
+ infinity:
+ uiZ = packToF64UI( signZ, 0x7FF, 0 );
+ goto uiZ;
+ zero:
+ uiZ = packToF64UI( signZ, 0, 0 );
+ /* Common exit: reinterpret the assembled bit pattern as a float64_t. */
+ uiZ:
+ uZ.ui = uiZ;
+ return uZ.f;
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Returns true if the double-precision floating-point values `a' and `b' are
+| equal, and false otherwise.  If either operand is a NaN the result is
+| false, and the invalid flag is raised only when at least one operand is a
+| signaling NaN.  Two zeros compare equal regardless of sign.
+*----------------------------------------------------------------------------*/
+bool f64_eq( float64_t a, float64_t b )
+{
+    union ui64_f64 argA, argB;
+    uint_fast64_t bitsA, bitsB;
+    bool nanSeen;
+
+    argA.f = a;
+    argB.f = b;
+    bitsA = argA.ui;
+    bitsB = argB.ui;
+    /* A NaN has an all-ones exponent field and a nonzero fraction. */
+    nanSeen = ( expF64UI( bitsA ) == 0x7FF ) && fracF64UI( bitsA );
+    if ( ! nanSeen ) {
+        nanSeen = ( expF64UI( bitsB ) == 0x7FF ) && fracF64UI( bitsB );
+    }
+    if ( nanSeen ) {
+        if (
+            softfloat_isSigNaNF64UI( bitsA )
+                || softfloat_isSigNaNF64UI( bitsB )
+        ) {
+            softfloat_raiseFlags( softfloat_flag_invalid );
+        }
+        return false;
+    }
+    if ( bitsA == bitsB ) return true;
+    /* Different encodings are equal only when both are zeros. */
+    return ! ( ( bitsA | bitsB ) & UINT64_C( 0x7FFFFFFFFFFFFFFF ) );
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Returns true if the double-precision floating-point values `a' and `b' are
+| equal, and false otherwise.  If either operand is a NaN (quiet or
+| signaling) the invalid flag is raised and the result is false.  Two zeros
+| compare equal regardless of sign.
+*----------------------------------------------------------------------------*/
+bool f64_eq_signaling( float64_t a, float64_t b )
+{
+    union ui64_f64 argA, argB;
+    uint_fast64_t bitsA, bitsB;
+    bool nanSeen;
+
+    argA.f = a;
+    argB.f = b;
+    bitsA = argA.ui;
+    bitsB = argB.ui;
+    /* A NaN has an all-ones exponent field and a nonzero fraction. */
+    nanSeen = ( expF64UI( bitsA ) == 0x7FF ) && fracF64UI( bitsA );
+    if ( ! nanSeen ) {
+        nanSeen = ( expF64UI( bitsB ) == 0x7FF ) && fracF64UI( bitsB );
+    }
+    if ( nanSeen ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return false;
+    }
+    if ( bitsA == bitsB ) return true;
+    /* Different encodings are equal only when both are zeros. */
+    return ! ( ( bitsA | bitsB ) & UINT64_C( 0x7FFFFFFFFFFFFFFF ) );
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Returns the result of applying `softfloat_isSigNaNF64UI' to the bit
+| pattern of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+bool f64_isSignalingNaN( float64_t a )
+{
+    union ui64_f64 bits;
+    bool isSignaling;
+
+    bits.f = a;
+    isSignaling = softfloat_isSigNaNF64UI( bits.ui );
+    return isSignaling;
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Returns true if the double-precision floating-point value `a' is less than
+| or equal to `b', and false otherwise.  If either operand is a NaN (quiet or
+| signaling) the invalid flag is raised and the result is false.
+*----------------------------------------------------------------------------*/
+bool f64_le( float64_t a, float64_t b )
+{
+    union ui64_f64 argA, argB;
+    uint_fast64_t bitsA, bitsB;
+    bool negA, negB, nanSeen;
+
+    argA.f = a;
+    argB.f = b;
+    bitsA = argA.ui;
+    bitsB = argB.ui;
+    nanSeen = ( expF64UI( bitsA ) == 0x7FF ) && fracF64UI( bitsA );
+    if ( ! nanSeen ) {
+        nanSeen = ( expF64UI( bitsB ) == 0x7FF ) && fracF64UI( bitsB );
+    }
+    if ( nanSeen ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return false;
+    }
+    negA = signF64UI( bitsA );
+    negB = signF64UI( bitsB );
+    if ( negA != negB ) {
+        /* Opposite signs: true when `a' is the negative one, or when both
+           operands are zeros. */
+        if ( negA ) return true;
+        return ! ( ( bitsA | bitsB ) & UINT64_C( 0x7FFFFFFFFFFFFFFF ) );
+    }
+    if ( bitsA == bitsB ) return true;
+    /* Same sign, different encodings: for negative values the larger
+       encoding is the smaller number. */
+    return negA ? ( bitsB < bitsA ) : ( bitsA < bitsB );
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Returns true if the double-precision floating-point value `a' is less than
+| or equal to `b', and false otherwise.  If either operand is a NaN the
+| result is false, and the invalid flag is raised only when at least one
+| operand is a signaling NaN.
+*----------------------------------------------------------------------------*/
+bool f64_le_quiet( float64_t a, float64_t b )
+{
+    union ui64_f64 argA, argB;
+    uint_fast64_t bitsA, bitsB;
+    bool negA, negB, nanSeen;
+
+    argA.f = a;
+    argB.f = b;
+    bitsA = argA.ui;
+    bitsB = argB.ui;
+    nanSeen = ( expF64UI( bitsA ) == 0x7FF ) && fracF64UI( bitsA );
+    if ( ! nanSeen ) {
+        nanSeen = ( expF64UI( bitsB ) == 0x7FF ) && fracF64UI( bitsB );
+    }
+    if ( nanSeen ) {
+        if (
+            softfloat_isSigNaNF64UI( bitsA )
+                || softfloat_isSigNaNF64UI( bitsB )
+        ) {
+            softfloat_raiseFlags( softfloat_flag_invalid );
+        }
+        return false;
+    }
+    negA = signF64UI( bitsA );
+    negB = signF64UI( bitsB );
+    if ( negA != negB ) {
+        /* Opposite signs: true when `a' is the negative one, or when both
+           operands are zeros. */
+        if ( negA ) return true;
+        return ! ( ( bitsA | bitsB ) & UINT64_C( 0x7FFFFFFFFFFFFFFF ) );
+    }
+    if ( bitsA == bitsB ) return true;
+    /* Same sign, different encodings: for negative values the larger
+       encoding is the smaller number. */
+    return negA ? ( bitsB < bitsA ) : ( bitsA < bitsB );
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Returns true if the double-precision floating-point value `a' is strictly
+| less than `b', and false otherwise.  If either operand is a NaN (quiet or
+| signaling) the invalid flag is raised and the result is false.
+*----------------------------------------------------------------------------*/
+bool f64_lt( float64_t a, float64_t b )
+{
+    union ui64_f64 argA, argB;
+    uint_fast64_t bitsA, bitsB;
+    bool negA, negB, nanSeen;
+
+    argA.f = a;
+    argB.f = b;
+    bitsA = argA.ui;
+    bitsB = argB.ui;
+    nanSeen = ( expF64UI( bitsA ) == 0x7FF ) && fracF64UI( bitsA );
+    if ( ! nanSeen ) {
+        nanSeen = ( expF64UI( bitsB ) == 0x7FF ) && fracF64UI( bitsB );
+    }
+    if ( nanSeen ) {
+        softfloat_raiseFlags( softfloat_flag_invalid );
+        return false;
+    }
+    negA = signF64UI( bitsA );
+    negB = signF64UI( bitsB );
+    if ( negA != negB ) {
+        /* Opposite signs: true only when `a' is the negative one and the
+           operands are not both zeros. */
+        if ( ! negA ) return false;
+        return ( ( bitsA | bitsB ) & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) != 0;
+    }
+    if ( bitsA == bitsB ) return false;
+    /* Same sign, different encodings: for negative values the larger
+       encoding is the smaller number. */
+    return negA ? ( bitsB < bitsA ) : ( bitsA < bitsB );
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Returns true if the double-precision floating-point value `a' is strictly
+| less than `b', and false otherwise.  If either operand is a NaN the result
+| is false, and the invalid flag is raised only when at least one operand is
+| a signaling NaN.
+*----------------------------------------------------------------------------*/
+bool f64_lt_quiet( float64_t a, float64_t b )
+{
+    union ui64_f64 argA, argB;
+    uint_fast64_t bitsA, bitsB;
+    bool negA, negB, nanSeen;
+
+    argA.f = a;
+    argB.f = b;
+    bitsA = argA.ui;
+    bitsB = argB.ui;
+    nanSeen = ( expF64UI( bitsA ) == 0x7FF ) && fracF64UI( bitsA );
+    if ( ! nanSeen ) {
+        nanSeen = ( expF64UI( bitsB ) == 0x7FF ) && fracF64UI( bitsB );
+    }
+    if ( nanSeen ) {
+        if (
+            softfloat_isSigNaNF64UI( bitsA )
+                || softfloat_isSigNaNF64UI( bitsB )
+        ) {
+            softfloat_raiseFlags( softfloat_flag_invalid );
+        }
+        return false;
+    }
+    negA = signF64UI( bitsA );
+    negB = signF64UI( bitsB );
+    if ( negA != negB ) {
+        /* Opposite signs: true only when `a' is the negative one and the
+           operands are not both zeros. */
+        if ( ! negA ) return false;
+        return ( ( bitsA | bitsB ) & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) != 0;
+    }
+    if ( bitsA == bitsB ) return false;
+    /* Same sign, different encodings: for negative values the larger
+       encoding is the smaller number. */
+    return negA ? ( bitsB < bitsA ) : ( bitsA < bitsB );
+
+}
+
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "specialize.h"\r
+#include "softfloat.h"\r
+/*----------------------------------------------------------------------------\r
+| Returns the result of multiplying the double-precision floating-point\r
+| values `a' and `b'.\r
+| Special cases handled here: a NaN operand is propagated through\r
+| `softfloat_propagateNaNF64UI'; infinity times zero raises the invalid flag\r
+| and returns the default NaN; infinity times any other value returns a\r
+| signed infinity; a zero operand with a finite partner returns a signed\r
+| zero.  Every other product is handed to `softfloat_roundPackToF64' for\r
+| rounding and packing.\r
+*----------------------------------------------------------------------------*/\r
+float64_t f64_mul( float64_t a, float64_t b )\r
+{\r
+ union ui64_f64 uA;\r
+ uint_fast64_t uiA;\r
+ bool signA;\r
+ int_fast16_t expA;\r
+ uint_fast64_t sigA;\r
+ union ui64_f64 uB;\r
+ uint_fast64_t uiB;\r
+ bool signB;\r
+ int_fast16_t expB;\r
+ uint_fast64_t sigB;\r
+ bool signZ;\r
+ uint_fast64_t magBits;\r
+ struct exp16_sig64 normExpSig;\r
+ int_fast16_t expZ;\r
+ struct uint128 sigZ128;\r
+ uint_fast64_t sigZ, uiZ;\r
+ union ui64_f64 uZ;\r
+\r
+ uA.f = a;\r
+ uiA = uA.ui;\r
+ signA = signF64UI( uiA );\r
+ expA = expF64UI( uiA );\r
+ sigA = fracF64UI( uiA );\r
+ uB.f = b;\r
+ uiB = uB.ui;\r
+ signB = signF64UI( uiB );\r
+ expB = expF64UI( uiB );\r
+ sigB = fracF64UI( uiB );\r
+ signZ = signA ^ signB; /* the product is negative iff the signs differ */\r
+ if ( expA == 0x7FF ) { /* a is an infinity or a NaN */\r
+ if ( sigA || ( ( expB == 0x7FF ) && sigB ) ) goto propagateNaN;\r
+ magBits = expB | sigB; /* zero iff b is a zero */\r
+ goto infArg;\r
+ }\r
+ if ( expB == 0x7FF ) { /* b is an infinity or a NaN */\r
+ if ( sigB ) goto propagateNaN;\r
+ magBits = expA | sigA; /* zero iff a is a zero */\r
+ goto infArg;\r
+ }\r
+ if ( ! expA ) { /* a is zero or subnormal */\r
+ if ( ! sigA ) goto zero;\r
+ normExpSig = softfloat_normSubnormalF64Sig( sigA );\r
+ expA = normExpSig.exp;\r
+ sigA = normExpSig.sig;\r
+ }\r
+ if ( ! expB ) { /* b is zero or subnormal */\r
+ if ( ! sigB ) goto zero;\r
+ normExpSig = softfloat_normSubnormalF64Sig( sigB );\r
+ expB = normExpSig.exp;\r
+ sigB = normExpSig.sig;\r
+ }\r
+ expZ = expA + expB - 0x3FF;\r
+ sigA = ( sigA | UINT64_C( 0x0010000000000000 ) )<<10; /* add the implicit bit; leading 1 at bit 62 */\r
+ sigB = ( sigB | UINT64_C( 0x0010000000000000 ) )<<11; /* add the implicit bit; leading 1 at bit 63 */\r
+ sigZ128 = softfloat_mul64To128( sigA, sigB );\r
+ sigZ = sigZ128.v64 | ( sigZ128.v0 != 0 ); /* keep the high word; fold any low-word bits into bit 0 */\r
+ if ( sigZ < UINT64_C( 0x4000000000000000 ) ) { /* bit 62 clear: shift left once and lower the exponent */\r
+ --expZ;\r
+ sigZ <<= 1;\r
+ }\r
+ return softfloat_roundPackToF64( signZ, expZ, sigZ );\r
+ propagateNaN:\r
+ uiZ = softfloat_propagateNaNF64UI( uiA, uiB );\r
+ goto uiZ;\r
+ infArg:\r
+ if ( ! magBits ) { /* infinity times zero */\r
+ softfloat_raiseFlags( softfloat_flag_invalid );\r
+ uiZ = defaultNaNF64UI;\r
+ } else {\r
+ uiZ = packToF64UI( signZ, 0x7FF, 0 );\r
+ }\r
+ goto uiZ;\r
+ zero:\r
+ uiZ = packToF64UI( signZ, 0, 0 );\r
+ uiZ:\r
+ uZ.ui = uiZ;\r
+ return uZ.f;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Passes the bit patterns of the double-precision floating-point values `a',\r
+| `b', and `c' to `softfloat_mulAddF64' with an operation code of 0, and\r
+| returns that routine's result.\r
+*----------------------------------------------------------------------------*/\r
+float64_t f64_mulAdd( float64_t a, float64_t b, float64_t c )\r
+{\r
+    union ui64_f64 opA, opB, opC;\r
+    uint_fast64_t bitsA, bitsB, bitsC;\r
+\r
+    opA.f = a;\r
+    opB.f = b;\r
+    opC.f = c;\r
+    bitsA = opA.ui;\r
+    bitsB = opB.ui;\r
+    bitsC = opC.ui;\r
+    return softfloat_mulAddF64( 0, bitsA, bitsB, bitsC );\r
+\r
+}\r
+\r
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "primitives.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+/*----------------------------------------------------------------------------
+| Returns the remainder of the double-precision floating-point value `a'
+| with respect to the double-precision floating-point value `b'.
+| Special cases handled here: a NaN operand is propagated through
+| `softfloat_propagateNaNF64UI'; an infinite `a' or a zero `b' raises the
+| invalid flag and returns the default NaN; `a' is returned unchanged when
+| `b' is infinite, when `a' is zero, or when the exponent of `a' is more than
+| one below that of `b'.  Otherwise the significand of `a' is reduced modulo
+| that of `b', the candidate remainder of smaller magnitude is selected (on a
+| tie the parity of the quotient counter `q' decides), and the result is
+| normalized, rounded, and packed by `softfloat_normRoundPackToF64'.
+*----------------------------------------------------------------------------*/
+float64_t f64_rem( float64_t a, float64_t b )
+{
+ union ui64_f64 uA;
+ uint_fast64_t uiA;
+ bool signA;
+ int_fast16_t expA;
+ uint_fast64_t sigA;
+ union ui64_f64 uB;
+ uint_fast64_t uiB;
+ bool signB;
+ int_fast16_t expB;
+ uint_fast64_t sigB;
+ struct exp16_sig64 normExpSig;
+ int_fast16_t expDiff;
+ uint_fast64_t q, alternateSigA;
+ uint64_t sigMean;
+ bool signZ;
+ uint_fast64_t uiZ;
+ union ui64_f64 uZ;
+
+ uA.f = a;
+ uiA = uA.ui;
+ signA = signF64UI( uiA );
+ expA = expF64UI( uiA );
+ sigA = fracF64UI( uiA );
+ uB.f = b;
+ uiB = uB.ui;
+ signB = signF64UI( uiB ); /* NOTE(review): signB is never read after this assignment */
+ expB = expF64UI( uiB );
+ sigB = fracF64UI( uiB );
+ if ( expA == 0x7FF ) { /* a is an infinity or a NaN */
+ if ( sigA || ( ( expB == 0x7FF ) && sigB ) ) goto propagateNaN;
+ goto invalid; /* remainder of an infinity */
+ }
+ if ( expB == 0x7FF ) { /* b is an infinity or a NaN */
+ if ( sigB ) goto propagateNaN;
+ return a; /* finite a, infinite b */
+ }
+ if ( ! expB ) { /* b is zero or subnormal */
+ if ( ! sigB ) goto invalid; /* remainder with respect to zero */
+ normExpSig = softfloat_normSubnormalF64Sig( sigB );
+ expB = normExpSig.exp;
+ sigB = normExpSig.sig;
+ }
+ if ( ! expA ) { /* a is zero or subnormal */
+ if ( ! sigA ) return a;
+ normExpSig = softfloat_normSubnormalF64Sig( sigA );
+ expA = normExpSig.exp;
+ sigA = normExpSig.sig;
+ }
+ expDiff = expA - expB;
+ sigA = ( sigA | UINT64_C( 0x0010000000000000 ) )<<11; /* add the implicit bit; leading 1 at bit 63 */
+ sigB = ( sigB | UINT64_C( 0x0010000000000000 ) )<<11; /* add the implicit bit; leading 1 at bit 63 */
+ if ( expDiff < 0 ) {
+ if ( expDiff < -1 ) return a; /* a is returned as is */
+ sigA >>= 1;
+ }
+ q = ( sigB <= sigA );
+ if ( q ) sigA -= sigB;
+ expDiff -= 64;
+ while ( 0 < expDiff ) { /* consume the exponent difference 62 bits per pass */
+ q = softfloat_estimateDiv128To64( sigA, 0, sigB );
+ q = ( 2 < q ) ? q - 2 : 0; /* lower the estimate by 2, clamping at 0 */
+ sigA = - ( ( sigB>>2 ) * q );
+ expDiff -= 62;
+ }
+ expDiff += 64;
+ if ( 0 < expDiff ) {
+ q = softfloat_estimateDiv128To64( sigA, 0, sigB );
+ q = ( 2 < q ) ? q - 2 : 0;
+ q >>= 64 - expDiff;
+ sigB >>= 2;
+ sigA = ( ( sigA>>1 )<<( expDiff - 1 ) ) - sigB * q;
+ } else {
+ sigA >>= 2;
+ sigB >>= 2;
+ }
+ do { /* subtract sigB until sigA has its top bit set, keeping the previous value */
+ alternateSigA = sigA;
+ ++q;
+ sigA -= sigB;
+ } while ( sigA < UINT64_C( 0x8000000000000000 ) );
+ sigMean = sigA + alternateSigA; /* sum of the last two candidates, truncated to 64 bits */
+ if (
+ ( UINT64_C( 0x8000000000000000 ) <= sigMean )
+ || ( ! sigMean && ( q & 1 ) )
+ ) {
+ sigA = alternateSigA; /* fall back to the previous candidate */
+ }
+ signZ = ( UINT64_C( 0x8000000000000000 ) <= sigA ); /* top bit set: candidate is negative */
+ if ( signZ ) sigA = - sigA;
+ return softfloat_normRoundPackToF64( signA ^ signZ, expB, sigA );
+ propagateNaN:
+ uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
+ goto uiZ;
+ invalid:
+ softfloat_raiseFlags( softfloat_flag_invalid );
+ uiZ = defaultNaNF64UI;
+ uiZ:
+ uZ.ui = uiZ;
+ return uZ.f;
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+/*----------------------------------------------------------------------------
+| Rounds the double-precision floating-point value `a' to an integral value
+| in the same format, using the rounding mode `roundingMode'.  When `exact'
+| is true, the inexact flag is set if the result differs from `a'.
+| Inputs with a biased exponent of 0x433 or more are returned unchanged,
+| except that a NaN is passed through `softfloat_propagateNaNF64UI'.  Inputs
+| of magnitude below 1 yield a signed zero or a signed one as selected by the
+| rounding mode.  All other inputs are rounded by adjusting and masking the
+| fraction bits of the encoding directly.
+*----------------------------------------------------------------------------*/
+float64_t f64_roundToInt( float64_t a, int_fast8_t roundingMode, bool exact )
+{
+ union ui64_f64 uA;
+ uint_fast64_t uiA;
+ int_fast16_t expA;
+ uint_fast64_t uiZ;
+ bool signA;
+ uint_fast64_t lastBitMask, roundBitsMask;
+ union ui64_f64 uZ;
+
+ uA.f = a;
+ uiA = uA.ui;
+ expA = expF64UI( uiA );
+ if ( 0x433 <= expA ) { /* no fraction bits below the units place, or infinity/NaN */
+ if ( ( expA == 0x7FF ) && fracF64UI( uiA ) ) {
+ uiZ = softfloat_propagateNaNF64UI( uiA, 0 );
+ goto uiZ;
+ }
+ return a;
+ }
+ if ( expA <= 0x3FE ) { /* magnitude is below 1 */
+ if ( ! ( uiA & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) ) return a; /* zero of either sign */
+ if ( exact ) softfloat_exceptionFlags |= softfloat_flag_inexact;
+ signA = signF64UI( uiA );
+ switch ( roundingMode ) {
+ case softfloat_round_nearest_even:
+ if ( ( expA == 0x3FE ) && fracF64UI( uiA ) ) { /* magnitude above one half */
+ uiZ = packToF64UI( signA, 0x3FF, 0 );
+ goto uiZ;
+ }
+ break;
+ case softfloat_round_min:
+ uiZ = signA ? UINT64_C( 0xBFF0000000000000 ) : 0; /* -1 or +0 */
+ goto uiZ;
+ case softfloat_round_max:
+ uiZ =
+ signA ? UINT64_C( 0x8000000000000000 )
+ : UINT64_C( 0x3FF0000000000000 ); /* -0 or +1 */
+ goto uiZ;
+ case softfloat_round_nearest_maxMag:
+ if ( expA == 0x3FE ) { /* magnitude of one half or more */
+ uiZ = packToF64UI( signA, 0x3FF, 0 );
+ goto uiZ;
+ }
+ break;
+ }
+ uiZ = packToF64UI( signA, 0, 0 ); /* every remaining case rounds to a signed zero */
+ goto uiZ;
+ }
+ lastBitMask = (uint_fast64_t) 1<<( 0x433 - expA ); /* encoding bit that holds the units place */
+ roundBitsMask = lastBitMask - 1; /* all fraction bits below the units place */
+ uiZ = uiA;
+ if ( roundingMode == softfloat_round_nearest_maxMag ) {
+ uiZ += lastBitMask>>1; /* add one half */
+ } else if ( roundingMode == softfloat_round_nearest_even ) {
+ uiZ += lastBitMask>>1; /* add one half */
+ if ( ! ( uiZ & roundBitsMask ) ) uiZ &= ~ lastBitMask; /* exact tie: clear the units bit to make it even */
+ } else if ( roundingMode != softfloat_round_minMag ) {
+ if ( signF64UI( uiZ ) ^ ( roundingMode == softfloat_round_max ) ) {
+ uiZ += roundBitsMask; /* push the magnitude up to the next integer */
+ }
+ }
+ uiZ &= ~ roundBitsMask; /* discard the fraction bits */
+ if ( exact && ( uiZ != uiA ) ) {
+ softfloat_exceptionFlags |= softfloat_flag_inexact;
+ }
+ uiZ:
+ uZ.ui = uiZ;
+ return uZ.f;
+
+}
+
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "specialize.h"\r
+#include "softfloat.h"\r
+/*----------------------------------------------------------------------------\r
+| Returns the square root of the double-precision floating-point value `a'.\r
+| Special cases handled here: a NaN is passed through\r
+| `softfloat_propagateNaNF64UI'; positive infinity and zeros of either sign\r
+| are returned unchanged; negative infinity and every other negative value\r
+| raise the invalid flag and return the default NaN.  Otherwise the root\r
+| significand is estimated, corrected against an exact remainder when its\r
+| low bits are close to zero, and handed to `softfloat_roundPackToF64'.\r
+*----------------------------------------------------------------------------*/\r
+float64_t f64_sqrt( float64_t a )\r
+{\r
+ union ui64_f64 uA;\r
+ uint_fast64_t uiA;\r
+ bool signA;\r
+ int_fast16_t expA;\r
+ uint_fast64_t sigA, uiZ;\r
+ struct exp16_sig64 normExpSig;\r
+ int_fast16_t expZ;\r
+ uint_fast32_t sigZ32;\r
+ uint_fast64_t sigZ;\r
+ struct uint128 term, rem;\r
+ union ui64_f64 uZ;\r
+\r
+ uA.f = a;\r
+ uiA = uA.ui;\r
+ signA = signF64UI( uiA );\r
+ expA = expF64UI( uiA );\r
+ sigA = fracF64UI( uiA );\r
+ if ( expA == 0x7FF ) { /* a is an infinity or a NaN */\r
+ if ( sigA ) {\r
+ uiZ = softfloat_propagateNaNF64UI( uiA, 0 );\r
+ goto uiZ;\r
+ }\r
+ if ( ! signA ) return a; /* positive infinity */\r
+ goto invalid;\r
+ }\r
+ if ( signA ) {\r
+ if ( ! ( expA | sigA ) ) return a; /* negative zero is returned unchanged */\r
+ goto invalid;\r
+ }\r
+ if ( ! expA ) { /* a is zero or subnormal */\r
+ if ( ! sigA ) return a;\r
+ normExpSig = softfloat_normSubnormalF64Sig( sigA );\r
+ expA = normExpSig.exp;\r
+ sigA = normExpSig.sig;\r
+ }\r
+ expZ = ( ( expA - 0x3FF )>>1 ) + 0x3FE; /* NOTE(review): right-shifts a possibly negative value, which is implementation-defined in C */\r
+ sigA |= UINT64_C( 0x0010000000000000 ); /* add the implicit leading bit */\r
+ sigZ32 = softfloat_estimateSqrt32( expA, sigA>>21 );\r
+ sigA <<= 9 - ( expA & 1 ); /* shift by 8 or 9 depending on the exponent's low bit */\r
+ sigZ =\r
+ softfloat_estimateDiv128To64( sigA, 0, (uint_fast64_t) sigZ32<<32 )\r
+ + ( (uint_fast64_t) sigZ32<<30 );\r
+ if ( ( sigZ & 0x1FF ) <= 5 ) { /* low 9 bits are 0..5: check the estimate against the exact remainder */\r
+ term = softfloat_mul64To128( sigZ, sigZ );\r
+ rem = softfloat_sub128( sigA, 0, term.v64, term.v0 );\r
+ while ( UINT64_C( 0x8000000000000000 ) <= rem.v64 ) { /* remainder is negative: root too large */\r
+ --sigZ;\r
+ rem =\r
+ softfloat_add128(\r
+ rem.v64, rem.v0, sigZ>>63, (uint64_t) ( sigZ<<1 ) );\r
+ }\r
+ sigZ |= ( ( rem.v64 | rem.v0 ) != 0 ); /* record a nonzero remainder in the lowest bit */\r
+ }\r
+ return softfloat_roundPackToF64( 0, expZ, sigZ );\r
+ invalid:\r
+ softfloat_raiseFlags( softfloat_flag_invalid );\r
+ uiZ = defaultNaNF64UI;\r
+ uiZ:\r
+ uZ.ui = uiZ;\r
+ return uZ.f;\r
+\r
+}\r
+\r
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the double-precision floating-point
+| value `b' from `a'.  When the operands have the same sign the work is done
+| by `softfloat_subMagsF64'; otherwise it is done by `softfloat_addMagsF64'.
+| In both cases the sign of `a' is passed as the third argument.
+*----------------------------------------------------------------------------*/
+float64_t f64_sub( float64_t a, float64_t b )
+{
+    union ui64_f64 opA, opB;
+    uint_fast64_t bitsA, bitsB;
+    bool negA, negB;
+
+    opA.f = a;
+    opB.f = b;
+    bitsA = opA.ui;
+    bitsB = opB.ui;
+    negA = signF64UI( bitsA );
+    negB = signF64UI( bitsB );
+    if ( negA == negB ) {
+        return softfloat_subMagsF64( bitsA, bitsB, negA );
+    }
+    return softfloat_addMagsF64( bitsA, bitsB, negA );
+
+}
+
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "specialize.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts the double-precision floating-point value `a' to the\r
+| single-precision format.  A NaN is converted through the common-NaN\r
+| helpers, an infinity maps to the infinity of the same sign, and an input\r
+| whose exponent field and jam-shifted fraction are both zero maps to a\r
+| signed zero.  Every other value is rounded and packed by\r
+| `softfloat_roundPackToF32'.\r
+*----------------------------------------------------------------------------*/\r
+float32_t f64_to_f32( float64_t a )\r
+{\r
+    union ui64_f64 in;\r
+    union ui32_f32 out;\r
+    uint_fast64_t bits, frac;\r
+    uint_fast32_t outBits, frac32;\r
+    int_fast16_t expField;\r
+    bool negative;\r
+\r
+    in.f = a;\r
+    bits = in.ui;\r
+    negative = signF64UI( bits );\r
+    expField = expF64UI( bits );\r
+    frac = fracF64UI( bits );\r
+    if ( expField == 0x7FF ) {\r
+        if ( frac ) {\r
+            outBits =\r
+                softfloat_commonNaNToF32UI(\r
+                    softfloat_f64UIToCommonNaN( bits ) );\r
+        } else {\r
+            outBits = packToF32UI( negative, 0xFF, 0 );\r
+        }\r
+    } else {\r
+        /* Shift right by 22 places with the helper's jamming. */\r
+        frac32 = softfloat_shortShift64RightJam( frac, 22 );\r
+        if ( expField | frac32 ) {\r
+            return\r
+                softfloat_roundPackToF32(\r
+                    negative, expField - 0x381, frac32 | 0x40000000 );\r
+        }\r
+        outBits = packToF32UI( negative, 0, 0 );\r
+    }\r
+    out.ui = outBits;\r
+    return out.f;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts the double-precision floating-point value `a' to a 32-bit signed\r
+| integer.  The significand is aligned as a fixed-point value and passed,\r
+| together with `roundingMode' and `exact', to `softfloat_roundPackToI32',\r
+| which produces the result.  A NaN is treated as positive.\r
+*----------------------------------------------------------------------------*/\r
+int_fast32_t f64_to_i32( float64_t a, int_fast8_t roundingMode, bool exact )\r
+{\r
+    union ui64_f64 in;\r
+    uint_fast64_t bits, mant;\r
+    int_fast16_t expField, rightShift;\r
+    bool negative;\r
+\r
+    in.f = a;\r
+    bits = in.ui;\r
+    negative = signF64UI( bits );\r
+    expField = expF64UI( bits );\r
+    mant = fracF64UI( bits );\r
+    if ( expField == 0x7FF ) {\r
+        /* NaN: drop the sign. */\r
+        if ( mant ) negative = 0;\r
+    }\r
+    if ( expField ) {\r
+        /* Nonzero exponent field: add the implicit leading bit. */\r
+        mant |= UINT64_C( 0x0010000000000000 );\r
+    }\r
+    rightShift = 0x42C - expField;\r
+    if ( rightShift > 0 ) {\r
+        mant = softfloat_shift64RightJam( mant, rightShift );\r
+    }\r
+    return softfloat_roundPackToI32( negative, mant, roundingMode, exact );\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+/*----------------------------------------------------------------------------\r
+| Converts the double-precision floating-point value `a' to a 32-bit signed\r
+| integer by discarding the fraction (rounding toward zero).  Inputs of\r
+| magnitude below 1 return 0.  When `exact' is true, the inexact flag is set\r
+| if any nonzero bits were discarded.  If the value does not fit, the invalid\r
+| flag is raised and the result is the most negative 32-bit integer for a\r
+| negative input, or the most positive one for a positive input or a NaN.\r
+*----------------------------------------------------------------------------*/\r
+int_fast32_t f64_to_i32_r_minMag( float64_t a, bool exact )\r
+{\r
+ union ui64_f64 uA;\r
+ uint_fast64_t uiA;\r
+ int_fast16_t exp;\r
+ uint_fast64_t sig;\r
+ bool sign;\r
+ int_fast16_t shiftCount;\r
+ uint_fast32_t absZ;\r
+ union { uint32_t ui; int32_t i; } uZ;\r
+ int_fast32_t z;\r
+\r
+ uA.f = a;\r
+ uiA = uA.ui;\r
+ exp = expF64UI( uiA );\r
+ sig = fracF64UI( uiA );\r
+ if ( exp < 0x3FF ) { /* magnitude is below 1 */\r
+ if ( exact && ( exp | sig ) ) {\r
+ softfloat_exceptionFlags |= softfloat_flag_inexact;\r
+ }\r
+ return 0;\r
+ }\r
+ sign = signF64UI( uiA );\r
+ if ( 0x41E < exp ) { /* magnitude of 2^32 or more, infinity, or NaN */\r
+ if ( ( exp == 0x7FF ) && sig ) sign = 0; /* a NaN is reported as positive overflow */\r
+ goto invalid;\r
+ }\r
+ sig |= UINT64_C( 0x0010000000000000 ); /* add the implicit leading bit */\r
+ shiftCount = 0x433 - exp;\r
+ absZ = sig>>shiftCount; /* integer part of the magnitude */\r
+ uZ.ui = sign ? - absZ : absZ; /* negate in unsigned arithmetic, then reinterpret as signed */\r
+ z = uZ.i;\r
+ if ( ( z < 0 ) != sign ) goto invalid; /* sign mismatch: the magnitude did not fit */\r
+ if ( exact && ( (uint_fast64_t) absZ<<shiftCount != sig ) ) { /* some fraction bits were discarded */\r
+ softfloat_exceptionFlags |= softfloat_flag_inexact;\r
+ }\r
+ return z;\r
+ invalid:\r
+ softfloat_raiseFlags( softfloat_flag_invalid );\r
+ return sign ? -0x7FFFFFFF - 1 : 0x7FFFFFFF;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts the double-precision floating-point value `a' to a 64-bit signed\r
+| integer.  The significand is aligned into a 64-bit word plus an extra word\r
+| and passed, together with `roundingMode' and `exact', to\r
+| `softfloat_roundPackToI64'.  When the exponent field exceeds 0x43E the\r
+| invalid flag is raised here and the most positive integer is returned for\r
+| a positive input or a NaN, the most negative one otherwise.\r
+*----------------------------------------------------------------------------*/\r
+int_fast64_t f64_to_i64( float64_t a, int_fast8_t roundingMode, bool exact )\r
+{\r
+    union ui64_f64 in;\r
+    uint_fast64_t bits, mant;\r
+    int_fast16_t expField, rightShift;\r
+    struct uint64_extra aligned;\r
+    bool negative;\r
+\r
+    in.f = a;\r
+    bits = in.ui;\r
+    negative = signF64UI( bits );\r
+    expField = expF64UI( bits );\r
+    mant = fracF64UI( bits );\r
+    if ( expField ) mant |= UINT64_C( 0x0010000000000000 );\r
+    rightShift = 0x433 - expField;\r
+    if ( 0 < rightShift ) {\r
+        aligned = softfloat_shift64ExtraRightJam( mant, 0, rightShift );\r
+    } else {\r
+        if ( 0x43E < expField ) {\r
+            softfloat_raiseFlags( softfloat_flag_invalid );\r
+            if ( ! negative ) {\r
+                return INT64_C( 0x7FFFFFFFFFFFFFFF );\r
+            }\r
+            /* With the implicit bit set, any other significand on an\r
+               all-ones exponent means a NaN rather than an infinity. */\r
+            if (\r
+                ( expField == 0x7FF )\r
+                    && ( mant != UINT64_C( 0x0010000000000000 ) )\r
+            ) {\r
+                return INT64_C( 0x7FFFFFFFFFFFFFFF );\r
+            }\r
+            return - INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1;\r
+        }\r
+        aligned.v = mant<<( - rightShift );\r
+        aligned.extra = 0;\r
+    }\r
+    return\r
+        softfloat_roundPackToI64(\r
+            negative, aligned.v, aligned.extra, roundingMode, exact );\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+/*----------------------------------------------------------------------------\r
+| Converts the double-precision floating-point value `a' to a 64-bit signed\r
+| integer by discarding the fraction (rounding toward zero).  Inputs of\r
+| magnitude below 1 return 0.  When `exact' is true, the inexact flag is set\r
+| if any nonzero bits were discarded.  If the exponent field is 0x43E or\r
+| more, the only input converted without raising the invalid flag is the one\r
+| encoded as sign 1, exponent 0x43E, fraction 0, which yields the most\r
+| negative integer; otherwise the invalid flag is raised and the result is\r
+| the most positive integer for a positive input or a NaN, the most negative\r
+| one for any other negative input.\r
+*----------------------------------------------------------------------------*/\r
+int_fast64_t f64_to_i64_r_minMag( float64_t a, bool exact )\r
+{\r
+ union ui64_f64 uA;\r
+ uint_fast64_t uiA;\r
+ bool sign;\r
+ int_fast16_t exp;\r
+ uint_fast64_t sig;\r
+ int_fast16_t shiftCount;\r
+ int_fast64_t absZ;\r
+\r
+ uA.f = a;\r
+ uiA = uA.ui;\r
+ sign = signF64UI( uiA );\r
+ exp = expF64UI( uiA );\r
+ sig = fracF64UI( uiA );\r
+ shiftCount = exp - 0x433;\r
+ if ( 0 <= shiftCount ) { /* no fraction bits: the significand is shifted left */\r
+ if ( 0x43E <= exp ) {\r
+ if ( uiA != packToF64UI( 1, 0x43E, 0 ) ) { /* anything but the one representable boundary value */\r
+ softfloat_raiseFlags( softfloat_flag_invalid );\r
+ if ( ! sign || ( ( exp == 0x7FF ) && sig ) ) {\r
+ return INT64_C( 0x7FFFFFFFFFFFFFFF );\r
+ }\r
+ }\r
+ return - INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1;\r
+ }\r
+ sig |= UINT64_C( 0x0010000000000000 ); /* add the implicit leading bit */\r
+ absZ = sig<<shiftCount;\r
+ } else {\r
+ if ( exp < 0x3FF ) { /* magnitude is below 1 */\r
+ if ( exact && ( exp | sig ) ) {\r
+ softfloat_exceptionFlags |= softfloat_flag_inexact;\r
+ }\r
+ return 0;\r
+ }\r
+ sig |= UINT64_C( 0x0010000000000000 ); /* add the implicit leading bit */\r
+ absZ = sig>>( - shiftCount );\r
+ if ( exact && (uint64_t) ( sig<<( shiftCount & 63 ) ) ) { /* nonzero iff the right shift above discarded set bits */\r
+ softfloat_exceptionFlags |= softfloat_flag_inexact;\r
+ }\r
+ }\r
+ return sign ? - absZ : absZ;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts the double-precision floating-point value `a' to a 32-bit\r
+| unsigned integer.  The significand is aligned as a fixed-point value and\r
+| passed, together with the sign, `roundingMode', and `exact', to\r
+| `softfloat_roundPackToUI32', which produces the result.\r
+*----------------------------------------------------------------------------*/\r
+uint_fast32_t f64_to_ui32( float64_t a, int_fast8_t roundingMode, bool exact )\r
+{\r
+    union ui64_f64 in;\r
+    uint_fast64_t bits, mant;\r
+    int_fast16_t expField, rightShift;\r
+    bool negative;\r
+\r
+    in.f = a;\r
+    bits = in.ui;\r
+    negative = signF64UI( bits );\r
+    expField = expF64UI( bits );\r
+    mant = fracF64UI( bits );\r
+    if ( expField ) {\r
+        /* Nonzero exponent field: add the implicit leading bit. */\r
+        mant |= UINT64_C( 0x0010000000000000 );\r
+    }\r
+    rightShift = 0x42C - expField;\r
+    if ( rightShift > 0 ) {\r
+        mant = softfloat_shift64RightJam( mant, rightShift );\r
+    }\r
+    return softfloat_roundPackToUI32( negative, mant, roundingMode, exact );\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts the double-precision floating-point value `a' to a 32-bit\r
+| unsigned integer by discarding the fraction (rounding toward zero).\r
+| Inputs of magnitude below 1 return 0.  When `exact' is true, the inexact\r
+| flag is set if any nonzero bits were discarded.  A negative input of\r
+| magnitude 1 or more, or any input whose exponent field exceeds 0x41E,\r
+| raises the invalid flag and returns 0xFFFFFFFF.\r
+*----------------------------------------------------------------------------*/\r
+uint_fast32_t f64_to_ui32_r_minMag( float64_t a, bool exact )\r
+{\r
+    union ui64_f64 in;\r
+    uint_fast64_t bits, mant;\r
+    int_fast16_t expField, rightShift;\r
+    uint_fast32_t result;\r
+\r
+    in.f = a;\r
+    bits = in.ui;\r
+    expField = expF64UI( bits );\r
+    mant = fracF64UI( bits );\r
+    if ( expField < 0x3FF ) {\r
+        /* Magnitude below 1: the result is 0, inexact unless `a' is zero. */\r
+        if ( exact ) {\r
+            if ( expField | mant ) {\r
+                softfloat_exceptionFlags |= softfloat_flag_inexact;\r
+            }\r
+        }\r
+        return 0;\r
+    }\r
+    if ( signF64UI( bits ) || ( 0x41E < expField ) ) {\r
+        softfloat_raiseFlags( softfloat_flag_invalid );\r
+        return 0xFFFFFFFF;\r
+    }\r
+    mant |= UINT64_C( 0x0010000000000000 );\r
+    rightShift = 0x433 - expField;\r
+    result = mant>>rightShift;\r
+    if ( exact ) {\r
+        /* Shifting back must reproduce the significand if nothing was\r
+           discarded. */\r
+        if ( (uint_fast64_t) result<<rightShift != mant ) {\r
+            softfloat_exceptionFlags |= softfloat_flag_inexact;\r
+        }\r
+    }\r
+    return result;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts the double-precision floating-point value `a' to a 64-bit\r
+| unsigned integer.  The significand is aligned into a 64-bit word plus an\r
+| extra word and passed, together with the sign, `roundingMode', and `exact',\r
+| to `softfloat_roundPackToUI64'.  When the exponent field exceeds 0x43E the\r
+| invalid flag is raised here and 0xFFFFFFFFFFFFFFFF is returned.\r
+*----------------------------------------------------------------------------*/\r
+uint_fast64_t f64_to_ui64( float64_t a, int_fast8_t roundingMode, bool exact )\r
+{\r
+    union ui64_f64 in;\r
+    uint_fast64_t bits, mant;\r
+    int_fast16_t expField, rightShift;\r
+    struct uint64_extra aligned;\r
+    bool negative;\r
+\r
+    in.f = a;\r
+    bits = in.ui;\r
+    negative = signF64UI( bits );\r
+    expField = expF64UI( bits );\r
+    mant = fracF64UI( bits );\r
+    if ( expField ) mant |= UINT64_C( 0x0010000000000000 );\r
+    rightShift = 0x433 - expField;\r
+    if ( 0 < rightShift ) {\r
+        aligned = softfloat_shift64ExtraRightJam( mant, 0, rightShift );\r
+    } else if ( 0x43E < expField ) {\r
+        softfloat_raiseFlags( softfloat_flag_invalid );\r
+        return UINT64_C( 0xFFFFFFFFFFFFFFFF );\r
+    } else {\r
+        aligned.v = mant<<( - rightShift );\r
+        aligned.extra = 0;\r
+    }\r
+    return\r
+        softfloat_roundPackToUI64(\r
+            negative, aligned.v, aligned.extra, roundingMode, exact );\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+/*----------------------------------------------------------------------------\r
+| Converts the double-precision floating-point value `a' to a 64-bit\r
+| unsigned integer by discarding the fraction (rounding toward zero).\r
+| Inputs of magnitude below 1 return 0.  When `exact' is true, the inexact\r
+| flag is set if any nonzero bits were discarded.  A negative input of\r
+| magnitude 1 or more, or any input whose exponent field exceeds 0x43E,\r
+| raises the invalid flag and returns 0xFFFFFFFFFFFFFFFF.\r
+*----------------------------------------------------------------------------*/\r
+uint_fast64_t f64_to_ui64_r_minMag( float64_t a, bool exact )\r
+{\r
+ union ui64_f64 uA;\r
+ uint_fast64_t uiA;\r
+ int_fast16_t exp;\r
+ uint_fast64_t sig;\r
+ int_fast16_t shiftCount;\r
+ uint_fast64_t z;\r
+\r
+ uA.f = a;\r
+ uiA = uA.ui;\r
+ exp = expF64UI( uiA );\r
+ sig = fracF64UI( uiA );\r
+ if ( exp < 0x3FF ) { /* magnitude is below 1 */\r
+ if ( exact && ( exp | sig ) ) {\r
+ softfloat_exceptionFlags |= softfloat_flag_inexact;\r
+ }\r
+ return 0;\r
+ }\r
+ if ( signF64UI( uiA ) ) goto invalid; /* negative with magnitude of 1 or more */\r
+ shiftCount = exp - 0x433;\r
+ if ( 0 <= shiftCount ) { /* no fraction bits: the significand is shifted left */\r
+ if ( 0x43E < exp ) goto invalid;\r
+ z = ( sig | UINT64_C( 0x0010000000000000 ) )<<shiftCount;\r
+ } else {\r
+ sig |= UINT64_C( 0x0010000000000000 ); /* add the implicit leading bit */\r
+ z = sig>>( - shiftCount );\r
+ if ( exact && (uint64_t) ( sig<<( shiftCount & 63 ) ) ) { /* nonzero iff the right shift above discarded set bits */\r
+ softfloat_exceptionFlags |= softfloat_flag_inexact;\r
+ }\r
+ }\r
+ return z;\r
+ invalid:\r
+ softfloat_raiseFlags( softfloat_flag_invalid );\r
+ return UINT64_C( 0xFFFFFFFFFFFFFFFF );\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts the 32-bit signed integer `a' to the single-precision\r
+| floating-point format.  When the low 31 bits of `a' are all zero the\r
+| result is built directly: positive zero for a non-negative input, or the\r
+| value packed from sign 1, exponent 0x9E, and a zero fraction for a negative\r
+| one.  Every other input has its magnitude normalized, rounded, and packed\r
+| by `softfloat_normRoundPackToF32'.\r
+*----------------------------------------------------------------------------*/\r
+float32_t i32_to_f32( int_fast32_t a )\r
+{\r
+    union ui32_f32 out;\r
+    bool negative;\r
+\r
+    negative = ( a < 0 );\r
+    if ( a & 0x7FFFFFFF ) {\r
+        return\r
+            softfloat_normRoundPackToF32(\r
+                negative, 0x9C, negative ? - a : a );\r
+    }\r
+    out.ui = negative ? packToF32UI( 1, 0x9E, 0 ) : 0;\r
+    return out.f;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts the 32-bit signed integer `a' to the double-precision\r
+| floating-point format.  Zero maps to positive zero.  For any other input\r
+| the magnitude is shifted left so that its leading 1 lands on bit 52 of the\r
+| significand argument, and the result is packed directly without rounding.\r
+| The exponent passed to `packToF64UI' is `0x432 - shiftCount' because the\r
+| leading 1 of the shifted magnitude is added into the exponent field.\r
+*----------------------------------------------------------------------------*/\r
+float64_t i32_to_f64( int_fast32_t a )\r
+{\r
+ uint_fast64_t uiZ;\r
+ bool sign;\r
+ uint_fast32_t absA;\r
+ int shiftCount;\r
+ union ui64_f64 uZ;\r
+\r
+ if ( ! a ) {\r
+ uiZ = 0;\r
+ } else {\r
+ sign = ( a < 0 );\r
+ /* Negate in unsigned arithmetic: `- a' on the signed value overflows\r
+    (undefined behavior) for the most negative input when int_fast32_t\r
+    is exactly 32 bits wide. */\r
+ absA = sign ? - (uint_fast32_t) a : (uint_fast32_t) a;\r
+ shiftCount = softfloat_countLeadingZeros32( absA ) + 21;\r
+ uiZ =\r
+ packToF64UI(\r
+ sign, 0x432 - shiftCount, (uint_fast64_t) absA<<shiftCount );\r
+ }\r
+ uZ.ui = uiZ;\r
+ return uZ.f;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+/*----------------------------------------------------------------------------\r
+| Converts the 64-bit signed integer `a' to the single-precision\r
+| floating-point format.  When the magnitude has at least 40 leading zero\r
+| bits (it fits in 24 bits) the result is packed directly without rounding;\r
+| zero maps to positive zero.  Larger magnitudes are aligned, with jamming\r
+| when bits have to be shifted out, and handed to\r
+| `softfloat_roundPackToF32'.\r
+*----------------------------------------------------------------------------*/\r
+float32_t i64_to_f32( int_fast64_t a )\r
+{\r
+ bool sign;\r
+ uint_fast64_t absA;\r
+ int shiftCount;\r
+ union ui32_f32 u;\r
+ uint_fast32_t sig;\r
+\r
+ sign = ( a < 0 );\r
+ absA = sign ? - (uint_fast64_t) a : a; /* negate in unsigned arithmetic */\r
+ shiftCount = softfloat_countLeadingZeros64( absA ) - 40;\r
+ if ( 0 <= shiftCount ) { /* magnitude fits in 24 bits */\r
+ u.ui =\r
+ a ? packToF32UI(\r
+ sign, 0x95 - shiftCount, (uint_fast32_t) absA<<shiftCount )\r
+ : 0;\r
+ return u.f;\r
+ } else {\r
+ shiftCount += 7; /* now measured against the 7-bits-higher alignment passed to the rounding routine */\r
+ sig =\r
+ ( shiftCount < 0 )\r
+ ? softfloat_shortShift64RightJam( absA, - shiftCount )\r
+ : (uint_fast32_t) absA<<shiftCount;\r
+ return softfloat_roundPackToF32( sign, 0x9C - shiftCount, sig );\r
+ }\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts the 64-bit signed integer `a' to the double-precision\r
+| floating-point format.  When the low 63 bits of `a' are all zero the\r
+| result is built directly: positive zero for a non-negative input, or the\r
+| value packed from sign 1, exponent 0x43E, and a zero fraction for a\r
+| negative one.  Every other input has its magnitude normalized, rounded, and\r
+| packed by `softfloat_normRoundPackToF64'.\r
+*----------------------------------------------------------------------------*/\r
+float64_t i64_to_f64( int_fast64_t a )\r
+{\r
+    union ui64_f64 out;\r
+    bool negative;\r
+\r
+    negative = ( a < 0 );\r
+    if ( a & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) {\r
+        return\r
+            softfloat_normRoundPackToF64(\r
+                negative, 0x43C, negative ? - a : a );\r
+    }\r
+    out.ui = negative ? packToF64UI( 1, 0x43E, 0 ) : 0;\r
+    return out.f;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+/*** UPDATE COMMENTS. ***/\r
+\r
+#include "softfloat_types.h"\r
+/*----------------------------------------------------------------------------\r
+| Unions that overlay a floating-point value with unsigned integer members,\r
+| so that the bit pattern of the value can be read or written as an integer.\r
+| For the 128-bit union the declaration order of `ui0' and `ui64' depends on\r
+| whether `LITTLEENDIAN' is defined.\r
+| NOTE(review): `ui0' and `ui64' are both direct members of the union, so\r
+| they occupy the same storage rather than the two halves of the 128-bit\r
+| value; they presumably were meant to be wrapped in a struct -- confirm.\r
+*----------------------------------------------------------------------------*/\r
+union ui32_f32 { uint32_t ui; float32_t f; };\r
+union ui64_f64 { uint64_t ui; float64_t f; };\r
+#ifdef LITTLEENDIAN\r
+union ui128_f128 { uint64_t ui0, ui64; float128_t f; };\r
+#else\r
+union ui128_f128 { uint64_t ui64, ui0; float128_t f; };\r
+#endif\r
+/*----------------------------------------------------------------------------\r
+| Constants with the distinct bit values 1 and 2.\r
+| NOTE(review): presumably operation flags for the first argument of the\r
+| `softfloat_mulAdd' routines (for which `f64_mulAdd' passes 0), selecting\r
+| subtraction of the addend or of the product -- confirm against the\r
+| implementation.\r
+*----------------------------------------------------------------------------*/\r
+enum {\r
+ softfloat_mulAdd_subC = 1,\r
+ softfloat_mulAdd_subProd = 2\r
+};\r
+/*----------------------------------------------------------------------------\r
+| Rounding helpers for the conversions to unsigned integers.  As called by\r
+| the `f64_to_ui32' and `f64_to_ui64' routines, the arguments are, in order:\r
+| the sign of the input, its magnitude as a fixed-point value (one 64-bit\r
+| word for the 32-bit routine; a 64-bit word followed by a 64-bit extra word\r
+| for the 64-bit routine), the rounding mode, and the `exact' flag.\r
+| NOTE(review): the position of the binary point and the exceptions raised\r
+| are not documented here -- confirm against the implementations.\r
+*----------------------------------------------------------------------------*/\r
+uint_fast32_t\r
+ softfloat_roundPackToUI32( bool, uint_fast64_t, int_fast8_t, bool );\r
+uint_fast64_t\r
+ softfloat_roundPackToUI64(\r
+ bool, uint_fast64_t, uint_fast64_t, int_fast8_t, bool );\r
+/*----------------------------------------------------------------------------\r
+| Takes a 64-bit fixed-point magnitude (the second argument) with binary\r
+| point between bits 6 and 7, and returns the properly rounded 32-bit integer\r
+| corresponding to the input.  If the sign (the first argument) is 1, the\r
+| input is negated before being converted to an integer.  Bit 63 of the\r
+| magnitude must be zero.  Ordinarily, the fixed-point input is simply\r
+| rounded to an integer, with the inexact exception raised if the input\r
+| cannot be represented exactly as an integer.  However, if the fixed-point\r
+| input is too large, the invalid exception is raised and the largest\r
+| positive or negative integer is returned.\r
+| NOTE(review): callers pass the rounding mode and the `exact' flag as the\r
+| last two arguments; their effect is not described here -- confirm against\r
+| the implementation.\r
+*----------------------------------------------------------------------------*/\r
+int_fast32_t\r
+ softfloat_roundPackToI32( bool, uint_fast64_t, int_fast8_t, bool );\r
+/*----------------------------------------------------------------------------\r
+| Takes the 128-bit fixed-point magnitude formed by concatenating the second\r
+| and third arguments, with binary point between bits 63 and 64 (between the\r
+| two input words), and returns the properly rounded 64-bit integer\r
+| corresponding to the input.  If the sign (the first argument) is 1, the\r
+| input is negated before being converted to an integer.  Ordinarily, the\r
+| fixed-point input is simply rounded to an integer, with the inexact\r
+| exception raised if the input cannot be represented exactly as an integer.\r
+| However, if the fixed-point input is too large, the invalid exception is\r
+| raised and the largest positive or negative integer is returned.\r
+| NOTE(review): callers pass the rounding mode and the `exact' flag as the\r
+| last two arguments; their effect is not described here -- confirm against\r
+| the implementation.\r
+*----------------------------------------------------------------------------*/\r
+int_fast64_t\r
+ softfloat_roundPackToI64(\r
+ bool, uint_fast64_t, uint_fast64_t, int_fast8_t, bool );\r
+\r
+/*----------------------------------------------------------------------------\r
+| Evaluates to 1 if the 32-bit pattern `ui' encodes a single-precision NaN;\r
+| otherwise, evaluates to 0.  The pattern is shifted left by one bit to drop\r
+| the sign and then compared against 0xFF000000.\r
+*----------------------------------------------------------------------------*/\r
+#define isNaNF32UI( ui ) (0xFF000000<(uint32_t)((uint_fast32_t)(ui)<<1))\r
+/*----------------------------------------------------------------------------\r
+| Evaluates to the sign bit (bit 31) of the single-precision bit pattern `a',\r
+| as a `bool'.\r
+*----------------------------------------------------------------------------*/\r
+#define signF32UI( a ) ((bool)((uint32_t)(a)>>31))\r
+/*----------------------------------------------------------------------------\r
+| Evaluates to the 8 exponent bits (bits 30 to 23) of the single-precision\r
+| bit pattern `a'.  The cast to `int_fast16_t' is applied to the shifted\r
+| value before the mask with 0xFF.\r
+*----------------------------------------------------------------------------*/\r
+#define expF32UI( a ) ((int_fast16_t)((a)>>23)&0xFF)\r
+/*----------------------------------------------------------------------------\r
+| Evaluates to the 23 fraction bits (bits 22 to 0) of the single-precision\r
+| bit pattern `a'.\r
+*----------------------------------------------------------------------------*/\r
+#define fracF32UI( a ) ((a)&0x007FFFFF)\r
+/*----------------------------------------------------------------------------\r
+| Packs the sign `sign', exponent `exp', and significand `sig' into a\r
+| single-precision bit pattern, evaluating to the result.  After being\r
+| shifted into the proper positions, the three fields are simply added\r
+| together to form the result.  This means that any integer portion of `sig'\r
+| will be added into the exponent.  Since a properly normalized significand\r
+| will have an integer portion equal to 1, the `exp' input should be 1 less\r
+| than the desired result exponent whenever `sig' is a complete, normalized\r
+| significand.\r
+*----------------------------------------------------------------------------*/\r
+#define packToF32UI( sign, exp, sig ) (((uint32_t)(sign)<<31)+((uint32_t)(exp)<<23)+(sig))\r
+\r
+/*----------------------------------------------------------------------------\r
+| Normalizes the subnormal single-precision floating-point value represented\r
+| by the denormalized significand passed as the argument.  The normalized\r
+| exponent and significand are returned together in the `exp' and `sig'\r
+| members of a `struct exp16_sig32'.\r
+*----------------------------------------------------------------------------*/\r
+struct exp16_sig32 { int_fast16_t exp; uint_fast32_t sig; }\r
+ softfloat_normSubnormalF32Sig( uint_fast32_t );\r
+\r
+/*----------------------------------------------------------------------------\r
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',\r
+| and significand `zSig' (the three arguments, in that order), and returns\r
+| the proper single-precision floating-point value corresponding to the\r
+| abstract input.  Ordinarily, the abstract value is simply rounded and\r
+| packed into the single-precision format, with the inexact exception raised\r
+| if the abstract input cannot be represented exactly.  However, if the\r
+| abstract value is too large, the overflow and inexact exceptions are raised\r
+| and an infinity or maximal finite value is returned.  If the abstract value\r
+| is too small, the input value is rounded to a subnormal number, and the\r
+| underflow and inexact exceptions are raised if the abstract input cannot be\r
+| represented exactly as a subnormal single-precision floating-point number.\r
+| The input significand `zSig' has its binary point between bits 30\r
+| and 29, which is 7 bits to the left of the usual location. This shifted\r
+| significand must be normalized or smaller. If `zSig' is not normalized,\r
+| `zExp' must be 0; in that case, the result returned is a subnormal number,\r
+| and it must not require rounding. In the usual case that `zSig' is\r
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.\r
+| The handling of underflow and overflow follows the IEC/IEEE Standard for\r
+| Binary Floating-Point Arithmetic.\r
+*----------------------------------------------------------------------------*/\r
+float32_t softfloat_roundPackToF32( bool, int_fast16_t, uint_fast32_t );\r
+/*----------------------------------------------------------------------------\r
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',\r
+| and significand `zSig', and returns the proper single-precision floating-\r
+| point value corresponding to the abstract input. This routine is just like\r
+| `softfloat_roundPackToF32' except that `zSig' does not have to be normalized.\r
+| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''\r
+| floating-point exponent.\r
+*----------------------------------------------------------------------------*/\r
+float32_t softfloat_normRoundPackToF32( bool, int_fast16_t, uint_fast32_t );\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns the result of adding the absolute values of the single-precision\r
+| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated\r
+| before being returned. `zSign' is ignored if the result is a NaN.\r
+| The addition is performed according to the IEC/IEEE Standard for Binary\r
+| Floating-Point Arithmetic.\r
+*----------------------------------------------------------------------------*/\r
+float32_t softfloat_addMagsF32( uint_fast32_t, uint_fast32_t, bool );\r
+/*----------------------------------------------------------------------------\r
+| Returns the result of subtracting the absolute values of the single-\r
+| precision floating-point values `a' and `b'. If `zSign' is 1, the\r
+| difference is negated before being returned. `zSign' is ignored if the\r
+| result is a NaN. The subtraction is performed according to the IEC/IEEE\r
+| Standard for Binary Floating-Point Arithmetic.\r
+*----------------------------------------------------------------------------*/\r
+float32_t softfloat_subMagsF32( uint_fast32_t, uint_fast32_t, bool );\r
+/*----------------------------------------------------------------------------\r
+*----------------------------------------------------------------------------*/\r
+float32_t\r
+ softfloat_mulAddF32( int, uint_fast32_t, uint_fast32_t, uint_fast32_t );\r
+\r
+/*----------------------------------------------------------------------------\r
+| Evaluates to nonzero if the double-precision bit pattern `ui' encodes a NaN\r
+| and to 0 otherwise.  Shifting left by one discards the sign bit; what is\r
+| left exceeds 0xFFE0000000000000 exactly when the exponent field is all ones\r
+| and the fraction field is nonzero.\r
+*----------------------------------------------------------------------------*/\r
+#define isNaNF64UI( ui ) \\r
+    ( UINT64_C( 0xFFE0000000000000 ) < (uint64_t) ( (uint_fast64_t) (ui)<<1 ) )\r
+/*----------------------------------------------------------------------------\r
+| Extracts the sign of a double-precision value held as the raw bit pattern\r
+| `ui': bit 63, converted to `bool'.\r
+*----------------------------------------------------------------------------*/\r
+#define signF64UI( ui ) ( (bool) ( (uint64_t) (ui)>>63 ) )\r
+/*----------------------------------------------------------------------------\r
+| Extracts the exponent field of a double-precision value held as the raw\r
+| bit pattern `ui': the pattern is shifted right by 52 and masked to 11 bits,\r
+| which drops the sign bit.\r
+*----------------------------------------------------------------------------*/\r
+#define expF64UI( ui ) ( (int_fast16_t) ( (ui)>>52 ) & 0x7FF )\r
+/*----------------------------------------------------------------------------\r
+| Extracts the fraction field of a double-precision value held as the raw\r
+| bit pattern `ui': the low 52 bits are kept and everything above is cleared.\r
+*----------------------------------------------------------------------------*/\r
+#define fracF64UI( ui ) ( (ui) & UINT64_C( 0x000FFFFFFFFFFFFF ) )\r
+/*----------------------------------------------------------------------------\r
+| Builds a double-precision bit pattern from `sign', `exp', and `sig'.  The\r
+| sign is moved to bit 63 and the exponent to bit 52, and the three pieces are\r
+| then summed (not ORed).  Because of the addition, any part of `sig' at or\r
+| above bit 52 carries into the exponent field.  A complete, normalized\r
+| significand has an integer bit equal to 1, so when such a significand is\r
+| supplied `exp' should be 1 less than the exponent wanted in the result.\r
+*----------------------------------------------------------------------------*/\r
+#define packToF64UI( sign, exp, sig ) \\r
+    ( ( (uint64_t) (sign)<<63 ) + ( (uint64_t) (exp)<<52 ) + (sig) )\r
+\r
+/*----------------------------------------------------------------------------\r
+| Exponent/significand pair used as the return value of\r
+| `softfloat_normSubnormalF64Sig'.\r
+*----------------------------------------------------------------------------*/\r
+struct exp16_sig64 {\r
+    int_fast16_t exp;\r
+    uint_fast64_t sig;\r
+};\r
+/*----------------------------------------------------------------------------\r
+| Normalizes the subnormal double-precision floating-point value represented\r
+| by the denormalized significand passed as the argument.  The normalized\r
+| exponent and significand are returned together in a `struct exp16_sig64'.\r
+*----------------------------------------------------------------------------*/\r
+struct exp16_sig64 softfloat_normSubnormalF64Sig( uint_fast64_t );\r
+\r
+/*----------------------------------------------------------------------------\r
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',\r
+| and significand `zSig', and returns the proper double-precision floating-\r
+| point value corresponding to the abstract input. Ordinarily, the abstract\r
+| value is simply rounded and packed into the double-precision format, with\r
+| the inexact exception raised if the abstract input cannot be represented\r
+| exactly. However, if the abstract value is too large, the overflow and\r
+| inexact exceptions are raised and an infinity or maximal finite value is\r
+| returned. If the abstract value is too small, the input value is rounded\r
+| to a subnormal number, and the underflow and inexact exceptions are raised\r
+| if the abstract input cannot be represented exactly as a subnormal double-\r
+| precision floating-point number.\r
+| The input significand `zSig' has its binary point between bits 62\r
+| and 61, which is 10 bits to the left of the usual location. This shifted\r
+| significand must be normalized or smaller. If `zSig' is not normalized,\r
+| `zExp' must be 0; in that case, the result returned is a subnormal number,\r
+| and it must not require rounding. In the usual case that `zSig' is\r
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.\r
+| The handling of underflow and overflow follows the IEC/IEEE Standard for\r
+| Binary Floating-Point Arithmetic.\r
+*----------------------------------------------------------------------------*/\r
+float64_t softfloat_roundPackToF64( bool, int_fast16_t, uint_fast64_t );\r
+/*----------------------------------------------------------------------------\r
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',\r
+| and significand `zSig', and returns the proper double-precision floating-\r
+| point value corresponding to the abstract input. This routine is just like\r
+| `softfloat_roundPackToF64' except that `zSig' does not have to be normalized.\r
+| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''\r
+| floating-point exponent.\r
+*----------------------------------------------------------------------------*/\r
+float64_t softfloat_normRoundPackToF64( bool, int_fast16_t, uint_fast64_t );\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns the result of adding the absolute values of the double-precision\r
+| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated\r
+| before being returned. `zSign' is ignored if the result is a NaN.\r
+| The addition is performed according to the IEC/IEEE Standard for Binary\r
+| Floating-Point Arithmetic.\r
+*----------------------------------------------------------------------------*/\r
+float64_t softfloat_addMagsF64( uint_fast64_t, uint_fast64_t, bool );\r
+/*----------------------------------------------------------------------------\r
+| Returns the result of subtracting the absolute values of the double-\r
+| precision floating-point values `a' and `b'. If `zSign' is 1, the\r
+| difference is negated before being returned. `zSign' is ignored if the\r
+| result is a NaN. The subtraction is performed according to the IEC/IEEE\r
+| Standard for Binary Floating-Point Arithmetic.\r
+*----------------------------------------------------------------------------*/\r
+float64_t softfloat_subMagsF64( uint_fast64_t, uint_fast64_t, bool );\r
+/*----------------------------------------------------------------------------\r
+*----------------------------------------------------------------------------*/\r
+float64_t\r
+ softfloat_mulAddF64( int, uint_fast64_t, uint_fast64_t, uint_fast64_t );\r
+\r
--- /dev/null
+
+/*============================================================================
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 3.
+
+*** UPDATE
+
+Written by John R. Hauser. This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704. Funding was partially provided by the
+National Science Foundation under grant MIP-9311980. The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+#include <stdbool.h>
+#include <stdint.h>
+
+/*** CHANGE TO USE `fast' INTEGER TYPES? ***/
+/*** ADD 80-BIT FUNCTIONS? ***/
+
+/*----------------------------------------------------------------------------
+| Multi-word unsigned integer containers.  Field `vN' holds the piece of the
+| value whose least-significant bit is bit N (64 bits wide, except for the
+| 16-bit `v64' of `uintx80').  When `LITTLEENDIAN' is defined the fields are
+| declared from least to most significant; otherwise they are declared from
+| most to least significant.  Both variants must expose the same field names
+| so that code using them compiles for either byte order.
+*----------------------------------------------------------------------------*/
+#ifdef LITTLEENDIAN
+struct uintx80 { uint64_t v0; uint16_t v64; };
+struct uint128 { uint64_t v0, v64; };
+struct uint192 { uint64_t v0, v64, v128; };
+struct uint256 { uint64_t v0, v64, v128, v192; };
+#else
+struct uintx80 { uint16_t v64; uint64_t v0; };
+struct uint128 { uint64_t v64, v0; };
+struct uint192 { uint64_t v128, v64, v0; };
+/* The top word starts at bit 192; it was misnamed `v256' here, which did */
+/* not match the little-endian declaration above.                         */
+struct uint256 { uint64_t v192, v128, v64, v0; };
+#endif
+
+/*----------------------------------------------------------------------------
+| Result types of the `...ExtraRightJam' shift routines: the shifted value
+| (`v', or the pair `v64'/`v0') together with an `extra' word that carries
+| information about the bits shifted off.
+*----------------------------------------------------------------------------*/
+struct uint64_extra {
+    uint64_t v;
+    uint64_t extra;
+};
+struct uint128_extra {
+    uint64_t v64;
+    uint64_t v0;
+    uint64_t extra;
+};
+
+
+/*** SHIFT COUNTS CANNOT BE ZERO. MUST CHECK BEFORE CALLING! ***/
+
+
+/*----------------------------------------------------------------------------
+| Tests two 128-bit values for equality.  The first value has high word `a64'
+| and low word `a0'; the second has high word `b64' and low word `b0'.
+| Returns true when both words match, false otherwise.
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 1 <= INLINE_LEVEL )
+INLINE bool
+    softfloat_eq128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
+{
+    if ( a64 != b64 ) return false;
+    return a0 == b0;
+}
+#else
+bool softfloat_eq128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
+#endif
+
+/*----------------------------------------------------------------------------
+| Unsigned 128-bit "less than or equal" comparison.  The first value has high
+| word `a64' and low word `a0'; the second has high word `b64' and low word
+| `b0'.  The high words decide the result unless they are equal, in which
+| case the low words are compared.
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 1 <= INLINE_LEVEL )
+INLINE bool
+    softfloat_le128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
+{
+    if ( a64 != b64 ) return a64 < b64;
+    return a0 <= b0;
+}
+#else
+bool softfloat_le128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
+#endif
+
+/*----------------------------------------------------------------------------
+| Unsigned 128-bit "less than" comparison.  The first value has high word
+| `a64' and low word `a0'; the second has high word `b64' and low word `b0'.
+| The high words decide the result unless they are equal, in which case the
+| low words are compared.
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 1 <= INLINE_LEVEL )
+INLINE bool
+    softfloat_lt128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
+{
+    if ( a64 != b64 ) return a64 < b64;
+    return a0 < b0;
+}
+#else
+bool softfloat_lt128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
+#endif
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value with high word `a64' and low word `a0' left by
+| `count' bits and returns it as a `struct uint128'.  Bits shifted off the top
+| are lost.  `count' must be in the range 1 to 63: with a count of 0 the whole
+| of `a0' would be ORed into the high word.
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 2 <= INLINE_LEVEL )
+INLINE struct uint128
+    softfloat_shortShift128Left( uint64_t a64, uint64_t a0, unsigned int count )
+{
+    /* Right-shift distance (64 - count, reduced modulo 64) that isolates */
+    /* the bits of `a0' crossing into the high word.                       */
+    const unsigned int spill = ( - count ) & 63;
+    struct uint128 result;
+    result.v0 = a0<<count;
+    result.v64 = ( a64<<count ) | ( a0>>spill );
+    return result;
+}
+#else
+struct uint128 softfloat_shortShift128Left( uint64_t, uint64_t, unsigned int );
+#endif
+
+/*----------------------------------------------------------------------------
+| Shifts the 192-bit value made of `a128' (most significant), `a64', and `a0'
+| (least significant) left by `count' bits and returns it as a
+| `struct uint192'.  Bits shifted off the top are lost.  `count' must be in
+| the range 1 to 63: with a count of 0 each lower word would be ORed whole
+| into the word above it.
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 3 <= INLINE_LEVEL )
+INLINE struct uint192
+    softfloat_shortShift192Left(
+        uint64_t a128, uint64_t a64, uint64_t a0, unsigned int count )
+{
+    /* Right-shift distance (64 - count, reduced modulo 64) that isolates */
+    /* the bits of a word crossing into the next more significant word.   */
+    const unsigned int spill = ( - count ) & 63;
+    struct uint192 result;
+    result.v0 = a0<<count;
+    result.v64 = ( a64<<count ) | ( a0>>spill );
+    result.v128 = ( a128<<count ) | ( a64>>spill );
+    return result;
+}
+#else
+struct uint192
+    softfloat_shortShift192Left( uint64_t, uint64_t, uint64_t, unsigned int );
+#endif
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by `count' bits and returns the result.  If any nonzero
+| bits are shifted off, they are ``jammed'' into the result by setting its
+| least-significant bit to 1.  `count' may be arbitrarily large: for 32 or
+| more the result is 1 if `a' is nonzero and 0 otherwise.  `count' must not be
+| 0, because the jam bit would then be set for every nonzero `a'.
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 2 <= INLINE_LEVEL )
+INLINE uint32_t softfloat_shift32RightJam( uint32_t a, unsigned int count )
+{
+    uint32_t shiftedOut;
+    if ( 32 <= count ) return ( a != 0 );
+    /* Move the bits about to be dropped to the top of a 32-bit word so */
+    /* that they can be tested for being nonzero.                        */
+    shiftedOut = a<<( ( - count ) & 31 );
+    return a>>count | ( shiftedOut != 0 );
+}
+#else
+uint32_t softfloat_shift32RightJam( uint32_t, unsigned int );
+#endif
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by exactly one bit.  The bit shifted off is ``jammed''
+| into the result by ORing it into the least-significant bit.
+| NOTE(review): this routine is guarded by `INLINE' alone, whereas its
+| neighbors test `INLINE_LEVEL' -- confirm this is intended.
+*----------------------------------------------------------------------------*/
+#if defined INLINE
+INLINE uint32_t softfloat_shortShift32Right1Jam( uint32_t a )
+{
+    uint32_t sticky = a & 1;
+    return ( a>>1 ) | sticky;
+}
+#else
+uint32_t softfloat_shortShift32Right1Jam( uint32_t );
+#endif
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by `count' bits and returns the result.  If any nonzero
+| bits are shifted off, they are ``jammed'' into the result by setting its
+| least-significant bit to 1.  `count' may be arbitrarily large: for 64 or
+| more the result is 1 if `a' is nonzero and 0 otherwise.  `count' must not be
+| 0, because the jam bit would then be set for every nonzero `a'.
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 3 <= INLINE_LEVEL )
+INLINE uint64_t softfloat_shift64RightJam( uint64_t a, unsigned int count )
+{
+    uint64_t shiftedOut;
+    if ( 64 <= count ) return ( a != 0 );
+    /* Move the bits about to be dropped to the top of a 64-bit word so */
+    /* that they can be tested for being nonzero.                        */
+    shiftedOut = a<<( ( - count ) & 63 );
+    return a>>count | ( shiftedOut != 0 );
+}
+#else
+uint64_t softfloat_shift64RightJam( uint64_t, unsigned int );
+#endif
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by `count' bits, where `count' must be less than 64, and
+| returns the result.  If any of the low `count' bits of `a' are nonzero, the
+| least-significant bit of the result is set to 1.
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 2 <= INLINE_LEVEL )
+INLINE uint64_t
+    softfloat_shortShift64RightJam( uint64_t a, unsigned int count )
+{
+    /* Mask covering exactly the bits that the shift discards. */
+    uint64_t droppedMask = ( (uint64_t) 1<<count ) - 1;
+    return ( a>>count ) | ( ( a & droppedMask ) != 0 );
+}
+#else
+uint64_t softfloat_shortShift64RightJam( uint64_t, unsigned int );
+#endif
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by `count' bits and returns both the surviving bits and a
+| summary of what was shifted off.  (It helps to picture `a' and `extra' as
+| one fixed-point value with the binary point between them: `v' is then the
+| integer part after the shift and the returned `extra' the fraction.)
+|   - For `count' less than 64, `v' is `a' shifted right and the returned
+|     `extra' holds the bits shifted out of `a', left-justified, so the last
+|     bit shifted off is its most-significant bit.
+|   - For `count' equal to 64, `v' is 0 and the returned `extra' is `a'.
+|   - For larger `count', `v' is 0 and the returned `extra' is 1 if `a' is
+|     nonzero and 0 otherwise.
+| In every case the least-significant bit of the returned `extra' is also set
+| when the `extra' argument is nonzero.  `count' must not be 0.
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 3 <= INLINE_LEVEL )
+INLINE struct uint64_extra
+    softfloat_shift64ExtraRightJam(
+        uint64_t a, uint64_t extra, unsigned int count )
+{
+    struct uint64_extra result;
+    if ( 64 < count ) {
+        result.v = 0;
+        result.extra = ( a != 0 );
+    } else if ( count == 64 ) {
+        result.v = 0;
+        result.extra = a;
+    } else {
+        result.v = a>>count;
+        result.extra = a<<( ( - count ) & 63 );
+    }
+    if ( extra != 0 ) result.extra |= 1;
+    return result;
+}
+#else
+struct uint64_extra
+    softfloat_shift64ExtraRightJam( uint64_t, uint64_t, unsigned int );
+#endif
+
+/*----------------------------------------------------------------------------
+| Short-count form of `softfloat_shift64ExtraRightJam': `count' must be in
+| the range 1 to 63.  `v' is `a' shifted right by `count'; the returned
+| `extra' holds the bits shifted out of `a', left-justified, with its least-
+| significant bit also set when the `extra' argument is nonzero.
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 2 <= INLINE_LEVEL )
+INLINE struct uint64_extra
+    softfloat_shortShift64ExtraRightJam(
+        uint64_t a, uint64_t extra, unsigned int count )
+{
+    struct uint64_extra result;
+    result.extra = a<<( ( - count ) & 63 );
+    if ( extra != 0 ) result.extra |= 1;
+    result.v = a>>count;
+    return result;
+}
+#else
+struct uint64_extra
+    softfloat_shortShift64ExtraRightJam( uint64_t, uint64_t, unsigned int );
+#endif
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'. Any bits shifted off are lost. The value
+| of `count' can be arbitrarily large; in particular, if `count' is greater
+| than 128, the result will be 0. The result is broken into two 64-bit pieces
+| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value with high word `a64' and low word `a0' right by
+| `count' bits and returns it as a `struct uint128'.  Bits shifted off the
+| bottom are lost.  `count' must be in the range 1 to 63: with a count of 0
+| the whole of `a64' would be ORed into the low word.
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 2 <= INLINE_LEVEL )
+INLINE struct uint128
+    softfloat_shortShift128Right( uint64_t a64, uint64_t a0, unsigned int count )
+{
+    /* Left-shift distance (64 - count, reduced modulo 64) that places the */
+    /* bits leaving `a64' at the top of the low word.                       */
+    const unsigned int spill = ( - count ) & 63;
+    struct uint128 result;
+    result.v0 = ( a64<<spill ) | ( a0>>count );
+    result.v64 = a64>>count;
+    return result;
+}
+#else
+struct uint128
+    softfloat_shortShift128Right( uint64_t, uint64_t, unsigned int );
+#endif
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value with high word `a64' and low word `a0' right by
+| `count' bits and returns it as a `struct uint128'.  If any nonzero bits are
+| shifted off, they are ``jammed'' into the result by setting the least-
+| significant bit of `v0' to 1.  `count' may be arbitrarily large: for 128 or
+| more the result is 1 if the input is nonzero and 0 otherwise.  `count' must
+| not be 0.
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 4 <= INLINE_LEVEL )
+INLINE struct uint128
+    softfloat_shift128RightJam( uint64_t a64, uint64_t a0, unsigned int count )
+{
+    struct uint128 result;
+    unsigned int lowCount, spill;
+    if ( 128 <= count ) {
+        /* Everything is shifted off; only the jam bit can survive. */
+        result.v64 = 0;
+        result.v0 = ( ( a64 | a0 ) != 0 );
+        return result;
+    }
+    lowCount = count & 63;
+    if ( 64 <= count ) {
+        /* All of `a0' and the low `lowCount' bits of `a64' are dropped. */
+        uint64_t droppedMask = ( (uint64_t) 1<<lowCount ) - 1;
+        result.v64 = 0;
+        result.v0 =
+            a64>>lowCount | ( ( ( a64 & droppedMask ) | a0 ) != 0 );
+        return result;
+    }
+    /* Here `count' is below 64, so `lowCount' equals `count'. */
+    spill = ( - count ) & 63;
+    result.v64 = a64>>lowCount;
+    result.v0 =
+        a64<<spill | a0>>count | ( (uint64_t) ( a0<<spill ) != 0 );
+    return result;
+}
+#else
+struct uint128
+    softfloat_shift128RightJam( uint64_t, uint64_t, unsigned int );
+#endif
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value with high word `a64' and low word `a0' right by
+| `count' bits and returns the surviving bits in `v64' and `v0' together with
+| an `extra' word summarizing what was shifted off.  (It helps to picture
+| `a64', `a0', and `extra' as one fixed-point value with the binary point
+| between `a0' and `extra': `v64'/`v0' are then the integer part after the
+| shift and the returned `extra' the fraction.)
+|   - The bits most recently shifted off are returned left-justified in
+|     `extra', so the last bit shifted off is its most-significant bit.  For
+|     `count' above 128 no such bits are kept and the word only records
+|     whether `a64' was nonzero.
+|   - The least-significant bit of the returned `extra' is also set when the
+|     `extra' argument, or any whole word skipped over by the shift, is
+|     nonzero.
+| `count' may be arbitrarily large but must not be 0.
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 5 <= INLINE_LEVEL )
+INLINE struct uint128_extra
+ softfloat_shift128ExtraRightJam(
+ uint64_t a64, uint64_t a0, uint64_t extra, unsigned int count )
+{
+ /* Unsigned negation: `negCount & 63' is 64 - count reduced modulo 64. */
+ unsigned int negCount = - count;
+ struct uint128_extra z;
+ if ( count < 64 ) {
+ /* Ordinary sub-word shift; the bits leaving `a0' go to the top of */
+ /* the extra word.                                                  */
+ z.v64 = a64>>count;
+ z.v0 = a64<<( negCount & 63 ) | a0>>count;
+ z.extra = a0<<( negCount & 63 );
+ } else {
+ z.v64 = 0;
+ if ( count == 64 ) {
+ /* Exact one-word shift: every word moves down one position. */
+ z.v0 = a64;
+ z.extra = a0;
+ } else {
+ /* `a0' is skipped over entirely, so it only contributes to the */
+ /* sticky bit computed at the end.                               */
+ extra |= a0;
+ if ( count < 128 ) {
+ z.v0 = a64>>( count & 63 );
+ z.extra = a64<<( negCount & 63 );
+ } else {
+ z.v0 = 0;
+ z.extra = ( count == 128 ) ? a64 : ( a64 != 0 );
+ }
+ }
+ }
+ /* Fold everything less significant into the lowest bit of `extra'. */
+ z.extra |= ( extra != 0 );
+ return z;
+}
+#else
+struct uint128_extra
+ softfloat_shift128ExtraRightJam( uint64_t, uint64_t, uint64_t, unsigned int );
+#endif
+
+/*----------------------------------------------------------------------------
+| Short-count form of `softfloat_shift128ExtraRightJam': `count' must be in
+| the range 1 to 63.  `v64' and `v0' hold the 128-bit input shifted right by
+| `count'; the returned `extra' holds the bits shifted out of `a0', left-
+| justified, with its least-significant bit also set when the `extra'
+| argument is nonzero.
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 3 <= INLINE_LEVEL )
+INLINE struct uint128_extra
+    softfloat_shortShift128ExtraRightJam(
+        uint64_t a64, uint64_t a0, uint64_t extra, unsigned int count )
+{
+    /* Left-shift distance (64 - count, reduced modulo 64) that places the */
+    /* bits leaving a word at the top of the next less significant word.   */
+    const unsigned int spill = ( - count ) & 63;
+    struct uint128_extra result;
+    result.extra = ( a0<<spill ) | ( extra != 0 );
+    result.v0 = ( a64<<spill ) | ( a0>>count );
+    result.v64 = a64>>count;
+    return result;
+}
+#else
+struct uint128_extra
+    softfloat_shortShift128ExtraRightJam(
+        uint64_t, uint64_t, uint64_t, unsigned int );
+#endif
+
+extern const uint8_t softfloat_countLeadingZeros8[ 256 ];
+
+/*----------------------------------------------------------------------------
+| Counts the 0 bits above the most-significant 1 bit of `a'.  The value is
+| narrowed in steps of 16 and then 8 bits until the bits of interest sit in
+| the top byte, which is looked up in `softfloat_countLeadingZeros8'.  A zero
+| `a' yields 24 plus table entry 0 (the original comment documents the total
+| as 32).
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 2 <= INLINE_LEVEL )
+INLINE int softfloat_countLeadingZeros32( uint32_t a )
+{
+    int zeros = 0;
+    if ( ! ( a>>16 ) ) {
+        /* Upper half is empty: skip it. */
+        zeros = 16;
+        a <<= 16;
+    }
+    if ( ! ( a>>24 ) ) {
+        /* Top byte is empty: skip it. */
+        zeros += 8;
+        a <<= 8;
+    }
+    return zeros + softfloat_countLeadingZeros8[ a>>24 ];
+}
+#else
+int softfloat_countLeadingZeros32( uint32_t );
+#endif
+
+/*----------------------------------------------------------------------------
+| Counts the 0 bits above the most-significant 1 bit of `a'.  The 32-bit half
+| containing the leading 1 bit is selected first (the low half if the high
+| half is zero), then narrowed in steps of 16 and 8 bits until the bits of
+| interest sit in the top byte, which is looked up in
+| `softfloat_countLeadingZeros8'.  A zero `a' yields 56 plus table entry 0
+| (the original comment documents the total as 64).
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 4 <= INLINE_LEVEL )
+INLINE int softfloat_countLeadingZeros64( uint64_t a )
+{
+    uint32_t word = a>>32;
+    int zeros = 0;
+    if ( word == 0 ) {
+        /* High half is empty: count within the low half instead. */
+        zeros = 32;
+        word = a;
+    }
+    /*------------------------------------------------------------------------
+    | From here, the result is `zeros' plus the leading-zero count of `word'.
+    *------------------------------------------------------------------------*/
+    if ( ! ( word>>16 ) ) {
+        zeros += 16;
+        word <<= 16;
+    }
+    if ( ! ( word>>24 ) ) {
+        zeros += 8;
+        word <<= 8;
+    }
+    return zeros + softfloat_countLeadingZeros8[ word>>24 ];
+}
+#else
+int softfloat_countLeadingZeros64( uint64_t );
+#endif
+
+/*----------------------------------------------------------------------------
+| Adds two 128-bit values and returns the sum as a `struct uint128'.  The
+| first operand has high word `a64' and low word `a0'; the second has high
+| word `b64' and low word `b0'.  Addition is modulo 2^128, so any carry out of
+| the high word is lost.
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 2 <= INLINE_LEVEL )
+INLINE struct uint128
+    softfloat_add128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
+{
+    struct uint128 sum;
+    uint64_t low = a0 + b0;
+    /* The low-word sum wrapped around exactly when it is below `a0'. */
+    uint64_t carry = ( low < a0 );
+    sum.v0 = low;
+    sum.v64 = a64 + b64 + carry;
+    return sum;
+}
+#else
+struct uint128 softfloat_add128( uint64_t, uint64_t, uint64_t, uint64_t );
+#endif
+
+/*----------------------------------------------------------------------------
+| Adds two 192-bit values and returns the sum as a `struct uint192'.  The
+| first operand is made of `a128' (most significant), `a64', and `a0' (least
+| significant); the second of `b128', `b64', and `b0'.  Addition is modulo
+| 2^192, so any carry out of the top word is lost.
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 3 <= INLINE_LEVEL )
+INLINE struct uint192
+ softfloat_add192(
+ uint64_t a128,
+ uint64_t a64,
+ uint64_t a0,
+ uint64_t b128,
+ uint64_t b64,
+ uint64_t b0
+ )
+{
+ struct uint192 z;
+ unsigned int carry64, carry128;
+ z.v0 = a0 + b0;
+ /* A wrapped sum is smaller than either addend. */
+ carry64 = ( z.v0 < a0 );
+ z.v64 = a64 + b64;
+ carry128 = ( z.v64 < a64 );
+ z.v128 = a128 + b128;
+ /* Propagate the low carry; adding it can itself wrap the middle word, */
+ /* in which case the middle word becomes smaller than the carry added. */
+ z.v64 += carry64;
+ carry128 += ( z.v64 < carry64 );
+ z.v128 += carry128;
+ return z;
+}
+#else
+struct uint192
+ softfloat_add192(
+ uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t );
+#endif
+
+/*----------------------------------------------------------------------------
+| Subtracts one 128-bit value from another and returns the difference as a
+| `struct uint128'.  The minuend has high word `a64' and low word `a0'; the
+| subtrahend has high word `b64' and low word `b0'.  Subtraction is modulo
+| 2^128, so any borrow out of the high word is lost.
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 2 <= INLINE_LEVEL )
+INLINE struct uint128
+    softfloat_sub128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
+{
+    struct uint128 diff;
+    /* The low-word subtraction borrows exactly when `a0' is below `b0'. */
+    uint64_t borrow = ( a0 < b0 );
+    diff.v0 = a0 - b0;
+    diff.v64 = a64 - b64 - borrow;
+    return diff;
+}
+#else
+struct uint128 softfloat_sub128( uint64_t, uint64_t, uint64_t, uint64_t );
+#endif
+
+/*----------------------------------------------------------------------------
+| Subtracts one 192-bit value from another and returns the difference as a
+| `struct uint192'.  The minuend is made of `a128' (most significant), `a64',
+| and `a0' (least significant); the subtrahend of `b128', `b64', and `b0'.
+| Subtraction is modulo 2^192, so any borrow out of the top word is lost.
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 3 <= INLINE_LEVEL )
+INLINE struct uint192
+ softfloat_sub192(
+ uint64_t a128,
+ uint64_t a64,
+ uint64_t a0,
+ uint64_t b128,
+ uint64_t b64,
+ uint64_t b0
+ )
+{
+ struct uint192 z;
+ unsigned int borrow64, borrow128;
+ z.v0 = a0 - b0;
+ borrow64 = ( a0 < b0 );
+ z.v64 = a64 - b64;
+ borrow128 = ( a64 < b64 );
+ z.v128 = a128 - b128;
+ /* Taking the low borrow out of the middle word borrows again from the */
+ /* top word when the middle word is smaller than that borrow; this must */
+ /* be tested before the middle word is updated.                         */
+ borrow128 += ( z.v64 < borrow64 );
+ z.v64 -= borrow64;
+ z.v128 -= borrow128;
+ return z;
+}
+#else
+struct uint192
+ softfloat_sub192(
+ uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t );
+#endif
+
+/*----------------------------------------------------------------------------
+| Multiplies `a' by `b' to obtain the full 128-bit product, returned as a
+| `struct uint128' with the upper 64 bits in `v64' and the lower 64 bits in
+| `v0'.  Each operand is split into 32-bit halves and the four partial
+| products are combined.
+*----------------------------------------------------------------------------*/
+#if defined INLINE_LEVEL && ( 4 <= INLINE_LEVEL )
+INLINE struct uint128 softfloat_mul64To128( uint64_t a, uint64_t b )
+{
+ /* Upper and lower 32-bit halves of each operand. */
+ uint32_t a32 = a>>32;
+ uint32_t a0 = a;
+ uint32_t b32 = b>>32;
+ uint32_t b0 = b;
+ struct uint128 z;
+ uint64_t mid1, mid2, mid;
+ z.v0 = (uint64_t) a0 * b0;
+ mid1 = (uint64_t) a32 * b0;
+ mid2 = (uint64_t) a0 * b32;
+ z.v64 = (uint64_t) a32 * b32;
+ /* The two cross products carry weight 2^32; their sum may wrap. */
+ mid = mid1 + mid2;
+ /* A wrap of `mid' is worth 2^96, i.e. bit 32 of the high word; the */
+ /* upper half of `mid' lands in the low half of the high word.      */
+ z.v64 += ( (uint64_t) ( mid < mid1 ) )<<32 | mid>>32;
+ /* The lower half of `mid' lands in the upper half of the low word. */
+ mid <<= 32;
+ z.v0 += mid;
+ /* Carry from the low word into the high word. */
+ z.v64 += ( z.v0 < mid );
+ return z;
+}
+#else
+struct uint128 softfloat_mul64To128( uint64_t, uint64_t );
+#endif
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a64' (upper 64 bits)
+| and `a0' (lower 64 bits) by `b' to obtain a 192-bit product.  The arguments
+| are passed in the order `a64', `a0', `b'.  The product is returned by value
+| as a `struct uint192' whose fields `v128', `v64', and `v0' hold the 64-bit
+| pieces from most to least significant.
+*----------------------------------------------------------------------------*/
+struct uint192 softfloat_mul128By64To192( uint64_t, uint64_t, uint64_t );
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a64' and `a0' by the
+| 128-bit value formed by concatenating `b64' and `b0' to obtain a 256-bit
+| product.  The arguments are passed in the order `a64', `a0', `b64', `b0'.
+| The product is returned by value as a `struct uint256' whose fields `v192',
+| `v128', `v64', and `v0' hold the 64-bit pieces from most to least
+| significant.
+*----------------------------------------------------------------------------*/
+struct uint256 softfloat_mul128To256( uint64_t, uint64_t, uint64_t, uint64_t );
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the 64-bit integer quotient obtained by dividing
+| `b' into the 128-bit value formed by concatenating `a64' (upper 64 bits) and
+| `a0' (lower 64 bits).  The arguments are passed in the order `a64', `a0',
+| `b'.  The divisor `b' must be at least 2^63.  If q is the exact quotient
+| truncated toward zero, the approximation returned lies between q and q + 2
+| inclusive.  If the exact quotient q is larger than 64 bits, the maximum
+| positive 64-bit unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+uint64_t softfloat_estimateDiv128To64( uint64_t, uint64_t, uint64_t );
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the square root of the 32-bit significand given
+| by `a'.  The arguments are passed in the order `expA', `a'.  Considered as an
+| integer, `a' must be at least 2^31.  If bit 0 of `expA' (the least
+| significant bit) is 1, the integer returned approximates
+| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `expA'
+| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
+| case, the approximation returned lies strictly within +/-2 of the exact
+| value.
+*----------------------------------------------------------------------------*/
+uint32_t softfloat_estimateSqrt32( unsigned int, uint32_t );
+
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns the 128-bit sum ( a64:a0 ) + ( b64:b0 ), where `a64'/`b64' are the\r
+| upper 64 bits and `a0'/`b0' the lower 64 bits of each operand.  Addition is\r
+| modulo 2^128; a carry out of the upper half is discarded.\r
+*----------------------------------------------------------------------------*/\r
+struct uint128\r
+ softfloat_add128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )\r
+{\r
+    struct uint128 sum;\r
+\r
+    sum.v0 = a0 + b0;\r
+    /* The low half carried out exactly when the wrapped sum is below `a0'. */\r
+    sum.v64 = a64 + b64 + ( sum.v0 < a0 );\r
+    return sum;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns the 192-bit sum ( a128:a64:a0 ) + ( b128:b64:b0 ), each operand\r
+| being passed as three 64-bit words from most to least significant.\r
+| Addition is modulo 2^192; a carry out of the top word is discarded.\r
+*----------------------------------------------------------------------------*/\r
+struct uint192\r
+ softfloat_add192(\r
+     uint64_t a128,\r
+     uint64_t a64,\r
+     uint64_t a0,\r
+     uint64_t b128,\r
+     uint64_t b64,\r
+     uint64_t b0\r
+ )\r
+{\r
+    struct uint192 sum;\r
+    uint64_t lowWord, midWord;\r
+    unsigned int lowCarry, midCarry;\r
+\r
+    lowWord = a0 + b0;\r
+    lowCarry = ( lowWord < a0 );\r
+    midWord = a64 + b64;\r
+    midCarry = ( midWord < a64 );\r
+    /* Folding the low carry into the middle word may itself carry out. */\r
+    midWord += lowCarry;\r
+    midCarry += ( midWord < lowCarry );\r
+    sum.v0 = lowWord;\r
+    sum.v64 = midWord;\r
+    sum.v128 = a128 + b128 + midCarry;\r
+    return sum;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "specialize.h"\r
+/*----------------------------------------------------------------------------\r
+| Builds the float32 result for operands whose raw bit patterns are `uiA' and\r
+| `uiB' by summing their aligned significands; `signZ' is the sign bit placed\r
+| in the result.  Cases handled by the code below:\r
+|  - an exponent field of 0xFF with a nonzero fraction is routed through\r
+|    `softfloat_propagateNaNF32UI'; with a zero fraction the result is `uiA'\r
+|    or a value packed with exponent field 0xFF and zero fraction;\r
+|  - when both exponent fields are zero, the result is packed directly from\r
+|    ( uiA + uiB ) with bit 31 masked off;\r
+|  - otherwise the sum is handed to `softfloat_roundPackToF32'.\r
+*----------------------------------------------------------------------------*/\r
+float32_t\r
+ softfloat_addMagsF32( uint_fast32_t uiA, uint_fast32_t uiB, bool signZ )\r
+{\r
+ int_fast16_t expA;\r
+ uint_fast32_t sigA;\r
+ int_fast16_t expB;\r
+ uint_fast32_t sigB;\r
+ int_fast16_t expDiff;\r
+ uint_fast32_t uiZ;\r
+ int_fast16_t expZ;\r
+ uint_fast32_t sigZ;\r
+ union ui32_f32 uZ;\r
+\r
+ expA = expF32UI( uiA );\r
+ sigA = fracF32UI( uiA );\r
+ expB = expF32UI( uiB );\r
+ sigB = fracF32UI( uiB );\r
+ expDiff = expA - expB;\r
+ sigA <<= 6;\r
+ sigB <<= 6;\r
+ if ( ! expDiff ) { /* both operands have the same exponent field */\r
+ if ( expA == 0xFF ) {\r
+ if ( sigA | sigB ) goto propagateNaN;\r
+ uiZ = uiA;\r
+ goto uiZ;\r
+ }\r
+ if ( ! expA ) { /* both exponent fields are zero: add the raw patterns */\r
+ uiZ = packToF32UI( signZ, 0, ( uiA + uiB ) & 0x7FFFFFFF );\r
+ goto uiZ;\r
+ }\r
+ expZ = expA;\r
+ sigZ = 0x40000000 + sigA + sigB;\r
+ } else {\r
+ if ( expDiff < 0 ) { /* B has the larger exponent field: align A to it */\r
+ if ( expB == 0xFF ) {\r
+ if ( sigB ) goto propagateNaN;\r
+ uiZ = packToF32UI( signZ, 0xFF, 0 );\r
+ goto uiZ;\r
+ }\r
+ expZ = expB;\r
+ sigA += expA ? 0x20000000 : sigA; /* zero exponent field: double sigA instead of adding 0x20000000 */\r
+ sigA = softfloat_shift32RightJam( sigA, - expDiff );\r
+ } else { /* A has the larger exponent field: align B to it */\r
+ if ( expA == 0xFF ) {\r
+ if ( sigA ) goto propagateNaN;\r
+ uiZ = uiA;\r
+ goto uiZ;\r
+ }\r
+ expZ = expA;\r
+ sigB += expB ? 0x20000000 : sigB; /* zero exponent field: double sigB instead of adding 0x20000000 */\r
+ sigB = softfloat_shift32RightJam( sigB, expDiff );\r
+ }\r
+ sigZ = 0x20000000 + sigA + sigB;\r
+ if ( sigZ < 0x40000000 ) { /* sum stayed below bit 30: shift left one place */\r
+ --expZ;\r
+ sigZ <<= 1;\r
+ }\r
+ }\r
+ return softfloat_roundPackToF32( signZ, expZ, sigZ );\r
+ propagateNaN:\r
+ uiZ = softfloat_propagateNaNF32UI( uiA, uiB );\r
+ uiZ:\r
+ uZ.ui = uiZ; /* reinterpret the assembled bit pattern as float32_t */\r
+ return uZ.f;\r
+\r
+}\r
+\r
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "primitives.h"
+#include "internals.h"
+#include "specialize.h"
+/*----------------------------------------------------------------------------
+| Builds the float64 result for operands whose raw bit patterns are `uiA' and
+| `uiB' by summing their aligned significands; `signZ' is the sign bit placed
+| in the result.  Cases handled by the code below:
+|  - an exponent field of 0x7FF with a nonzero fraction is routed through
+|    `softfloat_propagateNaNF64UI'; with a zero fraction the result is `uiA'
+|    or a value packed with exponent field 0x7FF and zero fraction;
+|  - when both exponent fields are zero, the result is packed directly from
+|    ( uiA + uiB ) with bit 63 masked off;
+|  - otherwise the sum is handed to `softfloat_roundPackToF64'.
+*----------------------------------------------------------------------------*/
+float64_t
+ softfloat_addMagsF64( uint_fast64_t uiA, uint_fast64_t uiB, bool signZ )
+{
+ int_fast16_t expA;
+ uint_fast64_t sigA;
+ int_fast16_t expB;
+ uint_fast64_t sigB;
+ int_fast16_t expDiff;
+ uint_fast64_t uiZ;
+ int_fast16_t expZ;
+ uint_fast64_t sigZ;
+ union ui64_f64 uZ;
+
+ expA = expF64UI( uiA );
+ sigA = fracF64UI( uiA );
+ expB = expF64UI( uiB );
+ sigB = fracF64UI( uiB );
+ expDiff = expA - expB;
+ sigA <<= 9;
+ sigB <<= 9;
+ if ( ! expDiff ) { /* both operands have the same exponent field */
+ if ( expA == 0x7FF ) {
+ if ( sigA | sigB ) goto propagateNaN;
+ uiZ = uiA;
+ goto uiZ;
+ }
+ if ( ! expA ) { /* both exponent fields are zero: add the raw patterns */
+ uiZ =
+ packToF64UI(
+ signZ, 0, ( uiA + uiB ) & UINT64_C( 0x7FFFFFFFFFFFFFFF ) );
+ goto uiZ;
+ }
+ expZ = expA;
+ sigZ = UINT64_C( 0x4000000000000000 ) + sigA + sigB;
+ } else {
+ if ( expDiff < 0 ) { /* B has the larger exponent field: align A to it */
+ if ( expB == 0x7FF ) {
+ if ( sigB ) goto propagateNaN;
+ uiZ = packToF64UI( signZ, 0x7FF, 0 );
+ goto uiZ;
+ }
+ expZ = expB;
+ sigA += expA ? UINT64_C( 0x2000000000000000 ) : sigA; /* zero exponent field: double sigA instead */
+ sigA = softfloat_shift64RightJam( sigA, - expDiff );
+ } else { /* A has the larger exponent field: align B to it */
+ if ( expA == 0x7FF ) {
+ if ( sigA ) goto propagateNaN;
+ uiZ = uiA;
+ goto uiZ;
+ }
+ expZ = expA;
+ sigB += expB ? UINT64_C( 0x2000000000000000 ) : sigB; /* zero exponent field: double sigB instead */
+ sigB = softfloat_shift64RightJam( sigB, expDiff );
+ }
+ sigZ = UINT64_C( 0x2000000000000000 ) + sigA + sigB;
+ if ( sigZ < UINT64_C( 0x4000000000000000 ) ) { /* sum stayed below bit 62: shift left one place */
+ --expZ;
+ sigZ <<= 1;
+ }
+ }
+ return softfloat_roundPackToF64( signZ, expZ, sigZ );
+ propagateNaN:
+ uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
+ uiZ:
+ uZ.ui = uiZ; /* reinterpret the assembled bit pattern as float64_t */
+ return uZ.f;
+
+}
+
--- /dev/null
+
+#include <stdint.h>
+#include "primitives.h"
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading zero bits in the 32-bit value `a' (32 when
+| `a' is zero).  The value is narrowed to its top byte in two steps and the
+| remainder is looked up in `softfloat_countLeadingZeros8'.
+*----------------------------------------------------------------------------*/
+int softfloat_countLeadingZeros32( uint32_t a )
+{
+    int zeros = 0;
+
+    /* Upper 16 bits all clear: skip over them. */
+    if ( ! ( a>>16 ) ) {
+        zeros += 16;
+        a <<= 16;
+    }
+    /* Top byte clear: skip over it. */
+    if ( ! ( a>>24 ) ) {
+        zeros += 8;
+        a <<= 8;
+    }
+    return zeros + softfloat_countLeadingZeros8[ a>>24 ];
+
+}
+
--- /dev/null
+
+#include <stdint.h>
+#include "primitives.h"
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading zero bits in the 64-bit value `a' (64 when
+| `a' is zero).  The nonzero-most 32-bit half is selected first, then the
+| count proceeds as for a 32-bit value using `softfloat_countLeadingZeros8'.
+*----------------------------------------------------------------------------*/
+int softfloat_countLeadingZeros64( uint64_t a )
+{
+    uint32_t word = a>>32;
+    int zeros = 0;
+
+    /* Upper half is empty: all 32 of its bits are leading zeros. */
+    if ( ! word ) {
+        zeros = 32;
+        word = a;
+    }
+    /*------------------------------------------------------------------------
+    | From here, the result is `zeros' plus the leading zeros of `word'.
+    *------------------------------------------------------------------------*/
+    if ( ! ( word>>16 ) ) {
+        zeros += 16;
+        word <<= 16;
+    }
+    if ( ! ( word>>24 ) ) {
+        zeros += 8;
+        word <<= 8;
+    }
+    return zeros + softfloat_countLeadingZeros8[ word>>24 ];
+
+}
+
--- /dev/null
+
+#include <stdint.h>
+#include "platform.h"
+#include "primitives.h"
+/*----------------------------------------------------------------------------
+| Lookup table indexed by an 8-bit value: entry `i' is the number of leading
+| zero bits in `i' viewed as an 8-bit quantity (8 for 0, 7 for 1, ..., 0 for
+| 128 through 255).
+*----------------------------------------------------------------------------*/
+const uint8_t softfloat_countLeadingZeros8[ 256 ] = {
+ 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "primitives.h"
+
+/*----------------------------------------------------------------------------
+| Returns true if the 128-bit value ( a64:a0 ) equals the 128-bit value
+| ( b64:b0 ), and false otherwise.
+*----------------------------------------------------------------------------*/
+bool softfloat_eq128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
+{
+
+    /* The values are equal exactly when no bit differs in either half. */
+    return ! ( ( a64 ^ b64 ) | ( a0 ^ b0 ) );
+
+}
+
--- /dev/null
+
+#include <stdint.h>
+#include "platform.h"
+#include "primitives.h"
+
+/*----------------------------------------------------------------------------
+| Estimates the 64-bit quotient of the 128-bit value ( a64:a0 ) divided by
+| `b', building the quotient 32 bits at a time from divisions by the upper 32
+| bits of `b'.  Returns the all-ones value when `b' <= `a64'.  The upper 32
+| bits of `b' are used as a divisor, so they must be nonzero.
+*----------------------------------------------------------------------------*/
+uint64_t softfloat_estimateDiv128To64( uint64_t a64, uint64_t a0, uint64_t b )
+{
+    uint32_t bHi;
+    uint64_t bHiShifted, quot, tail;
+    struct uint128 prod, remainder;
+
+    if ( b <= a64 ) return UINT64_C( 0xFFFFFFFFFFFFFFFF );
+    bHi = b>>32;
+    bHiShifted = (uint64_t) bHi<<32;
+    /* Upper 32 quotient bits, saturated when the division would overflow. */
+    if ( bHiShifted <= a64 ) {
+        quot = UINT64_C( 0xFFFFFFFF00000000 );
+    } else {
+        quot = ( a64 / bHi )<<32;
+    }
+    prod = softfloat_mul64To128( b, quot );
+    remainder = softfloat_sub128( a64, a0, prod.v64, prod.v0 );
+    /* While the remainder is negative (top bit set), step the estimate down
+       by 2^32 and add b * 2^32 back into the remainder. */
+    while ( remainder.v64>>63 ) {
+        quot -= UINT64_C( 0x100000000 );
+        remainder =
+            softfloat_add128( remainder.v64, remainder.v0, bHi, b<<32 );
+    }
+    /* Lower 32 quotient bits from the middle 64 bits of the remainder. */
+    tail = ( remainder.v64<<32 ) | ( remainder.v0>>32 );
+    if ( bHiShifted <= tail ) {
+        quot |= 0xFFFFFFFF;
+    } else {
+        quot |= tail / bHi;
+    }
+    return quot;
+
+}
+
--- /dev/null
+
+#include <stdint.h>
+#include "platform.h"
+#include "primitives.h"
+
+/*----------------------------------------------------------------------------
+| Returns an estimate of a square root derived from the 32-bit significand
+| `a'; bit 0 of `expA' selects between the odd-exponent and even-exponent
+| variants of the computation.  A first guess is formed from the top bits of
+| `a' minus a table adjustment selected by bits 30..27 of `a', and is then
+| refined using integer divisions.
+*----------------------------------------------------------------------------*/
+uint32_t softfloat_estimateSqrt32( unsigned int expA, uint32_t a )
+{
+    static const uint16_t oddAdjust[ 16 ] = {
+        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
+        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
+    };
+    static const uint16_t evenAdjust[ 16 ] = {
+        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
+        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
+    };
+    const unsigned int slot = ( a>>27 ) & 0xF;
+    uint32_t est;
+    union { uint32_t ui; int32_t i; } pun;
+
+    if ( ! ( expA & 1 ) ) {
+        est = 0x8000 + ( a>>17 ) - evenAdjust[ slot ];
+        est += a / est;
+        if ( 0x20000 <= est ) {
+            est = 0xFFFF8000;
+        } else {
+            est <<= 15;
+        }
+        if ( est <= a ) {
+            /* Shift `a' right by one as a signed 32-bit quantity. */
+            pun.ui = a;
+            return pun.i>>1;
+        }
+    } else {
+        est = 0x4000 + ( a>>17 ) - oddAdjust[ slot ];
+        est = ( ( a / est )<<14 ) + ( est<<15 );
+        a >>= 1;
+    }
+    return (uint32_t) ( ( (uint64_t) a<<31 ) / est ) + ( est>>1 );
+
+}
+
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns true if the 128-bit unsigned value ( a64:a0 ) is less than or equal\r
+| to the 128-bit unsigned value ( b64:b0 ), and false otherwise.\r
+*----------------------------------------------------------------------------*/\r
+bool softfloat_le128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )\r
+{\r
+\r
+    /* The upper halves decide unless they are equal. */\r
+    if ( a64 != b64 ) return ( a64 < b64 );\r
+    return ( a0 <= b0 );\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns true if the 128-bit unsigned value ( a64:a0 ) is strictly less than\r
+| the 128-bit unsigned value ( b64:b0 ), and false otherwise.\r
+*----------------------------------------------------------------------------*/\r
+bool softfloat_lt128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )\r
+{\r
+\r
+    /* The upper halves decide unless they are equal. */\r
+    if ( a64 != b64 ) return ( a64 < b64 );\r
+    return ( a0 < b0 );\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Multiplies the 128-bit value ( a64:a0 ) by `b' and returns the 192-bit\r
+| product in the `v128', `v64', and `v0' fields (most to least significant).\r
+*----------------------------------------------------------------------------*/\r
+struct uint192\r
+ softfloat_mul128By64To192( uint64_t a64, uint64_t a0, uint64_t b )\r
+{\r
+    struct uint128 lowProd, highProd;\r
+    struct uint192 prod;\r
+\r
+    lowProd = softfloat_mul64To128( a0, b );\r
+    highProd = softfloat_mul64To128( a64, b );\r
+    prod.v0 = lowProd.v0;\r
+    prod.v64 = highProd.v0 + lowProd.v64;\r
+    /* Carry out of the middle word goes into the top word. */\r
+    prod.v128 = highProd.v64 + ( prod.v64 < highProd.v0 );\r
+    return prod;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Multiplies the 128-bit value ( a64:a0 ) by the 128-bit value ( b64:b0 ) and\r
+| returns the 256-bit product in the `v192', `v128', `v64', and `v0' fields\r
+| (most to least significant).  The product is assembled from the four 64x64\r
+| partial products, propagating carries upward.\r
+*----------------------------------------------------------------------------*/\r
+struct uint256\r
+ softfloat_mul128To256( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )\r
+{\r
+    struct uint128 lowLow, highLow, highHigh, lowHigh;\r
+    struct uint256 prod;\r
+\r
+    lowLow = softfloat_mul64To128( a0, b0 );\r
+    highLow = softfloat_mul64To128( a64, b0 );\r
+    highHigh = softfloat_mul64To128( a64, b64 );\r
+    lowHigh = softfloat_mul64To128( a0, b64 );\r
+    prod.v0 = lowLow.v0;\r
+    /* Fold in a64 * b0. */\r
+    prod.v64 = highLow.v0 + lowLow.v64;\r
+    prod.v128 = highLow.v64 + ( prod.v64 < highLow.v0 );\r
+    /* Fold in a64 * b64. */\r
+    prod.v128 += highHigh.v0;\r
+    prod.v192 = highHigh.v64 + ( prod.v128 < highHigh.v0 );\r
+    /* Fold in a0 * b64, carrying through the two upper words. */\r
+    prod.v64 += lowHigh.v0;\r
+    lowHigh.v64 += ( prod.v64 < lowHigh.v0 );\r
+    prod.v128 += lowHigh.v64;\r
+    prod.v192 += ( prod.v128 < lowHigh.v64 );\r
+    return prod;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Multiplies `a' by `b' and returns the 128-bit product, upper half in `v64'\r
+| and lower half in `v0'.  The product is built from four 32x32-bit partial\r
+| products.\r
+*----------------------------------------------------------------------------*/\r
+struct uint128 softfloat_mul64To128( uint64_t a, uint64_t b )\r
+{\r
+    uint32_t aHi, aLo, bHi, bLo;\r
+    uint64_t crossHiLo, crossSum, crossLow;\r
+    struct uint128 prod;\r
+\r
+    aHi = a>>32;\r
+    aLo = a;\r
+    bHi = b>>32;\r
+    bLo = b;\r
+    /* Sum of the two cross products; it may wrap past 64 bits. */\r
+    crossHiLo = (uint64_t) aHi * bLo;\r
+    crossSum = crossHiLo + (uint64_t) aLo * bHi;\r
+    crossLow = crossSum<<32;\r
+    prod.v0 = (uint64_t) aLo * bLo + crossLow;\r
+    prod.v64 = (uint64_t) aHi * bHi;\r
+    /* A wrapped cross sum contributes 2^32 to the upper half. */\r
+    prod.v64 += ( (uint64_t) ( crossSum < crossHiLo ) )<<32 | crossSum>>32;\r
+    /* Carry out of the lower half. */\r
+    prod.v64 += ( prod.v0 < crossLow );\r
+    return prod;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "specialize.h"\r
+#include "softfloat.h"\r
+/*----------------------------------------------------------------------------\r
+| Combines the product of the float32 operands with raw bit patterns `uiA'\r
+| and `uiB' with the operand `uiC'.  `op' adjusts signs: when it equals\r
+| `softfloat_mulAdd_subC' the sign of C is inverted, and when it equals\r
+| `softfloat_mulAdd_subProd' the sign of the product is inverted.\r
+|  - Operands with exponent field 0xFF are dispatched to the NaN-propagation\r
+|    and `infProdArg' paths; the `invalid' path raises\r
+|    `softfloat_flag_invalid' and starts from `defaultNaNF32UI'.\r
+|  - A zero A or B yields `uiC', except that a zero C of opposite sign to the\r
+|    product yields a zero whose sign bit is set only when the rounding mode\r
+|    is `softfloat_round_min'.\r
+|  - Otherwise the 64-bit significand product is added to or subtracted from\r
+|    the aligned significand of C and the result is rounded by\r
+|    `softfloat_roundPackToF32'.\r
+*----------------------------------------------------------------------------*/\r
+float32_t\r
+ softfloat_mulAddF32(\r
+ int op, uint_fast32_t uiA, uint_fast32_t uiB, uint_fast32_t uiC )\r
+{\r
+ bool signA;\r
+ int_fast16_t expA;\r
+ uint_fast32_t sigA;\r
+ bool signB;\r
+ int_fast16_t expB;\r
+ uint_fast32_t sigB;\r
+ bool signC;\r
+ int_fast16_t expC;\r
+ uint_fast32_t sigC;\r
+ bool signProd;\r
+ uint_fast32_t magBits, uiZ;\r
+ struct exp16_sig32 normExpSig;\r
+ int_fast16_t expProd;\r
+ uint_fast64_t sigProd;\r
+ bool signZ;\r
+ int_fast16_t expZ;\r
+ uint_fast32_t sigZ;\r
+ int_fast16_t expDiff;\r
+ uint_fast64_t sigZ64, sigC64;\r
+ int shiftCount;\r
+ union ui32_f32 uZ;\r
+\r
+ signA = signF32UI( uiA );\r
+ expA = expF32UI( uiA );\r
+ sigA = fracF32UI( uiA );\r
+ signB = signF32UI( uiB );\r
+ expB = expF32UI( uiB );\r
+ sigB = fracF32UI( uiB );\r
+ signC = signF32UI( uiC ) ^ ( op == softfloat_mulAdd_subC );\r
+ expC = expF32UI( uiC );\r
+ sigC = fracF32UI( uiC );\r
+ signProd = signA ^ signB ^ ( op == softfloat_mulAdd_subProd );\r
+ if ( expA == 0xFF ) {\r
+ if ( sigA || ( ( expB == 0xFF ) && sigB ) ) goto propagateNaN_ABC;\r
+ magBits = expB | sigB; /* nonzero iff B is not a zero */\r
+ goto infProdArg;\r
+ }\r
+ if ( expB == 0xFF ) {\r
+ if ( sigB ) goto propagateNaN_ABC;\r
+ magBits = expA | sigA; /* nonzero iff A is not a zero */\r
+ goto infProdArg;\r
+ }\r
+ if ( expC == 0xFF ) {\r
+ if ( sigC ) {\r
+ uiZ = 0;\r
+ goto propagateNaN_ZC;\r
+ }\r
+ uiZ = uiC;\r
+ goto uiZ;\r
+ }\r
+ if ( ! expA ) {\r
+ if ( ! sigA ) goto zeroProd;\r
+ normExpSig = softfloat_normSubnormalF32Sig( sigA );\r
+ expA = normExpSig.exp;\r
+ sigA = normExpSig.sig;\r
+ }\r
+ if ( ! expB ) {\r
+ if ( ! sigB ) goto zeroProd;\r
+ normExpSig = softfloat_normSubnormalF32Sig( sigB );\r
+ expB = normExpSig.exp;\r
+ sigB = normExpSig.sig;\r
+ }\r
+ expProd = expA + expB - 0x7E;\r
+ sigA = ( sigA | 0x00800000 )<<7;\r
+ sigB = ( sigB | 0x00800000 )<<7;\r
+ sigProd = (uint_fast64_t) sigA * sigB;\r
+ if ( sigProd < UINT64_C( 0x2000000000000000 ) ) { /* product below bit 61: shift left one place */\r
+ --expProd;\r
+ sigProd <<= 1;\r
+ }\r
+ signZ = signProd;\r
+ if ( ! expC ) {\r
+ if ( ! sigC ) { /* C is zero: round the product alone */\r
+ expZ = expProd - 1;\r
+ sigZ = softfloat_shortShift64RightJam( sigProd, 31 );\r
+ goto roundPack;\r
+ }\r
+ normExpSig = softfloat_normSubnormalF32Sig( sigC );\r
+ expC = normExpSig.exp;\r
+ sigC = normExpSig.sig;\r
+ }\r
+ sigC = ( sigC | 0x00800000 )<<6;\r
+ expDiff = expProd - expC;\r
+ if ( signProd == signC ) { /* same effective sign: magnitudes add */\r
+ if ( expDiff <= 0 ) {\r
+ expZ = expC;\r
+ sigZ = sigC + softfloat_shift64RightJam( sigProd, 32 - expDiff );\r
+ } else {\r
+ expZ = expProd;\r
+ sigZ64 =\r
+ sigProd\r
+ + softfloat_shift64RightJam(\r
+ (uint_fast64_t) sigC<<32, expDiff );\r
+ sigZ = softfloat_shortShift64RightJam( sigZ64, 32 );\r
+ }\r
+ if ( sigZ < 0x40000000 ) {\r
+ --expZ;\r
+ sigZ <<= 1;\r
+ }\r
+ } else { /* opposite effective signs: magnitudes subtract */\r
+/*** OPTIMIZE BETTER? ***/\r
+ sigC64 = (uint_fast64_t) sigC<<32;\r
+ if ( expDiff < 0 ) {\r
+ signZ = signC;\r
+ expZ = expC;\r
+ sigZ64 = sigC64 - softfloat_shift64RightJam( sigProd, - expDiff );\r
+ } else if ( ! expDiff ) {\r
+ expZ = expProd;\r
+ sigZ64 = sigProd - sigC64;\r
+ if ( ! sigZ64 ) goto completeCancellation;\r
+ if ( sigZ64 & UINT64_C( 0x8000000000000000 ) ) { /* difference went negative: flip sign and negate */\r
+ signZ ^= 1;\r
+ sigZ64 = - sigZ64;\r
+ }\r
+ } else {\r
+ expZ = expProd;\r
+ sigZ64 = sigProd - softfloat_shift64RightJam( sigC64, expDiff );\r
+ }\r
+ shiftCount = softfloat_countLeadingZeros64( sigZ64 ) - 1;\r
+ expZ -= shiftCount;\r
+ shiftCount -= 32;\r
+ if ( shiftCount < 0 ) {\r
+ sigZ = softfloat_shortShift64RightJam( sigZ64, - shiftCount );\r
+ } else {\r
+ sigZ = (uint_fast32_t) sigZ64<<shiftCount;\r
+ }\r
+ }\r
+ roundPack:\r
+ return softfloat_roundPackToF32( signZ, expZ, sigZ );\r
+ propagateNaN_ABC:\r
+ uiZ = softfloat_propagateNaNF32UI( uiA, uiB );\r
+ goto propagateNaN_ZC;\r
+ infProdArg:\r
+ if ( magBits ) {\r
+ uiZ = packToF32UI( signProd, 0xFF, 0 );\r
+ if ( expC != 0xFF ) goto uiZ;\r
+ if ( sigC ) goto propagateNaN_ZC;\r
+ if ( signProd == signC ) goto uiZ;\r
+ }\r
+ invalid: /* label is not the target of any goto in this function; reached by fall-through */\r
+ softfloat_raiseFlags( softfloat_flag_invalid );\r
+ uiZ = defaultNaNF32UI;\r
+ propagateNaN_ZC:\r
+ uiZ = softfloat_propagateNaNF32UI( uiZ, uiC );\r
+ goto uiZ;\r
+ zeroProd:\r
+ uiZ = uiC;\r
+ if ( ! ( expC | sigC ) && ( signProd != signC ) ) {\r
+ completeCancellation: /* also entered by goto from the equal-exponent subtraction above */\r
+ uiZ =\r
+ packToF32UI( softfloat_roundingMode == softfloat_round_min, 0, 0 );\r
+ }\r
+ uiZ:\r
+ uZ.ui = uiZ; /* reinterpret the assembled bit pattern as float32_t */\r
+ return uZ.f;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "specialize.h"\r
+#include "softfloat.h"\r
+/*----------------------------------------------------------------------------\r
+| Combines the product of the float64 operands with raw bit patterns `uiA'\r
+| and `uiB' with the operand `uiC'.  `op' adjusts signs: when it equals\r
+| `softfloat_mulAdd_subC' the sign of C is inverted, and when it equals\r
+| `softfloat_mulAdd_subProd' the sign of the product is inverted.\r
+|  - Operands with exponent field 0x7FF are dispatched to the NaN-propagation\r
+|    and `infProdArg' paths; the `invalid' path raises\r
+|    `softfloat_flag_invalid' and starts from `defaultNaNF64UI'.\r
+|  - A zero A or B yields `uiC', except that a zero C of opposite sign to the\r
+|    product yields a zero whose sign bit is set only when the rounding mode\r
+|    is `softfloat_round_min'.\r
+|  - Otherwise the 128-bit significand product is added to or subtracted from\r
+|    the aligned significand of C and the result is rounded by\r
+|    `softfloat_roundPackToF64'.\r
+*----------------------------------------------------------------------------*/\r
+float64_t\r
+ softfloat_mulAddF64(\r
+ int op, uint_fast64_t uiA, uint_fast64_t uiB, uint_fast64_t uiC )\r
+{\r
+ bool signA;\r
+ int_fast16_t expA;\r
+ uint_fast64_t sigA;\r
+ bool signB;\r
+ int_fast16_t expB;\r
+ uint_fast64_t sigB;\r
+ bool signC;\r
+ int_fast16_t expC;\r
+ uint_fast64_t sigC;\r
+ bool signProd;\r
+ uint_fast64_t magBits, uiZ;\r
+ struct exp16_sig64 normExpSig;\r
+ int_fast16_t expProd;\r
+ struct uint128 sigProd;\r
+ bool signZ;\r
+ int_fast16_t expZ;\r
+ uint_fast64_t sigZ;\r
+ int_fast16_t expDiff;\r
+ struct uint128 sigC128, sigZ128;\r
+ int shiftCount;\r
+ union ui64_f64 uZ;\r
+\r
+ signA = signF64UI( uiA );\r
+ expA = expF64UI( uiA );\r
+ sigA = fracF64UI( uiA );\r
+ signB = signF64UI( uiB );\r
+ expB = expF64UI( uiB );\r
+ sigB = fracF64UI( uiB );\r
+ signC = signF64UI( uiC ) ^ ( op == softfloat_mulAdd_subC );\r
+ expC = expF64UI( uiC );\r
+ sigC = fracF64UI( uiC );\r
+ signProd = signA ^ signB ^ ( op == softfloat_mulAdd_subProd );\r
+ if ( expA == 0x7FF ) {\r
+ if ( sigA || ( ( expB == 0x7FF ) && sigB ) ) goto propagateNaN_ABC;\r
+ magBits = expB | sigB; /* nonzero iff B is not a zero */\r
+ goto infProdArg;\r
+ }\r
+ if ( expB == 0x7FF ) {\r
+ if ( sigB ) goto propagateNaN_ABC;\r
+ magBits = expA | sigA; /* nonzero iff A is not a zero */\r
+ goto infProdArg;\r
+ }\r
+ if ( expC == 0x7FF ) {\r
+ if ( sigC ) {\r
+ uiZ = 0;\r
+ goto propagateNaN_ZC;\r
+ }\r
+ uiZ = uiC;\r
+ goto uiZ;\r
+ }\r
+ if ( ! expA ) {\r
+ if ( ! sigA ) goto zeroProd;\r
+ normExpSig = softfloat_normSubnormalF64Sig( sigA );\r
+ expA = normExpSig.exp;\r
+ sigA = normExpSig.sig;\r
+ }\r
+ if ( ! expB ) {\r
+ if ( ! sigB ) goto zeroProd;\r
+ normExpSig = softfloat_normSubnormalF64Sig( sigB );\r
+ expB = normExpSig.exp;\r
+ sigB = normExpSig.sig;\r
+ }\r
+ expProd = expA + expB - 0x3FE;\r
+ sigA = ( sigA | UINT64_C( 0x0010000000000000 ) )<<10;\r
+ sigB = ( sigB | UINT64_C( 0x0010000000000000 ) )<<10;\r
+ sigProd = softfloat_mul64To128( sigA, sigB );\r
+ if ( sigProd.v64 < UINT64_C( 0x2000000000000000 ) ) { /* upper word below bit 61: shift left one place */\r
+ --expProd;\r
+ sigProd = softfloat_shortShift128Left( sigProd.v64, sigProd.v0, 1 );\r
+ }\r
+ signZ = signProd;\r
+ if ( ! expC ) {\r
+ if ( ! sigC ) { /* C is zero: round the product alone */\r
+ expZ = expProd - 1;\r
+ sigZ = sigProd.v64<<1 | ( sigProd.v0 != 0 ); /* any nonzero low-word bit is folded into bit 0 */\r
+ goto roundPack;\r
+ }\r
+ normExpSig = softfloat_normSubnormalF64Sig( sigC );\r
+ expC = normExpSig.exp;\r
+ sigC = normExpSig.sig;\r
+ }\r
+ sigC = ( sigC | UINT64_C( 0x0010000000000000 ) )<<9;\r
+ expDiff = expProd - expC;\r
+ if ( signProd == signC ) { /* same effective sign: magnitudes add */\r
+ if ( expDiff <= 0 ) {\r
+ expZ = expC;\r
+ if ( expDiff ) {\r
+ sigProd.v64 =\r
+ softfloat_shift64RightJam( sigProd.v64, - expDiff );\r
+ }\r
+ sigZ = ( sigC + sigProd.v64 ) | ( sigProd.v0 != 0 );\r
+ } else {\r
+ expZ = expProd;\r
+ sigC128 = softfloat_shift128RightJam( sigC, 0, expDiff );\r
+ sigZ128 =\r
+ softfloat_add128(\r
+ sigProd.v64, sigProd.v0, sigC128.v64, sigC128.v0 );\r
+ sigZ = sigZ128.v64 | ( sigZ128.v0 != 0 );\r
+ }\r
+ if ( sigZ < UINT64_C( 0x4000000000000000 ) ) {\r
+ --expZ;\r
+ sigZ <<= 1;\r
+ }\r
+ } else { /* opposite effective signs: magnitudes subtract */\r
+/*** OPTIMIZE BETTER? ***/\r
+ if ( expDiff < 0 ) {\r
+ signZ = signC;\r
+ expZ = expC;\r
+ sigProd =\r
+ softfloat_shift128RightJam(\r
+ sigProd.v64, sigProd.v0, - expDiff );\r
+ sigZ128 = softfloat_sub128( sigC, 0, sigProd.v64, sigProd.v0 );\r
+ } else if ( ! expDiff ) {\r
+ expZ = expProd;\r
+ sigZ128 = softfloat_sub128( sigProd.v64, sigProd.v0, sigC, 0 );\r
+ if ( ! ( sigZ128.v64 | sigZ128.v0 ) ) goto completeCancellation;\r
+ if ( sigZ128.v64 & UINT64_C( 0x8000000000000000 ) ) { /* difference went negative: flip sign and negate */\r
+ signZ ^= 1;\r
+ sigZ128 = softfloat_sub128( 0, 0, sigZ128.v64, sigZ128.v0 );\r
+ }\r
+ } else {\r
+ expZ = expProd;\r
+ sigC128 = softfloat_shift128RightJam( sigC, 0, expDiff );\r
+ sigZ128 =\r
+ softfloat_sub128(\r
+ sigProd.v64, sigProd.v0, sigC128.v64, sigC128.v0 );\r
+ }\r
+ if ( ! sigZ128.v64 ) { /* upper word is empty: move the lower word up by 64 bits */\r
+ expZ -= 64;\r
+ sigZ128.v64 = sigZ128.v0;\r
+ sigZ128.v0 = 0;\r
+ }\r
+ shiftCount = softfloat_countLeadingZeros64( sigZ128.v64 ) - 1;\r
+ expZ -= shiftCount;\r
+ if ( shiftCount < 0 ) {\r
+ sigZ = softfloat_shortShift64RightJam( sigZ128.v64, - shiftCount );\r
+ } else {\r
+ sigZ128 =\r
+ softfloat_shortShift128Left(\r
+ sigZ128.v64, sigZ128.v0, shiftCount );\r
+ sigZ = sigZ128.v64;\r
+ }\r
+ sigZ |= ( sigZ128.v0 != 0 );\r
+ }\r
+ roundPack:\r
+ return softfloat_roundPackToF64( signZ, expZ, sigZ );\r
+ propagateNaN_ABC:\r
+ uiZ = softfloat_propagateNaNF64UI( uiA, uiB );\r
+ goto propagateNaN_ZC;\r
+ infProdArg:\r
+ if ( magBits ) {\r
+ uiZ = packToF64UI( signProd, 0x7FF, 0 );\r
+ if ( expC != 0x7FF ) goto uiZ;\r
+ if ( sigC ) goto propagateNaN_ZC;\r
+ if ( signProd == signC ) goto uiZ;\r
+ }\r
+ invalid: /* label is not the target of any goto in this function; reached by fall-through */\r
+ softfloat_raiseFlags( softfloat_flag_invalid );\r
+ uiZ = defaultNaNF64UI;\r
+ propagateNaN_ZC:\r
+ uiZ = softfloat_propagateNaNF64UI( uiZ, uiC );\r
+ goto uiZ;\r
+ zeroProd:\r
+ uiZ = uiC;\r
+ if ( ! ( expC | sigC ) && ( signProd != signC ) ) {\r
+ completeCancellation: /* also entered by goto from the equal-exponent subtraction above */\r
+ uiZ =\r
+ packToF64UI( softfloat_roundingMode == softfloat_round_min, 0, 0 );\r
+ }\r
+ uiZ:\r
+ uZ.ui = uiZ; /* reinterpret the assembled bit pattern as float64_t */\r
+ return uZ.f;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Shifts `sig' left so that its leading one bit lands at bit 30, lowering\r
+| `exp' by the shift distance, and packs the result with `sign'.  When the\r
+| shift is at least 7 places and the adjusted exponent, viewed as a 16-bit\r
+| unsigned value, is below 0xFD, the value is packed directly; otherwise it\r
+| is handed to `softfloat_roundPackToF32'.\r
+*----------------------------------------------------------------------------*/\r
+float32_t\r
+ softfloat_normRoundPackToF32( bool sign, int_fast16_t exp, uint_fast32_t sig )\r
+{\r
+    union ui32_f32 uZ;\r
+    int normShift = softfloat_countLeadingZeros32( sig ) - 1;\r
+\r
+    exp -= normShift;\r
+    if ( ( normShift < 7 ) || ( 0xFD <= (uint16_t) exp ) ) {\r
+        return softfloat_roundPackToF32( sign, exp, sig<<normShift );\r
+    }\r
+    /* The 7 bits below the packed fraction are all zero here, so the value\r
+       can be packed without going through the rounding routine. */\r
+    uZ.ui = packToF32UI( sign, sig ? exp : 0, sig<<( normShift - 7 ) );\r
+    return uZ.f;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdbool.h>\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Shifts `sig' left so that its leading one bit lands at bit 62, lowering\r
+| `exp' by the shift distance, and packs the result with `sign'.  When the\r
+| shift is at least 10 places and the adjusted exponent, viewed as a 16-bit\r
+| unsigned value, is below 0x7FD, the value is packed directly; otherwise it\r
+| is handed to `softfloat_roundPackToF64'.\r
+*----------------------------------------------------------------------------*/\r
+float64_t\r
+ softfloat_normRoundPackToF64( bool sign, int_fast16_t exp, uint_fast64_t sig )\r
+{\r
+    union ui64_f64 uZ;\r
+    int normShift = softfloat_countLeadingZeros64( sig ) - 1;\r
+\r
+    exp -= normShift;\r
+    if ( ( normShift < 10 ) || ( 0x7FD <= (uint16_t) exp ) ) {\r
+        return softfloat_roundPackToF64( sign, exp, sig<<normShift );\r
+    }\r
+    /* The 10 bits below the packed fraction are all zero here, so the value\r
+       can be packed without going through the rounding routine. */\r
+    uZ.ui = packToF64UI( sign, sig ? exp : 0, sig<<( normShift - 10 ) );\r
+    return uZ.f;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Shifts the fraction `sig' left until its leading one bit reaches bit 23 and\r
+| returns the shifted value in `sig' together with the matching exponent,\r
+| 1 minus the shift distance, in `exp'.\r
+*----------------------------------------------------------------------------*/\r
+struct exp16_sig32 softfloat_normSubnormalF32Sig( uint_fast32_t sig )\r
+{\r
+    struct exp16_sig32 norm;\r
+    int normShift = softfloat_countLeadingZeros32( sig ) - 8;\r
+\r
+    norm.sig = sig<<normShift;\r
+    norm.exp = 1 - normShift;\r
+    return norm;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Shifts the fraction `sig' left until its leading one bit reaches bit 52 and\r
+| returns the shifted value in `sig' together with the matching exponent,\r
+| 1 minus the shift distance, in `exp'.\r
+*----------------------------------------------------------------------------*/\r
+struct exp16_sig64 softfloat_normSubnormalF64Sig( uint_fast64_t sig )\r
+{\r
+    struct exp16_sig64 norm;\r
+    int normShift = softfloat_countLeadingZeros64( sig ) - 11;\r
+\r
+    norm.sig = sig<<normShift;\r
+    norm.exp = 1 - normShift;\r
+    return norm;\r
+\r
+}\r
+\r
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "primitives.h"
+#include "internals.h"
+#include "softfloat.h"
+/*----------------------------------------------------------------------------
+| Rounds the significand `sig' according to `softfloat_roundingMode' and packs
+| it with `sign' and `exp' into a float32 bit pattern.  The low 7 bits of
+| `sig' are the bits rounded away.
+|  - A negative `exp' shifts `sig' right (with jamming) and may raise
+|    `softfloat_flag_underflow'.
+|  - An `exp' above 0xFD, or equal to 0xFD with a rounded significand reaching
+|    0x80000000, raises overflow and inexact and returns exponent field 0xFF
+|    with zero fraction, or the pattern one below it when the rounding
+|    increment is zero.
+|  - `softfloat_flag_inexact' is set whenever discarded bits are nonzero.
+*----------------------------------------------------------------------------*/
+float32_t
+ softfloat_roundPackToF32( bool sign, int_fast16_t exp, uint_fast32_t sig )
+{
+ int roundingMode;
+ bool roundNearestEven;
+ int roundIncrement, roundBits;
+ bool isTiny;
+ uint_fast32_t uiZ;
+ union ui32_f32 uZ;
+
+ roundingMode = softfloat_roundingMode;
+ roundNearestEven = ( roundingMode == softfloat_round_nearest_even );
+ roundIncrement = 0x40; /* half of the discarded range: used by both round-to-nearest modes */
+ if (
+ ! roundNearestEven
+ && ( roundingMode != softfloat_round_nearest_maxMag )
+ ) {
+ roundIncrement =
+ ( roundingMode == softfloat_round_minMag )
+ || ( roundingMode
+ == ( sign ? softfloat_round_max : softfloat_round_min ) )
+ ? 0
+ : 0x7F;
+ }
+ roundBits = sig & 0x7F;
+ if ( 0xFD <= (uint16_t) exp ) { /* true for exp >= 0xFD and for negative exp (wraps when cast) */
+ if ( exp < 0 ) {
+ isTiny =
+ ( softfloat_detectTininess
+ == softfloat_tininess_beforeRounding )
+ || ( exp < -1 )
+ || ( sig + roundIncrement < 0x80000000 );
+ sig = softfloat_shift32RightJam( sig, - exp );
+ exp = 0;
+ roundBits = sig & 0x7F;
+ if ( isTiny && roundBits ) {
+ softfloat_raiseFlags( softfloat_flag_underflow );
+ }
+ } else if (
+ ( 0xFD < exp ) || ( 0x80000000 <= sig + roundIncrement )
+ ) {
+ softfloat_raiseFlags(
+ softfloat_flag_overflow | softfloat_flag_inexact );
+ uiZ = packToF32UI( sign, 0xFF, 0 ) - ! roundIncrement; /* subtracts 1 when the rounding increment is zero */
+ goto uiZ;
+ }
+ }
+ if ( roundBits ) softfloat_exceptionFlags |= softfloat_flag_inexact;
+ sig = ( sig + roundIncrement )>>7;
+ sig &= ~ ( ! ( roundBits ^ 0x40 ) & roundNearestEven ); /* exact tie in nearest-even mode: clear bit 0 */
+ uiZ = packToF32UI( sign, sig ? exp : 0, sig ); /* a zero significand is packed with exponent field 0 */
+ uiZ:
+ uZ.ui = uiZ;
+ return uZ.f;
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "primitives.h"
+#include "internals.h"
+#include "softfloat.h"
+/*----------------------------------------------------------------------------
+| Rounds the significand `sig' according to `softfloat_roundingMode' and packs
+| it with `sign' and `exp' into a float64 bit pattern.  The low 10 bits of
+| `sig' are the bits rounded away.
+|  - A negative `exp' shifts `sig' right (with jamming) and may raise
+|    `softfloat_flag_underflow'.
+|  - An `exp' above 0x7FD, or equal to 0x7FD with a rounded significand
+|    reaching 2^63, raises overflow and inexact and returns exponent field
+|    0x7FF with zero fraction, or the pattern one below it when the rounding
+|    increment is zero.
+|  - `softfloat_flag_inexact' is set whenever discarded bits are nonzero.
+*----------------------------------------------------------------------------*/
+float64_t
+ softfloat_roundPackToF64( bool sign, int_fast16_t exp, uint_fast64_t sig )
+{
+ int roundingMode;
+ bool roundNearestEven;
+ int roundIncrement, roundBits;
+ bool isTiny;
+ uint_fast64_t uiZ;
+ union ui64_f64 uZ;
+
+ roundingMode = softfloat_roundingMode;
+ roundNearestEven = ( roundingMode == softfloat_round_nearest_even );
+ roundIncrement = 0x200; /* half of the discarded range: used by both round-to-nearest modes */
+ if (
+ ! roundNearestEven
+ && ( roundingMode != softfloat_round_nearest_maxMag )
+ ) {
+ roundIncrement =
+ ( roundingMode == softfloat_round_minMag )
+ || ( roundingMode
+ == ( sign ? softfloat_round_max : softfloat_round_min ) )
+ ? 0
+ : 0x3FF;
+ }
+ roundBits = sig & 0x3FF;
+ if ( 0x7FD <= (uint16_t) exp ) { /* true for exp >= 0x7FD and for negative exp (wraps when cast) */
+ if ( exp < 0 ) {
+ isTiny =
+ ( softfloat_detectTininess
+ == softfloat_tininess_beforeRounding )
+ || ( exp < -1 )
+ || ( sig + roundIncrement < UINT64_C( 0x8000000000000000 ) );
+ sig = softfloat_shift64RightJam( sig, - exp );
+ exp = 0;
+ roundBits = sig & 0x3FF;
+ if ( isTiny && roundBits ) {
+ softfloat_raiseFlags( softfloat_flag_underflow );
+ }
+ } else if (
+ ( 0x7FD < exp )
+ || ( UINT64_C( 0x8000000000000000 ) <= sig + roundIncrement )
+ ) {
+ softfloat_raiseFlags(
+ softfloat_flag_overflow | softfloat_flag_inexact );
+ uiZ = packToF64UI( sign, 0x7FF, 0 ) - ! roundIncrement; /* subtracts 1 when the rounding increment is zero */
+ goto uiZ;
+ }
+ }
+ if ( roundBits ) softfloat_exceptionFlags |= softfloat_flag_inexact;
+ sig = ( sig + roundIncrement )>>10;
+ sig &= ~ ( ! ( roundBits ^ 0x200 ) & roundNearestEven ); /* exact tie in nearest-even mode: clear bit 0 */
+ uiZ = packToF64UI( sign, sig ? exp : 0, sig ); /* a zero significand is packed with exponent field 0 */
+ uiZ:
+ uZ.ui = uiZ;
+ return uZ.f;
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Rounds the fixed-point magnitude `sig' (7 fraction bits) according to
+| `roundingMode', applies `sign', and returns the value as a signed 32-bit
+| integer.  If the rounded magnitude does not fit, `softfloat_flag_invalid' is
+| raised and the most negative (for `sign') or most positive 32-bit value is
+| returned.  When `exact' is true and fraction bits were discarded,
+| `softfloat_flag_inexact' is set.
+*----------------------------------------------------------------------------*/
+int_fast32_t
+ softfloat_roundPackToI32(
+     bool sign, uint_fast64_t sig, int_fast8_t roundingMode, bool exact )
+{
+    const bool nearEven = ( roundingMode == softfloat_round_nearest_even );
+    int increment, lowBits;
+    uint_fast32_t sig32;
+    union { uint32_t ui; int32_t i; } uZ;
+    int_fast32_t z;
+
+    if ( nearEven || ( roundingMode == softfloat_round_nearest_maxMag ) ) {
+        increment = 0x40;
+    } else if (
+        ( roundingMode == softfloat_round_minMag )
+            || ( roundingMode
+                     == ( sign ? softfloat_round_max : softfloat_round_min ) )
+    ) {
+        increment = 0;
+    } else {
+        increment = 0x7F;
+    }
+    lowBits = sig & 0x7F;
+    sig += increment;
+    if ( ! ( sig & UINT64_C( 0xFFFFFF8000000000 ) ) ) {
+        sig32 = sig>>7;
+        /* Exact tie in nearest-even mode: force the result even. */
+        if ( nearEven && ( lowBits == 0x40 ) ) sig32 &= ~ (uint_fast32_t) 1;
+        uZ.ui = sign ? - sig32 : sig32;
+        z = uZ.i;
+        /* A nonzero result must carry the requested sign. */
+        if ( ! z || ( ( z < 0 ) == sign ) ) {
+            if ( exact && lowBits ) {
+                softfloat_exceptionFlags |= softfloat_flag_inexact;
+            }
+            return z;
+        }
+    }
+    softfloat_raiseFlags( softfloat_flag_invalid );
+    return sign ? -0x7FFFFFFF - 1 : 0x7FFFFFFF;
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Rounds the magnitude whose integer part is `sig64' and whose fraction is
+| `sig0' according to `roundingMode', applies `sign', and returns the value
+| as a signed 64-bit integer.  If the rounded magnitude does not fit,
+| `softfloat_flag_invalid' is raised and the most negative (for `sign') or
+| most positive 64-bit value is returned.  When `exact' is true and `sig0' is
+| nonzero, `softfloat_flag_inexact' is set.
+*----------------------------------------------------------------------------*/
+int_fast64_t
+ softfloat_roundPackToI64(
+     bool sign,
+     uint_fast64_t sig64,
+     uint_fast64_t sig0,
+     int_fast8_t roundingMode,
+     bool exact
+ )
+{
+    const bool nearEven = ( roundingMode == softfloat_round_nearest_even );
+    bool roundUp;
+    bool wrapped = false;
+    union { uint64_t ui; int64_t i; } uZ;
+    int_fast64_t z;
+
+    if ( nearEven || ( roundingMode == softfloat_round_nearest_maxMag ) ) {
+        roundUp = ( UINT64_C( 0x8000000000000000 ) <= sig0 );
+    } else {
+        roundUp =
+            ( roundingMode != softfloat_round_minMag )
+                && ( roundingMode
+                         == ( sign ? softfloat_round_min
+                                  : softfloat_round_max ) )
+                && sig0;
+    }
+    if ( roundUp ) {
+        ++sig64;
+        if ( ! sig64 ) {
+            wrapped = true;
+        } else if (
+            nearEven && ! ( sig0 & UINT64_C( 0x7FFFFFFFFFFFFFFF ) )
+        ) {
+            /* Exact tie in nearest-even mode: force the result even. */
+            sig64 &= ~ (uint_fast64_t) 1;
+        }
+    }
+    if ( ! wrapped ) {
+        uZ.ui = sign ? - sig64 : sig64;
+        z = uZ.i;
+        /* A nonzero result must carry the requested sign. */
+        if ( ! z || ( ( z < 0 ) == sign ) ) {
+            if ( exact && sig0 ) {
+                softfloat_exceptionFlags |= softfloat_flag_inexact;
+            }
+            return z;
+        }
+    }
+    softfloat_raiseFlags( softfloat_flag_invalid );
+    return
+        sign ? - INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1
+            : INT64_C( 0x7FFFFFFFFFFFFFFF );
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Rounds the fixed-point magnitude `sig' (7 fraction bits) according to
+| `roundingMode' and returns it as an unsigned 32-bit integer.  If the
+| rounded magnitude does not fit in 32 bits, or `sign' is set and the rounded
+| value is nonzero, `softfloat_flag_invalid' is raised and 0xFFFFFFFF is
+| returned.  When `exact' is true and fraction bits were discarded,
+| `softfloat_flag_inexact' is set.
+*----------------------------------------------------------------------------*/
+uint_fast32_t
+ softfloat_roundPackToUI32(
+     bool sign, uint_fast64_t sig, int_fast8_t roundingMode, bool exact )
+{
+    const bool nearEven = ( roundingMode == softfloat_round_nearest_even );
+    int increment, lowBits;
+    uint_fast32_t z;
+
+    if ( nearEven || ( roundingMode == softfloat_round_nearest_maxMag ) ) {
+        increment = 0x40;
+    } else if (
+        ( roundingMode == softfloat_round_minMag )
+            || ( roundingMode
+                     == ( sign ? softfloat_round_max : softfloat_round_min ) )
+    ) {
+        increment = 0;
+    } else {
+        increment = 0x7F;
+    }
+    lowBits = sig & 0x7F;
+    sig += increment;
+    if ( ! ( sig & UINT64_C( 0xFFFFFF8000000000 ) ) ) {
+        z = sig>>7;
+        /* Exact tie in nearest-even mode: force the result even. */
+        if ( nearEven && ( lowBits == 0x40 ) ) z &= ~ (uint_fast32_t) 1;
+        if ( ! ( sign && z ) ) {
+            if ( exact && lowBits ) {
+                softfloat_exceptionFlags |= softfloat_flag_inexact;
+            }
+            return z;
+        }
+    }
+    softfloat_raiseFlags( softfloat_flag_invalid );
+    return 0xFFFFFFFF;
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "internals.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Rounds the magnitude `sig64' according to `roundingMode', using `sig0' as
+| the bits that lie below the rounding point (its top bit is the half-way
+| bit), and returns the result as an unsigned 64-bit integer.  If the rounded
+| magnitude wraps to zero, or if `sign' is set and the result is nonzero, the
+| invalid flag is raised and 0xFFFFFFFFFFFFFFFF is returned.  Otherwise, when
+| `exact' is true and `sig0' is nonzero, the inexact flag is set.
+*----------------------------------------------------------------------------*/
+uint_fast64_t
+ softfloat_roundPackToUI64(
+     bool sign,
+     uint_fast64_t sig64,
+     uint_fast64_t sig0,
+     int_fast8_t roundingMode,
+     bool exact
+ )
+{
+    bool tiesToEven, roundUp;
+
+    tiesToEven = ( roundingMode == softfloat_round_nearest_even );
+    if ( tiesToEven || ( roundingMode == softfloat_round_nearest_maxMag ) ) {
+        /* Round to nearest: bump the magnitude at or above the half-way point. */
+        roundUp = ( UINT64_C( 0x8000000000000000 ) <= sig0 );
+    } else if ( roundingMode == softfloat_round_minMag ) {
+        roundUp = false;
+    } else {
+        roundUp =
+            ( roundingMode
+                  == ( sign ? softfloat_round_min : softfloat_round_max ) )
+                && ( sig0 != 0 );
+    }
+    if ( roundUp ) {
+        ++sig64;
+        if ( sig64 == 0 ) goto invalid;
+        /* Exact tie under nearest-even: force the low bit to zero. */
+        if ( tiesToEven && ! ( sig0 & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) ) {
+            sig64 &= ~ (uint_fast64_t) 1;
+        }
+    }
+    if ( sign && sig64 ) goto invalid;
+    if ( exact && sig0 ) {
+        softfloat_exceptionFlags |= softfloat_flag_inexact;
+    }
+    return sig64;
+ invalid:
+    softfloat_raiseFlags( softfloat_flag_invalid );
+    return UINT64_C( 0xFFFFFFFFFFFFFFFF );
+
+}
+
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Shifts the 128-bit value `a64':`a0' right by `count' bits.  The shifted\r
+| value is returned in `v64':`v0'; the bits shifted out are returned, top-\r
+| aligned, in `extra', whose least-significant bit is additionally set if the\r
+| incoming `extra' was nonzero or if nonzero bits were shifted out beyond the\r
+| 64 bits that `extra' can hold.  `count' may be arbitrarily large but is\r
+| expected to be nonzero: for `count' == 0 the low-word expression would OR\r
+| `a64' into `v0'.\r
+*----------------------------------------------------------------------------*/\r
+struct uint128_extra\r
+ softfloat_shift128ExtraRightJam(\r
+     uint64_t a64, uint64_t a0, uint64_t extra, unsigned int count )\r
+{\r
+    struct uint128_extra z;\r
+    unsigned int lowShift;\r
+\r
+    /* Left-shift distance that recovers the bits pushed out of a word. */\r
+    lowShift = ( - count ) & 63;\r
+    if ( count < 64 ) {\r
+        z.v64 = a64>>count;\r
+        z.v0 = a64<<lowShift | a0>>count;\r
+        z.extra = a0<<lowShift;\r
+    } else if ( count == 64 ) {\r
+        z.v64 = 0;\r
+        z.v0 = a64;\r
+        z.extra = a0;\r
+    } else {\r
+        /* All of `a0' falls below `extra'; fold it into the sticky input. */\r
+        extra |= a0;\r
+        z.v64 = 0;\r
+        if ( count < 128 ) {\r
+            z.v0 = a64>>( count & 63 );\r
+            z.extra = a64<<lowShift;\r
+        } else {\r
+            z.v0 = 0;\r
+            z.extra = ( count == 128 ) ? a64 : ( a64 != 0 );\r
+        }\r
+    }\r
+    z.extra |= ( extra != 0 );\r
+    return z;\r
+\r
+}\r
+\r
--- /dev/null
+
+#include <stdint.h>
+#include "platform.h"
+#include "primitives.h"
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value `a64':`a0' right by `count' bits.  If any nonzero
+| bits are shifted out, the least-significant bit of the result is set
+| ("jammed").  `count' may be arbitrarily large but is expected to be nonzero:
+| for `count' == 0 the low-word expression would OR `a64' into `v0'.
+*----------------------------------------------------------------------------*/
+struct uint128
+ softfloat_shift128RightJam( uint64_t a64, uint64_t a0, unsigned int count )
+{
+    struct uint128 z;
+    unsigned int lowShift, highShift;
+    uint64_t lostBits;
+
+    if ( count < 64 ) {
+        lowShift = ( - count ) & 63;
+        lostBits = a0<<lowShift;
+        z.v64 = a64>>count;
+        z.v0 = a64<<lowShift | a0>>count | ( lostBits != 0 );
+    } else if ( count < 128 ) {
+        /* Only the high word survives; everything else feeds the sticky bit. */
+        highShift = count & 63;
+        lostBits = ( a64 & ( ( (uint64_t) 1<<highShift ) - 1 ) ) | a0;
+        z.v64 = 0;
+        z.v0 = a64>>highShift | ( lostBits != 0 );
+    } else {
+        z.v64 = 0;
+        z.v0 = ( ( a64 | a0 ) != 0 );
+    }
+    return z;
+
+}
+
--- /dev/null
+
+#include <stdint.h>
+#include "platform.h"
+#include "primitives.h"
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by `count' bits.  If any nonzero bits are shifted out, the
+| least-significant bit of the result is set ("jammed").  For `count' of 32
+| or more the result is 1 if `a' is nonzero and 0 otherwise.  `count' is
+| expected to be nonzero: for `count' == 0 the sticky term evaluates to
+| ( a != 0 ).
+*----------------------------------------------------------------------------*/
+uint32_t softfloat_shift32RightJam( uint32_t a, unsigned int count )
+{
+    uint32_t lostBits;
+
+    if ( 32 <= count ) return ( a != 0 );
+    /* The bits that fall off the bottom, moved to the top of the word. */
+    lostBits = a<<( ( - count ) & 31 );
+    return a>>count | ( lostBits != 0 );
+
+}
+
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Shifts `a' right by `count' bits.  The shifted value is returned in `v';\r
+| the bits shifted out are returned, top-aligned, in `extra', whose least-\r
+| significant bit is additionally set if the incoming `extra' was nonzero or,\r
+| for `count' above 64, if `a' was nonzero.  `count' is expected to be\r
+| nonzero: for `count' == 0 the whole of `a' would also be copied into\r
+| `extra'.\r
+*----------------------------------------------------------------------------*/\r
+struct uint64_extra\r
+ softfloat_shift64ExtraRightJam(\r
+     uint64_t a, uint64_t extra, unsigned int count )\r
+{\r
+    struct uint64_extra z;\r
+\r
+    if ( 64 < count ) {\r
+        z.v = 0;\r
+        z.extra = ( a != 0 );\r
+    } else if ( count == 64 ) {\r
+        z.v = 0;\r
+        z.extra = a;\r
+    } else {\r
+        z.v = a>>count;\r
+        z.extra = a<<( ( - count ) & 63 );\r
+    }\r
+    z.extra |= ( extra != 0 );\r
+    return z;\r
+\r
+}\r
+\r
--- /dev/null
+
+#include <stdint.h>
+#include "platform.h"
+#include "primitives.h"
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by `count' bits.  If any nonzero bits are shifted out, the
+| least-significant bit of the result is set ("jammed").  For `count' of 64
+| or more the result is 1 if `a' is nonzero and 0 otherwise.  `count' is
+| expected to be nonzero: for `count' == 0 the sticky term evaluates to
+| ( a != 0 ).
+*----------------------------------------------------------------------------*/
+uint64_t softfloat_shift64RightJam( uint64_t a, unsigned int count )
+{
+    uint64_t lostBits;
+
+    if ( 64 <= count ) return ( a != 0 );
+    /* The bits that fall off the bottom, moved to the top of the word. */
+    lostBits = a<<( ( - count ) & 63 );
+    return a>>count | ( lostBits != 0 );
+
+}
+
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Shifts the 128-bit value `a64':`a0' right by `count' bits without any range\r
+| check on `count'.  The shifted value is returned in `v64':`v0'; the bits\r
+| shifted out of `a0' are returned, top-aligned, in `extra', whose least-\r
+| significant bit is additionally set if the incoming `extra' was nonzero.\r
+| NOTE(review): the expressions only make sense for `count' in 1..63 (a shift\r
+| by 64 or more is undefined in C, and `count' == 0 would OR `a64' into\r
+| `v0') -- confirm against callers.\r
+*----------------------------------------------------------------------------*/\r
+struct uint128_extra\r
+ softfloat_shortShift128ExtraRightJam(\r
+     uint64_t a64, uint64_t a0, uint64_t extra, unsigned int count )\r
+{\r
+    struct uint128_extra z;\r
+    unsigned int lowShift;\r
+\r
+    /* Left-shift distance that recovers the bits pushed out of a word. */\r
+    lowShift = ( - count ) & 63;\r
+    z.extra = a0<<lowShift | ( extra != 0 );\r
+    z.v0 = a64<<lowShift | a0>>count;\r
+    z.v64 = a64>>count;\r
+    return z;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Shifts the 128-bit value `a64':`a0' left by `count' bits without any range\r
+| check on `count'; bits shifted out of the top are discarded.\r
+| NOTE(review): the expressions only make sense for `count' in 1..63 (a shift\r
+| by 64 or more is undefined in C, and `count' == 0 would OR `a0' into\r
+| `v64') -- confirm against callers.\r
+*----------------------------------------------------------------------------*/\r
+struct uint128\r
+ softfloat_shortShift128Left( uint64_t a64, uint64_t a0, unsigned int count )\r
+{\r
+    struct uint128 z;\r
+    uint64_t carry;\r
+\r
+    /* High bits of the low word that move up into the high word. */\r
+    carry = a0>>( ( - count ) & 63 );\r
+    z.v0 = a0<<count;\r
+    z.v64 = a64<<count | carry;\r
+    return z;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Shifts the 128-bit value `a64':`a0' right by `count' bits without any range\r
+| check on `count'; bits shifted out of the bottom are discarded.\r
+| NOTE(review): the expressions only make sense for `count' in 1..63 (a shift\r
+| by 64 or more is undefined in C, and `count' == 0 would OR `a64' into\r
+| `v0') -- confirm against callers.\r
+*----------------------------------------------------------------------------*/\r
+struct uint128\r
+ softfloat_shortShift128Right( uint64_t a64, uint64_t a0, unsigned int count )\r
+{\r
+    struct uint128 z;\r
+    uint64_t carry;\r
+\r
+    /* Low bits of the high word that move down into the low word. */\r
+    carry = a64<<( ( - count ) & 63 );\r
+    z.v0 = carry | a0>>count;\r
+    z.v64 = a64>>count;\r
+    return z;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Shifts the 192-bit value `a128':`a64':`a0' left by `count' bits without any\r
+| range check on `count'; bits shifted out of the top are discarded.\r
+| NOTE(review): the expressions only make sense for `count' in 1..63 (a shift\r
+| by 64 or more is undefined in C, and `count' == 0 would OR each word into\r
+| the one above it) -- confirm against callers.\r
+*----------------------------------------------------------------------------*/\r
+struct uint192\r
+ softfloat_shortShift192Left(\r
+     uint64_t a128, uint64_t a64, uint64_t a0, unsigned int count )\r
+{\r
+    struct uint192 z;\r
+    unsigned int carryShift;\r
+\r
+    /* Right-shift distance that extracts the bits crossing a word boundary. */\r
+    carryShift = ( - count ) & 63;\r
+    z.v0 = a0<<count;\r
+    z.v64 = a64<<count | a0>>carryShift;\r
+    z.v128 = a128<<count | a64>>carryShift;\r
+    return z;\r
+\r
+}\r
+\r
--- /dev/null
+
+#include <stdint.h>
+#include "platform.h"
+#include "primitives.h"
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by one bit.  If the bit shifted out is nonzero, the least-
+| significant bit of the result is set ("jammed").
+*----------------------------------------------------------------------------*/
+uint32_t softfloat_shortShift32Right1Jam( uint32_t a )
+{
+    uint32_t lostBit;
+
+    lostBit = a & 1;
+    return ( a>>1 ) | lostBit;
+
+}
+
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Shifts `a' right by `count' bits without any range check on `count'.  The\r
+| shifted value is returned in `v'; the bits shifted out are returned, top-\r
+| aligned, in `extra', whose least-significant bit is additionally set if the\r
+| incoming `extra' was nonzero.\r
+| NOTE(review): the expressions only make sense for `count' in 1..63 (a shift\r
+| by 64 or more is undefined in C, and `count' == 0 would copy all of `a'\r
+| into `extra') -- confirm against callers.\r
+*----------------------------------------------------------------------------*/\r
+struct uint64_extra\r
+ softfloat_shortShift64ExtraRightJam(\r
+     uint64_t a, uint64_t extra, unsigned int count )\r
+{\r
+    struct uint64_extra z;\r
+    uint64_t lostBits;\r
+\r
+    lostBits = a<<( ( - count ) & 63 );\r
+    z.extra = lostBits | ( extra != 0 );\r
+    z.v = a>>count;\r
+    return z;\r
+\r
+}\r
+\r
--- /dev/null
+
+#include <stdint.h>
+#include "platform.h"
+#include "primitives.h"
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by `count' bits without any range check on `count'.  If
+| any nonzero bits are shifted out, the least-significant bit of the result
+| is set ("jammed").  `count' must be less than 64, since a shift by 64 or
+| more is undefined in C.
+*----------------------------------------------------------------------------*/
+uint64_t softfloat_shortShift64RightJam( uint64_t a, unsigned int count )
+{
+    uint64_t lostMask, lostBits;
+
+    /* Mask selecting exactly the `count' low bits that will be shifted out. */
+    lostMask = ( (uint64_t) 1<<count ) - 1;
+    lostBits = a & lostMask;
+    return a>>count | ( lostBits != 0 );
+
+}
+
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns the 128-bit difference `a64':`a0' minus `b64':`b0'.  Subtraction is\r
+| modulo 2^128, so any borrow out of the top word is lost.\r
+*----------------------------------------------------------------------------*/\r
+struct uint128\r
+ softfloat_sub128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )\r
+{\r
+    struct uint128 z;\r
+    unsigned int borrow;\r
+\r
+    /* The low-word subtraction borrows exactly when a0 is smaller than b0. */\r
+    borrow = ( a0 < b0 );\r
+    z.v0 = a0 - b0;\r
+    z.v64 = a64 - b64 - borrow;\r
+    return z;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Returns the 192-bit difference `a128':`a64':`a0' minus `b128':`b64':`b0'.\r
+| Subtraction is modulo 2^192, so any borrow out of the top word is lost.\r
+*----------------------------------------------------------------------------*/\r
+struct uint192\r
+ softfloat_sub192(\r
+     uint64_t a128,\r
+     uint64_t a64,\r
+     uint64_t a0,\r
+     uint64_t b128,\r
+     uint64_t b64,\r
+     uint64_t b0\r
+ )\r
+{\r
+    struct uint192 z;\r
+    uint64_t middle;\r
+    unsigned int borrowFromLow, borrowFromMiddle;\r
+\r
+    borrowFromLow = ( a0 < b0 );\r
+    middle = a64 - b64;\r
+    /* The middle word borrows if its own subtraction wraps, or if taking\r
+       the low word's borrow out of the partial difference wraps. */\r
+    borrowFromMiddle = ( a64 < b64 );\r
+    borrowFromMiddle += ( middle < borrowFromLow );\r
+    z.v0 = a0 - b0;\r
+    z.v64 = middle - borrowFromLow;\r
+    z.v128 = a128 - b128 - borrowFromMiddle;\r
+    return z;\r
+\r
+}\r
+\r
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "primitives.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Subtracts the magnitudes encoded in the single-precision bit patterns `uiA'
+| and `uiB'.  `signZ' is the sign given to the result when A has the larger
+| magnitude; it is inverted when B has the larger magnitude.  When the two
+| magnitudes are equal, the result is `packToF32UI( s, 0, 0 )' where `s' is
+| set only if the rounding mode is `softfloat_round_min'.  If both exponent
+| fields are 0xFF and both fractions are zero, the invalid flag is raised and
+| `defaultNaNF32UI' is returned; an operand with exponent field 0xFF and a
+| nonzero fraction sends both inputs to `softfloat_propagateNaNF32UI'.
+| Ordinary results are produced by `softfloat_normRoundPackToF32'.
+*----------------------------------------------------------------------------*/
+float32_t
+ softfloat_subMagsF32( uint_fast32_t uiA, uint_fast32_t uiB, bool signZ )
+{
+ int_fast16_t expA;
+ uint_fast32_t sigA;
+ int_fast16_t expB;
+ uint_fast32_t sigB;
+ int_fast16_t expDiff;
+ uint_fast32_t uiZ;
+ int_fast16_t expZ;
+ uint_fast32_t sigZ;
+ union ui32_f32 uZ;
+
+ expA = expF32UI( uiA );
+ sigA = fracF32UI( uiA );
+ expB = expF32UI( uiB );
+ sigB = fracF32UI( uiB );
+ expDiff = expA - expB;
+ /* Move both fractions up 7 bits so the alignment shift below has low-order
+    room for the bits it shifts out and jams. */
+ sigA <<= 7;
+ sigB <<= 7;
+ if ( 0 < expDiff ) goto expABigger;
+ if ( expDiff < 0 ) goto expBBigger;
+ /* From here to the next label the exponent fields are equal. */
+ if ( expA == 0xFF ) {
+ if ( sigA | sigB ) goto propagateNaN;
+ softfloat_raiseFlags( softfloat_flag_invalid );
+ uiZ = defaultNaNF32UI;
+ goto uiZ;
+ }
+ /* A zero exponent field is replaced by 1 for both operands. */
+ if ( ! expA ) {
+ expA = 1;
+ expB = 1;
+ }
+ if ( sigB < sigA ) goto aBigger;
+ if ( sigA < sigB ) goto bBigger;
+ /* Equal magnitudes: the sign bit is set only under round_min. */
+ uiZ = packToF32UI( softfloat_roundingMode == softfloat_round_min, 0, 0 );
+ goto uiZ;
+ expBBigger:
+ if ( expB == 0xFF ) {
+ if ( sigB ) goto propagateNaN;
+ uiZ = packToF32UI( signZ ^ 1, 0xFF, 0 );
+ goto uiZ;
+ }
+ /* Set bit 30 (just above the shifted fraction) when A's exponent field is
+    nonzero; otherwise double the fraction instead. */
+ sigA += expA ? 0x40000000 : sigA;
+ /* expDiff is negative on this path, so the shift count is positive. */
+ sigA = softfloat_shift32RightJam( sigA, - expDiff );
+ sigB |= 0x40000000;
+ bBigger:
+ signZ ^= 1;
+ expZ = expB;
+ sigZ = sigB - sigA;
+ goto normRoundPack;
+ expABigger:
+ if ( expA == 0xFF ) {
+ if ( sigA ) goto propagateNaN;
+ uiZ = uiA;
+ goto uiZ;
+ }
+ /* Mirror image of the expBBigger path with the operands swapped. */
+ sigB += expB ? 0x40000000 : sigB;
+ sigB = softfloat_shift32RightJam( sigB, expDiff );
+ sigA |= 0x40000000;
+ aBigger:
+ expZ = expA;
+ sigZ = sigA - sigB;
+ normRoundPack:
+ return softfloat_normRoundPackToF32( signZ, expZ - 1, sigZ );
+ propagateNaN:
+ uiZ = softfloat_propagateNaNF32UI( uiA, uiB );
+ uiZ:
+ uZ.ui = uiZ;
+ return uZ.f;
+
+}
+
--- /dev/null
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "platform.h"
+#include "primitives.h"
+#include "internals.h"
+#include "specialize.h"
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Subtracts the magnitudes encoded in the double-precision bit patterns `uiA'
+| and `uiB'.  `signZ' is the sign given to the result when A has the larger
+| magnitude; it is inverted when B has the larger magnitude.  When the two
+| magnitudes are equal, the result is `packToF64UI( s, 0, 0 )' where `s' is
+| set only if the rounding mode is `softfloat_round_min'.  If both exponent
+| fields are 0x7FF and both fractions are zero, the invalid flag is raised and
+| `defaultNaNF64UI' is returned; an operand with exponent field 0x7FF and a
+| nonzero fraction sends both inputs to `softfloat_propagateNaNF64UI'.
+| Ordinary results are produced by `softfloat_normRoundPackToF64'.
+*----------------------------------------------------------------------------*/
+float64_t
+ softfloat_subMagsF64( uint_fast64_t uiA, uint_fast64_t uiB, bool signZ )
+{
+ int_fast16_t expA;
+ uint_fast64_t sigA;
+ int_fast16_t expB;
+ uint_fast64_t sigB;
+ int_fast16_t expDiff;
+ uint_fast64_t uiZ;
+ int_fast16_t expZ;
+ uint_fast64_t sigZ;
+ union ui64_f64 uZ;
+
+ expA = expF64UI( uiA );
+ sigA = fracF64UI( uiA );
+ expB = expF64UI( uiB );
+ sigB = fracF64UI( uiB );
+ expDiff = expA - expB;
+ /* Move both fractions up 10 bits so the alignment shift below has low-order
+    room for the bits it shifts out and jams. */
+ sigA <<= 10;
+ sigB <<= 10;
+ if ( 0 < expDiff ) goto expABigger;
+ if ( expDiff < 0 ) goto expBBigger;
+ /* From here to the next label the exponent fields are equal. */
+ if ( expA == 0x7FF ) {
+ if ( sigA | sigB ) goto propagateNaN;
+ softfloat_raiseFlags( softfloat_flag_invalid );
+ uiZ = defaultNaNF64UI;
+ goto uiZ;
+ }
+ /* A zero exponent field is replaced by 1 for both operands. */
+ if ( ! expA ) {
+ expA = 1;
+ expB = 1;
+ }
+ if ( sigB < sigA ) goto aBigger;
+ if ( sigA < sigB ) goto bBigger;
+ /* Equal magnitudes: the sign bit is set only under round_min. */
+ uiZ = packToF64UI( softfloat_roundingMode == softfloat_round_min, 0, 0 );
+ goto uiZ;
+ expBBigger:
+ if ( expB == 0x7FF ) {
+ if ( sigB ) goto propagateNaN;
+ uiZ = packToF64UI( signZ ^ 1, 0x7FF, 0 );
+ goto uiZ;
+ }
+ /* Set bit 62 (just above the shifted fraction) when A's exponent field is
+    nonzero; otherwise double the fraction instead. */
+ sigA += expA ? UINT64_C( 0x4000000000000000 ) : sigA;
+ /* expDiff is negative on this path, so the shift count is positive. */
+ sigA = softfloat_shift64RightJam( sigA, - expDiff );
+ sigB |= UINT64_C( 0x4000000000000000 );
+ bBigger:
+ signZ ^= 1;
+ expZ = expB;
+ sigZ = sigB - sigA;
+ goto normRoundPack;
+ expABigger:
+ if ( expA == 0x7FF ) {
+ if ( sigA ) goto propagateNaN;
+ uiZ = uiA;
+ goto uiZ;
+ }
+ /* Mirror image of the expBBigger path with the operands swapped. */
+ sigB += expB ? UINT64_C( 0x4000000000000000 ) : sigB;
+ sigB = softfloat_shift64RightJam( sigB, expDiff );
+ sigA |= UINT64_C( 0x4000000000000000 );
+ aBigger:
+ expZ = expA;
+ sigZ = sigA - sigB;
+ normRoundPack:
+ return softfloat_normRoundPackToF64( signZ, expZ - 1, sigZ );
+ propagateNaN:
+ uiZ = softfloat_propagateNaNF64UI( uiA, uiB );
+ uiZ:
+ uZ.ui = uiZ;
+ return uZ.f;
+
+}
+
--- /dev/null
+\r
+#ifndef softfloat_h\r
+#define softfloat_h\r
+\r
+/*** UPDATE COMMENTS. ***/\r
+\r
+/*============================================================================\r
+\r
+This C header file is part of the SoftFloat IEEE Floating-point Arithmetic\r
+Package, Release 2b.\r
+\r
+Written by John R. Hauser. This work was made possible in part by the\r
+International Computer Science Institute, located at Suite 600, 1947 Center\r
+Street, Berkeley, California 94704. Funding was partially provided by the\r
+National Science Foundation under grant MIP-9311980. The original version\r
+of this code was written as part of a project to build a fixed-point vector\r
+processor in collaboration with the University of California at Berkeley,\r
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information\r
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/\r
+arithmetic/SoftFloat.html'.\r
+\r
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has\r
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES\r
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS\r
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,\r
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE\r
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE\r
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR\r
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.\r
+\r
+Derivative works are acceptable, even for commercial purposes, so long as\r
+(1) the source code for the derivative work includes prominent notice that\r
+the work is derivative, and (2) the source code includes prominent notice with\r
+these four paragraphs for those parts of this code that are retained.\r
+\r
+=============================================================================*/\r
+\r
+#include "softfloat_types.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Software floating-point underflow tininess-detection mode.\r
+| `softfloat_detectTininess' is the global mode variable; it presumably holds\r
+| one of the `softfloat_tininess_*' values below (its consumers are not\r
+| declared in this header -- confirm).\r
+*----------------------------------------------------------------------------*/\r
+extern int_fast8_t softfloat_detectTininess;\r
+enum {\r
+ softfloat_tininess_beforeRounding = 0,\r
+ softfloat_tininess_afterRounding = 1\r
+};\r
+\r
+/*----------------------------------------------------------------------------\r
+| Software floating-point rounding mode.  `softfloat_roundingMode' is the\r
+| global mode variable; the enumerators below are the values a rounding mode\r
+| is compared against.  As applied by the integer rounding routines\r
+| (`softfloat_roundPackToI64' and relatives):\r
+|   nearest_even   - round to nearest; an exact tie clears the low result bit.\r
+|   minMag         - never increase the magnitude (truncate).\r
+|   min            - increase the magnitude of an inexact value only when the\r
+|                    sign is negative.\r
+|   max            - increase the magnitude of an inexact value only when the\r
+|                    sign is positive.\r
+|   nearest_maxMag - round to nearest; a tie increases the magnitude.\r
+*----------------------------------------------------------------------------*/\r
+extern int_fast8_t softfloat_roundingMode;\r
+enum {\r
+ softfloat_round_nearest_even = 0,\r
+ softfloat_round_minMag = 1,\r
+ softfloat_round_min = 2,\r
+ softfloat_round_max = 3,\r
+ softfloat_round_nearest_maxMag = 4\r
+};\r
+\r
+/*----------------------------------------------------------------------------\r
+| Software floating-point exception flags.  Each enumerator is a distinct\r
+| bit, so several flags can be accumulated in `softfloat_exceptionFlags' by\r
+| OR-ing them in (as the rounding routines do for the inexact flag).\r
+*----------------------------------------------------------------------------*/\r
+extern int_fast8_t softfloat_exceptionFlags;\r
+enum {\r
+ softfloat_flag_inexact = 1,\r
+ softfloat_flag_underflow = 2,\r
+ softfloat_flag_overflow = 4,\r
+ softfloat_flag_infinity = 8,\r
+ softfloat_flag_invalid = 16\r
+};\r
+\r
+/*----------------------------------------------------------------------------\r
+| Routine to raise any or all of the software floating-point exception flags.\r
+*----------------------------------------------------------------------------*/\r
+void softfloat_raiseFlags( int_fast8_t );\r
+\r
+/*----------------------------------------------------------------------------\r
+| Integer-to-floating-point conversion routines.\r
+*----------------------------------------------------------------------------*/\r
+float32_t ui32_to_f32( uint_fast32_t );\r
+float64_t ui32_to_f64( uint_fast32_t );\r
+floatx80_t ui32_to_fx80( uint_fast32_t );\r
+float128_t ui32_to_f128( uint_fast32_t );\r
+float32_t ui64_to_f32( uint_fast64_t );\r
+float64_t ui64_to_f64( uint_fast64_t );\r
+floatx80_t ui64_to_fx80( uint_fast64_t );\r
+float128_t ui64_to_f128( uint_fast64_t );\r
+float32_t i32_to_f32( int_fast32_t );\r
+float64_t i32_to_f64( int_fast32_t );\r
+floatx80_t i32_to_fx80( int_fast32_t );\r
+float128_t i32_to_f128( int_fast32_t );\r
+float32_t i64_to_f32( int_fast64_t );\r
+float64_t i64_to_f64( int_fast64_t );\r
+floatx80_t i64_to_fx80( int_fast64_t );\r
+float128_t i64_to_f128( int_fast64_t );\r
+\r
+/*----------------------------------------------------------------------------\r
+| 32-bit (single-precision) floating-point operations.\r
+*----------------------------------------------------------------------------*/\r
+uint_fast32_t f32_to_ui32( float32_t, int_fast8_t, bool );\r
+uint_fast64_t f32_to_ui64( float32_t, int_fast8_t, bool );\r
+int_fast32_t f32_to_i32( float32_t, int_fast8_t, bool );\r
+int_fast64_t f32_to_i64( float32_t, int_fast8_t, bool );\r
+uint_fast32_t f32_to_ui32_r_minMag( float32_t, bool );\r
+uint_fast64_t f32_to_ui64_r_minMag( float32_t, bool );\r
+int_fast32_t f32_to_i32_r_minMag( float32_t, bool );\r
+int_fast64_t f32_to_i64_r_minMag( float32_t, bool );\r
+float64_t f32_to_f64( float32_t );\r
+floatx80_t f32_to_fx80( float32_t );\r
+float128_t f32_to_f128( float32_t );\r
+float32_t f32_roundToInt( float32_t, int_fast8_t, bool );\r
+float32_t f32_add( float32_t, float32_t );\r
+float32_t f32_sub( float32_t, float32_t );\r
+float32_t f32_mul( float32_t, float32_t );\r
+float32_t f32_mulAdd( float32_t, float32_t, float32_t );\r
+float32_t f32_div( float32_t, float32_t );\r
+float32_t f32_rem( float32_t, float32_t );\r
+float32_t f32_sqrt( float32_t );\r
+bool f32_eq( float32_t, float32_t );\r
+bool f32_le( float32_t, float32_t );\r
+bool f32_lt( float32_t, float32_t );\r
+bool f32_eq_signaling( float32_t, float32_t );\r
+bool f32_le_quiet( float32_t, float32_t );\r
+bool f32_lt_quiet( float32_t, float32_t );\r
+bool f32_isSignalingNaN( float32_t );\r
+\r
+/*----------------------------------------------------------------------------\r
+| 64-bit (double-precision) floating-point operations.\r
+*----------------------------------------------------------------------------*/\r
+uint_fast32_t f64_to_ui32( float64_t, int_fast8_t, bool );\r
+uint_fast64_t f64_to_ui64( float64_t, int_fast8_t, bool );\r
+int_fast32_t f64_to_i32( float64_t, int_fast8_t, bool );\r
+int_fast64_t f64_to_i64( float64_t, int_fast8_t, bool );\r
+uint_fast32_t f64_to_ui32_r_minMag( float64_t, bool );\r
+uint_fast64_t f64_to_ui64_r_minMag( float64_t, bool );\r
+int_fast32_t f64_to_i32_r_minMag( float64_t, bool );\r
+int_fast64_t f64_to_i64_r_minMag( float64_t, bool );\r
+float32_t f64_to_f32( float64_t );\r
+floatx80_t f64_to_fx80( float64_t );\r
+float128_t f64_to_f128( float64_t );\r
+float64_t f64_roundToInt( float64_t, int_fast8_t, bool );\r
+float64_t f64_add( float64_t, float64_t );\r
+float64_t f64_sub( float64_t, float64_t );\r
+float64_t f64_mul( float64_t, float64_t );\r
+float64_t f64_mulAdd( float64_t, float64_t, float64_t );\r
+float64_t f64_div( float64_t, float64_t );\r
+float64_t f64_rem( float64_t, float64_t );\r
+float64_t f64_sqrt( float64_t );\r
+bool f64_eq( float64_t, float64_t );\r
+bool f64_le( float64_t, float64_t );\r
+bool f64_lt( float64_t, float64_t );\r
+bool f64_eq_signaling( float64_t, float64_t );\r
+bool f64_le_quiet( float64_t, float64_t );\r
+bool f64_lt_quiet( float64_t, float64_t );\r
+bool f64_isSignalingNaN( float64_t );\r
+\r
+/*----------------------------------------------------------------------------\r
+| Extended double-precision rounding precision. Valid values are 32, 64, and\r
+| 80.\r
+*----------------------------------------------------------------------------*/\r
+extern int_fast8_t floatx80_roundingPrecision;\r
+\r
+/*----------------------------------------------------------------------------\r
+| Extended double-precision floating-point operations.\r
+*----------------------------------------------------------------------------*/\r
+uint_fast32_t fx80_to_ui32( floatx80_t, int_fast8_t, bool );\r
+uint_fast64_t fx80_to_ui64( floatx80_t, int_fast8_t, bool );\r
+int_fast32_t fx80_to_i32( floatx80_t, int_fast8_t, bool );\r
+int_fast64_t fx80_to_i64( floatx80_t, int_fast8_t, bool );\r
+uint_fast32_t fx80_to_ui32_r_minMag( floatx80_t, bool );\r
+uint_fast64_t fx80_to_ui64_r_minMag( floatx80_t, bool );\r
+int_fast32_t fx80_to_i32_r_minMag( floatx80_t, bool );\r
+int_fast64_t fx80_to_i64_r_minMag( floatx80_t, bool );\r
+float32_t fx80_to_f32( floatx80_t );\r
+float64_t fx80_to_f64( floatx80_t );\r
+float128_t fx80_to_f128( floatx80_t );\r
+floatx80_t fx80_roundToInt( floatx80_t, int_fast8_t, bool );\r
+floatx80_t fx80_add( floatx80_t, floatx80_t );\r
+floatx80_t fx80_sub( floatx80_t, floatx80_t );\r
+floatx80_t fx80_mul( floatx80_t, floatx80_t );\r
+floatx80_t fx80_mulAdd( floatx80_t, floatx80_t, floatx80_t );\r
+floatx80_t fx80_div( floatx80_t, floatx80_t );\r
+floatx80_t fx80_rem( floatx80_t, floatx80_t );\r
+floatx80_t fx80_sqrt( floatx80_t );\r
+bool fx80_eq( floatx80_t, floatx80_t );\r
+bool fx80_le( floatx80_t, floatx80_t );\r
+bool fx80_lt( floatx80_t, floatx80_t );\r
+bool fx80_eq_signaling( floatx80_t, floatx80_t );\r
+bool fx80_le_quiet( floatx80_t, floatx80_t );\r
+bool fx80_lt_quiet( floatx80_t, floatx80_t );\r
+bool fx80_isSignalingNaN( floatx80_t );\r
+\r
+/*----------------------------------------------------------------------------\r
+| 128-bit (quadruple-precision) floating-point operations.\r
+*----------------------------------------------------------------------------*/\r
+uint_fast32_t f128_to_ui32( float128_t, int_fast8_t, bool );\r
+uint_fast64_t f128_to_ui64( float128_t, int_fast8_t, bool );\r
+int_fast32_t f128_to_i32( float128_t, int_fast8_t, bool );\r
+int_fast64_t f128_to_i64( float128_t, int_fast8_t, bool );\r
+uint_fast32_t f128_to_ui32_r_minMag( float128_t, bool );\r
+uint_fast64_t f128_to_ui64_r_minMag( float128_t, bool );\r
+int_fast32_t f128_to_i32_r_minMag( float128_t, bool );\r
+int_fast64_t f128_to_i64_r_minMag( float128_t, bool );\r
+float32_t f128_to_f32( float128_t );\r
+float64_t f128_to_f64( float128_t );\r
+floatx80_t f128_to_fx80( float128_t );\r
+float128_t f128_roundToInt( float128_t, int_fast8_t, bool );\r
+float128_t f128_add( float128_t, float128_t );\r
+float128_t f128_sub( float128_t, float128_t );\r
+float128_t f128_mul( float128_t, float128_t );\r
+float128_t f128_mulAdd( float128_t, float128_t, float128_t );\r
+float128_t f128_div( float128_t, float128_t );\r
+float128_t f128_rem( float128_t, float128_t );\r
+float128_t f128_sqrt( float128_t );\r
+bool f128_eq( float128_t, float128_t );\r
+bool f128_le( float128_t, float128_t );\r
+bool f128_lt( float128_t, float128_t );\r
+bool f128_eq_signaling( float128_t, float128_t );\r
+bool f128_le_quiet( float128_t, float128_t );\r
+bool f128_lt_quiet( float128_t, float128_t );\r
+bool f128_isSignalingNaN( float128_t );\r
+\r
+#endif\r
+\r
--- /dev/null
+\r
+/*** COMMENTS. ***/\r
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "internals.h"\r
+#include "specialize.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Definitions of the global floating-point state, each with its initial\r
+| value: rounding mode (round to nearest even), underflow tininess-detection\r
+| mode, exception flags (none raised), and extended double-precision rounding\r
+| precision (80).  `init_detectTininess' is not defined in this file;\r
+| presumably it comes from "specialize.h" -- confirm.\r
+*----------------------------------------------------------------------------*/\r
+int_fast8_t softfloat_roundingMode = softfloat_round_nearest_even;\r
+int_fast8_t softfloat_detectTininess = init_detectTininess;\r
+int_fast8_t softfloat_exceptionFlags = 0;\r
+\r
+int_fast8_t floatx80_roundingPrecision = 80;\r
+\r
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts the unsigned integer `a' to single precision.  Zero yields the\r
+| all-zero bit pattern.  A value with bit 31 set is first shifted right one\r
+| bit (jamming the lost bit) and handed to `softfloat_roundPackToF32' with\r
+| exponent 0x9D; any other value goes to `softfloat_normRoundPackToF32' with\r
+| exponent 0x9C.\r
+*----------------------------------------------------------------------------*/\r
+float32_t ui32_to_f32( uint_fast32_t a )\r
+{\r
+    union ui32_f32 uZ;\r
+\r
+    if ( a == 0 ) {\r
+        uZ.ui = 0;\r
+        return uZ.f;\r
+    }\r
+    if ( ! ( a & 0x80000000 ) ) {\r
+        return softfloat_normRoundPackToF32( 0, 0x9C, a );\r
+    }\r
+    return\r
+        softfloat_roundPackToF32(\r
+            0, 0x9D, softfloat_shortShift32Right1Jam( a ) );\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts the unsigned integer `a' to double precision.  Zero yields the\r
+| all-zero bit pattern.  Otherwise `a' is shifted left by its leading-zero\r
+| count plus 21 and packed with exponent 0x432 minus that shift; no rounding\r
+| routine is involved.\r
+*----------------------------------------------------------------------------*/\r
+float64_t ui32_to_f64( uint_fast32_t a )\r
+{\r
+    union ui64_f64 uZ;\r
+    int shiftCount;\r
+\r
+    uZ.ui = 0;\r
+    if ( a ) {\r
+        shiftCount = softfloat_countLeadingZeros32( a ) + 21;\r
+        uZ.ui =\r
+            packToF64UI(\r
+                0, 0x432 - shiftCount, (uint_fast64_t) a<<shiftCount );\r
+    }\r
+    return uZ.f;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts the unsigned integer `a' to single precision.  When `a' has at\r
+| least 40 leading zeros it is packed directly (zero yields the all-zero bit\r
+| pattern).  Otherwise `a' is aligned for rounding -- shifted right with jam\r
+| or left, as needed -- and handed to `softfloat_roundPackToF32'.\r
+*----------------------------------------------------------------------------*/\r
+float32_t ui64_to_f32( uint_fast64_t a )\r
+{\r
+    int shiftCount;\r
+    union ui32_f32 u;\r
+    uint_fast32_t sig;\r
+\r
+    shiftCount = softfloat_countLeadingZeros64( a ) - 40;\r
+    if ( shiftCount < 0 ) {\r
+        /* Too wide to pack directly: keep 7 extra low bits for rounding. */\r
+        shiftCount += 7;\r
+        if ( shiftCount < 0 ) {\r
+            sig = softfloat_shortShift64RightJam( a, - shiftCount );\r
+        } else {\r
+            sig = (uint_fast32_t) a<<shiftCount;\r
+        }\r
+        return softfloat_roundPackToF32( 0, 0x9C - shiftCount, sig );\r
+    }\r
+    u.ui = 0;\r
+    if ( a ) {\r
+        u.ui =\r
+            packToF32UI(\r
+                0, 0x95 - shiftCount, (uint_fast32_t) a<<shiftCount );\r
+    }\r
+    return u.f;\r
+\r
+}\r
+\r
--- /dev/null
+\r
+#include <stdint.h>\r
+#include "platform.h"\r
+#include "primitives.h"\r
+#include "internals.h"\r
+#include "softfloat.h"\r
+\r
+/*----------------------------------------------------------------------------\r
+| Converts the unsigned integer `a' to double precision.  Zero yields the\r
+| all-zero bit pattern.  A value with bit 63 set is first shifted right one\r
+| bit (jamming the lost bit) and handed to `softfloat_roundPackToF64' with\r
+| exponent 0x43D; any other value goes to `softfloat_normRoundPackToF64' with\r
+| exponent 0x43C.\r
+*----------------------------------------------------------------------------*/\r
+float64_t ui64_to_f64( uint_fast64_t a )\r
+{\r
+    union ui64_f64 uZ;\r
+\r
+    if ( a == 0 ) {\r
+        uZ.ui = 0;\r
+        return uZ.f;\r
+    }\r
+    if ( ! ( a & UINT64_C( 0x8000000000000000 ) ) ) {\r
+        return softfloat_normRoundPackToF64( 0, 0x43C, a );\r
+    }\r
+    return\r
+        softfloat_roundPackToF64(\r
+            0, 0x43D, softfloat_shortShift64RightJam( a, 1 ) );\r
+\r
+}\r
+\r