From: Sebastien Bourdeauducq Date: Mon, 28 May 2012 17:41:31 +0000 (+0200) Subject: software/libbase: use compiler-rt X-Git-Tag: 24jan2021_ls180~3151 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=8e03ea26d62a6168c6722691a3e57dd22ef1281e;p=litex.git software/libbase: use compiler-rt --- diff --git a/software/bios/Makefile b/software/bios/Makefile index 325ba57d..4d4fcdd4 100644 --- a/software/bios/Makefile +++ b/software/bios/Makefile @@ -18,7 +18,10 @@ bios.elf: linker.ld $(OBJECTS) libs bios-rescue.elf: linker-rescue.ld $(OBJECTS) libs %.elf: - $(LD) $(LDFLAGS) -T $< -N -o $@ $(OBJECTS) -L$(M2DIR)/software/libbase -lbase + $(LD) $(LDFLAGS) -T $< -N -o $@ $(OBJECTS) \ + -L$(M2DIR)/software/libbase \ + -L$(CRTDIR) \ + -lbase -lcompiler_rt chmod -x $@ libs: diff --git a/software/libbase/Makefile b/software/libbase/Makefile index cf07ad2f..b5a5ab12 100644 --- a/software/libbase/Makefile +++ b/software/libbase/Makefile @@ -1,7 +1,7 @@ M2DIR=../.. include $(M2DIR)/software/common.mak -OBJECTS=divsi3.o setjmp.o libc.o crc16.o crc32.o console.o timer.o system.o board.o uart.o softfloat.o softfloat-glue.o vsnprintf.o strtod.o +OBJECTS=setjmp.o libc.o crc16.o crc32.o console.o timer.o system.o board.o uart.o vsnprintf.o strtod.o all: libbase.a diff --git a/software/libbase/divsi3.c b/software/libbase/divsi3.c deleted file mode 100644 index 0e98556b..00000000 --- a/software/libbase/divsi3.c +++ /dev/null @@ -1,55 +0,0 @@ -#define divnorm(num, den, sign) \ -{ \ - if(num < 0) \ - { \ - num = -num; \ - sign = 1; \ - } \ - else \ - { \ - sign = 0; \ - } \ - \ - if(den < 0) \ - { \ - den = - den; \ - sign = 1 - sign; \ - } \ -} - -#define exitdiv(sign, res) if (sign) { res = - res;} return res; - -long __divsi3 (long numerator, long denominator); -long __divsi3 (long numerator, long denominator) -{ - int sign; - long dividend; - - divnorm(numerator, denominator, sign); - - dividend = (unsigned int)numerator/(unsigned int)denominator; - exitdiv(sign, dividend); -} - 
-long __modsi3 (long numerator, long denominator); -long __modsi3 (long numerator, long denominator) -{ - int sign; - long res; - - if(numerator < 0) { - numerator = -numerator; - sign = 1; - } else - sign = 0; - - if(denominator < 0) - denominator = -denominator; - - res = (unsigned int)numerator % (unsigned int)denominator; - - if(sign) - return -res; - else - return res; -} diff --git a/software/libbase/milieu.h b/software/libbase/milieu.h deleted file mode 100644 index fd5d8145..00000000 --- a/software/libbase/milieu.h +++ /dev/null @@ -1,112 +0,0 @@ - -/*============================================================================ - -This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic -Package, Release 2b. - -Written by John R. Hauser. This work was made possible in part by the -International Computer Science Institute, located at Suite 600, 1947 Center -Street, Berkeley, California 94704. Funding was partially provided by the -National Science Foundation under grant MIP-9311980. The original version -of this code was written as part of a project to build a fixed-point vector -processor in collaboration with the University of California at Berkeley, -overseen by Profs. Nelson Morgan and John Wawrzynek. More information -is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ -arithmetic/SoftFloat.html'. - -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. 
- -Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. - -=============================================================================*/ - -/*---------------------------------------------------------------------------- -| Include common integer types and flags. -*----------------------------------------------------------------------------*/ - -/*---------------------------------------------------------------------------- -| One of the macros `BIGENDIAN' or `LITTLEENDIAN' must be defined. -*----------------------------------------------------------------------------*/ -#define BIGENDIAN - -/*---------------------------------------------------------------------------- -| The macro `BITS64' can be defined to indicate that 64-bit integer types are -| supported by the compiler. -*----------------------------------------------------------------------------*/ -//#define BITS64 - -/*---------------------------------------------------------------------------- -| Each of the following `typedef's defines the most convenient type that holds -| integers of at least as many bits as specified. For example, `uint8' should -| be the most convenient type that can hold unsigned integers of as many as -| 8 bits. The `flag' type must be able to hold either a 0 or 1. For most -| implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed -| to the same as `int'. 
-*----------------------------------------------------------------------------*/ -typedef int flag; -typedef int uint8; -typedef int int8; -typedef int uint16; -typedef int int16; -typedef unsigned int uint32; -typedef signed int int32; -#ifdef BITS64 -typedef unsigned long long int uint64; -typedef signed long long int int64; -#endif - -/*---------------------------------------------------------------------------- -| Each of the following `typedef's defines a type that holds integers -| of _exactly_ the number of bits specified. For instance, for most -| implementation of C, `bits16' and `sbits16' should be `typedef'ed to -| `unsigned short int' and `signed short int' (or `short int'), respectively. -*----------------------------------------------------------------------------*/ -typedef unsigned char bits8; -typedef signed char sbits8; -typedef unsigned short int bits16; -typedef signed short int sbits16; -typedef unsigned int bits32; -typedef signed int sbits32; -#ifdef BITS64 -typedef unsigned long long int bits64; -typedef signed long long int sbits64; -#endif - -#ifdef BITS64 -/*---------------------------------------------------------------------------- -| The `LIT64' macro takes as its argument a textual integer literal and -| if necessary ``marks'' the literal as having a 64-bit integer type. -| For example, the GNU C Compiler (`gcc') requires that 64-bit literals be -| appended with the letters `LL' standing for `long long', which is `gcc's -| name for the 64-bit integer type. Some compilers may allow `LIT64' to be -| defined as the identity macro: `#define LIT64( a ) a'. -*----------------------------------------------------------------------------*/ -#define LIT64( a ) a##LL -#endif - -/*---------------------------------------------------------------------------- -| The macro `INLINE' can be used before functions that should be inlined. If -| a compiler does not support explicit inlining, this macro should be defined -| to be `static'. 
-*----------------------------------------------------------------------------*/ -#define INLINE extern inline - - -/*---------------------------------------------------------------------------- -| Symbolic Boolean literals. -*----------------------------------------------------------------------------*/ -enum { - FALSE = 0, - TRUE = 1 -}; - diff --git a/software/libbase/softfloat-glue.c b/software/libbase/softfloat-glue.c deleted file mode 100644 index 44616373..00000000 --- a/software/libbase/softfloat-glue.c +++ /dev/null @@ -1,274 +0,0 @@ -/* $NetBSD: fplib_glue.c,v 1.2 2000/02/22 01:18:28 mycroft Exp $ */ - -/*- - * Copyright (c) 1997 The NetBSD Foundation, Inc. - * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Neil A. Carson and Mark Brinicombe - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the NetBSD - * Foundation, Inc. and its contributors. - * 4. Neither the name of The NetBSD Foundation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "milieu.h" -#include "softfloat.h" - -int __eqsf2(float32 a,float32 b); -int __eqdf2(float64 a,float64 b); -int __nesf2(float32 a,float32 b); -int __nedf2(float64 a,float64 b); -int __gtsf2(float32 a,float32 b); -int __gtdf2(float64 a,float64 b); -int __gesf2(float32 a,float32 b); -int __gedf2(float64 a,float64 b); -int __ltsf2(float32 a,float32 b); -int __ltdf2(float64 a,float64 b); -int __lesf2(float32 a,float32 b); -int __ledf2(float64 a,float64 b); -float32 __negsf2(float32 a); -float64 __negdf2(float64 a); - -/********************************* COMPARISONS ********************************/ - -/* - * 'Equal' wrapper. This returns 0 if the numbers are equal, or (1 | -1) - * otherwise. So we need to invert the output. - */ - -int __eqsf2(float32 a,float32 b) { - return float32_eq(a,b)?0:1; -} - -int __eqdf2(float64 a,float64 b) { - return float64_eq(a,b)?0:1; -} - -/* - * 'Not Equal' wrapper. This returns -1 or 1 (say, 1!) if the numbers are - * not equal, 0 otherwise. However no not equal call is provided, so we have - * to use an 'equal' call and invert the result. The result is already - * inverted though! Confusing?! 
- */ -int __nesf2(float32 a,float32 b) { - return float32_eq(a,b)?0:-1; -} - -int __nedf2(float64 a,float64 b) { - return float64_eq(a,b)?0:-1; -} - -/* - * 'Greater Than' wrapper. This returns 1 if the number is greater, 0 - * or -1 otherwise. Unfortunately, no such function exists. We have to - * instead compare the numbers using the 'less than' calls in order to - * make up our mind. This means that we can call 'less than or equal' and - * invert the result. - */ -int __gtsf2(float32 a,float32 b) { - return float32_le(a,b)?0:1; -} - -int __gtdf2(float64 a,float64 b) { - return float64_le(a,b)?0:1; -} - -/* - * 'Greater Than or Equal' wrapper. We emulate this by inverting the result - * of a 'less than' call. - */ -int __gesf2(float32 a,float32 b) { - return float32_lt(a,b)?-1:0; -} - -int __gedf2(float64 a,float64 b) { - return float64_lt(a,b)?-1:0; -} - -/* - * 'Less Than' wrapper. A 1 from the ARM code needs to be turned into -1. - */ -int __ltsf2(float32 a,float32 b) { - return float32_lt(a,b)?-1:0; -} - -int __ltdf2(float64 a,float64 b) { - return float64_lt(a,b)?-1:0; -} - -/* - * 'Less Than or Equal' wrapper. A 0 must turn into a 1, and a 1 into a 0. - */ -int __lesf2(float32 a,float32 b) { - return float32_le(a,b)?0:1; -} - -int __ledf2(float64 a,float64 b) { - return float64_le(a,b)?0:1; -} - -/* - * Float negate... This isn't provided by the library, but it's hardly the - * hardest function in the world to write... :) In fact, because of the - * position in the registers of arguments, the double precision version can - * go here too ;-) - */ -float32 __negsf2(float32 a) { - return (a ^ 0x80000000); -} - -float64 __negdf2(float64 a) { - a.high ^= 0x80000000; - return a; -} - -/* - * 32-bit operations. This is not BSD code. 
- */ -float32 __addsf3(float32 a, float32 b); -float32 __addsf3(float32 a, float32 b) -{ - return float32_add(a, b); -} - -float32 __subsf3(float32 a, float32 b); -float32 __subsf3(float32 a, float32 b) -{ - return float32_sub(a, b); -} - -float32 __mulsf3(float32 a, float32 b); -float32 __mulsf3(float32 a, float32 b) -{ - return float32_mul(a, b); -} - -float32 __divsf3(float32 a, float32 b); -float32 __divsf3(float32 a, float32 b) -{ - return float32_div(a, b); -} - -float32 __floatsisf(int32 x); -float32 __floatsisf(int32 x) -{ - return int32_to_float32(x); -} - -float32 __floatunsisf(int32 x); -float32 __floatunsisf(int32 x) -{ - return int32_to_float32(x); // XXX -} - -int32 __fixsfsi(float32 x); -int32 __fixsfsi(float32 x) -{ - return float32_to_int32_round_to_zero(x); -} - -uint32 __fixunssfsi(float32 x); -uint32 __fixunssfsi(float32 x) -{ - return float32_to_int32_round_to_zero(x); // XXX -} - -flag __unordsf2(float32 a, float32 b); -flag __unordsf2(float32 a, float32 b) -{ - /* - * The comparison is unordered if either input is a NaN. - * Test for this by comparing each operand with itself. - * We must perform both comparisons to correctly check for - * signalling NaNs. - */ - return 1 ^ (float32_eq(a, a) & float32_eq(b, b)); -} - -/* - * 64-bit operations. This is not BSD code. 
- */ -float64 __adddf3(float64 a, float64 b); -float64 __adddf3(float64 a, float64 b) -{ - return float64_add(a, b); -} - -float64 __subdf3(float64 a, float64 b); -float64 __subdf3(float64 a, float64 b) -{ - return float64_sub(a, b); -} - -float64 __muldf3(float64 a, float64 b); -float64 __muldf3(float64 a, float64 b) -{ - return float64_mul(a, b); -} - -float64 __divdf3(float64 a, float64 b); -float64 __divdf3(float64 a, float64 b) -{ - return float64_div(a, b); -} - -float64 __floatsidf(int32 x); -float64 __floatsidf(int32 x) -{ - return int32_to_float64(x); -} - -float64 __floatunsidf(int32 x); -float64 __floatunsidf(int32 x) -{ - return int32_to_float64(x); // XXX -} - -int32 __fixdfsi(float64 x); -int32 __fixdfsi(float64 x) -{ - return float64_to_int32_round_to_zero(x); -} - -uint32 __fixunsdfsi(float64 x); -uint32 __fixunsdfsi(float64 x) -{ - return float64_to_int32_round_to_zero(x); // XXX -} - -flag __unorddf2(float64 a, float64 b); -flag __unorddf2(float64 a, float64 b) -{ - /* - * The comparison is unordered if either input is a NaN. - * Test for this by comparing each operand with itself. - * We must perform both comparisons to correctly check for - * signalling NaNs. - */ - return 1 ^ (float64_eq(a, a) & float64_eq(b, b)); -} diff --git a/software/libbase/softfloat-macros.h b/software/libbase/softfloat-macros.h deleted file mode 100644 index b4f74486..00000000 --- a/software/libbase/softfloat-macros.h +++ /dev/null @@ -1,627 +0,0 @@ - -/*============================================================================ - -This C source fragment is part of the SoftFloat IEC/IEEE Floating-point -Arithmetic Package, Release 2b. - -Written by John R. Hauser. This work was made possible in part by the -International Computer Science Institute, located at Suite 600, 1947 Center -Street, Berkeley, California 94704. Funding was partially provided by the -National Science Foundation under grant MIP-9311980. 
The original version -of this code was written as part of a project to build a fixed-point vector -processor in collaboration with the University of California at Berkeley, -overseen by Profs. Nelson Morgan and John Wawrzynek. More information -is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ -arithmetic/SoftFloat.html'. - -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. - -Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. - -=============================================================================*/ - -/*---------------------------------------------------------------------------- -| Shifts `a' right by the number of bits given in `count'. If any nonzero -| bits are shifted off, they are ``jammed'' into the least significant bit of -| the result by setting the least significant bit to 1. The value of `count' -| can be arbitrarily large; in particular, if `count' is greater than 32, the -| result will be either 0 or 1, depending on whether `a' is zero or nonzero. -| The result is stored in the location pointed to by `zPtr'. 
-*----------------------------------------------------------------------------*/ - -INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) -{ - bits32 z; - - if ( count == 0 ) { - z = a; - } - else if ( count < 32 ) { - z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); - } - else { - z = ( a != 0 ); - } - *zPtr = z; - -} - -/*---------------------------------------------------------------------------- -| Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the -| number of bits given in `count'. Any bits shifted off are lost. The value -| of `count' can be arbitrarily large; in particular, if `count' is greater -| than 64, the result will be 0. The result is broken into two 32-bit pieces -| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. -*----------------------------------------------------------------------------*/ - -INLINE void - shift64Right( - bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr ) -{ - bits32 z0, z1; - int8 negCount = ( - count ) & 31; - - if ( count == 0 ) { - z1 = a1; - z0 = a0; - } - else if ( count < 32 ) { - z1 = ( a0<<negCount ) | ( a1>>count ); - z0 = a0>>count; - } - else { - z1 = ( count < 64 ) ? ( a0>>( count & 31 ) ) : 0; - z0 = 0; - } - *z1Ptr = z1; - *z0Ptr = z0; - -} - -/*---------------------------------------------------------------------------- -| Shifts the 64-bit value formed by concatenating `a0' and `a1' right by the -| number of bits given in `count'. If any nonzero bits are shifted off, they -| are ``jammed'' into the least significant bit of the result by setting the -| least significant bit to 1. The value of `count' can be arbitrarily large; -| in particular, if `count' is greater than 64, the result will be either 0 -| or 1, depending on whether the concatenation of `a0' and `a1' is zero or -| nonzero. The result is broken into two 32-bit pieces which are stored at -| the locations pointed to by `z0Ptr' and `z1Ptr'.
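-*----------------------------------------------------------------------------*/ - -INLINE void - shift64RightJamming( - bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr ) -{ - bits32 z0, z1; - int8 negCount = ( - count ) & 31; - - if ( count == 0 ) { - z1 = a1; - z0 = a0; - } - else if ( count < 32 ) { - z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); - z0 = a0>>count; - } - else { - if ( count == 32 ) { - z1 = a0 | ( a1 != 0 ); - } - else if ( count < 64 ) { - z1 = ( a0>>( count & 31 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); - } - else { - z1 = ( ( a0 | a1 ) != 0 ); - } - z0 = 0; - } - *z1Ptr = z1; - *z0Ptr = z0; - -} - -/*---------------------------------------------------------------------------- -| Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' right -| by 32 _plus_ the number of bits given in `count'. The shifted result is -| at most 64 nonzero bits; these are broken into two 32-bit pieces which are -| stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted -| off form a third 32-bit result as follows: The _last_ bit shifted off is -| the most-significant bit of the extra result, and the other 31 bits of the -| extra result are all zero if and only if _all_but_the_last_ bits shifted off -| were all zero. This extra result is stored in the location pointed to by -| `z2Ptr'. The value of `count' can be arbitrarily large. -| (This routine makes more sense if `a0', `a1', and `a2' are considered -| to form a fixed-point value with binary point between `a1' and `a2'. This -| fixed-point value is shifted right by the number of bits given in `count', -| and the integer part of the result is returned at the locations pointed to -| by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly -| corrupted as described above, and is returned at the location pointed to by -| `z2Ptr'.) -*----------------------------------------------------------------------------*/ - -INLINE void - shift64ExtraRightJamming( - bits32 a0, - bits32 a1, - bits32 a2, - int16 count, - bits32 *z0Ptr, - bits32 *z1Ptr, - bits32 *z2Ptr - ) -{ - bits32 z0, z1, z2; - int8 negCount = ( - count ) & 31; - - if ( count == 0 ) { - z2 = a2; - z1 = a1; - z0 = a0; - } - else { - if ( count < 32 ) { - z2 = a1<<negCount; - z1 = ( a0<<negCount ) | ( a1>>count ); - z0 = a0>>count; - } - else { - if ( count == 32 ) { - z2 = a1; - z1 = a0; - } - else { - a2 |= a1; - if ( count < 64 ) { - z2 = a0<<negCount; - z1 = a0>>( count & 31 ); - } - else { - z2 = ( count == 64 ) ? a0 : ( a0 != 0 ); - z1 = 0; - } - } - z0 = 0; - } - z2 |= ( a2 != 0 ); - } - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; - -} - -/*---------------------------------------------------------------------------- -| Shifts the 64-bit value formed by concatenating `a0' and `a1' left by the -| number of bits given in `count'. Any bits shifted off are lost. The value -| of `count' must be less than 32. The result is broken into two 32-bit -| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. -*----------------------------------------------------------------------------*/ - -INLINE void - shortShift64Left( - bits32 a0, bits32 a1, int16 count, bits32 *z0Ptr, bits32 *z1Ptr ) -{ - - *z1Ptr = a1<<count; - *z0Ptr = - ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 31 ) ); - -} - -/*---------------------------------------------------------------------------- -| Shifts the 96-bit value formed by concatenating `a0', `a1', and `a2' left -| by the number of bits given in `count'. Any bits shifted off are lost. -| The value of `count' must be less than 32. The result is broken into three -| 32-bit pieces which are stored at the locations pointed to by `z0Ptr', -| `z1Ptr', and `z2Ptr'.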
-*----------------------------------------------------------------------------*/ - -INLINE void - shortShift96Left( - bits32 a0, - bits32 a1, - bits32 a2, - int16 count, - bits32 *z0Ptr, - bits32 *z1Ptr, - bits32 *z2Ptr - ) -{ - bits32 z0, z1, z2; - int8 negCount; - - z2 = a2<<count; - z1 = a1<<count; - z0 = a0<<count; - if ( 0 < count ) { - negCount = ( ( - count ) & 31 ); - z1 |= a2>>negCount; - z0 |= a1>>negCount; - } - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; - -} - -/*---------------------------------------------------------------------------- -| Adds the 64-bit value formed by concatenating `a0' and `a1' to the 64-bit -| value formed by concatenating `b0' and `b1'. Addition is modulo 2^64, so -| any carry out is lost. The result is broken into two 32-bit pieces which -| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. -*----------------------------------------------------------------------------*/ - -INLINE void - add64( - bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr ) -{ - bits32 z1; - - z1 = a1 + b1; - *z1Ptr = z1; - *z0Ptr = a0 + b0 + ( z1 < a1 ); - -} - -/*---------------------------------------------------------------------------- -| Adds the 96-bit value formed by concatenating `a0', `a1', and `a2' to the -| 96-bit value formed by concatenating `b0', `b1', and `b2'. Addition is -| modulo 2^96, so any carry out is lost. The result is broken into three -| 32-bit pieces which are stored at the locations pointed to by `z0Ptr', -| `z1Ptr', and `z2Ptr'.
-*----------------------------------------------------------------------------*/ - -INLINE void - add96( - bits32 a0, - bits32 a1, - bits32 a2, - bits32 b0, - bits32 b1, - bits32 b2, - bits32 *z0Ptr, - bits32 *z1Ptr, - bits32 *z2Ptr - ) -{ - bits32 z0, z1, z2; - int8 carry0, carry1; - - z2 = a2 + b2; - carry1 = ( z2 < a2 ); - z1 = a1 + b1; - carry0 = ( z1 < a1 ); - z0 = a0 + b0; - z1 += carry1; - z0 += ( z1 < carry1 ); - z0 += carry0; - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; - -} - -/*---------------------------------------------------------------------------- -| Subtracts the 64-bit value formed by concatenating `b0' and `b1' from the -| 64-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo -| 2^64, so any borrow out (carry out) is lost. The result is broken into two -| 32-bit pieces which are stored at the locations pointed to by `z0Ptr' and -| `z1Ptr'. -*----------------------------------------------------------------------------*/ - -INLINE void - sub64( - bits32 a0, bits32 a1, bits32 b0, bits32 b1, bits32 *z0Ptr, bits32 *z1Ptr ) -{ - - *z1Ptr = a1 - b1; - *z0Ptr = a0 - b0 - ( a1 < b1 ); - -} - -/*---------------------------------------------------------------------------- -| Subtracts the 96-bit value formed by concatenating `b0', `b1', and `b2' from -| the 96-bit value formed by concatenating `a0', `a1', and `a2'. Subtraction -| is modulo 2^96, so any borrow out (carry out) is lost. The result is broken -| into three 32-bit pieces which are stored at the locations pointed to by -| `z0Ptr', `z1Ptr', and `z2Ptr'. 
-*----------------------------------------------------------------------------*/ - -INLINE void - sub96( - bits32 a0, - bits32 a1, - bits32 a2, - bits32 b0, - bits32 b1, - bits32 b2, - bits32 *z0Ptr, - bits32 *z1Ptr, - bits32 *z2Ptr - ) -{ - bits32 z0, z1, z2; - int8 borrow0, borrow1; - - z2 = a2 - b2; - borrow1 = ( a2 < b2 ); - z1 = a1 - b1; - borrow0 = ( a1 < b1 ); - z0 = a0 - b0; - z0 -= ( z1 < borrow1 ); - z1 -= borrow1; - z0 -= borrow0; - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; - -} - -/*---------------------------------------------------------------------------- -| Multiplies `a' by `b' to obtain a 64-bit product. The product is broken -| into two 32-bit pieces which are stored at the locations pointed to by -| `z0Ptr' and `z1Ptr'. -*----------------------------------------------------------------------------*/ - -INLINE void mul32To64( bits32 a, bits32 b, bits32 *z0Ptr, bits32 *z1Ptr ) -{ - bits16 aHigh, aLow, bHigh, bLow; - bits32 z0, zMiddleA, zMiddleB, z1; - - aLow = a; - aHigh = a>>16; - bLow = b; - bHigh = b>>16; - z1 = ( (bits32) aLow ) * bLow; - zMiddleA = ( (bits32) aLow ) * bHigh; - zMiddleB = ( (bits32) aHigh ) * bLow; - z0 = ( (bits32) aHigh ) * bHigh; - zMiddleA += zMiddleB; - z0 += ( ( (bits32) ( zMiddleA < zMiddleB ) )<<16 ) + ( zMiddleA>>16 ); - zMiddleA <<= 16; - z1 += zMiddleA; - z0 += ( z1 < zMiddleA ); - *z1Ptr = z1; - *z0Ptr = z0; - -} - -/*---------------------------------------------------------------------------- -| Multiplies the 64-bit value formed by concatenating `a0' and `a1' by `b' -| to obtain a 96-bit product. The product is broken into three 32-bit pieces -| which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and -| `z2Ptr'. 
-*----------------------------------------------------------------------------*/ - -INLINE void - mul64By32To96( - bits32 a0, - bits32 a1, - bits32 b, - bits32 *z0Ptr, - bits32 *z1Ptr, - bits32 *z2Ptr - ) -{ - bits32 z0, z1, z2, more1; - - mul32To64( a1, b, &z1, &z2 ); - mul32To64( a0, b, &z0, &more1 ); - add64( z0, more1, 0, z1, &z0, &z1 ); - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; - -} - -/*---------------------------------------------------------------------------- -| Multiplies the 64-bit value formed by concatenating `a0' and `a1' to the -| 64-bit value formed by concatenating `b0' and `b1' to obtain a 128-bit -| product. The product is broken into four 32-bit pieces which are stored at -| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. -*----------------------------------------------------------------------------*/ - -INLINE void - mul64To128( - bits32 a0, - bits32 a1, - bits32 b0, - bits32 b1, - bits32 *z0Ptr, - bits32 *z1Ptr, - bits32 *z2Ptr, - bits32 *z3Ptr - ) -{ - bits32 z0, z1, z2, z3; - bits32 more1, more2; - - mul32To64( a1, b1, &z2, &z3 ); - mul32To64( a1, b0, &z1, &more2 ); - add64( z1, more2, 0, z2, &z1, &z2 ); - mul32To64( a0, b0, &z0, &more1 ); - add64( z0, more1, 0, z1, &z0, &z1 ); - mul32To64( a0, b1, &more1, &more2 ); - add64( more1, more2, 0, z2, &more1, &z2 ); - add64( z0, z1, 0, more1, &z0, &z1 ); - *z3Ptr = z3; - *z2Ptr = z2; - *z1Ptr = z1; - *z0Ptr = z0; - -} - -/*---------------------------------------------------------------------------- -| Returns an approximation to the 32-bit integer quotient obtained by dividing -| `b' into the 64-bit value formed by concatenating `a0' and `a1'. The -| divisor `b' must be at least 2^31. If q is the exact quotient truncated -| toward zero, the approximation returned lies between q and q + 2 inclusive. -| If the exact quotient q is larger than 32 bits, the maximum positive 32-bit -| unsigned integer is returned. 
-*----------------------------------------------------------------------------*/ - -static bits32 estimateDiv64To32( bits32 a0, bits32 a1, bits32 b ) -{ - bits32 b0, b1; - bits32 rem0, rem1, term0, term1; - bits32 z; - - if ( b <= a0 ) return 0xFFFFFFFF; - b0 = b>>16; - z = ( b0<<16 <= a0 ) ? 0xFFFF0000 : ( a0 / b0 )<<16; - mul32To64( b, z, &term0, &term1 ); - sub64( a0, a1, term0, term1, &rem0, &rem1 ); - while ( ( (sbits32) rem0 ) < 0 ) { - z -= 0x10000; - b1 = b<<16; - add64( rem0, rem1, b0, b1, &rem0, &rem1 ); - } - rem0 = ( rem0<<16 ) | ( rem1>>16 ); - z |= ( b0<<16 <= rem0 ) ? 0xFFFF : rem0 / b0; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns an approximation to the square root of the 32-bit significand given -| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of -| `aExp' (the least significant bit) is 1, the integer returned approximates -| 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' -| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either -| case, the approximation returned lies strictly within +/-2 of the exact -| value. -*----------------------------------------------------------------------------*/ - -static bits32 estimateSqrt32( int16 aExp, bits32 a ) -{ - static const bits16 sqrtOddAdjustments[] = { - 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, - 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 - }; - static const bits16 sqrtEvenAdjustments[] = { - 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, - 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 - }; - int8 index; - bits32 z; - - index = ( a>>27 ) & 15; - if ( aExp & 1 ) { - z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ]; - z = ( ( a / z )<<14 ) + ( z<<15 ); - a >>= 1; - } - else { - z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ]; - z = a / z + z; - z = ( 0x20000 <= z ) ? 
0xFFFF8000 : ( z<<15 ); - if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); - } - return ( ( estimateDiv64To32( a, 0, z ) )>>1 ) + ( z>>1 ); - -} - -/*---------------------------------------------------------------------------- -| Returns the number of leading 0 bits before the most-significant 1 bit of -| `a'. If `a' is zero, 32 is returned. -*----------------------------------------------------------------------------*/ - -static int8 countLeadingZeros32( bits32 a ) -{ - static const int8 countLeadingZerosHigh[] = { - 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; - int8 shiftCount; - - shiftCount = 0; - if ( a < 0x10000 ) { - shiftCount += 16; - a <<= 16; - } - if ( a < 0x1000000 ) { - shiftCount += 8; - a <<= 8; - } - shiftCount += countLeadingZerosHigh[ a>>24 ]; - return shiftCount; - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is -| equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, -| returns 0. 
-*----------------------------------------------------------------------------*/ - -INLINE flag eq64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) -{ - - return ( a0 == b0 ) && ( a1 == b1 ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less -| than or equal to the 64-bit value formed by concatenating `b0' and `b1'. -| Otherwise, returns 0. -*----------------------------------------------------------------------------*/ - -INLINE flag le64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) -{ - - return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is less -| than the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, -| returns 0. -*----------------------------------------------------------------------------*/ - -INLINE flag lt64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) -{ - - return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the 64-bit value formed by concatenating `a0' and `a1' is not -| equal to the 64-bit value formed by concatenating `b0' and `b1'. Otherwise, -| returns 0. 
-*----------------------------------------------------------------------------*/ - -INLINE flag ne64( bits32 a0, bits32 a1, bits32 b0, bits32 b1 ) -{ - - return ( a0 != b0 ) || ( a1 != b1 ); - -} - diff --git a/software/libbase/softfloat-specialize.h b/software/libbase/softfloat-specialize.h deleted file mode 100644 index 8b830a55..00000000 --- a/software/libbase/softfloat-specialize.h +++ /dev/null @@ -1,242 +0,0 @@ - -/*============================================================================ - -This C source fragment is part of the SoftFloat IEC/IEEE Floating-point -Arithmetic Package, Release 2b. - -Written by John R. Hauser. This work was made possible in part by the -International Computer Science Institute, located at Suite 600, 1947 Center -Street, Berkeley, California 94704. Funding was partially provided by the -National Science Foundation under grant MIP-9311980. The original version -of this code was written as part of a project to build a fixed-point vector -processor in collaboration with the University of California at Berkeley, -overseen by Profs. Nelson Morgan and John Wawrzynek. More information -is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ -arithmetic/SoftFloat.html'. - -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. 
- -Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. - -=============================================================================*/ - -/*---------------------------------------------------------------------------- -| Underflow tininess-detection mode, statically initialized to default value. -| (The declaration in `softfloat.h' must match the `int8' type here.) -*----------------------------------------------------------------------------*/ -int8 float_detect_tininess = float_tininess_after_rounding; - -/*---------------------------------------------------------------------------- -| Raises the exceptions specified by `flags'. Floating-point traps can be -| defined here if desired. It is currently not possible for such a trap -| to substitute a result value. If traps are not implemented, this routine -| should be simply `float_exception_flags |= flags;'. -*----------------------------------------------------------------------------*/ - -void float_raise( int8 flags ) -{ - - float_exception_flags |= flags; - -} - -/*---------------------------------------------------------------------------- -| Internal canonical NaN format. -*----------------------------------------------------------------------------*/ -typedef struct { - flag sign; - bits32 high, low; -} commonNaNT; - -/*---------------------------------------------------------------------------- -| The pattern for a default generated single-precision NaN. -*----------------------------------------------------------------------------*/ -enum { - float32_default_nan = 0xFFFFFFFF -}; - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is a NaN; -| otherwise returns 0. 
-*----------------------------------------------------------------------------*/ - -flag float32_is_nan( float32 a ) -{ - - return ( 0xFF000000 < (bits32) ( a<<1 ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is a signaling -| NaN; otherwise returns 0. -*----------------------------------------------------------------------------*/ - -flag float32_is_signaling_nan( float32 a ) -{ - - return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point NaN -| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid -| exception is raised. -*----------------------------------------------------------------------------*/ - -static commonNaNT float32ToCommonNaN( float32 a ) -{ - commonNaNT z; - - if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); - z.sign = a>>31; - z.low = 0; - z.high = a<<9; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the canonical NaN `a' to the single- -| precision floating-point format. -*----------------------------------------------------------------------------*/ - -static float32 commonNaNToFloat32( commonNaNT a ) -{ - - return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>9 ); - -} - -/*---------------------------------------------------------------------------- -| Takes two single-precision floating-point values `a' and `b', one of which -| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a -| signaling NaN, the invalid exception is raised. 
-*----------------------------------------------------------------------------*/ - -static float32 propagateFloat32NaN( float32 a, float32 b ) -{ - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; - - aIsNaN = float32_is_nan( a ); - aIsSignalingNaN = float32_is_signaling_nan( a ); - bIsNaN = float32_is_nan( b ); - bIsSignalingNaN = float32_is_signaling_nan( b ); - a |= 0x00400000; - b |= 0x00400000; - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); - if ( aIsNaN ) { - return ( aIsSignalingNaN & bIsNaN ) ? b : a; - } - else { - return b; - } - -} - -/*---------------------------------------------------------------------------- -| The pattern for a default generated double-precision NaN. The `high' and -| `low' values hold the most- and least-significant bits, respectively. -*----------------------------------------------------------------------------*/ -enum { - float64_default_nan_high = 0xFFFFFFFF, - float64_default_nan_low = 0xFFFFFFFF -}; - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is a NaN; -| otherwise returns 0. -*----------------------------------------------------------------------------*/ - -flag float64_is_nan( float64 a ) -{ - - return - ( 0xFFE00000 <= (bits32) ( a.high<<1 ) ) - && ( a.low || ( a.high & 0x000FFFFF ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is a signaling -| NaN; otherwise returns 0. 
-*----------------------------------------------------------------------------*/ - -flag float64_is_signaling_nan( float64 a ) -{ - - return - ( ( ( a.high>>19 ) & 0xFFF ) == 0xFFE ) - && ( a.low || ( a.high & 0x0007FFFF ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point NaN -| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid -| exception is raised. -*----------------------------------------------------------------------------*/ - -static commonNaNT float64ToCommonNaN( float64 a ) -{ - commonNaNT z; - - if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); - z.sign = a.high>>31; - shortShift64Left( a.high, a.low, 12, &z.high, &z.low ); - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the canonical NaN `a' to the double- -| precision floating-point format. -*----------------------------------------------------------------------------*/ - -static float64 commonNaNToFloat64( commonNaNT a ) -{ - float64 z; - - shift64Right( a.high, a.low, 12, &z.high, &z.low ); - z.high |= ( ( (bits32) a.sign )<<31 ) | 0x7FF80000; - return z; - -} - -/*---------------------------------------------------------------------------- -| Takes two double-precision floating-point values `a' and `b', one of which -| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a -| signaling NaN, the invalid exception is raised. 
-*----------------------------------------------------------------------------*/ - -static float64 propagateFloat64NaN( float64 a, float64 b ) -{ - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; - - aIsNaN = float64_is_nan( a ); - aIsSignalingNaN = float64_is_signaling_nan( a ); - bIsNaN = float64_is_nan( b ); - bIsSignalingNaN = float64_is_signaling_nan( b ); - a.high |= 0x00080000; - b.high |= 0x00080000; - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); - if ( aIsNaN ) { - return ( aIsSignalingNaN & bIsNaN ) ? b : a; - } - else { - return b; - } - -} - diff --git a/software/libbase/softfloat.c b/software/libbase/softfloat.c deleted file mode 100644 index 27a156e4..00000000 --- a/software/libbase/softfloat.c +++ /dev/null @@ -1,2269 +0,0 @@ - -/*============================================================================ - -This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic -Package, Release 2b. - -Written by John R. Hauser. This work was made possible in part by the -International Computer Science Institute, located at Suite 600, 1947 Center -Street, Berkeley, California 94704. Funding was partially provided by the -National Science Foundation under grant MIP-9311980. The original version -of this code was written as part of a project to build a fixed-point vector -processor in collaboration with the University of California at Berkeley, -overseen by Profs. Nelson Morgan and John Wawrzynek. More information -is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ -arithmetic/SoftFloat.html'. - -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. 
USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. - -Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. - -=============================================================================*/ - -#include "milieu.h" -#include "softfloat.h" - -/*---------------------------------------------------------------------------- -| Floating-point rounding mode and exception flags. -*----------------------------------------------------------------------------*/ -int8 float_rounding_mode = float_round_nearest_even; -int8 float_exception_flags = 0; - -/*---------------------------------------------------------------------------- -| Primitive arithmetic functions, including multi-word arithmetic, and -| division and square root approximations. (Can be specialized to target if -| desired.) -*----------------------------------------------------------------------------*/ -#include "softfloat-macros.h" - -/*---------------------------------------------------------------------------- -| Functions and definitions to determine: (1) whether tininess for underflow -| is detected before or after rounding by default, (2) what (if anything) -| happens when exceptions are raised, (3) how signaling NaNs are distinguished -| from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs -| are propagated from function inputs to output. 
These details are target- -| specific. -*----------------------------------------------------------------------------*/ -#include "softfloat-specialize.h" - -/*---------------------------------------------------------------------------- -| Returns the fraction bits of the single-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -INLINE bits32 extractFloat32Frac( float32 a ) -{ - - return a & 0x007FFFFF; - -} - -/*---------------------------------------------------------------------------- -| Returns the exponent bits of the single-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -INLINE int16 extractFloat32Exp( float32 a ) -{ - - return ( a>>23 ) & 0xFF; - -} - -/*---------------------------------------------------------------------------- -| Returns the sign bit of the single-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -INLINE flag extractFloat32Sign( float32 a ) -{ - - return a>>31; - -} - -/*---------------------------------------------------------------------------- -| Normalizes the subnormal single-precision floating-point value represented -| by the denormalized significand `aSig'. The normalized exponent and -| significand are stored at the locations pointed to by `zExpPtr' and -| `zSigPtr', respectively. 
-*----------------------------------------------------------------------------*/ - -static void - normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) -{ - int8 shiftCount; - - shiftCount = countLeadingZeros32( aSig ) - 8; - *zSigPtr = aSig<>7; - zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); - if ( zSig == 0 ) zExp = 0; - return packFloat32( zSign, zExp, zSig ); - -} - -/*---------------------------------------------------------------------------- -| Takes an abstract floating-point value having sign `zSign', exponent `zExp', -| and significand `zSig', and returns the proper single-precision floating- -| point value corresponding to the abstract input. This routine is just like -| `roundAndPackFloat32' except that `zSig' does not have to be normalized. -| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' -| floating-point exponent. -*----------------------------------------------------------------------------*/ - -static float32 - normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) -{ - int8 shiftCount; - - shiftCount = countLeadingZeros32( zSig ) - 1; - return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<>20 ) & 0x7FF; - -} - -/*---------------------------------------------------------------------------- -| Returns the sign bit of the double-precision floating-point value `a'. -*----------------------------------------------------------------------------*/ - -INLINE flag extractFloat64Sign( float64 a ) -{ - - return a.high>>31; - -} - -/*---------------------------------------------------------------------------- -| Normalizes the subnormal double-precision floating-point value represented -| by the denormalized significand formed by the concatenation of `aSig0' and -| `aSig1'. The normalized exponent is stored at the location pointed to by -| `zExpPtr'. 
The most significant 21 bits of the normalized significand are -| stored at the location pointed to by `zSig0Ptr', and the least significant -| 32 bits of the normalized significand are stored at the location pointed to -| by `zSig1Ptr'. -*----------------------------------------------------------------------------*/ - -static void - normalizeFloat64Subnormal( - bits32 aSig0, - bits32 aSig1, - int16 *zExpPtr, - bits32 *zSig0Ptr, - bits32 *zSig1Ptr - ) -{ - int8 shiftCount; - - if ( aSig0 == 0 ) { - shiftCount = countLeadingZeros32( aSig1 ) - 11; - if ( shiftCount < 0 ) { - *zSig0Ptr = aSig1>>( - shiftCount ); - *zSig1Ptr = aSig1<<( shiftCount & 31 ); - } - else { - *zSig0Ptr = aSig1<>( - shiftCount ); - } - if ( aSigExtra ) float_exception_flags |= float_flag_inexact; - roundingMode = float_rounding_mode; - if ( roundingMode == float_round_nearest_even ) { - if ( (sbits32) aSigExtra < 0 ) { - ++z; - if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1; - } - if ( aSign ) z = - z; - } - else { - aSigExtra = ( aSigExtra != 0 ); - if ( aSign ) { - z += ( roundingMode == float_round_down ) & aSigExtra; - z = - z; - } - else { - z += ( roundingMode == float_round_up ) & aSigExtra; - } - } - } - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point value -| `a' to the 32-bit two's complement integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic, except that the conversion is always rounded toward zero. -| If `a' is a NaN, the largest positive integer is returned. Otherwise, if -| the conversion overflows, the largest integer with the same sign as `a' is -| returned. 
-*----------------------------------------------------------------------------*/ - -int32 float32_to_int32_round_to_zero( float32 a ) -{ - flag aSign; - int16 aExp, shiftCount; - bits32 aSig; - int32 z; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - shiftCount = aExp - 0x9E; - if ( 0 <= shiftCount ) { - if ( a != 0xCF000000 ) { - float_raise( float_flag_invalid ); - if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; - } - return (sbits32) 0x80000000; - } - else if ( aExp <= 0x7E ) { - if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; - return 0; - } - aSig = ( aSig | 0x00800000 )<<8; - z = aSig>>( - shiftCount ); - if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { - float_exception_flags |= float_flag_inexact; - } - if ( aSign ) z = - z; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point value -| `a' to the double-precision floating-point format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. 
-*----------------------------------------------------------------------------*/ - -float64 float32_to_float64( float32 a ) -{ - flag aSign; - int16 aExp; - bits32 aSig, zSig0, zSig1; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - if ( aExp == 0xFF ) { - if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); - return packFloat64( aSign, 0x7FF, 0, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - --aExp; - } - shift64Right( aSig, 0, 3, &zSig0, &zSig1 ); - return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 ); - -} - -/*---------------------------------------------------------------------------- -| Rounds the single-precision floating-point value `a' to an integer, -| and returns the result as a single-precision floating-point value. The -| operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float32_round_to_int( float32 a ) -{ - flag aSign; - int16 aExp; - bits32 lastBitMask, roundBitsMask; - int8 roundingMode; - float32 z; - - aExp = extractFloat32Exp( a ); - if ( 0x96 <= aExp ) { - if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { - return propagateFloat32NaN( a, a ); - } - return a; - } - if ( aExp <= 0x7E ) { - if ( (bits32) ( a<<1 ) == 0 ) return a; - float_exception_flags |= float_flag_inexact; - aSign = extractFloat32Sign( a ); - switch ( float_rounding_mode ) { - case float_round_nearest_even: - if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { - return packFloat32( aSign, 0x7F, 0 ); - } - break; - case float_round_down: - return aSign ? 0xBF800000 : 0; - case float_round_up: - return aSign ? 
0x80000000 : 0x3F800000; - } - return packFloat32( aSign, 0, 0 ); - } - lastBitMask = 1; - lastBitMask <<= 0x96 - aExp; - roundBitsMask = lastBitMask - 1; - z = a; - roundingMode = float_rounding_mode; - if ( roundingMode == float_round_nearest_even ) { - z += lastBitMask>>1; - if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) { - z += roundBitsMask; - } - } - z &= ~ roundBitsMask; - if ( z != a ) float_exception_flags |= float_flag_inexact; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the absolute values of the single-precision -| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated -| before being returned. `zSign' is ignored if the result is a NaN. -| The addition is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) -{ - int16 aExp, bExp, zExp; - bits32 aSig, bSig, zSig; - int16 expDiff; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - bSig = extractFloat32Frac( b ); - bExp = extractFloat32Exp( b ); - expDiff = aExp - bExp; - aSig <<= 6; - bSig <<= 6; - if ( 0 < expDiff ) { - if ( aExp == 0xFF ) { - if ( aSig ) return propagateFloat32NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig |= 0x20000000; - } - shift32RightJamming( bSig, expDiff, &bSig ); - zExp = aExp; - } - else if ( expDiff < 0 ) { - if ( bExp == 0xFF ) { - if ( bSig ) return propagateFloat32NaN( a, b ); - return packFloat32( zSign, 0xFF, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig |= 0x20000000; - } - shift32RightJamming( aSig, - expDiff, &aSig ); - zExp = bExp; - } - else { - if ( aExp == 0xFF ) { - if ( aSig | 
bSig ) return propagateFloat32NaN( a, b ); - return a; - } - if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); - zSig = 0x40000000 + aSig + bSig; - zExp = aExp; - goto roundAndPack; - } - aSig |= 0x20000000; - zSig = ( aSig + bSig )<<1; - --zExp; - if ( (sbits32) zSig < 0 ) { - zSig = aSig + bSig; - ++zExp; - } - roundAndPack: - return roundAndPackFloat32( zSign, zExp, zSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the absolute values of the single- -| precision floating-point values `a' and `b'. If `zSign' is 1, the -| difference is negated before being returned. `zSign' is ignored if the -| result is a NaN. The subtraction is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) -{ - int16 aExp, bExp, zExp; - bits32 aSig, bSig, zSig; - int16 expDiff; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - bSig = extractFloat32Frac( b ); - bExp = extractFloat32Exp( b ); - expDiff = aExp - bExp; - aSig <<= 7; - bSig <<= 7; - if ( 0 < expDiff ) goto aExpBigger; - if ( expDiff < 0 ) goto bExpBigger; - if ( aExp == 0xFF ) { - if ( aSig | bSig ) return propagateFloat32NaN( a, b ); - float_raise( float_flag_invalid ); - return float32_default_nan; - } - if ( aExp == 0 ) { - aExp = 1; - bExp = 1; - } - if ( bSig < aSig ) goto aBigger; - if ( aSig < bSig ) goto bBigger; - return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); - bExpBigger: - if ( bExp == 0xFF ) { - if ( bSig ) return propagateFloat32NaN( a, b ); - return packFloat32( zSign ^ 1, 0xFF, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig |= 0x40000000; - } - shift32RightJamming( aSig, - expDiff, &aSig ); - bSig |= 0x40000000; - bBigger: - zSig = bSig - aSig; - zExp = bExp; - zSign ^= 1; - 
goto normalizeRoundAndPack; - aExpBigger: - if ( aExp == 0xFF ) { - if ( aSig ) return propagateFloat32NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig |= 0x40000000; - } - shift32RightJamming( bSig, expDiff, &bSig ); - aSig |= 0x40000000; - aBigger: - zSig = aSig - bSig; - zExp = aExp; - normalizeRoundAndPack: - --zExp; - return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the single-precision floating-point values `a' -| and `b'. The operation is performed according to the IEC/IEEE Standard for -| Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float32_add( float32 a, float32 b ) -{ - flag aSign, bSign; - - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign == bSign ) { - return addFloat32Sigs( a, b, aSign ); - } - else { - return subFloat32Sigs( a, b, aSign ); - } - -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the single-precision floating-point values -| `a' and `b'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float32_sub( float32 a, float32 b ) -{ - flag aSign, bSign; - - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign == bSign ) { - return subFloat32Sigs( a, b, aSign ); - } - else { - return addFloat32Sigs( a, b, aSign ); - } - -} - -/*---------------------------------------------------------------------------- -| Returns the result of multiplying the single-precision floating-point values -| `a' and `b'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -float32 float32_mul( float32 a, float32 b ) -{ - flag aSign, bSign, zSign; - int16 aExp, bExp, zExp; - bits32 aSig, bSig, zSig0, zSig1; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - bSig = extractFloat32Frac( b ); - bExp = extractFloat32Exp( b ); - bSign = extractFloat32Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0xFF ) { - if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { - return propagateFloat32NaN( a, b ); - } - if ( ( bExp | bSig ) == 0 ) { - float_raise( float_flag_invalid ); - return float32_default_nan; - } - return packFloat32( zSign, 0xFF, 0 ); - } - if ( bExp == 0xFF ) { - if ( bSig ) return propagateFloat32NaN( a, b ); - if ( ( aExp | aSig ) == 0 ) { - float_raise( float_flag_invalid ); - return float32_default_nan; - } - return packFloat32( zSign, 0xFF, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); - normalizeFloat32Subnormal( bSig, &bExp, &bSig ); - } - zExp = aExp + bExp - 0x7F; - aSig = ( aSig | 0x00800000 )<<7; - bSig = ( bSig | 0x00800000 )<<8; - mul32To64( aSig, bSig, &zSig0, &zSig1 ); - zSig0 |= ( zSig1 != 0 ); - if ( 0 <= (sbits32) ( zSig0<<1 ) ) { - zSig0 <<= 1; - --zExp; - } - return roundAndPackFloat32( zSign, zExp, zSig0 ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of dividing the single-precision floating-point value `a' -| by the corresponding value `b'. The operation is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -float32 float32_div( float32 a, float32 b ) -{ - flag aSign, bSign, zSign; - int16 aExp, bExp, zExp; - bits32 aSig, bSig, zSig, rem0, rem1, term0, term1; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - bSig = extractFloat32Frac( b ); - bExp = extractFloat32Exp( b ); - bSign = extractFloat32Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0xFF ) { - if ( aSig ) return propagateFloat32NaN( a, b ); - if ( bExp == 0xFF ) { - if ( bSig ) return propagateFloat32NaN( a, b ); - float_raise( float_flag_invalid ); - return float32_default_nan; - } - return packFloat32( zSign, 0xFF, 0 ); - } - if ( bExp == 0xFF ) { - if ( bSig ) return propagateFloat32NaN( a, b ); - return packFloat32( zSign, 0, 0 ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - if ( ( aExp | aSig ) == 0 ) { - float_raise( float_flag_invalid ); - return float32_default_nan; - } - float_raise( float_flag_divbyzero ); - return packFloat32( zSign, 0xFF, 0 ); - } - normalizeFloat32Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - zExp = aExp - bExp + 0x7D; - aSig = ( aSig | 0x00800000 )<<7; - bSig = ( bSig | 0x00800000 )<<8; - if ( bSig <= ( aSig + aSig ) ) { - aSig >>= 1; - ++zExp; - } - zSig = estimateDiv64To32( aSig, 0, bSig ); - if ( ( zSig & 0x3F ) <= 2 ) { - mul32To64( bSig, zSig, &term0, &term1 ); - sub64( aSig, 0, term0, term1, &rem0, &rem1 ); - while ( (sbits32) rem0 < 0 ) { - --zSig; - add64( rem0, rem1, 0, bSig, &rem0, &rem1 ); - } - zSig |= ( rem1 != 0 ); - } - return roundAndPackFloat32( zSign, zExp, zSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns the remainder of the single-precision floating-point value `a' -| with respect to the corresponding value `b'. 
The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float32_rem( float32 a, float32 b ) -{ - flag aSign, bSign, zSign; - int16 aExp, bExp, expDiff; - bits32 aSig, bSig, q, alternateASig; - sbits32 sigMean; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - bSig = extractFloat32Frac( b ); - bExp = extractFloat32Exp( b ); - bSign = extractFloat32Sign( b ); - if ( aExp == 0xFF ) { - if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { - return propagateFloat32NaN( a, b ); - } - float_raise( float_flag_invalid ); - return float32_default_nan; - } - if ( bExp == 0xFF ) { - if ( bSig ) return propagateFloat32NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - float_raise( float_flag_invalid ); - return float32_default_nan; - } - normalizeFloat32Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return a; - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - expDiff = aExp - bExp; - aSig = ( aSig | 0x00800000 )<<8; - bSig = ( bSig | 0x00800000 )<<8; - if ( expDiff < 0 ) { - if ( expDiff < -1 ) return a; - aSig >>= 1; - } - q = ( bSig <= aSig ); - if ( q ) aSig -= bSig; - expDiff -= 32; - while ( 0 < expDiff ) { - q = estimateDiv64To32( aSig, 0, bSig ); - q = ( 2 < q ) ? q - 2 : 0; - aSig = - ( ( bSig>>2 ) * q ); - expDiff -= 30; - } - expDiff += 32; - if ( 0 < expDiff ) { - q = estimateDiv64To32( aSig, 0, bSig ); - q = ( 2 < q ) ? 
q - 2 : 0; - q >>= 32 - expDiff; - bSig >>= 2; - aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; - } - else { - aSig >>= 2; - bSig >>= 2; - } - do { - alternateASig = aSig; - ++q; - aSig -= bSig; - } while ( 0 <= (sbits32) aSig ); - sigMean = aSig + alternateASig; - if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { - aSig = alternateASig; - } - zSign = ( (sbits32) aSig < 0 ); - if ( zSign ) aSig = - aSig; - return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns the square root of the single-precision floating-point value `a'. -| The operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float32_sqrt( float32 a ) -{ - flag aSign; - int16 aExp, zExp; - bits32 aSig, zSig, rem0, rem1, term0, term1; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - if ( aExp == 0xFF ) { - if ( aSig ) return propagateFloat32NaN( a, 0 ); - if ( ! 
aSign ) return a; - float_raise( float_flag_invalid ); - return float32_default_nan; - } - if ( aSign ) { - if ( ( aExp | aSig ) == 0 ) return a; - float_raise( float_flag_invalid ); - return float32_default_nan; - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return 0; - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; - aSig = ( aSig | 0x00800000 )<<8; - zSig = estimateSqrt32( aExp, aSig ) + 2; - if ( ( zSig & 0x7F ) <= 5 ) { - if ( zSig < 2 ) { - zSig = 0x7FFFFFFF; - goto roundAndPack; - } - else { - aSig >>= aExp & 1; - mul32To64( zSig, zSig, &term0, &term1 ); - sub64( aSig, 0, term0, term1, &rem0, &rem1 ); - while ( (sbits32) rem0 < 0 ) { - --zSig; - shortShift64Left( 0, zSig, 1, &term0, &term1 ); - term1 |= 1; - add64( rem0, rem1, term0, term1, &rem0, &rem1 ); - } - zSig |= ( ( rem0 | rem1 ) != 0 ); - } - } - shift32RightJamming( zSig, 1, &zSig ); - roundAndPack: - return roundAndPackFloat32( 0, zExp, zSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float32_eq( float32 a, float32 b ) -{ - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. 
The comparison -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float32_le( float32 a, float32 b ) -{ - flag aSign, bSign; - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); - return ( a == b ) || ( aSign ^ ( a < b ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float32_lt( float32 a, float32 b ) -{ - flag aSign, bSign; - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); - return ( a != b ) && ( aSign ^ ( a < b ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -flag float32_eq_signaling( float32 a, float32 b ) -{ - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than or -| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float32_le_quiet( float32 a, float32 b ) -{ - flag aSign, bSign; - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); - return ( a == b ) || ( aSign ^ ( a < b ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. Otherwise, the comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -flag float32_lt_quiet( float32 a, float32 b ) -{ - flag aSign, bSign; - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); - return ( a != b ) && ( aSign ^ ( a < b ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the 32-bit two's complement integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| positive integer is returned. Otherwise, if the conversion overflows, the -| largest integer with the same sign as `a' is returned. 
-*----------------------------------------------------------------------------*/ - -int32 float64_to_int32( float64 a ) -{ - flag aSign; - int16 aExp, shiftCount; - bits32 aSig0, aSig1, absZ, aSigExtra; - int32 z; - int8 roundingMode; - - aSig1 = extractFloat64Frac1( a ); - aSig0 = extractFloat64Frac0( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - shiftCount = aExp - 0x413; - if ( 0 <= shiftCount ) { - if ( 0x41E < aExp ) { - if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0; - goto invalid; - } - shortShift64Left( - aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra ); - if ( 0x80000000 < absZ ) goto invalid; - } - else { - aSig1 = ( aSig1 != 0 ); - if ( aExp < 0x3FE ) { - aSigExtra = aExp | aSig0 | aSig1; - absZ = 0; - } - else { - aSig0 |= 0x00100000; - aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1; - absZ = aSig0>>( - shiftCount ); - } - } - roundingMode = float_rounding_mode; - if ( roundingMode == float_round_nearest_even ) { - if ( (sbits32) aSigExtra < 0 ) { - ++absZ; - if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1; - } - z = aSign ? - absZ : absZ; - } - else { - aSigExtra = ( aSigExtra != 0 ); - if ( aSign ) { - z = - ( absZ - + ( ( roundingMode == float_round_down ) & aSigExtra ) ); - } - else { - z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra ); - } - } - if ( ( aSign ^ ( z < 0 ) ) && z ) { - invalid: - float_raise( float_flag_invalid ); - return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; - } - if ( aSigExtra ) float_exception_flags |= float_flag_inexact; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the 32-bit two's complement integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic, except that the conversion is always rounded toward zero. 
-| If `a' is a NaN, the largest positive integer is returned. Otherwise, if -| the conversion overflows, the largest integer with the same sign as `a' is -| returned. -*----------------------------------------------------------------------------*/ - -int32 float64_to_int32_round_to_zero( float64 a ) -{ - flag aSign; - int16 aExp, shiftCount; - bits32 aSig0, aSig1, absZ, aSigExtra; - int32 z; - - aSig1 = extractFloat64Frac1( a ); - aSig0 = extractFloat64Frac0( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - shiftCount = aExp - 0x413; - if ( 0 <= shiftCount ) { - if ( 0x41E < aExp ) { - if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0; - goto invalid; - } - shortShift64Left( - aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra ); - } - else { - if ( aExp < 0x3FF ) { - if ( aExp | aSig0 | aSig1 ) { - float_exception_flags |= float_flag_inexact; - } - return 0; - } - aSig0 |= 0x00100000; - aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1; - absZ = aSig0>>( - shiftCount ); - } - z = aSign ? - absZ : absZ; - if ( ( aSign ^ ( z < 0 ) ) && z ) { - invalid: - float_raise( float_flag_invalid ); - return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; - } - if ( aSigExtra ) float_exception_flags |= float_flag_inexact; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the single-precision floating-point format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. 
-*----------------------------------------------------------------------------*/ - -float32 float64_to_float32( float64 a ) -{ - flag aSign; - int16 aExp; - bits32 aSig0, aSig1, zSig; - bits32 allZero; - - aSig1 = extractFloat64Frac1( a ); - aSig0 = extractFloat64Frac0( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if ( aSig0 | aSig1 ) { - return commonNaNToFloat32( float64ToCommonNaN( a ) ); - } - return packFloat32( aSign, 0xFF, 0 ); - } - shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig ); - if ( aExp ) zSig |= 0x40000000; - return roundAndPackFloat32( aSign, aExp - 0x381, zSig ); - -} - -/*---------------------------------------------------------------------------- -| Rounds the double-precision floating-point value `a' to an integer, -| and returns the result as a double-precision floating-point value. The -| operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float64_round_to_int( float64 a ) -{ - flag aSign; - int16 aExp; - bits32 lastBitMask, roundBitsMask; - int8 roundingMode; - float64 z; - - aExp = extractFloat64Exp( a ); - if ( 0x413 <= aExp ) { - if ( 0x433 <= aExp ) { - if ( ( aExp == 0x7FF ) - && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) { - return propagateFloat64NaN( a, a ); - } - return a; - } - lastBitMask = 1; - lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1; - roundBitsMask = lastBitMask - 1; - z = a; - roundingMode = float_rounding_mode; - if ( roundingMode == float_round_nearest_even ) { - if ( lastBitMask ) { - add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); - if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; - } - else { - if ( (sbits32) z.low < 0 ) { - ++z.high; - if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1; - } - } - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat64Sign( 
z ) - ^ ( roundingMode == float_round_up ) ) { - add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); - } - } - z.low &= ~ roundBitsMask; - } - else { - if ( aExp <= 0x3FE ) { - if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a; - float_exception_flags |= float_flag_inexact; - aSign = extractFloat64Sign( a ); - switch ( float_rounding_mode ) { - case float_round_nearest_even: - if ( ( aExp == 0x3FE ) - && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) - ) { - return packFloat64( aSign, 0x3FF, 0, 0 ); - } - break; - case float_round_down: - return - aSign ? packFloat64( 1, 0x3FF, 0, 0 ) - : packFloat64( 0, 0, 0, 0 ); - case float_round_up: - return - aSign ? packFloat64( 1, 0, 0, 0 ) - : packFloat64( 0, 0x3FF, 0, 0 ); - } - return packFloat64( aSign, 0, 0, 0 ); - } - lastBitMask = 1; - lastBitMask <<= 0x413 - aExp; - roundBitsMask = lastBitMask - 1; - z.low = 0; - z.high = a.high; - roundingMode = float_rounding_mode; - if ( roundingMode == float_round_nearest_even ) { - z.high += lastBitMask>>1; - if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { - z.high &= ~ lastBitMask; - } - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat64Sign( z ) - ^ ( roundingMode == float_round_up ) ) { - z.high |= ( a.low != 0 ); - z.high += roundBitsMask; - } - } - z.high &= ~ roundBitsMask; - } - if ( ( z.low != a.low ) || ( z.high != a.high ) ) { - float_exception_flags |= float_flag_inexact; - } - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the absolute values of the double-precision -| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated -| before being returned. `zSign' is ignored if the result is a NaN. -| The addition is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) -{ - int16 aExp, bExp, zExp; - bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; - int16 expDiff; - - aSig1 = extractFloat64Frac1( a ); - aSig0 = extractFloat64Frac0( a ); - aExp = extractFloat64Exp( a ); - bSig1 = extractFloat64Frac1( b ); - bSig0 = extractFloat64Frac0( b ); - bExp = extractFloat64Exp( b ); - expDiff = aExp - bExp; - if ( 0 < expDiff ) { - if ( aExp == 0x7FF ) { - if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig0 |= 0x00100000; - } - shift64ExtraRightJamming( - bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); - zExp = aExp; - } - else if ( expDiff < 0 ) { - if ( bExp == 0x7FF ) { - if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); - return packFloat64( zSign, 0x7FF, 0, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig0 |= 0x00100000; - } - shift64ExtraRightJamming( - aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); - zExp = bExp; - } - else { - if ( aExp == 0x7FF ) { - if ( aSig0 | aSig1 | bSig0 | bSig1 ) { - return propagateFloat64NaN( a, b ); - } - return a; - } - add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); - if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 ); - zSig2 = 0; - zSig0 |= 0x00200000; - zExp = aExp; - goto shiftRight1; - } - aSig0 |= 0x00100000; - add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); - --zExp; - if ( zSig0 < 0x00200000 ) goto roundAndPack; - ++zExp; - shiftRight1: - shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); - roundAndPack: - return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the absolute values of the double- -| precision floating-point values `a' and `b'. 
If `zSign' is 1, the -| difference is negated before being returned. `zSign' is ignored if the -| result is a NaN. The subtraction is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) -{ - int16 aExp, bExp, zExp; - bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; - int16 expDiff; - float64 z; - - aSig1 = extractFloat64Frac1( a ); - aSig0 = extractFloat64Frac0( a ); - aExp = extractFloat64Exp( a ); - bSig1 = extractFloat64Frac1( b ); - bSig0 = extractFloat64Frac0( b ); - bExp = extractFloat64Exp( b ); - expDiff = aExp - bExp; - shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 ); - shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 ); - if ( 0 < expDiff ) goto aExpBigger; - if ( expDiff < 0 ) goto bExpBigger; - if ( aExp == 0x7FF ) { - if ( aSig0 | aSig1 | bSig0 | bSig1 ) { - return propagateFloat64NaN( a, b ); - } - float_raise( float_flag_invalid ); - z.low = float64_default_nan_low; - z.high = float64_default_nan_high; - return z; - } - if ( aExp == 0 ) { - aExp = 1; - bExp = 1; - } - if ( bSig0 < aSig0 ) goto aBigger; - if ( aSig0 < bSig0 ) goto bBigger; - if ( bSig1 < aSig1 ) goto aBigger; - if ( aSig1 < bSig1 ) goto bBigger; - return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 ); - bExpBigger: - if ( bExp == 0x7FF ) { - if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); - return packFloat64( zSign ^ 1, 0x7FF, 0, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig0 |= 0x40000000; - } - shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); - bSig0 |= 0x40000000; - bBigger: - sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); - zExp = bExp; - zSign ^= 1; - goto normalizeRoundAndPack; - aExpBigger: - if ( aExp == 0x7FF ) { - if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - --expDiff; - } 
- else { - bSig0 |= 0x40000000; - } - shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 ); - aSig0 |= 0x40000000; - aBigger: - sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); - zExp = aExp; - normalizeRoundAndPack: - --zExp; - return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the double-precision floating-point values `a' -| and `b'. The operation is performed according to the IEC/IEEE Standard for -| Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float64_add( float64 a, float64 b ) -{ - flag aSign, bSign; - - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - if ( aSign == bSign ) { - return addFloat64Sigs( a, b, aSign ); - } - else { - return subFloat64Sigs( a, b, aSign ); - } - -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the double-precision floating-point values -| `a' and `b'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float64_sub( float64 a, float64 b ) -{ - flag aSign, bSign; - - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - if ( aSign == bSign ) { - return subFloat64Sigs( a, b, aSign ); - } - else { - return addFloat64Sigs( a, b, aSign ); - } - -} - -/*---------------------------------------------------------------------------- -| Returns the result of multiplying the double-precision floating-point values -| `a' and `b'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -float64 float64_mul( float64 a, float64 b ) -{ - flag aSign, bSign, zSign; - int16 aExp, bExp, zExp; - bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; - float64 z; - - aSig1 = extractFloat64Frac1( a ); - aSig0 = extractFloat64Frac0( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - bSig1 = extractFloat64Frac1( b ); - bSig0 = extractFloat64Frac0( b ); - bExp = extractFloat64Exp( b ); - bSign = extractFloat64Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FF ) { - if ( ( aSig0 | aSig1 ) - || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) { - return propagateFloat64NaN( a, b ); - } - if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; - return packFloat64( zSign, 0x7FF, 0, 0 ); - } - if ( bExp == 0x7FF ) { - if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); - if ( ( aExp | aSig0 | aSig1 ) == 0 ) { - invalid: - float_raise( float_flag_invalid ); - z.low = float64_default_nan_low; - z.high = float64_default_nan_high; - return z; - } - return packFloat64( zSign, 0x7FF, 0, 0 ); - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); - normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - if ( bExp == 0 ) { - if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); - normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); - } - zExp = aExp + bExp - 0x400; - aSig0 |= 0x00100000; - shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 ); - mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); - add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); - zSig2 |= ( zSig3 != 0 ); - if ( 0x00200000 <= zSig0 ) { - shift64ExtraRightJamming( - zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); - ++zExp; - } - return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); - -} - -/*---------------------------------------------------------------------------- -| 
Returns the result of dividing the double-precision floating-point value `a' -| by the corresponding value `b'. The operation is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float64_div( float64 a, float64 b ) -{ - flag aSign, bSign, zSign; - int16 aExp, bExp, zExp; - bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; - bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3; - float64 z; - - aSig1 = extractFloat64Frac1( a ); - aSig0 = extractFloat64Frac0( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - bSig1 = extractFloat64Frac1( b ); - bSig0 = extractFloat64Frac0( b ); - bExp = extractFloat64Exp( b ); - bSign = extractFloat64Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FF ) { - if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); - if ( bExp == 0x7FF ) { - if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); - goto invalid; - } - return packFloat64( zSign, 0x7FF, 0, 0 ); - } - if ( bExp == 0x7FF ) { - if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); - return packFloat64( zSign, 0, 0, 0 ); - } - if ( bExp == 0 ) { - if ( ( bSig0 | bSig1 ) == 0 ) { - if ( ( aExp | aSig0 | aSig1 ) == 0 ) { - invalid: - float_raise( float_flag_invalid ); - z.low = float64_default_nan_low; - z.high = float64_default_nan_high; - return z; - } - float_raise( float_flag_divbyzero ); - return packFloat64( zSign, 0x7FF, 0, 0 ); - } - normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); - normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - zExp = aExp - bExp + 0x3FD; - shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 ); - shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 ); - if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) { - shift64Right( aSig0, 
aSig1, 1, &aSig0, &aSig1 ); - ++zExp; - } - zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 ); - mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); - sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); - while ( (sbits32) rem0 < 0 ) { - --zSig0; - add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); - } - zSig1 = estimateDiv64To32( rem1, rem2, bSig0 ); - if ( ( zSig1 & 0x3FF ) <= 4 ) { - mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); - sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); - while ( (sbits32) rem1 < 0 ) { - --zSig1; - add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); - } - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); - } - shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 ); - return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 ); - -} - -/*---------------------------------------------------------------------------- -| Returns the remainder of the double-precision floating-point value `a' -| with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -float64 float64_rem( float64 a, float64 b ) -{ - flag aSign, bSign, zSign; - int16 aExp, bExp, expDiff; - bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; - bits32 allZero, alternateASig0, alternateASig1, sigMean1; - sbits32 sigMean0; - float64 z; - - aSig1 = extractFloat64Frac1( a ); - aSig0 = extractFloat64Frac0( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - bSig1 = extractFloat64Frac1( b ); - bSig0 = extractFloat64Frac0( b ); - bExp = extractFloat64Exp( b ); - bSign = extractFloat64Sign( b ); - if ( aExp == 0x7FF ) { - if ( ( aSig0 | aSig1 ) - || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) { - return propagateFloat64NaN( a, b ); - } - goto invalid; - } - if ( bExp == 0x7FF ) { - if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - if ( ( bSig0 | bSig1 ) == 0 ) { - invalid: - float_raise( float_flag_invalid ); - z.low = float64_default_nan_low; - z.high = float64_default_nan_high; - return z; - } - normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return a; - normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - expDiff = aExp - bExp; - if ( expDiff < -1 ) return a; - shortShift64Left( - aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 ); - shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 ); - q = le64( bSig0, bSig1, aSig0, aSig1 ); - if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); - expDiff -= 32; - while ( 0 < expDiff ) { - q = estimateDiv64To32( aSig0, aSig1, bSig0 ); - q = ( 4 < q ) ? 
q - 4 : 0; - mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 ); - shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero ); - shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero ); - sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 ); - expDiff -= 29; - } - if ( -32 < expDiff ) { - q = estimateDiv64To32( aSig0, aSig1, bSig0 ); - q = ( 4 < q ) ? q - 4 : 0; - q >>= - expDiff; - shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 ); - expDiff += 24; - if ( expDiff < 0 ) { - shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); - } - else { - shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); - } - mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 ); - sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); - } - else { - shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 ); - shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 ); - } - do { - alternateASig0 = aSig0; - alternateASig1 = aSig1; - ++q; - sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); - } while ( 0 <= (sbits32) aSig0 ); - add64( - aSig0, aSig1, alternateASig0, alternateASig1, (bits32 *)&sigMean0, &sigMean1 ); - if ( ( sigMean0 < 0 ) - || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { - aSig0 = alternateASig0; - aSig1 = alternateASig1; - } - zSign = ( (sbits32) aSig0 < 0 ); - if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); - return - normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 ); - -} - -/*---------------------------------------------------------------------------- -| Returns the square root of the double-precision floating-point value `a'. -| The operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. 
-*----------------------------------------------------------------------------*/ - -float64 float64_sqrt( float64 a ) -{ - flag aSign; - int16 aExp, zExp; - bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; - bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3; - float64 z; - - aSig1 = extractFloat64Frac1( a ); - aSig0 = extractFloat64Frac0( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a ); - if ( ! aSign ) return a; - goto invalid; - } - if ( aSign ) { - if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; - invalid: - float_raise( float_flag_invalid ); - z.low = float64_default_nan_low; - z.high = float64_default_nan_high; - return z; - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 ); - normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; - aSig0 |= 0x00100000; - shortShift64Left( aSig0, aSig1, 11, &term0, &term1 ); - zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1; - if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF; - doubleZSig0 = zSig0 + zSig0; - shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 ); - mul32To64( zSig0, zSig0, &term0, &term1 ); - sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 ); - while ( (sbits32) rem0 < 0 ) { - --zSig0; - doubleZSig0 -= 2; - add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 ); - } - zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 ); - if ( ( zSig1 & 0x1FF ) <= 5 ) { - if ( zSig1 == 0 ) zSig1 = 1; - mul32To64( doubleZSig0, zSig1, &term1, &term2 ); - sub64( rem1, 0, term1, term2, &rem1, &rem2 ); - mul32To64( zSig1, zSig1, &term2, &term3 ); - sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); - while ( (sbits32) rem1 < 0 ) { - --zSig1; - shortShift64Left( 0, zSig1, 1, &term2, &term3 ); - term3 |= 1; - term2 |= doubleZSig0; - add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); - } - zSig1 |= ( 
( rem1 | rem2 | rem3 ) != 0 ); - } - shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 ); - return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float64_eq( float64 a, float64 b ) -{ - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) - && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) - && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) - ) { - if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (bits32) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. The comparison -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. 
-*----------------------------------------------------------------------------*/ - -flag float64_le( float64 a, float64 b ) -{ - flag aSign, bSign; - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) - && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) - && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (bits32) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le64( b.high, b.low, a.high, a.low ) - : le64( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float64_lt( float64 a, float64 b ) -{ - flag aSign, bSign; - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) - && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) - && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (bits32) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt64( b.high, b.low, a.high, a.low ) - : lt64( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. 
The invalid exception is -| raised if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float64_eq_signaling( float64 a, float64 b ) -{ - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) - && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) - && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (bits32) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than or -| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float64_le_quiet( float64 a, float64 b ) -{ - flag aSign, bSign; - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) - && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) - && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) - ) { - if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (bits32) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? 
le64( b.high, b.low, a.high, a.low ) - : le64( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. Otherwise, the comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float64_lt_quiet( float64 a, float64 b ) -{ - flag aSign, bSign; - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) - && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) - && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) ) - ) { - if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (bits32) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt64( b.high, b.low, a.high, a.low ) - : lt64( a.high, a.low, b.high, b.low ); - -} - diff --git a/software/libbase/softfloat.h b/software/libbase/softfloat.h deleted file mode 100644 index 0dad06b4..00000000 --- a/software/libbase/softfloat.h +++ /dev/null @@ -1,136 +0,0 @@ - -/*============================================================================ - -This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic -Package, Release 2b. - -Written by John R. Hauser. This work was made possible in part by the -International Computer Science Institute, located at Suite 600, 1947 Center -Street, Berkeley, California 94704. Funding was partially provided by the -National Science Foundation under grant MIP-9311980. 
The original version -of this code was written as part of a project to build a fixed-point vector -processor in collaboration with the University of California at Berkeley, -overseen by Profs. Nelson Morgan and John Wawrzynek. More information -is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ -arithmetic/SoftFloat.html'. - -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. - -Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. - -=============================================================================*/ - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE floating-point types. -*----------------------------------------------------------------------------*/ -typedef bits32 float32; -typedef struct { - bits32 high, low; -} float64; - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE floating-point underflow tininess-detection mode. 
-*----------------------------------------------------------------------------*/ -extern int8 float_detect_tininess; -enum { - float_tininess_after_rounding = 0, - float_tininess_before_rounding = 1 -}; - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE floating-point rounding mode. -*----------------------------------------------------------------------------*/ -extern int8 float_rounding_mode; -enum { - float_round_nearest_even = 0, - float_round_to_zero = 1, - float_round_down = 2, - float_round_up = 3 -}; - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE floating-point exception flags. -*----------------------------------------------------------------------------*/ -extern int8 float_exception_flags; -enum { - float_flag_inexact = 1, - float_flag_underflow = 2, - float_flag_overflow = 4, - float_flag_divbyzero = 8, - float_flag_invalid = 16 -}; - -/*---------------------------------------------------------------------------- -| Routine to raise any or all of the software IEC/IEEE floating-point -| exception flags. -*----------------------------------------------------------------------------*/ -void float_raise( int8 ); - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE integer-to-floating-point conversion routines. -*----------------------------------------------------------------------------*/ -float32 int32_to_float32( int32 ); -float64 int32_to_float64( int32 ); - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE single-precision conversion routines. 
-*----------------------------------------------------------------------------*/ -int32 float32_to_int32( float32 ); -int32 float32_to_int32_round_to_zero( float32 ); -float64 float32_to_float64( float32 ); - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE single-precision operations. -*----------------------------------------------------------------------------*/ -float32 float32_round_to_int( float32 ); -float32 float32_add( float32, float32 ); -float32 float32_sub( float32, float32 ); -float32 float32_mul( float32, float32 ); -float32 float32_div( float32, float32 ); -float32 float32_rem( float32, float32 ); -float32 float32_sqrt( float32 ); -flag float32_eq( float32, float32 ); -flag float32_le( float32, float32 ); -flag float32_lt( float32, float32 ); -flag float32_eq_signaling( float32, float32 ); -flag float32_le_quiet( float32, float32 ); -flag float32_lt_quiet( float32, float32 ); -flag float32_is_nan( float32 ); -flag float32_is_signaling_nan( float32 ); - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE double-precision conversion routines. -*----------------------------------------------------------------------------*/ -int32 float64_to_int32( float64 ); -int32 float64_to_int32_round_to_zero( float64 ); -float32 float64_to_float32( float64 ); - -/*---------------------------------------------------------------------------- -| Software IEC/IEEE double-precision operations. 
-*----------------------------------------------------------------------------*/ -float64 float64_round_to_int( float64 ); -float64 float64_add( float64, float64 ); -float64 float64_sub( float64, float64 ); -float64 float64_mul( float64, float64 ); -float64 float64_div( float64, float64 ); -float64 float64_rem( float64, float64 ); -float64 float64_sqrt( float64 ); -flag float64_eq( float64, float64 ); -flag float64_le( float64, float64 ); -flag float64_lt( float64, float64 ); -flag float64_eq_signaling( float64, float64 ); -flag float64_le_quiet( float64, float64 ); -flag float64_lt_quiet( float64, float64 ); -flag float64_is_nan( float64 ); -flag float64_is_signaling_nan( float64 ); -