/*
 * Copyright (c) 2012-2013 ARM Limited
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Edmund Grimley Evans
 */
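// The original include block is not part of this excerpt. A minimal sketch
// of what the code below needs (reconstructed, so treat it as an
// assumption): <stdint.h> for the fixed-width integer types and <cassert>
// for assert(). FPSCR, FPRounding, the FPRounding_* enumerators and
// FPCRRounding() are assumed to come from the surrounding gem5 ARM headers.
#include <stdint.h>

#include <cassert>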
#define FPLIB_IDC 128 // Input Denormal
#define FPLIB_IXC 16  // Inexact
#define FPLIB_UFC 8   // Underflow
#define FPLIB_OFC 4   // Overflow
#define FPLIB_DZC 2   // Division by Zero
#define FPLIB_IOC 1   // Invalid Operation
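// The rounding-mode and mode-flag constants used below are also missing from
// this excerpt. The values here are reconstructed, not quoted: modeConv()
// packs FPSCR bits 25:22 (RMode, FZ, DN) into the mode word, and the code
// uses "mode & 3", "mode & FPLIB_FZ" and "mode | alt_hp << 4", which fixes
// the bit positions. The spellings in the full file may differ:
#define FPLIB_RN 0    // round to nearest even (FPSCR RMode 0b00)
#define FPLIB_RP 1    // round towards plus infinity (0b01)
#define FPLIB_RM 2    // round towards minus infinity (0b10)
#define FPLIB_FZ 4    // flush-to-zero mode (FPSCR.FZ)
#define FPLIB_DN 8    // default-NaN mode (FPSCR.DN)
#define FPLIB_AHP 16  // alternative half-precision mode (FPSCR.AHP)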
static inline uint16_t
lsl16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x << shift : 0;
}

static inline uint16_t
lsr16(uint16_t x, uint32_t shift)
{
    return shift < 16 ? x >> shift : 0;
}

static inline uint32_t
lsl32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x << shift : 0;
}

static inline uint32_t
lsr32(uint32_t x, uint32_t shift)
{
    return shift < 32 ? x >> shift : 0;
}

static inline uint64_t
lsl64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x << shift : 0;
}

static inline uint64_t
lsr64(uint64_t x, uint32_t shift)
{
    return shift < 64 ? x >> shift : 0;
}
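// A note on the helpers above: in C++ a shift by an amount greater than or
// equal to the operand width is undefined behaviour, and the alignment code
// below routinely computes exponent differences of 32 or more. These
// wrappers define the result of such a shift to be 0, which is also the
// mathematically correct value when an entire mantissa is shifted out.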
static inline void
lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r1 = x1;
        *r0 = x0;
    } else if (shift < 64) {
        *r1 = x1 << shift | x0 >> (64 - shift);
        *r0 = x0 << shift;
    } else if (shift < 128) {
        *r1 = x0 << (shift - 64);
        *r0 = 0;
    } else {
        *r1 = 0;
        *r0 = 0;
    }
}
static inline void
lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
{
    if (shift == 0) {
        *r0 = x0;
        *r1 = x1;
    } else if (shift < 64) {
        *r0 = x0 >> shift | x1 << (64 - shift);
        *r1 = x1 >> shift;
    } else if (shift < 128) {
        *r0 = x1 >> (shift - 64);
        *r1 = 0;
    } else {
        *r0 = 0;
        *r1 = 0;
    }
}
static inline void
mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
{
    uint32_t mask = ((uint32_t)1 << 31) - 1;
    uint64_t a0 = a & mask;
    uint64_t a1 = a >> 31 & mask;
    uint64_t b0 = b & mask;
    uint64_t b1 = b >> 31 & mask;
    uint64_t p0 = a0 * b0;
    uint64_t p2 = a1 * b1;
    uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
    uint64_t s0 = p0;
    uint64_t s1 = (s0 >> 31) + p1;
    uint64_t s2 = (s1 >> 31) + p2;
    *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
    *x1 = s2 >> 2;
}
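// mul62x62() is a Karatsuba-style multiply: three partial products suffice
// because (a0 + a1) * (b0 + b1) = a0*b0 + a1*b1 + (a0*b1 + a1*b0), so the
// middle term p1 is recovered with two subtractions instead of two extra
// multiplies. Splitting at 31 bits (not 32) keeps every partial product and
// carry sum below 2^64, which is why the operands must fit in 62 bits; the
// s0/s1/s2 lines then reassemble the 124-bit result in base 2^31 and split
// it across *x0 (low 64 bits) and *x1 (high bits).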
static inline void
mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
{
    uint64_t t0 = (uint64_t)(uint32_t)a * b;
    uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
    *x0 = t1 << 32 | (uint32_t)t0;
    *x1 = t1 >> 32;
}
static inline void
add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 + b0;
    *x1 = a1 + b1 + (*x0 < a0);
}

static inline void
sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
       uint64_t b1)
{
    *x0 = a0 - b0;
    *x1 = a1 - b1 - (*x0 > a0);
}
static inline int
cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
{
    return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
}
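// In add128()/sub128() the carry out of (or borrow into) the low word is
// detected purely from unsigned wrap-around: after *x0 = a0 + b0 a carry
// occurred exactly when the sum wrapped, i.e. *x0 < a0, and after
// *x0 = a0 - b0 a borrow occurred exactly when the difference wrapped, i.e.
// *x0 > a0. cmp128() compares high words first, so together with the 128-bit
// shifts above these form the complete multi-precision kit used by the
// double-precision multiply, divide and square-root paths below.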
static inline uint16_t
fp16_normalise(uint16_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 8; shift; shift >>= 1) {
        if (!(mnt >> (16 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint32_t
fp32_normalise(uint32_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 16; shift; shift >>= 1) {
        if (!(mnt >> (32 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}

static inline uint64_t
fp64_normalise(uint64_t mnt, int *exp)
{
    int shift;

    if (!mnt) {
        return 0;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(mnt >> (64 - shift))) {
            mnt <<= shift;
            *exp -= shift;
        }
    }
    return mnt;
}
static inline void
fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
{
    uint64_t x0 = *mnt0;
    uint64_t x1 = *mnt1;
    int shift;

    if (!x0 && !x1) {
        return;
    }

    if (!x1) {
        x1 = x0;
        x0 = 0;
        *exp -= 64;
    }

    for (shift = 32; shift; shift >>= 1) {
        if (!(x1 >> (64 - shift))) {
            x1 = x1 << shift | x0 >> (64 - shift);
            x0 <<= shift;
            *exp -= shift;
        }
    }

    *mnt0 = x0;
    *mnt1 = x1;
}
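// The normalise routines shift the mantissa left until its top bit is set,
// decrementing the exponent as they go. The loop is a branch-light binary
// search: for a 64-bit value it tries shifts of 32, 16, 8, 4, 2 and 1, so a
// fixed six iterations replace a data-dependent count-leading-zeros loop.
// For example, if the top 32 bits are clear it shifts by 32 and subtracts 32
// from the exponent, then refines the position with the smaller shifts.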
static inline uint16_t
fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
{
    return sgn << 15 | exp << 10 | (mnt & (((uint16_t)1 << 10) - 1));
}

static inline uint32_t
fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
{
    return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1));
}

static inline uint64_t
fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
{
    return (uint64_t)sgn << 63 | exp << 52 | (mnt & (((uint64_t)1 << 52) - 1));
}
static inline uint16_t
fp16_zero(int sgn)
{
    return fp16_pack(sgn, 0, 0);
}

static inline uint32_t
fp32_zero(int sgn)
{
    return fp32_pack(sgn, 0, 0);
}

static inline uint64_t
fp64_zero(int sgn)
{
    return fp64_pack(sgn, 0, 0);
}

static inline uint16_t
fp16_max_normal(int sgn)
{
    return fp16_pack(sgn, 30, -1);
}

static inline uint32_t
fp32_max_normal(int sgn)
{
    return fp32_pack(sgn, 254, -1);
}

static inline uint64_t
fp64_max_normal(int sgn)
{
    return fp64_pack(sgn, 2046, -1);
}

static inline uint16_t
fp16_infinity(int sgn)
{
    return fp16_pack(sgn, 31, 0);
}

static inline uint32_t
fp32_infinity(int sgn)
{
    return fp32_pack(sgn, 255, 0);
}

static inline uint64_t
fp64_infinity(int sgn)
{
    return fp64_pack(sgn, 2047, 0);
}

static inline uint16_t
fp16_defaultNaN()
{
    return fp16_pack(0, 31, (uint16_t)1 << 9);
}

static inline uint32_t
fp32_defaultNaN()
{
    return fp32_pack(0, 255, (uint32_t)1 << 22);
}

static inline uint64_t
fp64_defaultNaN()
{
    return fp64_pack(0, 2047, (uint64_t)1 << 51);
}
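// The constants above follow the IEEE 754 encodings: max_normal passes -1 as
// the mantissa and lets fp*_pack() mask it down to an all-ones fraction with
// the largest finite exponent; infinity is the all-ones exponent with a zero
// fraction; and the default (quiet) NaN is positive with only the top
// fraction bit set, which is exactly what ARM's default-NaN mode returns.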
static inline void
fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
            int *flags)
{
    *sgn = x >> 15;
    *exp = x >> 10 & 31;
    *mnt = x & (((uint16_t)1 << 10) - 1);

    // Handle subnormals:
    if (*exp) {
        *mnt |= (uint16_t)1 << 10;
    } else {
        ++*exp;
        // There is no flush to zero in this case!
    }
}

static inline void
fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
            int *flags)
{
    *sgn = x >> 31;
    *exp = x >> 23 & 255;
    *mnt = x & (((uint32_t)1 << 23) - 1);

    // Handle subnormals:
    if (*exp) {
        *mnt |= (uint32_t)1 << 23;
    } else {
        ++*exp;
        // Handle flush to zero:
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}

static inline void
fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
            int *flags)
{
    *sgn = x >> 63;
    *exp = x >> 52 & 2047;
    *mnt = x & (((uint64_t)1 << 52) - 1);

    // Handle subnormals:
    if (*exp) {
        *mnt |= (uint64_t)1 << 52;
    } else {
        ++*exp;
        // Handle flush to zero:
        if ((mode & FPLIB_FZ) && *mnt) {
            *flags |= FPLIB_IDC;
            *mnt = 0;
        }
    }
}
static inline uint32_t
fp32_process_NaN(uint32_t a, int mode, int *flags)
{
    if (!(a >> 22 & 1)) {
        // Signalling NaN:
        *flags |= FPLIB_IOC;
        a |= (uint32_t)1 << 22;
    }
    return mode & FPLIB_DN ? fp32_defaultNaN() : a;
}

static inline uint64_t
fp64_process_NaN(uint64_t a, int mode, int *flags)
{
    if (!(a >> 51 & 1)) {
        // Signalling NaN:
        *flags |= FPLIB_IOC;
        a |= (uint64_t)1 << 51;
    }
    return mode & FPLIB_DN ? fp64_defaultNaN() : a;
}
static uint32_t
fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_exp = a >> 23 & 255;
    uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
    int b_exp = b >> 23 & 255;
    uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);

    // Handle signalling NaNs:
    if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
        return fp32_process_NaN(a, mode, flags);
    if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
        return fp32_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (a_exp == 255 && a_mnt)
        return fp32_process_NaN(a, mode, flags);
    if (b_exp == 255 && b_mnt)
        return fp32_process_NaN(b, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_exp = a >> 52 & 2047;
    uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
    int b_exp = b >> 52 & 2047;
    uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);

    // Handle signalling NaNs:
    if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
        return fp64_process_NaN(a, mode, flags);
    if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
        return fp64_process_NaN(b, mode, flags);

    // Handle quiet NaNs:
    if (a_exp == 2047 && a_mnt)
        return fp64_process_NaN(a, mode, flags);
    if (b_exp == 2047 && b_mnt)
        return fp64_process_NaN(b, mode, flags);

    return 0;
}
static uint32_t
fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
{
    int a_exp = a >> 23 & 255;
    uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
    int b_exp = b >> 23 & 255;
    uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);
    int c_exp = c >> 23 & 255;
    uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1);

    // Handle signalling NaNs:
    if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
        return fp32_process_NaN(a, mode, flags);
    if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
        return fp32_process_NaN(b, mode, flags);
    if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1))
        return fp32_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (a_exp == 255 && a_mnt)
        return fp32_process_NaN(a, mode, flags);
    if (b_exp == 255 && b_mnt)
        return fp32_process_NaN(b, mode, flags);
    if (c_exp == 255 && c_mnt)
        return fp32_process_NaN(c, mode, flags);

    return 0;
}

static uint64_t
fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
{
    int a_exp = a >> 52 & 2047;
    uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
    int b_exp = b >> 52 & 2047;
    uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);
    int c_exp = c >> 52 & 2047;
    uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1);

    // Handle signalling NaNs:
    if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
        return fp64_process_NaN(a, mode, flags);
    if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
        return fp64_process_NaN(b, mode, flags);
    if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1))
        return fp64_process_NaN(c, mode, flags);

    // Handle quiet NaNs:
    if (a_exp == 2047 && a_mnt)
        return fp64_process_NaN(a, mode, flags);
    if (b_exp == 2047 && b_mnt)
        return fp64_process_NaN(b, mode, flags);
    if (c_exp == 2047 && c_mnt)
        return fp64_process_NaN(c, mode, flags);

    return 0;
}
static uint16_t
fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint16_t int_mant; // mantissa for result, less than (1 << 11)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // There is no flush to zero in this case!

    // The bottom 5 bits of mnt are orred together:
    mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0);

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr16(mnt, 3 - exp);
        error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == (uint32_t)1 << 10) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == (uint32_t)1 << 11) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (!(mode & FPLIB_AHP)) {
        if (biased_exp >= 31) {
            *flags |= FPLIB_OFC | FPLIB_IXC;
            if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
                (rm == FPLIB_RM && sgn)) {
                return fp16_infinity(sgn);
            } else {
                return fp16_max_normal(sgn);
            }
        }
    } else {
        if (biased_exp >= 32) {
            *flags |= FPLIB_IOC;
            return fp16_pack(sgn, 31, -1);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp16_pack(sgn, biased_exp, int_mant);
}
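// How the rounding helpers encode the discarded bits: after the "orred
// together" step the mantissa carries two explicit extra bits plus a sticky
// bit, and "error" distils them into 0 (exact), 1 (below the halfway point),
// 2 (exactly halfway) or 3 (above halfway). Round-to-nearest then increments
// when error == 3, or when error == 2 and the kept mantissa is odd (ties to
// even); the directed modes increment on any nonzero error in their
// direction. For example, rounding 1.0101|11 to four fraction bits gives
// error == 3, so the result rounds up to 1.0110.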
static uint32_t
fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint32_t int_mant; // mantissa for result, less than (1 << 24)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Handle flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp32_zero(sgn);
    }

    // The bottom 8 bits of mnt are orred together:
    mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0);

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr32(mnt, 3 - exp);
        error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == (uint32_t)1 << 23) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == (uint32_t)1 << 24) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= 255) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp32_infinity(sgn);
        } else {
            return fp32_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp32_pack(sgn, biased_exp, int_mant);
}

static uint32_t
fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
{
    return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
}
static uint64_t
fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
{
    int biased_exp; // non-negative exponent value for result
    uint64_t int_mant; // mantissa for result, less than (1 << 53)
    int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5

    assert(rm != FPRounding_TIEAWAY);

    // Handle flush to zero:
    if ((mode & FPLIB_FZ) && exp < 1) {
        *flags |= FPLIB_UFC;
        return fp64_zero(sgn);
    }

    // The bottom 11 bits of mnt are orred together:
    mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x3ff) != 0);

    if (exp > 0) {
        biased_exp = exp;
        int_mant = mnt >> 2;
        error = mnt & 3;
    } else {
        biased_exp = 0;
        int_mant = lsr64(mnt, 3 - exp);
        error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
    }

    if (!biased_exp && error) { // xx should also check fpscr_val<11>
        *flags |= FPLIB_UFC;
    }

    // Round up:
    if ((rm == FPLIB_RN && (error == 3 ||
                            (error == 2 && (int_mant & 1)))) ||
        (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
        ++int_mant;
        if (int_mant == (uint64_t)1 << 52) {
            // Rounded up from denormalized to normalized
            biased_exp = 1;
        }
        if (int_mant == (uint64_t)1 << 53) {
            // Rounded up to next exponent
            ++biased_exp;
            int_mant >>= 1;
        }
    }

    // Handle rounding to odd aka Von Neumann rounding:
    if (error && rm == FPRounding_ODD)
        int_mant |= 1;

    // Handle overflow:
    if (biased_exp >= 2047) {
        *flags |= FPLIB_OFC | FPLIB_IXC;
        if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
            (rm == FPLIB_RM && sgn)) {
            return fp64_infinity(sgn);
        } else {
            return fp64_max_normal(sgn);
        }
    }

    if (error) {
        *flags |= FPLIB_IXC;
    }

    return fp64_pack(sgn, biased_exp, int_mant);
}

static uint64_t
fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
{
    return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
}
static int
fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
        (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
        if ((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) ||
            (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1)))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
        (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint32_t a_mnt, b_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
        (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}
static int
fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
        (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
        if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) ||
            (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b >> 51 & 1)))
            *flags |= FPLIB_IOC;
        return 0;
    }
    return a == b || (!a_mnt && !b_mnt);
}

static int
fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
        (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 1;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 1;
}

static int
fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp;
    uint64_t a_mnt, b_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
        (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
        *flags |= FPLIB_IOC;
        return 0;
    }
    if (!a_mnt && !b_mnt)
        return 0;
    if (a_sgn != b_sgn)
        return b_sgn;
    if (a_exp != b_exp)
        return a_sgn ^ (a_exp > b_exp);
    if (a_mnt != b_mnt)
        return a_sgn ^ (a_mnt > b_mnt);
    return 0;
}
static uint32_t
fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x, x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == 255) {
        return fp32_infinity(a_sgn);
    } else if (b_exp == 255) {
        return fp32_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp32_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags);
}
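// fp32_add() aligns the operands by shifting the smaller one right and
// folding every shifted-out bit into the lowest kept bit (the "sticky" or).
// The earlier <<= 3 widening gives the guard and round bits room to survive
// that shift, and the final fp32_round(x_sgn, x_exp + 5, x_mnt << 1, ...)
// undoes it: normalisation leaves the top bit at bit 31, the rounding helper
// treats that bit as implicit, and the +5 compensates for the extra shifts.
// fp64_add() below is the same construction with an 8-bit correction.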
static uint64_t
fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x, x_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    b_sgn ^= neg;

    // Handle infinities and zeroes:
    if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == 2047) {
        return fp64_infinity(a_sgn);
    } else if (b_exp == 2047) {
        return fp64_infinity(b_sgn);
    } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
        return fp64_zero(a_sgn);
    }

    a_mnt <<= 3;
    b_mnt <<= 3;
    if (a_exp >= b_exp) {
        b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
                 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
        b_exp = a_exp;
    } else {
        a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
                 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
        a_exp = b_exp;
    }
    x_sgn = a_sgn;
    x_exp = a_exp;
    if (a_sgn == b_sgn) {
        x_mnt = a_mnt + b_mnt;
    } else if (a_mnt >= b_mnt) {
        x_mnt = a_mnt - b_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = b_mnt - a_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags);
}
static uint32_t
fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    } else if (a_exp == 255 || b_exp == 255) {
        return fp32_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp32_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - 110;
    x_mnt = (uint64_t)a_mnt * b_mnt;
    x_mnt = fp64_normalise(x_mnt, &x_exp);

    // Convert to 32 bits, collapsing error into bottom bit:
    x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}
static uint64_t
fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint64_t a_mnt, b_mnt, x;
    uint64_t x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags))) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    } else if (a_exp == 2047 || b_exp == 2047) {
        return fp64_infinity(a_sgn ^ b_sgn);
    } else if (!a_mnt || !b_mnt) {
        return fp64_zero(a_sgn ^ b_sgn);
    }

    // Multiply and normalise:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp + b_exp - 1000;
    mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);

    // Convert to 64 bits, collapsing error into bottom bit:
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
}
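// A note on the exponent constants in the multiply paths: the bias terms
// (x_exp = a_exp + b_exp - 110 for single, - 1000 for double) fold together
// the IEEE bias that is counted twice when two biased exponents are added,
// the position of the binary point in the widened product, and the
// adjustment the rounding helper expects after normalisation. They are
// chosen so that one fp32_round()/fp64_round() convention (mantissa
// normalised to the top of the word, sticky bit in bit 0) serves add,
// multiply, divide and fused multiply-add alike.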
static uint32_t
fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint32_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x_mnt, y_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp32_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (a_exp == 255 && (a_mnt >> 22 & 1) &&
        ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) ||
         (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) {
        x = fp32_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == 255 && !c_mnt) ||
        (c_exp == 255 && !b_mnt) ||
        (a_exp == 255 && (b_exp == 255 || c_exp == 255) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == 255)
        return fp32_infinity(a_sgn);
    if (b_exp == 255 || c_exp == 255)
        return fp32_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp32_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 13;
    x_mnt = (uint64_t)a_mnt << 27;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - 113;
    y_mnt = (uint64_t)b_mnt * c_mnt << 3;
    if (!y_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
                 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
        y_exp = x_exp;
    } else {
        x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
                 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        x_mnt = x_mnt + y_mnt;
    } else if (x_mnt >= y_mnt) {
        x_mnt = x_mnt - y_mnt;
    } else {
        x_sgn ^= 1;
        x_mnt = y_mnt - x_mnt;
    }

    if (!x_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp32_zero((mode & 3) == 2);
    }

    // Normalise and convert to 32 bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
}
static uint64_t
fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
            int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
    uint64_t a_mnt, b_mnt, c_mnt, x;
    uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
    fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);

    x = fp64_process_NaNs3(a, b, c, mode, flags);

    // Quiet NaN added to product of zero and infinity:
    if (a_exp == 2047 && (a_mnt >> 51 & 1) &&
        ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) ||
         (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) {
        x = fp64_defaultNaN();
        *flags |= FPLIB_IOC;
    }

    if (x) {
        return x;
    }

    // Handle infinities and zeroes:
    if ((b_exp == 2047 && !c_mnt) ||
        (c_exp == 2047 && !b_mnt) ||
        (a_exp == 2047 && (b_exp == 2047 || c_exp == 2047) &&
         (a_sgn != (b_sgn ^ c_sgn)))) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == 2047)
        return fp64_infinity(a_sgn);
    if (b_exp == 2047 || c_exp == 2047)
        return fp64_infinity(b_sgn ^ c_sgn);
    if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
        return fp64_zero(a_sgn);

    x_sgn = a_sgn;
    x_exp = a_exp + 11;
    x0_mnt = 0;
    x1_mnt = a_mnt;

    // Multiply:
    y_sgn = b_sgn ^ c_sgn;
    y_exp = b_exp + c_exp - 1003;
    mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
    if (!y0_mnt && !y1_mnt) {
        y_exp = x_exp;
    }

    // Add:
    if (x_exp >= y_exp) {
        uint64_t t0, t1;
        lsl128(&t0, &t1, y0_mnt, y1_mnt,
               x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
        lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
        y0_mnt |= !!(t0 | t1);
        y_exp = x_exp;
    } else {
        uint64_t t0, t1;
        lsl128(&t0, &t1, x0_mnt, x1_mnt,
               y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
        lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
        x0_mnt |= !!(t0 | t1);
        x_exp = y_exp;
    }
    if (x_sgn == y_sgn) {
        add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
        sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
    } else {
        x_sgn ^= 1;
        sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
    }

    if (!x0_mnt && !x1_mnt) {
        // Sign of exact zero result depends on rounding mode
        return fp64_zero((mode & 3) == 2);
    }

    // Normalise and convert to 64 bits, collapsing error into bottom bit:
    fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
    x0_mnt = x1_mnt << 1 | !!x0_mnt;

    return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
}
static uint32_t
fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
    uint32_t a_mnt, b_mnt, x;
    uint64_t x_mnt;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp32_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }
    if (a_exp == 255 || !b_mnt) {
        if (a_exp != 255)
            *flags |= FPLIB_DZC;
        return fp32_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == 255)
        return fp32_zero(a_sgn ^ b_sgn);

    // Divide, setting bottom bit if inexact:
    a_mnt = fp32_normalise(a_mnt, &a_exp);
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + 172;
    x_mnt = ((uint64_t)a_mnt << 18) / b_mnt;
    x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18);

    // Normalise and convert to 32 bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}
static uint64_t
fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
{
    int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
    fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);

    if ((x = fp64_process_NaNs(a, b, mode, flags)))
        return x;

    // Handle infinities and zeroes:
    if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }
    if (a_exp == 2047 || !b_mnt) {
        if (a_exp != 2047)
            *flags |= FPLIB_DZC;
        return fp64_infinity(a_sgn ^ b_sgn);
    }
    if (!a_mnt || b_exp == 2047)
        return fp64_zero(a_sgn ^ b_sgn);

    // Find reciprocal of divisor with Newton-Raphson:
    a_mnt = fp64_normalise(a_mnt, &a_exp);
    b_mnt = fp64_normalise(b_mnt, &b_exp);
    x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
    mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
    sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
    mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);

    // Multiply by dividend:
    x_sgn = a_sgn ^ b_sgn;
    x_exp = a_exp - b_exp + 1031;
    mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // xx 62x62 is enough
    lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
    x_mnt = x1_mnt;

    // This is an underestimate, so try adding one:
    mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // xx 62x62 is enough
    c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
    if (c <= 0) {
        ++x_mnt;
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}
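// The double-precision divide cannot use a single wide quotient the way
// fp32_div() uses one 64-bit division, so it builds a reciprocal instead.
// The mul64x32/sub128/lsr128 sequence is one Newton-Raphson step for
// f(r) = 1/r - b, whose update r' = r * (2 - b * r) roughly doubles the
// number of correct bits: the ~32-bit seed from the 64-bit division becomes
// a ~64-bit reciprocal, which is then multiplied by the dividend. Because
// the chain deliberately underestimates, the final "try adding one" compare
// against the dividend recovers the correctly rounded quotient and yields
// the inexact (sticky) bit.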
static void
set_fpscr0(FPSCR &fpscr, int flags)
{
    if (flags & FPLIB_IDC) {
        fpscr.idc = 1;
    }
    if (flags & FPLIB_IOC) {
        fpscr.ioc = 1;
    }
    if (flags & FPLIB_DZC) {
        fpscr.dzc = 1;
    }
    if (flags & FPLIB_OFC) {
        fpscr.ofc = 1;
    }
    if (flags & FPLIB_UFC) {
        fpscr.ufc = 1;
    }
    if (flags & FPLIB_IXC) {
        fpscr.ixc = 1;
    }
}
static uint32_t
fp32_sqrt(uint32_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp;
    uint32_t a_mnt, x, x_mnt;
    uint64_t t0, t1;

    fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (a_exp == 255 && (uint32_t)(a_mnt << 9))
        return fp32_process_NaN(a, mode, flags);

    // Handle infinities and zeroes:
    if (!a_mnt) {
        return fp32_zero(a_sgn);
    }
    if (a_exp == 255 && !a_sgn) {
        return fp32_infinity(a_sgn);
    }
    if (a_sgn) {
        *flags |= FPLIB_IOC;
        return fp32_defaultNaN();
    }

    a_mnt = fp32_normalise(a_mnt, &a_exp);
    if (!(a_exp & 1)) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28);

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = (a_mnt / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 32-bit accuracy
    x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);

    x_sgn = 0;
    x_exp = (a_exp + 147) >> 1;
    x_mnt = ((x - (1 << 5)) >> 6) + 1;
    t1 = (uint64_t)x_mnt * x_mnt;
    t0 = (uint64_t)a_mnt << 19;
    if (t1 > t0) {
        --x_mnt;
        t1 = (uint64_t)x_mnt * x_mnt;
    }

    x_mnt = fp32_normalise(x_mnt, &x_exp);

    return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
}
static uint64_t
fp64_sqrt(uint64_t a, int mode, int *flags)
{
    int a_sgn, a_exp, x_sgn, x_exp, c;
    uint64_t a_mnt, x_mnt, r, x0, x1;
    uint32_t x;

    fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);

    // Handle NaNs:
    if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) {
        return fp64_process_NaN(a, mode, flags);
    }

    // Handle infinities and zeroes:
    if (!a_mnt)
        return fp64_zero(a_sgn);
    if (a_exp == 2047 && !a_sgn)
        return fp64_infinity(a_sgn);
    if (a_sgn) {
        *flags |= FPLIB_IOC;
        return fp64_defaultNaN();
    }

    a_mnt = fp64_normalise(a_mnt, &a_exp);
    if (!(a_exp & 1)) {
        ++a_exp;
        a_mnt >>= 1;
    }

    // x = (a * 3 + 5) / 8
    x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28);

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 16-bit accuracy
    x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;

    // x = (a / x + x) / 2; // 32-bit accuracy
    x = ((a_mnt / x) >> 2) + (x >> 1);

    // r = 1 / x; // 32-bit accuracy
    r = ((uint64_t)1 << 62) / x;

    // r = r * (2 - x * r); // 64-bit accuracy
    mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
    lsr128(&x0, &x1, x0, x1, 31);

    // x = (x + a * r) / 2; // 64-bit accuracy
    mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
    lsl128(&x0, &x1, x0, x1, 5);
    lsr128(&x0, &x1, x0, x1, 56);

    x0 = ((uint64_t)x << 31) + (x0 >> 1);

    x_sgn = 0;
    x_exp = (a_exp + 1053) >> 1;
    x_mnt = x0;
    x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
    mul62x62(&x0, &x1, x_mnt, x_mnt);
    lsl128(&x0, &x1, x0, x1, 19);
    c = cmp128(x0, x1, 0, a_mnt);
    if (c > 0) {
        --x_mnt;
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
}
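// Both square roots use the same scheme: force the exponent odd so that
// halving it stays exact, seed with the linear estimate (3a + 5) / 8, which
// stays within a few per cent of sqrt(a) across the whole octave, then run
// Newton-Raphson steps of the form x' = (a / x + x) / 2, each of which
// roughly doubles the number of correct bits. The double-precision version
// avoids a wide division in the last step by computing a reciprocal r of
// the estimate and using x' = (x + a * r) / 2 instead. A final compare of
// the squared candidate against the operand decides the one-ulp correction
// and the inexact (sticky) bit.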
static int
modeConv(FPSCR fpscr)
{
    return (((int) fpscr) >> 22) & 0xF;
}
static void
set_fpscr(FPSCR &fpscr, int flags)
{
    // translate back to FPSCR
    bool underflow = false;
    if (flags & FPLIB_IDC) {
        fpscr.idc = 1;
    }
    if (flags & FPLIB_IOC) {
        fpscr.ioc = 1;
    }
    if (flags & FPLIB_DZC) {
        fpscr.dzc = 1;
    }
    if (flags & FPLIB_OFC) {
        fpscr.ofc = 1;
    }
    if (flags & FPLIB_UFC) {
        underflow = true; //xx Why is this required?
        fpscr.ufc = 1;
    }
    if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
        fpscr.ixc = 1;
    }
}
template <>
bool
fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}

template <>
bool
fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
{
    int flags = 0;
    int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
    set_fpscr(fpscr, flags);
    return x;
}
template <>
uint32_t
fplibAbs(uint32_t op)
{
    return op & ~((uint32_t)1 << 31);
}

template <>
uint64_t
fplibAbs(uint64_t op)
{
    return op & ~((uint64_t)1 << 63);
}
template <>
uint32_t
fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}
template <>
int
fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2, result;
    uint32_t mnt1, mnt2;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) ||
        (exp2 == 255 && (uint32_t)(mnt2 << 9))) {
        result = 3; // unordered
        if ((exp1 == 255 && (uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) ||
            (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) ||
            signal_nans)
            flags |= FPLIB_IOC;
    } else {
        if (op1 == op2 || (!mnt1 && !mnt2)) {
            result = 6; // equal
        } else if (sgn1 != sgn2) {
            result = sgn1 ? 8 : 2; // less than : greater than
        } else if (exp1 != exp2) {
            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
        } else {
            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
int
fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2, result;
    uint64_t mnt1, mnt2;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) ||
        (exp2 == 2047 && (uint64_t)(mnt2 << 12))) {
        result = 3; // unordered
        if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) ||
            (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) ||
            signal_nans)
            flags |= FPLIB_IOC;
    } else {
        if (op1 == op2 || (!mnt1 && !mnt2)) {
            result = 6; // equal
        } else if (sgn1 != sgn2) {
            result = sgn1 ? 8 : 2; // less than : greater than
        } else if (exp1 != exp2) {
            result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
        } else {
            result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
static uint16_t
fp16_FPConvertNaN_32(uint32_t op)
{
    return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13);
}

static uint16_t
fp16_FPConvertNaN_64(uint64_t op)
{
    return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42);
}

static uint32_t
fp32_FPConvertNaN_16(uint16_t op)
{
    return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13);
}

static uint32_t
fp32_FPConvertNaN_64(uint64_t op)
{
    return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29);
}

static uint64_t
fp64_FPConvertNaN_16(uint16_t op)
{
    return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42);
}

static uint64_t
fp64_FPConvertNaN_32(uint32_t op)
{
    return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29);
}
static uint32_t
fp32_FPOnePointFive(int sgn)
{
    return fp32_pack(sgn, 127, (uint64_t)1 << 22);
}

static uint64_t
fp64_FPOnePointFive(int sgn)
{
    return fp64_pack(sgn, 1023, (uint64_t)1 << 51);
}

static uint32_t
fp32_FPThree(int sgn)
{
    return fp32_pack(sgn, 128, (uint64_t)1 << 22);
}

static uint64_t
fp64_FPThree(int sgn)
{
    return fp64_pack(sgn, 1024, (uint64_t)1 << 51);
}

static uint32_t
fp32_FPTwo(int sgn)
{
    return fp32_pack(sgn, 128, 0);
}

static uint64_t
fp64_FPTwo(int sgn)
{
    return fp64_pack(sgn, 1024, 0);
}
template <>
uint16_t
fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt;
    uint16_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    bool alt_hp = fpscr.ahp;

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        if (alt_hp) {
            result = fp16_zero(sgn);
        } else if (fpscr.dn) {
            result = fp16_defaultNaN();
        } else {
            result = fp16_FPConvertNaN_32(op);
        }
        if (!(mnt >> 22 & 1) || alt_hp) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 255) {
        if (alt_hp) {
            result = sgn << 15 | (uint16_t)0x7fff;
            flags |= FPLIB_IOC;
        } else {
            result = fp16_infinity(sgn);
        }
    } else if (!mnt) {
        result = fp16_zero(sgn);
    } else {
        result = fp16_round_(sgn, exp - 127 + 15,
                             mnt >> 7 | !!(uint32_t)(mnt << 25),
                             rounding, mode | alt_hp << 4, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
template <>
uint16_t
fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt;
    uint16_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    bool alt_hp = fpscr.ahp;

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        if (alt_hp) {
            result = fp16_zero(sgn);
        } else if (fpscr.dn) {
            result = fp16_defaultNaN();
        } else {
            result = fp16_FPConvertNaN_64(op);
        }
        if (!(mnt >> 51 & 1) || alt_hp) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 2047) {
        if (alt_hp) {
            result = sgn << 15 | (uint16_t)0x7fff;
            flags |= FPLIB_IOC;
        } else {
            result = fp16_infinity(sgn);
        }
    } else if (!mnt) {
        result = fp16_zero(sgn);
    } else {
        result = fp16_round_(sgn, exp - 1023 + 15,
                             mnt >> 36 | !!(uint64_t)(mnt << 28),
                             rounding, mode | alt_hp << 4, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
template <>
uint32_t
fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;
    uint32_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
        if (fpscr.dn) {
            result = fp32_defaultNaN();
        } else {
            result = fp32_FPConvertNaN_16(op);
        }
        if (!(mnt >> 9 & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 31 && !fpscr.ahp) {
        result = fp32_infinity(sgn);
    } else if (!mnt) {
        result = fp32_zero(sgn);
    } else {
        mnt = fp16_normalise(mnt, &exp);
        result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
template <>
uint32_t
fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt;
    uint32_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        if (fpscr.dn) {
            result = fp32_defaultNaN();
        } else {
            result = fp32_FPConvertNaN_64(op);
        }
        if (!(mnt >> 51 & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 2047) {
        result = fp32_infinity(sgn);
    } else if (!mnt) {
        result = fp32_zero(sgn);
    } else {
        result = fp32_round_(sgn, exp - 1023 + 127,
                             mnt >> 20 | !!(uint64_t)(mnt << 44),
                             rounding, mode, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
template <>
uint64_t
fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint16_t mnt;
    uint64_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
        if (fpscr.dn) {
            result = fp64_defaultNaN();
        } else {
            result = fp64_FPConvertNaN_16(op);
        }
        if (!(mnt >> 9 & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 31 && !fpscr.ahp) {
        result = fp64_infinity(sgn);
    } else if (!mnt) {
        result = fp64_zero(sgn);
    } else {
        mnt = fp16_normalise(mnt, &exp);
        result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
template <>
uint64_t
fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt;
    uint64_t result;

    // Unpack floating-point operand optionally with flush-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        if (fpscr.dn) {
            result = fp64_defaultNaN();
        } else {
            result = fp64_FPConvertNaN_32(op);
        }
        if (!(mnt >> 22 & 1)) {
            flags |= FPLIB_IOC;
        }
    } else if (exp == 255) {
        result = fp64_infinity(sgn);
    } else if (!mnt) {
        result = fp64_zero(sgn);
    } else {
        mnt = fp32_normalise(mnt, &exp);
        result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
template <>
uint32_t
fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}
template <>
uint32_t
fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}
static uint32_t
fp32_repack(int sgn, int exp, uint32_t mnt)
{
    return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt);
}

static uint64_t
fp64_repack(int sgn, int exp, uint64_t mnt)
{
    return fp64_pack(sgn, mnt >> 52 ? exp : 0, mnt);
}
static void
fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
{
    // Treat a single quiet-NaN as +Infinity/-Infinity
    if (!((uint32_t)~(*op1 << 1) >> 23) && (uint32_t)~(*op2 << 1) >> 23)
        *op1 = fp32_infinity(sgn);
    if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23)
        *op2 = fp32_infinity(sgn);
}

static void
fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
{
    // Treat a single quiet-NaN as +Infinity/-Infinity
    if (!((uint64_t)~(*op1 << 1) >> 52) && (uint64_t)~(*op2 << 1) >> 52)
        *op1 = fp64_infinity(sgn);
    if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52)
        *op2 = fp64_infinity(sgn);
}
template <>
uint32_t
fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, x, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp32_repack(sgn1, exp1, mnt1) :
                  fp32_repack(sgn2, exp2, mnt2));
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, x, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
                  fp64_repack(sgn1, exp1, mnt1) :
                  fp64_repack(sgn2, exp2, mnt2));
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    fp32_minmaxnum(&op1, &op2, 1);
    return fplibMax<uint32_t>(op1, op2, fpscr);
}

template <>
uint64_t
fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    fp64_minmaxnum(&op1, &op2, 1);
    return fplibMax<uint64_t>(op1, op2, fpscr);
}
template <>
uint32_t
fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, x, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp32_repack(sgn1, exp1, mnt1) :
                  fp32_repack(sgn2, exp2, mnt2));
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, x, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
        result = x;
    } else {
        result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
                  fp64_repack(sgn1, exp1, mnt1) :
                  fp64_repack(sgn2, exp2, mnt2));
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint32_t
fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    fp32_minmaxnum(&op1, &op2, 0);
    return fplibMin<uint32_t>(op1, op2, fpscr);
}

template <>
uint64_t
fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    fp64_minmaxnum(&op1, &op2, 0);
    return fplibMin<uint64_t>(op1, op2, fpscr);
}
template <>
uint32_t
fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}

template <>
uint64_t
fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}
template <>
uint32_t
fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
            result = fp32_FPTwo(sgn1 ^ sgn2);
        } else if (exp1 == 255 || exp2 == 255) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else if (!mnt1 || !mnt2) {
            result = fp32_zero(sgn1 ^ sgn2);
        } else {
            result = fp32_mul(op1, op2, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
            result = fp64_FPTwo(sgn1 ^ sgn2);
        } else if (exp1 == 2047 || exp2 == 2047) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else if (!mnt1 || !mnt2) {
            result = fp64_zero(sgn1 ^ sgn2);
        } else {
            result = fp64_mul(op1, op2, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
template <>
uint32_t
fplibNeg(uint32_t op)
{
    return op ^ (uint32_t)1 << 31;
}

template <>
uint64_t
fplibNeg(uint64_t op)
{
    return op ^ (uint64_t)1 << 63;
}
static const uint8_t recip_sqrt_estimate[256] = {
    255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
    226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
    201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
    180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
    162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
    145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
    131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
    118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
    105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
    85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
    67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
    52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
    39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
    28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
    17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
    8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
};
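// recip_sqrt_estimate[] is indexed with ((~exp & 1) << 7) | (mnt >> N & 127):
// the low bit of the exponent selects between the two octaves of the
// normalised operand (1/sqrt(x) repeats its fraction pattern with period two
// in the exponent), and the next seven mantissa bits select the entry. The
// 8-bit entry sits under an implicit leading one, giving a roughly 9-bit
// first approximation that the estimate functions below position with the
// (380 - exp) >> 1 and (3068 - exp) >> 1 exponent arithmetic.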
template <>
uint32_t
fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (!mnt) {
        result = fp32_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (sgn) {
        result = fp32_defaultNaN();
        flags |= FPLIB_IOC;
    } else if (exp == 255) {
        result = fp32_zero(0);
    } else {
        mnt = fp32_normalise(mnt, &exp);
        mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)];
        result = fp32_pack(0, (380 - exp) >> 1, mnt << 15);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else if (!mnt) {
        result = fp64_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (sgn) {
        result = fp64_defaultNaN();
        flags |= FPLIB_IOC;
    } else if (exp == 2047) {
        result = fp64_zero(0);
    } else {
        mnt = fp64_normalise(mnt, &exp);
        mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)];
        result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
template <>
uint32_t
fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    op1 = fplibNeg<uint32_t>(op1);
    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
            result = fp32_FPOnePointFive(0);
        } else if (exp1 == 255 || exp2 == 255) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else {
            result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    op1 = fplibNeg<uint64_t>(op1);
    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
            result = fp64_FPOnePointFive(0);
        } else if (exp1 == 2047 || exp2 == 2047) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else {
            result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
template <>
uint32_t
fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint32_t mnt1, mnt2, result;

    op1 = fplibNeg<uint32_t>(op1);
    fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp32_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
            result = fp32_FPTwo(0);
        } else if (exp1 == 255 || exp2 == 255) {
            result = fp32_infinity(sgn1 ^ sgn2);
        } else {
            result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
template <>
uint32_t
fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (exp == 255) {
        result = fp32_zero(sgn);
    } else if (!mnt) {
        result = fp32_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (!((uint32_t)(op << 1) >> 22)) {
        bool overflow_to_inf = false;
        switch (FPCRRounding(fpscr)) {
          case FPRounding_TIEEVEN:
            overflow_to_inf = true;
            break;
          case FPRounding_POSINF:
            overflow_to_inf = !sgn;
            break;
          case FPRounding_NEGINF:
            overflow_to_inf = sgn;
            break;
          case FPRounding_ZERO:
            overflow_to_inf = false;
            break;
          default:
            assert(0);
        }
        result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
        flags |= FPLIB_OFC | FPLIB_IXC;
    } else if (fpscr.fz && exp >= 253) {
        result = fp32_zero(sgn);
        flags |= FPLIB_UFC;
    } else {
        mnt = fp32_normalise(mnt, &exp);
        int result_exp = 253 - exp;
        uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1;
        fraction <<= 15;
        if (result_exp == 0) {
            fraction >>= 1;
        } else if (result_exp == -1) {
            fraction >>= 2;
            result_exp = 0;
        }
        result = fp32_pack(sgn, result_exp, fraction);
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else if (exp == 2047) {
        result = fp64_zero(sgn);
    } else if (!mnt) {
        result = fp64_infinity(sgn);
        flags |= FPLIB_DZC;
    } else if (!((uint64_t)(op << 1) >> 51)) {
        bool overflow_to_inf = false;
        switch (FPCRRounding(fpscr)) {
          case FPRounding_TIEEVEN:
            overflow_to_inf = true;
            break;
          case FPRounding_POSINF:
            overflow_to_inf = !sgn;
            break;
          case FPRounding_NEGINF:
            overflow_to_inf = sgn;
            break;
          case FPRounding_ZERO:
            overflow_to_inf = false;
            break;
          default:
            assert(0);
        }
        result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
        flags |= FPLIB_OFC | FPLIB_IXC;
    } else if (fpscr.fz && exp >= 2045) {
        result = fp64_zero(sgn);
        flags |= FPLIB_UFC;
    } else {
        mnt = fp64_normalise(mnt, &exp);
        int result_exp = 2045 - exp;
        uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1;
        fraction <<= 44;
        if (result_exp == 0) {
            fraction >>= 1;
        } else if (result_exp == -1) {
            fraction >>= 2;
            result_exp = 0;
        }
        result = fp64_pack(sgn, result_exp, fraction);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
template <>
uint64_t
fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn1, exp1, sgn2, exp2;
    uint64_t mnt1, mnt2, result;

    op1 = fplibNeg<uint64_t>(op1);
    fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
    fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);

    result = fp64_process_NaNs(op1, op2, mode, &flags);
    if (!result) {
        if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
            result = fp64_FPTwo(0);
        } else if (exp1 == 2047 || exp2 == 2047) {
            result = fp64_infinity(sgn1 ^ sgn2);
        } else {
            result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
template <>
uint32_t
fplibRecpX(uint32_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 255 && (uint32_t)(mnt << 9)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else {
        if (!mnt) { // Zero and denormals
            result = fp32_pack(sgn, 254, 0);
        } else { // Infinities and normals
            result = fp32_pack(sgn, exp ^ 255, 0);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}

template <>
uint64_t
fplibRecpX(uint64_t op, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else {
        if (!mnt) { // Zero and denormals
            result = fp64_pack(sgn, 2046, 0);
        } else { // Infinities and normals
            result = fp64_pack(sgn, exp ^ 2047, 0);
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
template <>
uint32_t
fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    // Handle NaNs, infinities and zeroes:
    if (exp == 255 && (uint32_t)(mnt << 9)) {
        result = fp32_process_NaN(op, mode, &flags);
    } else if (exp == 255) {
        result = fp32_infinity(sgn);
    } else if (!mnt) {
        result = fp32_zero(sgn);
    } else if (exp >= 150) {
        // There are no fractional bits
        result = op;
    } else {
        // Truncate towards zero:
        uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp);
        int err = exp < 118 ? 1 :
            (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0);
        switch (rounding) {
          case FPRounding_TIEEVEN:
            x += (err == 3 || (err == 2 && (x & 1)));
            break;
          case FPRounding_POSINF:
            x += err && !sgn;
            break;
          case FPRounding_NEGINF:
            x += err && sgn;
            break;
          case FPRounding_ZERO:
            break;
          case FPRounding_TIEAWAY:
            x += err >> 1;
            break;
          default:
            assert(0);
        }

        if (x == 0) {
            result = fp32_zero(sgn);
        } else {
            exp = 150;
            mnt = fp32_normalise(x, &exp);
            result = fp32_pack(sgn, exp + 8, mnt >> 8);
        }

        if (err && exact) {
            flags |= FPLIB_IXC;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
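/*
 * Annotation: "err" is a two-bit round/sticky code: bit 1 is the first
 * discarded fraction bit and bit 0 is the OR of all lower bits, so
 * err == 2 marks an exact tie. For op = 2.5f (0x40200000) under
 * FPRounding_TIEEVEN: exp = 128 and mnt = 0xA00000, so
 * x = mnt >> 22 = 2 and err = (mnt << 1 >> 21) & 3 = 2, a tie; x is
 * even, so it is not incremented and the result repacks to 2.0f,
 * i.e. round-to-nearest-even.
 */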
template <>
uint64_t
fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
{
    int mode = modeConv(fpscr);
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);

    // Handle NaNs, infinities and zeroes:
    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        result = fp64_process_NaN(op, mode, &flags);
    } else if (exp == 2047) {
        result = fp64_infinity(sgn);
    } else if (!mnt) {
        result = fp64_zero(sgn);
    } else if (exp >= 1075) {
        // There are no fractional bits
        result = op;
    } else {
        // Truncate towards zero:
        uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp);
        int err = exp < 1011 ? 1 :
            (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0);
        switch (rounding) {
          case FPRounding_TIEEVEN:
            x += (err == 3 || (err == 2 && (x & 1)));
            break;
          case FPRounding_POSINF:
            x += err && !sgn;
            break;
          case FPRounding_NEGINF:
            x += err && sgn;
            break;
          case FPRounding_ZERO:
            break;
          case FPRounding_TIEAWAY:
            x += err >> 1;
            break;
          default:
            assert(0);
        }

        if (x == 0) {
            result = fp64_zero(sgn);
        } else {
            exp = 1075;
            mnt = fp64_normalise(x, &exp);
            result = fp64_pack(sgn, exp + 11, mnt >> 11);
        }

        if (err && exact) {
            flags |= FPLIB_IXC;
        }
    }

    set_fpscr0(fpscr, flags);

    return result;
}
template <>
uint32_t
fplibSqrt(uint32_t op, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}
template <>
uint64_t
fplibSqrt(uint64_t op, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}
template <>
uint32_t
fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}
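/*
 * Annotation: subtraction shares the addition datapath; the third
 * argument of fp32_add/fp64_add (1 here, 0 for addition) flips the
 * effective sign of op2 before the usual align-add-round sequence.
 */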
template <>
uint64_t
fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
    set_fpscr0(fpscr, flags);
    return result;
}
static uint64_t
FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
             int *flags)
{
    uint64_t x;
    int err;

    if (exp > 1023 + 63) {
        *flags = FPLIB_IOC;
        return ((uint64_t)!u << 63) - !sgn;
    }

    x = lsr64(mnt << 11, 1023 + 63 - exp);
    err = (exp > 1023 + 63 - 2 ? 0 :
           (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) |
           !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1)));

    switch (rounding) {
      case FPRounding_TIEEVEN:
        x += (err == 3 || (err == 2 && (x & 1)));
        break;
      case FPRounding_POSINF:
        x += err && !sgn;
        break;
      case FPRounding_NEGINF:
        x += err && sgn;
        break;
      case FPRounding_ZERO:
        break;
      case FPRounding_TIEAWAY:
        x += err >> 1;
        break;
      default:
        assert(0);
    }

    if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) {
        *flags = FPLIB_IOC;
        return ((uint64_t)!u << 63) - !sgn;
    }

    if (err) {
        *flags |= FPLIB_IXC;
    }

    return sgn ? -x : x;
}
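/*
 * Annotation: the saturated value ((uint64_t)!u << 63) - !sgn covers
 * all four overflow cases in one expression:
 *
 *     signed,   positive: (1 << 63) - 1 = INT64_MAX
 *     signed,   negative: (1 << 63) - 0 = INT64_MIN (bit pattern)
 *     unsigned, positive: 0 - 1 = UINT64_MAX
 *     unsigned, negative: 0 - 0 = 0
 */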
static uint32_t
FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
             int *flags)
{
    uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
    if (u ? x >= (uint64_t)1 << 32 :
        !(x < (uint64_t)1 << 31 ||
          (uint64_t)-x <= (uint64_t)1 << 31)) {
        *flags = FPLIB_IOC;
        x = ((uint32_t)!u << 31) - !sgn;
    }
    return x;
}
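/*
 * Annotation: the 32-bit conversion reuses the 64-bit path and only
 * range-checks the result: unsigned values must fit in 32 bits, and
 * signed values must satisfy -2^31 <= x <= 2^31 - 1, which the pair of
 * comparisons on x and -x expresses without branching on the sign.
 */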
template <>
uint32_t
fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint32_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (exp == 255 && (uint32_t)(mnt << 9)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits,
                              (uint64_t)mnt << (52 - 23), u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
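/*
 * Annotation: the single-precision operand is promoted to the
 * double-precision scale expected by the shared helper: the exponent
 * is re-biased by 1023 - 127 and the 23-bit mantissa is moved up by
 * 52 - 23 = 29 bits, so FPToFixed_32/_64 serve both input widths.
 */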
template <>
uint32_t
fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint64_t mnt;
    uint32_t result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
template <>
uint64_t
fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint32_t mnt;
    uint64_t result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (exp == 255 && (uint32_t)(mnt << 9)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits,
                              (uint64_t)mnt << (52 - 23), u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
template <>
uint64_t
fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    int sgn, exp;
    uint64_t mnt, result;

    // Unpack using FPCR to determine if subnormals are flushed-to-zero:
    fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);

    // If NaN, set cumulative flag or take exception:
    if (exp == 2047 && (uint64_t)(mnt << 12)) {
        flags = FPLIB_IOC;
        result = 0;
    } else {
        result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
    }

    set_fpscr0(fpscr, flags);

    return result;
}
static uint32_t
fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
{
    int x_sgn = !u && a >> 63;
    int x_exp = 190 - fbits;
    uint64_t x_mnt = x_sgn ? -a : a;

    // Handle zero:
    if (!x_mnt) {
        return fp32_zero(0);
    }

    // Normalise and convert to 32 bits, collapsing error into bottom bit:
    x_mnt = fp64_normalise(x_mnt, &x_exp);
    x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);

    return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
}
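/*
 * Annotation: x_exp = 190 - fbits is 127 + 63 - fbits, the biased
 * exponent a 64-bit integer would have if its bit 63 were the leading
 * significand bit, less the fixed-point scaling. After normalisation
 * the low 31 bits cannot survive the narrowing to single precision,
 * so they are ORed into a sticky bottom bit for fp32_round to consume.
 */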
static uint64_t
fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
{
    int x_sgn = !u && a >> 63;
    int x_exp = 1024 + 62 - fbits;
    uint64_t x_mnt = x_sgn ? -a : a;

    // Handle zero:
    if (!x_mnt) {
        return fp64_zero(0);
    }

    x_mnt = fp64_normalise(x_mnt, &x_exp);

    return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
}
template <>
uint32_t
fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    uint32_t res = fp32_cvtf(op, fbits, u,
                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
                             &flags);
    set_fpscr0(fpscr, flags);
    return res;
}
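/*
 * Annotation: "(uint32_t)fpscr >> 22 & 12" appears to extract the
 * FPSCR.FZ (bit 24) and FPSCR.DN (bit 25) controls into the mode bit
 * positions that modeConv() produces, combining the caller-supplied
 * rounding override with the flush-to-zero and default-NaN settings.
 */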
template <>
uint64_t
fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
{
    int flags = 0;
    uint64_t res = fp64_cvtf(op, fbits, u,
                             (int)rounding | ((uint32_t)fpscr >> 22 & 12),
                             &flags);
    set_fpscr0(fpscr, flags);