src/arch/arm/insts/fplib.cc
1 /*
2 * Copyright (c) 2012-2013 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * Authors: Edmund Grimley Evans
38 * Thomas Grocutt
39 */
40
41 #include <stdint.h>
42
43 #include <cassert>
44
45 #include "fplib.hh"
46
47 namespace ArmISA
48 {
49
50 #define FPLIB_RN 0
51 #define FPLIB_RP 1
52 #define FPLIB_RM 2
53 #define FPLIB_RZ 3
54 #define FPLIB_FZ 4
55 #define FPLIB_DN 8
56 #define FPLIB_AHP 16
57
58 #define FPLIB_IDC 128 // Input Denormal
59 #define FPLIB_IXC 16 // Inexact
60 #define FPLIB_UFC 8 // Underflow
61 #define FPLIB_OFC 4 // Overflow
62 #define FPLIB_DZC 2 // Division by Zero
63 #define FPLIB_IOC 1 // Invalid Operation
64
65 static inline uint16_t
66 lsl16(uint16_t x, uint32_t shift)
67 {
68 return shift < 16 ? x << shift : 0;
69 }
70
71 static inline uint16_t
72 lsr16(uint16_t x, uint32_t shift)
73 {
74 return shift < 16 ? x >> shift : 0;
75 }
76
77 static inline uint32_t
78 lsl32(uint32_t x, uint32_t shift)
79 {
80 return shift < 32 ? x << shift : 0;
81 }
82
83 static inline uint32_t
84 lsr32(uint32_t x, uint32_t shift)
85 {
86 return shift < 32 ? x >> shift : 0;
87 }
88
89 static inline uint64_t
90 lsl64(uint64_t x, uint32_t shift)
91 {
92 return shift < 64 ? x << shift : 0;
93 }
94
95 static inline uint64_t
96 lsr64(uint64_t x, uint32_t shift)
97 {
98 return shift < 64 ? x >> shift : 0;
99 }
100
101 static inline void
102 lsl128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
103 {
104 if (shift == 0) {
105 *r1 = x1;
106 *r0 = x0;
107 } else if (shift < 64) {
108 *r1 = x1 << shift | x0 >> (64 - shift);
109 *r0 = x0 << shift;
110 } else if (shift < 128) {
111 *r1 = x0 << (shift - 64);
112 *r0 = 0;
113 } else {
114 *r1 = 0;
115 *r0 = 0;
116 }
117 }
118
119 static inline void
120 lsr128(uint64_t *r0, uint64_t *r1, uint64_t x0, uint64_t x1, uint32_t shift)
121 {
122 if (shift == 0) {
123 *r1 = x1;
124 *r0 = x0;
125 } else if (shift < 64) {
126 *r0 = x0 >> shift | x1 << (64 - shift);
127 *r1 = x1 >> shift;
128 } else if (shift < 128) {
129 *r0 = x1 >> (shift - 64);
130 *r1 = 0;
131 } else {
132 *r0 = 0;
133 *r1 = 0;
134 }
135 }
136
137 static inline void
138 mul62x62(uint64_t *x0, uint64_t *x1, uint64_t a, uint64_t b)
139 {
140 uint32_t mask = ((uint32_t)1 << 31) - 1;
141 uint64_t a0 = a & mask;
142 uint64_t a1 = a >> 31 & mask;
143 uint64_t b0 = b & mask;
144 uint64_t b1 = b >> 31 & mask;
145 uint64_t p0 = a0 * b0;
146 uint64_t p2 = a1 * b1;
147 uint64_t p1 = (a0 + a1) * (b0 + b1) - p0 - p2;
148 uint64_t s0 = p0;
149 uint64_t s1 = (s0 >> 31) + p1;
150 uint64_t s2 = (s1 >> 31) + p2;
151 *x0 = (s0 & mask) | (s1 & mask) << 31 | s2 << 62;
152 *x1 = s2 >> 2;
153 }
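// mul62x62 forms the full product of two operands of up to 62 significant
// bits by splitting each into two 31-bit limbs and using the Karatsuba
// identity a0*b1 + a1*b0 = (a0 + a1)*(b0 + b1) - a0*b0 - a1*b1, so only
// three 64-bit multiplications are needed. Keeping the limbs at 31 bits
// leaves headroom so the limb sums and the carries folded into s1 and s2
// cannot overflow a uint64_t. The low 64 bits of the product are returned
// in *x0 and the remaining high bits in *x1.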
154
155 static inline void
156 mul64x32(uint64_t *x0, uint64_t *x1, uint64_t a, uint32_t b)
157 {
158 uint64_t t0 = (uint64_t)(uint32_t)a * b;
159 uint64_t t1 = (t0 >> 32) + (a >> 32) * b;
160 *x0 = t1 << 32 | (uint32_t)t0;
161 *x1 = t1 >> 32;
162 }
163
164 static inline void
165 add128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
166 uint64_t b1)
167 {
168 *x0 = a0 + b0;
169 *x1 = a1 + b1 + (*x0 < a0);
170 }
171
172 static inline void
173 sub128(uint64_t *x0, uint64_t *x1, uint64_t a0, uint64_t a1, uint64_t b0,
174 uint64_t b1)
175 {
176 *x0 = a0 - b0;
177 *x1 = a1 - b1 - (*x0 > a0);
178 }
179
180 static inline int
181 cmp128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
182 {
183 return (a1 < b1 ? -1 : a1 > b1 ? 1 : a0 < b0 ? -1 : a0 > b0 ? 1 : 0);
184 }
185
186 static inline uint16_t
187 fp16_normalise(uint16_t mnt, int *exp)
188 {
189 int shift;
190
191 if (!mnt) {
192 return 0;
193 }
194
195 for (shift = 8; shift; shift >>= 1) {
196 if (!(mnt >> (16 - shift))) {
197 mnt <<= shift;
198 *exp -= shift;
199 }
200 }
201 return mnt;
202 }
203
204 static inline uint32_t
205 fp32_normalise(uint32_t mnt, int *exp)
206 {
207 int shift;
208
209 if (!mnt) {
210 return 0;
211 }
212
213 for (shift = 16; shift; shift >>= 1) {
214 if (!(mnt >> (32 - shift))) {
215 mnt <<= shift;
216 *exp -= shift;
217 }
218 }
219 return mnt;
220 }
221
222 static inline uint64_t
223 fp64_normalise(uint64_t mnt, int *exp)
224 {
225 int shift;
226
227 if (!mnt) {
228 return 0;
229 }
230
231 for (shift = 32; shift; shift >>= 1) {
232 if (!(mnt >> (64 - shift))) {
233 mnt <<= shift;
234 *exp -= shift;
235 }
236 }
237 return mnt;
238 }
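// The normalise helpers shift a non-zero mantissa left until its most
// significant bit is set and decrement *exp by the same amount. The loop is
// a binary search over shift amounts (32, 16, 8, 4, 2, 1 in the 64-bit
// case), i.e. a portable count-leading-zeros. For example,
// fp64_normalise(1, &exp) returns 1ULL << 63 and subtracts 63 from exp.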
239
240 static inline void
241 fp128_normalise(uint64_t *mnt0, uint64_t *mnt1, int *exp)
242 {
243 uint64_t x0 = *mnt0;
244 uint64_t x1 = *mnt1;
245 int shift;
246
247 if (!x0 && !x1) {
248 return;
249 }
250
251 if (!x1) {
252 x1 = x0;
253 x0 = 0;
254 *exp -= 64;
255 }
256
257 for (shift = 32; shift; shift >>= 1) {
258 if (!(x1 >> (64 - shift))) {
259 x1 = x1 << shift | x0 >> (64 - shift);
260 x0 <<= shift;
261 *exp -= shift;
262 }
263 }
264
265 *mnt0 = x0;
266 *mnt1 = x1;
267 }
268
269 static inline uint16_t
270 fp16_pack(uint16_t sgn, uint16_t exp, uint16_t mnt)
271 {
272 return sgn << 15 | exp << 10 | (mnt & (((uint16_t)1 << 10) - 1));
273 }
274
275 static inline uint32_t
276 fp32_pack(uint32_t sgn, uint32_t exp, uint32_t mnt)
277 {
278 return sgn << 31 | exp << 23 | (mnt & (((uint32_t)1 << 23) - 1));
279 }
280
281 static inline uint64_t
282 fp64_pack(uint64_t sgn, uint64_t exp, uint64_t mnt)
283 {
284 return (uint64_t)sgn << 63 | exp << 52 | (mnt & (((uint64_t)1 << 52) - 1));
285 }
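// The pack helpers assemble sign, biased exponent and fraction fields into
// an IEEE-754 encoding. The mantissa argument is masked down to the fraction
// field, so callers may pass a value that still contains the implicit
// leading one (or pass -1 to set every fraction bit, as the fp*_max_normal
// helpers below do). For example, fp32_pack(0, 127, 0) gives 0x3f800000,
// i.e. 1.0f.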
286
287 static inline uint16_t
288 fp16_zero(int sgn)
289 {
290 return fp16_pack(sgn, 0, 0);
291 }
292
293 static inline uint32_t
294 fp32_zero(int sgn)
295 {
296 return fp32_pack(sgn, 0, 0);
297 }
298
299 static inline uint64_t
300 fp64_zero(int sgn)
301 {
302 return fp64_pack(sgn, 0, 0);
303 }
304
305 static inline uint16_t
306 fp16_max_normal(int sgn)
307 {
308 return fp16_pack(sgn, 30, -1);
309 }
310
311 static inline uint32_t
312 fp32_max_normal(int sgn)
313 {
314 return fp32_pack(sgn, 254, -1);
315 }
316
317 static inline uint64_t
318 fp64_max_normal(int sgn)
319 {
320 return fp64_pack(sgn, 2046, -1);
321 }
322
323 static inline uint16_t
324 fp16_infinity(int sgn)
325 {
326 return fp16_pack(sgn, 31, 0);
327 }
328
329 static inline uint32_t
330 fp32_infinity(int sgn)
331 {
332 return fp32_pack(sgn, 255, 0);
333 }
334
335 static inline uint64_t
336 fp64_infinity(int sgn)
337 {
338 return fp64_pack(sgn, 2047, 0);
339 }
340
341 static inline uint16_t
342 fp16_defaultNaN()
343 {
344 return fp16_pack(0, 31, (uint16_t)1 << 9);
345 }
346
347 static inline uint32_t
348 fp32_defaultNaN()
349 {
350 return fp32_pack(0, 255, (uint32_t)1 << 22);
351 }
352
353 static inline uint64_t
354 fp64_defaultNaN()
355 {
356 return fp64_pack(0, 2047, (uint64_t)1 << 51);
357 }
358
359 static inline void
360 fp16_unpack(int *sgn, int *exp, uint16_t *mnt, uint16_t x, int mode,
361 int *flags)
362 {
363 *sgn = x >> 15;
364 *exp = x >> 10 & 31;
365 *mnt = x & (((uint16_t)1 << 10) - 1);
366
367 // Handle subnormals:
368 if (*exp) {
369 *mnt |= (uint16_t)1 << 10;
370 } else {
371 ++*exp;
372 // There is no flush to zero in this case!
373 }
374 }
375
376 static inline void
377 fp32_unpack(int *sgn, int *exp, uint32_t *mnt, uint32_t x, int mode,
378 int *flags)
379 {
380 *sgn = x >> 31;
381 *exp = x >> 23 & 255;
382 *mnt = x & (((uint32_t)1 << 23) - 1);
383
384 // Handle subnormals:
385 if (*exp) {
386 *mnt |= (uint32_t)1 << 23;
387 } else {
388 ++*exp;
389 if ((mode & FPLIB_FZ) && *mnt) {
390 *flags |= FPLIB_IDC;
391 *mnt = 0;
392 }
393 }
394 }
395
396 static inline void
397 fp64_unpack(int *sgn, int *exp, uint64_t *mnt, uint64_t x, int mode,
398 int *flags)
399 {
400 *sgn = x >> 63;
401 *exp = x >> 52 & 2047;
402 *mnt = x & (((uint64_t)1 << 52) - 1);
403
404 // Handle subnormals:
405 if (*exp) {
406 *mnt |= (uint64_t)1 << 52;
407 } else {
408 ++*exp;
409 if ((mode & FPLIB_FZ) && *mnt) {
410 *flags |= FPLIB_IDC;
411 *mnt = 0;
412 }
413 }
414 }
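// The unpack helpers split an encoding into sign, exponent and significand,
// making the implicit leading one explicit for normal numbers. For a zero or
// subnormal encoding the exponent is bumped from 0 to 1 so that the
// significand carries the same weight as the smallest normals; with FPLIB_FZ
// set, a non-zero single- or double-precision subnormal is flushed to zero
// and the Input Denormal flag (FPLIB_IDC) is raised. Half precision is never
// flushed, as noted in fp16_unpack above.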
415
416 static inline uint32_t
417 fp32_process_NaN(uint32_t a, int mode, int *flags)
418 {
419 if (!(a >> 22 & 1)) {
420 *flags |= FPLIB_IOC;
421 a |= (uint32_t)1 << 22;
422 }
423 return mode & FPLIB_DN ? fp32_defaultNaN() : a;
424 }
425
426 static inline uint64_t
427 fp64_process_NaN(uint64_t a, int mode, int *flags)
428 {
429 if (!(a >> 51 & 1)) {
430 *flags |= FPLIB_IOC;
431 a |= (uint64_t)1 << 51;
432 }
433 return mode & FPLIB_DN ? fp64_defaultNaN() : a;
434 }
435
436 static uint32_t
437 fp32_process_NaNs(uint32_t a, uint32_t b, int mode, int *flags)
438 {
439 int a_exp = a >> 23 & 255;
440 uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
441 int b_exp = b >> 23 & 255;
442 uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);
443
444 // Handle signalling NaNs:
445 if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
446 return fp32_process_NaN(a, mode, flags);
447 if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
448 return fp32_process_NaN(b, mode, flags);
449
450 // Handle quiet NaNs:
451 if (a_exp == 255 && a_mnt)
452 return fp32_process_NaN(a, mode, flags);
453 if (b_exp == 255 && b_mnt)
454 return fp32_process_NaN(b, mode, flags);
455
456 return 0;
457 }
458
459 static uint64_t
460 fp64_process_NaNs(uint64_t a, uint64_t b, int mode, int *flags)
461 {
462 int a_exp = a >> 52 & 2047;
463 uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
464 int b_exp = b >> 52 & 2047;
465 uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);
466
467 // Handle signalling NaNs:
468 if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
469 return fp64_process_NaN(a, mode, flags);
470 if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
471 return fp64_process_NaN(b, mode, flags);
472
473 // Handle quiet NaNs:
474 if (a_exp == 2047 && a_mnt)
475 return fp64_process_NaN(a, mode, flags);
476 if (b_exp == 2047 && b_mnt)
477 return fp64_process_NaN(b, mode, flags);
478
479 return 0;
480 }
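// fp32/fp64_process_NaNs follow the ARM pseudocode's priority order: a
// signalling NaN in the first operand wins, then one in the second, then a
// quiet NaN in the first, then the second. fp*_process_NaN then quietens the
// chosen NaN (setting the top fraction bit and raising Invalid Operation if
// it was signalling), or substitutes the default NaN when the DN bit is set.
// A return value of 0 means neither operand was a NaN.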
481
482 static uint32_t
483 fp32_process_NaNs3(uint32_t a, uint32_t b, uint32_t c, int mode, int *flags)
484 {
485 int a_exp = a >> 23 & 255;
486 uint32_t a_mnt = a & (((uint32_t)1 << 23) - 1);
487 int b_exp = b >> 23 & 255;
488 uint32_t b_mnt = b & (((uint32_t)1 << 23) - 1);
489 int c_exp = c >> 23 & 255;
490 uint32_t c_mnt = c & (((uint32_t)1 << 23) - 1);
491
492 // Handle signalling NaNs:
493 if (a_exp == 255 && a_mnt && !(a_mnt >> 22 & 1))
494 return fp32_process_NaN(a, mode, flags);
495 if (b_exp == 255 && b_mnt && !(b_mnt >> 22 & 1))
496 return fp32_process_NaN(b, mode, flags);
497 if (c_exp == 255 && c_mnt && !(c_mnt >> 22 & 1))
498 return fp32_process_NaN(c, mode, flags);
499
500 // Handle quiet NaNs:
501 if (a_exp == 255 && a_mnt)
502 return fp32_process_NaN(a, mode, flags);
503 if (b_exp == 255 && b_mnt)
504 return fp32_process_NaN(b, mode, flags);
505 if (c_exp == 255 && c_mnt)
506 return fp32_process_NaN(c, mode, flags);
507
508 return 0;
509 }
510
511 static uint64_t
512 fp64_process_NaNs3(uint64_t a, uint64_t b, uint64_t c, int mode, int *flags)
513 {
514 int a_exp = a >> 52 & 2047;
515 uint64_t a_mnt = a & (((uint64_t)1 << 52) - 1);
516 int b_exp = b >> 52 & 2047;
517 uint64_t b_mnt = b & (((uint64_t)1 << 52) - 1);
518 int c_exp = c >> 52 & 2047;
519 uint64_t c_mnt = c & (((uint64_t)1 << 52) - 1);
520
521 // Handle signalling NaNs:
522 if (a_exp == 2047 && a_mnt && !(a_mnt >> 51 & 1))
523 return fp64_process_NaN(a, mode, flags);
524 if (b_exp == 2047 && b_mnt && !(b_mnt >> 51 & 1))
525 return fp64_process_NaN(b, mode, flags);
526 if (c_exp == 2047 && c_mnt && !(c_mnt >> 51 & 1))
527 return fp64_process_NaN(c, mode, flags);
528
529 // Handle quiet NaNs:
530 if (a_exp == 2047 && a_mnt)
531 return fp64_process_NaN(a, mode, flags);
532 if (b_exp == 2047 && b_mnt)
533 return fp64_process_NaN(b, mode, flags);
534 if (c_exp == 2047 && c_mnt)
535 return fp64_process_NaN(c, mode, flags);
536
537 return 0;
538 }
539
540 static uint16_t
541 fp16_round_(int sgn, int exp, uint16_t mnt, int rm, int mode, int *flags)
542 {
543 int biased_exp; // non-negative exponent value for result
544 uint16_t int_mant; // mantissa for result, less than (1 << 11)
545 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
546
547 assert(rm != FPRounding_TIEAWAY);
548
549 // There is no flush to zero in this case!
550
551 // The bottom 5 bits of mnt are ORed together:
552 mnt = (uint16_t)1 << 12 | mnt >> 4 | ((mnt & 31) != 0);
553
554 if (exp > 0) {
555 biased_exp = exp;
556 int_mant = mnt >> 2;
557 error = mnt & 3;
558 } else {
559 biased_exp = 0;
560 int_mant = lsr16(mnt, 3 - exp);
561 error = (lsr16(mnt, 1 - exp) & 3) | !!(mnt & (lsl16(1, 1 - exp) - 1));
562 }
563
564 if (!biased_exp && error) { // xx should also check fpscr_val<11>
565 *flags |= FPLIB_UFC;
566 }
567
568 // Round up:
569 if ((rm == FPLIB_RN && (error == 3 ||
570 (error == 2 && (int_mant & 1)))) ||
571 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
572 ++int_mant;
573 if (int_mant == (uint32_t)1 << 10) {
574 // Rounded up from denormalized to normalized
575 biased_exp = 1;
576 }
577 if (int_mant == (uint32_t)1 << 11) {
578 // Rounded up to next exponent
579 ++biased_exp;
580 int_mant >>= 1;
581 }
582 }
583
584 // Handle rounding to odd aka Von Neumann rounding:
585 if (error && rm == FPRounding_ODD)
586 int_mant |= 1;
587
588 // Handle overflow:
589 if (!(mode & FPLIB_AHP)) {
590 if (biased_exp >= 31) {
591 *flags |= FPLIB_OFC | FPLIB_IXC;
592 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
593 (rm == FPLIB_RM && sgn)) {
594 return fp16_infinity(sgn);
595 } else {
596 return fp16_max_normal(sgn);
597 }
598 }
599 } else {
600 if (biased_exp >= 32) {
601 *flags |= FPLIB_IOC;
602 return fp16_pack(sgn, 31, -1);
603 }
604 }
605
606 if (error) {
607 *flags |= FPLIB_IXC;
608 }
609
610 return fp16_pack(sgn, biased_exp, int_mant);
611 }
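// In the round helpers, `error` acts as a guard bit plus a sticky bit: bit 1
// is the first bit discarded below int_mant (worth half a ULP) and bit 0 is
// the OR of everything below that, so error == 2 means the discarded part is
// exactly half a ULP and error == 3 means more than half. FPLIB_RN rounds up
// on error == 3, or on error == 2 when the kept LSB is odd (round to
// nearest, ties to even); FPLIB_RP and FPLIB_RM round up whenever error is
// non-zero and the sign matches; FPLIB_RZ never rounds up.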
612
613 static uint32_t
614 fp32_round_(int sgn, int exp, uint32_t mnt, int rm, int mode, int *flags)
615 {
616 int biased_exp; // non-negative exponent value for result
617 uint32_t int_mant; // mantissa for result, less than (1 << 24)
618 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
619
620 assert(rm != FPRounding_TIEAWAY);
621
622 // Flush to zero:
623 if ((mode & FPLIB_FZ) && exp < 1) {
624 *flags |= FPLIB_UFC;
625 return fp32_zero(sgn);
626 }
627
628 // The bottom 8 bits of mnt are ORed together:
629 mnt = (uint32_t)1 << 25 | mnt >> 7 | ((mnt & 255) != 0);
630
631 if (exp > 0) {
632 biased_exp = exp;
633 int_mant = mnt >> 2;
634 error = mnt & 3;
635 } else {
636 biased_exp = 0;
637 int_mant = lsr32(mnt, 3 - exp);
638 error = (lsr32(mnt, 1 - exp) & 3) | !!(mnt & (lsl32(1, 1 - exp) - 1));
639 }
640
641 if (!biased_exp && error) { // xx should also check fpscr_val<11>
642 *flags |= FPLIB_UFC;
643 }
644
645 // Round up:
646 if ((rm == FPLIB_RN && (error == 3 ||
647 (error == 2 && (int_mant & 1)))) ||
648 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
649 ++int_mant;
650 if (int_mant == (uint32_t)1 << 23) {
651 // Rounded up from denormalized to normalized
652 biased_exp = 1;
653 }
654 if (int_mant == (uint32_t)1 << 24) {
655 // Rounded up to next exponent
656 ++biased_exp;
657 int_mant >>= 1;
658 }
659 }
660
661 // Handle rounding to odd aka Von Neumann rounding:
662 if (error && rm == FPRounding_ODD)
663 int_mant |= 1;
664
665 // Handle overflow:
666 if (biased_exp >= 255) {
667 *flags |= FPLIB_OFC | FPLIB_IXC;
668 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
669 (rm == FPLIB_RM && sgn)) {
670 return fp32_infinity(sgn);
671 } else {
672 return fp32_max_normal(sgn);
673 }
674 }
675
676 if (error) {
677 *flags |= FPLIB_IXC;
678 }
679
680 return fp32_pack(sgn, biased_exp, int_mant);
681 }
682
683 static uint32_t
684 fp32_round(int sgn, int exp, uint32_t mnt, int mode, int *flags)
685 {
686 return fp32_round_(sgn, exp, mnt, mode & 3, mode, flags);
687 }
688
689 static uint64_t
690 fp64_round_(int sgn, int exp, uint64_t mnt, int rm, int mode, int *flags)
691 {
692 int biased_exp; // non-negative exponent value for result
693 uint64_t int_mant; // mantissa for result, less than (1 << 52)
694 int error; // 0, 1, 2 or 3, where 2 means int_mant is wrong by exactly 0.5
695
696 assert(rm != FPRounding_TIEAWAY);
697
698 // Flush to zero:
699 if ((mode & FPLIB_FZ) && exp < 1) {
700 *flags |= FPLIB_UFC;
701 return fp64_zero(sgn);
702 }
703
704 // The bottom 11 bits of mnt are ORed together:
705 mnt = (uint64_t)1 << 54 | mnt >> 10 | ((mnt & 0x3ff) != 0);
706
707 if (exp > 0) {
708 biased_exp = exp;
709 int_mant = mnt >> 2;
710 error = mnt & 3;
711 } else {
712 biased_exp = 0;
713 int_mant = lsr64(mnt, 3 - exp);
714 error = (lsr64(mnt, 1 - exp) & 3) | !!(mnt & (lsl64(1, 1 - exp) - 1));
715 }
716
717 if (!biased_exp && error) { // xx should also check fpscr_val<11>
718 *flags |= FPLIB_UFC;
719 }
720
721 // Round up:
722 if ((rm == FPLIB_RN && (error == 3 ||
723 (error == 2 && (int_mant & 1)))) ||
724 (((rm == FPLIB_RP && !sgn) || (rm == FPLIB_RM && sgn)) && error)) {
725 ++int_mant;
726 if (int_mant == (uint64_t)1 << 52) {
727 // Rounded up from denormalized to normalized
728 biased_exp = 1;
729 }
730 if (int_mant == (uint64_t)1 << 53) {
731 // Rounded up to next exponent
732 ++biased_exp;
733 int_mant >>= 1;
734 }
735 }
736
737 // Handle rounding to odd aka Von Neumann rounding:
738 if (error && rm == FPRounding_ODD)
739 int_mant |= 1;
740
741 // Handle overflow:
742 if (biased_exp >= 2047) {
743 *flags |= FPLIB_OFC | FPLIB_IXC;
744 if (rm == FPLIB_RN || (rm == FPLIB_RP && !sgn) ||
745 (rm == FPLIB_RM && sgn)) {
746 return fp64_infinity(sgn);
747 } else {
748 return fp64_max_normal(sgn);
749 }
750 }
751
752 if (error) {
753 *flags |= FPLIB_IXC;
754 }
755
756 return fp64_pack(sgn, biased_exp, int_mant);
757 }
758
759 static uint64_t
760 fp64_round(int sgn, int exp, uint64_t mnt, int mode, int *flags)
761 {
762 return fp64_round_(sgn, exp, mnt, mode & 3, mode, flags);
763 }
764
765 static int
766 fp32_compare_eq(uint32_t a, uint32_t b, int mode, int *flags)
767 {
768 int a_sgn, a_exp, b_sgn, b_exp;
769 uint32_t a_mnt, b_mnt;
770
771 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
772 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
773
774 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
775 (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
776 if ((a_exp == 255 && (uint32_t)(a_mnt << 9) && !(a >> 22 & 1)) ||
777 (b_exp == 255 && (uint32_t)(b_mnt << 9) && !(b >> 22 & 1)))
778 *flags |= FPLIB_IOC;
779 return 0;
780 }
781 return a == b || (!a_mnt && !b_mnt);
782 }
783
784 static int
785 fp32_compare_ge(uint32_t a, uint32_t b, int mode, int *flags)
786 {
787 int a_sgn, a_exp, b_sgn, b_exp;
788 uint32_t a_mnt, b_mnt;
789
790 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
791 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
792
793 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
794 (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
795 *flags |= FPLIB_IOC;
796 return 0;
797 }
798 if (!a_mnt && !b_mnt)
799 return 1;
800 if (a_sgn != b_sgn)
801 return b_sgn;
802 if (a_exp != b_exp)
803 return a_sgn ^ (a_exp > b_exp);
804 if (a_mnt != b_mnt)
805 return a_sgn ^ (a_mnt > b_mnt);
806 return 1;
807 }
808
809 static int
810 fp32_compare_gt(uint32_t a, uint32_t b, int mode, int *flags)
811 {
812 int a_sgn, a_exp, b_sgn, b_exp;
813 uint32_t a_mnt, b_mnt;
814
815 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
816 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
817
818 if ((a_exp == 255 && (uint32_t)(a_mnt << 9)) ||
819 (b_exp == 255 && (uint32_t)(b_mnt << 9))) {
820 *flags |= FPLIB_IOC;
821 return 0;
822 }
823 if (!a_mnt && !b_mnt)
824 return 0;
825 if (a_sgn != b_sgn)
826 return b_sgn;
827 if (a_exp != b_exp)
828 return a_sgn ^ (a_exp > b_exp);
829 if (a_mnt != b_mnt)
830 return a_sgn ^ (a_mnt > b_mnt);
831 return 0;
832 }
833
834 static int
835 fp64_compare_eq(uint64_t a, uint64_t b, int mode, int *flags)
836 {
837 int a_sgn, a_exp, b_sgn, b_exp;
838 uint64_t a_mnt, b_mnt;
839
840 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
841 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
842
843 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
844 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
845 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12) && !(a >> 51 & 1)) ||
846 (b_exp == 2047 && (uint64_t)(b_mnt << 12) && !(b >> 51 & 1)))
847 *flags |= FPLIB_IOC;
848 return 0;
849 }
850 return a == b || (!a_mnt && !b_mnt);
851 }
852
853 static int
854 fp64_compare_ge(uint64_t a, uint64_t b, int mode, int *flags)
855 {
856 int a_sgn, a_exp, b_sgn, b_exp;
857 uint64_t a_mnt, b_mnt;
858
859 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
860 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
861
862 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
863 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
864 *flags |= FPLIB_IOC;
865 return 0;
866 }
867 if (!a_mnt && !b_mnt)
868 return 1;
869 if (a_sgn != b_sgn)
870 return b_sgn;
871 if (a_exp != b_exp)
872 return a_sgn ^ (a_exp > b_exp);
873 if (a_mnt != b_mnt)
874 return a_sgn ^ (a_mnt > b_mnt);
875 return 1;
876 }
877
878 static int
879 fp64_compare_gt(uint64_t a, uint64_t b, int mode, int *flags)
880 {
881 int a_sgn, a_exp, b_sgn, b_exp;
882 uint64_t a_mnt, b_mnt;
883
884 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
885 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
886
887 if ((a_exp == 2047 && (uint64_t)(a_mnt << 12)) ||
888 (b_exp == 2047 && (uint64_t)(b_mnt << 12))) {
889 *flags |= FPLIB_IOC;
890 return 0;
891 }
892 if (!a_mnt && !b_mnt)
893 return 0;
894 if (a_sgn != b_sgn)
895 return b_sgn;
896 if (a_exp != b_exp)
897 return a_sgn ^ (a_exp > b_exp);
898 if (a_mnt != b_mnt)
899 return a_sgn ^ (a_mnt > b_mnt);
900 return 0;
901 }
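// Once NaNs and the +0/-0 pair have been dealt with, the compare_eq/ge/gt
// helpers above order values directly from the unpacked fields: differing
// signs decide first, then the exponent, then the significand, with the
// sense inverted for negative operands via the `a_sgn ^ ...` terms. Any NaN
// operand makes the comparison unordered: ge/gt return 0 and always raise
// Invalid Operation, while compare_eq raises it only for signalling NaNs.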
902
903 static uint32_t
904 fp32_add(uint32_t a, uint32_t b, int neg, int mode, int *flags)
905 {
906 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
907 uint32_t a_mnt, b_mnt, x, x_mnt;
908
909 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
910 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
911
912 if ((x = fp32_process_NaNs(a, b, mode, flags))) {
913 return x;
914 }
915
916 b_sgn ^= neg;
917
918 // Handle infinities and zeroes:
919 if (a_exp == 255 && b_exp == 255 && a_sgn != b_sgn) {
920 *flags |= FPLIB_IOC;
921 return fp32_defaultNaN();
922 } else if (a_exp == 255) {
923 return fp32_infinity(a_sgn);
924 } else if (b_exp == 255) {
925 return fp32_infinity(b_sgn);
926 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
927 return fp32_zero(a_sgn);
928 }
929
930 a_mnt <<= 3;
931 b_mnt <<= 3;
932 if (a_exp >= b_exp) {
933 b_mnt = (lsr32(b_mnt, a_exp - b_exp) |
934 !!(b_mnt & (lsl32(1, a_exp - b_exp) - 1)));
935 b_exp = a_exp;
936 } else {
937 a_mnt = (lsr32(a_mnt, b_exp - a_exp) |
938 !!(a_mnt & (lsl32(1, b_exp - a_exp) - 1)));
939 a_exp = b_exp;
940 }
941 x_sgn = a_sgn;
942 x_exp = a_exp;
943 if (a_sgn == b_sgn) {
944 x_mnt = a_mnt + b_mnt;
945 } else if (a_mnt >= b_mnt) {
946 x_mnt = a_mnt - b_mnt;
947 } else {
948 x_sgn ^= 1;
949 x_mnt = b_mnt - a_mnt;
950 }
951
952 if (!x_mnt) {
953 // Sign of exact zero result depends on rounding mode
954 return fp32_zero((mode & 3) == 2);
955 }
956
957 x_mnt = fp32_normalise(x_mnt, &x_exp);
958
959 return fp32_round(x_sgn, x_exp + 5, x_mnt << 1, mode, flags);
960 }
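// In the addition path the significands are pre-shifted left by 3 to make
// room for guard bits, the smaller operand is aligned to the larger exponent
// with the shifted-out bits collapsed into a sticky bit, and the aligned
// significands are then added or subtracted. When subtraction cancels to
// exactly zero, the sign of the zero follows the rounding mode:
// (mode & 3) == 2 is FPLIB_RM, which yields -0; every other mode gives +0.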
961
962 static uint64_t
963 fp64_add(uint64_t a, uint64_t b, int neg, int mode, int *flags)
964 {
965 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
966 uint64_t a_mnt, b_mnt, x, x_mnt;
967
968 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
969 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
970
971 if ((x = fp64_process_NaNs(a, b, mode, flags))) {
972 return x;
973 }
974
975 b_sgn ^= neg;
976
977 // Handle infinities and zeroes:
978 if (a_exp == 2047 && b_exp == 2047 && a_sgn != b_sgn) {
979 *flags |= FPLIB_IOC;
980 return fp64_defaultNaN();
981 } else if (a_exp == 2047) {
982 return fp64_infinity(a_sgn);
983 } else if (b_exp == 2047) {
984 return fp64_infinity(b_sgn);
985 } else if (!a_mnt && !b_mnt && a_sgn == b_sgn) {
986 return fp64_zero(a_sgn);
987 }
988
989 a_mnt <<= 3;
990 b_mnt <<= 3;
991 if (a_exp >= b_exp) {
992 b_mnt = (lsr64(b_mnt, a_exp - b_exp) |
993 !!(b_mnt & (lsl64(1, a_exp - b_exp) - 1)));
994 b_exp = a_exp;
995 } else {
996 a_mnt = (lsr64(a_mnt, b_exp - a_exp) |
997 !!(a_mnt & (lsl64(1, b_exp - a_exp) - 1)));
998 a_exp = b_exp;
999 }
1000 x_sgn = a_sgn;
1001 x_exp = a_exp;
1002 if (a_sgn == b_sgn) {
1003 x_mnt = a_mnt + b_mnt;
1004 } else if (a_mnt >= b_mnt) {
1005 x_mnt = a_mnt - b_mnt;
1006 } else {
1007 x_sgn ^= 1;
1008 x_mnt = b_mnt - a_mnt;
1009 }
1010
1011 if (!x_mnt) {
1012 // Sign of exact zero result depends on rounding mode
1013 return fp64_zero((mode & 3) == 2);
1014 }
1015
1016 x_mnt = fp64_normalise(x_mnt, &x_exp);
1017
1018 return fp64_round(x_sgn, x_exp + 8, x_mnt << 1, mode, flags);
1019 }
1020
1021 static uint32_t
1022 fp32_mul(uint32_t a, uint32_t b, int mode, int *flags)
1023 {
1024 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1025 uint32_t a_mnt, b_mnt, x;
1026 uint64_t x_mnt;
1027
1028 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1029 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1030
1031 if ((x = fp32_process_NaNs(a, b, mode, flags))) {
1032 return x;
1033 }
1034
1035 // Handle infinities and zeroes:
1036 if ((a_exp == 255 && !b_mnt) || (b_exp == 255 && !a_mnt)) {
1037 *flags |= FPLIB_IOC;
1038 return fp32_defaultNaN();
1039 } else if (a_exp == 255 || b_exp == 255) {
1040 return fp32_infinity(a_sgn ^ b_sgn);
1041 } else if (!a_mnt || !b_mnt) {
1042 return fp32_zero(a_sgn ^ b_sgn);
1043 }
1044
1045 // Multiply and normalise:
1046 x_sgn = a_sgn ^ b_sgn;
1047 x_exp = a_exp + b_exp - 110;
1048 x_mnt = (uint64_t)a_mnt * b_mnt;
1049 x_mnt = fp64_normalise(x_mnt, &x_exp);
1050
1051 // Convert to 32 bits, collapsing error into bottom bit:
1052 x_mnt = lsr64(x_mnt, 31) | !!lsl64(x_mnt, 33);
1053
1054 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1055 }
1056
1057 static uint64_t
1058 fp64_mul(uint64_t a, uint64_t b, int mode, int *flags)
1059 {
1060 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1061 uint64_t a_mnt, b_mnt, x;
1062 uint64_t x0_mnt, x1_mnt;
1063
1064 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1065 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1066
1067 if ((x = fp64_process_NaNs(a, b, mode, flags))) {
1068 return x;
1069 }
1070
1071 // Handle infinities and zeroes:
1072 if ((a_exp == 2047 && !b_mnt) || (b_exp == 2047 && !a_mnt)) {
1073 *flags |= FPLIB_IOC;
1074 return fp64_defaultNaN();
1075 } else if (a_exp == 2047 || b_exp == 2047) {
1076 return fp64_infinity(a_sgn ^ b_sgn);
1077 } else if (!a_mnt || !b_mnt) {
1078 return fp64_zero(a_sgn ^ b_sgn);
1079 }
1080
1081 // Multiply and normalise:
1082 x_sgn = a_sgn ^ b_sgn;
1083 x_exp = a_exp + b_exp - 1000;
1084 mul62x62(&x0_mnt, &x1_mnt, a_mnt, b_mnt);
1085 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
1086
1087 // Convert to 64 bits, collapsing error into bottom bit:
1088 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1089
1090 return fp64_round(x_sgn, x_exp, x0_mnt, mode, flags);
1091 }
1092
1093 static uint32_t
1094 fp32_muladd(uint32_t a, uint32_t b, uint32_t c, int scale,
1095 int mode, int *flags)
1096 {
1097 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1098 uint32_t a_mnt, b_mnt, c_mnt, x;
1099 uint64_t x_mnt, y_mnt;
1100
1101 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1102 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1103 fp32_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1104
1105 x = fp32_process_NaNs3(a, b, c, mode, flags);
1106
1107 // Quiet NaN added to product of zero and infinity:
1108 if (a_exp == 255 && (a_mnt >> 22 & 1) &&
1109 ((!b_mnt && c_exp == 255 && !(uint32_t)(c_mnt << 9)) ||
1110 (!c_mnt && b_exp == 255 && !(uint32_t)(b_mnt << 9)))) {
1111 x = fp32_defaultNaN();
1112 *flags |= FPLIB_IOC;
1113 }
1114
1115 if (x) {
1116 return x;
1117 }
1118
1119 // Handle infinities and zeroes:
1120 if ((b_exp == 255 && !c_mnt) ||
1121 (c_exp == 255 && !b_mnt) ||
1122 (a_exp == 255 && (b_exp == 255 || c_exp == 255) &&
1123 (a_sgn != (b_sgn ^ c_sgn)))) {
1124 *flags |= FPLIB_IOC;
1125 return fp32_defaultNaN();
1126 }
1127 if (a_exp == 255)
1128 return fp32_infinity(a_sgn);
1129 if (b_exp == 255 || c_exp == 255)
1130 return fp32_infinity(b_sgn ^ c_sgn);
1131 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1132 return fp32_zero(a_sgn);
1133
1134 x_sgn = a_sgn;
1135 x_exp = a_exp + 13;
1136 x_mnt = (uint64_t)a_mnt << 27;
1137
1138 // Multiply:
1139 y_sgn = b_sgn ^ c_sgn;
1140 y_exp = b_exp + c_exp - 113;
1141 y_mnt = (uint64_t)b_mnt * c_mnt << 3;
1142 if (!y_mnt) {
1143 y_exp = x_exp;
1144 }
1145
1146 // Add:
1147 if (x_exp >= y_exp) {
1148 y_mnt = (lsr64(y_mnt, x_exp - y_exp) |
1149 !!(y_mnt & (lsl64(1, x_exp - y_exp) - 1)));
1150 y_exp = x_exp;
1151 } else {
1152 x_mnt = (lsr64(x_mnt, y_exp - x_exp) |
1153 !!(x_mnt & (lsl64(1, y_exp - x_exp) - 1)));
1154 x_exp = y_exp;
1155 }
1156 if (x_sgn == y_sgn) {
1157 x_mnt = x_mnt + y_mnt;
1158 } else if (x_mnt >= y_mnt) {
1159 x_mnt = x_mnt - y_mnt;
1160 } else {
1161 x_sgn ^= 1;
1162 x_mnt = y_mnt - x_mnt;
1163 }
1164
1165 if (!x_mnt) {
1166 // Sign of exact zero result depends on rounding mode
1167 return fp32_zero((mode & 3) == 2);
1168 }
1169
1170 // Normalise and convert to 32 bits, collapsing error into bottom bit:
1171 x_mnt = fp64_normalise(x_mnt, &x_exp);
1172 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);
1173
1174 return fp32_round(x_sgn, x_exp + scale, x_mnt, mode, flags);
1175 }
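// fp32_muladd is a true fused multiply-add: the 48-bit product of the two
// 24-bit significands is kept in full, aligned against the widened addend,
// and only the final sum is rounded, so there is no intermediate rounding of
// b * c. The special case above that raises Invalid Operation for a quiet
// NaN addend combined with a zero-times-infinity product mirrors the ARM
// FPMulAdd pseudocode.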
1176
1177 static uint64_t
1178 fp64_muladd(uint64_t a, uint64_t b, uint64_t c, int scale,
1179 int mode, int *flags)
1180 {
1181 int a_sgn, a_exp, b_sgn, b_exp, c_sgn, c_exp, x_sgn, x_exp, y_sgn, y_exp;
1182 uint64_t a_mnt, b_mnt, c_mnt, x;
1183 uint64_t x0_mnt, x1_mnt, y0_mnt, y1_mnt;
1184
1185 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1186 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1187 fp64_unpack(&c_sgn, &c_exp, &c_mnt, c, mode, flags);
1188
1189 x = fp64_process_NaNs3(a, b, c, mode, flags);
1190
1191 // Quiet NaN added to product of zero and infinity:
1192 if (a_exp == 2047 && (a_mnt >> 51 & 1) &&
1193 ((!b_mnt && c_exp == 2047 && !(uint64_t)(c_mnt << 12)) ||
1194 (!c_mnt && b_exp == 2047 && !(uint64_t)(b_mnt << 12)))) {
1195 x = fp64_defaultNaN();
1196 *flags |= FPLIB_IOC;
1197 }
1198
1199 if (x) {
1200 return x;
1201 }
1202
1203 // Handle infinities and zeroes:
1204 if ((b_exp == 2047 && !c_mnt) ||
1205 (c_exp == 2047 && !b_mnt) ||
1206 (a_exp == 2047 && (b_exp == 2047 || c_exp == 2047) &&
1207 (a_sgn != (b_sgn ^ c_sgn)))) {
1208 *flags |= FPLIB_IOC;
1209 return fp64_defaultNaN();
1210 }
1211 if (a_exp == 2047)
1212 return fp64_infinity(a_sgn);
1213 if (b_exp == 2047 || c_exp == 2047)
1214 return fp64_infinity(b_sgn ^ c_sgn);
1215 if (!a_mnt && (!b_mnt || !c_mnt) && a_sgn == (b_sgn ^ c_sgn))
1216 return fp64_zero(a_sgn);
1217
1218 x_sgn = a_sgn;
1219 x_exp = a_exp + 11;
1220 x0_mnt = 0;
1221 x1_mnt = a_mnt;
1222
1223 // Multiply:
1224 y_sgn = b_sgn ^ c_sgn;
1225 y_exp = b_exp + c_exp - 1003;
1226 mul62x62(&y0_mnt, &y1_mnt, b_mnt, c_mnt << 3);
1227 if (!y0_mnt && !y1_mnt) {
1228 y_exp = x_exp;
1229 }
1230
1231 // Add:
1232 if (x_exp >= y_exp) {
1233 uint64_t t0, t1;
1234 lsl128(&t0, &t1, y0_mnt, y1_mnt,
1235 x_exp - y_exp < 128 ? 128 - (x_exp - y_exp) : 0);
1236 lsr128(&y0_mnt, &y1_mnt, y0_mnt, y1_mnt, x_exp - y_exp);
1237 y0_mnt |= !!(t0 | t1);
1238 y_exp = x_exp;
1239 } else {
1240 uint64_t t0, t1;
1241 lsl128(&t0, &t1, x0_mnt, x1_mnt,
1242 y_exp - x_exp < 128 ? 128 - (y_exp - x_exp) : 0);
1243 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y_exp - x_exp);
1244 x0_mnt |= !!(t0 | t1);
1245 x_exp = y_exp;
1246 }
1247 if (x_sgn == y_sgn) {
1248 add128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1249 } else if (cmp128(x0_mnt, x1_mnt, y0_mnt, y1_mnt) >= 0) {
1250 sub128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, y0_mnt, y1_mnt);
1251 } else {
1252 x_sgn ^= 1;
1253 sub128(&x0_mnt, &x1_mnt, y0_mnt, y1_mnt, x0_mnt, x1_mnt);
1254 }
1255
1256 if (!x0_mnt && !x1_mnt) {
1257 // Sign of exact zero result depends on rounding mode
1258 return fp64_zero((mode & 3) == 2);
1259 }
1260
1261 // Normalise and convert to 64 bits, collapsing error into bottom bit:
1262 fp128_normalise(&x0_mnt, &x1_mnt, &x_exp);
1263 x0_mnt = x1_mnt << 1 | !!x0_mnt;
1264
1265 return fp64_round(x_sgn, x_exp + scale, x0_mnt, mode, flags);
1266 }
1267
1268 static uint32_t
1269 fp32_div(uint32_t a, uint32_t b, int mode, int *flags)
1270 {
1271 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp;
1272 uint32_t a_mnt, b_mnt, x;
1273 uint64_t x_mnt;
1274
1275 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1276 fp32_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1277
1278 if ((x = fp32_process_NaNs(a, b, mode, flags)))
1279 return x;
1280
1281 // Handle infinities and zeroes:
1282 if ((a_exp == 255 && b_exp == 255) || (!a_mnt && !b_mnt)) {
1283 *flags |= FPLIB_IOC;
1284 return fp32_defaultNaN();
1285 }
1286 if (a_exp == 255 || !b_mnt) {
1287 if (a_exp != 255)
1288 *flags |= FPLIB_DZC;
1289 return fp32_infinity(a_sgn ^ b_sgn);
1290 }
1291 if (!a_mnt || b_exp == 255)
1292 return fp32_zero(a_sgn ^ b_sgn);
1293
1294 // Divide, setting bottom bit if inexact:
1295 a_mnt = fp32_normalise(a_mnt, &a_exp);
1296 x_sgn = a_sgn ^ b_sgn;
1297 x_exp = a_exp - b_exp + 172;
1298 x_mnt = ((uint64_t)a_mnt << 18) / b_mnt;
1299 x_mnt |= (x_mnt * b_mnt != (uint64_t)a_mnt << 18);
1300
1301 // Normalise and convert to 32 bits, collapsing error into bottom bit:
1302 x_mnt = fp64_normalise(x_mnt, &x_exp);
1303 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);
1304
1305 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
1306 }
1307
1308 static uint64_t
1309 fp64_div(uint64_t a, uint64_t b, int mode, int *flags)
1310 {
1311 int a_sgn, a_exp, b_sgn, b_exp, x_sgn, x_exp, c;
1312 uint64_t a_mnt, b_mnt, x, x_mnt, x0_mnt, x1_mnt;
1313
1314 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1315 fp64_unpack(&b_sgn, &b_exp, &b_mnt, b, mode, flags);
1316
1317 if ((x = fp64_process_NaNs(a, b, mode, flags)))
1318 return x;
1319
1320 // Handle infinities and zeroes:
1321 if ((a_exp == 2047 && b_exp == 2047) || (!a_mnt && !b_mnt)) {
1322 *flags |= FPLIB_IOC;
1323 return fp64_defaultNaN();
1324 }
1325 if (a_exp == 2047 || !b_mnt) {
1326 if (a_exp != 2047)
1327 *flags |= FPLIB_DZC;
1328 return fp64_infinity(a_sgn ^ b_sgn);
1329 }
1330 if (!a_mnt || b_exp == 2047)
1331 return fp64_zero(a_sgn ^ b_sgn);
1332
1333 // Find reciprocal of divisor with Newton-Raphson:
1334 a_mnt = fp64_normalise(a_mnt, &a_exp);
1335 b_mnt = fp64_normalise(b_mnt, &b_exp);
1336 x_mnt = ~(uint64_t)0 / (b_mnt >> 31);
1337 mul64x32(&x0_mnt, &x1_mnt, b_mnt, x_mnt);
1338 sub128(&x0_mnt, &x1_mnt, 0, (uint64_t)1 << 32, x0_mnt, x1_mnt);
1339 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 32);
1340 mul64x32(&x0_mnt, &x1_mnt, x0_mnt, x_mnt);
1341 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 33);
1342
1343 // Multiply by dividend:
1344 x_sgn = a_sgn ^ b_sgn;
1345 x_exp = a_exp - b_exp + 1031;
1346 mul62x62(&x0_mnt, &x1_mnt, x0_mnt, a_mnt >> 2); // xx 62x62 is enough
1347 lsr128(&x0_mnt, &x1_mnt, x0_mnt, x1_mnt, 4);
1348 x_mnt = x1_mnt;
1349
1350 // This is an underestimate, so try adding one:
1351 mul62x62(&x0_mnt, &x1_mnt, b_mnt >> 2, x_mnt + 1); // xx 62x62 is enough
1352 c = cmp128(x0_mnt, x1_mnt, 0, a_mnt >> 11);
1353 if (c <= 0) {
1354 ++x_mnt;
1355 }
1356
1357 x_mnt = fp64_normalise(x_mnt, &x_exp);
1358
1359 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
1360 }
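// fp64_div forms the quotient via a reciprocal: an initial estimate of 1/b,
// good to roughly 32 bits, comes from dividing ~0ULL by the top bits of the
// normalised divisor; one Newton-Raphson step r' = r * (2 - b*r), carried
// out in fixed point with the 128-bit helpers, roughly doubles that
// precision, and multiplying by the dividend gives a quotient that may still
// be one ULP too small. The final multiply-back and cmp128 decide whether to
// add that one, and the comparison result is also ORed in as the sticky bit
// passed to fp64_round.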
1361
1362 static void
1363 set_fpscr0(FPSCR &fpscr, int flags)
1364 {
1365 if (flags & FPLIB_IDC) {
1366 fpscr.idc = 1;
1367 }
1368 if (flags & FPLIB_IOC) {
1369 fpscr.ioc = 1;
1370 }
1371 if (flags & FPLIB_DZC) {
1372 fpscr.dzc = 1;
1373 }
1374 if (flags & FPLIB_OFC) {
1375 fpscr.ofc = 1;
1376 }
1377 if (flags & FPLIB_UFC) {
1378 fpscr.ufc = 1;
1379 }
1380 if (flags & FPLIB_IXC) {
1381 fpscr.ixc = 1;
1382 }
1383 }
1384
1385 static uint32_t
1386 fp32_sqrt(uint32_t a, int mode, int *flags)
1387 {
1388 int a_sgn, a_exp, x_sgn, x_exp;
1389 uint32_t a_mnt, x, x_mnt;
1390 uint64_t t0, t1;
1391
1392 fp32_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1393
1394 // Handle NaNs:
1395 if (a_exp == 255 && (uint32_t)(a_mnt << 9))
1396 return fp32_process_NaN(a, mode, flags);
1397
1398 // Handle infinities and zeroes:
1399 if (!a_mnt) {
1400 return fp32_zero(a_sgn);
1401 }
1402 if (a_exp == 255 && !a_sgn) {
1403 return fp32_infinity(a_sgn);
1404 }
1405 if (a_sgn) {
1406 *flags |= FPLIB_IOC;
1407 return fp32_defaultNaN();
1408 }
1409
1410 a_mnt = fp32_normalise(a_mnt, &a_exp);
1411 if (!(a_exp & 1)) {
1412 ++a_exp;
1413 a_mnt >>= 1;
1414 }
1415
1416 // x = (a * 3 + 5) / 8
1417 x = (a_mnt >> 2) + (a_mnt >> 3) + (5 << 28);
1418
1419 // x = (a / x + x) / 2; // 16-bit accuracy
1420 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
1421
1422 // x = (a / x + x) / 2; // 16-bit accuracy
1423 x = (a_mnt / (x >> 15) + (x >> 16)) << 15;
1424
1425 // x = (a / x + x) / 2; // 32-bit accuracy
1426 x = ((((uint64_t)a_mnt << 32) / x) >> 2) + (x >> 1);
1427
1428 x_sgn = 0;
1429 x_exp = (a_exp + 147) >> 1;
1430 x_mnt = ((x - (1 << 5)) >> 6) + 1;
1431 t1 = (uint64_t)x_mnt * x_mnt;
1432 t0 = (uint64_t)a_mnt << 19;
1433 if (t1 > t0) {
1434 --x_mnt;
1435 }
1436
1437 x_mnt = fp32_normalise(x_mnt, &x_exp);
1438
1439 return fp32_round(x_sgn, x_exp, x_mnt << 1 | (t1 != t0), mode, flags);
1440 }
1441
1442 static uint64_t
1443 fp64_sqrt(uint64_t a, int mode, int *flags)
1444 {
1445 int a_sgn, a_exp, x_sgn, x_exp, c;
1446 uint64_t a_mnt, x_mnt, r, x0, x1;
1447 uint32_t x;
1448
1449 fp64_unpack(&a_sgn, &a_exp, &a_mnt, a, mode, flags);
1450
1451 // Handle NaNs:
1452 if (a_exp == 2047 && (uint64_t)(a_mnt << 12)) {
1453 return fp64_process_NaN(a, mode, flags);
1454 }
1455
1456 // Handle infinities and zeroes:
1457 if (!a_mnt)
1458 return fp64_zero(a_sgn);
1459 if (a_exp == 2047 && !a_sgn)
1460 return fp64_infinity(a_sgn);
1461 if (a_sgn) {
1462 *flags |= FPLIB_IOC;
1463 return fp64_defaultNaN();
1464 }
1465
1466 a_mnt = fp64_normalise(a_mnt, &a_exp);
1467 if (a_exp & 1) {
1468 ++a_exp;
1469 a_mnt >>= 1;
1470 }
1471
1472 // x = (a * 3 + 5) / 8
1473 x = (a_mnt >> 34) + (a_mnt >> 35) + (5 << 28);
1474
1475 // x = (a / x + x) / 2; // 16-bit accuracy
1476 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
1477
1478 // x = (a / x + x) / 2; // 16-bit accuracy
1479 x = ((a_mnt >> 32) / (x >> 15) + (x >> 16)) << 15;
1480
1481 // x = (a / x + x) / 2; // 32-bit accuracy
1482 x = ((a_mnt / x) >> 2) + (x >> 1);
1483
1484 // r = 1 / x; // 32-bit accuracy
1485 r = ((uint64_t)1 << 62) / x;
1486
1487 // r = r * (2 - x * r); // 64-bit accuracy
1488 mul64x32(&x0, &x1, -(uint64_t)x * r << 1, r);
1489 lsr128(&x0, &x1, x0, x1, 31);
1490
1491 // x = (x + a * r) / 2; // 64-bit accuracy
1492 mul62x62(&x0, &x1, a_mnt >> 10, x0 >> 2);
1493 lsl128(&x0, &x1, x0, x1, 5);
1494 lsr128(&x0, &x1, x0, x1, 56);
1495
1496 x0 = ((uint64_t)x << 31) + (x0 >> 1);
1497
1498 x_sgn = 0;
1499 x_exp = (a_exp + 1053) >> 1;
1500 x_mnt = x0;
1501 x_mnt = ((x_mnt - (1 << 8)) >> 9) + 1;
1502 mul62x62(&x0, &x1, x_mnt, x_mnt);
1503 lsl128(&x0, &x1, x0, x1, 19);
1504 c = cmp128(x0, x1, 0, a_mnt);
1505 if (c > 0)
1506 --x_mnt;
1507
1508 x_mnt = fp64_normalise(x_mnt, &x_exp);
1509
1510 return fp64_round(x_sgn, x_exp, x_mnt << 1 | !!c, mode, flags);
1511 }
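// Both square-root routines first force the exponent to the right parity so
// it can be halved exactly, then refine an initial estimate of sqrt(a) with
// Heron's iteration x' = (a/x + x)/2 (the steps commented as 16- and 32-bit
// accuracy). fp64_sqrt extends the result to 64 bits through a reciprocal
// refinement, and both routines square the candidate and compare it against
// the input so the result can be corrected by one and the comparison reused
// as the sticky bit.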
1512
1513 static int
1514 modeConv(FPSCR fpscr)
1515 {
1516 return (((int) fpscr) >> 22) & 0xF;
1517 }
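// modeConv extracts FPSCR bits 25:22 so that the rounding mode (bits 23:22)
// lands in the two low bits, matching FPLIB_RN/RP/RM/RZ; FZ (bit 24) becomes
// FPLIB_FZ and DN (bit 25) becomes FPLIB_DN. The AHP bit is not part of the
// nibble; fplibConvert ORs fpscr.ahp in separately as FPLIB_AHP.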
1518
1519 static void
1520 set_fpscr(FPSCR &fpscr, int flags)
1521 {
1522 // translate back to FPSCR
1523 bool underflow = false;
1524 if (flags & FPLIB_IDC) {
1525 fpscr.idc = 1;
1526 }
1527 if (flags & FPLIB_IOC) {
1528 fpscr.ioc = 1;
1529 }
1530 if (flags & FPLIB_DZC) {
1531 fpscr.dzc = 1;
1532 }
1533 if (flags & FPLIB_OFC) {
1534 fpscr.ofc = 1;
1535 }
1536 if (flags & FPLIB_UFC) {
1537 underflow = true; //xx Why is this required?
1538 fpscr.ufc = 1;
1539 }
1540 if ((flags & FPLIB_IXC) && !(underflow && fpscr.fz)) {
1541 fpscr.ixc = 1;
1542 }
1543 }
1544
1545 template <>
1546 bool
1547 fplibCompareEQ(uint32_t a, uint32_t b, FPSCR &fpscr)
1548 {
1549 int flags = 0;
1550 int x = fp32_compare_eq(a, b, modeConv(fpscr), &flags);
1551 set_fpscr(fpscr, flags);
1552 return x;
1553 }
1554
1555 template <>
1556 bool
1557 fplibCompareGE(uint32_t a, uint32_t b, FPSCR &fpscr)
1558 {
1559 int flags = 0;
1560 int x = fp32_compare_ge(a, b, modeConv(fpscr), &flags);
1561 set_fpscr(fpscr, flags);
1562 return x;
1563 }
1564
1565 template <>
1566 bool
1567 fplibCompareGT(uint32_t a, uint32_t b, FPSCR &fpscr)
1568 {
1569 int flags = 0;
1570 int x = fp32_compare_gt(a, b, modeConv(fpscr), &flags);
1571 set_fpscr(fpscr, flags);
1572 return x;
1573 }
1574
1575 template <>
1576 bool
1577 fplibCompareEQ(uint64_t a, uint64_t b, FPSCR &fpscr)
1578 {
1579 int flags = 0;
1580 int x = fp64_compare_eq(a, b, modeConv(fpscr), &flags);
1581 set_fpscr(fpscr, flags);
1582 return x;
1583 }
1584
1585 template <>
1586 bool
1587 fplibCompareGE(uint64_t a, uint64_t b, FPSCR &fpscr)
1588 {
1589 int flags = 0;
1590 int x = fp64_compare_ge(a, b, modeConv(fpscr), &flags);
1591 set_fpscr(fpscr, flags);
1592 return x;
1593 }
1594
1595 template <>
1596 bool
1597 fplibCompareGT(uint64_t a, uint64_t b, FPSCR &fpscr)
1598 {
1599 int flags = 0;
1600 int x = fp64_compare_gt(a, b, modeConv(fpscr), &flags);
1601 set_fpscr(fpscr, flags);
1602 return x;
1603 }
1604
1605 template <>
1606 uint32_t
1607 fplibAbs(uint32_t op)
1608 {
1609 return op & ~((uint32_t)1 << 31);
1610 }
1611
1612 template <>
1613 uint64_t
1614 fplibAbs(uint64_t op)
1615 {
1616 return op & ~((uint64_t)1 << 63);
1617 }
1618
1619 template <>
1620 uint32_t
1621 fplibAdd(uint32_t op1, uint32_t op2, FPSCR &fpscr)
1622 {
1623 int flags = 0;
1624 uint32_t result = fp32_add(op1, op2, 0, modeConv(fpscr), &flags);
1625 set_fpscr0(fpscr, flags);
1626 return result;
1627 }
1628
1629 template <>
1630 uint64_t
1631 fplibAdd(uint64_t op1, uint64_t op2, FPSCR &fpscr)
1632 {
1633 int flags = 0;
1634 uint64_t result = fp64_add(op1, op2, 0, modeConv(fpscr), &flags);
1635 set_fpscr0(fpscr, flags);
1636 return result;
1637 }
1638
1639 template <>
1640 int
1641 fplibCompare(uint32_t op1, uint32_t op2, bool signal_nans, FPSCR &fpscr)
1642 {
1643 int mode = modeConv(fpscr);
1644 int flags = 0;
1645 int sgn1, exp1, sgn2, exp2, result;
1646 uint32_t mnt1, mnt2;
1647
1648 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
1649 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
1650
1651 if ((exp1 == 255 && (uint32_t)(mnt1 << 9)) ||
1652 (exp2 == 255 && (uint32_t)(mnt2 << 9))) {
1653 result = 3;
1654 if ((exp1 == 255 && (uint32_t)(mnt1 << 9) && !(mnt1 >> 22 & 1)) ||
1655 (exp2 == 255 && (uint32_t)(mnt2 << 9) && !(mnt2 >> 22 & 1)) ||
1656 signal_nans)
1657 flags |= FPLIB_IOC;
1658 } else {
1659 if (op1 == op2 || (!mnt1 && !mnt2)) {
1660 result = 6;
1661 } else if (sgn1 != sgn2) {
1662 result = sgn1 ? 8 : 2;
1663 } else if (exp1 != exp2) {
1664 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
1665 } else {
1666 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
1667 }
1668 }
1669
1670 set_fpscr0(fpscr, flags);
1671
1672 return result;
1673 }
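// The result of fplibCompare is the ARM NZCV nibble that the FP compare
// instructions write: 8 (N) for less than, 2 (C) for greater than, 6 (Z|C)
// for equal and 3 (C|V) for an unordered comparison involving a NaN.
// signal_nans selects the signalling (FCMPE/VCMPE-style) behaviour, raising
// Invalid Operation even for quiet NaNs.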
1674
1675 template <>
1676 int
1677 fplibCompare(uint64_t op1, uint64_t op2, bool signal_nans, FPSCR &fpscr)
1678 {
1679 int mode = modeConv(fpscr);
1680 int flags = 0;
1681 int sgn1, exp1, sgn2, exp2, result;
1682 uint64_t mnt1, mnt2;
1683
1684 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
1685 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
1686
1687 if ((exp1 == 2047 && (uint64_t)(mnt1 << 12)) ||
1688 (exp2 == 2047 && (uint64_t)(mnt2 << 12))) {
1689 result = 3;
1690 if ((exp1 == 2047 && (uint64_t)(mnt1 << 12) && !(mnt1 >> 51 & 1)) ||
1691 (exp2 == 2047 && (uint64_t)(mnt2 << 12) && !(mnt2 >> 51 & 1)) ||
1692 signal_nans)
1693 flags |= FPLIB_IOC;
1694 } else {
1695 if (op1 == op2 || (!mnt1 && !mnt2)) {
1696 result = 6;
1697 } else if (sgn1 != sgn2) {
1698 result = sgn1 ? 8 : 2;
1699 } else if (exp1 != exp2) {
1700 result = sgn1 ^ (exp1 < exp2) ? 8 : 2;
1701 } else {
1702 result = sgn1 ^ (mnt1 < mnt2) ? 8 : 2;
1703 }
1704 }
1705
1706 set_fpscr0(fpscr, flags);
1707
1708 return result;
1709 }
1710
1711 static uint16_t
1712 fp16_FPConvertNaN_32(uint32_t op)
1713 {
1714 return fp16_pack(op >> 31, 31, (uint16_t)1 << 9 | op >> 13);
1715 }
1716
1717 static uint16_t
1718 fp16_FPConvertNaN_64(uint64_t op)
1719 {
1720 return fp16_pack(op >> 63, 31, (uint16_t)1 << 9 | op >> 42);
1721 }
1722
1723 static uint32_t
1724 fp32_FPConvertNaN_16(uint16_t op)
1725 {
1726 return fp32_pack(op >> 15, 255, (uint32_t)1 << 22 | (uint32_t)op << 13);
1727 }
1728
1729 static uint32_t
1730 fp32_FPConvertNaN_64(uint64_t op)
1731 {
1732 return fp32_pack(op >> 63, 255, (uint32_t)1 << 22 | op >> 29);
1733 }
1734
1735 static uint64_t
1736 fp64_FPConvertNaN_16(uint16_t op)
1737 {
1738 return fp64_pack(op >> 15, 2047, (uint64_t)1 << 51 | (uint64_t)op << 42);
1739 }
1740
1741 static uint64_t
1742 fp64_FPConvertNaN_32(uint32_t op)
1743 {
1744 return fp64_pack(op >> 31, 2047, (uint64_t)1 << 51 | (uint64_t)op << 29);
1745 }
1746
1747 static uint32_t
1748 fp32_FPOnePointFive(int sgn)
1749 {
1750 return fp32_pack(sgn, 127, (uint64_t)1 << 22);
1751 }
1752
1753 static uint64_t
1754 fp64_FPOnePointFive(int sgn)
1755 {
1756 return fp64_pack(sgn, 1023, (uint64_t)1 << 51);
1757 }
1758
1759 static uint32_t
1760 fp32_FPThree(int sgn)
1761 {
1762 return fp32_pack(sgn, 128, (uint64_t)1 << 22);
1763 }
1764
1765 static uint64_t
1766 fp64_FPThree(int sgn)
1767 {
1768 return fp64_pack(sgn, 1024, (uint64_t)1 << 51);
1769 }
1770
1771 static uint32_t
1772 fp32_FPTwo(int sgn)
1773 {
1774 return fp32_pack(sgn, 128, 0);
1775 }
1776
1777 static uint64_t
1778 fp64_FPTwo(int sgn)
1779 {
1780 return fp64_pack(sgn, 1024, 0);
1781 }
1782
1783 template <>
1784 uint16_t
1785 fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
1786 {
1787 int mode = modeConv(fpscr);
1788 int flags = 0;
1789 int sgn, exp;
1790 uint32_t mnt;
1791 uint16_t result;
1792
1793 // Unpack floating-point operand optionally with flush-to-zero:
1794 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1795
1796 bool alt_hp = fpscr.ahp;
1797
1798 if (exp == 255 && (uint32_t)(mnt << 9)) {
1799 if (alt_hp) {
1800 result = fp16_zero(sgn);
1801 } else if (fpscr.dn) {
1802 result = fp16_defaultNaN();
1803 } else {
1804 result = fp16_FPConvertNaN_32(op);
1805 }
1806 if (!(mnt >> 22 & 1) || alt_hp) {
1807 flags |= FPLIB_IOC;
1808 }
1809 } else if (exp == 255) {
1810 if (alt_hp) {
1811 result = sgn << 15 | (uint16_t)0x7fff;
1812 flags |= FPLIB_IOC;
1813 } else {
1814 result = fp16_infinity(sgn);
1815 }
1816 } else if (!mnt) {
1817 result = fp16_zero(sgn);
1818 } else {
1819 result = fp16_round_(sgn, exp - 127 + 15,
1820 mnt >> 7 | !!(uint32_t)(mnt << 25),
1821 rounding, mode | alt_hp << 4, &flags);
1822 }
1823
1824 set_fpscr0(fpscr, flags);
1825
1826 return result;
1827 }
1828
1829 template <>
1830 uint16_t
1831 fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
1832 {
1833 int mode = modeConv(fpscr);
1834 int flags = 0;
1835 int sgn, exp;
1836 uint64_t mnt;
1837 uint16_t result;
1838
1839 // Unpack floating-point operand optionally with flush-to-zero:
1840 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1841
1842 bool alt_hp = fpscr.ahp;
1843
1844 if (exp == 2047 && (uint64_t)(mnt << 12)) {
1845 if (alt_hp) {
1846 result = fp16_zero(sgn);
1847 } else if (fpscr.dn) {
1848 result = fp16_defaultNaN();
1849 } else {
1850 result = fp16_FPConvertNaN_64(op);
1851 }
1852 if (!(mnt >> 51 & 1) || alt_hp) {
1853 flags |= FPLIB_IOC;
1854 }
1855 } else if (exp == 2047) {
1856 if (alt_hp) {
1857 result = sgn << 15 | (uint16_t)0x7fff;
1858 flags |= FPLIB_IOC;
1859 } else {
1860 result = fp16_infinity(sgn);
1861 }
1862 } else if (!mnt) {
1863 result = fp16_zero(sgn);
1864 } else {
1865 result = fp16_round_(sgn, exp - 1023 + 15,
1866 mnt >> 36 | !!(uint64_t)(mnt << 28),
1867 rounding, mode | alt_hp << 4, &flags);
1868 }
1869
1870 set_fpscr0(fpscr, flags);
1871
1872 return result;
1873 }
1874
1875 template <>
1876 uint32_t
1877 fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
1878 {
1879 int mode = modeConv(fpscr);
1880 int flags = 0;
1881 int sgn, exp;
1882 uint16_t mnt;
1883 uint32_t result;
1884
1885 // Unpack floating-point operand optionally with flush-to-zero:
1886 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1887
1888 if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
1889 if (fpscr.dn) {
1890 result = fp32_defaultNaN();
1891 } else {
1892 result = fp32_FPConvertNaN_16(op);
1893 }
1894 if (!(mnt >> 9 & 1)) {
1895 flags |= FPLIB_IOC;
1896 }
1897 } else if (exp == 31 && !fpscr.ahp) {
1898 result = fp32_infinity(sgn);
1899 } else if (!mnt) {
1900 result = fp32_zero(sgn);
1901 } else {
1902 mnt = fp16_normalise(mnt, &exp);
1903 result = fp32_pack(sgn, exp - 15 + 127 + 5, (uint32_t)mnt << 8);
1904 }
1905
1906 set_fpscr0(fpscr, flags);
1907
1908 return result;
1909 }
1910
1911 template <>
1912 uint32_t
1913 fplibConvert(uint64_t op, FPRounding rounding, FPSCR &fpscr)
1914 {
1915 int mode = modeConv(fpscr);
1916 int flags = 0;
1917 int sgn, exp;
1918 uint64_t mnt;
1919 uint32_t result;
1920
1921 // Unpack floating-point operand optionally with flush-to-zero:
1922 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1923
1924 if (exp == 2047 && (uint64_t)(mnt << 12)) {
1925 if (fpscr.dn) {
1926 result = fp32_defaultNaN();
1927 } else {
1928 result = fp32_FPConvertNaN_64(op);
1929 }
1930 if (!(mnt >> 51 & 1)) {
1931 flags |= FPLIB_IOC;
1932 }
1933 } else if (exp == 2047) {
1934 result = fp32_infinity(sgn);
1935 } else if (!mnt) {
1936 result = fp32_zero(sgn);
1937 } else {
1938 result = fp32_round_(sgn, exp - 1023 + 127,
1939 mnt >> 20 | !!(uint64_t)(mnt << 44),
1940 rounding, mode, &flags);
1941 }
1942
1943 set_fpscr0(fpscr, flags);
1944
1945 return result;
1946 }
1947
1948 template <>
1949 uint64_t
1950 fplibConvert(uint16_t op, FPRounding rounding, FPSCR &fpscr)
1951 {
1952 int mode = modeConv(fpscr);
1953 int flags = 0;
1954 int sgn, exp;
1955 uint16_t mnt;
1956 uint64_t result;
1957
1958 // Unpack floating-point operand optionally with flush-to-zero:
1959 fp16_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1960
1961 if (exp == 31 && !fpscr.ahp && (uint16_t)(mnt << 6)) {
1962 if (fpscr.dn) {
1963 result = fp64_defaultNaN();
1964 } else {
1965 result = fp64_FPConvertNaN_16(op);
1966 }
1967 if (!(mnt >> 9 & 1)) {
1968 flags |= FPLIB_IOC;
1969 }
1970 } else if (exp == 31 && !fpscr.ahp) {
1971 result = fp64_infinity(sgn);
1972 } else if (!mnt) {
1973 result = fp64_zero(sgn);
1974 } else {
1975 mnt = fp16_normalise(mnt, &exp);
1976 result = fp64_pack(sgn, exp - 15 + 1023 + 5, (uint64_t)mnt << 37);
1977 }
1978
1979 set_fpscr0(fpscr, flags);
1980
1981 return result;
1982 }
1983
1984 template <>
1985 uint64_t
1986 fplibConvert(uint32_t op, FPRounding rounding, FPSCR &fpscr)
1987 {
1988 int mode = modeConv(fpscr);
1989 int flags = 0;
1990 int sgn, exp;
1991 uint32_t mnt;
1992 uint64_t result;
1993
1994 // Unpack floating-point operand optionally with flush-to-zero:
1995 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
1996
1997 if (exp == 255 && (uint32_t)(mnt << 9)) {
1998 if (fpscr.dn) {
1999 result = fp64_defaultNaN();
2000 } else {
2001 result = fp64_FPConvertNaN_32(op);
2002 }
2003 if (!(mnt >> 22 & 1)) {
2004 flags |= FPLIB_IOC;
2005 }
2006 } else if (exp == 255) {
2007 result = fp64_infinity(sgn);
2008 } else if (!mnt) {
2009 result = fp64_zero(sgn);
2010 } else {
2011 mnt = fp32_normalise(mnt, &exp);
2012 result = fp64_pack(sgn, exp - 127 + 1023 + 8, (uint64_t)mnt << 21);
2013 }
2014
2015 set_fpscr0(fpscr, flags);
2016
2017 return result;
2018 }
2019
2020 template <>
2021 uint32_t
2022 fplibMulAdd(uint32_t addend, uint32_t op1, uint32_t op2, FPSCR &fpscr)
2023 {
2024 int flags = 0;
2025 uint32_t result = fp32_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2026 set_fpscr0(fpscr, flags);
2027 return result;
2028 }
2029
2030 template <>
2031 uint64_t
2032 fplibMulAdd(uint64_t addend, uint64_t op1, uint64_t op2, FPSCR &fpscr)
2033 {
2034 int flags = 0;
2035 uint64_t result = fp64_muladd(addend, op1, op2, 0, modeConv(fpscr), &flags);
2036 set_fpscr0(fpscr, flags);
2037 return result;
2038 }
2039
2040 template <>
2041 uint32_t
2042 fplibDiv(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2043 {
2044 int flags = 0;
2045 uint32_t result = fp32_div(op1, op2, modeConv(fpscr), &flags);
2046 set_fpscr0(fpscr, flags);
2047 return result;
2048 }
2049
2050 template <>
2051 uint64_t
2052 fplibDiv(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2053 {
2054 int flags = 0;
2055 uint64_t result = fp64_div(op1, op2, modeConv(fpscr), &flags);
2056 set_fpscr0(fpscr, flags);
2057 return result;
2058 }
2059
2060 static uint32_t
2061 fp32_repack(int sgn, int exp, uint32_t mnt)
2062 {
2063 return fp32_pack(sgn, mnt >> 23 ? exp : 0, mnt);
2064 }
2065
2066 static uint64_t
2067 fp64_repack(int sgn, int exp, uint64_t mnt)
2068 {
2069 return fp64_pack(sgn, mnt >> 52 ? exp : 0, mnt);
2070 }
2071
2072 static void
2073 fp32_minmaxnum(uint32_t *op1, uint32_t *op2, int sgn)
2074 {
2075 // Treat a single quiet-NaN as +Infinity/-Infinity
2076 if (!((uint32_t)~(*op1 << 1) >> 23) && (uint32_t)~(*op2 << 1) >> 23)
2077 *op1 = fp32_infinity(sgn);
2078 if (!((uint32_t)~(*op2 << 1) >> 23) && (uint32_t)~(*op1 << 1) >> 23)
2079 *op2 = fp32_infinity(sgn);
2080 }
2081
2082 static void
2083 fp64_minmaxnum(uint64_t *op1, uint64_t *op2, int sgn)
2084 {
2085 // Treat a single quiet-NaN as +Infinity/-Infinity
2086 if (!((uint64_t)~(*op1 << 1) >> 52) && (uint64_t)~(*op2 << 1) >> 52)
2087 *op1 = fp64_infinity(sgn);
2088 if (!((uint64_t)~(*op2 << 1) >> 52) && (uint64_t)~(*op1 << 1) >> 52)
2089 *op2 = fp64_infinity(sgn);
2090 }
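// The test !((uint32_t)~(x << 1) >> 23) (and its >> 52 double-precision
// counterpart) is true exactly when the exponent field is all ones and the
// top fraction bit is set, i.e. when x is a quiet NaN of either sign. For
// FMAXNM/FMINNM semantics a single quiet-NaN operand is replaced with
// -infinity (MaxNum) or +infinity (MinNum) so that the ordinary Max/Min
// below selects the numeric operand; signalling NaNs are left alone and
// still propagate through fp*_process_NaNs.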
2091
2092 template <>
2093 uint32_t
2094 fplibMax(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2095 {
2096 int mode = modeConv(fpscr);
2097 int flags = 0;
2098 int sgn1, exp1, sgn2, exp2;
2099 uint32_t mnt1, mnt2, x, result;
2100
2101 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2102 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2103
2104 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
2105 result = x;
2106 } else {
2107 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
2108 fp32_repack(sgn1, exp1, mnt1) :
2109 fp32_repack(sgn2, exp2, mnt2));
2110 }
2111 set_fpscr0(fpscr, flags);
2112 return result;
2113 }
2114
2115 template <>
2116 uint64_t
2117 fplibMax(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2118 {
2119 int mode = modeConv(fpscr);
2120 int flags = 0;
2121 int sgn1, exp1, sgn2, exp2;
2122 uint64_t mnt1, mnt2, x, result;
2123
2124 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2125 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2126
2127 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
2128 result = x;
2129 } else {
2130 result = ((sgn1 != sgn2 ? sgn2 : sgn1 ^ (op1 > op2)) ?
2131 fp64_repack(sgn1, exp1, mnt1) :
2132 fp64_repack(sgn2, exp2, mnt2));
2133 }
2134 set_fpscr0(fpscr, flags);
2135 return result;
2136 }
2137
2138 template <>
2139 uint32_t
2140 fplibMaxNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2141 {
2142 fp32_minmaxnum(&op1, &op2, 1);
2143 return fplibMax<uint32_t>(op1, op2, fpscr);
2144 }
2145
2146 template <>
2147 uint64_t
2148 fplibMaxNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2149 {
2150 fp64_minmaxnum(&op1, &op2, 1);
2151 return fplibMax<uint64_t>(op1, op2, fpscr);
2152 }
2153
2154 template <>
2155 uint32_t
2156 fplibMin(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2157 {
2158 int mode = modeConv(fpscr);
2159 int flags = 0;
2160 int sgn1, exp1, sgn2, exp2;
2161 uint32_t mnt1, mnt2, x, result;
2162
2163 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2164 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2165
2166 if ((x = fp32_process_NaNs(op1, op2, mode, &flags))) {
2167 result = x;
2168 } else {
2169 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
2170 fp32_repack(sgn1, exp1, mnt1) :
2171 fp32_repack(sgn2, exp2, mnt2));
2172 }
2173 set_fpscr0(fpscr, flags);
2174 return result;
2175 }
2176
2177 template <>
2178 uint64_t
2179 fplibMin(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2180 {
2181 int mode = modeConv(fpscr);
2182 int flags = 0;
2183 int sgn1, exp1, sgn2, exp2;
2184 uint64_t mnt1, mnt2, x, result;
2185
2186 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2187 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2188
2189 if ((x = fp64_process_NaNs(op1, op2, mode, &flags))) {
2190 result = x;
2191 } else {
2192 result = ((sgn1 != sgn2 ? sgn1 : sgn1 ^ (op1 < op2)) ?
2193 fp64_repack(sgn1, exp1, mnt1) :
2194 fp64_repack(sgn2, exp2, mnt2));
2195 }
2196 set_fpscr0(fpscr, flags);
2197 return result;
2198 }
2199
2200 template <>
2201 uint32_t
2202 fplibMinNum(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2203 {
2204 fp32_minmaxnum(&op1, &op2, 0);
2205 return fplibMin<uint32_t>(op1, op2, fpscr);
2206 }
2207
2208 template <>
2209 uint64_t
2210 fplibMinNum(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2211 {
2212 fp64_minmaxnum(&op1, &op2, 0);
2213 return fplibMin<uint64_t>(op1, op2, fpscr);
2214 }
2215
2216 template <>
2217 uint32_t
2218 fplibMul(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2219 {
2220 int flags = 0;
2221 uint32_t result = fp32_mul(op1, op2, modeConv(fpscr), &flags);
2222 set_fpscr0(fpscr, flags);
2223 return result;
2224 }
2225
2226 template <>
2227 uint64_t
2228 fplibMul(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2229 {
2230 int flags = 0;
2231 uint64_t result = fp64_mul(op1, op2, modeConv(fpscr), &flags);
2232 set_fpscr0(fpscr, flags);
2233 return result;
2234 }
2235
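// FMULX behaves like an ordinary multiply except that the invalid case
// 0 * infinity (in either order) returns 2.0 with the product's sign instead
// of the default NaN, e.g. FMULX(+0.0, -Inf) == -2.0.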
2236 template <>
2237 uint32_t
2238 fplibMulX(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2239 {
2240 int mode = modeConv(fpscr);
2241 int flags = 0;
2242 int sgn1, exp1, sgn2, exp2;
2243 uint32_t mnt1, mnt2, result;
2244
2245 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2246 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2247
2248 result = fp32_process_NaNs(op1, op2, mode, &flags);
2249 if (!result) {
2250 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
2251 result = fp32_FPTwo(sgn1 ^ sgn2);
2252 } else if (exp1 == 255 || exp2 == 255) {
2253 result = fp32_infinity(sgn1 ^ sgn2);
2254 } else if (!mnt1 || !mnt2) {
2255 result = fp32_zero(sgn1 ^ sgn2);
2256 } else {
2257 result = fp32_mul(op1, op2, mode, &flags);
2258 }
2259 }
2260
2261 set_fpscr0(fpscr, flags);
2262
2263 return result;
2264 }
2265
2266 template <>
2267 uint64_t
2268 fplibMulX(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2269 {
2270 int mode = modeConv(fpscr);
2271 int flags = 0;
2272 int sgn1, exp1, sgn2, exp2;
2273 uint64_t mnt1, mnt2, result;
2274
2275 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2276 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2277
2278 result = fp64_process_NaNs(op1, op2, mode, &flags);
2279 if (!result) {
2280 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
2281 result = fp64_FPTwo(sgn1 ^ sgn2);
2282 } else if (exp1 == 2047 || exp2 == 2047) {
2283 result = fp64_infinity(sgn1 ^ sgn2);
2284 } else if (!mnt1 || !mnt2) {
2285 result = fp64_zero(sgn1 ^ sgn2);
2286 } else {
2287 result = fp64_mul(op1, op2, mode, &flags);
2288 }
2289 }
2290
2291 set_fpscr0(fpscr, flags);
2292
2293 return result;
2294 }
2295
2296 template <>
2297 uint32_t
2298 fplibNeg(uint32_t op)
2299 {
2300 return op ^ (uint32_t)1 << 31;
2301 }
2302
2303 template <>
2304 uint64_t
2305 fplibNeg(uint64_t op)
2306 {
2307 return op ^ (uint64_t)1 << 63;
2308 }
2309
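// 256-entry estimate table for FRSQRTE. The index combines one bit of
// exponent parity (bit 7) with the top seven fraction bits of the normalised
// significand; the 8-bit entry becomes the leading fraction bits of the
// result (see fplibRSqrtEstimate below).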
2310 static const uint8_t recip_sqrt_estimate[256] = {
2311 255, 253, 251, 249, 247, 245, 243, 242, 240, 238, 236, 234, 233, 231, 229, 228,
2312 226, 224, 223, 221, 219, 218, 216, 215, 213, 212, 210, 209, 207, 206, 204, 203,
2313 201, 200, 198, 197, 196, 194, 193, 192, 190, 189, 188, 186, 185, 184, 183, 181,
2314 180, 179, 178, 176, 175, 174, 173, 172, 170, 169, 168, 167, 166, 165, 164, 163,
2315 162, 160, 159, 158, 157, 156, 155, 154, 153, 152, 151, 150, 149, 148, 147, 146,
2316 145, 144, 143, 142, 141, 140, 140, 139, 138, 137, 136, 135, 134, 133, 132, 131,
2317 131, 130, 129, 128, 127, 126, 126, 125, 124, 123, 122, 121, 121, 120, 119, 118,
2318 118, 117, 116, 115, 114, 114, 113, 112, 111, 111, 110, 109, 109, 108, 107, 106,
2319 105, 104, 103, 101, 100, 99, 97, 96, 95, 93, 92, 91, 90, 88, 87, 86,
2320 85, 84, 82, 81, 80, 79, 78, 77, 76, 75, 74, 72, 71, 70, 69, 68,
2321 67, 66, 65, 64, 63, 62, 61, 60, 60, 59, 58, 57, 56, 55, 54, 53,
2322 52, 51, 51, 50, 49, 48, 47, 46, 46, 45, 44, 43, 42, 42, 41, 40,
2323 39, 38, 38, 37, 36, 35, 35, 34, 33, 33, 32, 31, 30, 30, 29, 28,
2324 28, 27, 26, 26, 25, 24, 24, 23, 22, 22, 21, 20, 20, 19, 19, 18,
2325 17, 17, 16, 16, 15, 14, 14, 13, 13, 12, 11, 11, 10, 10, 9, 9,
2326 8, 8, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0
2327 };
2328
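// FRSQRTE special cases: NaNs are propagated, +/-0 returns an infinity of the
// same sign and raises Divide-by-Zero, negative inputs return the default NaN
// with Invalid Operation, and +Inf returns +0. Otherwise the significand is
// normalised, the table above supplies an 8-bit estimate of 1/sqrt(x), and
// the result exponent is roughly half of the negated, re-biased input
// exponent ((380 - exp) >> 1 for single precision).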
2329 template <>
2330 uint32_t
2331 fplibRSqrtEstimate(uint32_t op, FPSCR &fpscr)
2332 {
2333 int mode = modeConv(fpscr);
2334 int flags = 0;
2335 int sgn, exp;
2336 uint32_t mnt, result;
2337
2338 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2339
2340 if (exp == 255 && (uint32_t)(mnt << 9)) {
2341 result = fp32_process_NaN(op, mode, &flags);
2342 } else if (!mnt) {
2343 result = fp32_infinity(sgn);
2344 flags |= FPLIB_DZC;
2345 } else if (sgn) {
2346 result = fp32_defaultNaN();
2347 flags |= FPLIB_IOC;
2348 } else if (exp == 255) {
2349 result = fp32_zero(0);
2350 } else {
2351 exp += 8;
2352 mnt = fp32_normalise(mnt, &exp);
2353 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 24 & 127)];
2354 result = fp32_pack(0, (380 - exp) >> 1, mnt << 15);
2355 }
2356
2357 set_fpscr0(fpscr, flags);
2358
2359 return result;
2360 }
2361
2362 template <>
2363 uint64_t
2364 fplibRSqrtEstimate(uint64_t op, FPSCR &fpscr)
2365 {
2366 int mode = modeConv(fpscr);
2367 int flags = 0;
2368 int sgn, exp;
2369 uint64_t mnt, result;
2370
2371 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2372
2373 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2374 result = fp64_process_NaN(op, mode, &flags);
2375 } else if (!mnt) {
2376 result = fp64_infinity(sgn);
2377 flags |= FPLIB_DZC;
2378 } else if (sgn) {
2379 result = fp64_defaultNaN();
2380 flags |= FPLIB_IOC;
2381 } else if (exp == 2047) {
2382 result = fp64_zero(0);
2383 } else {
2384 exp += 11;
2385 mnt = fp64_normalise(mnt, &exp);
2386 mnt = recip_sqrt_estimate[(~exp & 1) << 7 | (mnt >> 56 & 127)];
2387 result = fp64_pack(0, (3068 - exp) >> 1, mnt << 44);
2388 }
2389
2390 set_fpscr0(fpscr, flags);
2391
2392 return result;
2393 }
2394
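// FRSQRTS step: computes (3 - op1 * op2) / 2 with a single fused operation.
// op1 is negated up front and, on the assumption that the trailing scale
// argument of fp32_muladd/fp64_muladd multiplies the fused result by
// 2^scale, the -1 supplies the final halving. The special case 0 * Inf
// yields 1.5.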
2395 template <>
2396 uint32_t
2397 fplibRSqrtStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2398 {
2399 int mode = modeConv(fpscr);
2400 int flags = 0;
2401 int sgn1, exp1, sgn2, exp2;
2402 uint32_t mnt1, mnt2, result;
2403
2404 op1 = fplibNeg<uint32_t>(op1);
2405 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2406 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2407
2408 result = fp32_process_NaNs(op1, op2, mode, &flags);
2409 if (!result) {
2410 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
2411 result = fp32_FPOnePointFive(0);
2412 } else if (exp1 == 255 || exp2 == 255) {
2413 result = fp32_infinity(sgn1 ^ sgn2);
2414 } else {
2415 result = fp32_muladd(fp32_FPThree(0), op1, op2, -1, mode, &flags);
2416 }
2417 }
2418
2419 set_fpscr0(fpscr, flags);
2420
2421 return result;
2422 }
2423
2424 template <>
2425 uint64_t
2426 fplibRSqrtStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2427 {
2428 int mode = modeConv(fpscr);
2429 int flags = 0;
2430 int sgn1, exp1, sgn2, exp2;
2431 uint64_t mnt1, mnt2, result;
2432
2433 op1 = fplibNeg<uint64_t>(op1);
2434 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2435 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2436
2437 result = fp64_process_NaNs(op1, op2, mode, &flags);
2438 if (!result) {
2439 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
2440 result = fp64_FPOnePointFive(0);
2441 } else if (exp1 == 2047 || exp2 == 2047) {
2442 result = fp64_infinity(sgn1 ^ sgn2);
2443 } else {
2444 result = fp64_muladd(fp64_FPThree(0), op1, op2, -1, mode, &flags);
2445 }
2446 }
2447
2448 set_fpscr0(fpscr, flags);
2449
2450 return result;
2451 }
2452
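// FRECPS step: computes 2 - op1 * op2 via fused multiply-add (op1 is negated
// first, then added to 2), with 0 * Inf treated as 0 so the step returns 2.0.
// For example, op1 = 1.0 and op2 = 0.5 gives 2 - 0.5 = 1.5.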
2453 template <>
2454 uint32_t
2455 fplibRecipStepFused(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2456 {
2457 int mode = modeConv(fpscr);
2458 int flags = 0;
2459 int sgn1, exp1, sgn2, exp2;
2460 uint32_t mnt1, mnt2, result;
2461
2462 op1 = fplibNeg<uint32_t>(op1);
2463 fp32_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2464 fp32_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2465
2466 result = fp32_process_NaNs(op1, op2, mode, &flags);
2467 if (!result) {
2468 if ((exp1 == 255 && !mnt2) || (exp2 == 255 && !mnt1)) {
2469 result = fp32_FPTwo(0);
2470 } else if (exp1 == 255 || exp2 == 255) {
2471 result = fp32_infinity(sgn1 ^ sgn2);
2472 } else {
2473 result = fp32_muladd(fp32_FPTwo(0), op1, op2, 0, mode, &flags);
2474 }
2475 }
2476
2477 set_fpscr0(fpscr, flags);
2478
2479 return result;
2480 }
2481
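// FRECPE: NaNs are propagated, infinities return a signed zero, zeros return
// a signed infinity with Divide-by-Zero, and inputs so small that the
// reciprocal would overflow return either infinity or the largest normal
// depending on the rounding direction (Overflow and Inexact are raised).
// With flush-to-zero set, very large inputs underflow to a signed zero.
// Otherwise the estimate fraction is a truncating division,
// (2^19 / (top ten significand bits) + 1) >> 1, placed in the leading
// fraction bits, and the result exponent roughly mirrors the input exponent
// about the bias, with denormal results shifted down by one or two places.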
2482 template <>
2483 uint32_t
2484 fplibRecipEstimate(uint32_t op, FPSCR &fpscr)
2485 {
2486 int mode = modeConv(fpscr);
2487 int flags = 0;
2488 int sgn, exp;
2489 uint32_t mnt, result;
2490
2491 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2492
2493 if (exp == 255 && (uint32_t)(mnt << 9)) {
2494 result = fp32_process_NaN(op, mode, &flags);
2495 } else if (exp == 255) {
2496 result = fp32_zero(sgn);
2497 } else if (!mnt) {
2498 result = fp32_infinity(sgn);
2499 flags |= FPLIB_DZC;
2500 } else if (!((uint32_t)(op << 1) >> 22)) {
2501 bool overflow_to_inf = false;
2502 switch (FPCRRounding(fpscr)) {
2503 case FPRounding_TIEEVEN:
2504 overflow_to_inf = true;
2505 break;
2506 case FPRounding_POSINF:
2507 overflow_to_inf = !sgn;
2508 break;
2509 case FPRounding_NEGINF:
2510 overflow_to_inf = sgn;
2511 break;
2512 case FPRounding_ZERO:
2513 overflow_to_inf = false;
2514 break;
2515 default:
2516 assert(0);
2517 }
2518 result = overflow_to_inf ? fp32_infinity(sgn) : fp32_max_normal(sgn);
2519 flags |= FPLIB_OFC | FPLIB_IXC;
2520 } else if (fpscr.fz && exp >= 253) {
2521 result = fp32_zero(sgn);
2522 flags |= FPLIB_UFC;
2523 } else {
2524 exp += 8;
2525 mnt = fp32_normalise(mnt, &exp);
2526 int result_exp = 253 - exp;
2527 uint32_t fraction = (((uint32_t)1 << 19) / (mnt >> 22 | 1) + 1) >> 1;
2528 fraction <<= 15;
2529 if (result_exp == 0) {
2530 fraction >>= 1;
2531 } else if (result_exp == -1) {
2532 fraction >>= 2;
2533 result_exp = 0;
2534 }
2535 result = fp32_pack(sgn, result_exp, fraction);
2536 }
2537
2538 set_fpscr0(fpscr, flags);
2539
2540 return result;
2541 }
2542
2543 template <>
2544 uint64_t
2545 fplibRecipEstimate(uint64_t op, FPSCR &fpscr)
2546 {
2547 int mode = modeConv(fpscr);
2548 int flags = 0;
2549 int sgn, exp;
2550 uint64_t mnt, result;
2551
2552 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2553
2554 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2555 result = fp64_process_NaN(op, mode, &flags);
2556 } else if (exp == 2047) {
2557 result = fp64_zero(sgn);
2558 } else if (!mnt) {
2559 result = fp64_infinity(sgn);
2560 flags |= FPLIB_DZC;
2561 } else if (!((uint64_t)(op << 1) >> 51)) {
2562 bool overflow_to_inf = false;
2563 switch (FPCRRounding(fpscr)) {
2564 case FPRounding_TIEEVEN:
2565 overflow_to_inf = true;
2566 break;
2567 case FPRounding_POSINF:
2568 overflow_to_inf = !sgn;
2569 break;
2570 case FPRounding_NEGINF:
2571 overflow_to_inf = sgn;
2572 break;
2573 case FPRounding_ZERO:
2574 overflow_to_inf = false;
2575 break;
2576 default:
2577 assert(0);
2578 }
2579 result = overflow_to_inf ? fp64_infinity(sgn) : fp64_max_normal(sgn);
2580 flags |= FPLIB_OFC | FPLIB_IXC;
2581 } else if (fpscr.fz && exp >= 2045) {
2582 result = fp64_zero(sgn);
2583 flags |= FPLIB_UFC;
2584 } else {
2585 exp += 11;
2586 mnt = fp64_normalise(mnt, &exp);
2587 int result_exp = 2045 - exp;
2588 uint64_t fraction = (((uint32_t)1 << 19) / (mnt >> 54 | 1) + 1) >> 1;
2589 fraction <<= 44;
2590 if (result_exp == 0) {
2591 fraction >>= 1;
2592 } else if (result_exp == -1) {
2593 fraction >>= 2;
2594 result_exp = 0;
2595 }
2596 result = fp64_pack(sgn, result_exp, fraction);
2597 }
2598
2599 set_fpscr0(fpscr, flags);
2600
2601 return result;
2602 }
2603
2604 template <>
2605 uint64_t
2606 fplibRecipStepFused(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2607 {
2608 int mode = modeConv(fpscr);
2609 int flags = 0;
2610 int sgn1, exp1, sgn2, exp2;
2611 uint64_t mnt1, mnt2, result;
2612
2613 op1 = fplibNeg<uint64_t>(op1);
2614 fp64_unpack(&sgn1, &exp1, &mnt1, op1, mode, &flags);
2615 fp64_unpack(&sgn2, &exp2, &mnt2, op2, mode, &flags);
2616
2617 result = fp64_process_NaNs(op1, op2, mode, &flags);
2618 if (!result) {
2619 if ((exp1 == 2047 && !mnt2) || (exp2 == 2047 && !mnt1)) {
2620 result = fp64_FPTwo(0);
2621 } else if (exp1 == 2047 || exp2 == 2047) {
2622 result = fp64_infinity(sgn1 ^ sgn2);
2623 } else {
2624 result = fp64_muladd(fp64_FPTwo(0), op1, op2, 0, mode, &flags);
2625 }
2626 }
2627
2628 set_fpscr0(fpscr, flags);
2629
2630 return result;
2631 }
2632
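// FRECPX: the fraction is cleared and the exponent field is replaced by its
// bitwise complement, giving a power-of-two scaling value; zeros and flushed
// denormals get the maximum finite exponent, while an input infinity maps to
// a signed zero. NaNs are processed as usual and no other exceptions are
// raised.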
2633 template <>
2634 uint32_t
2635 fplibRecpX(uint32_t op, FPSCR &fpscr)
2636 {
2637 int mode = modeConv(fpscr);
2638 int flags = 0;
2639 int sgn, exp;
2640 uint32_t mnt, result;
2641
2642 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2643
2644 if (exp == 255 && (uint32_t)(mnt << 9)) {
2645 result = fp32_process_NaN(op, mode, &flags);
2646 }
2647 else {
2648 if (!mnt) { // Zero and denormals
2649 result = fp32_pack(sgn, 254, 0);
2650 } else { // Infinities and normals
2651 result = fp32_pack(sgn, exp ^ 255, 0);
2652 }
2653 }
2654
2655 set_fpscr0(fpscr, flags);
2656
2657 return result;
2658 }
2659
2660 template <>
2661 uint64_t
2662 fplibRecpX(uint64_t op, FPSCR &fpscr)
2663 {
2664 int mode = modeConv(fpscr);
2665 int flags = 0;
2666 int sgn, exp;
2667 uint64_t mnt, result;
2668
2669 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2670
2671 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2672 result = fp64_process_NaN(op, mode, &flags);
2673 }
2674 else {
2675 if (!mnt) { // Zero and denormals
2676 result = fp64_pack(sgn, 2046, 0);
2677 } else { // Infinities and normals
2678 result = fp64_pack(sgn, exp ^ 2047, 0);
2679 }
2680 }
2681
2682 set_fpscr0(fpscr, flags);
2683
2684 return result;
2685 }
2686
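// Round to integral in floating-point format. For exponents below 150
// (single) / 1075 (double) the fraction is shifted out and "err" encodes its
// value relative to half a unit: 0 exact, 1 below a half, 2 exactly a half,
// 3 above a half. The rounding mode then decides whether to increment the
// truncated integer, and Inexact is raised only when "exact" is requested.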
2687 template <>
2688 uint32_t
2689 fplibRoundInt(uint32_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
2690 {
2691 int mode = modeConv(fpscr);
2692 int flags = 0;
2693 int sgn, exp;
2694 uint32_t mnt, result;
2695
2696 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2697 fp32_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2698
2699 // Handle NaNs, infinities and zeroes:
2700 if (exp == 255 && (uint32_t)(mnt << 9)) {
2701 result = fp32_process_NaN(op, mode, &flags);
2702 } else if (exp == 255) {
2703 result = fp32_infinity(sgn);
2704 } else if (!mnt) {
2705 result = fp32_zero(sgn);
2706 } else if (exp >= 150) {
2707 // There are no fractional bits
2708 result = op;
2709 } else {
2710 // Truncate towards zero:
2711 uint32_t x = 150 - exp >= 32 ? 0 : mnt >> (150 - exp);
2712 int err = exp < 118 ? 1 :
2713 (mnt << 1 >> (149 - exp) & 3) | (mnt << 2 << (exp - 118) != 0);
2714 switch (rounding) {
2715 case FPRounding_TIEEVEN:
2716 x += (err == 3 || (err == 2 && (x & 1)));
2717 break;
2718 case FPRounding_POSINF:
2719 x += err && !sgn;
2720 break;
2721 case FPRounding_NEGINF:
2722 x += err && sgn;
2723 break;
2724 case FPRounding_ZERO:
2725 break;
2726 case FPRounding_TIEAWAY:
2727 x += err >> 1;
2728 break;
2729 default:
2730 assert(0);
2731 }
2732
2733 if (x == 0) {
2734 result = fp32_zero(sgn);
2735 } else {
2736 exp = 150;
2737 mnt = fp32_normalise(x, &exp);
2738 result = fp32_pack(sgn, exp + 8, mnt >> 8);
2739 }
2740
2741 if (err && exact)
2742 flags |= FPLIB_IXC;
2743 }
2744
2745 set_fpscr0(fpscr, flags);
2746
2747 return result;
2748 }
2749
2750 template <>
2751 uint64_t
2752 fplibRoundInt(uint64_t op, FPRounding rounding, bool exact, FPSCR &fpscr)
2753 {
2754 int mode = modeConv(fpscr);
2755 int flags = 0;
2756 int sgn, exp;
2757 uint64_t mnt, result;
2758
2759 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2760 fp64_unpack(&sgn, &exp, &mnt, op, mode, &flags);
2761
2762 // Handle NaNs, infinities and zeroes:
2763 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2764 result = fp64_process_NaN(op, mode, &flags);
2765 } else if (exp == 2047) {
2766 result = fp64_infinity(sgn);
2767 } else if (!mnt) {
2768 result = fp64_zero(sgn);
2769 } else if (exp >= 1075) {
2770 // There are no fractional bits
2771 result = op;
2772 } else {
2773 // Truncate towards zero:
2774 uint64_t x = 1075 - exp >= 64 ? 0 : mnt >> (1075 - exp);
2775 int err = exp < 1011 ? 1 :
2776 (mnt << 1 >> (1074 - exp) & 3) | (mnt << 2 << (exp - 1011) != 0);
2777 switch (rounding) {
2778 case FPRounding_TIEEVEN:
2779 x += (err == 3 || (err == 2 && (x & 1)));
2780 break;
2781 case FPRounding_POSINF:
2782 x += err && !sgn;
2783 break;
2784 case FPRounding_NEGINF:
2785 x += err && sgn;
2786 break;
2787 case FPRounding_ZERO:
2788 break;
2789 case FPRounding_TIEAWAY:
2790 x += err >> 1;
2791 break;
2792 default:
2793 assert(0);
2794 }
2795
2796 if (x == 0) {
2797 result = fp64_zero(sgn);
2798 } else {
2799 exp = 1075;
2800 mnt = fp64_normalise(x, &exp);
2801 result = fp64_pack(sgn, exp + 11, mnt >> 11);
2802 }
2803
2804 if (err && exact)
2805 flags |= FPLIB_IXC;
2806 }
2807
2808 set_fpscr0(fpscr, flags);
2809
2810 return result;
2811 }
2812
2813 template <>
2814 uint32_t
2815 fplibSqrt(uint32_t op, FPSCR &fpscr)
2816 {
2817 int flags = 0;
2818 uint32_t result = fp32_sqrt(op, modeConv(fpscr), &flags);
2819 set_fpscr0(fpscr, flags);
2820 return result;
2821 }
2822
2823 template <>
2824 uint64_t
2825 fplibSqrt(uint64_t op, FPSCR &fpscr)
2826 {
2827 int flags = 0;
2828 uint64_t result = fp64_sqrt(op, modeConv(fpscr), &flags);
2829 set_fpscr0(fpscr, flags);
2830 return result;
2831 }
2832
2833 template <>
2834 uint32_t
2835 fplibSub(uint32_t op1, uint32_t op2, FPSCR &fpscr)
2836 {
2837 int flags = 0;
2838 uint32_t result = fp32_add(op1, op2, 1, modeConv(fpscr), &flags);
2839 set_fpscr0(fpscr, flags);
2840 return result;
2841 }
2842
2843 template <>
2844 uint64_t
2845 fplibSub(uint64_t op1, uint64_t op2, FPSCR &fpscr)
2846 {
2847 int flags = 0;
2848 uint64_t result = fp64_add(op1, op2, 1, modeConv(fpscr), &flags);
2849 set_fpscr0(fpscr, flags);
2850 return result;
2851 }
2852
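// Convert an unpacked value (double-precision biased exponent, significand
// aligned at bit 52) to a 64-bit fixed-point integer. Values that cannot be
// represented saturate to the destination type's minimum or maximum and raise
// Invalid Operation; anything rounded away raises Inexact. FPToFixed_32
// reuses this and then saturates again to the 32-bit range. The
// single-precision callers below re-bias the exponent (exp + 1023 - 127) and
// shift the significand up by 52 - 23 bits before calling in.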
2853 static uint64_t
2854 FPToFixed_64(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
2855 int *flags)
2856 {
2857 uint64_t x;
2858 int err;
2859
2860 if (exp > 1023 + 63) {
2861 *flags = FPLIB_IOC;
2862 return ((uint64_t)!u << 63) - !sgn;
2863 }
2864
2865 x = lsr64(mnt << 11, 1023 + 63 - exp);
2866 err = (exp > 1023 + 63 - 2 ? 0 :
2867 (lsr64(mnt << 11, 1023 + 63 - 2 - exp) & 3) |
2868 !!(mnt << 11 & (lsl64(1, 1023 + 63 - 2 - exp) - 1)));
2869
2870 switch (rounding) {
2871 case FPRounding_TIEEVEN:
2872 x += (err == 3 || (err == 2 && (x & 1)));
2873 break;
2874 case FPRounding_POSINF:
2875 x += err && !sgn;
2876 break;
2877 case FPRounding_NEGINF:
2878 x += err && sgn;
2879 break;
2880 case FPRounding_ZERO:
2881 break;
2882 case FPRounding_TIEAWAY:
2883 x += err >> 1;
2884 break;
2885 default:
2886 assert(0);
2887 }
2888
2889 if (u ? sgn && x : x > ((uint64_t)1 << 63) - !sgn) {
2890 *flags = FPLIB_IOC;
2891 return ((uint64_t)!u << 63) - !sgn;
2892 }
2893
2894 if (err) {
2895 *flags = FPLIB_IXC;
2896 }
2897
2898 return sgn ? -x : x;
2899 }
2900
2901 static uint32_t
2902 FPToFixed_32(int sgn, int exp, uint64_t mnt, bool u, FPRounding rounding,
2903 int *flags)
2904 {
2905 uint64_t x = FPToFixed_64(sgn, exp, mnt, u, rounding, flags);
2906 if (u ? x >= (uint64_t)1 << 32 :
2907 !(x < (uint64_t)1 << 31 ||
2908 (uint64_t)-x <= (uint64_t)1 << 31)) {
2909 *flags = FPLIB_IOC;
2910 x = ((uint32_t)!u << 31) - !sgn;
2911 }
2912 return x;
2913 }
2914
2915 template <>
2916 uint32_t
2917 fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2918 {
2919 int flags = 0;
2920 int sgn, exp;
2921 uint32_t mnt, result;
2922
2923 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2924 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
2925
2926 // If NaN, set cumulative flag or take exception:
2927 if (exp == 255 && (uint32_t)(mnt << 9)) {
2928 flags = FPLIB_IOC;
2929 result = 0;
2930 } else {
2931 result = FPToFixed_32(sgn, exp + 1023 - 127 + fbits,
2932 (uint64_t)mnt << (52 - 23), u, rounding, &flags);
2933 }
2934
2935 set_fpscr0(fpscr, flags);
2936
2937 return result;
2938 }
2939
2940 template <>
2941 uint32_t
2942 fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2943 {
2944 int flags = 0;
2945 int sgn, exp;
2946 uint64_t mnt;
2947 uint32_t result;
2948
2949 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2950 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
2951
2952 // If NaN, set cumulative flag or take exception:
2953 if (exp == 2047 && (uint64_t)(mnt << 12)) {
2954 flags = FPLIB_IOC;
2955 result = 0;
2956 } else {
2957 result = FPToFixed_32(sgn, exp + fbits, mnt, u, rounding, &flags);
2958 }
2959
2960 set_fpscr0(fpscr, flags);
2961
2962 return result;
2963 }
2964
2965 template <>
2966 uint64_t
2967 fplibFPToFixed(uint32_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2968 {
2969 int flags = 0;
2970 int sgn, exp;
2971 uint32_t mnt;
2972 uint64_t result;
2973
2974 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
2975 fp32_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
2976
2977 // If NaN, set cumulative flag or take exception:
2978 if (exp == 255 && (uint32_t)(mnt << 9)) {
2979 flags = FPLIB_IOC;
2980 result = 0;
2981 } else {
2982 result = FPToFixed_64(sgn, exp + 1023 - 127 + fbits,
2983 (uint64_t)mnt << (52 - 23), u, rounding, &flags);
2984 }
2985
2986 set_fpscr0(fpscr, flags);
2987
2988 return result;
2989 }
2990
2991 template <>
2992 uint64_t
2993 fplibFPToFixed(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
2994 {
2995 int flags = 0;
2996 int sgn, exp;
2997 uint64_t mnt, result;
2998
2999 // Unpack using FPCR to determine if subnormals are flushed-to-zero:
3000 fp64_unpack(&sgn, &exp, &mnt, op, modeConv(fpscr), &flags);
3001
3002 // If NaN, set cumulative flag or take exception:
3003 if (exp == 2047 && (uint64_t)(mnt << 12)) {
3004 flags = FPLIB_IOC;
3005 result = 0;
3006 } else {
3007 result = FPToFixed_64(sgn, exp + fbits, mnt, u, rounding, &flags);
3008 }
3009
3010 set_fpscr0(fpscr, flags);
3011
3012 return result;
3013 }
3014
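// Fixed-point to floating-point conversion: "a" is taken as signed unless u
// is set, the magnitude is normalised so its leading bit reaches bit 63, the
// starting exponent (190 - fbits for single, 1024 + 62 - fbits for double)
// drops once per normalisation shift, and the discarded low bits are folded
// into a sticky bit before rounding. The fplibFixedToFP wrappers pass the
// explicit rounding mode and pick up only the FZ and DN bits from the FPSCR
// ((uint32_t)fpscr >> 22 & 12).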
3015 static uint32_t
3016 fp32_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
3017 {
3018 int x_sgn = !u && a >> 63;
3019 int x_exp = 190 - fbits;
3020 uint64_t x_mnt = x_sgn ? -a : a;
3021
3022 // Handle zero:
3023 if (!x_mnt) {
3024 return fp32_zero(0);
3025 }
3026
3027 // Normalise and convert to 32 bits, collapsing error into bottom bit:
3028 x_mnt = fp64_normalise(x_mnt, &x_exp);
3029 x_mnt = x_mnt >> 31 | !!(uint32_t)(x_mnt << 1);
3030
3031 return fp32_round(x_sgn, x_exp, x_mnt, mode, flags);
3032 }
3033
3034 static uint64_t
3035 fp64_cvtf(uint64_t a, int fbits, int u, int mode, int *flags)
3036 {
3037 int x_sgn = !u && a >> 63;
3038 int x_exp = 1024 + 62 - fbits;
3039 uint64_t x_mnt = x_sgn ? -a : a;
3040
3041 // Handle zero:
3042 if (!x_mnt) {
3043 return fp64_zero(0);
3044 }
3045
3046 x_mnt = fp64_normalise(x_mnt, &x_exp);
3047
3048 return fp64_round(x_sgn, x_exp, x_mnt << 1, mode, flags);
3049 }
3050
3051 template <>
3052 uint32_t
3053 fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
3054 {
3055 int flags = 0;
3056 uint32_t res = fp32_cvtf(op, fbits, u,
3057 (int)rounding | ((uint32_t)fpscr >> 22 & 12),
3058 &flags);
3059 set_fpscr0(fpscr, flags);
3060 return res;
3061 }
3062
3063 template <>
3064 uint64_t
3065 fplibFixedToFP(uint64_t op, int fbits, bool u, FPRounding rounding, FPSCR &fpscr)
3066 {
3067 int flags = 0;
3068 uint64_t res = fp64_cvtf(op, fbits, u,
3069 (int)rounding | ((uint32_t)fpscr >> 22 & 12),
3070 &flags);
3071 set_fpscr0(fpscr, flags);
3072 return res;
3073 }
3074
3075 }