2 * License for Berkeley SoftFloat Release 3e
7 * The following applies to the whole of SoftFloat Release 3e as well as to
8 * each source file individually.
10 * Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018 The Regents of the
11 * University of California. All rights reserved.
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions are met:
16 * 1. Redistributions of source code must retain the above copyright notice,
17 * this list of conditions, and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions, and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
23 * 3. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
28 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
29 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
30 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
31 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
32 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
33 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
34 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
36 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 * The functions listed in this file are modified versions of the ones
40 * from the Berkeley SoftFloat 3e Library.
42 * Their implementation correctness has been checked with the Berkeley
43 * TestFloat Release 3e tool for x86_64.
48 #include "softfloat.h"
50 #if defined(BIG_ENDIAN)
52 #define index_word(total, n) ((total) - 1 - (n))
53 #define index_word_hi(total) 0
54 #define index_word_lo(total) ((total) - 1)
55 #define index_multiword_hi(total, n) 0
56 #define index_multiword_lo(total, n) ((total) - (n))
57 #define index_multiword_hi_but(total, n) 0
58 #define index_multiword_lo_but(total, n) (n)
61 #define index_word(total, n) (n)
62 #define index_word_hi(total) ((total) - 1)
63 #define index_word_lo(total) 0
64 #define index_multiword_hi(total, n) ((total) - (n))
65 #define index_multiword_lo(total, n) 0
66 #define index_multiword_hi_but(total, n) (n)
67 #define index_multiword_lo_but(total, n) 0
70 typedef union { double f
; int64_t i
; uint64_t u
; } di_type
;
71 typedef union { float f
; int32_t i
; uint32_t u
; } fi_type
;
73 const uint8_t count_leading_zeros8
[256] = {
74 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
75 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
76 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
79 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
80 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
81 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
82 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
83 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
84 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
85 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
86 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
87 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
88 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
89 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
93 * \brief Shifts 'a' right by the number of bits given in 'dist', which must be in
94 * the range 1 to 63. If any nonzero bits are shifted off, they are "jammed"
95 * into the least-significant bit of the shifted value by setting the
96 * least-significant bit to 1. This shifted-and-jammed value is returned.
98 * From softfloat_shortShiftRightJam64()
101 uint64_t _mesa_short_shift_right_jam64(uint64_t a
, uint8_t dist
)
103 return a
>> dist
| ((a
& (((uint64_t) 1 << dist
) - 1)) != 0);
107 * \brief Shifts 'a' right by the number of bits given in 'dist', which must not
108 * be zero. If any nonzero bits are shifted off, they are "jammed" into the
109 * least-significant bit of the shifted value by setting the least-significant
110 * bit to 1. This shifted-and-jammed value is returned.
111 * The value of 'dist' can be arbitrarily large. In particular, if 'dist' is
112 * greater than 64, the result will be either 0 or 1, depending on whether 'a'
113 * is zero or nonzero.
115 * From softfloat_shiftRightJam64()
118 uint64_t _mesa_shift_right_jam64(uint64_t a
, uint32_t dist
)
121 (dist
< 63) ? a
>> dist
| ((uint64_t) (a
<< (-dist
& 63)) != 0) : (a
!= 0);
125 * \brief Shifts 'a' right by the number of bits given in 'dist', which must not be
126 * zero. If any nonzero bits are shifted off, they are "jammed" into the
127 * least-significant bit of the shifted value by setting the least-significant
128 * bit to 1. This shifted-and-jammed value is returned.
129 * The value of 'dist' can be arbitrarily large. In particular, if 'dist' is
130 * greater than 32, the result will be either 0 or 1, depending on whether 'a'
131 * is zero or nonzero.
133 * From softfloat_shiftRightJam32()
136 uint32_t _mesa_shift_right_jam32(uint32_t a
, uint16_t dist
)
139 (dist
< 31) ? a
>> dist
| ((uint32_t) (a
<< (-dist
& 31)) != 0) : (a
!= 0);
143 * \brief Extracted from softfloat_roundPackToF64()
146 double _mesa_roundtozero_f64(int64_t s
, int64_t e
, int64_t m
)
150 if ((uint64_t) e
>= 0x7fd) {
152 m
= _mesa_shift_right_jam64(m
, -e
);
154 } else if ((e
> 0x7fd) || (0x8000000000000000 <= m
)) {
157 result
.u
= (s
<< 63) + (e
<< 52) + m
;
167 result
.u
= (s
<< 63) + (e
<< 52) + m
;
172 * \brief Extracted from softfloat_roundPackToF32()
175 float _mesa_round_f32(int32_t s
, int32_t e
, int32_t m
, bool rtz
)
178 uint8_t round_increment
= rtz
? 0 : 0x40;
180 if ((uint32_t) e
>= 0xfd) {
182 m
= _mesa_shift_right_jam32(m
, -e
);
184 } else if ((e
> 0xfd) || (0x80000000 <= m
+ round_increment
)) {
187 result
.u
= (s
<< 31) + (e
<< 23) + m
;
188 result
.u
-= !round_increment
;
194 round_bits
= m
& 0x7f;
195 m
= ((uint32_t) m
+ round_increment
) >> 7;
196 m
&= ~(uint32_t) (! (round_bits
^ 0x40) & !rtz
);
200 result
.u
= (s
<< 31) + (e
<< 23) + m
;
205 * \brief Extracted from softfloat_roundPackToF16()
208 uint16_t _mesa_roundtozero_f16(int16_t s
, int16_t e
, int16_t m
)
210 if ((uint16_t) e
>= 0x1d) {
212 m
= _mesa_shift_right_jam32(m
, -e
);
214 } else if ((e
> 0x1d) || (0x8000 <= m
)) {
217 return (s
<< 15) + (e
<< 10) + m
- 1;
225 return (s
<< 15) + (e
<< 10) + m
;
229 * \brief Shifts the N-bit unsigned integer pointed to by 'a' left by the number of
230 * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist'
231 * must be in the range 1 to 31. Any nonzero bits shifted off are lost. The
232 * shifted N-bit result is stored at the location pointed to by 'm_out'. Each
233 * of 'a' and 'm_out' points to a 'size_words'-long array of 32-bit elements
234 * that concatenate in the platform's normal endian order to form an N-bit
237 * From softfloat_shortShiftLeftM()
240 _mesa_short_shift_left_m(uint8_t size_words
, const uint32_t *a
, uint8_t dist
, uint32_t *m_out
)
243 unsigned index
, last_index
;
244 uint32_t part_word
, a_word
;
247 index
= index_word_hi(size_words
);
248 last_index
= index_word_lo(size_words
);
249 part_word
= a
[index
] << dist
;
250 while (index
!= last_index
) {
251 a_word
= a
[index
- word_incr
];
252 m_out
[index
] = part_word
| a_word
>> (neg_dist
& 31);
254 part_word
= a_word
<< dist
;
256 m_out
[index
] = part_word
;
260 * \brief Shifts the N-bit unsigned integer pointed to by 'a' left by the number of
261 * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist'
262 * must not be zero. Any nonzero bits shifted off are lost. The shifted
263 * N-bit result is stored at the location pointed to by 'm_out'. Each of 'a'
264 * and 'm_out' points to a 'size_words'-long array of 32-bit elements that
265 * concatenate in the platform's normal endian order to form an N-bit
266 * integer. The value of 'dist' can be arbitrarily large. In particular, if
267 * 'dist' is greater than N, the stored result will be 0.
269 * From softfloat_shiftLeftM()
272 _mesa_shift_left_m(uint8_t size_words
, const uint32_t *a
, uint32_t dist
, uint32_t *m_out
)
278 word_dist
= dist
>> 5;
279 if (word_dist
< size_words
) {
280 a
+= index_multiword_lo_but(size_words
, word_dist
);
281 inner_dist
= dist
& 31;
283 _mesa_short_shift_left_m(size_words
- word_dist
, a
, inner_dist
,
284 m_out
+ index_multiword_hi_but(size_words
, word_dist
));
288 uint32_t *dest
= m_out
+ index_word_hi(size_words
);
289 a
+= index_word_hi(size_words
- word_dist
);
290 for (i
= size_words
- word_dist
; i
; --i
) {
296 m_out
+= index_multiword_lo(size_words
, word_dist
);
298 word_dist
= size_words
;
307 * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of
308 * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist'
309 * must be in the range 1 to 31. Any nonzero bits shifted off are lost. The
310 * shifted N-bit result is stored at the location pointed to by 'm_out'. Each
311 * of 'a' and 'm_out' points to a 'size_words'-long array of 32-bit elements
312 * that concatenate in the platform's normal endian order to form an N-bit
315 * From softfloat_shortShiftRightM()
318 _mesa_short_shift_right_m(uint8_t size_words
, const uint32_t *a
, uint8_t dist
, uint32_t *m_out
)
321 unsigned index
, last_index
;
322 uint32_t part_word
, a_word
;
325 index
= index_word_lo(size_words
);
326 last_index
= index_word_hi(size_words
);
327 part_word
= a
[index
] >> dist
;
328 while (index
!= last_index
) {
329 a_word
= a
[index
+ word_incr
];
330 m_out
[index
] = a_word
<< (neg_dist
& 31) | part_word
;
332 part_word
= a_word
>> dist
;
334 m_out
[index
] = part_word
;
338 * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of
339 * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist'
340 * must be in the range 1 to 31. If any nonzero bits are shifted off, they
341 * are "jammed" into the least-significant bit of the shifted value by setting
342 * the least-significant bit to 1. This shifted-and-jammed N-bit result is
343 * stored at the location pointed to by 'm_out'. Each of 'a' and 'm_out'
344 * points to a 'size_words'-long array of 32-bit elements that concatenate in
345 * the platform's normal endian order to form an N-bit integer.
348 * From softfloat_shortShiftRightJamM()
351 _mesa_short_shift_right_jam_m(uint8_t size_words
, const uint32_t *a
, uint8_t dist
, uint32_t *m_out
)
354 unsigned index
, last_index
;
355 uint64_t part_word
, a_word
;
358 index
= index_word_lo(size_words
);
359 last_index
= index_word_hi(size_words
);
361 part_word
= a_word
>> dist
;
362 if (part_word
<< dist
!= a_word
)
364 while (index
!= last_index
) {
365 a_word
= a
[index
+ word_incr
];
366 m_out
[index
] = a_word
<< (neg_dist
& 31) | part_word
;
368 part_word
= a_word
>> dist
;
370 m_out
[index
] = part_word
;
374 * \brief Shifts the N-bit unsigned integer pointed to by 'a' right by the number of
375 * bits given in 'dist', where N = 'size_words' * 32. The value of 'dist'
376 * must not be zero. If any nonzero bits are shifted off, they are "jammed"
377 * into the least-significant bit of the shifted value by setting the
378 * least-significant bit to 1. This shifted-and-jammed N-bit result is stored
379 * at the location pointed to by 'm_out'. Each of 'a' and 'm_out' points to a
380 * 'size_words'-long array of 32-bit elements that concatenate in the
381 * platform's normal endian order to form an N-bit integer. The value of
382 * 'dist' can be arbitrarily large. In particular, if 'dist' is greater than
383 * N, the stored result will be either 0 or 1, depending on whether the
384 * original N bits are all zeros.
386 * From softfloat_shiftRightJamM()
389 _mesa_shift_right_jam_m(uint8_t size_words
, const uint32_t *a
, uint32_t dist
, uint32_t *m_out
)
391 uint32_t word_jam
, word_dist
, *tmp
;
392 uint8_t i
, inner_dist
;
395 word_dist
= dist
>> 5;
397 if (size_words
< word_dist
)
398 word_dist
= size_words
;
399 tmp
= (uint32_t *) (a
+ index_multiword_lo(size_words
, word_dist
));
409 if (word_dist
< size_words
) {
410 a
+= index_multiword_hi_but(size_words
, word_dist
);
411 inner_dist
= dist
& 31;
413 _mesa_short_shift_right_jam_m(size_words
- word_dist
, a
, inner_dist
,
414 m_out
+ index_multiword_lo_but(size_words
, word_dist
));
417 m_out
[index_word_lo(size_words
)] |= 1;
421 a
+= index_word_lo(size_words
- word_dist
);
422 tmp
= m_out
+ index_word_lo(size_words
);
423 for (i
= size_words
- word_dist
; i
; --i
) {
429 tmp
= m_out
+ index_multiword_hi(size_words
, word_dist
);
436 m_out
[index_word_lo(size_words
)] |= 1;
440 * \brief Calculate a + b but rounding to zero.
442 * Notice that this mainly differs from the original Berkeley SoftFloat 3e
443 * implementation in that we don't really treat NaNs, Zeroes nor the
444 * signalling flags. Any NaN is good for us and the sign of the Zero is not
450 _mesa_double_add_rtz(double a
, double b
)
452 const di_type a_di
= {a
};
453 uint64_t a_flt_m
= a_di
.u
& 0x0fffffffffffff;
454 uint64_t a_flt_e
= (a_di
.u
>> 52) & 0x7ff;
455 uint64_t a_flt_s
= (a_di
.u
>> 63) & 0x1;
456 const di_type b_di
= {b
};
457 uint64_t b_flt_m
= b_di
.u
& 0x0fffffffffffff;
458 uint64_t b_flt_e
= (b_di
.u
>> 52) & 0x7ff;
459 uint64_t b_flt_s
= (b_di
.u
>> 63) & 0x1;
464 const int64_t exp_diff
= a_flt_e
- b_flt_e
;
466 /* Handle special cases */
468 if (a_flt_s
!= b_flt_s
) {
469 return _mesa_double_sub_rtz(a
, -b
);
470 } else if ((a_flt_e
== 0) && (a_flt_m
== 0)) {
471 /* 'a' is zero, return 'b' */
473 } else if ((b_flt_e
== 0) && (b_flt_m
== 0)) {
474 /* 'b' is zero, return 'a' */
476 } else if (a_flt_e
== 0x7ff && a_flt_m
!= 0) {
477 /* 'a' is a NaN, return NaN */
479 } else if (b_flt_e
== 0x7ff && b_flt_m
!= 0) {
480 /* 'b' is a NaN, return NaN */
482 } else if (a_flt_e
== 0x7ff && a_flt_m
== 0) {
485 } else if (b_flt_e
== 0x7ff && b_flt_m
== 0) {
488 } else if (exp_diff
== 0 && a_flt_e
== 0) {
490 result_di
.u
= a_di
.u
+ b_flt_m
;
492 } else if (exp_diff
== 0) {
494 m
= 0x0020000000000000 + a_flt_m
+ b_flt_m
;
496 } else if (exp_diff
< 0) {
502 a_flt_m
+= 0x2000000000000000;
506 a_flt_m
= _mesa_shift_right_jam64(a_flt_m
, -exp_diff
);
507 m
= 0x2000000000000000 + a_flt_m
+ b_flt_m
;
508 if (m
< 0x4000000000000000) {
518 b_flt_m
+= 0x2000000000000000;
522 b_flt_m
= _mesa_shift_right_jam64(b_flt_m
, exp_diff
);
523 m
= 0x2000000000000000 + a_flt_m
+ b_flt_m
;
524 if (m
< 0x4000000000000000) {
530 return _mesa_roundtozero_f64(s
, e
, m
);
534 * \brief Returns the number of leading 0 bits before the most-significant 1 bit of
535 * 'a'. If 'a' is zero, 64 is returned.
537 static inline unsigned
538 _mesa_count_leading_zeros64(uint64_t a
)
540 return 64 - util_last_bit64(a
);
544 * \brief Returns the number of leading 0 bits before the most-significant 1 bit of
545 * 'a'. If 'a' is zero, 32 is returned.
547 static inline unsigned
548 _mesa_count_leading_zeros32(uint32_t a
)
550 return 32 - util_last_bit(a
);
554 _mesa_norm_round_pack_f64(int64_t s
, int64_t e
, int64_t m
)
558 shift_dist
= _mesa_count_leading_zeros64(m
) - 1;
560 if ((10 <= shift_dist
) && ((unsigned) e
< 0x7fd)) {
562 result
.u
= (s
<< 63) + ((m
? e
: 0) << 52) + (m
<< (shift_dist
- 10));
565 return _mesa_roundtozero_f64(s
, e
, m
<< shift_dist
);
570 * \brief Replaces the N-bit unsigned integer pointed to by 'm_out' by the
571 * 2s-complement of itself, where N = 'size_words' * 32. Argument 'm_out'
572 * points to a 'size_words'-long array of 32-bit elements that concatenate in
573 * the platform's normal endian order to form an N-bit integer.
575 * From softfloat_negXM()
578 _mesa_neg_x_m(uint8_t size_words
, uint32_t *m_out
)
580 unsigned index
, last_index
;
584 index
= index_word_lo(size_words
);
585 last_index
= index_word_hi(size_words
);
588 word
= ~m_out
[index
] + carry
;
590 if (index
== last_index
)
599 * \brief Adds the two N-bit integers pointed to by 'a' and 'b', where N =
600 * 'size_words' * 32. The addition is modulo 2^N, so any carry out is
601 * lost. The N-bit sum is stored at the location pointed to by 'm_out'. Each
602 * of 'a', 'b', and 'm_out' points to a 'size_words'-long array of 32-bit
603 * elements that concatenate in the platform's normal endian order to form an
606 * From softfloat_addM()
609 _mesa_add_m(uint8_t size_words
, const uint32_t *a
, const uint32_t *b
, uint32_t *m_out
)
611 unsigned index
, last_index
;
613 uint32_t a_word
, word
;
615 index
= index_word_lo(size_words
);
616 last_index
= index_word_hi(size_words
);
620 word
= a_word
+ b
[index
] + carry
;
622 if (index
== last_index
)
625 carry
= (word
< a_word
);
631 * \brief Subtracts the two N-bit integers pointed to by 'a' and 'b', where N =
632 * 'size_words' * 32. The subtraction is modulo 2^N, so any borrow out (carry
633 * out) is lost. The N-bit difference is stored at the location pointed to by
634 * 'm_out'. Each of 'a', 'b', and 'm_out' points to a 'size_words'-long array
635 * of 32-bit elements that concatenate in the platform's normal endian order
636 * to form an N-bit integer.
638 * From softfloat_subM()
641 _mesa_sub_m(uint8_t size_words
, const uint32_t *a
, const uint32_t *b
, uint32_t *m_out
)
643 unsigned index
, last_index
;
645 uint32_t a_word
, b_word
;
647 index
= index_word_lo(size_words
);
648 last_index
= index_word_hi(size_words
);
653 m_out
[index
] = a_word
- b_word
- borrow
;
654 if (index
== last_index
)
656 borrow
= borrow
? (a_word
<= b_word
) : (a_word
< b_word
);
661 /* Calculate a - b but rounding to zero.
663 * Notice that this mainly differs from the original Berkeley SoftFloat 3e
664 * implementation in that we don't really treat NaNs, Zeroes nor the
665 * signalling flags. Any NaN is good for us and the sign of the Zero is not
671 _mesa_double_sub_rtz(double a
, double b
)
673 const di_type a_di
= {a
};
674 uint64_t a_flt_m
= a_di
.u
& 0x0fffffffffffff;
675 uint64_t a_flt_e
= (a_di
.u
>> 52) & 0x7ff;
676 uint64_t a_flt_s
= (a_di
.u
>> 63) & 0x1;
677 const di_type b_di
= {b
};
678 uint64_t b_flt_m
= b_di
.u
& 0x0fffffffffffff;
679 uint64_t b_flt_e
= (b_di
.u
>> 52) & 0x7ff;
680 uint64_t b_flt_s
= (b_di
.u
>> 63) & 0x1;
683 unsigned shift_dist
= 0;
687 const int64_t exp_diff
= a_flt_e
- b_flt_e
;
689 /* Handle special cases */
691 if (a_flt_s
!= b_flt_s
) {
692 return _mesa_double_add_rtz(a
, -b
);
693 } else if ((a_flt_e
== 0) && (a_flt_m
== 0)) {
694 /* 'a' is zero, return '-b' */
696 } else if ((b_flt_e
== 0) && (b_flt_m
== 0)) {
697 /* 'b' is zero, return 'a' */
699 } else if (a_flt_e
== 0x7ff && a_flt_m
!= 0) {
700 /* 'a' is a NaN, return NaN */
702 } else if (b_flt_e
== 0x7ff && b_flt_m
!= 0) {
703 /* 'b' is a NaN, return NaN */
705 } else if (a_flt_e
== 0x7ff && a_flt_m
== 0) {
706 if (b_flt_e
== 0x7ff && b_flt_m
== 0) {
707 /* Inf - Inf = NaN */
710 result
.u
= (s
<< 63) + (e
<< 52) + 0x1;
715 } else if (b_flt_e
== 0x7ff && b_flt_m
== 0) {
718 } else if (exp_diff
== 0) {
719 m_diff
= a_flt_m
- b_flt_m
;
730 shift_dist
= _mesa_count_leading_zeros64(m_diff
) - 11;
731 e
= a_flt_e
- shift_dist
;
733 shift_dist
= a_flt_e
;
738 result
.u
= (s
<< 63) + (e
<< 52) + (m_diff
<< shift_dist
);
740 } else if (exp_diff
< 0) {
745 a_flt_m
+= (a_flt_e
) ? 0x4000000000000000 : a_flt_m
;
746 a_flt_m
= _mesa_shift_right_jam64(a_flt_m
, -exp_diff
);
747 b_flt_m
|= 0x4000000000000000;
749 m
= b_flt_m
- a_flt_m
;
754 b_flt_m
+= (b_flt_e
) ? 0x4000000000000000 : b_flt_m
;
755 b_flt_m
= _mesa_shift_right_jam64(b_flt_m
, exp_diff
);
756 a_flt_m
|= 0x4000000000000000;
758 m
= a_flt_m
- b_flt_m
;
761 return _mesa_norm_round_pack_f64(s
, e
- 1, m
);
765 _mesa_norm_subnormal_mantissa_f64(uint64_t m
, uint64_t *exp
, uint64_t *m_out
)
769 shift_dist
= _mesa_count_leading_zeros64(m
) - 11;
770 *exp
= 1 - shift_dist
;
771 *m_out
= m
<< shift_dist
;
775 _mesa_norm_subnormal_mantissa_f32(uint32_t m
, uint32_t *exp
, uint32_t *m_out
)
779 shift_dist
= _mesa_count_leading_zeros32(m
) - 8;
780 *exp
= 1 - shift_dist
;
781 *m_out
= m
<< shift_dist
;
785 * \brief Multiplies 'a' and 'b' and stores the 128-bit product at the location
786 * pointed to by 'zPtr'. Argument 'zPtr' points to an array of four 32-bit
787 * elements that concatenate in the platform's normal endian order to form a
790 * From softfloat_mul64To128M()
793 _mesa_softfloat_mul_f64_to_f128_m(uint64_t a
, uint64_t b
, uint32_t *m_out
)
795 uint32_t a32
, a0
, b32
, b0
;
796 uint64_t z0
, mid1
, z64
, mid
;
802 z0
= (uint64_t) a0
* b0
;
803 mid1
= (uint64_t) a32
* b0
;
804 mid
= mid1
+ (uint64_t) a0
* b32
;
805 z64
= (uint64_t) a32
* b32
;
806 z64
+= (uint64_t) (mid
< mid1
) << 32 | mid
>> 32;
809 m_out
[index_word(4, 1)] = z0
>> 32;
810 m_out
[index_word(4, 0)] = z0
;
812 m_out
[index_word(4, 3)] = z64
>> 32;
813 m_out
[index_word(4, 2)] = z64
;
816 /* Calculate a * b but rounding to zero.
818 * Notice that this mainly differs from the original Berkeley SoftFloat 3e
819 * implementation in that we don't really treat NaNs, Zeroes nor the
820 * signalling flags. Any NaN is good for us and the sign of the Zero is not
826 _mesa_double_mul_rtz(double a
, double b
)
828 const di_type a_di
= {a
};
829 uint64_t a_flt_m
= a_di
.u
& 0x0fffffffffffff;
830 uint64_t a_flt_e
= (a_di
.u
>> 52) & 0x7ff;
831 uint64_t a_flt_s
= (a_di
.u
>> 63) & 0x1;
832 const di_type b_di
= {b
};
833 uint64_t b_flt_m
= b_di
.u
& 0x0fffffffffffff;
834 uint64_t b_flt_e
= (b_di
.u
>> 52) & 0x7ff;
835 uint64_t b_flt_s
= (b_di
.u
>> 63) & 0x1;
838 s
= a_flt_s
^ b_flt_s
;
840 if (a_flt_e
== 0x7ff) {
842 /* 'a' is a NaN, return NaN */
844 } else if (b_flt_e
== 0x7ff && b_flt_m
!= 0) {
845 /* 'b' is a NaN, return NaN */
849 if (!(b_flt_e
| b_flt_m
)) {
853 result
.u
= (s
<< 63) + (e
<< 52) + 0x1;
859 result
.u
= (s
<< 63) + (e
<< 52) + 0;
863 if (b_flt_e
== 0x7ff) {
865 /* 'b' is a NaN, return NaN */
868 if (!(a_flt_e
| a_flt_m
)) {
872 result
.u
= (s
<< 63) + (e
<< 52) + 0x1;
878 result
.u
= (s
<< 63) + (e
<< 52) + 0;
884 /* 'a' is zero. Return zero */
886 result
.u
= (s
<< 63) + 0;
889 _mesa_norm_subnormal_mantissa_f64(a_flt_m
, &a_flt_e
, &a_flt_m
);
893 /* 'b' is zero. Return zero */
895 result
.u
= (s
<< 63) + 0;
898 _mesa_norm_subnormal_mantissa_f64(b_flt_m
, &b_flt_e
, &b_flt_m
);
901 e
= a_flt_e
+ b_flt_e
- 0x3ff;
902 a_flt_m
= (a_flt_m
| 0x0010000000000000) << 10;
903 b_flt_m
= (b_flt_m
| 0x0010000000000000) << 11;
906 _mesa_softfloat_mul_f64_to_f128_m(a_flt_m
, b_flt_m
, m_128
);
908 m
= (uint64_t) m_128
[index_word(4, 3)] << 32 | m_128
[index_word(4, 2)];
909 if (m_128
[index_word(4, 1)] || m_128
[index_word(4, 0)])
912 if (m
< 0x4000000000000000) {
917 return _mesa_roundtozero_f64(s
, e
, m
);
922 * \brief Calculate a * b + c but rounding to zero.
924 * Notice that this mainly differs from the original Berkeley SoftFloat 3e
925 * implementation in that we don't really treat NaNs, Zeroes nor the
926 * signalling flags. Any NaN is good for us and the sign of the Zero is not
932 _mesa_double_fma_rtz(double a
, double b
, double c
)
934 const di_type a_di
= {a
};
935 uint64_t a_flt_m
= a_di
.u
& 0x0fffffffffffff;
936 uint64_t a_flt_e
= (a_di
.u
>> 52) & 0x7ff;
937 uint64_t a_flt_s
= (a_di
.u
>> 63) & 0x1;
938 const di_type b_di
= {b
};
939 uint64_t b_flt_m
= b_di
.u
& 0x0fffffffffffff;
940 uint64_t b_flt_e
= (b_di
.u
>> 52) & 0x7ff;
941 uint64_t b_flt_s
= (b_di
.u
>> 63) & 0x1;
942 const di_type c_di
= {c
};
943 uint64_t c_flt_m
= c_di
.u
& 0x0fffffffffffff;
944 uint64_t c_flt_e
= (c_di
.u
>> 52) & 0x7ff;
945 uint64_t c_flt_s
= (c_di
.u
>> 63) & 0x1;
949 s
= a_flt_s
^ b_flt_s
^ 0;
951 if (a_flt_e
== 0x7ff) {
953 /* 'a' is a NaN, return NaN */
955 } else if (b_flt_e
== 0x7ff && b_flt_m
!= 0) {
956 /* 'b' is a NaN, return NaN */
958 } else if (c_flt_e
== 0x7ff && c_flt_m
!= 0) {
959 /* 'c' is a NaN, return NaN */
963 if (!(b_flt_e
| b_flt_m
)) {
964 /* Inf * 0 + y = NaN */
967 result
.u
= (s
<< 63) + (e
<< 52) + 0x1;
971 if ((c_flt_e
== 0x7ff && c_flt_m
== 0) && (s
!= c_flt_s
)) {
972 /* Inf * x - Inf = NaN */
975 result
.u
= (s
<< 63) + (e
<< 52) + 0x1;
979 /* Inf * x + y = Inf */
982 result
.u
= (s
<< 63) + (e
<< 52) + 0;
986 if (b_flt_e
== 0x7ff) {
988 /* 'b' is a NaN, return NaN */
990 } else if (c_flt_e
== 0x7ff && c_flt_m
!= 0) {
991 /* 'c' is a NaN, return NaN */
995 if (!(a_flt_e
| a_flt_m
)) {
996 /* 0 * Inf + y = NaN */
999 result
.u
= (s
<< 63) + (e
<< 52) + 0x1;
1003 if ((c_flt_e
== 0x7ff && c_flt_m
== 0) && (s
!= c_flt_s
)) {
1004 /* x * Inf - Inf = NaN */
1007 result
.u
= (s
<< 63) + (e
<< 52) + 0x1;
1011 /* x * Inf + y = Inf */
1014 result
.u
= (s
<< 63) + (e
<< 52) + 0;
1018 if (c_flt_e
== 0x7ff) {
1020 /* 'c' is a NaN, return NaN */
1024 /* x * y + Inf = Inf */
1030 /* 'a' is zero, return 'c' */
1033 _mesa_norm_subnormal_mantissa_f64(a_flt_m
, &a_flt_e
, &a_flt_m
);
1038 /* 'b' is zero, return 'c' */
1041 _mesa_norm_subnormal_mantissa_f64(b_flt_m
, &b_flt_e
, &b_flt_m
);
1044 e
= a_flt_e
+ b_flt_e
- 0x3fe;
1045 a_flt_m
= (a_flt_m
| 0x0010000000000000) << 10;
1046 b_flt_m
= (b_flt_m
| 0x0010000000000000) << 11;
1049 _mesa_softfloat_mul_f64_to_f128_m(a_flt_m
, b_flt_m
, m_128
);
1051 m
= (uint64_t) m_128
[index_word(4, 3)] << 32 | m_128
[index_word(4, 2)];
1053 int64_t shift_dist
= 0;
1054 if (!(m
& 0x4000000000000000)) {
1061 /* 'c' is zero, return 'a * b' */
1065 if (m_128
[index_word(4, 1)] || m_128
[index_word(4, 0)])
1067 return _mesa_roundtozero_f64(s
, e
- 1, m
);
1069 _mesa_norm_subnormal_mantissa_f64(c_flt_m
, &c_flt_e
, &c_flt_m
);
1071 c_flt_m
= (c_flt_m
| 0x0010000000000000) << 10;
1073 uint32_t c_flt_m_128
[4];
1074 int64_t exp_diff
= e
- c_flt_e
;
1077 if ((s
== c_flt_s
) || (exp_diff
< -1)) {
1078 shift_dist
-= exp_diff
;
1080 m
= _mesa_shift_right_jam64(m
, shift_dist
);
1084 _mesa_short_shift_right_m(4, m_128
, 1, m_128
);
1089 _mesa_add_m(4, m_128
, m_128
, m_128
);
1091 m
= (uint64_t) m_128
[index_word(4, 3)] << 32
1092 | m_128
[index_word(4, 2)];
1094 c_flt_m_128
[index_word(4, 3)] = c_flt_m
>> 32;
1095 c_flt_m_128
[index_word(4, 2)] = c_flt_m
;
1096 c_flt_m_128
[index_word(4, 1)] = 0;
1097 c_flt_m_128
[index_word(4, 0)] = 0;
1098 _mesa_shift_right_jam_m(4, c_flt_m_128
, exp_diff
, c_flt_m_128
);
1103 if (exp_diff
<= 0) {
1106 _mesa_add_m(4, m_128
, c_flt_m_128
, m_128
);
1107 m
= (uint64_t) m_128
[index_word(4, 3)] << 32
1108 | m_128
[index_word(4, 2)];
1110 if (m
& 0x8000000000000000) {
1112 m
= _mesa_short_shift_right_jam64(m
, 1);
1117 if (exp_diff
< -1) {
1119 if (m_128
[index_word(4, 1)] || m_128
[index_word(4, 0)]) {
1122 if (!(m
& 0x4000000000000000)) {
1126 return _mesa_roundtozero_f64(s
, e
- 1, m
);
1128 c_flt_m_128
[index_word(4, 3)] = c_flt_m
>> 32;
1129 c_flt_m_128
[index_word(4, 2)] = c_flt_m
;
1130 c_flt_m_128
[index_word(4, 1)] = 0;
1131 c_flt_m_128
[index_word(4, 0)] = 0;
1132 _mesa_sub_m(4, c_flt_m_128
, m_128
, m_128
);
1134 } else if (!exp_diff
) {
1136 if (!m
&& !m_128
[index_word(4, 1)] && !m_128
[index_word(4, 0)]) {
1139 result
.u
= (s
<< 63) + 0;
1142 m_128
[index_word(4, 3)] = m
>> 32;
1143 m_128
[index_word(4, 2)] = m
;
1144 if (m
& 0x8000000000000000) {
1146 _mesa_neg_x_m(4, m_128
);
1149 _mesa_sub_m(4, m_128
, c_flt_m_128
, m_128
);
1151 m
= (uint64_t) m_128
[index_word(4, 3)] << 32
1152 | m_128
[index_word(4, 2)];
1153 if (!(m
& 0x4000000000000000)) {
1157 if (m_128
[index_word(4, 1)] || m_128
[index_word(4, 0)])
1159 return _mesa_roundtozero_f64(s
, e
- 1, m
);
1164 m
= (uint64_t) m_128
[index_word(4, 3)] << 32
1165 | m_128
[index_word(4, 2)];
1168 m
= (uint64_t) m_128
[index_word(4, 1)] << 32
1169 | m_128
[index_word(4, 0)];
1171 shift_dist
+= _mesa_count_leading_zeros64(m
) - 1;
1174 _mesa_shift_left_m(4, m_128
, shift_dist
, m_128
);
1175 m
= (uint64_t) m_128
[index_word(4, 3)] << 32
1176 | m_128
[index_word(4, 2)];
1180 if (m_128
[index_word(4, 1)] || m_128
[index_word(4, 0)])
1182 return _mesa_roundtozero_f64(s
, e
- 1, m
);
1187 * \brief Calculate a * b + c but rounding to zero.
1189 * Notice that this mainly differs from the original Berkeley SoftFloat 3e
1190 * implementation in that we don't really treat NaNs, Zeroes nor the
1191 * signalling flags. Any NaN is good for us and the sign of the Zero is not
1197 _mesa_float_fma_rtz(float a
, float b
, float c
)
1199 const fi_type a_fi
= {a
};
1200 uint32_t a_flt_m
= a_fi
.u
& 0x07fffff;
1201 uint32_t a_flt_e
= (a_fi
.u
>> 23) & 0xff;
1202 uint32_t a_flt_s
= (a_fi
.u
>> 31) & 0x1;
1203 const fi_type b_fi
= {b
};
1204 uint32_t b_flt_m
= b_fi
.u
& 0x07fffff;
1205 uint32_t b_flt_e
= (b_fi
.u
>> 23) & 0xff;
1206 uint32_t b_flt_s
= (b_fi
.u
>> 31) & 0x1;
1207 const fi_type c_fi
= {c
};
1208 uint32_t c_flt_m
= c_fi
.u
& 0x07fffff;
1209 uint32_t c_flt_e
= (c_fi
.u
>> 23) & 0xff;
1210 uint32_t c_flt_s
= (c_fi
.u
>> 31) & 0x1;
1211 int32_t s
, e
, m
= 0;
1214 s
= a_flt_s
^ b_flt_s
^ 0;
1216 if (a_flt_e
== 0xff) {
1218 /* 'a' is a NaN, return NaN */
1220 } else if (b_flt_e
== 0xff && b_flt_m
!= 0) {
1221 /* 'b' is a NaN, return NaN */
1223 } else if (c_flt_e
== 0xff && c_flt_m
!= 0) {
1224 /* 'c' is a NaN, return NaN */
1228 if (!(b_flt_e
| b_flt_m
)) {
1229 /* Inf * 0 + y = NaN */
1232 result
.u
= (s
<< 31) + (e
<< 23) + 0x1;
1236 if ((c_flt_e
== 0xff && c_flt_m
== 0) && (s
!= c_flt_s
)) {
1237 /* Inf * x - Inf = NaN */
1240 result
.u
= (s
<< 31) + (e
<< 23) + 0x1;
1244 /* Inf * x + y = Inf */
1247 result
.u
= (s
<< 31) + (e
<< 23) + 0;
1251 if (b_flt_e
== 0xff) {
1253 /* 'b' is a NaN, return NaN */
1255 } else if (c_flt_e
== 0xff && c_flt_m
!= 0) {
1256 /* 'c' is a NaN, return NaN */
1260 if (!(a_flt_e
| a_flt_m
)) {
1261 /* 0 * Inf + y = NaN */
1264 result
.u
= (s
<< 31) + (e
<< 23) + 0x1;
1268 if ((c_flt_e
== 0xff && c_flt_m
== 0) && (s
!= c_flt_s
)) {
1269 /* x * Inf - Inf = NaN */
1272 result
.u
= (s
<< 31) + (e
<< 23) + 0x1;
1276 /* x * Inf + y = Inf */
1279 result
.u
= (s
<< 31) + (e
<< 23) + 0;
1283 if (c_flt_e
== 0xff) {
1285 /* 'c' is a NaN, return NaN */
1289 /* x * y + Inf = Inf */
1295 /* 'a' is zero, return 'c' */
1298 _mesa_norm_subnormal_mantissa_f32(a_flt_m
, &a_flt_e
, &a_flt_m
);
1303 /* 'b' is zero, return 'c' */
1306 _mesa_norm_subnormal_mantissa_f32(b_flt_m
, &b_flt_e
, &b_flt_m
);
1309 e
= a_flt_e
+ b_flt_e
- 0x7e;
1310 a_flt_m
= (a_flt_m
| 0x00800000) << 7;
1311 b_flt_m
= (b_flt_m
| 0x00800000) << 7;
1313 uint64_t m_64
= (uint64_t) a_flt_m
* b_flt_m
;
1314 if (m_64
< 0x2000000000000000) {
1321 /* 'c' is zero, return 'a * b' */
1322 m
= _mesa_short_shift_right_jam64(m_64
, 31);
1323 return _mesa_round_f32(s
, e
- 1, m
, true);
1325 _mesa_norm_subnormal_mantissa_f32(c_flt_m
, &c_flt_e
, &c_flt_m
);
1327 c_flt_m
= (c_flt_m
| 0x00800000) << 6;
1329 int16_t exp_diff
= e
- c_flt_e
;
1331 if (exp_diff
<= 0) {
1333 m
= c_flt_m
+ _mesa_shift_right_jam64(m_64
, 32 - exp_diff
);
1335 m_64
+= _mesa_shift_right_jam64((uint64_t) c_flt_m
<< 32, exp_diff
);
1336 m
= _mesa_short_shift_right_jam64(m_64
, 32);
1338 if (m
< 0x40000000) {
1343 uint64_t c_flt_m_64
= (uint64_t) c_flt_m
<< 32;
1347 m_64
= c_flt_m_64
- _mesa_shift_right_jam64(m_64
, -exp_diff
);
1348 } else if (!exp_diff
) {
1353 result
.u
= (s
<< 31) + 0;
1356 if (m_64
& 0x8000000000000000) {
1361 m_64
-= _mesa_shift_right_jam64(c_flt_m_64
, exp_diff
);
1363 int8_t shift_dist
= _mesa_count_leading_zeros64(m_64
) - 1;
1366 if (shift_dist
< 0) {
1367 m
= _mesa_short_shift_right_jam64(m_64
, -shift_dist
);
1369 m
= (uint32_t) m_64
<< shift_dist
;
1373 return _mesa_round_f32(s
, e
, m
, true);
1378 * \brief Converts from 64bits to 32bits float and rounds according to
1384 _mesa_double_to_f32(double val
, bool rtz
)
1386 const di_type di
= {val
};
1387 uint64_t flt_m
= di
.u
& 0x0fffffffffffff;
1388 uint64_t flt_e
= (di
.u
>> 52) & 0x7ff;
1389 uint64_t flt_s
= (di
.u
>> 63) & 0x1;
1390 int32_t s
, e
, m
= 0;
1394 if (flt_e
== 0x7ff) {
1396 /* 'val' is a NaN, return NaN */
1400 result
.u
= (s
<< 31) + (e
<< 23) + m
;
1404 /* 'val' is Inf, return Inf */
1407 result
.u
= (s
<< 31) + (e
<< 23) + m
;
1411 if (!(flt_e
| flt_m
)) {
1412 /* 'val' is zero, return zero */
1415 result
.u
= (s
<< 31) + (e
<< 23) + m
;
1419 m
= _mesa_short_shift_right_jam64(flt_m
, 22);
1420 if ( ! (flt_e
| m
) ) {
1421 /* 'val' is denorm, return zero */
1424 result
.u
= (s
<< 31) + (e
<< 23) + m
;
1428 return _mesa_round_f32(s
, flt_e
- 0x381, m
| 0x40000000, rtz
);
1433 * \brief Converts from 32bits to 16bits float and rounds the result to zero.
1438 _mesa_float_to_half_rtz(float val
)
1440 const fi_type fi
= {val
};
1441 const uint32_t flt_m
= fi
.u
& 0x7fffff;
1442 const uint32_t flt_e
= (fi
.u
>> 23) & 0xff;
1443 const uint32_t flt_s
= (fi
.u
>> 31) & 0x1;
1444 int16_t s
, e
, m
= 0;
1448 if (flt_e
== 0xff) {
1450 /* 'val' is a NaN, return NaN */
1453 return (s
<< 15) + (e
<< 10) + m
;
1456 /* 'val' is Inf, return Inf */
1458 return (s
<< 15) + (e
<< 10) + m
;
1461 if (!(flt_e
| flt_m
)) {
1462 /* 'val' is zero, return zero */
1464 return (s
<< 15) + (e
<< 10) + m
;
1467 m
= flt_m
>> 9 | ((flt_m
& 0x1ff) != 0);
1468 if ( ! (flt_e
| m
) ) {
1469 /* 'val' is denorm, return zero */
1471 return (s
<< 15) + (e
<< 10) + m
;
1474 return _mesa_roundtozero_f16(s
, flt_e
- 0x71, m
| 0x4000);