/* Copyright (C) 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
33 #include "bid_functions.h"
#define __BID_INLINE__ static __inline

/*********************************************************************
 *
 *      Logical Shift Macros
 *
 *********************************************************************/

// shift a 128-bit value (A) right by k bits, 0 < k < 64
// (Q) receives the result; high bits of A.w[1] flow into Q.w[0]
#define __shr_128(Q, A, k)                       \
{                                                \
     (Q).w[0] = (A).w[0] >> k;                   \
     (Q).w[0] |= (A).w[1] << (64-k);             \
     (Q).w[1] = (A).w[1] >> k;                   \
}

// shift right by k bits for any 0 < k < 128
#define __shr_128_long(Q, A, k)                  \
{                                                \
    if((k)<64) {                                 \
     (Q).w[0] = (A).w[0] >> k;                   \
     (Q).w[0] |= (A).w[1] << (64-k);             \
     (Q).w[1] = (A).w[1] >> k;                   \
    }                                            \
    else {                                       \
     (Q).w[0] = (A).w[1]>>((k)-64);              \
     (Q).w[1] = 0;                               \
    }                                            \
}

// shift left by k bits for any 0 < k < 128
#define __shl_128_long(Q, A, k)                  \
{                                                \
    if((k)<64) {                                 \
     (Q).w[1] = (A).w[1] << k;                   \
     (Q).w[1] |= (A).w[0] >> (64-k);             \
     (Q).w[0] = (A).w[0] << k;                   \
    }                                            \
    else {                                       \
     (Q).w[1] = (A).w[0]<<((k)-64);              \
     (Q).w[0] = 0;                               \
    }                                            \
}

#define __low_64(Q)  (Q).w[0]
/*********************************************************************
 *
 *      String Macros
 *
 *********************************************************************/
// ASCII-only lowercase conversion: maps 'A'..'Z' to 'a'..'z',
// leaves every other value unchanged (branch-free range test via
// unsigned wraparound of (x)-'A')
#define tolower_macro(x)  (((unsigned char)((x)-'A')<=('Z'-'A'))?((x)-'A'+'a'):(x))
/*********************************************************************
 *
 *      Compare Macros
 *
 *********************************************************************/
// unsigned 128-bit A > B: non-zero iff A is strictly greater
#define __unsigned_compare_gt_128(A, B)  \
    ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>B.w[0])))
// unsigned 128-bit A >= B
#define __unsigned_compare_ge_128(A, B)  \
    ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>=B.w[0])))
// 128-bit equality
#define __test_equal_128(A, B)  (((A).w[1]==(B).w[1]) && ((A).w[0]==(B).w[0]))
// tighten exponent range
// (bp) = (bin_expon), then incremented by one if the 128-bit value (P)
// is >= 2^((bin_expon)+1), i.e. the initial binary-exponent estimate
// was one too small
#define __tight_bin_range_128(bp, P, bin_expon)  \
{                                                \
UINT64 M;                                        \
	M = 1;                                   \
	(bp) = (bin_expon);                      \
	if((bp)<63) {                            \
	  M <<= ((bp)+1);                        \
	  if((P).w[0] >= M) (bp)++; }            \
	else if((bp)>64) {                       \
	  M <<= ((bp)+1-64);                     \
	  if(((P).w[1]>M) ||((P).w[1]==M && (P).w[0]))\
	    (bp)++; }                            \
	else if((P).w[1]) (bp)++;                \
}
/*********************************************************************
 *
 *      Add/Subtract Macros
 *
 *********************************************************************/
// add 64-bit value to 128-bit (carry propagated into the high word)
#define __add_128_64(R128, A128, B64)            \
{                                                \
UINT64 R64H;                                     \
	R64H = (A128).w[1];                      \
	(R128).w[0] = (B64) + (A128).w[0];       \
	if((R128).w[0] < (B64))                  \
	  R64H ++;                               \
    (R128).w[1] = R64H;                          \
}
// subtract 64-bit value from 128-bit (borrow propagated)
#define __sub_128_64(R128, A128, B64)            \
{                                                \
UINT64 R64H;                                     \
	R64H = (A128).w[1];                      \
	if((A128).w[0] < (B64))                  \
	  R64H --;                               \
    (R128).w[1] = R64H;                          \
    (R128).w[0] = (A128).w[0] - (B64);           \
}
// add 128-bit value to 128-bit
// assume no carry-out of the top word
#define __add_128_128(R128, A128, B128)          \
{                                                \
UINT128 Q128;                                    \
	Q128.w[1] = (A128).w[1]+(B128).w[1];     \
	Q128.w[0] = (B128).w[0] + (A128).w[0];   \
	if(Q128.w[0] < (B128).w[0])              \
	  Q128.w[1] ++;                          \
    (R128).w[1] = Q128.w[1];                     \
    (R128).w[0] = Q128.w[0];                     \
}
// subtract 128-bit value from 128-bit; temporary Q128 makes it safe
// for (R128) to alias either operand
#define __sub_128_128(R128, A128, B128)          \
{                                                \
UINT128 Q128;                                    \
	Q128.w[1] = (A128).w[1]-(B128).w[1];     \
	Q128.w[0] = (A128).w[0] - (B128).w[0];   \
	if((A128).w[0] < (B128).w[0])            \
	  Q128.w[1] --;                          \
    (R128).w[1] = Q128.w[1];                     \
    (R128).w[0] = Q128.w[0];                     \
}
// S = X + Y, CY = carry-out (0/1)
#define __add_carry_out(S, CY, X, Y)             \
{                                                \
UINT64 X1=X;                                     \
	S = X + Y;                               \
	CY = (S<X1) ? 1 : 0;                     \
}
// S = X + Y + CI, CY = carry-out (CI must be 0 or 1)
#define __add_carry_in_out(S, CY, X, Y, CI)      \
{                                                \
UINT64 X1;                                       \
	X1 = X + CI;                             \
	S = X1 + Y;                              \
	CY = ((S<X1) || (X1<CI)) ? 1 : 0;        \
}
// S = X - Y, CY = borrow-out (0/1)
#define __sub_borrow_out(S, CY, X, Y)            \
{                                                \
UINT64 X1=X;                                     \
	S = X - Y;                               \
	CY = (S>X1) ? 1 : 0;                     \
}
// S = X - Y - CI, CY = borrow-out (CI must be 0 or 1)
#define __sub_borrow_in_out(S, CY, X, Y, CI)     \
{                                                \
UINT64 X1, X0=X;                                 \
	X1 = X - CI;                             \
	S = X1 - Y;                              \
	CY = ((S>X1) || (X1>X0)) ? 1 : 0;        \
}
// increment C128 and check for rounding overflow:
// if (C_128) = 10^34 then (C_128) = 10^33 and increment the exponent
#define INCREMENT(C_128, exp)                    \
{                                                \
  C_128.w[0]++;                                  \
  if (C_128.w[0] == 0) C_128.w[1]++;             \
  if (C_128.w[1] == 0x0001ed09bead87c0ull &&     \
      C_128.w[0] == 0x378d8e6400000000ull) {     \
    exp++;                                       \
    C_128.w[1] = 0x0000314dc6448d93ull;          \
    C_128.w[0] = 0x38c15b0a00000000ull;          \
  }                                              \
}
// decrement C128 and check for rounding underflow, but only at the
// boundary: if C_128 = 10^33 - 1 and exp > 0 then C_128 = 10^34 - 1
// and decrement the exponent
#define DECREMENT(C_128, exp)                    \
{                                                \
  C_128.w[0]--;                                  \
  if (C_128.w[0] == 0xffffffffffffffffull) C_128.w[1]--; \
  if (C_128.w[1] == 0x0000314dc6448d93ull &&     \
      C_128.w[0] == 0x38c15b09ffffffffull && exp > 0) { \
    exp--;                                       \
    C_128.w[1] = 0x0001ed09bead87c0ull;          \
    C_128.w[0] = 0x378d8e63ffffffffull;          \
  }                                              \
}
/*********************************************************************
 *
 *      Multiply Macros
 *
 *********************************************************************/
#define __mul_64x64_to_64(P64, CX, CY)   (P64) = (CX) * (CY)

/***************************************
 *  Signed, Full 64x64-bit Multiply
 ***************************************/
// unsigned full product corrected for the sign bits of the operands
#define __imul_64x64_to_128(P, CX, CY)   \
{   UINT64 SX, SY;                       \
	__mul_64x64_to_128(P, CX, CY);   \
                                         \
	SX = ((SINT64)(CX))>>63;         \
	SY = ((SINT64)(CY))>>63;         \
	SX &= CY;  SY &= CX;             \
                                         \
	(P).w[1] = (P).w[1] - SX - SY;   \
}

/***************************************
 * Signed, Full 64x128-bit Multiply
 ***************************************/
#define __imul_64x128_full(Ph, Ql, A, B)           \
{                                                  \
UINT128 ALBL, ALBH, QM2, QM;                       \
                                                   \
	__imul_64x64_to_128(ALBH, (A), (B).w[1]);  \
	__imul_64x64_to_128(ALBL, (A), (B).w[0]);  \
                                                   \
	(Ql).w[0] = ALBL.w[0];                     \
	QM.w[0] = ALBL.w[1];                       \
	QM.w[1] = ((SINT64)ALBL.w[1])>>63;         \
	__add_128_128(QM2, ALBH, QM);              \
	(Ql).w[1] = QM2.w[0];                      \
	Ph = QM2.w[1];                             \
}
/*****************************************************
 *      Unsigned Multiply Macros
 *****************************************************/
// get full 64x64bit product via four 32x32 partial products
//
#define __mul_64x64_to_128(P, CX, CY)    \
{                                        \
UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2;   \
	CXH = (CX) >> 32;                \
	CXL = (UINT32)(CX);              \
	CYH = (CY) >> 32;                \
	CYL = (UINT32)(CY);              \
	                                 \
	PM = CXH*CYL;                    \
	PH = CXH*CYH;                    \
	PL = CXL*CYL;                    \
	PM2 = CXL*CYH;                   \
	PH += (PM>>32);                  \
	PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
	                                 \
	(P).w[1] = PH + (PM>>32);        \
	(P).w[0] = (PM<<32)+(UINT32)PL;  \
}

// get full 64x64bit product
// Note:
// This macro is used for CX < 2^61, CY < 2^61
// (the middle partial products then cannot overflow)
#define __mul_64x64_to_128_fast(P, CX, CY)   \
{                                        \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;   \
	CXH = (CX) >> 32;                \
	CXL = (UINT32)(CX);              \
	CYH = (CY) >> 32;                \
	CYL = (UINT32)(CY);              \
	                                 \
	PM = CXH*CYL;                    \
	PL = CXL*CYL;                    \
	PH = CXH*CYH;                    \
	PM += CXL*CYH;                   \
	PM += (PL>>32);                  \
	                                 \
	(P).w[1] = PH + (PM>>32);        \
	(P).w[0] = (PM<<32)+(UINT32)PL;  \
}

// full square of a 64-bit value, CX < 2^61
#define __sqr64_fast(P, CX)              \
{                                        \
UINT64 CXH, CXL, PL, PH, PM;             \
	CXH = (CX) >> 32;                \
	CXL = (UINT32)(CX);              \
	                                 \
	PM = CXH*CXL;                    \
	PL = CXL*CXL;                    \
	PH = CXH*CXH;                    \
	PM += PM;                        \
	PM += (PL>>32);                  \
	                                 \
	(P).w[1] = PH + (PM>>32);        \
	(P).w[0] = (PM<<32)+(UINT32)PL;  \
}

// get high 64 bits of the 64x64bit product
// Note:
// This implementation is used for CX < 2^61, CY < 2^61
#define __mul_64x64_to_64_high_fast(P, CX, CY)   \
{                                        \
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;   \
	CXH = (CX) >> 32;                \
	CXL = (UINT32)(CX);              \
	CYH = (CY) >> 32;                \
	CYL = (UINT32)(CY);              \
	                                 \
	PM = CXH*CYL;                    \
	PL = CXL*CYL;                    \
	PH = CXH*CYH;                    \
	PM += CXL*CYH;                   \
	PM += (PL>>32);                  \
	                                 \
	(P) = PH + (PM>>32);             \
}

// get full 64x64bit product (no range restriction on operands)
//
#define __mul_64x64_to_128_full(P, CX, CY)   \
{                                        \
UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2;   \
	CXH = (CX) >> 32;                \
	CXL = (UINT32)(CX);              \
	CYH = (CY) >> 32;                \
	CYL = (UINT32)(CY);              \
	                                 \
	PM = CXH*CYL;                    \
	PH = CXH*CYH;                    \
	PL = CXL*CYL;                    \
	PM2 = CXL*CYH;                   \
	PH += (PM>>32);                  \
	PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
	                                 \
	(P).w[1] = PH + (PM>>32);        \
	(P).w[0] = (PM<<32)+(UINT32)PL;  \
}
// high 128 bits of the 128x128-bit product
#define __mul_128x128_high(Q, A, B)                    \
{                                                      \
UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;               \
                                                       \
	__mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]);  \
	__mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]);  \
	__mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);  \
	__mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]);   \
                                                       \
	__add_128_128(QM, ALBH, AHBL);                 \
	__add_128_64(QM2, QM, ALBL.w[1]);              \
	__add_128_64((Q), AHBH, QM2.w[1]);             \
}

// full 256-bit product, split into high (Qh) and low (Ql) 128-bit halves
#define __mul_128x128_full(Qh, Ql, A, B)               \
{                                                      \
UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;               \
                                                       \
	__mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]);  \
	__mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]);  \
	__mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);  \
	__mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]);   \
                                                       \
	__add_128_128(QM, ALBH, AHBL);                 \
	(Ql).w[0] = ALBL.w[0];                         \
	__add_128_64(QM2, QM, ALBL.w[1]);              \
	__add_128_64((Qh), AHBH, QM2.w[1]);            \
	(Ql).w[1] = QM2.w[0];                          \
}

// low 128 bits of the 128x128-bit product
#define __mul_128x128_low(Ql, A, B)                    \
{                                                      \
UINT128 ALBL;                                          \
UINT64 QM64;                                           \
	__mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);  \
	QM64 = (B).w[0]*(A).w[1] + (A).w[0]*(B).w[1];  \
                                                       \
	(Ql).w[0] = ALBL.w[0];                         \
	(Ql).w[1] = QM64 + ALBL.w[1];                  \
}

// low 128 bits of the 64x128-bit product
#define __mul_64x128_low(Ql, A, B)                 \
{                                                  \
  UINT128 ALBL, ALBH, QM2;                         \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);         \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);         \
  (Ql).w[0] = ALBL.w[0];                           \
  __add_128_64(QM2, ALBH, ALBL.w[1]);              \
  (Ql).w[1] = QM2.w[0];                            \
}

// full 192-bit 64x128 product: high 64 bits in Ph, low 128 in Ql
#define __mul_64x128_full(Ph, Ql, A, B)            \
{                                                  \
UINT128 ALBL, ALBH, QM2;                           \
                                                   \
	__mul_64x64_to_128(ALBH, (A), (B).w[1]);   \
	__mul_64x64_to_128(ALBL, (A), (B).w[0]);   \
                                                   \
	(Ql).w[0] = ALBL.w[0];                     \
	__add_128_64(QM2, ALBH, ALBL.w[1]);        \
	(Ql).w[1] = QM2.w[0];                      \
	Ph = QM2.w[1];                             \
}

// 64x128 product delivered as a 192-bit value in (Q).w[0..2]
#define __mul_64x128_to_192(Q, A, B)               \
{                                                  \
UINT128 ALBL, ALBH, QM2;                           \
                                                   \
	__mul_64x64_to_128(ALBH, (A), (B).w[1]);   \
	__mul_64x64_to_128(ALBL, (A), (B).w[0]);   \
                                                   \
	(Q).w[0] = ALBL.w[0];                      \
	__add_128_64(QM2, ALBH, ALBL.w[1]);        \
	(Q).w[1] = QM2.w[0];                       \
	(Q).w[2] = QM2.w[1];                       \
}

// identical variant kept for source compatibility with existing callers
#define __mul_64x128_to192(Q, A, B)                \
{                                                  \
UINT128 ALBL, ALBH, QM2;                           \
                                                   \
	__mul_64x64_to_128(ALBH, (A), (B).w[1]);   \
	__mul_64x64_to_128(ALBL, (A), (B).w[0]);   \
                                                   \
	(Q).w[0] = ALBL.w[0];                      \
	__add_128_64(QM2, ALBH, ALBL.w[1]);        \
	(Q).w[1] = QM2.w[0];                       \
	(Q).w[2] = QM2.w[1];                       \
}

// full 256-bit product of two 128-bit values
#define __mul_128x128_to_256(P256, A, B)                     \
{                                                            \
UINT128 Qll, Qlh;                                            \
UINT64 Phl, Phh, CY1, CY2;                                   \
                                                             \
  __mul_64x128_full(Phl, Qll, A.w[0], B);                    \
  __mul_64x128_full(Phh, Qlh, A.w[1], B);                    \
  (P256).w[0] = Qll.w[0];                                    \
  __add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]);      \
  __add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1);   \
  (P256).w[3] = Phh + CY2;                                   \
}
// For better performance, will check A.w[1] against 0,
// but not B=(Bh,Bl)
// Use this macro accordingly
#define __mul_128x128_to_256_check_A(P256, A, B)             \
{                                                            \
UINT128 Qll, Qlh;                                            \
UINT64 Phl, Phh, CY1, CY2;                                   \
                                                             \
  __mul_64x128_full(Phl, Qll, A.w[0], B);                    \
  (P256).w[0] = Qll.w[0];                                    \
  if(A.w[1])  {                                              \
  __mul_64x128_full(Phh, Qlh, A.w[1], B);                    \
  __add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]);      \
  __add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1);   \
  (P256).w[3] = Phh + CY2; }                                 \
  else  {                                                    \
  (P256).w[1] = Qll.w[1];                                    \
  (P256).w[2] = Phl;                                         \
  (P256).w[3] = 0; }                                         \
}

// 64x192 -> 256-bit product, schoolbook over 64-bit limbs
#define __mul_64x192_to_256(lP, lA, lB)                      \
{                                                            \
UINT128 lP0,lP1,lP2;                                         \
UINT64 lC;                                                   \
	__mul_64x64_to_128(lP0, lA, (lB).w[0]);              \
	__mul_64x64_to_128(lP1, lA, (lB).w[1]);              \
	__mul_64x64_to_128(lP2, lA, (lB).w[2]);              \
	(lP).w[0] = lP0.w[0];                                \
	__add_carry_out((lP).w[1],lC,lP1.w[0],lP0.w[1]);     \
	__add_carry_in_out((lP).w[2],lC,lP2.w[0],lP1.w[1],lC); \
	(lP).w[3] = lP2.w[1] + lC;                           \
}

// 64x256 -> 320-bit product
#define __mul_64x256_to_320(P, A, B)                         \
{                                                            \
UINT128 lP0,lP1,lP2,lP3;                                     \
UINT64 lC;                                                   \
	__mul_64x64_to_128(lP0, A, (B).w[0]);                \
	__mul_64x64_to_128(lP1, A, (B).w[1]);                \
	__mul_64x64_to_128(lP2, A, (B).w[2]);                \
	__mul_64x64_to_128(lP3, A, (B).w[3]);                \
	(P).w[0] = lP0.w[0];                                 \
	__add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]);      \
	__add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC); \
	__add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC); \
	(P).w[4] = lP3.w[1] + lC;                            \
}

// 192x192 -> 384-bit product, built from 64x192 partial products
#define __mul_192x192_to_384(P, A, B)                        \
{                                                            \
UINT256 P0,P1,P2;                                            \
UINT64 CY;                                                   \
	__mul_64x192_to_256(P0, (A).w[0], B);                \
	__mul_64x192_to_256(P1, (A).w[1], B);                \
	__mul_64x192_to_256(P2, (A).w[2], B);                \
	(P).w[0] = P0.w[0];                                  \
	__add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);        \
	__add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY);  \
	__add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY);  \
	(P).w[4] = P1.w[3] + CY;                             \
	__add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);       \
	__add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY); \
	__add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY); \
	(P).w[5] = P2.w[3] + CY;                             \
}

// 64x320 -> 384-bit product
#define __mul_64x320_to_384(P, A, B)                         \
{                                                            \
UINT128 lP0,lP1,lP2,lP3,lP4;                                 \
UINT64 lC;                                                   \
	__mul_64x64_to_128(lP0, A, (B).w[0]);                \
	__mul_64x64_to_128(lP1, A, (B).w[1]);                \
	__mul_64x64_to_128(lP2, A, (B).w[2]);                \
	__mul_64x64_to_128(lP3, A, (B).w[3]);                \
	__mul_64x64_to_128(lP4, A, (B).w[4]);                \
	(P).w[0] = lP0.w[0];                                 \
	__add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]);      \
	__add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC); \
	__add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC); \
	__add_carry_in_out((P).w[4],lC,lP4.w[0],lP3.w[1],lC); \
	(P).w[5] = lP4.w[1] + lC;                            \
}
// Full 128x128-bit product
// square of a 128-bit value: exploits symmetry to use three 64x64
// products instead of four
#define __sqr128_to_256(P256, A)                                      \
{                                                                     \
UINT128 Qll, Qlh, Qhh;                                                \
UINT64 TMP_C1, TMP_C2;                                                \
                                                                      \
  __mul_64x64_to_128(Qhh, A.w[1], A.w[1]);                            \
  __mul_64x64_to_128(Qlh, A.w[0], A.w[1]);                            \
  Qhh.w[1] += (Qlh.w[1]>>63);                                         \
  Qlh.w[1] = (Qlh.w[1]+Qlh.w[1])|(Qlh.w[0]>>63);                      \
  Qlh.w[0] += Qlh.w[0];                                               \
  __mul_64x64_to_128(Qll, A.w[0], A.w[0]);                            \
                                                                      \
  __add_carry_out((P256).w[1],TMP_C1, Qlh.w[0], Qll.w[1]);            \
  (P256).w[0] = Qll.w[0];                                             \
  __add_carry_in_out((P256).w[2],TMP_C2, Qlh.w[1], Qhh.w[0], TMP_C1); \
  (P256).w[3] = Qhh.w[1]+TMP_C2;                                      \
}

// 128x128 -> 256, result as two 128-bit halves (PQh:PQl)
#define __mul_128x128_to_256_low_high(PQh, PQl, A, B)        \
{                                                            \
UINT128 Qll, Qlh;                                            \
UINT64 Phl, Phh, C1, C2;                                     \
                                                             \
  __mul_64x128_full(Phl, Qll, A.w[0], B);                    \
  __mul_64x128_full(Phh, Qlh, A.w[1], B);                    \
  (PQl).w[0] = Qll.w[0];                                     \
  __add_carry_out((PQl).w[1],C1, Qlh.w[0], Qll.w[1]);        \
  __add_carry_in_out((PQh).w[0],C2, Qlh.w[1], Phl, C1);      \
  (PQh).w[1] = Phh + C2;                                     \
}

// 256x256 -> 512-bit product, schoolbook over 64-bit limbs
#define __mul_256x256_to_512(P, A, B)                        \
{                                                            \
UINT512 P0,P1,P2,P3;                                         \
UINT64 CY;                                                   \
	__mul_64x256_to_320(P0, (A).w[0], B);                \
	__mul_64x256_to_320(P1, (A).w[1], B);                \
	__mul_64x256_to_320(P2, (A).w[2], B);                \
	__mul_64x256_to_320(P3, (A).w[3], B);                \
	(P).w[0] = P0.w[0];                                  \
	__add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);        \
	__add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY);  \
	__add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY);  \
	__add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY);  \
	(P).w[5] = P1.w[4] + CY;                             \
	__add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);       \
	__add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY); \
	__add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY); \
	__add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY); \
	(P).w[6] = P2.w[4] + CY;                             \
	__add_carry_out((P).w[3],CY,P3.w[0],(P).w[3]);       \
	__add_carry_in_out((P).w[4],CY,P3.w[1],(P).w[4],CY); \
	__add_carry_in_out((P).w[5],CY,P3.w[2],(P).w[5],CY); \
	__add_carry_in_out((P).w[6],CY,P3.w[3],(P).w[6],CY); \
	(P).w[7] = P3.w[4] + CY;                             \
}
// 192x256 -> 448-bit product
#define __mul_192x256_to_448(P, A, B)                        \
{                                                            \
UINT512 P0,P1,P2;                                            \
UINT64 CY;                                                   \
	__mul_64x256_to_320(P0, (A).w[0], B);                \
	__mul_64x256_to_320(P1, (A).w[1], B);                \
	__mul_64x256_to_320(P2, (A).w[2], B);                \
	(P).w[0] = P0.w[0];                                  \
	__add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);        \
	__add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY);  \
	__add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY);  \
	__add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY);  \
	(P).w[5] = P1.w[4] + CY;                             \
	__add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);       \
	__add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY); \
	__add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY); \
	__add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY); \
	(P).w[6] = P2.w[4] + CY;                             \
}

// 320x320 -> 640-bit product: 256x256 core plus top-limb corrections
#define __mul_320x320_to_640(P, A, B)                        \
{                                                            \
UINT512 P0,P1,P2,P3;                                         \
UINT64 CY;                                                   \
	__mul_256x256_to_512((P), (A), B);                   \
	__mul_64x256_to_320(P1, (A).w[4], B);                \
	__mul_64x256_to_320(P2, (B).w[4], A);                \
	__mul_64x64_to_128(P3, (A).w[4], (B).w[4]);          \
	__add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]);       \
	__add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY); \
	__add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY); \
	__add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY); \
	__add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY); \
	P0.w[5] = CY;                                        \
	__add_carry_out((P).w[4],CY,(P).w[4],P0.w[0]);       \
	__add_carry_in_out((P).w[5],CY,(P).w[5],P0.w[1],CY); \
	__add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[2],CY); \
	__add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[3],CY); \
	__add_carry_in_out((P).w[8],CY,P3.w[0],P0.w[4],CY);  \
	(P).w[9] = P3.w[1] + CY;                             \
}

// 384x384 -> 768-bit product: 320x320 core plus top-limb corrections
#define __mul_384x384_to_768(P, A, B)                        \
{                                                            \
UINT512 P0,P1,P2,P3;                                         \
UINT64 CY;                                                   \
	__mul_320x320_to_640((P), (A), B);                   \
	__mul_64x320_to_384(P1, (A).w[5], B);                \
	__mul_64x320_to_384(P2, (B).w[5], A);                \
	__mul_64x64_to_128(P3, (A).w[5], (B).w[5]);          \
	__add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]);       \
	__add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY); \
	__add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY); \
	__add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY); \
	__add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY); \
	__add_carry_in_out((P0).w[5],CY,P1.w[5],P2.w[5],CY); \
	P0.w[6] = CY;                                        \
	__add_carry_out((P).w[5],CY,(P).w[5],P0.w[0]);       \
	__add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[1],CY); \
	__add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[2],CY); \
	__add_carry_in_out((P).w[8],CY,(P).w[8],P0.w[3],CY); \
	__add_carry_in_out((P).w[9],CY,(P).w[9],P0.w[4],CY); \
	__add_carry_in_out((P).w[10],CY,P3.w[0],P0.w[5],CY); \
	(P).w[11] = P3.w[1] + CY;                            \
}

// 64x128 product, keeping only the low 128 bits (cheap variant)
#define __mul_64x128_short(Ql, A, B)               \
{                                                  \
UINT64 ALBH_L;                                     \
                                                   \
	__mul_64x64_to_64(ALBH_L, (A),(B).w[1]);   \
	__mul_64x64_to_128((Ql), (A), (B).w[0]);   \
                                                   \
	(Ql).w[1] += ALBH_L;                       \
}

// D = 10 * _TMP, computed as (_TMP<<1) + (_TMP<<3)
#define __scale128_10(D,_TMP)                            \
{                                                        \
UINT128 _TMP2,_TMP8;                                     \
	  _TMP2.w[1] = (_TMP.w[1]<<1)|(_TMP.w[0]>>63);   \
	  _TMP2.w[0] = _TMP.w[0]<<1;                     \
	  _TMP8.w[1] = (_TMP.w[1]<<3)|(_TMP.w[0]>>61);   \
	  _TMP8.w[0] = _TMP.w[0]<<3;                     \
	  __add_128_128(D, _TMP2, _TMP8);                \
}
// full 64x64-bit product (machine-independent 32-bit split)
#define __mul_64x64_to_128MACH(P128, CX64, CY64)    \
{                                                   \
  UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2;              \
  CXH = (CX64) >> 32;                               \
  CXL = (UINT32)(CX64);                             \
  CYH = (CY64) >> 32;                               \
  CYL = (UINT32)(CY64);                             \
  PM = CXH*CYL;                                     \
  PH = CXH*CYH;                                     \
  PL = CXL*CYL;                                     \
  PM2 = CXL*CYH;                                    \
  PH += (PM>>32);                                   \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);           \
  (P128).w[1] = PH + (PM>>32);                      \
  (P128).w[0] = (PM<<32)+(UINT32)PL;                \
}

// high 64 bits of the full 64x64-bit product
#define __mul_64x64_to_128HIGH(P64, CX64, CY64)     \
{                                                   \
  UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2;              \
  CXH = (CX64) >> 32;                               \
  CXL = (UINT32)(CX64);                             \
  CYH = (CY64) >> 32;                               \
  CYL = (UINT32)(CY64);                             \
  PM = CXH*CYL;                                     \
  PH = CXH*CYH;                                     \
  PL = CXL*CYL;                                     \
  PM2 = CXL*CYH;                                    \
  PH += (PM>>32);                                   \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);           \
  P64 = PH + (PM>>32);                              \
}

// low 128 bits of 128x64-bit product
#define __mul_128x64_to_128(Q128, A64, B128)          \
{                                                     \
  UINT64 ALBH_L;                                      \
  ALBH_L = (A64) * (B128).w[1];                       \
  __mul_64x64_to_128MACH((Q128), (A64), (B128).w[0]); \
  (Q128).w[1] += ALBH_L;                              \
}

// might simplify by calculating just QM2.w[0]
// low 128 bits of 64x128-bit product
#define __mul_64x128_to_128(Ql, A, B)              \
{                                                  \
  UINT128 ALBL, ALBH, QM2;                         \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);         \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);         \
  (Ql).w[0] = ALBL.w[0];                           \
  __add_128_64(QM2, ALBH, ALBL.w[1]);              \
  (Ql).w[1] = QM2.w[0];                            \
}
/*********************************************************************
 *
 *      BID Pack/Unpack Macros
 *
 *********************************************************************/

/////////////////////////////////////////
// BID64 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_64  767
#define DECIMAL_EXPONENT_BIAS 398
#define MAX_FORMAT_DIGITS     16

/////////////////////////////////////////
// BID128 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_128  12287
#define DECIMAL_EXPONENT_BIAS_128  6176
#define MAX_FORMAT_DIGITS_128      34

/////////////////////////////////////////
// BID32 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_32  191
#define DECIMAL_EXPONENT_BIAS_32  101
#define MAX_FORMAT_DIGITS_32      7
////////////////////////////////////////
//      Constant Definitions
///////////////////////////////////////
#define SPECIAL_ENCODING_MASK64 0x6000000000000000ull
#define INFINITY_MASK64         0x7800000000000000ull
#define NAN_MASK64              0x7c00000000000000ull
#define SNAN_MASK64             0x7e00000000000000ull
#define QUIET_MASK64            0xfdffffffffffffffull
#define LARGE_COEFF_MASK64      0x0007ffffffffffffull
#define LARGE_COEFF_HIGH_BIT64  0x0020000000000000ull
#define SMALL_COEFF_MASK64      0x001fffffffffffffull
#define EXPONENT_MASK64         0x3ff
#define EXPONENT_SHIFT_LARGE64  51
#define EXPONENT_SHIFT_SMALL64  53
#define LARGEST_BID64           0x77fb86f26fc0ffffull
#define SMALLEST_BID64          0xf7fb86f26fc0ffffull
#define SMALL_COEFF_MASK128     0x0001ffffffffffffull
#define LARGE_COEFF_MASK128     0x00007fffffffffffull
#define EXPONENT_MASK128        0x3fff
#define LARGEST_BID128_HIGH     0x5fffed09bead87c0ull
#define LARGEST_BID128_LOW      0x378d8e63ffffffffull
#define SPECIAL_ENCODING_MASK32 0x60000000ul
#define INFINITY_MASK32         0x78000000ul
#define LARGE_COEFF_MASK32      0x007ffffful
#define LARGE_COEFF_HIGH_BIT32  0x00800000ul
#define SMALL_COEFF_MASK32      0x001ffffful
#define EXPONENT_MASK32         0xff
#define LARGEST_BID32           0x77f8967f
#define MASK_BINARY_EXPONENT    0x7ff0000000000000ull
#define BINARY_EXPONENT_BIAS    0x3ff
#define UPPER_EXPON_LIMIT       51
768 // data needed for BID pack/unpack macros
769 extern UINT64 __bid_round_const_table
[][19];
770 extern UINT128 __bid_reciprocals10_128
[];
771 extern int __bid_recip_scale
[];
772 extern UINT128 __bid_power10_table_128
[];
773 extern int __bid_estimate_decimal_digits
[];
774 extern int __bid_estimate_bin_expon
[];
775 extern UINT64 __bid_power10_index_binexp
[];
776 extern int __bid_short_recip_scale
[];
777 extern UINT64 __bid_reciprocals10_64
[];
778 extern UINT128 __bid_power10_index_binexp_128
[];
779 extern UINT128 __bid_round_const_table_128
[][36];
//////////////////////////////////////////////
//  Status Flag Handling
/////////////////////////////////////////////
// OR the given exception bits into the status word pointed to by fpsc
// (status argument parenthesized to keep compound expressions intact)
#define __set_status_flags(fpsc, status)  *(fpsc) |= (status)
// non-zero iff the inexact flag is set in *fpsc
#define is_inexact(fpsc)  ((*(fpsc))&INEXACT_EXCEPTION)
788 __BID_INLINE__ UINT64
789 unpack_BID64 (UINT64
* psign_x
, int *pexponent_x
,
790 UINT64
* pcoefficient_x
, UINT64 x
) {
793 *psign_x
= x
& 0x8000000000000000ull
;
795 if ((x
& SPECIAL_ENCODING_MASK64
) == SPECIAL_ENCODING_MASK64
) {
798 coeff
= (x
& LARGE_COEFF_MASK64
) | LARGE_COEFF_HIGH_BIT64
;
800 if ((x
& INFINITY_MASK64
) == INFINITY_MASK64
) {
802 // check for non-canonical values
803 if ((x
& LARGE_COEFF_MASK64
) >= 1000000000000000ull)
804 *pcoefficient_x
= x
& (~LARGE_COEFF_MASK64
);
805 return 0; // NaN or Infinity
807 // check for non-canonical values
808 if (coeff
>= 10000000000000000ull)
810 *pcoefficient_x
= coeff
;
812 tmp
= x
>> EXPONENT_SHIFT_LARGE64
;
813 *pexponent_x
= (int) (tmp
& EXPONENT_MASK64
);
817 tmp
= x
>> EXPONENT_SHIFT_SMALL64
;
818 *pexponent_x
= (int) (tmp
& EXPONENT_MASK64
);
820 *pcoefficient_x
= (x
& SMALL_COEFF_MASK64
);
822 return *pcoefficient_x
;
826 // BID64 pack macro (general form)
828 __BID_INLINE__ UINT64
829 get_BID64 (UINT64 sgn
, int expon
, UINT64 coeff
, int rmode
,
831 UINT128 Stemp
, Q_low
;
832 UINT64 QH
, r
, mask
, C64
, remainder_h
, CY
, carry
;
833 int extra_digits
, amount
, amount2
;
836 if (coeff
> 9999999999999999ull) {
838 coeff
= 1000000000000000ull;
840 // check for possible underflow/overflow
841 if (((unsigned) expon
) >= 3 * 256) {
844 if (expon
+ MAX_FORMAT_DIGITS
< 0) {
845 #ifdef SET_STATUS_FLAGS
846 __set_status_flags (fpsc
,
847 UNDERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
849 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
850 #ifndef IEEE_ROUND_NEAREST
851 if (rmode
== ROUNDING_DOWN
&& sgn
)
852 return 0x8000000000000001ull
;
853 if (rmode
== ROUNDING_UP
&& !sgn
)
860 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
861 #ifndef IEEE_ROUND_NEAREST
862 if (sgn
&& (unsigned) (rmode
- 1) < 2)
866 // get digits to be shifted out
867 extra_digits
= -expon
;
868 coeff
+= __bid_round_const_table
[rmode
][extra_digits
];
870 // get coeff*(2^M[extra_digits])/10^extra_digits
871 __mul_64x128_full (QH
, Q_low
, coeff
,
872 __bid_reciprocals10_128
[extra_digits
]);
874 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
875 amount
= __bid_recip_scale
[extra_digits
];
879 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
880 #ifndef IEEE_ROUND_NEAREST
881 if (rmode
== 0) //ROUNDING_TO_NEAREST
884 // check whether fractional part of initial_P/10^extra_digits is exactly .5
887 amount2
= 64 - amount
;
890 remainder_h
>>= amount2
;
891 remainder_h
= remainder_h
& QH
;
894 && (Q_low
.w
[1] < __bid_reciprocals10_128
[extra_digits
].w
[1]
895 || (Q_low
.w
[1] == __bid_reciprocals10_128
[extra_digits
].w
[1]
897 __bid_reciprocals10_128
[extra_digits
].w
[0]))) {
903 #ifdef SET_STATUS_FLAGS
905 if (is_inexact (fpsc
))
906 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
);
908 status
= INEXACT_EXCEPTION
;
910 remainder_h
= QH
<< (64 - amount
);
913 case ROUNDING_TO_NEAREST
:
914 case ROUNDING_TIES_AWAY
:
915 // test whether fractional part is 0
916 if (remainder_h
== 0x8000000000000000ull
917 && (Q_low
.w
[1] < __bid_reciprocals10_128
[extra_digits
].w
[1]
918 || (Q_low
.w
[1] == __bid_reciprocals10_128
[extra_digits
].w
[1]
920 __bid_reciprocals10_128
[extra_digits
].w
[0])))
921 status
= EXACT_STATUS
;
924 case ROUNDING_TO_ZERO
:
926 && (Q_low
.w
[1] < __bid_reciprocals10_128
[extra_digits
].w
[1]
927 || (Q_low
.w
[1] == __bid_reciprocals10_128
[extra_digits
].w
[1]
929 __bid_reciprocals10_128
[extra_digits
].w
[0])))
930 status
= EXACT_STATUS
;
934 __add_carry_out (Stemp
.w
[0], CY
, Q_low
.w
[0],
935 __bid_reciprocals10_128
[extra_digits
].w
[0]);
936 __add_carry_in_out (Stemp
.w
[1], carry
, Q_low
.w
[1],
937 __bid_reciprocals10_128
[extra_digits
].w
[1], CY
);
938 if ((remainder_h
>> (64 - amount
)) + carry
>=
939 (((UINT64
) 1) << amount
))
940 status
= EXACT_STATUS
;
943 if (status
!= EXACT_STATUS
)
944 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
| status
);
951 while (coeff
< 1000000000000000ull && expon
>= 3 * 256) {
953 coeff
= (coeff
<< 3) + (coeff
<< 1);
955 if (expon
> DECIMAL_MAX_EXPON_64
) {
956 #ifdef SET_STATUS_FLAGS
957 __set_status_flags (fpsc
, OVERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
960 r
= sgn
| INFINITY_MASK64
;
966 case ROUNDING_TO_ZERO
:
967 r
= sgn
| LARGEST_BID64
;
979 mask
<<= EXPONENT_SHIFT_SMALL64
;
981 // check whether coefficient fits in 10*5+3 bits
984 r
<<= EXPONENT_SHIFT_SMALL64
;
990 // eliminate the case coeff==10^16 after rounding
991 if (coeff
== 10000000000000000ull) {
993 r
<<= EXPONENT_SHIFT_SMALL64
;
994 r
|= (1000000000000000ull | sgn
);
999 r
<<= EXPONENT_SHIFT_LARGE64
;
1000 r
|= (sgn
| SPECIAL_ENCODING_MASK64
);
1001 // add coeff, without leading bits
1002 mask
= (mask
>> 2) - 1;
1013 // No overflow/underflow checking
1015 __BID_INLINE__ UINT64
1016 fast_get_BID64 (UINT64 sgn
, int expon
, UINT64 coeff
) {
1020 mask
<<= EXPONENT_SHIFT_SMALL64
;
1022 // check whether coefficient fits in 10*5+3 bits
1025 r
<<= EXPONENT_SHIFT_SMALL64
;
1031 // eliminate the case coeff==10^16 after rounding
1032 if (coeff
== 10000000000000000ull) {
1034 r
<<= EXPONENT_SHIFT_SMALL64
;
1035 r
|= (1000000000000000ull | sgn
);
1040 r
<<= EXPONENT_SHIFT_LARGE64
;
1041 r
|= (sgn
| SPECIAL_ENCODING_MASK64
);
1042 // add coeff, without leading bits
1043 mask
= (mask
>> 2) - 1;
1052 // no underflow checking
1054 __BID_INLINE__ UINT64
1055 fast_get_BID64_check_OF (UINT64 sgn
, int expon
, UINT64 coeff
, int rmode
,
1059 if (((unsigned) expon
) >= 3 * 256 - 1) {
1060 if ((expon
== 3 * 256 - 1) && coeff
== 10000000000000000ull) {
1062 coeff
= 1000000000000000ull;
1065 if (((unsigned) expon
) >= 3 * 256) {
1066 while (coeff
< 1000000000000000ull && expon
>= 3 * 256) {
1068 coeff
= (coeff
<< 3) + (coeff
<< 1);
1070 if (expon
> DECIMAL_MAX_EXPON_64
) {
1071 #ifdef SET_STATUS_FLAGS
1072 __set_status_flags (fpsc
,
1073 OVERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
1076 r
= sgn
| INFINITY_MASK64
;
1082 case ROUNDING_TO_ZERO
:
1083 r
= sgn
| LARGEST_BID64
;
1096 mask
<<= EXPONENT_SHIFT_SMALL64
;
1098 // check whether coefficient fits in 10*5+3 bits
1101 r
<<= EXPONENT_SHIFT_SMALL64
;
1107 // eliminate the case coeff==10^16 after rounding
1108 if (coeff
== 10000000000000000ull) {
1110 r
<<= EXPONENT_SHIFT_SMALL64
;
1111 r
|= (1000000000000000ull | sgn
);
1116 r
<<= EXPONENT_SHIFT_LARGE64
;
1117 r
|= (sgn
| SPECIAL_ENCODING_MASK64
);
1118 // add coeff, without leading bits
1119 mask
= (mask
>> 2) - 1;
1128 // No overflow/underflow checking
1129 // or checking for coefficients equal to 10^16 (after rounding)
1131 __BID_INLINE__ UINT64
1132 very_fast_get_BID64 (UINT64 sgn
, int expon
, UINT64 coeff
) {
1136 mask
<<= EXPONENT_SHIFT_SMALL64
;
1138 // check whether coefficient fits in 10*5+3 bits
1141 r
<<= EXPONENT_SHIFT_SMALL64
;
1147 r
<<= EXPONENT_SHIFT_LARGE64
;
1148 r
|= (sgn
| SPECIAL_ENCODING_MASK64
);
1149 // add coeff, without leading bits
1150 mask
= (mask
>> 2) - 1;
1158 // No overflow/underflow checking or checking for coefficients above 2^53
1160 __BID_INLINE__ UINT64
1161 very_fast_get_BID64_small_mantissa (UINT64 sgn
, int expon
, UINT64 coeff
) {
1166 r
<<= EXPONENT_SHIFT_SMALL64
;
1173 // This pack macro is used when underflow is known to occur
1175 __BID_INLINE__ UINT64
1176 get_BID64_UF (UINT64 sgn
, int expon
, UINT64 coeff
, UINT64 R
, int rmode
,
1178 UINT128 C128
, Q_low
, Stemp
;
1179 UINT64 C64
, remainder_h
, QH
, carry
, CY
;
1180 int extra_digits
, amount
, amount2
;
1184 if (expon
+ MAX_FORMAT_DIGITS
< 0) {
1185 #ifdef SET_STATUS_FLAGS
1186 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
1188 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1189 #ifndef IEEE_ROUND_NEAREST
1190 if (rmode
== ROUNDING_DOWN
&& sgn
)
1191 return 0x8000000000000001ull
;
1192 if (rmode
== ROUNDING_UP
&& !sgn
)
1200 coeff
= (coeff
<< 3) + (coeff
<< 1);
1201 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1202 #ifndef IEEE_ROUND_NEAREST
1203 if (sgn
&& (unsigned) (rmode
- 1) < 2)
1209 // get digits to be shifted out
1210 extra_digits
= 1 - expon
;
1211 C128
.w
[0] = coeff
+ __bid_round_const_table
[rmode
][extra_digits
];
1213 // get coeff*(2^M[extra_digits])/10^extra_digits
1214 __mul_64x128_full (QH
, Q_low
, C128
.w
[0],
1215 __bid_reciprocals10_128
[extra_digits
]);
1217 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1218 amount
= __bid_recip_scale
[extra_digits
];
1221 //__shr_128(C128, Q_high, amount);
1223 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1224 #ifndef IEEE_ROUND_NEAREST
1225 if (rmode
== 0) //ROUNDING_TO_NEAREST
1228 // check whether fractional part of initial_P/10^extra_digits is exactly .5
1231 amount2
= 64 - amount
;
1234 remainder_h
>>= amount2
;
1235 remainder_h
= remainder_h
& QH
;
1238 && (Q_low
.w
[1] < __bid_reciprocals10_128
[extra_digits
].w
[1]
1239 || (Q_low
.w
[1] == __bid_reciprocals10_128
[extra_digits
].w
[1]
1241 __bid_reciprocals10_128
[extra_digits
].w
[0]))) {
1247 #ifdef SET_STATUS_FLAGS
1249 if (is_inexact (fpsc
))
1250 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
);
1252 status
= INEXACT_EXCEPTION
;
1254 remainder_h
= QH
<< (64 - amount
);
1257 case ROUNDING_TO_NEAREST
:
1258 case ROUNDING_TIES_AWAY
:
1259 // test whether fractional part is 0
1260 if (remainder_h
== 0x8000000000000000ull
1261 && (Q_low
.w
[1] < __bid_reciprocals10_128
[extra_digits
].w
[1]
1262 || (Q_low
.w
[1] == __bid_reciprocals10_128
[extra_digits
].w
[1]
1264 __bid_reciprocals10_128
[extra_digits
].w
[0])))
1265 status
= EXACT_STATUS
;
1268 case ROUNDING_TO_ZERO
:
1270 && (Q_low
.w
[1] < __bid_reciprocals10_128
[extra_digits
].w
[1]
1271 || (Q_low
.w
[1] == __bid_reciprocals10_128
[extra_digits
].w
[1]
1273 __bid_reciprocals10_128
[extra_digits
].w
[0])))
1274 status
= EXACT_STATUS
;
1278 __add_carry_out (Stemp
.w
[0], CY
, Q_low
.w
[0],
1279 __bid_reciprocals10_128
[extra_digits
].w
[0]);
1280 __add_carry_in_out (Stemp
.w
[1], carry
, Q_low
.w
[1],
1281 __bid_reciprocals10_128
[extra_digits
].w
[1], CY
);
1282 if ((remainder_h
>> (64 - amount
)) + carry
>=
1283 (((UINT64
) 1) << amount
))
1284 status
= EXACT_STATUS
;
1287 if (status
!= EXACT_STATUS
)
1288 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
| status
);
1300 // This pack macro doesnot check for coefficients above 2^53
1302 __BID_INLINE__ UINT64
1303 get_BID64_small_mantissa (UINT64 sgn
, int expon
, UINT64 coeff
,
1304 int rmode
, unsigned *fpsc
) {
1305 UINT128 C128
, Q_low
, Stemp
;
1306 UINT64 r
, mask
, C64
, remainder_h
, QH
, carry
, CY
;
1307 int extra_digits
, amount
, amount2
;
1310 // check for possible underflow/overflow
1311 if (((unsigned) expon
) >= 3 * 256) {
1314 if (expon
+ MAX_FORMAT_DIGITS
< 0) {
1315 #ifdef SET_STATUS_FLAGS
1316 __set_status_flags (fpsc
,
1317 UNDERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
1319 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1320 #ifndef IEEE_ROUND_NEAREST
1321 if (rmode
== ROUNDING_DOWN
&& sgn
)
1322 return 0x8000000000000001ull
;
1323 if (rmode
== ROUNDING_UP
&& !sgn
)
1330 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1331 #ifndef IEEE_ROUND_NEAREST
1332 if (sgn
&& (unsigned) (rmode
- 1) < 2)
1336 // get digits to be shifted out
1337 extra_digits
= -expon
;
1338 C128
.w
[0] = coeff
+ __bid_round_const_table
[rmode
][extra_digits
];
1340 // get coeff*(2^M[extra_digits])/10^extra_digits
1341 __mul_64x128_full (QH
, Q_low
, C128
.w
[0],
1342 __bid_reciprocals10_128
[extra_digits
]);
1344 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1345 amount
= __bid_recip_scale
[extra_digits
];
1349 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1350 #ifndef IEEE_ROUND_NEAREST
1351 if (rmode
== 0) //ROUNDING_TO_NEAREST
1354 // check whether fractional part of initial_P/10^extra_digits is exactly .5
1357 amount2
= 64 - amount
;
1360 remainder_h
>>= amount2
;
1361 remainder_h
= remainder_h
& QH
;
1364 && (Q_low
.w
[1] < __bid_reciprocals10_128
[extra_digits
].w
[1]
1365 || (Q_low
.w
[1] == __bid_reciprocals10_128
[extra_digits
].w
[1]
1367 __bid_reciprocals10_128
[extra_digits
].w
[0]))) {
1373 #ifdef SET_STATUS_FLAGS
1375 if (is_inexact (fpsc
))
1376 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
);
1378 status
= INEXACT_EXCEPTION
;
1380 remainder_h
= QH
<< (64 - amount
);
1383 case ROUNDING_TO_NEAREST
:
1384 case ROUNDING_TIES_AWAY
:
1385 // test whether fractional part is 0
1386 if (remainder_h
== 0x8000000000000000ull
1387 && (Q_low
.w
[1] < __bid_reciprocals10_128
[extra_digits
].w
[1]
1388 || (Q_low
.w
[1] == __bid_reciprocals10_128
[extra_digits
].w
[1]
1390 __bid_reciprocals10_128
[extra_digits
].w
[0])))
1391 status
= EXACT_STATUS
;
1394 case ROUNDING_TO_ZERO
:
1396 && (Q_low
.w
[1] < __bid_reciprocals10_128
[extra_digits
].w
[1]
1397 || (Q_low
.w
[1] == __bid_reciprocals10_128
[extra_digits
].w
[1]
1399 __bid_reciprocals10_128
[extra_digits
].w
[0])))
1400 status
= EXACT_STATUS
;
1404 __add_carry_out (Stemp
.w
[0], CY
, Q_low
.w
[0],
1405 __bid_reciprocals10_128
[extra_digits
].w
[0]);
1406 __add_carry_in_out (Stemp
.w
[1], carry
, Q_low
.w
[1],
1407 __bid_reciprocals10_128
[extra_digits
].w
[1], CY
);
1408 if ((remainder_h
>> (64 - amount
)) + carry
>=
1409 (((UINT64
) 1) << amount
))
1410 status
= EXACT_STATUS
;
1413 if (status
!= EXACT_STATUS
)
1414 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
| status
);
1422 while (coeff
< 1000000000000000ull && expon
>= 3 * 256) {
1424 coeff
= (coeff
<< 3) + (coeff
<< 1);
1426 if (expon
> DECIMAL_MAX_EXPON_64
) {
1427 #ifdef SET_STATUS_FLAGS
1428 __set_status_flags (fpsc
, OVERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
1431 r
= sgn
| INFINITY_MASK64
;
1437 case ROUNDING_TO_ZERO
:
1438 r
= sgn
| LARGEST_BID64
;
1448 mask
<<= EXPONENT_SHIFT_SMALL64
;
1449 if (coeff
>= mask
) {
1451 r
<<= EXPONENT_SHIFT_LARGE64
;
1452 r
|= (sgn
| SPECIAL_ENCODING_MASK64
);
1453 // add coeff, without leading bits
1454 mask
= (mask
>> 2) - 1;
1463 r
<<= EXPONENT_SHIFT_SMALL64
;
1470 /*****************************************************************************
1472 * BID128 pack/unpack macros
1474 *****************************************************************************/
1477 // Macro for handling BID128 underflow
1478 // sticky bit given as additional argument
1480 __BID_INLINE__ UINT128
*
1481 handle_UF_128_rem (UINT128
* pres
, UINT64 sgn
, int expon
, UINT128 CQ
,
1482 UINT64 R
, unsigned *prounding_mode
, unsigned *fpsc
) {
1483 UINT128 T128
, TP128
, Qh
, Ql
, Qh1
, Stemp
, Tmp
, Tmp1
, CQ2
, CQ8
;
1486 unsigned rmode
, status
;
1489 if (expon
+ MAX_FORMAT_DIGITS_128
< 0) {
1490 #ifdef SET_STATUS_FLAGS
1491 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
1495 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1496 #ifndef IEEE_ROUND_NEAREST
1497 if ((sgn
&& *prounding_mode
== ROUNDING_DOWN
)
1498 || (!sgn
&& *prounding_mode
== ROUNDING_UP
))
1505 CQ2
.w
[1] = (CQ
.w
[1] << 1) | (CQ
.w
[0] >> 63);
1506 CQ2
.w
[0] = CQ
.w
[0] << 1;
1507 CQ8
.w
[1] = (CQ
.w
[1] << 3) | (CQ
.w
[0] >> 61);
1508 CQ8
.w
[0] = CQ
.w
[0] << 3;
1509 __add_128_128 (CQ
, CQ2
, CQ8
);
1516 // add rounding constant to CQ
1517 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1518 #ifndef IEEE_ROUND_NEAREST
1519 rmode
= *prounding_mode
;
1520 if (sgn
&& (unsigned) (rmode
- 1) < 2)
1528 T128
= __bid_round_const_table_128
[rmode
][ed2
];
1529 __add_carry_out (CQ
.w
[0], carry
, T128
.w
[0], CQ
.w
[0]);
1530 CQ
.w
[1] = CQ
.w
[1] + T128
.w
[1] + carry
;
1532 TP128
= __bid_reciprocals10_128
[ed2
];
1533 __mul_128x128_full (Qh
, Ql
, CQ
, TP128
);
1534 amount
= __bid_recip_scale
[ed2
];
1537 CQ
.w
[0] = Qh
.w
[1] >> (amount
- 64);
1540 __shr_128 (CQ
, Qh
, amount
);
1545 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1546 #ifndef IEEE_ROUND_NEAREST
1547 if (!(*prounding_mode
))
1550 // check whether fractional part of initial_P/10^ed1 is exactly .5
1553 __shl_128_long (Qh1
, Qh
, (128 - amount
));
1555 if (!Qh1
.w
[1] && !Qh1
.w
[0]
1556 && (Ql
.w
[1] < __bid_reciprocals10_128
[ed2
].w
[1]
1557 || (Ql
.w
[1] == __bid_reciprocals10_128
[ed2
].w
[1]
1558 && Ql
.w
[0] < __bid_reciprocals10_128
[ed2
].w
[0]))) {
1564 #ifdef SET_STATUS_FLAGS
1566 if (is_inexact (fpsc
))
1567 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
);
1569 status
= INEXACT_EXCEPTION
;
1571 __shl_128_long (Qh1
, Qh
, (128 - amount
));
1574 case ROUNDING_TO_NEAREST
:
1575 case ROUNDING_TIES_AWAY
:
1576 // test whether fractional part is 0
1577 if (Qh1
.w
[1] == 0x8000000000000000ull
&& (!Qh1
.w
[0])
1578 && (Ql
.w
[1] < __bid_reciprocals10_128
[ed2
].w
[1]
1579 || (Ql
.w
[1] == __bid_reciprocals10_128
[ed2
].w
[1]
1580 && Ql
.w
[0] < __bid_reciprocals10_128
[ed2
].w
[0])))
1581 status
= EXACT_STATUS
;
1584 case ROUNDING_TO_ZERO
:
1585 if ((!Qh1
.w
[1]) && (!Qh1
.w
[0])
1586 && (Ql
.w
[1] < __bid_reciprocals10_128
[ed2
].w
[1]
1587 || (Ql
.w
[1] == __bid_reciprocals10_128
[ed2
].w
[1]
1588 && Ql
.w
[0] < __bid_reciprocals10_128
[ed2
].w
[0])))
1589 status
= EXACT_STATUS
;
1593 __add_carry_out (Stemp
.w
[0], CY
, Ql
.w
[0],
1594 __bid_reciprocals10_128
[ed2
].w
[0]);
1595 __add_carry_in_out (Stemp
.w
[1], carry
, Ql
.w
[1],
1596 __bid_reciprocals10_128
[ed2
].w
[1], CY
);
1597 __shr_128_long (Qh
, Qh1
, (128 - amount
));
1600 __shl_128_long (Tmp1
, Tmp
, amount
);
1602 if (Qh
.w
[0] < carry
)
1604 if (__unsigned_compare_ge_128 (Qh
, Tmp1
))
1605 status
= EXACT_STATUS
;
1608 if (status
!= EXACT_STATUS
)
1609 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
| status
);
1614 pres
->w
[1] = sgn
| CQ
.w
[1];
1615 pres
->w
[0] = CQ
.w
[0];
1623 // Macro for handling BID128 underflow
1625 __BID_INLINE__ UINT128
*
1626 handle_UF_128 (UINT128
* pres
, UINT64 sgn
, int expon
, UINT128 CQ
,
1627 unsigned *prounding_mode
, unsigned *fpsc
) {
1628 UINT128 T128
, TP128
, Qh
, Ql
, Qh1
, Stemp
, Tmp
, Tmp1
;
1631 unsigned rmode
, status
;
1634 if (expon
+ MAX_FORMAT_DIGITS_128
< 0) {
1635 #ifdef SET_STATUS_FLAGS
1636 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
1640 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1641 #ifndef IEEE_ROUND_NEAREST
1642 if ((sgn
&& *prounding_mode
== ROUNDING_DOWN
)
1643 || (!sgn
&& *prounding_mode
== ROUNDING_UP
))
1651 // add rounding constant to CQ
1652 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1653 #ifndef IEEE_ROUND_NEAREST
1654 rmode
= *prounding_mode
;
1655 if (sgn
&& (unsigned) (rmode
- 1) < 2)
1664 T128
= __bid_round_const_table_128
[rmode
][ed2
];
1665 __add_carry_out (CQ
.w
[0], carry
, T128
.w
[0], CQ
.w
[0]);
1666 CQ
.w
[1] = CQ
.w
[1] + T128
.w
[1] + carry
;
1668 TP128
= __bid_reciprocals10_128
[ed2
];
1669 __mul_128x128_full (Qh
, Ql
, CQ
, TP128
);
1670 amount
= __bid_recip_scale
[ed2
];
1673 CQ
.w
[0] = Qh
.w
[1] >> (amount
- 64);
1676 __shr_128 (CQ
, Qh
, amount
);
1681 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1682 #ifndef IEEE_ROUND_NEAREST
1683 if (!(*prounding_mode
))
1686 // check whether fractional part of initial_P/10^ed1 is exactly .5
1689 __shl_128_long (Qh1
, Qh
, (128 - amount
));
1691 if (!Qh1
.w
[1] && !Qh1
.w
[0]
1692 && (Ql
.w
[1] < __bid_reciprocals10_128
[ed2
].w
[1]
1693 || (Ql
.w
[1] == __bid_reciprocals10_128
[ed2
].w
[1]
1694 && Ql
.w
[0] < __bid_reciprocals10_128
[ed2
].w
[0]))) {
1700 #ifdef SET_STATUS_FLAGS
1702 if (is_inexact (fpsc
))
1703 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
);
1705 status
= INEXACT_EXCEPTION
;
1707 __shl_128_long (Qh1
, Qh
, (128 - amount
));
1710 case ROUNDING_TO_NEAREST
:
1711 case ROUNDING_TIES_AWAY
:
1712 // test whether fractional part is 0
1713 if (Qh1
.w
[1] == 0x8000000000000000ull
&& (!Qh1
.w
[0])
1714 && (Ql
.w
[1] < __bid_reciprocals10_128
[ed2
].w
[1]
1715 || (Ql
.w
[1] == __bid_reciprocals10_128
[ed2
].w
[1]
1716 && Ql
.w
[0] < __bid_reciprocals10_128
[ed2
].w
[0])))
1717 status
= EXACT_STATUS
;
1720 case ROUNDING_TO_ZERO
:
1721 if ((!Qh1
.w
[1]) && (!Qh1
.w
[0])
1722 && (Ql
.w
[1] < __bid_reciprocals10_128
[ed2
].w
[1]
1723 || (Ql
.w
[1] == __bid_reciprocals10_128
[ed2
].w
[1]
1724 && Ql
.w
[0] < __bid_reciprocals10_128
[ed2
].w
[0])))
1725 status
= EXACT_STATUS
;
1729 __add_carry_out (Stemp
.w
[0], CY
, Ql
.w
[0],
1730 __bid_reciprocals10_128
[ed2
].w
[0]);
1731 __add_carry_in_out (Stemp
.w
[1], carry
, Ql
.w
[1],
1732 __bid_reciprocals10_128
[ed2
].w
[1], CY
);
1733 __shr_128_long (Qh
, Qh1
, (128 - amount
));
1736 __shl_128_long (Tmp1
, Tmp
, amount
);
1738 if (Qh
.w
[0] < carry
)
1740 if (__unsigned_compare_ge_128 (Qh
, Tmp1
))
1741 status
= EXACT_STATUS
;
1744 if (status
!= EXACT_STATUS
)
1745 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
| status
);
1750 pres
->w
[1] = sgn
| CQ
.w
[1];
1751 pres
->w
[0] = CQ
.w
[0];
1760 // BID128 unpack, input passed by value
1762 __BID_INLINE__ UINT64
1763 unpack_BID128_value (UINT64
* psign_x
, int *pexponent_x
,
1764 UINT128
* pcoefficient_x
, UINT128 x
) {
1765 UINT128 coeff
, T33
, T34
;
1768 *psign_x
= (x
.w
[1]) & 0x8000000000000000ull
;
1770 // special encodings
1771 if ((x
.w
[1] & INFINITY_MASK64
) >= SPECIAL_ENCODING_MASK64
) {
1772 if ((x
.w
[1] & INFINITY_MASK64
) < INFINITY_MASK64
) {
1773 // non-canonical input
1774 pcoefficient_x
->w
[0] = 0;
1775 pcoefficient_x
->w
[1] = 0;
1776 ex
= (x
.w
[1]) >> 47;
1777 *pexponent_x
= ((int) ex
) & EXPONENT_MASK128
;
1781 T33
= __bid_power10_table_128
[33];
1782 coeff
.w
[0] = x
.w
[0];
1783 coeff
.w
[1] = (x
.w
[1]) & LARGE_COEFF_MASK128
;
1784 pcoefficient_x
->w
[0] = x
.w
[0];
1785 pcoefficient_x
->w
[1] = x
.w
[1];
1786 if (__unsigned_compare_ge_128 (coeff
, T33
)) // non-canonical
1787 pcoefficient_x
->w
[1] &= (~LARGE_COEFF_MASK128
);
1788 return 0; // NaN or Infinity
1791 coeff
.w
[0] = x
.w
[0];
1792 coeff
.w
[1] = (x
.w
[1]) & SMALL_COEFF_MASK128
;
1795 T34
= __bid_power10_table_128
[34];
1796 // check for non-canonical values
1797 if (__unsigned_compare_ge_128 (coeff
, T34
))
1798 coeff
.w
[0] = coeff
.w
[1] = 0;
1800 pcoefficient_x
->w
[0] = coeff
.w
[0];
1801 pcoefficient_x
->w
[1] = coeff
.w
[1];
1803 ex
= (x
.w
[1]) >> 49;
1804 *pexponent_x
= ((int) ex
) & EXPONENT_MASK128
;
1806 return coeff
.w
[0] | coeff
.w
[1];
1811 // BID128 unpack, input pased by reference
1813 __BID_INLINE__ UINT64
1814 unpack_BID128 (UINT64
* psign_x
, int *pexponent_x
,
1815 UINT128
* pcoefficient_x
, UINT128
* px
) {
1816 UINT128 coeff
, T33
, T34
;
1819 *psign_x
= (px
->w
[1]) & 0x8000000000000000ull
;
1821 // special encodings
1822 if ((px
->w
[1] & INFINITY_MASK64
) >= SPECIAL_ENCODING_MASK64
) {
1823 if ((px
->w
[1] & INFINITY_MASK64
) < INFINITY_MASK64
) {
1824 // non-canonical input
1825 pcoefficient_x
->w
[0] = 0;
1826 pcoefficient_x
->w
[1] = 0;
1827 ex
= (px
->w
[1]) >> 47;
1828 *pexponent_x
= ((int) ex
) & EXPONENT_MASK128
;
1832 T33
= __bid_power10_table_128
[33];
1833 coeff
.w
[0] = px
->w
[0];
1834 coeff
.w
[1] = (px
->w
[1]) & LARGE_COEFF_MASK128
;
1835 pcoefficient_x
->w
[0] = px
->w
[0];
1836 pcoefficient_x
->w
[1] = px
->w
[1];
1837 if (__unsigned_compare_ge_128 (coeff
, T33
)) // non-canonical
1838 pcoefficient_x
->w
[1] &= (~LARGE_COEFF_MASK128
);
1839 return 0; // NaN or Infinity
1842 coeff
.w
[0] = px
->w
[0];
1843 coeff
.w
[1] = (px
->w
[1]) & SMALL_COEFF_MASK128
;
1846 T34
= __bid_power10_table_128
[34];
1847 // check for non-canonical values
1848 if (__unsigned_compare_ge_128 (coeff
, T34
))
1849 coeff
.w
[0] = coeff
.w
[1] = 0;
1851 pcoefficient_x
->w
[0] = coeff
.w
[0];
1852 pcoefficient_x
->w
[1] = coeff
.w
[1];
1854 ex
= (px
->w
[1]) >> 49;
1855 *pexponent_x
= ((int) ex
) & EXPONENT_MASK128
;
1857 return coeff
.w
[0] | coeff
.w
[1];
1861 // Pack macro checks for overflow, but not underflow
1863 __BID_INLINE__ UINT128
*
1864 get_BID128_very_fast_OF (UINT128
* pres
, UINT64 sgn
, int expon
,
1865 UINT128 coeff
, unsigned *prounding_mode
,
1870 if ((unsigned) expon
> DECIMAL_MAX_EXPON_128
) {
1872 if (expon
- MAX_FORMAT_DIGITS_128
<= DECIMAL_MAX_EXPON_128
) {
1873 T
= __bid_power10_table_128
[MAX_FORMAT_DIGITS_128
- 1];
1874 while (__unsigned_compare_gt_128 (T
, coeff
)
1875 && expon
> DECIMAL_MAX_EXPON_128
) {
1877 (coeff
.w
[1] << 3) + (coeff
.w
[1] << 1) + (coeff
.w
[0] >> 61) +
1879 tmp2
= coeff
.w
[0] << 3;
1880 coeff
.w
[0] = (coeff
.w
[0] << 1) + tmp2
;
1881 if (coeff
.w
[0] < tmp2
)
1887 if ((unsigned) expon
> DECIMAL_MAX_EXPON_128
) {
1889 #ifdef SET_STATUS_FLAGS
1890 __set_status_flags (fpsc
, OVERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
1892 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1893 #ifndef IEEE_ROUND_NEAREST
1894 if (*prounding_mode
== ROUNDING_TO_ZERO
1895 || (sgn
&& *prounding_mode
== ROUNDING_UP
) || (!sgn
1901 pres
->w
[1] = sgn
| LARGEST_BID128_HIGH
;
1902 pres
->w
[0] = LARGEST_BID128_LOW
;
1907 pres
->w
[1] = sgn
| INFINITY_MASK64
;
1914 pres
->w
[0] = coeff
.w
[0];
1917 pres
->w
[1] = sgn
| tmp
| coeff
.w
[1];
1924 // No overflow/underflow checks
1925 // No checking for coefficient == 10^34 (rounding artifact)
1927 __BID_INLINE__ UINT128
*
1928 get_BID128_very_fast (UINT128
* pres
, UINT64 sgn
, int expon
,
1932 pres
->w
[0] = coeff
.w
[0];
1935 pres
->w
[1] = sgn
| tmp
| coeff
.w
[1];
1941 // No overflow/underflow checks
1943 __BID_INLINE__ UINT128
*
1944 get_BID128_fast (UINT128
* pres
, UINT64 sgn
, int expon
, UINT128 coeff
) {
1948 if (coeff
.w
[1] == 0x0001ed09bead87c0ull
1949 && coeff
.w
[0] == 0x378d8e6400000000ull
) {
1951 // set coefficient to 10^33
1952 coeff
.w
[1] = 0x0000314dc6448d93ull
;
1953 coeff
.w
[0] = 0x38c15b0a00000000ull
;
1956 pres
->w
[0] = coeff
.w
[0];
1959 pres
->w
[1] = sgn
| tmp
| coeff
.w
[1];
1965 // General BID128 pack macro
1967 __BID_INLINE__ UINT128
*
1968 get_BID128 (UINT128
* pres
, UINT64 sgn
, int expon
, UINT128 coeff
,
1969 unsigned *prounding_mode
, unsigned *fpsc
) {
1974 if (coeff
.w
[1] == 0x0001ed09bead87c0ull
1975 && coeff
.w
[0] == 0x378d8e6400000000ull
) {
1977 // set coefficient to 10^33
1978 coeff
.w
[1] = 0x0000314dc6448d93ull
;
1979 coeff
.w
[0] = 0x38c15b0a00000000ull
;
1982 if ((unsigned) expon
> DECIMAL_MAX_EXPON_128
) {
1985 return handle_UF_128 (pres
, sgn
, expon
, coeff
, prounding_mode
,
1988 if (expon
- MAX_FORMAT_DIGITS_128
<= DECIMAL_MAX_EXPON_128
) {
1989 T
= __bid_power10_table_128
[MAX_FORMAT_DIGITS_128
- 1];
1990 while (__unsigned_compare_gt_128 (T
, coeff
)
1991 && expon
> DECIMAL_MAX_EXPON_128
) {
1993 (coeff
.w
[1] << 3) + (coeff
.w
[1] << 1) + (coeff
.w
[0] >> 61) +
1995 tmp2
= coeff
.w
[0] << 3;
1996 coeff
.w
[0] = (coeff
.w
[0] << 1) + tmp2
;
1997 if (coeff
.w
[0] < tmp2
)
2003 if ((unsigned) expon
> DECIMAL_MAX_EXPON_128
) {
2005 #ifdef SET_STATUS_FLAGS
2006 __set_status_flags (fpsc
, OVERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
2008 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2009 #ifndef IEEE_ROUND_NEAREST
2010 if (*prounding_mode
== ROUNDING_TO_ZERO
2011 || (sgn
&& *prounding_mode
== ROUNDING_UP
) || (!sgn
2017 pres
->w
[1] = sgn
| LARGEST_BID128_HIGH
;
2018 pres
->w
[0] = LARGEST_BID128_LOW
;
2023 pres
->w
[1] = sgn
| INFINITY_MASK64
;
2030 pres
->w
[0] = coeff
.w
[0];
2033 pres
->w
[1] = sgn
| tmp
| coeff
.w
[1];
2040 // Macro used for conversions from string
2041 // (no additional arguments given for rounding mode, status flags)
2043 __BID_INLINE__ UINT128
*
2044 get_BID128_string (UINT128
* pres
, UINT64 sgn
, int expon
, UINT128 coeff
) {
2047 unsigned rmode
= 0, status
;
2050 if (coeff
.w
[1] == 0x0001ed09bead87c0ull
2051 && coeff
.w
[0] == 0x378d8e6400000000ull
) {
2053 // set coefficient to 10^33
2054 coeff
.w
[1] = 0x0000314dc6448d93ull
;
2055 coeff
.w
[0] = 0x38c15b0a00000000ull
;
2058 if ((unsigned) expon
> DECIMAL_MAX_EXPON_128
) {
2061 return handle_UF_128 (pres
, sgn
, expon
, coeff
, &rmode
, &status
);
2065 if (expon
< DECIMAL_MAX_EXPON_128
+ 34) {
2066 while (expon
> DECIMAL_MAX_EXPON_128
&&
2067 (coeff
.w
[1] < __bid_power10_table_128
[33].w
[1] ||
2068 (coeff
.w
[1] == __bid_power10_table_128
[33].w
[1]
2069 && coeff
.w
[0] < __bid_power10_table_128
[33].w
[0]))) {
2070 D2
.w
[1] = (coeff
.w
[1] << 1) | (coeff
.w
[0] >> 63);
2071 D2
.w
[0] = coeff
.w
[0] << 1;
2072 D8
.w
[1] = (coeff
.w
[1] << 3) | (coeff
.w
[0] >> 61);
2073 D8
.w
[0] = coeff
.w
[0] << 3;
2075 __add_128_128 (coeff
, D2
, D8
);
2078 } else if (!(coeff
.w
[0] | coeff
.w
[1]))
2079 expon
= DECIMAL_MAX_EXPON_128
;
2081 if (expon
> DECIMAL_MAX_EXPON_128
) {
2082 pres
->w
[1] = sgn
| INFINITY_MASK64
;
2087 pres
->w
[1] = LARGEST_BID128_HIGH
;
2088 pres
->w
[0] = LARGEST_BID128_LOW
;
2091 case ROUNDING_TO_ZERO
:
2092 pres
->w
[1] = sgn
| LARGEST_BID128_HIGH
;
2093 pres
->w
[0] = LARGEST_BID128_LOW
;
2098 pres
->w
[1] = sgn
| LARGEST_BID128_HIGH
;
2099 pres
->w
[0] = LARGEST_BID128_LOW
;
2108 pres
->w
[0] = coeff
.w
[0];
2111 pres
->w
[1] = sgn
| tmp
| coeff
.w
[1];
2118 /*****************************************************************************
2120 * BID32 pack/unpack macros
2122 *****************************************************************************/
2125 __BID_INLINE__ UINT32
2126 unpack_BID32 (UINT32
* psign_x
, int *pexponent_x
,
2127 UINT32
* pcoefficient_x
, UINT32 x
) {
2130 *psign_x
= x
& 0x80000000;
2132 if ((x
& SPECIAL_ENCODING_MASK32
) == SPECIAL_ENCODING_MASK32
) {
2133 // special encodings
2134 if ((x
& INFINITY_MASK32
) == INFINITY_MASK32
)
2135 return 0; // NaN or Infinity
2138 *pcoefficient_x
= (x
& SMALL_COEFF_MASK32
) | LARGE_COEFF_HIGH_BIT32
;
2139 // check for non-canonical value
2140 if (*pcoefficient_x
>= 10000000)
2141 *pcoefficient_x
= 0;
2144 *pexponent_x
= tmp
& EXPONENT_MASK32
;
2149 *pexponent_x
= tmp
& EXPONENT_MASK32
;
2151 *pcoefficient_x
= (x
& LARGE_COEFF_MASK32
);
2153 return *pcoefficient_x
;
2157 // General pack macro for BID32
2159 __BID_INLINE__ UINT32
2160 get_BID32 (UINT32 sgn
, int expon
, UINT64 coeff
, int rmode
,
2163 UINT64 C64
, remainder_h
, carry
, Stemp
;
2165 int extra_digits
, amount
, amount2
;
2168 if (coeff
> 9999999ull) {
2172 // check for possible underflow/overflow
2173 if (((unsigned) expon
) > DECIMAL_MAX_EXPON_32
) {
2176 if (expon
+ MAX_FORMAT_DIGITS_32
< 0) {
2177 #ifdef SET_STATUS_FLAGS
2178 __set_status_flags (fpsc
,
2179 UNDERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
2181 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2182 #ifndef IEEE_ROUND_NEAREST
2183 if (rmode
== ROUNDING_DOWN
&& sgn
)
2185 if (rmode
== ROUNDING_UP
&& !sgn
)
2192 // get digits to be shifted out
2193 #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
2196 #ifdef IEEE_ROUND_NEAREST
2200 extra_digits
= -expon
;
2201 coeff
+= __bid_round_const_table
[rmode
][extra_digits
];
2203 // get coeff*(2^M[extra_digits])/10^extra_digits
2204 __mul_64x64_to_128 (Q
, coeff
, __bid_reciprocals10_64
[extra_digits
]);
2206 // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
2207 amount
= __bid_short_recip_scale
[extra_digits
];
2209 C64
= Q
.w
[1] >> amount
;
2211 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2212 #ifndef IEEE_ROUND_NEAREST
2213 if (rmode
== 0) //ROUNDING_TO_NEAREST
2216 // check whether fractional part of initial_P/10^extra_digits is exactly .5
2219 amount2
= 64 - amount
;
2222 remainder_h
>>= amount2
;
2223 remainder_h
= remainder_h
& Q
.w
[1];
2225 if (!remainder_h
&& (Q
.w
[0] < __bid_reciprocals10_64
[extra_digits
])) {
2231 #ifdef SET_STATUS_FLAGS
2233 if (is_inexact (fpsc
))
2234 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
);
2236 status
= INEXACT_EXCEPTION
;
2238 remainder_h
= Q
.w
[1] << (64 - amount
);
2241 case ROUNDING_TO_NEAREST
:
2242 case ROUNDING_TIES_AWAY
:
2243 // test whether fractional part is 0
2244 if (remainder_h
== 0x8000000000000000ull
2245 && (Q
.w
[0] < __bid_reciprocals10_64
[extra_digits
]))
2246 status
= EXACT_STATUS
;
2249 case ROUNDING_TO_ZERO
:
2250 if (!remainder_h
&& (Q
.w
[0] < __bid_reciprocals10_64
[extra_digits
]))
2251 status
= EXACT_STATUS
;
2255 __add_carry_out (Stemp
, carry
, Q
.w
[0],
2256 __bid_reciprocals10_64
[extra_digits
]);
2257 if ((remainder_h
>> (64 - amount
)) + carry
>=
2258 (((UINT64
) 1) << amount
))
2259 status
= EXACT_STATUS
;
2262 if (status
!= EXACT_STATUS
)
2263 __set_status_flags (fpsc
, UNDERFLOW_EXCEPTION
| status
);
2268 return sgn
| (UINT32
) C64
;
2271 while (coeff
< 1000000 && expon
> DECIMAL_MAX_EXPON_32
) {
2272 coeff
= (coeff
<< 3) + (coeff
<< 1);
2275 if (((unsigned) expon
) > DECIMAL_MAX_EXPON_32
) {
2276 __set_status_flags (fpsc
, OVERFLOW_EXCEPTION
| INEXACT_EXCEPTION
);
2278 r
= sgn
| INFINITY_MASK32
;
2284 case ROUNDING_TO_ZERO
:
2285 r
= sgn
| LARGEST_BID32
;
2290 r
= sgn
| LARGEST_BID32
;
2298 // check whether coefficient fits in DECIMAL_COEFF_FIT bits
2302 r
|= ((UINT32
) coeff
| sgn
);
2309 r
|= (sgn
| SPECIAL_ENCODING_MASK32
);
2310 // add coeff, without leading bits
2311 mask
= (1 << 21) - 1;
2312 r
|= (((UINT32
) coeff
) & mask
);
2320 // no overflow/underflow checks
2322 __BID_INLINE__ UINT32
2323 very_fast_get_BID32 (UINT32 sgn
, int expon
, UINT32 coeff
) {
2328 // check whether coefficient fits in 10*2+3 bits
2338 r
|= (sgn
| SPECIAL_ENCODING_MASK32
);
2339 // add coeff, without leading bits
2340 mask
= (1 << 21) - 1;
2349 /*************************************************************
2351 *************************************************************/
2363 #define MASK_STEERING_BITS 0x6000000000000000ull
2364 #define MASK_BINARY_EXPONENT1 0x7fe0000000000000ull
2365 #define MASK_BINARY_SIG1 0x001fffffffffffffull
2366 #define MASK_BINARY_EXPONENT2 0x1ff8000000000000ull
2367 //used to take G[2:w+3] (sec 3.3)
2368 #define MASK_BINARY_SIG2 0x0007ffffffffffffull
2369 //used to mask out G4:T0 (sec 3.3)
2370 #define MASK_BINARY_OR2 0x0020000000000000ull
2371 //used to prefix 8+G4 to T (sec 3.3)
2372 #define UPPER_EXPON_LIMIT 51
2373 #define MASK_EXP 0x7ffe000000000000ull
2374 #define MASK_SPECIAL 0x7800000000000000ull
2375 #define MASK_NAN 0x7c00000000000000ull
2376 #define MASK_SNAN 0x7e00000000000000ull
2377 #define MASK_ANY_INF 0x7c00000000000000ull
2378 #define MASK_INF 0x7800000000000000ull
2379 #define MASK_SIGN 0x8000000000000000ull
2380 #define MASK_COEFF 0x0001ffffffffffffull
2381 #define BIN_EXP_BIAS (0x1820ull << 49)
2383 #define EXP_MIN 0x0000000000000000ull
2384 // EXP_MIN = (-6176 + 6176) << 49
2385 #define EXP_MAX 0x5ffe000000000000ull
2386 // EXP_MAX = (6111 + 6176) << 49
2387 #define EXP_MAX_P1 0x6000000000000000ull
2388 // EXP_MAX + 1 = (6111 + 6176 + 1) << 49
2389 #define EXP_P1 0x0002000000000000ull
2391 #define expmin -6176
2392 // min unbiased exponent
2394 // max unbiased exponent
2395 #define expmin16 -398
2396 // min unbiased exponent
2397 #define expmax16 369
2398 // max unbiased exponent
2400 #define SIGNMASK32 0x80000000
2401 #define BID64_SIG_MAX 0x002386F26FC0ffffull
2402 #define SIGNMASK64 0x8000000000000000ull
2404 // typedef unsigned int FPSC; // floating-point status and control
2438 #define ROUNDING_MODE_MASK 0x00007000
2440 typedef struct _DEC_DIGITS
{
2441 unsigned int digits
;
2442 UINT64 threshold_hi
;
2443 UINT64 threshold_lo
;
2444 unsigned int digits1
;
2447 extern DEC_DIGITS __bid_nr_digits
[];
2448 extern UINT64 __bid_midpoint64
[];
2449 extern UINT128 __bid_midpoint128
[];
2450 extern UINT192 __bid_midpoint192
[];
2451 extern UINT256 __bid_midpoint256
[];
2452 extern UINT64 __bid_ten2k64
[];
2453 extern UINT128 __bid_ten2k128
[];
2454 extern UINT256 __bid_ten2k256
[];
2455 extern UINT128 __bid_ten2mk128
[];
2456 extern UINT64 __bid_ten2mk64
[];
2457 extern UINT128 __bid_ten2mk128trunc
[];
2458 extern int __bid_shiftright128
[];
2459 extern UINT64 __bid_maskhigh128
[];
2460 extern UINT64 __bid_maskhigh128M
[];
2461 extern UINT64 __bid_maskhigh192M
[];
2462 extern UINT64 __bid_maskhigh256M
[];
2463 extern UINT64 __bid_one_half128
[];
2464 extern UINT64 __bid_one_half128M
[];
2465 extern UINT64 __bid_one_half192M
[];
2466 extern UINT64 __bid_one_half256M
[];
2467 extern UINT128 __bid_ten2mk128M
[];
2468 extern UINT128 __bid_ten2mk128truncM
[];
2469 extern UINT192 __bid_ten2mk192truncM
[];
2470 extern UINT256 __bid_ten2mk256truncM
[];
2471 extern int __bid_shiftright128M
[];
2472 extern int __bid_shiftright192M
[];
2473 extern int __bid_shiftright256M
[];
2474 extern UINT192 __bid_ten2mk192M
[];
2475 extern UINT256 __bid_ten2mk256M
[];
2476 extern unsigned char __bid_char_table2
[];
2477 extern unsigned char __bid_char_table3
[];
2479 extern UINT64 __bid_ten2m3k64
[];
2480 extern unsigned int __bid_shift_ten2m3k64
[];
2481 extern UINT128 __bid_ten2m3k128
[];
2482 extern unsigned int __bid_shift_ten2m3k128
[];
2486 /***************************************************************************
2487 *************** TABLES FOR GENERAL ROUNDING FUNCTIONS *********************
2488 ***************************************************************************/
2490 extern UINT64 __bid_Kx64
[];
2491 extern unsigned int __bid_Ex64m64
[];
2492 extern UINT64 __bid_half64
[];
2493 extern UINT64 __bid_mask64
[];
2494 extern UINT64 __bid_ten2mxtrunc64
[];
2496 extern UINT128 __bid_Kx128
[];
2497 extern unsigned int __bid_Ex128m128
[];
2498 extern UINT64 __bid_half128
[];
2499 extern UINT64 __bid_mask128
[];
2500 extern UINT128 __bid_ten2mxtrunc128
[];
2502 extern UINT192 __bid_Kx192
[];
2503 extern unsigned int __bid_Ex192m192
[];
2504 extern UINT64 __bid_half192
[];
2505 extern UINT64 __bid_mask192
[];
2506 extern UINT192 __bid_ten2mxtrunc192
[];
2508 extern UINT256 __bid_Kx256
[];
2509 extern unsigned int __bid_Ex256m256
[];
2510 extern UINT64 __bid_half256
[];
2511 extern UINT64 __bid_mask256
[];
2512 extern UINT256 __bid_ten2mxtrunc256
[];
2514 typedef union __bid64_128
{
2519 BID64_128
bid_fma (unsigned int P0
,
2520 BID64_128 x1
, unsigned int P1
,
2521 BID64_128 y1
, unsigned int P2
,
2522 BID64_128 z1
, unsigned int P3
,
2523 unsigned int rnd_mode
, FPSC
* fpsc
);
2528 union __int_double
{
2532 typedef union __int_double int_double
;
2539 typedef union __int_float int_float
;
2541 #define SWAP(A,B,T) {\
2547 // this macro will find coefficient_x to be in [2^A, 2^(A+1) )
2548 // ie it knows that it is A bits long
2549 #define NUMBITS(A, coefficient_x, tempx){\
2550 temp_x.d=(float)coefficient_x;\
2551 A=((tempx.i >>23) & EXPONENT_MASK32) - 0x7f;\
2569 typedef union { UINT32 ui32
; float f
; } BID_UI32FLOAT
;
2570 typedef union { UINT64 ui64
; double d
; } BID_UI64DOUBLE
;
2571 typedef union { UINT128 ui128
; long double ld
; } BID_UI128LONGDOUBLE
;