/* Copyright (C) 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combined
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

#ifndef __BIDECIMAL_H
#define __BIDECIMAL_H

#include "bid_conf.h"
#include "bid_functions.h"


#define __BID_INLINE__ static __inline

/*********************************************************************
 *
 *      Logical Shift Macros
 *
 *********************************************************************/

// Note: the shift count must satisfy 0 < k < 64 for __shr_128, and
// 0 < k < 128 for the _long variants (a count of 0 or 64 would make
// one of the 64-bit sub-shifts undefined).
#define __shr_128(Q, A, k) \
{ \
  (Q).w[0] = (A).w[0] >> (k); \
  (Q).w[0] |= (A).w[1] << (64 - (k)); \
  (Q).w[1] = (A).w[1] >> (k); \
}

#define __shr_128_long(Q, A, k) \
{ \
  if ((k) < 64) { \
    (Q).w[0] = (A).w[0] >> (k); \
    (Q).w[0] |= (A).w[1] << (64 - (k)); \
    (Q).w[1] = (A).w[1] >> (k); \
  } \
  else { \
    (Q).w[0] = (A).w[1] >> ((k) - 64); \
    (Q).w[1] = 0; \
  } \
}

#define __shl_128_long(Q, A, k) \
{ \
  if ((k) < 64) { \
    (Q).w[1] = (A).w[1] << (k); \
    (Q).w[1] |= (A).w[0] >> (64 - (k)); \
    (Q).w[0] = (A).w[0] << (k); \
  } \
  else { \
    (Q).w[1] = (A).w[0] << ((k) - 64); \
    (Q).w[0] = 0; \
  } \
}

#define __low_64(Q)  (Q).w[0]
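
/* Illustrative sketch (not part of the library): round-tripping a
   128-bit value through the shift macros above.  Shifting right by 10
   and back left by 10 clears the low 10 bits.  The helper name is
   hypothetical.  */
#if 0
static UINT128
__bid_example_clear_low_bits (UINT128 a) {
  UINT128 t, r;
  __shr_128_long (t, a, 10);	// t = a >> 10
  __shl_128_long (r, t, 10);	// r = (a >> 10) << 10
  return r;
}
#endif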
/*********************************************************************
 *
 *      String Macros
 *
 *********************************************************************/
#define tolower_macro(x) (((unsigned char)((x)-'A')<=('Z'-'A'))?((x)-'A'+'a'):(x))
/*********************************************************************
 *
 *      Compare Macros
 *
 *********************************************************************/
// greater than
//  return 0 if A<=B
//  non-zero if A>B
#define __unsigned_compare_gt_128(A, B) \
  ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>B.w[0])))
// greater-or-equal
#define __unsigned_compare_ge_128(A, B) \
  ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>=B.w[0])))
#define __test_equal_128(A, B)  (((A).w[1]==(B).w[1]) && ((A).w[0]==(B).w[0]))
// tighten the binary exponent estimate: starting from bin_expon with
// 2^bin_expon <= P, increment bp once more if P >= 2^(bp+1)
#define __tight_bin_range_128(bp, P, bin_expon) \
{ \
  UINT64 M; \
  M = 1; \
  (bp) = (bin_expon); \
  if ((bp) < 63) { \
    M <<= ((bp) + 1); \
    if ((P).w[0] >= M) (bp)++; } \
  else if ((bp) > 64) { \
    M <<= ((bp) + 1 - 64); \
    if (((P).w[1] > M) || ((P).w[1] == M && (P).w[0])) \
      (bp)++; } \
  else if ((P).w[1]) (bp)++; \
}
/*********************************************************************
 *
 *      Add/Subtract Macros
 *
 *********************************************************************/
// add 64-bit value to 128-bit
#define __add_128_64(R128, A128, B64) \
{ \
  UINT64 R64H; \
  R64H = (A128).w[1]; \
  (R128).w[0] = (B64) + (A128).w[0]; \
  if ((R128).w[0] < (B64)) \
    R64H++; \
  (R128).w[1] = R64H; \
}
// subtract 64-bit value from 128-bit
#define __sub_128_64(R128, A128, B64) \
{ \
  UINT64 R64H; \
  R64H = (A128).w[1]; \
  if ((A128).w[0] < (B64)) \
    R64H--; \
  (R128).w[1] = R64H; \
  (R128).w[0] = (A128).w[0] - (B64); \
}
// add 128-bit value to 128-bit
// assume no carry-out
#define __add_128_128(R128, A128, B128) \
{ \
  UINT128 Q128; \
  Q128.w[1] = (A128).w[1] + (B128).w[1]; \
  Q128.w[0] = (B128).w[0] + (A128).w[0]; \
  if (Q128.w[0] < (B128).w[0]) \
    Q128.w[1]++; \
  (R128).w[1] = Q128.w[1]; \
  (R128).w[0] = Q128.w[0]; \
}
#define __sub_128_128(R128, A128, B128) \
{ \
  UINT128 Q128; \
  Q128.w[1] = (A128).w[1] - (B128).w[1]; \
  Q128.w[0] = (A128).w[0] - (B128).w[0]; \
  if ((A128).w[0] < (B128).w[0]) \
    Q128.w[1]--; \
  (R128).w[1] = Q128.w[1]; \
  (R128).w[0] = Q128.w[0]; \
}
#define __add_carry_out(S, CY, X, Y) \
{ \
  UINT64 X1 = X; \
  S = X + Y; \
  CY = (S < X1) ? 1 : 0; \
}
#define __add_carry_in_out(S, CY, X, Y, CI) \
{ \
  UINT64 X1; \
  X1 = X + CI; \
  S = X1 + Y; \
  CY = ((S < X1) || (X1 < CI)) ? 1 : 0; \
}
#define __sub_borrow_out(S, CY, X, Y) \
{ \
  UINT64 X1 = X; \
  S = X - Y; \
  CY = (S > X1) ? 1 : 0; \
}
#define __sub_borrow_in_out(S, CY, X, Y, CI) \
{ \
  UINT64 X1, X0 = X; \
  X1 = X - CI; \
  S = X1 - Y; \
  CY = ((S > X1) || (X1 > X0)) ? 1 : 0; \
}
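/* Illustrative sketch (not part of the library): chaining
   __add_carry_out / __add_carry_in_out to add two multi-limb integers,
   here 192-bit values.  Assumes the UINT192 limb type from
   bid_functions.h; the helper name is hypothetical.  */
#if 0
static UINT192
__bid_example_add_192 (UINT192 a, UINT192 b) {
  UINT192 s;
  UINT64 cy;
  __add_carry_out (s.w[0], cy, a.w[0], b.w[0]);	// lowest limb, carry out
  __add_carry_in_out (s.w[1], cy, a.w[1], b.w[1], cy);	// propagate carry
  s.w[2] = a.w[2] + b.w[2] + cy;	// top limb, final carry discarded
  return s;
}
#endif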
// increment C128 and check for rounding overflow:
// if (C_128) = 10^34 then (C_128) = 10^33 and increment the exponent
#define INCREMENT(C_128, exp) \
{ \
  C_128.w[0]++; \
  if (C_128.w[0] == 0) C_128.w[1]++; \
  if (C_128.w[1] == 0x0001ed09bead87c0ull && \
      C_128.w[0] == 0x378d8e6400000000ull) { \
    exp++; \
    C_128.w[1] = 0x0000314dc6448d93ull; \
    C_128.w[0] = 0x38c15b0a00000000ull; \
  } \
}
// decrement C128 and check for rounding underflow, but only at the
// boundary: if C_128 = 10^33 - 1 and exp > 0 then C_128 = 10^34 - 1
// and decrement the exponent
#define DECREMENT(C_128, exp) \
{ \
  C_128.w[0]--; \
  if (C_128.w[0] == 0xffffffffffffffffull) C_128.w[1]--; \
  if (C_128.w[1] == 0x0000314dc6448d93ull && \
      C_128.w[0] == 0x38c15b09ffffffffull && exp > 0) { \
    exp--; \
    C_128.w[1] = 0x0001ed09bead87c0ull; \
    C_128.w[0] = 0x378d8e63ffffffffull; \
  } \
}
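
/* Illustrative note (not part of the library): the magic constants in
   INCREMENT/DECREMENT are the 128-bit limbs of 10^34 and 10^33:
     10^34 -> high 0x0001ed09bead87c0, low 0x378d8e6400000000
     10^33 -> high 0x0000314dc6448d93, low 0x38c15b0a00000000
   A minimal sketch of the rollover; the helper name is hypothetical. */
#if 0
static void
__bid_example_increment_rollover (void) {
  UINT128 c;
  int exp = 0;
  c.w[1] = 0x0001ed09bead87c0ull;	// c = 10^34 - 1
  c.w[0] = 0x378d8e63ffffffffull;
  INCREMENT (c, exp);	// c becomes 10^33 and exp becomes 1:
			// same value, represented as 10^33 * 10^1
}
#endif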

/*********************************************************************
 *
 *      Multiply Macros
 *
 *********************************************************************/
#define __mul_64x64_to_64(P64, CX, CY)  (P64) = (CX) * (CY)
/***************************************
 *  Signed, Full 64x64-bit Multiply
 ***************************************/
#define __imul_64x64_to_128(P, CX, CY) \
{ \
  UINT64 SX, SY; \
  __mul_64x64_to_128(P, CX, CY); \
\
  SX = ((SINT64)(CX)) >> 63; \
  SY = ((SINT64)(CY)) >> 63; \
  SX &= CY; SY &= CX; \
\
  (P).w[1] = (P).w[1] - SX - SY; \
}
/***************************************
 *  Signed, Full 64x128-bit Multiply
 ***************************************/
#define __imul_64x128_full(Ph, Ql, A, B) \
{ \
  UINT128 ALBL, ALBH, QM2, QM; \
\
  __imul_64x64_to_128(ALBH, (A), (B).w[1]); \
  __imul_64x64_to_128(ALBL, (A), (B).w[0]); \
\
  (Ql).w[0] = ALBL.w[0]; \
  QM.w[0] = ALBL.w[1]; \
  QM.w[1] = ((SINT64)ALBL.w[1]) >> 63; \
  __add_128_128(QM2, ALBH, QM); \
  (Ql).w[1] = QM2.w[0]; \
  Ph = QM2.w[1]; \
}
/*****************************************************
 *      Unsigned Multiply Macros
 *****************************************************/
// get full 64x64bit product
//
#define __mul_64x64_to_128(P, CX, CY) \
{ \
  UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2; \
  CXH = (CX) >> 32; \
  CXL = (UINT32)(CX); \
  CYH = (CY) >> 32; \
  CYL = (UINT32)(CY); \
\
  PM = CXH*CYL; \
  PH = CXH*CYH; \
  PL = CXL*CYL; \
  PM2 = CXL*CYH; \
  PH += (PM>>32); \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
\
  (P).w[1] = PH + (PM>>32); \
  (P).w[0] = (PM<<32)+(UINT32)PL; \
}
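/* Illustrative check (not part of the library): __mul_64x64_to_128
   builds the full 128-bit product from four 32x32->64 partial products
   (schoolbook multiplication).  A minimal sketch comparing it against
   GCC's native unsigned __int128, assuming a target that supports
   __int128; the helper name is hypothetical.  */
#if 0
static int
__bid_example_check_mul_64x64 (UINT64 x, UINT64 y) {
  UINT128 p;
  unsigned __int128 q = (unsigned __int128) x * y;	// reference product
  __mul_64x64_to_128 (p, x, y);
  return (p.w[0] == (UINT64) q) && (p.w[1] == (UINT64) (q >> 64));
}
#endif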
// get full 64x64bit product
// Note:
// This macro is used for CX < 2^61, CY < 2^61
//
#define __mul_64x64_to_128_fast(P, CX, CY) \
{ \
  UINT64 CXH, CXL, CYH, CYL, PL, PH, PM; \
  CXH = (CX) >> 32; \
  CXL = (UINT32)(CX); \
  CYH = (CY) >> 32; \
  CYL = (UINT32)(CY); \
\
  PM = CXH*CYL; \
  PL = CXL*CYL; \
  PH = CXH*CYH; \
  PM += CXL*CYH; \
  PM += (PL>>32); \
\
  (P).w[1] = PH + (PM>>32); \
  (P).w[0] = (PM<<32)+(UINT32)PL; \
}
// used for CX < 2^60
#define __sqr64_fast(P, CX) \
{ \
  UINT64 CXH, CXL, PL, PH, PM; \
  CXH = (CX) >> 32; \
  CXL = (UINT32)(CX); \
\
  PM = CXH*CXL; \
  PL = CXL*CXL; \
  PH = CXH*CXH; \
  PM += PM; \
  PM += (PL>>32); \
\
  (P).w[1] = PH + (PM>>32); \
  (P).w[0] = (PM<<32)+(UINT32)PL; \
}
// get high part of the full 64x64bit product
// Note:
// This implementation is used for CX < 2^61, CY < 2^61
//
#define __mul_64x64_to_64_high_fast(P, CX, CY) \
{ \
  UINT64 CXH, CXL, CYH, CYL, PL, PH, PM; \
  CXH = (CX) >> 32; \
  CXL = (UINT32)(CX); \
  CYH = (CY) >> 32; \
  CYL = (UINT32)(CY); \
\
  PM = CXH*CYL; \
  PL = CXL*CYL; \
  PH = CXH*CYH; \
  PM += CXL*CYH; \
  PM += (PL>>32); \
\
  (P) = PH + (PM>>32); \
}
// get full 64x64bit product
//
#define __mul_64x64_to_128_full(P, CX, CY) \
{ \
  UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2; \
  CXH = (CX) >> 32; \
  CXL = (UINT32)(CX); \
  CYH = (CY) >> 32; \
  CYL = (UINT32)(CY); \
\
  PM = CXH*CYL; \
  PH = CXH*CYH; \
  PL = CXL*CYL; \
  PM2 = CXL*CYH; \
  PH += (PM>>32); \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
\
  (P).w[1] = PH + (PM>>32); \
  (P).w[0] = (PM<<32)+(UINT32)PL; \
}
#define __mul_128x128_high(Q, A, B) \
{ \
  UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2; \
\
  __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]); \
  __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]); \
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
  __mul_64x64_to_128(AHBH, (A).w[1], (B).w[1]); \
\
  __add_128_128(QM, ALBH, AHBL); \
  __add_128_64(QM2, QM, ALBL.w[1]); \
  __add_128_64((Q), AHBH, QM2.w[1]); \
}
#define __mul_128x128_full(Qh, Ql, A, B) \
{ \
  UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2; \
\
  __mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]); \
  __mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]); \
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
  __mul_64x64_to_128(AHBH, (A).w[1], (B).w[1]); \
\
  __add_128_128(QM, ALBH, AHBL); \
  (Ql).w[0] = ALBL.w[0]; \
  __add_128_64(QM2, QM, ALBL.w[1]); \
  __add_128_64((Qh), AHBH, QM2.w[1]); \
  (Ql).w[1] = QM2.w[0]; \
}
#define __mul_128x128_low(Ql, A, B) \
{ \
  UINT128 ALBL; \
  UINT64 QM64; \
\
  __mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]); \
  QM64 = (B).w[0]*(A).w[1] + (A).w[0]*(B).w[1]; \
\
  (Ql).w[0] = ALBL.w[0]; \
  (Ql).w[1] = QM64 + ALBL.w[1]; \
}
#define __mul_64x128_low(Ql, A, B) \
{ \
  UINT128 ALBL, ALBH, QM2; \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]); \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]); \
  (Ql).w[0] = ALBL.w[0]; \
  __add_128_64(QM2, ALBH, ALBL.w[1]); \
  (Ql).w[1] = QM2.w[0]; \
}
#define __mul_64x128_full(Ph, Ql, A, B) \
{ \
  UINT128 ALBL, ALBH, QM2; \
\
  __mul_64x64_to_128(ALBH, (A), (B).w[1]); \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]); \
\
  (Ql).w[0] = ALBL.w[0]; \
  __add_128_64(QM2, ALBH, ALBL.w[1]); \
  (Ql).w[1] = QM2.w[0]; \
  Ph = QM2.w[1]; \
}
#define __mul_64x128_to_192(Q, A, B) \
{ \
  UINT128 ALBL, ALBH, QM2; \
\
  __mul_64x64_to_128(ALBH, (A), (B).w[1]); \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]); \
\
  (Q).w[0] = ALBL.w[0]; \
  __add_128_64(QM2, ALBH, ALBL.w[1]); \
  (Q).w[1] = QM2.w[0]; \
  (Q).w[2] = QM2.w[1]; \
}
#define __mul_64x128_to192(Q, A, B) \
{ \
  UINT128 ALBL, ALBH, QM2; \
\
  __mul_64x64_to_128(ALBH, (A), (B).w[1]); \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]); \
\
  (Q).w[0] = ALBL.w[0]; \
  __add_128_64(QM2, ALBH, ALBL.w[1]); \
  (Q).w[1] = QM2.w[0]; \
  (Q).w[2] = QM2.w[1]; \
}
#define __mul_128x128_to_256(P256, A, B) \
{ \
  UINT128 Qll, Qlh; \
  UINT64 Phl, Phh, CY1, CY2; \
\
  __mul_64x128_full(Phl, Qll, A.w[0], B); \
  __mul_64x128_full(Phh, Qlh, A.w[1], B); \
  (P256).w[0] = Qll.w[0]; \
  __add_carry_out((P256).w[1], CY1, Qlh.w[0], Qll.w[1]); \
  __add_carry_in_out((P256).w[2], CY2, Qlh.w[1], Phl, CY1); \
  (P256).w[3] = Phh + CY2; \
}
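/* Illustrative sketch (not part of the library): a full 128x128->256
   multiply via __mul_128x128_to_256; the result limbs come out least
   significant first in P256.w[0..3].  Assumes the UINT256 type from
   bid_functions.h; the helper name is hypothetical.  */
#if 0
static UINT256
__bid_example_mul_128x128 (UINT128 a, UINT128 b) {
  UINT256 p;
  __mul_128x128_to_256 (p, a, b);	// p = a * b, full 256-bit result
  return p;
}
#endif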
//
// For better performance, this macro checks A.w[1] against 0,
// but not B.w[1]; use it accordingly.
#define __mul_128x128_to_256_check_A(P256, A, B) \
{ \
  UINT128 Qll, Qlh; \
  UINT64 Phl, Phh, CY1, CY2; \
\
  __mul_64x128_full(Phl, Qll, A.w[0], B); \
  (P256).w[0] = Qll.w[0]; \
  if (A.w[1]) { \
    __mul_64x128_full(Phh, Qlh, A.w[1], B); \
    __add_carry_out((P256).w[1], CY1, Qlh.w[0], Qll.w[1]); \
    __add_carry_in_out((P256).w[2], CY2, Qlh.w[1], Phl, CY1); \
    (P256).w[3] = Phh + CY2; } \
  else { \
    (P256).w[1] = Qll.w[1]; \
    (P256).w[2] = Phl; \
    (P256).w[3] = 0; } \
}
#define __mul_64x192_to_256(lP, lA, lB) \
{ \
  UINT128 lP0, lP1, lP2; \
  UINT64 lC; \
  __mul_64x64_to_128(lP0, lA, (lB).w[0]); \
  __mul_64x64_to_128(lP1, lA, (lB).w[1]); \
  __mul_64x64_to_128(lP2, lA, (lB).w[2]); \
  (lP).w[0] = lP0.w[0]; \
  __add_carry_out((lP).w[1], lC, lP1.w[0], lP0.w[1]); \
  __add_carry_in_out((lP).w[2], lC, lP2.w[0], lP1.w[1], lC); \
  (lP).w[3] = lP2.w[1] + lC; \
}
#define __mul_64x256_to_320(P, A, B) \
{ \
  UINT128 lP0, lP1, lP2, lP3; \
  UINT64 lC; \
  __mul_64x64_to_128(lP0, A, (B).w[0]); \
  __mul_64x64_to_128(lP1, A, (B).w[1]); \
  __mul_64x64_to_128(lP2, A, (B).w[2]); \
  __mul_64x64_to_128(lP3, A, (B).w[3]); \
  (P).w[0] = lP0.w[0]; \
  __add_carry_out((P).w[1], lC, lP1.w[0], lP0.w[1]); \
  __add_carry_in_out((P).w[2], lC, lP2.w[0], lP1.w[1], lC); \
  __add_carry_in_out((P).w[3], lC, lP3.w[0], lP2.w[1], lC); \
  (P).w[4] = lP3.w[1] + lC; \
}
#define __mul_192x192_to_384(P, A, B) \
{ \
  UINT256 P0, P1, P2; \
  UINT64 CY; \
  __mul_64x192_to_256(P0, (A).w[0], B); \
  __mul_64x192_to_256(P1, (A).w[1], B); \
  __mul_64x192_to_256(P2, (A).w[2], B); \
  (P).w[0] = P0.w[0]; \
  __add_carry_out((P).w[1], CY, P1.w[0], P0.w[1]); \
  __add_carry_in_out((P).w[2], CY, P1.w[1], P0.w[2], CY); \
  __add_carry_in_out((P).w[3], CY, P1.w[2], P0.w[3], CY); \
  (P).w[4] = P1.w[3] + CY; \
  __add_carry_out((P).w[2], CY, P2.w[0], (P).w[2]); \
  __add_carry_in_out((P).w[3], CY, P2.w[1], (P).w[3], CY); \
  __add_carry_in_out((P).w[4], CY, P2.w[2], (P).w[4], CY); \
  (P).w[5] = P2.w[3] + CY; \
}
#define __mul_64x320_to_384(P, A, B) \
{ \
  UINT128 lP0, lP1, lP2, lP3, lP4; \
  UINT64 lC; \
  __mul_64x64_to_128(lP0, A, (B).w[0]); \
  __mul_64x64_to_128(lP1, A, (B).w[1]); \
  __mul_64x64_to_128(lP2, A, (B).w[2]); \
  __mul_64x64_to_128(lP3, A, (B).w[3]); \
  __mul_64x64_to_128(lP4, A, (B).w[4]); \
  (P).w[0] = lP0.w[0]; \
  __add_carry_out((P).w[1], lC, lP1.w[0], lP0.w[1]); \
  __add_carry_in_out((P).w[2], lC, lP2.w[0], lP1.w[1], lC); \
  __add_carry_in_out((P).w[3], lC, lP3.w[0], lP2.w[1], lC); \
  __add_carry_in_out((P).w[4], lC, lP4.w[0], lP3.w[1], lC); \
  (P).w[5] = lP4.w[1] + lC; \
}
// A*A
// Full 128x128-bit product
#define __sqr128_to_256(P256, A) \
{ \
  UINT128 Qll, Qlh, Qhh; \
  UINT64 TMP_C1, TMP_C2; \
\
  __mul_64x64_to_128(Qhh, A.w[1], A.w[1]); \
  __mul_64x64_to_128(Qlh, A.w[0], A.w[1]); \
  Qhh.w[1] += (Qlh.w[1]>>63); \
  Qlh.w[1] = (Qlh.w[1]+Qlh.w[1])|(Qlh.w[0]>>63); \
  Qlh.w[0] += Qlh.w[0]; \
  __mul_64x64_to_128(Qll, A.w[0], A.w[0]); \
\
  __add_carry_out((P256).w[1], TMP_C1, Qlh.w[0], Qll.w[1]); \
  (P256).w[0] = Qll.w[0]; \
  __add_carry_in_out((P256).w[2], TMP_C2, Qlh.w[1], Qhh.w[0], TMP_C1); \
  (P256).w[3] = Qhh.w[1] + TMP_C2; \
}
#define __mul_128x128_to_256_low_high(PQh, PQl, A, B) \
{ \
  UINT128 Qll, Qlh; \
  UINT64 Phl, Phh, C1, C2; \
\
  __mul_64x128_full(Phl, Qll, A.w[0], B); \
  __mul_64x128_full(Phh, Qlh, A.w[1], B); \
  (PQl).w[0] = Qll.w[0]; \
  __add_carry_out((PQl).w[1], C1, Qlh.w[0], Qll.w[1]); \
  __add_carry_in_out((PQh).w[0], C2, Qlh.w[1], Phl, C1); \
  (PQh).w[1] = Phh + C2; \
}
#define __mul_256x256_to_512(P, A, B) \
{ \
  UINT512 P0, P1, P2, P3; \
  UINT64 CY; \
  __mul_64x256_to_320(P0, (A).w[0], B); \
  __mul_64x256_to_320(P1, (A).w[1], B); \
  __mul_64x256_to_320(P2, (A).w[2], B); \
  __mul_64x256_to_320(P3, (A).w[3], B); \
  (P).w[0] = P0.w[0]; \
  __add_carry_out((P).w[1], CY, P1.w[0], P0.w[1]); \
  __add_carry_in_out((P).w[2], CY, P1.w[1], P0.w[2], CY); \
  __add_carry_in_out((P).w[3], CY, P1.w[2], P0.w[3], CY); \
  __add_carry_in_out((P).w[4], CY, P1.w[3], P0.w[4], CY); \
  (P).w[5] = P1.w[4] + CY; \
  __add_carry_out((P).w[2], CY, P2.w[0], (P).w[2]); \
  __add_carry_in_out((P).w[3], CY, P2.w[1], (P).w[3], CY); \
  __add_carry_in_out((P).w[4], CY, P2.w[2], (P).w[4], CY); \
  __add_carry_in_out((P).w[5], CY, P2.w[3], (P).w[5], CY); \
  (P).w[6] = P2.w[4] + CY; \
  __add_carry_out((P).w[3], CY, P3.w[0], (P).w[3]); \
  __add_carry_in_out((P).w[4], CY, P3.w[1], (P).w[4], CY); \
  __add_carry_in_out((P).w[5], CY, P3.w[2], (P).w[5], CY); \
  __add_carry_in_out((P).w[6], CY, P3.w[3], (P).w[6], CY); \
  (P).w[7] = P3.w[4] + CY; \
}
#define __mul_192x256_to_448(P, A, B) \
{ \
  UINT512 P0, P1, P2; \
  UINT64 CY; \
  __mul_64x256_to_320(P0, (A).w[0], B); \
  __mul_64x256_to_320(P1, (A).w[1], B); \
  __mul_64x256_to_320(P2, (A).w[2], B); \
  (P).w[0] = P0.w[0]; \
  __add_carry_out((P).w[1], CY, P1.w[0], P0.w[1]); \
  __add_carry_in_out((P).w[2], CY, P1.w[1], P0.w[2], CY); \
  __add_carry_in_out((P).w[3], CY, P1.w[2], P0.w[3], CY); \
  __add_carry_in_out((P).w[4], CY, P1.w[3], P0.w[4], CY); \
  (P).w[5] = P1.w[4] + CY; \
  __add_carry_out((P).w[2], CY, P2.w[0], (P).w[2]); \
  __add_carry_in_out((P).w[3], CY, P2.w[1], (P).w[3], CY); \
  __add_carry_in_out((P).w[4], CY, P2.w[2], (P).w[4], CY); \
  __add_carry_in_out((P).w[5], CY, P2.w[3], (P).w[5], CY); \
  (P).w[6] = P2.w[4] + CY; \
}
#define __mul_320x320_to_640(P, A, B) \
{ \
  UINT512 P0, P1, P2, P3; \
  UINT64 CY; \
  __mul_256x256_to_512((P), (A), B); \
  __mul_64x256_to_320(P1, (A).w[4], B); \
  __mul_64x256_to_320(P2, (B).w[4], A); \
  __mul_64x64_to_128(P3, (A).w[4], (B).w[4]); \
  __add_carry_out((P0).w[0], CY, P1.w[0], P2.w[0]); \
  __add_carry_in_out((P0).w[1], CY, P1.w[1], P2.w[1], CY); \
  __add_carry_in_out((P0).w[2], CY, P1.w[2], P2.w[2], CY); \
  __add_carry_in_out((P0).w[3], CY, P1.w[3], P2.w[3], CY); \
  __add_carry_in_out((P0).w[4], CY, P1.w[4], P2.w[4], CY); \
  P3.w[1] += CY; \
  __add_carry_out((P).w[4], CY, (P).w[4], P0.w[0]); \
  __add_carry_in_out((P).w[5], CY, (P).w[5], P0.w[1], CY); \
  __add_carry_in_out((P).w[6], CY, (P).w[6], P0.w[2], CY); \
  __add_carry_in_out((P).w[7], CY, (P).w[7], P0.w[3], CY); \
  __add_carry_in_out((P).w[8], CY, P3.w[0], P0.w[4], CY); \
  (P).w[9] = P3.w[1] + CY; \
}
#define __mul_384x384_to_768(P, A, B) \
{ \
  UINT512 P0, P1, P2, P3; \
  UINT64 CY; \
  __mul_320x320_to_640((P), (A), B); \
  __mul_64x320_to_384(P1, (A).w[5], B); \
  __mul_64x320_to_384(P2, (B).w[5], A); \
  __mul_64x64_to_128(P3, (A).w[5], (B).w[5]); \
  __add_carry_out((P0).w[0], CY, P1.w[0], P2.w[0]); \
  __add_carry_in_out((P0).w[1], CY, P1.w[1], P2.w[1], CY); \
  __add_carry_in_out((P0).w[2], CY, P1.w[2], P2.w[2], CY); \
  __add_carry_in_out((P0).w[3], CY, P1.w[3], P2.w[3], CY); \
  __add_carry_in_out((P0).w[4], CY, P1.w[4], P2.w[4], CY); \
  __add_carry_in_out((P0).w[5], CY, P1.w[5], P2.w[5], CY); \
  P3.w[1] += CY; \
  __add_carry_out((P).w[5], CY, (P).w[5], P0.w[0]); \
  __add_carry_in_out((P).w[6], CY, (P).w[6], P0.w[1], CY); \
  __add_carry_in_out((P).w[7], CY, (P).w[7], P0.w[2], CY); \
  __add_carry_in_out((P).w[8], CY, (P).w[8], P0.w[3], CY); \
  __add_carry_in_out((P).w[9], CY, (P).w[9], P0.w[4], CY); \
  __add_carry_in_out((P).w[10], CY, P3.w[0], P0.w[5], CY); \
  (P).w[11] = P3.w[1] + CY; \
}
#define __mul_64x128_short(Ql, A, B) \
{ \
  UINT64 ALBH_L; \
\
  __mul_64x64_to_64(ALBH_L, (A), (B).w[1]); \
  __mul_64x64_to_128((Ql), (A), (B).w[0]); \
\
  (Ql).w[1] += ALBH_L; \
}
#define __scale128_10(D, _TMP) \
{ \
  UINT128 _TMP2, _TMP8; \
  _TMP2.w[1] = (_TMP.w[1]<<1)|(_TMP.w[0]>>63); \
  _TMP2.w[0] = _TMP.w[0]<<1; \
  _TMP8.w[1] = (_TMP.w[1]<<3)|(_TMP.w[0]>>61); \
  _TMP8.w[0] = _TMP.w[0]<<3; \
  __add_128_128(D, _TMP2, _TMP8); \
}
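/* Illustrative note (not part of the library): __scale128_10 computes
   D = 10*_TMP as (2*_TMP) + (8*_TMP), i.e. one-bit and three-bit left
   shifts followed by a 128-bit add, avoiding a general multiply.
   A hypothetical equivalence check against __mul_64x128_low:  */
#if 0
static int
__bid_example_check_scale10 (UINT128 x) {
  UINT128 a, b;
  __scale128_10 (a, x);	// a = 10*x (mod 2^128) via shifts and add
  __mul_64x128_low (b, 10ull, x);	// b = low 128 bits of 10*x
  return __test_equal_128 (a, b);
}
#endif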
// 64x64-bit product
#define __mul_64x64_to_128MACH(P128, CX64, CY64) \
{ \
  UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2; \
  CXH = (CX64) >> 32; \
  CXL = (UINT32)(CX64); \
  CYH = (CY64) >> 32; \
  CYL = (UINT32)(CY64); \
  PM = CXH*CYL; \
  PH = CXH*CYH; \
  PL = CXL*CYL; \
  PM2 = CXL*CYH; \
  PH += (PM>>32); \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
  (P128).w[1] = PH + (PM>>32); \
  (P128).w[0] = (PM<<32)+(UINT32)PL; \
}
// 64x64-bit product, high part only
#define __mul_64x64_to_128HIGH(P64, CX64, CY64) \
{ \
  UINT64 CXH, CXL, CYH, CYL, PL, PH, PM, PM2; \
  CXH = (CX64) >> 32; \
  CXL = (UINT32)(CX64); \
  CYH = (CY64) >> 32; \
  CYL = (UINT32)(CY64); \
  PM = CXH*CYL; \
  PH = CXH*CYH; \
  PL = CXL*CYL; \
  PM2 = CXL*CYH; \
  PH += (PM>>32); \
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
  P64 = PH + (PM>>32); \
}
#define __mul_128x64_to_128(Q128, A64, B128) \
{ \
  UINT64 ALBH_L; \
  ALBH_L = (A64) * (B128).w[1]; \
  __mul_64x64_to_128MACH((Q128), (A64), (B128).w[0]); \
  (Q128).w[1] += ALBH_L; \
}
// might simplify by calculating just QM2.w[0]
#define __mul_64x128_to_128(Ql, A, B) \
{ \
  UINT128 ALBL, ALBH, QM2; \
  __mul_64x64_to_128(ALBH, (A), (B).w[1]); \
  __mul_64x64_to_128(ALBL, (A), (B).w[0]); \
  (Ql).w[0] = ALBL.w[0]; \
  __add_128_64(QM2, ALBH, ALBL.w[1]); \
  (Ql).w[1] = QM2.w[0]; \
}
/*********************************************************************
 *
 *      BID Pack/Unpack Macros
 *
 *********************************************************************/
/////////////////////////////////////////
// BID64 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_64  767
#define DECIMAL_EXPONENT_BIAS 398
#define MAX_FORMAT_DIGITS     16
/////////////////////////////////////////
// BID128 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_128  12287
#define DECIMAL_EXPONENT_BIAS_128  6176
#define MAX_FORMAT_DIGITS_128  34
/////////////////////////////////////////
// BID32 definitions
////////////////////////////////////////
#define DECIMAL_MAX_EXPON_32  191
#define DECIMAL_EXPONENT_BIAS_32  101
#define MAX_FORMAT_DIGITS_32  7
////////////////////////////////////////
//  Constant Definitions
///////////////////////////////////////
#define SPECIAL_ENCODING_MASK64 0x6000000000000000ull
#define INFINITY_MASK64         0x7800000000000000ull
#define NAN_MASK64              0x7c00000000000000ull
#define SNAN_MASK64             0x7e00000000000000ull
#define QUIET_MASK64            0xfdffffffffffffffull
#define LARGE_COEFF_MASK64      0x0007ffffffffffffull
#define LARGE_COEFF_HIGH_BIT64  0x0020000000000000ull
#define SMALL_COEFF_MASK64      0x001fffffffffffffull
#define EXPONENT_MASK64         0x3ff
#define EXPONENT_SHIFT_LARGE64  51
#define EXPONENT_SHIFT_SMALL64  53
#define LARGEST_BID64           0x77fb86f26fc0ffffull
#define SMALLEST_BID64          0xf7fb86f26fc0ffffull
#define SMALL_COEFF_MASK128     0x0001ffffffffffffull
#define LARGE_COEFF_MASK128     0x00007fffffffffffull
#define EXPONENT_MASK128        0x3fff
#define LARGEST_BID128_HIGH     0x5fffed09bead87c0ull
#define LARGEST_BID128_LOW      0x378d8e63ffffffffull
#define SPECIAL_ENCODING_MASK32 0x60000000ul
#define INFINITY_MASK32         0x78000000ul
#define LARGE_COEFF_MASK32      0x007ffffful
#define LARGE_COEFF_HIGH_BIT32  0x00800000ul
#define SMALL_COEFF_MASK32      0x001ffffful
#define EXPONENT_MASK32         0xff
#define LARGEST_BID32           0x77f8967f
#define MASK_BINARY_EXPONENT    0x7ff0000000000000ull
#define BINARY_EXPONENT_BIAS    0x3ff
#define UPPER_EXPON_LIMIT       51
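
/* Illustrative sketch (not part of the library): classifying a BID64
   encoding with the masks above.  NaNs carry the 0x7c pattern in the
   top bits, infinities carry 0x78; everything else is finite.  The
   helper name is hypothetical.  */
#if 0
static int
__bid_example_classify (UINT64 x) {
  if ((x & NAN_MASK64) == NAN_MASK64)
    return 2;	// NaN (quiet or signaling)
  if ((x & INFINITY_MASK64) == INFINITY_MASK64)
    return 1;	// infinity
  return 0;	// finite (small or large coefficient form)
}
#endif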
// data needed for BID pack/unpack macros
extern UINT64 __bid_round_const_table[][19];
extern UINT128 __bid_reciprocals10_128[];
extern int __bid_recip_scale[];
extern UINT128 __bid_power10_table_128[];
extern int __bid_estimate_decimal_digits[];
extern int __bid_estimate_bin_expon[];
extern UINT64 __bid_power10_index_binexp[];
extern int __bid_short_recip_scale[];
extern UINT64 __bid_reciprocals10_64[];
extern UINT128 __bid_power10_index_binexp_128[];
extern UINT128 __bid_round_const_table_128[][36];


//////////////////////////////////////////////
//  Status Flag Handling
/////////////////////////////////////////////
#define __set_status_flags(fpsc, status)  *(fpsc) |= status
#define is_inexact(fpsc)  ((*(fpsc)) & INEXACT_EXCEPTION)

__BID_INLINE__ UINT64
unpack_BID64 (UINT64 * psign_x, int *pexponent_x,
	      UINT64 * pcoefficient_x, UINT64 x) {
  UINT64 tmp, coeff;

  *psign_x = x & 0x8000000000000000ull;

  if ((x & SPECIAL_ENCODING_MASK64) == SPECIAL_ENCODING_MASK64) {
    // special encodings
    // coefficient
    coeff = (x & LARGE_COEFF_MASK64) | LARGE_COEFF_HIGH_BIT64;

    if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
      *pcoefficient_x = x;
      // check for non-canonical values
      if ((x & LARGE_COEFF_MASK64) >= 1000000000000000ull)
	*pcoefficient_x = x & (~LARGE_COEFF_MASK64);
      return 0;	// NaN or Infinity
    }
    // check for non-canonical values
    if (coeff >= 10000000000000000ull)
      coeff = 0;
    *pcoefficient_x = coeff;
    // get exponent
    tmp = x >> EXPONENT_SHIFT_LARGE64;
    *pexponent_x = (int) (tmp & EXPONENT_MASK64);
    return 1;
  }
  // exponent
  tmp = x >> EXPONENT_SHIFT_SMALL64;
  *pexponent_x = (int) (tmp & EXPONENT_MASK64);
  // coefficient
  *pcoefficient_x = (x & SMALL_COEFF_MASK64);

  return *pcoefficient_x;
}
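
/* Illustrative usage (not part of the library): unpacking the BID64
   encoding of the value 1, which is 0x31c0000000000001 (coefficient 1
   with biased exponent 398 in the small-coefficient form).  The helper
   name is hypothetical.  */
#if 0
static void
__bid_example_unpack_one (void) {
  UINT64 sign, coeff;
  int expon;
  unpack_BID64 (&sign, &expon, &coeff, 0x31c0000000000001ull);
  // now sign == 0, expon == 398 (== DECIMAL_EXPONENT_BIAS), coeff == 1
}
#endif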

//
//   BID64 pack macro (general form)
//
__BID_INLINE__ UINT64
get_BID64 (UINT64 sgn, int expon, UINT64 coeff, int rmode,
	   unsigned *fpsc) {
  UINT128 Stemp, Q_low;
  UINT64 QH, r, mask, C64, remainder_h, CY, carry;
  int extra_digits, amount, amount2;
  unsigned status;

  if (coeff > 9999999999999999ull) {
    expon++;
    coeff = 1000000000000000ull;
  }
  // check for possible underflow/overflow
  if (((unsigned) expon) >= 3 * 256) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
	if (rmode == ROUNDING_DOWN && sgn)
	  return 0x8000000000000001ull;
	if (rmode == ROUNDING_UP && !sgn)
	  return 1ull;
#endif
#endif
	// result is 0
	return sgn;
      }
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (sgn && (unsigned) (rmode - 1) < 2)
	rmode = 3 - rmode;
#endif
#endif
      // get digits to be shifted out
      extra_digits = -expon;
      coeff += __bid_round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x128_full (QH, Q_low, coeff,
			 __bid_reciprocals10_128[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = __bid_recip_scale[extra_digits];

      C64 = QH >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
	if (C64 & 1) {
	  // check whether fractional part of initial_P/10^extra_digits is exactly .5

	  // get remainder
	  amount2 = 64 - amount;
	  remainder_h = 0;
	  remainder_h--;
	  remainder_h >>= amount2;
	  remainder_h = remainder_h & QH;

	  if (!remainder_h
	      && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      __bid_reciprocals10_128[extra_digits].w[0]))) {
	    C64--;
	  }
	}
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
	status = INEXACT_EXCEPTION;
	// get remainder
	remainder_h = QH << (64 - amount);

	switch (rmode) {
	case ROUNDING_TO_NEAREST:
	case ROUNDING_TIES_AWAY:
	  // test whether fractional part is 0
	  if (remainder_h == 0x8000000000000000ull
	      && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      __bid_reciprocals10_128[extra_digits].w[0])))
	    status = EXACT_STATUS;
	  break;
	case ROUNDING_DOWN:
	case ROUNDING_TO_ZERO:
	  if (!remainder_h
	      && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      __bid_reciprocals10_128[extra_digits].w[0])))
	    status = EXACT_STATUS;
	  break;
	default:
	  // round up
	  __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
			   __bid_reciprocals10_128[extra_digits].w[0]);
	  __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
			      __bid_reciprocals10_128[extra_digits].w[1], CY);
	  if ((remainder_h >> (64 - amount)) + carry >=
	      (((UINT64) 1) << amount))
	    status = EXACT_STATUS;
	}

	if (status != EXACT_STATUS)
	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | C64;
    }
    while (coeff < 1000000000000000ull && expon >= 3 * 256) {
      expon--;
      coeff = (coeff << 3) + (coeff << 1);
    }
    if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
      // overflow
      r = sgn | INFINITY_MASK64;
      switch (rmode) {
      case ROUNDING_DOWN:
	if (!sgn)
	  r = LARGEST_BID64;
	break;
      case ROUNDING_TO_ZERO:
	r = sgn | LARGEST_BID64;
	break;
      case ROUNDING_UP:
	// round up
	if (sgn)
	  r = SMALLEST_BID64;
      }
      return r;
    }
  }

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 53 (= 10*5+3) bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}




//
//   No overflow/underflow checking
//
__BID_INLINE__ UINT64
fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
  UINT64 r, mask;

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 53 (= 10*5+3) bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}
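
/* Illustrative usage (not part of the library): packing coefficient 1
   with biased exponent DECIMAL_EXPONENT_BIAS (= 398, i.e. true decimal
   exponent 0) reproduces the BID64 encoding of 1.  Hypothetical
   helper:  */
#if 0
static void
__bid_example_pack_one (void) {
  UINT64 one = fast_get_BID64 (0, DECIMAL_EXPONENT_BIAS, 1ull);
  // one == 0x31c0000000000001ull
}
#endif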


//
//   no underflow checking
//
__BID_INLINE__ UINT64
fast_get_BID64_check_OF (UINT64 sgn, int expon, UINT64 coeff, int rmode,
			 unsigned *fpsc) {
  UINT64 r, mask;

  if (((unsigned) expon) >= 3 * 256 - 1) {
    if ((expon == 3 * 256 - 1) && coeff == 10000000000000000ull) {
      expon = 3 * 256;
      coeff = 1000000000000000ull;
    }

    if (((unsigned) expon) >= 3 * 256) {
      while (coeff < 1000000000000000ull && expon >= 3 * 256) {
	expon--;
	coeff = (coeff << 3) + (coeff << 1);
      }
      if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
	// overflow
	r = sgn | INFINITY_MASK64;
	switch (rmode) {
	case ROUNDING_DOWN:
	  if (!sgn)
	    r = LARGEST_BID64;
	  break;
	case ROUNDING_TO_ZERO:
	  r = sgn | LARGEST_BID64;
	  break;
	case ROUNDING_UP:
	  // round up
	  if (sgn)
	    r = SMALLEST_BID64;
	}
	return r;
      }
    }
  }

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 53 (= 10*5+3) bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format

  // eliminate the case coeff==10^16 after rounding
  if (coeff == 10000000000000000ull) {
    r = expon + 1;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (1000000000000000ull | sgn);
    return r;
  }

  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}


//
//   No overflow/underflow checking
//   or checking for coefficients equal to 10^16 (after rounding)
//
__BID_INLINE__ UINT64
very_fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
  UINT64 r, mask;

  mask = 1;
  mask <<= EXPONENT_SHIFT_SMALL64;

  // check whether coefficient fits in 53 (= 10*5+3) bits
  if (coeff < mask) {
    r = expon;
    r <<= EXPONENT_SHIFT_SMALL64;
    r |= (coeff | sgn);
    return r;
  }
  // special format
  r = expon;
  r <<= EXPONENT_SHIFT_LARGE64;
  r |= (sgn | SPECIAL_ENCODING_MASK64);
  // add coeff, without leading bits
  mask = (mask >> 2) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}

//
//   No overflow/underflow checking or checking for coefficients above 2^53
//
__BID_INLINE__ UINT64
very_fast_get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff) {
  // no UF/OF
  UINT64 r;

  r = expon;
  r <<= EXPONENT_SHIFT_SMALL64;
  r |= (coeff | sgn);
  return r;
}


//
// This pack macro is used when underflow is known to occur
//
__BID_INLINE__ UINT64
get_BID64_UF (UINT64 sgn, int expon, UINT64 coeff, UINT64 R, int rmode,
	      unsigned *fpsc) {
  UINT128 C128, Q_low, Stemp;
  UINT64 C64, remainder_h, QH, carry, CY;
  int extra_digits, amount, amount2;
  unsigned status;

  // underflow
  if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if (rmode == ROUNDING_DOWN && sgn)
      return 0x8000000000000001ull;
    if (rmode == ROUNDING_UP && !sgn)
      return 1ull;
#endif
#endif
    // result is 0
    return sgn;
  }
  // 10*coeff
  coeff = (coeff << 3) + (coeff << 1);
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#endif
#endif
  if (R)
    coeff |= 1;
  // get digits to be shifted out
  extra_digits = 1 - expon;
  C128.w[0] = coeff + __bid_round_const_table[rmode][extra_digits];

  // get coeff*(2^M[extra_digits])/10^extra_digits
  __mul_64x128_full (QH, Q_low, C128.w[0],
		     __bid_reciprocals10_128[extra_digits]);

  // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
  amount = __bid_recip_scale[extra_digits];

  C64 = QH >> amount;
  //__shr_128(C128, Q_high, amount);

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
    if (C64 & 1) {
      // check whether fractional part of initial_P/10^extra_digits is exactly .5

      // get remainder
      amount2 = 64 - amount;
      remainder_h = 0;
      remainder_h--;
      remainder_h >>= amount2;
      remainder_h = remainder_h & QH;

      if (!remainder_h
	  && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  __bid_reciprocals10_128[extra_digits].w[0]))) {
	C64--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    remainder_h = QH << (64 - amount);

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (remainder_h == 0x8000000000000000ull
	  && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  __bid_reciprocals10_128[extra_digits].w[0])))
	status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if (!remainder_h
	  && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
	      || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
		  && Q_low.w[0] <
		  __bid_reciprocals10_128[extra_digits].w[0])))
	status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
		       __bid_reciprocals10_128[extra_digits].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
			  __bid_reciprocals10_128[extra_digits].w[1], CY);
      if ((remainder_h >> (64 - amount)) + carry >=
	  (((UINT64) 1) << amount))
	status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  return sgn | C64;

}



//
// This pack macro does not check for coefficients above 2^53
//
__BID_INLINE__ UINT64
get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff,
			  int rmode, unsigned *fpsc) {
  UINT128 C128, Q_low, Stemp;
  UINT64 r, mask, C64, remainder_h, QH, carry, CY;
  int extra_digits, amount, amount2;
  unsigned status;

  // check for possible underflow/overflow
  if (((unsigned) expon) >= 3 * 256) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS < 0) {
#ifdef SET_STATUS_FLAGS
	__set_status_flags (fpsc,
			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
	if (rmode == ROUNDING_DOWN && sgn)
	  return 0x8000000000000001ull;
	if (rmode == ROUNDING_UP && !sgn)
	  return 1ull;
#endif
#endif
	// result is 0
	return sgn;
      }
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (sgn && (unsigned) (rmode - 1) < 2)
	rmode = 3 - rmode;
#endif
#endif
      // get digits to be shifted out
      extra_digits = -expon;
      C128.w[0] = coeff + __bid_round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x128_full (QH, Q_low, C128.w[0],
			 __bid_reciprocals10_128[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = __bid_recip_scale[extra_digits];

      C64 = QH >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)	//ROUNDING_TO_NEAREST
#endif
	if (C64 & 1) {
	  // check whether fractional part of initial_P/10^extra_digits is exactly .5

	  // get remainder
	  amount2 = 64 - amount;
	  remainder_h = 0;
	  remainder_h--;
	  remainder_h >>= amount2;
	  remainder_h = remainder_h & QH;

	  if (!remainder_h
	      && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      __bid_reciprocals10_128[extra_digits].w[0]))) {
	    C64--;
	  }
	}
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
	status = INEXACT_EXCEPTION;
	// get remainder
	remainder_h = QH << (64 - amount);

	switch (rmode) {
	case ROUNDING_TO_NEAREST:
	case ROUNDING_TIES_AWAY:
	  // test whether fractional part is 0
	  if (remainder_h == 0x8000000000000000ull
	      && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      __bid_reciprocals10_128[extra_digits].w[0])))
	    status = EXACT_STATUS;
	  break;
	case ROUNDING_DOWN:
	case ROUNDING_TO_ZERO:
	  if (!remainder_h
	      && (Q_low.w[1] < __bid_reciprocals10_128[extra_digits].w[1]
		  || (Q_low.w[1] == __bid_reciprocals10_128[extra_digits].w[1]
		      && Q_low.w[0] <
		      __bid_reciprocals10_128[extra_digits].w[0])))
	    status = EXACT_STATUS;
	  break;
	default:
	  // round up
	  __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
			   __bid_reciprocals10_128[extra_digits].w[0]);
	  __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
			      __bid_reciprocals10_128[extra_digits].w[1], CY);
	  if ((remainder_h >> (64 - amount)) + carry >=
	      (((UINT64) 1) << amount))
	    status = EXACT_STATUS;
	}

	if (status != EXACT_STATUS)
	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | C64;
    }

    while (coeff < 1000000000000000ull && expon >= 3 * 256) {
      expon--;
      coeff = (coeff << 3) + (coeff << 1);
    }
    if (expon > DECIMAL_MAX_EXPON_64) {
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
      // overflow
      r = sgn | INFINITY_MASK64;
      switch (rmode) {
      case ROUNDING_DOWN:
	if (!sgn)
	  r = LARGEST_BID64;
	break;
      case ROUNDING_TO_ZERO:
	r = sgn | LARGEST_BID64;
	break;
      case ROUNDING_UP:
	// round up
	if (sgn)
	  r = SMALLEST_BID64;
      }
      return r;
    } else {
      mask = 1;
      mask <<= EXPONENT_SHIFT_SMALL64;
      if (coeff >= mask) {
	r = expon;
	r <<= EXPONENT_SHIFT_LARGE64;
	r |= (sgn | SPECIAL_ENCODING_MASK64);
	// add coeff, without leading bits
	mask = (mask >> 2) - 1;
	coeff &= mask;
	r |= coeff;
	return r;
      }
    }
  }

  r = expon;
  r <<= EXPONENT_SHIFT_SMALL64;
  r |= (coeff | sgn);

  return r;
}


/*****************************************************************************
 *
 *    BID128 pack/unpack macros
 *
 *****************************************************************************/

//
//   Macro for handling BID128 underflow
//         sticky bit given as additional argument
//
__BID_INLINE__ UINT128 *
handle_UF_128_rem (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
		   UINT64 R, unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1, CQ2, CQ8;
  UINT64 carry, CY;
  int ed2, amount;
  unsigned rmode, status;

  // UF occurs
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
    pres->w[1] = sgn;
    pres->w[0] = 0;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
	|| (!sgn && *prounding_mode == ROUNDING_UP))
      pres->w[0] = 1ull;
#endif
#endif
    return pres;
  }
  // CQ *= 10
  CQ2.w[1] = (CQ.w[1] << 1) | (CQ.w[0] >> 63);
  CQ2.w[0] = CQ.w[0] << 1;
  CQ8.w[1] = (CQ.w[1] << 3) | (CQ.w[0] >> 61);
  CQ8.w[0] = CQ.w[0] << 3;
  __add_128_128 (CQ, CQ2, CQ8);

  // add remainder
  if (R)
    CQ.w[0] |= 1;

  ed2 = 1 - expon;
  // add rounding constant to CQ
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  rmode = *prounding_mode;
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#else
  rmode = 0;
#endif
#else
  rmode = 0;
#endif
  T128 = __bid_round_const_table_128[rmode][ed2];
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;

  TP128 = __bid_reciprocals10_128[ed2];
  __mul_128x128_full (Qh, Ql, CQ, TP128);
  amount = __bid_recip_scale[ed2];

  if (amount >= 64) {
    CQ.w[0] = Qh.w[1] >> (amount - 64);
    CQ.w[1] = 0;
  } else {
    __shr_128 (CQ, Qh, amount);
  }

  expon = 0;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (!(*prounding_mode))
#endif
    if (CQ.w[0] & 1) {
      // check whether fractional part of initial_P/10^ed1 is exactly .5

      // get remainder
      __shl_128_long (Qh1, Qh, (128 - amount));

      if (!Qh1.w[1] && !Qh1.w[0]
	  && (Ql.w[1] < __bid_reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == __bid_reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < __bid_reciprocals10_128[ed2].w[0]))) {
	CQ.w[0]--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    __shl_128_long (Qh1, Qh, (128 - amount));

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
	  && (Ql.w[1] < __bid_reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == __bid_reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < __bid_reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if ((!Qh1.w[1]) && (!Qh1.w[0])
	  && (Ql.w[1] < __bid_reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == __bid_reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < __bid_reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
		       __bid_reciprocals10_128[ed2].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
			  __bid_reciprocals10_128[ed2].w[1], CY);
      __shr_128_long (Qh, Qh1, (128 - amount));
      Tmp.w[0] = 1;
      Tmp.w[1] = 0;
      __shl_128_long (Tmp1, Tmp, amount);
      Qh.w[0] += carry;
      if (Qh.w[0] < carry)
	Qh.w[1]++;
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
	status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  pres->w[1] = sgn | CQ.w[1];
  pres->w[0] = CQ.w[0];

  return pres;

}


//
//   Macro for handling BID128 underflow
//
__BID_INLINE__ UINT128 *
handle_UF_128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
	       unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1;
  UINT64 carry, CY;
  int ed2, amount;
  unsigned rmode, status;

  // UF occurs
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
#ifdef SET_STATUS_FLAGS
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
    pres->w[1] = sgn;
    pres->w[0] = 0;
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
	|| (!sgn && *prounding_mode == ROUNDING_UP))
      pres->w[0] = 1ull;
#endif
#endif
    return pres;
  }

  ed2 = 0 - expon;
  // add rounding constant to CQ
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  rmode = *prounding_mode;
  if (sgn && (unsigned) (rmode - 1) < 2)
    rmode = 3 - rmode;
#else
  rmode = 0;
#endif
#else
  rmode = 0;
#endif

  T128 = __bid_round_const_table_128[rmode][ed2];
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;

  TP128 = __bid_reciprocals10_128[ed2];
  __mul_128x128_full (Qh, Ql, CQ, TP128);
  amount = __bid_recip_scale[ed2];

  if (amount >= 64) {
    CQ.w[0] = Qh.w[1] >> (amount - 64);
    CQ.w[1] = 0;
  } else {
    __shr_128 (CQ, Qh, amount);
  }

  expon = 0;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
  if (!(*prounding_mode))
#endif
    if (CQ.w[0] & 1) {
      // check whether fractional part of initial_P/10^ed1 is exactly .5

      // get remainder
      __shl_128_long (Qh1, Qh, (128 - amount));

      if (!Qh1.w[1] && !Qh1.w[0]
	  && (Ql.w[1] < __bid_reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == __bid_reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < __bid_reciprocals10_128[ed2].w[0]))) {
	CQ.w[0]--;
      }
    }
#endif

#ifdef SET_STATUS_FLAGS

  if (is_inexact (fpsc))
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  else {
    status = INEXACT_EXCEPTION;
    // get remainder
    __shl_128_long (Qh1, Qh, (128 - amount));

    switch (rmode) {
    case ROUNDING_TO_NEAREST:
    case ROUNDING_TIES_AWAY:
      // test whether fractional part is 0
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
	  && (Ql.w[1] < __bid_reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == __bid_reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < __bid_reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    case ROUNDING_DOWN:
    case ROUNDING_TO_ZERO:
      if ((!Qh1.w[1]) && (!Qh1.w[0])
	  && (Ql.w[1] < __bid_reciprocals10_128[ed2].w[1]
	      || (Ql.w[1] == __bid_reciprocals10_128[ed2].w[1]
		  && Ql.w[0] < __bid_reciprocals10_128[ed2].w[0])))
	status = EXACT_STATUS;
      break;
    default:
      // round up
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
		       __bid_reciprocals10_128[ed2].w[0]);
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
			  __bid_reciprocals10_128[ed2].w[1], CY);
      __shr_128_long (Qh, Qh1, (128 - amount));
      Tmp.w[0] = 1;
      Tmp.w[1] = 0;
      __shl_128_long (Tmp1, Tmp, amount);
      Qh.w[0] += carry;
      if (Qh.w[0] < carry)
	Qh.w[1]++;
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
	status = EXACT_STATUS;
    }

    if (status != EXACT_STATUS)
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
  }

#endif

  pres->w[1] = sgn | CQ.w[1];
  pres->w[0] = CQ.w[0];

  return pres;

}



//
//  BID128 unpack, input passed by value
//
__BID_INLINE__ UINT64
unpack_BID128_value (UINT64 * psign_x, int *pexponent_x,
		     UINT128 * pcoefficient_x, UINT128 x) {
  UINT128 coeff, T33, T34;
  UINT64 ex;

  *psign_x = (x.w[1]) & 0x8000000000000000ull;

  // special encodings
  if ((x.w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
    if ((x.w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
      // non-canonical input
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = 0;
      ex = (x.w[1]) >> 47;
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
      return 0;
    }
    // 10^33
    T33 = __bid_power10_table_128[33];
    coeff.w[0] = x.w[0];
    coeff.w[1] = (x.w[1]) & LARGE_COEFF_MASK128;
    pcoefficient_x->w[0] = x.w[0];
    pcoefficient_x->w[1] = x.w[1];
    if (__unsigned_compare_ge_128 (coeff, T33))	// non-canonical
      pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128);
    return 0;	// NaN or Infinity
  }

  coeff.w[0] = x.w[0];
  coeff.w[1] = (x.w[1]) & SMALL_COEFF_MASK128;

  // 10^34
  T34 = __bid_power10_table_128[34];
  // check for non-canonical values
  if (__unsigned_compare_ge_128 (coeff, T34))
    coeff.w[0] = coeff.w[1] = 0;

  pcoefficient_x->w[0] = coeff.w[0];
  pcoefficient_x->w[1] = coeff.w[1];

  ex = (x.w[1]) >> 49;
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;

  return coeff.w[0] | coeff.w[1];
}
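
/* Illustrative usage (not part of the library): the BID128 encoding of
   1 has w[1] = 0x3040000000000000 (biased exponent 6176 shifted left
   by 49) and w[0] = 1.  Hypothetical helper:  */
#if 0
static void
__bid_example_unpack128_one (void) {
  UINT128 x, coeff;
  UINT64 sign;
  int expon;
  x.w[1] = 0x3040000000000000ull;
  x.w[0] = 1ull;
  unpack_BID128_value (&sign, &expon, &coeff, x);
  // sign == 0, expon == 6176 (== DECIMAL_EXPONENT_BIAS_128),
  // coeff == { 1, 0 }
}
#endif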


//
//  BID128 unpack, input passed by reference
//
__BID_INLINE__ UINT64
unpack_BID128 (UINT64 * psign_x, int *pexponent_x,
	       UINT128 * pcoefficient_x, UINT128 * px) {
  UINT128 coeff, T33, T34;
  UINT64 ex;

  *psign_x = (px->w[1]) & 0x8000000000000000ull;

  // special encodings
  if ((px->w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
    if ((px->w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
      // non-canonical input
      pcoefficient_x->w[0] = 0;
      pcoefficient_x->w[1] = 0;
      ex = (px->w[1]) >> 47;
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
      return 0;
    }
    // 10^33
    T33 = __bid_power10_table_128[33];
    coeff.w[0] = px->w[0];
    coeff.w[1] = (px->w[1]) & LARGE_COEFF_MASK128;
    pcoefficient_x->w[0] = px->w[0];
    pcoefficient_x->w[1] = px->w[1];
    if (__unsigned_compare_ge_128 (coeff, T33))	// non-canonical
      pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128);
    return 0;	// NaN or Infinity
  }

  coeff.w[0] = px->w[0];
  coeff.w[1] = (px->w[1]) & SMALL_COEFF_MASK128;

  // 10^34
  T34 = __bid_power10_table_128[34];
  // check for non-canonical values
  if (__unsigned_compare_ge_128 (coeff, T34))
    coeff.w[0] = coeff.w[1] = 0;

  pcoefficient_x->w[0] = coeff.w[0];
  pcoefficient_x->w[1] = coeff.w[1];

  ex = (px->w[1]) >> 49;
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;

  return coeff.w[0] | coeff.w[1];
}
1859
1860 //
1861 // Pack macro checks for overflow, but not underflow
1862 //
1863 __BID_INLINE__ UINT128 *
1864 get_BID128_very_fast_OF (UINT128 * pres, UINT64 sgn, int expon,
1865 UINT128 coeff, unsigned *prounding_mode,
1866 unsigned *fpsc) {
1867 UINT128 T;
1868 UINT64 tmp, tmp2;
1869
1870 if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
1871
1872 if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
1873 T = __bid_power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
1874 while (__unsigned_compare_gt_128 (T, coeff)
1875 && expon > DECIMAL_MAX_EXPON_128) {
1876 coeff.w[1] =
1877 (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
1878 (coeff.w[0] >> 63);
1879 tmp2 = coeff.w[0] << 3;
1880 coeff.w[0] = (coeff.w[0] << 1) + tmp2;
1881 if (coeff.w[0] < tmp2)
1882 coeff.w[1]++;
1883
1884 expon--;
1885 }
1886 }
1887 if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
1888 // OF
1889 #ifdef SET_STATUS_FLAGS
1890 __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1891 #endif
1892 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1893 #ifndef IEEE_ROUND_NEAREST
1894 if (*prounding_mode == ROUNDING_TO_ZERO
1895 || (sgn && *prounding_mode == ROUNDING_UP) || (!sgn
1896 &&
1897 *prounding_mode
1898 ==
1899 ROUNDING_DOWN))
1900 {
1901 pres->w[1] = sgn | LARGEST_BID128_HIGH;
1902 pres->w[0] = LARGEST_BID128_LOW;
1903 } else
1904 #endif
1905 #endif
1906 {
1907 pres->w[1] = sgn | INFINITY_MASK64;
1908 pres->w[0] = 0;
1909 }
1910 return pres;
1911 }
1912 }
1913
1914 pres->w[0] = coeff.w[0];
1915 tmp = expon;
1916 tmp <<= 49;
1917 pres->w[1] = sgn | tmp | coeff.w[1];
1918
1919 return pres;
1920 }

//
// No overflow/underflow checks
// No checking for coefficient == 10^34 (rounding artifact)
//
__BID_INLINE__ UINT128 *
get_BID128_very_fast (UINT128 * pres, UINT64 sgn, int expon,
                      UINT128 coeff) {
  UINT64 tmp;

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}

//
// No overflow/underflow checks
//
__BID_INLINE__ UINT128 *
get_BID128_fast (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
  UINT64 tmp;

  // coeff==10^34?
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
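
//
// Sanity sketch (added for exposition; example_check_p34_constant is a
// hypothetical helper): the renormalization above relies on the identity
// 10^34 * 10^e == 10^33 * 10^(e+1), and the hard-coded constant pair must
// match __bid_power10_table_128[34].
//
__BID_INLINE__ int
example_check_p34_constant (void) {
  UINT128 t34 = __bid_power10_table_128[34];
  return (t34.w[1] == 0x0001ed09bead87c0ull
          && t34.w[0] == 0x378d8e6400000000ull);
}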

//
// General BID128 pack macro
//
__BID_INLINE__ UINT128 *
get_BID128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff,
            unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 T;
  UINT64 tmp, tmp2;

  // coeff==10^34?
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }
  // check OF, UF
  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
    // check UF
    if (expon < 0)
      return handle_UF_128 (pres, sgn, expon, coeff, prounding_mode,
                            fpsc);

    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
      T = __bid_power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
      while (__unsigned_compare_gt_128 (T, coeff)
             && expon > DECIMAL_MAX_EXPON_128) {
        // coeff *= 10, using 10*c = 8*c + 2*c with carry into the high word
        coeff.w[1] =
          (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
          (coeff.w[0] >> 63);
        tmp2 = coeff.w[0] << 3;
        coeff.w[0] = (coeff.w[0] << 1) + tmp2;
        if (coeff.w[0] < tmp2)
          coeff.w[1]++;

        expon--;
      }
    }
    if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
      // OF
#ifdef SET_STATUS_FLAGS
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (*prounding_mode == ROUNDING_TO_ZERO
          || (sgn && *prounding_mode == ROUNDING_UP)
          || (!sgn && *prounding_mode == ROUNDING_DOWN)) {
        pres->w[1] = sgn | LARGEST_BID128_HIGH;
        pres->w[0] = LARGEST_BID128_LOW;
      } else
#endif
#endif
      {
        pres->w[1] = sgn | INFINITY_MASK64;
        pres->w[0] = 0;
      }
      return pres;
    }
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
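
//
// Illustrative round-trip sketch (example_repack_bid128 is a hypothetical
// helper, added for exposition): unpack a finite BID128 value and re-pack
// it unchanged with get_BID128.
//
__BID_INLINE__ UINT128
example_repack_bid128 (UINT128 x, unsigned rnd_mode, unsigned *pfpsf) {
  UINT64 sign_x;
  int exponent_x;
  UINT128 coefficient_x, res;

  if (!unpack_BID128 (&sign_x, &exponent_x, &coefficient_x, &x))
    return x;  // zero or special encoding: return as-is
  get_BID128 (&res, sign_x, exponent_x, coefficient_x, &rnd_mode, pfpsf);
  return res;
}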

//
// Macro used for conversions from string
// (no additional arguments given for rounding mode, status flags)
//
__BID_INLINE__ UINT128 *
get_BID128_string (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
  UINT128 D2, D8;
  UINT64 tmp;
  unsigned rmode = 0, status;

  // coeff==10^34?
  if (coeff.w[1] == 0x0001ed09bead87c0ull
      && coeff.w[0] == 0x378d8e6400000000ull) {
    expon++;
    // set coefficient to 10^33
    coeff.w[1] = 0x0000314dc6448d93ull;
    coeff.w[0] = 0x38c15b0a00000000ull;
  }
  // check OF, UF
  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
    // check UF
    if (expon < 0)
      return handle_UF_128 (pres, sgn, expon, coeff, &rmode, &status);

    // OF

    if (expon < DECIMAL_MAX_EXPON_128 + 34) {
      while (expon > DECIMAL_MAX_EXPON_128 &&
             (coeff.w[1] < __bid_power10_table_128[33].w[1] ||
              (coeff.w[1] == __bid_power10_table_128[33].w[1]
               && coeff.w[0] < __bid_power10_table_128[33].w[0]))) {
        // coeff *= 10, computed as 2*coeff + 8*coeff
        D2.w[1] = (coeff.w[1] << 1) | (coeff.w[0] >> 63);
        D2.w[0] = coeff.w[0] << 1;
        D8.w[1] = (coeff.w[1] << 3) | (coeff.w[0] >> 61);
        D8.w[0] = coeff.w[0] << 3;

        __add_128_128 (coeff, D2, D8);
        expon--;
      }
    } else if (!(coeff.w[0] | coeff.w[1]))
      expon = DECIMAL_MAX_EXPON_128;

    if (expon > DECIMAL_MAX_EXPON_128) {
      pres->w[1] = sgn | INFINITY_MASK64;
      pres->w[0] = 0;
      switch (rmode) {
      case ROUNDING_DOWN:
        if (!sgn) {
          pres->w[1] = LARGEST_BID128_HIGH;
          pres->w[0] = LARGEST_BID128_LOW;
        }
        break;
      case ROUNDING_TO_ZERO:
        pres->w[1] = sgn | LARGEST_BID128_HIGH;
        pres->w[0] = LARGEST_BID128_LOW;
        break;
      case ROUNDING_UP:
        // round up
        if (sgn) {
          pres->w[1] = sgn | LARGEST_BID128_HIGH;
          pres->w[0] = LARGEST_BID128_LOW;
        }
        break;
      }

      return pres;
    }
  }

  pres->w[0] = coeff.w[0];
  tmp = expon;
  tmp <<= 49;
  pres->w[1] = sgn | tmp | coeff.w[1];

  return pres;
}
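
//
// Worked example (exposition only): with coefficient 5 and biased exponent
// DECIMAL_MAX_EXPON_128 + 2, the scaling loop above rewrites the value as
// 500 * 10^DECIMAL_MAX_EXPON_128, so the string conversion packs it
// without signaling overflow.
//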


/*****************************************************************************
 *
 * BID32 pack/unpack macros
 *
 *****************************************************************************/


__BID_INLINE__ UINT32
unpack_BID32 (UINT32 * psign_x, int *pexponent_x,
              UINT32 * pcoefficient_x, UINT32 x) {
  UINT32 tmp;

  *psign_x = x & 0x80000000;

  if ((x & SPECIAL_ENCODING_MASK32) == SPECIAL_ENCODING_MASK32) {
    // special encodings
    if ((x & INFINITY_MASK32) == INFINITY_MASK32)
      return 0;  // NaN or Infinity

    // coefficient
    *pcoefficient_x = (x & SMALL_COEFF_MASK32) | LARGE_COEFF_HIGH_BIT32;
    // check for non-canonical value
    if (*pcoefficient_x >= 10000000)
      *pcoefficient_x = 0;
    // get exponent
    tmp = x >> 21;
    *pexponent_x = tmp & EXPONENT_MASK32;
    return 1;
  }
  // exponent
  tmp = x >> 23;
  *pexponent_x = tmp & EXPONENT_MASK32;
  // coefficient
  *pcoefficient_x = (x & LARGE_COEFF_MASK32);

  return *pcoefficient_x;
}
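
//
// Worked example (example_decode_bid32 is a hypothetical helper, added for
// exposition): 0x3200000f has sign 0, exponent field 100 in bits 23..30
// and coefficient 15, so with the BID32 exponent bias of 101 it decodes
// as 15 * 10^(100-101) = 1.5.
//
__BID_INLINE__ int
example_decode_bid32 (void) {
  UINT32 sign_x, coefficient_x;
  int exponent_x;

  return unpack_BID32 (&sign_x, &exponent_x, &coefficient_x, 0x3200000f)
    && sign_x == 0 && exponent_x == 100 && coefficient_x == 15;
}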

//
// General pack macro for BID32
//
__BID_INLINE__ UINT32
get_BID32 (UINT32 sgn, int expon, UINT64 coeff, int rmode,
           unsigned *fpsc) {
  UINT128 Q;
  UINT64 C64, remainder_h, carry, Stemp;
  UINT32 r, mask;
  int extra_digits, amount, amount2;
  unsigned status;

  // rounding artifact: coeff == 10^7; replace with 10^6 and bump exponent
  if (coeff > 9999999ull) {
    expon++;
    coeff = 1000000ull;
  }
  // check for possible underflow/overflow
  if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
    if (expon < 0) {
      // underflow
      if (expon + MAX_FORMAT_DIGITS_32 < 0) {
#ifdef SET_STATUS_FLAGS
        __set_status_flags (fpsc,
                            UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
#endif
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
        if (rmode == ROUNDING_DOWN && sgn)
          return 0x80000001;
        if (rmode == ROUNDING_UP && !sgn)
          return 1;
#endif
#endif
        // result is 0
        return sgn;
      }
      // get digits to be shifted out
#ifdef IEEE_ROUND_NEAREST_TIES_AWAY
      rmode = 0;
#endif
#ifdef IEEE_ROUND_NEAREST
      rmode = 0;
#endif

      extra_digits = -expon;
      coeff += __bid_round_const_table[rmode][extra_digits];

      // get coeff*(2^M[extra_digits])/10^extra_digits
      __mul_64x64_to_128 (Q, coeff, __bid_reciprocals10_64[extra_digits]);

      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
      amount = __bid_short_recip_scale[extra_digits];

      C64 = Q.w[1] >> amount;

#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
#ifndef IEEE_ROUND_NEAREST
      if (rmode == 0)  //ROUNDING_TO_NEAREST
#endif
        if (C64 & 1) {
          // check whether fractional part of initial_P/10^extra_digits is exactly .5

          // get remainder
          amount2 = 64 - amount;
          remainder_h = 0;
          remainder_h--;
          remainder_h >>= amount2;
          remainder_h = remainder_h & Q.w[1];

          if (!remainder_h && (Q.w[0] < __bid_reciprocals10_64[extra_digits])) {
            C64--;
          }
        }
#endif

#ifdef SET_STATUS_FLAGS

      if (is_inexact (fpsc))
        __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
      else {
        status = INEXACT_EXCEPTION;
        // get remainder
        remainder_h = Q.w[1] << (64 - amount);

        switch (rmode) {
        case ROUNDING_TO_NEAREST:
        case ROUNDING_TIES_AWAY:
          // test whether fractional part is 0
          if (remainder_h == 0x8000000000000000ull
              && (Q.w[0] < __bid_reciprocals10_64[extra_digits]))
            status = EXACT_STATUS;
          break;
        case ROUNDING_DOWN:
        case ROUNDING_TO_ZERO:
          if (!remainder_h && (Q.w[0] < __bid_reciprocals10_64[extra_digits]))
            status = EXACT_STATUS;
          break;
        default:
          // round up
          __add_carry_out (Stemp, carry, Q.w[0],
                           __bid_reciprocals10_64[extra_digits]);
          if ((remainder_h >> (64 - amount)) + carry >=
              (((UINT64) 1) << amount))
            status = EXACT_STATUS;
        }

        if (status != EXACT_STATUS)
          __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
      }

#endif

      return sgn | (UINT32) C64;
    }

    // scale the coefficient up by 10 while it stays below 10^7
    while (coeff < 1000000 && expon > DECIMAL_MAX_EXPON_32) {
      coeff = (coeff << 3) + (coeff << 1);  // coeff *= 10
      expon--;
    }
    if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
      // overflow
      r = sgn | INFINITY_MASK32;
      switch (rmode) {
      case ROUNDING_DOWN:
        if (!sgn)
          r = LARGEST_BID32;
        break;
      case ROUNDING_TO_ZERO:
        r = sgn | LARGEST_BID32;
        break;
      case ROUNDING_UP:
        // round up
        if (sgn)
          r = sgn | LARGEST_BID32;
      }
      return r;
    }
  }

  mask = 1 << 23;

  // check whether coefficient fits in DECIMAL_COEFF_FIT bits
  if (coeff < mask) {
    r = expon;
    r <<= 23;
    r |= ((UINT32) coeff | sgn);
    return r;
  }
  // special format

  r = expon;
  r <<= 21;
  r |= (sgn | SPECIAL_ENCODING_MASK32);
  // add coeff, without leading bits
  mask = (1 << 21) - 1;
  r |= (((UINT32) coeff) & mask);

  return r;
}
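
//
// The underflow path above divides the coefficient by 10^extra_digits
// without a hardware divide: multiply by a scaled reciprocal from
// __bid_reciprocals10_64 and shift by __bid_short_recip_scale. A
// self-contained sketch of the technique for a single division by 10
// (example_div10 is a hypothetical name, added for exposition):
//
__BID_INLINE__ UINT64
example_div10 (UINT64 n) {
  UINT128 Q;

  // ceil(2^67/10) = 0xcccccccccccccccd; taking the high 64 bits of the
  // product and shifting right by 3 yields floor(n/10) for any 64-bit n
  __mul_64x64_to_128 (Q, n, 0xcccccccccccccccdull);
  return Q.w[1] >> 3;
}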


//
// no overflow/underflow checks
//
__BID_INLINE__ UINT32
very_fast_get_BID32 (UINT32 sgn, int expon, UINT32 coeff) {
  UINT32 r, mask;

  mask = 1 << 23;

  // check whether coefficient fits in 23 bits
  if (coeff < mask) {
    r = expon;
    r <<= 23;
    r |= (coeff | sgn);
    return r;
  }
  // special format
  r = expon;
  r <<= 21;
  r |= (sgn | SPECIAL_ENCODING_MASK32);
  // add coeff, without leading bits
  mask = (1 << 21) - 1;
  coeff &= mask;
  r |= coeff;

  return r;
}
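
//
// Illustrative check (example_repack_bid32 is a hypothetical helper, added
// for exposition): for canonical finite values, very_fast_get_BID32 is the
// exact inverse of unpack_BID32, e.g. it re-encodes the 1.5 from the
// worked example above as 0x3200000f.
//
__BID_INLINE__ UINT32
example_repack_bid32 (UINT32 x) {
  UINT32 sign_x, coefficient_x;
  int exponent_x;

  if (!unpack_BID32 (&sign_x, &exponent_x, &coefficient_x, x))
    return x;  // zero, NaN or Infinity: return unchanged
  return very_fast_get_BID32 (sign_x, exponent_x, coefficient_x);
}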


/*************************************************************
 *
 *************************************************************/
typedef ALIGN (16) struct {
  UINT64 w[6];
} UINT384;
typedef ALIGN (16) struct {
  UINT64 w[8];
} UINT512;

// #define P 34
#define MASK_STEERING_BITS 0x6000000000000000ull
#define MASK_BINARY_EXPONENT1 0x7fe0000000000000ull
#define MASK_BINARY_SIG1 0x001fffffffffffffull
#define MASK_BINARY_EXPONENT2 0x1ff8000000000000ull
// used to take G[2:w+3] (sec 3.3)
#define MASK_BINARY_SIG2 0x0007ffffffffffffull
// used to mask out G4:T0 (sec 3.3)
#define MASK_BINARY_OR2 0x0020000000000000ull
// used to prefix 8+G4 to T (sec 3.3)
#define UPPER_EXPON_LIMIT 51
#define MASK_EXP 0x7ffe000000000000ull
#define MASK_SPECIAL 0x7800000000000000ull
#define MASK_NAN 0x7c00000000000000ull
#define MASK_SNAN 0x7e00000000000000ull
#define MASK_ANY_INF 0x7c00000000000000ull
#define MASK_INF 0x7800000000000000ull
#define MASK_SIGN 0x8000000000000000ull
#define MASK_COEFF 0x0001ffffffffffffull
#define BIN_EXP_BIAS (0x1820ull << 49)

#define EXP_MIN 0x0000000000000000ull
// EXP_MIN = (-6176 + 6176) << 49
#define EXP_MAX 0x5ffe000000000000ull
// EXP_MAX = (6111 + 6176) << 49
#define EXP_MAX_P1 0x6000000000000000ull
// EXP_MAX + 1 = (6111 + 6176 + 1) << 49
#define EXP_P1 0x0002000000000000ull
// EXP_P1 = 1 << 49
#define expmin -6176
// min unbiased exponent
#define expmax 6111
// max unbiased exponent
#define expmin16 -398
// min unbiased exponent (64-bit format)
#define expmax16 369
// max unbiased exponent (64-bit format)

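//
// Compile-time sanity sketch (added for exposition; the example_check_*
// typedef names are hypothetical): the biased-exponent constants above are
// the unbiased bounds shifted into bits 49 and up, e.g.
// EXP_MAX = (expmax + 6176) << 49; a negative array size would make these
// typedefs fail to compile.
//
typedef char example_check_exp_max[(EXP_MAX ==
                                    ((UINT64) (expmax + 6176) << 49)) ? 1 : -1];
typedef char example_check_exp_p1[(EXP_P1 == ((UINT64) 1 << 49)) ? 1 : -1];
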
#define SIGNMASK32 0x80000000
#define BID64_SIG_MAX 0x002386F26FC0ffffull
#define SIGNMASK64 0x8000000000000000ull

// typedef unsigned int FPSC; // floating-point status and control
// bit31 - bit15: (not assigned)
// bit14: RC:2
// bit13: RC:1
// bit12: RC:0
// bit11: PM
// bit10: UM
// bit9: OM
// bit8: ZM
// bit7: DM
// bit6: IM
// bit5: PE
// bit4: UE
// bit3: OE
// bit2: ZE
// bit1: DE
// bit0: IE

#define ROUNDING_MODE_MASK 0x00007000
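
//
// Illustrative sketch (example_get_rounding_mode is a hypothetical helper,
// added for exposition): per the bit layout above, the rounding-control
// field RC occupies bits 12..14, which is exactly what ROUNDING_MODE_MASK
// selects.
//
__BID_INLINE__ unsigned
example_get_rounding_mode (unsigned fpsc_word) {
  return (fpsc_word & ROUNDING_MODE_MASK) >> 12;
}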

typedef struct _DEC_DIGITS {
  unsigned int digits;
  UINT64 threshold_hi;
  UINT64 threshold_lo;
  unsigned int digits1;
} DEC_DIGITS;

extern DEC_DIGITS __bid_nr_digits[];
extern UINT64 __bid_midpoint64[];
extern UINT128 __bid_midpoint128[];
extern UINT192 __bid_midpoint192[];
extern UINT256 __bid_midpoint256[];
extern UINT64 __bid_ten2k64[];
extern UINT128 __bid_ten2k128[];
extern UINT256 __bid_ten2k256[];
extern UINT128 __bid_ten2mk128[];
extern UINT64 __bid_ten2mk64[];
extern UINT128 __bid_ten2mk128trunc[];
extern int __bid_shiftright128[];
extern UINT64 __bid_maskhigh128[];
extern UINT64 __bid_maskhigh128M[];
extern UINT64 __bid_maskhigh192M[];
extern UINT64 __bid_maskhigh256M[];
extern UINT64 __bid_one_half128[];
extern UINT64 __bid_one_half128M[];
extern UINT64 __bid_one_half192M[];
extern UINT64 __bid_one_half256M[];
extern UINT128 __bid_ten2mk128M[];
extern UINT128 __bid_ten2mk128truncM[];
extern UINT192 __bid_ten2mk192truncM[];
extern UINT256 __bid_ten2mk256truncM[];
extern int __bid_shiftright128M[];
extern int __bid_shiftright192M[];
extern int __bid_shiftright256M[];
extern UINT192 __bid_ten2mk192M[];
extern UINT256 __bid_ten2mk256M[];
extern unsigned char __bid_char_table2[];
extern unsigned char __bid_char_table3[];

extern UINT64 __bid_ten2m3k64[];
extern unsigned int __bid_shift_ten2m3k64[];
extern UINT128 __bid_ten2m3k128[];
extern unsigned int __bid_shift_ten2m3k128[];



/***************************************************************************
 *************** TABLES FOR GENERAL ROUNDING FUNCTIONS *********************
 ***************************************************************************/

extern UINT64 __bid_Kx64[];
extern unsigned int __bid_Ex64m64[];
extern UINT64 __bid_half64[];
extern UINT64 __bid_mask64[];
extern UINT64 __bid_ten2mxtrunc64[];

extern UINT128 __bid_Kx128[];
extern unsigned int __bid_Ex128m128[];
extern UINT64 __bid_half128[];
extern UINT64 __bid_mask128[];
extern UINT128 __bid_ten2mxtrunc128[];

extern UINT192 __bid_Kx192[];
extern unsigned int __bid_Ex192m192[];
extern UINT64 __bid_half192[];
extern UINT64 __bid_mask192[];
extern UINT192 __bid_ten2mxtrunc192[];

extern UINT256 __bid_Kx256[];
extern unsigned int __bid_Ex256m256[];
extern UINT64 __bid_half256[];
extern UINT64 __bid_mask256[];
extern UINT256 __bid_ten2mxtrunc256[];

typedef union __bid64_128 {
  UINT64 b64;
  UINT128 b128;
} BID64_128;

BID64_128 bid_fma (unsigned int P0,
                   BID64_128 x1, unsigned int P1,
                   BID64_128 y1, unsigned int P2,
                   BID64_128 z1, unsigned int P3,
                   unsigned int rnd_mode, FPSC * fpsc);

#define P16 16
#define P34 34

union __int_double {
  UINT64 i;
  double d;
};
typedef union __int_double int_double;


union __int_float {
  UINT32 i;
  float d;
};
typedef union __int_float int_float;

#define SWAP(A,B,T) {\
  T = A;  \
  A = B;  \
  B = T;  \
}

// this macro will find coefficient_x to be in [2^A, 2^(A+1) ),
// i.e. the coefficient is A+1 bits long
#define NUMBITS(A, coefficient_x, tempx){\
  tempx.d = (float)coefficient_x;  \
  A = ((tempx.i >> 23) & EXPONENT_MASK32) - 0x7f;  \
}
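
// Worked example (exposition only): for coefficient_x == 1000000 we have
// 2^19 <= 1000000 < 2^20, the float conversion is exact, and the unbiased
// binary exponent is 19, so NUMBITS yields A == 19 (a 20-bit coefficient).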

enum class_types {
  signalingNaN,
  quietNaN,
  negativeInfinity,
  negativeNormal,
  negativeSubnormal,
  negativeZero,
  positiveZero,
  positiveSubnormal,
  positiveNormal,
  positiveInfinity
};

typedef union { UINT32 ui32; float f; } BID_UI32FLOAT;
typedef union { UINT64 ui64; double d; } BID_UI64DOUBLE;
typedef union { UINT128 ui128; long double ld; } BID_UI128LONGDOUBLE;

#endif