1 /* original source code from Hackers-Delight
2 https://github.com/hcs0/Hackers-Delight
4 /* This divides an n-word dividend by an m-word divisor, giving an
5 n-m+1-word quotient and m-word remainder. The bignums are in arrays of
6 words. Here a "word" is 32 bits. This routine is designed for a 64-bit
7 machine which has a 64/64 division instruction. */
12 #include <stdlib.h> //To define "exit", req'd by XLC.
14 #define max(x, y) ((x) > (y) ? (x) : (y))
50 void dumpit(char *msg
, int n
, unsigned v
[])
54 for (i
= n
- 1; i
>= 0; i
--)
55 printf(" %08x", v
[i
]);
59 void bigrsh(unsigned s
, unsigned r
[], unsigned un
[], int n
)
61 for (int i
= 0; i
< n
- 1; i
++)
62 r
[i
] = (un
[i
] >> s
) | ((unsigned long long)un
[i
+ 1] << (32 - s
));
63 r
[n
- 1] = un
[n
- 1] >> s
;
66 void biglsh(unsigned s
, unsigned vn
[], unsigned const v
[], int n
)
68 for (int i
= n
- 1; i
> 0; i
--)
69 vn
[i
] = (v
[i
] << s
) | ((unsigned long long)v
[i
- 1] >> (32 - s
));
73 unsigned long bigdiv(unsigned v
, unsigned q
[], unsigned const u
[], int m
)
75 unsigned long long k
= 0; // the case of a
76 for (int j
= m
- 1; j
>= 0; j
--)
78 unsigned long d2
= (k
<< 32) | u
[j
];
79 q
[j
] = d2
/ v
; // divisor here.
85 bool bigmul(unsigned long long qhat
, unsigned product
[], unsigned vn
[], int m
,
88 // Multiply and subtract.
91 // sv.maddedu product.v, vn.v, qhat.s, carry.s
92 for (int i
= 0; i
<= n
; i
++)
94 uint32_t vn_v
= i
< n
? vn
[i
] : 0;
95 uint64_t value
= (uint64_t)vn_v
* (uint64_t)qhat
+ carry
;
96 carry
= (uint32_t)(value
>> 32);
97 product
[i
] = (uint32_t)value
;
102 bool bigadd(unsigned result
[], unsigned vn
[], unsigned un
[], int m
, int n
,
106 // sv.subfe un_j.v, product.v, un_j.v
107 for (int i
= 0; i
<= n
; i
++)
109 uint64_t value
= (uint64_t)vn
[i
] + (uint64_t)un
[i
] + ca
;
110 ca
= value
>> 32 != 0;
116 bool bigsub(unsigned result
[], unsigned vn
[], unsigned un
[], int m
, int n
,
120 // sv.subfe un_j.v, product.v, un_j.v
121 for (int i
= 0; i
<= n
; i
++)
123 uint64_t value
= (uint64_t)~vn
[i
] + (uint64_t)un
[i
] + ca
;
124 ca
= value
>> 32 != 0;
130 bool bigmulsub(unsigned long long qhat
, unsigned vn
[], unsigned un
[], int j
,
133 // Multiply and subtract.
134 uint32_t product
[n
+ 1];
135 bigmul(qhat
, product
, vn
, m
, n
);
136 bool ca
= bigsub(un
, product
, un
, m
, n
, true);
137 bool need_fixup
= !ca
;
141 /* q[0], r[0], u[0], and v[0] contain the LEAST significant words.
142 (The sequence is in little-endian order).
144 This is a fairly precise implementation of Knuth's Algorithm D, for a
145 binary computer with base b = 2**32. The caller supplies:
146 1. Space q for the quotient, m - n + 1 words (at least one).
147 2. Space r for the remainder (optional), n words.
148 3. The dividend u, m words, m >= 1.
149 4. The divisor v, n words, n >= 2.
150 The most significant digit of the divisor, v[n-1], must be nonzero. The
151 dividend u may have leading zeros; this just makes the algorithm take
152 longer and makes the quotient contain more leading zeros. A value of
153 NULL may be given for the address of the remainder to signify that the
154 caller does not want the remainder.
155 The program does not alter the input parameters u and v.
156 The quotient and remainder returned may have leading zeros. The
157 function itself returns a value of 0 for success and 1 for invalid
158 parameters (e.g., division by 0).
159 For now, we must have m >= n. Knuth's Algorithm D also requires
160 that the dividend be at least as long as the divisor. (In his terms,
161 m >= 0 (unstated). Therefore m+n >= n.) */
163 int divmnu(unsigned q
[], unsigned r
[], const unsigned u
[], const unsigned v
[],
167 const unsigned long long b
= 1LL << 32; // Number base (2**32).
168 unsigned *un
, *vn
; // Normalized form of u, v.
169 unsigned long long qhat
; // Estimated quotient digit.
170 unsigned long long rhat
; // A remainder.
173 if (m
< n
|| n
<= 0 || v
[n
- 1] == 0)
174 return 1; // Return if invalid param.
178 unsigned long k
= bigdiv(v
[0], q
, u
, m
);
184 /* Normalize by shifting v left just enough so that its high-order
185 bit is on, and shift u left the same amount. We may have to append a
186 high-order digit on the dividend; we do that unconditionally. */
188 s
= nlz(v
[n
- 1]); // 0 <= s <= 31.
190 vn
= (unsigned *)alloca(4 * n
);
193 un
= (unsigned *)alloca(4 * (m
+ 1));
194 un
[m
] = (unsigned long long)u
[m
- 1] >> (32 - s
); // extra digit
197 for (j
= m
- n
; j
>= 0; j
--)
199 // Compute estimate qhat of q[j] from top 2 digits
200 uint64_t dig2
= ((uint64_t)un
[j
+ n
] << 32) | un
[j
+ n
- 1];
201 qhat
= dig2
/ vn
[n
- 1];
202 rhat
= dig2
% vn
[n
- 1];
204 // use 3rd-from-top digit to obtain better accuracy
205 if (qhat
>= b
|| qhat
* vn
[n
- 2] > b
* rhat
+ un
[j
+ n
- 2])
208 rhat
= rhat
+ vn
[n
- 1];
213 uint32_t *un_j
= &un
[j
];
214 bool need_fixup
= bigmulsub(qhat
, vn
, un_j
, j
, m
, n
);
216 q
[j
] = qhat
; // Store quotient digit.
218 { // If we subtracted too
219 q
[j
] = q
[j
] - 1; // much, add back.
220 bigadd(un_j
, vn
, un_j
, m
, n
, 0);
223 // If the caller wants the remainder, unnormalize
224 // it and pass it back.
232 void check(unsigned q
[], unsigned r
[], unsigned u
[], unsigned v
[], int m
,
233 int n
, unsigned cq
[], unsigned cr
[])
237 szq
= max(m
- n
+ 1, 1);
238 for (i
= 0; i
< szq
; i
++)
243 dumpit("Error, dividend u =", m
, u
);
244 dumpit(" divisor v =", n
, v
);
245 dumpit("For quotient, got:", m
- n
+ 1, q
);
246 dumpit(" Should get:", m
- n
+ 1, cq
);
250 for (i
= 0; i
< n
; i
++)
255 dumpit("Error, dividend u =", m
, u
);
256 dumpit(" divisor v =", n
, v
);
257 dumpit("For remainder, got:", n
, r
);
258 dumpit(" Should get:", n
, cr
);
270 uint32_t u
[10], v
[10], cq
[10], cr
[10];
274 {.m
=1, .n
=1, .u
={3}, .v
={0}, .cq
={1}, .cr
={1}, .error
=true}, // Error, divide by 0.
275 {.m
=1, .n
=2, .u
={7}, .v
={1,3}, .cq
={0}, .cr
={7,0}, .error
=true}, // Error, n > m.
276 {.m
=2, .n
=2, .u
={0,0}, .v
={1,0}, .cq
={0}, .cr
={0,0}, .error
=true}, // Error, incorrect remainder cr.
277 {.m
=1, .n
=1, .u
={3}, .v
={2}, .cq
={1}, .cr
={1}},
278 {.m
=1, .n
=1, .u
={3}, .v
={3}, .cq
={1}, .cr
={0}},
279 {.m
=1, .n
=1, .u
={3}, .v
={4}, .cq
={0}, .cr
={3}},
280 {.m
=1, .n
=1, .u
={0}, .v
={0xffffffff}, .cq
={0}, .cr
={0}},
281 {.m
=1, .n
=1, .u
={0xffffffff}, .v
={1}, .cq
={0xffffffff}, .cr
={0}},
282 {.m
=1, .n
=1, .u
={0xffffffff}, .v
={0xffffffff}, .cq
={1}, .cr
={0}},
283 {.m
=1, .n
=1, .u
={0xffffffff}, .v
={3}, .cq
={0x55555555}, .cr
={0}},
284 {.m
=2, .n
=1, .u
={0xffffffff,0xffffffff}, .v
={1}, .cq
={0xffffffff,0xffffffff}, .cr
={0}},
285 {.m
=2, .n
=1, .u
={0xffffffff,0xffffffff}, .v
={0xffffffff}, .cq
={1,1}, .cr
={0}},
286 {.m
=2, .n
=1, .u
={0xffffffff,0xfffffffe}, .v
={0xffffffff}, .cq
={0xffffffff,0}, .cr
={0xfffffffe}},
287 {.m
=2, .n
=1, .u
={0x00005678,0x00001234}, .v
={0x00009abc}, .cq
={0x1e1dba76,0}, .cr
={0x6bd0}},
288 {.m
=2, .n
=2, .u
={0,0}, .v
={0,1}, .cq
={0}, .cr
={0,0}},
289 {.m
=2, .n
=2, .u
={0,7}, .v
={0,3}, .cq
={2}, .cr
={0,1}},
290 {.m
=2, .n
=2, .u
={5,7}, .v
={0,3}, .cq
={2}, .cr
={5,1}},
291 {.m
=2, .n
=2, .u
={0,6}, .v
={0,2}, .cq
={3}, .cr
={0,0}},
292 {.m
=1, .n
=1, .u
={0x80000000}, .v
={0x40000001}, .cq
={0x00000001}, .cr
={0x3fffffff}},
293 {.m
=2, .n
=1, .u
={0x00000000,0x80000000}, .v
={0x40000001}, .cq
={0xfffffff8,0x00000001}, .cr
={0x00000008}},
294 {.m
=2, .n
=2, .u
={0x00000000,0x80000000}, .v
={0x00000001,0x40000000}, .cq
={0x00000001}, .cr
={0xffffffff,0x3fffffff}},
295 {.m
=2, .n
=2, .u
={0x0000789a,0x0000bcde}, .v
={0x0000789a,0x0000bcde}, .cq
={1}, .cr
={0,0}},
296 {.m
=2, .n
=2, .u
={0x0000789b,0x0000bcde}, .v
={0x0000789a,0x0000bcde}, .cq
={1}, .cr
={1,0}},
297 {.m
=2, .n
=2, .u
={0x00007899,0x0000bcde}, .v
={0x0000789a,0x0000bcde}, .cq
={0}, .cr
={0x00007899,0x0000bcde}},
298 {.m
=2, .n
=2, .u
={0x0000ffff,0x0000ffff}, .v
={0x0000ffff,0x0000ffff}, .cq
={1}, .cr
={0,0}},
299 {.m
=2, .n
=2, .u
={0x0000ffff,0x0000ffff}, .v
={0x00000000,0x00000001}, .cq
={0x0000ffff}, .cr
={0x0000ffff,0}},
300 {.m
=3, .n
=2, .u
={0x000089ab,0x00004567,0x00000123}, .v
={0x00000000,0x00000001}, .cq
={0x00004567,0x00000123}, .cr
={0x000089ab,0}},
301 {.m
=3, .n
=2, .u
={0x00000000,0x0000fffe,0x00008000}, .v
={0x0000ffff,0x00008000}, .cq
={0xffffffff,0x00000000}, .cr
={0x0000ffff,0x00007fff}}, // Shows that first qhat can = b + 1.
302 {.m
=3, .n
=3, .u
={0x00000003,0x00000000,0x80000000}, .v
={0x00000001,0x00000000,0x20000000}, .cq
={0x00000003}, .cr
={0,0,0x20000000}}, // Adding back step req'd.
303 {.m
=3, .n
=3, .u
={0x00000003,0x00000000,0x00008000}, .v
={0x00000001,0x00000000,0x00002000}, .cq
={0x00000003}, .cr
={0,0,0x00002000}}, // Adding back step req'd.
304 {.m
=4, .n
=3, .u
={0,0,0x00008000,0x00007fff}, .v
={1,0,0x00008000}, .cq
={0xfffe0000,0}, .cr
={0x00020000,0xffffffff,0x00007fff}}, // Add back req'd.
305 {.m
=4, .n
=3, .u
={0,0x0000fffe,0,0x00008000}, .v
={0x0000ffff,0,0x00008000}, .cq
={0xffffffff,0}, .cr
={0x0000ffff,0xffffffff,0x00007fff}}, // Shows that mult-sub quantity cannot be treated as signed.
306 {.m
=4, .n
=3, .u
={0,0xfffffffe,0,0x80000000}, .v
={0x0000ffff,0,0x80000000}, .cq
={0x00000000,1}, .cr
={0x00000000,0xfffeffff,0x00000000}}, // Shows that mult-sub quantity cannot be treated as signed.
307 {.m
=4, .n
=3, .u
={0,0xfffffffe,0,0x80000000}, .v
={0xffffffff,0,0x80000000}, .cq
={0xffffffff,0}, .cr
={0xffffffff,0xffffffff,0x7fffffff}}, // Shows that mult-sub quantity cannot be treated as signed.
310 const int ncases
= sizeof(test
) / sizeof(test
[0]);
311 unsigned q
[10], r
[10];
314 for (int i
= 0; i
< ncases
; i
++)
318 uint32_t *u
= test
[i
].u
;
319 uint32_t *v
= test
[i
].v
;
320 uint32_t *cq
= test
[i
].cq
;
321 uint32_t *cr
= test
[i
].cr
;
323 int f
= divmnu(q
, r
, u
, v
, m
, n
);
324 if (f
&& !test
[i
].error
)
326 dumpit("Unexpected error return code for dividend u =", m
, u
);
327 dumpit(" divisor v =", n
, v
);
330 else if (!f
&& test
[i
].error
)
332 dumpit("Unexpected success return code for dividend u =", m
, u
);
333 dumpit(" divisor v =", n
, v
);
338 check(q
, r
, u
, v
, m
, n
, cq
, cr
);
341 printf("%d test failures out of %d cases.\n", errors
, ncases
);