add requested loop variants
[libreriscv.git] / openpower / sv / bitmanip / divmnu64.c
1 /* original source code from Hackers-Delight
2 https://github.com/hcs0/Hackers-Delight
3 */
4 /* This divides an n-word dividend by an m-word divisor, giving an
5 n-m+1-word quotient and m-word remainder. The bignums are in arrays of
6 words. Here a "word" is 32 bits. This routine is designed for a 64-bit
7 machine which has a 64/64 division instruction. */
8
9 #include <stdio.h>
10 #include <stdlib.h> //To define "exit", req'd by XLC.
11 #include <stdbool.h>
12 #include <stdint.h>
13
14 #define max(x, y) ((x) > (y) ? (x) : (y))
15
16 int nlz(unsigned x) {
17 int n;
18
19 if (x == 0) return(32);
20 n = 0;
21 if (x <= 0x0000FFFF) {n = n +16; x = x <<16;}
22 if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
23 if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
24 if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
25 if (x <= 0x7FFFFFFF) {n = n + 1;}
26 return n;
27 }
28
29 void dumpit(char *msg, int n, unsigned v[]) {
30 int i;
31 printf("%s", msg);
32 for (i = n-1; i >= 0; i--) printf(" %08x", v[i]);
33 printf("\n");
34 }
35
36 /* q[0], r[0], u[0], and v[0] contain the LEAST significant words.
37 (The sequence is in little-endian order).
38
39 This is a fairly precise implementation of Knuth's Algorithm D, for a
40 binary computer with base b = 2**32. The caller supplies:
41 1. Space q for the quotient, m - n + 1 words (at least one).
42 2. Space r for the remainder (optional), n words.
43 3. The dividend u, m words, m >= 1.
44 4. The divisor v, n words, n >= 2.
45 The most significant digit of the divisor, v[n-1], must be nonzero. The
46 dividend u may have leading zeros; this just makes the algorithm take
47 longer and makes the quotient contain more leading zeros. A value of
48 NULL may be given for the address of the remainder to signify that the
49 caller does not want the remainder.
50 The program does not alter the input parameters u and v.
51 The quotient and remainder returned may have leading zeros. The
52 function itself returns a value of 0 for success and 1 for invalid
53 parameters (e.g., division by 0).
54 For now, we must have m >= n. Knuth's Algorithm D also requires
55 that the dividend be at least as long as the divisor. (In his terms,
56 m >= 0 (unstated). Therefore m+n >= n.) */
57
58 int divmnu(unsigned q[], unsigned r[],
59 const unsigned u[], const unsigned v[],
60 int m, int n) {
61
62 const unsigned long long b = 4294967296LL; // Number base (2**32).
63 unsigned *un, *vn; // Normalized form of u, v.
64 unsigned long long qhat; // Estimated quotient digit.
65 unsigned long long rhat; // A remainder.
66 unsigned long long p; // Product of two digits.
67 long long t, k;
68 int s, i, j;
69
70 if (m < n || n <= 0 || v[n-1] == 0)
71 return 1; // Return if invalid param.
72
73 if (n == 1) { // Take care of
74 k = 0; // the case of a
75 for (j = m - 1; j >= 0; j--) { // single-digit
76 q[j] = (k*b + u[j])/v[0]; // divisor here.
77 k = (k*b + u[j]) - q[j]*v[0];
78 }
79 if (r != NULL) r[0] = k;
80 return 0;
81 }
82
83 /* Normalize by shifting v left just enough so that its high-order
84 bit is on, and shift u left the same amount. We may have to append a
85 high-order digit on the dividend; we do that unconditionally. */
86
87 s = nlz(v[n-1]); // 0 <= s <= 31.
88 vn = (unsigned *)alloca(4*n);
89 for (i = n - 1; i > 0; i--)
90 vn[i] = (v[i] << s) | ((unsigned long long)v[i-1] >> (32-s));
91 vn[0] = v[0] << s;
92
93 un = (unsigned *)alloca(4*(m + 1));
94 un[m] = (unsigned long long)u[m-1] >> (32-s);
95 for (i = m - 1; i > 0; i--)
96 un[i] = (u[i] << s) | ((unsigned long long)u[i-1] >> (32-s));
97 un[0] = u[0] << s;
98
99 for (j = m - n; j >= 0; j--) { // Main loop.
100 // Compute estimate qhat of q[j].
101 qhat = (un[j+n]*b + un[j+n-1])/vn[n-1];
102 rhat = (un[j+n]*b + un[j+n-1]) - qhat*vn[n-1];
103 again:
104 if (qhat >= b || qhat*vn[n-2] > b*rhat + un[j+n-2])
105 { qhat = qhat - 1;
106 rhat = rhat + vn[n-1];
107 if (rhat < b) goto again;
108 }
109
110 #ifdef ORIGINAL
111 // Multiply and subtract.
112 k = 0;
113 for (i = 0; i < n; i++) {
114 p = qhat*vn[i];
115 t = un[i+j] - k - (p & 0xFFFFFFFFLL);
116 un[i+j] = t;
117 k = (p >> 32) - (t >> 32);
118 }
119 t = un[j+n] - k;
120 un[j+n] = t;
121 bool need_fixup = t < 0;
122 #elif defined(SUB_MUL_BORROW)
123 (void)p; // shut up unused variable warning
124
125 // Multiply and subtract.
126 uint32_t borrow = 0;
127 for(int i = 0; i <= n; i++) {
128 uint32_t vn_i = i < n ? vn[i] : 0;
129 uint64_t value = un[i + j] - (uint64_t)qhat * vn_i - borrow;
130 borrow = -(uint32_t)(value >> 32);
131 un[i + j] = (uint32_t)value;
132 }
133 bool need_fixup = borrow != 0;
134 #elif defined(MUL_RSUB_CARRY)
135 (void)p; // shut up unused variable warning
136
137 // Multiply and subtract.
138 uint32_t carry = 1;
139 for(int i = 0; i <= n; i++) {
140 uint32_t vn_i = i < n ? vn[i] : 0;
141 uint64_t result = un[i + j] + ~((uint64_t)qhat * vn_i) + carry;
142 uint32_t result_high = result >> 32;
143 if(carry <= 1)
144 result_high++;
145 carry = result_high;
146 un[i + j] = (uint32_t)result;
147 }
148 bool need_fixup = carry != 1;
149 #else
150 #error need to define one of ORIGINAL, SUB_MUL_BORROW, or MUL_RSUB_CARRY
151 #endif
152
153 q[j] = qhat; // Store quotient digit.
154 if (need_fixup) { // If we subtracted too
155 q[j] = q[j] - 1; // much, add back.
156 k = 0;
157 for (i = 0; i < n; i++) {
158 t = (unsigned long long)un[i+j] + vn[i] + k;
159 un[i+j] = t;
160 k = t >> 32;
161 }
162 un[j+n] = un[j+n] + k;
163 }
164 } // End j.
165 // If the caller wants the remainder, unnormalize
166 // it and pass it back.
167 if (r != NULL) {
168 for (i = 0; i < n-1; i++)
169 r[i] = (un[i] >> s) | ((unsigned long long)un[i+1] << (32-s));
170 r[n-1] = un[n-1] >> s;
171 }
172 return 0;
173 }
174
175 int errors;
176
177 void check(unsigned q[], unsigned r[],
178 unsigned u[], unsigned v[],
179 int m, int n,
180 unsigned cq[], unsigned cr[]) {
181 int i, szq;
182
183 szq = max(m - n + 1, 1);
184 for (i = 0; i < szq; i++) {
185 if (q[i] != cq[i]) {
186 errors = errors + 1;
187 dumpit("Error, dividend u =", m, u);
188 dumpit(" divisor v =", n, v);
189 dumpit("For quotient, got:", m-n+1, q);
190 dumpit(" Should get:", m-n+1, cq);
191 return;
192 }
193 }
194 for (i = 0; i < n; i++) {
195 if (r[i] != cr[i]) {
196 errors = errors + 1;
197 dumpit("Error, dividend u =", m, u);
198 dumpit(" divisor v =", n, v);
199 dumpit("For remainder, got:", n, r);
200 dumpit(" Should get:", n, cr);
201 return;
202 }
203 }
204 return;
205 }
206
207 int main() {
208 static unsigned test[] = {
209 // m, n, u..., v..., cq..., cr....
210 1, 1, 3, 0, 1, 1, // Error, divide by 0.
211 1, 2, 7, 1,3, 0, 7,0, // Error, n > m.
212 2, 2, 0,0, 1,0, 0, 0,0, // Error, incorrect remainder cr.
213 1, 1, 3, 2, 1, 1,
214 1, 1, 3, 3, 1, 0,
215 1, 1, 3, 4, 0, 3,
216 1, 1, 0, 0xffffffff, 0, 0,
217 1, 1, 0xffffffff, 1, 0xffffffff, 0,
218 1, 1, 0xffffffff, 0xffffffff, 1, 0,
219 1, 1, 0xffffffff, 3, 0x55555555, 0,
220 2, 1, 0xffffffff,0xffffffff, 1, 0xffffffff,0xffffffff, 0,
221 2, 1, 0xffffffff,0xffffffff, 0xffffffff, 1,1, 0,
222 2, 1, 0xffffffff,0xfffffffe, 0xffffffff, 0xffffffff,0, 0xfffffffe,
223 2, 1, 0x00005678,0x00001234, 0x00009abc, 0x1e1dba76,0, 0x6bd0,
224 2, 2, 0,0, 0,1, 0, 0,0,
225 2, 2, 0,7, 0,3, 2, 0,1,
226 2, 2, 5,7, 0,3, 2, 5,1,
227 2, 2, 0,6, 0,2, 3, 0,0,
228 1, 1, 0x80000000, 0x40000001, 0x00000001, 0x3fffffff,
229 2, 1, 0x00000000,0x80000000, 0x40000001, 0xfffffff8,0x00000001, 0x00000008,
230 2, 2, 0x00000000,0x80000000, 0x00000001,0x40000000, 0x00000001, 0xffffffff,0x3fffffff,
231 2, 2, 0x0000789a,0x0000bcde, 0x0000789a,0x0000bcde, 1, 0,0,
232 2, 2, 0x0000789b,0x0000bcde, 0x0000789a,0x0000bcde, 1, 1,0,
233 2, 2, 0x00007899,0x0000bcde, 0x0000789a,0x0000bcde, 0, 0x00007899,0x0000bcde,
234 2, 2, 0x0000ffff,0x0000ffff, 0x0000ffff,0x0000ffff, 1, 0,0,
235 2, 2, 0x0000ffff,0x0000ffff, 0x00000000,0x00000001, 0x0000ffff, 0x0000ffff,0,
236 3, 2, 0x000089ab,0x00004567,0x00000123, 0x00000000,0x00000001, 0x00004567,0x00000123, 0x000089ab,0,
237 3, 2, 0x00000000,0x0000fffe,0x00008000, 0x0000ffff,0x00008000, 0xffffffff,0x00000000, 0x0000ffff,0x00007fff, // Shows that first qhat can = b + 1.
238 3, 3, 0x00000003,0x00000000,0x80000000, 0x00000001,0x00000000,0x20000000, 0x00000003, 0,0,0x20000000, // Adding back step req'd.
239 3, 3, 0x00000003,0x00000000,0x00008000, 0x00000001,0x00000000,0x00002000, 0x00000003, 0,0,0x00002000, // Adding back step req'd.
240 4, 3, 0,0,0x00008000,0x00007fff, 1,0,0x00008000, 0xfffe0000,0, 0x00020000,0xffffffff,0x00007fff, // Add back req'd.
241 4, 3, 0,0x0000fffe,0,0x00008000, 0x0000ffff,0,0x00008000, 0xffffffff,0, 0x0000ffff,0xffffffff,0x00007fff, // Shows that mult-sub quantity cannot be treated as signed.
242 4, 3, 0,0xfffffffe,0,0x80000000, 0x0000ffff,0,0x80000000, 0x00000000,1, 0x00000000,0xfffeffff,0x00000000, // Shows that mult-sub quantity cannot be treated as signed.
243 4, 3, 0,0xfffffffe,0,0x80000000, 0xffffffff,0,0x80000000, 0xffffffff,0, 0xffffffff,0xffffffff,0x7fffffff, // Shows that mult-sub quantity cannot be treated as signed.
244 };
245 int i, n, m, ncases, f;
246 unsigned q[10], r[10];
247 unsigned *u, *v, *cq, *cr;
248
249 printf("divmnu:\n");
250 i = 0;
251 ncases = 0;
252 while (i < sizeof(test)/4) {
253 m = test[i];
254 n = test[i+1];
255 u = &test[i+2];
256 v = &test[i+2+m];
257 cq = &test[i+2+m+n];
258 cr = &test[i+2+m+n+max(m-n+1, 1)];
259
260 f = divmnu(q, r, u, v, m, n);
261 if (f) {
262 dumpit("Error return code for dividend u =", m, u);
263 dumpit(" divisor v =", n, v);
264 errors = errors + 1;
265 }
266 else
267 check(q, r, u, v, m, n, cq, cr);
268 i = i + 2 + m + n + max(m-n+1, 1) + n;
269 ncases = ncases + 1;
270 }
271
272 printf("%d errors out of %d cases; there should be 3.\n", errors, ncases);
273 return 0;
274 }