trying to get EXPERIMENT1 working
[libreriscv.git] / openpower / sv / bitmanip / divmnu64.c
1 /* original source code from Hackers-Delight
2 https://github.com/hcs0/Hackers-Delight
3 */
4 /* This divides an n-word dividend by an m-word divisor, giving an
5 n-m+1-word quotient and m-word remainder. The bignums are in arrays of
6 words. Here a "word" is 32 bits. This routine is designed for a 64-bit
7 machine which has a 64/64 division instruction. */
8
9 #include <stdio.h>
10 #include <stdlib.h> //To define "exit", req'd by XLC.
11
12 #define max(x, y) ((x) > (y) ? (x) : (y))
13
14 int nlz(unsigned x) {
15 int n;
16
17 if (x == 0) return(32);
18 n = 0;
19 if (x <= 0x0000FFFF) {n = n +16; x = x <<16;}
20 if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
21 if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
22 if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
23 if (x <= 0x7FFFFFFF) {n = n + 1;}
24 return n;
25 }
26
27 void dumpit(char *msg, int n, unsigned v[]) {
28 int i;
29 printf(msg);
30 for (i = n-1; i >= 0; i--) printf(" %08x", v[i]);
31 printf("\n");
32 }
33
34 /* q[0], r[0], u[0], and v[0] contain the LEAST significant words.
35 (The sequence is in little-endian order).
36
37 This is a fairly precise implementation of Knuth's Algorithm D, for a
38 binary computer with base b = 2**32. The caller supplies:
39 1. Space q for the quotient, m - n + 1 words (at least one).
40 2. Space r for the remainder (optional), n words.
41 3. The dividend u, m words, m >= 1.
42 4. The divisor v, n words, n >= 2.
43 The most significant digit of the divisor, v[n-1], must be nonzero. The
44 dividend u may have leading zeros; this just makes the algorithm take
45 longer and makes the quotient contain more leading zeros. A value of
46 NULL may be given for the address of the remainder to signify that the
47 caller does not want the remainder.
48 The program does not alter the input parameters u and v.
49 The quotient and remainder returned may have leading zeros. The
50 function itself returns a value of 0 for success and 1 for invalid
51 parameters (e.g., division by 0).
52 For now, we must have m >= n. Knuth's Algorithm D also requires
53 that the dividend be at least as long as the divisor. (In his terms,
54 m >= 0 (unstated). Therefore m+n >= n.) */
55
56 int divmnu(unsigned q[], unsigned r[],
57 const unsigned u[], const unsigned v[],
58 int m, int n) {
59
60 const unsigned long long b = 4294967296LL; // Number base (2**32).
61 unsigned *un, *vn; // Normalized form of u, v.
62 unsigned long long qhat; // Estimated quotient digit.
63 unsigned long long rhat; // A remainder.
64 unsigned long long p; // Product of two digits.
65 long long t, k;
66 int s, i, j;
67
68 if (m < n || n <= 0 || v[n-1] == 0)
69 return 1; // Return if invalid param.
70
71 if (n == 1) { // Take care of
72 k = 0; // the case of a
73 for (j = m - 1; j >= 0; j--) { // single-digit
74 q[j] = (k*b + u[j])/v[0]; // divisor here.
75 k = (k*b + u[j]) - q[j]*v[0];
76 }
77 if (r != NULL) r[0] = k;
78 return 0;
79 }
80
81 /* Normalize by shifting v left just enough so that its high-order
82 bit is on, and shift u left the same amount. We may have to append a
83 high-order digit on the dividend; we do that unconditionally. */
84
85 s = nlz(v[n-1]); // 0 <= s <= 31.
86 vn = (unsigned *)alloca(4*n);
87 for (i = n - 1; i > 0; i--)
88 vn[i] = (v[i] << s) | ((unsigned long long)v[i-1] >> (32-s));
89 vn[0] = v[0] << s;
90
91 un = (unsigned *)alloca(4*(m + 1));
92 un[m] = (unsigned long long)u[m-1] >> (32-s);
93 for (i = m - 1; i > 0; i--)
94 un[i] = (u[i] << s) | ((unsigned long long)u[i-1] >> (32-s));
95 un[0] = u[0] << s;
96
97 for (j = m - n; j >= 0; j--) { // Main loop.
98 // Compute estimate qhat of q[j].
99 qhat = (un[j+n]*b + un[j+n-1])/vn[n-1];
100 rhat = (un[j+n]*b + un[j+n-1]) - qhat*vn[n-1];
101 again:
102 if (qhat >= b || qhat*vn[n-2] > b*rhat + un[j+n-2])
103 { qhat = qhat - 1;
104 rhat = rhat + vn[n-1];
105 if (rhat < b) goto again;
106 }
107
108 // Multiply and subtract.
109 k = 0;
110 #ifdef ORIGINAL
111 for (i = 0; i < n; i++) {
112 p = qhat*vn[i];
113 t = un[i+j] - k - (p & 0xFFFFFFFFLL);
114 un[i+j] = (t & 0xffffffffLL);
115 k = (p >> 32) - (t >> 32);
116 }
117 t = un[j+n] - k;
118 un[j+n] = t;
119 #endif
120 #define EXPERIMENT1
121 k = 1;
122 #ifdef EXPERIMENT1
123 for (i = 0; i < n+1; i++) {
124 unsigned rhi;
125 unsigned long long sum;
126 if (i != n) {
127 p = qhat*vn[i];
128 sum = ((unsigned long long)un[i+j]) + ~p + k;
129 } else { // for last loop instead of separate cleanup do special sum
130 sum = (~0)+k;
131 }
132 rhi = (sum >> 32);
133 if (((unsigned long long)k) <= 1) rhi += 1;
134 k = rhi;
135 un[i+j] = sum & 0xffffffff;
136 }
137 // XXX t is not properly computed, it must contain -ve
138 // if the subtract should not have occurred
139 #endif
140 //#define EXPERIMENT2
141 #ifdef EXPERIMENT2
142 k = 0;
143 unsigned long phi[200]; // yes, not malloced, we know
144 unsigned long plo[200]; // yes, not malloced, we know
145 // double-width multiply with strange subtract only on bottom half
146 for (i = 0; i < n; i++) {
147 unsigned long long p = qhat*vn[i];
148 plo[i] = un[i+j] - (p&0xffffffffLL);
149 phi[i] = p >> 32;
150 }
151 for (i = 0; i < n; i++) {
152 t = plo[i] - k; // subtract previous carry
153 un[i+j] = (t & 0xffffffffLL);
154 k = phi[i] - (t >> 32); // take top-halves for new carry
155 }
156 t = un[j+n] - k;
157 un[j+n] = t;
158 #endif
159
160 q[j] = qhat; // Store quotient digit.
161 if (t < 0) { // If we subtracted too
162 q[j] = q[j] - 1; // much, add back.
163 k = 0;
164 for (i = 0; i < n; i++) {
165 t = (unsigned long long)un[i+j] + vn[i] + k;
166 un[i+j] = t;
167 k = t >> 32;
168 }
169 un[j+n] = un[j+n] + k;
170 }
171 } // End j.
172 // If the caller wants the remainder, unnormalize
173 // it and pass it back.
174 if (r != NULL) {
175 for (i = 0; i < n-1; i++)
176 r[i] = (un[i] >> s) | ((unsigned long long)un[i+1] << (32-s));
177 r[n-1] = un[n-1] >> s;
178 }
179 return 0;
180 }
181
182 int errors;
183
184 void check(unsigned q[], unsigned r[],
185 unsigned u[], unsigned v[],
186 int m, int n,
187 unsigned cq[], unsigned cr[]) {
188 int i, szq;
189
190 szq = max(m - n + 1, 1);
191 for (i = 0; i < szq; i++) {
192 if (q[i] != cq[i]) {
193 errors = errors + 1;
194 dumpit("Error, dividend u =", m, u);
195 dumpit(" divisor v =", n, v);
196 dumpit("For quotient, got:", m-n+1, q);
197 dumpit(" Should get:", m-n+1, cq);
198 return;
199 }
200 }
201 for (i = 0; i < n; i++) {
202 if (r[i] != cr[i]) {
203 errors = errors + 1;
204 dumpit("Error, dividend u =", m, u);
205 dumpit(" divisor v =", n, v);
206 dumpit("For remainder, got:", n, r);
207 dumpit(" Should get:", n, cr);
208 return;
209 }
210 }
211 return;
212 }
213
214 int main() {
215 static unsigned test[] = {
216 // m, n, u..., v..., cq..., cr....
217 1, 1, 3, 0, 1, 1, // Error, divide by 0.
218 1, 2, 7, 1,3, 0, 7,0, // Error, n > m.
219 2, 2, 0,0, 1,0, 0, 0,0, // Error, incorrect remainder cr.
220 1, 1, 3, 2, 1, 1,
221 1, 1, 3, 3, 1, 0,
222 1, 1, 3, 4, 0, 3,
223 1, 1, 0, 0xffffffff, 0, 0,
224 1, 1, 0xffffffff, 1, 0xffffffff, 0,
225 1, 1, 0xffffffff, 0xffffffff, 1, 0,
226 1, 1, 0xffffffff, 3, 0x55555555, 0,
227 2, 1, 0xffffffff,0xffffffff, 1, 0xffffffff,0xffffffff, 0,
228 2, 1, 0xffffffff,0xffffffff, 0xffffffff, 1,1, 0,
229 2, 1, 0xffffffff,0xfffffffe, 0xffffffff, 0xffffffff,0, 0xfffffffe,
230 2, 1, 0x00005678,0x00001234, 0x00009abc, 0x1e1dba76,0, 0x6bd0,
231 2, 2, 0,0, 0,1, 0, 0,0,
232 2, 2, 0,7, 0,3, 2, 0,1,
233 2, 2, 5,7, 0,3, 2, 5,1,
234 2, 2, 0,6, 0,2, 3, 0,0,
235 1, 1, 0x80000000, 0x40000001, 0x00000001, 0x3fffffff,
236 2, 1, 0x00000000,0x80000000, 0x40000001, 0xfffffff8,0x00000001, 0x00000008,
237 2, 2, 0x00000000,0x80000000, 0x00000001,0x40000000, 0x00000001, 0xffffffff,0x3fffffff,
238 2, 2, 0x0000789a,0x0000bcde, 0x0000789a,0x0000bcde, 1, 0,0,
239 2, 2, 0x0000789b,0x0000bcde, 0x0000789a,0x0000bcde, 1, 1,0,
240 2, 2, 0x00007899,0x0000bcde, 0x0000789a,0x0000bcde, 0, 0x00007899,0x0000bcde,
241 2, 2, 0x0000ffff,0x0000ffff, 0x0000ffff,0x0000ffff, 1, 0,0,
242 2, 2, 0x0000ffff,0x0000ffff, 0x00000000,0x00000001, 0x0000ffff, 0x0000ffff,0,
243 3, 2, 0x000089ab,0x00004567,0x00000123, 0x00000000,0x00000001, 0x00004567,0x00000123, 0x000089ab,0,
244 3, 2, 0x00000000,0x0000fffe,0x00008000, 0x0000ffff,0x00008000, 0xffffffff,0x00000000, 0x0000ffff,0x00007fff, // Shows that first qhat can = b + 1.
245 3, 3, 0x00000003,0x00000000,0x80000000, 0x00000001,0x00000000,0x20000000, 0x00000003, 0,0,0x20000000, // Adding back step req'd.
246 3, 3, 0x00000003,0x00000000,0x00008000, 0x00000001,0x00000000,0x00002000, 0x00000003, 0,0,0x00002000, // Adding back step req'd.
247 4, 3, 0,0,0x00008000,0x00007fff, 1,0,0x00008000, 0xfffe0000,0, 0x00020000,0xffffffff,0x00007fff, // Add back req'd.
248 4, 3, 0,0x0000fffe,0,0x00008000, 0x0000ffff,0,0x00008000, 0xffffffff,0, 0x0000ffff,0xffffffff,0x00007fff, // Shows that mult-sub quantity cannot be treated as signed.
249 4, 3, 0,0xfffffffe,0,0x80000000, 0x0000ffff,0,0x80000000, 0x00000000,1, 0x00000000,0xfffeffff,0x00000000, // Shows that mult-sub quantity cannot be treated as signed.
250 4, 3, 0,0xfffffffe,0,0x80000000, 0xffffffff,0,0x80000000, 0xffffffff,0, 0xffffffff,0xffffffff,0x7fffffff, // Shows that mult-sub quantity cannot be treated as signed.
251 };
252 int i, n, m, ncases, f;
253 unsigned q[10], r[10];
254 unsigned *u, *v, *cq, *cr;
255
256 printf("divmnu:\n");
257 i = 0;
258 ncases = 0;
259 while (i < sizeof(test)/4) {
260 m = test[i];
261 n = test[i+1];
262 u = &test[i+2];
263 v = &test[i+2+m];
264 cq = &test[i+2+m+n];
265 cr = &test[i+2+m+n+max(m-n+1, 1)];
266
267 f = divmnu(q, r, u, v, m, n);
268 if (f) {
269 dumpit("Error return code for dividend u =", m, u);
270 dumpit(" divisor v =", n, v);
271 errors = errors + 1;
272 }
273 else
274 check(q, r, u, v, m, n, cq, cr);
275 i = i + 2 + m + n + max(m-n+1, 1) + n;
276 ncases = ncases + 1;
277 }
278
279 printf("%d errors out of %d cases; there should be 3.\n", errors, ncases);
280 return 0;
281 }