EXPERIMENT2 in divmnu64.c to split out into two loops
[libreriscv.git] / openpower / sv / bitmanip / divmnu64.c
1 /* original source code from Hackers-Delight
2 https://github.com/hcs0/Hackers-Delight
3 */
4 /* This divides an n-word dividend by an m-word divisor, giving an
5 n-m+1-word quotient and m-word remainder. The bignums are in arrays of
6 words. Here a "word" is 32 bits. This routine is designed for a 64-bit
7 machine which has a 64/64 division instruction. */
8
9 #include <stdio.h>
10 #include <stdlib.h> //To define "exit", req'd by XLC.
11
12 #define max(x, y) ((x) > (y) ? (x) : (y))
13
14 int nlz(unsigned x) {
15 int n;
16
17 if (x == 0) return(32);
18 n = 0;
19 if (x <= 0x0000FFFF) {n = n +16; x = x <<16;}
20 if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;}
21 if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;}
22 if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;}
23 if (x <= 0x7FFFFFFF) {n = n + 1;}
24 return n;
25 }
26
27 void dumpit(char *msg, int n, unsigned v[]) {
28 int i;
29 printf(msg);
30 for (i = n-1; i >= 0; i--) printf(" %08x", v[i]);
31 printf("\n");
32 }
33
34 /* q[0], r[0], u[0], and v[0] contain the LEAST significant words.
35 (The sequence is in little-endian order).
36
37 This is a fairly precise implementation of Knuth's Algorithm D, for a
38 binary computer with base b = 2**32. The caller supplies:
39 1. Space q for the quotient, m - n + 1 words (at least one).
40 2. Space r for the remainder (optional), n words.
41 3. The dividend u, m words, m >= 1.
42 4. The divisor v, n words, n >= 2.
43 The most significant digit of the divisor, v[n-1], must be nonzero. The
44 dividend u may have leading zeros; this just makes the algorithm take
45 longer and makes the quotient contain more leading zeros. A value of
46 NULL may be given for the address of the remainder to signify that the
47 caller does not want the remainder.
48 The program does not alter the input parameters u and v.
49 The quotient and remainder returned may have leading zeros. The
50 function itself returns a value of 0 for success and 1 for invalid
51 parameters (e.g., division by 0).
52 For now, we must have m >= n. Knuth's Algorithm D also requires
53 that the dividend be at least as long as the divisor. (In his terms,
54 m >= 0 (unstated). Therefore m+n >= n.) */
55
56 int divmnu(unsigned q[], unsigned r[],
57 const unsigned u[], const unsigned v[],
58 int m, int n) {
59
60 const unsigned long long b = 4294967296LL; // Number base (2**32).
61 unsigned *un, *vn; // Normalized form of u, v.
62 unsigned long long qhat; // Estimated quotient digit.
63 unsigned long long rhat; // A remainder.
64 unsigned long long p; // Product of two digits.
65 long long t, k;
66 int s, i, j;
67
68 if (m < n || n <= 0 || v[n-1] == 0)
69 return 1; // Return if invalid param.
70
71 if (n == 1) { // Take care of
72 k = 0; // the case of a
73 for (j = m - 1; j >= 0; j--) { // single-digit
74 q[j] = (k*b + u[j])/v[0]; // divisor here.
75 k = (k*b + u[j]) - q[j]*v[0];
76 }
77 if (r != NULL) r[0] = k;
78 return 0;
79 }
80
81 /* Normalize by shifting v left just enough so that its high-order
82 bit is on, and shift u left the same amount. We may have to append a
83 high-order digit on the dividend; we do that unconditionally. */
84
85 s = nlz(v[n-1]); // 0 <= s <= 31.
86 vn = (unsigned *)alloca(4*n);
87 for (i = n - 1; i > 0; i--)
88 vn[i] = (v[i] << s) | ((unsigned long long)v[i-1] >> (32-s));
89 vn[0] = v[0] << s;
90
91 un = (unsigned *)alloca(4*(m + 1));
92 un[m] = (unsigned long long)u[m-1] >> (32-s);
93 for (i = m - 1; i > 0; i--)
94 un[i] = (u[i] << s) | ((unsigned long long)u[i-1] >> (32-s));
95 un[0] = u[0] << s;
96
97 for (j = m - n; j >= 0; j--) { // Main loop.
98 // Compute estimate qhat of q[j].
99 qhat = (un[j+n]*b + un[j+n-1])/vn[n-1];
100 rhat = (un[j+n]*b + un[j+n-1]) - qhat*vn[n-1];
101 again:
102 if (qhat >= b || qhat*vn[n-2] > b*rhat + un[j+n-2])
103 { qhat = qhat - 1;
104 rhat = rhat + vn[n-1];
105 if (rhat < b) goto again;
106 }
107
108 // Multiply and subtract.
109 k = 0;
110 #ifdef ORIGINAL
111 for (i = 0; i < n; i++) {
112 p = qhat*vn[i];
113 t = un[i+j] - k - (p & 0xFFFFFFFFLL);
114 un[i+j] = (t & 0xffffffffLL);
115 k = (p >> 32) - (t >> 32);
116 }
117 #endif
118 #ifdef EXPERIMENT1
119 for (i = 0; i < n; i++) {
120 unsigned rhi;
121 unsigned long long sum;
122 p = ((unsigned long long)un[i+j])+ ~(qhat*vn[i]) + k;
123 rhi = (p >> 32);
124 if (((unsigned long long)k) <= 1) rhi += 1;
125 k = rhi;
126 un[i+j] = sum & 0xffffffff;
127 }
128 #endif
129 #define EXPERIMENT2
130 #ifdef EXPERIMENT2
131 unsigned long phi[200]; // yes, not malloced, we know
132 unsigned long plo[200]; // yes, not malloced, we know
133 // double-width multiply with strange subtract only on bottom half
134 for (i = 0; i < n; i++) {
135 unsigned long long p = qhat*vn[i];
136 plo[i] = un[i+j] - (p&0xffffffffLL);
137 phi[i] = p >> 32;
138 }
139 for (i = 0; i < n; i++) {
140 t = plo[i] - k; // subtract previous carry
141 un[i+j] = (t & 0xffffffffLL);
142 k = phi[i] - (t >> 32); // take top-halves for new carry
143 }
144 #endif
145 t = un[j+n] - k;
146 un[j+n] = t;
147
148 q[j] = qhat; // Store quotient digit.
149 if (t < 0) { // If we subtracted too
150 q[j] = q[j] - 1; // much, add back.
151 k = 0;
152 for (i = 0; i < n; i++) {
153 t = (unsigned long long)un[i+j] + vn[i] + k;
154 un[i+j] = t;
155 k = t >> 32;
156 }
157 un[j+n] = un[j+n] + k;
158 }
159 } // End j.
160 // If the caller wants the remainder, unnormalize
161 // it and pass it back.
162 if (r != NULL) {
163 for (i = 0; i < n-1; i++)
164 r[i] = (un[i] >> s) | ((unsigned long long)un[i+1] << (32-s));
165 r[n-1] = un[n-1] >> s;
166 }
167 return 0;
168 }
169
170 int errors;
171
172 void check(unsigned q[], unsigned r[],
173 unsigned u[], unsigned v[],
174 int m, int n,
175 unsigned cq[], unsigned cr[]) {
176 int i, szq;
177
178 szq = max(m - n + 1, 1);
179 for (i = 0; i < szq; i++) {
180 if (q[i] != cq[i]) {
181 errors = errors + 1;
182 dumpit("Error, dividend u =", m, u);
183 dumpit(" divisor v =", n, v);
184 dumpit("For quotient, got:", m-n+1, q);
185 dumpit(" Should get:", m-n+1, cq);
186 return;
187 }
188 }
189 for (i = 0; i < n; i++) {
190 if (r[i] != cr[i]) {
191 errors = errors + 1;
192 dumpit("Error, dividend u =", m, u);
193 dumpit(" divisor v =", n, v);
194 dumpit("For remainder, got:", n, r);
195 dumpit(" Should get:", n, cr);
196 return;
197 }
198 }
199 return;
200 }
201
202 int main() {
203 static unsigned test[] = {
204 // m, n, u..., v..., cq..., cr....
205 1, 1, 3, 0, 1, 1, // Error, divide by 0.
206 1, 2, 7, 1,3, 0, 7,0, // Error, n > m.
207 2, 2, 0,0, 1,0, 0, 0,0, // Error, incorrect remainder cr.
208 1, 1, 3, 2, 1, 1,
209 1, 1, 3, 3, 1, 0,
210 1, 1, 3, 4, 0, 3,
211 1, 1, 0, 0xffffffff, 0, 0,
212 1, 1, 0xffffffff, 1, 0xffffffff, 0,
213 1, 1, 0xffffffff, 0xffffffff, 1, 0,
214 1, 1, 0xffffffff, 3, 0x55555555, 0,
215 2, 1, 0xffffffff,0xffffffff, 1, 0xffffffff,0xffffffff, 0,
216 2, 1, 0xffffffff,0xffffffff, 0xffffffff, 1,1, 0,
217 2, 1, 0xffffffff,0xfffffffe, 0xffffffff, 0xffffffff,0, 0xfffffffe,
218 2, 1, 0x00005678,0x00001234, 0x00009abc, 0x1e1dba76,0, 0x6bd0,
219 2, 2, 0,0, 0,1, 0, 0,0,
220 2, 2, 0,7, 0,3, 2, 0,1,
221 2, 2, 5,7, 0,3, 2, 5,1,
222 2, 2, 0,6, 0,2, 3, 0,0,
223 1, 1, 0x80000000, 0x40000001, 0x00000001, 0x3fffffff,
224 2, 1, 0x00000000,0x80000000, 0x40000001, 0xfffffff8,0x00000001, 0x00000008,
225 2, 2, 0x00000000,0x80000000, 0x00000001,0x40000000, 0x00000001, 0xffffffff,0x3fffffff,
226 2, 2, 0x0000789a,0x0000bcde, 0x0000789a,0x0000bcde, 1, 0,0,
227 2, 2, 0x0000789b,0x0000bcde, 0x0000789a,0x0000bcde, 1, 1,0,
228 2, 2, 0x00007899,0x0000bcde, 0x0000789a,0x0000bcde, 0, 0x00007899,0x0000bcde,
229 2, 2, 0x0000ffff,0x0000ffff, 0x0000ffff,0x0000ffff, 1, 0,0,
230 2, 2, 0x0000ffff,0x0000ffff, 0x00000000,0x00000001, 0x0000ffff, 0x0000ffff,0,
231 3, 2, 0x000089ab,0x00004567,0x00000123, 0x00000000,0x00000001, 0x00004567,0x00000123, 0x000089ab,0,
232 3, 2, 0x00000000,0x0000fffe,0x00008000, 0x0000ffff,0x00008000, 0xffffffff,0x00000000, 0x0000ffff,0x00007fff, // Shows that first qhat can = b + 1.
233 3, 3, 0x00000003,0x00000000,0x80000000, 0x00000001,0x00000000,0x20000000, 0x00000003, 0,0,0x20000000, // Adding back step req'd.
234 3, 3, 0x00000003,0x00000000,0x00008000, 0x00000001,0x00000000,0x00002000, 0x00000003, 0,0,0x00002000, // Adding back step req'd.
235 4, 3, 0,0,0x00008000,0x00007fff, 1,0,0x00008000, 0xfffe0000,0, 0x00020000,0xffffffff,0x00007fff, // Add back req'd.
236 4, 3, 0,0x0000fffe,0,0x00008000, 0x0000ffff,0,0x00008000, 0xffffffff,0, 0x0000ffff,0xffffffff,0x00007fff, // Shows that mult-sub quantity cannot be treated as signed.
237 4, 3, 0,0xfffffffe,0,0x80000000, 0x0000ffff,0,0x80000000, 0x00000000,1, 0x00000000,0xfffeffff,0x00000000, // Shows that mult-sub quantity cannot be treated as signed.
238 4, 3, 0,0xfffffffe,0,0x80000000, 0xffffffff,0,0x80000000, 0xffffffff,0, 0xffffffff,0xffffffff,0x7fffffff, // Shows that mult-sub quantity cannot be treated as signed.
239 };
240 int i, n, m, ncases, f;
241 unsigned q[10], r[10];
242 unsigned *u, *v, *cq, *cr;
243
244 printf("divmnu:\n");
245 i = 0;
246 ncases = 0;
247 while (i < sizeof(test)/4) {
248 m = test[i];
249 n = test[i+1];
250 u = &test[i+2];
251 v = &test[i+2+m];
252 cq = &test[i+2+m+n];
253 cr = &test[i+2+m+n+max(m-n+1, 1)];
254
255 f = divmnu(q, r, u, v, m, n);
256 if (f) {
257 dumpit("Error return code for dividend u =", m, u);
258 dumpit(" divisor v =", n, v);
259 errors = errors + 1;
260 }
261 else
262 check(q, r, u, v, m, n, cq, cr);
263 i = i + 2 + m + n + max(m-n+1, 1) + n;
264 ncases = ncases + 1;
265 }
266
267 printf("%d errors out of %d cases; there should be 3.\n", errors, ncases);
268 return 0;
269 }