From 5bc9709bc9496e19eb66907029abd1898ceaf177 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sun, 24 Apr 2022 18:06:18 +0100 Subject: [PATCH] morphing divmnu64.c into big* functions then attempt goldschmidt --- openpower/sv/biginteger/divgnu64.c | 348 +++++++++++++++++++++++++++++ 1 file changed, 348 insertions(+) create mode 100644 openpower/sv/biginteger/divgnu64.c diff --git a/openpower/sv/biginteger/divgnu64.c b/openpower/sv/biginteger/divgnu64.c new file mode 100644 index 000000000..e40831fc7 --- /dev/null +++ b/openpower/sv/biginteger/divgnu64.c @@ -0,0 +1,348 @@ +/* original source code from Hackers-Delight + https://github.com/hcs0/Hackers-Delight +*/ +/* This divides an n-word dividend by an m-word divisor, giving an +n-m+1-word quotient and m-word remainder. The bignums are in arrays of +words. Here a "word" is 32 bits. This routine is designed for a 64-bit +machine which has a 64/64 division instruction. */ + +#include +#include +#include +#include //To define "exit", req'd by XLC. + +#define max(x, y) ((x) > (y) ? (x) : (y)) + +int nlz(unsigned x) +{ + int n; + + if (x == 0) + return (32); + n = 0; + if (x <= 0x0000FFFF) + { + n = n + 16; + x = x << 16; + } + if (x <= 0x00FFFFFF) + { + n = n + 8; + x = x << 8; + } + if (x <= 0x0FFFFFFF) + { + n = n + 4; + x = x << 4; + } + if (x <= 0x3FFFFFFF) + { + n = n + 2; + x = x << 2; + } + if (x <= 0x7FFFFFFF) + { + n = n + 1; + } + return n; +} + +void dumpit(char *msg, int n, unsigned v[]) +{ + int i; + printf("%s", msg); + for (i = n - 1; i >= 0; i--) + printf(" %08x", v[i]); + printf("\n"); +} + +bool bigmul(unsigned long long qhat, unsigned vn[], unsigned un[], + int j, int m, int n) +{ + long long t, k; + int s, i; + // Multiply and subtract. + uint32_t carry = 0; + uint32_t product[n + 1]; + // VL = n + 1 + // sv.madded product.v, vn.v, qhat.s, carry.s + for (int i = 0; i <= n; i++) + { + uint32_t vn_v = i < n ? vn[i] : 0; + uint64_t value = (uint64_t)vn_v * (uint64_t)qhat + carry; + carry = (uint32_t)(value >> 32); + product[i] = (uint32_t)value; + } + return carry != 0; +} + +bool bigsub(unsigned long long qhat, unsigned vn[], unsigned un[], + int j, int m, int n, bool ca) +{ + // VL = n + 1 + // sv.subfe un_j.v, product.v, un_j.v + for (int i = 0; i <= n; i++) + { + uint64_t value = (uint64_t)~vn[i] + (uint64_t)un[i] + ca; + ca = value >> 32 != 0; + un[i] = value; + } + bool need_fixup = !ca; + return need_fixup; +} + +bool bigmulsub(unsigned long long qhat, unsigned vn[], unsigned un[], + int j, int m, int n) +{ + unsigned long long p; // Product of two digits. + long long t, k; + int s, i; + // Multiply and subtract. + uint32_t carry = 0; + uint32_t product[n + 1]; + // VL = n + 1 + // sv.madded product.v, vn.v, qhat.s, carry.s + for (int i = 0; i <= n; i++) + { + uint32_t vn_v = i < n ? vn[i] : 0; + uint64_t value = (uint64_t)vn_v * (uint64_t)qhat + carry; + carry = (uint32_t)(value >> 32); + product[i] = (uint32_t)value; + } + bool ca = true; + uint32_t *un_j = &un[j]; + // VL = n + 1 + // sv.subfe un_j.v, product.v, un_j.v + for (int i = 0; i <= n; i++) + { + uint64_t value = (uint64_t)~product[i] + (uint64_t)un_j[i] + ca; + ca = value >> 32 != 0; + un_j[i] = value; + } + bool need_fixup = !ca; + return need_fixup; +} + +/* q[0], r[0], u[0], and v[0] contain the LEAST significant words. +(The sequence is in little-endian order). + +This is a fairly precise implementation of Knuth's Algorithm D, for a +binary computer with base b = 2**32. The caller supplies: + 1. Space q for the quotient, m - n + 1 words (at least one). + 2. Space r for the remainder (optional), n words. + 3. The dividend u, m words, m >= 1. + 4. The divisor v, n words, n >= 2. +The most significant digit of the divisor, v[n-1], must be nonzero. The +dividend u may have leading zeros; this just makes the algorithm take +longer and makes the quotient contain more leading zeros. A value of +NULL may be given for the address of the remainder to signify that the +caller does not want the remainder. + The program does not alter the input parameters u and v. + The quotient and remainder returned may have leading zeros. The +function itself returns a value of 0 for success and 1 for invalid +parameters (e.g., division by 0). + For now, we must have m >= n. Knuth's Algorithm D also requires +that the dividend be at least as long as the divisor. (In his terms, +m >= 0 (unstated). Therefore m+n >= n.) */ + +int divmnu(unsigned q[], unsigned r[], const unsigned u[], const unsigned v[], + int m, int n) +{ + + const unsigned long long b = 1LL<<32; // Number base (2**32). + unsigned *un, *vn; // Normalized form of u, v. + unsigned long long qhat; // Estimated quotient digit. + unsigned long long rhat; // A remainder. + unsigned long long p; // Product of two digits. + long long t, k; + int s, i, j; + + if (m < n || n <= 0 || v[n - 1] == 0) + return 1; // Return if invalid param. + + if (n == 1) + { // Take care of + k = 0; // the case of a + for (j = m - 1; j >= 0; j--) + { // single-digit + q[j] = (k * b + u[j]) / v[0]; // divisor here. + k = (k * b + u[j]) - q[j] * v[0]; + } + if (r != NULL) + r[0] = k; + return 0; + } + + /* Normalize by shifting v left just enough so that its high-order + bit is on, and shift u left the same amount. We may have to append a + high-order digit on the dividend; we do that unconditionally. */ + + s = nlz(v[n - 1]); // 0 <= s <= 31. + vn = (unsigned *)alloca(4 * n); + for (i = n - 1; i > 0; i--) + vn[i] = (v[i] << s) | ((unsigned long long)v[i - 1] >> (32 - s)); + vn[0] = v[0] << s; + + un = (unsigned *)alloca(4 * (m + 1)); + un[m] = (unsigned long long)u[m - 1] >> (32 - s); + for (i = m - 1; i > 0; i--) + un[i] = (u[i] << s) | ((unsigned long long)u[i - 1] >> (32 - s)); + un[0] = u[0] << s; + + for (j = m - n; j >= 0; j--) + { // Main loop. + // Compute estimate qhat of q[j] from top 2 digits + uint64_t dig2 = ((uint64_t)un[j + n] << 32) | un[j + n - 1]; + qhat = dig2 / vn[n - 1]; + rhat = dig2 % vn[n - 1]; + again: + // use 3rd-from-top digit to obtain better accuracy + if (qhat >= b || qhat * vn[n - 2] > b * rhat + un[j + n - 2]) + { + qhat = qhat - 1; + rhat = rhat + vn[n - 1]; + if (rhat < b) + goto again; + } + + bool need_fixup = bigmulsub(qhat, vn, un, j, m, n); + + q[j] = qhat; // Store quotient digit. + if (need_fixup) + { // If we subtracted too + q[j] = q[j] - 1; // much, add back. + k = 0; + for (i = 0; i < n; i++) + { + t = (unsigned long long)un[i + j] + vn[i] + k; + un[i + j] = t; + k = t >> 32; + } + un[j + n] = un[j + n] + k; + } + } // End j. + // If the caller wants the remainder, unnormalize + // it and pass it back. + if (r != NULL) + { + for (i = 0; i < n - 1; i++) + r[i] = (un[i] >> s) | ((unsigned long long)un[i + 1] << (32 - s)); + r[n - 1] = un[n - 1] >> s; + } + return 0; +} + +int errors; + +void check(unsigned q[], unsigned r[], unsigned u[], unsigned v[], int m, + int n, unsigned cq[], unsigned cr[]) +{ + int i, szq; + + szq = max(m - n + 1, 1); + for (i = 0; i < szq; i++) + { + if (q[i] != cq[i]) + { + errors = errors + 1; + dumpit("Error, dividend u =", m, u); + dumpit(" divisor v =", n, v); + dumpit("For quotient, got:", m - n + 1, q); + dumpit(" Should get:", m - n + 1, cq); + return; + } + } + for (i = 0; i < n; i++) + { + if (r[i] != cr[i]) + { + errors = errors + 1; + dumpit("Error, dividend u =", m, u); + dumpit(" divisor v =", n, v); + dumpit("For remainder, got:", n, r); + dumpit(" Should get:", n, cr); + return; + } + } + return; +} + +int main() +{ + static struct + { + int m, n; + uint32_t u[10], v[10], cq[10], cr[10]; + bool error; + } test[] = { + // clang-format off + {.m=1, .n=1, .u={3}, .v={0}, .cq={1}, .cr={1}, .error=true}, // Error, divide by 0. + {.m=1, .n=2, .u={7}, .v={1,3}, .cq={0}, .cr={7,0}, .error=true}, // Error, n > m. + {.m=2, .n=2, .u={0,0}, .v={1,0}, .cq={0}, .cr={0,0}, .error=true}, // Error, incorrect remainder cr. + {.m=1, .n=1, .u={3}, .v={2}, .cq={1}, .cr={1}}, + {.m=1, .n=1, .u={3}, .v={3}, .cq={1}, .cr={0}}, + {.m=1, .n=1, .u={3}, .v={4}, .cq={0}, .cr={3}}, + {.m=1, .n=1, .u={0}, .v={0xffffffff}, .cq={0}, .cr={0}}, + {.m=1, .n=1, .u={0xffffffff}, .v={1}, .cq={0xffffffff}, .cr={0}}, + {.m=1, .n=1, .u={0xffffffff}, .v={0xffffffff}, .cq={1}, .cr={0}}, + {.m=1, .n=1, .u={0xffffffff}, .v={3}, .cq={0x55555555}, .cr={0}}, + {.m=2, .n=1, .u={0xffffffff,0xffffffff}, .v={1}, .cq={0xffffffff,0xffffffff}, .cr={0}}, + {.m=2, .n=1, .u={0xffffffff,0xffffffff}, .v={0xffffffff}, .cq={1,1}, .cr={0}}, + {.m=2, .n=1, .u={0xffffffff,0xfffffffe}, .v={0xffffffff}, .cq={0xffffffff,0}, .cr={0xfffffffe}}, + {.m=2, .n=1, .u={0x00005678,0x00001234}, .v={0x00009abc}, .cq={0x1e1dba76,0}, .cr={0x6bd0}}, + {.m=2, .n=2, .u={0,0}, .v={0,1}, .cq={0}, .cr={0,0}}, + {.m=2, .n=2, .u={0,7}, .v={0,3}, .cq={2}, .cr={0,1}}, + {.m=2, .n=2, .u={5,7}, .v={0,3}, .cq={2}, .cr={5,1}}, + {.m=2, .n=2, .u={0,6}, .v={0,2}, .cq={3}, .cr={0,0}}, + {.m=1, .n=1, .u={0x80000000}, .v={0x40000001}, .cq={0x00000001}, .cr={0x3fffffff}}, + {.m=2, .n=1, .u={0x00000000,0x80000000}, .v={0x40000001}, .cq={0xfffffff8,0x00000001}, .cr={0x00000008}}, + {.m=2, .n=2, .u={0x00000000,0x80000000}, .v={0x00000001,0x40000000}, .cq={0x00000001}, .cr={0xffffffff,0x3fffffff}}, + {.m=2, .n=2, .u={0x0000789a,0x0000bcde}, .v={0x0000789a,0x0000bcde}, .cq={1}, .cr={0,0}}, + {.m=2, .n=2, .u={0x0000789b,0x0000bcde}, .v={0x0000789a,0x0000bcde}, .cq={1}, .cr={1,0}}, + {.m=2, .n=2, .u={0x00007899,0x0000bcde}, .v={0x0000789a,0x0000bcde}, .cq={0}, .cr={0x00007899,0x0000bcde}}, + {.m=2, .n=2, .u={0x0000ffff,0x0000ffff}, .v={0x0000ffff,0x0000ffff}, .cq={1}, .cr={0,0}}, + {.m=2, .n=2, .u={0x0000ffff,0x0000ffff}, .v={0x00000000,0x00000001}, .cq={0x0000ffff}, .cr={0x0000ffff,0}}, + {.m=3, .n=2, .u={0x000089ab,0x00004567,0x00000123}, .v={0x00000000,0x00000001}, .cq={0x00004567,0x00000123}, .cr={0x000089ab,0}}, + {.m=3, .n=2, .u={0x00000000,0x0000fffe,0x00008000}, .v={0x0000ffff,0x00008000}, .cq={0xffffffff,0x00000000}, .cr={0x0000ffff,0x00007fff}}, // Shows that first qhat can = b + 1. + {.m=3, .n=3, .u={0x00000003,0x00000000,0x80000000}, .v={0x00000001,0x00000000,0x20000000}, .cq={0x00000003}, .cr={0,0,0x20000000}}, // Adding back step req'd. + {.m=3, .n=3, .u={0x00000003,0x00000000,0x00008000}, .v={0x00000001,0x00000000,0x00002000}, .cq={0x00000003}, .cr={0,0,0x00002000}}, // Adding back step req'd. + {.m=4, .n=3, .u={0,0,0x00008000,0x00007fff}, .v={1,0,0x00008000}, .cq={0xfffe0000,0}, .cr={0x00020000,0xffffffff,0x00007fff}}, // Add back req'd. + {.m=4, .n=3, .u={0,0x0000fffe,0,0x00008000}, .v={0x0000ffff,0,0x00008000}, .cq={0xffffffff,0}, .cr={0x0000ffff,0xffffffff,0x00007fff}}, // Shows that mult-sub quantity cannot be treated as signed. + {.m=4, .n=3, .u={0,0xfffffffe,0,0x80000000}, .v={0x0000ffff,0,0x80000000}, .cq={0x00000000,1}, .cr={0x00000000,0xfffeffff,0x00000000}}, // Shows that mult-sub quantity cannot be treated as signed. + {.m=4, .n=3, .u={0,0xfffffffe,0,0x80000000}, .v={0xffffffff,0,0x80000000}, .cq={0xffffffff,0}, .cr={0xffffffff,0xffffffff,0x7fffffff}}, // Shows that mult-sub quantity cannot be treated as signed. + // clang-format on + }; + const int ncases = sizeof(test) / sizeof(test[0]); + unsigned q[10], r[10]; + + printf("divmnu:\n"); + for (int i = 0; i < ncases; i++) + { + int m = test[i].m; + int n = test[i].n; + uint32_t *u = test[i].u; + uint32_t *v = test[i].v; + uint32_t *cq = test[i].cq; + uint32_t *cr = test[i].cr; + + int f = divmnu(q, r, u, v, m, n); + if (f && !test[i].error) + { + dumpit("Unexpected error return code for dividend u =", m, u); + dumpit(" divisor v =", n, v); + errors = errors + 1; + } + else if (!f && test[i].error) + { + dumpit("Unexpected success return code for dividend u =", m, u); + dumpit(" divisor v =", n, v); + errors = errors + 1; + } + + if (!f) + check(q, r, u, v, m, n, cq, cr); + } + + printf("%d test failures out of %d cases.\n", errors, ncases); + return 0; +} -- 2.30.2