From: Luke Kenneth Casson Leighton Date: Tue, 19 Apr 2022 16:13:27 +0000 (+0100) Subject: move c code to biginteger X-Git-Tag: opf_rfc_ls005_v1~2695 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b4a403be448585627f3b0a7429b246b04bf8dd28;p=libreriscv.git move c code to biginteger --- diff --git a/openpower/sv/biginteger.mdwn b/openpower/sv/biginteger.mdwn index 4cdc5ca78..670c8dc24 100644 --- a/openpower/sv/biginteger.mdwn +++ b/openpower/sv/biginteger.mdwn @@ -1,3 +1,5 @@ +[[!tag standards]] + # Big Integer Arithmetic **DRAFT STATUS** 19apr2021 diff --git a/openpower/sv/biginteger/divmnu64.c b/openpower/sv/biginteger/divmnu64.c new file mode 100644 index 000000000..7ab938ea7 --- /dev/null +++ b/openpower/sv/biginteger/divmnu64.c @@ -0,0 +1,376 @@ +/* original source code from Hackers-Delight + https://github.com/hcs0/Hackers-Delight +*/ +/* This divides an n-word dividend by an m-word divisor, giving an +n-m+1-word quotient and m-word remainder. The bignums are in arrays of +words. Here a "word" is 32 bits. This routine is designed for a 64-bit +machine which has a 64/64 division instruction. */ + +#include +#include //To define "exit", req'd by XLC. +#include +#include + +#define max(x, y) ((x) > (y) ? (x) : (y)) + +int nlz(unsigned x) { + int n; + + if (x == 0) return(32); + n = 0; + if (x <= 0x0000FFFF) {n = n +16; x = x <<16;} + if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;} + if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;} + if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;} + if (x <= 0x7FFFFFFF) {n = n + 1;} + return n; +} + +void dumpit(char *msg, int n, unsigned v[]) { + int i; + printf("%s", msg); + for (i = n-1; i >= 0; i--) printf(" %08x", v[i]); + printf("\n"); +} + +/* q[0], r[0], u[0], and v[0] contain the LEAST significant words. +(The sequence is in little-endian order). + +This is a fairly precise implementation of Knuth's Algorithm D, for a +binary computer with base b = 2**32. The caller supplies: + 1. Space q for the quotient, m - n + 1 words (at least one). + 2. Space r for the remainder (optional), n words. + 3. The dividend u, m words, m >= 1. + 4. The divisor v, n words, n >= 2. +The most significant digit of the divisor, v[n-1], must be nonzero. The +dividend u may have leading zeros; this just makes the algorithm take +longer and makes the quotient contain more leading zeros. A value of +NULL may be given for the address of the remainder to signify that the +caller does not want the remainder. + The program does not alter the input parameters u and v. + The quotient and remainder returned may have leading zeros. The +function itself returns a value of 0 for success and 1 for invalid +parameters (e.g., division by 0). + For now, we must have m >= n. Knuth's Algorithm D also requires +that the dividend be at least as long as the divisor. (In his terms, +m >= 0 (unstated). Therefore m+n >= n.) */ + +int divmnu(unsigned q[], unsigned r[], + const unsigned u[], const unsigned v[], + int m, int n) { + + const unsigned long long b = 4294967296LL; // Number base (2**32). + unsigned *un, *vn; // Normalized form of u, v. + unsigned long long qhat; // Estimated quotient digit. + unsigned long long rhat; // A remainder. + unsigned long long p; // Product of two digits. + long long t, k; + int s, i, j; + + if (m < n || n <= 0 || v[n-1] == 0) + return 1; // Return if invalid param. + + if (n == 1) { // Take care of + k = 0; // the case of a + for (j = m - 1; j >= 0; j--) { // single-digit + q[j] = (k*b + u[j])/v[0]; // divisor here. + k = (k*b + u[j]) - q[j]*v[0]; + } + if (r != NULL) r[0] = k; + return 0; + } + + /* Normalize by shifting v left just enough so that its high-order + bit is on, and shift u left the same amount. We may have to append a + high-order digit on the dividend; we do that unconditionally. */ + + s = nlz(v[n-1]); // 0 <= s <= 31. + vn = (unsigned *)alloca(4*n); + for (i = n - 1; i > 0; i--) + vn[i] = (v[i] << s) | ((unsigned long long)v[i-1] >> (32-s)); + vn[0] = v[0] << s; + + un = (unsigned *)alloca(4*(m + 1)); + un[m] = (unsigned long long)u[m-1] >> (32-s); + for (i = m - 1; i > 0; i--) + un[i] = (u[i] << s) | ((unsigned long long)u[i-1] >> (32-s)); + un[0] = u[0] << s; + + for (j = m - n; j >= 0; j--) { // Main loop. + // Compute estimate qhat of q[j]. + qhat = (un[j+n]*b + un[j+n-1])/vn[n-1]; + rhat = (un[j+n]*b + un[j+n-1]) - qhat*vn[n-1]; +again: + if (qhat >= b || qhat*vn[n-2] > b*rhat + un[j+n-2]) + { qhat = qhat - 1; + rhat = rhat + vn[n-1]; + if (rhat < b) goto again; + } +#define MUL_RSUB_CARRY_2_STAGE2 +#ifdef ORIGINAL + // Multiply and subtract. + k = 0; + for (i = 0; i < n; i++) { + p = qhat*vn[i]; + t = un[i+j] - k - (p & 0xFFFFFFFFLL); + un[i+j] = t; + k = (p >> 32) - (t >> 32); + } + t = un[j+n] - k; + un[j+n] = t; + bool need_fixup = t < 0; +#elif defined(SUB_MUL_BORROW) + (void)p; // shut up unused variable warning + + // Multiply and subtract. + uint32_t borrow = 0; + for(int i = 0; i <= n; i++) { + uint32_t vn_i = i < n ? vn[i] : 0; + uint64_t value = un[i + j] - (uint64_t)qhat * vn_i - borrow; + borrow = -(uint32_t)(value >> 32); + un[i + j] = (uint32_t)value; + } + bool need_fixup = borrow != 0; +#elif defined(MUL_RSUB_CARRY) + (void)p; // shut up unused variable warning + + // Multiply and subtract. + uint32_t carry = 1; + for(int i = 0; i <= n; i++) { + uint32_t vn_i = i < n ? vn[i] : 0; + uint64_t result = un[i + j] + ~((uint64_t)qhat * vn_i) + carry; + uint32_t result_high = result >> 32; + if(carry <= 1) + result_high++; + carry = result_high; + un[i + j] = (uint32_t)result; + } + bool need_fixup = carry != 1; +#elif defined(SUB_MUL_BORROW_2_STAGE) + (void)p; // shut up unused variable warning + + // Multiply and subtract. + uint32_t borrow = 0; + uint32_t phi[2000]; // plenty space + uint32_t plo[2000]; // plenty space + // first, perform mul-and-sub and store in split hi-lo + // this shows the vectorised sv.msubx which stores 128-bit in + // two 64-bit registers + for(int i = 0; i <= n; i++) { + uint32_t vn_i = i < n ? vn[i] : 0; + uint64_t value = un[i + j] - (uint64_t)qhat * vn_i; + plo[i] = value & 0xffffffffLL; + phi[i] = value >> 32; + } + // second, reconstruct the 64-bit result, subtract borrow, + // store top-half (-ve) in new borrow and store low-half as answer + // this is the new (odd) instruction + for(int i = 0; i <= n; i++) { + uint64_t value = (((uint64_t)phi[i]<<32) | plo[i]) - borrow; + borrow = ~(value >> 32)+1; // -(uint32_t)(value >> 32); + un[i + j] = (uint32_t)value; + } + bool need_fixup = borrow != 0; +#elif defined(MUL_RSUB_CARRY_2_STAGE) + (void)p; // shut up unused variable warning + + // Multiply and subtract. + uint32_t carry = 1; + uint32_t phi[2000]; // plenty space + uint32_t plo[2000]; // plenty space + for(int i = 0; i <= n; i++) { + uint32_t vn_i = i < n ? vn[i] : 0; + uint64_t value = un[i + j] + ~((uint64_t)qhat * vn_i); + plo[i] = value & 0xffffffffLL; + phi[i] = value >> 32; + } + for(int i = 0; i <= n; i++) { + uint64_t result = (((uint64_t)phi[i]<<32) | plo[i]) + carry; + uint32_t result_high = result >> 32; + if(carry <= 1) + result_high++; + carry = result_high; + un[i + j] = (uint32_t)result; + } + bool need_fixup = carry != 1; +#elif defined(MUL_RSUB_CARRY_2_STAGE1) + (void)p; // shut up unused variable warning + + // Multiply and subtract. + uint32_t carry = 1; + uint32_t phi[2000]; // plenty space + uint32_t plo[2000]; // plenty space + // same mul-and-sub as SUB_MUL_BORROW but not the same + // mul-and-sub-minus-one as MUL_RSUB_CARRY + for(int i = 0; i <= n; i++) { + uint32_t vn_i = i < n ? vn[i] : 0; + uint64_t value = un[i + j] - ((uint64_t)qhat * vn_i); + plo[i] = value & 0xffffffffLL; + phi[i] = value >> 32; + } + // compensate for the +1 that was added by mul-and-sub by subtracting + // it here (as ~(0)) + for(int i = 0; i <= n; i++) { + uint64_t result = (((uint64_t)phi[i]<<32) | plo[i]) + carry+ + ~(0); // a way to express "-1" + uint32_t result_high = result >> 32; + if(carry <= 1) + result_high++; + carry = result_high; + un[i + j] = (uint32_t)result; + } + bool need_fixup = carry != 1; +#elif defined(MUL_RSUB_CARRY_2_STAGE2) + (void)p; // shut up unused variable warning + + // Multiply and subtract. + uint32_t carry = 0; + uint32_t phi[2000]; // plenty space + uint32_t plo[2000]; // plenty space + // same mul-and-sub as SUB_MUL_BORROW but not the same + // mul-and-sub-minus-one as MUL_RSUB_CARRY + for(int i = 0; i <= n; i++) { + uint32_t vn_i = i < n ? vn[i] : 0; + uint64_t value = un[i + j] - ((uint64_t)qhat * vn_i); + plo[i] = value & 0xffffffffLL; + phi[i] = value >> 32; + } + // NOW it starts to make sense. when no carry this time, next + // carry as-is. rlse next carry reduces by one. + // it here (as ~(0)) + for(int i = 0; i <= n; i++) { + uint64_t result = (((uint64_t)phi[i]<<32) | plo[i]) + carry; + uint32_t result_high = result >> 32; + if(carry == 0) + carry = result_high; + else + carry = result_high-1; + un[i + j] = (uint32_t)result; + } + bool need_fixup = carry != 0; +#else +#error need to choose one of the algorithm options; e.g. -DORIGINAL +#endif + + q[j] = qhat; // Store quotient digit. + if (need_fixup) { // If we subtracted too + q[j] = q[j] - 1; // much, add back. + k = 0; + for (i = 0; i < n; i++) { + t = (unsigned long long)un[i+j] + vn[i] + k; + un[i+j] = t; + k = t >> 32; + } + un[j+n] = un[j+n] + k; + } + } // End j. + // If the caller wants the remainder, unnormalize + // it and pass it back. + if (r != NULL) { + for (i = 0; i < n-1; i++) + r[i] = (un[i] >> s) | ((unsigned long long)un[i+1] << (32-s)); + r[n-1] = un[n-1] >> s; + } + return 0; +} + +int errors; + +void check(unsigned q[], unsigned r[], + unsigned u[], unsigned v[], + int m, int n, + unsigned cq[], unsigned cr[]) { + int i, szq; + + szq = max(m - n + 1, 1); + for (i = 0; i < szq; i++) { + if (q[i] != cq[i]) { + errors = errors + 1; + dumpit("Error, dividend u =", m, u); + dumpit(" divisor v =", n, v); + dumpit("For quotient, got:", m-n+1, q); + dumpit(" Should get:", m-n+1, cq); + return; + } + } + for (i = 0; i < n; i++) { + if (r[i] != cr[i]) { + errors = errors + 1; + dumpit("Error, dividend u =", m, u); + dumpit(" divisor v =", n, v); + dumpit("For remainder, got:", n, r); + dumpit(" Should get:", n, cr); + return; + } + } + return; +} + +int main() { + static unsigned test[] = { + // m, n, u..., v..., cq..., cr.... + 1, 1, 3, 0, 1, 1, // Error, divide by 0. + 1, 2, 7, 1,3, 0, 7,0, // Error, n > m. + 2, 2, 0,0, 1,0, 0, 0,0, // Error, incorrect remainder cr. + 1, 1, 3, 2, 1, 1, + 1, 1, 3, 3, 1, 0, + 1, 1, 3, 4, 0, 3, + 1, 1, 0, 0xffffffff, 0, 0, + 1, 1, 0xffffffff, 1, 0xffffffff, 0, + 1, 1, 0xffffffff, 0xffffffff, 1, 0, + 1, 1, 0xffffffff, 3, 0x55555555, 0, + 2, 1, 0xffffffff,0xffffffff, 1, 0xffffffff,0xffffffff, 0, + 2, 1, 0xffffffff,0xffffffff, 0xffffffff, 1,1, 0, + 2, 1, 0xffffffff,0xfffffffe, 0xffffffff, 0xffffffff,0, 0xfffffffe, + 2, 1, 0x00005678,0x00001234, 0x00009abc, 0x1e1dba76,0, 0x6bd0, + 2, 2, 0,0, 0,1, 0, 0,0, + 2, 2, 0,7, 0,3, 2, 0,1, + 2, 2, 5,7, 0,3, 2, 5,1, + 2, 2, 0,6, 0,2, 3, 0,0, + 1, 1, 0x80000000, 0x40000001, 0x00000001, 0x3fffffff, + 2, 1, 0x00000000,0x80000000, 0x40000001, 0xfffffff8,0x00000001, 0x00000008, + 2, 2, 0x00000000,0x80000000, 0x00000001,0x40000000, 0x00000001, 0xffffffff,0x3fffffff, + 2, 2, 0x0000789a,0x0000bcde, 0x0000789a,0x0000bcde, 1, 0,0, + 2, 2, 0x0000789b,0x0000bcde, 0x0000789a,0x0000bcde, 1, 1,0, + 2, 2, 0x00007899,0x0000bcde, 0x0000789a,0x0000bcde, 0, 0x00007899,0x0000bcde, + 2, 2, 0x0000ffff,0x0000ffff, 0x0000ffff,0x0000ffff, 1, 0,0, + 2, 2, 0x0000ffff,0x0000ffff, 0x00000000,0x00000001, 0x0000ffff, 0x0000ffff,0, + 3, 2, 0x000089ab,0x00004567,0x00000123, 0x00000000,0x00000001, 0x00004567,0x00000123, 0x000089ab,0, + 3, 2, 0x00000000,0x0000fffe,0x00008000, 0x0000ffff,0x00008000, 0xffffffff,0x00000000, 0x0000ffff,0x00007fff, // Shows that first qhat can = b + 1. + 3, 3, 0x00000003,0x00000000,0x80000000, 0x00000001,0x00000000,0x20000000, 0x00000003, 0,0,0x20000000, // Adding back step req'd. + 3, 3, 0x00000003,0x00000000,0x00008000, 0x00000001,0x00000000,0x00002000, 0x00000003, 0,0,0x00002000, // Adding back step req'd. + 4, 3, 0,0,0x00008000,0x00007fff, 1,0,0x00008000, 0xfffe0000,0, 0x00020000,0xffffffff,0x00007fff, // Add back req'd. + 4, 3, 0,0x0000fffe,0,0x00008000, 0x0000ffff,0,0x00008000, 0xffffffff,0, 0x0000ffff,0xffffffff,0x00007fff, // Shows that mult-sub quantity cannot be treated as signed. + 4, 3, 0,0xfffffffe,0,0x80000000, 0x0000ffff,0,0x80000000, 0x00000000,1, 0x00000000,0xfffeffff,0x00000000, // Shows that mult-sub quantity cannot be treated as signed. + 4, 3, 0,0xfffffffe,0,0x80000000, 0xffffffff,0,0x80000000, 0xffffffff,0, 0xffffffff,0xffffffff,0x7fffffff, // Shows that mult-sub quantity cannot be treated as signed. + }; + int i, n, m, ncases, f; + unsigned q[10], r[10]; + unsigned *u, *v, *cq, *cr; + + printf("divmnu:\n"); + i = 0; + ncases = 0; + while (i < sizeof(test)/4) { + m = test[i]; + n = test[i+1]; + u = &test[i+2]; + v = &test[i+2+m]; + cq = &test[i+2+m+n]; + cr = &test[i+2+m+n+max(m-n+1, 1)]; + + f = divmnu(q, r, u, v, m, n); + if (f) { + dumpit("Error return code for dividend u =", m, u); + dumpit(" divisor v =", n, v); + errors = errors + 1; + } + else + check(q, r, u, v, m, n, cq, cr); + i = i + 2 + m + n + max(m-n+1, 1) + n; + ncases = ncases + 1; + } + + printf("%d errors out of %d cases; there should be 3.\n", errors, ncases); + return 0; +} diff --git a/openpower/sv/biginteger/mulmnu.c b/openpower/sv/biginteger/mulmnu.c new file mode 100644 index 000000000..a38afc4be --- /dev/null +++ b/openpower/sv/biginteger/mulmnu.c @@ -0,0 +1,103 @@ +/* from hacker's delight, originsl by hannah suarez (hcs0) */ +// Computes the m+n-halfword product of n halfwords x m halfwords, unsigned. +// Max line length is 57, to fit in hacker.book. +#include +#include //To define "exit", req'd by XLC. + +// w[0], u[0], and v[0] contain the LEAST significant halfwords. +// (The halfwords are in little-endian order). +// This is Knuth's Algorithm M from [Knuth Vol. 2 Third edition (1998)] +// section 4.3.1. Picture is: +// u[m-1] ... u[1] u[0] +// x v[n-1] ... v[1] v[0] +// -------------------- +// w[m+n-1] ............ w[1] w[0] + +void mulmnu(unsigned short w[], unsigned short u[], + unsigned short v[], int m, int n) { + + unsigned int k, t; + int i, j; + + for (i = 0; i < m; i++) + w[i] = 0; + + for (j = 0; j < n; j++) { + k = 0; + unsigned short phi[2000]; + unsigned short plo[2000]; + for (i = 0; i < m; i++) { + unsigned product = u[i]*v[j] + w[i + j]; + phi[i] = product>>16; + plo[i] = product; + } + for (i = 0; i < m; i++) { + t = (phi[i]<<16) | plo[i] + k; + w[i + j] = t; // (I.e., t & 0xFFFF). + k = t >> 16; + } + w[j + m] = k; + } + return; +} + +int errors; + +void check(unsigned short result[], unsigned short u[], + unsigned short v[], int m, int n, unsigned short correct[]) { + int i, j; + + for (i = 0; i < m + n; i++) { + if (correct[i] != result[i]) { + errors = errors + 1; + printf("Error, m = %d, n = %d, u = ", m, n); + for (j = 0; j < m; j++) printf(" %04x", u[j]); + printf(" v ="); + for (j = 0; j < n; j++) printf(" %04x", v[j]); + printf("\nShould get:"); + for (j = 0; j < n+m; j++) printf(" %04x", correct[j]); + printf("\n Got:"); + for (j = 0; j < n+m; j++) printf(" %04x", result[j]); + printf("\n"); + break; + } + } +} + +int main() { + static unsigned short test[] = { + // m, n, u ..., v ..., result. + 1, 1, 7, 3, 21,0, + 1, 1, 2, 0xFFFF, 0xFFFE,0x0001, // 2*FFFF = 0001_FFFE. + 1, 1, 0xFFFF, 0xFFFF, 1,0xFFFE, + 1, 2, 7, 5, 6, 35,42,0, + 1, 2, 65000, 63000, 64000, 0xBDC0,0x8414,0xF7F5, + 1, 3, 65535, 31000, 32000, 33000, 0x86E8,0xFC17,0xFC17,0x80E7, + 2, 3, 400, 300, 500, 100, 200, 0x0D40,0xE633,0xADB2,0xEA61,0, + 2, 3, 400, 65535, 500, 100, 65534, 0x0D40,0x9A4F,0xFE70,0x01F5,0xFFFD, + 4, 4, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, + 1, 0, 0, 0, 65534, 65535, 65535, 65535, + }; + int i, n, m, ncases; + unsigned short result[10]; + unsigned short *u, *v; + + printf("mulmnu:\n"); + i = 0; + ncases = 0; + while (i < sizeof(test)/2) { + m = test[i]; + n = test[i+1]; + u = &test[i+2]; + v = &test[i+2+m]; + mulmnu(result, u, v, m, n); + check (result, u, v, m, n, &test[i+2+m+n]); + mulmnu(result, v, u, n, m); // Interchange operands. + check (result, v, u, n, m, &test[i+2+m+n]); + i = i + 2 + 2*(m + n); + ncases = ncases + 1; + } + + if (errors == 0) + printf("Passed all %d cases.\n", ncases); +} diff --git a/openpower/sv/bitmanip/divmnu64.c b/openpower/sv/bitmanip/divmnu64.c index 7ab938ea7..203b21b26 100644 --- a/openpower/sv/bitmanip/divmnu64.c +++ b/openpower/sv/bitmanip/divmnu64.c @@ -1,376 +1 @@ -/* original source code from Hackers-Delight - https://github.com/hcs0/Hackers-Delight -*/ -/* This divides an n-word dividend by an m-word divisor, giving an -n-m+1-word quotient and m-word remainder. The bignums are in arrays of -words. Here a "word" is 32 bits. This routine is designed for a 64-bit -machine which has a 64/64 division instruction. */ - -#include -#include //To define "exit", req'd by XLC. -#include -#include - -#define max(x, y) ((x) > (y) ? (x) : (y)) - -int nlz(unsigned x) { - int n; - - if (x == 0) return(32); - n = 0; - if (x <= 0x0000FFFF) {n = n +16; x = x <<16;} - if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;} - if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;} - if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;} - if (x <= 0x7FFFFFFF) {n = n + 1;} - return n; -} - -void dumpit(char *msg, int n, unsigned v[]) { - int i; - printf("%s", msg); - for (i = n-1; i >= 0; i--) printf(" %08x", v[i]); - printf("\n"); -} - -/* q[0], r[0], u[0], and v[0] contain the LEAST significant words. -(The sequence is in little-endian order). - -This is a fairly precise implementation of Knuth's Algorithm D, for a -binary computer with base b = 2**32. The caller supplies: - 1. Space q for the quotient, m - n + 1 words (at least one). - 2. Space r for the remainder (optional), n words. - 3. The dividend u, m words, m >= 1. - 4. The divisor v, n words, n >= 2. -The most significant digit of the divisor, v[n-1], must be nonzero. The -dividend u may have leading zeros; this just makes the algorithm take -longer and makes the quotient contain more leading zeros. A value of -NULL may be given for the address of the remainder to signify that the -caller does not want the remainder. - The program does not alter the input parameters u and v. - The quotient and remainder returned may have leading zeros. The -function itself returns a value of 0 for success and 1 for invalid -parameters (e.g., division by 0). - For now, we must have m >= n. Knuth's Algorithm D also requires -that the dividend be at least as long as the divisor. (In his terms, -m >= 0 (unstated). Therefore m+n >= n.) */ - -int divmnu(unsigned q[], unsigned r[], - const unsigned u[], const unsigned v[], - int m, int n) { - - const unsigned long long b = 4294967296LL; // Number base (2**32). - unsigned *un, *vn; // Normalized form of u, v. - unsigned long long qhat; // Estimated quotient digit. - unsigned long long rhat; // A remainder. - unsigned long long p; // Product of two digits. - long long t, k; - int s, i, j; - - if (m < n || n <= 0 || v[n-1] == 0) - return 1; // Return if invalid param. - - if (n == 1) { // Take care of - k = 0; // the case of a - for (j = m - 1; j >= 0; j--) { // single-digit - q[j] = (k*b + u[j])/v[0]; // divisor here. - k = (k*b + u[j]) - q[j]*v[0]; - } - if (r != NULL) r[0] = k; - return 0; - } - - /* Normalize by shifting v left just enough so that its high-order - bit is on, and shift u left the same amount. We may have to append a - high-order digit on the dividend; we do that unconditionally. */ - - s = nlz(v[n-1]); // 0 <= s <= 31. - vn = (unsigned *)alloca(4*n); - for (i = n - 1; i > 0; i--) - vn[i] = (v[i] << s) | ((unsigned long long)v[i-1] >> (32-s)); - vn[0] = v[0] << s; - - un = (unsigned *)alloca(4*(m + 1)); - un[m] = (unsigned long long)u[m-1] >> (32-s); - for (i = m - 1; i > 0; i--) - un[i] = (u[i] << s) | ((unsigned long long)u[i-1] >> (32-s)); - un[0] = u[0] << s; - - for (j = m - n; j >= 0; j--) { // Main loop. - // Compute estimate qhat of q[j]. - qhat = (un[j+n]*b + un[j+n-1])/vn[n-1]; - rhat = (un[j+n]*b + un[j+n-1]) - qhat*vn[n-1]; -again: - if (qhat >= b || qhat*vn[n-2] > b*rhat + un[j+n-2]) - { qhat = qhat - 1; - rhat = rhat + vn[n-1]; - if (rhat < b) goto again; - } -#define MUL_RSUB_CARRY_2_STAGE2 -#ifdef ORIGINAL - // Multiply and subtract. - k = 0; - for (i = 0; i < n; i++) { - p = qhat*vn[i]; - t = un[i+j] - k - (p & 0xFFFFFFFFLL); - un[i+j] = t; - k = (p >> 32) - (t >> 32); - } - t = un[j+n] - k; - un[j+n] = t; - bool need_fixup = t < 0; -#elif defined(SUB_MUL_BORROW) - (void)p; // shut up unused variable warning - - // Multiply and subtract. - uint32_t borrow = 0; - for(int i = 0; i <= n; i++) { - uint32_t vn_i = i < n ? vn[i] : 0; - uint64_t value = un[i + j] - (uint64_t)qhat * vn_i - borrow; - borrow = -(uint32_t)(value >> 32); - un[i + j] = (uint32_t)value; - } - bool need_fixup = borrow != 0; -#elif defined(MUL_RSUB_CARRY) - (void)p; // shut up unused variable warning - - // Multiply and subtract. - uint32_t carry = 1; - for(int i = 0; i <= n; i++) { - uint32_t vn_i = i < n ? vn[i] : 0; - uint64_t result = un[i + j] + ~((uint64_t)qhat * vn_i) + carry; - uint32_t result_high = result >> 32; - if(carry <= 1) - result_high++; - carry = result_high; - un[i + j] = (uint32_t)result; - } - bool need_fixup = carry != 1; -#elif defined(SUB_MUL_BORROW_2_STAGE) - (void)p; // shut up unused variable warning - - // Multiply and subtract. - uint32_t borrow = 0; - uint32_t phi[2000]; // plenty space - uint32_t plo[2000]; // plenty space - // first, perform mul-and-sub and store in split hi-lo - // this shows the vectorised sv.msubx which stores 128-bit in - // two 64-bit registers - for(int i = 0; i <= n; i++) { - uint32_t vn_i = i < n ? vn[i] : 0; - uint64_t value = un[i + j] - (uint64_t)qhat * vn_i; - plo[i] = value & 0xffffffffLL; - phi[i] = value >> 32; - } - // second, reconstruct the 64-bit result, subtract borrow, - // store top-half (-ve) in new borrow and store low-half as answer - // this is the new (odd) instruction - for(int i = 0; i <= n; i++) { - uint64_t value = (((uint64_t)phi[i]<<32) | plo[i]) - borrow; - borrow = ~(value >> 32)+1; // -(uint32_t)(value >> 32); - un[i + j] = (uint32_t)value; - } - bool need_fixup = borrow != 0; -#elif defined(MUL_RSUB_CARRY_2_STAGE) - (void)p; // shut up unused variable warning - - // Multiply and subtract. - uint32_t carry = 1; - uint32_t phi[2000]; // plenty space - uint32_t plo[2000]; // plenty space - for(int i = 0; i <= n; i++) { - uint32_t vn_i = i < n ? vn[i] : 0; - uint64_t value = un[i + j] + ~((uint64_t)qhat * vn_i); - plo[i] = value & 0xffffffffLL; - phi[i] = value >> 32; - } - for(int i = 0; i <= n; i++) { - uint64_t result = (((uint64_t)phi[i]<<32) | plo[i]) + carry; - uint32_t result_high = result >> 32; - if(carry <= 1) - result_high++; - carry = result_high; - un[i + j] = (uint32_t)result; - } - bool need_fixup = carry != 1; -#elif defined(MUL_RSUB_CARRY_2_STAGE1) - (void)p; // shut up unused variable warning - - // Multiply and subtract. - uint32_t carry = 1; - uint32_t phi[2000]; // plenty space - uint32_t plo[2000]; // plenty space - // same mul-and-sub as SUB_MUL_BORROW but not the same - // mul-and-sub-minus-one as MUL_RSUB_CARRY - for(int i = 0; i <= n; i++) { - uint32_t vn_i = i < n ? vn[i] : 0; - uint64_t value = un[i + j] - ((uint64_t)qhat * vn_i); - plo[i] = value & 0xffffffffLL; - phi[i] = value >> 32; - } - // compensate for the +1 that was added by mul-and-sub by subtracting - // it here (as ~(0)) - for(int i = 0; i <= n; i++) { - uint64_t result = (((uint64_t)phi[i]<<32) | plo[i]) + carry+ - ~(0); // a way to express "-1" - uint32_t result_high = result >> 32; - if(carry <= 1) - result_high++; - carry = result_high; - un[i + j] = (uint32_t)result; - } - bool need_fixup = carry != 1; -#elif defined(MUL_RSUB_CARRY_2_STAGE2) - (void)p; // shut up unused variable warning - - // Multiply and subtract. - uint32_t carry = 0; - uint32_t phi[2000]; // plenty space - uint32_t plo[2000]; // plenty space - // same mul-and-sub as SUB_MUL_BORROW but not the same - // mul-and-sub-minus-one as MUL_RSUB_CARRY - for(int i = 0; i <= n; i++) { - uint32_t vn_i = i < n ? vn[i] : 0; - uint64_t value = un[i + j] - ((uint64_t)qhat * vn_i); - plo[i] = value & 0xffffffffLL; - phi[i] = value >> 32; - } - // NOW it starts to make sense. when no carry this time, next - // carry as-is. rlse next carry reduces by one. - // it here (as ~(0)) - for(int i = 0; i <= n; i++) { - uint64_t result = (((uint64_t)phi[i]<<32) | plo[i]) + carry; - uint32_t result_high = result >> 32; - if(carry == 0) - carry = result_high; - else - carry = result_high-1; - un[i + j] = (uint32_t)result; - } - bool need_fixup = carry != 0; -#else -#error need to choose one of the algorithm options; e.g. -DORIGINAL -#endif - - q[j] = qhat; // Store quotient digit. - if (need_fixup) { // If we subtracted too - q[j] = q[j] - 1; // much, add back. - k = 0; - for (i = 0; i < n; i++) { - t = (unsigned long long)un[i+j] + vn[i] + k; - un[i+j] = t; - k = t >> 32; - } - un[j+n] = un[j+n] + k; - } - } // End j. - // If the caller wants the remainder, unnormalize - // it and pass it back. - if (r != NULL) { - for (i = 0; i < n-1; i++) - r[i] = (un[i] >> s) | ((unsigned long long)un[i+1] << (32-s)); - r[n-1] = un[n-1] >> s; - } - return 0; -} - -int errors; - -void check(unsigned q[], unsigned r[], - unsigned u[], unsigned v[], - int m, int n, - unsigned cq[], unsigned cr[]) { - int i, szq; - - szq = max(m - n + 1, 1); - for (i = 0; i < szq; i++) { - if (q[i] != cq[i]) { - errors = errors + 1; - dumpit("Error, dividend u =", m, u); - dumpit(" divisor v =", n, v); - dumpit("For quotient, got:", m-n+1, q); - dumpit(" Should get:", m-n+1, cq); - return; - } - } - for (i = 0; i < n; i++) { - if (r[i] != cr[i]) { - errors = errors + 1; - dumpit("Error, dividend u =", m, u); - dumpit(" divisor v =", n, v); - dumpit("For remainder, got:", n, r); - dumpit(" Should get:", n, cr); - return; - } - } - return; -} - -int main() { - static unsigned test[] = { - // m, n, u..., v..., cq..., cr.... - 1, 1, 3, 0, 1, 1, // Error, divide by 0. - 1, 2, 7, 1,3, 0, 7,0, // Error, n > m. - 2, 2, 0,0, 1,0, 0, 0,0, // Error, incorrect remainder cr. - 1, 1, 3, 2, 1, 1, - 1, 1, 3, 3, 1, 0, - 1, 1, 3, 4, 0, 3, - 1, 1, 0, 0xffffffff, 0, 0, - 1, 1, 0xffffffff, 1, 0xffffffff, 0, - 1, 1, 0xffffffff, 0xffffffff, 1, 0, - 1, 1, 0xffffffff, 3, 0x55555555, 0, - 2, 1, 0xffffffff,0xffffffff, 1, 0xffffffff,0xffffffff, 0, - 2, 1, 0xffffffff,0xffffffff, 0xffffffff, 1,1, 0, - 2, 1, 0xffffffff,0xfffffffe, 0xffffffff, 0xffffffff,0, 0xfffffffe, - 2, 1, 0x00005678,0x00001234, 0x00009abc, 0x1e1dba76,0, 0x6bd0, - 2, 2, 0,0, 0,1, 0, 0,0, - 2, 2, 0,7, 0,3, 2, 0,1, - 2, 2, 5,7, 0,3, 2, 5,1, - 2, 2, 0,6, 0,2, 3, 0,0, - 1, 1, 0x80000000, 0x40000001, 0x00000001, 0x3fffffff, - 2, 1, 0x00000000,0x80000000, 0x40000001, 0xfffffff8,0x00000001, 0x00000008, - 2, 2, 0x00000000,0x80000000, 0x00000001,0x40000000, 0x00000001, 0xffffffff,0x3fffffff, - 2, 2, 0x0000789a,0x0000bcde, 0x0000789a,0x0000bcde, 1, 0,0, - 2, 2, 0x0000789b,0x0000bcde, 0x0000789a,0x0000bcde, 1, 1,0, - 2, 2, 0x00007899,0x0000bcde, 0x0000789a,0x0000bcde, 0, 0x00007899,0x0000bcde, - 2, 2, 0x0000ffff,0x0000ffff, 0x0000ffff,0x0000ffff, 1, 0,0, - 2, 2, 0x0000ffff,0x0000ffff, 0x00000000,0x00000001, 0x0000ffff, 0x0000ffff,0, - 3, 2, 0x000089ab,0x00004567,0x00000123, 0x00000000,0x00000001, 0x00004567,0x00000123, 0x000089ab,0, - 3, 2, 0x00000000,0x0000fffe,0x00008000, 0x0000ffff,0x00008000, 0xffffffff,0x00000000, 0x0000ffff,0x00007fff, // Shows that first qhat can = b + 1. - 3, 3, 0x00000003,0x00000000,0x80000000, 0x00000001,0x00000000,0x20000000, 0x00000003, 0,0,0x20000000, // Adding back step req'd. - 3, 3, 0x00000003,0x00000000,0x00008000, 0x00000001,0x00000000,0x00002000, 0x00000003, 0,0,0x00002000, // Adding back step req'd. - 4, 3, 0,0,0x00008000,0x00007fff, 1,0,0x00008000, 0xfffe0000,0, 0x00020000,0xffffffff,0x00007fff, // Add back req'd. - 4, 3, 0,0x0000fffe,0,0x00008000, 0x0000ffff,0,0x00008000, 0xffffffff,0, 0x0000ffff,0xffffffff,0x00007fff, // Shows that mult-sub quantity cannot be treated as signed. - 4, 3, 0,0xfffffffe,0,0x80000000, 0x0000ffff,0,0x80000000, 0x00000000,1, 0x00000000,0xfffeffff,0x00000000, // Shows that mult-sub quantity cannot be treated as signed. - 4, 3, 0,0xfffffffe,0,0x80000000, 0xffffffff,0,0x80000000, 0xffffffff,0, 0xffffffff,0xffffffff,0x7fffffff, // Shows that mult-sub quantity cannot be treated as signed. - }; - int i, n, m, ncases, f; - unsigned q[10], r[10]; - unsigned *u, *v, *cq, *cr; - - printf("divmnu:\n"); - i = 0; - ncases = 0; - while (i < sizeof(test)/4) { - m = test[i]; - n = test[i+1]; - u = &test[i+2]; - v = &test[i+2+m]; - cq = &test[i+2+m+n]; - cr = &test[i+2+m+n+max(m-n+1, 1)]; - - f = divmnu(q, r, u, v, m, n); - if (f) { - dumpit("Error return code for dividend u =", m, u); - dumpit(" divisor v =", n, v); - errors = errors + 1; - } - else - check(q, r, u, v, m, n, cq, cr); - i = i + 2 + m + n + max(m-n+1, 1) + n; - ncases = ncases + 1; - } - - printf("%d errors out of %d cases; there should be 3.\n", errors, ncases); - return 0; -} +// moved to ../biginteger/divmnu.c diff --git a/openpower/sv/bitmanip/mulmnu.c b/openpower/sv/bitmanip/mulmnu.c index a38afc4be..973e3b376 100644 --- a/openpower/sv/bitmanip/mulmnu.c +++ b/openpower/sv/bitmanip/mulmnu.c @@ -1,103 +1 @@ -/* from hacker's delight, originsl by hannah suarez (hcs0) */ -// Computes the m+n-halfword product of n halfwords x m halfwords, unsigned. -// Max line length is 57, to fit in hacker.book. -#include -#include //To define "exit", req'd by XLC. - -// w[0], u[0], and v[0] contain the LEAST significant halfwords. -// (The halfwords are in little-endian order). -// This is Knuth's Algorithm M from [Knuth Vol. 2 Third edition (1998)] -// section 4.3.1. Picture is: -// u[m-1] ... u[1] u[0] -// x v[n-1] ... v[1] v[0] -// -------------------- -// w[m+n-1] ............ w[1] w[0] - -void mulmnu(unsigned short w[], unsigned short u[], - unsigned short v[], int m, int n) { - - unsigned int k, t; - int i, j; - - for (i = 0; i < m; i++) - w[i] = 0; - - for (j = 0; j < n; j++) { - k = 0; - unsigned short phi[2000]; - unsigned short plo[2000]; - for (i = 0; i < m; i++) { - unsigned product = u[i]*v[j] + w[i + j]; - phi[i] = product>>16; - plo[i] = product; - } - for (i = 0; i < m; i++) { - t = (phi[i]<<16) | plo[i] + k; - w[i + j] = t; // (I.e., t & 0xFFFF). - k = t >> 16; - } - w[j + m] = k; - } - return; -} - -int errors; - -void check(unsigned short result[], unsigned short u[], - unsigned short v[], int m, int n, unsigned short correct[]) { - int i, j; - - for (i = 0; i < m + n; i++) { - if (correct[i] != result[i]) { - errors = errors + 1; - printf("Error, m = %d, n = %d, u = ", m, n); - for (j = 0; j < m; j++) printf(" %04x", u[j]); - printf(" v ="); - for (j = 0; j < n; j++) printf(" %04x", v[j]); - printf("\nShould get:"); - for (j = 0; j < n+m; j++) printf(" %04x", correct[j]); - printf("\n Got:"); - for (j = 0; j < n+m; j++) printf(" %04x", result[j]); - printf("\n"); - break; - } - } -} - -int main() { - static unsigned short test[] = { - // m, n, u ..., v ..., result. - 1, 1, 7, 3, 21,0, - 1, 1, 2, 0xFFFF, 0xFFFE,0x0001, // 2*FFFF = 0001_FFFE. - 1, 1, 0xFFFF, 0xFFFF, 1,0xFFFE, - 1, 2, 7, 5, 6, 35,42,0, - 1, 2, 65000, 63000, 64000, 0xBDC0,0x8414,0xF7F5, - 1, 3, 65535, 31000, 32000, 33000, 0x86E8,0xFC17,0xFC17,0x80E7, - 2, 3, 400, 300, 500, 100, 200, 0x0D40,0xE633,0xADB2,0xEA61,0, - 2, 3, 400, 65535, 500, 100, 65534, 0x0D40,0x9A4F,0xFE70,0x01F5,0xFFFD, - 4, 4, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, - 1, 0, 0, 0, 65534, 65535, 65535, 65535, - }; - int i, n, m, ncases; - unsigned short result[10]; - unsigned short *u, *v; - - printf("mulmnu:\n"); - i = 0; - ncases = 0; - while (i < sizeof(test)/2) { - m = test[i]; - n = test[i+1]; - u = &test[i+2]; - v = &test[i+2+m]; - mulmnu(result, u, v, m, n); - check (result, u, v, m, n, &test[i+2+m+n]); - mulmnu(result, v, u, n, m); // Interchange operands. - check (result, v, u, n, m, &test[i+2+m+n]); - i = i + 2 + 2*(m + n); - ncases = ncases + 1; - } - - if (errors == 0) - printf("Passed all %d cases.\n", ncases); -} +// moved to ../biginteger/mulmnu.c