From: Jacob Lifshay Date: Tue, 26 Apr 2022 06:18:29 +0000 (-0700) Subject: switch to using divrem_64_by_32 which follows semantics of proposed 128x64->64 div op X-Git-Tag: opf_rfc_ls005_v1~2586 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b6044fdb84bea61887999ffa5e17543fa91b8f89;p=libreriscv.git switch to using divrem_64_by_32 which follows semantics of proposed 128x64->64 div op --- diff --git a/openpower/sv/biginteger/divmnu64.c b/openpower/sv/biginteger/divmnu64.c index 0d3f3973a..e193a59ed 100644 --- a/openpower/sv/biginteger/divmnu64.c +++ b/openpower/sv/biginteger/divmnu64.c @@ -56,6 +56,24 @@ void dumpit(char *msg, int n, unsigned v[]) printf("\n"); } +typedef struct +{ + uint32_t q, r; + bool overflow; +} divrem_t; + +divrem_t divrem_64_by_32(uint64_t n, uint32_t d) +{ + if ((n >> 32) >= d) // overflow + { + return (divrem_t){.q = UINT32_MAX, .r = 0, .overflow = true}; + } + else + { + return (divrem_t){.q = n / d, .r = n % d, .overflow = false}; + } +} + /* q[0], r[0], u[0], and v[0] contain the LEAST significant words. (The sequence is in little-endian order). @@ -126,17 +144,19 @@ int divmnu(unsigned q[], unsigned r[], const unsigned u[], const unsigned v[], for (j = m - n; j >= 0; j--) { // Main loop. // Compute estimate qhat of q[j] from top 2 digits. - // do as 2 separate divs to demo that 64/32 div/rem would - // be perfectly fine. - uint64_t dig1 = (uint64_t)un[j + n]; - qhat = dig1 / vn[n - 1]; - rhat = dig1 % vn[n - 1]; - uint64_t dig2 = ((uint64_t)rhat << 32) | un[j + n - 1]; - qhat = dig2 / vn[n - 1] | (qhat << 32); - rhat = dig2 % vn[n - 1] | (rhat << 32); + uint64_t dig2 = ((uint64_t)un[j + n] << 32) | un[j + n - 1]; + divrem_t qr = divrem_64_by_32(dig2, vn[n - 1]); + qhat = qr.q; + rhat = qr.r; + if (qr.overflow) + { + // rhat can be bigger than 32-bit when the division overflows, + // so rhat's computation can't be folded into divrem_64_by_32 + rhat = dig2 - qr.q * vn[n - 1]; + } again: // use 3rd-from-top digit to obtain better accuracy - if (qhat >= b || qhat * vn[n - 2] > b * rhat + un[j + n - 2]) + if (rhat < b && qhat * vn[n - 2] > b * rhat + un[j + n - 2]) { qhat = qhat - 1; rhat = rhat + vn[n - 1];