From: Jacob Lifshay Date: Sat, 7 Oct 2023 00:08:35 +0000 (-0700) Subject: add WIP knuth algorithm D python implementation X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ebe90fd9d7d2f765896fd55cd62efcbc3da37b29;p=openpower-isa.git add WIP knuth algorithm D python implementation --- diff --git a/src/openpower/test/bigint/powmod.py b/src/openpower/test/bigint/powmod.py index f17139b0..425e860a 100644 --- a/src/openpower/test/bigint/powmod.py +++ b/src/openpower/test/bigint/powmod.py @@ -267,6 +267,125 @@ def python_divmod_shift_sub_algorithm(n, d, width=256, log_regex=False): return q, r +def python_divmod_knuth_algorithm_d(n, d, word_size=64, log_regex=False): + do_log = _DivModRegsRegexLogger(enabled=log_regex).log + + # switch to names used by Knuth's algorithm D + u = list(n) # dividend + m = len(u) # length of dividend + v = list(d) # divisor + del d # less confusing to debug + n = len(v) # length of divisor + + assert m >= n, "the dividend's length must be >= the divisor's length" + assert word_size > 0 + + # allocate outputs/temporaries -- before any normalization so + # the outputs/temporaries can be fixed-length in the assembly version. + + # quotient length from original algorithm is m - n + 1, + # but that assumes v[-1] != 0 -- since we support smaller divisors the + # quotient must be larger. + q = [0] * m # quotient + r = [0] * n # remainder + vn = [0] * n # normalized divisor + un = [0] * (m + 1) # normalized dividend + product = [0] * (n + 1) + + # get non-zero length of dividend + while m > 0 and u[m - 1] == 0: + m -= 1 + + # get non-zero length of divisor + while n > 0 and v[n - 1] == 0: + n -= 1 + + if n == 0: + raise ZeroDivisionError + + if n == 1: + # Knuth's algorithm D requires the divisor to have length >= 2 + # handle single-word divisors separately + t = 0 + for i in reversed(range(m)): + # divmod2du + t <<= word_size + t += u[i] + q[i] = t // v[0] + t %= v[0] + r[0] = t + return q, r + + # Knuth's algorithm D starts here: + + # Step D1: normalize + + # calculate amount to shift by -- count leading zeros + s = 0 + while (v[n - 1] << s) >> (word_size - 1) == 0: + s += 1 + + # vn = v << s + t = 0 + for i in range(n): + # dsld + t |= v[i] << s + vn[i] = t % 2 ** word_size + t >>= word_size + + # un = u << s + t = 0 + for i in range(m): + # dsld + t |= u[i] << s + un[i] = t % 2 ** word_size + t >>= word_size + un[m] = t + + # Step D2 and Step D7: loop + for j in range(m - n, -1, -1): + # Step D3: calculate q̂ + + t = un[j + n] + t <<= word_size + t += un[j + n - 1] + if un[j + n] >= vn[n - 1]: + # division overflows word + qhat = 2 ** word_size - 1 + rhat = t - qhat * vn[n - 1] + else: + # divmod2du + qhat = t // vn[n - 1] + rhat = t % vn[n - 1] + + while rhat < 2 ** word_size: + if qhat * vn[n - 2] > (rhat << word_size) + un[j + n - 2]: + qhat -= 1 + rhat += vn[n - 1] + else: + break + + # Step D4: multiply and subtract + + t = 0 + for i in range(n): + # maddedu + t += vn[i] * qhat + product[i] = t % 2 ** word_size + t >>= word_size + product[n] = t + + t = 1 + for i in range(n + 1): + # subfe + t += ~product[i] + un[j + i] + un[j + i] = t % 2 ** word_size + t = int(t >= 2 ** word_size) + need_fixup = not t + + # FIXME(jacob): finish + + POWMOD_256_ASM = ( # base is in r4-7, exp is in r8-11, mod is in r32-35 "powmod_256:",