From 9bb15605e304f53fa98f058cf22ef41d7b526cc9 Mon Sep 17 00:00:00 2001
From: Jacob Lifshay <programmerjake@gmail.com>
Date: Fri, 6 Oct 2023 17:08:35 -0700
Subject: [PATCH] add WIP knuth algorithm D python implementation

---
 src/openpower/test/bigint/powmod.py | 119 ++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)

diff --git a/src/openpower/test/bigint/powmod.py b/src/openpower/test/bigint/powmod.py
index f17139b0..425e860a 100644
--- a/src/openpower/test/bigint/powmod.py
+++ b/src/openpower/test/bigint/powmod.py
@@ -267,6 +267,125 @@ def python_divmod_shift_sub_algorithm(n, d, width=256, log_regex=False):
     return q, r
 
 
+def python_divmod_knuth_algorithm_d(n, d, word_size=64, log_regex=False):
+    do_log = _DivModRegsRegexLogger(enabled=log_regex).log
+
+    # switch to names used by Knuth's algorithm D
+    u = list(n)  # dividend
+    m = len(u)  # length of dividend
+    v = list(d)  # divisor
+    del d  # less confusing to debug
+    n = len(v)  # length of divisor
+
+    assert m >= n, "the dividend's length must be >= the divisor's length"
+    assert word_size > 0
+
+    # allocate outputs/temporaries -- before any normalization so
+    # the outputs/temporaries can be fixed-length in the assembly version.
+
+    # quotient length from original algorithm is m - n + 1,
+    # but that assumes v[-1] != 0 -- since we support smaller divisors the
+    # quotient must be larger.
+    q = [0] * m  # quotient
+    r = [0] * n  # remainder
+    vn = [0] * n  # normalized divisor
+    un = [0] * (m + 1)  # normalized dividend
+    product = [0] * (n + 1)
+
+    # get non-zero length of dividend
+    while m > 0 and u[m - 1] == 0:
+        m -= 1
+
+    # get non-zero length of divisor
+    while n > 0 and v[n - 1] == 0:
+        n -= 1
+
+    if n == 0:
+        raise ZeroDivisionError
+
+    if n == 1:
+        # Knuth's algorithm D requires the divisor to have length >= 2
+        # handle single-word divisors separately
+        t = 0
+        for i in reversed(range(m)):
+            # divmod2du
+            t <<= word_size
+            t += u[i]
+            q[i] = t // v[0]
+            t %= v[0]
+        r[0] = t
+        return q, r
+
+    # Knuth's algorithm D starts here:
+
+    # Step D1: normalize
+
+    # calculate amount to shift by -- count leading zeros
+    s = 0
+    while (v[n - 1] << s) >> (word_size - 1) == 0:
+        s += 1
+
+    # vn = v << s
+    t = 0
+    for i in range(n):
+        # dsld
+        t |= v[i] << s
+        vn[i] = t % 2 ** word_size
+        t >>= word_size
+
+    # un = u << s
+    t = 0
+    for i in range(m):
+        # dsld
+        t |= u[i] << s
+        un[i] = t % 2 ** word_size
+        t >>= word_size
+    un[m] = t
+
+    # Step D2 and Step D7: loop
+    for j in range(m - n, -1, -1):
+        # Step D3: calculate qÌ
+
+        t = un[j + n]
+        t <<= word_size
+        t += un[j + n - 1]
+        if un[j + n] >= vn[n - 1]:
+            # division overflows word
+            qhat = 2 ** word_size - 1
+            rhat = t - qhat * vn[n - 1]
+        else:
+            # divmod2du
+            qhat = t // vn[n - 1]
+            rhat = t % vn[n - 1]
+
+        while rhat < 2 ** word_size:
+            if qhat * vn[n - 2] > (rhat << word_size) + un[j + n - 2]:
+                qhat -= 1
+                rhat += vn[n - 1]
+            else:
+                break
+
+        # Step D4: multiply and subtract
+
+        t = 0
+        for i in range(n):
+            # maddedu
+            t += vn[i] * qhat
+            product[i] = t % 2 ** word_size
+            t >>= word_size
+        product[n] = t
+
+        t = 1
+        for i in range(n + 1):
+            # subfe
+            t += ~product[i] + un[j + i]
+            un[j + i] = t % 2 ** word_size
+            t = int(t >= 2 ** word_size)
+        need_fixup = not t
+
+        # FIXME(jacob): finish
+
+
 POWMOD_256_ASM = (
     # base is in r4-7, exp is in r8-11, mod is in r32-35
     "powmod_256:",
-- 
2.30.2