From 0d1739a1b41d7a23e214fca36da8c27791183575 Mon Sep 17 00:00:00 2001
From: Jacob Lifshay <programmerjake@gmail.com>
Date: Sun, 8 Oct 2023 21:46:57 -0700
Subject: [PATCH] finish writing python_divmod_knuth_algorithm_d

---
 .../decoder/isa/test_caller_svp64_powmod.py   | 32 +++++++-
 src/openpower/test/bigint/powmod.py           | 74 ++++++++++++++++---
 2 files changed, 93 insertions(+), 13 deletions(-)

diff --git a/src/openpower/decoder/isa/test_caller_svp64_powmod.py b/src/openpower/decoder/isa/test_caller_svp64_powmod.py
index 1ea02a23..fb57efec 100644
--- a/src/openpower/decoder/isa/test_caller_svp64_powmod.py
+++ b/src/openpower/decoder/isa/test_caller_svp64_powmod.py
@@ -16,7 +16,7 @@ from functools import lru_cache
 import os
 from openpower.test.bigint.powmod import (
     PowModCases, python_divmod_shift_sub_algorithm,
-    python_powmod_256_algorithm)
+    python_divmod_knuth_algorithm_d, python_powmod_256_algorithm)
 from openpower.test.runner import TestRunnerBase
 
 
@@ -33,6 +33,36 @@ class TestPythonAlgorithms(unittest.TestCase):
                     self.assertEqual(out_q, q)
                     self.assertEqual(out_r, r)
 
+    def test_python_divmod_knuth_algorithm_d(self):
+        seen_corner_cases = set()
+        for n, d in PowModCases.divmod_512x256_to_256x256_test_inputs():
+            log_regex = n == 2 ** 511 - 1 and d == 2 ** 256 - 1
+            q, r = divmod(n, d)
+            n = [(n >> 64 * i) % 2 ** 64 for i in range(8)]
+            d = [(d >> 64 * i) % 2 ** 64 for i in range(4)]
+            q = [(q >> 64 * i) % 2 ** 64 for i in range(4)]
+            r = [(r >> 64 * i) % 2 ** 64 for i in range(4)]
+            with self.subTest(n=[f"{i:#_x}" for i in n],
+                              d=[f"{i:#_x}" for i in d],
+                              q=[f"{i:#_x}" for i in q],
+                              r=[f"{i:#_x}" for i in r]):
+                out_q, out_r = python_divmod_knuth_algorithm_d(
+                    n, d, log_regex=log_regex,
+                    on_corner_case=seen_corner_cases.add)
+                with self.subTest(out_q=[f"{i:#_x}" for i in out_q],
+                                  out_r=[f"{i:#_x}" for i in out_r]):
+                    self.assertEqual(out_q, q + [0] * 4)
+                    self.assertEqual(out_r, r)
+
+        # ensure our testing actually covers all the corner cases
+        self.assertEqual(seen_corner_cases, {
+            "single-word divisor",
+            "non-zero shift",
+            "qhat overflows word",
+            "qhat adjustment",
+            "add back",
+        })
+
     def test_python_powmod_algorithm(self):
         for base, exp, mod in PowModCases.powmod_256_test_inputs():
             expected = pow(base, exp, mod)
diff --git a/src/openpower/test/bigint/powmod.py b/src/openpower/test/bigint/powmod.py
index a4d9bc50..cb595645 100644
--- a/src/openpower/test/bigint/powmod.py
+++ b/src/openpower/test/bigint/powmod.py
@@ -267,7 +267,8 @@ def python_divmod_shift_sub_algorithm(n, d, width=256, log_regex=False):
     return q, r
 
 
-def python_divmod_knuth_algorithm_d(n, d, word_size=64, log_regex=False):
+def python_divmod_knuth_algorithm_d(n, d, word_size=64, log_regex=False,
+                                    on_corner_case=lambda desc: None):
     do_log = _DivModRegsRegexLogger(enabled=log_regex).log
 
     # switch to names used by Knuth's algorithm D
@@ -304,6 +305,7 @@ def python_divmod_knuth_algorithm_d(n, d, word_size=64, log_regex=False):
         raise ZeroDivisionError
 
     if n == 1:
+        on_corner_case("single-word divisor")
         # Knuth's algorithm D requires the divisor to have length >= 2
         # handle single-word divisors separately
         t = 0
@@ -316,6 +318,12 @@ def python_divmod_knuth_algorithm_d(n, d, word_size=64, log_regex=False):
         r[0] = t
         return q, r
 
+    if m < n:
+        # dividend < divisor
+        for i in range(m):
+            r[i] = u[i]
+        return q, r
+
     # Knuth's algorithm D starts here:
 
     # Step D1: normalize
@@ -325,6 +333,9 @@ def python_divmod_knuth_algorithm_d(n, d, word_size=64, log_regex=False):
     while (v[n - 1] << s) >> (word_size - 1) == 0:
         s += 1
 
+    if s != 0:
+        on_corner_case("non-zero shift")
+
     # vn = v << s
     t = 0
     for i in range(n):
@@ -351,6 +362,7 @@ def python_divmod_knuth_algorithm_d(n, d, word_size=64, log_regex=False):
         t += un[j + n - 1]
         if un[j + n] >= vn[n - 1]:
             # division overflows word
+            on_corner_case("qhat overflows word")
             qhat = 2 ** word_size - 1
             rhat = t - qhat * vn[n - 1]
         else:
@@ -360,6 +372,7 @@ def python_divmod_knuth_algorithm_d(n, d, word_size=64, log_regex=False):
 
         while rhat < 2 ** word_size:
             if qhat * vn[n - 2] > (rhat << word_size) + un[j + n - 2]:
+                on_corner_case("qhat adjustment")
                 qhat -= 1
                 rhat += vn[n - 1]
             else:
@@ -378,13 +391,43 @@ def python_divmod_knuth_algorithm_d(n, d, word_size=64, log_regex=False):
         t = 1
         for i in range(n + 1):
             # subfe
-            t += ~product[i] + un[j + i]
+            not_product = ~product[i] % 2 ** word_size
+            t += not_product + un[j + i]
             un[j + i] = t % 2 ** word_size
             t = int(t >= 2 ** word_size)
         need_fixup = not t
 
-        # FIXME(jacob): finish
+        # Step D5: test remainder
+
+        q[j] = qhat
+        if need_fixup:
+
+            # Step D6: add back
+
+            on_corner_case("add back")
+
+            q[j] -= 1
+
+            t = 0
+            for i in range(n):
+                # adde
+                t += un[j + i] + vn[i]
+                un[j + i] = t % 2 ** word_size
+                t = int(t >= 2 ** word_size)
+            un[j + n] += t
+
+    # Step D8: un-normalize
+
+    # r = un >> s
+    t = 0
+    for i in reversed(range(n)):
+        # dsrd
+        t <<= word_size
+        t |= (un[i] << word_size) >> s
+        r[i] = t >> word_size
+        t %= 2 ** word_size
 
+    return q, r
 
 POWMOD_256_ASM = (
     # base is in r4-7, exp is in r8-11, mod is in r32-35
@@ -509,19 +552,26 @@ class PowModCases(TestAccumulatorBase):
 
     @staticmethod
     def divmod_512x256_to_256x256_test_inputs():
-        for i in range(10):
+        yield (2 ** (256 - 1), 1)
+        yield (2 ** (512 - 1) - 1, 2 ** 256 - 1)
+
+        # test qhat overflow
+        yield (0x8000 << 128 | 0xFFFE << 64, 0x8000 << 64 | 0xFFFF)
+
+        # tests where add back is required
+        yield (8 << (192 - 4) | 3, 2 << (192 - 4) | 1)
+        yield (0x8000 << 128 | 3, 0x2000 << 128 | 1)
+        yield (0x7FFF << 192 | 0x8000 << 128, 0x8000 << 128 | 1)
+
+        for i in range(20):
             n = hash_256(f"divmod256 input n msb {i}")
             n <<= 256
             n |= hash_256(f"divmod256 input n lsb {i}")
+            n_shift = hash_256(f"divmod256 input n shift {i}") % 512
+            n >>= n_shift
             d = hash_256(f"divmod256 input d {i}")
-            if i == 0:
-                # use known values:
-                n = 2 ** (256 - 1)
-                d = 1
-            elif i == 1:
-                # use known values:
-                n = 2 ** (512 - 1) - 1
-                d = 2 ** 256 - 1
+            d_shift = hash_256(f"divmod256 input d shift {i}") % 256
+            d >>= d_shift
             if d == 0:
                 d = 1
             n %= d << 256
-- 
2.30.2