divmod: asm version of Knuth's algorithm D works!

author Jacob Lifshay <programmerjake@gmail.com>

Mon, 16 Oct 2023 04:06:02 +0000 (21:06 -0700)

committer Jacob Lifshay <programmerjake@gmail.com>

Thu, 30 Nov 2023 07:55:28 +0000 (23:55 -0800)
author Jacob Lifshay <programmerjake@gmail.com>
Mon, 16 Oct 2023 04:06:02 +0000 (21:06 -0700)
committer Jacob Lifshay <programmerjake@gmail.com>
Thu, 30 Nov 2023 07:55:28 +0000 (23:55 -0800)
diff --git a/src/openpower/test/bigint/powmod.py b/src/openpower/test/bigint/powmod.py

index 76cfb5b41664549bebc9edb10315e2292e5ead30..3ba5045ede005c9fbc5a2712c59a211caa353bc0 100644 (file)
--- a/src/openpower/test/bigint/powmod.py
+++ b/src/openpower/test/bigint/powmod.py
@@ -615,11 +615,15 @@ class DivModKnuthAlgorithmD:
                      un[j + i] = t % 2 ** self.word_size
                      t = int(t >= 2 ** self.word_size)
                      do_log(locals(), "add back: step")
-                un[j + n] += t
-                do_log(locals(), "add back: un[j + n] += t")
+                index = j + n
+                do_log(locals(), "add back: index = j + n", index="index")
+                un[index] += t
+                do_log(locals(), "add back: un[index] += t", index=None)
  
-            q[j] = qhat
-            do_log(locals(), "q[j] = qhat", index=None)
+            index = j
+            do_log(locals(), "assign q: index = j", index="index")
+            q[index] = qhat
+            do_log(locals(), "q[index] = qhat", index=None)
  
          # Step D8: un-normalize
  
@@ -817,6 +821,7 @@ class DivModKnuthAlgorithmD:
          yield f"setvl 0, 0, {un_size}, 0, 1, 1"  # VL = un_size
          assert index == 3, "index must be r3"
          yield f"sv.divmod2du/m=1<<r3 {qhat}, *{un}, {qhat_denom}, {rhat_lo}"
+        yield f"addi {rhat_hi}, 0, 0"  # rhat_hi = 0
          yield f"mcrxrx 0"  # move OV to CR0.lt
          yield "bc 4, 0, divmod_skip_qhat_overflow # bge divmod_..."
          # if ov:
@@ -885,7 +890,7 @@ class DivModKnuthAlgorithmD:
          # product[:n] = vn[:n] * qhat
          yield f"sv.maddedu *{product}, *{vn}, {qhat}, {t_for_prod}"
          yield f"or {index}, {n_scalar}, {n_scalar}"  # index = n
-        yield f"setvl 0, 0, {vn_size}, 0, 1, 1"  # VL = vn_size
+        yield f"setvl 0, 0, {product_size}, 0, 1, 1"  # VL = product_size
          # product[index] = t
          assert index == 3, "index must be r3"
          yield f"sv.or/m=1<<r3 *{product}, {t_for_prod}, {t_for_prod}"
@@ -895,7 +900,7 @@ class DivModKnuthAlgorithmD:
          yield f"setvl 0, {sub_len}, {product_size}, 0, 1, 1"  # VL = sub_len
          # create svshape that offsets by `j`
          svshape = SVSHAPE(0)
-        svshape.zdimsz = q_size
+        svshape.zdimsz = q_size + 1
          svshape_low = int(svshape) % 2 ** 16
          svshape_high = int(svshape) >> 16
          offset_field = svshape.fsi['offset']
@@ -910,7 +915,8 @@ class DivModKnuthAlgorithmD:
          # or in all the other bits
          if svshape_high != 0:
              yield f"oris 0, 0, {svshape_high}"
-        yield f"ori 0, 0, {svshape_low}"
+        if svshape_low != 0:
+            yield f"ori 0, 0, {svshape_low}"
          yield f"mtspr {SVSHAPE0}, 0 # mtspr SVSHAPE0, 0"
          yield f"svremap 0o12, 0, 0, 0, 0, 0, 0"  # enable SVSHAPE0 for RB & RT
          # un[j:] -= product
@@ -921,7 +927,7 @@ class DivModKnuthAlgorithmD:
  
          yield f"mcrxrx 0"  # move CA to CR0.eq
          # if need_fixup:
-        yield "bc 4, 2, divmod_skip_fixup # bne divmod_skip_fixup"
+        yield "bc 12, 2, divmod_skip_fixup # beq divmod_skip_fixup"
  
          # Step D6: add back
  
@@ -931,15 +937,18 @@ class DivModKnuthAlgorithmD:
          yield f"svremap 0o11, 0, 0, 0, 0, 0, 0"  # enable SVSHAPE0 for RA & RT
          # un[j:] += vn
          yield f"sv.adde *{un}, *{un}, *{vn}"
-        yield f"svremap 0o11, 0, 0, 0, 0, 0, 0"  # enable SVSHAPE0 for RA & RT
-        # un[j + n] += t
-        yield f"sv.addze *{un}, *{un}"
+        yield f"add {index}, {j}, {n_scalar}"  # index = j + n
+        # un[index] += t
+        yield f"setvl 0, 0, {un_size}, 0, 1, 1"  # VL = un_size
+        assert index == 3, "index must be r3"
+        yield f"sv.addze/m=1<<r3 *{un}, *{un}"
  
          yield "divmod_skip_fixup:"
-        yield f"setvl 0, 0, {q_size}, 0, 1, 1"  # VL = q_size
-        yield f"svremap 0o10, 0, 0, 0, 0, 0, 0"  # enable SVSHAPE0 for RT
+
+        yield f"or {index}, {j}, {j}"  # index = j
          # q[j] = qhat
-        yield f"sv.or {q}, {qhat}, {qhat}"
+        yield f"setvl 0, 0, {q_size}, 0, 1, 1"  # VL = q_size
+        yield f"sv.or/m=1<<r3 *{q}, {qhat}, {qhat}"
  
          # Step D2 and Step D7: loop
          yield f"addic. {j}, {j}, -1"  # j -= 1
@@ -1161,13 +1170,6 @@ class PowModCases(TestAccumulatorBase):
          cases = list(self.divmod_512x256_to_256x256_test_inputs())
          asm = DivModKnuthAlgorithmD().asm
          for n, d in cases:
-            skip = d >= 2 ** 64
-            if n << 64 < n:
-                skip = False
-            if skip:
-                # FIXME: only part of the algorithm works,
-                # so we skip the cases that we know fail
-                continue
              q, r = divmod(n, d)
              with self.subTest(n=f"{n:#_x}", d=f"{d:#_x}",
                                q=f"{q:#_x}", r=f"{r:#_x}"):
author	Jacob Lifshay <programmerjake@gmail.com>
	Mon, 16 Oct 2023 04:06:02 +0000 (21:06 -0700)
committer	Jacob Lifshay <programmerjake@gmail.com>
	Thu, 30 Nov 2023 07:55:28 +0000 (23:55 -0800)