From 94e09de41d1bacc26c06a8dd59f549e18cb70a98 Mon Sep 17 00:00:00 2001 From: Jacob Lifshay Date: Sun, 15 Oct 2023 21:06:02 -0700 Subject: [PATCH] divmod: asm version of Knuth's algorithm D works! --- src/openpower/test/bigint/powmod.py | 44 +++++++++++++++-------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/src/openpower/test/bigint/powmod.py b/src/openpower/test/bigint/powmod.py index 76cfb5b4..3ba5045e 100644 --- a/src/openpower/test/bigint/powmod.py +++ b/src/openpower/test/bigint/powmod.py @@ -615,11 +615,15 @@ class DivModKnuthAlgorithmD: un[j + i] = t % 2 ** self.word_size t = int(t >= 2 ** self.word_size) do_log(locals(), "add back: step") - un[j + n] += t - do_log(locals(), "add back: un[j + n] += t") + index = j + n + do_log(locals(), "add back: index = j + n", index="index") + un[index] += t + do_log(locals(), "add back: un[index] += t", index=None) - q[j] = qhat - do_log(locals(), "q[j] = qhat", index=None) + index = j + do_log(locals(), "assign q: index = j", index="index") + q[index] = qhat + do_log(locals(), "q[index] = qhat", index=None) # Step D8: un-normalize @@ -817,6 +821,7 @@ class DivModKnuthAlgorithmD: yield f"setvl 0, 0, {un_size}, 0, 1, 1" # VL = un_size assert index == 3, "index must be r3" yield f"sv.divmod2du/m=1<> 16 offset_field = svshape.fsi['offset'] @@ -910,7 +915,8 @@ class DivModKnuthAlgorithmD: # or in all the other bits if svshape_high != 0: yield f"oris 0, 0, {svshape_high}" - yield f"ori 0, 0, {svshape_low}" + if svshape_low != 0: + yield f"ori 0, 0, {svshape_low}" yield f"mtspr {SVSHAPE0}, 0 # mtspr SVSHAPE0, 0" yield f"svremap 0o12, 0, 0, 0, 0, 0, 0" # enable SVSHAPE0 for RB & RT # un[j:] -= product @@ -921,7 +927,7 @@ class DivModKnuthAlgorithmD: yield f"mcrxrx 0" # move CA to CR0.eq # if need_fixup: - yield "bc 4, 2, divmod_skip_fixup # bne divmod_skip_fixup" + yield "bc 12, 2, divmod_skip_fixup # beq divmod_skip_fixup" # Step D6: add back @@ -931,15 +937,18 @@ class DivModKnuthAlgorithmD: yield f"svremap 0o11, 0, 0, 0, 0, 0, 0" # enable SVSHAPE0 for RA & RT # un[j:] += vn yield f"sv.adde *{un}, *{un}, *{vn}" - yield f"svremap 0o11, 0, 0, 0, 0, 0, 0" # enable SVSHAPE0 for RA & RT - # un[j + n] += t - yield f"sv.addze *{un}, *{un}" + yield f"add {index}, {j}, {n_scalar}" # index = j + n + # un[index] += t + yield f"setvl 0, 0, {un_size}, 0, 1, 1" # VL = un_size + assert index == 3, "index must be r3" + yield f"sv.addze/m=1<= 2 ** 64 - if n << 64 < n: - skip = False - if skip: - # FIXME: only part of the algorithm works, - # so we skip the cases that we know fail - continue q, r = divmod(n, d) with self.subTest(n=f"{n:#_x}", d=f"{d:#_x}", q=f"{q:#_x}", r=f"{r:#_x}"): -- 2.30.2