# Reconstructed from a git patch whose line breaks had been collapsed:
#   commit:  694a893efbd4b3aa53e02edcf97a941f49b33e33
#   author:  Jacob Lifshay
#   date:    Mon, 9 Oct 2023 21:05:17 -0700
#   subject: add WIP Knuth's algorithm D assembly
#   file:    src/openpower/test/bigint/powmod.py
#            (1 file changed, 50 insertions)
# Only the lines the patch adds are reproduced below.  The unchanged context
# lines of the diff (neighbouring imports, the tail of the preceding method,
# the opening of POWMOD_256_ASM) are fragments of definitions that are not
# fully visible in the patch and are therefore left out.

# The patch imports the third-party `cached_property` package.  Prefer the
# standard-library equivalent where it exists and fall back to the package
# otherwise, so the same name is in scope either way.
try:
    from functools import cached_property
except ImportError:  # functools.cached_property was added in Python 3.8
    from cached_property import cached_property


class DivModKnuthAlgorithmD:
    # NOTE(review): per the hunk header, the patch inserts the two members
    # below into the existing DivModKnuthAlgorithmD class, after a method
    # that ends in `return q, r`.  The rest of the class is outside the patch
    # and is not shown here.

    def __asm_iter(self):
        """Yield, line by line, the work-in-progress assembly listing.

        Reads ``self.word_size``, ``self.regs``, ``self.num_size``,
        ``self.denom_size`` and ``self.q_size``.  Because this is a
        generator, nothing runs (and nothing is raised) until it is iterated.

        Raises:
            NotImplementedError: on first iteration if ``word_size`` is not
                64; and again once every line written so far has been
                yielded, because the listing is unfinished.
            AssertionError: if the ``v`` and ``d_0`` entries of ``self.regs``
                are not equal.
        """
        if self.word_size != 64:
            raise NotImplementedError("only word_size == 64 is implemented")

        # Register assignments, looked up by name.  Several of these are not
        # referenced by the lines emitted so far; the lookups are kept so the
        # set of names required from self.regs stays the same as in the patch.
        n_0 = self.regs["n_0"]
        d_0 = self.regs["d_0"]
        u = self.regs["u"]
        m = self.regs["m"]
        v = self.regs["v"]
        n_scalar = self.regs["n_scalar"]
        q = self.regs["q"]
        vn = self.regs["vn"]
        un = self.regs["un"]
        product = self.regs["product"]
        r = self.regs["r"]
        t_single = self.regs["t_single"]
        s_scalar = self.regs["s_scalar"]
        t_for_uv_shift = self.regs["t_for_uv_shift"]
        n_for_unnorm = self.regs["n_for_unnorm"]
        t_for_unnorm = self.regs["t_for_unnorm"]
        s_for_unnorm = self.regs["s_for_unnorm"]
        qhat = self.regs["qhat"]
        rhat = self.regs["rhat"]
        t_for_prod = self.regs["t_for_prod"]
        num_size = self.num_size
        denom_size = self.denom_size
        q_size = self.q_size

        # NOTE(review): the label is fixed text even though the sizes above
        # are read from self -- confirm whether that is intended.
        yield "divmod_512_by_256:"
        # n in n_0 size num_size
        # d in d_0 size denom_size

        # switch to names used by Knuth's algorithm D
        yield f"setvl 0, 0, {num_size}, 0, 1, 1"  # set VL to num_size
        yield f"sv.or *{u}, *{n_0}, *{n_0}"  # u = n
        yield f"addi {m}, 0, {num_size}"  # m = len(u)
        # v = d: no instruction is emitted for this step, so both names have
        # to refer to the same registers.  The check is an explicit raise
        # (same exception type and message as the patch's `assert`) so that
        # it is not stripped when Python runs with -O.
        if not (v == d_0):
            raise AssertionError("v and d_0 must be in the same regs")
        yield f"addi {n_scalar}, 0, {denom_size}"  # n = len(v)

        # allocate outputs/temporaries
        yield f"setvl 0, 0, {q_size}, 0, 1, 1"  # set VL to q_size
        yield f"sv.addi *{q}, 0, 0"  # q = [0] * q_size

        raise NotImplementedError("FIXME: finish")
        # Unreachable until the raise above is removed; kept from the patch
        # as the intended final line of the listing.
        yield "bclr 20, 0, 0 # blr"

    @cached_property
    def asm(self):
        """Return every line produced by ``__asm_iter`` as a tuple.

        The tuple is built on first access and cached on the instance.  While
        ``__asm_iter`` still ends in ``NotImplementedError``, accessing this
        attribute raises that error instead of returning a value.
        """
        return tuple(self.__asm_iter())