From aa6f10d1f2637c1e1ff4eb6f6811cac0ba60eb7a Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton
Date: Fri, 22 Dec 2023 09:11:58 +0000
Subject: [PATCH] bug 1155: split out yielding indices into separate function

when done as a separate "yielding function" this is closer to the hardware
and is a code-morph step to putting that (very same) yielding function
behind the SVSHAPE system in ISACaller
---
 src/openpower/test/bigint/powmod.py | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/src/openpower/test/bigint/powmod.py b/src/openpower/test/bigint/powmod.py
index 13c092e8..6ceb9f7f 100644
--- a/src/openpower/test/bigint/powmod.py
+++ b/src/openpower/test/bigint/powmod.py
@@ -99,28 +99,25 @@ def python_mul_algorithm(a, b):
     return y
 
 
-def python_mul_remap_algorithm(a, b):
-    # version 2 of the MUL_256_X_256_TO_512_ASM algorithm using base 100 rather
-    # than 2^64, since that's easier to read.
-    a_sz, b_sz = len(a), = len(b)
-    ai, bi, apbi, apbp1 = [], [], [], [] # REMAP indices
+def python_mul_remap_yielder(a_sz, b_sz):
     for ai in range(a_sz):
         for bi in range(b_sz):
-            ai.append(ai)
-            bi.append(bi)
-            apbi.append(ai + bi)
-            apbp1.append(ai + bi + 1)
+            yield ai, bi, ai + bi, ai + bi + 1
 
+def python_mul_remap_algorithm(a, b):
+    # version 2 of the MUL_256_X_256_TO_512_ASM algorithm using base 100 rather
+    # than 2^64, since that's easier to read.
+    a_sz, b_sz = len(a), len(b)
     y = [0] * (a_sz + b_sz)
     ca = 0
-    for i in range(a_sz * b_sz):
+    for ai, bi, apbi, apbp1 in python_mul_remap_yielder(a_sz, b_sz):
         # no need to clear ca between ai outer loops, since the partial
         # products can't get big enough to have a carry out, so ca will
         # always be zero when (i % b_sz == 0).
         # That said, hardware will probably want to pattern-match this to
         # remove the unnecessary dependency through ca.
-        y[apbi[i]], t = maddedu(a[ai[i]], b[bi[i]], y[apbi[i]])
-        y[apbp1[i]], ca = adde(y[apbp1[i]], t, ca)
+        y[apbi], t = maddedu(a[ai], b[bi], y[apbi])
+        y[apbp1], ca = adde(y[apbp1], t, ca)
     return y
 
 
-- 
2.30.2