From: Jacob Lifshay
Date: Fri, 22 Dec 2023 05:10:11 +0000 (-0800)
Subject: tests/bigint/powmod: initial version of bigint multiply remap
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a3c1930de7990c8babcb3908ed7650e1d08eafb6;p=openpower-isa.git

tests/bigint/powmod: initial version of bigint multiply remap

it has some issues around being able to encode scalar RS but vector RT, and
doesn't match the scalar * vector multiplication pattern, but is quite compact.
---

diff --git a/src/openpower/test/bigint/powmod.py b/src/openpower/test/bigint/powmod.py
index 7fc79468..73d87b35 100644
--- a/src/openpower/test/bigint/powmod.py
+++ b/src/openpower/test/bigint/powmod.py
@@ -99,32 +99,35 @@ def python_mul_algorithm(a, b):
     return y
 
 
-def python_mul_algorithm2(a, b):
+def python_mul_remap_algorithm(a, b):
     # version 2 of the MUL_256_X_256_TO_512_ASM algorithm using base 100 rather
     # than 2^64, since that's easier to read.
-    # the idea here is that it will "morph" into something more akin to
-    # using REMAP bigmul (first using REMAP Indexed)
-
-    # create a schedule for use below. the "end of inner loop" marker is 0b01
-    iyl = []
-    il = []
-    for iy in range(4):
-        for i in range(4):
-            iyl.append((iy+i, i == 3))
-            il.append(i)
-        for i in range(5):
-            iyl.append((iy+i, i == 4))
-            il.append(i)
-
-    y = [0] * 8  # result y and temp t of same size
-    t = [0] * 8  # no need after this to set t[4] to zero
-    for iy in range(4):
-        for i in range(4):  # use t[iy+4] as a 64-bit carry
-            t[iy+i], t[iy+4] = maddedu(a[iy], b[i], t[iy+4])
-        ca = 0
-        for i in range(5):  # add vec t to y with 1-bit carry
-            idx = iy + i
-            y[idx], ca = adde(y[idx], t[idx], ca)
+    # run this file in a debugger to see all the intermediate values.
+    a_sz = len(a)
+    b_sz = len(b)
+    a_idx = []
+    b_idx = []
+    a_plus_b_idx = []
+    a_plus_b_plus_1_idx = []
+    for ai in range(a_sz):
+        for bi in range(b_sz):
+            a_idx.append(ai)
+            b_idx.append(bi)
+            a_plus_b_idx.append(ai + bi)
+            a_plus_b_plus_1_idx.append(ai + bi + 1)
+
+    y = [0] * (a_sz + b_sz)
+    ca = 0
+    for i in range(a_sz * b_sz):
+        # no need to clear ca between ai outer loops, since the partial
+        # products can't get big enough to have a carry out, so ca will
+        # always be zero when (i % b_sz == 0).
+        # That said, hardware will probably want to pattern-match this to
+        # remove the unnecessary dependency through ca.
+        y[a_plus_b_idx[i]], t = maddedu(
+            a[a_idx[i]], b[b_idx[i]], y[a_plus_b_idx[i]])
+        y[a_plus_b_plus_1_idx[i]], ca = adde(
+            y[a_plus_b_plus_1_idx[i]], t, ca)
     return y
 
 
@@ -1255,19 +1258,26 @@ if __name__ == "__main__":
     a = b = (99, 99, 99, 99)
     expected = [1, 0, 0, 0, 98, 99, 99, 99]
     assert python_mul_algorithm(a, b) == expected
+    # check python_mul_remap_algorithm
+    assert python_mul_remap_algorithm(a, b) == expected
 
-    # now test python_mul_algorithm2 *against* python_mul_algorithm
+    # now test python_mul_remap_algorithm *against* python_mul_algorithm
     import random
     random.seed(0)  # reproducible values
-    for i in range(10000):
+
+    def fmt_l(l):
+        return "[" + ", ".join("%2i" % (i,) for i in l) + "]"
+
+    for i in range(100000):
         a = []
         b = []
         for j in range(4):
             a.append(random.randint(0, 99))
             b.append(random.randint(0, 99))
         expected = python_mul_algorithm(a, b)
-        testing = python_mul_algorithm2(a, b)
-        report = "%+17s * %-17s = %s\n" % (repr(a), repr(b), repr(expected))
-        report += " (%s)" % repr(testing)
+        testing = python_mul_remap_algorithm(a, b)
+        report = "%s * %s = " % (fmt_l(a), fmt_l(b))
+        indent = " " * len(report)
+        report += "%s\n%s%s" % (fmt_l(expected), indent, fmt_l(testing))
         print(report)
         assert expected == testing