From: Jacob Lifshay Date: Thu, 14 Sep 2023 06:24:12 +0000 (-0700) Subject: add SVP64 256x256->512-bit multiply X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e044403f4c243b3248df0872a661756b6cc8a984;p=openpower-isa.git add SVP64 256x256->512-bit multiply --- diff --git a/src/openpower/decoder/isa/test_caller_svp64_powmod.py b/src/openpower/decoder/isa/test_caller_svp64_powmod.py new file mode 100644 index 00000000..d76d6b46 --- /dev/null +++ b/src/openpower/decoder/isa/test_caller_svp64_powmod.py @@ -0,0 +1,27 @@ +""" modular exponentiation (`pow(x, y, z)`) tests + +related bugs: + + * https://bugs.libre-soc.org/show_bug.cgi?id=1044 +""" + +import unittest + +from openpower.test.bigint.powmod import PowModCases +from openpower.test.runner import TestRunnerBase + +# writing the test_caller invocation this way makes it work with pytest + + +class TestPowMod(TestRunnerBase): + def __init__(self, test): + assert test == 'test' + super().__init__(PowModCases().test_data) + + def test(self): + # dummy function to make unittest try to test this class + pass + + +if __name__ == "__main__": + unittest.main() diff --git a/src/openpower/test/bigint/powmod.py b/src/openpower/test/bigint/powmod.py new file mode 100644 index 00000000..801302c2 --- /dev/null +++ b/src/openpower/test/bigint/powmod.py @@ -0,0 +1,124 @@ +""" modular exponentiation (`pow(x, y, z)`) + +related bugs: + + * https://bugs.libre-soc.org/show_bug.cgi?id=1044 +""" + +from openpower.test.common import TestAccumulatorBase, skip_case +from openpower.test.state import ExpectedState +from openpower.test.util import assemble +from nmutil.sim_util import hash_256 + + +MUL_256_X_256_TO_512_ASM = [ + "mul_256_to_512:", + # a is in r4-7, b is in r8-11 + "setvl 0, 0, 8, 0, 1, 1", # set VL to 8 + "sv.or *12, *4, *4", # move args to r12-19 + # a is now in r12-15, b is in r16-19 + "sv.addi *4, 0, 0", # clear output + "setvl 0, 0, 4, 0, 1, 1", # set VL to 4 + "sv.maddedu *4, *12, 16, 8", # first partial-product a * b[0] + "addi 24, 0, 0", + "sv.maddedu *20, *12, 17, 24", # second partial-product a * b[1] + "addc 5, 5, 20", + "sv.adde *6, *6, *21", + "addi 24, 0, 0", + "sv.maddedu *20, *12, 18, 24", # third partial-product a * b[2] + "addc 6, 6, 20", + "sv.adde *7, *7, *21", + "addi 24, 0, 0", + "sv.maddedu *20, *12, 19, 24", # final partial-product a * b[3] + "addc 7, 7, 20", + "sv.adde *8, *8, *21", + "bclr 20, 0, 0 # blr", +] + + +def _python_mul_algorithm(a, b): + # version of the MUL_256_X_256_TO_512_ASM algorithm using base 100 rather + # than 2^64, since that's easier to read. + # run this file in a debugger to see all the intermediate values. + def maddedu(a, b, c): + y = a * b + c + return y % 100, y // 100 + + def adde(a, b, c): + y = a + b + c + return y % 100, y // 100 + + def addc(a, b): + y = a + b + return y % 100, y // 100 + + y = [0] * 8 + t = [0] * 5 + for i in range(4): + y[i], y[4] = maddedu(a[0], b[i], y[4]) + t[4] = 0 + for i in range(4): + t[i], t[4] = maddedu(a[1], b[i], t[4]) + y[1], ca = addc(y[1], t[0]) + for i in range(4): + y[2 + i], ca = adde(y[2 + i], t[1 + i], ca) + t[4] = 0 + for i in range(4): + t[i], t[4] = maddedu(a[2], b[i], t[4]) + y[2], ca = addc(y[2], t[0]) + for i in range(4): + y[3 + i], ca = adde(y[3 + i], t[1 + i], ca) + t[4] = 0 + for i in range(4): + t[i], t[4] = maddedu(a[3], b[i], t[4]) + y[3], ca = addc(y[3], t[0]) + for i in range(4): + y[4 + i], ca = adde(y[4 + i], t[1 + i], ca) + return y + + +class PowModCases(TestAccumulatorBase): + def call_case(self, instructions, expected, initial_regs, src_loc_at=0): + stop_at_pc = 0x10000000 + sprs = {8: stop_at_pc} + expected.pc = stop_at_pc + expected.sprs['LR'] = None + self.add_case(assemble(instructions), + initial_regs, initial_sprs=sprs, + stop_at_pc=stop_at_pc, expected=expected, + src_loc_at=src_loc_at + 1) + + def case_mul_256_x_256_to_512(self): + for i in range(10): + a = hash_256(f"mul256 input a {i}") + b = hash_256(f"mul256 input b {i}") + if i == 0: + # use known values: + a = b = 2**256 - 1 + elif i == 1: + # use known values: + a = b = (2**256 - 1) // 0xFF + y = a * b + with self.subTest(a=f"{a:#_x}", b=f"{b:#_x}", y=f"{y:#_x}"): + # registers start filled with junk + initial_regs = [0xABCDEF] * 128 + for i in range(4): + # write a in LE order to regs 4-7 + initial_regs[4 + i] = (a >> (64 * i)) % 2**64 + # write b in LE order to regs 8-11 + initial_regs[8 + i] = (b >> (64 * i)) % 2**64 + # only check regs up to r11 since that's where the output is + e = ExpectedState(int_regs=initial_regs[:12]) + for i in range(8): + # write y in LE order to regs 4-11 + e.intregs[4 + i] = (y >> (64 * i)) % 2**64 + + self.call_case(MUL_256_X_256_TO_512_ASM, e, initial_regs) + + # TODO: add 512x256-bit divrem + # TODO: add 256-bit modular exponentiation + + +if __name__ == "__main__": + a = b = 99, 99, 99, 99 + assert _python_mul_algorithm(a, b) == [1, 0, 0, 0, 98, 99, 99, 99]