From: Jacob Lifshay Date: Wed, 16 Nov 2022 05:03:47 +0000 (-0800) Subject: TOOM-2 256x256->512-bit [un]signed*[un]signed mul works! X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=a369418056bf51137af0fc6bdcfc0799697df583;p=bigint-presentation-code.git TOOM-2 256x256->512-bit [un]signed*[un]signed mul works! --- diff --git a/src/bigint_presentation_code/_tests/test_toom_cook.py b/src/bigint_presentation_code/_tests/test_toom_cook.py index 2032cbc..76b9a5e 100644 --- a/src/bigint_presentation_code/_tests/test_toom_cook.py +++ b/src/bigint_presentation_code/_tests/test_toom_cook.py @@ -1,5 +1,6 @@ +from contextlib import contextmanager import unittest -from typing import Callable +from typing import Any, Callable, ContextManager, Iterator, Tuple from bigint_presentation_code.compiler_ir import (GPR_SIZE_IN_BITS, GPR_SIZE_IN_BYTES, @@ -8,19 +9,33 @@ from bigint_presentation_code.compiler_ir import (GPR_SIZE_IN_BITS, PostRASimState, PreRASimState, SSAVal) from bigint_presentation_code.register_allocator import allocate_registers -from bigint_presentation_code.toom_cook import (ToomCookInstance, simple_mul, - toom_cook_mul) +from bigint_presentation_code.toom_cook import (ToomCookInstance, ToomCookMul, + simple_mul) + +_StateFactory = Callable[[], ContextManager[BaseSimState]] def simple_umul(fn, lhs, rhs): - # type: (Fn, SSAVal, SSAVal) -> SSAVal + # type: (Fn, SSAVal, SSAVal) -> tuple[SSAVal, None] return simple_mul(fn=fn, lhs=lhs, lhs_signed=False, rhs=rhs, - rhs_signed=False, name="mul") + rhs_signed=False, name="mul"), None + + +def get_pre_ra_state_factory(code): + # type: (Mul) -> _StateFactory + @contextmanager + def state_factory(): + state = PreRASimState(ssa_vals={}, memory={}) + with state.set_as_current_debugging_state(): + yield state + return state_factory class Mul: + _MulFn = Callable[[Fn, SSAVal, SSAVal], Tuple[SSAVal, Any]] + def __init__(self, mul, lhs_size_in_words, rhs_size_in_words): - # type: (Callable[[Fn, SSAVal, SSAVal], SSAVal], 
int, int) -> None + # type: (_MulFn, int, int) -> None super().__init__() self.fn = fn = Fn() self.dest_offset = 0 @@ -34,31 +49,45 @@ class Mul: self.rhs_offset = self.lhs_size_in_bytes + self.lhs_offset self.ptr_in = fn.append_new_op(kind=OpKind.FuncArgR3, name="ptr_in").outputs[0] - lhs_setvl = fn.append_new_op( + self.lhs_setvl = fn.append_new_op( kind=OpKind.SetVLI, immediates=[lhs_size_in_words], maxvl=lhs_size_in_words, name="lhs_setvl") - load_lhs = fn.append_new_op( + self.load_lhs = fn.append_new_op( kind=OpKind.SvLd, immediates=[self.lhs_offset], - input_vals=[self.ptr_in, lhs_setvl.outputs[0]], + input_vals=[self.ptr_in, self.lhs_setvl.outputs[0]], name="load_lhs", maxvl=lhs_size_in_words) - rhs_setvl = fn.append_new_op( + self.rhs_setvl = fn.append_new_op( kind=OpKind.SetVLI, immediates=[rhs_size_in_words], maxvl=rhs_size_in_words, name="rhs_setvl") - load_rhs = fn.append_new_op( + self.load_rhs = fn.append_new_op( kind=OpKind.SvLd, immediates=[self.rhs_offset], - input_vals=[self.ptr_in, rhs_setvl.outputs[0]], - name="load_rhs", maxvl=3) - retval = mul(fn, load_lhs.outputs[0], load_rhs.outputs[0]) - dest_setvl = fn.append_new_op( + input_vals=[self.ptr_in, self.rhs_setvl.outputs[0]], + name="load_rhs", maxvl=rhs_size_in_words) + self.retval = mul( + fn, self.load_lhs.outputs[0], self.load_rhs.outputs[0]) + self.dest_setvl = fn.append_new_op( kind=OpKind.SetVLI, immediates=[self.dest_size_in_words], maxvl=self.dest_size_in_words, name="dest_setvl") - fn.append_new_op( + self.store = fn.append_new_op( kind=OpKind.SvStd, - input_vals=[retval, self.ptr_in, dest_setvl.outputs[0]], + input_vals=[self.retval[0], self.ptr_in, + self.dest_setvl.outputs[0]], immediates=[self.dest_offset], maxvl=self.dest_size_in_words, name="store_dest") +def get_post_ra_state_factory(code): + # type: (Mul) -> _StateFactory + ssa_val_to_loc_map = allocate_registers(code.fn) + + @contextmanager + def state_factory(): + yield PostRASimState( + 
ssa_val_to_loc_map=ssa_val_to_loc_map, + memory={}, loc_values={}) + return state_factory + + class TestToomCook(unittest.TestCase): maxDiff = None @@ -228,32 +257,23 @@ class TestToomCook(unittest.TestCase): ) def test_simple_mul_192x192_pre_ra_sim(self): - def get_state_factory(code): - # type: (Mul) -> Callable[[], BaseSimState] - return lambda: PreRASimState(ssa_vals={}, memory={}) for lhs_signed in False, True: for rhs_signed in False, True: self.tst_simple_mul_192x192_sim( lhs_signed=lhs_signed, rhs_signed=rhs_signed, - get_state_factory=get_state_factory) + get_state_factory=get_pre_ra_state_factory) def test_simple_mul_192x192_post_ra_sim(self): - def get_state_factory(code): - # type: (Mul) -> Callable[[], BaseSimState] - ssa_val_to_loc_map = allocate_registers(code.fn) - return lambda: PostRASimState( - ssa_val_to_loc_map=ssa_val_to_loc_map, - memory={}, loc_values={}) for lhs_signed in False, True: for rhs_signed in False, True: self.tst_simple_mul_192x192_sim( lhs_signed=lhs_signed, rhs_signed=rhs_signed, - get_state_factory=get_state_factory) + get_state_factory=get_post_ra_state_factory) def tst_simple_mul_192x192_sim( self, lhs_signed, # type: bool rhs_signed, # type: bool - get_state_factory, # type: Callable[[Mul], Callable[[], BaseSimState]] + get_state_factory, # type: Callable[[Mul], _StateFactory] ): # test multiplying: # 0x000191acb262e15b_4c6b5f2b19e1a53e_821a2342132c5b57 @@ -270,9 +290,9 @@ class TestToomCook(unittest.TestCase): b"arbitrary 192x192->384-bit multiplication test", 'little') self.assertEqual(lhs_value * rhs_value, prod_value) code = Mul( - mul=lambda fn, lhs, rhs: simple_mul( + mul=lambda fn, lhs, rhs: (simple_mul( fn=fn, lhs=lhs, lhs_signed=lhs_signed, - rhs=rhs, rhs_signed=rhs_signed, name="mul"), + rhs=rhs, rhs_signed=rhs_signed, name="mul"), None), lhs_size_in_words=3, rhs_size_in_words=3) state_factory = get_state_factory(code) ptr_in = 0x100 @@ -288,29 +308,29 @@ class TestToomCook(unittest.TestCase): with 
self.subTest(lhs_signed=lhs_signed, rhs_signed=rhs_signed, lhs_neg=lhs_neg, rhs_neg=rhs_neg): - state = state_factory() - state[code.ptr_in] = ptr_in, - lhs = lhs_value - if lhs_neg: - lhs = 2 ** 192 - lhs - rhs = rhs_value - if rhs_neg: - rhs = 2 ** 192 - rhs - for i in range(3): - v = (lhs >> GPR_SIZE_IN_BITS * i) & GPR_VALUE_MASK - state.store(lhs_ptr + i * GPR_SIZE_IN_BYTES, v) - for i in range(3): - v = (rhs >> GPR_SIZE_IN_BITS * i) & GPR_VALUE_MASK - state.store(rhs_ptr + i * GPR_SIZE_IN_BYTES, v) - code.fn.sim(state) - expected = prod_value - if lhs_neg != rhs_neg: - expected = 2 ** 384 - expected - prod = 0 - for i in range(6): - v = state.load(dest_ptr + GPR_SIZE_IN_BYTES * i) - prod += v << (GPR_SIZE_IN_BITS * i) - self.assertEqual(hex(prod), hex(expected)) + with state_factory() as state: + state[code.ptr_in] = ptr_in, + lhs = lhs_value + if lhs_neg: + lhs = 2 ** 192 - lhs + rhs = rhs_value + if rhs_neg: + rhs = 2 ** 192 - rhs + for i in range(3): + v = (lhs >> GPR_SIZE_IN_BITS * i) & GPR_VALUE_MASK + state.store(lhs_ptr + i * GPR_SIZE_IN_BYTES, v) + for i in range(3): + v = (rhs >> GPR_SIZE_IN_BITS * i) & GPR_VALUE_MASK + state.store(rhs_ptr + i * GPR_SIZE_IN_BYTES, v) + code.fn.sim(state) + expected = prod_value + if lhs_neg != rhs_neg: + expected = 2 ** 384 - expected + prod = 0 + for i in range(6): + v = state.load(dest_ptr + GPR_SIZE_IN_BYTES * i) + prod += v << (GPR_SIZE_IN_BITS * i) + self.assertEqual(hex(prod), hex(expected)) def test_simple_mul_192x192_ops(self): code = Mul(mul=simple_umul, lhs_size_in_words=3, rhs_size_in_words=3) @@ -990,389 +1010,239 @@ class TestToomCook(unittest.TestCase): 'sv.std *4, 0(3)' ]) - def toom_2_mul_256x256(self): + def toom_2_mul_256x256(self, lhs_signed, rhs_signed): + # type: (bool, bool) -> Mul TOOM_2 = ToomCookInstance.make_toom_2() - instances = TOOM_2, TOOM_2 + instances = TOOM_2, def mul(fn, lhs, rhs): - # type: (Fn, SSAVal, SSAVal) -> SSAVal - return toom_cook_mul(fn=fn, lhs=lhs, lhs_signed=False, 
rhs=rhs, - rhs_signed=False, instances=instances) - return Mul(mul=mul, lhs_size_in_words=3, rhs_size_in_words=3) + # type: (Fn, SSAVal, SSAVal) -> tuple[SSAVal, ToomCookMul] + v = ToomCookMul(fn=fn, lhs=lhs, lhs_signed=lhs_signed, rhs=rhs, + rhs_signed=rhs_signed, instances=instances) + return v.retval, v + return Mul(mul=mul, lhs_size_in_words=4, rhs_size_in_words=4) + + def make_256x256_mul_test_cases(self, lhs_signed, rhs_signed): + # type: (bool, bool) -> Iterator[tuple[int, int, int]] + # test multiplying `+-1 << n` and: + # 0xc162321a5eaad80b_4b86bb0efdfb93c0_a789ff04cc11b157_eaa08e29fb197621 + # * + # 0x3138710167583371_998af336a8fac64d_e6da3737090787fe_85ba09ea701f4af2 + # == + # int("0x" + # "252e6e6f69746163_696c7069746c754d_" + # "2061627573746172_614b202d20322d4d_" + # "4f4f5420676e6973_75206c756d20746e_" + # "6967696220746962_2d36353278363532", base=0) + # == int.from_bytes(b'256x256-bit bigint mul using TOOM-2 ' + # b'- Karatsuba Multiplication.%', 'little') + lhs_value_in = (0xc162321a5eaad80b_4b86bb0efdfb93c0 << 128) \ + | 0xa789ff04cc11b157_eaa08e29fb197621 + rhs_value_in = (0x3138710167583371_998af336a8fac64d << 128) \ + | 0xe6da3737090787fe_85ba09ea701f4af2 + prod_value_in = int.from_bytes( + b'256x256-bit bigint mul using TOOM-2 ' + b'- Karatsuba Multiplication.%', 'little') + self.assertEqual(lhs_value_in * rhs_value_in, prod_value_in) + shifts = [*range(0, 256, 16), *range(15, 256, 16)] + lhs_values = [1 << i for i in shifts] + [0, lhs_value_in] + rhs_values = [1 << i for i in shifts] + [0, rhs_value_in] + if lhs_signed: + lhs_values.extend([-i for i in lhs_values]) + if rhs_signed: + rhs_values.extend([-i for i in rhs_values]) + + def key(v): + # type: (int) -> tuple[bool, int] + return abs(v) in (lhs_value_in, rhs_value_in), v % (1 << 256) + + lhs_values.sort(key=key) + rhs_values.sort(key=key) + for lhs_value in lhs_values: + for rhs_value in rhs_values: + lhs_value %= 1 << 256 + rhs_value %= 1 << 256 + if lhs_value >> 255 != 0 and 
lhs_signed: + lhs_value -= 1 << 256 + if rhs_value >> 255 != 0 and rhs_signed: + rhs_value -= 1 << 256 + prod_value = lhs_value * rhs_value + lhs_value %= 1 << 256 + rhs_value %= 1 << 256 + prod_value %= 1 << 512 + yield lhs_value, rhs_value, prod_value + + def tst_toom_2_mul_256x256_sim( + self, lhs_signed, # type: bool + rhs_signed, # type: bool + get_state_factory, # type: Callable[[Mul], _StateFactory] + ): + code = self.toom_2_mul_256x256( + lhs_signed=lhs_signed, rhs_signed=rhs_signed) + print(code.retval[1]) + print(code.fn.ops_to_str()) + state_factory = get_state_factory(code) + ptr_in = 0x100 + dest_ptr = ptr_in + code.dest_offset + lhs_ptr = ptr_in + code.lhs_offset + rhs_ptr = ptr_in + code.rhs_offset + values = self.make_256x256_mul_test_cases( + lhs_signed=lhs_signed, rhs_signed=rhs_signed) + for lhs_value, rhs_value, prod_value in values: + with self.subTest(lhs_signed=lhs_signed, rhs_signed=rhs_signed, + lhs_value=hex(lhs_value), + rhs_value=hex(rhs_value), + prod_value=hex(prod_value)): + with state_factory() as state: + state[code.ptr_in] = ptr_in, + for i in range(4): + v = lhs_value >> GPR_SIZE_IN_BITS * i + v &= GPR_VALUE_MASK + state.store(lhs_ptr + i * GPR_SIZE_IN_BYTES, v) + for i in range(4): + v = rhs_value >> GPR_SIZE_IN_BITS * i + v &= GPR_VALUE_MASK + state.store(rhs_ptr + i * GPR_SIZE_IN_BYTES, v) + code.fn.sim(state) + prod = 0 + for i in range(8): + v = state.load(dest_ptr + GPR_SIZE_IN_BYTES * i) + prod += v << (GPR_SIZE_IN_BITS * i) + self.assertEqual(hex(prod), hex(prod_value), + f"failed: state={state}") def test_toom_2_mul_256x256_pre_ra_sim(self): - self.skipTest("WIP") # FIXME: finish - # maybe use something that multiplies to: - # int.from_bytes( - # b'256x256-bit bigint mul using TOOM-2 -- Karatsuba Multiplication!', - # 'little') - # as the multiplication test... 
- # known factors (used yafu-1.34): - # P1 = 2 - # P1 = 7 - # P3 = 197 - # P7 = 1319057 - # ***co-factor*** - # C144 = 4812983706140089583461601472550901888754775658675461119771495\ - # 11062521614062442465071845504357495554525178667728633744424201288485\ - # 594266060663587 + for lhs_signed in False, True: + for rhs_signed in False, True: + self.tst_toom_2_mul_256x256_sim( + lhs_signed=lhs_signed, rhs_signed=rhs_signed, + get_state_factory=get_pre_ra_state_factory) + + def test_toom_2_mul_256x256_uu_post_ra_sim(self): + self.tst_toom_2_mul_256x256_sim( + lhs_signed=False, rhs_signed=False, + get_state_factory=get_post_ra_state_factory) + + def test_toom_2_mul_256x256_su_post_ra_sim(self): + self.tst_toom_2_mul_256x256_sim( + lhs_signed=True, rhs_signed=False, + get_state_factory=get_post_ra_state_factory) + + def test_toom_2_mul_256x256_us_post_ra_sim(self): + self.tst_toom_2_mul_256x256_sim( + lhs_signed=False, rhs_signed=True, + get_state_factory=get_post_ra_state_factory) + + def test_toom_2_mul_256x256_ss_post_ra_sim(self): + self.tst_toom_2_mul_256x256_sim( + lhs_signed=True, rhs_signed=True, + get_state_factory=get_post_ra_state_factory) def test_toom_2_mul_256x256_asm(self): - self.skipTest("WIP") # FIXME: finish - code = self.toom_2_mul_256x256() + code = self.toom_2_mul_256x256(lhs_signed=False, rhs_signed=False) fn = code.fn assigned_registers = allocate_registers(fn) gen_asm_state = GenAsmState(assigned_registers) fn.gen_asm(gen_asm_state) self.assertEqual(gen_asm_state.output, [ - 'or 23, 3, 3', - 'setvl 0, 0, 3, 0, 1, 1', - 'or 6, 23, 23', - 'setvl 0, 0, 3, 0, 1, 1', - 'sv.ld *3, 48(6)', - 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *7, *3, *3', - 'setvl 0, 0, 3, 0, 1, 1', - 'or 6, 23, 23', - 'setvl 0, 0, 3, 0, 1, 1', - 'sv.ld *3, 72(6)', - 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *16, *3, *3', - 'setvl 0, 0, 3, 0, 1, 1', - 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *3, *7, *7', - 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *6, *3, *3', - 'or 5, 6, 6', - 'or 4, 7, 7', - 'or 3, 8, 
8', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 3, 5, 5', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 10, 3, 3', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 3, 4, 4', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 25, 3, 3', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 3, 10, 10', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 4, 3, 3', - 'addi 3, 0, 0', - 'or 5, 3, 3', - 'setvl 0, 0, 2, 0, 1, 1', - 'or 3, 4, 4', - 'or 4, 5, 5', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *6, *3, *3', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 3, 25, 25', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 4, 3, 3', - 'addi 3, 0, 0', - 'or 5, 3, 3', - 'setvl 0, 0, 2, 0, 1, 1', - 'or 3, 4, 4', - 'or 4, 5, 5', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'addic 0, 0, 0', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or/mrr *7, *6, *6', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *5, *3, *3', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.adde *3, *7, *5', - 'setvl 0, 0, 2, 0, 1, 1', + 'or 42, 3, 3', + 'setvl 0, 0, 4, 0, 1, 1', + 'or 7, 42, 42', + 'setvl 0, 0, 4, 0, 1, 1', + 'sv.ld *3, 64(7)', + 'setvl 0, 0, 4, 0, 1, 1', + 'sv.or *8, *3, *3', + 'setvl 0, 0, 4, 0, 1, 1', + 'or 7, 42, 42', + 'setvl 0, 0, 4, 0, 1, 1', + 'sv.ld *3, 96(7)', + 'setvl 0, 0, 4, 0, 1, 1', 'sv.or *14, *3, *3', - 'setvl 0, 0, 3, 0, 1, 1', - 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *3, *16, *16', - 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *6, *3, *3', - 'or 5, 6, 6', - 'or 4, 7, 7', - 'or 3, 8, 8', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 3, 5, 5', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 9, 3, 3', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 3, 4, 4', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 24, 3, 3', - 'setvl 0, 0, 1, 0, 1, 1', + 'setvl 0, 0, 4, 0, 1, 1', + 'setvl 0, 0, 4, 0, 1, 1', + 'sv.or *3, *8, *8', + 'setvl 0, 0, 4, 0, 1, 1', + 'sv.or *9, *3, *3', 'or 3, 9, 9', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 4, 3, 3', - 'addi 3, 0, 0', - 'or 5, 3, 3', - 'setvl 0, 0, 2, 0, 1, 1', - 'or 3, 4, 4', - 'or 4, 5, 5', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *6, *3, *3', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 3, 
24, 24', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 4, 3, 3', - 'addi 3, 0, 0', - 'or 5, 3, 3', + 'or 5, 10, 10', + 'or 8, 11, 11', + 'or 7, 12, 12', 'setvl 0, 0, 2, 0, 1, 1', - 'or 3, 4, 4', 'or 4, 5, 5', 'setvl 0, 0, 2, 0, 1, 1', 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'addic 0, 0, 0', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or/mrr *7, *6, *6', - 'setvl 0, 0, 2, 0, 1, 1', 'sv.or *5, *3, *3', 'setvl 0, 0, 2, 0, 1, 1', - 'sv.adde *3, *7, *5', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *11, *3, *3', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 3, 9, 9', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 5, 3, 3', - 'addi 3, 0, 0', - 'or 4, 3, 3', - 'setvl 0, 0, 1, 0, 1, 1', - 'addi 3, 0, 0', - 'or 6, 10, 10', - 'setvl 0, 0, 1, 0, 1, 1', - 'sv.maddedu *3, *6, 5, 4', - 'or 5, 4, 4', - 'setvl 0, 0, 1, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'or 4, 5, 5', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *21, *3, *3', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *3, *14, *14', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or/mrr *4, *3, *3', - 'or 3, 4, 4', - 'or 4, 5, 5', - 'setvl 0, 0, 1, 0, 1, 1', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 10, 3, 3', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 3, 4, 4', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 18, 3, 3', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 3, 10, 10', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 4, 3, 3', - 'addi 3, 0, 0', - 'or 5, 3, 3', - 'setvl 0, 0, 2, 0, 1, 1', - 'or 3, 4, 4', - 'or 4, 5, 5', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *6, *3, *3', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 3, 18, 18', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 4, 3, 3', - 'addi 3, 0, 0', - 'or 5, 3, 3', - 'setvl 0, 0, 2, 0, 1, 1', - 'or 3, 4, 4', - 'or 4, 5, 5', + 'or 3, 8, 8', + 'or 4, 7, 7', 'setvl 0, 0, 2, 0, 1, 1', 'setvl 0, 0, 2, 0, 1, 1', + 'sv.or *8, *3, *3', 'setvl 0, 0, 2, 0, 1, 1', - 'addic 0, 0, 0', 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or/mrr *7, *6, *6', + 'sv.or *3, *5, *5', 'setvl 0, 0, 2, 0, 1, 1', 'sv.or *5, *3, *3', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.adde *3, 
*7, *5', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *16, *3, *3', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *3, *11, *11', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or/mrr *4, *3, *3', - 'or 3, 4, 4', - 'or 4, 5, 5', - 'setvl 0, 0, 1, 0, 1, 1', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 9, 3, 3', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 3, 4, 4', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 15, 3, 3', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 3, 9, 9', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 4, 3, 3', - 'addi 3, 0, 0', - 'or 5, 3, 3', - 'setvl 0, 0, 2, 0, 1, 1', - 'or 3, 4, 4', 'or 4, 5, 5', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *6, *3, *3', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 3, 15, 15', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 4, 3, 3', + 'or 7, 6, 6', 'addi 3, 0, 0', - 'or 5, 3, 3', - 'setvl 0, 0, 2, 0, 1, 1', + 'or 6, 3, 3', + 'setvl 0, 0, 3, 0, 1, 1', 'or 3, 4, 4', - 'or 4, 5, 5', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'addic 0, 0, 0', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or/mrr *7, *6, *6', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *5, *3, *3', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.adde *3, *7, *5', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *7, *3, *3', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 3, 9, 9', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 5, 3, 3', - 'addi 3, 0, 0', - 'or 4, 3, 3', - 'setvl 0, 0, 1, 0, 1, 1', - 'addi 3, 0, 0', - 'or 6, 10, 10', - 'setvl 0, 0, 1, 0, 1, 1', - 'sv.maddedu *3, *6, 5, 4', - 'or 5, 4, 4', - 'setvl 0, 0, 1, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'or 4, 5, 5', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *19, *3, *3', + 'or 4, 7, 7', + 'or 5, 6, 6', + 'setvl 0, 0, 3, 0, 1, 1', + 'setvl 0, 0, 3, 0, 1, 1', + 'sv.or *24, *3, *3', 'setvl 0, 0, 2, 0, 1, 1', 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *3, *7, *7', + 'sv.or *3, *8, *8', 'setvl 0, 0, 2, 0, 1, 1', 'sv.or *5, *3, *3', 'or 4, 5, 5', - 'or 11, 6, 6', - 'addi 3, 0, 0', - 'or 10, 3, 3', - 'setvl 0, 0, 2, 0, 1, 1', - 'addi 3, 0, 0', - 'setvl 0, 0, 2, 0, 1, 1', - 
'sv.or *8, *16, *16', - 'or 6, 4, 4', - 'or 5, 10, 10', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.maddedu *3, *8, 6, 5', - 'setvl 0, 0, 2, 0, 1, 1', - 'or 14, 5, 5', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'or 12, 3, 3', - 'or 7, 4, 4', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *8, *16, *16', - 'or 6, 11, 11', - 'or 5, 10, 10', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.maddedu *3, *8, 6, 5', - 'setvl 0, 0, 2, 0, 1, 1', - 'or 11, 5, 5', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'or 10, 3, 3', - 'or 9, 4, 4', + 'or 7, 6, 6', 'addi 3, 0, 0', 'or 6, 3, 3', 'setvl 0, 0, 3, 0, 1, 1', - 'or 3, 7, 7', - 'or 4, 14, 14', + 'or 3, 4, 4', + 'or 4, 7, 7', 'or 5, 6, 6', 'setvl 0, 0, 3, 0, 1, 1', 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *6, *3, *3', - 'or 3, 10, 10', - 'or 4, 9, 9', - 'or 5, 11, 11', - 'setvl 0, 0, 3, 0, 1, 1', + 'sv.or *30, *3, *3', 'setvl 0, 0, 3, 0, 1, 1', 'addic 0, 0, 0', 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *9, *6, *6', + 'sv.or *9, *24, *24', 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *6, *3, *3', + 'sv.or *6, *30, *30', 'setvl 0, 0, 3, 0, 1, 1', 'sv.adde *3, *9, *6', 'setvl 0, 0, 3, 0, 1, 1', - 'setvl 0, 0, 3, 0, 1, 1', - 'setvl 0, 0, 3, 0, 1, 1', - 'or 9, 3, 3', - 'or 8, 4, 4', - 'or 7, 5, 5', + 'sv.or *39, *3, *3', 'setvl 0, 0, 4, 0, 1, 1', - 'or 3, 12, 12', - 'or 4, 9, 9', - 'or 5, 8, 8', - 'or 6, 7, 7', 'setvl 0, 0, 4, 0, 1, 1', + 'sv.or *3, *14, *14', 'setvl 0, 0, 4, 0, 1, 1', - 'sv.or *7, *3, *3', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 3, 15, 15', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 5, 3, 3', - 'addi 3, 0, 0', - 'or 4, 3, 3', - 'setvl 0, 0, 1, 0, 1, 1', - 'addi 3, 0, 0', - 'or 6, 18, 18', - 'setvl 0, 0, 1, 0, 1, 1', - 'sv.maddedu *3, *6, 5, 4', - 'or 5, 4, 4', - 'setvl 0, 0, 1, 0, 1, 1', + 'sv.or *9, *3, *3', + 'or 3, 9, 9', + 'or 5, 10, 10', + 'or 8, 11, 11', + 'or 7, 12, 12', 'setvl 0, 0, 2, 0, 1, 1', 'or 4, 5, 5', 'setvl 0, 0, 2, 0, 1, 1', 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *17, *3, *3', - 'setvl 0, 0, 4, 0, 1, 1', - 'setvl 0, 0, 4, 0, 1, 1', - 'sv.or *3, 
*7, *7', - 'setvl 0, 0, 4, 0, 1, 1', - 'sv.or *8, *3, *3', - 'or 4, 8, 8', - 'or 7, 9, 9', - 'or 6, 10, 10', - 'or 3, 11, 11', - 'setvl 0, 0, 3, 0, 1, 1', - 'or 3, 4, 4', + 'sv.or *5, *3, *3', + 'setvl 0, 0, 2, 0, 1, 1', + 'or 3, 8, 8', 'or 4, 7, 7', - 'or 5, 6, 6', - 'setvl 0, 0, 3, 0, 1, 1', - 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *14, *3, *3', 'setvl 0, 0, 2, 0, 1, 1', 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *3, *19, *19', + 'sv.or *8, *3, *3', + 'setvl 0, 0, 2, 0, 1, 1', + 'setvl 0, 0, 2, 0, 1, 1', + 'sv.or *3, *5, *5', 'setvl 0, 0, 2, 0, 1, 1', 'sv.or *5, *3, *3', 'or 4, 5, 5', @@ -1385,19 +1255,10 @@ class TestToomCook(unittest.TestCase): 'or 5, 6, 6', 'setvl 0, 0, 3, 0, 1, 1', 'setvl 0, 0, 3, 0, 1, 1', - 'setvl 0, 0, 3, 0, 1, 1', - 'subfc 0, 0, 0', - 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *9, *3, *3', - 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *6, *14, *14', - 'setvl 0, 0, 3, 0, 1, 1', - 'sv.subfe *3, *9, *6', - 'setvl 0, 0, 3, 0, 1, 1', 'sv.or *14, *3, *3', 'setvl 0, 0, 2, 0, 1, 1', 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *3, *17, *17', + 'sv.or *3, *8, *8', 'setvl 0, 0, 2, 0, 1, 1', 'sv.or *5, *3, *3', 'or 4, 5, 5', @@ -1410,207 +1271,115 @@ class TestToomCook(unittest.TestCase): 'or 5, 6, 6', 'setvl 0, 0, 3, 0, 1, 1', 'setvl 0, 0, 3, 0, 1, 1', - 'setvl 0, 0, 3, 0, 1, 1', - 'subfc 0, 0, 0', - 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *9, *3, *3', - 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *6, *14, *14', - 'setvl 0, 0, 3, 0, 1, 1', - 'sv.subfe *3, *9, *6', - 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *8, *3, *3', - 'addi 3, 0, 0', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *3, *19, *19', - 'setvl 0, 0, 2, 0, 1, 1', - 'or 12, 3, 3', - 'or 7, 4, 4', - 'setvl 0, 0, 3, 0, 1, 1', - 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *3, *8, *8', - 'setvl 0, 0, 3, 0, 1, 1', - 'or 11, 3, 3', - 'or 10, 4, 4', - 'or 9, 5, 5', - 'addi 3, 0, 0', - 'or 6, 3, 3', - 'setvl 0, 0, 3, 0, 1, 1', - 'or 3, 7, 7', - 'or 4, 6, 6', - 'or 5, 6, 6', - 'setvl 0, 0, 3, 0, 1, 1', - 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *6, 
*3, *3', - 'or 3, 11, 11', - 'or 4, 10, 10', - 'or 5, 9, 9', - 'setvl 0, 0, 3, 0, 1, 1', + 'sv.or *33, *3, *3', 'setvl 0, 0, 3, 0, 1, 1', 'addic 0, 0, 0', 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *9, *6, *6', + 'sv.or *9, *14, *14', 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *6, *3, *3', + 'sv.or *6, *33, *33', 'setvl 0, 0, 3, 0, 1, 1', 'sv.adde *3, *9, *6', 'setvl 0, 0, 3, 0, 1, 1', + 'sv.or *36, *3, *3', 'setvl 0, 0, 3, 0, 1, 1', 'setvl 0, 0, 3, 0, 1, 1', - 'or 9, 3, 3', - 'or 6, 4, 4', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *3, *17, *17', - 'setvl 0, 0, 2, 0, 1, 1', - 'or 8, 3, 3', - 'or 7, 4, 4', - 'setvl 0, 0, 2, 0, 1, 1', - 'or 3, 6, 6', - 'or 4, 5, 5', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *5, *3, *3', - 'or 3, 8, 8', - 'or 4, 7, 7', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'addic 0, 0, 0', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *7, *5, *5', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *5, *3, *3', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.adde *3, *7, *5', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'or 8, 3, 3', - 'or 7, 4, 4', - 'setvl 0, 0, 4, 0, 1, 1', - 'or 3, 12, 12', - 'or 4, 9, 9', - 'or 5, 8, 8', - 'or 6, 7, 7', - 'setvl 0, 0, 4, 0, 1, 1', - 'setvl 0, 0, 4, 0, 1, 1', - 'sv.or *7, *3, *3', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 3, 24, 24', - 'setvl 0, 0, 1, 0, 1, 1', - 'or 5, 3, 3', - 'addi 3, 0, 0', - 'or 4, 3, 3', - 'setvl 0, 0, 1, 0, 1, 1', - 'addi 3, 0, 0', - 'or 6, 25, 25', - 'setvl 0, 0, 1, 0, 1, 1', - 'sv.maddedu *3, *6, 5, 4', - 'or 5, 4, 4', - 'setvl 0, 0, 1, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'or 4, 5, 5', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *19, *3, *3', - 'setvl 0, 0, 4, 0, 1, 1', - 'setvl 0, 0, 4, 0, 1, 1', - 'sv.or *3, *7, *7', - 'setvl 0, 0, 4, 0, 1, 1', - 'sv.or *8, *3, *3', - 'or 4, 8, 8', - 'or 7, 9, 9', - 'or 6, 10, 10', - 'or 3, 11, 11', - 'setvl 0, 0, 3, 0, 1, 1', - 'or 3, 4, 4', - 'or 4, 7, 7', - 'or 5, 6, 6', - 'setvl 
0, 0, 3, 0, 1, 1', + 'sv.or *3, *14, *14', 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *14, *3, *3', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *3, *21, *21', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *5, *3, *3', + 'sv.or/mrr *5, *3, *3', 'or 4, 5, 5', - 'or 7, 6, 6', + 'or 14, 6, 6', + 'or 23, 7, 7', 'addi 3, 0, 0', - 'or 6, 3, 3', - 'setvl 0, 0, 3, 0, 1, 1', - 'or 3, 4, 4', - 'or 4, 7, 7', - 'or 5, 6, 6', - 'setvl 0, 0, 3, 0, 1, 1', - 'setvl 0, 0, 3, 0, 1, 1', + 'or 22, 3, 3', 'setvl 0, 0, 3, 0, 1, 1', - 'subfc 0, 0, 0', + 'addi 3, 0, 0', 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *9, *3, *3', + 'sv.or *8, *24, *24', + 'or 7, 4, 4', + 'or 6, 22, 22', 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *6, *14, *14', + 'sv.maddedu *3, *8, 7, 6', 'setvl 0, 0, 3, 0, 1, 1', - 'sv.subfe *3, *9, *6', + 'or 19, 6, 6', 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *14, *3, *3', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *3, *19, *19', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *5, *3, *3', - 'or 4, 5, 5', - 'or 7, 6, 6', - 'addi 3, 0, 0', - 'or 6, 3, 3', 'setvl 0, 0, 3, 0, 1, 1', - 'or 3, 4, 4', - 'or 4, 7, 7', - 'or 5, 6, 6', + 'or 21, 3, 3', + 'or 12, 4, 4', + 'or 11, 5, 5', 'setvl 0, 0, 3, 0, 1, 1', + 'sv.or *8, *24, *24', + 'or 7, 14, 14', + 'or 6, 22, 22', 'setvl 0, 0, 3, 0, 1, 1', + 'sv.maddedu *3, *8, 7, 6', 'setvl 0, 0, 3, 0, 1, 1', - 'subfc 0, 0, 0', + 'or 18, 6, 6', 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *9, *3, *3', 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *6, *14, *14', + 'or 17, 3, 3', + 'or 16, 4, 4', + 'or 15, 5, 5', + 'addi 3, 0, 0', + 'or 8, 3, 3', + 'addi 3, 0, 0', + 'or 14, 3, 3', + 'setvl 0, 0, 5, 0, 1, 1', + 'or 3, 12, 12', + 'or 4, 11, 11', + 'or 5, 19, 19', + 'or 6, 8, 8', + 'or 7, 8, 8', + 'setvl 0, 0, 5, 0, 1, 1', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.or *8, *3, *3', + 'or 3, 17, 17', + 'or 4, 16, 16', + 'or 5, 15, 15', + 'or 6, 18, 18', + 'or 7, 14, 14', + 'setvl 0, 0, 5, 0, 1, 1', + 'setvl 0, 0, 5, 0, 1, 1', + 'addic 0, 0, 0', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.or *14, 
*8, *8', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.or *8, *3, *3', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.adde *3, *14, *8', + 'setvl 0, 0, 5, 0, 1, 1', + 'setvl 0, 0, 5, 0, 1, 1', + 'setvl 0, 0, 5, 0, 1, 1', + 'or 20, 3, 3', + 'or 19, 4, 4', + 'or 18, 5, 5', + 'or 17, 6, 6', + 'or 16, 7, 7', 'setvl 0, 0, 3, 0, 1, 1', - 'sv.subfe *3, *9, *6', + 'sv.or *8, *24, *24', + 'or 7, 23, 23', + 'or 6, 22, 22', 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *7, *3, *3', - 'addi 3, 0, 0', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *3, *21, *21', - 'setvl 0, 0, 2, 0, 1, 1', - 'or 18, 3, 3', - 'or 6, 4, 4', + 'sv.maddedu *3, *8, 7, 6', 'setvl 0, 0, 3, 0, 1, 1', + 'or 15, 6, 6', 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *3, *7, *7', 'setvl 0, 0, 3, 0, 1, 1', - 'or 15, 3, 3', - 'or 14, 4, 4', - 'or 12, 5, 5', - 'addi 3, 0, 0', - 'or 7, 3, 3', - 'or 3, 12, 12', - 'sradi 3, 3, 63', - 'or 11, 3, 3', + 'or 14, 3, 3', + 'or 12, 4, 4', + 'or 11, 5, 5', 'setvl 0, 0, 4, 0, 1, 1', - 'or 3, 6, 6', - 'or 4, 7, 7', - 'or 5, 7, 7', - 'or 6, 7, 7', + 'or 3, 19, 19', + 'or 4, 18, 18', + 'or 5, 17, 17', + 'or 6, 16, 16', 'setvl 0, 0, 4, 0, 1, 1', 'setvl 0, 0, 4, 0, 1, 1', 'sv.or *7, *3, *3', - 'or 3, 15, 15', - 'or 4, 14, 14', - 'or 5, 12, 12', - 'or 6, 11, 11', + 'or 3, 14, 14', + 'or 4, 12, 12', + 'or 5, 11, 11', + 'or 6, 15, 15', 'setvl 0, 0, 4, 0, 1, 1', 'setvl 0, 0, 4, 0, 1, 1', 'addic 0, 0, 0', @@ -1623,60 +1392,458 @@ class TestToomCook(unittest.TestCase): 'setvl 0, 0, 4, 0, 1, 1', 'setvl 0, 0, 4, 0, 1, 1', 'setvl 0, 0, 4, 0, 1, 1', - 'sv.or *8, *3, *3', - 'or 14, 8, 8', - 'or 5, 9, 9', - 'or 7, 10, 10', + 'or 12, 3, 3', + 'or 11, 4, 4', + 'or 10, 5, 5', + 'or 9, 6, 6', + 'setvl 0, 0, 6, 0, 1, 1', + 'or 3, 21, 21', + 'or 4, 20, 20', + 'or 5, 12, 12', 'or 6, 11, 11', - 'setvl 0, 0, 2, 0, 1, 1', - 'setvl 0, 0, 2, 0, 1, 1', - 'sv.or *3, *19, *19', - 'setvl 0, 0, 2, 0, 1, 1', - 'or 11, 3, 3', - 'or 10, 4, 4', + 'or 7, 10, 10', + 'or 8, 9, 9', + 'setvl 0, 0, 6, 0, 1, 1', + 'setvl 0, 0, 6, 0, 1, 1', + 
'sv.or *24, *3, *3', + 'setvl 0, 0, 3, 0, 1, 1', + 'setvl 0, 0, 3, 0, 1, 1', + 'sv.or *3, *36, *36', + 'setvl 0, 0, 3, 0, 1, 1', + 'sv.or/mrr *5, *3, *3', + 'or 4, 5, 5', + 'or 14, 6, 6', + 'or 23, 7, 7', 'addi 3, 0, 0', - 'or 9, 3, 3', + 'or 22, 3, 3', 'setvl 0, 0, 3, 0, 1, 1', - 'or 3, 5, 5', - 'or 4, 7, 7', - 'or 5, 6, 6', + 'addi 3, 0, 0', 'setvl 0, 0, 3, 0, 1, 1', + 'sv.or *8, *39, *39', + 'or 7, 4, 4', + 'or 6, 22, 22', 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *6, *3, *3', - 'or 3, 11, 11', - 'or 4, 10, 10', - 'or 5, 9, 9', + 'sv.maddedu *3, *8, 7, 6', 'setvl 0, 0, 3, 0, 1, 1', + 'or 19, 6, 6', 'setvl 0, 0, 3, 0, 1, 1', - 'addic 0, 0, 0', 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *9, *6, *6', + 'or 21, 3, 3', + 'or 12, 4, 4', + 'or 11, 5, 5', 'setvl 0, 0, 3, 0, 1, 1', - 'sv.or *6, *3, *3', + 'sv.or *8, *39, *39', + 'or 7, 14, 14', + 'or 6, 22, 22', 'setvl 0, 0, 3, 0, 1, 1', - 'sv.adde *3, *9, *6', + 'sv.maddedu *3, *8, 7, 6', 'setvl 0, 0, 3, 0, 1, 1', + 'or 18, 6, 6', 'setvl 0, 0, 3, 0, 1, 1', 'setvl 0, 0, 3, 0, 1, 1', - 'or 12, 3, 3', - 'or 11, 4, 4', - 'or 10, 5, 5', + 'or 17, 3, 3', + 'or 16, 4, 4', + 'or 15, 5, 5', 'addi 3, 0, 0', - 'or 9, 3, 3', - 'setvl 0, 0, 6, 0, 1, 1', - 'or 3, 18, 18', - 'or 4, 14, 14', - 'or 5, 12, 12', - 'or 6, 11, 11', - 'or 7, 10, 10', + 'or 8, 3, 3', + 'addi 3, 0, 0', + 'or 14, 3, 3', + 'setvl 0, 0, 5, 0, 1, 1', + 'or 3, 12, 12', + 'or 4, 11, 11', + 'or 5, 19, 19', + 'or 6, 8, 8', + 'or 7, 8, 8', + 'setvl 0, 0, 5, 0, 1, 1', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.or *8, *3, *3', + 'or 3, 17, 17', + 'or 4, 16, 16', + 'or 5, 15, 15', + 'or 6, 18, 18', + 'or 7, 14, 14', + 'setvl 0, 0, 5, 0, 1, 1', + 'setvl 0, 0, 5, 0, 1, 1', + 'addic 0, 0, 0', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.or *14, *8, *8', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.or *8, *3, *3', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.adde *3, *14, *8', + 'setvl 0, 0, 5, 0, 1, 1', + 'setvl 0, 0, 5, 0, 1, 1', + 'setvl 0, 0, 5, 0, 1, 1', + 'or 20, 3, 3', + 'or 19, 4, 4', + 'or 18, 5, 5', + 'or 17, 6, 6', + 'or 
16, 7, 7', + 'setvl 0, 0, 3, 0, 1, 1', + 'sv.or *8, *39, *39', + 'or 7, 23, 23', + 'or 6, 22, 22', + 'setvl 0, 0, 3, 0, 1, 1', + 'sv.maddedu *3, *8, 7, 6', + 'setvl 0, 0, 3, 0, 1, 1', + 'or 15, 6, 6', + 'setvl 0, 0, 3, 0, 1, 1', + 'setvl 0, 0, 3, 0, 1, 1', + 'or 14, 3, 3', + 'or 12, 4, 4', + 'or 11, 5, 5', + 'setvl 0, 0, 4, 0, 1, 1', + 'or 3, 19, 19', + 'or 4, 18, 18', + 'or 5, 17, 17', + 'or 6, 16, 16', + 'setvl 0, 0, 4, 0, 1, 1', + 'setvl 0, 0, 4, 0, 1, 1', + 'sv.or *7, *3, *3', + 'or 3, 14, 14', + 'or 4, 12, 12', + 'or 5, 11, 11', + 'or 6, 15, 15', + 'setvl 0, 0, 4, 0, 1, 1', + 'setvl 0, 0, 4, 0, 1, 1', + 'addic 0, 0, 0', + 'setvl 0, 0, 4, 0, 1, 1', + 'sv.or *14, *7, *7', + 'setvl 0, 0, 4, 0, 1, 1', + 'sv.or *7, *3, *3', + 'setvl 0, 0, 4, 0, 1, 1', + 'sv.adde *3, *14, *7', + 'setvl 0, 0, 4, 0, 1, 1', + 'setvl 0, 0, 4, 0, 1, 1', + 'setvl 0, 0, 4, 0, 1, 1', + 'or 12, 3, 3', + 'or 11, 4, 4', + 'or 10, 5, 5', + 'or 9, 6, 6', + 'setvl 0, 0, 6, 0, 1, 1', + 'or 3, 21, 21', + 'or 4, 20, 20', + 'or 5, 12, 12', + 'or 6, 11, 11', + 'or 7, 10, 10', 'or 8, 9, 9', 'setvl 0, 0, 6, 0, 1, 1', 'setvl 0, 0, 6, 0, 1, 1', + 'sv.or *36, *3, *3', + 'setvl 0, 0, 3, 0, 1, 1', + 'setvl 0, 0, 3, 0, 1, 1', + 'sv.or *3, *33, *33', + 'setvl 0, 0, 3, 0, 1, 1', + 'sv.or/mrr *5, *3, *3', + 'or 4, 5, 5', + 'or 14, 6, 6', + 'or 23, 7, 7', + 'addi 3, 0, 0', + 'or 22, 3, 3', + 'setvl 0, 0, 3, 0, 1, 1', + 'addi 3, 0, 0', + 'setvl 0, 0, 3, 0, 1, 1', + 'sv.or *8, *30, *30', + 'or 7, 4, 4', + 'or 6, 22, 22', + 'setvl 0, 0, 3, 0, 1, 1', + 'sv.maddedu *3, *8, 7, 6', + 'setvl 0, 0, 3, 0, 1, 1', + 'or 19, 6, 6', + 'setvl 0, 0, 3, 0, 1, 1', + 'setvl 0, 0, 3, 0, 1, 1', + 'or 21, 3, 3', + 'or 12, 4, 4', + 'or 11, 5, 5', + 'setvl 0, 0, 3, 0, 1, 1', + 'sv.or *8, *30, *30', + 'or 7, 14, 14', + 'or 6, 22, 22', + 'setvl 0, 0, 3, 0, 1, 1', + 'sv.maddedu *3, *8, 7, 6', + 'setvl 0, 0, 3, 0, 1, 1', + 'or 18, 6, 6', + 'setvl 0, 0, 3, 0, 1, 1', + 'setvl 0, 0, 3, 0, 1, 1', + 'or 17, 3, 3', + 'or 16, 4, 4', + 'or 15, 5, 
5', + 'addi 3, 0, 0', + 'or 8, 3, 3', + 'addi 3, 0, 0', + 'or 14, 3, 3', + 'setvl 0, 0, 5, 0, 1, 1', + 'or 3, 12, 12', + 'or 4, 11, 11', + 'or 5, 19, 19', + 'or 6, 8, 8', + 'or 7, 8, 8', + 'setvl 0, 0, 5, 0, 1, 1', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.or *8, *3, *3', + 'or 3, 17, 17', + 'or 4, 16, 16', + 'or 5, 15, 15', + 'or 6, 18, 18', + 'or 7, 14, 14', + 'setvl 0, 0, 5, 0, 1, 1', + 'setvl 0, 0, 5, 0, 1, 1', + 'addic 0, 0, 0', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.or *14, *8, *8', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.or *8, *3, *3', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.adde *3, *14, *8', + 'setvl 0, 0, 5, 0, 1, 1', + 'setvl 0, 0, 5, 0, 1, 1', + 'setvl 0, 0, 5, 0, 1, 1', + 'or 20, 3, 3', + 'or 19, 4, 4', + 'or 18, 5, 5', + 'or 17, 6, 6', + 'or 16, 7, 7', + 'setvl 0, 0, 3, 0, 1, 1', + 'sv.or *8, *30, *30', + 'or 7, 23, 23', + 'or 6, 22, 22', + 'setvl 0, 0, 3, 0, 1, 1', + 'sv.maddedu *3, *8, 7, 6', + 'setvl 0, 0, 3, 0, 1, 1', + 'or 15, 6, 6', + 'setvl 0, 0, 3, 0, 1, 1', + 'setvl 0, 0, 3, 0, 1, 1', + 'or 14, 3, 3', + 'or 12, 4, 4', + 'or 11, 5, 5', + 'setvl 0, 0, 4, 0, 1, 1', + 'or 3, 19, 19', + 'or 4, 18, 18', + 'or 5, 17, 17', + 'or 6, 16, 16', + 'setvl 0, 0, 4, 0, 1, 1', + 'setvl 0, 0, 4, 0, 1, 1', + 'sv.or *7, *3, *3', + 'or 3, 14, 14', + 'or 4, 12, 12', + 'or 5, 11, 11', + 'or 6, 15, 15', + 'setvl 0, 0, 4, 0, 1, 1', + 'setvl 0, 0, 4, 0, 1, 1', + 'addic 0, 0, 0', + 'setvl 0, 0, 4, 0, 1, 1', + 'sv.or *14, *7, *7', + 'setvl 0, 0, 4, 0, 1, 1', + 'sv.or *7, *3, *3', + 'setvl 0, 0, 4, 0, 1, 1', + 'sv.adde *3, *14, *7', + 'setvl 0, 0, 4, 0, 1, 1', + 'setvl 0, 0, 4, 0, 1, 1', + 'setvl 0, 0, 4, 0, 1, 1', + 'or 12, 3, 3', + 'or 11, 4, 4', + 'or 10, 5, 5', + 'or 9, 6, 6', 'setvl 0, 0, 6, 0, 1, 1', + 'or 3, 21, 21', + 'or 4, 20, 20', + 'or 5, 12, 12', + 'or 6, 11, 11', + 'or 7, 10, 10', + 'or 8, 9, 9', 'setvl 0, 0, 6, 0, 1, 1', - 'sv.or/mrr *4, *3, *3', - 'or 3, 23, 23', 'setvl 0, 0, 6, 0, 1, 1', + 'sv.or *30, *3, *3', + 'setvl 0, 0, 6, 0, 1, 1', + 'setvl 0, 0, 6, 0, 1, 1', + 'sv.or *3, 
*24, *24', + 'setvl 0, 0, 6, 0, 1, 1', + 'sv.or *14, *3, *3', + 'or 4, 14, 14', + 'or 11, 15, 15', + 'or 10, 16, 16', + 'or 9, 17, 17', + 'or 8, 18, 18', + 'or 3, 19, 19', + 'setvl 0, 0, 5, 0, 1, 1', + 'or 3, 4, 4', + 'or 4, 11, 11', + 'or 5, 10, 10', + 'or 6, 9, 9', + 'or 7, 8, 8', + 'setvl 0, 0, 5, 0, 1, 1', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.or *25, *3, *3', + 'setvl 0, 0, 6, 0, 1, 1', + 'setvl 0, 0, 6, 0, 1, 1', + 'sv.or *3, *36, *36', + 'setvl 0, 0, 6, 0, 1, 1', + 'sv.or *14, *3, *3', + 'or 4, 14, 14', + 'or 11, 15, 15', + 'or 10, 16, 16', + 'or 9, 17, 17', + 'or 8, 18, 18', + 'or 3, 19, 19', + 'setvl 0, 0, 5, 0, 1, 1', + 'or 3, 4, 4', + 'or 4, 11, 11', + 'or 5, 10, 10', + 'or 6, 9, 9', + 'or 7, 8, 8', + 'setvl 0, 0, 5, 0, 1, 1', + 'setvl 0, 0, 5, 0, 1, 1', + 'setvl 0, 0, 5, 0, 1, 1', + 'subfc 0, 0, 0', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.or *14, *25, *25', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.or *8, *3, *3', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.subfe *3, *14, *8', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.or *20, *3, *3', + 'setvl 0, 0, 6, 0, 1, 1', + 'setvl 0, 0, 6, 0, 1, 1', + 'sv.or *3, *30, *30', + 'setvl 0, 0, 6, 0, 1, 1', + 'sv.or *14, *3, *3', + 'or 4, 14, 14', + 'or 11, 15, 15', + 'or 10, 16, 16', + 'or 9, 17, 17', + 'or 8, 18, 18', + 'or 3, 19, 19', + 'setvl 0, 0, 5, 0, 1, 1', + 'or 3, 4, 4', + 'or 4, 11, 11', + 'or 5, 10, 10', + 'or 6, 9, 9', + 'or 7, 8, 8', + 'setvl 0, 0, 5, 0, 1, 1', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.or *30, *3, *3', + 'setvl 0, 0, 5, 0, 1, 1', + 'subfc 0, 0, 0', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.or *14, *30, *30', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.or *8, *20, *20', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.subfe *3, *14, *8', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.or *16, *3, *3', + 'setvl 0, 0, 5, 0, 1, 1', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.or *3, *25, *25', + 'setvl 0, 0, 5, 0, 1, 1', + 'or 29, 3, 3', + 'or 28, 4, 4', + 'or 8, 5, 5', + 'or 15, 6, 6', + 'or 14, 7, 7', + 'setvl 0, 0, 5, 0, 1, 1', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.or *3, *16, *16', + 'setvl 0, 
0, 5, 0, 1, 1', + 'or 24, 3, 3', + 'or 23, 4, 4', + 'or 22, 5, 5', + 'or 21, 6, 6', + 'or 20, 7, 7', + 'setvl 0, 0, 5, 0, 1, 1', + 'setvl 0, 0, 5, 0, 1, 1', + 'sv.or *3, *30, *30', + 'setvl 0, 0, 5, 0, 1, 1', + 'or 27, 3, 3', + 'or 26, 4, 4', + 'or 12, 5, 5', + 'or 11, 6, 6', + 'or 3, 7, 7', + 'addi 3, 0, 0', + 'addi 3, 0, 0', + 'or 10, 3, 3', + 'or 3, 20, 20', + 'sradi 3, 3, 63', + 'or 9, 3, 3', + 'setvl 0, 0, 6, 0, 1, 1', + 'or 3, 8, 8', + 'or 4, 15, 15', + 'or 5, 14, 14', + 'or 6, 10, 10', + 'or 7, 10, 10', + 'or 8, 10, 10', + 'setvl 0, 0, 6, 0, 1, 1', + 'setvl 0, 0, 6, 0, 1, 1', + 'sv.or *14, *3, *3', + 'or 3, 24, 24', + 'or 4, 23, 23', + 'or 5, 22, 22', + 'or 6, 21, 21', + 'or 7, 20, 20', + 'or 8, 9, 9', + 'setvl 0, 0, 6, 0, 1, 1', + 'setvl 0, 0, 6, 0, 1, 1', + 'addic 0, 0, 0', + 'setvl 0, 0, 6, 0, 1, 1', + 'sv.or *20, *14, *14', + 'setvl 0, 0, 6, 0, 1, 1', + 'sv.or *14, *3, *3', + 'setvl 0, 0, 6, 0, 1, 1', + 'sv.adde *3, *20, *14', + 'setvl 0, 0, 6, 0, 1, 1', + 'setvl 0, 0, 6, 0, 1, 1', + 'setvl 0, 0, 6, 0, 1, 1', + 'sv.or *20, *3, *3', + 'or 19, 20, 20', + 'or 18, 21, 21', + 'or 3, 22, 22', + 'or 9, 23, 23', + 'or 8, 24, 24', + 'or 7, 25, 25', + 'setvl 0, 0, 4, 0, 1, 1', + 'or 4, 9, 9', + 'or 5, 8, 8', + 'or 6, 7, 7', + 'setvl 0, 0, 4, 0, 1, 1', + 'setvl 0, 0, 4, 0, 1, 1', + 'sv.or *7, *3, *3', + 'or 3, 27, 27', + 'or 4, 26, 26', + 'or 5, 12, 12', + 'or 6, 11, 11', + 'setvl 0, 0, 4, 0, 1, 1', + 'setvl 0, 0, 4, 0, 1, 1', + 'addic 0, 0, 0', + 'setvl 0, 0, 4, 0, 1, 1', + 'sv.or *14, *7, *7', + 'setvl 0, 0, 4, 0, 1, 1', + 'sv.or *7, *3, *3', + 'setvl 0, 0, 4, 0, 1, 1', + 'sv.adde *3, *14, *7', + 'setvl 0, 0, 4, 0, 1, 1', + 'setvl 0, 0, 4, 0, 1, 1', + 'setvl 0, 0, 4, 0, 1, 1', + 'or 15, 3, 3', + 'or 14, 4, 4', + 'or 12, 5, 5', + 'or 11, 6, 6', + 'setvl 0, 0, 8, 0, 1, 1', + 'or 3, 29, 29', + 'or 4, 28, 28', + 'or 5, 19, 19', + 'or 6, 18, 18', + 'or 7, 15, 15', + 'or 8, 14, 14', + 'or 9, 12, 12', + 'or 10, 11, 11', + 'setvl 0, 0, 8, 0, 1, 1', + 'setvl 0, 0, 8, 0, 
1, 1', + 'setvl 0, 0, 8, 0, 1, 1', + 'setvl 0, 0, 8, 0, 1, 1', + 'sv.or/mrr *4, *3, *3', + 'or 3, 42, 42', + 'setvl 0, 0, 8, 0, 1, 1', 'sv.std *4, 0(3)' ])
diff --git a/src/bigint_presentation_code/compiler_ir.py b/src/bigint_presentation_code/compiler_ir.py
index 61a5ece..69f8d8e 100644
--- a/src/bigint_presentation_code/compiler_ir.py
+++ b/src/bigint_presentation_code/compiler_ir.py
@@ -1,3 +1,4 @@
+from contextlib import contextmanager
 import enum
 from abc import ABCMeta, abstractmethod
 from enum import Enum, unique
@@ -1794,6 +1795,19 @@ class SSAVal(SSAValOrUse):
         # type: () -> OpStage
         return self.defining_descriptor.write_stage
 
+    @property
+    def current_debugging_value(self):
+        # type: () -> tuple[int, ...]
+        """ get the current value for debugging in pdb or similar.
+
+        This is intended for use with
+        `PreRASimState.set_as_current_debugging_state`.
+
+        This is only intended for debugging, do not use in unit tests or
+        production code.
+        """
+        return PreRASimState.get_current_debugging_state()[self]
+
 
 @plain_data(frozen=True, unsafe_hash=True, repr=False)
 @final
@@ -2222,6 +2236,36 @@ class PreRASimState(BaseSimState):
             raise ValueError("value has wrong len")
         self.ssa_vals[ssa_val] = value
 
+    __CURRENT_DEBUGGING_STATE = []  # type: list[PreRASimState]
+
+    @contextmanager
+    def set_as_current_debugging_state(self):
+        """ return a context manager that sets self as the current state for
+        debugging in pdb or similar. This is intended only for use with
+        `get_current_debugging_state` which should not be used in unit tests
+        or production code.
+        """
+        PreRASimState.__CURRENT_DEBUGGING_STATE.append(self)
+        try:
+            yield
+        finally:
+            popped = PreRASimState.__CURRENT_DEBUGGING_STATE.pop()
+            assert popped is self, "inconsistent __CURRENT_DEBUGGING_STATE"
+
+    @staticmethod
+    def get_current_debugging_state():
+        # type: () -> PreRASimState
+        """ get the current state for debugging in pdb or similar.
+
+        This is intended for use with `set_as_current_debugging_state`.
+
+        This is only intended for debugging, do not use in unit tests or
+        production code.
+        """
+        if not PreRASimState.__CURRENT_DEBUGGING_STATE:
+            raise ValueError("no current debugging state")
+        return PreRASimState.__CURRENT_DEBUGGING_STATE[-1]
+
 
 @plain_data(frozen=True, repr=False)
 @final
@@ -2245,7 +2289,8 @@ class PostRASimState(BaseSimState):
 
     def _loc_values__repr(self):
         # type: () -> str
-        locs = sorted(self.loc_values.keys(), key=lambda v: (v.kind, v.start))
+        locs = sorted(self.loc_values.keys(),
+                      key=lambda v: (v.kind.name, v.start))
         items = []  # type: list[str]
         for loc in locs:
             items.append(f"{loc}: 0x{self.loc_values[loc]:x}")
diff --git a/src/bigint_presentation_code/toom_cook.py b/src/bigint_presentation_code/toom_cook.py
index 75891f2..a7c5450 100644
--- a/src/bigint_presentation_code/toom_cook.py
+++ b/src/bigint_presentation_code/toom_cook.py
@@ -250,6 +250,19 @@ class EvalOpGenIrOutput:
         # type: () -> int
         return self.value_range.output_size
 
+    @property
+    def current_debugging_value(self):
+        # type: () -> tuple[int, ...]
+        """ get the current value for debugging in pdb or similar.
+
+        This is intended for use with
+        `PreRASimState.set_as_current_debugging_state`.
+
+        This is only intended for debugging, do not use in unit tests or
+        production code.
+        """
+        return self.output.current_debugging_value
+
 
 @plain_data(frozen=True, unsafe_hash=True)
 @final
@@ -284,6 +297,19 @@ class EvalOpGenIrInput:
         if self.min_value > self.max_value:
             raise ValueError("invalid value range")
 
+    @property
+    def current_debugging_value(self):
+        # type: () -> tuple[int, ...]
+        """ get the current value for debugging in pdb or similar.
+
+        This is intended for use with
+        `PreRASimState.set_as_current_debugging_state`.
+
+        This is only intended for debugging, do not use in unit tests or
+        production code.
+        """
+        return self.ssa_val.current_debugging_value
+
 
 @plain_data(frozen=True)
 @final
@@ -901,7 +927,7 @@ def cast_to_size_spread(fn, ssa_vals, src_signed, dest_size, name):
 
 
 def split_into_exact_sized_parts(fn, ssa_val, part_count, part_size, name):
-    # type: (Fn, SSAVal, int, int, str) -> list[SSAVal]
+    # type: (Fn, SSAVal, int, int, str) -> tuple[SSAVal, ...]
     """split ssa_val into part_count parts, where all but the last part have
     `part.ty.reg_len == part_size`.
     """
@@ -910,7 +936,7 @@ def split_into_exact_sized_parts(fn, ssa_val, part_count, part_size, name):
     if part_count <= 0:
         raise ValueError("invalid part count, must be positive")
     if part_count == 1:
-        return [ssa_val]
+        return (ssa_val,)
     too_short_reg_len = (part_count - 1) * part_size
     if ssa_val.ty.reg_len <= too_short_reg_len:
         raise ValueError(f"ssa_val is too short to split, must have "
@@ -934,88 +960,152 @@ def split_into_exact_sized_parts(fn, ssa_val, part_count, part_size, name):
             input_vals=[*spread.outputs[start:stop], part_setvl.outputs[0]],
             name=f"{name}_{part}_concat", maxvl=part_maxvl)
         retval.append(concat.outputs[0])
-    return retval
+    return tuple(retval)
 
 
-__TCIs = Tuple[ToomCookInstance, ...]
+_TCIs = Tuple[ToomCookInstance, ...]
+
+
+@plain_data(frozen=True)
+@final
+class ToomCookMul:
+    """ builds the IR for a Toom-Cook multiplication of `lhs` by `rhs` and
+    keeps the intermediate values around as attributes.
+
+    `instances[start_instance_index:]` is searched for the first instance
+    that gives a positive part size; if there is none, the product is
+    generated with `simple_mul` instead and `instance` is `None`.
+    `retval` is the `SSAVal` of the product in both cases.
+    """
+    __slots__ = (
+        "fn", "lhs", "lhs_signed", "rhs", "rhs_signed", "instances",
+        "retval_size", "start_instance_index", "instance", "part_size",
+        "lhs_parts", "lhs_inputs", "lhs_eval_state", "lhs_outputs",
+        "rhs_parts", "rhs_inputs", "rhs_eval_state", "rhs_outputs",
+        "prod_inputs", "prod_eval_state", "prod_parts",
+        "partial_products", "retval",
+    )
+
+    def __init__(self, fn, lhs, lhs_signed, rhs, rhs_signed, instances,
+                 retval_size=None, start_instance_index=0):
+        # type: (Fn, SSAVal, bool, SSAVal, bool, _TCIs, None | int, int) -> None
+        self.fn = fn
+        self.lhs = lhs
+        self.lhs_signed = lhs_signed
+        self.rhs = rhs
+        self.rhs_signed = rhs_signed
+        self.instances = instances
+        if retval_size is None:
+            retval_size = lhs.ty.reg_len + rhs.ty.reg_len
+        self.retval_size = retval_size
+        if start_instance_index < 0:
+            raise ValueError("start_instance_index must be non-negative")
+        self.start_instance_index = start_instance_index
+        self.instance = None
+        self.part_size = 0  # type: int
+        while start_instance_index < len(instances):
+            self.instance = instances[start_instance_index]
+            self.part_size = max(
+                lhs.ty.reg_len // self.instance.lhs_part_count,
+                rhs.ty.reg_len // self.instance.rhs_part_count)
+            if self.part_size <= 0:
+                self.instance = None
+                start_instance_index += 1
+            else:
+                break
+        if self.instance is None:
+            # base case: no usable instance. Assign the otherwise-unused
+            # slots too -- reading a never-assigned `__slots__` attribute
+            # raises AttributeError.
+            self.lhs_parts = self.rhs_parts = ()
+            self.lhs_inputs = []
+            self.rhs_inputs = []
+            self.prod_inputs = []
+            self.lhs_eval_state = self.rhs_eval_state = None
+            self.prod_eval_state = None
+            self.lhs_outputs = []
+            self.rhs_outputs = []
+            self.prod_parts = []
+            self.partial_products = ()
+            self.retval = simple_mul(fn=fn,
+                                     lhs=lhs, lhs_signed=lhs_signed,
+                                     rhs=rhs, rhs_signed=rhs_signed,
+                                     name="toom_cook_base_case")
+            return
+        self.lhs_parts = split_into_exact_sized_parts(
+            fn=fn, ssa_val=lhs, part_count=self.instance.lhs_part_count,
+            part_size=self.part_size, name="lhs")
+        self.lhs_inputs = []  # type: list[EvalOpGenIrInput]
+        for part, ssa_val in enumerate(self.lhs_parts):
+            self.lhs_inputs.append(EvalOpGenIrInput(
+                ssa_val=ssa_val,
+                is_signed=lhs_signed and part == len(self.lhs_parts) - 1))
+        self.lhs_eval_state = EvalOpGenIrState(fn=fn, inputs=self.lhs_inputs)
+        lhs_eval_ops = self.instance.lhs_eval_ops
+        self.lhs_outputs = [
+            self.lhs_eval_state.get_output(i) for i in lhs_eval_ops]
+        self.rhs_parts = split_into_exact_sized_parts(
+            fn=fn, ssa_val=rhs, part_count=self.instance.rhs_part_count,
+            part_size=self.part_size, name="rhs")
+        self.rhs_inputs = []  # type: list[EvalOpGenIrInput]
+        for part, ssa_val in enumerate(self.rhs_parts):
+            self.rhs_inputs.append(EvalOpGenIrInput(
+                ssa_val=ssa_val,
+                is_signed=rhs_signed and part == len(self.rhs_parts) - 1))
+        self.rhs_eval_state = EvalOpGenIrState(fn=fn, inputs=self.rhs_inputs)
+        rhs_eval_ops = self.instance.rhs_eval_ops
+        self.rhs_outputs = [
+            self.rhs_eval_state.get_output(i) for i in rhs_eval_ops]
+        self.prod_inputs = []  # type: list[EvalOpGenIrInput]
+        for lhs_output, rhs_output in zip(self.lhs_outputs, self.rhs_outputs):
+            ssa_val = toom_cook_mul(
+                fn=fn,
+                lhs=lhs_output.output, lhs_signed=lhs_output.is_signed,
+                rhs=rhs_output.output, rhs_signed=rhs_output.is_signed,
+                instances=instances,
+                start_instance_index=start_instance_index + 1)
+            products = (lhs_output.min_value * rhs_output.min_value,
+                        lhs_output.min_value * rhs_output.max_value,
+                        lhs_output.max_value * rhs_output.min_value,
+                        lhs_output.max_value * rhs_output.max_value)
+            self.prod_inputs.append(EvalOpGenIrInput(
+                ssa_val=ssa_val,
+                is_signed=None,
+                min_value=min(products),
+                max_value=max(products)))
+        self.prod_eval_state = EvalOpGenIrState(fn=fn, inputs=self.prod_inputs)
+        prod_eval_ops = self.instance.prod_eval_ops
+        self.prod_parts = [
+            self.prod_eval_state.get_output(i) for i in prod_eval_ops]
+
+        def partial_products():
+            # type: () -> Iterable[PartialProduct]
+            for part, prod_part in enumerate(self.prod_parts):
+                part_maxvl = prod_part.output.ty.reg_len
+                part_setvl = fn.append_new_op(
+                    OpKind.SetVLI, immediates=[part_maxvl],
+                    name=f"prod_{part}_setvl", maxvl=part_maxvl)
+                spread_part = fn.append_new_op(
+                    OpKind.Spread,
+                    input_vals=[prod_part.output, part_setvl.outputs[0]],
+                    name=f"prod_{part}_spread", maxvl=part_maxvl)
+                yield PartialProduct(
+                    spread_part.outputs, shift_in_words=part * self.part_size,
+                    is_signed=prod_part.is_signed, subtract=False)
+        self.partial_products = tuple(partial_products())
+        self.retval = sum_partial_products(
+            fn=fn, partial_products=self.partial_products,
+            retval_size=retval_size, name="prod")
 
 
 def toom_cook_mul(fn, lhs, lhs_signed, rhs, rhs_signed, instances,
                   retval_size=None, start_instance_index=0):
-    # type: (Fn, SSAVal, bool, SSAVal, bool, __TCIs, None | int, int) -> SSAVal
-    if retval_size is None:
-        retval_size = lhs.ty.reg_len + rhs.ty.reg_len
-    if start_instance_index < 0:
-        raise ValueError("start_instance_index must be non-negative")
-    instance = None
-    part_size = 0
-    while start_instance_index < len(instances):
-        instance = instances[start_instance_index]
-        part_size = max(lhs.ty.reg_len // instance.lhs_part_count,
-                        rhs.ty.reg_len // instance.rhs_part_count)
-        if part_size <= 0:
-            instance = None
-            start_instance_index += 1
-        else:
-            break
-    if instance is None:
-        return simple_mul(fn=fn,
-                          lhs=lhs, lhs_signed=lhs_signed,
-                          rhs=rhs, rhs_signed=rhs_signed,
-                          name="toom_cook_base_case")
-    lhs_parts = split_into_exact_sized_parts(
-        fn=fn, ssa_val=lhs, part_count=instance.lhs_part_count,
-        part_size=part_size, name="lhs")
-    lhs_inputs = []  # type: list[EvalOpGenIrInput]
-    for part, ssa_val in enumerate(lhs_parts):
-        lhs_inputs.append(EvalOpGenIrInput(
-            ssa_val=ssa_val,
-            is_signed=lhs_signed and part == len(lhs_parts) - 1))
-    lhs_eval_state = EvalOpGenIrState(fn=fn, inputs=lhs_inputs)
-    lhs_outputs = [lhs_eval_state.get_output(i) for i in instance.lhs_eval_ops]
-    rhs_parts = split_into_exact_sized_parts(
-        fn=fn, ssa_val=rhs, part_count=instance.rhs_part_count,
-        part_size=part_size, name="rhs")
-    rhs_inputs = []  # type: list[EvalOpGenIrInput]
-    for part, ssa_val in enumerate(rhs_parts):
-        rhs_inputs.append(EvalOpGenIrInput(
-            ssa_val=ssa_val,
-            is_signed=rhs_signed and part == len(rhs_parts) - 1))
-    rhs_eval_state = EvalOpGenIrState(fn=fn, inputs=rhs_inputs)
-    rhs_outputs = [rhs_eval_state.get_output(i) for i in instance.rhs_eval_ops]
-    prod_inputs = []  # type: list[EvalOpGenIrInput]
-    for lhs_output, rhs_output in zip(lhs_outputs, rhs_outputs):
-        ssa_val = toom_cook_mul(
-            fn=fn,
-            lhs=lhs_output.output, lhs_signed=lhs_output.is_signed,
-            rhs=rhs_output.output, rhs_signed=rhs_output.is_signed,
-            instances=instances, start_instance_index=start_instance_index + 1)
-        products = (lhs_output.min_value * rhs_output.min_value,
-                    lhs_output.min_value * rhs_output.max_value,
-                    lhs_output.max_value * rhs_output.min_value,
-                    lhs_output.max_value * rhs_output.max_value)
-        prod_inputs.append(EvalOpGenIrInput(
-            ssa_val=ssa_val,
-            is_signed=None,
-            min_value=min(products),
-            max_value=max(products)))
-    prod_eval_state = EvalOpGenIrState(fn=fn, inputs=prod_inputs)
-    prod_parts = [
-        prod_eval_state.get_output(i) for i in instance.prod_eval_ops]
-
-    def partial_products():
-        # type: () -> Iterable[PartialProduct]
-        for part, prod_part in enumerate(prod_parts):
-            part_maxvl = prod_part.output.ty.reg_len
-            part_setvl = fn.append_new_op(
-                OpKind.SetVLI, immediates=[part_maxvl],
-                name=f"prod_{part}_setvl", maxvl=part_maxvl)
-            spread_part = fn.append_new_op(
-                OpKind.Spread,
-                input_vals=[prod_part.output, part_setvl.outputs[0]],
-                name=f"prod_{part}_spread", maxvl=part_maxvl)
-            yield PartialProduct(
-                spread_part.outputs, shift_in_words=part * part_size,
-                is_signed=prod_part.is_signed, subtract=False)
-    return sum_partial_products(fn=fn, partial_products=partial_products(),
-                                retval_size=retval_size, name="prod")
+    # type: (Fn, SSAVal, bool, SSAVal, bool, _TCIs, None | int, int) -> SSAVal
+    """ generate the IR multiplying `lhs` by `rhs` and return the product.
+
+    Thin wrapper that builds a `ToomCookMul` and returns its `retval`.
+    """
+    return ToomCookMul(
+        fn=fn, lhs=lhs, lhs_signed=lhs_signed, rhs=rhs, rhs_signed=rhs_signed,
+        instances=instances, retval_size=retval_size,
+        start_instance_index=start_instance_index).retval