From: Jacob Lifshay Date: Mon, 31 Oct 2022 09:43:07 +0000 (-0700) Subject: pre-ra simulation works with new ir X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=fd9cb7d7bd632f9a70968d920609b906d71c67e4;p=bigint-presentation-code.git pre-ra simulation works with new ir --- diff --git a/src/bigint_presentation_code/_tests/test_compiler_ir2.py b/src/bigint_presentation_code/_tests/test_compiler_ir2.py new file mode 100644 index 0000000..be7f69f --- /dev/null +++ b/src/bigint_presentation_code/_tests/test_compiler_ir2.py @@ -0,0 +1,229 @@ +import unittest +from bigint_presentation_code.compiler_ir import GPR_SIZE_IN_BYTES + +from bigint_presentation_code.compiler_ir2 import (Fn, OpKind, PreRASimState) + + +class TestCompilerIR(unittest.TestCase): + maxDiff = None + + def test_sim(self): + fn = Fn() + op0 = fn.append_new_op(OpKind.FuncArgR3, name="arg") + arg = op0.outputs[0] + MAXVL = 32 + op1 = fn.append_new_op(OpKind.SetVLI, immediates=[MAXVL], name="vl") + vl = op1.outputs[0] + op2 = fn.append_new_op( + OpKind.SvLd, inputs=[arg, vl], immediates=[0], maxvl=MAXVL, + name="ld") + a = op2.outputs[0] + op3 = fn.append_new_op( + OpKind.SvLI, inputs=[vl], immediates=[0], maxvl=MAXVL, name="li") + b = op3.outputs[0] + op4 = fn.append_new_op(OpKind.SetCA, name="ca") + ca = op4.outputs[0] + op5 = fn.append_new_op( + OpKind.SvAddE, inputs=[a, b, ca, vl], maxvl=MAXVL, name="add") + s = op5.outputs[0] + fn.append_new_op( + OpKind.SvStd, inputs=[s, arg, vl], immediates=[0], maxvl=MAXVL, + name="st") + + self.assertEqual([repr(i) for i in fn.ops], [ + "Op(fn=, properties=OpProperties(kind=OpKind.FuncArgR3, " + "inputs=(), outputs=(" + "OperandDesc(loc_set_before_spread=LocSet(starts=FMap({" + "LocKind.GPR: FBitSet([3])}), " + "ty=Ty(base_ty=BaseTy.I64, reg_len=1)), " + "tied_input_index=None, spread_index=None),), maxvl=1), " + "inputs=OpInputs([], op=...), " + "immediates=OpImmediates([], op=...), " + "outputs=(,), " + "name='arg')", + "Op(fn=, properties=OpProperties(kind=OpKind.SetVLI, " + "inputs=(), outputs=(" + "OperandDesc(loc_set_before_spread=LocSet(starts=FMap({" + "LocKind.VL_MAXVL: FBitSet([0])}), " + "ty=Ty(base_ty=BaseTy.VL_MAXVL, reg_len=1)), " + "tied_input_index=None, spread_index=None),), maxvl=1), " + "inputs=OpInputs([], op=...), " + "immediates=OpImmediates([32], op=...), " + "outputs=(,), " + "name='vl')", + "Op(fn=, properties=OpProperties(kind=OpKind.SvLd, " + "inputs=(" + "OperandDesc(loc_set_before_spread=LocSet(starts=FMap({" + "LocKind.GPR: FBitSet([*range(3, 13), *range(14, 128)])}), " + "ty=Ty(base_ty=BaseTy.I64, reg_len=1)), " + "tied_input_index=None, spread_index=None), " + "OperandDesc(loc_set_before_spread=LocSet(starts=FMap({" + "LocKind.VL_MAXVL: FBitSet([0])}), " + "ty=Ty(base_ty=BaseTy.VL_MAXVL, reg_len=1)), " + "tied_input_index=None, spread_index=None)), " + "outputs=(" + "OperandDesc(loc_set_before_spread=LocSet(starts=FMap({" + "LocKind.GPR: FBitSet(range(14, 97))}), " + "ty=Ty(base_ty=BaseTy.I64, reg_len=32)), " + "tied_input_index=None, spread_index=None),), maxvl=32), " + "inputs=OpInputs([, ], op=...), " + "immediates=OpImmediates([0], op=...), " + "outputs=(,), " + "name='ld')", + "Op(fn=, properties=OpProperties(kind=OpKind.SvLI, " + "inputs=(" + "OperandDesc(loc_set_before_spread=LocSet(starts=FMap({" + "LocKind.VL_MAXVL: FBitSet([0])}), " + "ty=Ty(base_ty=BaseTy.VL_MAXVL, reg_len=1)), " + "tied_input_index=None, spread_index=None),), outputs=(" + "OperandDesc(loc_set_before_spread=LocSet(starts=FMap({" + "LocKind.GPR: FBitSet(range(14, 97))}), " + "ty=Ty(base_ty=BaseTy.I64, reg_len=32)), " + "tied_input_index=None, spread_index=None),), maxvl=32), " + "inputs=OpInputs([], op=...), " + "immediates=OpImmediates([0], op=...), " + "outputs=(,), " + "name='li')", + "Op(fn=, properties=OpProperties(kind=OpKind.SetCA, " + "inputs=(), outputs=(" + "OperandDesc(loc_set_before_spread=LocSet(starts=FMap({" + "LocKind.CA: FBitSet([0])}), " + "ty=Ty(base_ty=BaseTy.CA, reg_len=1)), " + "tied_input_index=None, spread_index=None),), maxvl=1), " + "inputs=OpInputs([], op=...), " + "immediates=OpImmediates([], op=...), " + "outputs=(,), " + "name='ca')", + "Op(fn=, properties=OpProperties(kind=OpKind.SvAddE, " + "inputs=(" + "OperandDesc(loc_set_before_spread=LocSet(starts=FMap({" + "LocKind.GPR: FBitSet(range(14, 97))}), " + "ty=Ty(base_ty=BaseTy.I64, reg_len=32)), " + "tied_input_index=None, spread_index=None), " + "OperandDesc(loc_set_before_spread=LocSet(starts=FMap({" + "LocKind.GPR: FBitSet(range(14, 97))}), " + "ty=Ty(base_ty=BaseTy.I64, reg_len=32)), " + "tied_input_index=None, spread_index=None), " + "OperandDesc(loc_set_before_spread=LocSet(starts=FMap({" + "LocKind.CA: FBitSet([0])}), " + "ty=Ty(base_ty=BaseTy.CA, reg_len=1)), " + "tied_input_index=None, spread_index=None), " + "OperandDesc(loc_set_before_spread=LocSet(starts=FMap({" + "LocKind.VL_MAXVL: FBitSet([0])}), " + "ty=Ty(base_ty=BaseTy.VL_MAXVL, reg_len=1)), " + "tied_input_index=None, spread_index=None)), outputs=(" + "OperandDesc(loc_set_before_spread=LocSet(starts=FMap({" + "LocKind.GPR: FBitSet(range(14, 97))}), " + "ty=Ty(base_ty=BaseTy.I64, reg_len=32)), " + "tied_input_index=None, spread_index=None), " + "OperandDesc(loc_set_before_spread=LocSet(starts=FMap({" + "LocKind.CA: FBitSet([0])}), " + "ty=Ty(base_ty=BaseTy.CA, reg_len=1)), " + "tied_input_index=None, spread_index=None)), maxvl=32), " + "inputs=OpInputs([, , , ], op=...), " + "immediates=OpImmediates([], op=...), " + "outputs=(, ), " + "name='add')", + "Op(fn=, properties=OpProperties(kind=OpKind.SvStd, " + "inputs=(" + "OperandDesc(loc_set_before_spread=LocSet(starts=FMap({" + "LocKind.GPR: FBitSet(range(14, 97))}), " + "ty=Ty(base_ty=BaseTy.I64, reg_len=32)), " + "tied_input_index=None, spread_index=None), " + "OperandDesc(loc_set_before_spread=LocSet(starts=FMap({" + "LocKind.GPR: FBitSet([*range(3, 13), *range(14, 128)])}), " + "ty=Ty(base_ty=BaseTy.I64, reg_len=1)), " + "tied_input_index=None, spread_index=None), " + "OperandDesc(loc_set_before_spread=LocSet(starts=FMap({" + "LocKind.VL_MAXVL: FBitSet([0])}), " + "ty=Ty(base_ty=BaseTy.VL_MAXVL, reg_len=1)), " + "tied_input_index=None, spread_index=None)), " + "outputs=(), maxvl=32), " + "inputs=OpInputs([, , ], op=...), " + "immediates=OpImmediates([0], op=...), outputs=(), " + "name='st')", + ]) + + addr = 0x100 + state = PreRASimState(ssa_vals={arg: (addr,)}, memory={}) + state.store(addr=addr, value=0xffffffff_ffffffff, + size_in_bytes=GPR_SIZE_IN_BYTES) + state.store(addr=addr + GPR_SIZE_IN_BYTES, value=0xabcdef01_23456789, + size_in_bytes=GPR_SIZE_IN_BYTES) + self.assertEqual( + repr(state), + "PreRASimState(ssa_vals={: (0x100,)}, memory={\n" + "0x00100: <0xffffffffffffffff>,\n" + "0x00108: <0xabcdef0123456789>})") + fn.pre_ra_sim(state) + self.assertEqual( + repr(state), + "PreRASimState(ssa_vals={\n" + ": (0x100,),\n" + ": (0x20,),\n" + ": (\n" + " 0xffffffffffffffff, 0xabcdef0123456789, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0),\n" + ": (\n" + " 0x0, 0x0, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0),\n" + ": (0x1,),\n" + ": (\n" + " 0x0, 0xabcdef012345678a, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0,\n" + " 0x0, 0x0, 0x0, 0x0),\n" + ": (0x0,),\n" + "}, memory={\n" + "0x00100: <0x0000000000000000>,\n" + "0x00108: <0xabcdef012345678a>,\n" + "0x00110: <0x0000000000000000>,\n" + "0x00118: <0x0000000000000000>,\n" + "0x00120: <0x0000000000000000>,\n" + "0x00128: <0x0000000000000000>,\n" + "0x00130: <0x0000000000000000>,\n" + "0x00138: <0x0000000000000000>,\n" + "0x00140: <0x0000000000000000>,\n" + "0x00148: <0x0000000000000000>,\n" + "0x00150: <0x0000000000000000>,\n" + "0x00158: <0x0000000000000000>,\n" + "0x00160: <0x0000000000000000>,\n" + "0x00168: <0x0000000000000000>,\n" + "0x00170: <0x0000000000000000>,\n" + "0x00178: <0x0000000000000000>,\n" + "0x00180: <0x0000000000000000>,\n" + "0x00188: <0x0000000000000000>,\n" + "0x00190: <0x0000000000000000>,\n" + "0x00198: <0x0000000000000000>,\n" + "0x001a0: <0x0000000000000000>,\n" + "0x001a8: <0x0000000000000000>,\n" + "0x001b0: <0x0000000000000000>,\n" + "0x001b8: <0x0000000000000000>,\n" + "0x001c0: <0x0000000000000000>,\n" + "0x001c8: <0x0000000000000000>,\n" + "0x001d0: <0x0000000000000000>,\n" + "0x001d8: <0x0000000000000000>,\n" + "0x001e0: <0x0000000000000000>,\n" + "0x001e8: <0x0000000000000000>,\n" + "0x001f0: <0x0000000000000000>,\n" + "0x001f8: <0x0000000000000000>})") + + +if __name__ == "__main__": + unittest.main() diff --git a/src/bigint_presentation_code/_tests/test_util.py b/src/bigint_presentation_code/_tests/test_util.py new file mode 100644 index 0000000..0bfe365 --- /dev/null +++ b/src/bigint_presentation_code/_tests/test_util.py @@ -0,0 +1,30 @@ +import unittest + +from bigint_presentation_code.util import BitSet + + +class TestBitSet(unittest.TestCase): + def test_bitset_repr(self): + self.assertEqual(repr(BitSet([*range(3, 20), 21, *range(23, 50)])), + "BitSet([*range(3, 20), 21, *range(23, 50)])") + self.assertEqual(repr(BitSet(range(3, 20))), "BitSet(range(3, 20))") + self.assertEqual(repr(BitSet([5, 10, 15, 20, 25, 30, 35])), + "BitSet(range(5, 40, 5))") + self.assertEqual(repr(BitSet([5, 10, 15])), "BitSet([5, 10, 15])") + self.assertEqual(repr(BitSet([5, 10, 15, 20])), + "BitSet(range(5, 25, 5))") + self.assertEqual(repr( + BitSet([*range(5, 8), *range(9, 12), 13, *range(14, 20, 2)])), + "BitSet([*range(5, 8), *range(9, 12), 13, *range(14, 20, 2)])") + self.assertEqual(repr(BitSet(bits=0xcccd)), "BitSet(bits=0xcccd)") + self.assertEqual(repr(BitSet(bits=0xcccc)), "BitSet(bits=0xcccc)") + self.assertEqual(repr(BitSet(bits=0x249249)), + "BitSet(range(0, 24, 3))") + self.assertEqual(repr(BitSet(bits=0xaaaaaa)), + "BitSet(range(1, 25, 2))") + self.assertEqual(repr(BitSet(bits=0x123456789abcdef)), + "BitSet(bits=0x123456789abcdef)") + + +if __name__ == "__main__": + unittest.main() diff --git a/src/bigint_presentation_code/compiler_ir2.py b/src/bigint_presentation_code/compiler_ir2.py index b04848b..00e6221 100644 --- a/src/bigint_presentation_code/compiler_ir2.py +++ b/src/bigint_presentation_code/compiler_ir2.py @@ -2,12 +2,12 @@ import enum from abc import abstractmethod from enum import Enum, unique from functools import lru_cache -from typing import (AbstractSet, Any, Generic, Iterable, Iterator, Sequence, - TypeVar, overload) +from typing import (AbstractSet, Any, Callable, Generic, Iterable, Iterator, + Sequence, TypeVar, overload) from weakref import WeakValueDictionary as _WeakVDict from cached_property import cached_property -from nmutil.plain_data import plain_data +from nmutil.plain_data import fields, plain_data from bigint_presentation_code.type_util import Self, assert_never, final from bigint_presentation_code.util import BitSet, FBitSet, FMap, OFSet @@ -28,7 +28,7 @@ class Fn: raise ValueError("Op already named") orig_name = name while True: - if name not in self.__op_names: + if name != "" and name not in self.__op_names: self.__op_names[name] = op return name name = orig_name + str(self.__next_name_suffix) @@ -38,6 +38,24 @@ class Fn: # type: () -> str return "" + def append_op(self, op): + # type: (Op) -> None + if op.fn is not self: + raise ValueError("can't add Op to wrong Fn") + self.ops.append(op) + + def append_new_op(self, kind, inputs=(), immediates=(), name="", maxvl=1): + # type: (OpKind, Iterable[SSAVal], Iterable[int], str, int) -> Op + retval = Op(fn=self, properties=kind.instantiate(maxvl=maxvl), + inputs=inputs, immediates=immediates, name=name) + self.append_op(retval) + return retval + + def pre_ra_sim(self, state): + # type: (PreRASimState) -> None + for op in self.ops: + op.pre_ra_sim(state) + @unique @final @@ -170,7 +188,37 @@ class LocSubKind(Enum): # type: (Ty) -> LocSet if ty.base_ty != self.base_ty: raise ValueError("type mismatch") - raise NotImplementedError # FIXME: finish + if self is LocSubKind.BASE_GPR: + starts = range(32) + elif self is LocSubKind.SV_EXTRA2_VGPR: + starts = range(0, 128, 2) + elif self is LocSubKind.SV_EXTRA2_SGPR: + starts = range(64) + elif self is LocSubKind.SV_EXTRA3_VGPR \ + or self is LocSubKind.SV_EXTRA3_SGPR: + starts = range(128) + elif self is LocSubKind.StackI64: + starts = range(LocKind.StackI64.loc_count) + elif self is LocSubKind.CA or self is LocSubKind.VL_MAXVL: + return LocSet([Loc(kind=self.kind, start=0, reg_len=1)]) + else: + assert_never(self) + retval = [] # type: list[Loc] + for start in starts: + loc = Loc.try_make(kind=self.kind, start=start, reg_len=ty.reg_len) + if loc is None: + continue + conflicts = False + for special_loc in SPECIAL_GPRS: + if loc.conflicts(special_loc): + conflicts = True + break + if not conflicts: + retval.append(loc) + return LocSet(retval) + + def __repr__(self): + return "LocSubKind." + self._name_ @plain_data(frozen=True, unsafe_hash=True) @@ -222,7 +270,7 @@ class Loc: def try_make(kind, start, reg_len): # type: (LocKind, int, int) -> Loc | None msg = Loc.validate(kind=kind, start=start, reg_len=reg_len) - if msg is None: + if msg is not None: return None return Loc(kind=kind, start=start, reg_len=reg_len) @@ -237,7 +285,7 @@ class Loc: def conflicts(self, other): # type: (Loc) -> bool - return (self.kind != other.kind + return (self.kind == other.kind and self.start < other.stop and other.start < self.stop) @staticmethod @@ -269,7 +317,15 @@ class Loc: return Loc(kind=self.kind, start=self.start, reg_len=reg_len) -@plain_data(frozen=True, eq=False, repr=False) +SPECIAL_GPRS = ( + Loc(kind=LocKind.GPR, start=0, reg_len=1), + Loc(kind=LocKind.GPR, start=1, reg_len=1), + Loc(kind=LocKind.GPR, start=2, reg_len=1), + Loc(kind=LocKind.GPR, start=13, reg_len=1), +) + + +@plain_data(frozen=True, eq=False) @final class LocSet(AbstractSet[Loc]): __slots__ = "starts", "ty" @@ -353,7 +409,7 @@ class LocSet(AbstractSet[Loc]): def __contains__(self, loc): # type: (Loc | Any) -> bool - if not isinstance(loc, Loc) or loc.ty == self.ty: + if not isinstance(loc, Loc) or loc.ty != self.ty: return False if loc.kind not in self.starts: return False @@ -532,14 +588,15 @@ class GenericOpProperties: __slots__ = ("demo_asm", "inputs", "outputs", "immediates", "is_copy", "is_load_immediate", "has_side_effects") - def __init__(self, demo_asm, # type: str - inputs, # type: Iterable[GenericOperandDesc] - outputs, # type: Iterable[GenericOperandDesc] - immediates=(), # type: Iterable[range] - is_copy=False, # type: bool - is_load_immediate=False, # type: bool - has_side_effects=False, # type: bool - ): + def __init__( + self, demo_asm, # type: str + inputs, # type: Iterable[GenericOperandDesc] + outputs, # type: Iterable[GenericOperandDesc] + immediates=(), # type: Iterable[range] + is_copy=False, # type: bool + is_load_immediate=False, # type: bool + has_side_effects=False, # type: bool + ): # type: (...) -> None self.demo_asm = demo_asm self.inputs = tuple(inputs) @@ -617,6 +674,13 @@ class OpProperties: return self.generic.has_side_effects +IMM_S16 = range(-1 << 15, 1 << 15) + +_PRE_RA_SIM_FN = Callable[["Op", "PreRASimState"], None] +_PRE_RA_SIM_FN2 = Callable[[], _PRE_RA_SIM_FN] +_PRE_RA_SIMS = {} # type: dict[GenericOpProperties | Any, _PRE_RA_SIM_FN2] + + @unique @final class OpKind(Enum): @@ -630,107 +694,326 @@ class OpKind(Enum): # type: () -> GenericOpProperties return self.__properties + def instantiate(self, maxvl): + # type: (int) -> OpProperties + return OpProperties(self, maxvl=maxvl) + + def __repr__(self): + return "OpKind." + self._name_ + + @cached_property + def pre_ra_sim(self): + # type: () -> _PRE_RA_SIM_FN + return _PRE_RA_SIMS[self.properties]() + + @staticmethod + def __clearca_pre_ra_sim(op, state): + # type: (Op, PreRASimState) -> None + state.ssa_vals[op.outputs[0]] = False, + ClearCA = GenericOpProperties( + demo_asm="addic 0, 0, 0", + inputs=[], + outputs=[OD_CA], + ) + _PRE_RA_SIMS[ClearCA] = lambda: OpKind.__clearca_pre_ra_sim + + @staticmethod + def __setca_pre_ra_sim(op, state): + # type: (Op, PreRASimState) -> None + state.ssa_vals[op.outputs[0]] = True, + SetCA = GenericOpProperties( + demo_asm="subfc 0, 0, 0", + inputs=[], + outputs=[OD_CA], + ) + _PRE_RA_SIMS[SetCA] = lambda: OpKind.__setca_pre_ra_sim + + @staticmethod + def __svadde_pre_ra_sim(op, state): + # type: (Op, PreRASimState) -> None + RA = state.ssa_vals[op.inputs[0]] + RB = state.ssa_vals[op.inputs[1]] + carry, = state.ssa_vals[op.inputs[2]] + VL, = state.ssa_vals[op.inputs[3]] + RT = [] # type: list[int] + for i in range(VL): + v = RA[i] + RB[i] + carry + RT.append(v & GPR_VALUE_MASK) + carry = (v >> GPR_SIZE_IN_BITS) != 0 + state.ssa_vals[op.outputs[0]] = tuple(RT) + state.ssa_vals[op.outputs[1]] = carry, SvAddE = GenericOpProperties( demo_asm="sv.adde *RT, *RA, *RB", - inputs=(OD_EXTRA3_VGPR, OD_EXTRA3_VGPR, OD_CA, OD_VL), - outputs=(OD_EXTRA3_VGPR, OD_CA), + inputs=[OD_EXTRA3_VGPR, OD_EXTRA3_VGPR, OD_CA, OD_VL], + outputs=[OD_EXTRA3_VGPR, OD_CA], ) + _PRE_RA_SIMS[SvAddE] = lambda: OpKind.__svadde_pre_ra_sim + + @staticmethod + def __svsubfe_pre_ra_sim(op, state): + # type: (Op, PreRASimState) -> None + RA = state.ssa_vals[op.inputs[0]] + RB = state.ssa_vals[op.inputs[1]] + carry, = state.ssa_vals[op.inputs[2]] + VL, = state.ssa_vals[op.inputs[3]] + RT = [] # type: list[int] + for i in range(VL): + v = (~RA[i] & GPR_VALUE_MASK) + RB[i] + carry + RT.append(v & GPR_VALUE_MASK) + carry = (v >> GPR_SIZE_IN_BITS) != 0 + state.ssa_vals[op.outputs[0]] = tuple(RT) + state.ssa_vals[op.outputs[1]] = carry, SvSubFE = GenericOpProperties( demo_asm="sv.subfe *RT, *RA, *RB", - inputs=(OD_EXTRA3_VGPR, OD_EXTRA3_VGPR, OD_CA, OD_VL), - outputs=(OD_EXTRA3_VGPR, OD_CA), + inputs=[OD_EXTRA3_VGPR, OD_EXTRA3_VGPR, OD_CA, OD_VL], + outputs=[OD_EXTRA3_VGPR, OD_CA], ) + _PRE_RA_SIMS[SvSubFE] = lambda: OpKind.__svsubfe_pre_ra_sim + + @staticmethod + def __svmaddedu_pre_ra_sim(op, state): + # type: (Op, PreRASimState) -> None + RA = state.ssa_vals[op.inputs[0]] + RB, = state.ssa_vals[op.inputs[1]] + carry, = state.ssa_vals[op.inputs[2]] + VL, = state.ssa_vals[op.inputs[3]] + RT = [] # type: list[int] + for i in range(VL): + v = RA[i] * RB + carry + RT.append(v & GPR_VALUE_MASK) + carry = v >> GPR_SIZE_IN_BITS + state.ssa_vals[op.outputs[0]] = tuple(RT) + state.ssa_vals[op.outputs[1]] = carry, SvMAddEDU = GenericOpProperties( demo_asm="sv.maddedu *RT, *RA, RB, RC", - inputs=(OD_EXTRA2_VGPR, OD_EXTRA2_VGPR, OD_EXTRA2_SGPR, - OD_EXTRA2_SGPR, OD_VL), - outputs=(OD_EXTRA3_VGPR, OD_EXTRA2_SGPR.tied_to_input(3)), + inputs=[OD_EXTRA2_VGPR, OD_EXTRA2_SGPR, OD_EXTRA2_SGPR, OD_VL], + outputs=[OD_EXTRA3_VGPR, OD_EXTRA2_SGPR.tied_to_input(2)], ) + _PRE_RA_SIMS[SvMAddEDU] = lambda: OpKind.__svmaddedu_pre_ra_sim + + @staticmethod + def __setvli_pre_ra_sim(op, state): + # type: (Op, PreRASimState) -> None + state.ssa_vals[op.outputs[0]] = op.immediates[0], SetVLI = GenericOpProperties( demo_asm="setvl 0, 0, imm, 0, 1, 1", inputs=(), - outputs=(OD_VL,), - immediates=(range(1, 65),), + outputs=[OD_VL], + immediates=[range(1, 65)], is_load_immediate=True, ) + _PRE_RA_SIMS[SetVLI] = lambda: OpKind.__setvli_pre_ra_sim + + @staticmethod + def __svli_pre_ra_sim(op, state): + # type: (Op, PreRASimState) -> None + VL, = state.ssa_vals[op.inputs[0]] + imm = op.immediates[0] & GPR_VALUE_MASK + state.ssa_vals[op.outputs[0]] = (imm,) * VL SvLI = GenericOpProperties( demo_asm="sv.addi *RT, 0, imm", - inputs=(OD_VL,), - outputs=(OD_EXTRA3_VGPR,), - immediates=(range(-2 ** 15, 2 ** 15),), + inputs=[OD_VL], + outputs=[OD_EXTRA3_VGPR], + immediates=[IMM_S16], is_load_immediate=True, ) + _PRE_RA_SIMS[SvLI] = lambda: OpKind.__svli_pre_ra_sim + + @staticmethod + def __li_pre_ra_sim(op, state): + # type: (Op, PreRASimState) -> None + imm = op.immediates[0] & GPR_VALUE_MASK + state.ssa_vals[op.outputs[0]] = imm, LI = GenericOpProperties( demo_asm="addi RT, 0, imm", inputs=(), - outputs=(OD_BASE_SGPR,), - immediates=(range(-2 ** 15, 2 ** 15),), + outputs=[OD_BASE_SGPR], + immediates=[IMM_S16], is_load_immediate=True, ) + _PRE_RA_SIMS[LI] = lambda: OpKind.__li_pre_ra_sim + + @staticmethod + def __veccopytoreg_pre_ra_sim(op, state): + # type: (Op, PreRASimState) -> None + state.ssa_vals[op.outputs[0]] = state.ssa_vals[op.inputs[0]] VecCopyToReg = GenericOpProperties( demo_asm="sv.mv dest, src", - inputs=(GenericOperandDesc( + inputs=[GenericOperandDesc( ty=GenericTy(BaseTy.I64, is_vec=True), - sub_kinds=(LocSubKind.SV_EXTRA3_VGPR, LocSubKind.StackI64), - ), OD_VL), - outputs=(OD_EXTRA3_VGPR,), + sub_kinds=[LocSubKind.SV_EXTRA3_VGPR, LocSubKind.StackI64], + ), OD_VL], + outputs=[OD_EXTRA3_VGPR], is_copy=True, ) + _PRE_RA_SIMS[VecCopyToReg] = lambda: OpKind.__veccopytoreg_pre_ra_sim + + @staticmethod + def __veccopyfromreg_pre_ra_sim(op, state): + # type: (Op, PreRASimState) -> None + state.ssa_vals[op.outputs[0]] = state.ssa_vals[op.inputs[0]] VecCopyFromReg = GenericOpProperties( demo_asm="sv.mv dest, src", - inputs=(OD_EXTRA3_VGPR, OD_VL), - outputs=(GenericOperandDesc( + inputs=[OD_EXTRA3_VGPR, OD_VL], + outputs=[GenericOperandDesc( ty=GenericTy(BaseTy.I64, is_vec=True), - sub_kinds=(LocSubKind.SV_EXTRA3_VGPR, LocSubKind.StackI64), - ),), + sub_kinds=[LocSubKind.SV_EXTRA3_VGPR, LocSubKind.StackI64], + )], is_copy=True, ) + _PRE_RA_SIMS[VecCopyFromReg] = lambda: OpKind.__veccopyfromreg_pre_ra_sim + + @staticmethod + def __copytoreg_pre_ra_sim(op, state): + # type: (Op, PreRASimState) -> None + state.ssa_vals[op.outputs[0]] = state.ssa_vals[op.inputs[0]] CopyToReg = GenericOpProperties( demo_asm="mv dest, src", - inputs=(GenericOperandDesc( + inputs=[GenericOperandDesc( ty=GenericTy(BaseTy.I64, is_vec=False), - sub_kinds=(LocSubKind.SV_EXTRA3_SGPR, LocSubKind.BASE_GPR, - LocSubKind.StackI64), - ),), - outputs=(GenericOperandDesc( + sub_kinds=[LocSubKind.SV_EXTRA3_SGPR, LocSubKind.BASE_GPR, + LocSubKind.StackI64], + )], + outputs=[GenericOperandDesc( ty=GenericTy(BaseTy.I64, is_vec=False), - sub_kinds=(LocSubKind.SV_EXTRA3_SGPR, LocSubKind.BASE_GPR), - ),), + sub_kinds=[LocSubKind.SV_EXTRA3_SGPR, LocSubKind.BASE_GPR], + )], is_copy=True, ) + _PRE_RA_SIMS[CopyToReg] = lambda: OpKind.__copytoreg_pre_ra_sim + + @staticmethod + def __copyfromreg_pre_ra_sim(op, state): + # type: (Op, PreRASimState) -> None + state.ssa_vals[op.outputs[0]] = state.ssa_vals[op.inputs[0]] CopyFromReg = GenericOpProperties( demo_asm="mv dest, src", - inputs=(GenericOperandDesc( + inputs=[GenericOperandDesc( ty=GenericTy(BaseTy.I64, is_vec=False), - sub_kinds=(LocSubKind.SV_EXTRA3_SGPR, LocSubKind.BASE_GPR), - ),), - outputs=(GenericOperandDesc( + sub_kinds=[LocSubKind.SV_EXTRA3_SGPR, LocSubKind.BASE_GPR], + )], + outputs=[GenericOperandDesc( ty=GenericTy(BaseTy.I64, is_vec=False), - sub_kinds=(LocSubKind.SV_EXTRA3_SGPR, LocSubKind.BASE_GPR, - LocSubKind.StackI64), - ),), + sub_kinds=[LocSubKind.SV_EXTRA3_SGPR, LocSubKind.BASE_GPR, + LocSubKind.StackI64], + )], is_copy=True, ) + _PRE_RA_SIMS[CopyFromReg] = lambda: OpKind.__copyfromreg_pre_ra_sim + + @staticmethod + def __concat_pre_ra_sim(op, state): + # type: (Op, PreRASimState) -> None + state.ssa_vals[op.outputs[0]] = tuple( + state.ssa_vals[i][0] for i in op.inputs[:-1]) Concat = GenericOpProperties( demo_asm="sv.mv dest, src", - inputs=(GenericOperandDesc( + inputs=[GenericOperandDesc( ty=GenericTy(BaseTy.I64, is_vec=False), - sub_kinds=(LocSubKind.SV_EXTRA3_VGPR,), + sub_kinds=[LocSubKind.SV_EXTRA3_VGPR], spread=True, - ), OD_VL), - outputs=(OD_EXTRA3_VGPR,), + ), OD_VL], + outputs=[OD_EXTRA3_VGPR], is_copy=True, ) + _PRE_RA_SIMS[Concat] = lambda: OpKind.__concat_pre_ra_sim + + @staticmethod + def __spread_pre_ra_sim(op, state): + # type: (Op, PreRASimState) -> None + for idx, inp in enumerate(state.ssa_vals[op.inputs[0]]): + state.ssa_vals[op.outputs[idx]] = inp, Spread = GenericOpProperties( demo_asm="sv.mv dest, src", - inputs=(OD_EXTRA3_VGPR, OD_VL), - outputs=(GenericOperandDesc( + inputs=[OD_EXTRA3_VGPR, OD_VL], + outputs=[GenericOperandDesc( ty=GenericTy(BaseTy.I64, is_vec=False), - sub_kinds=(LocSubKind.SV_EXTRA3_VGPR,), + sub_kinds=[LocSubKind.SV_EXTRA3_VGPR], spread=True, - ),), + )], is_copy=True, ) + _PRE_RA_SIMS[Spread] = lambda: OpKind.__spread_pre_ra_sim + + @staticmethod + def __svld_pre_ra_sim(op, state): + # type: (Op, PreRASimState) -> None + RA, = state.ssa_vals[op.inputs[0]] + VL, = state.ssa_vals[op.inputs[1]] + addr = RA + op.immediates[0] + RT = [] # type: list[int] + for i in range(VL): + v = state.load(addr + GPR_SIZE_IN_BYTES * i) + RT.append(v & GPR_VALUE_MASK) + state.ssa_vals[op.outputs[0]] = tuple(RT) + SvLd = GenericOpProperties( + demo_asm="sv.ld *RT, imm(RA)", + inputs=[OD_EXTRA3_SGPR, OD_VL], + outputs=[OD_EXTRA3_VGPR], + immediates=[IMM_S16], + ) + _PRE_RA_SIMS[SvLd] = lambda: OpKind.__svld_pre_ra_sim + + @staticmethod + def __ld_pre_ra_sim(op, state): + # type: (Op, PreRASimState) -> None + RA, = state.ssa_vals[op.inputs[0]] + addr = RA + op.immediates[0] + v = state.load(addr) + state.ssa_vals[op.outputs[0]] = v & GPR_VALUE_MASK, + Ld = GenericOpProperties( + demo_asm="ld RT, imm(RA)", + inputs=[OD_BASE_SGPR], + outputs=[OD_BASE_SGPR], + immediates=[IMM_S16], + ) + _PRE_RA_SIMS[Ld] = lambda: OpKind.__ld_pre_ra_sim + + @staticmethod + def __svstd_pre_ra_sim(op, state): + # type: (Op, PreRASimState) -> None + RS = state.ssa_vals[op.inputs[0]] + RA, = state.ssa_vals[op.inputs[1]] + VL, = state.ssa_vals[op.inputs[2]] + addr = RA + op.immediates[0] + for i in range(VL): + state.store(addr + GPR_SIZE_IN_BYTES * i, value=RS[i]) + SvStd = GenericOpProperties( + demo_asm="sv.std *RS, imm(RA)", + inputs=[OD_EXTRA3_VGPR, OD_EXTRA3_SGPR, OD_VL], + outputs=[], + immediates=[IMM_S16], + has_side_effects=True, + ) + _PRE_RA_SIMS[SvStd] = lambda: OpKind.__svstd_pre_ra_sim + + @staticmethod + def __std_pre_ra_sim(op, state): + # type: (Op, PreRASimState) -> None + RS, = state.ssa_vals[op.inputs[0]] + RA, = state.ssa_vals[op.inputs[1]] + addr = RA + op.immediates[0] + state.store(addr, value=RS) + Std = GenericOpProperties( + demo_asm="std RT, imm(RA)", + inputs=[OD_BASE_SGPR, OD_BASE_SGPR], + outputs=[], + immediates=[IMM_S16], + has_side_effects=True, + ) + _PRE_RA_SIMS[Std] = lambda: OpKind.__std_pre_ra_sim + + @staticmethod + def __funcargr3_pre_ra_sim(op, state): + # type: (Op, PreRASimState) -> None + pass # return value set before simulation + FuncArgR3 = GenericOpProperties( + demo_asm="", + inputs=[], + outputs=[OD_BASE_SGPR.with_fixed_loc( + Loc(kind=LocKind.GPR, start=3, reg_len=1))], + ) + _PRE_RA_SIMS[FuncArgR3] = lambda: OpKind.__funcargr3_pre_ra_sim @plain_data(frozen=True, unsafe_hash=True, repr=False) @@ -853,6 +1136,9 @@ class OpInputSeq(Sequence[_T], Generic[_T, _Desc]): # type: () -> int return len(self.__items) + def __repr__(self): + return f"{self.__class__.__name__}({self.__items}, op=...)" + @final class OpInputs(OpInputSeq[SSAVal, OperandDesc]): @@ -922,3 +1208,142 @@ class Op: def __hash__(self): return object.__hash__(self) + + def pre_ra_sim(self, state): + # type: (PreRASimState) -> None + for inp in self.inputs: + if inp not in state.ssa_vals: + raise ValueError(f"SSAVal {inp} not yet assigned when " + f"running {self}") + if len(state.ssa_vals[inp]) != inp.ty.reg_len: + raise ValueError( + f"value of SSAVal {inp} has wrong number of elements: " + f"expected {inp.ty.reg_len} found " + f"{len(state.ssa_vals[inp])}: {state.ssa_vals[inp]!r}") + for out in self.outputs: + if out in state.ssa_vals: + if self.kind is OpKind.FuncArgR3: + continue + raise ValueError(f"SSAVal {out} already assigned before " + f"running {self}") + self.kind.pre_ra_sim(self, state) + for out in self.outputs: + if out not in state.ssa_vals: + raise ValueError(f"running {self} failed to assign to {out}") + if len(state.ssa_vals[out]) != out.ty.reg_len: + raise ValueError( + f"value of SSAVal {out} has wrong number of elements: " + f"expected {out.ty.reg_len} found " + f"{len(state.ssa_vals[out])}: {state.ssa_vals[out]!r}") + + +GPR_SIZE_IN_BYTES = 8 +BITS_IN_BYTE = 8 +GPR_SIZE_IN_BITS = GPR_SIZE_IN_BYTES * BITS_IN_BYTE +GPR_VALUE_MASK = (1 << GPR_SIZE_IN_BITS) - 1 + + +@plain_data(frozen=True, repr=False) +@final +class PreRASimState: + __slots__ = "ssa_vals", "memory" + + def __init__(self, ssa_vals, memory): + # type: (dict[SSAVal, tuple[int, ...]], dict[int, int]) -> None + self.ssa_vals = ssa_vals + self.memory = memory + + def load_byte(self, addr): + # type: (int) -> int + addr &= GPR_VALUE_MASK + return self.memory.get(addr, 0) & 0xFF + + def store_byte(self, addr, value): + # type: (int, int) -> None + addr &= GPR_VALUE_MASK + value &= 0xFF + self.memory[addr] = value + + def load(self, addr, size_in_bytes=GPR_SIZE_IN_BYTES, signed=False): + # type: (int, int, bool) -> int + if addr % size_in_bytes != 0: + raise ValueError(f"address not aligned: {hex(addr)} " + f"required alignment: {size_in_bytes}") + retval = 0 + for i in range(size_in_bytes): + retval |= self.load_byte(addr + i) << i * BITS_IN_BYTE + if signed and retval >> (size_in_bytes * BITS_IN_BYTE - 1) != 0: + retval -= 1 << size_in_bytes * BITS_IN_BYTE + return retval + + def store(self, addr, value, size_in_bytes=GPR_SIZE_IN_BYTES): + # type: (int, int, int) -> None + if addr % size_in_bytes != 0: + raise ValueError(f"address not aligned: {hex(addr)} " + f"required alignment: {size_in_bytes}") + for i in range(size_in_bytes): + self.store_byte(addr + i, (value >> i * BITS_IN_BYTE) & 0xFF) + + def _memory__repr(self): + # type: () -> str + if len(self.memory) == 0: + return "{}" + keys = sorted(self.memory.keys(), reverse=True) + CHUNK_SIZE = GPR_SIZE_IN_BYTES + items = [] # type: list[str] + while len(keys) != 0: + addr = keys[-1] + if (len(keys) >= CHUNK_SIZE + and addr % CHUNK_SIZE == 0 + and keys[-CHUNK_SIZE:] + == list(reversed(range(addr, addr + CHUNK_SIZE)))): + value = self.load(addr, size_in_bytes=CHUNK_SIZE) + items.append(f"0x{addr:05x}: <0x{value:0{CHUNK_SIZE * 2}x}>") + keys[-CHUNK_SIZE:] = () + else: + items.append(f"0x{addr:05x}: 0x{self.memory[keys.pop()]:02x}") + if len(items) == 1: + return f"{{{items[0]}}}" + items_str = ",\n".join(items) + return f"{{\n{items_str}}}" + + def _ssa_vals__repr(self): + # type: () -> str + if len(self.ssa_vals) == 0: + return "{}" + items = [] # type: list[str] + CHUNK_SIZE = 4 + for k, v in self.ssa_vals.items(): + element_strs = [] # type: list[str] + for i, el in enumerate(v): + if i % CHUNK_SIZE != 0: + element_strs.append(" " + hex(el)) + else: + element_strs.append("\n " + hex(el)) + if len(element_strs) <= CHUNK_SIZE: + element_strs[0] = element_strs[0].lstrip() + if len(element_strs) == 1: + element_strs.append("") + v_str = ",".join(element_strs) + items.append(f"{k!r}: ({v_str})") + if len(items) == 1 and "\n" not in items[0]: + return f"{{{items[0]}}}" + items_str = ",\n".join(items) + return f"{{\n{items_str},\n}}" + + def __repr__(self): + # type: () -> str + field_vals = [] # type: list[str] + for name in fields(self): + try: + value = getattr(self, name) + except AttributeError: + field_vals.append(f"{name}=") + continue + repr_fn = getattr(self, f"_{name}__repr", None) + if callable(repr_fn): + field_vals.append(f"{name}={repr_fn()}") + else: + field_vals.append(f"{name}={value!r}") + field_vals_str = ", ".join(field_vals) + return f"PreRASimState({field_vals_str})" diff --git a/src/bigint_presentation_code/util.py b/src/bigint_presentation_code/util.py index 4b39787..b85b3ac 100644 --- a/src/bigint_presentation_code/util.py +++ b/src/bigint_presentation_code/util.py @@ -234,7 +234,44 @@ class BaseBitSet(AbstractSet[int]): # type: () -> str if self.bits == 0: return f"{self.__class__.__name__}()" - if self.bits > 0xFFFFFFFF and len(self) < 10: + len_self = len(self) + if len_self <= 3: + v = list(self) + return f"{self.__class__.__name__}({v})" + ranges = [] # type: list[range] + MAX_RANGES = 5 + for i in self: + if len(ranges) != 0 and ranges[-1].stop == i: + ranges[-1] = range( + ranges[-1].start, i + ranges[-1].step, ranges[-1].step) + elif len(ranges) != 0 and len(ranges[-1]) == 1: + start = ranges[-1][0] + step = i - start + stop = i + step + ranges[-1] = range(start, stop, step) + elif len(ranges) != 0 and len(ranges[-1]) == 2: + single = ranges[-1][0] + start = ranges[-1][1] + ranges[-1] = range(single, single + 1) + step = i - start + stop = i + step + ranges.append(range(start, stop, step)) + else: + ranges.append(range(i, i + 1)) + if len(ranges) > MAX_RANGES: + break + if len(ranges) == 1: + return f"{self.__class__.__name__}({ranges[0]})" + if len(ranges) <= MAX_RANGES: + range_strs = [] # type: list[str] + for r in ranges: + if len(r) == 1: + range_strs.append(str(r[0])) + else: + range_strs.append(f"*{r}") + ranges_str = ", ".join(range_strs) + return f"{self.__class__.__name__}([{ranges_str}])" + if self.bits > 0xFFFFFFFF and len_self < 10: v = list(self) return f"{self.__class__.__name__}({v})" return f"{self.__class__.__name__}(bits={hex(self.bits)})"