From: Jacob Lifshay Date: Wed, 14 Jun 2023 05:56:10 +0000 (-0700) Subject: deepcopy is really slow and unnecessary here X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c23202498ae30addf04ab4c1e0d7262cc825cd45;hp=0401e7ed1c2a94af91fcf8e6eabe3a1e0024b996;p=openpower-isa.git deepcopy is really slow and unnecessary here --- diff --git a/src/openpower/decoder/isa/caller.py b/src/openpower/decoder/isa/caller.py index 56560433..b5688c92 100644 --- a/src/openpower/decoder/isa/caller.py +++ b/src/openpower/decoder/isa/caller.py @@ -1679,6 +1679,14 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): pc, insn = self.get_next_insn() yield from self.setup_next_insn(pc, insn) + # cache since it's really slow to construct + __PREFIX_CACHE = SVP64Instruction.Prefix(SelectableInt(value=0, bits=32)) + + def __decode_prefix(self, opcode): + pfx = self.__PREFIX_CACHE + pfx.storage.eq(opcode) + return pfx + def setup_next_insn(self, pc, ins): """set up next instruction """ @@ -1698,7 +1706,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): yield Settle() opcode = yield self.dec2.dec.opcode_in opcode = SelectableInt(value=opcode, bits=32) - pfx = SVP64Instruction.Prefix(opcode) + pfx = self.__decode_prefix(opcode) log("prefix test: opcode:", pfx.PO, bin(pfx.PO), pfx.id) self.is_svp64_mode = bool((pfx.PO == 0b000001) and (pfx.id == 0b11)) self.pc.update_nia(self.is_svp64_mode) diff --git a/src/openpower/decoder/power_decoder.py b/src/openpower/decoder/power_decoder.py index 9389dd3a..81fc1db0 100644 --- a/src/openpower/decoder/power_decoder.py +++ b/src/openpower/decoder/power_decoder.py @@ -496,7 +496,8 @@ class PowerDecoder(Elaboratable): #print("submodules", self.pname, submodules) - gc.collect() + # GC collection is really slow and shouldn't be needed + # gc.collect() return self.actually_does_something def handle_subdecoders(self, switch_case, submodules, d): diff --git a/src/openpower/decoder/power_enums.py b/src/openpower/decoder/power_enums.py index 6e32fb99..fb90b135 100644 --- a/src/openpower/decoder/power_enums.py +++ b/src/openpower/decoder/power_enums.py @@ -40,6 +40,12 @@ def find_wiki_file(name): def get_csv(name): + retval = _get_csv(name) + return [i.copy() for i in retval] + + +@functools.lru_cache() +def _get_csv(name): """gets a not-entirely-csv-file-formatted database, which allows comments """ file_path = find_wiki_file(name) diff --git a/src/openpower/fpscr.py b/src/openpower/fpscr.py index d4e8a1d7..edfcfaca 100644 --- a/src/openpower/fpscr.py +++ b/src/openpower/fpscr.py @@ -136,9 +136,7 @@ class FPSCRState(SelectableInt): offs = 0 # set up sub-fields from Record layout self.fsi = {} - l = deepcopy(FPSCRRecord.layout) - l.reverse() - for field, width in l: + for field, width in reversed(FPSCRRecord.layout): if field == "FPRF": v = FPSCR_FPRF(self, tuple(range(47, 52))) end = 52 diff --git a/src/openpower/test/fmv_fcvt/fmv_fcvt.py b/src/openpower/test/fmv_fcvt/fmv_fcvt.py index 64cbc111..578536ec 100644 --- a/src/openpower/test/fmv_fcvt/fmv_fcvt.py +++ b/src/openpower/test/fmv_fcvt/fmv_fcvt.py @@ -450,6 +450,23 @@ class FMvFCvtCases(TestAccumulatorBase): self.toint(-(2**64), 0, signed=False, _32bit=False) self.toint(-fp_bits_add(2**64, 1), signed=False, _32bit=False) + @staticmethod + @functools.lru_cache(maxsize=None) + def _fcvtfg_fpscr(RN, set_XX, FR, FPRF, fpscr_unmodified): + """ cached FPSCR computation for fcvtfg_one since that part is slow """ + initial_fpscr = FPSCRState() + initial_fpscr.RN = RN + fpscr = FPSCRState(initial_fpscr) + if set_XX: + fpscr.XX = 1 + fpscr.FX = 1 + fpscr.FI = 1 + fpscr.FR = FR + fpscr.FPRF = FPRF + if fpscr_unmodified: + fpscr = FPSCRState(initial_fpscr) + return initial_fpscr, fpscr + def fcvtfg_one(self, inp, bfp32, IT, Rc, RN): inp %= 2 ** 64 inp_width = 64 if IT & 0b10 else 32 @@ -505,24 +522,24 @@ class FMvFCvtCases(TestAccumulatorBase): else: expected_fp = next_fp expected_bits = bitcast_fp_to_int(expected_fp, bfp32=False) - initial_fpscr = FPSCRState() - initial_fpscr.RN = RN - fpscr = FPSCRState(initial_fpscr) + set_XX = FR = False if expected_fp != inp_value: - fpscr.XX = 1 - fpscr.FX = 1 - fpscr.FI = 1 - fpscr.FR = abs(expected_fp) > abs(inp_value) + set_XX = True + FR = abs(expected_fp) > abs(inp_value) if expected_fp < 0: - fpscr.FPRF = "- Normal Number" + FPRF = "- Normal Number" elif expected_fp > 0: - fpscr.FPRF = "+ Normal Number" + FPRF = "+ Normal Number" else: # integer conversion never gives -0.0 - fpscr.FPRF = "+ Zero" - if inp_width == 32 and not bfp32: - # defined to not modify FPSCR since the conversion is always exact - fpscr = FPSCRState(initial_fpscr) + FPRF = "+ Zero" + + # defined to not modify FPSCR since the conversion is always exact + fpscr_unmodified = inp_width == 32 and not bfp32 + + initial_fpscr, fpscr = self._fcvtfg_fpscr( + RN=RN, set_XX=set_XX, FR=FR, FPRF=FPRF, + fpscr_unmodified=fpscr_unmodified) if Rc: cr1 = int(fpscr.FX) << 3 cr1 |= int(fpscr.FEX) << 2 diff --git a/src/openpower/test/state.py b/src/openpower/test/state.py index 5d61c277..b2eb250a 100644 --- a/src/openpower/test/state.py +++ b/src/openpower/test/state.py @@ -66,9 +66,13 @@ class StateRunner: class StateSPRs: KEYS = tuple(i for i in SPRfull if i != SPRfull.XER) + __EMPTY_VALUES = {k: 0 for k in KEYS} def __init__(self, values=None): - self.__values = {k: 0 for k in StateSPRs.KEYS} + if isinstance(values, StateSPRs): + self.__values = values.__values.copy() + return + self.__values = self.__EMPTY_VALUES.copy() if values is not None: for k, v in values.items(): self[k] = v @@ -442,19 +446,34 @@ class ExpectedState(State): fp_regs = 32 if isinstance(fp_regs, int): fp_regs = [0] * fp_regs - self.fpregs = deepcopy(fp_regs) + else: + assert isinstance(fp_regs, list), \ + "fp_regs must be int | list[int] | None" + # don't use deepcopy, it's slow + fp_regs = fp_regs.copy() + self.fpregs = fp_regs self.fpscr = fpscr if int_regs is None: int_regs = 32 if isinstance(int_regs, int): int_regs = [0] * int_regs - self.intregs = deepcopy(int_regs) + else: + assert isinstance(int_regs, list), \ + "int_regs must be int | list[int] | None" + # don't use deepcopy, it's slow + int_regs = int_regs.copy() + self.intregs = int_regs self.pc = pc if crregs is None: crregs = 8 if isinstance(crregs, int): crregs = [0] * crregs - self.crregs = deepcopy(crregs) + else: + assert isinstance(crregs, list), \ + "crregs must be int | list[int] | None" + # don't use deepcopy, it's slow + crregs = crregs.copy() + self.crregs = crregs self.so = so self.ov = ov self.ca = ca