deepcopy is really slow and unnecessary here
author Jacob Lifshay <programmerjake@gmail.com>
Wed, 14 Jun 2023 05:56:10 +0000 (22:56 -0700)
committer Jacob Lifshay <programmerjake@gmail.com>
Wed, 14 Jun 2023 05:56:10 +0000 (22:56 -0700)
src/openpower/decoder/isa/caller.py
src/openpower/decoder/power_decoder.py
src/openpower/decoder/power_enums.py
src/openpower/fpscr.py
src/openpower/test/fmv_fcvt/fmv_fcvt.py
src/openpower/test/state.py

index 56560433c5141c8ba3d1096700ccbca8cfee4842..b5688c920a0540615470f7b41d65ebc27878c2f8 100644 (file)
@@ -1679,6 +1679,14 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
         pc, insn = self.get_next_insn()
         yield from self.setup_next_insn(pc, insn)
 
+    # cache one shared Prefix instance, since it's really slow to construct
+    __PREFIX_CACHE = SVP64Instruction.Prefix(SelectableInt(value=0, bits=32))
+
+    def __decode_prefix(self, opcode):
+        pfx = self.__PREFIX_CACHE
+        pfx.storage.eq(opcode)
+        return pfx
+
     def setup_next_insn(self, pc, ins):
         """set up next instruction
         """
@@ -1698,7 +1706,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop):
         yield Settle()
         opcode = yield self.dec2.dec.opcode_in
         opcode = SelectableInt(value=opcode, bits=32)
-        pfx = SVP64Instruction.Prefix(opcode)
+        pfx = self.__decode_prefix(opcode)
         log("prefix test: opcode:", pfx.PO, bin(pfx.PO), pfx.id)
         self.is_svp64_mode = bool((pfx.PO == 0b000001) and (pfx.id == 0b11))
         self.pc.update_nia(self.is_svp64_mode)
index 9389dd3aa3c0c2dbe0d7ed4cae510202bc35422e..81fc1db071117c5a1f03e739ba698dae05f0426d 100644 (file)
@@ -496,7 +496,8 @@ class PowerDecoder(Elaboratable):
 
         #print("submodules", self.pname, submodules)
 
-        gc.collect()
+        # GC collection is really slow and shouldn't be needed
+        # gc.collect()
         return self.actually_does_something
 
     def handle_subdecoders(self, switch_case, submodules, d):
index 6e32fb996d38d30a18446fdbff65c6876b246998..fb90b135d23c1bec0cbbdfaf42f238de4718aed4 100644 (file)
@@ -40,6 +40,12 @@ def find_wiki_file(name):
 
 
 def get_csv(name):
+    retval = _get_csv(name)
+    return [i.copy() for i in retval]
+
+
+@functools.lru_cache()
+def _get_csv(name):
     """gets a not-entirely-csv-file-formatted database, which allows comments
     """
     file_path = find_wiki_file(name)
index d4e8a1d7612a6b12f33ef8b8fe8e4e1880db5905..edfcfacacb0228a45159ebf244fff92b07af1a5c 100644 (file)
@@ -136,9 +136,7 @@ class FPSCRState(SelectableInt):
         offs = 0
         # set up sub-fields from Record layout
         self.fsi = {}
-        l = deepcopy(FPSCRRecord.layout)
-        l.reverse()
-        for field, width in l:
+        for field, width in reversed(FPSCRRecord.layout):
             if field == "FPRF":
                 v = FPSCR_FPRF(self, tuple(range(47, 52)))
                 end = 52
index 64cbc111ff6606aacfd0ba0c948bf86e3be623ff..578536eca0e7f81d6a6389d1afedc26e3f7262a9 100644 (file)
@@ -450,6 +450,23 @@ class FMvFCvtCases(TestAccumulatorBase):
         self.toint(-(2**64), 0, signed=False, _32bit=False)
         self.toint(-fp_bits_add(2**64, 1), signed=False, _32bit=False)
 
+    @staticmethod
+    @functools.lru_cache(maxsize=None)
+    def _fcvtfg_fpscr(RN, set_XX, FR, FPRF, fpscr_unmodified):
+        """ cached FPSCR computation for fcvtfg_one since that part is slow """
+        initial_fpscr = FPSCRState()
+        initial_fpscr.RN = RN
+        fpscr = FPSCRState(initial_fpscr)
+        if set_XX:
+            fpscr.XX = 1
+            fpscr.FX = 1
+            fpscr.FI = 1
+            fpscr.FR = FR
+        fpscr.FPRF = FPRF
+        if fpscr_unmodified:
+            fpscr = FPSCRState(initial_fpscr)
+        return initial_fpscr, fpscr
+
     def fcvtfg_one(self, inp, bfp32, IT, Rc, RN):
         inp %= 2 ** 64
         inp_width = 64 if IT & 0b10 else 32
@@ -505,24 +522,24 @@ class FMvFCvtCases(TestAccumulatorBase):
             else:
                 expected_fp = next_fp
         expected_bits = bitcast_fp_to_int(expected_fp, bfp32=False)
-        initial_fpscr = FPSCRState()
-        initial_fpscr.RN = RN
-        fpscr = FPSCRState(initial_fpscr)
+        set_XX = FR = False
         if expected_fp != inp_value:
-            fpscr.XX = 1
-            fpscr.FX = 1
-            fpscr.FI = 1
-            fpscr.FR = abs(expected_fp) > abs(inp_value)
+            set_XX = True
+            FR = abs(expected_fp) > abs(inp_value)
         if expected_fp < 0:
-            fpscr.FPRF = "- Normal Number"
+            FPRF = "- Normal Number"
         elif expected_fp > 0:
-            fpscr.FPRF = "+ Normal Number"
+            FPRF = "+ Normal Number"
         else:
             # integer conversion never gives -0.0
-            fpscr.FPRF = "+ Zero"
-        if inp_width == 32 and not bfp32:
-            # defined to not modify FPSCR since the conversion is always exact
-            fpscr = FPSCRState(initial_fpscr)
+            FPRF = "+ Zero"
+
+        # defined to not modify FPSCR since the conversion is always exact
+        fpscr_unmodified = inp_width == 32 and not bfp32
+
+        initial_fpscr, fpscr = self._fcvtfg_fpscr(
+            RN=RN, set_XX=set_XX, FR=FR, FPRF=FPRF,
+            fpscr_unmodified=fpscr_unmodified)
         if Rc:
             cr1 = int(fpscr.FX) << 3
             cr1 |= int(fpscr.FEX) << 2
index 5d61c27753a2d7754c5a590834a71d70ea53dcdb..b2eb250ad9a4ce90e192e646fae748f91a89094c 100644 (file)
@@ -66,9 +66,13 @@ class StateRunner:
 
 class StateSPRs:
     KEYS = tuple(i for i in SPRfull if i != SPRfull.XER)
+    __EMPTY_VALUES = {k: 0 for k in KEYS}
 
     def __init__(self, values=None):
-        self.__values = {k: 0 for k in StateSPRs.KEYS}
+        if isinstance(values, StateSPRs):
+            self.__values = values.__values.copy()
+            return
+        self.__values = self.__EMPTY_VALUES.copy()
         if values is not None:
             for k, v in values.items():
                 self[k] = v
@@ -442,19 +446,34 @@ class ExpectedState(State):
             fp_regs = 32
         if isinstance(fp_regs, int):
             fp_regs = [0] * fp_regs
-        self.fpregs = deepcopy(fp_regs)
+        else:
+            assert isinstance(fp_regs, list), \
+                "fp_regs must be int | list[int] | None"
+            # don't use deepcopy, it's slow
+            fp_regs = fp_regs.copy()
+        self.fpregs = fp_regs
         self.fpscr = fpscr
         if int_regs is None:
             int_regs = 32
         if isinstance(int_regs, int):
             int_regs = [0] * int_regs
-        self.intregs = deepcopy(int_regs)
+        else:
+            assert isinstance(int_regs, list), \
+                "int_regs must be int | list[int] | None"
+            # don't use deepcopy, it's slow
+            int_regs = int_regs.copy()
+        self.intregs = int_regs
         self.pc = pc
         if crregs is None:
             crregs = 8
         if isinstance(crregs, int):
             crregs = [0] * crregs
-        self.crregs = deepcopy(crregs)
+        else:
+            assert isinstance(crregs, list), \
+                "crregs must be int | list[int] | None"
+            # don't use deepcopy, it's slow
+            crregs = crregs.copy()
+        self.crregs = crregs
         self.so = so
         self.ov = ov
         self.ca = ca