From dedd1ccc99e4ed7a63c95c04ab2bb16a91df1925 Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Date: Fri, 7 May 2021 12:51:26 +0100
Subject: [PATCH] move LoadStore1 class to soc.fu.ldst.loadstore

---
 src/soc/fu/ldst/loadstore.py | 232 +++++++++++++++++++++++++++++++++++
 src/soc/fu/mmu/fsm.py        | 232 +----------------------------------
 2 files changed, 236 insertions(+), 228 deletions(-)
 create mode 100644 src/soc/fu/ldst/loadstore.py

diff --git a/src/soc/fu/ldst/loadstore.py b/src/soc/fu/ldst/loadstore.py
new file mode 100644
index 00000000..a2ad7a9a
--- /dev/null
+++ b/src/soc/fu/ldst/loadstore.py
@@ -0,0 +1,232 @@
+from nmigen import Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux
+from nmigen import Record, Memory
+from nmigen import Const
+from nmutil.util import rising_edge
+
+from soc.experiment.dcache import DCache
+from soc.experiment.pimem import PortInterfaceBase
+from soc.experiment.mem_types import LoadStore1ToMMUType
+from soc.experiment.mem_types import MMUToLoadStore1Type
+
+from soc.minerva.wishbone import make_wb_layout
+from soc.bus.sram import SRAM
+
+
+# glue logic for microwatt mmu and dcache
+class LoadStore1(PortInterfaceBase):
+    def __init__(self, pspec):
+        self.pspec = pspec
+        self.disable_cache = (hasattr(pspec, "disable_cache") and
+                              pspec.disable_cache == True)
+        regwid = pspec.reg_wid
+        addrwid = pspec.addr_wid
+
+        super().__init__(regwid, addrwid)
+        self.dcache = DCache()
+        self.d_in  = self.dcache.d_in
+        self.d_out = self.dcache.d_out
+        self.l_in  = LoadStore1ToMMUType()
+        self.l_out = MMUToLoadStore1Type()
+        # TODO microwatt
+        self.mmureq = Signal()
+        self.derror = Signal()
+
+        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
+        self.dbus = Record(make_wb_layout(pspec))
+
+        # for creating a single clock blip to DCache
+        self.d_valid = Signal()
+        self.d_w_data = Signal(64) # XXX
+        self.d_w_valid = Signal()
+        self.d_validblip = Signal()
+
+        # DSISR and DAR cached values.  note that the MMU FSM is where
+        # these are accessed by OP_MTSPR/OP_MFSPR, on behalf of LoadStore1.
+        # by contrast microwatt has the spr set/get done *in* loadstore1.vhdl
+        self.dsisr = Signal(64)
+        self.dar = Signal(64)
+
+    def set_wr_addr(self, m, addr, mask):
+        # this gets complicated: actually a FSM is needed which
+        # first checks dcache, then if that fails (in virt mode)
+        # it checks the MMU instead.
+        #m.d.comb += self.l_in.valid.eq(1)
+        #m.d.comb += self.l_in.addr.eq(addr)
+        #m.d.comb += self.l_in.load.eq(0)
+        m.d.comb += self.d_in.load.eq(0)
+        m.d.comb += self.d_in.byte_sel.eq(mask)
+        m.d.comb += self.d_in.addr.eq(addr)
+        # option to disable the cache entirely for write
+        if self.disable_cache:
+            m.d.comb += self.d_in.nc.eq(1)
+        return None
+
+    def set_rd_addr(self, m, addr, mask):
+        # this gets complicated: actually a FSM is needed which
+        # first checks dcache, then if that fails (in virt mode)
+        # it checks the MMU instead.
+        #m.d.comb += self.l_in.valid.eq(1)
+        #m.d.comb += self.l_in.load.eq(1)
+        #m.d.comb += self.l_in.addr.eq(addr)
+        m.d.comb += self.d_valid.eq(1)
+        m.d.comb += self.d_in.valid.eq(self.d_validblip)
+        m.d.comb += self.d_in.load.eq(1)
+        m.d.comb += self.d_in.byte_sel.eq(mask)
+        m.d.comb += self.d_in.addr.eq(addr)
+        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
+        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
+        with m.If(addr[28:] == Const(0xc, 4)):
+            m.d.comb += self.d_in.nc.eq(1)
+        # option to disable the cache entirely for read
+        if self.disable_cache:
+            m.d.comb += self.d_in.nc.eq(1)
+        return None #FIXME return value
+
+    def set_wr_data(self, m, data, wen):
+        # do the "blip" on write data
+        m.d.comb += self.d_valid.eq(1)
+        m.d.comb += self.d_in.valid.eq(self.d_validblip)
+        # put data into comb which is picked up in main elaborate()
+        m.d.comb += self.d_w_valid.eq(1)
+        m.d.comb += self.d_w_data.eq(data)
+        #m.d.sync += self.d_in.byte_sel.eq(wen) # this might not be needed
+        st_ok = self.d_out.valid # TODO indicates write data is valid
+        #st_ok = Const(1, 1)
+        return st_ok
+
+    def get_rd_data(self, m):
+        ld_ok = self.d_out.valid # indicates read data is valid
+        data = self.d_out.data   # actual read data
+        return data, ld_ok
+
+    """
+    if d_in.error = '1' then
+                if d_in.cache_paradox = '1' then
+                    -- signal an interrupt straight away
+                    exception := '1';
+                    dsisr(63 - 38) := not r2.req.load;
+                    -- XXX there is no architected bit for this
+                    -- (probably should be a machine check in fact)
+                    dsisr(63 - 35) := d_in.cache_paradox;
+                else
+                    -- Look up the translation for TLB miss
+                    -- and also for permission error and RC error
+                    -- in case the PTE has been updated.
+                    mmureq := '1';
+                    v.state := MMU_LOOKUP;
+                    v.stage1_en := '0';
+                end if;
+            end if;
+    """
+
+    def elaborate(self, platform):
+        m = super().elaborate(platform)
+        comb = m.d.comb
+
+        # create dcache module
+        m.submodules.dcache = dcache = self.dcache
+
+        # temp vars
+        d_out, l_out, dbus = self.d_out, self.l_out, self.dbus
+
+        with m.If(d_out.error):
+            with m.If(d_out.cache_paradox):
+                comb += self.derror.eq(1)
+                #  dsisr(63 - 38) := not r2.req.load;
+                #    -- XXX there is no architected bit for this
+                #    -- (probably should be a machine check in fact)
+                #    dsisr(63 - 35) := d_in.cache_paradox;
+            with m.Else():
+                # Look up the translation for TLB miss
+                # and also for permission error and RC error
+                # in case the PTE has been updated.
+                comb += self.mmureq.eq(1)
+                # v.state := MMU_LOOKUP;
+                # v.stage1_en := '0';
+
+        exc = self.pi.exc_o
+
+        #happened, alignment, instr_fault, invalid,
+        comb += exc.happened.eq(d_out.error | l_out.err)
+        comb += exc.invalid.eq(l_out.invalid)
+
+        #badtree, perm_error, rc_error, segment_fault
+        comb += exc.badtree.eq(l_out.badtree)
+        comb += exc.perm_error.eq(l_out.perm_error)
+        comb += exc.rc_error.eq(l_out.rc_error)
+        comb += exc.segment_fault.eq(l_out.segerr)
+
+        # TODO connect those signals somewhere
+        #print(d_out.valid)         -> no error
+        #print(d_out.store_done)    -> no error
+        #print(d_out.cache_paradox) -> ?
+        #print(l_out.done)          -> no error
+
+        # TODO some exceptions set SPRs
+
+        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
+        comb += dbus.adr.eq(dcache.wb_out.adr)
+        comb += dbus.dat_w.eq(dcache.wb_out.dat)
+        comb += dbus.sel.eq(dcache.wb_out.sel)
+        comb += dbus.cyc.eq(dcache.wb_out.cyc)
+        comb += dbus.stb.eq(dcache.wb_out.stb)
+        comb += dbus.we.eq(dcache.wb_out.we)
+
+        comb += dcache.wb_in.dat.eq(dbus.dat_r)
+        comb += dcache.wb_in.ack.eq(dbus.ack)
+        if hasattr(dbus, "stall"):
+            comb += dcache.wb_in.stall.eq(dbus.stall)
+
+        # create a blip (single pulse) on valid read/write request
+        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
+
+        # write out d data only when flag set
+        with m.If(self.d_w_valid):
+            m.d.sync += self.d_in.data.eq(self.d_w_data)
+        with m.Else():
+            m.d.sync += self.d_in.data.eq(0)
+
+        return m
+
+    def ports(self):
+        yield from super().ports()
+        # TODO: memory ports
+
+
+class TestSRAMLoadStore1(LoadStore1):
+    def __init__(self, pspec):
+        super().__init__(pspec)
+        pspec = self.pspec
+        # small 32-entry Memory
+        if (hasattr(pspec, "dmem_test_depth") and
+                isinstance(pspec.dmem_test_depth, int)):
+            depth = pspec.dmem_test_depth
+        else:
+            depth = 32
+        print("TestSRAMBareLoadStoreUnit depth", depth)
+
+        self.mem = Memory(width=pspec.reg_wid, depth=depth)
+
+    def elaborate(self, platform):
+        m = super().elaborate(platform)
+        comb = m.d.comb
+        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
+                                        features={'cti', 'bte', 'err'})
+        dbus = self.dbus
+
+        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
+        # note: SRAM is a target (slave), dbus is initiator (master)
+        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
+        fanins = ['dat_r', 'ack', 'err']
+        for fanout in fanouts:
+            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
+                  getattr(dbus, fanout).shape())
+            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
+            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
+        for fanin in fanins:
+            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
+        # connect address
+        comb += sram.bus.adr.eq(dbus.adr)
+
+        return m
+
diff --git a/src/soc/fu/mmu/fsm.py b/src/soc/fu/mmu/fsm.py
index 3eea1e9f..de84038b 100644
--- a/src/soc/fu/mmu/fsm.py
+++ b/src/soc/fu/mmu/fsm.py
@@ -6,241 +6,17 @@ from nmutil.singlepipe import ControlBase
 from nmutil.util import rising_edge
 
 from soc.experiment.mmu import MMU
-from soc.experiment.dcache import DCache
 
 from openpower.consts import MSR
 from openpower.decoder.power_fields import DecodeFields
 from openpower.decoder.power_fieldsn import SignalBitRange
 from openpower.decoder.power_decoder2 import decode_spr_num
-from openpower.decoder.power_enums import MicrOp, XER_bits
+from openpower.decoder.power_enums import MicrOp
 
-from soc.experiment.pimem import PortInterface
-from soc.experiment.pimem import PortInterfaceBase
+from soc.experiment.mem_types import LoadStore1ToMMUType
+from soc.experiment.mem_types import MMUToLoadStore1Type
 
-from soc.experiment.mem_types import LoadStore1ToDCacheType, LoadStore1ToMMUType
-from soc.experiment.mem_types import DCacheToLoadStore1Type, MMUToLoadStore1Type
-
-from soc.minerva.wishbone import make_wb_layout
-from soc.bus.sram import SRAM
-
-
-# glue logic for microwatt mmu and dcache
-class LoadStore1(PortInterfaceBase):
-    def __init__(self, pspec):
-        self.pspec = pspec
-        self.disable_cache = (hasattr(pspec, "disable_cache") and
-                              pspec.disable_cache == True)
-        regwid = pspec.reg_wid
-        addrwid = pspec.addr_wid
-
-        super().__init__(regwid, addrwid)
-        self.dcache = DCache()
-        self.d_in  = self.dcache.d_in
-        self.d_out = self.dcache.d_out
-        self.l_in  = LoadStore1ToMMUType()
-        self.l_out = MMUToLoadStore1Type()
-        # TODO microwatt
-        self.mmureq = Signal()
-        self.derror = Signal()
-
-        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
-        self.dbus = Record(make_wb_layout(pspec))
-
-        # for creating a single clock blip to DCache
-        self.d_valid = Signal()
-        self.d_w_data = Signal(64) # XXX
-        self.d_w_valid = Signal()
-        self.d_validblip = Signal()
-
-        # DSISR and DAR cached values.  note that the MMU FSM is where
-        # these are accessed by OP_MTSPR/OP_MFSPR, on behalf of LoadStore1.
-        # by contrast microwatt has the spr set/get done *in* loadstore1.vhdl
-        self.dsisr = Signal(64)
-        self.dar = Signal(64)
-
-    def set_wr_addr(self, m, addr, mask):
-        # this gets complicated: actually a FSM is needed which
-        # first checks dcache, then if that fails (in virt mode)
-        # it checks the MMU instead.
-        #m.d.comb += self.l_in.valid.eq(1)
-        #m.d.comb += self.l_in.addr.eq(addr)
-        #m.d.comb += self.l_in.load.eq(0)
-        m.d.comb += self.d_in.load.eq(0)
-        m.d.comb += self.d_in.byte_sel.eq(mask)
-        m.d.comb += self.d_in.addr.eq(addr)
-        # option to disable the cache entirely for write
-        if self.disable_cache:
-            m.d.comb += self.d_in.nc.eq(1)
-        return None
-
-    def set_rd_addr(self, m, addr, mask):
-        # this gets complicated: actually a FSM is needed which
-        # first checks dcache, then if that fails (in virt mode)
-        # it checks the MMU instead.
-        #m.d.comb += self.l_in.valid.eq(1)
-        #m.d.comb += self.l_in.load.eq(1)
-        #m.d.comb += self.l_in.addr.eq(addr)
-        m.d.comb += self.d_valid.eq(1)
-        m.d.comb += self.d_in.valid.eq(self.d_validblip)
-        m.d.comb += self.d_in.load.eq(1)
-        m.d.comb += self.d_in.byte_sel.eq(mask)
-        m.d.comb += self.d_in.addr.eq(addr)
-        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
-        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
-        with m.If(addr[28:] == Const(0xc, 4)):
-            m.d.comb += self.d_in.nc.eq(1)
-        # option to disable the cache entirely for read
-        if self.disable_cache:
-            m.d.comb += self.d_in.nc.eq(1)
-        return None #FIXME return value
-
-    def set_wr_data(self, m, data, wen):
-        # do the "blip" on write data
-        m.d.comb += self.d_valid.eq(1)
-        m.d.comb += self.d_in.valid.eq(self.d_validblip)
-        # put data into comb which is picked up in main elaborate()
-        m.d.comb += self.d_w_valid.eq(1)
-        m.d.comb += self.d_w_data.eq(data)
-        #m.d.sync += self.d_in.byte_sel.eq(wen) # this might not be needed
-        st_ok = self.d_out.valid # TODO indicates write data is valid
-        #st_ok = Const(1, 1)
-        return st_ok
-
-    def get_rd_data(self, m):
-        ld_ok = self.d_out.valid # indicates read data is valid
-        data = self.d_out.data   # actual read data
-        return data, ld_ok
-
-    """
-    if d_in.error = '1' then
-                if d_in.cache_paradox = '1' then
-                    -- signal an interrupt straight away
-                    exception := '1';
-                    dsisr(63 - 38) := not r2.req.load;
-                    -- XXX there is no architected bit for this
-                    -- (probably should be a machine check in fact)
-                    dsisr(63 - 35) := d_in.cache_paradox;
-                else
-                    -- Look up the translation for TLB miss
-                    -- and also for permission error and RC error
-                    -- in case the PTE has been updated.
-                    mmureq := '1';
-                    v.state := MMU_LOOKUP;
-                    v.stage1_en := '0';
-                end if;
-            end if;
-    """
-
-    def elaborate(self, platform):
-        m = super().elaborate(platform)
-        comb = m.d.comb
-
-        # create dcache module
-        m.submodules.dcache = dcache = self.dcache
-
-        # temp vars
-        d_out, l_out, dbus = self.d_out, self.l_out, self.dbus
-
-        with m.If(d_out.error):
-            with m.If(d_out.cache_paradox):
-                comb += self.derror.eq(1)
-                #  dsisr(63 - 38) := not r2.req.load;
-                #    -- XXX there is no architected bit for this
-                #    -- (probably should be a machine check in fact)
-                #    dsisr(63 - 35) := d_in.cache_paradox;
-            with m.Else():
-                # Look up the translation for TLB miss
-                # and also for permission error and RC error
-                # in case the PTE has been updated.
-                comb += self.mmureq.eq(1)
-                # v.state := MMU_LOOKUP;
-                # v.stage1_en := '0';
-
-        exc = self.pi.exc_o
-
-        #happened, alignment, instr_fault, invalid,
-        comb += exc.happened.eq(d_out.error | l_out.err)
-        comb += exc.invalid.eq(l_out.invalid)
-
-        #badtree, perm_error, rc_error, segment_fault
-        comb += exc.badtree.eq(l_out.badtree)
-        comb += exc.perm_error.eq(l_out.perm_error)
-        comb += exc.rc_error.eq(l_out.rc_error)
-        comb += exc.segment_fault.eq(l_out.segerr)
-
-        # TODO connect those signals somewhere
-        #print(d_out.valid)         -> no error
-        #print(d_out.store_done)    -> no error
-        #print(d_out.cache_paradox) -> ?
-        #print(l_out.done)          -> no error
-
-        # TODO some exceptions set SPRs
-
-        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
-        comb += dbus.adr.eq(dcache.wb_out.adr)
-        comb += dbus.dat_w.eq(dcache.wb_out.dat)
-        comb += dbus.sel.eq(dcache.wb_out.sel)
-        comb += dbus.cyc.eq(dcache.wb_out.cyc)
-        comb += dbus.stb.eq(dcache.wb_out.stb)
-        comb += dbus.we.eq(dcache.wb_out.we)
-
-        comb += dcache.wb_in.dat.eq(dbus.dat_r)
-        comb += dcache.wb_in.ack.eq(dbus.ack)
-        if hasattr(dbus, "stall"):
-            comb += dcache.wb_in.stall.eq(dbus.stall)
-
-        # create a blip (single pulse) on valid read/write request
-        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
-
-        # write out d data only when flag set
-        with m.If(self.d_w_valid):
-            m.d.sync += self.d_in.data.eq(self.d_w_data)
-        with m.Else():
-            m.d.sync += self.d_in.data.eq(0)
-
-        return m
-
-    def ports(self):
-        yield from super().ports()
-        # TODO: memory ports
-
-
-class TestSRAMLoadStore1(LoadStore1):
-    def __init__(self, pspec):
-        super().__init__(pspec)
-        pspec = self.pspec
-        # small 32-entry Memory
-        if (hasattr(pspec, "dmem_test_depth") and
-                isinstance(pspec.dmem_test_depth, int)):
-            depth = pspec.dmem_test_depth
-        else:
-            depth = 32
-        print("TestSRAMBareLoadStoreUnit depth", depth)
-
-        self.mem = Memory(width=pspec.reg_wid, depth=depth)
-
-    def elaborate(self, platform):
-        m = super().elaborate(platform)
-        comb = m.d.comb
-        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
-                                        features={'cti', 'bte', 'err'})
-        dbus = self.dbus
-
-        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
-        # note: SRAM is a target (slave), dbus is initiator (master)
-        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
-        fanins = ['dat_r', 'ack', 'err']
-        for fanout in fanouts:
-            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
-                  getattr(dbus, fanout).shape())
-            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
-            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
-        for fanin in fanins:
-            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
-        # connect address
-        comb += sram.bus.adr.eq(dbus.adr)
-
-        return m
+from soc.fu.ldst.loadstore import LoadStore1, TestSRAMLoadStore1
 
 
 class FSMMMUStage(ControlBase):
-- 
2.30.2