1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
12 to perform a TLB, then to go *back* to the cache and ask again.
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """

from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
                    Record, Memory,
                    Const)
from nmutil.iocontrol import RecordObject
from nmutil.util import rising_edge, Display
from enum import Enum, unique

from soc.experiment.dcache import DCache
from soc.experiment.pimem import PortInterfaceBase
from soc.experiment.mem_types import LoadStore1ToMMUType
from soc.experiment.mem_types import MMUToLoadStore1Type

from soc.minerva.wishbone import make_wb_layout
from soc.bus.sram import SRAM


@unique
class State(Enum):
    IDLE = 0        # ready for instruction
    ACK_WAIT = 1    # waiting for ack from dcache
    MMU_LOOKUP = 2  # waiting for MMU to look up translation
    TLBIE_WAIT = 3  # waiting for MMU to finish doing a tlbie

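# typical State transitions, as implemented in elaborate() below:
#
#   IDLE       -> ACK_WAIT    on the d_validblip request pulse
#   ACK_WAIT   -> IDLE        on completion, or on a cache-paradox error
#   ACK_WAIT   -> MMU_LOOKUP  on any other dcache error (e.g. a TLB miss)
#   MMU_LOOKUP -> ACK_WAIT    once the MMU lookup completes without an
#                             instruction fault (the request is re-run)
#   TLBIE_WAIT is currently a placeholder (its case body is "pass")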

# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    def __init__(self, name=None):
        RecordObject.__init__(self, name=name)

        self.load = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        # self.store_data = Signal(64)  # this is already sync (on a delay)
        self.byte_sel = Signal(8)
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.align_intr = Signal()


# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    def __init__(self, pspec):
        self.pspec = pspec
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        # these names are from the perspective of LoadStore1
        self.d_out = self.dcache.d_in   # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out   # out from dcache is in for LoadStore
        self.m_out = LoadStore1ToMMUType()  # out *to* MMU
        self.m_in = MMUToLoadStore1Type()   # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()
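        # (d_valid is driven combinatorially by the set_rd_addr and
        #  set_wr_data hooks below; d_validblip is its rising edge, taken
        #  in elaborate(), so the DCache sees a single-cycle "valid" pulse.
        #  d_w_valid gates the sync capture of store_data into d_out.data.)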

        # DSISR and DAR cached values. note that the MMU FSM is where
        # these are accessed by OP_MTSPR/OP_MFSPR, on behalf of LoadStore1.
        # by contrast microwatt has the spr set/get done *in* loadstore1.vhdl
        self.dsisr = Signal(64)
        self.dar = Signal(64)

        # state info for LD/ST
        self.done = Signal()
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.byte_sel = Signal(8)
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.state = Signal(State)
        self.instr_fault = Signal()
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.mode_32bit = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)

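    # the four methods below are the hooks expected by PortInterfaceBase:
    # pimem.py calls them with the address, byte-mask and data of each
    # request, and they fill in LDSTRequest (self.req) for the FSM in
    # elaborate() to act on.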
    def set_wr_addr(self, m, addr, mask, misalign, msr_pr):
        m.d.comb += self.req.load.eq(0)  # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr)   # problem-state ==> virt
        m.d.comb += self.req.align_intr.eq(misalign)

        dcbz = self.pi.is_dcbz
        m.d.comb += Display("is_dcbz %x", dcbz)
        m.d.comb += self.req.dcbz.eq(dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1)  # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr)   # problem-state ==> virt
        # BAD HACK! disable caching on LD when address is 0xCxxx_xxxx.
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
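        # (addr[28:] == 0xC matches bits 28-31 == 0xC with all higher bits
        #  zero, i.e. addresses 0xC000_0000 to 0xCFFF_FFFF inclusive)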
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None  # FIXME return value

    def set_wr_data(self, m, data, wen):
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen)  # this might not be needed
        st_ok = self.done  # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        ld_ok = self.done      # indicates read data is valid
        data = self.load_data  # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets overridden for OP_FETCH_FAILED
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.addr)

        # create a blip (single pulse) on valid read/write request.
        # this can be overridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        ldst_r = LDSTRequest("ldst_r")

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If(self.d_validblip & ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req)  # copy of LDSTRequest on "blip"
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += self.dsisr[63 - 38].eq(~self.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    m.d.comb += self.done.eq(~mmureq)  # done if not doing MMU
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.addr)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                    with m.If(self.load):
                        m.d.comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(1)
                with m.If(m_in.done):
                    with m.If(~self.instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.addr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        sync += Display("MMU_LOOKUP, exception %x", self.addr)
                        # instruction lookup fault: store address in DAR
                        comb += exc.happened.eq(1)
                        sync += self.dar.eq(self.addr)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown
                    comb += exception.eq(1)
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error)
                    sync += self.dsisr[63 - 38].eq(self.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)

            with m.Case(State.TLBIE_WAIT):
                pass

        # alignment error: store address in DAR
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
            sync += self.dar.eq(self.addr)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through, eventually, to the TRAP
        # pipeline, via PowerDecoder2.
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.wb_out.adr)
        comb += dbus.dat_w.eq(dcache.wb_out.dat)
        comb += dbus.sel.eq(dcache.wb_out.sel)
        comb += dbus.cyc.eq(dcache.wb_out.cyc)
        comb += dbus.stb.eq(dcache.wb_out.stb)
        comb += dbus.we.eq(dcache.wb_out.we)

        comb += dcache.wb_in.dat.eq(dbus.dat_r)
        comb += dcache.wb_in.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.wb_in.stall.eq(dbus.stall)

        # update d_out.data when the write-valid flag is set
        with m.If(self.d_w_valid):
            m.d.sync += d_out.data.eq(self.store_data)
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
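        # during the single "blip" cycle the DCache request fields come
        # straight from self.req (combinatorially, in the same cycle as
        # the blip); on every other cycle they come from ldst_r, the copy
        # latched on that blip in the IDLE state above.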
        with m.If(self.d_validblip):
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.addr.eq(self.req.addr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            m.d.comb += self.align_intr.eq(self.req.align_intr)
        with m.Else():
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.addr.eq(ldst_r.addr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            m.d.comb += self.align_intr.eq(ldst_r.align_intr)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        m.d.comb += self.load_data.eq(d_in.data)
        m.d.comb += d_out.addr.eq(self.addr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.instr_fault)
        m.d.comb += m_out.load.eq(ldst_r.load)
        # m_out.priv <= r.priv_mode; TODO
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        yield from super().ports()
        # TODO: memory ports


class TestSRAMLoadStore1(LoadStore1):
    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small Memory for testing (default 32 entries)
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMLoadStore1 depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m

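
# ---------------------------------------------------------------------
# Illustrative sketch only (not part of the original module): roughly how
# TestSRAMLoadStore1 might be instantiated to dump RTLIL for inspection.
# The SimpleNamespace standing in for pspec, and the exact attribute set
# it needs (reg_wid, addr_wid, mask_wid, dmem_test_depth, ...), are
# assumptions here; a real pspec, such as the one built by the soc test
# suite, would normally be passed in instead.
#
#     from types import SimpleNamespace
#     from nmigen.cli import rtlil
#
#     pspec = SimpleNamespace(reg_wid=64, addr_wid=48, mask_wid=8,
#                             dmem_test_depth=32)
#     dut = TestSRAMLoadStore1(pspec)
#     vl = rtlil.convert(dut, ports=list(dut.ports()))
#     with open("test_loadstore1.il", "w") as f:
#         f.write(vl)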