src/soc/fu/ldst/loadstore.py

   1 """LoadStore1 FSM.
   2
   3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
   4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
   5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
   6 by way of PortInterface.  PortInterface is where things need extending,
   7 such as adding dcbz support, etc.
   8
   9 this module basically handles "pure" load / store operations, and
  10 its first job is to ask the D-Cache for the data.  if that fails,
  11 the second task (if virtual memory is enabled) is to ask the MMU
  12 to perform a TLB lookup, then to go *back* to the cache and ask again.
  13
  14 Links:
  15
  16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
  17
  18 """
  19
  20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
  21                     Record, Memory,
  22                     Const)
  23 from nmutil.iocontrol import RecordObject
  24 from nmutil.util import rising_edge, Display
  25 from enum import Enum, unique
  26
  27 from soc.experiment.dcache import DCache
  28 from soc.experiment.pimem import PortInterfaceBase
  29 from soc.experiment.mem_types import LoadStore1ToMMUType
  30 from soc.experiment.mem_types import MMUToLoadStore1Type
  31
  32 from soc.minerva.wishbone import make_wb_layout
  33 from soc.bus.sram import SRAM
  34 from nmutil.util import Display
  35
  36
  37 @unique
  38 class State(Enum):
  39     IDLE = 0       # ready for instruction
  40     ACK_WAIT = 1   # waiting for ack from dcache
  41     MMU_LOOKUP = 2 # waiting for MMU to look up translation
  42     TLBIE_WAIT = 3 # waiting for MMU to finish doing a tlbie
  43
  44
  45 # captures the LDSTRequest from the PortInterface, which "blips" most
  46 # of this at us (pipeline-style).
  47 class LDSTRequest(RecordObject):
  48     def __init__(self, name=None):
  49         RecordObject.__init__(self, name=name)
  50
  51         self.load          = Signal()
  52         self.dcbz          = Signal()
  53         self.addr          = Signal(64)
  54         # self.store_data    = Signal(64) # this is already sync (on a delay)
  55         self.byte_sel      = Signal(8)
  56         self.nc            = Signal()              # non-cacheable access
  57         self.virt_mode     = Signal()
  58         self.priv_mode     = Signal()
  59         self.align_intr    = Signal()
  60
  61 # glue logic for microwatt mmu and dcache
  62 class LoadStore1(PortInterfaceBase):
  63     def __init__(self, pspec):
  64         self.pspec = pspec
  65         self.disable_cache = (hasattr(pspec, "disable_cache") and
  66                               pspec.disable_cache == True)
  67         regwid = pspec.reg_wid
  68         addrwid = pspec.addr_wid
  69
  70         super().__init__(regwid, addrwid)
  71         self.dcache = DCache()
  72         # these names are from the perspective of here (LoadStore1)
  73         self.d_out  = self.dcache.d_in     # in to dcache is out for LoadStore
  74         self.d_in = self.dcache.d_out      # out from dcache is in for LoadStore
  75         self.m_out  = LoadStore1ToMMUType() # out *to* MMU
  76         self.m_in = MMUToLoadStore1Type()   # in *from* MMU
  77         self.req = LDSTRequest(name="ldst_req")
  78
  79         # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
  80         self.dbus = Record(make_wb_layout(pspec))
  81
  82         # for creating a single clock blip to DCache
  83         self.d_valid = Signal()
  84         self.d_w_valid = Signal()
  85         self.d_validblip = Signal()
  86
  87         # DSISR and DAR cached values.  note that the MMU FSM is where
  88         # these are accessed by OP_MTSPR/OP_MFSPR, on behalf of LoadStore1.
  89         # by contrast microwatt has the spr set/get done *in* loadstore1.vhdl
  90         self.dsisr = Signal(64)
  91         self.dar = Signal(64)
  92
  93         # state info for LD/ST
  94         self.done          = Signal()
  95         # latch most of the input request
  96         self.load          = Signal()
  97         self.tlbie         = Signal()
  98         self.dcbz          = Signal()
  99         self.addr          = Signal(64)
 100         self.store_data    = Signal(64)
 101         self.load_data     = Signal(64)
 102         self.byte_sel      = Signal(8)
 103         #self.xerc         : xer_common_t;
 104         #self.reserve       = Signal()
 105         #self.atomic        = Signal()
 106         #self.atomic_last   = Signal()
 107         #self.rc            = Signal()
 108         self.nc            = Signal()              # non-cacheable access
 109         self.virt_mode     = Signal()
 110         self.priv_mode     = Signal()
 111         self.state        = Signal(State)
 112         self.instr_fault   = Signal()
 113         self.align_intr    = Signal()
 114         self.busy          = Signal()
 115         self.wait_dcache   = Signal()
 116         self.wait_mmu      = Signal()
 117         #self.mode_32bit    = Signal()
 118         #self.intr_vec     : integer range 0 to 16#fff#;
 119         #self.nia           = Signal(64)
 120         #self.srr1          = Signal(16)
 121
 122     def set_wr_addr(self, m, addr, mask, misalign, msr_pr, is_dcbz):
 123         m.d.comb += self.req.load.eq(0) # store operation
 124         m.d.comb += self.req.byte_sel.eq(mask)
 125         m.d.comb += self.req.addr.eq(addr)
 126         m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 127         m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 128         m.d.comb += self.req.align_intr.eq(misalign)
 129         m.d.comb += self.req.dcbz.eq(is_dcbz)
 130
 131         m.d.comb += Display("set_wr_addr %i dcbz %i",addr,is_dcbz)
 132
 133         # option to disable the cache entirely for write
 134         if self.disable_cache:
 135             m.d.comb += self.req.nc.eq(1)
 136         return None
 137
 138     def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
 139         m.d.comb += self.d_valid.eq(1)
 140         m.d.comb += self.req.load.eq(1) # load operation
 141         m.d.comb += self.req.byte_sel.eq(mask)
 142         m.d.comb += self.req.align_intr.eq(misalign)
 143         m.d.comb += self.req.addr.eq(addr)
 144         m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 145         m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 146         # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
 147         # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
 148         with m.If(addr[28:] == Const(0xc, 4)):
 149             m.d.comb += self.req.nc.eq(1)
 150         # option to disable the cache entirely for read
 151         if self.disable_cache:
 152             m.d.comb += self.req.nc.eq(1)
 153         return None #FIXME return value
 154
 155     def set_wr_data(self, m, data, wen):
 156         # do the "blip" on write data
 157         m.d.comb += self.d_valid.eq(1)
 158         # put data into comb which is picked up in main elaborate()
 159         m.d.comb += self.d_w_valid.eq(1)
 160         m.d.comb += self.store_data.eq(data)
 161         #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
 162         st_ok = self.done # TODO indicates write data is valid
 163         return st_ok
 164
 165     def get_rd_data(self, m):
 166         ld_ok = self.done     # indicates read data is valid
 167         data = self.load_data # actual read data
 168         return data, ld_ok
 169
 170     def elaborate(self, platform):
 171         m = super().elaborate(platform)
 172         comb, sync = m.d.comb, m.d.sync
 173
 174         # create dcache module
 175         m.submodules.dcache = dcache = self.dcache
 176
 177         # temp vars
 178         d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
 179         m_out, m_in = self.m_out, self.m_in
 180         exc = self.pi.exc_o
 181         exception = exc.happened
 182         mmureq = Signal()
 183
 184         # copy of address, but gets over-ridden for OP_FETCH_FAILED
 185         maddr = Signal(64)
 186         m.d.comb += maddr.eq(self.addr)
 187
 188         # create a blip (single pulse) on valid read/write request
 189         # this can be over-ridden in the FSM to get dcache to re-run
 190         # a request when MMU_LOOKUP completes.
 191         m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
 192         ldst_r = LDSTRequest("ldst_r")
 193
 194         # fsm skeleton
 195         with m.Switch(self.state):
 196             with m.Case(State.IDLE):
 197                 with m.If(self.d_validblip & ~exc.happened):
 198                     comb += self.busy.eq(1)
 199                     sync += self.state.eq(State.ACK_WAIT)
 200                     sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
 201                 with m.Else():
 202                     sync += ldst_r.eq(0)
 203
 204             # waiting for completion
 205             with m.Case(State.ACK_WAIT):
 206                 comb += self.busy.eq(~exc.happened)
 207
 208                 with m.If(d_in.error):
 209                     # cache error is not necessarily "final", it could
 210                     # be that it was just a TLB miss
 211                     with m.If(d_in.cache_paradox):
 212                         comb += exception.eq(1)
 213                         sync += self.state.eq(State.IDLE)
 214                         sync += ldst_r.eq(0)
 215                         sync += self.dsisr[63 - 38].eq(~self.load)
 216                         # XXX there is no architected bit for this
 217                         # (probably should be a machine check in fact)
 218                         sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)
 219
 220                     with m.Else():
 221                         # Look up the translation for TLB miss
 222                         # and also for permission error and RC error
 223                         # in case the PTE has been updated.
 224                         comb += mmureq.eq(1)
 225                         sync += self.state.eq(State.MMU_LOOKUP)
 226                 with m.If(d_in.valid):
 227                     m.d.comb += self.done.eq(~mmureq) # done if not doing MMU
 228                     with m.If(self.done):
 229                         sync += Display("ACK_WAIT, done %x", self.addr)
 230                     sync += self.state.eq(State.IDLE)
 231                     sync += ldst_r.eq(0)
 232                     with m.If(self.load):
 233                         m.d.comb += self.load_data.eq(d_in.data)
 234
 235             # waiting here for the MMU TLB lookup to complete.
 236             # either re-try the dcache lookup or throw MMU exception
 237             with m.Case(State.MMU_LOOKUP):
 238                 comb += self.busy.eq(1)
 239                 with m.If(m_in.done):
 240                     with m.If(~self.instr_fault):
 241                         sync += Display("MMU_LOOKUP, done %x -> %x",
 242                                         self.addr, d_out.addr)
 243                         # retry the request now that the MMU has
 244                         # installed a TLB entry, if not exception raised
 245                         m.d.comb += self.d_out.valid.eq(~exception)
 246                         sync += self.state.eq(State.ACK_WAIT)
 247                         sync += ldst_r.eq(0)
 248                     with m.Else():
 249                         sync += Display("MMU_LOOKUP, exception %x", self.addr)
 250                         # instruction lookup fault: store address in DAR
 251                         comb += exc.happened.eq(1)
 252                         sync += self.dar.eq(self.addr)
 253
 254                 with m.If(m_in.err):
 255                     # MMU RADIX exception thrown
 256                     comb += exception.eq(1)
 257                     sync += self.dsisr[63 - 33].eq(m_in.invalid)
 258                     sync += self.dsisr[63 - 36].eq(m_in.perm_error)
 259                     sync += self.dsisr[63 - 38].eq(self.load)
 260                     sync += self.dsisr[63 - 44].eq(m_in.badtree)
 261                     sync += self.dsisr[63 - 45].eq(m_in.rc_error)
 262
 263             with m.Case(State.TLBIE_WAIT):
 264                 pass
 265
 266         # alignment error: store address in DAR
 267         with m.If(self.align_intr):
 268             comb += exc.happened.eq(1)
 269             sync += self.dar.eq(self.addr)
 270
 271         # happened, alignment, instr_fault, invalid.
 272         # note that all of these flow through - eventually to the TRAP
 273         # pipeline, via PowerDecoder2.
 274         comb += exc.invalid.eq(m_in.invalid)
 275         comb += exc.alignment.eq(self.align_intr)
 276         comb += exc.instr_fault.eq(self.instr_fault)
 277         # badtree, perm_error, rc_error, segment_fault
 278         comb += exc.badtree.eq(m_in.badtree)
 279         comb += exc.perm_error.eq(m_in.perm_error)
 280         comb += exc.rc_error.eq(m_in.rc_error)
 281         comb += exc.segment_fault.eq(m_in.segerr)
 282
 283         # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
 284         comb += dbus.adr.eq(dcache.wb_out.adr)
 285         comb += dbus.dat_w.eq(dcache.wb_out.dat)
 286         comb += dbus.sel.eq(dcache.wb_out.sel)
 287         comb += dbus.cyc.eq(dcache.wb_out.cyc)
 288         comb += dbus.stb.eq(dcache.wb_out.stb)
 289         comb += dbus.we.eq(dcache.wb_out.we)
 290
 291         comb += dcache.wb_in.dat.eq(dbus.dat_r)
 292         comb += dcache.wb_in.ack.eq(dbus.ack)
 293         if hasattr(dbus, "stall"):
 294             comb += dcache.wb_in.stall.eq(dbus.stall)
 295
 296         # update out d data when flag set
 297         with m.If(self.d_w_valid):
 298             m.d.sync += d_out.data.eq(self.store_data)
 299         #with m.Else():
 300         #    m.d.sync += d_out.data.eq(0)
 301         # unit test passes with that change
 302
 303         # this must move into the FSM, conditionally noticing that
 304         # the "blip" comes from self.d_validblip.
 305         # task 1: look up in dcache
 306         # task 2: if dcache fails, look up in MMU.
 307         # do **NOT** confuse the two.
 308         with m.If(self.d_validblip):
 309             m.d.comb += self.d_out.valid.eq(~exc.happened)
 310             m.d.comb += d_out.load.eq(self.req.load)
 311             m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
 312             m.d.comb += self.addr.eq(self.req.addr)
 313             m.d.comb += d_out.nc.eq(self.req.nc)
 314             m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
 315             m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
 316             m.d.comb += self.align_intr.eq(self.req.align_intr)
 317             #m.d.comb += Display("validblip dcbz=%i addr=%x",self.req.dcbz,self.req.addr)
 318             m.d.comb += d_out.dcbz.eq(self.req.dcbz)
 319         with m.Else():
 320             m.d.comb += d_out.load.eq(ldst_r.load)
 321             m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
 322             m.d.comb += self.addr.eq(ldst_r.addr)
 323             m.d.comb += d_out.nc.eq(ldst_r.nc)
 324             m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
 325             m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
 326             m.d.comb += self.align_intr.eq(ldst_r.align_intr)
 327             #m.d.comb += Display("no_validblip dcbz=%i addr=%x",ldst_r.dcbz,ldst_r.addr)
 328             m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)
 329
 330         # XXX these should be possible to remove but for some reason
 331         # cannot be... yet. TODO, investigate
 332         m.d.comb += self.load_data.eq(d_in.data)
 333         m.d.comb += d_out.addr.eq(self.addr)
 334
 335         # Update outputs to MMU
 336         m.d.comb += m_out.valid.eq(mmureq)
 337         m.d.comb += m_out.iside.eq(self.instr_fault)
 338         m.d.comb += m_out.load.eq(ldst_r.load)
 339         # m_out.priv <= r.priv_mode; TODO
 340         m.d.comb += m_out.tlbie.eq(self.tlbie)
 341         # m_out.mtspr <= mmu_mtspr; # TODO
 342         # m_out.sprn <= sprn; # TODO
 343         m.d.comb += m_out.addr.eq(maddr)
 344         # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
 345         # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate
 346
 347         return m
 348
 349     def ports(self):
 350         yield from super().ports()
 351         # TODO: memory ports
 352
 353
 354 class TestSRAMLoadStore1(LoadStore1):
 355     def __init__(self, pspec):
 356         super().__init__(pspec)
 357         pspec = self.pspec
 358         # small 32-entry Memory
 359         if (hasattr(pspec, "dmem_test_depth") and
 360                 isinstance(pspec.dmem_test_depth, int)):
 361             depth = pspec.dmem_test_depth
 362         else:
 363             depth = 32
 364         print("TestSRAMBareLoadStoreUnit depth", depth)
 365
 366         self.mem = Memory(width=pspec.reg_wid, depth=depth)
 367
 368     def elaborate(self, platform):
 369         m = super().elaborate(platform)
 370         comb = m.d.comb
 371         m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
 372                                         features={'cti', 'bte', 'err'})
 373         dbus = self.dbus
 374
 375         # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
 376         # note: SRAM is a target (slave), dbus is initiator (master)
 377         fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
 378         fanins = ['dat_r', 'ack', 'err']
 379         for fanout in fanouts:
 380             print("fanout", fanout, getattr(sram.bus, fanout).shape(),
 381                   getattr(dbus, fanout).shape())
 382             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
 383             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
 384         for fanin in fanins:
 385             comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
 386         # connect address
 387         comb += sram.bus.adr.eq(dbus.adr)
 388
 389         return m
 390