src/soc/fu/ldst/loadstore.py

   1 """LoadStore1 FSM.
   2
   3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
   4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
   5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
   6 by way of PortInterface.  PortInterface is where things need extending,
   7 such as adding dcbz support, etc.
   8
   9 this module basically handles "pure" load / store operations, and
  10 its first job is to ask the D-Cache for the data.  if that fails,
  11 the second task (if virtual memory is enabled) is to ask the MMU
  12 to perform a TLB lookup, then to go *back* to the cache and ask again.
  13
  14 Links:
  15
  16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
  17
  18 """
  19
  20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
  21                     Record, Memory,
  22                     Const)
  23 from nmutil.iocontrol import RecordObject
  24 from nmutil.util import rising_edge, Display
  25 from enum import Enum, unique
  26
  27 from soc.experiment.dcache import DCache
  28 from soc.experiment.pimem import PortInterfaceBase
  29 from soc.experiment.mem_types import LoadStore1ToMMUType
  30 from soc.experiment.mem_types import MMUToLoadStore1Type
  31
  32 from soc.minerva.wishbone import make_wb_layout
  33 from soc.bus.sram import SRAM
  34 from nmutil.util import Display
  35
  36
  37 @unique
  38 class State(Enum):
  39     IDLE = 0       # ready for instruction
  40     ACK_WAIT = 1   # waiting for ack from dcache
  41     MMU_LOOKUP = 2 # waiting for MMU to look up translation
  42     TLBIE_WAIT = 3 # waiting for MMU to finish doing a tlbie
  43
  44
  45 # captures the LDSTRequest from the PortInterface, which "blips" most
  46 # of this at us (pipeline-style).
  47 class LDSTRequest(RecordObject):
  48     def __init__(self, name=None):
  49         RecordObject.__init__(self, name=name)
  50
  51         self.load          = Signal()
  52         self.dcbz          = Signal()
  53         self.addr          = Signal(64)
  54         # self.store_data    = Signal(64) # this is already sync (on a delay)
  55         self.byte_sel      = Signal(8)
  56         self.nc            = Signal()              # non-cacheable access
  57         self.virt_mode     = Signal()
  58         self.priv_mode     = Signal()
  59         self.align_intr    = Signal()
  60
  61 # glue logic for microwatt mmu and dcache
  62 class LoadStore1(PortInterfaceBase):
  63     def __init__(self, pspec):
  64         self.pspec = pspec
  65         self.disable_cache = (hasattr(pspec, "disable_cache") and
  66                               pspec.disable_cache == True)
  67         regwid = pspec.reg_wid
  68         addrwid = pspec.addr_wid
  69
  70         super().__init__(regwid, addrwid)
  71         self.dcache = DCache()
  72         # these names are from the perspective of here (LoadStore1)
  73         self.d_out  = self.dcache.d_in     # in to dcache is out for LoadStore
  74         self.d_in = self.dcache.d_out      # out from dcache is in for LoadStore
  75         self.m_out  = LoadStore1ToMMUType() # out *to* MMU
  76         self.m_in = MMUToLoadStore1Type()   # in *from* MMU
  77         self.req = LDSTRequest(name="ldst_req")
  78
  79         # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
  80         self.dbus = Record(make_wb_layout(pspec))
  81
  82         # for creating a single clock blip to DCache
  83         self.d_valid = Signal()
  84         self.d_w_valid = Signal()
  85         self.d_validblip = Signal()
  86
  87         # DSISR and DAR cached values.  note that the MMU FSM is where
  88         # these are accessed by OP_MTSPR/OP_MFSPR, on behalf of LoadStore1.
  89         # by contrast microwatt has the spr set/get done *in* loadstore1.vhdl
  90         self.dsisr = Signal(64)
  91         self.dar = Signal(64)
  92
  93         # state info for LD/ST
  94         self.done          = Signal()
  95         # latch most of the input request
  96         self.load          = Signal()
  97         self.tlbie         = Signal()
  98         self.dcbz          = Signal()
  99         self.addr          = Signal(64)
 100         self.store_data    = Signal(64)
 101         self.load_data     = Signal(64)
 102         self.byte_sel      = Signal(8)
 103         #self.xerc         : xer_common_t;
 104         #self.reserve       = Signal()
 105         #self.atomic        = Signal()
 106         #self.atomic_last   = Signal()
 107         #self.rc            = Signal()
 108         self.nc            = Signal()              # non-cacheable access
 109         self.virt_mode     = Signal()
 110         self.priv_mode     = Signal()
 111         self.state        = Signal(State)
 112         self.instr_fault   = Signal()
 113         self.align_intr    = Signal()
 114         self.busy          = Signal()
 115         self.wait_dcache   = Signal()
 116         self.wait_mmu      = Signal()
 117         #self.mode_32bit    = Signal()
 118         #self.intr_vec     : integer range 0 to 16#fff#;
 119         #self.nia           = Signal(64)
 120         #self.srr1          = Signal(16)
 121
 122     def set_dcbz_addr(self, m, addr):
 123         m.d.comb += self.req.load.eq(0) #not a load operation
 124         m.d.comb += self.req.dcbz.eq(1)
 125         #m.d.comb += self.req.byte_sel.eq(mask)
 126         m.d.comb += self.req.addr.eq(addr)
 127         m.d.comb += Display("set_dcbz_addr %i",addr)
 128         #m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 129         #m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 130         #m.d.comb += self.req.align_intr.eq(misalign)
 131         return None
 132
 133     def set_wr_addr(self, m, addr, mask, misalign, msr_pr):
 134         m.d.comb += self.req.load.eq(0) # store operation
 135         m.d.comb += self.req.byte_sel.eq(mask)
 136         m.d.comb += self.req.addr.eq(addr)
 137         m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 138         m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 139         m.d.comb += self.req.align_intr.eq(misalign)
 140
 141         dcbz = self.pi.is_dcbz
 142         with m.If(dcbz):
 143             m.d.comb += Display("set_wr_addr: is_dcbz")
 144         m.d.comb += self.req.dcbz.eq(dcbz)
 145
 146         # option to disable the cache entirely for write
 147         if self.disable_cache:
 148             m.d.comb += self.req.nc.eq(1)
 149         return None
 150
 151     def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
 152         m.d.comb += self.d_valid.eq(1)
 153         m.d.comb += self.req.load.eq(1) # load operation
 154         m.d.comb += self.req.byte_sel.eq(mask)
 155         m.d.comb += self.req.align_intr.eq(misalign)
 156         m.d.comb += self.req.addr.eq(addr)
 157         m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 158         m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 159         # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
 160         # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
 161         with m.If(addr[28:] == Const(0xc, 4)):
 162             m.d.comb += self.req.nc.eq(1)
 163         # option to disable the cache entirely for read
 164         if self.disable_cache:
 165             m.d.comb += self.req.nc.eq(1)
 166         return None #FIXME return value
 167
 168     def set_wr_data(self, m, data, wen):
 169         # do the "blip" on write data
 170         m.d.comb += self.d_valid.eq(1)
 171         # put data into comb which is picked up in main elaborate()
 172         m.d.comb += self.d_w_valid.eq(1)
 173         m.d.comb += self.store_data.eq(data)
 174         #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
 175         st_ok = self.done # TODO indicates write data is valid
 176         return st_ok
 177
 178     def get_rd_data(self, m):
 179         ld_ok = self.done     # indicates read data is valid
 180         data = self.load_data # actual read data
 181         return data, ld_ok
 182
 183     def elaborate(self, platform):
 184         m = super().elaborate(platform)
 185         comb, sync = m.d.comb, m.d.sync
 186
 187         # create dcache module
 188         m.submodules.dcache = dcache = self.dcache
 189
 190         # temp vars
 191         d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
 192         m_out, m_in = self.m_out, self.m_in
 193         exc = self.pi.exc_o
 194         exception = exc.happened
 195         mmureq = Signal()
 196
 197         # copy of address, but gets over-ridden for OP_FETCH_FAILED
 198         maddr = Signal(64)
 199         m.d.comb += maddr.eq(self.addr)
 200
 201         # create a blip (single pulse) on valid read/write request
 202         # this can be over-ridden in the FSM to get dcache to re-run
 203         # a request when MMU_LOOKUP completes.
 204         m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
 205         ldst_r = LDSTRequest("ldst_r")
 206
 207         # fsm skeleton
 208         with m.Switch(self.state):
 209             with m.Case(State.IDLE):
 210                 with m.If(self.d_validblip & ~exc.happened):
 211                     comb += self.busy.eq(1)
 212                     sync += self.state.eq(State.ACK_WAIT)
 213                     sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
 214                 with m.Else():
 215                     sync += ldst_r.eq(0)
 216
 217             # waiting for completion
 218             with m.Case(State.ACK_WAIT):
 219                 comb += self.busy.eq(~exc.happened)
 220
 221                 with m.If(d_in.error):
 222                     # cache error is not necessarily "final", it could
 223                     # be that it was just a TLB miss
 224                     with m.If(d_in.cache_paradox):
 225                         comb += exception.eq(1)
 226                         sync += self.state.eq(State.IDLE)
 227                         sync += ldst_r.eq(0)
 228                         sync += self.dsisr[63 - 38].eq(~self.load)
 229                         # XXX there is no architected bit for this
 230                         # (probably should be a machine check in fact)
 231                         sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)
 232
 233                     with m.Else():
 234                         # Look up the translation for TLB miss
 235                         # and also for permission error and RC error
 236                         # in case the PTE has been updated.
 237                         comb += mmureq.eq(1)
 238                         sync += self.state.eq(State.MMU_LOOKUP)
 239                 with m.If(d_in.valid):
 240                     m.d.comb += self.done.eq(~mmureq) # done if not doing MMU
 241                     with m.If(self.done):
 242                         sync += Display("ACK_WAIT, done %x", self.addr)
 243                     sync += self.state.eq(State.IDLE)
 244                     sync += ldst_r.eq(0)
 245                     with m.If(self.load):
 246                         m.d.comb += self.load_data.eq(d_in.data)
 247
 248             # waiting here for the MMU TLB lookup to complete.
 249             # either re-try the dcache lookup or throw MMU exception
 250             with m.Case(State.MMU_LOOKUP):
 251                 comb += self.busy.eq(1)
 252                 with m.If(m_in.done):
 253                     with m.If(~self.instr_fault):
 254                         sync += Display("MMU_LOOKUP, done %x -> %x",
 255                                         self.addr, d_out.addr)
 256                         # retry the request now that the MMU has
 257                         # installed a TLB entry, if not exception raised
 258                         m.d.comb += self.d_out.valid.eq(~exception)
 259                         sync += self.state.eq(State.ACK_WAIT)
 260                         sync += ldst_r.eq(0)
 261                     with m.Else():
 262                         sync += Display("MMU_LOOKUP, exception %x", self.addr)
 263                         # instruction lookup fault: store address in DAR
 264                         comb += exc.happened.eq(1)
 265                         sync += self.dar.eq(self.addr)
 266
 267                 with m.If(m_in.err):
 268                     # MMU RADIX exception thrown
 269                     comb += exception.eq(1)
 270                     sync += self.dsisr[63 - 33].eq(m_in.invalid)
 271                     sync += self.dsisr[63 - 36].eq(m_in.perm_error)
 272                     sync += self.dsisr[63 - 38].eq(self.load)
 273                     sync += self.dsisr[63 - 44].eq(m_in.badtree)
 274                     sync += self.dsisr[63 - 45].eq(m_in.rc_error)
 275
 276             with m.Case(State.TLBIE_WAIT):
 277                 pass
 278
 279         # alignment error: store address in DAR
 280         with m.If(self.align_intr):
 281             comb += exc.happened.eq(1)
 282             sync += self.dar.eq(self.addr)
 283
 284         # happened, alignment, instr_fault, invalid.
 285         # note that all of these flow through - eventually to the TRAP
 286         # pipeline, via PowerDecoder2.
 287         comb += exc.invalid.eq(m_in.invalid)
 288         comb += exc.alignment.eq(self.align_intr)
 289         comb += exc.instr_fault.eq(self.instr_fault)
 290         # badtree, perm_error, rc_error, segment_fault
 291         comb += exc.badtree.eq(m_in.badtree)
 292         comb += exc.perm_error.eq(m_in.perm_error)
 293         comb += exc.rc_error.eq(m_in.rc_error)
 294         comb += exc.segment_fault.eq(m_in.segerr)
 295
 296         # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
 297         comb += dbus.adr.eq(dcache.wb_out.adr)
 298         comb += dbus.dat_w.eq(dcache.wb_out.dat)
 299         comb += dbus.sel.eq(dcache.wb_out.sel)
 300         comb += dbus.cyc.eq(dcache.wb_out.cyc)
 301         comb += dbus.stb.eq(dcache.wb_out.stb)
 302         comb += dbus.we.eq(dcache.wb_out.we)
 303
 304         comb += dcache.wb_in.dat.eq(dbus.dat_r)
 305         comb += dcache.wb_in.ack.eq(dbus.ack)
 306         if hasattr(dbus, "stall"):
 307             comb += dcache.wb_in.stall.eq(dbus.stall)
 308
 309         # update out d data when flag set
 310         with m.If(self.d_w_valid):
 311             m.d.sync += d_out.data.eq(self.store_data)
 312         #with m.Else():
 313         #    m.d.sync += d_out.data.eq(0)
 314         # unit test passes with that change
 315
 316         # this must move into the FSM, conditionally noticing that
 317         # the "blip" comes from self.d_validblip.
 318         # task 1: look up in dcache
 319         # task 2: if dcache fails, look up in MMU.
 320         # do **NOT** confuse the two.
 321         with m.If(self.d_validblip):
 322             m.d.comb += self.d_out.valid.eq(~exc.happened)
 323             m.d.comb += d_out.load.eq(self.req.load)
 324             m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
 325             m.d.comb += self.addr.eq(self.req.addr)
 326             m.d.comb += d_out.nc.eq(self.req.nc)
 327             m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
 328             m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
 329             m.d.comb += self.align_intr.eq(self.req.align_intr)
 330             #m.d.comb += Display("validblip dcbz=%i addr=%x",self.req.dcbz,self.req.addr)
 331             m.d.comb += d_out.dcbz.eq(self.req.dcbz)
 332         with m.Else():
 333             m.d.comb += d_out.load.eq(ldst_r.load)
 334             m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
 335             m.d.comb += self.addr.eq(ldst_r.addr)
 336             m.d.comb += d_out.nc.eq(ldst_r.nc)
 337             m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
 338             m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
 339             m.d.comb += self.align_intr.eq(ldst_r.align_intr)
 340             #m.d.comb += Display("no_validblip dcbz=%i addr=%x",ldst_r.dcbz,ldst_r.addr)
 341             m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)
 342
 343         # XXX these should be possible to remove but for some reason
 344         # cannot be... yet. TODO, investigate
 345         m.d.comb += self.load_data.eq(d_in.data)
 346         m.d.comb += d_out.addr.eq(self.addr)
 347
 348         # Update outputs to MMU
 349         m.d.comb += m_out.valid.eq(mmureq)
 350         m.d.comb += m_out.iside.eq(self.instr_fault)
 351         m.d.comb += m_out.load.eq(ldst_r.load)
 352         # m_out.priv <= r.priv_mode; TODO
 353         m.d.comb += m_out.tlbie.eq(self.tlbie)
 354         # m_out.mtspr <= mmu_mtspr; # TODO
 355         # m_out.sprn <= sprn; # TODO
 356         m.d.comb += m_out.addr.eq(maddr)
 357         # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
 358         # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate
 359
 360         return m
 361
 362     def ports(self):
 363         yield from super().ports()
 364         # TODO: memory ports
 365
 366
 367 class TestSRAMLoadStore1(LoadStore1):
 368     def __init__(self, pspec):
 369         super().__init__(pspec)
 370         pspec = self.pspec
 371         # small 32-entry Memory
 372         if (hasattr(pspec, "dmem_test_depth") and
 373                 isinstance(pspec.dmem_test_depth, int)):
 374             depth = pspec.dmem_test_depth
 375         else:
 376             depth = 32
 377         print("TestSRAMBareLoadStoreUnit depth", depth)
 378
 379         self.mem = Memory(width=pspec.reg_wid, depth=depth)
 380
 381     def elaborate(self, platform):
 382         m = super().elaborate(platform)
 383         comb = m.d.comb
 384         m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
 385                                         features={'cti', 'bte', 'err'})
 386         dbus = self.dbus
 387
 388         # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
 389         # note: SRAM is a target (slave), dbus is initiator (master)
 390         fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
 391         fanins = ['dat_r', 'ack', 'err']
 392         for fanout in fanouts:
 393             print("fanout", fanout, getattr(sram.bus, fanout).shape(),
 394                   getattr(dbus, fanout).shape())
 395             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
 396             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
 397         for fanin in fanins:
 398             comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
 399         # connect address
 400         comb += sram.bus.adr.eq(dbus.adr)
 401
 402         return m
 403