src/soc/fu/ldst/loadstore.py

   1 """LoadStore1 FSM.
   2
   3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
   4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
   5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
   6 by way of PortInterface.  PortInterface is where things need extending,
   7 such as adding dcbz support, etc.
   8
   9 this module basically handles "pure" load / store operations, and
  10 its first job is to ask the D-Cache for the data.  if that fails,
  11 the second task (if virtual memory is enabled) is to ask the MMU
  12 to perform a TLB lookup, then to go *back* to the cache and ask again.
  13
  14 Links:
  15
  16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
  17
  18 """
  19
  20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
  21                     Record, Memory,
  22                     Const)
  23 from nmutil.iocontrol import RecordObject
  24 from nmutil.util import rising_edge, Display
  25 from enum import Enum, unique
  26
  27 from soc.experiment.dcache import DCache
  28 from soc.experiment.pimem import PortInterfaceBase
  29 from soc.experiment.mem_types import LoadStore1ToMMUType
  30 from soc.experiment.mem_types import MMUToLoadStore1Type
  31
  32 from soc.minerva.wishbone import make_wb_layout
  33 from soc.bus.sram import SRAM
  34 from nmutil.util import Display
  35
  36
  37 @unique
  38 class State(Enum):
  39     IDLE = 0       # ready for instruction
  40     ACK_WAIT = 1   # waiting for ack from dcache
  41     MMU_LOOKUP = 2 # waiting for MMU to look up translation
  42     TLBIE_WAIT = 3 # waiting for MMU to finish doing a tlbie
  43
  44
  45 # captures the LDSTRequest from the PortInterface, which "blips" most
  46 # of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    """Record holding the fields of a single load/store request.

    LoadStore1 drives one instance (``req``) combinatorially from its
    set_rd_addr / set_wr_addr methods, and its FSM keeps a registered
    copy (``ldst_r``) taken when the request "blip" is seen.
    """
    def __init__(self, name=None):
        RecordObject.__init__(self, name=name)

        self.load          = Signal()   # 1: load (set_rd_addr), 0: store
        self.dcbz          = Signal()   # set to 1 by the dcbz address setter
        self.addr          = Signal(64) # request address
        # self.store_data    = Signal(64) # this is already sync (on a delay)
        self.byte_sel      = Signal(8)  # driven from the "mask" argument
        self.nc            = Signal()              # non-cacheable access
        self.virt_mode     = Signal()   # driven from msr_pr
        self.priv_mode     = Signal()   # driven from ~msr_pr
        self.align_intr    = Signal()   # driven from the "misalign" argument
  60
  61 # glue logic for microwatt mmu and dcache
  62 class LoadStore1(PortInterfaceBase):
    def __init__(self, pspec):
        """Create the DCache, the MMU records and all LD/ST state signals.

        pspec must provide ``reg_wid`` and ``addr_wid`` (passed on to
        PortInterfaceBase) and is also handed to make_wb_layout() to
        build the ``dbus`` record.  An optional ``pspec.disable_cache``
        equal to True makes every request non-cacheable (see
        set_rd_addr / set_wr_addr).
        """
        self.pspec = pspec
        # only an attribute that compares equal to True enables this option
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        # these names are from the perspective of here (LoadStore1)
        self.d_out  = self.dcache.d_in     # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out      # out from dcache is in for LoadStore
        self.m_out  = LoadStore1ToMMUType() # out *to* MMU
        self.m_in = MMUToLoadStore1Type()   # in *from* MMU
        # combinatorial request, filled in by set_rd_addr / set_wr_addr
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()      # raised by set_rd_addr / set_wr_data
        self.d_w_valid = Signal()    # raised by set_wr_data only
        self.d_validblip = Signal()  # rising edge of d_valid (see elaborate)

        # DSISR and DAR cached values.  note that the MMU FSM is where
        # these are accessed by OP_MTSPR/OP_MFSPR, on behalf of LoadStore1.
        # by contrast microwatt has the spr set/get done *in* loadstore1.vhdl
        self.dsisr = Signal(64)
        self.dar = Signal(64)

        # state info for LD/ST
        self.done          = Signal()  # returned as ld_ok / st_ok
        # latch most of the input request
        # NOTE(review): of the signals below, load, tlbie, dcbz, byte_sel,
        # nc, virt_mode, priv_mode, instr_fault, wait_dcache and wait_mmu
        # are not assigned anywhere in this file - confirm whether another
        # module drives them.
        self.load          = Signal()
        self.tlbie         = Signal()
        self.dcbz          = Signal()
        self.addr          = Signal(64)
        self.store_data    = Signal(64)
        self.load_data     = Signal(64)
        self.byte_sel      = Signal(8)
        #self.xerc         : xer_common_t;
        #self.reserve       = Signal()
        #self.atomic        = Signal()
        #self.atomic_last   = Signal()
        #self.rc            = Signal()
        self.nc            = Signal()              # non-cacheable access
        self.virt_mode     = Signal()
        self.priv_mode     = Signal()
        self.state        = Signal(State)   # current FSM state
        self.instr_fault   = Signal()
        self.align_intr    = Signal()
        self.busy          = Signal()
        self.wait_dcache   = Signal()
        self.wait_mmu      = Signal()
        #self.mode_32bit    = Signal()
        #self.intr_vec     : integer range 0 to 16#fff#;
        #self.nia           = Signal(64)
        #self.srr1          = Signal(16)
 121
 122     # XXX please don't do it this way (and ask in future).
 123     # the exact same logic is required for setting store addresses
 124     # as for dcbz addresses, therefore why duplicate code?
 125     # it would be better to add an argument to set_wr_addr to
  126     # specify that it requires dcbz mode to be set.
 127     def __please_remove_and_use_set_wr_addr_instead_set_dcbz_addr(self, m, addr):
 128         m.d.comb += self.req.load.eq(0) #not a load operation
 129         m.d.comb += self.req.dcbz.eq(1)
 130         #m.d.comb += self.req.byte_sel.eq(mask)
 131         m.d.comb += self.req.addr.eq(addr)
 132         m.d.comb += Display("set_dcbz_addr %i",addr)
 133         #m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 134         #m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 135         #m.d.comb += self.req.align_intr.eq(misalign)
 136         return None
 137
 138     # XXX please add a dcbz argument to all set_wr_addr functions instead.
 139     def set_wr_addr(self, m, addr, mask, misalign, msr_pr):
 140         m.d.comb += self.req.load.eq(0) # store operation
 141         m.d.comb += self.req.byte_sel.eq(mask)
 142         m.d.comb += self.req.addr.eq(addr)
 143         m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 144         m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 145         m.d.comb += self.req.align_intr.eq(misalign)
 146
 147         # option to disable the cache entirely for write
 148         if self.disable_cache:
 149             m.d.comb += self.req.nc.eq(1)
 150         return None
 151
 152     def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
 153         m.d.comb += self.d_valid.eq(1)
 154         m.d.comb += self.req.load.eq(1) # load operation
 155         m.d.comb += self.req.byte_sel.eq(mask)
 156         m.d.comb += self.req.align_intr.eq(misalign)
 157         m.d.comb += self.req.addr.eq(addr)
 158         m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 159         m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 160         # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
 161         # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
 162         with m.If(addr[28:] == Const(0xc, 4)):
 163             m.d.comb += self.req.nc.eq(1)
 164         # option to disable the cache entirely for read
 165         if self.disable_cache:
 166             m.d.comb += self.req.nc.eq(1)
 167         return None #FIXME return value
 168
 169     def set_wr_data(self, m, data, wen):
 170         # do the "blip" on write data
 171         m.d.comb += self.d_valid.eq(1)
 172         # put data into comb which is picked up in main elaborate()
 173         m.d.comb += self.d_w_valid.eq(1)
 174         m.d.comb += self.store_data.eq(data)
 175         #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
 176         st_ok = self.done # TODO indicates write data is valid
 177         return st_ok
 178
 179     def get_rd_data(self, m):
 180         ld_ok = self.done     # indicates read data is valid
 181         data = self.load_data # actual read data
 182         return data, ld_ok
 183
 184     def elaborate(self, platform):
 185         m = super().elaborate(platform)
 186         comb, sync = m.d.comb, m.d.sync
 187
 188         # create dcache module
 189         m.submodules.dcache = dcache = self.dcache
 190
 191         # temp vars
 192         d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
 193         m_out, m_in = self.m_out, self.m_in
 194         exc = self.pi.exc_o
 195         exception = exc.happened
 196         mmureq = Signal()
 197
 198         # copy of address, but gets over-ridden for OP_FETCH_FAILED
 199         maddr = Signal(64)
 200         m.d.comb += maddr.eq(self.addr)
 201
 202         # create a blip (single pulse) on valid read/write request
 203         # this can be over-ridden in the FSM to get dcache to re-run
 204         # a request when MMU_LOOKUP completes.
 205         m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
 206         ldst_r = LDSTRequest("ldst_r")
 207
 208         # fsm skeleton
 209         with m.Switch(self.state):
 210             with m.Case(State.IDLE):
 211                 with m.If(self.d_validblip & ~exc.happened):
 212                     comb += self.busy.eq(1)
 213                     sync += self.state.eq(State.ACK_WAIT)
 214                     sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
 215                 with m.Else():
 216                     sync += ldst_r.eq(0)
 217
 218             # waiting for completion
 219             with m.Case(State.ACK_WAIT):
 220                 comb += self.busy.eq(~exc.happened)
 221
 222                 with m.If(d_in.error):
 223                     # cache error is not necessarily "final", it could
 224                     # be that it was just a TLB miss
 225                     with m.If(d_in.cache_paradox):
 226                         comb += exception.eq(1)
 227                         sync += self.state.eq(State.IDLE)
 228                         sync += ldst_r.eq(0)
 229                         sync += self.dsisr[63 - 38].eq(~self.load)
 230                         # XXX there is no architected bit for this
 231                         # (probably should be a machine check in fact)
 232                         sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)
 233
 234                     with m.Else():
 235                         # Look up the translation for TLB miss
 236                         # and also for permission error and RC error
 237                         # in case the PTE has been updated.
 238                         comb += mmureq.eq(1)
 239                         sync += self.state.eq(State.MMU_LOOKUP)
 240                 with m.If(d_in.valid):
 241                     m.d.comb += self.done.eq(~mmureq) # done if not doing MMU
 242                     with m.If(self.done):
 243                         sync += Display("ACK_WAIT, done %x", self.addr)
 244                     sync += self.state.eq(State.IDLE)
 245                     sync += ldst_r.eq(0)
 246                     with m.If(self.load):
 247                         m.d.comb += self.load_data.eq(d_in.data)
 248
 249             # waiting here for the MMU TLB lookup to complete.
 250             # either re-try the dcache lookup or throw MMU exception
 251             with m.Case(State.MMU_LOOKUP):
 252                 comb += self.busy.eq(1)
 253                 with m.If(m_in.done):
 254                     with m.If(~self.instr_fault):
 255                         sync += Display("MMU_LOOKUP, done %x -> %x",
 256                                         self.addr, d_out.addr)
 257                         # retry the request now that the MMU has
 258                         # installed a TLB entry, if not exception raised
 259                         m.d.comb += self.d_out.valid.eq(~exception)
 260                         sync += self.state.eq(State.ACK_WAIT)
 261                         sync += ldst_r.eq(0)
 262                     with m.Else():
 263                         sync += Display("MMU_LOOKUP, exception %x", self.addr)
 264                         # instruction lookup fault: store address in DAR
 265                         comb += exc.happened.eq(1)
 266                         sync += self.dar.eq(self.addr)
 267
 268                 with m.If(m_in.err):
 269                     # MMU RADIX exception thrown
 270                     comb += exception.eq(1)
 271                     sync += self.dsisr[63 - 33].eq(m_in.invalid)
 272                     sync += self.dsisr[63 - 36].eq(m_in.perm_error)
 273                     sync += self.dsisr[63 - 38].eq(self.load)
 274                     sync += self.dsisr[63 - 44].eq(m_in.badtree)
 275                     sync += self.dsisr[63 - 45].eq(m_in.rc_error)
 276
 277             with m.Case(State.TLBIE_WAIT):
 278                 pass
 279
 280         # alignment error: store address in DAR
 281         with m.If(self.align_intr):
 282             comb += exc.happened.eq(1)
 283             sync += self.dar.eq(self.addr)
 284
 285         # happened, alignment, instr_fault, invalid.
 286         # note that all of these flow through - eventually to the TRAP
 287         # pipeline, via PowerDecoder2.
 288         comb += exc.invalid.eq(m_in.invalid)
 289         comb += exc.alignment.eq(self.align_intr)
 290         comb += exc.instr_fault.eq(self.instr_fault)
 291         # badtree, perm_error, rc_error, segment_fault
 292         comb += exc.badtree.eq(m_in.badtree)
 293         comb += exc.perm_error.eq(m_in.perm_error)
 294         comb += exc.rc_error.eq(m_in.rc_error)
 295         comb += exc.segment_fault.eq(m_in.segerr)
 296
 297         # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
 298         comb += dbus.adr.eq(dcache.wb_out.adr)
 299         comb += dbus.dat_w.eq(dcache.wb_out.dat)
 300         comb += dbus.sel.eq(dcache.wb_out.sel)
 301         comb += dbus.cyc.eq(dcache.wb_out.cyc)
 302         comb += dbus.stb.eq(dcache.wb_out.stb)
 303         comb += dbus.we.eq(dcache.wb_out.we)
 304
 305         comb += dcache.wb_in.dat.eq(dbus.dat_r)
 306         comb += dcache.wb_in.ack.eq(dbus.ack)
 307         if hasattr(dbus, "stall"):
 308             comb += dcache.wb_in.stall.eq(dbus.stall)
 309
 310         # update out d data when flag set
 311         with m.If(self.d_w_valid):
 312             m.d.sync += d_out.data.eq(self.store_data)
 313         #with m.Else():
 314         #    m.d.sync += d_out.data.eq(0)
 315         # unit test passes with that change
 316
 317         # this must move into the FSM, conditionally noticing that
 318         # the "blip" comes from self.d_validblip.
 319         # task 1: look up in dcache
 320         # task 2: if dcache fails, look up in MMU.
 321         # do **NOT** confuse the two.
 322         with m.If(self.d_validblip):
 323             m.d.comb += self.d_out.valid.eq(~exc.happened)
 324             m.d.comb += d_out.load.eq(self.req.load)
 325             m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
 326             m.d.comb += self.addr.eq(self.req.addr)
 327             m.d.comb += d_out.nc.eq(self.req.nc)
 328             m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
 329             m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
 330             m.d.comb += self.align_intr.eq(self.req.align_intr)
 331             #m.d.comb += Display("validblip dcbz=%i addr=%x",self.req.dcbz,self.req.addr)
 332             m.d.comb += d_out.dcbz.eq(self.req.dcbz)
 333         with m.Else():
 334             m.d.comb += d_out.load.eq(ldst_r.load)
 335             m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
 336             m.d.comb += self.addr.eq(ldst_r.addr)
 337             m.d.comb += d_out.nc.eq(ldst_r.nc)
 338             m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
 339             m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
 340             m.d.comb += self.align_intr.eq(ldst_r.align_intr)
 341             #m.d.comb += Display("no_validblip dcbz=%i addr=%x",ldst_r.dcbz,ldst_r.addr)
 342             m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)
 343
 344         # XXX these should be possible to remove but for some reason
 345         # cannot be... yet. TODO, investigate
 346         m.d.comb += self.load_data.eq(d_in.data)
 347         m.d.comb += d_out.addr.eq(self.addr)
 348
 349         # Update outputs to MMU
 350         m.d.comb += m_out.valid.eq(mmureq)
 351         m.d.comb += m_out.iside.eq(self.instr_fault)
 352         m.d.comb += m_out.load.eq(ldst_r.load)
 353         # m_out.priv <= r.priv_mode; TODO
 354         m.d.comb += m_out.tlbie.eq(self.tlbie)
 355         # m_out.mtspr <= mmu_mtspr; # TODO
 356         # m_out.sprn <= sprn; # TODO
 357         m.d.comb += m_out.addr.eq(maddr)
 358         # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
 359         # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate
 360
 361         return m
 362
    def ports(self):
        """Yield the ports of the base class; nothing is added here yet."""
        yield from super().ports()
        # TODO: memory ports
 366
 367
 368 class TestSRAMLoadStore1(LoadStore1):
 369     def __init__(self, pspec):
 370         super().__init__(pspec)
 371         pspec = self.pspec
 372         # small 32-entry Memory
 373         if (hasattr(pspec, "dmem_test_depth") and
 374                 isinstance(pspec.dmem_test_depth, int)):
 375             depth = pspec.dmem_test_depth
 376         else:
 377             depth = 32
 378         print("TestSRAMBareLoadStoreUnit depth", depth)
 379
 380         self.mem = Memory(width=pspec.reg_wid, depth=depth)
 381
 382     def elaborate(self, platform):
 383         m = super().elaborate(platform)
 384         comb = m.d.comb
 385         m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
 386                                         features={'cti', 'bte', 'err'})
 387         dbus = self.dbus
 388
 389         # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
 390         # note: SRAM is a target (slave), dbus is initiator (master)
 391         fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
 392         fanins = ['dat_r', 'ack', 'err']
 393         for fanout in fanouts:
 394             print("fanout", fanout, getattr(sram.bus, fanout).shape(),
 395                   getattr(dbus, fanout).shape())
 396             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
 397             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
 398         for fanin in fanins:
 399             comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
 400         # connect address
 401         comb += sram.bus.adr.eq(dbus.adr)
 402
 403         return m
 404