a3bff180f290a5b1b384b0027b3de952a24ce731
[soc.git] / src / soc / fu / ldst / loadstore.py
1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
12 the second task (if virtual memory is enabled) is to ask the MMU
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """
19
20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
21 Record, Memory,
22 Const)
23 from nmutil.iocontrol import RecordObject
24 from nmutil.util import rising_edge, Display
25 from enum import Enum, unique
26
27 from soc.experiment.dcache import DCache
28 from soc.experiment.icache import ICache
29 from soc.experiment.pimem import PortInterfaceBase
30 from soc.experiment.mem_types import LoadStore1ToMMUType
31 from soc.experiment.mem_types import MMUToLoadStore1Type
32
33 from soc.minerva.wishbone import make_wb_layout
34 from soc.bus.sram import SRAM
35 from nmutil.util import Display
36
37
@unique
class State(Enum):
    """Top-level states of the LoadStore1 FSM."""
    IDLE       = 0   # no request in flight; ready to accept one
    ACK_WAIT   = 1   # request issued to dcache; awaiting its ack
    MMU_LOOKUP = 2   # dcache miss/fault: MMU performing translation
    TLBIE_WAIT = 3   # MMU busy carrying out a tlbie
44
45
# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    """One latched load/store request as presented by the PortInterface."""

    def __init__(self, name=None):
        super().__init__(name=name)

        # operation type
        self.load       = Signal()    # 1 = load, 0 = store
        self.dcbz       = Signal()    # data-cache-block-zero request
        # target address and lane enables
        self.addr       = Signal(64)
        # self.store_data = Signal(64) # this is already sync (on a delay)
        self.byte_sel   = Signal(8)
        # access attributes
        self.nc         = Signal()    # non-cacheable access
        self.virt_mode  = Signal()
        self.priv_mode  = Signal()
        self.align_intr = Signal()    # request is misaligned
61
62
# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """FSM glue between PortInterface, the D/I caches and the MMU.

    Latches an incoming load/store request (as an LDSTRequest "blip"),
    asks the DCache for the data, and on a cache error that is not a
    cache paradox falls back to an MMU lookup before retrying the
    DCache.  Also maintains the DSISR and DAR SPR values for exception
    reporting.
    """

    def __init__(self, pspec):
        self.pspec = pspec
        # pspec may request that the dcache be bypassed entirely
        # (all accesses then marked non-cacheable)
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        self.icache = ICache()
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in    # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out    # out from dcache is in for LoadStore
        self.i_out = self.icache.i_in    # in to icache is out for LoadStore
        self.i_in = self.icache.i_out    # out from icache is in for LoadStore
        self.m_out = LoadStore1ToMMUType()  # out *to* MMU
        self.m_in = MMUToLoadStore1Type()   # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))
        self.ibus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # state info for LD/ST
        self.done = Signal()
        self.done_delay = Signal()
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.load_data_delay = Signal(64)
        self.byte_sel = Signal(8)
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.state = Signal(State)      # current FSM state (State enum)
        self.iside = Signal()           # request instruction-side load
        self.instr_fault = Signal()
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.mode_32bit = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)
        # use these to set the dsisr or dar respectively
        self.mmu_set_spr = Signal()
        self.mmu_set_dsisr = Signal()
        self.mmu_set_dar = Signal()
        self.sprval_in = Signal(64)

        # ONLY access these read-only, do NOT attempt to change
        self.dsisr = Signal(32)
        self.dar = Signal(64)

    def set_wr_addr(self, m, addr, mask, misalign, msr_pr, is_dcbz):
        """Capture a store request into self.req (combinatorially).

        msr_pr (problem state) selects virtual mode and deselects
        privileged mode.  Returns None (no completion signal here).
        """
        m.d.comb += self.req.load.eq(0) # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.dcbz.eq(is_dcbz)

        # m.d.comb += Display("set_wr_addr %i dcbz %i",addr,is_dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
        """Capture a load request into self.req and pulse d_valid.

        Note that unlike set_wr_addr this also asserts d_valid,
        which (via rising_edge) produces the single-cycle "blip"
        that launches the dcache request.
        """
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1) # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None #FIXME return value

    def set_wr_data(self, m, data, wen):
        """Latch store data and pulse d_valid; returns the st-ok signal."""
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
        st_ok = self.done # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        """Return (data, valid) for a completed load.

        Uses the one-cycle-delayed copies: microwatt semantics require
        an extra cycle before the result is presented (see elaborate).
        """
        ld_ok = self.done_delay    # indicates read data is valid
        data = self.load_data_delay # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        """Build the FSM, the dcache/icache submodules and all wiring."""
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # microwatt takes one more cycle before next operation can be issued
        sync += self.done_delay.eq(self.done)
        sync += self.load_data_delay.eq(self.load_data)

        # create dcache and icache module
        m.submodules.dcache = dcache = self.dcache
        m.submodules.icache = icache = self.icache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        i_out, i_in, ibus = self.i_out, self.i_in, self.ibus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for OP_FETCH_FAILED
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.addr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        # ldst_r holds a registered copy of the request for the cycles
        # after the one-cycle blip has passed
        ldst_r = LDSTRequest("ldst_r")
        comb += Display("MMUTEST: LoadStore1 d_in.error=%i",d_in.error)

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If(self.d_validblip & ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
                    # sync += Display("validblip self.req.virt_mode=%i",
                    # self.req.virt_mode)
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += Display("MMUTEST: ACK_WAIT")
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += Display("cache error -> update dsisr")
                        sync += self.dsisr[63 - 38].eq(~self.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    m.d.comb += self.done.eq(~mmureq) # done if not doing MMU
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.addr)
                    sync += self.state.eq(State.IDLE)
                    sync += ldst_r.eq(0)
                    with m.If(self.load):
                        m.d.comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(~exception)
                with m.If(m_in.done):
                    with m.If(~self.instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.addr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        sync += self.state.eq(State.IDLE)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown. XXX
                    # TODO: critical that the write here has to
                    # notify the MMU FSM of the change to dsisr
                    comb += exception.eq(1)
                    sync += Display("MMU RADIX exception thrown")
                    sync += Display("TODO: notify MMU of change to dsisr")
                    # DSISR bit positions follow Power ISA numbering
                    # (bit 0 = MSB), hence the 63-n indexing
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error) # noexec
                    sync += self.dsisr[63 - 38].eq(~self.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)
                    sync += self.state.eq(State.IDLE)

            with m.Case(State.TLBIE_WAIT):
                # NOTE(review): state declared but not yet implemented
                pass

        # MMU FSM communicating a request to update DSISR or DAR (OP_MTSPR)
        with m.If(self.mmu_set_spr):
            with m.If(self.mmu_set_dsisr):
                sync += self.dsisr.eq(self.sprval_in)
            with m.If(self.mmu_set_dar):
                sync += self.dar.eq(self.sprval_in)

        # hmmm, alignment occurs in set_rd_addr/set_wr_addr, note exception
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
        # check for updating DAR
        with m.If(exception):
            sync += Display("exception %x", self.addr)
            # alignment error: store address in DAR
            with m.If(self.align_intr):
                sync += Display("alignment error: addr in DAR %x", self.addr)
                sync += self.dar.eq(self.addr)
            with m.Elif(~self.instr_fault):
                sync += Display("not instr fault, addr in DAR %x", self.addr)
                sync += self.dar.eq(self.addr)

        # when done or exception, return to idle state
        with m.If(self.done | exception):
            sync += self.state.eq(State.IDLE)
            comb += self.busy.eq(0)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += self.align_intr.eq(self.req.align_intr)
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.bus.adr)
        comb += dbus.dat_w.eq(dcache.bus.dat_w)
        comb += dbus.sel.eq(dcache.bus.sel)
        comb += dbus.cyc.eq(dcache.bus.cyc)
        comb += dbus.stb.eq(dcache.bus.stb)
        comb += dbus.we.eq(dcache.bus.we)

        comb += dcache.bus.dat_r.eq(dbus.dat_r)
        comb += dcache.bus.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.bus.stall.eq(dbus.stall)

        # update out d data when flag set
        with m.If(self.d_w_valid):
            m.d.sync += d_out.data.eq(self.store_data)
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
        # during the blip cycle the live request drives dcache; in all
        # other cycles the registered copy (ldst_r) drives it
        with m.If(self.d_validblip):
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.addr.eq(self.req.addr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",
            #self.req.dcbz,self.req.addr)
            m.d.comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.addr.eq(ldst_r.addr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
            #ldst_r.dcbz,ldst_r.addr)
            m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        m.d.comb += self.load_data.eq(d_in.data)
        m.d.comb += d_out.addr.eq(self.addr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.iside)
        m.d.comb += m_out.load.eq(ldst_r.load)
        # m_out.priv <= r.priv_mode; TODO
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        yield from super().ports()
        # TODO: memory ports
399
400
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 variant with the wishbone data bus wired to a small
    internal SRAM, for unit testing without a full memory subsystem.

    Depth defaults to 32 entries; override via pspec.dmem_test_depth.
    """

    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        """Connect the inherited dbus straight to an SRAM submodule."""
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            # note: this assignment was accidentally duplicated before;
            # one comb statement per fanout signal is sufficient
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
437