src/soc/fu/ldst/loadstore.py

   1 """LoadStore1 FSM.
   2
   3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
   4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
   5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
   6 by way of PortInterface.  PortInterface is where things need extending,
   7 such as adding dcbz support, etc.
   8
   9 this module basically handles "pure" load / store operations, and
  10 its first job is to ask the D-Cache for the data.  if that fails,
  11 the second task (if virtual memory is enabled) is to ask the MMU
  12 to perform a TLB lookup, then to go *back* to the cache and ask again.
  13
  14 Links:
  15
  16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
  17
  18 """
  19
  20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
  21                     Record, Memory,
  22                     Const)
  23 from nmutil.iocontrol import RecordObject
  24 from nmutil.util import rising_edge, Display
  25 from enum import Enum, unique
  26
  27 from soc.experiment.dcache import DCache
  28 from soc.experiment.pimem import PortInterfaceBase
  29 from soc.experiment.mem_types import LoadStore1ToMMUType
  30 from soc.experiment.mem_types import MMUToLoadStore1Type
  31
  32 from soc.minerva.wishbone import make_wb_layout
  33 from soc.bus.sram import SRAM
  34 from nmutil.util import Display
  35
  36
  37 @unique
  38 class State(Enum):
  39     IDLE = 0       # ready for instruction
  40     ACK_WAIT = 1   # waiting for ack from dcache
  41     MMU_LOOKUP = 2 # waiting for MMU to look up translation
  42     TLBIE_WAIT = 3 # waiting for MMU to finish doing a tlbie
  43
  44
  45 # captures the LDSTRequest from the PortInterface, which "blips" most
  46 # of this at us (pipeline-style).
  47 class LDSTRequest(RecordObject):
  48     def __init__(self, name=None):
  49         RecordObject.__init__(self, name=name)
  50
  51         self.load          = Signal()
  52         self.dcbz          = Signal()
  53         self.addr          = Signal(64)
  54         # self.store_data    = Signal(64) # this is already sync (on a delay)
  55         self.byte_sel      = Signal(8)
  56         self.nc            = Signal()              # non-cacheable access
  57         self.virt_mode     = Signal()
  58         self.priv_mode     = Signal()
  59         self.align_intr    = Signal()
  60
  61 # glue logic for microwatt mmu and dcache
  62 class LoadStore1(PortInterfaceBase):
  63     def __init__(self, pspec):
  64         self.pspec = pspec
  65         self.disable_cache = (hasattr(pspec, "disable_cache") and
  66                               pspec.disable_cache == True)
  67         regwid = pspec.reg_wid
  68         addrwid = pspec.addr_wid
  69
  70         super().__init__(regwid, addrwid)
  71         self.dcache = DCache()
  72         # these names are from the perspective of here (LoadStore1)
  73         self.d_out  = self.dcache.d_in     # in to dcache is out for LoadStore
  74         self.d_in = self.dcache.d_out      # out from dcache is in for LoadStore
  75         self.m_out  = LoadStore1ToMMUType() # out *to* MMU
  76         self.m_in = MMUToLoadStore1Type()   # in *from* MMU
  77         self.req = LDSTRequest(name="ldst_req")
  78
  79         # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
  80         self.dbus = Record(make_wb_layout(pspec))
  81
  82         # for creating a single clock blip to DCache
  83         self.d_valid = Signal()
  84         self.d_w_valid = Signal()
  85         self.d_validblip = Signal()
  86
  87         # state info for LD/ST
  88         self.done          = Signal()
  89         self.done_delay    = Signal()
  90         # latch most of the input request
  91         self.load          = Signal()
  92         self.tlbie         = Signal()
  93         self.dcbz          = Signal()
  94         self.addr          = Signal(64)
  95         self.store_data    = Signal(64)
  96         self.load_data     = Signal(64)
  97         self.load_data_delay = Signal(64)
  98         self.byte_sel      = Signal(8)
  99         #self.xerc         : xer_common_t;
 100         #self.reserve       = Signal()
 101         #self.atomic        = Signal()
 102         #self.atomic_last   = Signal()
 103         #self.rc            = Signal()
 104         self.nc            = Signal()              # non-cacheable access
 105         self.virt_mode     = Signal()
 106         self.priv_mode     = Signal()
 107         self.state        = Signal(State)
 108         self.instr_fault   = Signal()
 109         self.align_intr    = Signal()
 110         self.busy          = Signal()
 111         self.wait_dcache   = Signal()
 112         self.wait_mmu      = Signal()
 113         #self.mode_32bit    = Signal()
 114         #self.intr_vec     : integer range 0 to 16#fff#;
 115         #self.nia           = Signal(64)
 116         #self.srr1          = Signal(16)
 117
 118     def set_wr_addr(self, m, addr, mask, misalign, msr_pr, is_dcbz):
 119         m.d.comb += self.req.load.eq(0) # store operation
 120         m.d.comb += self.req.byte_sel.eq(mask)
 121         m.d.comb += self.req.addr.eq(addr)
 122         m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 123         m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 124         m.d.comb += self.req.align_intr.eq(misalign)
 125         m.d.comb += self.req.dcbz.eq(is_dcbz)
 126
 127         # m.d.comb += Display("set_wr_addr %i dcbz %i",addr,is_dcbz)
 128
 129         # option to disable the cache entirely for write
 130         if self.disable_cache:
 131             m.d.comb += self.req.nc.eq(1)
 132         return None
 133
 134     def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
 135         m.d.comb += self.d_valid.eq(1)
 136         m.d.comb += self.req.load.eq(1) # load operation
 137         m.d.comb += self.req.byte_sel.eq(mask)
 138         m.d.comb += self.req.align_intr.eq(misalign)
 139         m.d.comb += self.req.addr.eq(addr)
 140         m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
 141         m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
 142         # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
 143         # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
 144         with m.If(addr[28:] == Const(0xc, 4)):
 145             m.d.comb += self.req.nc.eq(1)
 146         # option to disable the cache entirely for read
 147         if self.disable_cache:
 148             m.d.comb += self.req.nc.eq(1)
 149         return None #FIXME return value
 150
 151     def set_wr_data(self, m, data, wen):
 152         # do the "blip" on write data
 153         m.d.comb += self.d_valid.eq(1)
 154         # put data into comb which is picked up in main elaborate()
 155         m.d.comb += self.d_w_valid.eq(1)
 156         m.d.comb += self.store_data.eq(data)
 157         #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
 158         st_ok = self.done # TODO indicates write data is valid
 159         return st_ok
 160
 161     def get_rd_data(self, m):
 162         ld_ok = self.done_delay # indicates read data is valid
 163         data = self.load_data_delay   # actual read data
 164         return data, ld_ok
 165
 166     def elaborate(self, platform):
 167         m = super().elaborate(platform)
 168         comb, sync = m.d.comb, m.d.sync
 169
 170         # microwatt takes one more cycle before next operation can be issued
 171         sync += self.done_delay.eq(self.done)
 172         sync += self.load_data_delay.eq(self.load_data)
 173
 174         # create dcache module
 175         m.submodules.dcache = dcache = self.dcache
 176
 177         # temp vars
 178         d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
 179         m_out, m_in = self.m_out, self.m_in
 180         exc = self.pi.exc_o
 181         exception = exc.happened
 182         mmureq = Signal()
 183
 184         # copy of address, but gets over-ridden for OP_FETCH_FAILED
 185         maddr = Signal(64)
 186         m.d.comb += maddr.eq(self.addr)
 187
 188         # create a blip (single pulse) on valid read/write request
 189         # this can be over-ridden in the FSM to get dcache to re-run
 190         # a request when MMU_LOOKUP completes.
 191         m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
 192         ldst_r = LDSTRequest("ldst_r")
 193         comb += Display("MMUTEST: LoadStore1 d_in.error=%i",d_in.error)
 194
 195         # fsm skeleton
 196         with m.Switch(self.state):
 197             with m.Case(State.IDLE):
 198                 with m.If(self.d_validblip & ~exc.happened):
 199                     comb += self.busy.eq(1)
 200                     sync += self.state.eq(State.ACK_WAIT)
 201                     sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
 202 #                   sync += Display("validblip self.req.virt_mode=%i",
 203 #                   self.req.virt_mode)
 204                 with m.Else():
 205                     sync += ldst_r.eq(0)
 206
 207             # waiting for completion
 208             with m.Case(State.ACK_WAIT):
 209                 comb += Display("MMUTEST: ACK_WAIT")
 210                 comb += self.busy.eq(~exc.happened)
 211
 212                 with m.If(d_in.error):
 213                     # cache error is not necessarily "final", it could
 214                     # be that it was just a TLB miss
 215                     with m.If(d_in.cache_paradox):
 216                         comb += exception.eq(1)
 217                         sync += self.state.eq(State.IDLE)
 218                         sync += ldst_r.eq(0)
 219                         sync += Display("cache error -> update dsisr")
 220                         #sync += self.dsisr[63 - 38].eq(~self.load)
 221                         # XXX there is no architected bit for this
 222                         # (probably should be a machine check in fact)
 223                         #sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)
 224
 225                     with m.Else():
 226                         # Look up the translation for TLB miss
 227                         # and also for permission error and RC error
 228                         # in case the PTE has been updated.
 229                         comb += mmureq.eq(1)
 230                         sync += self.state.eq(State.MMU_LOOKUP)
 231                 with m.If(d_in.valid):
 232                     m.d.comb += self.done.eq(~mmureq) # done if not doing MMU
 233                     with m.If(self.done):
 234                         sync += Display("ACK_WAIT, done %x", self.addr)
 235                     sync += self.state.eq(State.IDLE)
 236                     sync += ldst_r.eq(0)
 237                     with m.If(self.load):
 238                         m.d.comb += self.load_data.eq(d_in.data)
 239
 240             # waiting here for the MMU TLB lookup to complete.
 241             # either re-try the dcache lookup or throw MMU exception
 242             with m.Case(State.MMU_LOOKUP):
 243                 comb += self.busy.eq(1)
 244                 with m.If(m_in.done):
 245                     with m.If(~self.instr_fault):
 246                         sync += Display("MMU_LOOKUP, done %x -> %x",
 247                                         self.addr, d_out.addr)
 248                         # retry the request now that the MMU has
 249                         # installed a TLB entry, if not exception raised
 250                         m.d.comb += self.d_out.valid.eq(~exception)
 251                         sync += self.state.eq(State.ACK_WAIT)
 252                         sync += ldst_r.eq(0)
 253                     with m.Else():
 254                         sync += Display("MMU_LOOKUP, exception %x", self.addr)
 255                         # instruction lookup fault: store address in DAR
 256                         comb += exc.happened.eq(1) # reason = MMU_LOOKUP
 257                         # mark dar as updated ?
 258                         comb += self.pi.dar_o.eq(self.addr)
 259                         sync += self.state.eq(State.IDLE)
 260
 261                 with m.If(m_in.err):
 262                     # MMU RADIX exception thrown
 263                     comb += exception.eq(1)
 264                     sync += Display("MMU RADIX exception thrown")
 265                     #sync += self.dsisr[63 - 33].eq(m_in.invalid)
 266                     #sync += self.dsisr[63 - 36].eq(m_in.perm_error)
 267                     #sync += self.dsisr[63 - 38].eq(self.load)
 268                     #sync += self.dsisr[63 - 44].eq(m_in.badtree)
 269                     #sync += self.dsisr[63 - 45].eq(m_in.rc_error)
 270                     sync += self.state.eq(State.IDLE)
 271
 272             with m.Case(State.TLBIE_WAIT):
 273                 pass
 274
 275         # alignment error: store address in DAR
 276         with m.If(self.align_intr):
 277             comb += exc.happened.eq(1) # reason = alignment
 278             sync += Display("alignment error: store addr in DAR %x", self.addr)
 279             comb += self.pi.dar_o.eq(self.addr)
 280
 281         # happened, alignment, instr_fault, invalid.
 282         # note that all of these flow through - eventually to the TRAP
 283         # pipeline, via PowerDecoder2.
 284         comb += exc.invalid.eq(m_in.invalid)
 285         comb += exc.alignment.eq(self.align_intr)
 286         comb += exc.instr_fault.eq(self.instr_fault)
 287         # badtree, perm_error, rc_error, segment_fault
 288         comb += exc.badtree.eq(m_in.badtree)
 289         comb += exc.perm_error.eq(m_in.perm_error)
 290         comb += exc.rc_error.eq(m_in.rc_error)
 291         comb += exc.segment_fault.eq(m_in.segerr)
 292
 293         # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
 294         comb += dbus.adr.eq(dcache.wb_out.adr)
 295         comb += dbus.dat_w.eq(dcache.wb_out.dat)
 296         comb += dbus.sel.eq(dcache.wb_out.sel)
 297         comb += dbus.cyc.eq(dcache.wb_out.cyc)
 298         comb += dbus.stb.eq(dcache.wb_out.stb)
 299         comb += dbus.we.eq(dcache.wb_out.we)
 300
 301         comb += dcache.wb_in.dat.eq(dbus.dat_r)
 302         comb += dcache.wb_in.ack.eq(dbus.ack)
 303         if hasattr(dbus, "stall"):
 304             comb += dcache.wb_in.stall.eq(dbus.stall)
 305
 306         # update out d data when flag set
 307         with m.If(self.d_w_valid):
 308             m.d.sync += d_out.data.eq(self.store_data)
 309         #with m.Else():
 310         #    m.d.sync += d_out.data.eq(0)
 311         # unit test passes with that change
 312
 313         # this must move into the FSM, conditionally noticing that
 314         # the "blip" comes from self.d_validblip.
 315         # task 1: look up in dcache
 316         # task 2: if dcache fails, look up in MMU.
 317         # do **NOT** confuse the two.
 318         with m.If(self.d_validblip):
 319             m.d.comb += self.d_out.valid.eq(~exc.happened)
 320             m.d.comb += d_out.load.eq(self.req.load)
 321             m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
 322             m.d.comb += self.addr.eq(self.req.addr)
 323             m.d.comb += d_out.nc.eq(self.req.nc)
 324             m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
 325             m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
 326             m.d.comb += self.align_intr.eq(self.req.align_intr)
 327             #m.d.comb += Display("validblip dcbz=%i addr=%x",
 328             #self.req.dcbz,self.req.addr)
 329             m.d.comb += d_out.dcbz.eq(self.req.dcbz)
 330         with m.Else():
 331             m.d.comb += d_out.load.eq(ldst_r.load)
 332             m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
 333             m.d.comb += self.addr.eq(ldst_r.addr)
 334             m.d.comb += d_out.nc.eq(ldst_r.nc)
 335             m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
 336             m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
 337             m.d.comb += self.align_intr.eq(ldst_r.align_intr)
 338             #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
 339             #ldst_r.dcbz,ldst_r.addr)
 340             m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)
 341
 342         # XXX these should be possible to remove but for some reason
 343         # cannot be... yet. TODO, investigate
 344         m.d.comb += self.load_data.eq(d_in.data)
 345         m.d.comb += d_out.addr.eq(self.addr)
 346
 347         # Update outputs to MMU
 348         m.d.comb += m_out.valid.eq(mmureq)
 349         m.d.comb += m_out.iside.eq(self.instr_fault)
 350         m.d.comb += m_out.load.eq(ldst_r.load)
 351         # m_out.priv <= r.priv_mode; TODO
 352         m.d.comb += m_out.tlbie.eq(self.tlbie)
 353         # m_out.mtspr <= mmu_mtspr; # TODO
 354         # m_out.sprn <= sprn; # TODO
 355         m.d.comb += m_out.addr.eq(maddr)
 356         # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
 357         # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate
 358
 359         return m
 360
 361     def ports(self):
 362         yield from super().ports()
 363         # TODO: memory ports
 364
 365
 366 class TestSRAMLoadStore1(LoadStore1):
 367     def __init__(self, pspec):
 368         super().__init__(pspec)
 369         pspec = self.pspec
 370         # small 32-entry Memory
 371         if (hasattr(pspec, "dmem_test_depth") and
 372                 isinstance(pspec.dmem_test_depth, int)):
 373             depth = pspec.dmem_test_depth
 374         else:
 375             depth = 32
 376         print("TestSRAMBareLoadStoreUnit depth", depth)
 377
 378         self.mem = Memory(width=pspec.reg_wid, depth=depth)
 379
 380     def elaborate(self, platform):
 381         m = super().elaborate(platform)
 382         comb = m.d.comb
 383         m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
 384                                         features={'cti', 'bte', 'err'})
 385         dbus = self.dbus
 386
 387         # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
 388         # note: SRAM is a target (slave), dbus is initiator (master)
 389         fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
 390         fanins = ['dat_r', 'ack', 'err']
 391         for fanout in fanouts:
 392             print("fanout", fanout, getattr(sram.bus, fanout).shape(),
 393                   getattr(dbus, fanout).shape())
 394             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
 395             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
 396         for fanin in fanins:
 397             comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
 398         # connect address
 399         comb += sram.bus.adr.eq(dbus.adr)
 400
 401         return m
 402