# src/soc/fu/ldst/loadstore.py
"""LoadStore1 FSM.

based on microwatt loadstore1.vhdl, but conforming to PortInterface.
unlike loadstore1.vhdl this does *not* deal with actual Load/Store
ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
by way of PortInterface. PortInterface is where things need extending,
such as adding dcbz support, etc.

this module basically handles "pure" load / store operations, and
its first job is to ask the D-Cache for the data. if that fails,
the second task (if virtual memory is enabled) is to ask the MMU
to perform a TLB lookup, then to go *back* to the cache and ask again.

Links:

* https://bugs.libre-soc.org/show_bug.cgi?id=465

"""

from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
                    Record, Memory,
                    Const)
from nmutil.iocontrol import RecordObject
from nmutil.util import rising_edge, Display
from enum import Enum, unique

from soc.experiment.dcache import DCache
from soc.experiment.pimem import PortInterfaceBase
from soc.experiment.mem_types import LoadStore1ToMMUType
from soc.experiment.mem_types import MMUToLoadStore1Type

from soc.minerva.wishbone import make_wb_layout
from soc.bus.sram import SRAM


@unique
class State(Enum):
    IDLE = 0        # ready for instruction
    ACK_WAIT = 1    # waiting for ack from dcache
    MMU_LOOKUP = 2  # waiting for MMU to look up translation
    TLBIE_WAIT = 3  # waiting for MMU to finish doing a tlbie


# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    def __init__(self, name=None):
        RecordObject.__init__(self, name=name)

        self.load = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        # self.store_data = Signal(64) # this is already sync (on a delay)
        self.byte_sel = Signal(8)
        self.nc = Signal() # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.align_intr = Signal()

# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    def __init__(self, pspec):
        self.pspec = pspec
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out # out from dcache is in for LoadStore
        self.m_out = LoadStore1ToMMUType() # out *to* MMU
        self.m_in = MMUToLoadStore1Type()  # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # DSISR and DAR cached values. note that the MMU FSM is where
        # these are accessed by OP_MTSPR/OP_MFSPR, on behalf of LoadStore1.
        # by contrast microwatt has the spr set/get done *in* loadstore1.vhdl
        self.dsisr = Signal(64)
        self.dar = Signal(64)

        # state info for LD/ST
        self.done = Signal()
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.byte_sel = Signal(8)
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal() # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.state = Signal(State)
        self.instr_fault = Signal()
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.mode_32bit = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)

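    # the four methods below are the hooks called by PortInterfaceBase on
    # behalf of LDSTCompUnit.  set_wr_addr/set_rd_addr capture the request
    # combinatorially into self.req; the FSM in elaborate() forwards it to
    # the dcache on a single-cycle "blip" (d_validblip) and latches a copy
    # into ldst_r for the rest of the transaction.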
    def set_wr_addr(self, m, addr, mask, misalign, msr_pr, is_dcbz):
        m.d.comb += self.req.load.eq(0) # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.dcbz.eq(is_dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None

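    # note: for loads the dcache "blip" (d_valid) is raised here in
    # set_rd_addr; for stores it is raised in set_wr_data below, once
    # the data to be stored is actually available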
    def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1) # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
        # BAD HACK! disable caching on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None #FIXME return value

    def set_wr_data(self, m, data, wen):
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
        st_ok = self.done # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        ld_ok = self.done # indicates read data is valid
        data = self.load_data # actual read data
        return data, ld_ok

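    # the FSM proper, plus the glue that connects the captured request to
    # the dcache, the dcache to the MMU, and the dcache's wishbone signals
    # to the external bus (self.dbus)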
    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for OP_FETCH_FAILED
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.addr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        ldst_r = LDSTRequest("ldst_r")

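        # FSM overview:
        #   IDLE:       the "blip" sends the request to the dcache and
        #               moves to ACK_WAIT
        #   ACK_WAIT:   dcache ack -> done, back to IDLE.  a dcache error
        #               that is only a TLB miss -> MMU_LOOKUP; a "cache
        #               paradox" error raises an exception straight away
        #   MMU_LOOKUP: MMU done -> re-issue the dcache request and return
        #               to ACK_WAIT; MMU error -> exception, DSISR updated
        #   TLBIE_WAIT: placeholder, currently a no-op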
        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If(self.d_validblip & ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += self.dsisr[63 - 38].eq(~self.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    m.d.comb += self.done.eq(~mmureq) # done if not doing MMU
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.addr)
                    sync += self.state.eq(State.IDLE)
                    sync += ldst_r.eq(0)
                    with m.If(self.load):
                        m.d.comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(1)
                with m.If(m_in.done):
                    with m.If(~self.instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.addr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        sync += Display("MMU_LOOKUP, exception %x", self.addr)
                        # instruction lookup fault: store address in DAR
                        comb += exc.happened.eq(1)
                        sync += self.dar.eq(self.addr)

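                # the MMU reports the cause of a failed lookup via these
                # flags: record them in DSISR (same bit positions as in
                # microwatt loadstore1.vhdl)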
                with m.If(m_in.err):
                    # MMU RADIX exception thrown
                    comb += exception.eq(1)
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error)
                    sync += self.dsisr[63 - 38].eq(~self.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)

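            # TLBIE_WAIT: the tlbie request itself reaches the MMU via
            # m_out.tlbie (set below); waiting for its completion is not
            # yet implemented here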
            with m.Case(State.TLBIE_WAIT):
                pass

        # alignment error: store address in DAR
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
            sync += self.dar.eq(self.addr)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.wb_out.adr)
        comb += dbus.dat_w.eq(dcache.wb_out.dat)
        comb += dbus.sel.eq(dcache.wb_out.sel)
        comb += dbus.cyc.eq(dcache.wb_out.cyc)
        comb += dbus.stb.eq(dcache.wb_out.stb)
        comb += dbus.we.eq(dcache.wb_out.we)

        comb += dcache.wb_in.dat.eq(dbus.dat_r)
        comb += dcache.wb_in.ack.eq(dbus.ack)
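        # the stall signal is not present in every Wishbone layout,
        # hence the hasattr check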
        if hasattr(dbus, "stall"):
            comb += dcache.wb_in.stall.eq(dbus.stall)

        # update out d data when flag set
        with m.If(self.d_w_valid):
            m.d.sync += d_out.data.eq(self.store_data)
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
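        # forward the request to the dcache: straight from self.req during
        # the single-cycle "blip", and from the latched copy (ldst_r) for
        # the rest of the transaction, so the dcache sees stable values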
        with m.If(self.d_validblip):
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.addr.eq(self.req.addr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            m.d.comb += self.align_intr.eq(self.req.align_intr)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",
            #                    self.req.dcbz, self.req.addr)
            m.d.comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.addr.eq(ldst_r.addr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            m.d.comb += self.align_intr.eq(ldst_r.align_intr)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
            #                    ldst_r.dcbz, ldst_r.addr)
            m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        m.d.comb += self.load_data.eq(d_in.data)
        m.d.comb += d_out.addr.eq(self.addr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.instr_fault)
        m.d.comb += m_out.load.eq(ldst_r.load)
        # m_out.priv <= r.priv_mode; TODO
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        yield from super().ports()
        # TODO: memory ports


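# a LoadStore1 variant with the Wishbone data bus (dbus) wired directly to
# a small SRAM, so that unit tests do not need an external memory attached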
class TestSRAMLoadStore1(LoadStore1):
    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMLoadStore1 depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
388