a3bff180f290a5b1b384b0027b3de952a24ce731
[soc.git] / src / soc / fu / ldst / loadstore.py
1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
12 the second task (if virtual memory is enabled) is to ask the MMU
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """
19
20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
21 Record, Memory,
22 Const)
23 from nmutil.iocontrol import RecordObject
24 from nmutil.util import rising_edge, Display
25 from enum import Enum, unique
26
27 from soc.experiment.dcache import DCache
28 from soc.experiment.icache import ICache
29 from soc.experiment.pimem import PortInterfaceBase
30 from soc.experiment.mem_types import LoadStore1ToMMUType
31 from soc.experiment.mem_types import MMUToLoadStore1Type
32
33 from soc.minerva.wishbone import make_wb_layout
34 from soc.bus.sram import SRAM
35 from nmutil.util import Display
36
37
@unique
class State(Enum):
    """Top-level states of the LoadStore1 FSM."""
    IDLE       = 0   # no request in flight; ready to accept one
    ACK_WAIT   = 1   # request issued to dcache; awaiting its ack
    MMU_LOOKUP = 2   # dcache miss/fault: MMU performing translation
    TLBIE_WAIT = 3   # MMU busy carrying out a tlbie
44
45
# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    """One latched load/store request as presented by the PortInterface."""

    def __init__(self, name=None):
        super().__init__(name=name)

        # operation type
        self.load       = Signal()    # 1 = load, 0 = store
        self.dcbz       = Signal()    # data-cache-block-zero request
        # target address and lane enables
        self.addr       = Signal(64)
        # self.store_data = Signal(64) # this is already sync (on a delay)
        self.byte_sel   = Signal(8)
        # access attributes
        self.nc         = Signal()    # non-cacheable access
        self.virt_mode  = Signal()
        self.priv_mode  = Signal()
        self.align_intr = Signal()    # request is misaligned
61
62
# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """FSM glue between PortInterface, the D/I caches and the MMU.

    Latches an incoming load/store request (as an LDSTRequest "blip"),
    asks the DCache for the data, and on a cache error that is not a
    cache paradox falls back to an MMU lookup before retrying the
    DCache.  Also maintains the DSISR and DAR SPR values for exception
    reporting.
    """

    def __init__(self, pspec):
        self.pspec = pspec
        # pspec may request that the dcache be bypassed entirely
        # (all accesses then marked non-cacheable)
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        self.icache = ICache()
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in    # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out    # out from dcache is in for LoadStore
        self.i_out = self.icache.i_in    # in to icache is out for LoadStore
        self.i_in = self.icache.i_out    # out from icache is in for LoadStore
        self.m_out = LoadStore1ToMMUType()  # out *to* MMU
        self.m_in = MMUToLoadStore1Type()   # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))
        self.ibus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # state info for LD/ST
        self.done = Signal()
        self.done_delay = Signal()
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.load_data_delay = Signal(64)
        self.byte_sel = Signal(8)
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.state = Signal(State)      # current FSM state (State enum)
        self.iside = Signal()           # request instruction-side load
        self.instr_fault = Signal()
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.mode_32bit = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)
        # use these to set the dsisr or dar respectively
        self.mmu_set_spr = Signal()
        self.mmu_set_dsisr = Signal()
        self.mmu_set_dar = Signal()
        self.sprval_in = Signal(64)

        # ONLY access these read-only, do NOT attempt to change
        self.dsisr = Signal(32)
        self.dar = Signal(64)

    def set_wr_addr(self, m, addr, mask, misalign, msr_pr, is_dcbz):
        """Capture a store request into self.req (combinatorially).

        msr_pr (problem state) selects virtual mode and deselects
        privileged mode.  Returns None (no completion signal here).
        """
        m.d.comb += self.req.load.eq(0) # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.dcbz.eq(is_dcbz)

        # m.d.comb += Display("set_wr_addr %i dcbz %i",addr,is_dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
        """Capture a load request into self.req and pulse d_valid.

        Note that unlike set_wr_addr this also asserts d_valid,
        which (via rising_edge) produces the single-cycle "blip"
        that launches the dcache request.
        """
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1) # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None #FIXME return value

    def set_wr_data(self, m, data, wen):
        """Latch store data and pulse d_valid; returns the st-ok signal."""
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
        st_ok = self.done # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        """Return (data, valid) for a completed load.

        Uses the one-cycle-delayed copies: microwatt semantics require
        an extra cycle before the result is presented (see elaborate).
        """
        ld_ok = self.done_delay    # indicates read data is valid
        data = self.load_data_delay # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        """Build the FSM, the dcache/icache submodules and all wiring."""
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # microwatt takes one more cycle before next operation can be issued
        sync += self.done_delay.eq(self.done)
        sync += self.load_data_delay.eq(self.load_data)

        # create dcache and icache module
        m.submodules.dcache = dcache = self.dcache
        m.submodules.icache = icache = self.icache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        i_out, i_in, ibus = self.i_out, self.i_in, self.ibus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for OP_FETCH_FAILED
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.addr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        # ldst_r holds a registered copy of the request for the cycles
        # after the one-cycle blip has passed
        ldst_r = LDSTRequest("ldst_r")
        comb += Display("MMUTEST: LoadStore1 d_in.error=%i",d_in.error)

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If(self.d_validblip & ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
                    # sync += Display("validblip self.req.virt_mode=%i",
                    # self.req.virt_mode)
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += Display("MMUTEST: ACK_WAIT")
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += Display("cache error -> update dsisr")
                        sync += self.dsisr[63 - 38].eq(~self.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    m.d.comb += self.done.eq(~mmureq) # done if not doing MMU
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.addr)
                    sync += self.state.eq(State.IDLE)
                    sync += ldst_r.eq(0)
                    with m.If(self.load):
                        m.d.comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(~exception)
                with m.If(m_in.done):
                    with m.If(~self.instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.addr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        sync += self.state.eq(State.IDLE)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown. XXX
                    # TODO: critical that the write here has to
                    # notify the MMU FSM of the change to dsisr
                    comb += exception.eq(1)
                    sync += Display("MMU RADIX exception thrown")
                    sync += Display("TODO: notify MMU of change to dsisr")
                    # DSISR bit positions follow Power ISA numbering
                    # (bit 0 = MSB), hence the 63-n indexing
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error) # noexec
                    sync += self.dsisr[63 - 38].eq(~self.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)
                    sync += self.state.eq(State.IDLE)

            with m.Case(State.TLBIE_WAIT):
                # NOTE(review): state declared but not yet implemented
                pass

        # MMU FSM communicating a request to update DSISR or DAR (OP_MTSPR)
        with m.If(self.mmu_set_spr):
            with m.If(self.mmu_set_dsisr):
                sync += self.dsisr.eq(self.sprval_in)
            with m.If(self.mmu_set_dar):
                sync += self.dar.eq(self.sprval_in)

        # hmmm, alignment occurs in set_rd_addr/set_wr_addr, note exception
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
        # check for updating DAR
        with m.If(exception):
            sync += Display("exception %x", self.addr)
            # alignment error: store address in DAR
            with m.If(self.align_intr):
                sync += Display("alignment error: addr in DAR %x", self.addr)
                sync += self.dar.eq(self.addr)
            with m.Elif(~self.instr_fault):
                sync += Display("not instr fault, addr in DAR %x", self.addr)
                sync += self.dar.eq(self.addr)

        # when done or exception, return to idle state
        with m.If(self.done | exception):
            sync += self.state.eq(State.IDLE)
            comb += self.busy.eq(0)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += self.align_intr.eq(self.req.align_intr)
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.bus.adr)
        comb += dbus.dat_w.eq(dcache.bus.dat_w)
        comb += dbus.sel.eq(dcache.bus.sel)
        comb += dbus.cyc.eq(dcache.bus.cyc)
        comb += dbus.stb.eq(dcache.bus.stb)
        comb += dbus.we.eq(dcache.bus.we)

        comb += dcache.bus.dat_r.eq(dbus.dat_r)
        comb += dcache.bus.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.bus.stall.eq(dbus.stall)

        # update out d data when flag set
        with m.If(self.d_w_valid):
            m.d.sync += d_out.data.eq(self.store_data)
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
        # during the blip cycle the live request drives dcache; in all
        # other cycles the registered copy (ldst_r) drives it
        with m.If(self.d_validblip):
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.addr.eq(self.req.addr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",
            #self.req.dcbz,self.req.addr)
            m.d.comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.addr.eq(ldst_r.addr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
            #ldst_r.dcbz,ldst_r.addr)
            m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        m.d.comb += self.load_data.eq(d_in.data)
        m.d.comb += d_out.addr.eq(self.addr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.iside)
        m.d.comb += m_out.load.eq(ldst_r.load)
        # m_out.priv <= r.priv_mode; TODO
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        yield from super().ports()
        # TODO: memory ports
399
400
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 variant with the wishbone data bus wired to a small
    internal SRAM, for unit testing without a full memory subsystem.

    Depth defaults to 32 entries; override via pspec.dmem_test_depth.
    """

    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        """Connect the inherited dbus straight to an SRAM submodule."""
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            # note: this assignment was accidentally duplicated before;
            # one comb statement per fanout signal is sufficient
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
437