LoadStore1 priv_mode was not being correctly picked up by the MMU
[soc.git] / src / soc / fu / ldst / loadstore.py
1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
12 to perform a TLB lookup, then to go *back* to the cache and ask again.
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """
19
20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
21 Record, Memory,
22 Const)
23 from nmutil.iocontrol import RecordObject
24 from nmutil.util import rising_edge, Display
25 from enum import Enum, unique
26
27 from soc.experiment.dcache import DCache
28 from soc.experiment.icache import ICache
29 from soc.experiment.pimem import PortInterfaceBase
30 from soc.experiment.mem_types import LoadStore1ToMMUType
31 from soc.experiment.mem_types import MMUToLoadStore1Type
32
33 from soc.minerva.wishbone import make_wb_layout
34 from soc.bus.sram import SRAM
35 from nmutil.util import Display
36
37
@unique
class State(Enum):
    """States of the LoadStore1 FSM (used as the shape of its state Signal)."""

    # ready for instruction
    IDLE = 0
    # waiting for ack from dcache
    ACK_WAIT = 1
    # waiting for MMU to look up translation
    MMU_LOOKUP = 2
    # SECOND_REQ = 3  (second request for unaligned transfer: not in use)
44
@unique
class Misalign(Enum):
    """Progress of a (possibly misaligned) request needing one or two words."""

    # only one word needed, all good
    ONEWORD = 0
    # need to send/receive two words
    NEED2WORDS = 1
    # waiting for the first word
    WAITFIRST = 2
    # waiting for the second word
    WAITSECOND = 3
51
52
class LDSTRequest(RecordObject):
    """Record holding one load/store request.

    Captures the LDSTRequest from the PortInterface, which "blips" most
    of this at us (pipeline-style).  The field order below defines the
    record layout, so it must not be rearranged.
    """

    def __init__(self, name=None):
        super().__init__(name=name)

        self.load = Signal()
        self.dcbz = Signal()
        self.raddr = Signal(64)
        # store_data is deliberately not part of the record: it is
        # already sync (on a delay)
        # self.store_data = Signal(64)
        self.byte_sel = Signal(16)
        # non-cacheable access
        self.nc = Signal()
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        # XXX UNUSED AT PRESENT
        self.mode_32bit = Signal()
        # progress of alignment request
        self.alignstate = Signal(Misalign)
        self.align_intr = Signal()
70
71
# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """LoadStore1 FSM: glue between PortInterface, the DCache and the MMU.

    Requests arrive through the PortInterfaceBase hooks (set_wr_addr,
    set_rd_addr, set_wr_data, get_rd_data).  They are presented
    combinatorially in ``self.req`` and latched into a registered copy
    (``ldst_r`` in elaborate) on the single-cycle valid "blip".

    The FSM built in elaborate() first sends the request to the DCache.
    If the DCache reports an error that is not a cache paradox, the MMU is
    asked to look up the translation and the DCache request is then
    retried.  Misaligned requests are split into two DCache operations,
    tracked by the ``alignstate`` field of the request.
    """

    def __init__(self, pspec):
        """Create the caches, the MMU records and all FSM state signals.

        pspec must provide ``reg_wid`` and ``addr_wid``; an optional
        ``disable_cache`` attribute forces every request non-cacheable.
        """
        self.pspec = pspec
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache(pspec)
        self.icache = ICache(pspec)
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in  # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out  # out from dcache is in for LoadStore
        self.i_out = self.icache.i_in  # in to icache is out for LoadStore
        self.i_in = self.icache.i_out  # out from icache is in for LoadStore
        self.m_out = LoadStore1ToMMUType("m_out")  # out *to* MMU
        self.m_in = MMUToLoadStore1Type("m_in")    # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))
        self.ibus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # state info for LD/ST
        self.done = Signal()
        self.done_delay = Signal()
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.raddr = Signal(64)
        self.maddr = Signal(64)
        self.store_data = Signal(128)       # 128-bit to cope with
        self.load_data = Signal(128)        # misalignment
        self.load_data_delay = Signal(128)  # perform 2 LD/STs
        self.byte_sel = Signal(16)          # also for misaligned, 16-bit
        self.alignstate = Signal(Misalign)  # progress of alignment request
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal()              # non-cacheable access
        self.mode_32bit = Signal()      # XXX UNUSED AT PRESENT
        self.state = Signal(State)
        self.instr_fault = Signal()     # indicator to request i-cache MMU lookup
        self.r_instr_fault = Signal()   # accessed in external_busy
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)
        # use these to set the dsisr or dar respectively
        self.mmu_set_spr = Signal()
        self.mmu_set_dsisr = Signal()
        self.mmu_set_dar = Signal()
        self.sprval_in = Signal(64)

        # ONLY access these read-only, do NOT attempt to change
        self.dsisr = Signal(32)
        self.dar = Signal(64)

    # when external_busy set, do not allow PortInterface to proceed
    def external_busy(self, m):
        """Return an expression that is true while an instruction fault
        (requested or latched) is being handled."""
        return self.instr_fault | self.r_instr_fault

    def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz):
        """Drive self.req combinatorially for a store (or dcbz) request.

        addr and mask become the request address and byte-select; the
        priv/virt/32-bit mode bits are derived from msr.  Always returns
        None.
        """
        m.d.comb += self.req.load.eq(0)  # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.raddr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr.pr)   # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr.dr)    # DR ==> virt
        m.d.comb += self.req.mode_32bit.eq(~msr.sf)  # not-sixty-four ==> 32bit
        m.d.comb += self.req.dcbz.eq(is_dcbz)
        with m.If(misalign):
            m.d.comb += self.req.alignstate.eq(Misalign.NEED2WORDS)

        # m.d.comb += Display("set_wr_addr %i dcbz %i",addr,is_dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)

        # dcbz cannot do no-cache
        with m.If(is_dcbz & self.req.nc):
            m.d.comb += self.req.align_intr.eq(1)

        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr):
        """Drive self.req combinatorially for a load request.

        Also raises d_valid, whose rising edge produces the request
        "blip" in elaborate().  Always returns None.
        """
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1)  # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.raddr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr.pr)   # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr.dr)    # DR ==> virt
        m.d.comb += self.req.mode_32bit.eq(~msr.sf)  # not-sixty-four ==> 32bit
        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        with m.If(misalign):
            m.d.comb += self.req.alignstate.eq(Misalign.NEED2WORDS)
        return None  # FIXME return value

    def set_wr_data(self, m, data, wen):
        """Present store data and raise the valid flags for a write.

        wen is currently unused.  Returns self.done as the "store ok"
        indicator.
        """
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
        st_ok = self.done  # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        """Return (data, ld_ok): the registered load data and its valid flag."""
        ld_ok = self.done_delay      # indicates read data is valid
        data = self.load_data_delay  # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        """Build the LD/ST FSM and wire it to the DCache, MMU and dbus.

        The FSM has three states: IDLE (wait for a request blip or an
        instruction fault), ACK_WAIT (wait for the DCache) and MMU_LOOKUP
        (wait for the MMU, then retry the DCache or raise an exception).
        """
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # microwatt takes one more cycle before next operation can be issued
        sync += self.done_delay.eq(self.done)
        #sync += self.load_data_delay[0:64].eq(self.load_data[0:64])

        # create dcache and icache module
        m.submodules.dcache = dcache = self.dcache
        m.submodules.icache = icache = self.icache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        i_out, i_in, ibus = self.i_out, self.i_in, self.ibus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for instr_fault
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.raddr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        # registered copy of the request, latched on the blip
        ldst_r = LDSTRequest("ldst_r")
        sync += Display("MMUTEST: LoadStore1 d_in.error=%i",d_in.error)

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If((self.d_validblip | self.instr_fault) &
                          ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req)  # copy of LDSTRequest on "blip"
                    # sync += Display("validblip self.req.virt_mode=%i",
                    #                 self.req.virt_mode)
                    with m.If(self.instr_fault):
                        comb += mmureq.eq(1)
                        sync += self.r_instr_fault.eq(1)
                        comb += maddr.eq(self.maddr)
                        sync += self.state.eq(State.MMU_LOOKUP)
                    with m.Else():
                        sync += self.r_instr_fault.eq(0)
                        # if the LD/ST requires two dwords, move to waiting
                        # for first word
                        with m.If(self.req.alignstate == Misalign.NEED2WORDS):
                            sync += ldst_r.alignstate.eq(Misalign.WAITFIRST)
                with m.Else():
                    # no request this cycle: keep the latched copy clear
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                sync += Display("MMUTEST: ACK_WAIT")
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += Display("cache error -> update dsisr")
                        sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)
                        sync += self.r_instr_fault.eq(0)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.raddr)
                    with m.If(ldst_r.alignstate == Misalign.ONEWORD):
                        # done if there is only one dcache operation
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        with m.If(ldst_r.load):
                            m.d.comb += self.load_data.eq(d_in.data)
                            sync += self.load_data_delay[0:64].eq(d_in.data)
                        m.d.comb += self.done.eq(~mmureq)  # done if not MMU
                    with m.Elif(ldst_r.alignstate == Misalign.WAITFIRST):
                        # first LD done: load data, initiate 2nd request.
                        # leave in ACK_WAIT state
                        with m.If(ldst_r.load):
                            # first dword goes into bits 0..63: the slice
                            # end is exclusive, so it must be 64 (matching
                            # the load_data_delay assignment below)
                            m.d.comb += self.load_data[0:64].eq(d_in.data)
                            sync += self.load_data_delay[0:64].eq(d_in.data)
                        # mmm kinda cheating, make a 2nd blip
                        m.d.comb += self.d_validblip.eq(1)
                        comb += self.req.eq(ldst_r)  # from copy of request
                        comb += self.req.raddr.eq(ldst_r.raddr + 8)
                        comb += self.req.byte_sel.eq(ldst_r.byte_sel[8:])
                        comb += self.req.alignstate.eq(Misalign.WAITSECOND)
                        sync += ldst_r.raddr.eq(ldst_r.raddr + 8)
                        sync += ldst_r.byte_sel.eq(ldst_r.byte_sel[8:])
                        sync += ldst_r.alignstate.eq(Misalign.WAITSECOND)
                        sync += Display(" second req %x", self.req.raddr)
                    with m.Elif(ldst_r.alignstate == Misalign.WAITSECOND):
                        sync += Display(" done second %x", d_in.data)
                        # done second load
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        with m.If(ldst_r.load):
                            m.d.comb += self.load_data[64:128].eq(d_in.data)
                            sync += self.load_data_delay[64:128].eq(d_in.data)
                        m.d.comb += self.done.eq(~mmureq)  # done if not MMU

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(~exception)
                with m.If(m_in.done):
                    with m.If(~self.r_instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.raddr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                    with m.Else():
                        sync += self.state.eq(State.IDLE)
                        sync += self.r_instr_fault.eq(0)
                        comb += self.done.eq(1)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown. XXX
                    # TODO: critical that the write here has to
                    # notify the MMU FSM of the change to dsisr
                    comb += exception.eq(1)
                    comb += self.done.eq(1)
                    sync += Display("MMU RADIX exception thrown")
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error)  # noexec
                    sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)
                    sync += self.state.eq(State.IDLE)
                    # exception thrown, clear out instruction fault state
                    sync += self.r_instr_fault.eq(0)

        # MMU FSM communicating a request to update DSISR or DAR (OP_MTSPR)
        with m.If(self.mmu_set_spr):
            with m.If(self.mmu_set_dsisr):
                sync += self.dsisr.eq(self.sprval_in)
            with m.If(self.mmu_set_dar):
                sync += self.dar.eq(self.sprval_in)

        # hmmm, alignment occurs in set_rd_addr/set_wr_addr, note exception
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
        # check for updating DAR
        with m.If(exception):
            sync += Display("exception %x", self.raddr)
            # alignment error: store address in DAR
            with m.If(self.align_intr):
                sync += Display("alignment error: addr in DAR %x", self.raddr)
                sync += self.dar.eq(self.raddr)
            with m.Elif(~self.r_instr_fault):
                sync += Display("not instr fault, addr in DAR %x", self.raddr)
                sync += self.dar.eq(self.raddr)

        # when done or exception, return to idle state
        with m.If(self.done | exception):
            sync += self.state.eq(State.IDLE)
            comb += self.busy.eq(0)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += self.align_intr.eq(self.req.align_intr)
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.r_instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.bus.adr)
        comb += dbus.dat_w.eq(dcache.bus.dat_w)
        comb += dbus.sel.eq(dcache.bus.sel)
        comb += dbus.cyc.eq(dcache.bus.cyc)
        comb += dbus.stb.eq(dcache.bus.stb)
        comb += dbus.we.eq(dcache.bus.we)

        comb += dcache.bus.dat_r.eq(dbus.dat_r)
        comb += dcache.bus.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.bus.stall.eq(dbus.stall)

        # update out d data when flag set
        with m.If(self.d_w_valid):
            # second half of a misaligned store uses the upper dword
            with m.If(ldst_r.alignstate == Misalign.WAITSECOND):
                m.d.sync += d_out.data.eq(self.store_data[64:128])
            with m.Else():
                m.d.sync += d_out.data.eq(self.store_data[0:64])
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
        with m.If(self.d_validblip):
            # on the blip the request is taken directly from self.req
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.raddr.eq(self.req.raddr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",
            #                    self.req.dcbz,self.req.addr)
            m.d.comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            # otherwise it comes from the latched copy
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.raddr.eq(ldst_r.raddr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
            #                    ldst_r.dcbz,ldst_r.addr)
            m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)
        m.d.comb += d_out.addr.eq(self.raddr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.instr_fault)
        m.d.comb += m_out.load.eq(ldst_r.load)
        # privilege level is taken from the latched request
        m.d.comb += m_out.priv.eq(ldst_r.priv_mode)
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        """Yield the ports of the PortInterfaceBase parent."""
        yield from super().ports()
        # TODO: memory ports
462
463
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 whose data bus (dbus) is wired straight to a small SRAM.

    The backing Memory is ``pspec.reg_wid`` bits wide.  Its depth is
    ``pspec.dmem_test_depth`` when that attribute exists and is an int,
    otherwise 32 entries.
    """

    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        """Add the SRAM submodule and connect it to dbus."""
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            # initiator -> target (one assignment per signal is enough)
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            # target -> initiator
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
500