clear instruction fault on exception WAIT_MMU ACK in LoadStore1
[soc.git] / src / soc / fu / ldst / loadstore.py
1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
to perform a TLB lookup, then to go *back* to the cache and ask again.
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """
19
20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
21 Record, Memory,
22 Const)
23 from nmutil.iocontrol import RecordObject
24 from nmutil.util import rising_edge, Display
25 from enum import Enum, unique
26
27 from soc.experiment.dcache import DCache
28 from soc.experiment.icache import ICache
29 from soc.experiment.pimem import PortInterfaceBase
30 from soc.experiment.mem_types import LoadStore1ToMMUType
31 from soc.experiment.mem_types import MMUToLoadStore1Type
32
33 from soc.minerva.wishbone import make_wb_layout
34 from soc.bus.sram import SRAM
35 from nmutil.util import Display
36
37
@unique
class State(Enum):
    """States of the LoadStore1 finite state machine."""

    # ready for instruction
    IDLE = 0
    # waiting for ack from dcache
    ACK_WAIT = 1
    # waiting for MMU to look up translation
    MMU_LOOKUP = 2
    # waiting for MMU to finish doing a tlbie
    TLBIE_WAIT = 3
44
45
# holds the LDSTRequest coming from the PortInterface, which only "blips"
# most of these fields at us (pipeline-style).
class LDSTRequest(RecordObject):
    """Record grouping the signals that describe one load/store request."""

    def __init__(self, name=None):
        super().__init__(name=name)

        # NOTE: fields are created by plain attribute assignment, in this
        # exact order, so that the record layout stays unchanged.
        self.load = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        # self.store_data = Signal(64) # this is already sync (on a delay)
        self.byte_sel = Signal(8)
        # non-cacheable access
        self.nc = Signal()
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        # XXX UNUSED AT PRESENT
        self.mode_32bit = Signal()
        self.align_intr = Signal()
62
63
# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """FSM gluing a PortInterface to the D-Cache and the MMU.

    A request arriving over the PortInterface is first handed to the
    D-Cache.  If the D-Cache reports an error that is not a cache paradox,
    the MMU is asked to look up the translation and the D-Cache request is
    then re-issued.  MMU and cache-paradox errors are reported through
    ``self.pi.exc_o`` and recorded in ``dsisr`` / ``dar``.
    """

    def __init__(self, pspec):
        """Create the caches, the MMU records and all FSM state signals.

        pspec must provide ``reg_wid`` and ``addr_wid``.  If it also has a
        ``disable_cache`` attribute equal to True, every request is marked
        non-cacheable (see set_rd_addr / set_wr_addr).
        """
        self.pspec = pspec
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        self.icache = ICache(pspec)
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in   # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out   # out from dcache is in for LoadStore
        self.i_out = self.icache.i_in   # in to icache is out for LoadStore
        self.i_in = self.icache.i_out   # out from icache is in for LoadStore
        self.m_out = LoadStore1ToMMUType("m_out")  # out *to* MMU
        self.m_in = MMUToLoadStore1Type("m_in")    # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))
        self.ibus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # state info for LD/ST
        self.done = Signal()
        self.done_delay = Signal()
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        self.maddr = Signal(64)
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.load_data_delay = Signal(64)
        self.byte_sel = Signal(8)
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.mode_32bit = Signal()      # XXX UNUSED AT PRESENT
        self.state = Signal(State)
        self.instr_fault = Signal()     # indicator to request i-cache MMU lookup
        self.r_instr_fault = Signal()   # accessed in external_busy
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)
        # use these to set the dsisr or dar respectively
        self.mmu_set_spr = Signal()
        self.mmu_set_dsisr = Signal()
        self.mmu_set_dar = Signal()
        self.sprval_in = Signal(64)

        # ONLY access these read-only, do NOT attempt to change
        self.dsisr = Signal(32)
        self.dar = Signal(64)

    # when external_busy set, do not allow PortInterface to proceed
    def external_busy(self, m):
        """Return an expression that is true while an instruction fault
        is being requested (instr_fault) or processed (r_instr_fault).

        ``m`` is accepted for interface compatibility and is not used.
        """
        return self.instr_fault | self.r_instr_fault

    def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz):
        """Drive self.req (combinatorially) with a store request.

        addr/mask become req.addr/req.byte_sel, misalign becomes
        req.align_intr, is_dcbz becomes req.dcbz, and the privilege,
        translation and 32-bit mode flags are derived from msr.
        Always returns None.
        """
        m.d.comb += self.req.load.eq(0)  # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr.pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr.dr)   # DR ==> virt
        m.d.comb += self.req.mode_32bit.eq(~msr.sf)  # not-sixty-four ==> 32bit
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.dcbz.eq(is_dcbz)

        # m.d.comb += Display("set_wr_addr %i dcbz %i",addr,is_dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr):
        """Drive self.req (combinatorially) with a load request and raise
        d_valid, which elaborate() turns into the single-cycle "blip".

        Always returns None.
        """
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1)  # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr.pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr.dr)   # DR ==> virt
        m.d.comb += self.req.mode_32bit.eq(~msr.sf)  # not-sixty-four ==> 32bit
        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None  # FIXME return value

    def set_wr_data(self, m, data, wen):
        """Present store data and raise d_valid / d_w_valid.

        Returns self.done as the "store ok" indicator.  ``wen`` is
        currently unused (see the commented-out byte_sel line).
        """
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
        st_ok = self.done  # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        """Return (data, ld_ok): the one-cycle-delayed load data and the
        one-cycle-delayed done flag that qualifies it.
        """
        ld_ok = self.done_delay      # indicates read data is valid
        data = self.load_data_delay  # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        """Build the FSM and wire up D-Cache, I-Cache submodules and MMU.

        NOTE: statement order matters here.  Within one domain a later
        nmigen assignment to the same signal takes priority over an
        earlier one, so do not reorder without care.
        """
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # microwatt takes one more cycle before next operation can be issued
        sync += self.done_delay.eq(self.done)
        sync += self.load_data_delay.eq(self.load_data)

        # create dcache and icache module
        m.submodules.dcache = dcache = self.dcache
        m.submodules.icache = icache = self.icache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        i_out, i_in, ibus = self.i_out, self.i_in, self.ibus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for instr_fault
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.addr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        # latched copy of self.req, captured in IDLE on the "blip"
        ldst_r = LDSTRequest("ldst_r")
        comb += Display("MMUTEST: LoadStore1 d_in.error=%i",d_in.error)

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If((self.d_validblip | self.instr_fault) &
                          ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req)  # copy of LDSTRequest on "blip"
                    # sync += Display("validblip self.req.virt_mode=%i",
                    #                 self.req.virt_mode)
                    with m.If(self.instr_fault):
                        # instruction-side fault: go straight to the MMU,
                        # using self.maddr as the lookup address (this
                        # overrides the ACK_WAIT transition above)
                        comb += mmureq.eq(1)
                        sync += self.r_instr_fault.eq(1)
                        comb += maddr.eq(self.maddr)
                        sync += self.state.eq(State.MMU_LOOKUP)
                    with m.Else():
                        sync += self.r_instr_fault.eq(0)
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += Display("MMUTEST: ACK_WAIT")
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += Display("cache error -> update dsisr")
                        # "store" bit: taken from the latched request.
                        # (self.load is never driven in this class, so
                        # using it would flag every fault as a store)
                        sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)
                        sync += self.r_instr_fault.eq(0)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    m.d.comb += self.done.eq(~mmureq)  # done if not doing MMU
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.addr)
                    sync += self.state.eq(State.IDLE)
                    sync += ldst_r.eq(0)
                    # NOTE(review): self.load is not driven in this class
                    # and load_data is assigned unconditionally further
                    # down, so this branch looks redundant - confirm.
                    with m.If(self.load):
                        m.d.comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(~exception)
                with m.If(m_in.done):
                    with m.If(~self.r_instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.addr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        # instruction-side lookup: nothing to retry here
                        sync += self.state.eq(State.IDLE)
                        sync += self.r_instr_fault.eq(0)
                        comb += self.done.eq(1)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown. XXX
                    # TODO: critical that the write here has to
                    # notify the MMU FSM of the change to dsisr
                    comb += exception.eq(1)
                    comb += self.done.eq(1)
                    sync += Display("MMU RADIX exception thrown")
                    sync += Display("TODO: notify MMU of change to dsisr")
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error)  # noexec
                    # "store" bit from the latched request (see ACK_WAIT)
                    sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)
                    sync += self.state.eq(State.IDLE)
                    # exception thrown, clear out instruction fault state
                    sync += self.r_instr_fault.eq(0)

            with m.Case(State.TLBIE_WAIT):
                pass

        # MMU FSM communicating a request to update DSISR or DAR (OP_MTSPR)
        with m.If(self.mmu_set_spr):
            with m.If(self.mmu_set_dsisr):
                sync += self.dsisr.eq(self.sprval_in)
            with m.If(self.mmu_set_dar):
                sync += self.dar.eq(self.sprval_in)

        # hmmm, alignment occurs in set_rd_addr/set_wr_addr, note exception
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
        # check for updating DAR
        with m.If(exception):
            sync += Display("exception %x", self.addr)
            # alignment error: store address in DAR
            with m.If(self.align_intr):
                sync += Display("alignment error: addr in DAR %x", self.addr)
                sync += self.dar.eq(self.addr)
            with m.Elif(~self.r_instr_fault):
                sync += Display("not instr fault, addr in DAR %x", self.addr)
                sync += self.dar.eq(self.addr)

        # when done or exception, return to idle state
        with m.If(self.done | exception):
            sync += self.state.eq(State.IDLE)
            comb += self.busy.eq(0)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += self.align_intr.eq(self.req.align_intr)
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.r_instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.bus.adr)
        comb += dbus.dat_w.eq(dcache.bus.dat_w)
        comb += dbus.sel.eq(dcache.bus.sel)
        comb += dbus.cyc.eq(dcache.bus.cyc)
        comb += dbus.stb.eq(dcache.bus.stb)
        comb += dbus.we.eq(dcache.bus.we)

        comb += dcache.bus.dat_r.eq(dbus.dat_r)
        comb += dcache.bus.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.bus.stall.eq(dbus.stall)

        # update out d data when flag set
        with m.If(self.d_w_valid):
            m.d.sync += d_out.data.eq(self.store_data)
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
        with m.If(self.d_validblip):
            # on the blip the live request (self.req) feeds the dcache
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.addr.eq(self.req.addr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",
            #self.req.dcbz,self.req.addr)
            m.d.comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            # afterwards the latched copy (ldst_r) is used instead
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.addr.eq(ldst_r.addr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
            #ldst_r.dcbz,ldst_r.addr)
            m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        m.d.comb += self.load_data.eq(d_in.data)
        m.d.comb += d_out.addr.eq(self.addr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.instr_fault)
        m.d.comb += m_out.load.eq(ldst_r.load)
        # NOTE(review): self.priv_mode is not assigned in this class;
        # presumably it is driven from outside - confirm.
        m.d.comb += m_out.priv.eq(self.priv_mode)
        # m_out.priv <= r.priv_mode; TODO
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        """Yield the ports of the PortInterfaceBase parent class."""
        yield from super().ports()
        # TODO: memory ports
422
423
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 whose data wishbone bus is wired directly to a small
    on-chip SRAM.
    """

    def __init__(self, pspec):
        """Create the LoadStore1 plus the backing Memory.

        The memory depth comes from ``pspec.dmem_test_depth`` when that
        attribute exists and is an int; otherwise it defaults to 32.
        """
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory (unless pspec overrides the depth)
        depth = getattr(pspec, "dmem_test_depth", None)
        if not isinstance(depth, int):
            depth = 32
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        """Add the SRAM submodule and connect it to self.dbus."""
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            # one assignment per signal (the original emitted it twice)
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
460