remove unneeded state in LoadStore1
[soc.git] / src / soc / fu / ldst / loadstore.py
1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
12 to perform a TLB lookup, then to go *back* to the cache and ask again.
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """
19
20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
21 Record, Memory,
22 Const)
23 from nmutil.iocontrol import RecordObject
24 from nmutil.util import rising_edge, Display
25 from enum import Enum, unique
26
27 from soc.experiment.dcache import DCache
28 from soc.experiment.icache import ICache
29 from soc.experiment.pimem import PortInterfaceBase
30 from soc.experiment.mem_types import LoadStore1ToMMUType
31 from soc.experiment.mem_types import MMUToLoadStore1Type
32
33 from soc.minerva.wishbone import make_wb_layout
34 from soc.bus.sram import SRAM
35 from nmutil.util import Display
36
37
@unique
class State(Enum):
    """States of the LoadStore1 FSM (see the Switch in LoadStore1.elaborate)."""
    IDLE = 0        # ready for instruction
    ACK_WAIT = 1    # waiting for ack from dcache
    MMU_LOOKUP = 2  # waiting for MMU to look up translation
43
44
# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    """Record holding the fields of one load/store request.

    LoadStore1 keeps one instance (``req``) that set_rd_addr/set_wr_addr
    drive combinatorially, and a second one (``ldst_r``) inside elaborate()
    that latches a copy of ``req`` when the request "blip" is seen.
    """

    def __init__(self, name=None):
        RecordObject.__init__(self, name=name)

        # field order is kept as-is: it determines the record layout
        self.load = Signal()           # 1 for load, 0 for store
        self.dcbz = Signal()
        self.addr = Signal(64)
        # self.store_data = Signal(64) # this is already sync (on a delay)
        self.byte_sel = Signal(8)
        self.nc = Signal()             # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.mode_32bit = Signal()     # XXX UNUSED AT PRESENT
        self.align_intr = Signal()
61
62
# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """FSM gluing a PortInterface to the DCache, ICache and MMU.

    The PortInterfaceBase hooks (set_wr_addr, set_rd_addr, set_wr_data,
    get_rd_data, external_busy) fill in ``self.req`` and a handful of
    control signals combinatorially; elaborate() then runs a three-state
    FSM (State.IDLE / ACK_WAIT / MMU_LOOKUP) that presents the request to
    the dcache and, on a dcache error that is not a cache paradox, asks
    the MMU for a translation before retrying.
    """

    def __init__(self, pspec):
        """Create the caches, MMU records and state signals.

        pspec must provide ``reg_wid`` and ``addr_wid``; an optional
        ``disable_cache`` attribute equal to True forces every request
        to be marked non-cacheable.
        """
        self.pspec = pspec
        # equivalent to: hasattr(pspec, "disable_cache") and it == True
        self.disable_cache = getattr(pspec, "disable_cache", False) == True
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        self.icache = ICache(pspec)
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in   # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out   # out from dcache is in for LoadStore
        self.i_out = self.icache.i_in   # in to icache is out for LoadStore
        self.i_in = self.icache.i_out   # out from icache is in for LoadStore
        self.m_out = LoadStore1ToMMUType("m_out")  # out *to* MMU
        self.m_in = MMUToLoadStore1Type("m_in")    # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))
        self.ibus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # state info for LD/ST
        self.done = Signal()
        self.done_delay = Signal()
        # latch most of the input request
        # NOTE(review): load, tlbie, priv_mode, nc, virt_mode, mode_32bit,
        # dcbz and byte_sel below are never assigned inside this class;
        # the request fields actually driven live in self.req / ldst_r.
        # Confirm whether anything outside this file drives them.
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        self.maddr = Signal(64)
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.load_data_delay = Signal(64)
        self.byte_sel = Signal(8)
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal()            # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.mode_32bit = Signal()    # XXX UNUSED AT PRESENT
        self.state = Signal(State)
        self.instr_fault = Signal()   # indicator to request i-cache MMU lookup
        self.r_instr_fault = Signal() # accessed in external_busy
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)
        # use these to set the dsisr or dar respectively
        self.mmu_set_spr = Signal()
        self.mmu_set_dsisr = Signal()
        self.mmu_set_dar = Signal()
        self.sprval_in = Signal(64)

        # ONLY access these read-only, do NOT attempt to change
        self.dsisr = Signal(32)
        self.dar = Signal(64)

    # when external_busy set, do not allow PortInterface to proceed
    def external_busy(self, m):
        """Return an expression that is high while an instruction-fault
        MMU lookup is requested (instr_fault) or in flight (r_instr_fault).
        """
        return self.instr_fault | self.r_instr_fault

    def set_wr_addr(self, m, addr, mask, misalign, msr, is_dcbz):
        """Drive self.req combinatorially for a store (or dcbz) request.

        mask becomes the byte-select, misalign the alignment-interrupt
        flag; privilege / virtual / 32-bit mode are derived from msr.
        Always returns None.
        """
        m.d.comb += self.req.load.eq(0)  # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr.pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr.dr)   # DR ==> virt
        m.d.comb += self.req.mode_32bit.eq(~msr.sf) # not-sixty-four ==> 32bit
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.dcbz.eq(is_dcbz)

        # m.d.comb += Display("set_wr_addr %i dcbz %i",addr,is_dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr):
        """Drive self.req combinatorially for a load request and raise
        d_valid (which elaborate() turns into the request "blip").

        Always returns None.
        """
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1)  # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr.pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr.dr)   # DR ==> virt
        m.d.comb += self.req.mode_32bit.eq(~msr.sf) # not-sixty-four ==> 32bit
        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        # (addr[28:] takes *all* bits from 28 upwards, so any bit set above
        # bit 31 makes the comparison fail)
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None  #FIXME return value

    def set_wr_data(self, m, data, wen):
        """Present store data: raises d_valid and d_w_valid and puts
        data on store_data.  wen is currently unused.

        Returns self.done as the "store ok" indicator.
        """
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
        st_ok = self.done  # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        """Return (data, ld_ok): the one-cycle-delayed load data and the
        one-cycle-delayed done flag that qualifies it.
        """
        ld_ok = self.done_delay      # indicates read data is valid
        data = self.load_data_delay  # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        """Build the FSM and the dcache / MMU / wishbone wiring.

        NOTE: statement order is significant here - where the same signal
        is assigned more than once, the later assignment takes priority -
        so statements must not be reordered casually.
        """
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # microwatt takes one more cycle before next operation can be issued
        sync += self.done_delay.eq(self.done)
        sync += self.load_data_delay.eq(self.load_data)

        # create dcache and icache module
        m.submodules.dcache = dcache = self.dcache
        m.submodules.icache = self.icache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for instr_fault
        maddr = Signal(64)
        comb += maddr.eq(self.addr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        ldst_r = LDSTRequest("ldst_r")  # latched copy of self.req
        comb += Display("MMUTEST: LoadStore1 d_in.error=%i",d_in.error)

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If((self.d_validblip | self.instr_fault) &
                          ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
                    # sync += Display("validblip self.req.virt_mode=%i",
                    #                 self.req.virt_mode)
                    with m.If(self.instr_fault):
                        comb += mmureq.eq(1)
                        sync += self.r_instr_fault.eq(1)
                        comb += maddr.eq(self.maddr)
                        # overrides the ACK_WAIT assignment above
                        sync += self.state.eq(State.MMU_LOOKUP)
                    with m.Else():
                        sync += self.r_instr_fault.eq(0)
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += Display("MMUTEST: ACK_WAIT")
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += Display("cache error -> update dsisr")
                        # NOTE(review): self.load is never assigned in this
                        # class, so this looks like it always records 1;
                        # ldst_r.load may be what was intended - confirm.
                        sync += self.dsisr[63 - 38].eq(~self.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)
                        sync += self.r_instr_fault.eq(0)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    comb += self.done.eq(~mmureq)  # done if not doing MMU
                    # NOTE(review): the nesting of the next four statements
                    # was reconstructed from a listing that had lost its
                    # indentation; confirm against the repository whether
                    # the state/ldst_r updates belong inside If(self.done).
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.addr)
                    sync += self.state.eq(State.IDLE)
                    sync += ldst_r.eq(0)
                    # NOTE(review): self.load is never assigned in this
                    # class; load_data is in any case driven unconditionally
                    # further down, which takes priority over this.
                    with m.If(self.load):
                        comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(~exception)
                with m.If(m_in.done):
                    with m.If(~self.r_instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.addr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        comb += d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        sync += self.state.eq(State.IDLE)
                        sync += self.r_instr_fault.eq(0)
                        comb += self.done.eq(1)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown. XXX
                    # TODO: critical that the write here has to
                    # notify the MMU FSM of the change to dsisr
                    comb += exception.eq(1)
                    comb += self.done.eq(1)
                    sync += Display("MMU RADIX exception thrown")
                    sync += Display("TODO: notify MMU of change to dsisr")
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error) # noexec
                    # NOTE(review): self.load is never assigned in this
                    # class - see the matching note in ACK_WAIT.
                    sync += self.dsisr[63 - 38].eq(~self.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)
                    sync += self.state.eq(State.IDLE)
                    # exception thrown, clear out instruction fault state
                    sync += self.r_instr_fault.eq(0)

        # MMU FSM communicating a request to update DSISR or DAR (OP_MTSPR)
        with m.If(self.mmu_set_spr):
            with m.If(self.mmu_set_dsisr):
                sync += self.dsisr.eq(self.sprval_in)
            with m.If(self.mmu_set_dar):
                sync += self.dar.eq(self.sprval_in)

        # hmmm, alignment occurs in set_rd_addr/set_wr_addr, note exception
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
        # check for updating DAR
        with m.If(exception):
            sync += Display("exception %x", self.addr)
            # alignment error: store address in DAR
            with m.If(self.align_intr):
                sync += Display("alignment error: addr in DAR %x", self.addr)
                sync += self.dar.eq(self.addr)
            with m.Elif(~self.r_instr_fault):
                sync += Display("not instr fault, addr in DAR %x", self.addr)
                sync += self.dar.eq(self.addr)

        # when done or exception, return to idle state
        with m.If(self.done | exception):
            sync += self.state.eq(State.IDLE)
            comb += self.busy.eq(0)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += self.align_intr.eq(self.req.align_intr)
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.r_instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.bus.adr)
        comb += dbus.dat_w.eq(dcache.bus.dat_w)
        comb += dbus.sel.eq(dcache.bus.sel)
        comb += dbus.cyc.eq(dcache.bus.cyc)
        comb += dbus.stb.eq(dcache.bus.stb)
        comb += dbus.we.eq(dcache.bus.we)

        comb += dcache.bus.dat_r.eq(dbus.dat_r)
        comb += dcache.bus.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.bus.stall.eq(dbus.stall)

        # update out d data when flag set
        with m.If(self.d_w_valid):
            sync += d_out.data.eq(self.store_data)
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
        with m.If(self.d_validblip):
            # on the blip cycle, feed the dcache straight from self.req
            comb += d_out.valid.eq(~exc.happened)
            comb += d_out.load.eq(self.req.load)
            comb += d_out.byte_sel.eq(self.req.byte_sel)
            comb += self.addr.eq(self.req.addr)
            comb += d_out.nc.eq(self.req.nc)
            comb += d_out.priv_mode.eq(self.req.priv_mode)
            comb += d_out.virt_mode.eq(self.req.virt_mode)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",
            #self.req.dcbz,self.req.addr)
            comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            # otherwise use the latched copy of the request
            comb += d_out.load.eq(ldst_r.load)
            comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            comb += self.addr.eq(ldst_r.addr)
            comb += d_out.nc.eq(ldst_r.nc)
            comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
            #ldst_r.dcbz,ldst_r.addr)
            comb += d_out.dcbz.eq(ldst_r.dcbz)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        comb += self.load_data.eq(d_in.data)
        comb += d_out.addr.eq(self.addr)

        # Update outputs to MMU
        comb += m_out.valid.eq(mmureq)
        comb += m_out.iside.eq(self.instr_fault)
        comb += m_out.load.eq(ldst_r.load)
        # NOTE(review): self.priv_mode and self.tlbie are never assigned
        # in this class; ldst_r.priv_mode may be what was intended for
        # m_out.priv - confirm.
        comb += m_out.priv.eq(self.priv_mode)
        # m_out.priv <= r.priv_mode; TODO
        comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        """Yield the ports of the PortInterfaceBase parent."""
        yield from super().ports()
        # TODO: memory ports
417 # TODO: memory ports
418
419
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 with its data wishbone bus wired directly to a small
    SRAM, for use in tests.
    """

    def __init__(self, pspec):
        """Create the backing Memory.

        Depth is taken from pspec.dmem_test_depth when that attribute
        exists and is an int, otherwise 32; width is pspec.reg_wid.
        """
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory (unless overridden by dmem_test_depth)
        depth = getattr(pspec, "dmem_test_depth", None)
        if not isinstance(depth, int):
            depth = 32
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        """Extend LoadStore1.elaborate by attaching an SRAM to self.dbus."""
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            # (the original repeated this assignment twice; once suffices)
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
456