loadstore: add done_delay
[soc.git] / src/soc/fu/ldst/loadstore.py
1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
12 to perform a TLB, then to go *back* to the cache and ask again.
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """
19
from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
                    Record, Memory,
                    Const)
from nmutil.iocontrol import RecordObject
from nmutil.util import rising_edge, Display
from enum import Enum, unique

from soc.experiment.dcache import DCache
from soc.experiment.pimem import PortInterfaceBase
from soc.experiment.mem_types import LoadStore1ToMMUType
from soc.experiment.mem_types import MMUToLoadStore1Type

from soc.minerva.wishbone import make_wb_layout
from soc.bus.sram import SRAM


@unique
class State(Enum):
    IDLE = 0 # ready for instruction
    ACK_WAIT = 1 # waiting for ack from dcache
    MMU_LOOKUP = 2 # waiting for MMU to look up translation
    TLBIE_WAIT = 3 # waiting for MMU to finish doing a tlbie


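# --- illustrative sketch only, not part of the design -----------------------
# shows the nmigen idiom that elaborate() below relies on: a Signal shaped by
# the State enum ("state = Signal(State)") is decoded with m.Switch/m.Case.
# the _demo_* names are hypothetical.
def _demo_state_switch(m, state, busy):
    with m.Switch(state):
        with m.Case(State.IDLE):
            m.d.comb += busy.eq(0)
        with m.Case(State.ACK_WAIT, State.MMU_LOOKUP, State.TLBIE_WAIT):
            m.d.comb += busy.eq(1)
# -----------------------------------------------------------------------------

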
# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    def __init__(self, name=None):
        RecordObject.__init__(self, name=name)

        self.load = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        # self.store_data = Signal(64) # this is already sync (on a delay)
        self.byte_sel = Signal(8)
        self.nc = Signal() # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.align_intr = Signal()

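# --- illustrative sketch only, not part of the design -----------------------
# because LDSTRequest is a RecordObject, a whole request can be copied with a
# single .eq(), which is how the FSM below latches self.req into ldst_r on
# the "blip".  the _demo_* names are hypothetical.
def _demo_capture_request(m, req_comb):
    req_latched = LDSTRequest(name="demo_latched")
    m.d.sync += req_latched.eq(req_comb) # copies every field of the Record
    return req_latched
# -----------------------------------------------------------------------------

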
# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    def __init__(self, pspec):
        self.pspec = pspec
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out # out from dcache is in for LoadStore
        self.m_out = LoadStore1ToMMUType() # out *to* MMU
        self.m_in = MMUToLoadStore1Type() # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # state info for LD/ST
        self.done = Signal()
        self.done_delay = Signal()
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.byte_sel = Signal(8)
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal() # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.state = Signal(State)
        self.instr_fault = Signal()
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.mode_32bit = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)

    def set_wr_addr(self, m, addr, mask, misalign, msr_pr, is_dcbz):
        m.d.comb += self.req.load.eq(0) # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.dcbz.eq(is_dcbz)

        # m.d.comb += Display("set_wr_addr %i dcbz %i", addr, is_dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1) # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
        # BAD HACK! disable caching on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
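        # (addr[28:] == 0xc matches addresses 0xC000_0000..0xCFFF_FFFF only,
        #  since the compare zero-extends 0xc across the upper address bits)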
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None #FIXME return value

    def set_wr_data(self, m, data, wen):
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
        st_ok = self.done # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        ld_ok = self.done_delay # indicates read data is valid
        data = self.load_data # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

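        # register a one-cycle-delayed copy of "done": get_rd_data() above
        # reports done_delay (not done) as ld_ok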
        sync += self.done_delay.eq(self.done)

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for OP_FETCH_FAILED
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.addr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
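        # latched copy of the request: captured from self.req in IDLE on the
        # "blip", cleared again once the dcache/MMU interaction is over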
        ldst_r = LDSTRequest("ldst_r")
        comb += Display("MMUTEST: LoadStore1 d_in.error=%i", d_in.error)

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If(self.d_validblip & ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
                    # sync += Display("validblip self.req.virt_mode=%i",
                    #                 self.req.virt_mode)
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += Display("MMUTEST: ACK_WAIT")
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += Display("cache error -> update dsisr")
                        #sync += self.dsisr[63 - 38].eq(~self.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        #sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    m.d.comb += self.done.eq(~mmureq) # done if not doing MMU
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.addr)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        with m.If(self.load):
                            m.d.comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(1)
                with m.If(m_in.done):
                    with m.If(~self.instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.addr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        sync += Display("MMU_LOOKUP, exception %x", self.addr)
                        # instruction lookup fault: store address in DAR
                        comb += exc.happened.eq(1) # reason = MMU_LOOKUP
                        # mark dar as updated ?
                        comb += self.pi.dar_o.eq(self.addr)
                        sync += self.state.eq(State.IDLE)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown
                    comb += exception.eq(1)
                    sync += Display("MMU RADIX exception thrown")
                    #sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    #sync += self.dsisr[63 - 36].eq(m_in.perm_error)
                    #sync += self.dsisr[63 - 38].eq(self.load)
                    #sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    #sync += self.dsisr[63 - 45].eq(m_in.rc_error)
                    sync += self.state.eq(State.IDLE)

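            # placeholder: nothing in this module transitions the FSM into
            # TLBIE_WAIT yet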
            with m.Case(State.TLBIE_WAIT):
                pass

        # alignment error: store address in DAR
        with m.If(self.align_intr):
            comb += exc.happened.eq(1) # reason = alignment
            sync += Display("alignment error: store addr in DAR %x", self.addr)
            comb += self.pi.dar_o.eq(self.addr)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
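        # (note the field-name mismatch: dcache's wb records carry a single
        #  "dat" field in each direction, while the nmigen-style dbus Record
        #  splits it into dat_w / dat_r)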
        comb += dbus.adr.eq(dcache.wb_out.adr)
        comb += dbus.dat_w.eq(dcache.wb_out.dat)
        comb += dbus.sel.eq(dcache.wb_out.sel)
        comb += dbus.cyc.eq(dcache.wb_out.cyc)
        comb += dbus.stb.eq(dcache.wb_out.stb)
        comb += dbus.we.eq(dcache.wb_out.we)

        comb += dcache.wb_in.dat.eq(dbus.dat_r)
        comb += dcache.wb_in.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.wb_in.stall.eq(dbus.stall)

        # update out d data when flag set
        with m.If(self.d_w_valid):
            m.d.sync += d_out.data.eq(self.store_data)
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
        with m.If(self.d_validblip):
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.addr.eq(self.req.addr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            m.d.comb += self.align_intr.eq(self.req.align_intr)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",
            #                    self.req.dcbz, self.req.addr)
            m.d.comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.addr.eq(ldst_r.addr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            m.d.comb += self.align_intr.eq(ldst_r.align_intr)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
            #                    ldst_r.dcbz, ldst_r.addr)
            m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        m.d.comb += self.load_data.eq(d_in.data)
        m.d.comb += d_out.addr.eq(self.addr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.instr_fault)
        m.d.comb += m_out.load.eq(ldst_r.load)
        # m_out.priv <= r.priv_mode; TODO
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        yield from super().ports()
        # TODO: memory ports


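# --- illustrative sketch only, not part of the design -----------------------
# the single-clock "blip" idea used above (d_validblip) expressed in plain
# nmigen: a pulse that is high for exactly one cycle when a level goes 0->1.
# this is the concept behind the rising_edge() helper used in elaborate();
# the _Demo* name is hypothetical.
class _DemoBlip(Elaboratable):
    def __init__(self):
        self.level = Signal() # slowly-changing request "level"
        self.blip = Signal()  # one-cycle pulse on the rising edge of level

    def elaborate(self, platform):
        m = Module()
        delayed = Signal()
        m.d.sync += delayed.eq(self.level) # level, one clock late
        m.d.comb += self.blip.eq(self.level & ~delayed)
        return m
# -----------------------------------------------------------------------------

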
class TestSRAMLoadStore1(LoadStore1):
    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory
        if (hasattr(pspec, "dmem_test_depth") and
            isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMLoadStore1 depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
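

# --- illustrative sketch only, not part of the design -----------------------
# nmigen's Memory accepts an "init" list, so a test variant could pre-fill
# the SRAM contents rather than starting from zero.  the _Demo* class name
# and the init_words parameter are hypothetical.
class _DemoPreloadedSRAMLoadStore1(TestSRAMLoadStore1):
    def __init__(self, pspec, init_words):
        super().__init__(pspec)
        # replace the zeroed Memory created above with a pre-initialised one
        self.mem = Memory(width=pspec.reg_wid, depth=len(init_words),
                          init=init_words)
# -----------------------------------------------------------------------------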