missed setting of r0_full to zero in dcache. not encountered as
[soc.git] / src / soc / fu / ldst / loadstore.py
1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
12 to perform a TLB lookup, then to go *back* to the cache and ask again.
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """
19
20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
21 Record, Memory,
22 Const)
23 from nmutil.iocontrol import RecordObject
24 from nmutil.util import rising_edge, Display
25 from enum import Enum, unique
26
27 from soc.experiment.dcache import DCache
28 from soc.experiment.pimem import PortInterfaceBase
29 from soc.experiment.mem_types import LoadStore1ToMMUType
30 from soc.experiment.mem_types import MMUToLoadStore1Type
31
32 from soc.minerva.wishbone import make_wb_layout
33 from soc.bus.sram import SRAM
34 from nmutil.util import Display
35
36
@unique
class State(Enum):
    """States of the LoadStore1 FSM (values must stay unique)."""

    # ready for instruction
    IDLE = 0
    # waiting for ack from dcache
    ACK_WAIT = 1
    # waiting for MMU to look up translation
    MMU_LOOKUP = 2
    # waiting for MMU to finish doing a tlbie
    TLBIE_WAIT = 3
43
44
# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    """Record bundling the fields of one load/store request.

    LoadStore1 drives an instance of this combinatorially from its
    set_wr_addr / set_rd_addr hooks and keeps a registered copy of it
    for the duration of the request.
    """
    def __init__(self, name=None):
        RecordObject.__init__(self, name=name)

        self.load = Signal()           # 1 for a load, 0 for a store
        self.dcbz = Signal()           # request is a dcbz
        self.addr = Signal(64)         # request address
        # self.store_data = Signal(64) # this is already sync (on a delay)
        self.byte_sel = Signal(8)      # byte-select mask for the access
        self.nc = Signal()             # non-cacheable access
        self.virt_mode = Signal()      # forwarded to the dcache virt_mode input
        self.priv_mode = Signal()      # forwarded to the dcache priv_mode input
        self.align_intr = Signal()     # request is misaligned
60
# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """FSM sitting between a PortInterface and the D-Cache / MMU.

    Requests arrive through the set_wr_addr / set_rd_addr / set_wr_data
    hooks and are forwarded to the DCache instance created here.  When the
    dcache reports an error that is not a cache paradox, the MMU is asked
    for a translation (m_out.valid) and the dcache request is re-issued
    once the MMU signals done.
    """

    def __init__(self, pspec):
        """Create the dcache, the MMU records and the FSM state signals.

        pspec must provide reg_wid and addr_wid (passed to the base class)
        and is also handed to make_wb_layout.  If pspec has a disable_cache
        attribute equal to True, every request is marked non-cacheable.
        """
        self.pspec = pspec
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in    # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out    # out from dcache is in for LoadStore
        self.m_out = LoadStore1ToMMUType()  # out *to* MMU
        self.m_in = MMUToLoadStore1Type()   # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()      # raised by set_rd_addr / set_wr_data
        self.d_w_valid = Signal()    # raised by set_wr_data only
        self.d_validblip = Signal()  # rising edge of d_valid

        # state info for LD/ST
        self.done = Signal()
        self.done_delay = Signal()   # self.done registered by one clock
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.load_data_delay = Signal(64)  # load_data registered by one clock
        self.byte_sel = Signal(8)
        #self.xerc        : xer_common_t;
        #self.reserve     = Signal()
        #self.atomic      = Signal()
        #self.atomic_last = Signal()
        #self.rc          = Signal()
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.state = Signal(State)
        self.instr_fault = Signal()
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.mode_32bit  = Signal()
        #self.intr_vec    : integer range 0 to 16#fff#;
        #self.nia         = Signal(64)
        #self.srr1        = Signal(16)

    def set_wr_addr(self, m, addr, mask, misalign, msr_pr, is_dcbz):
        """Drive self.req combinatorially for a store (or dcbz) request.

        Does not raise d_valid: that is done by set_wr_data.
        Always returns None.
        """
        m.d.comb += self.req.load.eq(0) # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.dcbz.eq(is_dcbz)

        # m.d.comb += Display("set_wr_addr %i dcbz %i",addr,is_dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
        """Drive self.req combinatorially for a load and raise d_valid.

        Always returns None (see the FIXME on the return statement).
        """
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1) # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem  ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        # NOTE(review): addr[28:] takes every bit from 28 upwards, so this
        # only matches 0xCxxx_xxxx if any bits above 31 are zero - confirm
        # against the width of addr supplied by the caller.
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None #FIXME return value

    def set_wr_data(self, m, data, wen):
        """Present store data and raise d_valid / d_w_valid.

        wen is currently unused (its only use is commented out below).
        Returns self.done as the "store ok" indicator.
        """
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
        st_ok = self.done # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        """Return (data, ld_ok): the one-clock-delayed load data and done."""
        ld_ok = self.done_delay # indicates read data is valid
        data = self.load_data_delay # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        """Build the FSM and the dcache / MMU / wishbone wiring.

        Extends the Module returned by the base class elaborate(), adds the
        DCache as a submodule and returns the Module.  Statement order in
        here matters: a later assignment to the same signal takes priority
        over an earlier one.
        """
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # microwatt takes one more cycle before next operation can be issued
        sync += self.done_delay.eq(self.done)
        sync += self.load_data_delay.eq(self.load_data)

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened   # alias: same signal as exc.happened
        mmureq = Signal()          # raised for the cycle an MMU lookup starts

        # copy of address, but gets over-ridden for OP_FETCH_FAILED
        # NOTE(review): no such override is visible in this method - confirm.
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.addr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        # registered copy of self.req, captured on the blip (see IDLE)
        ldst_r = LDSTRequest("ldst_r")
        comb += Display("MMUTEST: LoadStore1 d_in.error=%i",d_in.error)

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If(self.d_validblip & ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
#                   sync += Display("validblip self.req.virt_mode=%i",
#                   self.req.virt_mode)
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += Display("MMUTEST: ACK_WAIT")
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += Display("cache error -> update dsisr")
                        #sync += self.dsisr[63 - 38].eq(~self.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        #sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    m.d.comb += self.done.eq(~mmureq) # done if not doing MMU
                with m.If(self.done):
                    sync += Display("ACK_WAIT, done %x", self.addr)
                    sync += self.state.eq(State.IDLE)
                    sync += ldst_r.eq(0)
                    # NOTE(review): self.load is not driven anywhere in the
                    # code visible here, so this If presumably never fires
                    # and load_data relies on the unconditional assignment
                    # further down - confirm.
                    with m.If(self.load):
                        m.d.comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(1)
                with m.If(m_in.done):
                    with m.If(~self.instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.addr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        sync += Display("MMU_LOOKUP, exception %x", self.addr)
                        # instruction lookup fault: store address in DAR
                        comb += exc.happened.eq(1) # reason = MMU_LOOKUP
                        # mark dar as updated ?
                        comb += self.pi.dar_o.eq(self.addr)
                        sync += self.state.eq(State.IDLE)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown
                    comb += exception.eq(1)
                    sync += Display("MMU RADIX exception thrown")
                    #sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    #sync += self.dsisr[63 - 36].eq(m_in.perm_error)
                    #sync += self.dsisr[63 - 38].eq(self.load)
                    #sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    #sync += self.dsisr[63 - 45].eq(m_in.rc_error)
                    sync += self.state.eq(State.IDLE)

            # placeholder: no behaviour implemented for this state yet
            with m.Case(State.TLBIE_WAIT):
                pass

        # alignment error: store address in DAR
        with m.If(self.align_intr):
            comb += exc.happened.eq(1) # reason = alignment
            sync += Display("alignment error: store addr in DAR %x", self.addr)
            comb += self.pi.dar_o.eq(self.addr)

        # when done or exception, return to idle state
        # (placed after the Switch so it overrides the per-state assignments)
        with m.If(self.done | exception):
            sync += self.state.eq(State.IDLE)
            comb += self.busy.eq(0)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += self.align_intr.eq(self.req.align_intr)
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.wb_out.adr)
        comb += dbus.dat_w.eq(dcache.wb_out.dat)
        comb += dbus.sel.eq(dcache.wb_out.sel)
        comb += dbus.cyc.eq(dcache.wb_out.cyc)
        comb += dbus.stb.eq(dcache.wb_out.stb)
        comb += dbus.we.eq(dcache.wb_out.we)

        comb += dcache.wb_in.dat.eq(dbus.dat_r)
        comb += dcache.wb_in.ack.eq(dbus.ack)
        # stall is only wired up when the bus layout actually has it
        if hasattr(dbus, "stall"):
            comb += dcache.wb_in.stall.eq(dbus.stall)

        # update out d data when flag set
        with m.If(self.d_w_valid):
            m.d.sync += d_out.data.eq(self.store_data)
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
        with m.If(self.d_validblip):
            # on the blip cycle the dcache sees the live request (self.req)
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.addr.eq(self.req.addr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",
            #self.req.dcbz,self.req.addr)
            m.d.comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            # otherwise the dcache sees the registered copy (ldst_r)
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.addr.eq(ldst_r.addr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
            #ldst_r.dcbz,ldst_r.addr)
            m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        m.d.comb += self.load_data.eq(d_in.data)
        m.d.comb += d_out.addr.eq(self.addr)

        # Update outputs to MMU
        # NOTE(review): self.instr_fault and self.tlbie are not driven in
        # the code visible here - confirm where they are meant to be set.
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.instr_fault)
        m.d.comb += m_out.load.eq(ldst_r.load)
        # m_out.priv <= r.priv_mode; TODO
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        """Yield the ports of the base class (memory ports are still TODO)."""
        yield from super().ports()
        # TODO: memory ports
368
369
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 whose wishbone data bus is wired directly to a test SRAM.

    The backing Memory is pspec.reg_wid bits wide.  Its depth comes from
    pspec.dmem_test_depth when that attribute exists and is an int,
    otherwise it defaults to 32 entries.
    """

    def __init__(self, pspec):
        """Set up LoadStore1 and create the Memory backing the test SRAM."""
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory unless an integer depth is supplied
        depth = getattr(pspec, "dmem_test_depth", None)
        if not isinstance(depth, int):
            depth = 32
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        """Extend LoadStore1.elaborate() with an SRAM attached to self.dbus.

        Returns the Module produced by the base class with the SRAM added
        as the "sram" submodule.
        """
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStore1 to the SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            # initiator -> target (each signal is connected exactly once)
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            # target -> initiator
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
406