ldst: cleanup debug outputs
[soc.git] / src / soc / fu / ldst / loadstore.py
"""LoadStore1 FSM.

based on microwatt loadstore1.vhdl, but conforming to PortInterface.
unlike loadstore1.vhdl this does *not* deal with actual Load/Store
ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
by way of PortInterface.  PortInterface is where things need extending,
such as adding dcbz support, etc.

this module basically handles "pure" load / store operations, and
its first job is to ask the D-Cache for the data.  if that fails,
the second task (if virtual memory is enabled) is to ask the MMU
to perform a TLB lookup, then to go *back* to the cache and ask again.

Links:

* https://bugs.libre-soc.org/show_bug.cgi?id=465

"""
19
20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
21 Record, Memory,
22 Const)
23 from nmutil.iocontrol import RecordObject
24 from nmutil.util import rising_edge, Display
25 from enum import Enum, unique
26
27 from soc.experiment.dcache import DCache
28 from soc.experiment.pimem import PortInterfaceBase
29 from soc.experiment.mem_types import LoadStore1ToMMUType
30 from soc.experiment.mem_types import MMUToLoadStore1Type
31
32 from soc.minerva.wishbone import make_wb_layout
33 from soc.bus.sram import SRAM
34 from nmutil.util import Display
35
36
@unique
class State(Enum):
    """States of the LoadStore1 finite state machine.

    IDLE       -- ready for instruction
    ACK_WAIT   -- waiting for ack from dcache
    MMU_LOOKUP -- waiting for MMU to look up translation
    TLBIE_WAIT -- waiting for MMU to finish doing a tlbie
    """
    IDLE = 0
    ACK_WAIT = 1
    MMU_LOOKUP = 2
    TLBIE_WAIT = 3
43
44
# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    """Record bundling the fields of one load/store request.

    Fields are declared in a fixed order; two instances created from this
    class therefore share the same layout and can be copied with ``eq``.
    """

    def __init__(self, name=None):
        super().__init__(name=name)

        # operation type
        self.load = Signal()
        self.dcbz = Signal()
        # target address and per-byte select mask
        self.addr = Signal(64)
        # (store_data is deliberately not part of the request: per the
        #  original note it "is already sync (on a delay)")
        self.byte_sel = Signal(8)
        # access attributes
        self.nc = Signal()          # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        # misalignment flag carried along with the request
        self.align_intr = Signal()
60
# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """Load/store FSM gluing a PortInterface to the DCache and the MMU.

    Requests arrive combinatorially through the ``set_*``/``get_*`` hooks
    (filling in ``self.req``), are forwarded to the DCache on a one-clock
    "blip", and a copy is latched so that the request can be replayed
    after an MMU lookup.
    """

    def __init__(self, pspec):
        """Create the DCache, MMU records, Wishbone record and FSM state.

        pspec attributes read here: ``reg_wid``, ``addr_wid`` and,
        optionally, ``disable_cache``.  pspec is also handed to
        ``make_wb_layout`` to build the data bus record.
        """
        self.pspec = pspec
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in    # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out    # out from dcache is in for LoadStore
        self.m_out = LoadStore1ToMMUType()   # out *to* MMU
        self.m_in = MMUToLoadStore1Type()    # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # DSISR and DAR cached values.  note that the MMU FSM is where
        # these are accessed by OP_MTSPR/OP_MFSPR, on behalf of LoadStore1.
        # by contrast microwatt has the spr set/get done *in* loadstore1.vhdl
        self.dsisr = Signal(64)
        self.dar = Signal(64)

        # state info for LD/ST
        self.done = Signal()
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.byte_sel = Signal(8)
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.state = Signal(State)
        self.instr_fault = Signal()
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.mode_32bit = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)

    def set_wr_addr(self, m, addr, mask, misalign, msr_pr):
        """Present a store request on ``self.req`` (combinatorial).

        Note that this does *not* raise ``d_valid``: for stores that is
        done by ``set_wr_data``.  Always returns None.
        """
        m.d.comb += self.req.load.eq(0)  # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr)   # problem-state ==> virt
        m.d.comb += self.req.align_intr.eq(misalign)

        # dcbz is flagged by the PortInterface rather than passed in
        dcbz = self.pi.is_dcbz
        with m.If(dcbz):
            m.d.comb += Display("set_wr_addr: is_dcbz")
        m.d.comb += self.req.dcbz.eq(dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
        """Present a load request on ``self.req`` and raise ``d_valid``.

        The request is marked non-cacheable when the address matches the
        0xC peripheral window test below, or when the cache is disabled.
        Always returns None.
        """
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1)  # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr)   # problem-state ==> virt
        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None  # FIXME return value

    def set_wr_data(self, m, data, wen):
        """Supply store data and trigger the request "blip".

        ``wen`` is currently unused (see the commented-out line below).
        Returns ``self.done`` as the store-ok indicator.
        """
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
        st_ok = self.done  # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        """Return ``(data, ld_ok)``: the load data and its valid flag."""
        ld_ok = self.done       # indicates read data is valid
        data = self.load_data   # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        """Build the FSM and wire up DCache, MMU and the Wishbone record.

        Starts from the module returned by the base class, adds the DCache
        as a submodule, then:

        * runs the IDLE / ACK_WAIT / MMU_LOOKUP state machine,
        * copies exception information from the MMU to ``pi.exc_o``,
        * connects the DCache wishbone signals to ``self.dbus``,
        * selects between the live request (on the blip) and the latched
          copy ``ldst_r`` when driving the DCache inputs.
        """
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for OP_FETCH_FAILED
        maddr = Signal(64)
        comb += maddr.eq(self.addr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        ldst_r = LDSTRequest("ldst_r")

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If(self.d_validblip & ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req)  # copy of LDSTRequest on "blip"
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        # bit 38 is the "was a store" indicator.  take it
                        # from the latched request: self.load is never
                        # driven in this module.
                        sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    comb += self.done.eq(~mmureq)  # done if not doing MMU
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.addr)
                    # NOTE(review): nesting of the next two statements
                    # relative to the "done" check was reconstructed from
                    # flattened text - confirm against upstream.
                    sync += self.state.eq(State.IDLE)
                    sync += ldst_r.eq(0)
                    with m.If(self.load):
                        comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(1)
                with m.If(m_in.done):
                    with m.If(~self.instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.addr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        comb += d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        sync += Display("MMU_LOOKUP, exception %x", self.addr)
                        # instruction lookup fault: store address in DAR
                        comb += exc.happened.eq(1)
                        sync += self.dar.eq(self.addr)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown
                    comb += exception.eq(1)
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error)
                    # same polarity and source as the cache-paradox case
                    # above: set for a store, clear for a load
                    sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)

            with m.Case(State.TLBIE_WAIT):
                pass

        # alignment error: store address in DAR
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
            sync += self.dar.eq(self.addr)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.wb_out.adr)
        comb += dbus.dat_w.eq(dcache.wb_out.dat)
        comb += dbus.sel.eq(dcache.wb_out.sel)
        comb += dbus.cyc.eq(dcache.wb_out.cyc)
        comb += dbus.stb.eq(dcache.wb_out.stb)
        comb += dbus.we.eq(dcache.wb_out.we)

        comb += dcache.wb_in.dat.eq(dbus.dat_r)
        comb += dcache.wb_in.ack.eq(dbus.ack)
        # stall is only connected when the bus layout provides it
        if hasattr(dbus, "stall"):
            comb += dcache.wb_in.stall.eq(dbus.stall)

        # update out d data when flag set
        with m.If(self.d_w_valid):
            sync += d_out.data.eq(self.store_data)
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
        with m.If(self.d_validblip):
            # on the blip itself: drive the dcache from the live request
            comb += d_out.valid.eq(~exc.happened)
            comb += d_out.load.eq(self.req.load)
            comb += d_out.byte_sel.eq(self.req.byte_sel)
            comb += self.addr.eq(self.req.addr)
            comb += d_out.nc.eq(self.req.nc)
            comb += d_out.priv_mode.eq(self.req.priv_mode)
            comb += d_out.virt_mode.eq(self.req.virt_mode)
            comb += self.align_intr.eq(self.req.align_intr)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",self.req.dcbz,self.req.addr)
            comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            # otherwise: drive the dcache from the latched copy
            comb += d_out.load.eq(ldst_r.load)
            comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            comb += self.addr.eq(ldst_r.addr)
            comb += d_out.nc.eq(ldst_r.nc)
            comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            comb += self.align_intr.eq(ldst_r.align_intr)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",ldst_r.dcbz,ldst_r.addr)
            comb += d_out.dcbz.eq(ldst_r.dcbz)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        comb += self.load_data.eq(d_in.data)
        comb += d_out.addr.eq(self.addr)

        # Update outputs to MMU
        comb += m_out.valid.eq(mmureq)
        comb += m_out.iside.eq(self.instr_fault)
        comb += m_out.load.eq(ldst_r.load)
        # m_out.priv <= r.priv_mode; TODO
        comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        """Yield the ports of the base class (memory ports still TODO)."""
        yield from super().ports()
        # TODO: memory ports
354
355
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 with its data bus wired straight to a small test SRAM."""

    def __init__(self, pspec):
        """Create the backing Memory.

        Depth comes from ``pspec.dmem_test_depth`` when that attribute
        exists and is an int; otherwise a 32-entry memory is used.  Width
        is ``pspec.reg_wid``.
        """
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory unless the pspec asks for something else
        depth = getattr(pspec, "dmem_test_depth", None)
        if not isinstance(depth, int):
            depth = 32
        # NOTE(review): message names a different class than this one;
        # left untouched in case anything matches on the output.
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        """Extend LoadStore1's module with an SRAM attached to ``dbus``."""
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            # initiator -> target (one assignment per signal is sufficient)
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            # target -> initiator
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
392