loadstore.py: add debug output for dcbz
[soc.git] / src / soc / fu / ldst / loadstore.py
1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
12 to perform a TLB lookup, then to go *back* to the cache and ask again.
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """
19
20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
21 Record, Memory,
22 Const)
23 from nmutil.iocontrol import RecordObject
24 from nmutil.util import rising_edge, Display
25 from enum import Enum, unique
26
27 from soc.experiment.dcache import DCache
28 from soc.experiment.pimem import PortInterfaceBase
29 from soc.experiment.mem_types import LoadStore1ToMMUType
30 from soc.experiment.mem_types import MMUToLoadStore1Type
31
32 from soc.minerva.wishbone import make_wb_layout
33 from soc.bus.sram import SRAM
34 from nmutil.util import Display
35
36
@unique
class State(Enum):
    """States of the LoadStore1 FSM.

    LoadStore1 holds its current state in a Signal shaped by this Enum.
    """
    IDLE = 0        # ready for instruction
    ACK_WAIT = 1    # waiting for ack from dcache
    MMU_LOOKUP = 2  # waiting for MMU to look up translation
    TLBIE_WAIT = 3  # waiting for MMU to finish doing a tlbie
43
44
# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    """Record grouping the fields that describe one load/store request."""

    def __init__(self, name=None):
        super().__init__(name=name)

        # NOTE: keep the declaration order below unchanged; presumably
        # RecordObject derives its layout from assignment order - confirm
        # before reordering.
        self.load = Signal()        # 1 = load, 0 = store
        self.dcbz = Signal()        # request is a dcbz
        self.addr = Signal(64)
        # store data is deliberately not part of the request: it is
        # already sync (on a delay)
        self.byte_sel = Signal(8)
        self.nc = Signal()          # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.align_intr = Signal()  # request was flagged as misaligned
60
# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """Load/Store FSM sitting between PortInterface, the D-Cache and the MMU.

    Requests are described through the set_rd_addr / set_wr_addr /
    set_wr_data hooks, which drive self.req and raise self.d_valid.
    A rising edge on d_valid ("blip") presents the request to the DCache
    and latches a copy of it; the FSM in elaborate() then waits for the
    cache to answer, falling back to an MMU lookup when the cache reports
    a (non-paradox) error, and retrying the cache access afterwards.
    """

    def __init__(self, pspec):
        """Create the DCache, the MMU interface records and all state.

        pspec must provide reg_wid and addr_wid.  If it also carries
        disable_cache == True, every request is marked non-cacheable.
        """
        self.pspec = pspec
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in     # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out     # out from dcache is in for LoadStore
        self.m_out = LoadStore1ToMMUType()  # out *to* MMU
        self.m_in = MMUToLoadStore1Type()   # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # DSISR and DAR cached values.  note that the MMU FSM is where
        # these are accessed by OP_MTSPR/OP_MFSPR, on behalf of LoadStore1.
        # by contrast microwatt has the spr set/get done *in* loadstore1.vhdl
        self.dsisr = Signal(64)
        self.dar = Signal(64)

        # state info for LD/ST
        self.done = Signal()
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.byte_sel = Signal(8)
        # microwatt loadstore1 fields with no equivalent here (yet):
        # xerc, reserve, atomic, atomic_last, rc
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.state = Signal(State)
        self.instr_fault = Signal()
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        # likewise not (yet) present: mode_32bit, intr_vec, nia, srr1

    def set_wr_addr(self, m, addr, mask, misalign, msr_pr, is_dcbz):
        """Describe a store (or dcbz) request on self.req.

        Unlike set_rd_addr this does not raise d_valid: for writes that
        is done by set_wr_data.  Always returns None.
        """
        m.d.comb += self.req.load.eq(0)  # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr)   # problem-state ==> virt
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.dcbz.eq(is_dcbz)

        # debug output
        m.d.comb += Display("set_wr_addr %i dcbz %i", addr, is_dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
        """Describe a load request on self.req and raise d_valid.

        Always returns None.
        """
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1)  # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr)   # problem-state ==> virt
        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        # (the comparison covers every address bit from bit 28 upwards)
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None  # FIXME return value

    def set_wr_data(self, m, data, wen):
        """Present store data and raise d_valid; returns the 'done' signal.

        wen is currently unused.
        """
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        # driving d_out.byte_sel from wen here might not be needed
        st_ok = self.done  # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        """Return (load_data, done): the read data and its valid flag."""
        ld_ok = self.done      # indicates read data is valid
        data = self.load_data  # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        """Build the FSM and wire up DCache, MMU and the data wishbone bus.

        FSM summary:

        * IDLE: on a request blip (and no exception) latch the request
          and go to ACK_WAIT.
        * ACK_WAIT: on a dcache error either raise an exception (cache
          paradox) or ask the MMU and go to MMU_LOOKUP; on dcache valid
          return to IDLE.
        * MMU_LOOKUP: when the MMU is done, retry the dcache access
          (or flag an instruction fault); on MMU error raise an
          exception and record the cause in DSISR.
        * TLBIE_WAIT: not implemented.
        """
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for OP_FETCH_FAILED
        maddr = Signal(64)
        comb += maddr.eq(self.addr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        ldst_r = LDSTRequest("ldst_r")

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If(self.d_validblip & ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req)  # copy of LDSTRequest on "blip"
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += self.dsisr[63 - 38].eq(~self.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    comb += self.done.eq(~mmureq)  # done if not doing MMU
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.addr)
                    sync += self.state.eq(State.IDLE)
                    sync += ldst_r.eq(0)
                    with m.If(self.load):
                        comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(1)
                with m.If(m_in.done):
                    with m.If(~self.instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.addr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        sync += Display("MMU_LOOKUP, exception %x", self.addr)
                        # instruction lookup fault: store address in DAR
                        comb += exc.happened.eq(1)
                        sync += self.dar.eq(self.addr)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown
                    comb += exception.eq(1)
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error)
                    # DSISR bit 38 flags a *store*: same polarity as the
                    # cache-paradox path above (and microwatt's
                    # "not r.load").
                    # NOTE(review): self.load is not driven anywhere in
                    # this class; ldst_r.load may be the intended source
                    # for both DSISR[38] writes - confirm.
                    sync += self.dsisr[63 - 38].eq(~self.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)

            with m.Case(State.TLBIE_WAIT):
                pass

        # alignment error: store address in DAR
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
            sync += self.dar.eq(self.addr)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.wb_out.adr)
        comb += dbus.dat_w.eq(dcache.wb_out.dat)
        comb += dbus.sel.eq(dcache.wb_out.sel)
        comb += dbus.cyc.eq(dcache.wb_out.cyc)
        comb += dbus.stb.eq(dcache.wb_out.stb)
        comb += dbus.we.eq(dcache.wb_out.we)

        comb += dcache.wb_in.dat.eq(dbus.dat_r)
        comb += dcache.wb_in.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.wb_in.stall.eq(dbus.stall)

        # update out d data when flag set
        with m.If(self.d_w_valid):
            sync += d_out.data.eq(self.store_data)
        # (clearing d_out.data to 0 in an Else branch was tried: the
        #  unit test passes with that change)

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
        with m.If(self.d_validblip):
            # on the blip itself, feed the dcache straight from the request
            comb += self.d_out.valid.eq(~exc.happened)
            comb += d_out.load.eq(self.req.load)
            comb += d_out.byte_sel.eq(self.req.byte_sel)
            comb += self.addr.eq(self.req.addr)
            comb += d_out.nc.eq(self.req.nc)
            comb += d_out.priv_mode.eq(self.req.priv_mode)
            comb += d_out.virt_mode.eq(self.req.virt_mode)
            comb += self.align_intr.eq(self.req.align_intr)
            comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            # otherwise use the copy latched in ldst_r
            comb += d_out.load.eq(ldst_r.load)
            comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            comb += self.addr.eq(ldst_r.addr)
            comb += d_out.nc.eq(ldst_r.nc)
            comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            comb += self.align_intr.eq(ldst_r.align_intr)
            comb += d_out.dcbz.eq(ldst_r.dcbz)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        comb += self.load_data.eq(d_in.data)
        comb += d_out.addr.eq(self.addr)

        # Update outputs to MMU
        comb += m_out.valid.eq(mmureq)
        comb += m_out.iside.eq(self.instr_fault)
        comb += m_out.load.eq(ldst_r.load)
        # m_out.priv <= r.priv_mode; TODO
        comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        """Yield the ports of the base class (memory ports still TODO)."""
        yield from super().ports()
        # TODO: memory ports
352
353
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 whose data wishbone bus is wired straight to a test SRAM."""

    def __init__(self, pspec):
        """Create the backing Memory.

        The depth is pspec.dmem_test_depth when that attribute exists and
        is an int, otherwise a small 32-entry default.  The width is
        pspec.reg_wid.
        """
        super().__init__(pspec)
        depth = getattr(pspec, "dmem_test_depth", None)
        if not isinstance(depth, int):
            depth = 32
        print("TestSRAMLoadStore1 depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        """Add the SRAM submodule and connect it to self.dbus."""
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            # initiator -> target (one assignment per signal is enough)
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            # target -> initiator
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
390