1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
12 to perform a TLB, then to go *back* to the cache and ask again.
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """

from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
                    Record, Memory,
                    Const)
from nmutil.iocontrol import RecordObject
from nmutil.util import rising_edge, Display
from enum import Enum, unique

from soc.experiment.dcache import DCache
from soc.experiment.pimem import PortInterfaceBase
from soc.experiment.mem_types import LoadStore1ToMMUType
from soc.experiment.mem_types import MMUToLoadStore1Type

from soc.minerva.wishbone import make_wb_layout
from soc.bus.sram import SRAM


@unique
class State(Enum):
    IDLE = 0        # ready for instruction
    ACK_WAIT = 1    # waiting for ack from dcache
    MMU_LOOKUP = 2  # waiting for MMU to look up translation
    TLBIE_WAIT = 3  # waiting for MMU to finish doing a tlbie

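# typical State transitions, as implemented in elaborate() below:
#
#   IDLE       -> ACK_WAIT    on the d_validblip request pulse
#   ACK_WAIT   -> IDLE        on completion, or on a cache-paradox error
#   ACK_WAIT   -> MMU_LOOKUP  on any other dcache error (e.g. a TLB miss)
#   MMU_LOOKUP -> ACK_WAIT    once the MMU lookup completes without an
#                             instruction fault (the request is re-run)
#   TLBIE_WAIT is currently a placeholder (its case body is "pass")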

# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    def __init__(self, name=None):
        RecordObject.__init__(self, name=name)

        self.load = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        # self.store_data = Signal(64)  # this is already sync (on a delay)
        self.byte_sel = Signal(8)
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.align_intr = Signal()


# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    def __init__(self, pspec):
        self.pspec = pspec
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        # these names are from the perspective of LoadStore1
        self.d_out = self.dcache.d_in   # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out   # out from dcache is in for LoadStore
        self.m_out = LoadStore1ToMMUType()  # out *to* MMU
        self.m_in = MMUToLoadStore1Type()   # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()
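        # (d_valid is driven combinatorially by the set_rd_addr and
        #  set_wr_data hooks below; d_validblip is its rising edge, taken
        #  in elaborate(), so the DCache sees a single-cycle "valid" pulse.
        #  d_w_valid gates the sync capture of store_data into d_out.data.)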

        # DSISR and DAR cached values. note that the MMU FSM is where
        # these are accessed by OP_MTSPR/OP_MFSPR, on behalf of LoadStore1.
        # by contrast microwatt has the spr set/get done *in* loadstore1.vhdl
        self.dsisr = Signal(64)
        self.dar = Signal(64)

        # state info for LD/ST
        self.done = Signal()
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.byte_sel = Signal(8)
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.state = Signal(State)
        self.instr_fault = Signal()
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.mode_32bit = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)

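    # the four methods below are the hooks expected by PortInterfaceBase:
    # pimem.py calls them with the address, byte-mask and data of each
    # request, and they fill in LDSTRequest (self.req) for the FSM in
    # elaborate() to act on.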
    def set_wr_addr(self, m, addr, mask, misalign, msr_pr):
        m.d.comb += self.req.load.eq(0)  # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr)   # problem-state ==> virt
        m.d.comb += self.req.align_intr.eq(misalign)

        dcbz = self.pi.is_dcbz
        m.d.comb += Display("is_dcbz %x", dcbz)
        m.d.comb += self.req.dcbz.eq(dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1)  # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr)   # problem-state ==> virt
        # BAD HACK! disable caching on LD when address is 0xCxxx_xxxx.
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
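        # (addr[28:] == 0xC matches bits 28-31 == 0xC with all higher bits
        #  zero, i.e. addresses 0xC000_0000 to 0xCFFF_FFFF inclusive)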
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None  # FIXME return value

    def set_wr_data(self, m, data, wen):
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen)  # this might not be needed
        st_ok = self.done  # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        ld_ok = self.done      # indicates read data is valid
        data = self.load_data  # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets overridden for OP_FETCH_FAILED
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.addr)

        # create a blip (single pulse) on valid read/write request.
        # this can be overridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        ldst_r = LDSTRequest("ldst_r")

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If(self.d_validblip & ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req)  # copy of LDSTRequest on "blip"
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += self.dsisr[63 - 38].eq(~self.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    m.d.comb += self.done.eq(~mmureq)  # done if not doing MMU
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.addr)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                    with m.If(self.load):
                        m.d.comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(1)
                with m.If(m_in.done):
                    with m.If(~self.instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.addr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        sync += Display("MMU_LOOKUP, exception %x", self.addr)
                        # instruction lookup fault: store address in DAR
                        comb += exc.happened.eq(1)
                        sync += self.dar.eq(self.addr)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown
                    comb += exception.eq(1)
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error)
                    sync += self.dsisr[63 - 38].eq(self.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)

            with m.Case(State.TLBIE_WAIT):
                pass

        # alignment error: store address in DAR
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
            sync += self.dar.eq(self.addr)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through, eventually, to the TRAP
        # pipeline, via PowerDecoder2.
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.wb_out.adr)
        comb += dbus.dat_w.eq(dcache.wb_out.dat)
        comb += dbus.sel.eq(dcache.wb_out.sel)
        comb += dbus.cyc.eq(dcache.wb_out.cyc)
        comb += dbus.stb.eq(dcache.wb_out.stb)
        comb += dbus.we.eq(dcache.wb_out.we)

        comb += dcache.wb_in.dat.eq(dbus.dat_r)
        comb += dcache.wb_in.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.wb_in.stall.eq(dbus.stall)

        # update d_out.data when the write-valid flag is set
        with m.If(self.d_w_valid):
            m.d.sync += d_out.data.eq(self.store_data)
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
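        # during the single "blip" cycle the DCache request fields come
        # straight from self.req (combinatorially, in the same cycle as
        # the blip); on every other cycle they come from ldst_r, the copy
        # latched on that blip in the IDLE state above.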
        with m.If(self.d_validblip):
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.addr.eq(self.req.addr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            m.d.comb += self.align_intr.eq(self.req.align_intr)
        with m.Else():
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.addr.eq(ldst_r.addr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            m.d.comb += self.align_intr.eq(ldst_r.align_intr)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        m.d.comb += self.load_data.eq(d_in.data)
        m.d.comb += d_out.addr.eq(self.addr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.instr_fault)
        m.d.comb += m_out.load.eq(ldst_r.load)
        # m_out.priv <= r.priv_mode; TODO
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        yield from super().ports()
        # TODO: memory ports


class TestSRAMLoadStore1(LoadStore1):
    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small Memory for testing (default 32 entries)
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMLoadStore1 depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m

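
# ---------------------------------------------------------------------
# Illustrative sketch only (not part of the original module): roughly how
# TestSRAMLoadStore1 might be instantiated to dump RTLIL for inspection.
# The SimpleNamespace standing in for pspec, and the exact attribute set
# it needs (reg_wid, addr_wid, mask_wid, dmem_test_depth, ...), are
# assumptions here; a real pspec, such as the one built by the soc test
# suite, would normally be passed in instead.
#
#     from types import SimpleNamespace
#     from nmigen.cli import rtlil
#
#     pspec = SimpleNamespace(reg_wid=64, addr_wid=48, mask_wid=8,
#                             dmem_test_depth=32)
#     dut = TestSRAMLoadStore1(pspec)
#     vl = rtlil.convert(dut, ports=list(dut.ports()))
#     with open("test_loadstore1.il", "w") as f:
#         f.write(vl)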