# src/soc/fu/ldst/loadstore.py
"""LoadStore1 FSM.

based on microwatt loadstore1.vhdl, but conforming to PortInterface.
unlike loadstore1.vhdl this does *not* deal with actual Load/Store
ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
by way of PortInterface. PortInterface is where things need extending,
such as adding dcbz support, etc.

this module basically handles "pure" load / store operations, and
its first job is to ask the D-Cache for the data. if that fails,
the second task (if virtual memory is enabled) is to ask the MMU
to perform a TLB lookup, then to go *back* to the cache and ask again.

Links:

* https://bugs.libre-soc.org/show_bug.cgi?id=465

"""

from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
                    Record, Memory,
                    Const)
from nmutil.iocontrol import RecordObject
from nmutil.util import rising_edge, Display
from enum import Enum, unique

from soc.experiment.dcache import DCache
from soc.experiment.pimem import PortInterfaceBase
from soc.experiment.mem_types import LoadStore1ToMMUType
from soc.experiment.mem_types import MMUToLoadStore1Type

from soc.minerva.wishbone import make_wb_layout
from soc.bus.sram import SRAM


@unique
class State(Enum):
    IDLE = 0        # ready for instruction
    ACK_WAIT = 1    # waiting for ack from dcache
    MMU_LOOKUP = 2  # waiting for MMU to look up translation
    TLBIE_WAIT = 3  # waiting for MMU to finish doing a tlbie


# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    def __init__(self, name=None):
        RecordObject.__init__(self, name=name)

        self.load = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        # self.store_data = Signal(64) # this is already sync (on a delay)
        self.byte_sel = Signal(8)
        self.nc = Signal() # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.align_intr = Signal()

# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    def __init__(self, pspec):
        self.pspec = pspec
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out # out from dcache is in for LoadStore
        self.m_out = LoadStore1ToMMUType() # out *to* MMU
        self.m_in = MMUToLoadStore1Type()  # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # DSISR and DAR cached values. note that the MMU FSM is where
        # these are accessed by OP_MTSPR/OP_MFSPR, on behalf of LoadStore1.
        # by contrast microwatt has the spr set/get done *in* loadstore1.vhdl
        self.dsisr = Signal(64)
        self.dar = Signal(64)

        # state info for LD/ST
        self.done = Signal()
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.byte_sel = Signal(8)
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal() # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.state = Signal(State)
        self.instr_fault = Signal()
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.mode_32bit = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)

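    # the four methods below are the hooks called by PortInterfaceBase on
    # behalf of LDSTCompUnit.  set_wr_addr/set_rd_addr capture the request
    # combinatorially into self.req; the FSM in elaborate() forwards it to
    # the dcache on a single-cycle "blip" (d_validblip) and latches a copy
    # into ldst_r for the rest of the transaction.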
    def set_wr_addr(self, m, addr, mask, misalign, msr_pr, is_dcbz):
        m.d.comb += self.req.load.eq(0) # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.dcbz.eq(is_dcbz)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None

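    # note: for loads the dcache "blip" (d_valid) is raised here in
    # set_rd_addr; for stores it is raised in set_wr_data below, once
    # the data to be stored is actually available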
    def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1) # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
        # BAD HACK! disable caching on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None #FIXME return value

    def set_wr_data(self, m, data, wen):
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
        st_ok = self.done # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        ld_ok = self.done # indicates read data is valid
        data = self.load_data # actual read data
        return data, ld_ok

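    # the FSM proper, plus the glue that connects the captured request to
    # the dcache, the dcache to the MMU, and the dcache's wishbone signals
    # to the external bus (self.dbus)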
    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for OP_FETCH_FAILED
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.addr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        ldst_r = LDSTRequest("ldst_r")

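        # FSM overview:
        #   IDLE:       the "blip" sends the request to the dcache and
        #               moves to ACK_WAIT
        #   ACK_WAIT:   dcache ack -> done, back to IDLE.  a dcache error
        #               that is only a TLB miss -> MMU_LOOKUP; a "cache
        #               paradox" error raises an exception straight away
        #   MMU_LOOKUP: MMU done -> re-issue the dcache request and return
        #               to ACK_WAIT; MMU error -> exception, DSISR updated
        #   TLBIE_WAIT: placeholder, currently a no-op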
        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If(self.d_validblip & ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += self.dsisr[63 - 38].eq(~self.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    m.d.comb += self.done.eq(~mmureq) # done if not doing MMU
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.addr)
                    sync += self.state.eq(State.IDLE)
                    sync += ldst_r.eq(0)
                    with m.If(self.load):
                        m.d.comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(1)
                with m.If(m_in.done):
                    with m.If(~self.instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.addr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        sync += Display("MMU_LOOKUP, exception %x", self.addr)
                        # instruction lookup fault: store address in DAR
                        comb += exc.happened.eq(1)
                        sync += self.dar.eq(self.addr)

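                # the MMU reports the cause of a failed lookup via these
                # flags: record them in DSISR (same bit positions as in
                # microwatt loadstore1.vhdl)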
                with m.If(m_in.err):
                    # MMU RADIX exception thrown
                    comb += exception.eq(1)
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error)
                    sync += self.dsisr[63 - 38].eq(~self.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)

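            # TLBIE_WAIT: the tlbie request itself reaches the MMU via
            # m_out.tlbie (set below); waiting for its completion is not
            # yet implemented here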
            with m.Case(State.TLBIE_WAIT):
                pass

        # alignment error: store address in DAR
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
            sync += self.dar.eq(self.addr)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.wb_out.adr)
        comb += dbus.dat_w.eq(dcache.wb_out.dat)
        comb += dbus.sel.eq(dcache.wb_out.sel)
        comb += dbus.cyc.eq(dcache.wb_out.cyc)
        comb += dbus.stb.eq(dcache.wb_out.stb)
        comb += dbus.we.eq(dcache.wb_out.we)

        comb += dcache.wb_in.dat.eq(dbus.dat_r)
        comb += dcache.wb_in.ack.eq(dbus.ack)
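        # the stall signal is not present in every Wishbone layout,
        # hence the hasattr check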
        if hasattr(dbus, "stall"):
            comb += dcache.wb_in.stall.eq(dbus.stall)

        # update out d data when flag set
        with m.If(self.d_w_valid):
            m.d.sync += d_out.data.eq(self.store_data)
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
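        # forward the request to the dcache: straight from self.req during
        # the single-cycle "blip", and from the latched copy (ldst_r) for
        # the rest of the transaction, so the dcache sees stable values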
        with m.If(self.d_validblip):
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.addr.eq(self.req.addr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            m.d.comb += self.align_intr.eq(self.req.align_intr)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",
            #                    self.req.dcbz, self.req.addr)
            m.d.comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.addr.eq(ldst_r.addr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            m.d.comb += self.align_intr.eq(ldst_r.align_intr)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",
            #                    ldst_r.dcbz, ldst_r.addr)
            m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        m.d.comb += self.load_data.eq(d_in.data)
        m.d.comb += d_out.addr.eq(self.addr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.instr_fault)
        m.d.comb += m_out.load.eq(ldst_r.load)
        # m_out.priv <= r.priv_mode; TODO
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        yield from super().ports()
        # TODO: memory ports


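# a LoadStore1 variant with the Wishbone data bus (dbus) wired directly to
# a small SRAM, so that unit tests do not need an external memory attached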
class TestSRAMLoadStore1(LoadStore1):
    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMLoadStore1 depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
388