dcbz symbol rename
[soc.git] / src / soc / fu / ldst / loadstore.py
1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
12 to perform a TLB lookup, then to go *back* to the cache and ask again.
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """
19
20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
21 Record, Memory,
22 Const)
23 from nmutil.iocontrol import RecordObject
24 from nmutil.util import rising_edge, Display
25 from enum import Enum, unique
26
27 from soc.experiment.dcache import DCache
28 from soc.experiment.pimem import PortInterfaceBase
29 from soc.experiment.mem_types import LoadStore1ToMMUType
30 from soc.experiment.mem_types import MMUToLoadStore1Type
31
32 from soc.minerva.wishbone import make_wb_layout
33 from soc.bus.sram import SRAM
34 from nmutil.util import Display
35
36
@unique
class State(Enum):
    """States of the LoadStore1 FSM (values are the encoded state numbers)."""

    # ready for instruction
    IDLE = 0
    # waiting for ack from dcache
    ACK_WAIT = 1
    # waiting for MMU to look up translation
    MMU_LOOKUP = 2
    # waiting for MMU to finish doing a tlbie
    TLBIE_WAIT = 3
43
44
# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    """Record holding the fields of one load/store request.

    The fields are plain attribute assignments, in a fixed order: do not
    turn them into a loop or reorder them, the attribute names and their
    order are what the record is built from.
    """

    def __init__(self, name=None):
        super().__init__(name=name)

        # request type
        self.load = Signal()
        self.dcbz = Signal()
        # address and byte-enables
        self.addr = Signal(64)
        # self.store_data = Signal(64) # this is already sync (on a delay)
        self.byte_sel = Signal(8)
        # access attributes
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.align_intr = Signal()
60
# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """Glue logic between PortInterface, the DCache and the MMU.

    The set_*/get_* methods are the PortInterfaceBase hooks: they place the
    request into self.req (combinatorially) and raise self.d_valid.
    elaborate() turns the rising edge of d_valid into a single-cycle "blip"
    to the DCache, latches the request in a local LDSTRequest, and runs a
    small FSM (see State) that waits for the DCache and, on a DCache error
    that is not a cache paradox, asks the MMU for a translation before
    re-issuing the request.
    """

    def __init__(self, pspec):
        """Create the DCache, MMU records, wishbone record and FSM signals.

        pspec must provide reg_wid and addr_wid; an optional disable_cache
        attribute (compared against True) forces non-cacheable accesses.
        """
        self.pspec = pspec
        # a pspec without the attribute means "cache enabled"
        self.disable_cache = getattr(pspec, "disable_cache", False) == True
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in   # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out   # out from dcache is in for LoadStore
        self.m_out = LoadStore1ToMMUType()  # out *to* MMU
        self.m_in = MMUToLoadStore1Type()   # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # DSISR and DAR cached values.  note that the MMU FSM is where
        # these are accessed by OP_MTSPR/OP_MFSPR, on behalf of LoadStore1.
        # by contrast microwatt has the spr set/get done *in* loadstore1.vhdl
        self.dsisr = Signal(64)
        self.dar = Signal(64)

        # state info for LD/ST
        self.done = Signal()
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.byte_sel = Signal(8)
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.state = Signal(State)
        self.instr_fault = Signal()
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.mode_32bit = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)

    def set_dcbz_addr(self, m, addr):
        """Place a dcbz request for addr into self.req.  Returns None.

        Only load, dcbz and addr are set here; byte_sel, priv/virt mode
        and align_intr are left at their defaults (see commented-out lines).
        """
        m.d.comb += self.req.load.eq(0)  # not a load operation
        m.d.comb += self.req.dcbz.eq(1)
        #m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += Display("set_dcbz_addr %i",addr)
        #m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem ==> priv
        #m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
        #m.d.comb += self.req.align_intr.eq(misalign)
        return None

    def set_wr_addr(self, m, addr, mask, misalign, msr_pr):
        """Place a store request (address phase) into self.req.

        mask becomes the byte-select, misalign the alignment-interrupt flag,
        msr_pr selects virtual (problem-state) versus privileged mode.
        Returns None.
        """
        m.d.comb += self.req.load.eq(0)  # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr)   # problem-state ==> virt
        m.d.comb += self.req.align_intr.eq(misalign)

        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
        """Place a load request into self.req and raise d_valid.

        Parameters have the same meaning as for set_wr_addr.  Returns None.
        """
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1)  # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr)   # problem-state ==> virt
        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None  # FIXME return value

    def set_wr_data(self, m, data, wen):
        """Present store data and raise d_valid / d_w_valid.

        wen is currently unused.  Returns self.done as the "store ok" flag.
        """
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
        st_ok = self.done  # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        """Return (load_data, done): the read data and its valid flag."""
        ld_ok = self.done      # indicates read data is valid
        data = self.load_data  # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        """Build the FSM and wire up DCache, MMU and the wishbone record.

        Statement order matters: later comb assignments to the same signal
        take priority over earlier ones, so the blocks below are kept in
        their original sequence.
        """
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for OP_FETCH_FAILED
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.addr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        ldst_r = LDSTRequest("ldst_r")

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If(self.d_validblip & ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req)  # copy of LDSTRequest on "blip"
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        sync += self.dsisr[63 - 38].eq(~self.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    m.d.comb += self.done.eq(~mmureq)  # done if not doing MMU
                    # NOTE(review): only the Display is taken to be guarded
                    # by "done"; the return to IDLE is unconditional once
                    # the dcache reports valid - confirm the nesting.
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.addr)
                    sync += self.state.eq(State.IDLE)
                    sync += ldst_r.eq(0)
                    with m.If(self.load):
                        m.d.comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(1)
                with m.If(m_in.done):
                    with m.If(~self.instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.addr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        sync += Display("MMU_LOOKUP, exception %x", self.addr)
                        # instruction lookup fault: store address in DAR
                        comb += exc.happened.eq(1)
                        sync += self.dar.eq(self.addr)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown
                    comb += exception.eq(1)
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error)
                    # bit 38 is set for a non-load: same polarity as the
                    # cache-paradox path in ACK_WAIT (was inverted here).
                    # NOTE(review): self.load is not driven anywhere in
                    # this class - confirm where it is meant to come from.
                    sync += self.dsisr[63 - 38].eq(~self.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)

            with m.Case(State.TLBIE_WAIT):
                pass

        # alignment error: store address in DAR
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
            sync += self.dar.eq(self.addr)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.wb_out.adr)
        comb += dbus.dat_w.eq(dcache.wb_out.dat)
        comb += dbus.sel.eq(dcache.wb_out.sel)
        comb += dbus.cyc.eq(dcache.wb_out.cyc)
        comb += dbus.stb.eq(dcache.wb_out.stb)
        comb += dbus.we.eq(dcache.wb_out.we)

        comb += dcache.wb_in.dat.eq(dbus.dat_r)
        comb += dcache.wb_in.ack.eq(dbus.ack)
        # stall is optional in the wishbone layout
        if hasattr(dbus, "stall"):
            comb += dcache.wb_in.stall.eq(dbus.stall)

        # update out d data when flag set
        with m.If(self.d_w_valid):
            m.d.sync += d_out.data.eq(self.store_data)
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
        with m.If(self.d_validblip):
            # on the blip itself the request comes straight from self.req
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.addr.eq(self.req.addr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            m.d.comb += self.align_intr.eq(self.req.align_intr)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",self.req.dcbz,self.req.addr)
            m.d.comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            # afterwards the latched copy (ldst_r) is presented instead
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.addr.eq(ldst_r.addr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            m.d.comb += self.align_intr.eq(ldst_r.align_intr)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",ldst_r.dcbz,ldst_r.addr)
            m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        m.d.comb += self.load_data.eq(d_in.data)
        m.d.comb += d_out.addr.eq(self.addr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.instr_fault)
        m.d.comb += m_out.load.eq(ldst_r.load)
        # m_out.priv <= r.priv_mode; TODO
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        """Yield the ports of the base class (memory ports still TODO)."""
        yield from super().ports()
        # TODO: memory ports
359 # TODO: memory ports
360
361
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 with a small test SRAM attached to its wishbone record.

    The memory depth comes from pspec.dmem_test_depth when that attribute
    exists and is an int, otherwise it defaults to 32 entries.
    """

    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory unless pspec supplies an integer depth
        depth = getattr(pspec, "dmem_test_depth", None)
        if not isinstance(depth, int):
            depth = 32
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        """Add the SRAM submodule and connect it to self.dbus."""
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            # one assignment per signal is enough (it was duplicated before)
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
398