sort out LoadStore1 misalignment FSM, also required test function pi_ld
[soc.git] / src / soc / fu / ldst / loadstore.py
1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
12 to perform a TLB, then to go *back* to the cache and ask again.
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """
19
20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
21 Record, Memory,
22 Const)
23 from nmutil.iocontrol import RecordObject
24 from nmutil.util import rising_edge
25 from enum import Enum, unique
26
27 from soc.experiment.dcache import DCache
28 from soc.experiment.pimem import PortInterfaceBase
29 from soc.experiment.mem_types import LoadStore1ToMMUType
30 from soc.experiment.mem_types import MMUToLoadStore1Type
31
32 from soc.minerva.wishbone import make_wb_layout
33 from soc.bus.sram import SRAM
34 from nmutil.util import Display
35
36
@unique
class State(Enum):
    """States of the LoadStore1 finite state machine.

    The integer values are the encodings used when the enum is turned
    into a hardware Signal, so they must stay unique and unchanged.
    """
    # ready for instruction
    IDLE = 0
    # waiting for ack from dcache
    ACK_WAIT = 1
    # waiting for MMU to look up translation
    MMU_LOOKUP = 2
    # waiting for MMU to finish doing a tlbie
    TLBIE_WAIT = 3
43
44
# captures the LDSTRequest from the PortInterface, which "blips" most
# of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    """Record bundling the fields of one load/store request.

    Each field is created by direct attribute assignment, in this order,
    on top of the RecordObject base class.
    """

    def __init__(self, name=None):
        super().__init__(name=name)

        # request type
        self.load = Signal()           # 1 for a load (0 is used for stores)
        self.dcbz = Signal()
        # target address and per-byte select mask
        self.addr = Signal(64)
        # self.store_data = Signal(64) # this is already sync (on a delay)
        self.byte_sel = Signal(8)
        # access attributes
        self.nc = Signal()             # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        # request was flagged as misaligned
        self.align_intr = Signal()
60
# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """Load/store FSM gluing a PortInterface to the D-Cache and the MMU.

    A request arrives through the set_rd_addr / set_wr_addr / set_wr_data
    hooks (presumably called by PortInterfaceBase -- confirm against that
    class), which fill in ``self.req`` combinatorially.  A rising edge on
    ``self.d_valid`` is turned into a one-clock "blip" that presents the
    request to the D-Cache and latches a copy for the following cycles.
    If the D-Cache reports an error that is not a cache paradox, the MMU
    is asked to do a lookup and the D-Cache request is then re-issued.
    """

    def __init__(self, pspec):
        """Create the D-Cache, MMU records, bus record and FSM signals.

        pspec must provide ``reg_wid`` and ``addr_wid``; an optional
        ``disable_cache`` attribute equal to True forces every access to
        be marked non-cacheable.
        """
        self.pspec = pspec
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in   # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out   # out from dcache is in for LoadStore
        self.m_out = LoadStore1ToMMUType()  # out *to* MMU
        self.m_in = MMUToLoadStore1Type()   # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # DSISR and DAR cached values.  note that the MMU FSM is where
        # these are accessed by OP_MTSPR/OP_MFSPR, on behalf of LoadStore1.
        # by contrast microwatt has the spr set/get done *in* loadstore1.vhdl
        self.dsisr = Signal(64)
        self.dar = Signal(64)

        # state info for LD/ST
        self.done = Signal()
        # latch most of the input request
        # NOTE(review): self.load, self.tlbie, self.dcbz and
        # self.instr_fault are not driven anywhere in this class; the FSM
        # uses the latched request (ldst_r) for the load/store direction.
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.byte_sel = Signal(8)
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal()              # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.state = Signal(State)
        self.instr_fault = Signal()
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.mode_32bit = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)

    def set_wr_addr(self, m, addr, mask, misalign, msr_pr):
        """Describe a store request in self.req (combinatorially).

        Does not raise d_valid: for stores that is done by set_wr_data.
        Always returns None.
        """
        m.d.comb += self.req.load.eq(0)  # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr)   # problem-state ==> virt
        m.d.comb += self.req.align_intr.eq(misalign)
        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
        """Describe a load request in self.req and raise d_valid.

        Always returns None.
        """
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1)  # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr)   # problem-state ==> virt
        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None  # FIXME return value

    def set_wr_data(self, m, data, wen):
        """Present store data and raise d_valid / d_w_valid.

        ``wen`` is currently unused.  Returns self.done as the
        "store ok" indicator.
        """
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
        st_ok = self.done  # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        """Return (load_data, done): the read data and its valid flag."""
        ld_ok = self.done       # indicates read data is valid
        data = self.load_data   # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        """Build the FSM, the D-Cache/MMU hookup and the bus wiring."""
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for OP_FETCH_FAILED
        maddr = Signal(64)
        comb += maddr.eq(self.addr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        # registered copy of the request, valid after the blip cycle
        ldst_r = LDSTRequest("ldst_r")

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If(self.d_validblip & ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    # copy of LDSTRequest on "blip"
                    sync += ldst_r.eq(self.req)
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        # bit 38 flags a store: take the direction from
                        # the latched request (self.load is never driven)
                        sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    comb += self.done.eq(~mmureq)  # done if not doing MMU
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.addr)
                    sync += self.state.eq(State.IDLE)
                    with m.If(ldst_r.load):
                        comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(1)
                with m.If(m_in.done):
                    with m.If(~self.instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.addr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                    with m.Else():
                        sync += Display("MMU_LOOKUP, exception %x", self.addr)
                        # instruction lookup fault: store address in DAR
                        comb += exc.happened.eq(1)
                        sync += self.dar.eq(self.addr)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown
                    # NOTE(review): the state is not changed here, so the
                    # FSM appears to remain in MMU_LOOKUP -- confirm this
                    # is intended.
                    comb += exception.eq(1)
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error)
                    # same polarity as the cache-paradox path: 1 for store
                    sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)

            with m.Case(State.TLBIE_WAIT):
                pass

        # alignment error: store address in DAR
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
            sync += self.dar.eq(self.addr)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.wb_out.adr)
        comb += dbus.dat_w.eq(dcache.wb_out.dat)
        comb += dbus.sel.eq(dcache.wb_out.sel)
        comb += dbus.cyc.eq(dcache.wb_out.cyc)
        comb += dbus.stb.eq(dcache.wb_out.stb)
        comb += dbus.we.eq(dcache.wb_out.we)

        comb += dcache.wb_in.dat.eq(dbus.dat_r)
        comb += dcache.wb_in.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.wb_in.stall.eq(dbus.stall)

        # write out d data only when flag set
        with m.If(self.d_w_valid):
            sync += d_out.data.eq(self.store_data)
        with m.Else():
            sync += d_out.data.eq(0)

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
        with m.If(self.d_validblip):
            # on the blip cycle the live request drives the dcache
            comb += self.d_out.valid.eq(~exc.happened)
            comb += d_out.load.eq(self.req.load)
            comb += d_out.byte_sel.eq(self.req.byte_sel)
            comb += self.addr.eq(self.req.addr)
            comb += d_out.nc.eq(self.req.nc)
            comb += d_out.priv_mode.eq(self.req.priv_mode)
            comb += d_out.virt_mode.eq(self.req.virt_mode)
            comb += self.align_intr.eq(self.req.align_intr)
        with m.Else():
            # afterwards the latched copy keeps the dcache inputs stable
            comb += d_out.load.eq(ldst_r.load)
            comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            comb += self.addr.eq(ldst_r.addr)
            comb += d_out.nc.eq(ldst_r.nc)
            comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            comb += self.align_intr.eq(ldst_r.align_intr)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        comb += self.load_data.eq(d_in.data)
        comb += d_out.addr.eq(self.addr)

        # Update outputs to MMU
        comb += m_out.valid.eq(mmureq)
        comb += m_out.iside.eq(self.instr_fault)
        comb += m_out.load.eq(ldst_r.load)
        # m_out.priv <= r.priv_mode; TODO
        comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        """Yield the ports of the base class (no extra ones added yet)."""
        yield from super().ports()
        # TODO: memory ports
340
341
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 with a small SRAM wired directly onto its data bus.

    The memory depth is taken from ``pspec.dmem_test_depth`` when that
    attribute exists and is an int, otherwise 32 entries are used.
    """

    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        """Add the SRAM submodule and connect it to self.dbus."""
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            # one connection per signal (a duplicated copy of this
            # statement was redundant and has been dropped)
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
378