commented-out and disabled the set_dcbz_addr function, it is the wrong
[soc.git] / src / soc / fu / ldst / loadstore.py
1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
12 to look up the translation, then to go *back* to the cache and ask again.
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """
19
20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
21 Record, Memory,
22 Const)
23 from nmutil.iocontrol import RecordObject
24 from nmutil.util import rising_edge, Display
25 from enum import Enum, unique
26
27 from soc.experiment.dcache import DCache
28 from soc.experiment.pimem import PortInterfaceBase
29 from soc.experiment.mem_types import LoadStore1ToMMUType
30 from soc.experiment.mem_types import MMUToLoadStore1Type
31
32 from soc.minerva.wishbone import make_wb_layout
33 from soc.bus.sram import SRAM
34 from nmutil.util import Display
35
36
@unique
class State(Enum):
    """States of the LoadStore1 request FSM (stored in LoadStore1.state)."""

    IDLE = 0        # ready for instruction
    ACK_WAIT = 1    # waiting for ack from dcache
    MMU_LOOKUP = 2  # waiting for MMU to look up translation
    TLBIE_WAIT = 3  # waiting for MMU to finish doing a tlbie
43
44
# Captures the LDSTRequest from the PortInterface, which "blips" most
# of these fields at us (pipeline-style).
class LDSTRequest(RecordObject):
    """Record bundling the parameters of one load/store request.

    Store data is intentionally not a field of the request: per the
    original note it is already sync (on a delay).
    """

    def __init__(self, name=None):
        super().__init__(name=name)

        # NOTE: keep the plain "self.x = Signal()" form and this order;
        # each assignment adds one field to the record.
        self.load = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        self.byte_sel = Signal(8)
        self.nc = Signal()  # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.align_intr = Signal()
60
61 # glue logic for microwatt mmu and dcache
62 class LoadStore1(PortInterfaceBase):
def __init__(self, pspec):
    """Create the dcache, the MMU records and the LD/ST state signals.

    pspec supplies reg_wid and addr_wid (handed to the base class) and
    is also passed to make_wb_layout for the data bus record.  An
    optional pspec.disable_cache is recorded as self.disable_cache.
    """
    self.pspec = pspec
    # a missing attribute counts as "cache not disabled"
    self.disable_cache = getattr(pspec, "disable_cache", False) == True

    super().__init__(pspec.reg_wid, pspec.addr_wid)

    self.dcache = DCache()
    # these names are from the perspective of here (LoadStore1)
    self.d_out = self.dcache.d_in      # in to dcache is out for LoadStore
    self.d_in = self.dcache.d_out      # out from dcache is in for LoadStore
    self.m_out = LoadStore1ToMMUType()  # out *to* MMU
    self.m_in = MMUToLoadStore1Type()   # in *from* MMU
    self.req = LDSTRequest(name="ldst_req")

    # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
    self.dbus = Record(make_wb_layout(pspec))

    # for creating a single clock blip to DCache
    self.d_valid = Signal()
    self.d_w_valid = Signal()
    self.d_validblip = Signal()

    # DSISR and DAR cached values.  note that the MMU FSM is where
    # these are accessed by OP_MTSPR/OP_MFSPR, on behalf of LoadStore1.
    # by contrast microwatt has the spr set/get done *in* loadstore1.vhdl
    self.dsisr = Signal(64)
    self.dar = Signal(64)

    # state info for LD/ST
    self.done = Signal()
    # latch most of the input request
    self.load = Signal()
    self.tlbie = Signal()
    self.dcbz = Signal()
    self.addr = Signal(64)
    self.store_data = Signal(64)
    self.load_data = Signal(64)
    self.byte_sel = Signal(8)
    # fields listed in the original as commented-out placeholders:
    # xerc, reserve, atomic, atomic_last, rc
    self.nc = Signal()  # non-cacheable access
    self.virt_mode = Signal()
    self.priv_mode = Signal()
    self.state = Signal(State)
    self.instr_fault = Signal()
    self.align_intr = Signal()
    self.busy = Signal()
    self.wait_dcache = Signal()
    self.wait_mmu = Signal()
    # further commented-out placeholders in the original:
    # mode_32bit, intr_vec, nia, srr1
121
# XXX please don't do it this way (and ask in future).
# the exact same logic is required for setting store addresses
# as for dcbz addresses, therefore why duplicate code?
# it would be better to add an argument to set_wr_addr to
# specify that it requires dcbz mode to be set.
def __please_remove_and_use_set_wr_addr_instead_set_dcbz_addr(self, m, addr):
    """Mark the pending request as a dcbz (not a load) at addr.

    Only req.load, req.dcbz and req.addr are driven here; byte_sel,
    priv_mode, virt_mode and align_intr are not (those assignments were
    commented out in the original).  Always returns None.
    """
    request = self.req
    comb = m.d.comb
    comb += [
        request.load.eq(0),  # not a load operation
        request.dcbz.eq(1),
        request.addr.eq(addr),
        Display("set_dcbz_addr %i", addr),
    ]
    return None
137
# XXX please add a dcbz argument to all set_wr_addr functions instead.
def set_wr_addr(self, m, addr, mask, misalign, msr_pr):
    """Drive self.req combinatorially with the address phase of a store.

    addr goes to req.addr, mask to req.byte_sel and misalign to
    req.align_intr.  msr_pr selects the mode: priv_mode is its inverse,
    virt_mode follows it.  When self.disable_cache is set the request
    is additionally flagged non-cacheable.  Always returns None.
    """
    request = self.req
    comb = m.d.comb
    comb += [
        request.load.eq(0),             # store operation
        request.byte_sel.eq(mask),
        request.addr.eq(addr),
        request.priv_mode.eq(~msr_pr),  # not-problem ==> priv
        request.virt_mode.eq(msr_pr),   # problem-state ==> virt
        request.align_intr.eq(misalign),
    ]

    # option to disable the cache entirely for write
    if self.disable_cache:
        comb += request.nc.eq(1)
    return None
151
def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
    """Drive self.req combinatorially with a load request and raise d_valid.

    addr goes to req.addr, mask to req.byte_sel and misalign to
    req.align_intr.  msr_pr selects the mode: priv_mode is its inverse,
    virt_mode follows it.  The request is flagged non-cacheable when
    addr[28:] equals 0xc or when self.disable_cache is set.
    Always returns None (FIXME in the original: return value).
    """
    request = self.req
    comb = m.d.comb
    comb += [
        self.d_valid.eq(1),
        request.load.eq(1),             # load operation
        request.byte_sel.eq(mask),
        request.align_intr.eq(misalign),
        request.addr.eq(addr),
        request.priv_mode.eq(~msr_pr),  # not-problem ==> priv
        request.virt_mode.eq(msr_pr),   # problem-state ==> virt
    ]

    # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
    # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
    in_peripheral_range = addr[28:] == Const(0xc, 4)
    with m.If(in_peripheral_range):
        comb += request.nc.eq(1)

    # option to disable the cache entirely for read
    if self.disable_cache:
        comb += request.nc.eq(1)
    return None  # FIXME return value
168
def set_wr_data(self, m, data, wen):
    """Present store data and return the signal that reports completion.

    Raises d_valid (the "blip" on write data) and d_w_valid, and places
    data on self.store_data combinatorially; elaborate() picks these
    up.  wen is currently unused (the byte_sel assignment that used it
    was commented out in the original, "this might not be needed").

    Returns self.done.  TODO (from the original): it is meant to
    indicate that the write data is valid.
    """
    comb = m.d.comb
    comb += [
        self.d_valid.eq(1),
        self.d_w_valid.eq(1),
        self.store_data.eq(data),
    ]
    return self.done
178
def get_rd_data(self, m):
    """Return (data, ld_ok): the load data and its validity indicator.

    data is self.load_data (actual read data); ld_ok is self.done
    (indicates read data is valid).  m is not used.
    """
    return self.load_data, self.done
183
184 def elaborate(self, platform):
# Builds the LoadStore1 hardware on top of the Module returned by the base
# class and returns that Module.  In order, it:
#   * adds self.dcache as the "dcache" submodule
#   * runs the request FSM held in self.state (IDLE / ACK_WAIT /
#     MMU_LOOKUP / TLBIE_WAIT)
#   * drives the exception record self.pi.exc_o
#   * forwards the dcache wishbone signals to/from self.dbus
#   * feeds the dcache inputs from self.req during the "blip" cycle and
#     from the latched copy ldst_r otherwise
#   * drives the outputs to the MMU (self.m_out)
# NOTE(review): leading whitespace is missing from this listing, so the
# exact nesting of the "with" blocks below cannot be confirmed from here.
185 m = super().elaborate(platform)
186 comb, sync = m.d.comb, m.d.sync
187
188 # create dcache module
189 m.submodules.dcache = dcache = self.dcache
190
191 # temp vars
192 d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
193 m_out, m_in = self.m_out, self.m_in
194 exc = self.pi.exc_o
195 exception = exc.happened
# mmureq: asserted by the ACK_WAIT code below and copied to m_out.valid
# near the end of this method
196 mmureq = Signal()
197
198 # copy of address, but gets over-ridden for OP_FETCH_FAILED
# (no such override is visible in this listing: maddr only mirrors self.addr)
199 maddr = Signal(64)
200 m.d.comb += maddr.eq(self.addr)
201
202 # create a blip (single pulse) on valid read/write request
203 # this can be over-ridden in the FSM to get dcache to re-run
204 # a request when MMU_LOOKUP completes.
205 m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
# ldst_r: registered copy of self.req, loaded on the blip in IDLE and
# reset to 0 by several FSM branches below
206 ldst_r = LDSTRequest("ldst_r")
207
208 # fsm skeleton
209 with m.Switch(self.state):
210 with m.Case(State.IDLE):
211 with m.If(self.d_validblip & ~exc.happened):
212 comb += self.busy.eq(1)
213 sync += self.state.eq(State.ACK_WAIT)
214 sync += ldst_r.eq(self.req) # copy of LDSTRequest on "blip"
215 with m.Else():
216 sync += ldst_r.eq(0)
217
218 # waiting for completion
219 with m.Case(State.ACK_WAIT):
220 comb += self.busy.eq(~exc.happened)
221
222 with m.If(d_in.error):
223 # cache error is not necessarily "final", it could
224 # be that it was just a TLB miss
225 with m.If(d_in.cache_paradox):
226 comb += exception.eq(1)
227 sync += self.state.eq(State.IDLE)
228 sync += ldst_r.eq(0)
# NOTE(review): this writes ~self.load, whereas the MMU_LOOKUP case below
# writes self.load (not inverted) to the same DSISR bit.  Also, nothing in
# the visible code ever assigns self.load; the request's load flag is
# carried by self.req.load / ldst_r.load.  Confirm the intended source and
# polarity.
229 sync += self.dsisr[63 - 38].eq(~self.load)
230 # XXX there is no architected bit for this
231 # (probably should be a machine check in fact)
232 sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)
233
234 with m.Else():
235 # Look up the translation for TLB miss
236 # and also for permission error and RC error
237 # in case the PTE has been updated.
238 comb += mmureq.eq(1)
239 sync += self.state.eq(State.MMU_LOOKUP)
240 with m.If(d_in.valid):
241 m.d.comb += self.done.eq(~mmureq) # done if not doing MMU
242 with m.If(self.done):
243 sync += Display("ACK_WAIT, done %x", self.addr)
244 sync += self.state.eq(State.IDLE)
245 sync += ldst_r.eq(0)
# NOTE(review): load_data is also assigned from d_in.data unconditionally
# further down (see the XXX note there), and self.load is never driven in
# the visible code.
246 with m.If(self.load):
247 m.d.comb += self.load_data.eq(d_in.data)
248
249 # waiting here for the MMU TLB lookup to complete.
250 # either re-try the dcache lookup or throw MMU exception
251 with m.Case(State.MMU_LOOKUP):
252 comb += self.busy.eq(1)
253 with m.If(m_in.done):
254 with m.If(~self.instr_fault):
255 sync += Display("MMU_LOOKUP, done %x -> %x",
256 self.addr, d_out.addr)
257 # retry the request now that the MMU has
258 # installed a TLB entry, if not exception raised
259 m.d.comb += self.d_out.valid.eq(~exception)
260 sync += self.state.eq(State.ACK_WAIT)
261 sync += ldst_r.eq(0)
262 with m.Else():
263 sync += Display("MMU_LOOKUP, exception %x", self.addr)
264 # instruction lookup fault: store address in DAR
265 comb += exc.happened.eq(1)
266 sync += self.dar.eq(self.addr)
267
268 with m.If(m_in.err):
269 # MMU RADIX exception thrown
270 comb += exception.eq(1)
271 sync += self.dsisr[63 - 33].eq(m_in.invalid)
272 sync += self.dsisr[63 - 36].eq(m_in.perm_error)
# NOTE(review): not inverted here, unlike the ACK_WAIT case above.
273 sync += self.dsisr[63 - 38].eq(self.load)
274 sync += self.dsisr[63 - 44].eq(m_in.badtree)
275 sync += self.dsisr[63 - 45].eq(m_in.rc_error)
276
# no action is taken in this state, and nothing in this listing moves the
# FSM into TLBIE_WAIT
277 with m.Case(State.TLBIE_WAIT):
278 pass
279
280 # alignment error: store address in DAR
281 with m.If(self.align_intr):
282 comb += exc.happened.eq(1)
283 sync += self.dar.eq(self.addr)
284
285 # happened, alignment, instr_fault, invalid.
286 # note that all of these flow through - eventually to the TRAP
287 # pipeline, via PowerDecoder2.
288 comb += exc.invalid.eq(m_in.invalid)
289 comb += exc.alignment.eq(self.align_intr)
290 comb += exc.instr_fault.eq(self.instr_fault)
291 # badtree, perm_error, rc_error, segment_fault
292 comb += exc.badtree.eq(m_in.badtree)
293 comb += exc.perm_error.eq(m_in.perm_error)
294 comb += exc.rc_error.eq(m_in.rc_error)
295 comb += exc.segment_fault.eq(m_in.segerr)
296
297 # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
# dcache.wb_out drives the dbus request fields ...
298 comb += dbus.adr.eq(dcache.wb_out.adr)
299 comb += dbus.dat_w.eq(dcache.wb_out.dat)
300 comb += dbus.sel.eq(dcache.wb_out.sel)
301 comb += dbus.cyc.eq(dcache.wb_out.cyc)
302 comb += dbus.stb.eq(dcache.wb_out.stb)
303 comb += dbus.we.eq(dcache.wb_out.we)
304
# ... and the dbus response fields drive dcache.wb_in
305 comb += dcache.wb_in.dat.eq(dbus.dat_r)
306 comb += dcache.wb_in.ack.eq(dbus.ack)
# stall is only connected when the dbus record has such a field
307 if hasattr(dbus, "stall"):
308 comb += dcache.wb_in.stall.eq(dbus.stall)
309
310 # update out d data when flag set
# (d_w_valid and store_data are driven by set_wr_data)
311 with m.If(self.d_w_valid):
312 m.d.sync += d_out.data.eq(self.store_data)
313 #with m.Else():
314 # m.d.sync += d_out.data.eq(0)
315 # unit test passes with that change
316
317 # this must move into the FSM, conditionally noticing that
318 # the "blip" comes from self.d_validblip.
319 # task 1: look up in dcache
320 # task 2: if dcache fails, look up in MMU.
321 # do **NOT** confuse the two.
# during the blip cycle the dcache inputs are taken straight from self.req
# and d_out.valid is raised unless an exception happened
322 with m.If(self.d_validblip):
323 m.d.comb += self.d_out.valid.eq(~exc.happened)
324 m.d.comb += d_out.load.eq(self.req.load)
325 m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
326 m.d.comb += self.addr.eq(self.req.addr)
327 m.d.comb += d_out.nc.eq(self.req.nc)
328 m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
329 m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
330 m.d.comb += self.align_intr.eq(self.req.align_intr)
331 #m.d.comb += Display("validblip dcbz=%i addr=%x",self.req.dcbz,self.req.addr)
332 m.d.comb += d_out.dcbz.eq(self.req.dcbz)
# otherwise the same fields come from the latched copy ldst_r
# (d_out.valid is not driven in this branch)
333 with m.Else():
334 m.d.comb += d_out.load.eq(ldst_r.load)
335 m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
336 m.d.comb += self.addr.eq(ldst_r.addr)
337 m.d.comb += d_out.nc.eq(ldst_r.nc)
338 m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
339 m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
340 m.d.comb += self.align_intr.eq(ldst_r.align_intr)
341 #m.d.comb += Display("no_validblip dcbz=%i addr=%x",ldst_r.dcbz,ldst_r.addr)
342 m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)
343
344 # XXX these should be possible to remove but for some reason
345 # cannot be... yet. TODO, investigate
346 m.d.comb += self.load_data.eq(d_in.data)
347 m.d.comb += d_out.addr.eq(self.addr)
348
349 # Update outputs to MMU
350 m.d.comb += m_out.valid.eq(mmureq)
351 m.d.comb += m_out.iside.eq(self.instr_fault)
352 m.d.comb += m_out.load.eq(ldst_r.load)
353 # m_out.priv <= r.priv_mode; TODO
354 m.d.comb += m_out.tlbie.eq(self.tlbie)
355 # m_out.mtspr <= mmu_mtspr; # TODO
356 # m_out.sprn <= sprn; # TODO
357 m.d.comb += m_out.addr.eq(maddr)
358 # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
359 # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate
360
361 return m
362
363 def ports(self):
# Generator: yields every port reported by the base class's ports().
# Nothing specific to LoadStore1 is added yet (see the TODO below).
364 yield from super().ports()
365 # TODO: memory ports
366
367
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 whose wishbone data bus (self.dbus) is wired to an SRAM.

    The SRAM is backed by self.mem, a Memory of width pspec.reg_wid.
    Its depth is pspec.dmem_test_depth when that attribute exists and
    is an int, otherwise 32.
    """

    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory unless pspec.dmem_test_depth overrides it
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        """Extend the base Module with the SRAM and connect it to dbus.

        Returns the Module produced by LoadStore1.elaborate with the
        "sram" submodule added.
        """
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            # the original issued this identical assignment twice; one
            # combinatorial assignment is sufficient
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
404