loadstore.py: add function set_dcbz_addr
[soc.git] / src / soc / fu / ldst / loadstore.py
1 """LoadStore1 FSM.
2
3 based on microwatt loadstore1.vhdl, but conforming to PortInterface.
4 unlike loadstore1.vhdl this does *not* deal with actual Load/Store
5 ops: that job is handled by LDSTCompUnit, which talks to LoadStore1
6 by way of PortInterface. PortInterface is where things need extending,
7 such as adding dcbz support, etc.
8
9 this module basically handles "pure" load / store operations, and
10 its first job is to ask the D-Cache for the data. if that fails,
11 the second task (if virtual memory is enabled) is to ask the MMU
12 to perform a TLB lookup, then to go *back* to the cache and ask again.
13
14 Links:
15
16 * https://bugs.libre-soc.org/show_bug.cgi?id=465
17
18 """
19
20 from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
21 Record, Memory,
22 Const)
23 from nmutil.iocontrol import RecordObject
24 from nmutil.util import rising_edge, Display
25 from enum import Enum, unique
26
27 from soc.experiment.dcache import DCache
28 from soc.experiment.pimem import PortInterfaceBase
29 from soc.experiment.mem_types import LoadStore1ToMMUType
30 from soc.experiment.mem_types import MMUToLoadStore1Type
31
32 from soc.minerva.wishbone import make_wb_layout
33 from soc.bus.sram import SRAM
34 from nmutil.util import Display
35
36
@unique
class State(Enum):
    """States of the LoadStore1 FSM (used as the shape of a Signal)."""

    # ready for instruction
    IDLE = 0
    # waiting for ack from dcache
    ACK_WAIT = 1
    # waiting for MMU to look up translation
    MMU_LOOKUP = 2
    # waiting for MMU to finish doing a tlbie
    TLBIE_WAIT = 3
43
44
45 # captures the LDSTRequest from the PortInterface, which "blips" most
46 # of this at us (pipeline-style).
class LDSTRequest(RecordObject):
    """Record holding the fields of one load/store request.

    LoadStore1 keeps one instance as the "live" request (driven
    combinatorially by the set_*_addr functions) and a second one as a
    registered copy, taken when the request "blip" is seen.
    """

    def __init__(self, name=None):
        super().__init__(name=name)

        # request type
        self.load = Signal()        # 1 for a load, 0 otherwise
        self.dcbz = Signal()        # request is a dcbz
        # target address and byte-enables
        self.addr = Signal(64)
        # self.store_data = Signal(64) # this is already sync (on a delay)
        self.byte_sel = Signal(8)
        # access attributes
        self.nc = Signal()          # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.align_intr = Signal()  # misaligned access was requested
60
61 # glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """Glue logic between PortInterface, the D-Cache and the MMU.

    The set_*/get_* functions drive a combinatorial LDSTRequest
    (self.req) plus a "valid" flag.  elaborate() turns the rising edge of
    that flag into a single-cycle request to the D-Cache, latches the
    request, and runs a small FSM (see State) which either completes on
    the D-Cache's answer or asks the MMU for a translation and retries.

    NOTE(review): the set_*/get_* functions are presumably the hooks that
    PortInterfaceBase calls from its own elaborate() - confirm against
    soc.experiment.pimem.
    """

    def __init__(self, pspec):
        """Create the D-Cache, the MMU records and all internal state.

        pspec must provide reg_wid and addr_wid; it may optionally
        provide disable_cache (all accesses are marked non-cacheable when
        it compares equal to True).
        """
        self.pspec = pspec
        # a missing attribute is treated exactly like disable_cache=False
        self.disable_cache = getattr(pspec, "disable_cache", False) == True
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        # these names are from the perspective of here (LoadStore1)
        self.d_out = self.dcache.d_in    # in to dcache is out for LoadStore
        self.d_in = self.dcache.d_out    # out from dcache is in for LoadStore
        self.m_out = LoadStore1ToMMUType()   # out *to* MMU
        self.m_in = MMUToLoadStore1Type()    # in *from* MMU
        self.req = LDSTRequest(name="ldst_req")

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # DSISR and DAR cached values.  note that the MMU FSM is where
        # these are accessed by OP_MTSPR/OP_MFSPR, on behalf of LoadStore1.
        # by contrast microwatt has the spr set/get done *in* loadstore1.vhdl
        self.dsisr = Signal(64)
        self.dar = Signal(64)

        # state info for LD/ST
        self.done = Signal()
        # latch most of the input request
        self.load = Signal()
        self.tlbie = Signal()
        self.dcbz = Signal()
        self.addr = Signal(64)
        self.store_data = Signal(64)
        self.load_data = Signal(64)
        self.byte_sel = Signal(8)
        #self.xerc : xer_common_t;
        #self.reserve = Signal()
        #self.atomic = Signal()
        #self.atomic_last = Signal()
        #self.rc = Signal()
        self.nc = Signal()            # non-cacheable access
        self.virt_mode = Signal()
        self.priv_mode = Signal()
        self.state = Signal(State)
        self.instr_fault = Signal()
        self.align_intr = Signal()
        self.busy = Signal()
        self.wait_dcache = Signal()
        self.wait_mmu = Signal()
        #self.mode_32bit = Signal()
        #self.intr_vec : integer range 0 to 16#fff#;
        #self.nia = Signal(64)
        #self.srr1 = Signal(16)

    def set_dcbz_addr(self, m, addr):
        """Mark the current request as a dcbz to address addr.

        Only load, dcbz and addr of self.req are driven here; byte_sel,
        priv/virt mode and align_intr are left untouched (see the
        commented-out lines).  Returns None.
        """
        m.d.comb += self.req.load.eq(0)  # not a load operation
        m.d.comb += self.req.dcbz.eq(1)
        #m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += Display("set_dcbz_addr %i",addr)
        #m.d.comb += self.req.priv_mode.eq(~msr_pr) # not-problem ==> priv
        #m.d.comb += self.req.virt_mode.eq(msr_pr) # problem-state ==> virt
        #m.d.comb += self.req.align_intr.eq(misalign)
        return None

    def set_wr_addr(self, m, addr, mask, misalign, msr_pr):
        """Fill in self.req for a store (or dcbz) to address addr.

        mask becomes the byte-select, misalign the alignment-interrupt
        flag, and msr_pr selects virtual (1) versus privileged (0) mode.
        The dcbz flag is taken from self.pi.is_dcbz.  Returns None.
        """
        m.d.comb += self.req.load.eq(0)  # store operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr)   # problem-state ==> virt
        m.d.comb += self.req.align_intr.eq(misalign)

        dcbz = self.pi.is_dcbz
        with m.If(dcbz):
            m.d.comb += Display("set_wr_addr: is_dcbz")
        m.d.comb += self.req.dcbz.eq(dcbz)

        # option to disable the cache entirely for write
        # (plain Python "if": decided once, when the design is elaborated)
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None

    def set_rd_addr(self, m, addr, mask, misalign, msr_pr):
        """Fill in self.req for a load from addr and raise d_valid.

        Parameters have the same meaning as for set_wr_addr.  The request
        is additionally marked non-cacheable when the address matches the
        peripheral range below, or when the cache is disabled.
        Returns None.
        """
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.req.load.eq(1)  # load operation
        m.d.comb += self.req.byte_sel.eq(mask)
        m.d.comb += self.req.align_intr.eq(misalign)
        m.d.comb += self.req.addr.eq(addr)
        m.d.comb += self.req.priv_mode.eq(~msr_pr)  # not-problem ==> priv
        m.d.comb += self.req.virt_mode.eq(msr_pr)   # problem-state ==> virt
        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.req.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.req.nc.eq(1)
        return None #FIXME return value

    def set_wr_data(self, m, data, wen):
        """Present store data and raise d_valid / d_w_valid.

        wen is currently unused (see the commented-out line).  Returns
        self.done as the "store ok" indicator.
        """
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.store_data.eq(data)
        #m.d.sync += self.d_out.byte_sel.eq(wen) # this might not be needed
        st_ok = self.done  # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        """Return (load_data, done): the read data and its valid flag."""
        ld_ok = self.done      # indicates read data is valid
        data = self.load_data  # actual read data
        return data, ld_ok

    def elaborate(self, platform):
        """Build the request "blip" logic, the LD/ST FSM and the bus wiring.

        Statement order matters throughout: where the same signal is
        assigned more than once in a domain, the later assignment takes
        priority.
        """
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        # temp vars
        d_out, d_in, dbus = self.d_out, self.d_in, self.dbus
        m_out, m_in = self.m_out, self.m_in
        exc = self.pi.exc_o
        exception = exc.happened  # alias: same Signal as exc.happened
        mmureq = Signal()

        # copy of address, but gets over-ridden for OP_FETCH_FAILED
        maddr = Signal(64)
        m.d.comb += maddr.eq(self.addr)

        # create a blip (single pulse) on valid read/write request
        # this can be over-ridden in the FSM to get dcache to re-run
        # a request when MMU_LOOKUP completes.
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
        # registered copy of self.req, captured on the blip
        ldst_r = LDSTRequest("ldst_r")

        # fsm skeleton
        with m.Switch(self.state):
            with m.Case(State.IDLE):
                with m.If(self.d_validblip & ~exc.happened):
                    comb += self.busy.eq(1)
                    sync += self.state.eq(State.ACK_WAIT)
                    sync += ldst_r.eq(self.req)  # copy of LDSTRequest on "blip"
                with m.Else():
                    sync += ldst_r.eq(0)

            # waiting for completion
            with m.Case(State.ACK_WAIT):
                comb += self.busy.eq(~exc.happened)

                with m.If(d_in.error):
                    # cache error is not necessarily "final", it could
                    # be that it was just a TLB miss
                    with m.If(d_in.cache_paradox):
                        comb += exception.eq(1)
                        sync += self.state.eq(State.IDLE)
                        sync += ldst_r.eq(0)
                        # DSISR bit 38 flags a store, i.e. "not load"
                        # (microwatt: not r.load).  the latched request
                        # is used because self.load is not driven
                        # anywhere in this class.
                        sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                        # XXX there is no architected bit for this
                        # (probably should be a machine check in fact)
                        sync += self.dsisr[63 - 35].eq(d_in.cache_paradox)

                    with m.Else():
                        # Look up the translation for TLB miss
                        # and also for permission error and RC error
                        # in case the PTE has been updated.
                        comb += mmureq.eq(1)
                        sync += self.state.eq(State.MMU_LOOKUP)
                with m.If(d_in.valid):
                    m.d.comb += self.done.eq(~mmureq)  # done if not doing MMU
                    with m.If(self.done):
                        sync += Display("ACK_WAIT, done %x", self.addr)
                    sync += self.state.eq(State.IDLE)
                    sync += ldst_r.eq(0)
                    # NOTE(review): self.load is not driven in this class;
                    # load_data is also assigned unconditionally further
                    # down, so this branch is currently redundant.
                    with m.If(self.load):
                        m.d.comb += self.load_data.eq(d_in.data)

            # waiting here for the MMU TLB lookup to complete.
            # either re-try the dcache lookup or throw MMU exception
            with m.Case(State.MMU_LOOKUP):
                comb += self.busy.eq(1)
                with m.If(m_in.done):
                    with m.If(~self.instr_fault):
                        sync += Display("MMU_LOOKUP, done %x -> %x",
                                        self.addr, d_out.addr)
                        # retry the request now that the MMU has
                        # installed a TLB entry, if not exception raised
                        m.d.comb += self.d_out.valid.eq(~exception)
                        sync += self.state.eq(State.ACK_WAIT)
                        sync += ldst_r.eq(0)
                    with m.Else():
                        sync += Display("MMU_LOOKUP, exception %x", self.addr)
                        # instruction lookup fault: store address in DAR
                        comb += exc.happened.eq(1)
                        sync += self.dar.eq(self.addr)

                with m.If(m_in.err):
                    # MMU RADIX exception thrown
                    comb += exception.eq(1)
                    sync += self.dsisr[63 - 33].eq(m_in.invalid)
                    sync += self.dsisr[63 - 36].eq(m_in.perm_error)
                    # bit 38 flags a store: same polarity and source as
                    # the cache-paradox case in ACK_WAIT
                    sync += self.dsisr[63 - 38].eq(~ldst_r.load)
                    sync += self.dsisr[63 - 44].eq(m_in.badtree)
                    sync += self.dsisr[63 - 45].eq(m_in.rc_error)

            with m.Case(State.TLBIE_WAIT):
                pass

        # alignment error: store address in DAR
        with m.If(self.align_intr):
            comb += exc.happened.eq(1)
            sync += self.dar.eq(self.addr)

        # happened, alignment, instr_fault, invalid.
        # note that all of these flow through - eventually to the TRAP
        # pipeline, via PowerDecoder2.
        comb += exc.invalid.eq(m_in.invalid)
        comb += exc.alignment.eq(self.align_intr)
        comb += exc.instr_fault.eq(self.instr_fault)
        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(m_in.badtree)
        comb += exc.perm_error.eq(m_in.perm_error)
        comb += exc.rc_error.eq(m_in.rc_error)
        comb += exc.segment_fault.eq(m_in.segerr)

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.wb_out.adr)
        comb += dbus.dat_w.eq(dcache.wb_out.dat)
        comb += dbus.sel.eq(dcache.wb_out.sel)
        comb += dbus.cyc.eq(dcache.wb_out.cyc)
        comb += dbus.stb.eq(dcache.wb_out.stb)
        comb += dbus.we.eq(dcache.wb_out.we)

        comb += dcache.wb_in.dat.eq(dbus.dat_r)
        comb += dcache.wb_in.ack.eq(dbus.ack)
        # stall is only wired when the bus layout actually has one
        if hasattr(dbus, "stall"):
            comb += dcache.wb_in.stall.eq(dbus.stall)

        # update out d data when flag set
        with m.If(self.d_w_valid):
            m.d.sync += d_out.data.eq(self.store_data)
        #with m.Else():
        #    m.d.sync += d_out.data.eq(0)
        # unit test passes with that change

        # this must move into the FSM, conditionally noticing that
        # the "blip" comes from self.d_validblip.
        # task 1: look up in dcache
        # task 2: if dcache fails, look up in MMU.
        # do **NOT** confuse the two.
        with m.If(self.d_validblip):
            # on the blip itself the live request drives the dcache...
            m.d.comb += self.d_out.valid.eq(~exc.happened)
            m.d.comb += d_out.load.eq(self.req.load)
            m.d.comb += d_out.byte_sel.eq(self.req.byte_sel)
            m.d.comb += self.addr.eq(self.req.addr)
            m.d.comb += d_out.nc.eq(self.req.nc)
            m.d.comb += d_out.priv_mode.eq(self.req.priv_mode)
            m.d.comb += d_out.virt_mode.eq(self.req.virt_mode)
            m.d.comb += self.align_intr.eq(self.req.align_intr)
            #m.d.comb += Display("validblip dcbz=%i addr=%x",self.req.dcbz,self.req.addr)
            m.d.comb += d_out.dcbz.eq(self.req.dcbz)
        with m.Else():
            # ...afterwards the registered copy takes over
            m.d.comb += d_out.load.eq(ldst_r.load)
            m.d.comb += d_out.byte_sel.eq(ldst_r.byte_sel)
            m.d.comb += self.addr.eq(ldst_r.addr)
            m.d.comb += d_out.nc.eq(ldst_r.nc)
            m.d.comb += d_out.priv_mode.eq(ldst_r.priv_mode)
            m.d.comb += d_out.virt_mode.eq(ldst_r.virt_mode)
            m.d.comb += self.align_intr.eq(ldst_r.align_intr)
            #m.d.comb += Display("no_validblip dcbz=%i addr=%x",ldst_r.dcbz,ldst_r.addr)
            m.d.comb += d_out.dcbz.eq(ldst_r.dcbz)

        # XXX these should be possible to remove but for some reason
        # cannot be... yet. TODO, investigate
        m.d.comb += self.load_data.eq(d_in.data)
        m.d.comb += d_out.addr.eq(self.addr)

        # Update outputs to MMU
        m.d.comb += m_out.valid.eq(mmureq)
        m.d.comb += m_out.iside.eq(self.instr_fault)
        m.d.comb += m_out.load.eq(ldst_r.load)
        # m_out.priv <= r.priv_mode; TODO
        m.d.comb += m_out.tlbie.eq(self.tlbie)
        # m_out.mtspr <= mmu_mtspr; # TODO
        # m_out.sprn <= sprn; # TODO
        m.d.comb += m_out.addr.eq(maddr)
        # m_out.slbia <= l_in.insn(7); # TODO: no idea what this is
        # m_out.rs <= l_in.data; # nope, probably not needed, TODO investigate

        return m

    def ports(self):
        """Yield the ports of the base class (memory ports still TODO)."""
        yield from super().ports()
        # TODO: memory ports
365
366
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 whose Wishbone data bus is wired straight to a test SRAM."""

    def __init__(self, pspec):
        """Create the LoadStore1 plus the backing Memory.

        The Memory is pspec.reg_wid bits wide.  Its depth is
        pspec.dmem_test_depth when that attribute exists and is an int,
        otherwise 32 entries.
        """
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory unless an integer depth is requested
        depth = getattr(pspec, "dmem_test_depth", None)
        if not isinstance(depth, int):
            depth = 32
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        """Add the SRAM submodule and connect it to self.dbus."""
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            # initiator -> target (one assignment per signal is enough)
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            # target -> initiator
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
403