mmu FSM store in dcache: only put data onto d_in on write request
[soc.git] / src / soc / fu / mmu / fsm.py
1 from nmigen import Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux
2 from nmigen import Record, Memory
3 from nmigen import Const
4 from soc.fu.mmu.pipe_data import MMUInputData, MMUOutputData, MMUPipeSpec
5 from nmutil.singlepipe import ControlBase
6 from nmutil.util import rising_edge
7
8 from soc.experiment.mmu import MMU
9 from soc.experiment.dcache import DCache
10
11 from openpower.consts import MSR
12 from openpower.decoder.power_fields import DecodeFields
13 from openpower.decoder.power_fieldsn import SignalBitRange
14 from openpower.decoder.power_decoder2 import decode_spr_num
15 from openpower.decoder.power_enums import MicrOp, XER_bits
16
17 from soc.experiment.pimem import PortInterface
18 from soc.experiment.pimem import PortInterfaceBase
19
20 from soc.experiment.mem_types import LoadStore1ToDCacheType, LoadStore1ToMMUType
21 from soc.experiment.mem_types import DCacheToLoadStore1Type, MMUToLoadStore1Type
22
23 from soc.minerva.wishbone import make_wb_layout
24 from soc.bus.sram import SRAM
25
26
27 # glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """Glue between a PortInterface and the Microwatt-derived DCache.

    The PortInterfaceBase hooks (set_wr_addr, set_rd_addr, set_wr_data,
    get_rd_data) are implemented by driving the DCache request record
    (d_in) and reading its response record (d_out).  The MMU-facing
    records l_in / l_out are created here; l_out feeds the exception
    outputs, while nothing in this class drives l_in yet.
    """

    def __init__(self, pspec):
        """Create the DCache, the MMU records and the request signals.

        pspec supplies reg_wid and addr_wid (handed to PortInterfaceBase)
        and is also passed to make_wb_layout for the data wishbone bus.
        """
        self.pspec = pspec
        super().__init__(pspec.reg_wid, pspec.addr_wid)

        # dcache plus shortcuts to its request (d_in) / response (d_out)
        self.dcache = DCache()
        self.d_in = self.dcache.d_in
        self.d_out = self.dcache.d_out

        # records to and from the MMU
        self.l_in = LoadStore1ToMMUType()
        self.l_out = MMUToLoadStore1Type()

        # TODO microwatt: flags derived from a dcache error in elaborate()
        self.mmureq = Signal()
        self.derror = Signal()

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

        # d_valid is held high by set_wr_addr/set_rd_addr; d_validblip is
        # the single-clock pulse generated from it and given to the DCache
        self.d_valid = Signal()
        self.d_w_data = Signal(64)  # XXX
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

    def set_wr_addr(self, m, addr, mask):
        """Present a store request (address and byte-select) to the DCache.

        Only the dcache path is driven: l_in (the MMU request record) is
        left untouched.  Always returns None.
        """
        comb, d_in = m.d.comb, self.d_in
        comb += self.d_valid.eq(1)
        comb += [
            d_in.valid.eq(self.d_validblip),  # pulse, not level
            d_in.load.eq(0),                  # store
            d_in.byte_sel.eq(mask),
            d_in.addr.eq(addr),
        ]
        return None

    def set_rd_addr(self, m, addr, mask):
        """Present a load request (address and byte-select) to the DCache.

        Only the dcache path is driven: l_in (the MMU request record) is
        left untouched.  Always returns None (FIXME return value).
        """
        comb, d_in = m.d.comb, self.d_in
        comb += self.d_valid.eq(1)
        comb += [
            d_in.valid.eq(self.d_validblip),  # pulse, not level
            d_in.load.eq(1),                  # load
            d_in.byte_sel.eq(mask),
            d_in.addr.eq(addr),
        ]
        # BAD HACK! loads from 0xCxxx_xxxx bypass the cache: that range is
        # for peripherals.  Microwatt loadstore1.vhdl does the same thing.
        # note the slice runs from bit 28 to the top of addr.
        with m.If(addr[28:] == 0xc):
            comb += d_in.nc.eq(1)
        return None  # FIXME return value

    def set_wr_data(self, m, data, wen):
        """Latch store data for elaborate() to forward to the DCache.

        wen is currently unused.  Returns a constant 1 as the "store ok"
        flag (TODO: d_out.valid should indicate write data is valid).
        """
        comb = m.d.comb
        comb += [self.d_w_valid.eq(1), self.d_w_data.eq(data)]
        return Const(1, 1)

    def get_rd_data(self, m):
        """Return (data, ld_ok): the DCache read data and its valid flag."""
        response = self.d_out
        return response.data, response.valid

    # Microwatt loadstore1.vhdl reference for the dcache-error handling
    # implemented in elaborate():
    #
    #   if d_in.error = '1' then
    #       if d_in.cache_paradox = '1' then
    #           -- signal an interrupt straight away
    #           exception := '1';
    #           dsisr(63 - 38) := not r2.req.load;
    #           -- XXX there is no architected bit for this
    #           -- (probably should be a machine check in fact)
    #           dsisr(63 - 35) := d_in.cache_paradox;
    #       else
    #           -- Look up the translation for TLB miss
    #           -- and also for permission error and RC error
    #           -- in case the PTE has been updated.
    #           mmureq := '1';
    #           v.state := MMU_LOOKUP;
    #           v.stage1_en := '0';
    #       end if;
    #   end if;

    def elaborate(self, platform):
        """Add the DCache submodule and wire errors, wishbone and data."""
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        d_out, l_out, dbus = self.d_out, self.l_out, self.dbus

        # classify a dcache error: a cache paradox is flagged directly,
        # anything else (TLB miss, permission error, RC error) asks for
        # an MMU lookup in case the PTE has been updated.
        with m.If(d_out.error):
            with m.If(d_out.cache_paradox):
                comb += self.derror.eq(1)
                # not yet done (see the VHDL above): DSISR bit updates
            with m.Else():
                comb += self.mmureq.eq(1)
                # not yet done: v.state := MMU_LOOKUP; v.stage1_en := '0'

        # exception record: "happened" merges dcache and MMU errors, the
        # remaining flags are copied one-for-one from the MMU response
        exc = self.pi.exception_o
        comb += exc.happened.eq(d_out.error | l_out.err)
        mmu_flags = (
            ("invalid", "invalid"),
            ("badtree", "badtree"),
            ("perm_error", "perm_error"),
            ("rc_error", "rc_error"),
            ("segment_fault", "segerr"),
        )
        for exc_name, mmu_name in mmu_flags:
            comb += getattr(exc, exc_name).eq(getattr(l_out, mmu_name))

        # TODO connect d_out.valid, d_out.store_done, d_out.cache_paradox
        # and l_out.done somewhere
        # TODO some exceptions set SPRs

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        # (dbus field name, dcache.wb_out field name)
        wb_requests = (
            ("adr", "adr"),
            ("dat_w", "dat"),
            ("sel", "sel"),
            ("cyc", "cyc"),
            ("stb", "stb"),
            ("we", "we"),
        )
        for bus_name, cache_name in wb_requests:
            comb += getattr(dbus, bus_name).eq(
                getattr(dcache.wb_out, cache_name))

        comb += dcache.wb_in.dat.eq(dbus.dat_r)
        comb += dcache.wb_in.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.wb_in.stall.eq(dbus.stall)

        # create a blip (single pulse) on valid read/write request
        comb += self.d_validblip.eq(rising_edge(m, self.d_valid))

        # registered store data: only present it to the dcache while a
        # write request is flagged, otherwise clear it
        sync += self.d_in.data.eq(Mux(self.d_w_valid, self.d_w_data, 0))

        return m

    def ports(self):
        """Yield the ports of the base class (TODO: memory ports)."""
        yield from super().ports()
185
186
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 with its data wishbone bus wired straight to an SRAM.

    The SRAM is backed by a small nmigen Memory whose depth comes from
    pspec.dmem_test_depth when that attribute exists and is an int, and
    defaults to 32 entries otherwise.
    """

    def __init__(self, pspec):
        """Create the LoadStore1 base and the backing Memory."""
        super().__init__(pspec)
        pspec = self.pspec

        # small Memory: 32 entries unless pspec overrides the depth
        depth = getattr(pspec, "dmem_test_depth", None)
        if not isinstance(depth, int):
            depth = 32
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        """Add the SRAM submodule and connect it to self.dbus."""
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']

        # initiator -> target.  each signal is driven exactly once (the
        # assignment used to be issued twice, which was redundant).
        for fanout in fanouts:
            target_sig = getattr(sram.bus, fanout)
            initiator_sig = getattr(dbus, fanout)
            print("fanout", fanout, target_sig.shape(),
                  initiator_sig.shape())
            comb += target_sig.eq(initiator_sig)

        # target -> initiator
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))

        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
223
224
class FSMMMUStage(ControlBase):
    """FSM MMU

    FSM-based MMU: must call set_ldst_interface and pass in an instance
    of a LoadStore1. this to comply with the ConfigMemoryPortInterface API

    Handles OP_MTSPR, OP_MFSPR and OP_TLBIE.  DSISR and DAR accesses are
    dealt with locally; every other SPR access, and TLBIE, is handed to
    the MMU as a single-clock "valid" pulse.
    """

    def __init__(self, pspec):
        """Create the p/n data records, the MMU and the field decoder."""
        super().__init__()
        self.pspec = pspec

        # set up p/n data
        self.p.data_i = MMUInputData(pspec)
        self.n.data_o = MMUOutputData(pspec)

        # this Function Unit is extremely unusual in that it actually stores a
        # "thing" rather than "processes inputs and produces outputs".  hence
        # why it has to be a FSM.  linking up LD/ST however is going to have
        # to be done back in Issuer (or Core)

        self.mmu = MMU()

        # make life a bit easier in Core XXX mustn't really do this,
        # pspec is designed for config variables, rather than passing
        # things around. have to think about it, design a way to do
        # it that makes "sense"
        # comment out for now self.pspec.mmu = self.mmu
        # comment out for now self.pspec.dcache = self.dcache

        # debugging output for gtkw
        self.debug0 = Signal(4)
        self.illegal = Signal()

        # for SPR field number access
        i = self.p.data_i
        self.fields = DecodeFields(SignalBitRange, [i.ctx.op.insn])
        self.fields.create_specs()

    def set_ldst_interface(self, ldst):
        """must be called back in Core, after FUs have been set up.
        one of those will be the MMU (us!) but the LoadStore1 instance
        must be set up in ConfigMemoryPortInterface. sigh.
        """
        # incoming PortInterface
        self.ldst = ldst
        self.dcache = self.ldst.dcache
        self.pi = self.ldst.pi

    def elaborate(self, platform):
        """Build the busy/done FSM and the MMU / DCache / LDST links.

        set_ldst_interface must have been called beforehand (asserted).
        """
        assert hasattr(self, "dcache"), "remember to call set_ldst_interface"
        m = super().elaborate(platform)
        comb = m.d.comb
        dcache = self.dcache

        # link mmu and dcache together
        m.submodules.mmu = mmu = self.mmu
        ldst = self.ldst  # managed externally: do not add here
        m.d.comb += dcache.m_in.eq(mmu.d_out)  # MMUToDCacheType
        m.d.comb += mmu.d_in.eq(dcache.m_out)  # DCacheToMMUType

        l_in, l_out = mmu.l_in, mmu.l_out
        d_in = dcache.d_in

        # link ldst and MMU together.  individual l_in fields are
        # overridden further down when this FU issues an MMU request.
        comb += l_in.eq(ldst.l_in)
        comb += ldst.l_out.eq(l_out)

        data_i, data_o = self.p.data_i, self.n.data_o
        a_i, b_i, o, spr1_o = data_i.ra, data_i.rb, data_o.o, data_o.spr1
        op = data_i.ctx.op
        msr_i = op.msr

        # TODO: link these SPRs somewhere
        # NOTE(review): both are only driven combinatorially below, so a
        # value written by mtspr is not retained for a later mfspr.
        dsisr = Signal(64)
        dar = Signal(64)

        # busy/done signals
        busy = Signal()
        done = Signal()
        m.d.comb += self.n.valid_o.eq(busy & done)
        m.d.comb += self.p.ready_o.eq(~busy)

        # take copy of X-Form SPR field
        x_fields = self.fields.FormXFX
        spr = Signal(len(x_fields.SPR))
        comb += spr.eq(decode_spr_num(x_fields.SPR))

        # based on MSR bits, set priv and virt mode.  TODO: 32-bit mode
        comb += d_in.priv_mode.eq(~msr_i[MSR.PR])
        comb += d_in.virt_mode.eq(msr_i[MSR.DR])
        #comb += d_in.mode_32bit.eq(msr_i[MSR.SF]) # ?? err

        # ok so we have to "pulse" the MMU (or dcache) rather than
        # hold the valid hi permanently.  guess what this does...
        valid = Signal()
        blip = Signal()
        m.d.comb += blip.eq(rising_edge(m, valid))

        with m.If(~busy):
            # idle: accept a new operation when one is offered
            with m.If(self.p.valid_i):
                m.d.sync += busy.eq(1)
        with m.Else():

            # based on the Micro-Op, we work out which of MMU or DCache
            # should "action" the operation.  one of MMU or DCache gets
            # enabled ("valid") and we twiddle our thumbs until it
            # responds ("done").

            # FIXME: properly implement MicrOp.OP_MTSPR and MicrOp.OP_MFSPR

            with m.Switch(op.insn_type):
                with m.Case(MicrOp.OP_MTSPR):
                    # despite redirection this FU **MUST** behave exactly
                    # like the SPR FU.  this **INCLUDES** updating the SPR
                    # regfile because the CSV file entry for OP_MTSPR
                    # categorically defines and requires the expectation
                    # that the CompUnit **WILL** write to the regfile.
                    # the value written is the operand RS, not the
                    # decoded SPR number.
                    comb += spr1_o.data.eq(a_i)
                    comb += spr1_o.ok.eq(1)
                    # subset SPR: first check a few bits
                    with m.If(~spr[9] & ~spr[5]):
                        comb += self.debug0.eq(3)
                        # DSISR is SPR 18 (bit 0 clear), DAR is SPR 19
                        # (bit 0 set)
                        with m.If(spr[0]):
                            comb += dar.eq(a_i)
                        with m.Else():
                            comb += dsisr.eq(a_i[:32])
                        comb += done.eq(1)
                    # pass it over to the MMU instead
                    with m.Else():
                        comb += self.debug0.eq(4)
                        # blip the MMU and wait for it to complete
                        comb += valid.eq(1)          # start "pulse"
                        comb += l_in.valid.eq(blip)  # start
                        comb += l_in.mtspr.eq(1)     # mtspr mode
                        comb += l_in.sprn.eq(spr)    # which SPR
                        comb += l_in.rs.eq(a_i)      # incoming operand (RS)
                        comb += done.eq(1)           # FIXME l_out.done

                with m.Case(MicrOp.OP_MFSPR):
                    # subset SPR: first check a few bits
                    with m.If(~spr[9] & ~spr[5]):
                        comb += self.debug0.eq(5)
                        # DSISR is SPR 18 (bit 0 clear), DAR is SPR 19
                        # (bit 0 set)
                        with m.If(spr[0]):
                            comb += o.data.eq(dar)
                        with m.Else():
                            comb += o.data.eq(dsisr)
                        comb += o.ok.eq(1)
                        comb += done.eq(1)
                    # pass it over to the MMU instead
                    with m.Else():
                        comb += self.debug0.eq(6)
                        # blip the MMU and wait for it to complete
                        comb += valid.eq(1)          # start "pulse"
                        comb += l_in.valid.eq(blip)  # start
                        comb += l_in.mtspr.eq(0)     # mfspr!=mtspr
                        comb += l_in.sprn.eq(spr)    # which SPR
                        comb += l_in.rs.eq(a_i)      # incoming operand (RS)
                        comb += o.data.eq(l_out.sprval)  # SPR from MMU
                        comb += o.ok.eq(l_out.done)  # only when l_out valid
                        comb += done.eq(1)           # FIXME l_out.done

                # XXX this one is going to have to go through LDSTCompUnit
                # because it's LDST that has control over dcache
                # (through PortInterface).  or, another means is devised
                # so as not to have double-drivers of d_in.valid and addr
                #
                #with m.Case(MicrOp.OP_DCBZ):
                #    # activate dcbz mode (spec: v3.0B p850)
                #    comb += valid.eq(1)             # start "pulse"
                #    comb += d_in.valid.eq(blip)     # start
                #    comb += d_in.dcbz.eq(1)         # dcbz mode
                #    comb += d_in.addr.eq(a_i + b_i) # addr is (RA|0) + RB
                #    comb += done.eq(dcache.d_out.store_done)  # TODO
                #    comb += self.debug0.eq(1)

                with m.Case(MicrOp.OP_TLBIE):
                    # pass TLBIE request to MMU (spec: v3.0B p1034)
                    # note that the spr is *not* an actual spr number, it's
                    # just that those bits happen to match with field bits
                    # RIC, PRS, R
                    comb += valid.eq(1)          # start "pulse"
                    comb += l_in.valid.eq(blip)  # start
                    comb += l_in.tlbie.eq(1)     # tlbie mode
                    comb += l_in.sprn.eq(spr)    # use sprn to send insn bits
                    comb += l_in.addr.eq(b_i)    # incoming operand (RB)
                    comb += done.eq(l_out.done)  # wait for the MMU
                    comb += self.debug0.eq(2)

                with m.Case(MicrOp.OP_ILLEGAL):
                    comb += self.illegal.eq(1)

            # result accepted downstream: go back to idle
            with m.If(self.n.ready_i & self.n.valid_o):
                m.d.sync += busy.eq(0)

        return m

    def __iter__(self):
        """Yield the signals of the previous (p) then next (n) stage."""
        yield from self.p
        yield from self.n

    def ports(self):
        """Return every port from __iter__ as a list."""
        return list(self)