comments on dcache-to-mmu link
[soc.git] / src / soc / fu / mmu / fsm.py
1 from nmigen import Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux
2 from nmigen import Record, Memory
3 from nmigen import Const
4 from soc.fu.mmu.pipe_data import MMUInputData, MMUOutputData, MMUPipeSpec
5 from nmutil.singlepipe import ControlBase
6 from nmutil.util import rising_edge
7
8 from soc.experiment.mmu import MMU
9 from soc.experiment.dcache import DCache
10
11 from openpower.consts import MSR
12 from openpower.decoder.power_fields import DecodeFields
13 from openpower.decoder.power_fieldsn import SignalBitRange
14 from openpower.decoder.power_decoder2 import decode_spr_num
15 from openpower.decoder.power_enums import MicrOp, XER_bits
16
17 from soc.experiment.pimem import PortInterface
18 from soc.experiment.pimem import PortInterfaceBase
19
20 from soc.experiment.mem_types import LoadStore1ToDCacheType, LoadStore1ToMMUType
21 from soc.experiment.mem_types import DCacheToLoadStore1Type, MMUToLoadStore1Type
22
23 from soc.minerva.wishbone import make_wb_layout
24 from soc.bus.sram import SRAM
25
26
27 # glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """PortInterface front-end that drives a DCache and talks to the MMU.

    Owns a DCache instance (added as a submodule in elaborate) and exposes
    its d_in/d_out records directly.  l_in/l_out are the records exchanged
    with the MMU; they are only created here and are expected to be wired
    up by whoever instantiates the MMU.  The dcache wishbone signals are
    mirrored onto the dbus Record.
    """

    def __init__(self, pspec):
        self.pspec = pspec
        super().__init__(pspec.reg_wid, pspec.addr_wid)

        # data cache, plus shortcuts to its request/response records
        self.dcache = DCache()
        self.d_in = self.dcache.d_in
        self.d_out = self.dcache.d_out

        # records exchanged with the MMU
        self.l_in = LoadStore1ToMMUType()
        self.l_out = MMUToLoadStore1Type()

        # for debugging with gtkwave only
        self.debug1 = Signal()
        self.debug2 = Signal()

        # TODO microwatt
        self.mmureq = Signal()
        self.derror = Signal()

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

    def set_wr_addr(self, m, addr, mask):
        """Present the store address to both dcache and MMU.

        The valid/load strobes are deliberately left undriven for now and
        mask is ignored (TODO).  Always returns None.
        """
        #m.d.comb += self.d_in.valid.eq(1)
        #m.d.comb += self.l_in.valid.eq(1)
        #m.d.comb += self.d_in.load.eq(0)
        #m.d.comb += self.l_in.load.eq(0)
        # set phys addr on both units
        m.d.comb += [
            self.d_in.addr.eq(addr),
            self.l_in.addr.eq(addr),
        ]
        # TODO set mask
        return None

    def set_rd_addr(self, m, addr, mask):
        """Raise a load request, with its address, on both dcache and MMU.

        mask is currently unused.  Also raises debug1 so the request can be
        seen in a waveform viewer.  Always returns None.
        """
        m.d.comb += [
            # request strobes
            self.d_in.valid.eq(1),
            self.l_in.valid.eq(1),
            # this is a load
            self.d_in.load.eq(1),
            self.l_in.load.eq(1),
            # address goes to both units
            self.d_in.addr.eq(addr),
            self.l_in.addr.eq(addr),
            self.debug1.eq(1),
        ]
        # m.d.comb += self.debug2.eq(1)
        return None #FIXME return value

    def set_wr_data(self, m, data, wen):
        """Drive the dcache store data; wen is ignored for now (TODO).

        Returns a constant 1-bit "store ok" value.
        """
        m.d.comb += self.d_in.data.eq(data)
        # TODO set wen
        return Const(1, 1)

    def get_rd_data(self, m):
        """Return (read data, read-data-valid), both taken from the dcache."""
        return self.d_out.data, self.d_out.valid

    """
    if d_in.error = '1' then
        if d_in.cache_paradox = '1' then
            -- signal an interrupt straight away
            exception := '1';
            dsisr(63 - 38) := not r2.req.load;
            -- XXX there is no architected bit for this
            -- (probably should be a machine check in fact)
            dsisr(63 - 35) := d_in.cache_paradox;
        else
            -- Look up the translation for TLB miss
            -- and also for permission error and RC error
            -- in case the PTE has been updated.
            mmureq := '1';
            v.state := MMU_LOOKUP;
            v.stage1_en := '0';
        end if;
    end if;
    """

    def elaborate(self, platform):
        """Add the dcache submodule and wire up error, exception and bus."""
        m = super().elaborate(platform)
        comb = m.d.comb

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        # shorthand
        d_out, l_out, dbus = self.d_out, self.l_out, self.dbus

        # dcache error handling (python rendering of the VHDL quoted above)
        with m.If(d_out.error & d_out.cache_paradox):
            comb += self.derror.eq(1)
            # dsisr(63 - 38) := not r2.req.load;
            # -- XXX there is no architected bit for this
            # -- (probably should be a machine check in fact)
            # dsisr(63 - 35) := d_in.cache_paradox;
        with m.Elif(d_out.error):
            # Look up the translation for TLB miss
            # and also for permission error and RC error
            # in case the PTE has been updated.
            comb += self.mmureq.eq(1)
            # v.state := MMU_LOOKUP;
            # v.stage1_en := '0';

        # exception record presented on the PortInterface
        exc = self.pi.exception_o
        comb += [
            # happened, alignment, instr_fault, invalid,
            exc.happened.eq(d_out.error | l_out.err),
            exc.invalid.eq(l_out.invalid),
            # badtree, perm_error, rc_error, segment_fault
            exc.badtree.eq(l_out.badtree),
            exc.perm_error.eq(l_out.perm_error),
            exc.rc_error.eq(l_out.rc_error),
            exc.segment_fault.eq(l_out.segerr),
        ]

        # TODO connect those signals somewhere
        #print(d_out.valid)         -> no error
        #print(d_out.store_done)    -> no error
        #print(d_out.cache_paradox) -> ?
        #print(l_out.done)          -> no error

        # TODO some exceptions set SPRs

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        wb_out, wb_in = dcache.wb_out, dcache.wb_in
        comb += [
            # dcache -> bus
            dbus.adr.eq(wb_out.adr),
            dbus.dat_w.eq(wb_out.dat),
            dbus.sel.eq(wb_out.sel),
            dbus.cyc.eq(wb_out.cyc),
            dbus.stb.eq(wb_out.stb),
            dbus.we.eq(wb_out.we),
            # bus -> dcache
            wb_in.dat.eq(dbus.dat_r),
            wb_in.ack.eq(dbus.ack),
        ]
        # stall is only wired when the bus layout actually has that field
        if hasattr(dbus, "stall"):
            comb += wb_in.stall.eq(dbus.stall)

        return m

    def ports(self):
        """Yield the ports of the base class (no extra ports added yet)."""
        yield from super().ports()
        # TODO: memory ports
166
167
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 with its wishbone data bus wired straight to a test SRAM.

    The backing Memory is pspec.reg_wid bits wide.  Its depth comes from
    pspec.dmem_test_depth when that attribute exists and is an int,
    otherwise it defaults to 32 entries.
    """

    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec

        # small 32-entry Memory unless the pspec asks for a different depth
        depth = getattr(pspec, "dmem_test_depth", None)
        if not isinstance(depth, int):
            depth = 32
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        """Instantiate the SRAM and connect it to dbus, signal by signal."""
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']

        # initiator -> target.  each signal is assigned exactly once (the
        # assignment used to be issued twice per signal).
        for fanout in fanouts:
            src = getattr(dbus, fanout)
            dst = getattr(sram.bus, fanout)
            print("fanout", fanout, dst.shape(), src.shape())
            comb += dst.eq(src)

        # target -> initiator
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))

        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
204
205
class FSMMMUStage(ControlBase):
    """FSM MMU

    FSM-based MMU: must call set_ldst_interface and pass in an instance
    of a LoadStore1. this to comply with the ConfigMemoryPortInterface API

    Handles OP_MTSPR, OP_MFSPR and OP_TLBIE.  A request is accepted when
    p.valid_i is seen while not busy; the result is presented with
    n.valid_o once the selected operation reports done, and busy drops
    when the consumer acknowledges with n.ready_i.
    """

    def __init__(self, pspec):
        super().__init__()
        self.pspec = pspec

        # set up p/n data
        self.p.data_i = MMUInputData(pspec)
        self.n.data_o = MMUOutputData(pspec)

        # this Function Unit is extremely unusual in that it actually stores a
        # "thing" rather than "processes inputs and produces outputs". hence
        # why it has to be a FSM. linking up LD/ST however is going to have
        # to be done back in Issuer (or Core)

        self.mmu = MMU()

        # make life a bit easier in Core XXX mustn't really do this,
        # pspec is designed for config variables, rather than passing
        # things around. have to think about it, design a way to do
        # it that makes "sense"
        # comment out for now self.pspec.mmu = self.mmu
        # comment out for now self.pspec.dcache = self.dcache

        # debugging output for gtkw
        self.debug0 = Signal(4)
        self.debug1 = Signal()
        #self.debug2 = Signal(64)
        #self.debug3 = Signal(64)
        self.illegal = Signal()

        # for SPR field number access
        i = self.p.data_i
        self.fields = DecodeFields(SignalBitRange, [i.ctx.op.insn])
        self.fields.create_specs()

    def set_ldst_interface(self, ldst):
        """must be called back in Core, after FUs have been set up.
        one of those will be the MMU (us!) but the LoadStore1 instance
        must be set up in ConfigMemoryPortInterface. sigh.

        Records ldst together with its dcache and PortInterface (pi) on
        this object; elaborate refuses to run until this has been done.
        """
        # incoming PortInterface
        self.ldst = ldst
        self.dcache = self.ldst.dcache
        self.pi = self.ldst.pi

    def elaborate(self, platform):
        """Build the FSM: link MMU, dcache and ldst, then decode the op."""
        assert hasattr(self, "dcache"), "remember to call set_ldst_interface"
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync
        dcache = self.dcache

        # link mmu and dcache together
        m.submodules.mmu = mmu = self.mmu
        ldst = self.ldst # managed externally: do not add here
        comb += dcache.m_in.eq(mmu.d_out) # MMUToDCacheType
        comb += mmu.d_in.eq(dcache.m_out) # DCacheToMMUType

        l_in, l_out = mmu.l_in, mmu.l_out
        d_in = dcache.d_in

        # link ldst and MMU together.  this must stay *before* the Switch
        # below: the per-field l_in assignments made there are added later
        # and therefore take priority over this whole-record copy.
        comb += l_in.eq(ldst.l_in)
        comb += ldst.l_out.eq(l_out)

        data_i, data_o = self.p.data_i, self.n.data_o
        a_i, b_i, o, spr1_o = data_i.ra, data_i.rb, data_o.o, data_o.spr1
        op = data_i.ctx.op
        msr_i = op.msr

        # TODO: link these SPRs somewhere
        # NOTE(review): both are only ever driven combinatorially, so a
        # value written by mtspr is not retained for a later mfspr.
        dsisr = Signal(64)
        dar = Signal(64)

        # busy/done signals
        busy = Signal()
        done = Signal()
        comb += self.n.valid_o.eq(busy & done)
        comb += self.p.ready_o.eq(~busy)

        # take copy of X-Form SPR field
        x_fields = self.fields.FormXFX
        spr = Signal(len(x_fields.SPR))
        comb += spr.eq(decode_spr_num(x_fields.SPR))

        # based on MSR bits, set priv and virt mode. TODO: 32-bit mode
        comb += d_in.priv_mode.eq(~msr_i[MSR.PR])
        comb += d_in.virt_mode.eq(msr_i[MSR.DR])
        #comb += d_in.mode_32bit.eq(msr_i[MSR.SF]) # ?? err

        # ok so we have to "pulse" the MMU (or dcache) rather than
        # hold the valid hi permanently. guess what this does...
        valid = Signal()
        blip = Signal()
        comb += blip.eq(rising_edge(m, valid))

        with m.If(~busy):
            with m.If(self.p.valid_i):
                sync += busy.eq(1)
        with m.Else():

            # based on the Micro-Op, we work out which of MMU or DCache
            # should "action" the operation. one of MMU or DCache gets
            # enabled ("valid") and we twiddle our thumbs until it
            # responds ("done").

            # FIXME: properly implement MicrOp.OP_MTSPR and MicrOp.OP_MFSPR

            with m.Switch(op.insn_type):
                with m.Case(MicrOp.OP_MTSPR):
                    # despite redirection this FU **MUST** behave exactly
                    # like the SPR FU. this **INCLUDES** updating the SPR
                    # regfile because the CSV file entry for OP_MTSPR
                    # categorically defines and requires the expectation
                    # that the CompUnit **WILL** write to the regfile.
                    # the value written is the RS operand, not the SPR
                    # number.
                    comb += spr1_o.data.eq(a_i)
                    comb += spr1_o.ok.eq(1)
                    # subset SPR: first check a few bits
                    with m.If(~spr[9] & ~spr[5]):
                        comb += self.debug0.eq(3)
                        # DSISR is SPR 18, DAR is SPR 19: bit 0 picks DAR
                        with m.If(spr[0]):
                            comb += dar.eq(a_i)
                        with m.Else():
                            comb += dsisr.eq(a_i[:32])
                        comb += done.eq(1)
                    # pass it over to the MMU instead
                    with m.Else():
                        comb += self.debug0.eq(4)
                        # blip the MMU and wait for it to complete
                        comb += valid.eq(1)          # start "pulse"
                        comb += l_in.valid.eq(blip)  # start
                        comb += l_in.mtspr.eq(1)     # mtspr mode
                        comb += l_in.sprn.eq(spr)    # which SPR
                        comb += l_in.rs.eq(a_i)      # incoming operand (RS)
                        comb += done.eq(1)           # FIXME l_out.done

                with m.Case(MicrOp.OP_MFSPR):
                    # subset SPR: first check a few bits
                    with m.If(~spr[9] & ~spr[5]):
                        comb += self.debug0.eq(5)
                        # DSISR is SPR 18, DAR is SPR 19: bit 0 picks DAR
                        with m.If(spr[0]):
                            comb += o.data.eq(dar)
                        with m.Else():
                            comb += o.data.eq(dsisr)
                        comb += o.ok.eq(1)
                        comb += done.eq(1)
                    # pass it over to the MMU instead
                    with m.Else():
                        comb += self.debug0.eq(6)
                        # blip the MMU and wait for it to complete
                        comb += valid.eq(1)             # start "pulse"
                        comb += l_in.valid.eq(blip)     # start
                        comb += l_in.mtspr.eq(0)        # mfspr!=mtspr
                        comb += l_in.sprn.eq(spr)       # which SPR
                        comb += l_in.rs.eq(a_i)         # incoming operand (RS)
                        comb += o.data.eq(l_out.sprval) # SPR from MMU
                        comb += o.ok.eq(l_out.done)     # only when l_out valid
                        comb += done.eq(1)              # FIXME l_out.done

                # XXX this one is going to have to go through LDSTCompUnit
                # because it's LDST that has control over dcache
                # (through PortInterface). or, another means is devised
                # so as not to have double-drivers of d_in.valid and addr
                #
                #with m.Case(MicrOp.OP_DCBZ):
                #    # activate dcbz mode (spec: v3.0B p850)
                #    comb += valid.eq(1)             # start "pulse"
                #    comb += d_in.valid.eq(blip)     # start
                #    comb += d_in.dcbz.eq(1)         # dcbz mode
                #    comb += d_in.addr.eq(a_i + b_i) # addr is (RA|0) + RB
                #    comb += done.eq(d_out.store_done) # TODO
                #    comb += self.debug0.eq(1)

                with m.Case(MicrOp.OP_TLBIE):
                    # pass TLBIE request to MMU (spec: v3.0B p1034)
                    # note that the spr is *not* an actual spr number, it's
                    # just that those bits happen to match with field bits
                    # RIC, PRS, R
                    comb += valid.eq(1)          # start "pulse"
                    comb += l_in.valid.eq(blip)  # start
                    comb += l_in.tlbie.eq(1)     # tlbie mode
                    comb += l_in.sprn.eq(spr)    # use sprn to send insn bits
                    comb += l_in.addr.eq(b_i)    # incoming operand (RB)
                    comb += done.eq(l_out.done)  # zzzz
                    comb += self.debug0.eq(2)
                with m.Case(MicrOp.OP_ILLEGAL):
                    comb += self.illegal.eq(1)

            # result accepted by the consumer: ready for the next request
            with m.If(self.n.ready_i & self.n.valid_o):
                sync += busy.eq(0)

        return m

    def __iter__(self):
        yield from self.p
        yield from self.n

    def ports(self):
        """Return the prev/next port signals as a list."""
        return list(self)