from nmigen import Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux
from nmigen import Record, Memory
from nmigen import Const
from soc.fu.mmu.pipe_data import MMUInputData, MMUOutputData, MMUPipeSpec
from nmutil.singlepipe import ControlBase
from nmutil.util import rising_edge

from soc.experiment.mmu import MMU
from soc.experiment.dcache import DCache

from openpower.consts import MSR
from openpower.decoder.power_fields import DecodeFields
from openpower.decoder.power_fieldsn import SignalBitRange
from openpower.decoder.power_decoder2 import decode_spr_num
from openpower.decoder.power_enums import MicrOp, XER_bits

from soc.experiment.pimem import PortInterface
from soc.experiment.pimem import PortInterfaceBase

from soc.experiment.mem_types import LoadStore1ToDCacheType, LoadStore1ToMMUType
from soc.experiment.mem_types import DCacheToLoadStore1Type, MMUToLoadStore1Type

from soc.minerva.wishbone import make_wb_layout
from soc.bus.sram import SRAM


# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    def __init__(self, pspec):
        self.pspec = pspec
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        self.d_in = self.dcache.d_in
        self.d_out = self.dcache.d_out
        self.l_in = LoadStore1ToMMUType()
        self.l_out = MMUToLoadStore1Type()
        # TODO microwatt
        self.mmureq = Signal()
        self.derror = Signal()

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_data = Signal(64) # XXX
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

    def set_wr_addr(self, m, addr, mask):
        # this gets complicated: actually a FSM is needed which
        # first checks dcache, then if that fails (in virt mode)
        # it checks the MMU instead.
        #m.d.comb += self.l_in.valid.eq(1)
        #m.d.comb += self.l_in.addr.eq(addr)
        #m.d.comb += self.l_in.load.eq(0)
        m.d.comb += self.d_in.load.eq(0)
        m.d.comb += self.d_in.byte_sel.eq(mask)
        m.d.comb += self.d_in.addr.eq(addr)
        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.d_in.nc.eq(1)
        return None

    def set_rd_addr(self, m, addr, mask):
        # this gets complicated: actually a FSM is needed which
        # first checks dcache, then if that fails (in virt mode)
        # it checks the MMU instead.
        #m.d.comb += self.l_in.valid.eq(1)
        #m.d.comb += self.l_in.load.eq(1)
        #m.d.comb += self.l_in.addr.eq(addr)
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.d_in.valid.eq(self.d_validblip)
        m.d.comb += self.d_in.load.eq(1)
        m.d.comb += self.d_in.byte_sel.eq(mask)
        m.d.comb += self.d_in.addr.eq(addr)
        # BAD HACK! disable caching on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
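        # (the comparison below matches any address whose bits 28-31 are 0xC
        #  and whose higher bits, if any, are zero, i.e. the peripheral
        #  window 0xC000_0000-0xCFFF_FFFF)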
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.d_in.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.d_in.nc.eq(1)
        return None #FIXME return value

    def set_wr_data(self, m, data, wen):
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.d_in.valid.eq(self.d_validblip)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.d_w_data.eq(data)
        #m.d.sync += self.d_in.byte_sel.eq(wen) # this might not be needed
        st_ok = self.d_out.valid # TODO indicates write data is valid
        #st_ok = Const(1, 1)
        return st_ok

    def get_rd_data(self, m):
        ld_ok = self.d_out.valid # indicates read data is valid
        data = self.d_out.data   # actual read data
        return data, ld_ok

    """
    if d_in.error = '1' then
        if d_in.cache_paradox = '1' then
            -- signal an interrupt straight away
            exception := '1';
            dsisr(63 - 38) := not r2.req.load;
            -- XXX there is no architected bit for this
            -- (probably should be a machine check in fact)
            dsisr(63 - 35) := d_in.cache_paradox;
        else
            -- Look up the translation for TLB miss
            -- and also for permission error and RC error
            -- in case the PTE has been updated.
            mmureq := '1';
            v.state := MMU_LOOKUP;
            v.stage1_en := '0';
        end if;
    end if;
    """

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        # temp vars
        d_out, l_out, dbus = self.d_out, self.l_out, self.dbus

        with m.If(d_out.error):
            with m.If(d_out.cache_paradox):
                comb += self.derror.eq(1)
                # dsisr(63 - 38) := not r2.req.load;
                # -- XXX there is no architected bit for this
                # -- (probably should be a machine check in fact)
                # dsisr(63 - 35) := d_in.cache_paradox;
            with m.Else():
                # Look up the translation for TLB miss
                # and also for permission error and RC error
                # in case the PTE has been updated.
                comb += self.mmureq.eq(1)
                # v.state := MMU_LOOKUP;
                # v.stage1_en := '0';

        exc = self.pi.exc_o

        #happened, alignment, instr_fault, invalid,
        comb += exc.happened.eq(d_out.error | l_out.err)
        comb += exc.invalid.eq(l_out.invalid)

        #badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(l_out.badtree)
        comb += exc.perm_error.eq(l_out.perm_error)
        comb += exc.rc_error.eq(l_out.rc_error)
        comb += exc.segment_fault.eq(l_out.segerr)

        # TODO connect those signals somewhere
        #print(d_out.valid)         -> no error
        #print(d_out.store_done)    -> no error
        #print(d_out.cache_paradox) -> ?
        #print(l_out.done)          -> no error

        # TODO some exceptions set SPRs

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.wb_out.adr)
        comb += dbus.dat_w.eq(dcache.wb_out.dat)
        comb += dbus.sel.eq(dcache.wb_out.sel)
        comb += dbus.cyc.eq(dcache.wb_out.cyc)
        comb += dbus.stb.eq(dcache.wb_out.stb)
        comb += dbus.we.eq(dcache.wb_out.we)

        comb += dcache.wb_in.dat.eq(dbus.dat_r)
        comb += dcache.wb_in.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.wb_in.stall.eq(dbus.stall)

        # create a blip (single pulse) on valid read/write request
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))

        # write out d data only when flag set
        with m.If(self.d_w_valid):
            m.d.sync += self.d_in.data.eq(self.d_w_data)
        with m.Else():
            m.d.sync += self.d_in.data.eq(0)

        return m

    def ports(self):
        yield from super().ports()
        # TODO: memory ports


class TestSRAMLoadStore1(LoadStore1):
    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMLoadStore1 depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
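
    # usage sketch (assumption: the TestMemPspec helper used elsewhere in the
    # test suite; only reg_wid, addr_wid and the optional dmem_test_depth /
    # disable_cache attributes are actually read by this class):
    #
    #   from soc.config.test.test_loadstore import TestMemPspec
    #   pspec = TestMemPspec(addr_wid=48, mask_wid=8, reg_wid=64,
    #                        dmem_test_depth=32)
    #   lsu = TestSRAMLoadStore1(pspec)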


class FSMMMUStage(ControlBase):
    """FSM MMU

    FSM-based MMU: must call set_ldst_interface and pass in an instance
    of a LoadStore1. this is to comply with the ConfigMemoryPortInterface API
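
    Typical hook-up (a sketch only; in this tree the real wiring is done by
    Core and ConfigMemoryPortInterface, see set_ldst_interface below):

        fsm = FSMMMUStage(pspec)
        fsm.set_ldst_interface(ldst)    # ldst: the LoadStore1 instance
        m.submodules.fsm = fsm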
245 """
246 def __init__(self, pspec):
247 super().__init__()
248 self.pspec = pspec
249
250 # set up p/n data
251 self.p.data_i = MMUInputData(pspec)
252 self.n.data_o = MMUOutputData(pspec)
253
254 # this Function Unit is extremely unusual in that it actually stores a
255 # "thing" rather than "processes inputs and produces outputs". hence
256 # why it has to be a FSM. linking up LD/ST however is going to have
257 # to be done back in Issuer (or Core)
258
259 self.mmu = MMU()
260
261 # make life a bit easier in Core XXX mustn't really do this,
262 # pspec is designed for config variables, rather than passing
263 # things around. have to think about it, design a way to do
264 # it that makes "sense"
265 # comment out for now self.pspec.mmu = self.mmu
266 # comment out for now self.pspec.dcache = self.dcache
267
268 # debugging output for gtkw
269 self.debug0 = Signal(4)
270 self.illegal = Signal()
271
272 # for SPR field number access
273 i = self.p.data_i
274 self.fields = DecodeFields(SignalBitRange, [i.ctx.op.insn])
275 self.fields.create_specs()

    def set_ldst_interface(self, ldst):
        """must be called back in Core, after FUs have been set up.
        one of those will be the MMU (us!) but the LoadStore1 instance
        must be set up in ConfigMemoryPortInterface. sigh.
        """
        # incoming PortInterface
        self.ldst = ldst
        self.dcache = self.ldst.dcache
        self.pi = self.ldst.pi

    def elaborate(self, platform):
        assert hasattr(self, "dcache"), "remember to call set_ldst_interface"
        m = super().elaborate(platform)
        comb = m.d.comb
        dcache = self.dcache

        # link mmu and dcache together
        m.submodules.mmu = mmu = self.mmu
        ldst = self.ldst                       # managed externally: do not add here
        m.d.comb += dcache.m_in.eq(mmu.d_out)  # MMUToDCacheType
        m.d.comb += mmu.d_in.eq(dcache.m_out)  # DCacheToMMUType

        l_in, l_out = mmu.l_in, mmu.l_out
        d_in, d_out = dcache.d_in, dcache.d_out
        wb_out, wb_in = dcache.wb_out, dcache.wb_in

        # link ldst and MMU together
        comb += l_in.eq(ldst.l_in)
        comb += ldst.l_out.eq(l_out)

        data_i, data_o = self.p.data_i, self.n.data_o
        a_i, b_i, o, spr1_o = data_i.ra, data_i.rb, data_o.o, data_o.spr1
        op = data_i.ctx.op
        msr_i = op.msr
        spr1_i = data_i.spr1

        # TODO: link these SPRs somewhere
        dsisr = Signal(64)
        dar = Signal(64)

        # busy/done signals
        busy = Signal()
        done = Signal()
        m.d.comb += self.n.valid_o.eq(busy & done)
        m.d.comb += self.p.ready_o.eq(~busy)
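        # (handshake summary: a new operation is accepted only while idle,
        #  via p.ready_o; once busy, n.valid_o is raised for as long as
        #  "done" is asserted, and busy clears when the result is accepted
        #  downstream, i.e. n.ready_i & n.valid_o)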

        # take copy of X-Form SPR field
        x_fields = self.fields.FormXFX
        spr = Signal(len(x_fields.SPR))
        comb += spr.eq(decode_spr_num(x_fields.SPR))
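        # (in the mfspr/mtspr encoding the 10-bit SPR field holds the
        #  architected SPR number with its two 5-bit halves swapped;
        #  decode_spr_num() swaps them back into the architected ordering)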

        # based on MSR bits, set priv and virt mode. TODO: 32-bit mode
        comb += d_in.priv_mode.eq(~msr_i[MSR.PR])
        comb += d_in.virt_mode.eq(msr_i[MSR.DR])
        #comb += d_in.mode_32bit.eq(msr_i[MSR.SF]) # ?? err
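        # (MSR.PR=1 means problem state i.e. userspace, hence the inversion
        #  for priv_mode; MSR.DR=1 means data address translation is enabled)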

        # the MMU (and DCache) expect a one-clock "pulse" on their valid
        # inputs rather than a level held high for the whole request:
        # "valid" is held for the duration of the transaction and "blip"
        # is the single-cycle pulse derived from its rising edge.
        valid = Signal()
        blip = Signal()
        m.d.comb += blip.eq(rising_edge(m, valid))

        with m.If(~busy):
            with m.If(self.p.valid_i):
                m.d.sync += busy.eq(1)
        with m.Else():

            # based on the Micro-Op, we work out which of MMU or DCache
            # should "action" the operation. one of MMU or DCache gets
            # enabled ("valid") and we twiddle our thumbs until it
            # responds ("done").

            # FIXME: properly implement MicrOp.OP_MTSPR and MicrOp.OP_MFSPR

            with m.Switch(op.insn_type):
                with m.Case(MicrOp.OP_MTSPR):
                    # despite redirection this FU **MUST** behave exactly
                    # like the SPR FU. this **INCLUDES** updating the SPR
                    # regfile because the CSV file entry for OP_MTSPR
                    # categorically defines and requires the expectation
                    # that the CompUnit **WILL** write to the regfile.
                    comb += spr1_o.data.eq(spr)
                    comb += spr1_o.ok.eq(1)
                    # subset SPR: first check a few bits
                    with m.If(~spr[9] & ~spr[5]):
                        comb += self.debug0.eq(3)
                        # if matched, update the local cached value
                        with m.If(spr[0]):
                            m.d.sync += dsisr.eq(a_i[:32])
                        with m.Else():
                            m.d.sync += dar.eq(a_i)
                        comb += done.eq(1)
                    # pass it over to the MMU instead
                    with m.Else():
                        comb += self.debug0.eq(4)
                        # blip the MMU and wait for it to complete
                        comb += valid.eq(1)          # start "pulse"
                        comb += l_in.valid.eq(blip)  # start
                        comb += l_in.mtspr.eq(1)     # mtspr mode
                        comb += l_in.sprn.eq(spr)    # which SPR
                        comb += l_in.rs.eq(a_i)      # incoming operand (RS)
                        comb += done.eq(1)           # FIXME l_out.done

                with m.Case(MicrOp.OP_MFSPR):
                    # subset SPR: first check a few bits
                    #with m.If(~spr[9] & ~spr[5]):
                    #    comb += self.debug0.eq(5)
                    #with m.If(spr[0]):
                    #    comb += o.data.eq(dsisr)
                    #with m.Else():
                    #    comb += o.data.eq(dar)
                    #do NOT return cached values
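                    # (per the "implement MFSPR the same way as
                    #  fu/spr/main_stage.py" change: the architected SPR value
                    #  arrives already-read on the spr1 port, so it is simply
                    #  forwarded, rather than taken from the locally-cached
                    #  dsisr/dar copies above)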
                    comb += o.data.eq(spr1_i)
                    comb += o.ok.eq(1)
                    comb += done.eq(1)
                    # pass it over to the MMU instead
                    #with m.Else():
                    #    comb += self.debug0.eq(6)
                    #    # blip the MMU and wait for it to complete
                    #    comb += valid.eq(1)          # start "pulse"
                    #    comb += l_in.valid.eq(blip)  # start
                    #    comb += l_in.mtspr.eq(0)     # mfspr!=mtspr
                    #    comb += l_in.sprn.eq(spr)    # which SPR
                    #    comb += l_in.rs.eq(a_i)      # incoming operand (RS)
                    #    comb += o.data.eq(l_out.sprval) # SPR from MMU
                    #    comb += o.ok.eq(l_out.done)  # only when l_out valid
                    #    comb += done.eq(1)           # FIXME l_out.done

                # XXX this one is going to have to go through LDSTCompUnit
                # because it's LDST that has control over dcache
                # (through PortInterface). or, another means is devised
                # so as not to have double-drivers of d_in.valid and addr
                #
                #with m.Case(MicrOp.OP_DCBZ):
                #    # activate dcbz mode (spec: v3.0B p850)
                #    comb += valid.eq(1)             # start "pulse"
                #    comb += d_in.valid.eq(blip)     # start
                #    comb += d_in.dcbz.eq(1)         # dcbz mode
                #    comb += d_in.addr.eq(a_i + b_i) # addr is (RA|0) + RB
                #    comb += done.eq(d_out.store_done)  # TODO
                #    comb += self.debug0.eq(1)

                with m.Case(MicrOp.OP_TLBIE):
                    # pass TLBIE request to MMU (spec: v3.0B p1034)
                    # note that the spr is *not* an actual spr number, it's
                    # just that those bits happen to match with field bits
                    # RIC, PRS, R
                    comb += valid.eq(1)          # start "pulse"
                    comb += l_in.valid.eq(blip)  # start
                    comb += l_in.tlbie.eq(1)     # tlbie mode
                    comb += l_in.sprn.eq(spr)    # use sprn to send insn bits
                    comb += l_in.addr.eq(b_i)    # incoming operand (RB)
                    comb += done.eq(l_out.done)  # wait for MMU to complete
                    comb += self.debug0.eq(2)

                with m.Case(MicrOp.OP_ILLEGAL):
                    comb += self.illegal.eq(1)

            with m.If(self.n.ready_i & self.n.valid_o):
                m.d.sync += busy.eq(0)

        return m

    def __iter__(self):
        yield from self.p
        yield from self.n

    def ports(self):
        return list(self)