update dsisr and dar using sync
[soc.git] / src / soc / fu / mmu / fsm.py
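"""FSM-based MMU Function Unit, plus the LoadStore1 glue logic which wires
the (Microwatt-derived) MMU and DCache into the PortInterface API.

LoadStore1 provides the PortInterfaceBase hooks and bridges the dcache
wishbone signals onto a standard nmigen Wishbone bus; TestSRAMLoadStore1 is
a unit-test variant backed by a small SRAM; FSMMMUStage is the FSM-style
Function Unit which handles mtspr/mfspr of the MMU SPRs, and tlbie.
"""
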
from nmigen import Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux
from nmigen import Record, Memory
from nmigen import Const
from soc.fu.mmu.pipe_data import MMUInputData, MMUOutputData, MMUPipeSpec
from nmutil.singlepipe import ControlBase
from nmutil.util import rising_edge

from soc.experiment.mmu import MMU
from soc.experiment.dcache import DCache

from openpower.consts import MSR
from openpower.decoder.power_fields import DecodeFields
from openpower.decoder.power_fieldsn import SignalBitRange
from openpower.decoder.power_decoder2 import decode_spr_num
from openpower.decoder.power_enums import MicrOp, XER_bits

from soc.experiment.pimem import PortInterface
from soc.experiment.pimem import PortInterfaceBase

from soc.experiment.mem_types import LoadStore1ToDCacheType, LoadStore1ToMMUType
from soc.experiment.mem_types import DCacheToLoadStore1Type, MMUToLoadStore1Type

from soc.minerva.wishbone import make_wb_layout
from soc.bus.sram import SRAM

# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    def __init__(self, pspec):
        self.pspec = pspec
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        self.d_in = self.dcache.d_in
        self.d_out = self.dcache.d_out
        self.l_in = LoadStore1ToMMUType()
        self.l_out = MMUToLoadStore1Type()
        # TODO microwatt
        self.mmureq = Signal()
        self.derror = Signal()

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_data = Signal(64)  # XXX
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

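    # the set_*/get_* methods below are the PortInterfaceBase hooks, called
    # by the parent class when a PortInterface request comes in.  d_valid is
    # held for the duration of the request; elaborate() (below) turns its
    # rising edge into d_validblip, a single-clock pulse, which is what is
    # actually presented to DCache on d_in.valid.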
    def set_wr_addr(self, m, addr, mask):
        # this gets complicated: actually a FSM is needed which
        # first checks dcache, then if that fails (in virt mode)
        # it checks the MMU instead.
        #m.d.comb += self.l_in.valid.eq(1)
        #m.d.comb += self.l_in.addr.eq(addr)
        #m.d.comb += self.l_in.load.eq(0)
        m.d.comb += self.d_in.load.eq(0)
        m.d.comb += self.d_in.byte_sel.eq(mask)
        m.d.comb += self.d_in.addr.eq(addr)
        # option to disable the cache entirely for write
        if self.disable_cache:
            m.d.comb += self.d_in.nc.eq(1)
        return None

    def set_rd_addr(self, m, addr, mask):
        # this gets complicated: actually a FSM is needed which
        # first checks dcache, then if that fails (in virt mode)
        # it checks the MMU instead.
        #m.d.comb += self.l_in.valid.eq(1)
        #m.d.comb += self.l_in.load.eq(1)
        #m.d.comb += self.l_in.addr.eq(addr)
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.d_in.valid.eq(self.d_validblip)
        m.d.comb += self.d_in.load.eq(1)
        m.d.comb += self.d_in.byte_sel.eq(mask)
        m.d.comb += self.d_in.addr.eq(addr)
        # BAD HACK! disable caching on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.d_in.nc.eq(1)
        # option to disable the cache entirely for read
        if self.disable_cache:
            m.d.comb += self.d_in.nc.eq(1)
        return None  # FIXME return value

    def set_wr_data(self, m, data, wen):
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.d_in.valid.eq(self.d_validblip)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.d_w_data.eq(data)
        #m.d.sync += self.d_in.byte_sel.eq(wen) # this might not be needed
        st_ok = self.d_out.valid  # TODO indicates write data is valid
        #st_ok = Const(1, 1)
        return st_ok

    def get_rd_data(self, m):
        ld_ok = self.d_out.valid  # indicates read data is valid
        data = self.d_out.data    # actual read data
        return data, ld_ok

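    # reference behaviour, apparently from Microwatt loadstore1.vhdl (see the
    # "same thing done in Microwatt loadstore1.vhdl" note above): this is the
    # error/cache_paradox handling that elaborate() below ports to nmigen.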
108 """
109 if d_in.error = '1' then
110 if d_in.cache_paradox = '1' then
111 -- signal an interrupt straight away
112 exception := '1';
113 dsisr(63 - 38) := not r2.req.load;
114 -- XXX there is no architected bit for this
115 -- (probably should be a machine check in fact)
116 dsisr(63 - 35) := d_in.cache_paradox;
117 else
118 -- Look up the translation for TLB miss
119 -- and also for permission error and RC error
120 -- in case the PTE has been updated.
121 mmureq := '1';
122 v.state := MMU_LOOKUP;
123 v.stage1_en := '0';
124 end if;
125 end if;
126 """

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        # temp vars
        d_out, l_out, dbus = self.d_out, self.l_out, self.dbus

        with m.If(d_out.error):
            with m.If(d_out.cache_paradox):
                comb += self.derror.eq(1)
                # dsisr(63 - 38) := not r2.req.load;
                # -- XXX there is no architected bit for this
                # -- (probably should be a machine check in fact)
                # dsisr(63 - 35) := d_in.cache_paradox;
            with m.Else():
                # Look up the translation for TLB miss
                # and also for permission error and RC error
                # in case the PTE has been updated.
                comb += self.mmureq.eq(1)
                # v.state := MMU_LOOKUP;
                # v.stage1_en := '0';

        exc = self.pi.exc_o

        #happened, alignment, instr_fault, invalid,
        comb += exc.happened.eq(d_out.error | l_out.err)
        comb += exc.invalid.eq(l_out.invalid)

        #badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(l_out.badtree)
        comb += exc.perm_error.eq(l_out.perm_error)
        comb += exc.rc_error.eq(l_out.rc_error)
        comb += exc.segment_fault.eq(l_out.segerr)

        # TODO connect those signals somewhere
        #print(d_out.valid)          -> no error
        #print(d_out.store_done)     -> no error
        #print(d_out.cache_paradox)  -> ?
        #print(l_out.done)           -> no error

        # TODO some exceptions set SPRs

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.wb_out.adr)
        comb += dbus.dat_w.eq(dcache.wb_out.dat)
        comb += dbus.sel.eq(dcache.wb_out.sel)
        comb += dbus.cyc.eq(dcache.wb_out.cyc)
        comb += dbus.stb.eq(dcache.wb_out.stb)
        comb += dbus.we.eq(dcache.wb_out.we)

        comb += dcache.wb_in.dat.eq(dbus.dat_r)
        comb += dcache.wb_in.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.wb_in.stall.eq(dbus.stall)

        # create a blip (single pulse) on valid read/write request
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))
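        # (rising_edge turns the held d_valid into a one-clock pulse, so
        # DCache sees d_in.valid for exactly one cycle per request)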

        # write out d data only when flag set
        with m.If(self.d_w_valid):
            m.d.sync += self.d_in.data.eq(self.d_w_data)
        with m.Else():
            m.d.sync += self.d_in.data.eq(0)

        return m

    def ports(self):
        yield from super().ports()
        # TODO: memory ports


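# test-only variant of LoadStore1: rather than expecting an external Wishbone
# interconnect, dbus is looped straight back into a small internal SRAM, so
# the dcache has something to talk to in unit tests.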
class TestSRAMLoadStore1(LoadStore1):
    def __init__(self, pspec):
        super().__init__(pspec)
        pspec = self.pspec
        # small 32-entry Memory
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m

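# usage sketch (not taken from this repo, purely illustrative): the only
# pspec attributes used directly above are reg_wid, addr_wid and the optional
# disable_cache / dmem_test_depth; make_wb_layout(pspec) may need more, so in
# practice an existing test pspec is passed in.
#
#   pspec = types.SimpleNamespace(reg_wid=64, addr_wid=48)  # hypothetical
#   ldst = TestSRAMLoadStore1(pspec)
#   # ldst.pi is the PortInterface that the rest of the core talks to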

class FSMMMUStage(ControlBase):
    """FSM MMU

    FSM-based MMU: must call set_ldst_interface and pass in an instance
    of a LoadStore1; this is to comply with the ConfigMemoryPortInterface API
    """
    def __init__(self, pspec):
        super().__init__()
        self.pspec = pspec

        # set up p/n data
        self.p.data_i = MMUInputData(pspec)
        self.n.data_o = MMUOutputData(pspec)

        # this Function Unit is extremely unusual in that it actually stores a
        # "thing" rather than "processes inputs and produces outputs". hence
        # why it has to be a FSM. linking up LD/ST however is going to have
        # to be done back in Issuer (or Core)

        self.mmu = MMU()

        # make life a bit easier in Core XXX mustn't really do this,
        # pspec is designed for config variables, rather than passing
        # things around. have to think about it, design a way to do
        # it that makes "sense"
        # comment out for now self.pspec.mmu = self.mmu
        # comment out for now self.pspec.dcache = self.dcache

        # debugging output for gtkw
        self.debug0 = Signal(4)
        self.illegal = Signal()

        # for SPR field number access
        i = self.p.data_i
        self.fields = DecodeFields(SignalBitRange, [i.ctx.op.insn])
        self.fields.create_specs()

    def set_ldst_interface(self, ldst):
        """must be called back in Core, after FUs have been set up.
        one of those will be the MMU (us!) but the LoadStore1 instance
        must be set up in ConfigMemoryPortInterface. sigh.
        """
        # incoming PortInterface
        self.ldst = ldst
        self.dcache = self.ldst.dcache
        self.pi = self.ldst.pi

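    # expected call order, per the docstring above and the assert in
    # elaborate() below (sketch only):
    #
    #   ldst = LoadStore1(pspec)     # created via ConfigMemoryPortInterface
    #   fu = FSMMMUStage(pspec)
    #   fu.set_ldst_interface(ldst)  # must happen before elaboration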
    def elaborate(self, platform):
        assert hasattr(self, "dcache"), "remember to call set_ldst_interface"
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync  # sync is needed for DSISR/DAR below
        dcache = self.dcache

        # link mmu and dcache together
        m.submodules.mmu = mmu = self.mmu
        ldst = self.ldst  # managed externally: do not add here
        m.d.comb += dcache.m_in.eq(mmu.d_out)  # MMUToDCacheType
        m.d.comb += mmu.d_in.eq(dcache.m_out)  # DCacheToMMUType

        l_in, l_out = mmu.l_in, mmu.l_out
        d_in, d_out = dcache.d_in, dcache.d_out
        wb_out, wb_in = dcache.wb_out, dcache.wb_in

        # link ldst and MMU together
        comb += l_in.eq(ldst.l_in)
        comb += ldst.l_out.eq(l_out)

        data_i, data_o = self.p.data_i, self.n.data_o
        a_i, b_i, o, spr1_o = data_i.ra, data_i.rb, data_o.o, data_o.spr1
        op = data_i.ctx.op
        msr_i = op.msr

        # TODO: link these SPRs somewhere
        dsisr = Signal(64)
        dar = Signal(64)

        # busy/done signals
        busy = Signal()
        done = Signal()
        m.d.comb += self.n.valid_o.eq(busy & done)
        m.d.comb += self.p.ready_o.eq(~busy)

        # take copy of X-Form SPR field
        x_fields = self.fields.FormXFX
        spr = Signal(len(x_fields.SPR))
        comb += spr.eq(decode_spr_num(x_fields.SPR))
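        # (the X-Form SPR field holds the SPR number with its two 5-bit
        # halves swapped; decode_spr_num swaps them back into the
        # architected numbering)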

        # based on MSR bits, set priv and virt mode. TODO: 32-bit mode
        comb += d_in.priv_mode.eq(~msr_i[MSR.PR])
        comb += d_in.virt_mode.eq(msr_i[MSR.DR])
        #comb += d_in.mode_32bit.eq(msr_i[MSR.SF]) # ?? err
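        # (MSR.PR=1 is problem/user state, hence the inversion for priv_mode;
        # MSR.DR=1 turns on data relocation, i.e. virtual mode.  MSR.SF is 1
        # for 64-bit mode, so mode_32bit would be ~msr_i[MSR.SF], presumably
        # why the line above is left commented out with "?? err")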

        # ok so we have to "pulse" the MMU (or dcache) rather than
        # hold the valid hi permanently. guess what this does...
        valid = Signal()
        blip = Signal()
        m.d.comb += blip.eq(rising_edge(m, valid))

        with m.If(~busy):
            with m.If(self.p.valid_i):
                m.d.sync += busy.eq(1)
        with m.Else():

            # based on the Micro-Op, we work out which of MMU or DCache
            # should "action" the operation. one of MMU or DCache gets
            # enabled ("valid") and we twiddle our thumbs until it
            # responds ("done").

            # FIXME: properly implement MicrOp.OP_MTSPR and MicrOp.OP_MFSPR
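            # note on the "subset SPR" checks below: DSISR is SPR 18 and DAR
            # is SPR 19, held locally in the dsisr/dar signals above and
            # selected by the spr[0] tests; SPRs failing the
            # ~spr[9] & ~spr[5] check are handed to the MMU itself.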

            with m.Switch(op.insn_type):
                with m.Case(MicrOp.OP_MTSPR):
                    # despite redirection this FU **MUST** behave exactly
                    # like the SPR FU. this **INCLUDES** updating the SPR
                    # regfile because the CSV file entry for OP_MTSPR
                    # categorically defines and requires the expectation
                    # that the CompUnit **WILL** write to the regfile.
                    comb += spr1_o.data.eq(spr)
                    comb += spr1_o.ok.eq(1)
                    # subset SPR: first check a few bits
                    with m.If(~spr[9] & ~spr[5]):
                        comb += self.debug0.eq(3)
                        # if matched update local cached value
                        with m.If(spr[0]):
                            sync += dsisr.eq(a_i[:32])
                        with m.Else():
                            sync += dar.eq(a_i)
                        comb += done.eq(1)
                    # pass it over to the MMU instead
                    with m.Else():
                        comb += self.debug0.eq(4)
                        # blip the MMU and wait for it to complete
                        comb += valid.eq(1)          # start "pulse"
                        comb += l_in.valid.eq(blip)  # start
                        comb += l_in.mtspr.eq(1)     # mtspr mode
                        comb += l_in.sprn.eq(spr)    # which SPR
                        comb += l_in.rs.eq(a_i)      # incoming operand (RS)
                        comb += done.eq(1)           # FIXME l_out.done

                with m.Case(MicrOp.OP_MFSPR):
                    # subset SPR: first check a few bits
                    with m.If(~spr[9] & ~spr[5]):
                        comb += self.debug0.eq(5)
                        with m.If(spr[0]):
                            comb += o.data.eq(dsisr)
                        with m.Else():
                            comb += o.data.eq(dar)
                        comb += o.ok.eq(1)
                        comb += done.eq(1)
                    # pass it over to the MMU instead
                    with m.Else():
                        comb += self.debug0.eq(6)
                        # blip the MMU and wait for it to complete
                        comb += valid.eq(1)              # start "pulse"
                        comb += l_in.valid.eq(blip)      # start
                        comb += l_in.mtspr.eq(0)         # mfspr!=mtspr
                        comb += l_in.sprn.eq(spr)        # which SPR
                        comb += l_in.rs.eq(a_i)          # incoming operand (RS)
                        comb += o.data.eq(l_out.sprval)  # SPR from MMU
                        comb += o.ok.eq(l_out.done)      # only when l_out valid
                        comb += done.eq(1)               # FIXME l_out.done

                # XXX this one is going to have to go through LDSTCompUnit
                # because it's LDST that has control over dcache
                # (through PortInterface). or, another means is devised
                # so as not to have double-drivers of d_in.valid and addr
                #
                #with m.Case(MicrOp.OP_DCBZ):
                #    # activate dcbz mode (spec: v3.0B p850)
                #    comb += valid.eq(1)             # start "pulse"
                #    comb += d_in.valid.eq(blip)     # start
                #    comb += d_in.dcbz.eq(1)         # dcbz mode
                #    comb += d_in.addr.eq(a_i + b_i) # addr is (RA|0) + RB
                #    comb += done.eq(d_out.store_done)  # TODO
                #    comb += self.debug0.eq(1)

                with m.Case(MicrOp.OP_TLBIE):
                    # pass TLBIE request to MMU (spec: v3.0B p1034)
                    # note that the spr is *not* an actual spr number, it's
                    # just that those bits happen to match with field bits
                    # RIC, PRS, R
                    comb += valid.eq(1)          # start "pulse"
                    comb += l_in.valid.eq(blip)  # start
                    comb += l_in.tlbie.eq(1)     # tlbie mode
                    comb += l_in.sprn.eq(spr)    # use sprn to send insn bits
                    comb += l_in.addr.eq(b_i)    # incoming operand (RB)
                    comb += done.eq(l_out.done)  # zzzz
                    comb += self.debug0.eq(2)

                with m.Case(MicrOp.OP_ILLEGAL):
                    comb += self.illegal.eq(1)

            with m.If(self.n.ready_i & self.n.valid_o):
                m.d.sync += busy.eq(0)

        return m

    def __iter__(self):
        yield from self.p
        yield from self.n

    def ports(self):
        return list(self)