1 from nmigen
import Elaboratable
, Module
, Signal
, Shape
, unsigned
, Cat
, Mux
2 from nmigen
import Record
, Memory
3 from nmigen
import Const
4 from soc
.fu
.mmu
.pipe_data
import MMUInputData
, MMUOutputData
, MMUPipeSpec
5 from nmutil
.singlepipe
import ControlBase
6 from nmutil
.util
import rising_edge
8 from soc
.experiment
.mmu
import MMU
9 from soc
.experiment
.dcache
import DCache
11 from openpower
.consts
import MSR
12 from openpower
.decoder
.power_fields
import DecodeFields
13 from openpower
.decoder
.power_fieldsn
import SignalBitRange
14 from openpower
.decoder
.power_decoder2
import decode_spr_num
15 from openpower
.decoder
.power_enums
import MicrOp
, XER_bits
17 from soc
.experiment
.pimem
import PortInterface
18 from soc
.experiment
.pimem
import PortInterfaceBase
20 from soc
.experiment
.mem_types
import LoadStore1ToDCacheType
, LoadStore1ToMMUType
21 from soc
.experiment
.mem_types
import DCacheToLoadStore1Type
, MMUToLoadStore1Type
23 from soc
.minerva
.wishbone
import make_wb_layout
24 from soc
.bus
.sram
import SRAM
# glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """Glue between the PortInterface API, the DCache and the MMU.

    Instantiates a DCache and exposes its d_in/d_out request/response
    records directly, plus LoadStore1ToMMUType/MMUToLoadStore1Type
    records for the MMU link which is connected externally
    (see FSMMMUStage.set_ldst_interface / elaborate).
    """
    def __init__(self, pspec):
        # pspec: configuration spec providing reg_wid, addr_wid and the
        # optional disable_cache flag
        self.disable_cache = (hasattr(pspec, "disable_cache") and
                              pspec.disable_cache == True)
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        # request/response records, aliased straight onto the DCache
        self.d_in = self.dcache.d_in
        self.d_out = self.dcache.d_out
        # MMU link records (driven/read externally by the MMU FSM)
        self.l_in = LoadStore1ToMMUType()
        self.l_out = MMUToLoadStore1Type()

        self.mmureq = Signal()  # request redirected to the MMU (set in elaborate)
        self.derror = Signal()  # dcache reported a "cache paradox" error

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_data = Signal(64) # XXX hard-coded width — presumably regwid, TODO confirm
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

        # DSISR and DAR cached values. note that the MMU FSM is where
        # these are accessed by OP_MTSPR/OP_MFSPR, on behalf of LoadStore1.
        # by contrast microwatt has the spr set/get done *in* loadstore1.vhdl
        # NOTE(review): a matching self.dar appears to be defined on lines
        # elided from this view (FSMMMUStage.elaborate reads ldst.dar) — confirm
        self.dsisr = Signal(64)
61 def set_wr_addr(self
, m
, addr
, mask
):
62 # this gets complicated: actually a FSM is needed which
63 # first checks dcache, then if that fails (in virt mode)
64 # it checks the MMU instead.
65 #m.d.comb += self.l_in.valid.eq(1)
66 #m.d.comb += self.l_in.addr.eq(addr)
67 #m.d.comb += self.l_in.load.eq(0)
68 m
.d
.comb
+= self
.d_in
.load
.eq(0)
69 m
.d
.comb
+= self
.d_in
.byte_sel
.eq(mask
)
70 m
.d
.comb
+= self
.d_in
.addr
.eq(addr
)
71 # option to disable the cache entirely for write
72 if self
.disable_cache
:
73 m
.d
.comb
+= self
.d_in
.nc
.eq(1)
76 def set_rd_addr(self
, m
, addr
, mask
):
77 # this gets complicated: actually a FSM is needed which
78 # first checks dcache, then if that fails (in virt mode)
79 # it checks the MMU instead.
80 #m.d.comb += self.l_in.valid.eq(1)
81 #m.d.comb += self.l_in.load.eq(1)
82 #m.d.comb += self.l_in.addr.eq(addr)
83 m
.d
.comb
+= self
.d_valid
.eq(1)
84 m
.d
.comb
+= self
.d_in
.valid
.eq(self
.d_validblip
)
85 m
.d
.comb
+= self
.d_in
.load
.eq(1)
86 m
.d
.comb
+= self
.d_in
.byte_sel
.eq(mask
)
87 m
.d
.comb
+= self
.d_in
.addr
.eq(addr
)
88 # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
89 # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
90 with m
.If(addr
[28:] == Const(0xc, 4)):
91 m
.d
.comb
+= self
.d_in
.nc
.eq(1)
92 # option to disable the cache entirely for read
93 if self
.disable_cache
:
94 m
.d
.comb
+= self
.d_in
.nc
.eq(1)
95 return None #FIXME return value
    def set_wr_data(self, m, data, wen):
        """Present store data, with a single-cycle valid blip.

        data is latched via d_w_data/d_w_valid, which elaborate() picks
        up and registers into d_in.data.  wen is currently unused here.
        """
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.d_in.valid.eq(self.d_validblip)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.d_w_data.eq(data)
        #m.d.sync += self.d_in.byte_sel.eq(wen) # this might not be needed
        st_ok = self.d_out.valid # TODO indicates write data is valid
        # NOTE(review): source numbering jumps here (105 -> 109); a
        # "return st_ok" appears to be elided — confirm against full file
    def get_rd_data(self, m):
        """Fetch the load result from the DCache.

        ld_ok mirrors d_out.valid (read data is valid this cycle);
        data is the raw read-data word.
        """
        ld_ok = self.d_out.valid # indicates read data is valid
        data = self.d_out.data # actual read data
        # NOTE(review): source numbering jumps here (111 -> 115); a
        # "return data, ld_ok" appears to be elided — confirm
115 if d_in.error = '1' then
116 if d_in.cache_paradox = '1' then
117 -- signal an interrupt straight away
119 dsisr(63 - 38) := not r2.req.load;
120 -- XXX there is no architected bit for this
121 -- (probably should be a machine check in fact)
122 dsisr(63 - 35) := d_in.cache_paradox;
124 -- Look up the translation for TLB miss
125 -- and also for permission error and RC error
126 -- in case the PTE has been updated.
128 v.state := MMU_LOOKUP;
    def elaborate(self, platform):
        """Build the glue logic: dcache error / exception decode,
        dcache <-> wishbone dbus wiring, the d_valid rising-edge blip,
        and the registered store-data path.

        NOTE(review): several lines are elided from this view of the
        file: `comb` (presumably m.d.comb) and `exc` (presumably an
        exception record) are used below but their definitions are not
        visible, and no `return m` is visible — confirm against the
        full file.
        """
        m = super().elaborate(platform)

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        d_out, l_out, dbus = self.d_out, self.l_out, self.dbus

        # dcache error decode: a "cache paradox" signals an interrupt
        # straight away (derror); otherwise the request is redirected to
        # the MMU for a TLB lookup (mmureq).  mirrors the VHDL above.
        with m.If(d_out.error):
            with m.If(d_out.cache_paradox):
                comb += self.derror.eq(1)
                # dsisr(63 - 38) := not r2.req.load;
                # -- XXX there is no architected bit for this
                # -- (probably should be a machine check in fact)
                # dsisr(63 - 35) := d_in.cache_paradox;

            # Look up the translation for TLB miss
            # and also for permission error and RC error
            # in case the PTE has been updated.
            comb += self.mmureq.eq(1)
            # v.state := MMU_LOOKUP;
            # v.stage1_en := '0';

        #happened, alignment, instr_fault, invalid,
        comb += exc.happened.eq(d_out.error | l_out.err)
        comb += exc.invalid.eq(l_out.invalid)

        #badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(l_out.badtree)
        comb += exc.perm_error.eq(l_out.perm_error)
        comb += exc.rc_error.eq(l_out.rc_error)
        comb += exc.segment_fault.eq(l_out.segerr)

        # TODO connect those signals somewhere
        #print(d_out.valid) -> no error
        #print(d_out.store_done) -> no error
        #print(d_out.cache_paradox) -> ?
        #print(l_out.done) -> no error

        # TODO some exceptions set SPRs

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.wb_out.adr)
        comb += dbus.dat_w.eq(dcache.wb_out.dat)
        comb += dbus.sel.eq(dcache.wb_out.sel)
        comb += dbus.cyc.eq(dcache.wb_out.cyc)
        comb += dbus.stb.eq(dcache.wb_out.stb)
        comb += dbus.we.eq(dcache.wb_out.we)

        comb += dcache.wb_in.dat.eq(dbus.dat_r)
        comb += dcache.wb_in.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.wb_in.stall.eq(dbus.stall)

        # create a blip (single pulse) on valid read/write request
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))

        # write out d data only when flag set
        with m.If(self.d_w_valid):
            m.d.sync += self.d_in.data.eq(self.d_w_data)
        # NOTE(review): an `with m.Else():` context appears to be elided
        # before this line (otherwise d_in.data would be unconditionally
        # zeroed every cycle) — confirm against full file
        m.d.sync += self.d_in.data.eq(0)
204 yield from super().ports()
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 variant whose wishbone dbus is wired (in elaborate)
    to a small internal SRAM, for unit testing without a real bus.
    """
    def __init__(self, pspec):
        super().__init__(pspec)
        # small 32-entry Memory
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        # NOTE(review): source numbering jumps here (215 -> 218): an
        # `else` branch supplying the default depth appears to be
        # elided — without it `depth` is unbound on this path. confirm
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)
222 def elaborate(self
, platform
):
223 m
= super().elaborate(platform
)
225 m
.submodules
.sram
= sram
= SRAM(memory
=self
.mem
, granularity
=8,
226 features
={'cti', 'bte', 'err'})
229 # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
230 # note: SRAM is a target (slave), dbus is initiator (master)
231 fanouts
= ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
232 fanins
= ['dat_r', 'ack', 'err']
233 for fanout
in fanouts
:
234 print("fanout", fanout
, getattr(sram
.bus
, fanout
).shape(),
235 getattr(dbus
, fanout
).shape())
236 comb
+= getattr(sram
.bus
, fanout
).eq(getattr(dbus
, fanout
))
237 comb
+= getattr(sram
.bus
, fanout
).eq(getattr(dbus
, fanout
))
239 comb
+= getattr(dbus
, fanin
).eq(getattr(sram
.bus
, fanin
))
241 comb
+= sram
.bus
.adr
.eq(dbus
.adr
)
class FSMMMUStage(ControlBase):
    """FSM-based MMU: must call set_ldst_interface and pass in an instance
    of a LoadStore1. this to comply with the ConfigMemoryPortInterface API

    NOTE(review): this text appeared in the source as bare prose at
    class level (a syntax error) — restored as a proper docstring.
    """
    def __init__(self, pspec):
        # NOTE(review): source numbering jumps here (252 -> 257): the
        # super().__init__() call (and, presumably, creation of
        # self.mmu) are elided from this view.  self.p / self.n below
        # come from the ControlBase pipeline API.
        self.p.data_i = MMUInputData(pspec)
        self.n.data_o = MMUOutputData(pspec)

        # this Function Unit is extremely unusual in that it actually stores a
        # "thing" rather than "processes inputs and produces outputs". hence
        # why it has to be a FSM. linking up LD/ST however is going to have
        # to be done back in Issuer (or Core)

        # make life a bit easier in Core XXX mustn't really do this,
        # pspec is designed for config variables, rather than passing
        # things around. have to think about it, design a way to do
        # it that makes "sense"
        # comment out for now self.pspec.mmu = self.mmu
        # comment out for now self.pspec.dcache = self.dcache

        # debugging output for gtkw
        self.debug0 = Signal(4)
        self.illegal = Signal()

        # for SPR field number access
        # NOTE(review): `i` below is not defined anywhere in the visible
        # source — it looks garbled/elided (probably self.p.data_i) —
        # confirm against the full file
        self.fields = DecodeFields(SignalBitRange, [i.ctx.op.insn])
        self.fields.create_specs()
283 def set_ldst_interface(self
, ldst
):
284 """must be called back in Core, after FUs have been set up.
285 one of those will be the MMU (us!) but the LoadStore1 instance
286 must be set up in ConfigMemoryPortInterface. sigh.
288 # incoming PortInterface
290 self
.dcache
= self
.ldst
.dcache
291 self
.pi
= self
.ldst
.pi
293 def elaborate(self
, platform
):
294 assert hasattr(self
, "dcache"), "remember to call set_ldst_interface"
295 m
= super().elaborate(platform
)
299 # link mmu and dcache together
300 m
.submodules
.mmu
= mmu
= self
.mmu
301 ldst
= self
.ldst
# managed externally: do not add here
302 m
.d
.comb
+= dcache
.m_in
.eq(mmu
.d_out
) # MMUToDCacheType
303 m
.d
.comb
+= mmu
.d_in
.eq(dcache
.m_out
) # DCacheToMMUType
305 l_in
, l_out
= mmu
.l_in
, mmu
.l_out
306 d_in
, d_out
= dcache
.d_in
, dcache
.d_out
307 wb_out
, wb_in
= dcache
.wb_out
, dcache
.wb_in
309 # link ldst and MMU together
310 comb
+= l_in
.eq(ldst
.l_in
)
311 comb
+= ldst
.l_out
.eq(l_out
)
313 data_i
, data_o
= self
.p
.data_i
, self
.n
.data_o
314 a_i
, b_i
, o
, spr1_o
= data_i
.ra
, data_i
.rb
, data_o
.o
, data_o
.spr1
319 # these are set / got here *ON BEHALF* of LoadStore1
320 dsisr
, dar
= ldst
.dsisr
, ldst
.dar
325 m
.d
.comb
+= self
.n
.valid_o
.eq(busy
& done
)
326 m
.d
.comb
+= self
.p
.ready_o
.eq(~busy
)
328 # take copy of X-Form SPR field
329 x_fields
= self
.fields
.FormXFX
330 spr
= Signal(len(x_fields
.SPR
))
331 comb
+= spr
.eq(decode_spr_num(x_fields
.SPR
))
333 # based on MSR bits, set priv and virt mode. TODO: 32-bit mode
334 comb
+= d_in
.priv_mode
.eq(~msr_i
[MSR
.PR
])
335 comb
+= d_in
.virt_mode
.eq(msr_i
[MSR
.DR
])
336 #comb += d_in.mode_32bit.eq(msr_i[MSR.SF]) # ?? err
338 # ok so we have to "pulse" the MMU (or dcache) rather than
339 # hold the valid hi permanently. guess what this does...
342 m
.d
.comb
+= blip
.eq(rising_edge(m
, valid
))
345 with m
.If(self
.p
.valid_i
):
346 m
.d
.sync
+= busy
.eq(1)
349 # based on the Micro-Op, we work out which of MMU or DCache
350 # should "action" the operation. one of MMU or DCache gets
351 # enabled ("valid") and we twiddle our thumbs until it
354 # FIXME: properly implement MicrOp.OP_MTSPR and MicrOp.OP_MFSPR
356 with m
.Switch(op
.insn_type
):
357 with m
.Case(MicrOp
.OP_MTSPR
):
358 # despite redirection this FU **MUST** behave exactly
359 # like the SPR FU. this **INCLUDES** updating the SPR
360 # regfile because the CSV file entry for OP_MTSPR
361 # categorically defines and requires the expectation
362 # that the CompUnit **WILL** write to the regfile.
363 comb
+= spr1_o
.data
.eq(a_i
)
364 comb
+= spr1_o
.ok
.eq(1)
365 # subset SPR: first check a few bits
366 # XXX NOTE this must now cover **FOUR** values: this
367 # test is no longer adequate. DSISR, DAR, PGTBL and PID
368 # must ALL be covered here.
369 with m
.If(~spr
[9] & ~spr
[5]):
370 comb
+= self
.debug0
.eq(3)
371 #if matched update local cached value
373 sync
+= dsisr
.eq(a_i
[:32])
377 # pass it over to the MMU instead
379 comb
+= self
.debug0
.eq(4)
380 # blip the MMU and wait for it to complete
381 comb
+= valid
.eq(1) # start "pulse"
382 comb
+= l_in
.valid
.eq(blip
) # start
383 comb
+= l_in
.mtspr
.eq(1) # mtspr mode
384 comb
+= l_in
.sprn
.eq(spr
) # which SPR
385 comb
+= l_in
.rs
.eq(a_i
) # incoming operand (RS)
386 comb
+= done
.eq(1) # FIXME l_out.done
388 with m
.Case(MicrOp
.OP_MFSPR
):
389 # subset SPR: first check a few bits
390 #with m.If(~spr[9] & ~spr[5]):
391 # comb += self.debug0.eq(5)
393 # comb += o.data.eq(dsisr)
395 # comb += o.data.eq(dar)
396 #do NOT return cached values
397 comb
+= o
.data
.eq(spr1_i
)
400 # pass it over to the MMU instead
402 # comb += self.debug0.eq(6)
403 # # blip the MMU and wait for it to complete
404 # comb += valid.eq(1) # start "pulse"
405 # comb += l_in.valid.eq(blip) # start
406 # comb += l_in.mtspr.eq(0) # mfspr!=mtspr
407 # comb += l_in.sprn.eq(spr) # which SPR
408 # comb += l_in.rs.eq(a_i) # incoming operand (RS)
409 # comb += o.data.eq(l_out.sprval) # SPR from MMU
410 # comb += o.ok.eq(l_out.done) # only when l_out valid
411 # comb += done.eq(1) # FIXME l_out.done
413 # XXX this one is going to have to go through LDSTCompUnit
414 # because it's LDST that has control over dcache
415 # (through PortInterface). or, another means is devised
416 # so as not to have double-drivers of d_in.valid and addr
418 #with m.Case(MicrOp.OP_DCBZ):
419 # # activate dcbz mode (spec: v3.0B p850)
420 # comb += valid.eq(1) # start "pulse"
421 # comb += d_in.valid.eq(blip) # start
422 # comb += d_in.dcbz.eq(1) # dcbz mode
423 # comb += d_in.addr.eq(a_i + b_i) # addr is (RA|0) + RB
424 # comb += done.eq(d_out.store_done) # TODO
425 # comb += self.debug0.eq(1)
427 with m
.Case(MicrOp
.OP_TLBIE
):
428 # pass TLBIE request to MMU (spec: v3.0B p1034)
429 # note that the spr is *not* an actual spr number, it's
430 # just that those bits happen to match with field bits
432 comb
+= valid
.eq(1) # start "pulse"
433 comb
+= l_in
.valid
.eq(blip
) # start
434 comb
+= l_in
.tlbie
.eq(1) # mtspr mode
435 comb
+= l_in
.sprn
.eq(spr
) # use sprn to send insn bits
436 comb
+= l_in
.addr
.eq(b_i
) # incoming operand (RB)
437 comb
+= done
.eq(l_out
.done
) # zzzz
438 comb
+= self
.debug0
.eq(2)
440 with m
.Case(MicrOp
.OP_ILLEGAL
):
441 comb
+= self
.illegal
.eq(1)
443 with m
.If(self
.n
.ready_i
& self
.n
.valid_o
):
444 m
.d
.sync
+= busy
.eq(0)