from nmigen import (Elaboratable, Module, Signal, Shape, unsigned, Cat, Mux,
                    Record, Memory, Const)

from soc.fu.mmu.pipe_data import MMUInputData, MMUOutputData, MMUPipeSpec
from nmutil.singlepipe import ControlBase
from nmutil.util import rising_edge

from soc.experiment.mmu import MMU
from soc.experiment.dcache import DCache

from openpower.consts import MSR
from openpower.decoder.power_fields import DecodeFields
from openpower.decoder.power_fieldsn import SignalBitRange
from openpower.decoder.power_decoder2 import decode_spr_num
from openpower.decoder.power_enums import MicrOp, XER_bits

from soc.experiment.pimem import PortInterface, PortInterfaceBase

from soc.experiment.mem_types import (LoadStore1ToDCacheType,
                                      LoadStore1ToMMUType,
                                      DCacheToLoadStore1Type,
                                      MMUToLoadStore1Type)

from soc.minerva.wishbone import make_wb_layout
from soc.bus.sram import SRAM
27 # glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """Glue logic between a PortInterface and Microwatt-style DCache/MMU.

    Wraps a DCache instance, exposing its request/response records
    (d_in/d_out) plus the LoadStore1<->MMU records (l_in/l_out), and
    bridges the dcache's wb_in/wb_out records onto a conventional
    nmigen Record-based wishbone bus (self.dbus).

    NOTE(review): this file arrived with lines dropped/mangled in
    transit.  Statements marked "reconstructed" below were re-created
    from context and should be verified against the upstream revision.
    """

    def __init__(self, pspec):
        regwid = pspec.reg_wid    # register (data) width
        addrwid = pspec.addr_wid  # address width

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        self.d_in = self.dcache.d_in    # requests into the dcache
        self.d_out = self.dcache.d_out  # responses from the dcache
        self.l_in = LoadStore1ToMMUType()
        self.l_out = MMUToLoadStore1Type()

        self.mmureq = Signal()  # request must be looked up via the MMU
        self.derror = Signal()  # dcache reported a "cache paradox" error

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_data = Signal(64)  # XXX
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

    def set_wr_addr(self, m, addr, mask):
        """Drive a write (store) request at the dcache.

        this gets complicated: actually a FSM is needed which
        first checks dcache, then if that fails (in virt mode)
        it checks the MMU instead.
        """
        #m.d.comb += self.l_in.valid.eq(1)
        #m.d.comb += self.l_in.addr.eq(addr)
        #m.d.comb += self.l_in.load.eq(0)
        m.d.comb += self.d_valid.eq(1)
        # d_in.valid is driven from the one-clock blip, not the level
        # (the blip is generated in elaborate() from d_valid)
        m.d.comb += self.d_in.valid.eq(self.d_validblip)
        m.d.comb += self.d_in.load.eq(0)
        m.d.comb += self.d_in.byte_sel.eq(mask)
        m.d.comb += self.d_in.addr.eq(addr)
        return None  # FIXME return value

    def set_rd_addr(self, m, addr, mask):
        """Drive a read (load) request at the dcache.

        this gets complicated: actually a FSM is needed which
        first checks dcache, then if that fails (in virt mode)
        it checks the MMU instead.
        """
        #m.d.comb += self.l_in.valid.eq(1)
        #m.d.comb += self.l_in.load.eq(1)
        #m.d.comb += self.l_in.addr.eq(addr)
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.d_in.valid.eq(self.d_validblip)
        m.d.comb += self.d_in.load.eq(1)
        m.d.comb += self.d_in.byte_sel.eq(mask)
        m.d.comb += self.d_in.addr.eq(addr)
        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.d_in.nc.eq(1)
        return None  # FIXME return value

    def set_wr_data(self, m, data, wen):
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.d_w_data.eq(data)
        #m.d.sync += self.d_in.byte_sel.eq(wen) # this might not be needed
        # reconstructed: unconditionally report "store ok" for now
        st_ok = Const(1, 1)
        #st_ok = self.d_out.valid # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        ld_ok = self.d_out.valid  # indicates read data is valid
        data = self.d_out.data    # actual read data
        # reconstructed: return the data plus its valid flag
        return data, ld_ok

    # reference notes transcribed from Microwatt loadstore1.vhdl
    # (previously left as bare VHDL -- a syntax error -- now commented):
    #   if d_in.error = '1' then
    #       if d_in.cache_paradox = '1' then
    #           -- signal an interrupt straight away
    #           dsisr(63 - 38) := not r2.req.load;
    #           -- XXX there is no architected bit for this
    #           -- (probably should be a machine check in fact)
    #           dsisr(63 - 35) := d_in.cache_paradox;
    #       else
    #           -- Look up the translation for TLB miss
    #           -- and also for permission error and RC error
    #           -- in case the PTE has been updated.
    #           v.state := MMU_LOOKUP;
    #       end if;
    #   end if;

    def elaborate(self, platform):
        m = super().elaborate(platform)
        # reconstructed: `comb` was used below but never defined
        comb = m.d.comb

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        # shorthands
        d_out, l_out, dbus = self.d_out, self.l_out, self.dbus

        # error handling: see the Microwatt loadstore1.vhdl notes above
        with m.If(d_out.error):
            with m.If(d_out.cache_paradox):
                comb += self.derror.eq(1)
                # dsisr(63 - 38) := not r2.req.load;
                # -- XXX there is no architected bit for this
                # -- (probably should be a machine check in fact)
                # dsisr(63 - 35) := d_in.cache_paradox;

            # Look up the translation for TLB miss
            # and also for permission error and RC error
            # in case the PTE has been updated.
            comb += self.mmureq.eq(1)
            # v.state := MMU_LOOKUP;
            # v.stage1_en := '0';

        exc = self.pi.exception_o

        # happened, alignment, instr_fault, invalid,
        comb += exc.happened.eq(d_out.error | l_out.err)
        comb += exc.invalid.eq(l_out.invalid)

        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(l_out.badtree)
        comb += exc.perm_error.eq(l_out.perm_error)
        comb += exc.rc_error.eq(l_out.rc_error)
        comb += exc.segment_fault.eq(l_out.segerr)

        # TODO connect those signals somewhere
        #print(d_out.valid)         -> no error
        #print(d_out.store_done)    -> no error
        #print(d_out.cache_paradox) -> ?
        #print(l_out.done)          -> no error

        # TODO some exceptions set SPRs

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.wb_out.adr)
        comb += dbus.dat_w.eq(dcache.wb_out.dat)
        comb += dbus.sel.eq(dcache.wb_out.sel)
        comb += dbus.cyc.eq(dcache.wb_out.cyc)
        comb += dbus.stb.eq(dcache.wb_out.stb)
        comb += dbus.we.eq(dcache.wb_out.we)

        comb += dcache.wb_in.dat.eq(dbus.dat_r)
        comb += dcache.wb_in.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.wb_in.stall.eq(dbus.stall)

        # create a blip (single pulse) on valid read/write request
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))

        # write out d data only when flag set
        with m.If(self.d_w_valid):
            m.d.sync += self.d_in.data.eq(self.d_w_data)
        # reconstructed: the Else branch header was dropped in transit
        with m.Else():
            m.d.sync += self.d_in.data.eq(0)

        # reconstructed: elaborate() must return the Module
        return m

    def ports(self):
        # reconstructed: the method header was dropped in transit
        yield from super().ports()
        # TODO: memory ports
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 variant whose wishbone bus is wired directly to a
    small test SRAM instead of an external interconnect.

    NOTE(review): this file arrived with lines dropped/mangled in
    transit.  Statements marked "reconstructed" below were re-created
    from context and should be verified against the upstream revision.
    """

    def __init__(self, pspec):
        super().__init__(pspec)

        # small 32-entry Memory
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            # reconstructed: default depth when pspec does not override it
            depth = 32
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb  # reconstructed: `comb` was used but undefined
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus  # reconstructed: `dbus` was used but undefined

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            # (an identical duplicate of this assignment was removed:
            # driving the same comb statement twice is redundant)
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        # reconstructed: the fanin loop header was dropped in transit
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))

        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        # reconstructed: elaborate() must return the Module
        return m
class FSMMMUStage(ControlBase):
    """FSM-based MMU: must call set_ldst_interface and pass in an instance
    of a LoadStore1. this to comply with the ConfigMemoryPortInterface API

    NOTE(review): this file arrived with many lines dropped/mangled in
    transit.  Statements marked "reconstructed" below were re-created
    from context and should be verified against the upstream revision.
    """

    def __init__(self, pspec):
        super().__init__()        # reconstructed
        self.pspec = pspec        # reconstructed

        # pipeline input/output records
        self.p.data_i = MMUInputData(pspec)
        self.n.data_o = MMUOutputData(pspec)

        # this Function Unit is extremely unusual in that it actually stores a
        # "thing" rather than "processes inputs and produces outputs". hence
        # why it has to be a FSM. linking up LD/ST however is going to have
        # to be done back in Issuer (or Core)
        self.mmu = MMU()          # reconstructed (elaborate uses self.mmu)

        # make life a bit easier in Core XXX mustn't really do this,
        # pspec is designed for config variables, rather than passing
        # things around. have to think about it, design a way to do
        # it that makes "sense"
        # comment out for now self.pspec.mmu = self.mmu
        # comment out for now self.pspec.dcache = self.dcache

        # debugging output for gtkw
        self.debug0 = Signal(4)
        self.illegal = Signal()

        # for SPR field number access
        i = self.p.data_i         # reconstructed
        self.fields = DecodeFields(SignalBitRange, [i.ctx.op.insn])
        self.fields.create_specs()

    def set_ldst_interface(self, ldst):
        """must be called back in Core, after FUs have been set up.
        one of those will be the MMU (us!) but the LoadStore1 instance
        must be set up in ConfigMemoryPortInterface. sigh.
        """
        # incoming PortInterface
        self.ldst = ldst          # reconstructed (elaborate uses self.ldst)
        self.dcache = self.ldst.dcache
        self.pi = self.ldst.pi

    def elaborate(self, platform):
        assert hasattr(self, "dcache"), "remember to call set_ldst_interface"
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync  # reconstructed
        dcache = self.dcache             # reconstructed

        # link mmu and dcache together
        m.submodules.mmu = mmu = self.mmu
        ldst = self.ldst  # managed externally: do not add here
        m.d.comb += dcache.m_in.eq(mmu.d_out)  # MMUToDCacheType
        m.d.comb += mmu.d_in.eq(dcache.m_out)  # DCacheToMMUType

        l_in, l_out = mmu.l_in, mmu.l_out
        d_in, d_out = dcache.d_in, dcache.d_out
        wb_out, wb_in = dcache.wb_out, dcache.wb_in

        # link ldst and MMU together
        comb += l_in.eq(ldst.l_in)
        comb += ldst.l_out.eq(l_out)

        data_i, data_o = self.p.data_i, self.n.data_o
        a_i, b_i, o, spr1_o = data_i.ra, data_i.rb, data_o.o, data_o.spr1
        # reconstructed: current Micro-Op record and MSR source
        op = data_i.ctx.op
        msr_i = op.msr  # NOTE(review): assumed MSR comes via op -- confirm

        # TODO: link these SPRs somewhere
        dsisr = Signal(64)  # reconstructed
        dar = Signal(64)    # reconstructed

        # busy/done request handshaking (reconstructed declarations)
        busy = Signal()
        done = Signal()
        m.d.comb += self.n.valid_o.eq(busy & done)
        m.d.comb += self.p.ready_o.eq(~busy)

        # take copy of X-Form SPR field
        x_fields = self.fields.FormXFX
        spr = Signal(len(x_fields.SPR))
        comb += spr.eq(decode_spr_num(x_fields.SPR))

        # based on MSR bits, set priv and virt mode. TODO: 32-bit mode
        comb += d_in.priv_mode.eq(~msr_i[MSR.PR])
        comb += d_in.virt_mode.eq(msr_i[MSR.DR])
        #comb += d_in.mode_32bit.eq(msr_i[MSR.SF]) # ?? err

        # ok so we have to "pulse" the MMU (or dcache) rather than
        # hold the valid hi permanently. guess what this does...
        valid = Signal()  # reconstructed
        blip = Signal()   # reconstructed
        m.d.comb += blip.eq(rising_edge(m, valid))

        # reconstructed outer If/Else: accept a request while idle,
        # process it while busy
        with m.If(~busy):
            with m.If(self.p.valid_i):
                m.d.sync += busy.eq(1)
        with m.Else():

            # based on the Micro-Op, we work out which of MMU or DCache
            # should "action" the operation. one of MMU or DCache gets
            # enabled ("valid") and we twiddle our thumbs until it
            # responds ("done")

            # FIXME: properly implement MicrOp.OP_MTSPR and MicrOp.OP_MFSPR

            with m.Switch(op.insn_type):
                with m.Case(MicrOp.OP_MTSPR):
                    # despite redirection this FU **MUST** behave exactly
                    # like the SPR FU. this **INCLUDES** updating the SPR
                    # regfile because the CSV file entry for OP_MTSPR
                    # categorically defines and requires the expectation
                    # that the CompUnit **WILL** write to the regfile.
                    comb += spr1_o.data.eq(spr)
                    comb += spr1_o.ok.eq(1)
                    # subset SPR: first check a few bits
                    with m.If(~spr[9] & ~spr[5]):
                        comb += self.debug0.eq(3)
                        # reconstructed If/Else: select between DSISR and
                        # DAR by a low SPR bit -- TODO confirm polarity
                        with m.If(spr[0]):
                            comb += dsisr.eq(a_i[:32])
                        with m.Else():
                            comb += dar.eq(a_i)
                        comb += done.eq(1)  # reconstructed
                    with m.Else():
                        # pass it over to the MMU instead
                        comb += self.debug0.eq(4)
                        # blip the MMU and wait for it to complete
                        comb += valid.eq(1)          # start "pulse"
                        comb += l_in.valid.eq(blip)  # start
                        comb += l_in.mtspr.eq(1)     # mtspr mode
                        comb += l_in.sprn.eq(spr)    # which SPR
                        comb += l_in.rs.eq(a_i)      # incoming operand (RS)
                        comb += done.eq(1)  # FIXME l_out.done

                with m.Case(MicrOp.OP_MFSPR):
                    # subset SPR: first check a few bits
                    with m.If(~spr[9] & ~spr[5]):
                        comb += self.debug0.eq(5)
                        # reconstructed If/Else: select between DSISR and
                        # DAR by a low SPR bit -- TODO confirm polarity
                        with m.If(spr[0]):
                            comb += o.data.eq(dsisr)
                        with m.Else():
                            comb += o.data.eq(dar)
                        comb += o.ok.eq(1)  # reconstructed
                        comb += done.eq(1)  # reconstructed
                    with m.Else():
                        # pass it over to the MMU instead
                        comb += self.debug0.eq(6)
                        # blip the MMU and wait for it to complete
                        comb += valid.eq(1)          # start "pulse"
                        comb += l_in.valid.eq(blip)  # start
                        comb += l_in.mtspr.eq(0)     # mfspr!=mtspr
                        comb += l_in.sprn.eq(spr)    # which SPR
                        comb += l_in.rs.eq(a_i)      # incoming operand (RS)
                        comb += o.data.eq(l_out.sprval)  # SPR from MMU
                        comb += o.ok.eq(l_out.done)  # only when l_out valid
                        comb += done.eq(1)  # FIXME l_out.done

                # XXX this one is going to have to go through LDSTCompUnit
                # because it's LDST that has control over dcache
                # (through PortInterface). or, another means is devised
                # so as not to have double-drivers of d_in.valid and addr
                #with m.Case(MicrOp.OP_DCBZ):
                #    # activate dcbz mode (spec: v3.0B p850)
                #    comb += valid.eq(1)              # start "pulse"
                #    comb += d_in.valid.eq(blip)      # start
                #    comb += d_in.dcbz.eq(1)          # dcbz mode
                #    comb += d_in.addr.eq(a_i + b_i)  # addr is (RA|0) + RB
                #    comb += done.eq(d_out.store_done)     # TODO
                #    comb += self.debug0.eq(1)

                with m.Case(MicrOp.OP_TLBIE):
                    # pass TLBIE request to MMU (spec: v3.0B p1034)
                    # note that the spr is *not* an actual spr number, it's
                    # just that those bits happen to match with field bits
                    comb += valid.eq(1)          # start "pulse"
                    comb += l_in.valid.eq(blip)  # start
                    comb += l_in.tlbie.eq(1)     # mtspr mode
                    comb += l_in.sprn.eq(spr)    # use sprn to send insn bits
                    comb += l_in.addr.eq(b_i)    # incoming operand (RB)
                    comb += done.eq(l_out.done)  # zzzz
                    comb += self.debug0.eq(2)

                with m.Case(MicrOp.OP_ILLEGAL):
                    comb += self.illegal.eq(1)

            # deassert busy once the result is accepted downstream
            with m.If(self.n.ready_i & self.n.valid_o):
                m.d.sync += busy.eq(0)

        # reconstructed: elaborate() must return the Module
        return m