1 from nmigen
import Elaboratable
, Module
, Signal
, Shape
, unsigned
, Cat
, Mux
2 from nmigen
import Record
, Memory
3 from nmigen
import Const
4 from soc
.fu
.mmu
.pipe_data
import MMUInputData
, MMUOutputData
, MMUPipeSpec
5 from nmutil
.singlepipe
import ControlBase
6 from nmutil
.util
import rising_edge
8 from soc
.experiment
.mmu
import MMU
9 from soc
.experiment
.dcache
import DCache
11 from openpower
.consts
import MSR
12 from openpower
.decoder
.power_fields
import DecodeFields
13 from openpower
.decoder
.power_fieldsn
import SignalBitRange
14 from openpower
.decoder
.power_decoder2
import decode_spr_num
15 from openpower
.decoder
.power_enums
import MicrOp
, XER_bits
17 from soc
.experiment
.pimem
import PortInterface
18 from soc
.experiment
.pimem
import PortInterfaceBase
20 from soc
.experiment
.mem_types
import LoadStore1ToDCacheType
, LoadStore1ToMMUType
21 from soc
.experiment
.mem_types
import DCacheToLoadStore1Type
, MMUToLoadStore1Type
23 from soc
.minerva
.wishbone
import make_wb_layout
24 from soc
.bus
.sram
import SRAM
27 # glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """Glue logic between the Microwatt-derived DCache/MMU and the
    PortInterface LD/ST API (PortInterfaceBase).

    The DCache is instantiated here and its d_in/d_out records are driven
    directly by the set_wr_addr / set_rd_addr / set_wr_data / get_rd_data
    callbacks.  A one-clock "blip" (d_validblip) is derived from d_valid so
    the DCache sees a single-cycle valid pulse rather than a held level.
    """

    def __init__(self, pspec):
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        self.d_in = self.dcache.d_in    # LoadStore1ToDCacheType
        self.d_out = self.dcache.d_out  # DCacheToLoadStore1Type
        self.l_in = LoadStore1ToMMUType()
        self.l_out = MMUToLoadStore1Type()

        self.mmureq = Signal()  # request must be retried via the MMU
        self.derror = Signal()  # dcache reported a "cache paradox" error

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_data = Signal(64)  # XXX width should really come from pspec
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

    def set_wr_addr(self, m, addr, mask):
        """Present a store (write) address and byte-mask to the DCache.

        this gets complicated: actually a FSM is needed which
        first checks dcache, then if that fails (in virt mode)
        it checks the MMU instead.
        """
        #m.d.comb += self.l_in.valid.eq(1)
        #m.d.comb += self.l_in.addr.eq(addr)
        #m.d.comb += self.l_in.load.eq(0)
        m.d.comb += self.d_in.load.eq(0)
        m.d.comb += self.d_in.byte_sel.eq(mask)
        m.d.comb += self.d_in.addr.eq(addr)
        # TEMPORARY BAD HACK! disable the cache entirely for read
        m.d.comb += self.d_in.nc.eq(1)
        return None

    def set_rd_addr(self, m, addr, mask):
        """Present a load (read) address and byte-mask to the DCache.

        this gets complicated: actually a FSM is needed which
        first checks dcache, then if that fails (in virt mode)
        it checks the MMU instead.
        """
        #m.d.comb += self.l_in.valid.eq(1)
        #m.d.comb += self.l_in.load.eq(1)
        #m.d.comb += self.l_in.addr.eq(addr)
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.d_in.valid.eq(self.d_validblip)
        m.d.comb += self.d_in.load.eq(1)
        m.d.comb += self.d_in.byte_sel.eq(mask)
        m.d.comb += self.d_in.addr.eq(addr)
        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == Const(0xc, 4)):
            m.d.comb += self.d_in.nc.eq(1)
        # TEMPORARY BAD HACK! disable the cache entirely for read
        m.d.comb += self.d_in.nc.eq(1)
        return None  # FIXME return value

    def set_wr_data(self, m, data, wen):
        """Latch store data; returns the "store ok" condition."""
        # do the "blip" on write data
        m.d.comb += self.d_valid.eq(1)
        m.d.comb += self.d_in.valid.eq(self.d_validblip)
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.d_w_data.eq(data)
        #m.d.sync += self.d_in.byte_sel.eq(wen) # this might not be needed
        st_ok = self.d_out.valid  # TODO indicates write data is valid
        return st_ok

    def get_rd_data(self, m):
        """Return (data, ld_ok) from the DCache read port."""
        ld_ok = self.d_out.valid  # indicates read data is valid
        data = self.d_out.data    # actual read data
        return data, ld_ok

    # Microwatt loadstore1.vhdl error-handling, kept for reference while
    # the nmigen equivalent (in elaborate below) is being developed:
    #
    #   if d_in.error = '1' then
    #       if d_in.cache_paradox = '1' then
    #           -- signal an interrupt straight away
    #           dsisr(63 - 38) := not r2.req.load;
    #           -- XXX there is no architected bit for this
    #           -- (probably should be a machine check in fact)
    #           dsisr(63 - 35) := d_in.cache_paradox;
    #       else
    #           -- Look up the translation for TLB miss
    #           -- and also for permission error and RC error
    #           -- in case the PTE has been updated.
    #           v.state := MMU_LOOKUP;
    #       end if;
    #   end if;

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        d_out, l_out, dbus = self.d_out, self.l_out, self.dbus

        # error-handling, transliterated from Microwatt (see comment above)
        with m.If(d_out.error):
            with m.If(d_out.cache_paradox):
                comb += self.derror.eq(1)
                # dsisr(63 - 38) := not r2.req.load;
                # -- XXX there is no architected bit for this
                # -- (probably should be a machine check in fact)
                # dsisr(63 - 35) := d_in.cache_paradox;
            with m.Else():
                # Look up the translation for TLB miss
                # and also for permission error and RC error
                # in case the PTE has been updated.
                comb += self.mmureq.eq(1)
                # v.state := MMU_LOOKUP;
                # v.stage1_en := '0';

        exc = self.pi.exception_o

        # happened, alignment, instr_fault, invalid,
        comb += exc.happened.eq(d_out.error | l_out.err)
        comb += exc.invalid.eq(l_out.invalid)

        # badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(l_out.badtree)
        comb += exc.perm_error.eq(l_out.perm_error)
        comb += exc.rc_error.eq(l_out.rc_error)
        comb += exc.segment_fault.eq(l_out.segerr)

        # TODO connect those signals somewhere
        #print(d_out.valid)         -> no error
        #print(d_out.store_done)    -> no error
        #print(d_out.cache_paradox) -> ?
        #print(l_out.done)          -> no error

        # TODO some exceptions set SPRs

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        comb += dbus.adr.eq(dcache.wb_out.adr)
        comb += dbus.dat_w.eq(dcache.wb_out.dat)
        comb += dbus.sel.eq(dcache.wb_out.sel)
        comb += dbus.cyc.eq(dcache.wb_out.cyc)
        comb += dbus.stb.eq(dcache.wb_out.stb)
        comb += dbus.we.eq(dcache.wb_out.we)

        comb += dcache.wb_in.dat.eq(dbus.dat_r)
        comb += dcache.wb_in.ack.eq(dbus.ack)
        if hasattr(dbus, "stall"):
            comb += dcache.wb_in.stall.eq(dbus.stall)

        # create a blip (single pulse) on valid read/write request
        m.d.comb += self.d_validblip.eq(rising_edge(m, self.d_valid))

        # write out d data only when flag set, otherwise clear it
        # NOTE(review): Else-branch restored from context — confirm the
        # intent is to zero d_in.data when no write is pending
        with m.If(self.d_w_valid):
            m.d.sync += self.d_in.data.eq(self.d_w_data)
        with m.Else():
            m.d.sync += self.d_in.data.eq(0)

        return m

    def ports(self):
        yield from super().ports()
        # TODO: memory ports
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 variant whose Wishbone dbus is wired straight to a small
    internal SRAM, for unit testing without a real memory subsystem.
    """

    def __init__(self, pspec):
        super().__init__(pspec)
        # small 32-entry Memory unless overridden by pspec.dmem_test_depth
        if (hasattr(pspec, "dmem_test_depth") and
                isinstance(pspec.dmem_test_depth, int)):
            depth = pspec.dmem_test_depth
        else:
            depth = 32  # default test-memory depth
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb
        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})
        dbus = self.dbus

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))

        # connect address
        comb += sram.bus.adr.eq(dbus.adr)

        return m
236 class FSMMMUStage(ControlBase
):
    """FSM-based MMU: must call set_ldst_interface and pass in an instance
    of a LoadStore1. this to comply with the ConfigMemoryPortInterface API
    """
242 def __init__(self
, pspec
):
247 self
.p
.data_i
= MMUInputData(pspec
)
248 self
.n
.data_o
= MMUOutputData(pspec
)
250 # this Function Unit is extremely unusual in that it actually stores a
251 # "thing" rather than "processes inputs and produces outputs". hence
252 # why it has to be a FSM. linking up LD/ST however is going to have
253 # to be done back in Issuer (or Core)
257 # make life a bit easier in Core XXX mustn't really do this,
258 # pspec is designed for config variables, rather than passing
259 # things around. have to think about it, design a way to do
260 # it that makes "sense"
261 # comment out for now self.pspec.mmu = self.mmu
262 # comment out for now self.pspec.dcache = self.dcache
264 # debugging output for gtkw
265 self
.debug0
= Signal(4)
266 self
.illegal
= Signal()
268 # for SPR field number access
270 self
.fields
= DecodeFields(SignalBitRange
, [i
.ctx
.op
.insn
])
271 self
.fields
.create_specs()
273 def set_ldst_interface(self
, ldst
):
274 """must be called back in Core, after FUs have been set up.
275 one of those will be the MMU (us!) but the LoadStore1 instance
276 must be set up in ConfigMemoryPortInterface. sigh.
278 # incoming PortInterface
280 self
.dcache
= self
.ldst
.dcache
281 self
.pi
= self
.ldst
.pi
283 def elaborate(self
, platform
):
284 assert hasattr(self
, "dcache"), "remember to call set_ldst_interface"
285 m
= super().elaborate(platform
)
289 # link mmu and dcache together
290 m
.submodules
.mmu
= mmu
= self
.mmu
291 ldst
= self
.ldst
# managed externally: do not add here
292 m
.d
.comb
+= dcache
.m_in
.eq(mmu
.d_out
) # MMUToDCacheType
293 m
.d
.comb
+= mmu
.d_in
.eq(dcache
.m_out
) # DCacheToMMUType
295 l_in
, l_out
= mmu
.l_in
, mmu
.l_out
296 d_in
, d_out
= dcache
.d_in
, dcache
.d_out
297 wb_out
, wb_in
= dcache
.wb_out
, dcache
.wb_in
299 # link ldst and MMU together
300 comb
+= l_in
.eq(ldst
.l_in
)
301 comb
+= ldst
.l_out
.eq(l_out
)
303 data_i
, data_o
= self
.p
.data_i
, self
.n
.data_o
304 a_i
, b_i
, o
, spr1_o
= data_i
.ra
, data_i
.rb
, data_o
.o
, data_o
.spr1
308 # TODO: link these SPRs somewhere
315 m
.d
.comb
+= self
.n
.valid_o
.eq(busy
& done
)
316 m
.d
.comb
+= self
.p
.ready_o
.eq(~busy
)
318 # take copy of X-Form SPR field
319 x_fields
= self
.fields
.FormXFX
320 spr
= Signal(len(x_fields
.SPR
))
321 comb
+= spr
.eq(decode_spr_num(x_fields
.SPR
))
323 # based on MSR bits, set priv and virt mode. TODO: 32-bit mode
324 comb
+= d_in
.priv_mode
.eq(~msr_i
[MSR
.PR
])
325 comb
+= d_in
.virt_mode
.eq(msr_i
[MSR
.DR
])
326 #comb += d_in.mode_32bit.eq(msr_i[MSR.SF]) # ?? err
328 # ok so we have to "pulse" the MMU (or dcache) rather than
329 # hold the valid hi permanently. guess what this does...
332 m
.d
.comb
+= blip
.eq(rising_edge(m
, valid
))
335 with m
.If(self
.p
.valid_i
):
336 m
.d
.sync
+= busy
.eq(1)
339 # based on the Micro-Op, we work out which of MMU or DCache
340 # should "action" the operation. one of MMU or DCache gets
341 # enabled ("valid") and we twiddle our thumbs until it
344 # FIXME: properly implement MicrOp.OP_MTSPR and MicrOp.OP_MFSPR
346 with m
.Switch(op
.insn_type
):
347 with m
.Case(MicrOp
.OP_MTSPR
):
348 # despite redirection this FU **MUST** behave exactly
349 # like the SPR FU. this **INCLUDES** updating the SPR
350 # regfile because the CSV file entry for OP_MTSPR
351 # categorically defines and requires the expectation
352 # that the CompUnit **WILL** write to the regfile.
353 comb
+= spr1_o
.data
.eq(spr
)
354 comb
+= spr1_o
.ok
.eq(1)
355 # subset SPR: first check a few bits
356 with m
.If(~spr
[9] & ~spr
[5]):
357 comb
+= self
.debug0
.eq(3)
359 comb
+= dsisr
.eq(a_i
[:32])
363 # pass it over to the MMU instead
365 comb
+= self
.debug0
.eq(4)
366 # blip the MMU and wait for it to complete
367 comb
+= valid
.eq(1) # start "pulse"
368 comb
+= l_in
.valid
.eq(blip
) # start
369 comb
+= l_in
.mtspr
.eq(1) # mtspr mode
370 comb
+= l_in
.sprn
.eq(spr
) # which SPR
371 comb
+= l_in
.rs
.eq(a_i
) # incoming operand (RS)
372 comb
+= done
.eq(1) # FIXME l_out.done
374 with m
.Case(MicrOp
.OP_MFSPR
):
375 # subset SPR: first check a few bits
376 with m
.If(~spr
[9] & ~spr
[5]):
377 comb
+= self
.debug0
.eq(5)
379 comb
+= o
.data
.eq(dsisr
)
381 comb
+= o
.data
.eq(dar
)
384 # pass it over to the MMU instead
386 comb
+= self
.debug0
.eq(6)
387 # blip the MMU and wait for it to complete
388 comb
+= valid
.eq(1) # start "pulse"
389 comb
+= l_in
.valid
.eq(blip
) # start
390 comb
+= l_in
.mtspr
.eq(0) # mfspr!=mtspr
391 comb
+= l_in
.sprn
.eq(spr
) # which SPR
392 comb
+= l_in
.rs
.eq(a_i
) # incoming operand (RS)
393 comb
+= o
.data
.eq(l_out
.sprval
) # SPR from MMU
394 comb
+= o
.ok
.eq(l_out
.done
) # only when l_out valid
395 comb
+= done
.eq(1) # FIXME l_out.done
397 # XXX this one is going to have to go through LDSTCompUnit
398 # because it's LDST that has control over dcache
399 # (through PortInterface). or, another means is devised
400 # so as not to have double-drivers of d_in.valid and addr
402 #with m.Case(MicrOp.OP_DCBZ):
403 # # activate dcbz mode (spec: v3.0B p850)
404 # comb += valid.eq(1) # start "pulse"
405 # comb += d_in.valid.eq(blip) # start
406 # comb += d_in.dcbz.eq(1) # dcbz mode
407 # comb += d_in.addr.eq(a_i + b_i) # addr is (RA|0) + RB
408 # comb += done.eq(d_out.store_done) # TODO
409 # comb += self.debug0.eq(1)
411 with m
.Case(MicrOp
.OP_TLBIE
):
412 # pass TLBIE request to MMU (spec: v3.0B p1034)
413 # note that the spr is *not* an actual spr number, it's
414 # just that those bits happen to match with field bits
416 comb
+= valid
.eq(1) # start "pulse"
417 comb
+= l_in
.valid
.eq(blip
) # start
418 comb
+= l_in
.tlbie
.eq(1) # mtspr mode
419 comb
+= l_in
.sprn
.eq(spr
) # use sprn to send insn bits
420 comb
+= l_in
.addr
.eq(b_i
) # incoming operand (RB)
421 comb
+= done
.eq(l_out
.done
) # zzzz
422 comb
+= self
.debug0
.eq(2)
424 with m
.Case(MicrOp
.OP_ILLEGAL
):
425 comb
+= self
.illegal
.eq(1)
427 with m
.If(self
.n
.ready_i
& self
.n
.valid_o
):
428 m
.d
.sync
+= busy
.eq(0)