1 from nmigen
import Elaboratable
, Module
, Signal
, Shape
, unsigned
, Cat
, Mux
2 from nmigen
import Record
, Memory
3 from nmigen
import Const
4 from soc
.fu
.mmu
.pipe_data
import MMUInputData
, MMUOutputData
, MMUPipeSpec
5 from nmutil
.singlepipe
import ControlBase
6 from nmutil
.util
import rising_edge
8 from soc
.experiment
.mmu
import MMU
9 from soc
.experiment
.dcache
import DCache
11 from openpower
.consts
import MSR
12 from openpower
.decoder
.power_fields
import DecodeFields
13 from openpower
.decoder
.power_fieldsn
import SignalBitRange
14 from openpower
.decoder
.power_decoder2
import decode_spr_num
15 from openpower
.decoder
.power_enums
import MicrOp
, XER_bits
17 from soc
.experiment
.pimem
import PortInterface
18 from soc
.experiment
.pimem
import PortInterfaceBase
20 from soc
.experiment
.mem_types
import LoadStore1ToDCacheType
, LoadStore1ToMMUType
21 from soc
.experiment
.mem_types
import DCacheToLoadStore1Type
, MMUToLoadStore1Type
23 from soc
.minerva
.wishbone
import make_wb_layout
24 from soc
.bus
.sram
import SRAM
27 # glue logic for microwatt mmu and dcache
28 class LoadStore1(PortInterfaceBase
):
def __init__(self, pspec):
    """Glue between PortInterface and a microwatt-style DCache plus MMU.

    pspec: per-core configuration object; reg_wid and addr_wid are read,
    and the optional attribute "disable_cache" (when present and True)
    forces all accesses to be non-cacheable.
    """
    # optional pspec attribute: absent means the cache stays enabled.
    # getattr with a default replaces the original hasattr()-and-compare
    # (behaviourally identical, one lookup instead of two).
    self.disable_cache = (getattr(pspec, "disable_cache", False) == True)
    regwid = pspec.reg_wid    # register (data) width
    addrwid = pspec.addr_wid  # address width

    super().__init__(regwid, addrwid)
    self.dcache = DCache()
    self.d_in = self.dcache.d_in    # request side into the DCache
    self.d_out = self.dcache.d_out  # response side out of the DCache
    self.l_in = LoadStore1ToMMUType()
    self.l_out = MMUToLoadStore1Type()

    self.mmureq = Signal()  # request an MMU lookup (set in elaborate)
    self.derror = Signal()  # dcache reported an error (cache paradox)

    # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
    self.dbus = Record(make_wb_layout(pspec))

    # for creating a single clock blip to DCache
    self.d_valid = Signal()
    self.d_w_data = Signal(64)  # XXX hard-coded width
    self.d_w_valid = Signal()
    self.d_validblip = Signal()
55 def set_wr_addr(self
, m
, addr
, mask
):
56 # this gets complicated: actually a FSM is needed which
57 # first checks dcache, then if that fails (in virt mode)
58 # it checks the MMU instead.
59 #m.d.comb += self.l_in.valid.eq(1)
60 #m.d.comb += self.l_in.addr.eq(addr)
61 #m.d.comb += self.l_in.load.eq(0)
62 m
.d
.comb
+= self
.d_in
.load
.eq(0)
63 m
.d
.comb
+= self
.d_in
.byte_sel
.eq(mask
)
64 m
.d
.comb
+= self
.d_in
.addr
.eq(addr
)
65 # option to disable the cache entirely for write
66 if self
.disable_cache
:
67 m
.d
.comb
+= self
.d_in
.nc
.eq(1)
def set_rd_addr(self, m, addr, mask):
    """Drive the DCache request inputs for a load (address phase).

    this gets complicated: actually a FSM is needed which
    first checks dcache, then if that fails (in virt mode)
    it checks the MMU instead.
    """
    #m.d.comb += self.l_in.valid.eq(1)
    #m.d.comb += self.l_in.load.eq(1)
    #m.d.comb += self.l_in.addr.eq(addr)
    comb = m.d.comb
    comb += self.d_valid.eq(1)
    # d_in.valid pulses for a single cycle (see d_validblip in elaborate)
    comb += self.d_in.valid.eq(self.d_validblip)
    comb += self.d_in.load.eq(1)
    comb += self.d_in.byte_sel.eq(mask)
    comb += self.d_in.addr.eq(addr)
    # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
    # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
    with m.If(addr[28:] == Const(0xc, 4)):
        comb += self.d_in.nc.eq(1)
    # option to disable the cache entirely for read
    if self.disable_cache:
        comb += self.d_in.nc.eq(1)
    return None #FIXME return value
# Drive the DCache write-data path for a store.  The data is not sent to
# d_in directly: it is latched into self.d_w_data and written out in
# elaborate() only while self.d_w_valid is set, synchronised with the
# single-cycle "blip" on d_in.valid (self.d_validblip).
91 def set_wr_data(self
, m
, data
, wen
):
92 # do the "blip" on write data
93 m
.d
.comb
+= self
.d_valid
.eq(1)
94 m
.d
.comb
+= self
.d_in
.valid
.eq(self
.d_validblip
)
95 # put data into comb which is picked up in main elaborate()
96 m
.d
.comb
+= self
.d_w_valid
.eq(1)
97 m
.d
.comb
+= self
.d_w_data
.eq(data
)
98 #m.d.sync += self.d_in.byte_sel.eq(wen) # this might not be needed
99 st_ok
= self
.d_out
.valid
# TODO indicates write data is valid
# NOTE(review): the listing is truncated here -- the trailing lines
# (presumably "return st_ok") are missing from this view; confirm
# against the PortInterfaceBase API.
# Read-data side of the PortInterface: sample the DCache response.
103 def get_rd_data(self
, m
):
104 ld_ok
= self
.d_out
.valid
# indicates read data is valid
105 data
= self
.d_out
.data
# actual read data
# NOTE(review): the trailing lines (presumably "return data, ld_ok")
# are missing from this view; confirm against the PortInterfaceBase API.
# NOTE: reference fragment from Microwatt loadstore1.vhdl (VHDL, not
# Python) -- kept, commented out, for comparison with the error handling
# in elaborate() below, which quotes the same lines.
# 109 if d_in.error = '1' then
# 110     if d_in.cache_paradox = '1' then
# 111         -- signal an interrupt straight away
# 113         dsisr(63 - 38) := not r2.req.load;
# 114         -- XXX there is no architected bit for this
# 115         -- (probably should be a machine check in fact)
# 116         dsisr(63 - 35) := d_in.cache_paradox;
# 118     -- Look up the translation for TLB miss
# 119     -- and also for permission error and RC error
# 120     -- in case the PTE has been updated.
# 122     v.state := MMU_LOOKUP;
# Build the module: instantiate the DCache submodule, translate its error
# outputs into PortInterface exceptions, fan the dcache wishbone signals
# out onto self.dbus, and generate the single-cycle request "blip" plus
# the write-data latch.
# NOTE(review): several original lines are missing from this view, in
# particular the binding of "comb" (presumably comb = m.d.comb) and the
# m.Else() that should guard the final d_in.data reset -- confirm.
128 def elaborate(self
, platform
):
129 m
= super().elaborate(platform
)
132 # create dcache module
133 m
.submodules
.dcache
= dcache
= self
.dcache
136 d_out
, l_out
, dbus
= self
.d_out
, self
.l_out
, self
.dbus
# on a dcache error: flag derror; on a "cache paradox" also request an
# MMU lookup (mirrors the Microwatt VHDL quoted in the comments below)
138 with m
.If(d_out
.error
):
139 with m
.If(d_out
.cache_paradox
):
140 comb
+= self
.derror
.eq(1)
141 # dsisr(63 - 38) := not r2.req.load;
142 # -- XXX there is no architected bit for this
143 # -- (probably should be a machine check in fact)
144 # dsisr(63 - 35) := d_in.cache_paradox;
146 # Look up the translation for TLB miss
147 # and also for permission error and RC error
148 # in case the PTE has been updated.
149 comb
+= self
.mmureq
.eq(1)
150 # v.state := MMU_LOOKUP;
151 # v.stage1_en := '0';
# fan the dcache/MMU error conditions out to the PortInterface exception
153 exc
= self
.pi
.exception_o
155 #happened, alignment, instr_fault, invalid,
156 comb
+= exc
.happened
.eq(d_out
.error | l_out
.err
)
157 comb
+= exc
.invalid
.eq(l_out
.invalid
)
159 #badtree, perm_error, rc_error, segment_fault
160 comb
+= exc
.badtree
.eq(l_out
.badtree
)
161 comb
+= exc
.perm_error
.eq(l_out
.perm_error
)
162 comb
+= exc
.rc_error
.eq(l_out
.rc_error
)
163 comb
+= exc
.segment_fault
.eq(l_out
.segerr
)
165 # TODO connect those signals somewhere
166 #print(d_out.valid) -> no error
167 #print(d_out.store_done) -> no error
168 #print(d_out.cache_paradox) -> ?
169 #print(l_out.done) -> no error
171 # TODO some exceptions set SPRs
173 # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
# wishbone fan-out: dcache is the initiator, dbus the external bus
174 comb
+= dbus
.adr
.eq(dcache
.wb_out
.adr
)
175 comb
+= dbus
.dat_w
.eq(dcache
.wb_out
.dat
)
176 comb
+= dbus
.sel
.eq(dcache
.wb_out
.sel
)
177 comb
+= dbus
.cyc
.eq(dcache
.wb_out
.cyc
)
178 comb
+= dbus
.stb
.eq(dcache
.wb_out
.stb
)
179 comb
+= dbus
.we
.eq(dcache
.wb_out
.we
)
# wishbone fan-in: responses back into the dcache
181 comb
+= dcache
.wb_in
.dat
.eq(dbus
.dat_r
)
182 comb
+= dcache
.wb_in
.ack
.eq(dbus
.ack
)
# "stall" is optional on the bus record
183 if hasattr(dbus
, "stall"):
184 comb
+= dcache
.wb_in
.stall
.eq(dbus
.stall
)
186 # create a blip (single pulse) on valid read/write request
187 m
.d
.comb
+= self
.d_validblip
.eq(rising_edge(m
, self
.d_valid
))
189 # write out d data only when flag set
190 with m
.If(self
.d_w_valid
):
191 m
.d
.sync
+= self
.d_in
.data
.eq(self
.d_w_data
)
# NOTE(review): the m.Else() wrapper for this reset (original line 192)
# is missing from this view -- as shown it would unconditionally clear
# d_in.data; confirm against the original source.
193 m
.d
.sync
+= self
.d_in
.data
.eq(0)
# NOTE(review): the end of elaborate() (presumably "return m") is
# missing from this view.
198 yield from super().ports()
202 class TestSRAMLoadStore1(LoadStore1
):
# Set up LoadStore1 with a small behavioural Memory behind the wishbone
# bus, for unit testing without a real memory controller.
203 def __init__(self
, pspec
):
204 super().__init
__(pspec
)
206 # small 32-entry Memory
# depth is overridable via pspec.dmem_test_depth (must be an int)
207 if (hasattr(pspec
, "dmem_test_depth") and
208 isinstance(pspec
.dmem_test_depth
, int)):
209 depth
= pspec
.dmem_test_depth
# NOTE(review): the else-branch (original lines 210-211, presumably
# "depth = 32" to match the comment above) is missing from this view.
# NOTE(review): message below says "TestSRAMBareLoadStoreUnit" but this
# class is TestSRAMLoadStore1 -- likely a copied message; confirm.
212 print("TestSRAMBareLoadStoreUnit depth", depth
)
214 self
.mem
= Memory(width
=pspec
.reg_wid
, depth
=depth
)
# Instantiate the SRAM (as a wishbone target) and connect it directly to
# the LoadStore1 dbus (the initiator), signal by signal.
# NOTE(review): the bindings of "comb" and "dbus" (original lines
# 218/221-222, presumably comb = m.d.comb and dbus = self.dbus) are
# missing from this view.
216 def elaborate(self
, platform
):
217 m
= super().elaborate(platform
)
219 m
.submodules
.sram
= sram
= SRAM(memory
=self
.mem
, granularity
=8,
220 features
={'cti', 'bte', 'err'})
223 # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
224 # note: SRAM is a target (slave), dbus is initiator (master)
225 fanouts
= ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
226 fanins
= ['dat_r', 'ack', 'err']
227 for fanout
in fanouts
:
228 print("fanout", fanout
, getattr(sram
.bus
, fanout
).shape(),
229 getattr(dbus
, fanout
).shape())
230 comb
+= getattr(sram
.bus
, fanout
).eq(getattr(dbus
, fanout
))
# NOTE(review): the statement below duplicates the one above (original
# lines 230/231) -- the second assignment looks redundant; confirm.
231 comb
+= getattr(sram
.bus
, fanout
).eq(getattr(dbus
, fanout
))
# NOTE(review): the "for fanin in fanins:" loop header (original line
# 232) is missing from this view -- "fanin" below depends on it.
233 comb
+= getattr(dbus
, fanin
).eq(getattr(sram
.bus
, fanin
))
# the address fans out from initiator to target as well
235 comb
+= sram
.bus
.adr
.eq(dbus
.adr
)
# NOTE(review): the end of elaborate() (presumably "return m") is
# missing from this view.
240 class FSMMMUStage(ControlBase
):
# FSM-based MMU: must call set_ldst_interface and pass in an instance
# of a LoadStore1. this to comply with the ConfigMemoryPortInterface API
# Set up pipeline input/output records, debug signals and the SPR field
# decoder for the FSM-based MMU function unit.
# NOTE(review): several original lines are missing from this view:
# the super().__init__(...) call, the MMU instantiation (elaborate()
# reads self.mmu), and the binding of "i" used below (presumably
# i = self.p.data_i) -- confirm against the original source.
246 def __init__(self
, pspec
):
251 self
.p
.data_i
= MMUInputData(pspec
)
252 self
.n
.data_o
= MMUOutputData(pspec
)
254 # this Function Unit is extremely unusual in that it actually stores a
255 # "thing" rather than "processes inputs and produces outputs". hence
256 # why it has to be a FSM. linking up LD/ST however is going to have
257 # to be done back in Issuer (or Core)
261 # make life a bit easier in Core XXX mustn't really do this,
262 # pspec is designed for config variables, rather than passing
263 # things around. have to think about it, design a way to do
264 # it that makes "sense"
265 # comment out for now self.pspec.mmu = self.mmu
266 # comment out for now self.pspec.dcache = self.dcache
268 # debugging output for gtkw
269 self
.debug0
= Signal(4)
270 self
.illegal
= Signal()
272 # for SPR field number access
274 self
.fields
= DecodeFields(SignalBitRange
, [i
.ctx
.op
.insn
])
275 self
.fields
.create_specs()
277 def set_ldst_interface(self
, ldst
):
278 """must be called back in Core, after FUs have been set up.
279 one of those will be the MMU (us!) but the LoadStore1 instance
280 must be set up in ConfigMemoryPortInterface. sigh.
"""
282 # incoming PortInterface
# NOTE(review): the assignment of self.ldst (original line 283,
# presumably "self.ldst = ldst") is missing from this view -- the
# attribute accesses below depend on it.
284 self
.dcache
= self
.ldst
.dcache
285 self
.pi
= self
.ldst
.pi
# Build the FSM: link MMU and DCache together, decode the SPR number from
# the instruction X-Form fields, and action MTSPR/MFSPR/TLBIE by pulsing
# the MMU for a single cycle ("blip") and waiting for completion.
# NOTE(review): many original lines are missing from this view, notably
# the bindings of comb, dcache, msr_i, op and the busy/done/valid/blip
# signals, the dsisr/dar SPR latches, and the m.Else()/m.If branches
# that select between local SPRs and MMU redirection -- confirm all of
# these against the original source.
287 def elaborate(self
, platform
):
288 assert hasattr(self
, "dcache"), "remember to call set_ldst_interface"
289 m
= super().elaborate(platform
)
293 # link mmu and dcache together
294 m
.submodules
.mmu
= mmu
= self
.mmu
295 ldst
= self
.ldst
# managed externally: do not add here
296 m
.d
.comb
+= dcache
.m_in
.eq(mmu
.d_out
) # MMUToDCacheType
297 m
.d
.comb
+= mmu
.d_in
.eq(dcache
.m_out
) # DCacheToMMUType
299 l_in
, l_out
= mmu
.l_in
, mmu
.l_out
300 d_in
, d_out
= dcache
.d_in
, dcache
.d_out
301 wb_out
, wb_in
= dcache
.wb_out
, dcache
.wb_in
303 # link ldst and MMU together
304 comb
+= l_in
.eq(ldst
.l_in
)
305 comb
+= ldst
.l_out
.eq(l_out
)
307 data_i
, data_o
= self
.p
.data_i
, self
.n
.data_o
308 a_i
, b_i
, o
, spr1_o
= data_i
.ra
, data_i
.rb
, data_o
.o
, data_o
.spr1
312 # TODO: link these SPRs somewhere
# pipeline handshake: output valid once the busy operation completes;
# ready to accept new input only when not busy
319 m
.d
.comb
+= self
.n
.valid_o
.eq(busy
& done
)
320 m
.d
.comb
+= self
.p
.ready_o
.eq(~busy
)
322 # take copy of X-Form SPR field
323 x_fields
= self
.fields
.FormXFX
324 spr
= Signal(len(x_fields
.SPR
))
325 comb
+= spr
.eq(decode_spr_num(x_fields
.SPR
))
327 # based on MSR bits, set priv and virt mode. TODO: 32-bit mode
328 comb
+= d_in
.priv_mode
.eq(~msr_i
[MSR
.PR
])
329 comb
+= d_in
.virt_mode
.eq(msr_i
[MSR
.DR
])
330 #comb += d_in.mode_32bit.eq(msr_i[MSR.SF]) # ?? err
332 # ok so we have to "pulse" the MMU (or dcache) rather than
333 # hold the valid hi permanently. guess what this does...
336 m
.d
.comb
+= blip
.eq(rising_edge(m
, valid
))
# latch "busy" on an incoming request; cleared on handover at the bottom
339 with m
.If(self
.p
.valid_i
):
340 m
.d
.sync
+= busy
.eq(1)
343 # based on the Micro-Op, we work out which of MMU or DCache
344 # should "action" the operation. one of MMU or DCache gets
345 # enabled ("valid") and we twiddle our thumbs until it
348 # FIXME: properly implement MicrOp.OP_MTSPR and MicrOp.OP_MFSPR
350 with m
.Switch(op
.insn_type
):
351 with m
.Case(MicrOp
.OP_MTSPR
):
352 # despite redirection this FU **MUST** behave exactly
353 # like the SPR FU. this **INCLUDES** updating the SPR
354 # regfile because the CSV file entry for OP_MTSPR
355 # categorically defines and requires the expectation
356 # that the CompUnit **WILL** write to the regfile.
357 comb
+= spr1_o
.data
.eq(spr
)
358 comb
+= spr1_o
.ok
.eq(1)
359 # subset SPR: first check a few bits
360 with m
.If(~spr
[9] & ~spr
[5]):
361 comb
+= self
.debug0
.eq(3)
# NOTE(review): the branch that selects dsisr vs dar (original line
# 362 area) is missing from this view.
363 comb
+= dsisr
.eq(a_i
[:32])
367 # pass it over to the MMU instead
# NOTE(review): the redirection below is presumably inside an m.Else()
# (original line 368), missing from this view.
369 comb
+= self
.debug0
.eq(4)
370 # blip the MMU and wait for it to complete
371 comb
+= valid
.eq(1) # start "pulse"
372 comb
+= l_in
.valid
.eq(blip
) # start
373 comb
+= l_in
.mtspr
.eq(1) # mtspr mode
374 comb
+= l_in
.sprn
.eq(spr
) # which SPR
375 comb
+= l_in
.rs
.eq(a_i
) # incoming operand (RS)
376 comb
+= done
.eq(1) # FIXME l_out.done
378 with m
.Case(MicrOp
.OP_MFSPR
):
379 # subset SPR: first check a few bits
380 with m
.If(~spr
[9] & ~spr
[5]):
381 comb
+= self
.debug0
.eq(5)
# NOTE(review): the dsisr/dar select branches (original lines 382/384)
# are missing from this view.
383 comb
+= o
.data
.eq(dsisr
)
385 comb
+= o
.data
.eq(dar
)
388 # pass it over to the MMU instead
390 comb
+= self
.debug0
.eq(6)
391 # blip the MMU and wait for it to complete
392 comb
+= valid
.eq(1) # start "pulse"
393 comb
+= l_in
.valid
.eq(blip
) # start
394 comb
+= l_in
.mtspr
.eq(0) # mfspr!=mtspr
395 comb
+= l_in
.sprn
.eq(spr
) # which SPR
396 comb
+= l_in
.rs
.eq(a_i
) # incoming operand (RS)
397 comb
+= o
.data
.eq(l_out
.sprval
) # SPR from MMU
398 comb
+= o
.ok
.eq(l_out
.done
) # only when l_out valid
399 comb
+= done
.eq(1) # FIXME l_out.done
401 # XXX this one is going to have to go through LDSTCompUnit
402 # because it's LDST that has control over dcache
403 # (through PortInterface). or, another means is devised
404 # so as not to have double-drivers of d_in.valid and addr
406 #with m.Case(MicrOp.OP_DCBZ):
407 # # activate dcbz mode (spec: v3.0B p850)
408 # comb += valid.eq(1) # start "pulse"
409 # comb += d_in.valid.eq(blip) # start
410 # comb += d_in.dcbz.eq(1) # dcbz mode
411 # comb += d_in.addr.eq(a_i + b_i) # addr is (RA|0) + RB
412 # comb += done.eq(d_out.store_done) # TODO
413 # comb += self.debug0.eq(1)
415 with m
.Case(MicrOp
.OP_TLBIE
):
416 # pass TLBIE request to MMU (spec: v3.0B p1034)
417 # note that the spr is *not* an actual spr number, it's
418 # just that those bits happen to match with field bits
420 comb
+= valid
.eq(1) # start "pulse"
421 comb
+= l_in
.valid
.eq(blip
) # start
422 comb
+= l_in
.tlbie
.eq(1) # mtspr mode
423 comb
+= l_in
.sprn
.eq(spr
) # use sprn to send insn bits
424 comb
+= l_in
.addr
.eq(b_i
) # incoming operand (RB)
425 comb
+= done
.eq(l_out
.done
) # zzzz
426 comb
+= self
.debug0
.eq(2)
428 with m
.Case(MicrOp
.OP_ILLEGAL
):
429 comb
+= self
.illegal
.eq(1)
# clear busy on handover to the next pipeline stage
431 with m
.If(self
.n
.ready_i
& self
.n
.valid_o
):
432 m
.d
.sync
+= busy
.eq(0)
# NOTE(review): the end of elaborate() (presumably "return m") is
# missing from this view.