1 from nmigen
import Elaboratable
, Module
, Signal
, Shape
, unsigned
, Cat
, Mux
2 from nmigen
import Record
, Memory
3 from nmigen
import Const
4 from soc
.fu
.mmu
.pipe_data
import MMUInputData
, MMUOutputData
, MMUPipeSpec
5 from nmutil
.singlepipe
import ControlBase
6 from nmutil
.util
import rising_edge
8 from soc
.experiment
.mmu
import MMU
9 from soc
.experiment
.dcache
import DCache
11 from openpower
.consts
import MSR
12 from openpower
.decoder
.power_fields
import DecodeFields
13 from openpower
.decoder
.power_fieldsn
import SignalBitRange
14 from openpower
.decoder
.power_decoder2
import decode_spr_num
15 from openpower
.decoder
.power_enums
import MicrOp
, XER_bits
17 from soc
.experiment
.pimem
import PortInterface
18 from soc
.experiment
.pimem
import PortInterfaceBase
20 from soc
.experiment
.mem_types
import LoadStore1ToDCacheType
, LoadStore1ToMMUType
21 from soc
.experiment
.mem_types
import DCacheToLoadStore1Type
, MMUToLoadStore1Type
23 from soc
.minerva
.wishbone
import make_wb_layout
24 from soc
.bus
.sram
import SRAM
27 # glue logic for microwatt mmu and dcache
class LoadStore1(PortInterfaceBase):
    """Glue logic between a PortInterface and the microwatt-style DCache.

    The ``PortInterfaceBase`` hooks (``set_wr_addr``, ``set_rd_addr``,
    ``set_wr_data``, ``get_rd_data``) are implemented by driving the
    DCache ``d_in`` port and reading back its ``d_out`` port.  The DCache
    wishbone signals are copied to/from the ``dbus`` Record so that an
    external bus can be attached.
    """

    def __init__(self, pspec):
        """Create the DCache and the signals used to talk to it.

        pspec must provide ``reg_wid`` and ``addr_wid``; it is also handed
        to ``make_wb_layout`` to build the ``dbus`` Record.
        """
        regwid = pspec.reg_wid
        addrwid = pspec.addr_wid

        super().__init__(regwid, addrwid)
        self.dcache = DCache()
        # shortcuts to the DCache request/response ports
        self.d_in = self.dcache.d_in
        self.d_out = self.dcache.d_out
        # records exchanged with the MMU
        self.l_in = LoadStore1ToMMUType()
        self.l_out = MMUToLoadStore1Type()

        # flags raised from the DCache error outputs (see elaborate)
        self.mmureq = Signal()
        self.derror = Signal()

        # TODO, convert dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        self.dbus = Record(make_wb_layout(pspec))

        # for creating a single clock blip to DCache
        self.d_valid = Signal()
        self.d_w_data = Signal(64)  # XXX
        self.d_w_valid = Signal()
        self.d_validblip = Signal()

    def _issue_dcache_request(self, m, addr, mask, is_load):
        """Drive the DCache request fields shared by loads and stores.

        ``d_valid`` is held high for as long as the request is presented;
        ``d_in.valid`` only sees the single-cycle ``d_validblip`` pulse
        that elaborate() derives from the rising edge of ``d_valid``.
        """
        comb = m.d.comb
        d_in = self.d_in
        comb += self.d_valid.eq(1)
        comb += d_in.valid.eq(self.d_validblip)
        comb += d_in.load.eq(1 if is_load else 0)
        comb += d_in.byte_sel.eq(mask)
        comb += d_in.addr.eq(addr)

    def set_wr_addr(self, m, addr, mask):
        """Present a store request (address and byte-select mask) to DCache."""
        #m.d.comb += self.l_in.valid.eq(1)
        #m.d.comb += self.l_in.addr.eq(addr)
        #m.d.comb += self.l_in.load.eq(0)
        self._issue_dcache_request(m, addr, mask, is_load=False)

    def set_rd_addr(self, m, addr, mask):
        """Present a load request (address and byte-select mask) to DCache."""
        #m.d.comb += self.l_in.valid.eq(1)
        #m.d.comb += self.l_in.load.eq(1)
        #m.d.comb += self.l_in.addr.eq(addr)
        self._issue_dcache_request(m, addr, mask, is_load=True)
        # BAD HACK! disable cacheing on LD when address is 0xCxxx_xxxx
        # this is for peripherals. same thing done in Microwatt loadstore1.vhdl
        with m.If(addr[28:] == 0xc):
            m.d.comb += self.d_in.nc.eq(1)
        return None  #FIXME return value

    def set_wr_data(self, m, data, wen):
        """Flag that store data is available and expose it to elaborate().

        ``wen`` is currently unused (see the commented-out byte_sel line).
        NOTE(review): nothing is returned here; confirm against
        PortInterfaceBase whether a "store ok" value is expected.
        """
        # put data into comb which is picked up in main elaborate()
        m.d.comb += self.d_w_valid.eq(1)
        m.d.comb += self.d_w_data.eq(data)
        #m.d.sync += self.d_in.byte_sel.eq(wen) # this might not be needed
        #st_ok = self.d_out.valid # TODO indicates write data is valid

    def get_rd_data(self, m):
        """Return ``(data, ld_ok)`` taken from the DCache output port."""
        ld_ok = self.d_out.valid  # indicates read data is valid
        data = self.d_out.data    # actual read data
        return data, ld_ok

    # Reference VHDL (excerpt as present in this file) that the error
    # handling in elaborate() is modelled on:
    #
    #   if d_in.error = '1' then
    #       if d_in.cache_paradox = '1' then
    #           -- signal an interrupt straight away
    #           dsisr(63 - 38) := not r2.req.load;
    #           -- XXX there is no architected bit for this
    #           -- (probably should be a machine check in fact)
    #           dsisr(63 - 35) := d_in.cache_paradox;
    #           -- Look up the translation for TLB miss
    #           -- and also for permission error and RC error
    #           -- in case the PTE has been updated.
    #           v.state := MMU_LOOKUP;

    def elaborate(self, platform):
        """Build the module: DCache, error flags, exceptions and wishbone."""
        m = super().elaborate(platform)
        comb, sync = m.d.comb, m.d.sync

        # create dcache module
        m.submodules.dcache = dcache = self.dcache

        d_out, l_out, dbus = self.d_out, self.l_out, self.dbus

        with m.If(d_out.error):
            with m.If(d_out.cache_paradox):
                comb += self.derror.eq(1)
                # dsisr(63 - 38) := not r2.req.load;
                # -- XXX there is no architected bit for this
                # -- (probably should be a machine check in fact)
                # dsisr(63 - 35) := d_in.cache_paradox;
            with m.Else():
                # Look up the translation for TLB miss
                # and also for permission error and RC error
                # in case the PTE has been updated.
                comb += self.mmureq.eq(1)
                # v.state := MMU_LOOKUP;
                # v.stage1_en := '0';

        exc = self.pi.exception_o

        #happened, alignment, instr_fault, invalid,
        comb += exc.happened.eq(d_out.error | l_out.err)
        comb += exc.invalid.eq(l_out.invalid)

        #badtree, perm_error, rc_error, segment_fault
        comb += exc.badtree.eq(l_out.badtree)
        comb += exc.perm_error.eq(l_out.perm_error)
        comb += exc.rc_error.eq(l_out.rc_error)
        comb += exc.segment_fault.eq(l_out.segerr)

        # TODO connect those signals somewhere
        #print(d_out.valid)         -> no error
        #print(d_out.store_done)    -> no error
        #print(d_out.cache_paradox) -> ?
        #print(l_out.done)          -> no error

        # TODO some exceptions set SPRs

        # TODO, connect dcache wb_in/wb_out to "standard" nmigen Wishbone bus
        # (dbus field name, dcache.wb_out field name)
        wb_out = dcache.wb_out
        for bus_name, wb_name in (("adr", "adr"), ("dat_w", "dat"),
                                  ("sel", "sel"), ("cyc", "cyc"),
                                  ("stb", "stb"), ("we", "we")):
            comb += getattr(dbus, bus_name).eq(getattr(wb_out, wb_name))

        wb_in = dcache.wb_in
        comb += wb_in.dat.eq(dbus.dat_r)
        comb += wb_in.ack.eq(dbus.ack)
        # stall only exists on some bus layouts
        if hasattr(dbus, "stall"):
            comb += wb_in.stall.eq(dbus.stall)

        # create a blip (single pulse) on valid read/write request
        comb += self.d_validblip.eq(rising_edge(m, self.d_valid))

        # write out d data only when flag set.  The Else is required:
        # a later unconditional assignment in the same domain would take
        # priority and the store data would never reach the DCache.
        with m.If(self.d_w_valid):
            sync += self.d_in.data.eq(self.d_w_data)
        with m.Else():
            sync += self.d_in.data.eq(0)

        return m

    def ports(self):
        """Yield the ports of the underlying PortInterfaceBase."""
        yield from super().ports()
class TestSRAMLoadStore1(LoadStore1):
    """LoadStore1 with its wishbone bus wired directly to a test SRAM."""

    def __init__(self, pspec):
        """Create the backing Memory.

        The depth is ``pspec.dmem_test_depth`` when that attribute exists
        and is an int, otherwise a small 32-entry Memory is used.
        """
        super().__init__(pspec)

        # small 32-entry Memory unless pspec supplies an integer depth
        depth = getattr(pspec, "dmem_test_depth", None)
        if not isinstance(depth, int):
            depth = 32
        print("TestSRAMBareLoadStoreUnit depth", depth)

        self.mem = Memory(width=pspec.reg_wid, depth=depth)

    def elaborate(self, platform):
        """Add the SRAM submodule and connect it to ``self.dbus``."""
        m = super().elaborate(platform)
        comb = m.d.comb
        dbus = self.dbus

        m.submodules.sram = sram = SRAM(memory=self.mem, granularity=8,
                                        features={'cti', 'bte', 'err'})

        # directly connect the wishbone bus of LoadStoreUnitInterface to SRAM
        # note: SRAM is a target (slave), dbus is initiator (master)
        fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
        fanins = ['dat_r', 'ack', 'err']
        for fanout in fanouts:
            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
                  getattr(dbus, fanout).shape())
            # initiator -> target (emitted once; a repeated identical
            # assignment adds nothing)
            comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
        for fanin in fanins:
            # target -> initiator
            comb += getattr(dbus, fanin).eq(getattr(sram.bus, fanin))
        # the address is connected separately from the fanout list
        comb += sram.bus.adr.eq(dbus.adr)

        return m
225 class FSMMMUStage(ControlBase
):
    """FSM-based MMU: must call set_ldst_interface and pass in an instance
    of a LoadStore1.  This is to comply with the ConfigMemoryPortInterface API.
    """
def __init__(self, pspec):
    """Set up pipeline data, the MMU instance and the SPR field decoder.

    The LoadStore1/DCache side is NOT created here: it is attached later
    through set_ldst_interface().
    """
    # ControlBase creates self.p / self.n, which are used just below
    super().__init__()
    self.pspec = pspec

    # set up p/n data
    self.p.data_i = MMUInputData(pspec)
    self.n.data_o = MMUOutputData(pspec)

    # this Function Unit is extremely unusual in that it actually stores a
    # "thing" rather than "processes inputs and produces outputs".  hence
    # why it has to be a FSM.  linking up LD/ST however is going to have
    # to be done back in Issuer (or Core)
    self.mmu = MMU()  # read back by elaborate() as self.mmu

    # make life a bit easier in Core XXX mustn't really do this,
    # pspec is designed for config variables, rather than passing
    # things around.  have to think about it, design a way to do
    # it that makes "sense"
    # comment out for now self.pspec.mmu = self.mmu
    # comment out for now self.pspec.dcache = self.dcache

    # debugging output for gtkw
    self.debug0 = Signal(4)
    self.illegal = Signal()

    # for SPR field number access
    i = self.p.data_i  # the instruction is reached through the input data
    self.fields = DecodeFields(SignalBitRange, [i.ctx.op.insn])
    self.fields.create_specs()
def set_ldst_interface(self, ldst):
    """must be called back in Core, after FUs have been set up.
    one of those will be the MMU (us!) but the LoadStore1 instance
    must be set up in ConfigMemoryPortInterface. sigh.

    Stores ``ldst`` and takes references to its ``dcache`` and ``pi``
    attributes so that elaborate() can find them.
    """
    # incoming PortInterface
    self.ldst = ldst  # must be stored first: the lines below read it
    self.dcache = self.ldst.dcache
    self.pi = self.ldst.pi
272 def elaborate(self
, platform
):
273 assert hasattr(self
, "dcache"), "remember to call set_ldst_interface"
274 m
= super().elaborate(platform
)
278 # link mmu and dcache together
279 m
.submodules
.mmu
= mmu
= self
.mmu
280 ldst
= self
.ldst
# managed externally: do not add here
281 m
.d
.comb
+= dcache
.m_in
.eq(mmu
.d_out
) # MMUToDCacheType
282 m
.d
.comb
+= mmu
.d_in
.eq(dcache
.m_out
) # DCacheToMMUType
284 l_in
, l_out
= mmu
.l_in
, mmu
.l_out
285 d_in
, d_out
= dcache
.d_in
, dcache
.d_out
286 wb_out
, wb_in
= dcache
.wb_out
, dcache
.wb_in
288 # link ldst and MMU together
289 comb
+= l_in
.eq(ldst
.l_in
)
290 comb
+= ldst
.l_out
.eq(l_out
)
292 data_i
, data_o
= self
.p
.data_i
, self
.n
.data_o
293 a_i
, b_i
, o
, spr1_o
= data_i
.ra
, data_i
.rb
, data_o
.o
, data_o
.spr1
297 # TODO: link these SPRs somewhere
304 m
.d
.comb
+= self
.n
.valid_o
.eq(busy
& done
)
305 m
.d
.comb
+= self
.p
.ready_o
.eq(~busy
)
307 # take copy of X-Form SPR field
308 x_fields
= self
.fields
.FormXFX
309 spr
= Signal(len(x_fields
.SPR
))
310 comb
+= spr
.eq(decode_spr_num(x_fields
.SPR
))
312 # based on MSR bits, set priv and virt mode. TODO: 32-bit mode
313 comb
+= d_in
.priv_mode
.eq(~msr_i
[MSR
.PR
])
314 comb
+= d_in
.virt_mode
.eq(msr_i
[MSR
.DR
])
315 #comb += d_in.mode_32bit.eq(msr_i[MSR.SF]) # ?? err
317 # ok so we have to "pulse" the MMU (or dcache) rather than
318 # hold the valid hi permanently. guess what this does...
321 m
.d
.comb
+= blip
.eq(rising_edge(m
, valid
))
324 with m
.If(self
.p
.valid_i
):
325 m
.d
.sync
+= busy
.eq(1)
328 # based on the Micro-Op, we work out which of MMU or DCache
329 # should "action" the operation. one of MMU or DCache gets
330 # enabled ("valid") and we twiddle our thumbs until it
333 # FIXME: properly implement MicrOp.OP_MTSPR and MicrOp.OP_MFSPR
335 with m
.Switch(op
.insn_type
):
336 with m
.Case(MicrOp
.OP_MTSPR
):
337 # despite redirection this FU **MUST** behave exactly
338 # like the SPR FU. this **INCLUDES** updating the SPR
339 # regfile because the CSV file entry for OP_MTSPR
340 # categorically defines and requires the expectation
341 # that the CompUnit **WILL** write to the regfile.
342 comb
+= spr1_o
.data
.eq(spr
)
343 comb
+= spr1_o
.ok
.eq(1)
344 # subset SPR: first check a few bits
345 with m
.If(~spr
[9] & ~spr
[5]):
346 comb
+= self
.debug0
.eq(3)
348 comb
+= dsisr
.eq(a_i
[:32])
352 # pass it over to the MMU instead
354 comb
+= self
.debug0
.eq(4)
355 # blip the MMU and wait for it to complete
356 comb
+= valid
.eq(1) # start "pulse"
357 comb
+= l_in
.valid
.eq(blip
) # start
358 comb
+= l_in
.mtspr
.eq(1) # mtspr mode
359 comb
+= l_in
.sprn
.eq(spr
) # which SPR
360 comb
+= l_in
.rs
.eq(a_i
) # incoming operand (RS)
361 comb
+= done
.eq(1) # FIXME l_out.done
363 with m
.Case(MicrOp
.OP_MFSPR
):
364 # subset SPR: first check a few bits
365 with m
.If(~spr
[9] & ~spr
[5]):
366 comb
+= self
.debug0
.eq(5)
368 comb
+= o
.data
.eq(dsisr
)
370 comb
+= o
.data
.eq(dar
)
373 # pass it over to the MMU instead
375 comb
+= self
.debug0
.eq(6)
376 # blip the MMU and wait for it to complete
377 comb
+= valid
.eq(1) # start "pulse"
378 comb
+= l_in
.valid
.eq(blip
) # start
379 comb
+= l_in
.mtspr
.eq(0) # mfspr!=mtspr
380 comb
+= l_in
.sprn
.eq(spr
) # which SPR
381 comb
+= l_in
.rs
.eq(a_i
) # incoming operand (RS)
382 comb
+= o
.data
.eq(l_out
.sprval
) # SPR from MMU
383 comb
+= o
.ok
.eq(l_out
.done
) # only when l_out valid
384 comb
+= done
.eq(1) # FIXME l_out.done
386 # XXX this one is going to have to go through LDSTCompUnit
387 # because it's LDST that has control over dcache
388 # (through PortInterface). or, another means is devised
389 # so as not to have double-drivers of d_in.valid and addr
391 #with m.Case(MicrOp.OP_DCBZ):
392 # # activate dcbz mode (spec: v3.0B p850)
393 # comb += valid.eq(1) # start "pulse"
394 # comb += d_in.valid.eq(blip) # start
395 # comb += d_in.dcbz.eq(1) # dcbz mode
396 # comb += d_in.addr.eq(a_i + b_i) # addr is (RA|0) + RB
397 # comb += done.eq(d_out.store_done) # TODO
398 # comb += self.debug0.eq(1)
400 with m
.Case(MicrOp
.OP_TLBIE
):
401 # pass TLBIE request to MMU (spec: v3.0B p1034)
402 # note that the spr is *not* an actual spr number, it's
403 # just that those bits happen to match with field bits
405 comb
+= valid
.eq(1) # start "pulse"
406 comb
+= l_in
.valid
.eq(blip
) # start
407 comb
+= l_in
.tlbie
.eq(1) # mtspr mode
408 comb
+= l_in
.sprn
.eq(spr
) # use sprn to send insn bits
409 comb
+= l_in
.addr
.eq(b_i
) # incoming operand (RB)
410 comb
+= done
.eq(l_out
.done
) # zzzz
411 comb
+= self
.debug0
.eq(2)
413 with m
.Case(MicrOp
.OP_ILLEGAL
):
414 comb
+= self
.illegal
.eq(1)
416 with m
.If(self
.n
.ready_i
& self
.n
.valid_o
):
417 m
.d
.sync
+= busy
.eq(0)