"""
not in any way intended for production use.  this runs a FSM that:

* reads the Program Counter from StateRegs
* reads an instruction from a fixed-size Test Memory
* issues it to the Simple Core
* waits for it to complete
* does it all over again

the purpose of this module is to verify the functional correctness
of the Function Units in the absolute simplest and clearest possible
way, and to provide something that can be further incrementally
improved.
"""
18 from nmigen
import (Elaboratable
, Module
, Signal
, ClockSignal
, ResetSignal
,
19 ClockDomain
, DomainRenamer
)
20 from nmigen
.cli
import rtlil
21 from nmigen
.cli
import main
24 from soc
.decoder
.power_decoder
import create_pdecode
25 from soc
.decoder
.power_decoder2
import PowerDecode2
26 from soc
.decoder
.decode2execute1
import IssuerDecode2ToOperand
27 from soc
.decoder
.decode2execute1
import Data
28 from soc
.experiment
.testmem
import TestMemory
# test only for instructions
29 from soc
.regfile
.regfiles
import StateRegs
, FastRegs
30 from soc
.simple
.core
import NonProductionCore
31 from soc
.config
.test
.test_loadstore
import TestMemPspec
32 from soc
.config
.ifetch
import ConfigFetchUnit
33 from soc
.decoder
.power_enums
import MicrOp
34 from soc
.debug
.dmi
import CoreDebug
, DMIInterface
35 from soc
.debug
.jtag
import JTAG
36 from soc
.config
.pinouts
import get_pinspecs
37 from soc
.config
.state
import CoreState
38 from soc
.interrupts
.xics
import XICS_ICP
, XICS_ICS
39 from soc
.bus
.simple_gpio
import SimpleGPIO
40 from soc
.clock
.select
import ClockSelect
, DummyPLL
43 from nmutil
.util
import rising_edge
class TestIssuerInternal(Elaboratable):
    """TestIssuer - reads instructions from TestMemory and issues them

    efficiency and speed is not the main goal here: functional correctness is.

    a small FSM (IDLE -> INSN_READ -> INSN_START -> INSN_ACTIVE) fetches one
    instruction at a time through the fetch unit, hands the decoded result
    to the core and waits for the core to stop being busy.  a second FSM
    (see tb_dec_fsm) decrements DEC and increments TB in the FAST regfile.

    NOTE(review): this class was re-flowed from a copy in which indentation
    was lost and short source lines were missing.  block nesting was
    re-derived from the comments and from what the statements need in order
    to be valid nmigen.  every statement that had to be *supplied* is tagged
    NOTE(review) below; other short statements may be absent altogether, so
    diff against the upstream file before merging.
    """

    def __init__(self, pspec):
        """create the core, decoder, fetch unit, debug block and regfile ports

        pspec: configuration object.  the optional attributes ``debug``,
               ``xics`` and ``gpio`` are inspected here, and the object is
               handed on to NonProductionCore and ConfigFetchUnit.  when
               JTAG is selected pspec is *modified*: wb_icache_en and
               wb_dcache_en are added to it.
        """
        # JTAG interface.  add this right at the start because if it's
        # added it *modifies* the pspec, by adding enable/disable signals
        # for parts of the rest of the core
        self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
        # NOTE(review): guard line not visible; reconstructed from the
        # jtag_en flag and the "if JTAG is enabled" remark in external_ports
        if self.jtag_en:
            # NOTE(review): the continuation line of this set literal is
            # missing - restore any further pin groups from upstream
            subset = {'uart', 'mtwi', 'eint', 'gpio', 'mspi0', 'mspi1'}
            self.jtag = JTAG(get_pinspecs(subset=subset))
            # add signals to pspec to enable/disable icache and dcache
            # (or data and instruction wishbone if icache/dcache not included)
            pspec.wb_icache_en = self.jtag.wb_icache_en
            pspec.wb_dcache_en = self.jtag.wb_dcache_en

        # add interrupt controller?
        self.xics = hasattr(pspec, "xics") and pspec.xics == True
        # NOTE(review): guard line not visible; reconstructed from the flag
        if self.xics:
            self.xics_icp = XICS_ICP()
            self.xics_ics = XICS_ICS()
            self.int_level_i = self.xics_ics.int_level_i

        # add GPIO peripheral?
        self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
        # NOTE(review): guard line not visible; reconstructed from the flag
        if self.gpio:
            self.simple_gpio = SimpleGPIO()
            self.gpio_o = self.simple_gpio.gpio_o

        # main instruction core
        self.core = NonProductionCore(pspec)

        # instruction decoder.  goes into Trap Record
        pdecode = create_pdecode()
        self.cur_state = CoreState("cur")  # current state (MSR/PC/EINT)
        self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state,
                                     opkls=IssuerDecode2ToOperand)

        # Test Instruction memory
        self.imem = ConfigFetchUnit(pspec).fu
        # one-row cache of instruction read
        self.iline = Signal(64)  # one instruction line
        self.iprev_adr = Signal(64)  # previous address: if different, do read

        # debug (DMI) block
        self.dbg = CoreDebug()

        # instruction go/monitor
        self.pc_o = Signal(64, reset_less=True)
        self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
        self.core_bigendian_i = Signal()
        self.busy_o = Signal(reset_less=True)
        self.memerr_o = Signal(reset_less=True)

        # FAST regfile read /write ports for PC, MSR, DEC/TB
        staterf = self.core.regs.rf['state']
        self.state_r_pc = staterf.r_ports['cia']  # PC rd
        self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
        self.state_r_msr = staterf.r_ports['msr']  # MSR rd

        # DMI interface access
        intrf = self.core.regs.rf['int']
        crrf = self.core.regs.rf['cr']
        xerrf = self.core.regs.rf['xer']
        self.int_r = intrf.r_ports['dmi']  # INT read
        self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read
        self.xer_r = xerrf.r_ports['full_xer']  # XER read

        # hack method of keeping an eye on whether branch/trap set the PC
        self.state_nia = self.core.regs.rf['state'].w_ports['nia']
        self.state_nia.wen.name = 'state_nia_wen'

    def elaborate(self, platform):
        """build the issuer: clock/reset domains, fetch/issue FSM, DMI reads

        the core is placed in the "coresync" clock domain, whose reset is
        the OR of the power-on-reset countdown, the debug block's reset
        request and the "sync" domain reset.
        """
        # NOTE(review): Module creation is not visible; required by every
        # use of "m" below
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        m.submodules.core = core = DomainRenamer("coresync")(self.core)
        m.submodules.imem = self.imem
        m.submodules.dbg = dbg = self.dbg
        # NOTE(review): guard reconstructed - self.jtag only exists when
        # jtag_en is set
        if self.jtag_en:
            m.submodules.jtag = jtag = self.jtag
            # TODO: UART2GDB mux, here, from external pin
            # see https://bugs.libre-soc.org/show_bug.cgi?id=499
            sync += dbg.dmi.connect_to(jtag.dmi)

        cur_state = self.cur_state

        # XICS interrupt handler
        # NOTE(review): guard reconstructed - see __init__
        if self.xics:
            m.submodules.xics_icp = icp = self.xics_icp
            m.submodules.xics_ics = ics = self.xics_ics
            comb += icp.ics_i.eq(ics.icp_o)  # connect ICS to ICP
            sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core

        # GPIO test peripheral
        # NOTE(review): guard reconstructed - see __init__
        if self.gpio:
            m.submodules.simple_gpio = self.simple_gpio

        # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
        # XXX causes litex ECP5 test to get wrong idea about input and output
        # (but works with verilator sim *sigh*)
        #if self.gpio and self.xics:
        #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

        # instruction decoder
        pdecode = create_pdecode()
        m.submodules.dec2 = pdecode2 = self.pdecode2

        # debug-block register request/response records
        d_reg, d_cr, d_xer = dbg.d_gpr, dbg.d_cr, dbg.d_xer
        intrf = self.core.regs.rf['int']

        # clock delay power-on reset
        cd_por = ClockDomain(reset_less=True)
        cd_sync = ClockDomain()
        core_sync = ClockDomain("coresync")
        m.domains += cd_por, cd_sync, core_sync

        ti_rst = Signal(reset_less=True)
        delay = Signal(range(4), reset=3)
        with m.If(delay != 0):
            m.d.por += delay.eq(delay - 1)
        comb += cd_por.clk.eq(ClockSignal())

        # power-on reset delay.  the comparison must be parenthesised:
        # python binds "|" tighter than "!=", so without the brackets this
        # would compare delay against the OR of the two reset signals
        core_rst = ResetSignal("coresync")
        comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
        comb += core_rst.eq(ti_rst)

        # busy/halted signals from core
        comb += self.busy_o.eq(core.busy_o)
        comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

        # temporary hack: says "go" immediately for both address gen and ST
        ldst = core.fus.fus['ldst0']
        st_go_edge = rising_edge(m, ldst.st.rel_o)
        m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)  # link addr-go direct to rel
        m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel

        # PC and instruction from I-Memory
        pc_changed = Signal()  # note write to PC
        comb += self.pc_o.eq(cur_state.pc)
        # NOTE(review): declaration not visible; 32 bits assumed because
        # the instruction latched into it below is a 32-bit word
        ilatch = Signal(32)

        # next instruction (+4 on current)
        nia = Signal(64, reset_less=True)
        comb += nia.eq(cur_state.pc + 4)

        # read the PC
        pc = Signal(64, reset_less=True)
        pc_ok_delay = Signal()
        sync += pc_ok_delay.eq(~self.pc_i.ok)
        with m.If(self.pc_i.ok):
            # incoming override (start from pc_i)
            comb += pc.eq(self.pc_i.data)
        # NOTE(review): Else reconstructed from the "otherwise" comment
        with m.Else():
            # otherwise read StateRegs regfile for PC...
            comb += self.state_r_pc.ren.eq(1<<StateRegs.PC)
        # ... but on a 1-clock delay
        # NOTE(review): assumed to sit outside the Else - confirm nesting
        with m.If(pc_ok_delay):
            comb += pc.eq(self.state_r_pc.data_o)

        # don't write pc every cycle
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.data_i.eq(0)

        # don't read msr every cycle
        comb += self.state_r_msr.ren.eq(0)
        msr_read = Signal(reset=1)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        #comb += dbg.state.pc.eq(cur_state.pc)
        comb += dbg.state.msr.eq(cur_state.msr)

        # shorthand for the core's handshake signals
        core_busy_o = core.busy_o  # core is busy
        core_ivalid_i = core.ivalid_i  # instruction is valid
        core_issue_i = core.issue_i  # instruction is issued
        dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

        insn_type = core.e.do.insn_type

        # actually use a nmigen FSM for the first time (w00t)
        # this FSM is perhaps unusual in that it detects conditions
        # then "holds" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.
        # NOTE(review): FSM wrapper not visible; m.State requires it
        with m.FSM():

            # waiting for permission to fetch
            with m.State("IDLE"):
                sync += pc_changed.eq(0)
                sync += core.raw_insn_i.eq(0)
                sync += core.bigendian_i.eq(0)
                with m.If(~dbg.core_stop_o & ~core_rst):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                    sync += cur_state.pc.eq(pc)

                    # initiate read of MSR.  arrives one clock later
                    comb += self.state_r_msr.ren.eq(1<<StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase
                # NOTE(review): Else reconstructed - "stopped" is reported
                # when the fetch above is not allowed to start
                with m.Else():
                    comb += core.core_stopped_i.eq(1)
                    comb += dbg.core_stopped_i.eq(1)

            # dummy pause to find out why simulation is not keeping up
            with m.State("INSN_READ"):
                # one cycle later, msr read arrives.  valid only once.
                with m.If(~msr_read):
                    sync += msr_read.eq(1)  # yeah don't read it again
                    sync += cur_state.msr.eq(self.state_r_msr.data_o)
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                # NOTE(review): Else reconstructed from the "not busy"
                # comment
                with m.Else():
                    # not busy: instruction fetched
                    f_instr_o = self.imem.f_instr_o
                    if f_instr_o.width == 32:
                        # NOTE(review): this branch body is not visible; a
                        # 32-bit fetch result is taken to be the instruction
                        insn = f_instr_o
                    else:
                        # wider fetch: PC bit 2 picks the 32-bit word
                        insn = f_instr_o.word_select(cur_state.pc[2], 32)
                    comb += dec_opcode_i.eq(insn)  # actual opcode
                    sync += core.e.eq(pdecode2.e)
                    sync += core.state.eq(cur_state)
                    sync += core.raw_insn_i.eq(dec_opcode_i)
                    sync += core.bigendian_i.eq(self.core_bigendian_i)
                    sync += ilatch.eq(insn)  # latch current insn
                    # also drop PC and MSR into decode "state"
                    m.next = "INSN_START"  # move to "start"

            # waiting for instruction bus (stays there until not busy)
            with m.State("INSN_START"):
                comb += core_ivalid_i.eq(1)  # instruction is valid
                comb += core_issue_i.eq(1)  # and issued

                m.next = "INSN_ACTIVE"  # move to "wait completion"

            # instruction started: must wait till it finishes
            with m.State("INSN_ACTIVE"):
                with m.If(insn_type != MicrOp.OP_NOP):
                    comb += core_ivalid_i.eq(1)  # instruction is valid
                with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
                    sync += pc_changed.eq(1)
                with m.If(~core_busy_o):  # instruction done!
                    # ok here we are not reading the branch unit.  TODO
                    # this just blithely overwrites whatever pipeline
                    # updated the PC
                    with m.If(~pc_changed):
                        comb += self.state_w_pc.wen.eq(1<<StateRegs.PC)
                        comb += self.state_w_pc.data_i.eq(nia)
                    sync += core.raw_insn_i.eq(0)
                    sync += core.bigendian_i.eq(0)
                    m.next = "IDLE"  # back to idle

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        with m.If(d_reg.req):  # request for regfile access being made
            # TODO: error-check this
            # XXX should this be combinatorial?  sync better?
            # NOTE(review): the selecting condition is not visible; two
            # alternative ways of addressing the port are, and
            # "intrf.unary" is assumed to choose between them - confirm
            if intrf.unary:
                comb += self.int_r.ren.eq(1<<d_reg.addr)
            else:
                comb += self.int_r.addr.eq(d_reg.addr)
                comb += self.int_r.ren.eq(1)
        d_reg_delay = Signal()
        sync += d_reg_delay.eq(d_reg.req)
        with m.If(d_reg_delay):
            # data arrives one clock later
            comb += d_reg.data.eq(self.int_r.data_o)
            comb += d_reg.ack.eq(1)

        # sigh same thing for CR debug
        with m.If(d_cr.req):  # request for regfile access being made
            comb += self.cr_r.ren.eq(0b11111111)  # enable all
        d_cr_delay = Signal()
        sync += d_cr_delay.eq(d_cr.req)
        with m.If(d_cr_delay):
            # data arrives one clock later
            comb += d_cr.data.eq(self.cr_r.data_o)
            comb += d_cr.ack.eq(1)

        # and again for XER
        with m.If(d_xer.req):  # request for regfile access being made
            comb += self.xer_r.ren.eq(0b111111)  # enable all
        d_xer_delay = Signal()
        sync += d_xer_delay.eq(d_xer.req)
        with m.If(d_xer_delay):
            # data arrives one clock later
            comb += d_xer.data.eq(self.xer_r.data_o)
            comb += d_xer.ack.eq(1)

        # DEC and TB inc/dec FSM
        self.tb_dec_fsm(m, cur_state.dec)

        # NOTE(review): return not visible; elaborate must hand back the
        # module it built
        return m

    def tb_dec_fsm(self, m, spr_dec):
        """add the DEC / TB update FSM to module m

        this is a FSM for updating either dec or tb.  it runs alternately
        DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
        value to DEC, however the regfile has "passthrough" on it so this
        should be ok.

        see v3.0B p1097-1099 for Timer Resource and p1065 and p1076

        m:       the Module being elaborated; statements are added to its
                 comb and sync domains
        spr_dec: signal that receives a copy of each new DEC value
        """
        comb, sync = m.d.comb, m.d.sync
        fast_rf = self.core.regs.rf['fast']
        fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
        fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB

        # NOTE(review): the FSM wrapper, the four state transitions and the
        # new_dec / new_tb declarations are not visible.  the transitions
        # follow the "DEC, TB, DEC, TB" order given above; 64 bits is
        # assumed for both values - confirm against the regfile width
        with m.FSM():

            # initiates read of current DEC
            with m.State("DEC_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.DEC)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "DEC_WRITE"

            # waits for DEC read to arrive (1 cycle), updates with new value
            with m.State("DEC_WRITE"):
                new_dec = Signal(64)
                # TODO: MSR.LPCR 32-bit decrement mode
                comb += new_dec.eq(fast_r_dectb.data_o - 1)
                comb += fast_w_dectb.addr.eq(FastRegs.DEC)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_dec)
                sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
                m.next = "TB_READ"

            # initiates read of current TB
            with m.State("TB_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.TB)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "TB_WRITE"

            # waits for read TB to arrive, initiates write of current TB
            with m.State("TB_WRITE"):
                new_tb = Signal(64)
                comb += new_tb.eq(fast_r_dectb.data_o + 1)
                comb += fast_w_dectb.addr.eq(FastRegs.TB)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_tb)
                m.next = "DEC_READ"

        # NOTE(review): return not visible; the caller in elaborate ignores
        # the result, m is handed back in case other callers expect it
        return m

    # NOTE(review): the "def" line owning the yields below is not visible.
    # __iter__ plus a list-returning ports() is assumed; TestIssuer.ports
    # calls self.ti.ports(), so a ports() method has to exist
    def __iter__(self):
        """yield the signals of the issuer, core and fetch unit"""
        yield from self.pc_i.ports()
        # NOTE(review): the numbering shows short lines missing around
        # here; pc_o, memerr_o and busy_o are supplied to mirror the list
        # in external_ports - confirm
        yield self.pc_o
        yield self.memerr_o
        yield from self.core.ports()
        yield from self.imem.ports()
        yield self.core_bigendian_i
        yield self.busy_o

    def ports(self):
        """return everything __iter__ yields, as a list"""
        return list(self)

    def external_ports(self):
        """return the list of signals to expose on the generated top level

        always includes the PC/busy/endian signals, the fetch unit's ibus
        fields and the load/store slavebus fields.  JTAG pins replace the
        DMI ports when JTAG is enabled; XICS and GPIO bus fields are added
        only when those peripherals were created.
        """
        ports = self.pc_i.ports()
        ports += [self.pc_o, self.memerr_o, self.core_bigendian_i,
                  self.busy_o,
                  ]

        # NOTE(review): the if/else lines are not visible; reconstructed
        # from the comment on the DMI branch
        if self.jtag_en:
            ports += list(self.jtag.external_ports())
        else:
            # don't add DMI if JTAG is enabled
            ports += list(self.dbg.dmi.ports())

        ports += list(self.imem.ibus.fields.values())
        ports += list(self.core.l0.cmpi.lsmem.lsi.slavebus.fields.values())

        # NOTE(review): guard reconstructed - the attributes below only
        # exist when the matching peripheral was created in __init__
        if self.xics:
            ports += list(self.xics_icp.bus.fields.values())
            ports += list(self.xics_ics.bus.fields.values())
            ports.append(self.int_level_i)

        # NOTE(review): guard reconstructed - see above
        if self.gpio:
            ports += list(self.simple_gpio.bus.fields.values())
            ports.append(self.gpio_o)

        # NOTE(review): return not visible; TestIssuer.external_ports
        # appends to the value returned here
        return ports
class TestIssuer(Elaboratable):
    """clocking wrapper: TestIssuerInternal plus a DummyPLL and ClockSelect

    drives the clock of the "coresync" domain either from the clock
    selector or straight from the default clock, and runs the clock
    selector itself in a "pllclk" domain fed by the PLL output.

    NOTE(review): re-flowed from a copy in which short source lines were
    missing; supplied statements are tagged NOTE(review) below.
    """

    def __init__(self, pspec):
        """pspec: configuration object, passed on to TestIssuerInternal.
        the optional attribute ``use_pll`` selects the PLL-derived clock.
        """
        self.ti = TestIssuerInternal(pspec)

        self.pll = DummyPLL()
        self.clksel = ClockSelect()

        # PLL direct clock or not (False when pspec has no use_pll)
        self.pll_en = getattr(pspec, "use_pll", False)

    def elaborate(self, platform):
        """wire up the issuer, the PLL, the clock selector and their clocks"""
        # NOTE(review): Module creation and the comb alias are not visible;
        # both are required by the statements below
        m = Module()
        comb = m.d.comb

        # TestIssuer runs at direct clock.  the "coresync" domain itself is
        # declared inside TestIssuerInternal.elaborate, so only its clock
        # is driven from here
        m.submodules.ti = self.ti

        # ClockSelect runs at PLL output internal clock rate
        m.submodules.clksel = clksel = DomainRenamer("pllclk")(self.clksel)
        m.submodules.pll = pll = self.pll

        # add the PLL clock domain used by the renamed ClockSelect
        cd_pll = ClockDomain("pllclk")
        # NOTE(review): this line is not visible; reconstructed from the
        # original "add ... clock domains" comment
        m.domains += cd_pll

        # internal clock is set to selector clock-out.  has the side-effect
        # of running the core at this speed (TestIssuerInternal renames the
        # core into "coresync")
        intclk = ClockSignal("coresync")
        # NOTE(review): the if/else lines are not visible; reconstructed
        # from pll_en ("PLL direct clock or not") and the two alternative
        # drivers of intclk
        if self.pll_en:
            comb += intclk.eq(clksel.core_clk_o)
        else:
            comb += intclk.eq(ClockSignal())

        # PLL clock established.  has the side-effect of running clksel
        # at the PLL's speed (see DomainRenamer("pllclk") above)
        pllclk = ClockSignal("pllclk")
        comb += pllclk.eq(pll.clk_pll_o)

        # wire up external 24mhz to PLL and clksel
        comb += clksel.clk_24_i.eq(ClockSignal())
        comb += pll.clk_24_i.eq(clksel.clk_24_i)

        # now wire up ResetSignals.  don't mind them all being in this domain
        #int_rst = ResetSignal("coresync")
        pll_rst = ResetSignal("pllclk")
        #comb += int_rst.eq(ResetSignal())
        comb += pll_rst.eq(ResetSignal())

        # NOTE(review): return not visible; elaborate must hand back the
        # module it built
        return m

    # NOTE(review): the "def" line is not visible; the name is taken from
    # the dut.ports() call in the __main__ section
    def ports(self):
        """return issuer, PLL and clock-selector ports plus clock and reset"""
        return list(self.ti.ports()) + list(self.pll.ports()) + \
               [ClockSignal(), ResetSignal()] + \
               list(self.clksel.ports())

    def external_ports(self):
        """return the issuer's external ports plus clock, reset and the
        clock selector's clk_sel_i / pll_48_o
        """
        ports = self.ti.external_ports()
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        ports.append(self.clksel.clk_sel_i)
        ports.append(self.clksel.pll_48_o)
        # NOTE(review): return not visible; the __main__ section passes the
        # result of this method as ports=
        return ports
519 if __name__
== '__main__':
520 units
= {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
526 pspec
= TestMemPspec(ldst_ifacetype
='bare_wb',
527 imem_ifacetype
='bare_wb',
532 dut
= TestIssuer(pspec
)
533 vl
= main(dut
, ports
=dut
.ports(), name
="test_issuer")
535 if len(sys
.argv
) == 1:
536 vl
= rtlil
.convert(dut
, ports
=dut
.external_ports(), name
="test_issuer")
537 with
open("test_issuer.il", "w") as f
: