ae6c24bfde03546481aead6c04415b02a17a4554
3 not in any way intended for production use. this runs a FSM that:
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
10 * does it all over again
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to at least provide something that can be further incrementally
18 from nmigen
import (Elaboratable
, Module
, Signal
, ClockSignal
, ResetSignal
,
19 ClockDomain
, DomainRenamer
)
20 from nmigen
.cli
import rtlil
21 from nmigen
.cli
import main
24 from soc
.decoder
.power_decoder
import create_pdecode
25 from soc
.decoder
.power_decoder2
import PowerDecode2
26 from soc
.decoder
.decode2execute1
import IssuerDecode2ToOperand
27 from soc
.decoder
.decode2execute1
import Data
28 from soc
.experiment
.testmem
import TestMemory
# test only for instructions
29 from soc
.regfile
.regfiles
import StateRegs
, FastRegs
30 from soc
.simple
.core
import NonProductionCore
31 from soc
.config
.test
.test_loadstore
import TestMemPspec
32 from soc
.config
.ifetch
import ConfigFetchUnit
33 from soc
.decoder
.power_enums
import MicrOp
34 from soc
.debug
.dmi
import CoreDebug
, DMIInterface
35 from soc
.debug
.jtag
import JTAG
36 from soc
.config
.pinouts
import get_pinspecs
37 from soc
.config
.state
import CoreState
38 from soc
.interrupts
.xics
import XICS_ICP
, XICS_ICS
39 from soc
.bus
.simple_gpio
import SimpleGPIO
40 from soc
.clock
.select
import ClockSelect
, DummyPLL
43 from nmutil
.util
import rising_edge
46 class TestIssuerInternal(Elaboratable
):
47 """TestIssuer - reads instructions from TestMemory and issues them
49 efficiency and speed is not the main goal here: functional correctness is.
51 def __init__(self
, pspec
):
53 # add interrupt controller?
54 self
.xics
= hasattr(pspec
, "xics") and pspec
.xics
== True
56 self
.xics_icp
= XICS_ICP()
57 self
.xics_ics
= XICS_ICS()
58 self
.int_level_i
= self
.xics_ics
.int_level_i
60 # add GPIO peripheral?
61 self
.gpio
= hasattr(pspec
, "gpio") and pspec
.gpio
== True
63 self
.simple_gpio
= SimpleGPIO()
64 self
.gpio_o
= self
.simple_gpio
.gpio_o
66 # main instruction core25
67 self
.core
= core
= NonProductionCore(pspec
)
69 # instruction decoder. goes into Trap Record
70 pdecode
= create_pdecode()
71 self
.cur_state
= CoreState("cur") # current state (MSR/PC/EINT)
72 self
.pdecode2
= PowerDecode2(pdecode
, state
=self
.cur_state
,
73 opkls
=IssuerDecode2ToOperand
)
75 # Test Instruction memory
76 self
.imem
= ConfigFetchUnit(pspec
).fu
77 # one-row cache of instruction read
78 self
.iline
= Signal(64) # one instruction line
79 self
.iprev_adr
= Signal(64) # previous address: if different, do read
82 self
.dbg
= CoreDebug()
85 self
.jtag_en
= hasattr(pspec
, "debug") and pspec
.debug
== 'jtag'
87 subset
= {'uart', 'mtwi', 'eint', 'gpio', 'mspi0', 'mspi1',
89 self
.jtag
= JTAG(get_pinspecs(subset
=subset
))
91 # instruction go/monitor
92 self
.pc_o
= Signal(64, reset_less
=True)
93 self
.pc_i
= Data(64, "pc_i") # set "ok" to indicate "please change me"
94 self
.core_bigendian_i
= Signal()
95 self
.busy_o
= Signal(reset_less
=True)
96 self
.memerr_o
= Signal(reset_less
=True)
98 # FAST regfile read /write ports for PC, MSR, DEC/TB
99 staterf
= self
.core
.regs
.rf
['state']
100 self
.state_r_pc
= staterf
.r_ports
['cia'] # PC rd
101 self
.state_w_pc
= staterf
.w_ports
['d_wr1'] # PC wr
102 self
.state_r_msr
= staterf
.r_ports
['msr'] # MSR rd
104 # DMI interface access
105 intrf
= self
.core
.regs
.rf
['int']
106 crrf
= self
.core
.regs
.rf
['cr']
107 xerrf
= self
.core
.regs
.rf
['xer']
108 self
.int_r
= intrf
.r_ports
['dmi'] # INT read
109 self
.cr_r
= crrf
.r_ports
['full_cr_dbg'] # CR read
110 self
.xer_r
= xerrf
.r_ports
['full_xer'] # XER read
112 # hack method of keeping an eye on whether branch/trap set the PC
113 self
.state_nia
= self
.core
.regs
.rf
['state'].w_ports
['nia']
114 self
.state_nia
.wen
.name
= 'state_nia_wen'
def elaborate(self, platform):
    """Assemble the issuer: core, fetch unit, debug, resets and issue FSM.

    The issue FSM cycles IDLE -> INSN_READ -> INSN_START -> INSN_ACTIVE
    -> IDLE, handling one instruction at a time.  Outside the FSM, DMI
    requests for INT / CR / XER regfile reads are serviced, and the
    DEC/TB update FSM is added through tb_dec_fsm().

    platform is not used here.  Returns the populated Module.
    """
    m = Module()
    comb, sync = m.d.comb, m.d.sync

    m.submodules.core = core = DomainRenamer("coresync")(self.core)
    m.submodules.imem = imem = self.imem
    m.submodules.dbg = dbg = self.dbg
    # JTAG is optional: only wire it in when it was requested
    if self.jtag_en:
        m.submodules.jtag = jtag = self.jtag
        # TODO: UART2GDB mux, here, from external pin
        # see https://bugs.libre-soc.org/show_bug.cgi?id=499
        sync += dbg.dmi.connect_to(jtag.dmi)

    cur_state = self.cur_state

    # XICS interrupt handler (optional)
    if self.xics:
        m.submodules.xics_icp = icp = self.xics_icp
        m.submodules.xics_ics = ics = self.xics_ics
        comb += icp.ics_i.eq(ics.icp_o)            # connect ICS to ICP
        sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core

    # GPIO test peripheral (optional)
    if self.gpio:
        m.submodules.simple_gpio = simple_gpio = self.simple_gpio

    # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
    # XXX causes litex ECP5 test to get wrong idea about input and output
    # (but works with verilator sim *sigh*)
    #if self.gpio and self.xics:
    #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

    # instruction decoder
    # NOTE(review): this fresh decoder is never used below; the decoder
    # actually added as a submodule is self.pdecode2 from __init__.
    pdecode = create_pdecode()
    m.submodules.dec2 = pdecode2 = self.pdecode2

    # convenience names for the debug interfaces
    dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
    intrf = self.core.regs.rf['int']

    # clock delay power-on reset
    cd_por = ClockDomain(reset_less=True)
    cd_sync = ClockDomain()
    core_sync = ClockDomain("coresync")
    m.domains += cd_por, cd_sync, core_sync

    ti_rst = Signal(reset_less=True)
    delay = Signal(range(4), reset=3)
    with m.If(delay != 0):
        m.d.por += delay.eq(delay - 1)
    comb += cd_por.clk.eq(ClockSignal())

    # power-on reset delay.  the parentheses matter: in Python "|" binds
    # tighter than "!=", so without them this would compare delay against
    # the OR of the reset sources instead of OR-ing three conditions.
    core_rst = ResetSignal("coresync")
    comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
    comb += core_rst.eq(ti_rst)

    # busy/halted signals from core
    comb += self.busy_o.eq(core.busy_o)
    comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

    # temporary hack: says "go" immediately for both address gen and ST
    ldst = core.fus.fus['ldst0']
    st_go_edge = rising_edge(m, ldst.st.rel_o)
    m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)  # link addr-go direct to rel
    m.d.comb += ldst.st.go_i.eq(st_go_edge)     # link store-go to rising rel

    # PC and instruction from I-Memory
    pc_changed = Signal()  # note write to PC
    comb += self.pc_o.eq(cur_state.pc)
    ilatch = Signal(32)    # holds the most recently fetched 32-bit insn

    # next instruction (+4 on current)
    nia = Signal(64, reset_less=True)
    comb += nia.eq(cur_state.pc + 4)

    # work out the PC to fetch from
    pc = Signal(64, reset_less=True)
    pc_ok_delay = Signal()
    sync += pc_ok_delay.eq(~self.pc_i.ok)
    with m.If(self.pc_i.ok):
        # incoming override (start from pc_i)
        comb += pc.eq(self.pc_i.data)
    with m.Else():
        # otherwise read StateRegs regfile for PC...
        comb += self.state_r_pc.ren.eq(1 << StateRegs.PC)
    # ... but on a 1-clock delay
    with m.If(pc_ok_delay):
        comb += pc.eq(self.state_r_pc.data_o)

    # don't write pc every cycle
    comb += self.state_w_pc.wen.eq(0)
    comb += self.state_w_pc.data_i.eq(0)

    # don't read msr every cycle
    comb += self.state_r_msr.ren.eq(0)
    msr_read = Signal(reset=1)

    # connect up debug signals
    # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
    comb += dbg.terminate_i.eq(core.core_terminate_o)
    comb += dbg.state.pc.eq(pc)
    #comb += dbg.state.pc.eq(cur_state.pc)
    comb += dbg.state.msr.eq(cur_state.msr)

    # short names for the core handshake signals
    core_busy_o = core.busy_o                  # core is busy
    core_ivalid_i = core.ivalid_i              # instruction is valid
    core_issue_i = core.issue_i                # instruction is issued
    dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

    insn_type = core.e.do.insn_type

    # actually use a nmigen FSM for the first time (w00t)
    # this FSM is perhaps unusual in that it detects conditions
    # then "holds" information, combinatorially, for the core
    # (as opposed to using sync - which would be on a clock's delay)
    # this includes the actual opcode, valid flags and so on.
    with m.FSM():

        # waiting to be allowed to start an instruction
        with m.State("IDLE"):
            sync += pc_changed.eq(0)
            sync += core.raw_insn_i.eq(0)
            sync += core.bigendian_i.eq(0)
            with m.If(~dbg.core_stop_o & ~core_rst):
                # instruction allowed to go: start by reading the PC
                # capture the PC and also drop it into Insn Memory
                # we have joined a pair of combinatorial memory
                # lookups together.  this is Generally Bad.
                comb += self.imem.a_pc_i.eq(pc)
                comb += self.imem.a_valid_i.eq(1)
                comb += self.imem.f_valid_i.eq(1)
                sync += cur_state.pc.eq(pc)

                # initiate read of MSR.  arrives one clock later
                comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
                sync += msr_read.eq(0)

                m.next = "INSN_READ"  # move to "wait for bus" phase
            with m.Else():
                # held by debug stop or reset: report "stopped"
                comb += core.core_stopped_i.eq(1)
                comb += dbg.core_stopped_i.eq(1)

        # waiting for instruction bus (stays here until not busy)
        with m.State("INSN_READ"):
            # one cycle later, msr read arrives.  valid only once.
            with m.If(~msr_read):
                sync += msr_read.eq(1)  # yeah don't read it again
                sync += cur_state.msr.eq(self.state_r_msr.data_o)
            with m.If(self.imem.f_busy_o):  # zzz...
                # busy: stay in wait-read
                comb += self.imem.a_valid_i.eq(1)
                comb += self.imem.f_valid_i.eq(1)
            with m.Else():
                # not busy: instruction fetched
                f_instr_o = self.imem.f_instr_o
                if f_instr_o.width == 32:
                    # fetch port is already one instruction wide
                    insn = f_instr_o
                else:
                    # wider fetch line: pick the 32-bit word using PC bit 2
                    insn = f_instr_o.word_select(cur_state.pc[2], 32)
                comb += dec_opcode_i.eq(insn)  # actual opcode
                sync += core.e.eq(pdecode2.e)
                # also drop PC and MSR into decode "state"
                sync += core.state.eq(cur_state)
                sync += core.raw_insn_i.eq(dec_opcode_i)
                sync += core.bigendian_i.eq(self.core_bigendian_i)
                sync += ilatch.eq(insn)  # latch current insn
                m.next = "INSN_START"  # move to "start"

        # instruction decoded: present it to the core for one cycle
        with m.State("INSN_START"):
            comb += core_ivalid_i.eq(1)  # instruction is valid
            comb += core_issue_i.eq(1)   # and issued
            m.next = "INSN_ACTIVE"  # move to "wait completion"

        # instruction started: must wait till it finishes
        with m.State("INSN_ACTIVE"):
            with m.If(insn_type != MicrOp.OP_NOP):
                comb += core_ivalid_i.eq(1)  # instruction is valid
            # remember if anything (branch/trap) wrote the PC meanwhile
            with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
                sync += pc_changed.eq(1)
            with m.If(~core_busy_o):  # instruction done!
                # ok here we are not reading the branch unit.  TODO
                # only write PC+4 if no pipeline already updated the PC
                with m.If(~pc_changed):
                    comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                    comb += self.state_w_pc.data_i.eq(nia)
                sync += core.raw_insn_i.eq(0)
                sync += core.bigendian_i.eq(0)
                m.next = "IDLE"  # back to idle

    # this bit doesn't have to be in the FSM: connect up to read
    # regfiles on demand from DMI
    with m.If(d_reg.req):  # request for regfile access being made
        # TODO: error-check this
        # XXX should this be combinatorial?  sync better?
        # NOTE(review): assumes the regfile exposes a `unary` flag that
        # selects one-hot read-enable versus binary address + enable;
        # confirm against the regfile implementation.
        if intrf.unary:
            comb += self.int_r.ren.eq(1 << d_reg.addr)
        else:
            comb += self.int_r.addr.eq(d_reg.addr)
            comb += self.int_r.ren.eq(1)
    d_reg_delay = Signal()
    sync += d_reg_delay.eq(d_reg.req)
    with m.If(d_reg_delay):
        # data arrives one clock later
        comb += d_reg.data.eq(self.int_r.data_o)
        comb += d_reg.ack.eq(1)

    # sigh same thing for CR debug
    with m.If(d_cr.req):  # request for regfile access being made
        comb += self.cr_r.ren.eq(0b11111111)  # enable all
    d_cr_delay = Signal()
    sync += d_cr_delay.eq(d_cr.req)
    with m.If(d_cr_delay):
        # data arrives one clock later
        comb += d_cr.data.eq(self.cr_r.data_o)
        comb += d_cr.ack.eq(1)

    # and again for XER debug
    with m.If(d_xer.req):  # request for regfile access being made
        comb += self.xer_r.ren.eq(0b111111)  # enable all
    d_xer_delay = Signal()
    sync += d_xer_delay.eq(d_xer.req)
    with m.If(d_xer_delay):
        # data arrives one clock later
        comb += d_xer.data.eq(self.xer_r.data_o)
        comb += d_xer.ack.eq(1)

    # DEC and TB inc/dec FSM
    self.tb_dec_fsm(m, cur_state.dec)

    return m
def tb_dec_fsm(self, m, spr_dec):
    """Add the DEC / TB update FSM to module m.

    this is a FSM for updating either dec or tb.  it runs alternately
    DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
    value to DEC, however the regfile has "passthrough" on it so this
    should be ok.

    see v3.0B p1097-1099 for Timer Resource and p1065 and p1076

    m is the Module being built; spr_dec is the signal that receives a
    copy of each newly written DEC value.  Returns m.
    """
    comb, sync = m.d.comb, m.d.sync
    fast_rf = self.core.regs.rf['fast']
    fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
    fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB

    with m.FSM():

        # initiates read of current DEC
        with m.State("DEC_READ"):
            comb += fast_r_dectb.addr.eq(FastRegs.DEC)
            comb += fast_r_dectb.ren.eq(1)
            m.next = "DEC_WRITE"

        # waits for DEC read to arrive (1 cycle), updates with new value
        with m.State("DEC_WRITE"):
            # NOTE(review): 64 bits assumed for the regfile data width
            new_dec = Signal(64)
            # TODO: MSR.LPCR 32-bit decrement mode
            comb += new_dec.eq(fast_r_dectb.data_o - 1)
            comb += fast_w_dectb.addr.eq(FastRegs.DEC)
            comb += fast_w_dectb.wen.eq(1)
            comb += fast_w_dectb.data_i.eq(new_dec)
            sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
            m.next = "TB_READ"

        # initiates read of current TB
        with m.State("TB_READ"):
            comb += fast_r_dectb.addr.eq(FastRegs.TB)
            comb += fast_r_dectb.ren.eq(1)
            m.next = "TB_WRITE"

        # waits for read TB to arrive, initiates write of current TB
        with m.State("TB_WRITE"):
            # NOTE(review): 64 bits assumed for the regfile data width
            new_tb = Signal(64)
            comb += new_tb.eq(fast_r_dectb.data_o + 1)
            comb += fast_w_dectb.addr.eq(FastRegs.TB)
            comb += fast_w_dectb.wen.eq(1)
            comb += fast_w_dectb.data_i.eq(new_tb)
            m.next = "DEC_READ"

    return m
408 yield from self
.pc_i
.ports()
411 yield from self
.core
.ports()
412 yield from self
.imem
.ports()
413 yield self
.core_bigendian_i
def external_ports(self):
    """Return the list of signals to expose outside this module.

    Always included: the pc_i ports, pc_o, memerr_o, core_bigendian_i,
    busy_o, and the instruction and load/store bus fields.  Debug access
    is either the JTAG pins or the raw DMI ports, never both.  XICS and
    GPIO bus fields are added only when those peripherals are enabled.
    """
    ports = self.pc_i.ports()
    ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o]

    if self.jtag_en:
        ports += list(self.jtag.external_ports())
    else:
        # don't add DMI if JTAG is enabled
        ports += list(self.dbg.dmi.ports())

    ports += list(self.imem.ibus.fields.values())
    ports += list(self.core.l0.cmpi.lsmem.lsi.slavebus.fields.values())

    if self.xics:
        ports += list(self.xics_icp.bus.fields.values())
        ports += list(self.xics_ics.bus.fields.values())
        ports.append(self.int_level_i)

    if self.gpio:
        ports += list(self.simple_gpio.bus.fields.values())
        ports.append(self.gpio_o)

    return ports
class TestIssuer(Elaboratable):
    """TestIssuerInternal plus a PLL and a clock selector.

    The internal issuer's "coresync" domain is clocked either from the
    clock selector output (when pspec.use_pll is set) or directly from
    the default clock.
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)

        self.pll = DummyPLL()
        self.clksel = ClockSelect()

        # PLL direct clock or not
        self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll

    def elaborate(self, platform):
        """Wire up the issuer, PLL, clock selector and their clock domains.

        platform is not used here.  Returns the populated Module.
        """
        m = Module()
        comb = m.d.comb

        # TestIssuer runs at direct clock
        m.submodules.ti = ti = self.ti
        cd_int = ClockDomain("coresync")

        # ClockSelect runs at PLL output internal clock rate
        m.submodules.clksel = clksel = DomainRenamer("pllclk")(self.clksel)
        m.submodules.pll = pll = self.pll

        # add 2 clock domains established above...
        cd_pll = ClockDomain("pllclk")
        m.domains += cd_int, cd_pll

        # internal clock is set to selector clock-out when the PLL is in
        # use, otherwise straight from the default clock.  this sets the
        # speed of everything placed in the "coresync" domain.
        intclk = ClockSignal("coresync")
        if self.pll_en:
            comb += intclk.eq(clksel.core_clk_o)
        else:
            comb += intclk.eq(ClockSignal())

        # PLL clock established.  has the side-effect of running clksel
        # at the PLL's speed (see DomainRenamer("pllclk") above)
        pllclk = ClockSignal("pllclk")
        comb += pllclk.eq(pll.clk_pll_o)

        # wire up external 24mhz to PLL and clksel
        comb += clksel.clk_24_i.eq(ClockSignal())
        comb += pll.clk_24_i.eq(clksel.clk_24_i)

        # now wire up ResetSignals.  don't mind them all being in this domain
        #int_rst = ResetSignal("coresync")
        pll_rst = ResetSignal("pllclk")
        #comb += int_rst.eq(ResetSignal())
        comb += pll_rst.eq(ResetSignal())

        return m

    def ports(self):
        """Return all ports: issuer, PLL, default clock/reset, clock select."""
        return list(self.ti.ports()) + list(self.pll.ports()) + \
               [ClockSignal(), ResetSignal()] + \
               list(self.clksel.ports())

    def external_ports(self):
        """Return the issuer's external ports plus clock, reset and PLL pins."""
        ports = self.ti.external_ports()
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        ports.append(self.clksel.clk_sel_i)
        ports.append(self.clksel.pll_48_o)
        return ports
513 if __name__
== '__main__':
514 units
= {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
520 pspec
= TestMemPspec(ldst_ifacetype
='bare_wb',
521 imem_ifacetype
='bare_wb',
526 dut
= TestIssuer(pspec
)
527 vl
= main(dut
, ports
=dut
.ports(), name
="test_issuer")
529 if len(sys
.argv
) == 1:
530 vl
= rtlil
.convert(dut
, ports
=dut
.external_ports(), name
="test_issuer")
531 with
open("test_issuer.il", "w") as f
: