27afb68ed4546c2751f87f35db7373f845c7464b
3 not in any way intended for production use. this runs a FSM that:
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
10 * does it all over again
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to at least provide something that can be further incrementally
18 from nmigen
import (Elaboratable
, Module
, Signal
, ClockSignal
, ResetSignal
,
19 ClockDomain
, DomainRenamer
)
20 from nmigen
.cli
import rtlil
21 from nmigen
.cli
import main
24 from soc
.decoder
.power_decoder
import create_pdecode
25 from soc
.decoder
.power_decoder2
import PowerDecode2
26 from soc
.decoder
.decode2execute1
import IssuerDecode2ToOperand
27 from soc
.decoder
.decode2execute1
import Data
28 from soc
.experiment
.testmem
import TestMemory
# test only for instructions
29 from soc
.regfile
.regfiles
import StateRegs
, FastRegs
30 from soc
.simple
.core
import NonProductionCore
31 from soc
.config
.test
.test_loadstore
import TestMemPspec
32 from soc
.config
.ifetch
import ConfigFetchUnit
33 from soc
.decoder
.power_enums
import MicrOp
34 from soc
.debug
.dmi
import CoreDebug
, DMIInterface
35 from soc
.debug
.jtag
import JTAG
36 from soc
.config
.pinouts
import get_pinspecs
37 from soc
.config
.state
import CoreState
38 from soc
.interrupts
.xics
import XICS_ICP
, XICS_ICS
39 from soc
.bus
.simple_gpio
import SimpleGPIO
40 from soc
.clock
.select
import ClockSelect
, DummyPLL
43 from nmutil
.util
import rising_edge
46 class TestIssuerInternal(Elaboratable
):
47 """TestIssuer - reads instructions from TestMemory and issues them
49 efficiency and speed is not the main goal here: functional correctness is.
51 def __init__(self
, pspec
):
53 # JTAG interface. add this right at the start because if it's
54 # added it *modifies* the pspec, by adding enable/disable signals
55 # for parts of the rest of the core
56 self
.jtag_en
= hasattr(pspec
, "debug") and pspec
.debug
== 'jtag'
58 subset
= {'uart', 'mtwi', 'eint', 'gpio', 'mspi0', 'mspi1',
60 self
.jtag
= JTAG(get_pinspecs(subset
=subset
))
61 # add signals to pspec to enable/disable icache and dcache
62 # (or data and instruction wishbone if icache/dcache not included)
63 # https://bugs.libre-soc.org/show_bug.cgi?id=520
64 # TODO: do we actually care if these are not domain-synchronised?
65 # honestly probably not.
66 pspec
.wb_icache_en
= self
.jtag
.wb_icache_en
67 pspec
.wb_dcache_en
= self
.jtag
.wb_dcache_en
69 # add interrupt controller?
70 self
.xics
= hasattr(pspec
, "xics") and pspec
.xics
== True
72 self
.xics_icp
= XICS_ICP()
73 self
.xics_ics
= XICS_ICS()
74 self
.int_level_i
= self
.xics_ics
.int_level_i
76 # add GPIO peripheral?
77 self
.gpio
= hasattr(pspec
, "gpio") and pspec
.gpio
== True
79 self
.simple_gpio
= SimpleGPIO()
80 self
.gpio_o
= self
.simple_gpio
.gpio_o
82 # main instruction core
83 self
.core
= core
= NonProductionCore(pspec
)
85 # instruction decoder. goes into Trap Record
86 pdecode
= create_pdecode()
87 self
.cur_state
= CoreState("cur") # current state (MSR/PC/EINT)
88 self
.pdecode2
= PowerDecode2(pdecode
, state
=self
.cur_state
,
89 opkls
=IssuerDecode2ToOperand
)
91 # Test Instruction memory
92 self
.imem
= ConfigFetchUnit(pspec
).fu
93 # one-row cache of instruction read
94 self
.iline
= Signal(64) # one instruction line
95 self
.iprev_adr
= Signal(64) # previous address: if different, do read
98 self
.dbg
= CoreDebug()
100 # instruction go/monitor
101 self
.pc_o
= Signal(64, reset_less
=True)
102 self
.pc_i
= Data(64, "pc_i") # set "ok" to indicate "please change me"
103 self
.core_bigendian_i
= Signal()
104 self
.busy_o
= Signal(reset_less
=True)
105 self
.memerr_o
= Signal(reset_less
=True)
107 # STATE regfile read/write ports for PC and MSR (DEC/TB use the FAST regfile, see tb_dec_fsm)
108 staterf
= self
.core
.regs
.rf
['state']
109 self
.state_r_pc
= staterf
.r_ports
['cia'] # PC rd
110 self
.state_w_pc
= staterf
.w_ports
['d_wr1'] # PC wr
111 self
.state_r_msr
= staterf
.r_ports
['msr'] # MSR rd
113 # DMI interface access
114 intrf
= self
.core
.regs
.rf
['int']
115 crrf
= self
.core
.regs
.rf
['cr']
116 xerrf
= self
.core
.regs
.rf
['xer']
117 self
.int_r
= intrf
.r_ports
['dmi'] # INT read
118 self
.cr_r
= crrf
.r_ports
['full_cr_dbg'] # CR read
119 self
.xer_r
= xerrf
.r_ports
['full_xer'] # XER read
121 # hack method of keeping an eye on whether branch/trap set the PC
122 self
.state_nia
= self
.core
.regs
.rf
['state'].w_ports
['nia']
123 self
.state_nia
.wen
.name
= 'state_nia_wen'
125 def elaborate(self
, platform
):
127 comb
, sync
= m
.d
.comb
, m
.d
.sync
129 m
.submodules
.core
= core
= DomainRenamer("coresync")(self
.core
)
130 m
.submodules
.imem
= imem
= self
.imem
131 m
.submodules
.dbg
= dbg
= self
.dbg
133 m
.submodules
.jtag
= jtag
= self
.jtag
134 # TODO: UART2GDB mux, here, from external pin
135 # see https://bugs.libre-soc.org/show_bug.cgi?id=499
136 sync
+= dbg
.dmi
.connect_to(jtag
.dmi
)
138 cur_state
= self
.cur_state
140 # XICS interrupt handler
142 m
.submodules
.xics_icp
= icp
= self
.xics_icp
143 m
.submodules
.xics_ics
= ics
= self
.xics_ics
144 comb
+= icp
.ics_i
.eq(ics
.icp_o
) # connect ICS to ICP
145 sync
+= cur_state
.eint
.eq(icp
.core_irq_o
) # connect ICP to core
147 # GPIO test peripheral
149 m
.submodules
.simple_gpio
= simple_gpio
= self
.simple_gpio
151 # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
152 # XXX causes litex ECP5 test to get wrong idea about input and output
153 # (but works with verilator sim *sigh*)
154 #if self.gpio and self.xics:
155 # comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
157 # instruction decoder
158 pdecode
= create_pdecode()
159 m
.submodules
.dec2
= pdecode2
= self
.pdecode2
162 dmi
, d_reg
, d_cr
, d_xer
, = dbg
.dmi
, dbg
.d_gpr
, dbg
.d_cr
, dbg
.d_xer
163 intrf
= self
.core
.regs
.rf
['int']
165 # clock delay power-on reset
166 cd_por
= ClockDomain(reset_less
=True)
167 cd_sync
= ClockDomain()
168 core_sync
= ClockDomain("coresync")
169 m
.domains
+= cd_por
, cd_sync
, core_sync
171 ti_rst
= Signal(reset_less
=True)
172 delay
= Signal(range(4), reset
=3)
173 with m
.If(delay
!= 0):
174 m
.d
.por
+= delay
.eq(delay
- 1)
175 comb
+= cd_por
.clk
.eq(ClockSignal())
# power-on reset delay: keep the core domain in reset while the POR
# down-counter ("delay") is still non-zero, or whenever the debug unit
# or the default-domain reset requests it.
core_rst = ResetSignal("coresync")
# NOTE: "|" binds tighter than "!=" in Python, so the comparison has to be
# parenthesised.  without the parentheses this evaluated as
# delay != (0 | dbg.core_rst_o | ResetSignal()), which wrongly de-asserts
# the reset whenever delay happens to equal the OR of the reset sources.
comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
comb += core_rst.eq(ti_rst)
182 # busy/halted signals from core
183 comb
+= self
.busy_o
.eq(core
.busy_o
)
184 comb
+= pdecode2
.dec
.bigendian
.eq(self
.core_bigendian_i
)
186 # temporary hack: says "go" immediately for both address gen and ST
188 ldst
= core
.fus
.fus
['ldst0']
189 st_go_edge
= rising_edge(m
, ldst
.st
.rel_o
)
190 m
.d
.comb
+= ldst
.ad
.go_i
.eq(ldst
.ad
.rel_o
) # link addr-go direct to rel
191 m
.d
.comb
+= ldst
.st
.go_i
.eq(st_go_edge
) # link store-go to rising rel
193 # PC and instruction from I-Memory
194 pc_changed
= Signal() # note write to PC
195 comb
+= self
.pc_o
.eq(cur_state
.pc
)
198 # next instruction (+4 on current)
199 nia
= Signal(64, reset_less
=True)
200 comb
+= nia
.eq(cur_state
.pc
+ 4)
203 pc
= Signal(64, reset_less
=True)
204 pc_ok_delay
= Signal()
205 sync
+= pc_ok_delay
.eq(~self
.pc_i
.ok
)
206 with m
.If(self
.pc_i
.ok
):
207 # incoming override (start from pc_i)
208 comb
+= pc
.eq(self
.pc_i
.data
)
210 # otherwise read StateRegs regfile for PC...
211 comb
+= self
.state_r_pc
.ren
.eq(1<<StateRegs
.PC
)
212 # ... but on a 1-clock delay
213 with m
.If(pc_ok_delay
):
214 comb
+= pc
.eq(self
.state_r_pc
.data_o
)
216 # don't write pc every cycle
217 comb
+= self
.state_w_pc
.wen
.eq(0)
218 comb
+= self
.state_w_pc
.data_i
.eq(0)
220 # don't read msr every cycle
221 comb
+= self
.state_r_msr
.ren
.eq(0)
222 msr_read
= Signal(reset
=1)
224 # connect up debug signals
225 # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
226 comb
+= dbg
.terminate_i
.eq(core
.core_terminate_o
)
227 comb
+= dbg
.state
.pc
.eq(pc
)
228 #comb += dbg.state.pc.eq(cur_state.pc)
229 comb
+= dbg
.state
.msr
.eq(cur_state
.msr
)
232 core_busy_o
= core
.busy_o
# core is busy
233 core_ivalid_i
= core
.ivalid_i
# instruction is valid
234 core_issue_i
= core
.issue_i
# instruction is issued
235 dec_opcode_i
= pdecode2
.dec
.raw_opcode_in
# raw opcode
237 insn_type
= core
.e
.do
.insn_type
239 # actually use a nmigen FSM for the first time (w00t)
240 # this FSM is perhaps unusual in that it detects conditions
241 # then "holds" information, combinatorially, for the core
242 # (as opposed to using sync - which would be on a clock's delay)
243 # this includes the actual opcode, valid flags and so on.
247 with m
.State("IDLE"):
248 sync
+= pc_changed
.eq(0)
250 sync
+= core
.raw_insn_i
.eq(0)
251 sync
+= core
.bigendian_i
.eq(0)
252 with m
.If(~dbg
.core_stop_o
& ~core_rst
):
253 # instruction allowed to go: start by reading the PC
254 # capture the PC and also drop it into Insn Memory
255 # we have joined a pair of combinatorial memory
256 # lookups together. this is Generally Bad.
257 comb
+= self
.imem
.a_pc_i
.eq(pc
)
258 comb
+= self
.imem
.a_valid_i
.eq(1)
259 comb
+= self
.imem
.f_valid_i
.eq(1)
260 sync
+= cur_state
.pc
.eq(pc
)
262 # initiate read of MSR. arrives one clock later
263 comb
+= self
.state_r_msr
.ren
.eq(1<<StateRegs
.MSR
)
264 sync
+= msr_read
.eq(0)
266 m
.next
= "INSN_READ" # move to "wait for bus" phase
268 comb
+= core
.core_stopped_i
.eq(1)
269 comb
+= dbg
.core_stopped_i
.eq(1)
271 # dummy pause to find out why simulation is not keeping up
272 with m
.State("INSN_READ"):
273 # one cycle later, msr read arrives. valid only once.
274 with m
.If(~msr_read
):
275 sync
+= msr_read
.eq(1) # yeah don't read it again
276 sync
+= cur_state
.msr
.eq(self
.state_r_msr
.data_o
)
277 with m
.If(self
.imem
.f_busy_o
): # zzz...
278 # busy: stay in wait-read
279 comb
+= self
.imem
.a_valid_i
.eq(1)
280 comb
+= self
.imem
.f_valid_i
.eq(1)
282 # not busy: instruction fetched
283 f_instr_o
= self
.imem
.f_instr_o
284 if f_instr_o
.width
== 32:
287 insn
= f_instr_o
.word_select(cur_state
.pc
[2], 32)
288 comb
+= dec_opcode_i
.eq(insn
) # actual opcode
289 sync
+= core
.e
.eq(pdecode2
.e
)
290 sync
+= core
.state
.eq(cur_state
)
291 sync
+= core
.raw_insn_i
.eq(dec_opcode_i
)
292 sync
+= core
.bigendian_i
.eq(self
.core_bigendian_i
)
293 sync
+= ilatch
.eq(insn
) # latch current insn
294 # also drop PC and MSR into decode "state"
295 m
.next
= "INSN_START" # move to "start"
297 # instruction fetched and decoded: flag it as valid and issue it to the core
298 with m
.State("INSN_START"):
299 comb
+= core_ivalid_i
.eq(1) # instruction is valid
300 comb
+= core_issue_i
.eq(1) # and issued
302 m
.next
= "INSN_ACTIVE" # move to "wait completion"
304 # instruction started: must wait till it finishes
305 with m
.State("INSN_ACTIVE"):
306 with m
.If(insn_type
!= MicrOp
.OP_NOP
):
307 comb
+= core_ivalid_i
.eq(1) # instruction is valid
308 with m
.If(self
.state_nia
.wen
& (1<<StateRegs
.PC
)):
309 sync
+= pc_changed
.eq(1)
310 with m
.If(~core_busy_o
): # instruction done!
311 # ok here we are not reading the branch unit. TODO
312 # this just blithely overwrites whatever pipeline
314 with m
.If(~pc_changed
):
315 comb
+= self
.state_w_pc
.wen
.eq(1<<StateRegs
.PC
)
316 comb
+= self
.state_w_pc
.data_i
.eq(nia
)
318 sync
+= core
.raw_insn_i
.eq(0)
319 sync
+= core
.bigendian_i
.eq(0)
320 m
.next
= "IDLE" # back to idle
322 # this bit doesn't have to be in the FSM: connect up to read
323 # regfiles on demand from DMI
324 with m
.If(d_reg
.req
): # request for regfile access being made
325 # TODO: error-check this
326 # XXX should this be combinatorial? sync better?
328 comb
+= self
.int_r
.ren
.eq(1<<d_reg
.addr
)
330 comb
+= self
.int_r
.addr
.eq(d_reg
.addr
)
331 comb
+= self
.int_r
.ren
.eq(1)
332 d_reg_delay
= Signal()
333 sync
+= d_reg_delay
.eq(d_reg
.req
)
334 with m
.If(d_reg_delay
):
335 # data arrives one clock later
336 comb
+= d_reg
.data
.eq(self
.int_r
.data_o
)
337 comb
+= d_reg
.ack
.eq(1)
339 # sigh same thing for CR debug
340 with m
.If(d_cr
.req
): # request for regfile access being made
341 comb
+= self
.cr_r
.ren
.eq(0b11111111) # enable all
342 d_cr_delay
= Signal()
343 sync
+= d_cr_delay
.eq(d_cr
.req
)
344 with m
.If(d_cr_delay
):
345 # data arrives one clock later
346 comb
+= d_cr
.data
.eq(self
.cr_r
.data_o
)
347 comb
+= d_cr
.ack
.eq(1)
350 with m
.If(d_xer
.req
): # request for regfile access being made
351 comb
+= self
.xer_r
.ren
.eq(0b111111) # enable all
352 d_xer_delay
= Signal()
353 sync
+= d_xer_delay
.eq(d_xer
.req
)
354 with m
.If(d_xer_delay
):
355 # data arrives one clock later
356 comb
+= d_xer
.data
.eq(self
.xer_r
.data_o
)
357 comb
+= d_xer
.ack
.eq(1)
359 # DEC and TB inc/dec FSM
360 self
.tb_dec_fsm(m
, cur_state
.dec
)
364 def tb_dec_fsm(self
, m
, spr_dec
):
367 this is a FSM for updating either dec or tb. it runs alternately
368 DEC, TB, DEC, TB. note that SPR pipeline could have written a new
369 value to DEC, however the regfile has "passthrough" on it so this
372 see v3.0B p1097-1099 for Timer Resource and p1065 and p1076
375 comb
, sync
= m
.d
.comb
, m
.d
.sync
376 fast_rf
= self
.core
.regs
.rf
['fast']
377 fast_r_dectb
= fast_rf
.r_ports
['issue'] # DEC/TB
378 fast_w_dectb
= fast_rf
.w_ports
['issue'] # DEC/TB
382 # initiates read of current DEC
383 with m
.State("DEC_READ"):
384 comb
+= fast_r_dectb
.addr
.eq(FastRegs
.DEC
)
385 comb
+= fast_r_dectb
.ren
.eq(1)
388 # waits for DEC read to arrive (1 cycle), updates with new value
389 with m
.State("DEC_WRITE"):
391 # TODO: MSR.LPCR 32-bit decrement mode
392 comb
+= new_dec
.eq(fast_r_dectb
.data_o
- 1)
393 comb
+= fast_w_dectb
.addr
.eq(FastRegs
.DEC
)
394 comb
+= fast_w_dectb
.wen
.eq(1)
395 comb
+= fast_w_dectb
.data_i
.eq(new_dec
)
396 sync
+= spr_dec
.eq(new_dec
) # copy into cur_state for decoder
399 # initiates read of current TB
400 with m
.State("TB_READ"):
401 comb
+= fast_r_dectb
.addr
.eq(FastRegs
.TB
)
402 comb
+= fast_r_dectb
.ren
.eq(1)
405 # waits for read TB to arrive, initiates write of current TB
406 with m
.State("TB_WRITE"):
408 comb
+= new_tb
.eq(fast_r_dectb
.data_o
+ 1)
409 comb
+= fast_w_dectb
.addr
.eq(FastRegs
.TB
)
410 comb
+= fast_w_dectb
.wen
.eq(1)
411 comb
+= fast_w_dectb
.data_i
.eq(new_tb
)
417 yield from self
.pc_i
.ports()
420 yield from self
.core
.ports()
421 yield from self
.imem
.ports()
422 yield self
.core_bigendian_i
428 def external_ports(self
):
429 ports
= self
.pc_i
.ports()
430 ports
+= [self
.pc_o
, self
.memerr_o
, self
.core_bigendian_i
, self
.busy_o
,
434 ports
+= list(self
.jtag
.external_ports())
436 # don't add DMI if JTAG is enabled
437 ports
+= list(self
.dbg
.dmi
.ports())
439 ports
+= list(self
.imem
.ibus
.fields
.values())
440 ports
+= list(self
.core
.l0
.cmpi
.lsmem
.lsi
.slavebus
.fields
.values())
443 ports
+= list(self
.xics_icp
.bus
.fields
.values())
444 ports
+= list(self
.xics_ics
.bus
.fields
.values())
445 ports
.append(self
.int_level_i
)
448 ports
+= list(self
.simple_gpio
.bus
.fields
.values())
449 ports
.append(self
.gpio_o
)
457 class TestIssuer(Elaboratable
):
458 def __init__(self
, pspec
):
459 self
.ti
= TestIssuerInternal(pspec
)
461 self
.pll
= DummyPLL()
462 self
.clksel
= ClockSelect()
464 # PLL direct clock or not
465 self
.pll_en
= hasattr(pspec
, "use_pll") and pspec
.use_pll
467 def elaborate(self
, platform
):
471 # TestIssuer runs at direct clock
472 m
.submodules
.ti
= ti
= self
.ti
473 cd_int
= ClockDomain("coresync")
475 # ClockSelect runs at PLL output internal clock rate
476 m
.submodules
.clksel
= clksel
= DomainRenamer("pllclk")(self
.clksel
)
477 m
.submodules
.pll
= pll
= self
.pll
479 # add 2 clock domains established above...
480 cd_pll
= ClockDomain("pllclk")
483 # internal clock is set to selector clock-out. has the side-effect of
484 # running the core at this speed (see DomainRenamer("coresync") in TestIssuerInternal)
485 intclk
= ClockSignal("coresync")
487 comb
+= intclk
.eq(clksel
.core_clk_o
)
489 comb
+= intclk
.eq(ClockSignal())
491 # PLL clock established. has the side-effect of running clksel
492 # at the PLL's speed (see DomainRenamer("pllclk") above)
493 pllclk
= ClockSignal("pllclk")
494 comb
+= pllclk
.eq(pll
.clk_pll_o
)
496 # wire up external 24mhz to PLL and clksel
497 comb
+= clksel
.clk_24_i
.eq(ClockSignal())
498 comb
+= pll
.clk_24_i
.eq(clksel
.clk_24_i
)
500 # now wire up ResetSignals. don't mind them all being in this domain
501 #int_rst = ResetSignal("coresync")
502 pll_rst
= ResetSignal("pllclk")
503 #comb += int_rst.eq(ResetSignal())
504 comb
+= pll_rst
.eq(ResetSignal())
509 return list(self
.ti
.ports()) + list(self
.pll
.ports()) + \
510 [ClockSignal(), ResetSignal()] + \
511 list(self
.clksel
.ports())
513 def external_ports(self
):
514 ports
= self
.ti
.external_ports()
515 ports
.append(ClockSignal())
516 ports
.append(ResetSignal())
517 ports
.append(self
.clksel
.clk_sel_i
)
518 ports
.append(self
.clksel
.pll_48_o
)
522 if __name__
== '__main__':
523 units
= {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
529 pspec
= TestMemPspec(ldst_ifacetype
='bare_wb',
530 imem_ifacetype
='bare_wb',
535 dut
= TestIssuer(pspec
)
536 vl
= main(dut
, ports
=dut
.ports(), name
="test_issuer")
538 if len(sys
.argv
) == 1:
539 vl
= rtlil
.convert(dut
, ports
=dut
.external_ports(), name
="test_issuer")
540 with
open("test_issuer.il", "w") as f
: