86b1b3042bc34ff181e75d2c5817bf4b7211c7a1
3 not in any way intended for production use. this runs a FSM that:
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
10 * does it all over again
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to at least provide something that can be further incrementally
18 from nmigen
import (Elaboratable
, Module
, Signal
, ClockSignal
, ResetSignal
,
19 ClockDomain
, DomainRenamer
)
20 from nmigen
.cli
import rtlil
21 from nmigen
.cli
import main
24 from soc
.decoder
.power_decoder
import create_pdecode
25 from soc
.decoder
.power_decoder2
import PowerDecode2
26 from soc
.decoder
.decode2execute1
import IssuerDecode2ToOperand
27 from soc
.decoder
.decode2execute1
import Data
28 from soc
.experiment
.testmem
import TestMemory
# test only for instructions
29 from soc
.regfile
.regfiles
import StateRegs
, FastRegs
30 from soc
.simple
.core
import NonProductionCore
31 from soc
.config
.test
.test_loadstore
import TestMemPspec
32 from soc
.config
.ifetch
import ConfigFetchUnit
33 from soc
.decoder
.power_enums
import MicrOp
34 from soc
.debug
.dmi
import CoreDebug
, DMIInterface
35 from soc
.debug
.jtag
import JTAG
, dummy_pinset
# TODO, full pinset
36 from soc
.config
.state
import CoreState
37 from soc
.interrupts
.xics
import XICS_ICP
, XICS_ICS
38 from soc
.bus
.simple_gpio
import SimpleGPIO
39 from soc
.clock
.select
import ClockSelect
, DummyPLL
42 from nmutil
.util
import rising_edge
class TestIssuerInternal(Elaboratable):
    """TestIssuer - reads instructions from TestMemory and issues them

    efficiency and speed is not the main goal here: functional correctness is.

    the issue loop built by elaborate() is a four-state FSM:

    * IDLE        - pick up the PC, start instruction fetch and MSR read
    * INSN_READ   - wait for the fetch unit, decode, latch into the core
    * INSN_START  - raise valid + issue for one cycle
    * INSN_ACTIVE - hold valid until the core is no longer busy, then
                    write PC+4 back unless a PC write was observed

    a second, independent FSM (tb_dec_fsm) updates DEC and TB.
    """

    def __init__(self, pspec):
        """create the core, decoder, fetch unit and debug interfaces.

        pspec: configuration object, passed on to NonProductionCore and
               ConfigFetchUnit.  three optional attributes are looked at:
               * xics  == True   : add XICS ICP/ICS interrupt controllers
               * gpio  == True   : add the SimpleGPIO test peripheral
               * debug == 'jtag' : add a JTAG interface
        """
        # add interrupt controller?
        self.xics = getattr(pspec, "xics", False) == True
        if self.xics:
            self.xics_icp = XICS_ICP()
            self.xics_ics = XICS_ICS()
            self.int_level_i = self.xics_ics.int_level_i

        # add GPIO peripheral?
        self.gpio = getattr(pspec, "gpio", False) == True
        if self.gpio:
            self.simple_gpio = SimpleGPIO()
            self.gpio_o = self.simple_gpio.gpio_o

        # main instruction core
        self.core = NonProductionCore(pspec)

        # instruction decoder.  goes into Trap Record
        pdecode = create_pdecode()
        self.cur_state = CoreState("cur")  # current state (MSR/PC/EINT)
        self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state,
                                     opkls=IssuerDecode2ToOperand)

        # Test Instruction memory
        self.imem = ConfigFetchUnit(pspec).fu
        # one-row cache of instruction read
        self.iline = Signal(64)      # one instruction line
        self.iprev_adr = Signal(64)  # previous address: if different, do read

        # DMI interface
        self.dbg = CoreDebug()

        # JTAG interface (optional)
        self.jtag_en = getattr(pspec, "debug", None) == 'jtag'
        if self.jtag_en:
            self.jtag = JTAG(dummy_pinset())

        # instruction go/monitor
        self.pc_o = Signal(64, reset_less=True)
        self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
        self.core_bigendian_i = Signal()
        self.busy_o = Signal(reset_less=True)
        self.memerr_o = Signal(reset_less=True)

        # STATE regfile read/write ports for PC and MSR
        staterf = self.core.regs.rf['state']
        self.state_r_pc = staterf.r_ports['cia']     # PC rd
        self.state_w_pc = staterf.w_ports['d_wr1']   # PC wr
        self.state_r_msr = staterf.r_ports['msr']    # MSR rd

        # DMI interface access
        intrf = self.core.regs.rf['int']
        crrf = self.core.regs.rf['cr']
        xerrf = self.core.regs.rf['xer']
        self.int_r = intrf.r_ports['dmi']            # INT read
        self.cr_r = crrf.r_ports['full_cr_dbg']      # CR read
        self.xer_r = xerrf.r_ports['full_xer']       # XER read

        # hack method of keeping an eye on whether branch/trap set the PC
        self.state_nia = staterf.w_ports['nia']
        self.state_nia.wen.name = 'state_nia_wen'

    def elaborate(self, platform):
        """build the issuer: submodules, power-on reset, issue FSM, DMI reads.

        platform: unused here.
        returns the nmigen Module.

        note: local variable names are significant.  nmigen derives Signal
        and ClockDomain names from them (e.g. cd_por creates the "por"
        domain used by m.d.por below), so do not rename them casually.
        """
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        m.submodules.core = core = DomainRenamer("coresync")(self.core)
        m.submodules.imem = self.imem
        m.submodules.dbg = dbg = self.dbg
        if self.jtag_en:
            m.submodules.jtag = jtag = self.jtag
            # TODO: UART2GDB mux, here, from external pin
            # see https://bugs.libre-soc.org/show_bug.cgi?id=499
            sync += dbg.dmi.connect_to(jtag.dmi)

        cur_state = self.cur_state

        # XICS interrupt handler
        if self.xics:
            m.submodules.xics_icp = icp = self.xics_icp
            m.submodules.xics_ics = ics = self.xics_ics
            comb += icp.ics_i.eq(ics.icp_o)            # connect ICS to ICP
            sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core

        # GPIO test peripheral
        if self.gpio:
            m.submodules.simple_gpio = simple_gpio = self.simple_gpio

        # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
        if self.gpio and self.xics:
            comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

        # instruction decoder (created in __init__)
        m.submodules.dec2 = pdecode2 = self.pdecode2

        # convenience
        d_reg, d_cr, d_xer = dbg.d_gpr, dbg.d_cr, dbg.d_xer
        intrf = self.core.regs.rf['int']

        # clock delay power-on reset
        cd_por = ClockDomain(reset_less=True)
        cd_sync = ClockDomain()
        core_sync = ClockDomain("coresync")
        m.domains += cd_por, cd_sync, core_sync

        # counts down 3..0 after power-on, then stays at zero
        delay = Signal(range(4), reset=3)
        with m.If(delay != 0):
            m.d.por += delay.eq(delay - 1)
        comb += cd_por.clk.eq(ClockSignal())
        comb += core_sync.clk.eq(ClockSignal())
        # hold the core in reset during the power-on delay, or whenever the
        # debug interface requests it.  the brackets are essential: "|" binds
        # tighter than "!=" in python, so without them this would compare
        # delay against (0 | core_rst_o).
        comb += core.core_reset_i.eq((delay != 0) | dbg.core_rst_o)

        # busy/halted signals from core
        comb += self.busy_o.eq(core.busy_o)
        comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

        # temporary hack: says "go" immediately for both address gen and ST
        ldst = core.fus.fus['ldst0']
        st_go_edge = rising_edge(m, ldst.st.rel_o)
        comb += ldst.ad.go_i.eq(ldst.ad.rel_o)  # link addr-go direct to rel
        comb += ldst.st.go_i.eq(st_go_edge)     # link store-go to rising rel

        # PC and instruction from I-Memory
        pc_changed = Signal()  # note write to PC
        comb += self.pc_o.eq(cur_state.pc)
        ilatch = Signal(32)    # copy of the most recently fetched instruction

        # next instruction (+4 on current)
        nia = Signal(64, reset_less=True)
        comb += nia.eq(cur_state.pc + 4)

        # read the PC
        pc = Signal(64, reset_less=True)
        pc_ok_delay = Signal()
        sync += pc_ok_delay.eq(~self.pc_i.ok)
        with m.If(self.pc_i.ok):
            # incoming override (start from pc_i)
            comb += pc.eq(self.pc_i.data)
        with m.Else():
            # otherwise read StateRegs regfile for PC...
            comb += self.state_r_pc.ren.eq(1 << StateRegs.PC)
        # ... but on a 1-clock delay
        with m.If(pc_ok_delay):
            comb += pc.eq(self.state_r_pc.data_o)

        # don't write pc every cycle
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.data_i.eq(0)

        # don't read msr every cycle
        comb += self.state_r_msr.ren.eq(0)
        msr_read = Signal(reset=1)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        #comb += dbg.state.pc.eq(cur_state.pc)
        comb += dbg.state.msr.eq(cur_state.msr)

        # temporaries
        core_busy_o = core.busy_o                  # core is busy
        core_ivalid_i = core.ivalid_i              # instruction is valid
        core_issue_i = core.issue_i                # instruction is issued
        dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

        insn_type = core.e.do.insn_type

        # actually use a nmigen FSM for the first time (w00t)
        # this FSM is perhaps unusual in that it detects conditions
        # then "holds" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.
        with m.FSM():

            # waiting (zzz)
            with m.State("IDLE"):
                sync += pc_changed.eq(0)
                # NOTE(review): core.e is latched in INSN_READ but is not
                # cleared here alongside raw_insn_i/bigendian_i - confirm
                # whether that is intentional.
                sync += core.raw_insn_i.eq(0)
                sync += core.bigendian_i.eq(0)
                with m.If(~dbg.core_stop_o & ~core.core_reset_i):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                    sync += cur_state.pc.eq(pc)

                    # initiate read of MSR.  arrives one clock later
                    comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase
                with m.Else():
                    # stopped (or in reset): tell both core and debug
                    comb += core.core_stopped_i.eq(1)
                    comb += dbg.core_stopped_i.eq(1)

            # waiting for instruction bus (stays there until not busy)
            with m.State("INSN_READ"):
                # one cycle later, msr read arrives.  valid only once.
                with m.If(~msr_read):
                    sync += msr_read.eq(1)  # yeah don't read it again
                    sync += cur_state.msr.eq(self.state_r_msr.data_o)
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    f_instr_o = self.imem.f_instr_o
                    if f_instr_o.width == 32:
                        insn = f_instr_o
                    else:
                        # wider fetch: PC bit 2 picks the 32-bit word
                        insn = f_instr_o.word_select(cur_state.pc[2], 32)
                    comb += dec_opcode_i.eq(insn)  # actual opcode
                    # also drop PC and MSR into decode "state"
                    sync += core.e.eq(pdecode2.e)
                    sync += core.state.eq(cur_state)
                    sync += core.raw_insn_i.eq(dec_opcode_i)
                    sync += core.bigendian_i.eq(self.core_bigendian_i)
                    sync += ilatch.eq(insn)  # latch current insn
                    m.next = "INSN_START"  # move to "start"

            # instruction latched: flag it valid and issue it
            with m.State("INSN_START"):
                comb += core_ivalid_i.eq(1)  # instruction is valid
                comb += core_issue_i.eq(1)   # and issued

                m.next = "INSN_ACTIVE"  # move to "wait completion"

            # instruction started: must wait till it finishes
            with m.State("INSN_ACTIVE"):
                with m.If(insn_type != MicrOp.OP_NOP):
                    comb += core_ivalid_i.eq(1)  # instruction is valid
                with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
                    sync += pc_changed.eq(1)
                with m.If(~core_busy_o):  # instruction done!
                    # ok here we are not reading the branch unit.  TODO
                    # PC+4 is written back only when no write to the PC
                    # was spotted on the "nia" port while waiting.
                    with m.If(~pc_changed):
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.data_i.eq(nia)
                    sync += core.raw_insn_i.eq(0)
                    sync += core.bigendian_i.eq(0)
                    m.next = "IDLE"  # back to idle

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        with m.If(d_reg.req):  # request for regfile access being made
            # TODO: error-check this
            # XXX should this be combinatorial?  sync better?
            # NOTE(review): the one-hot (1<<addr) and the binary
            # (addr plus ren=1) forms cannot both apply to the same port.
            # selecting on intrf.unary is assumed here - confirm against
            # the regfile class.
            if intrf.unary:
                comb += self.int_r.ren.eq(1 << d_reg.addr)
            else:
                comb += self.int_r.addr.eq(d_reg.addr)
                comb += self.int_r.ren.eq(1)
        d_reg_delay = Signal()
        sync += d_reg_delay.eq(d_reg.req)
        with m.If(d_reg_delay):
            # data arrives one clock later
            comb += d_reg.data.eq(self.int_r.data_o)
            comb += d_reg.ack.eq(1)

        # sigh same thing for CR debug
        with m.If(d_cr.req):  # request for regfile access being made
            comb += self.cr_r.ren.eq(0b11111111)  # enable all
        d_cr_delay = Signal()
        sync += d_cr_delay.eq(d_cr.req)
        with m.If(d_cr_delay):
            # data arrives one clock later
            comb += d_cr.data.eq(self.cr_r.data_o)
            comb += d_cr.ack.eq(1)

        # aaand XER...
        with m.If(d_xer.req):  # request for regfile access being made
            comb += self.xer_r.ren.eq(0b111111)  # enable all
        d_xer_delay = Signal()
        sync += d_xer_delay.eq(d_xer.req)
        with m.If(d_xer_delay):
            # data arrives one clock later
            comb += d_xer.data.eq(self.xer_r.data_o)
            comb += d_xer.ack.eq(1)

        # DEC and TB inc/dec FSM
        self.tb_dec_fsm(m, cur_state.dec)

        return m

    def tb_dec_fsm(self, m, spr_dec):
        """tb_dec_fsm

        this is a FSM for updating either dec or tb.  it runs alternately
        DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
        value to DEC, however the regfile has "passthrough" on it so this
        should be ok.

        see v3.0B p1097-1099 for Timer Resource and p1065 and p1076

        m:       the Module to which the FSM is added
        spr_dec: signal receiving a copy of each new DEC value
                 (the caller passes cur_state.dec, for the decoder)
        """
        comb, sync = m.d.comb, m.d.sync
        fast_rf = self.core.regs.rf['fast']
        fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
        fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB

        with m.FSM():

            # initiates read of current DEC
            with m.State("DEC_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.DEC)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "DEC_WRITE"

            # waits for DEC read to arrive (1 cycle), updates with new value
            with m.State("DEC_WRITE"):
                # sized from the read port rather than hard-coding a width
                new_dec = Signal.like(fast_r_dectb.data_o, name="new_dec")
                # TODO: MSR.LPCR 32-bit decrement mode
                comb += new_dec.eq(fast_r_dectb.data_o - 1)
                comb += fast_w_dectb.addr.eq(FastRegs.DEC)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_dec)
                sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
                m.next = "TB_READ"

            # initiates read of current TB
            with m.State("TB_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.TB)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "TB_WRITE"

            # waits for read TB to arrive, initiates write of current TB
            with m.State("TB_WRITE"):
                new_tb = Signal.like(fast_r_dectb.data_o, name="new_tb")
                comb += new_tb.eq(fast_r_dectb.data_o + 1)
                comb += fast_w_dectb.addr.eq(FastRegs.TB)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_tb)
                m.next = "DEC_READ"

    # NOTE(review): the header of this generator was lost in extraction.
    # __iter__ plus ports() is assumed, since TestIssuer calls ti.ports().
    # pc_o, memerr_o and busy_o are listed to stay in step with
    # external_ports() - confirm.
    def __iter__(self):
        """yield every signal making up the simulation/conversion ports."""
        yield from self.pc_i.ports()
        yield self.pc_o
        yield self.memerr_o
        yield from self.core.ports()
        yield from self.imem.ports()
        yield self.core_bigendian_i
        yield self.busy_o

    def ports(self):
        """return the signals produced by iterating this object, as a list."""
        return list(self)

    def external_ports(self):
        """return the list of signals to expose at the top level.

        always included: pc_i, pc_o, memerr_o, core_bigendian_i, busy_o,
        the instruction bus and the load/store slave bus.  JTAG pins
        replace the raw DMI interface when JTAG is enabled.  XICS and
        GPIO buses are added only when those peripherals were requested.
        """
        # copy: the list returned by pc_i.ports() must not be extended
        ports = list(self.pc_i.ports())
        ports += [self.pc_o, self.memerr_o, self.core_bigendian_i,
                  self.busy_o]

        if self.jtag_en:
            ports += list(self.jtag.external_ports())
        else:
            # don't add DMI if JTAG is enabled
            ports += list(self.dbg.dmi.ports())

        ports += list(self.imem.ibus.fields.values())
        ports += list(self.core.l0.cmpi.lsmem.lsi.slavebus.fields.values())

        if self.xics:
            ports += list(self.xics_icp.bus.fields.values())
            ports += list(self.xics_ics.bus.fields.values())
            ports.append(self.int_level_i)

        if self.gpio:
            ports += list(self.simple_gpio.bus.fields.values())
            ports.append(self.gpio_o)

        return ports
class TestIssuer(Elaboratable):
    """TestIssuerInternal plus a (dummy) PLL and a clock selector.

    the internal issuer is renamed into the "intclk" domain and the clock
    selector into the "pllclk" domain; both are driven from the signals
    wired up in elaborate().
    """

    def __init__(self, pspec):
        """pspec: configuration object, handed on to TestIssuerInternal."""
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL()
        self.clksel = ClockSelect()

    def elaborate(self, platform):
        """instantiate issuer, PLL and clock selector, and wire up clocks.

        platform: unused here.
        returns the nmigen Module.
        """
        m = Module()
        comb = m.d.comb

        # TestIssuer runs at internal clock rate
        m.submodules.ti = DomainRenamer("intclk")(self.ti)
        # ClockSelect runs at PLL output internal clock rate
        m.submodules.clksel = clksel = DomainRenamer("pllclk")(self.clksel)
        m.submodules.pll = pll = self.pll

        # add 2 clock domains established above...
        cd_int = ClockDomain("intclk")
        cd_pll = ClockDomain("pllclk")
        # probably don't have to add cd_int because of DomainRenamer("coresync")
        # NOTE(review): the registration line itself was lost in extraction.
        # adding only cd_pll is assumed from the comment above - confirm.
        m.domains += cd_pll

        # internal clock is set to selector clock-out.  has the side-effect of
        # running TestIssuer at this speed (see DomainRenamer("intclk") above)
        comb += cd_int.clk.eq(clksel.core_clk_o)

        # PLL clock established.  has the side-effect of running clksel
        # at the PLL's speed (see DomainRenamer("pllclk") above)
        comb += cd_pll.clk.eq(pll.clk_pll_o)

        # wire up external 24mhz to PLL and clksel
        comb += clksel.clk_24_i.eq(ClockSignal())
        comb += pll.clk_24_i.eq(clksel.clk_24_i)

        # now wire up ResetSignals.  don't mind them all being in this domain
        comb += pll.rst.eq(ResetSignal())
        comb += clksel.rst.eq(ResetSignal())

        return m

    def ports(self):
        """return issuer, PLL and clock-selector ports plus clock and reset."""
        return list(self.ti.ports()) + list(self.pll.ports()) + \
               [ClockSignal(), ResetSignal()] + \
               list(self.clksel.ports())

    def external_ports(self):
        """return the issuer's external ports plus the clock-select pins."""
        ports = self.ti.external_ports()
        #ports.append(ClockSignal())
        #ports.append(ResetSignal())
        ports.append(self.clksel.clk_sel_i)
        ports.append(self.clksel.pll_48_o)
        return ports
494 if __name__
== '__main__':
495 units
= {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
501 pspec
= TestMemPspec(ldst_ifacetype
='bare_wb',
502 imem_ifacetype
='bare_wb',
507 dut
= TestIssuer(pspec
)
508 vl
= main(dut
, ports
=dut
.ports(), name
="test_issuer")
510 if len(sys
.argv
) == 1:
511 vl
= rtlil
.convert(dut
, ports
=dut
.external_ports(), name
="test_issuer")
512 with
open("test_issuer.il", "w") as f
: