3 not in any way intended for production use. this runs a FSM that:
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
10 * does it all over again
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to provide something that can be further incrementally
18 from nmigen
import (Elaboratable
, Module
, Signal
, ClockSignal
, ResetSignal
,
19 ClockDomain
, DomainRenamer
, Mux
, Const
, Repl
, Cat
)
20 from nmigen
.cli
import rtlil
21 from nmigen
.cli
import main
24 from nmutil
.singlepipe
import ControlBase
25 from soc
.simple
.core_data
import FetchOutput
, FetchInput
27 from nmigen
.lib
.coding
import PriorityEncoder
29 from openpower
.decoder
.power_decoder
import create_pdecode
30 from openpower
.decoder
.power_decoder2
import PowerDecode2
, SVP64PrefixDecoder
31 from openpower
.decoder
.decode2execute1
import IssuerDecode2ToOperand
32 from openpower
.decoder
.decode2execute1
import Data
33 from openpower
.decoder
.power_enums
import (MicrOp
, SVP64PredInt
, SVP64PredCR
,
35 from openpower
.state
import CoreState
36 from openpower
.consts
import (CR
, SVP64CROffs
, MSR
)
37 from soc
.experiment
.testmem
import TestMemory
# test only for instructions
38 from soc
.regfile
.regfiles
import StateRegs
, FastRegs
39 from soc
.simple
.core
import NonProductionCore
40 from soc
.config
.test
.test_loadstore
import TestMemPspec
41 from soc
.config
.ifetch
import ConfigFetchUnit
42 from soc
.debug
.dmi
import CoreDebug
, DMIInterface
43 from soc
.debug
.jtag
import JTAG
44 from soc
.config
.pinouts
import get_pinspecs
45 from soc
.interrupts
.xics
import XICS_ICP
, XICS_ICS
46 from soc
.bus
.simple_gpio
import SimpleGPIO
47 from soc
.bus
.SPBlock512W64B8W
import SPBlock512W64B8W
48 from soc
.clock
.select
import ClockSelect
49 from soc
.clock
.dummypll
import DummyPLL
50 from openpower
.sv
.svstate
import SVSTATERec
51 from soc
.experiment
.icache
import ICache
53 from nmutil
.util
import rising_edge
def get_insn(f_instr_o, pc):
    """Return the 32-bit instruction word from a fetched memory row.

    f_instr_o: instruction-fetch output (32- or 64-bit wide port)
    pc: program counter used to pick the word within a 64-bit row

    A 32-bit-wide fetch port already holds exactly one instruction, so
    it is returned unchanged.  For a 64-bit row, bit 2 of the PC selects
    the lower or upper 32-bit word.
    """
    if f_instr_o.width == 32:
        return f_instr_o
    # 64-bit: bit 2 of pc decides which word to select
    return f_instr_o.word_select(pc[2], 32)
63 # gets state input or reads from state regfile
def state_get(m, res, core_rst, state_i, name, regfile, regnum):
    """Drive *res* from an incoming override or from the State regfile.

    If state_i.ok is set, *res* takes the externally-supplied state_i.data
    (e.g. "please change PC to this value").  Otherwise a read of register
    *regnum* is issued on *regfile* and *res* picks up the result one clock
    later (res_ok_delay tracks the one-cycle read latency).  Everything is
    gated off while the core is held in reset.

    NOTE(review): the ~core_rst guard and the m.Else() nesting were elided
    in the captured text and are reconstructed here -- confirm against the
    upstream source.
    """
    comb = m.d.comb
    sync = m.d.sync
    # read the {insert state variable here}
    res_ok_delay = Signal(name="%s_ok_delay" % name)
    with m.If(~core_rst):
        sync += res_ok_delay.eq(~state_i.ok)
        with m.If(state_i.ok):
            # incoming override (start from pc_i)
            comb += res.eq(state_i.data)
        with m.Else():
            # otherwise read StateRegs regfile for {insert state here}...
            comb += regfile.ren.eq(1 << regnum)
            # ... but on a 1-clock delay
            with m.If(res_ok_delay):
                comb += res.eq(regfile.o_data)
def get_predint(m, mask, name):
    """decode SVP64 predicate integer mask field to reg number and invert

    this is identical to the equivalent function in ISACaller except that
    it doesn't read the INT directly, it just decodes "what needs to be done"
    i.e. which INT reg, whether it is shifted and whether it is bit-inverted.

    * all1s is set to indicate that no mask is to be applied.
    * regread indicates the GPR register number to be read
    * invert is set to indicate that the register value is to be inverted
    * unary indicates that the contents of the register is to be shifted 1<<r3
    """
    comb = m.d.comb
    regread = Signal(5, name=name+"regread")
    invert = Signal(name=name+"invert")
    unary = Signal(name=name+"unary")
    all1s = Signal(name=name+"all1s")
    with m.Switch(mask):
        with m.Case(SVP64PredInt.ALWAYS.value):
            comb += all1s.eq(1)  # use 0b1111 (all ones)
        with m.Case(SVP64PredInt.R3_UNARY.value):
            comb += regread.eq(3)
            comb += unary.eq(1)  # 1<<r3 - shift r3 (single bit)
        with m.Case(SVP64PredInt.R3.value):
            comb += regread.eq(3)
        with m.Case(SVP64PredInt.R3_N.value):
            # _N variant: same register, mask bit-inverted
            comb += regread.eq(3)
            comb += invert.eq(1)
        with m.Case(SVP64PredInt.R10.value):
            comb += regread.eq(10)
        with m.Case(SVP64PredInt.R10_N.value):
            comb += regread.eq(10)
            comb += invert.eq(1)
        with m.Case(SVP64PredInt.R30.value):
            comb += regread.eq(30)
        with m.Case(SVP64PredInt.R30_N.value):
            comb += regread.eq(30)
            comb += invert.eq(1)
    return regread, invert, unary, all1s
def get_predcr(m, mask, name):
    """decode SVP64 predicate CR to reg number field and invert status

    this is identical to _get_predcr in ISACaller

    Each predicate tests one bit of a CR field; the complementary
    predicate (GE vs LT, LE vs GT, NE vs EQ, NS vs SO) reads the very
    same bit with the invert flag set.

    NOTE(review): the per-case invert assignments were elided in the
    captured text; the polarity below (direct test -> invert=0,
    complement -> invert=1) is reconstructed from the predicate
    semantics -- confirm against ISACaller's _get_predcr.
    """
    comb = m.d.comb
    idx = Signal(2, name=name+"idx")
    invert = Signal(name=name+"crinvert")
    with m.Switch(mask):
        with m.Case(SVP64PredCR.LT.value):
            comb += idx.eq(CR.LT)
            comb += invert.eq(0)
        with m.Case(SVP64PredCR.GE.value):
            comb += idx.eq(CR.LT)
            comb += invert.eq(1)
        with m.Case(SVP64PredCR.GT.value):
            comb += idx.eq(CR.GT)
            comb += invert.eq(0)
        with m.Case(SVP64PredCR.LE.value):
            comb += idx.eq(CR.GT)
            comb += invert.eq(1)
        with m.Case(SVP64PredCR.EQ.value):
            comb += idx.eq(CR.EQ)
            comb += invert.eq(0)
        with m.Case(SVP64PredCR.NE.value):
            comb += idx.eq(CR.EQ)
            comb += invert.eq(1)
        with m.Case(SVP64PredCR.SO.value):
            comb += idx.eq(CR.SO)
            comb += invert.eq(0)
        with m.Case(SVP64PredCR.NS.value):
            comb += idx.eq(CR.SO)
            comb += invert.eq(1)
    return idx, invert
159 class TestIssuerBase(Elaboratable
):
160 """TestIssuerBase - common base class for Issuers
162 takes care of power-on reset, peripherals, debug, DEC/TB,
163 and gets PC/MSR/SVSTATE from the State Regfile etc.
166 def __init__(self
, pspec
):
168 # test if microwatt compatibility is to be enabled
169 self
.microwatt_compat
= (hasattr(pspec
, "microwatt_compat") and
170 (pspec
.microwatt_compat
== True))
171 self
.alt_reset
= Signal(reset_less
=True) # not connected yet (microwatt)
173 # test is SVP64 is to be enabled
174 self
.svp64_en
= hasattr(pspec
, "svp64") and (pspec
.svp64
== True)
176 # and if regfiles are reduced
177 self
.regreduce_en
= (hasattr(pspec
, "regreduce") and
178 (pspec
.regreduce
== True))
180 # and if overlap requested
181 self
.allow_overlap
= (hasattr(pspec
, "allow_overlap") and
182 (pspec
.allow_overlap
== True))
184 # and get the core domain
185 self
.core_domain
= "coresync"
186 if (hasattr(pspec
, "core_domain") and
187 isinstance(pspec
.core_domain
, str)):
188 self
.core_domain
= pspec
.core_domain
190 # JTAG interface. add this right at the start because if it's
191 # added it *modifies* the pspec, by adding enable/disable signals
192 # for parts of the rest of the core
193 self
.jtag_en
= hasattr(pspec
, "debug") and pspec
.debug
== 'jtag'
194 #self.dbg_domain = "sync" # sigh "dbgsunc" too problematic
195 self
.dbg_domain
= "dbgsync" # domain for DMI/JTAG clock
197 # XXX MUST keep this up-to-date with litex, and
198 # soc-cocotb-sim, and err.. all needs sorting out, argh
201 'eint', 'gpio', 'mspi0',
202 # 'mspi1', - disabled for now
203 # 'pwm', 'sd0', - disabled for now
205 self
.jtag
= JTAG(get_pinspecs(subset
=subset
),
206 domain
=self
.dbg_domain
)
207 # add signals to pspec to enable/disable icache and dcache
208 # (or data and intstruction wishbone if icache/dcache not included)
209 # https://bugs.libre-soc.org/show_bug.cgi?id=520
210 # TODO: do we actually care if these are not domain-synchronised?
211 # honestly probably not.
212 pspec
.wb_icache_en
= self
.jtag
.wb_icache_en
213 pspec
.wb_dcache_en
= self
.jtag
.wb_dcache_en
214 self
.wb_sram_en
= self
.jtag
.wb_sram_en
216 self
.wb_sram_en
= Const(1)
218 # add 4k sram blocks?
219 self
.sram4x4k
= (hasattr(pspec
, "sram4x4kblock") and
220 pspec
.sram4x4kblock
== True)
224 self
.sram4k
.append(SPBlock512W64B8W(name
="sram4k_%d" % i
,
228 # add interrupt controller?
229 self
.xics
= hasattr(pspec
, "xics") and pspec
.xics
== True
231 self
.xics_icp
= XICS_ICP()
232 self
.xics_ics
= XICS_ICS()
233 self
.int_level_i
= self
.xics_ics
.int_level_i
235 self
.ext_irq
= Signal()
237 # add GPIO peripheral?
238 self
.gpio
= hasattr(pspec
, "gpio") and pspec
.gpio
== True
240 self
.simple_gpio
= SimpleGPIO()
241 self
.gpio_o
= self
.simple_gpio
.gpio_o
243 # main instruction core. suitable for prototyping / demo only
244 self
.core
= core
= NonProductionCore(pspec
)
245 self
.core_rst
= ResetSignal(self
.core_domain
)
247 # instruction decoder. goes into Trap Record
248 #pdecode = create_pdecode()
249 self
.cur_state
= CoreState("cur") # current state (MSR/PC/SVSTATE)
250 self
.pdecode2
= PowerDecode2(None, state
=self
.cur_state
,
251 opkls
=IssuerDecode2ToOperand
,
252 svp64_en
=self
.svp64_en
,
253 regreduce_en
=self
.regreduce_en
)
254 pdecode
= self
.pdecode2
.dec
257 self
.svp64
= SVP64PrefixDecoder() # for decoding SVP64 prefix
259 self
.update_svstate
= Signal() # set this if updating svstate
260 self
.new_svstate
= new_svstate
= SVSTATERec("new_svstate")
262 # Test Instruction memory
263 if hasattr(core
, "icache"):
264 # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
265 # truly dreadful. needs a huge reorg.
266 pspec
.icache
= core
.icache
267 self
.imem
= ConfigFetchUnit(pspec
).fu
270 self
.dbg
= CoreDebug()
271 self
.dbg_rst_i
= Signal(reset_less
=True)
273 # instruction go/monitor
274 self
.pc_o
= Signal(64, reset_less
=True)
275 self
.pc_i
= Data(64, "pc_i") # set "ok" to indicate "please change me"
276 self
.msr_i
= Data(64, "msr_i") # set "ok" to indicate "please change me"
277 self
.svstate_i
= Data(64, "svstate_i") # ditto
278 self
.core_bigendian_i
= Signal() # TODO: set based on MSR.LE
279 self
.busy_o
= Signal(reset_less
=True)
280 self
.memerr_o
= Signal(reset_less
=True)
282 # STATE regfile read /write ports for PC, MSR, SVSTATE
283 staterf
= self
.core
.regs
.rf
['state']
284 self
.state_r_msr
= staterf
.r_ports
['msr'] # MSR rd
285 self
.state_r_pc
= staterf
.r_ports
['cia'] # PC rd
286 self
.state_r_sv
= staterf
.r_ports
['sv'] # SVSTATE rd
288 self
.state_w_msr
= staterf
.w_ports
['msr'] # MSR wr
289 self
.state_w_pc
= staterf
.w_ports
['d_wr1'] # PC wr
290 self
.state_w_sv
= staterf
.w_ports
['sv'] # SVSTATE wr
292 # DMI interface access
293 intrf
= self
.core
.regs
.rf
['int']
294 crrf
= self
.core
.regs
.rf
['cr']
295 xerrf
= self
.core
.regs
.rf
['xer']
296 self
.int_r
= intrf
.r_ports
['dmi'] # INT read
297 self
.cr_r
= crrf
.r_ports
['full_cr_dbg'] # CR read
298 self
.xer_r
= xerrf
.r_ports
['full_xer'] # XER read
302 self
.int_pred
= intrf
.r_ports
['pred'] # INT predicate read
303 self
.cr_pred
= crrf
.r_ports
['cr_pred'] # CR predicate read
305 # hack method of keeping an eye on whether branch/trap set the PC
306 self
.state_nia
= self
.core
.regs
.rf
['state'].w_ports
['nia']
307 self
.state_nia
.wen
.name
= 'state_nia_wen'
309 # pulse to synchronize the simulator at instruction end
310 self
.insn_done
= Signal()
312 # indicate any instruction still outstanding, in execution
313 self
.any_busy
= Signal()
316 # store copies of predicate masks
317 self
.srcmask
= Signal(64)
318 self
.dstmask
= Signal(64)
320 def setup_peripherals(self
, m
):
321 comb
, sync
= m
.d
.comb
, m
.d
.sync
323 # okaaaay so the debug module must be in coresync clock domain
324 # but NOT its reset signal. to cope with this, set every single
325 # submodule explicitly in coresync domain, debug and JTAG
326 # in their own one but using *external* reset.
327 csd
= DomainRenamer(self
.core_domain
)
328 dbd
= DomainRenamer(self
.dbg_domain
)
330 if self
.microwatt_compat
:
331 m
.submodules
.core
= core
= self
.core
333 m
.submodules
.core
= core
= csd(self
.core
)
334 # this _so_ needs sorting out. ICache is added down inside
335 # LoadStore1 and is already a submodule of LoadStore1
336 if not isinstance(self
.imem
, ICache
):
337 m
.submodules
.imem
= imem
= csd(self
.imem
)
338 if self
.microwatt_compat
:
339 m
.submodules
.dbg
= dbg
= self
.dbg
341 m
.submodules
.dbg
= dbg
= dbd(self
.dbg
)
343 m
.submodules
.jtag
= jtag
= dbd(self
.jtag
)
344 # TODO: UART2GDB mux, here, from external pin
345 # see https://bugs.libre-soc.org/show_bug.cgi?id=499
346 sync
+= dbg
.dmi
.connect_to(jtag
.dmi
)
348 cur_state
= self
.cur_state
350 # 4x 4k SRAM blocks. these simply "exist", they get routed in litex
352 for i
, sram
in enumerate(self
.sram4k
):
353 m
.submodules
["sram4k_%d" % i
] = csd(sram
)
354 comb
+= sram
.enable
.eq(self
.wb_sram_en
)
356 # XICS interrupt handler
358 m
.submodules
.xics_icp
= icp
= csd(self
.xics_icp
)
359 m
.submodules
.xics_ics
= ics
= csd(self
.xics_ics
)
360 comb
+= icp
.ics_i
.eq(ics
.icp_o
) # connect ICS to ICP
361 sync
+= cur_state
.eint
.eq(icp
.core_irq_o
) # connect ICP to core
363 sync
+= cur_state
.eint
.eq(self
.ext_irq
) # connect externally
365 # GPIO test peripheral
367 m
.submodules
.simple_gpio
= simple_gpio
= csd(self
.simple_gpio
)
369 # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
370 # XXX causes litex ECP5 test to get wrong idea about input and output
371 # (but works with verilator sim *sigh*)
372 # if self.gpio and self.xics:
373 # comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
375 # instruction decoder
376 pdecode
= create_pdecode()
377 m
.submodules
.dec2
= pdecode2
= csd(self
.pdecode2
)
379 m
.submodules
.svp64
= svp64
= csd(self
.svp64
)
382 dmi
, d_reg
, d_cr
, d_xer
, = dbg
.dmi
, dbg
.d_gpr
, dbg
.d_cr
, dbg
.d_xer
383 intrf
= self
.core
.regs
.rf
['int']
385 # clock delay power-on reset
386 cd_por
= ClockDomain(reset_less
=True)
387 cd_sync
= ClockDomain()
388 m
.domains
+= cd_por
, cd_sync
389 core_sync
= ClockDomain(self
.core_domain
)
390 if self
.core_domain
!= "sync":
391 m
.domains
+= core_sync
392 if self
.dbg_domain
!= "sync":
393 dbg_sync
= ClockDomain(self
.dbg_domain
)
394 m
.domains
+= dbg_sync
396 ti_rst
= Signal(reset_less
=True)
397 delay
= Signal(range(4), reset
=3)
398 with m
.If(delay
!= 0):
399 m
.d
.por
+= delay
.eq(delay
- 1)
400 comb
+= cd_por
.clk
.eq(ClockSignal())
402 # power-on reset delay
403 core_rst
= ResetSignal(self
.core_domain
)
404 if self
.core_domain
!= "sync":
405 comb
+= ti_rst
.eq(delay
!= 0 | dbg
.core_rst_o |
ResetSignal())
406 comb
+= core_rst
.eq(ti_rst
)
408 with m
.If(delay
!= 0 | dbg
.core_rst_o
):
409 comb
+= core_rst
.eq(1)
411 # connect external reset signal to DMI Reset
412 if self
.dbg_domain
!= "sync":
413 dbg_rst
= ResetSignal(self
.dbg_domain
)
414 comb
+= dbg_rst
.eq(self
.dbg_rst_i
)
416 # busy/halted signals from core
417 core_busy_o
= ~core
.p
.o_ready | core
.n
.o_data
.busy_o
# core is busy
418 comb
+= self
.busy_o
.eq(core_busy_o
)
419 comb
+= pdecode2
.dec
.bigendian
.eq(self
.core_bigendian_i
)
421 # temporary hack: says "go" immediately for both address gen and ST
423 ldst
= core
.fus
.fus
['ldst0']
424 st_go_edge
= rising_edge(m
, ldst
.st
.rel_o
)
425 # link addr-go direct to rel
426 m
.d
.comb
+= ldst
.ad
.go_i
.eq(ldst
.ad
.rel_o
)
427 m
.d
.comb
+= ldst
.st
.go_i
.eq(st_go_edge
) # link store-go to rising rel
def do_dmi(self, m, dbg):
    """deals with DMI debug requests

    currently only provides read requests for the INT regfile, CR and XER
    it will later also deal with *writing* to these regfiles.

    Each regfile read is a two-cycle handshake: the read-enable is raised
    combinatorially on the request, and the data plus ack are presented
    one clock later (tracked by the *_delay registers).
    """
    comb = m.d.comb
    sync = m.d.sync
    dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
    intrf = self.core.regs.rf['int']

    with m.If(d_reg.req):  # request for regfile access being made
        # TODO: error-check this
        # XXX should this be combinatorial?  sync better?
        if intrf.unary:
            # one read-enable bit per register
            comb += self.int_r.ren.eq(1 << d_reg.addr)
        else:
            # binary-addressed port: address plus a single enable
            comb += self.int_r.addr.eq(d_reg.addr)
            comb += self.int_r.ren.eq(1)
    d_reg_delay = Signal()
    sync += d_reg_delay.eq(d_reg.req)
    with m.If(d_reg_delay):
        # data arrives one clock later
        comb += d_reg.data.eq(self.int_r.o_data)
        comb += d_reg.ack.eq(1)

    # sigh same thing for CR debug
    with m.If(d_cr.req):  # request for regfile access being made
        comb += self.cr_r.ren.eq(0b11111111)  # enable all
    d_cr_delay = Signal()
    sync += d_cr_delay.eq(d_cr.req)
    with m.If(d_cr_delay):
        # data arrives one clock later
        comb += d_cr.data.eq(self.cr_r.o_data)
        comb += d_cr.ack.eq(1)

    # ... and the same pattern again for XER
    with m.If(d_xer.req):  # request for regfile access being made
        comb += self.xer_r.ren.eq(0b111111)  # enable all
    d_xer_delay = Signal()
    sync += d_xer_delay.eq(d_xer.req)
    with m.If(d_xer_delay):
        # data arrives one clock later
        comb += d_xer.data.eq(self.xer_r.o_data)
        comb += d_xer.ack.eq(1)
def tb_dec_fsm(self, m, spr_dec):
    """tb_dec_fsm - DEC decrement / TB increment FSM

    this is a FSM for updating either dec or tb. it runs alternately
    DEC, TB, DEC, TB. note that SPR pipeline could have written a new
    value to DEC, however the regfile has "passthrough" on it so this
    works out.

    see v3.0B p1097-1099 for Timer Resource and p1065 and p1076

    Each register update is a read-then-write pair of states: the read
    is issued in one state and the data (available a cycle later) is
    modified and written back in the next.
    """
    comb, sync = m.d.comb, m.d.sync
    fast_rf = self.core.regs.rf['fast']
    fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
    fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB

    with m.FSM() as fsm:

        # initiates read of current DEC
        with m.State("DEC_READ"):
            comb += fast_r_dectb.addr.eq(FastRegs.DEC)
            comb += fast_r_dectb.ren.eq(1)
            m.next = "DEC_WRITE"

        # waits for DEC read to arrive (1 cycle), updates with new value
        with m.State("DEC_WRITE"):
            new_dec = Signal(64)
            # TODO: MSR.LPCR 32-bit decrement mode
            comb += new_dec.eq(fast_r_dectb.o_data - 1)
            comb += fast_w_dectb.addr.eq(FastRegs.DEC)
            comb += fast_w_dectb.wen.eq(1)
            comb += fast_w_dectb.i_data.eq(new_dec)
            sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
            m.next = "TB_READ"

        # initiates read of current TB
        with m.State("TB_READ"):
            comb += fast_r_dectb.addr.eq(FastRegs.TB)
            comb += fast_r_dectb.ren.eq(1)
            m.next = "TB_WRITE"

        # waits for read TB to arrive, initiates write of current TB
        with m.State("TB_WRITE"):
            new_tb = Signal(64)
            comb += new_tb.eq(fast_r_dectb.o_data + 1)
            comb += fast_w_dectb.addr.eq(FastRegs.TB)
            comb += fast_w_dectb.wen.eq(1)
            comb += fast_w_dectb.i_data.eq(new_tb)
            m.next = "DEC_READ"
527 def elaborate(self
, platform
):
530 comb
, sync
= m
.d
.comb
, m
.d
.sync
531 cur_state
= self
.cur_state
532 pdecode2
= self
.pdecode2
535 # set up peripherals and core
536 core_rst
= self
.core_rst
537 self
.setup_peripherals(m
)
539 # reset current state if core reset requested
541 m
.d
.sync
+= self
.cur_state
.eq(0)
543 # check halted condition: requested PC to execute matches DMI stop addr
544 # and immediately stop. address of 0xffff_ffff_ffff_ffff can never
547 comb
+= halted
.eq(dbg
.stop_addr_o
== dbg
.state
.pc
)
549 comb
+= dbg
.core_stopped_i
.eq(1)
550 comb
+= dbg
.terminate_i
.eq(1)
552 # PC and instruction from I-Memory
553 comb
+= self
.pc_o
.eq(cur_state
.pc
)
554 self
.pc_changed
= Signal() # note write to PC
555 self
.msr_changed
= Signal() # note write to MSR
556 self
.sv_changed
= Signal() # note write to SVSTATE
558 # read state either from incoming override or from regfile
559 state
= CoreState("get") # current state (MSR/PC/SVSTATE)
560 state_get(m
, state
.msr
, core_rst
, self
.msr_i
,
562 self
.state_r_msr
, StateRegs
.MSR
)
563 state_get(m
, state
.pc
, core_rst
, self
.pc_i
,
565 self
.state_r_pc
, StateRegs
.PC
)
566 state_get(m
, state
.svstate
, core_rst
, self
.svstate_i
,
567 "svstate", # read SVSTATE
568 self
.state_r_sv
, StateRegs
.SVSTATE
)
570 # don't write pc every cycle
571 comb
+= self
.state_w_pc
.wen
.eq(0)
572 comb
+= self
.state_w_pc
.i_data
.eq(0)
574 # connect up debug state. note "combinatorially same" below,
575 # this is a bit naff, passing state over in the dbg class, but
576 # because it is combinatorial it achieves the desired goal
577 comb
+= dbg
.state
.eq(state
)
579 # this bit doesn't have to be in the FSM: connect up to read
580 # regfiles on demand from DMI
583 # DEC and TB inc/dec FSM. copy of DEC is put into CoreState,
584 # (which uses that in PowerDecoder2 to raise 0x900 exception)
585 self
.tb_dec_fsm(m
, cur_state
.dec
)
587 # while stopped, allow updating the MSR, PC and SVSTATE.
588 # these are mainly for debugging purposes (including DMI/JTAG)
589 with m
.If(dbg
.core_stopped_i
):
590 with m
.If(self
.pc_i
.ok
):
591 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
592 comb
+= self
.state_w_pc
.i_data
.eq(self
.pc_i
.data
)
593 sync
+= self
.pc_changed
.eq(1)
594 with m
.If(self
.msr_i
.ok
):
595 comb
+= self
.state_w_msr
.wen
.eq(1 << StateRegs
.MSR
)
596 comb
+= self
.state_w_msr
.i_data
.eq(self
.msr_i
.data
)
597 sync
+= self
.msr_changed
.eq(1)
598 with m
.If(self
.svstate_i
.ok | self
.update_svstate
):
599 with m
.If(self
.svstate_i
.ok
): # over-ride from external source
600 comb
+= self
.new_svstate
.eq(self
.svstate_i
.data
)
601 comb
+= self
.state_w_sv
.wen
.eq(1 << StateRegs
.SVSTATE
)
602 comb
+= self
.state_w_sv
.i_data
.eq(self
.new_svstate
)
603 sync
+= self
.sv_changed
.eq(1)
605 # start renaming some of the ports to match microwatt
606 if self
.microwatt_compat
:
607 self
.core
.o
.core_terminate_o
.name
= "terminated_out"
608 # names of DMI interface
609 self
.dbg
.dmi
.addr_i
.name
= 'dmi_addr'
610 self
.dbg
.dmi
.din
.name
= 'dmi_din'
611 self
.dbg
.dmi
.dout
.name
= 'dmi_dout'
612 self
.dbg
.dmi
.req_i
.name
= 'dmi_req'
613 self
.dbg
.dmi
.we_i
.name
= 'dmi_wr'
614 self
.dbg
.dmi
.ack_o
.name
= 'dmi_ack'
615 # wishbone instruction bus
616 ibus
= self
.imem
.ibus
617 ibus
.adr
.name
= 'wishbone_insn_out.adr'
618 ibus
.dat_w
.name
= 'wishbone_insn_out.dat'
619 ibus
.sel
.name
= 'wishbone_insn_out.sel'
620 ibus
.cyc
.name
= 'wishbone_insn_out.cyc'
621 ibus
.stb
.name
= 'wishbone_insn_out.stb'
622 ibus
.we
.name
= 'wishbone_insn_out.we'
623 ibus
.dat_r
.name
= 'wishbone_insn_in.dat'
624 ibus
.ack
.name
= 'wishbone_insn_in.ack'
625 ibus
.stall
.name
= 'wishbone_insn_in.stall'
627 dbus
= self
.core
.l0
.cmpi
.wb_bus()
628 dbus
.adr
.name
= 'wishbone_data_out.adr'
629 dbus
.dat_w
.name
= 'wishbone_data_out.dat'
630 dbus
.sel
.name
= 'wishbone_data_out.sel'
631 dbus
.cyc
.name
= 'wishbone_data_out.cyc'
632 dbus
.stb
.name
= 'wishbone_data_out.stb'
633 dbus
.we
.name
= 'wishbone_data_out.we'
634 dbus
.dat_r
.name
= 'wishbone_data_in.dat'
635 dbus
.ack
.name
= 'wishbone_data_in.ack'
636 dbus
.stall
.name
= 'wishbone_data_in.stall'
641 yield from self
.pc_i
.ports()
642 yield from self
.msr_i
.ports()
645 yield from self
.core
.ports()
646 yield from self
.imem
.ports()
647 yield self
.core_bigendian_i
653 def external_ports(self
):
654 if self
.microwatt_compat
:
655 ports
= [self
.core
.o
.core_terminate_o
,
657 self
.alt_reset
, # not connected yet
661 ports
+= list(self
.dbg
.dmi
.ports())
662 # for dbus/ibus microwatt, exclude err btw and cti
663 for name
, sig
in self
.imem
.ibus
.fields
.items():
664 if name
not in ['err', 'bte', 'cti']:
666 for name
, sig
in self
.core
.l0
.cmpi
.wb_bus().fields
.items():
667 if name
not in ['err', 'bte', 'cti']:
671 ports
= self
.pc_i
.ports()
672 ports
= self
.msr_i
.ports()
673 ports
+= [self
.pc_o
, self
.memerr_o
, self
.core_bigendian_i
, self
.busy_o
,
677 ports
+= list(self
.jtag
.external_ports())
679 # don't add DMI if JTAG is enabled
680 ports
+= list(self
.dbg
.dmi
.ports())
682 ports
+= list(self
.imem
.ibus
.fields
.values())
683 ports
+= list(self
.core
.l0
.cmpi
.wb_bus().fields
.values())
686 for sram
in self
.sram4k
:
687 ports
+= list(sram
.bus
.fields
.values())
690 ports
+= list(self
.xics_icp
.bus
.fields
.values())
691 ports
+= list(self
.xics_ics
.bus
.fields
.values())
692 ports
.append(self
.int_level_i
)
694 ports
.append(self
.ext_irq
)
697 ports
+= list(self
.simple_gpio
.bus
.fields
.values())
698 ports
.append(self
.gpio_o
)
707 # Fetch Finite State Machine.
708 # WARNING: there are currently DriverConflicts but it's actually working.
709 # TODO, here: everything that is global in nature, information from the
710 # main TestIssuerInternal, needs to move to either ispec() or ospec().
711 # not only that: TestIssuerInternal.imem can entirely move into here
712 # because imem is only ever accessed inside the FetchFSM.
713 class FetchFSM(ControlBase
):
714 def __init__(self
, allow_overlap
, svp64_en
, imem
, core_rst
,
716 dbg
, core
, svstate
, nia
, is_svp64_mode
):
717 self
.allow_overlap
= allow_overlap
718 self
.svp64_en
= svp64_en
720 self
.core_rst
= core_rst
721 self
.pdecode2
= pdecode2
722 self
.cur_state
= cur_state
725 self
.svstate
= svstate
727 self
.is_svp64_mode
= is_svp64_mode
729 # set up pipeline ControlBase and allocate i/o specs
730 # (unusual: normally done by the Pipeline API)
731 super().__init
__(stage
=self
)
732 self
.p
.i_data
, self
.n
.o_data
= self
.new_specs(None)
733 self
.i
, self
.o
= self
.p
.i_data
, self
.n
.o_data
735 # next 3 functions are Stage API Compliance
736 def setup(self
, m
, i
):
745 def elaborate(self
, platform
):
748 this FSM performs fetch of raw instruction data, partial-decodes
749 it 32-bit at a time to detect SVP64 prefixes, and will optionally
750 read a 2nd 32-bit quantity if that occurs.
752 m
= super().elaborate(platform
)
758 svstate
= self
.svstate
760 is_svp64_mode
= self
.is_svp64_mode
761 fetch_pc_o_ready
= self
.p
.o_ready
762 fetch_pc_i_valid
= self
.p
.i_valid
763 fetch_insn_o_valid
= self
.n
.o_valid
764 fetch_insn_i_ready
= self
.n
.i_ready
768 pdecode2
= self
.pdecode2
769 cur_state
= self
.cur_state
770 dec_opcode_o
= pdecode2
.dec
.raw_opcode_in
# raw opcode
772 # also note instruction fetch failed
773 if hasattr(core
, "icache"):
774 fetch_failed
= core
.icache
.i_out
.fetch_failed
777 fetch_failed
= Const(0, 1)
780 # set priv / virt mode on I-Cache, sigh
781 if isinstance(self
.imem
, ICache
):
782 comb
+= self
.imem
.i_in
.priv_mode
.eq(~msr
[MSR
.PR
])
783 comb
+= self
.imem
.i_in
.virt_mode
.eq(msr
[MSR
.IR
]) # Instr. Redir (VM)
785 with m
.FSM(name
='fetch_fsm'):
788 with m
.State("IDLE"):
789 # fetch allowed if not failed and stopped but not stepping
790 # (see dmi.py for how core_stop_o is generated)
791 with m
.If(~fetch_failed
& ~dbg
.core_stop_o
):
792 comb
+= fetch_pc_o_ready
.eq(1)
793 with m
.If(fetch_pc_i_valid
& ~pdecode2
.instr_fault
795 # instruction allowed to go: start by reading the PC
796 # capture the PC and also drop it into Insn Memory
797 # we have joined a pair of combinatorial memory
798 # lookups together. this is Generally Bad.
799 comb
+= self
.imem
.a_pc_i
.eq(pc
)
800 comb
+= self
.imem
.a_i_valid
.eq(1)
801 comb
+= self
.imem
.f_i_valid
.eq(1)
802 # transfer state to output
803 sync
+= cur_state
.pc
.eq(pc
)
804 sync
+= cur_state
.svstate
.eq(svstate
) # and svstate
805 sync
+= cur_state
.msr
.eq(msr
) # and msr
807 m
.next
= "INSN_READ" # move to "wait for bus" phase
809 # dummy pause to find out why simulation is not keeping up
810 with m
.State("INSN_READ"):
811 # when using "single-step" mode, checking dbg.stopping_o
812 # prevents progress. allow fetch to proceed once started
814 #if self.allow_overlap:
815 # stopping = dbg.stopping_o
817 # stopping: jump back to idle
820 with m
.If(self
.imem
.f_busy_o
&
821 ~pdecode2
.instr_fault
): # zzz...
822 # busy but not fetch failed: stay in wait-read
823 comb
+= self
.imem
.a_pc_i
.eq(pc
)
824 comb
+= self
.imem
.a_i_valid
.eq(1)
825 comb
+= self
.imem
.f_i_valid
.eq(1)
827 # not busy (or fetch failed!): instruction fetched
828 # when fetch failed, the instruction gets ignored
830 if hasattr(core
, "icache"):
831 # blech, icache returns actual instruction
832 insn
= self
.imem
.f_instr_o
834 # but these return raw memory
835 insn
= get_insn(self
.imem
.f_instr_o
, cur_state
.pc
)
838 # decode the SVP64 prefix, if any
839 comb
+= svp64
.raw_opcode_in
.eq(insn
)
840 comb
+= svp64
.bigendian
.eq(self
.core_bigendian_i
)
841 # pass the decoded prefix (if any) to PowerDecoder2
842 sync
+= pdecode2
.sv_rm
.eq(svp64
.svp64_rm
)
843 sync
+= pdecode2
.is_svp64_mode
.eq(is_svp64_mode
)
844 # remember whether this is a prefixed instruction,
845 # so the FSM can readily loop when VL==0
846 sync
+= is_svp64_mode
.eq(svp64
.is_svp64_mode
)
847 # calculate the address of the following instruction
848 insn_size
= Mux(svp64
.is_svp64_mode
, 8, 4)
849 sync
+= nia
.eq(cur_state
.pc
+ insn_size
)
850 with m
.If(~svp64
.is_svp64_mode
):
851 # with no prefix, store the instruction
852 # and hand it directly to the next FSM
853 sync
+= dec_opcode_o
.eq(insn
)
854 m
.next
= "INSN_READY"
856 # fetch the rest of the instruction from memory
857 comb
+= self
.imem
.a_pc_i
.eq(cur_state
.pc
+ 4)
858 comb
+= self
.imem
.a_i_valid
.eq(1)
859 comb
+= self
.imem
.f_i_valid
.eq(1)
860 m
.next
= "INSN_READ2"
862 # not SVP64 - 32-bit only
863 sync
+= nia
.eq(cur_state
.pc
+ 4)
864 sync
+= dec_opcode_o
.eq(insn
)
865 m
.next
= "INSN_READY"
867 with m
.State("INSN_READ2"):
868 with m
.If(self
.imem
.f_busy_o
): # zzz...
869 # busy: stay in wait-read
870 comb
+= self
.imem
.a_i_valid
.eq(1)
871 comb
+= self
.imem
.f_i_valid
.eq(1)
873 # not busy: instruction fetched
874 if hasattr(core
, "icache"):
875 # blech, icache returns actual instruction
876 insn
= self
.imem
.f_instr_o
878 insn
= get_insn(self
.imem
.f_instr_o
, cur_state
.pc
+4)
879 sync
+= dec_opcode_o
.eq(insn
)
880 m
.next
= "INSN_READY"
881 # TODO: probably can start looking at pdecode2.rm_dec
882 # here or maybe even in INSN_READ state, if svp64_mode
883 # detected, in order to trigger - and wait for - the
886 pmode
= pdecode2
.rm_dec
.predmode
888 if pmode != SVP64PredMode.ALWAYS.value:
889 fire predicate loading FSM and wait before
892 sync += self.srcmask.eq(-1) # set to all 1s
893 sync += self.dstmask.eq(-1) # set to all 1s
894 m.next = "INSN_READY"
897 with m
.State("INSN_READY"):
898 # hand over the instruction, to be decoded
899 comb
+= fetch_insn_o_valid
.eq(1)
900 with m
.If(fetch_insn_i_ready
):
903 # whatever was done above, over-ride it if core reset is held
904 with m
.If(self
.core_rst
):
910 class TestIssuerInternal(TestIssuerBase
):
911 """TestIssuer - reads instructions from TestMemory and issues them
913 efficiency and speed is not the main goal here: functional correctness
914 and code clarity is. optimisations (which almost 100% interfere with
915 easy understanding) come later.
918 def fetch_predicate_fsm(self
, m
,
919 pred_insn_i_valid
, pred_insn_o_ready
,
920 pred_mask_o_valid
, pred_mask_i_ready
):
921 """fetch_predicate_fsm - obtains (constructs in the case of CR)
922 src/dest predicate masks
924 https://bugs.libre-soc.org/show_bug.cgi?id=617
925 the predicates can be read here, by using IntRegs r_ports['pred']
926 or CRRegs r_ports['pred']. in the case of CRs it will have to
927 be done through multiple reads, extracting one relevant at a time.
928 later, a faster way would be to use the 32-bit-wide CR port but
929 this is more complex decoding, here. equivalent code used in
930 ISACaller is "from openpower.decoder.isa.caller import get_predcr"
932 note: this ENTIRE FSM is not to be called when svp64 is disabled
936 pdecode2
= self
.pdecode2
937 rm_dec
= pdecode2
.rm_dec
# SVP64RMModeDecode
938 predmode
= rm_dec
.predmode
939 srcpred
, dstpred
= rm_dec
.srcpred
, rm_dec
.dstpred
940 cr_pred
, int_pred
= self
.cr_pred
, self
.int_pred
# read regfiles
941 # get src/dst step, so we can skip already used mask bits
942 cur_state
= self
.cur_state
943 srcstep
= cur_state
.svstate
.srcstep
944 dststep
= cur_state
.svstate
.dststep
945 cur_vl
= cur_state
.svstate
.vl
948 sregread
, sinvert
, sunary
, sall1s
= get_predint(m
, srcpred
, 's')
949 dregread
, dinvert
, dunary
, dall1s
= get_predint(m
, dstpred
, 'd')
950 sidx
, scrinvert
= get_predcr(m
, srcpred
, 's')
951 didx
, dcrinvert
= get_predcr(m
, dstpred
, 'd')
953 # store fetched masks, for either intpred or crpred
954 # when src/dst step is not zero, the skipped mask bits need to be
955 # shifted-out, before actually storing them in src/dest mask
956 new_srcmask
= Signal(64, reset_less
=True)
957 new_dstmask
= Signal(64, reset_less
=True)
959 with m
.FSM(name
="fetch_predicate"):
961 with m
.State("FETCH_PRED_IDLE"):
962 comb
+= pred_insn_o_ready
.eq(1)
963 with m
.If(pred_insn_i_valid
):
964 with m
.If(predmode
== SVP64PredMode
.INT
):
965 # skip fetching destination mask register, when zero
967 sync
+= new_dstmask
.eq(-1)
968 # directly go to fetch source mask register
969 # guaranteed not to be zero (otherwise predmode
970 # would be SVP64PredMode.ALWAYS, not INT)
971 comb
+= int_pred
.addr
.eq(sregread
)
972 comb
+= int_pred
.ren
.eq(1)
973 m
.next
= "INT_SRC_READ"
974 # fetch destination predicate register
976 comb
+= int_pred
.addr
.eq(dregread
)
977 comb
+= int_pred
.ren
.eq(1)
978 m
.next
= "INT_DST_READ"
979 with m
.Elif(predmode
== SVP64PredMode
.CR
):
980 # go fetch masks from the CR register file
981 sync
+= new_srcmask
.eq(0)
982 sync
+= new_dstmask
.eq(0)
985 sync
+= self
.srcmask
.eq(-1)
986 sync
+= self
.dstmask
.eq(-1)
987 m
.next
= "FETCH_PRED_DONE"
989 with m
.State("INT_DST_READ"):
990 # store destination mask
991 inv
= Repl(dinvert
, 64)
993 # set selected mask bit for 1<<r3 mode
994 dst_shift
= Signal(range(64))
995 comb
+= dst_shift
.eq(self
.int_pred
.o_data
& 0b111111)
996 sync
+= new_dstmask
.eq(1 << dst_shift
)
998 # invert mask if requested
999 sync
+= new_dstmask
.eq(self
.int_pred
.o_data ^ inv
)
1000 # skip fetching source mask register, when zero
1002 sync
+= new_srcmask
.eq(-1)
1003 m
.next
= "FETCH_PRED_SHIFT_MASK"
1004 # fetch source predicate register
1006 comb
+= int_pred
.addr
.eq(sregread
)
1007 comb
+= int_pred
.ren
.eq(1)
1008 m
.next
= "INT_SRC_READ"
1010 with m
.State("INT_SRC_READ"):
1012 inv
= Repl(sinvert
, 64)
1014 # set selected mask bit for 1<<r3 mode
1015 src_shift
= Signal(range(64))
1016 comb
+= src_shift
.eq(self
.int_pred
.o_data
& 0b111111)
1017 sync
+= new_srcmask
.eq(1 << src_shift
)
1019 # invert mask if requested
1020 sync
+= new_srcmask
.eq(self
.int_pred
.o_data ^ inv
)
1021 m
.next
= "FETCH_PRED_SHIFT_MASK"
1023 # fetch masks from the CR register file
1024 # implements the following loop:
1025 # idx, inv = get_predcr(mask)
1027 # for cr_idx in range(vl):
1028 # cr = crl[cr_idx + SVP64CROffs.CRPred] # takes one cycle
1030 # mask |= 1 << cr_idx
1032 with m
.State("CR_READ"):
1033 # CR index to be read, which will be ready by the next cycle
1034 cr_idx
= Signal
.like(cur_vl
, reset_less
=True)
1035 # submit the read operation to the regfile
1036 with m
.If(cr_idx
!= cur_vl
):
1037 # the CR read port is unary ...
1039 # ... in MSB0 convention ...
1040 # ren = 1 << (7 - cr_idx)
1041 # ... and with an offset:
1042 # ren = 1 << (7 - off - cr_idx)
1043 idx
= SVP64CROffs
.CRPred
+ cr_idx
1044 comb
+= cr_pred
.ren
.eq(1 << (7 - idx
))
1045 # signal data valid in the next cycle
1046 cr_read
= Signal(reset_less
=True)
1047 sync
+= cr_read
.eq(1)
1048 # load the next index
1049 sync
+= cr_idx
.eq(cr_idx
+ 1)
1052 sync
+= cr_read
.eq(0)
1053 sync
+= cr_idx
.eq(0)
1054 m
.next
= "FETCH_PRED_SHIFT_MASK"
1056 # compensate for the one cycle delay on the regfile
1057 cur_cr_idx
= Signal
.like(cur_vl
)
1058 comb
+= cur_cr_idx
.eq(cr_idx
- 1)
1059 # read the CR field, select the appropriate bit
1060 cr_field
= Signal(4)
1063 comb
+= cr_field
.eq(cr_pred
.o_data
)
1064 comb
+= scr_bit
.eq(cr_field
.bit_select(sidx
, 1)
1066 comb
+= dcr_bit
.eq(cr_field
.bit_select(didx
, 1)
1068 # set the corresponding mask bit
1069 bit_to_set
= Signal
.like(self
.srcmask
)
1070 comb
+= bit_to_set
.eq(1 << cur_cr_idx
)
1072 sync
+= new_srcmask
.eq(new_srcmask | bit_to_set
)
1074 sync
+= new_dstmask
.eq(new_dstmask | bit_to_set
)
1076 with m
.State("FETCH_PRED_SHIFT_MASK"):
1077 # shift-out skipped mask bits
1078 sync
+= self
.srcmask
.eq(new_srcmask
>> srcstep
)
1079 sync
+= self
.dstmask
.eq(new_dstmask
>> dststep
)
1080 m
.next
= "FETCH_PRED_DONE"
1082 with m
.State("FETCH_PRED_DONE"):
1083 comb
+= pred_mask_o_valid
.eq(1)
1084 with m
.If(pred_mask_i_ready
):
1085 m
.next
= "FETCH_PRED_IDLE"
1087 def issue_fsm(self
, m
, core
, nia
,
1088 dbg
, core_rst
, is_svp64_mode
,
1089 fetch_pc_o_ready
, fetch_pc_i_valid
,
1090 fetch_insn_o_valid
, fetch_insn_i_ready
,
1091 pred_insn_i_valid
, pred_insn_o_ready
,
1092 pred_mask_o_valid
, pred_mask_i_ready
,
1093 exec_insn_i_valid
, exec_insn_o_ready
,
1094 exec_pc_o_valid
, exec_pc_i_ready
):
1097 decode / issue FSM. this interacts with the "fetch" FSM
1098 through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
1099 (outgoing). also interacts with the "execute" FSM
1100 through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
1102 SVP64 RM prefixes have already been set up by the
1103 "fetch" phase, so execute is fairly straightforward.
1108 pdecode2
= self
.pdecode2
1109 cur_state
= self
.cur_state
1110 new_svstate
= self
.new_svstate
1113 dec_opcode_i
= pdecode2
.dec
.raw_opcode_in
# raw opcode
1115 # for updating svstate (things like srcstep etc.)
1116 comb
+= new_svstate
.eq(cur_state
.svstate
)
1118 # precalculate srcstep+1 and dststep+1
1119 cur_srcstep
= cur_state
.svstate
.srcstep
1120 cur_dststep
= cur_state
.svstate
.dststep
1121 next_srcstep
= Signal
.like(cur_srcstep
)
1122 next_dststep
= Signal
.like(cur_dststep
)
1123 comb
+= next_srcstep
.eq(cur_state
.svstate
.srcstep
+1)
1124 comb
+= next_dststep
.eq(cur_state
.svstate
.dststep
+1)
1126 # note if an exception happened. in a pipelined or OoO design
1127 # this needs to be accompanied by "shadowing" (or stalling)
1128 exc_happened
= self
.core
.o
.exc_happened
1129 # also note instruction fetch failed
1130 if hasattr(core
, "icache"):
1131 fetch_failed
= core
.icache
.i_out
.fetch_failed
1133 # set to fault in decoder
1134 # update (highest priority) instruction fault
1135 rising_fetch_failed
= rising_edge(m
, fetch_failed
)
1136 with m
.If(rising_fetch_failed
):
1137 sync
+= pdecode2
.instr_fault
.eq(1)
1139 fetch_failed
= Const(0, 1)
1140 flush_needed
= False
1142 with m
.FSM(name
="issue_fsm"):
1144 # sync with the "fetch" phase which is reading the instruction
1145 # at this point, there is no instruction running, that
1146 # could inadvertently update the PC.
1147 with m
.State("ISSUE_START"):
1148 # reset instruction fault
1149 sync
+= pdecode2
.instr_fault
.eq(0)
1150 # wait on "core stop" release, before next fetch
1151 # need to do this here, in case we are in a VL==0 loop
1152 with m
.If(~dbg
.core_stop_o
& ~core_rst
):
1153 comb
+= fetch_pc_i_valid
.eq(1) # tell fetch to start
1154 with m
.If(fetch_pc_o_ready
): # fetch acknowledged us
1155 m
.next
= "INSN_WAIT"
1157 # tell core it's stopped, and acknowledge debug handshake
1158 comb
+= dbg
.core_stopped_i
.eq(1)
1159 # while stopped, allow updating SVSTATE
1160 with m
.If(self
.svstate_i
.ok
):
1161 comb
+= new_svstate
.eq(self
.svstate_i
.data
)
1162 comb
+= self
.update_svstate
.eq(1)
1163 sync
+= self
.sv_changed
.eq(1)
1165 # wait for an instruction to arrive from Fetch
1166 with m
.State("INSN_WAIT"):
1167 # when using "single-step" mode, checking dbg.stopping_o
1168 # prevents progress. allow issue to proceed once started
1170 #if self.allow_overlap:
1171 # stopping = dbg.stopping_o
1172 with m
.If(stopping
):
1173 # stopping: jump back to idle
1174 m
.next
= "ISSUE_START"
1176 # request the icache to stop asserting "failed"
1177 comb
+= core
.icache
.flush_in
.eq(1)
1178 # stop instruction fault
1179 sync
+= pdecode2
.instr_fault
.eq(0)
1181 comb
+= fetch_insn_i_ready
.eq(1)
1182 with m
.If(fetch_insn_o_valid
):
1183 # loop into ISSUE_START if it's a SVP64 instruction
1184 # and VL == 0. this because VL==0 is a for-loop
1185 # from 0 to 0 i.e. always, always a NOP.
1186 cur_vl
= cur_state
.svstate
.vl
1187 with m
.If(is_svp64_mode
& (cur_vl
== 0)):
1188 # update the PC before fetching the next instruction
1189 # since we are in a VL==0 loop, no instruction was
1190 # executed that we could be overwriting
1191 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
1192 comb
+= self
.state_w_pc
.i_data
.eq(nia
)
1193 comb
+= self
.insn_done
.eq(1)
1194 m
.next
= "ISSUE_START"
1197 m
.next
= "PRED_START" # fetching predicate
1199 m
.next
= "DECODE_SV" # skip predication
1201 with m
.State("PRED_START"):
1202 comb
+= pred_insn_i_valid
.eq(1) # tell fetch_pred to start
1203 with m
.If(pred_insn_o_ready
): # fetch_pred acknowledged us
1204 m
.next
= "MASK_WAIT"
1206 with m
.State("MASK_WAIT"):
1207 comb
+= pred_mask_i_ready
.eq(1) # ready to receive the masks
1208 with m
.If(pred_mask_o_valid
): # predication masks are ready
1209 m
.next
= "PRED_SKIP"
1211 # skip zeros in predicate
1212 with m
.State("PRED_SKIP"):
1213 with m
.If(~is_svp64_mode
):
1214 m
.next
= "DECODE_SV" # nothing to do
1217 pred_src_zero
= pdecode2
.rm_dec
.pred_sz
1218 pred_dst_zero
= pdecode2
.rm_dec
.pred_dz
1220 # new srcstep, after skipping zeros
1221 skip_srcstep
= Signal
.like(cur_srcstep
)
1222 # value to be added to the current srcstep
1223 src_delta
= Signal
.like(cur_srcstep
)
1224 # add leading zeros to srcstep, if not in zero mode
1225 with m
.If(~pred_src_zero
):
1226 # priority encoder (count leading zeros)
1227 # append guard bit, in case the mask is all zeros
1228 pri_enc_src
= PriorityEncoder(65)
1229 m
.submodules
.pri_enc_src
= pri_enc_src
1230 comb
+= pri_enc_src
.i
.eq(Cat(self
.srcmask
,
1232 comb
+= src_delta
.eq(pri_enc_src
.o
)
1233 # apply delta to srcstep
1234 comb
+= skip_srcstep
.eq(cur_srcstep
+ src_delta
)
1235 # shift-out all leading zeros from the mask
1236 # plus the leading "one" bit
1237 # TODO count leading zeros and shift-out the zero
1238 # bits, in the same step, in hardware
1239 sync
+= self
.srcmask
.eq(self
.srcmask
>> (src_delta
+1))
1241 # same as above, but for dststep
1242 skip_dststep
= Signal
.like(cur_dststep
)
1243 dst_delta
= Signal
.like(cur_dststep
)
1244 with m
.If(~pred_dst_zero
):
1245 pri_enc_dst
= PriorityEncoder(65)
1246 m
.submodules
.pri_enc_dst
= pri_enc_dst
1247 comb
+= pri_enc_dst
.i
.eq(Cat(self
.dstmask
,
1249 comb
+= dst_delta
.eq(pri_enc_dst
.o
)
1250 comb
+= skip_dststep
.eq(cur_dststep
+ dst_delta
)
1251 sync
+= self
.dstmask
.eq(self
.dstmask
>> (dst_delta
+1))
1253 # TODO: initialize mask[VL]=1 to avoid passing past VL
1254 with m
.If((skip_srcstep
>= cur_vl
) |
1255 (skip_dststep
>= cur_vl
)):
1256 # end of VL loop. Update PC and reset src/dst step
1257 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
1258 comb
+= self
.state_w_pc
.i_data
.eq(nia
)
1259 comb
+= new_svstate
.srcstep
.eq(0)
1260 comb
+= new_svstate
.dststep
.eq(0)
1261 comb
+= self
.update_svstate
.eq(1)
1262 # synchronize with the simulator
1263 comb
+= self
.insn_done
.eq(1)
1265 m
.next
= "ISSUE_START"
1267 # update new src/dst step
1268 comb
+= new_svstate
.srcstep
.eq(skip_srcstep
)
1269 comb
+= new_svstate
.dststep
.eq(skip_dststep
)
1270 comb
+= self
.update_svstate
.eq(1)
1272 m
.next
= "DECODE_SV"
1274 # pass predicate mask bits through to satellite decoders
1275 # TODO: for SIMD this will be *multiple* bits
1276 sync
+= core
.i
.sv_pred_sm
.eq(self
.srcmask
[0])
1277 sync
+= core
.i
.sv_pred_dm
.eq(self
.dstmask
[0])
1279 # after src/dst step have been updated, we are ready
1280 # to decode the instruction
1281 with m
.State("DECODE_SV"):
1282 # decode the instruction
1283 with m
.If(~fetch_failed
):
1284 sync
+= pdecode2
.instr_fault
.eq(0)
1285 sync
+= core
.i
.e
.eq(pdecode2
.e
)
1286 sync
+= core
.i
.state
.eq(cur_state
)
1287 sync
+= core
.i
.raw_insn_i
.eq(dec_opcode_i
)
1288 sync
+= core
.i
.bigendian_i
.eq(self
.core_bigendian_i
)
1290 sync
+= core
.i
.sv_rm
.eq(pdecode2
.sv_rm
)
1291 # set RA_OR_ZERO detection in satellite decoders
1292 sync
+= core
.i
.sv_a_nz
.eq(pdecode2
.sv_a_nz
)
1293 # and svp64 detection
1294 sync
+= core
.i
.is_svp64_mode
.eq(is_svp64_mode
)
1295 # and svp64 bit-rev'd ldst mode
1296 ldst_dec
= pdecode2
.use_svp64_ldst_dec
1297 sync
+= core
.i
.use_svp64_ldst_dec
.eq(ldst_dec
)
1298 # after decoding, reset any previous exception condition,
1299 # allowing it to be set again during the next execution
1300 sync
+= pdecode2
.ldst_exc
.eq(0)
1302 m
.next
= "INSN_EXECUTE" # move to "execute"
1304 # handshake with execution FSM, move to "wait" once acknowledged
1305 with m
.State("INSN_EXECUTE"):
1306 # when using "single-step" mode, checking dbg.stopping_o
1307 # prevents progress. allow execute to proceed once started
1309 #if self.allow_overlap:
1310 # stopping = dbg.stopping_o
1311 with m
.If(stopping
):
1312 # stopping: jump back to idle
1313 m
.next
= "ISSUE_START"
1315 # request the icache to stop asserting "failed"
1316 comb
+= core
.icache
.flush_in
.eq(1)
1317 # stop instruction fault
1318 sync
+= pdecode2
.instr_fault
.eq(0)
1320 comb
+= exec_insn_i_valid
.eq(1) # trigger execute
1321 with m
.If(exec_insn_o_ready
): # execute acknowledged us
1322 m
.next
= "EXECUTE_WAIT"
1324 with m
.State("EXECUTE_WAIT"):
1325 comb
+= exec_pc_i_ready
.eq(1)
1326 # see https://bugs.libre-soc.org/show_bug.cgi?id=636
1327 # the exception info needs to be blatted into
1328 # pdecode.ldst_exc, and the instruction "re-run".
1329 # when ldst_exc.happened is set, the PowerDecoder2
1330 # reacts very differently: it re-writes the instruction
1331 # with a "trap" (calls PowerDecoder2.trap()) which
1332 # will *overwrite* whatever was requested and jump the
1333 # PC to the exception address, as well as alter MSR.
1334 # nothing else needs to be done other than to note
1335 # the change of PC and MSR (and, later, SVSTATE)
1336 with m
.If(exc_happened
):
1337 mmu
= core
.fus
.get_exc("mmu0")
1338 ldst
= core
.fus
.get_exc("ldst0")
1340 with m
.If(fetch_failed
):
1341 # instruction fetch: exception is from MMU
1342 # reset instr_fault (highest priority)
1343 sync
+= pdecode2
.ldst_exc
.eq(mmu
)
1344 sync
+= pdecode2
.instr_fault
.eq(0)
1346 # request icache to stop asserting "failed"
1347 comb
+= core
.icache
.flush_in
.eq(1)
1348 with m
.If(~fetch_failed
):
1349 # otherwise assume it was a LDST exception
1350 sync
+= pdecode2
.ldst_exc
.eq(ldst
)
1352 with m
.If(exec_pc_o_valid
):
1354 # was this the last loop iteration?
1356 cur_vl
= cur_state
.svstate
.vl
1357 comb
+= is_last
.eq(next_srcstep
== cur_vl
)
1359 with m
.If(pdecode2
.instr_fault
):
1360 # reset instruction fault, try again
1361 sync
+= pdecode2
.instr_fault
.eq(0)
1362 m
.next
= "ISSUE_START"
1364 # return directly to Decode if Execute generated an
1366 with m
.Elif(pdecode2
.ldst_exc
.happened
):
1367 m
.next
= "DECODE_SV"
1369 # if MSR, PC or SVSTATE were changed by the previous
1370 # instruction, go directly back to Fetch, without
1371 # updating either MSR PC or SVSTATE
1372 with m
.Elif(self
.msr_changed | self
.pc_changed |
1374 m
.next
= "ISSUE_START"
1376 # also return to Fetch, when no output was a vector
1377 # (regardless of SRCSTEP and VL), or when the last
1378 # instruction was really the last one of the VL loop
1379 with m
.Elif((~pdecode2
.loop_continue
) | is_last
):
1380 # before going back to fetch, update the PC state
1381 # register with the NIA.
1382 # ok here we are not reading the branch unit.
1383 # TODO: this just blithely overwrites whatever
1384 # pipeline updated the PC
1385 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
1386 comb
+= self
.state_w_pc
.i_data
.eq(nia
)
1387 # reset SRCSTEP before returning to Fetch
1389 with m
.If(pdecode2
.loop_continue
):
1390 comb
+= new_svstate
.srcstep
.eq(0)
1391 comb
+= new_svstate
.dststep
.eq(0)
1392 comb
+= self
.update_svstate
.eq(1)
1394 comb
+= new_svstate
.srcstep
.eq(0)
1395 comb
+= new_svstate
.dststep
.eq(0)
1396 comb
+= self
.update_svstate
.eq(1)
1397 m
.next
= "ISSUE_START"
1399 # returning to Execute? then, first update SRCSTEP
1401 comb
+= new_svstate
.srcstep
.eq(next_srcstep
)
1402 comb
+= new_svstate
.dststep
.eq(next_dststep
)
1403 comb
+= self
.update_svstate
.eq(1)
1404 # return to mask skip loop
1405 m
.next
= "PRED_SKIP"
1408 # check if svstate needs updating: if so, write it to State Regfile
1409 with m
.If(self
.update_svstate
):
1410 sync
+= cur_state
.svstate
.eq(self
.new_svstate
) # for next clock
1412 def execute_fsm(self
, m
, core
,
1413 exec_insn_i_valid
, exec_insn_o_ready
,
1414 exec_pc_o_valid
, exec_pc_i_ready
):
1417 execute FSM. this interacts with the "issue" FSM
1418 through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
1419 (outgoing). SVP64 RM prefixes have already been set up by the
1420 "issue" phase, so execute is fairly straightforward.
1426 pdecode2
= self
.pdecode2
1429 core_busy_o
= core
.n
.o_data
.busy_o
# core is busy
1430 core_ivalid_i
= core
.p
.i_valid
# instruction is valid
1432 if hasattr(core
, "icache"):
1433 fetch_failed
= core
.icache
.i_out
.fetch_failed
1435 fetch_failed
= Const(0, 1)
1437 with m
.FSM(name
="exec_fsm"):
1439 # waiting for instruction bus (stays there until not busy)
1440 with m
.State("INSN_START"):
1441 comb
+= exec_insn_o_ready
.eq(1)
1442 with m
.If(exec_insn_i_valid
):
1443 comb
+= core_ivalid_i
.eq(1) # instruction is valid/issued
1444 sync
+= self
.sv_changed
.eq(0)
1445 sync
+= self
.pc_changed
.eq(0)
1446 sync
+= self
.msr_changed
.eq(0)
1447 with m
.If(core
.p
.o_ready
): # only move if accepted
1448 m
.next
= "INSN_ACTIVE" # move to "wait completion"
1450 # instruction started: must wait till it finishes
1451 with m
.State("INSN_ACTIVE"):
1452 # note changes to MSR, PC and SVSTATE
1453 # XXX oops, really must monitor *all* State Regfile write
1454 # ports looking for changes!
1455 with m
.If(self
.state_nia
.wen
& (1 << StateRegs
.SVSTATE
)):
1456 sync
+= self
.sv_changed
.eq(1)
1457 with m
.If(self
.state_nia
.wen
& (1 << StateRegs
.MSR
)):
1458 sync
+= self
.msr_changed
.eq(1)
1459 with m
.If(self
.state_nia
.wen
& (1 << StateRegs
.PC
)):
1460 sync
+= self
.pc_changed
.eq(1)
1461 with m
.If(~core_busy_o
): # instruction done!
1462 comb
+= exec_pc_o_valid
.eq(1)
1463 with m
.If(exec_pc_i_ready
):
1464 # when finished, indicate "done".
1465 # however, if there was an exception, the instruction
1466 # is *not* yet done. this is an implementation
1467 # detail: we choose to implement exceptions by
1468 # taking the exception information from the LDST
1469 # unit, putting that *back* into the PowerDecoder2,
1470 # and *re-running the entire instruction*.
1471 # if we erroneously indicate "done" here, it is as if
1472 # there were *TWO* instructions:
1473 # 1) the failed LDST 2) a TRAP.
1474 with m
.If(~pdecode2
.ldst_exc
.happened
&
1475 ~pdecode2
.instr_fault
):
1476 comb
+= self
.insn_done
.eq(1)
1477 m
.next
= "INSN_START" # back to fetch
1478 # terminate returns directly to INSN_START
1479 with m
.If(dbg
.terminate_i
):
1480 # comb += self.insn_done.eq(1) - no because it's not
1481 m
.next
= "INSN_START" # back to fetch
1483 def elaborate(self
, platform
):
1484 m
= super().elaborate(platform
)
1486 comb
, sync
= m
.d
.comb
, m
.d
.sync
1487 cur_state
= self
.cur_state
1488 pdecode2
= self
.pdecode2
1492 # set up peripherals and core
1493 core_rst
= self
.core_rst
1495 # indicate to outside world if any FU is still executing
1496 comb
+= self
.any_busy
.eq(core
.n
.o_data
.any_busy_o
) # any FU executing
1498 # address of the next instruction, in the absence of a branch
1499 # depends on the instruction size
1502 # connect up debug signals
1503 with m
.If(core
.o
.core_terminate_o
):
1504 comb
+= dbg
.terminate_i
.eq(1)
1506 # pass the prefix mode from Fetch to Issue, so the latter can loop
1508 is_svp64_mode
= Signal()
1510 # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
1511 # issue, decode/execute, now joined by "Predicate fetch/calculate".
1512 # these are the handshake signals between each
1514 # fetch FSM can run as soon as the PC is valid
1515 fetch_pc_i_valid
= Signal() # Execute tells Fetch "start next read"
1516 fetch_pc_o_ready
= Signal() # Fetch Tells SVSTATE "proceed"
1518 # fetch FSM hands over the instruction to be decoded / issued
1519 fetch_insn_o_valid
= Signal()
1520 fetch_insn_i_ready
= Signal()
1522 # predicate fetch FSM decodes and fetches the predicate
1523 pred_insn_i_valid
= Signal()
1524 pred_insn_o_ready
= Signal()
1526 # predicate fetch FSM delivers the masks
1527 pred_mask_o_valid
= Signal()
1528 pred_mask_i_ready
= Signal()
1530 # issue FSM delivers the instruction to the be executed
1531 exec_insn_i_valid
= Signal()
1532 exec_insn_o_ready
= Signal()
1534 # execute FSM, hands over the PC/SVSTATE back to the issue FSM
1535 exec_pc_o_valid
= Signal()
1536 exec_pc_i_ready
= Signal()
1538 # the FSMs here are perhaps unusual in that they detect conditions
1539 # then "hold" information, combinatorially, for the core
1540 # (as opposed to using sync - which would be on a clock's delay)
1541 # this includes the actual opcode, valid flags and so on.
1543 # Fetch, then predicate fetch, then Issue, then Execute.
1544 # Issue is where the VL for-loop # lives. the ready/valid
1545 # signalling is used to communicate between the four.
1548 fetch
= FetchFSM(self
.allow_overlap
, self
.svp64_en
,
1549 self
.imem
, core_rst
, pdecode2
, cur_state
,
1551 dbg
.state
.svstate
, # combinatorially same
1553 m
.submodules
.fetch
= fetch
1554 # connect up in/out data to existing Signals
1555 comb
+= fetch
.p
.i_data
.pc
.eq(dbg
.state
.pc
) # combinatorially same
1556 comb
+= fetch
.p
.i_data
.msr
.eq(dbg
.state
.msr
) # combinatorially same
1557 # and the ready/valid signalling
1558 comb
+= fetch_pc_o_ready
.eq(fetch
.p
.o_ready
)
1559 comb
+= fetch
.p
.i_valid
.eq(fetch_pc_i_valid
)
1560 comb
+= fetch_insn_o_valid
.eq(fetch
.n
.o_valid
)
1561 comb
+= fetch
.n
.i_ready
.eq(fetch_insn_i_ready
)
1563 self
.issue_fsm(m
, core
, nia
,
1564 dbg
, core_rst
, is_svp64_mode
,
1565 fetch_pc_o_ready
, fetch_pc_i_valid
,
1566 fetch_insn_o_valid
, fetch_insn_i_ready
,
1567 pred_insn_i_valid
, pred_insn_o_ready
,
1568 pred_mask_o_valid
, pred_mask_i_ready
,
1569 exec_insn_i_valid
, exec_insn_o_ready
,
1570 exec_pc_o_valid
, exec_pc_i_ready
)
1573 self
.fetch_predicate_fsm(m
,
1574 pred_insn_i_valid
, pred_insn_o_ready
,
1575 pred_mask_o_valid
, pred_mask_i_ready
)
1577 self
.execute_fsm(m
, core
,
1578 exec_insn_i_valid
, exec_insn_o_ready
,
1579 exec_pc_o_valid
, exec_pc_i_ready
)
class TestIssuer(Elaboratable):
    """TestIssuer - wraps TestIssuerInternal and optionally a (Dummy) PLL,
    wiring up the clock domains and exposing the external ports.
    """
    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        self.dbg_rst_i = Signal(reset_less=True)

        # PLL direct clock or not
        self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
        if self.pll_en:
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk = ClockSignal()  # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = ti = self.ti

        if self.pll_en:
            # ClockSelect runs at PLL output internal clock rate
            m.submodules.wrappll = pll = self.pll

            # add clock domains from PLL
            cd_pll = ClockDomain("pllclk")
            m.domains += cd_pll

            # PLL clock established.  has the side-effect of running clklsel
            # at the PLL's speed (see DomainRenamer("pllclk") above)
            pllclk = self.pllclk_clk
            comb += pllclk.eq(pll.clk_pll_o)

            # wire up external 24mhz to PLL
            #comb += pll.clk_24_i.eq(self.ref_clk)
            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals.  don't mind them being in this domain
            pll_rst = ResetSignal("pllclk")
            comb += pll_rst.eq(ResetSignal())

        # internal clock is set to selector clock-out.  has the side-effect of
        # running TestIssuer at this speed (see DomainRenamer("intclk") above)
        # debug clock runs at coresync internal clock
        if self.ti.dbg_domain != 'sync':
            cd_dbgsync = ClockDomain("dbgsync")
        intclk = ClockSignal(self.ti.core_domain)
        dbgclk = ClockSignal(self.ti.dbg_domain)
        # XXX BYPASS PLL XXX
        # XXX BYPASS PLL XXX
        # XXX BYPASS PLL XXX
        if self.pll_en:
            comb += intclk.eq(self.ref_clk)
            assert self.ti.core_domain != 'sync', \
                "cannot set core_domain to sync and use pll at the same time"
        else:
            if self.ti.core_domain != 'sync':
                comb += intclk.eq(ClockSignal())
        if self.ti.dbg_domain != 'sync':
            dbgclk = ClockSignal(self.ti.dbg_domain)
            comb += dbgclk.eq(intclk)
        comb += self.ti.dbg_rst_i.eq(self.dbg_rst_i)

        return m

    def ports(self):
        return list(self.ti.ports()) + list(self.pll.ports()) + \
            [ClockSignal(), ResetSignal()]

    def external_ports(self):
        ports = self.ti.external_ports()
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        if self.pll_en:
            ports.append(self.clk_sel_i)
            ports.append(self.pll.clk_24_i)
            ports.append(self.pll_test_o)
            ports.append(self.pll_vco_o)
            ports.append(self.pllclk_clk)
            ports.append(self.ref_clk)
        return ports
1676 if __name__
== '__main__':
1677 units
= {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
1683 pspec
= TestMemPspec(ldst_ifacetype
='bare_wb',
1684 imem_ifacetype
='bare_wb',
1689 dut
= TestIssuer(pspec
)
1690 vl
= main(dut
, ports
=dut
.ports(), name
="test_issuer")
1692 if len(sys
.argv
) == 1:
1693 vl
= rtlil
.convert(dut
, ports
=dut
.external_ports(), name
="test_issuer")
1694 with
open("test_issuer.il", "w") as f
: