3 not in any way intended for production use. this runs a FSM that:
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
10 * does it all over again
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to provide something that can be further incrementally
18 from nmigen
import (Elaboratable
, Module
, Signal
, ClockSignal
, ResetSignal
,
19 ClockDomain
, DomainRenamer
, Mux
, Const
, Repl
, Cat
)
20 from nmigen
.cli
import rtlil
21 from nmigen
.cli
import main
24 from nmutil
.singlepipe
import ControlBase
25 from soc
.simple
.core_data
import FetchOutput
, FetchInput
27 from nmigen
.lib
.coding
import PriorityEncoder
29 from openpower
.decoder
.power_decoder
import create_pdecode
30 from openpower
.decoder
.power_decoder2
import PowerDecode2
, SVP64PrefixDecoder
31 from openpower
.decoder
.decode2execute1
import IssuerDecode2ToOperand
32 from openpower
.decoder
.decode2execute1
import Data
33 from openpower
.decoder
.power_enums
import (MicrOp
, SVP64PredInt
, SVP64PredCR
,
35 from openpower
.state
import CoreState
36 from openpower
.consts
import (CR
, SVP64CROffs
)
37 from soc
.experiment
.testmem
import TestMemory
# test only for instructions
38 from soc
.regfile
.regfiles
import StateRegs
, FastRegs
39 from soc
.simple
.core
import NonProductionCore
40 from soc
.config
.test
.test_loadstore
import TestMemPspec
41 from soc
.config
.ifetch
import ConfigFetchUnit
42 from soc
.debug
.dmi
import CoreDebug
, DMIInterface
43 from soc
.debug
.jtag
import JTAG
44 from soc
.config
.pinouts
import get_pinspecs
45 from soc
.interrupts
.xics
import XICS_ICP
, XICS_ICS
46 from soc
.bus
.simple_gpio
import SimpleGPIO
47 from soc
.bus
.SPBlock512W64B8W
import SPBlock512W64B8W
48 from soc
.clock
.select
import ClockSelect
49 from soc
.clock
.dummypll
import DummyPLL
50 from openpower
.sv
.svstate
import SVSTATERec
51 from soc
.experiment
.icache
import ICache
53 from nmutil
.util
import rising_edge
def get_insn(f_instr_o, pc):
    """Select the 32-bit instruction word from the fetch unit's output.

    * f_instr_o: instruction data produced by the fetch unit.  only its
      ``width`` attribute and ``word_select`` method are used here.
    * pc: the program counter the data was fetched for.  bit 2 picks the
      word when the fetch output is not exactly 32 bits wide.

    returns f_instr_o itself when it is 32 bits wide, otherwise the
    32-bit word of f_instr_o selected by pc[2].
    """
    if f_instr_o.width == 32:
        # fetch output is already one instruction wide: pass it through
        return f_instr_o
    # 64-bit: bit 2 of pc decides which word to select
    return f_instr_o.word_select(pc[2], 32)
63 # gets state input or reads from state regfile
def state_get(m, core_rst, state_i, name, regfile, regnum):
    """Get a 64-bit state value from an override input or from a regfile.

    * m: the Module being built; statements are added to its comb and
      sync domains.
    * core_rst: core reset signal.  nothing is driven while it is set.
    * state_i: incoming override record: ``ok`` requests the override,
      ``data`` is the value to use.
    * name: name given to the result Signal (and used as the prefix of
      the internal "<name>_ok_delay" Signal).
    * regfile: regfile read port; ``ren`` is driven and ``o_data`` read.
    * regnum: register number, turned into a unary read-enable (1<<regnum).

    returns the 64-bit result Signal.
    """
    # NOTE(review): the reset guard and the nesting below were rebuilt
    # from a damaged copy of this function - confirm against upstream.
    comb = m.d.comb
    sync = m.d.sync
    res = Signal(64, reset_less=True, name=name)
    res_ok_delay = Signal(name="%s_ok_delay" % name)
    with m.If(~core_rst):
        # remember, for the next clock, that a regfile read was requested
        sync += res_ok_delay.eq(~state_i.ok)
        with m.If(state_i.ok):
            # incoming override (start from pc_i)
            comb += res.eq(state_i.data)
        with m.Else():
            # otherwise read StateRegs regfile for PC...
            comb += regfile.ren.eq(1 << regnum)
        # ... but on a 1-clock delay
        with m.If(res_ok_delay):
            comb += res.eq(regfile.o_data)
    return res
def get_predint(m, mask, name):
    """decode SVP64 predicate integer mask field to reg number and invert

    this is identical to the equivalent function in ISACaller except that
    it doesn't read the INT directly, it just decodes "what needs to be done"
    i.e. which INT reg, whether it is shifted and whether it is bit-inverted.

    * m: the Module being built; statements are added to its comb domain
    * mask: the predicate mask field to decode (SVP64PredInt values)
    * name: prefix for the names of the Signals created here

    returns a tuple (regread, invert, unary, all1s) of Signals:

    * all1s is set to indicate that no mask is to be applied.
    * regread indicates the GPR register number to be read
    * invert is set to indicate that the register value is to be inverted
    * unary indicates that the contents of the register is to be shifted 1<<r3
    """
    comb = m.d.comb
    regread = Signal(5, name=name+"regread")
    invert = Signal(name=name+"invert")
    unary = Signal(name=name+"unary")
    all1s = Signal(name=name+"all1s")
    with m.Switch(mask):
        with m.Case(SVP64PredInt.ALWAYS.value):
            comb += all1s.eq(1)      # use 0b1111 (all ones)
        with m.Case(SVP64PredInt.R3_UNARY.value):
            comb += regread.eq(3)
            comb += unary.eq(1)      # 1<<r3 - shift r3 (single bit)
        with m.Case(SVP64PredInt.R3.value):
            comb += regread.eq(3)
        with m.Case(SVP64PredInt.R3_N.value):
            comb += regread.eq(3)
            comb += invert.eq(1)     # "_N" variants use the inverted reg
        with m.Case(SVP64PredInt.R10.value):
            comb += regread.eq(10)
        with m.Case(SVP64PredInt.R10_N.value):
            comb += regread.eq(10)
            comb += invert.eq(1)
        with m.Case(SVP64PredInt.R30.value):
            comb += regread.eq(30)
        with m.Case(SVP64PredInt.R30_N.value):
            comb += regread.eq(30)
            comb += invert.eq(1)
    return regread, invert, unary, all1s
def get_predcr(m, mask, name):
    """decode SVP64 predicate CR to reg number field and invert status

    this is identical to _get_predcr in ISACaller

    * m: the Module being built; statements are added to its comb domain
    * mask: the predicate mask field to decode (SVP64PredCR values)
    * name: prefix for the names of the Signals created here

    returns a tuple (idx, invert) of Signals: idx is the bit of the CR
    field to test (CR.LT / CR.GT / CR.EQ / CR.SO) and invert is set when
    the opposite condition (GE / LE / NE / NS) was requested.
    """
    comb = m.d.comb
    idx = Signal(2, name=name+"idx")
    invert = Signal(name=name+"crinvert")
    with m.Switch(mask):
        with m.Case(SVP64PredCR.LT.value):
            comb += idx.eq(CR.LT)
            comb += invert.eq(0)
        with m.Case(SVP64PredCR.GE.value):
            # GE is "not LT": same CR bit, inverted
            comb += idx.eq(CR.LT)
            comb += invert.eq(1)
        with m.Case(SVP64PredCR.GT.value):
            comb += idx.eq(CR.GT)
            comb += invert.eq(0)
        with m.Case(SVP64PredCR.LE.value):
            # LE is "not GT"
            comb += idx.eq(CR.GT)
            comb += invert.eq(1)
        with m.Case(SVP64PredCR.EQ.value):
            comb += idx.eq(CR.EQ)
            comb += invert.eq(0)
        with m.Case(SVP64PredCR.NE.value):
            # NE is "not EQ"
            comb += idx.eq(CR.EQ)
            comb += invert.eq(1)
        with m.Case(SVP64PredCR.SO.value):
            comb += idx.eq(CR.SO)
            comb += invert.eq(0)
        with m.Case(SVP64PredCR.NS.value):
            # NS is "not SO"
            comb += idx.eq(CR.SO)
            comb += invert.eq(1)
    return idx, invert
161 # Fetch Finite State Machine.
162 # WARNING: there are currently DriverConflicts but it's actually working.
163 # TODO, here: everything that is global in nature, information from the
164 # main TestIssuerInternal, needs to move to either ispec() or ospec().
165 # not only that: TestIssuerInternal.imem can entirely move into here
166 # because imem is only ever accessed inside the FetchFSM.
167 class FetchFSM(ControlBase
):
168 def __init__(self
, allow_overlap
, svp64_en
, imem
, core_rst
,
170 dbg
, core
, svstate
, nia
, is_svp64_mode
):
171 self
.allow_overlap
= allow_overlap
172 self
.svp64_en
= svp64_en
174 self
.core_rst
= core_rst
175 self
.pdecode2
= pdecode2
176 self
.cur_state
= cur_state
179 self
.svstate
= svstate
181 self
.is_svp64_mode
= is_svp64_mode
183 # set up pipeline ControlBase and allocate i/o specs
184 # (unusual: normally done by the Pipeline API)
185 super().__init
__(stage
=self
)
186 self
.p
.i_data
, self
.n
.o_data
= self
.new_specs(None)
187 self
.i
, self
.o
= self
.p
.i_data
, self
.n
.o_data
189 # next 3 functions are Stage API Compliance
190 def setup(self
, m
, i
):
199 def elaborate(self
, platform
):
202 this FSM performs fetch of raw instruction data, partial-decodes
203 it 32-bit at a time to detect SVP64 prefixes, and will optionally
204 read a 2nd 32-bit quantity if that occurs.
206 m
= super().elaborate(platform
)
211 svstate
= self
.svstate
213 is_svp64_mode
= self
.is_svp64_mode
214 fetch_pc_o_ready
= self
.p
.o_ready
215 fetch_pc_i_valid
= self
.p
.i_valid
216 fetch_insn_o_valid
= self
.n
.o_valid
217 fetch_insn_i_ready
= self
.n
.i_ready
221 pdecode2
= self
.pdecode2
222 cur_state
= self
.cur_state
223 dec_opcode_o
= pdecode2
.dec
.raw_opcode_in
# raw opcode
225 msr_read
= Signal(reset
=1)
227 # also note instruction fetch failed
228 if hasattr(core
, "icache"):
229 fetch_failed
= core
.icache
.i_out
.fetch_failed
232 fetch_failed
= Const(0, 1)
235 # don't read msr every cycle
236 staterf
= self
.core
.regs
.rf
['state']
237 state_r_msr
= staterf
.r_ports
['msr'] # MSR rd
239 comb
+= state_r_msr
.ren
.eq(0)
241 with m
.FSM(name
='fetch_fsm'):
244 with m
.State("IDLE"):
245 with m
.If(~dbg
.stopping_o
& ~fetch_failed
):
246 comb
+= fetch_pc_o_ready
.eq(1)
247 with m
.If(fetch_pc_i_valid
& ~fetch_failed
):
248 # instruction allowed to go: start by reading the PC
249 # capture the PC and also drop it into Insn Memory
250 # we have joined a pair of combinatorial memory
251 # lookups together. this is Generally Bad.
252 comb
+= self
.imem
.a_pc_i
.eq(pc
)
253 comb
+= self
.imem
.a_i_valid
.eq(1)
254 comb
+= self
.imem
.f_i_valid
.eq(1)
255 sync
+= cur_state
.pc
.eq(pc
)
256 sync
+= cur_state
.svstate
.eq(svstate
) # and svstate
258 # initiate read of MSR. arrives one clock later
259 comb
+= state_r_msr
.ren
.eq(1 << StateRegs
.MSR
)
260 sync
+= msr_read
.eq(0)
262 m
.next
= "INSN_READ" # move to "wait for bus" phase
264 # dummy pause to find out why simulation is not keeping up
265 with m
.State("INSN_READ"):
266 if self
.allow_overlap
:
267 stopping
= dbg
.stopping_o
271 # stopping: jump back to idle
274 # one cycle later, msr/sv read arrives. valid only once.
275 with m
.If(~msr_read
):
276 sync
+= msr_read
.eq(1) # yeah don't read it again
277 sync
+= cur_state
.msr
.eq(state_r_msr
.o_data
)
278 with m
.If(self
.imem
.f_busy_o
& ~fetch_failed
): # zzz...
279 # busy but not fetch failed: stay in wait-read
280 comb
+= self
.imem
.a_i_valid
.eq(1)
281 comb
+= self
.imem
.f_i_valid
.eq(1)
283 # not busy (or fetch failed!): instruction fetched
284 # when fetch failed, the instruction gets ignored
286 insn
= get_insn(self
.imem
.f_instr_o
, cur_state
.pc
)
289 # decode the SVP64 prefix, if any
290 comb
+= svp64
.raw_opcode_in
.eq(insn
)
291 comb
+= svp64
.bigendian
.eq(self
.core_bigendian_i
)
292 # pass the decoded prefix (if any) to PowerDecoder2
293 sync
+= pdecode2
.sv_rm
.eq(svp64
.svp64_rm
)
294 sync
+= pdecode2
.is_svp64_mode
.eq(is_svp64_mode
)
295 # remember whether this is a prefixed instruction,
296 # so the FSM can readily loop when VL==0
297 sync
+= is_svp64_mode
.eq(svp64
.is_svp64_mode
)
298 # calculate the address of the following instruction
299 insn_size
= Mux(svp64
.is_svp64_mode
, 8, 4)
300 sync
+= nia
.eq(cur_state
.pc
+ insn_size
)
301 with m
.If(~svp64
.is_svp64_mode
):
302 # with no prefix, store the instruction
303 # and hand it directly to the next FSM
304 sync
+= dec_opcode_o
.eq(insn
)
305 m
.next
= "INSN_READY"
307 # fetch the rest of the instruction from memory
308 comb
+= self
.imem
.a_pc_i
.eq(cur_state
.pc
+ 4)
309 comb
+= self
.imem
.a_i_valid
.eq(1)
310 comb
+= self
.imem
.f_i_valid
.eq(1)
311 m
.next
= "INSN_READ2"
313 # not SVP64 - 32-bit only
314 sync
+= nia
.eq(cur_state
.pc
+ 4)
315 sync
+= dec_opcode_o
.eq(insn
)
316 m
.next
= "INSN_READY"
318 with m
.State("INSN_READ2"):
319 with m
.If(self
.imem
.f_busy_o
): # zzz...
320 # busy: stay in wait-read
321 comb
+= self
.imem
.a_i_valid
.eq(1)
322 comb
+= self
.imem
.f_i_valid
.eq(1)
324 # not busy: instruction fetched
325 insn
= get_insn(self
.imem
.f_instr_o
, cur_state
.pc
+4)
326 sync
+= dec_opcode_o
.eq(insn
)
327 m
.next
= "INSN_READY"
328 # TODO: probably can start looking at pdecode2.rm_dec
329 # here or maybe even in INSN_READ state, if svp64_mode
330 # detected, in order to trigger - and wait for - the
333 pmode
= pdecode2
.rm_dec
.predmode
335 if pmode != SVP64PredMode.ALWAYS.value:
336 fire predicate loading FSM and wait before
339 sync += self.srcmask.eq(-1) # set to all 1s
340 sync += self.dstmask.eq(-1) # set to all 1s
341 m.next = "INSN_READY"
344 with m
.State("INSN_READY"):
345 # hand over the instruction, to be decoded
346 comb
+= fetch_insn_o_valid
.eq(1)
347 with m
.If(fetch_insn_i_ready
):
350 # whatever was done above, over-ride it if core reset is held
351 with m
.If(self
.core_rst
):
357 class TestIssuerInternal(Elaboratable
):
358 """TestIssuer - reads instructions from TestMemory and issues them
360 efficiency and speed is not the main goal here: functional correctness
361 and code clarity is. optimisations (which almost 100% interfere with
362 easy understanding) come later.
365 def __init__(self
, pspec
):
367 # test is SVP64 is to be enabled
368 self
.svp64_en
= hasattr(pspec
, "svp64") and (pspec
.svp64
== True)
370 # and if regfiles are reduced
371 self
.regreduce_en
= (hasattr(pspec
, "regreduce") and
372 (pspec
.regreduce
== True))
374 # and if overlap requested
375 self
.allow_overlap
= (hasattr(pspec
, "allow_overlap") and
376 (pspec
.allow_overlap
== True))
378 # JTAG interface. add this right at the start because if it's
379 # added it *modifies* the pspec, by adding enable/disable signals
380 # for parts of the rest of the core
381 self
.jtag_en
= hasattr(pspec
, "debug") and pspec
.debug
== 'jtag'
382 self
.dbg_domain
= "sync" # sigh "dbgsunc" too problematic
383 # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
385 # XXX MUST keep this up-to-date with litex, and
386 # soc-cocotb-sim, and err.. all needs sorting out, argh
389 'eint', 'gpio', 'mspi0',
390 # 'mspi1', - disabled for now
391 # 'pwm', 'sd0', - disabled for now
393 self
.jtag
= JTAG(get_pinspecs(subset
=subset
),
394 domain
=self
.dbg_domain
)
395 # add signals to pspec to enable/disable icache and dcache
396 # (or data and intstruction wishbone if icache/dcache not included)
397 # https://bugs.libre-soc.org/show_bug.cgi?id=520
398 # TODO: do we actually care if these are not domain-synchronised?
399 # honestly probably not.
400 pspec
.wb_icache_en
= self
.jtag
.wb_icache_en
401 pspec
.wb_dcache_en
= self
.jtag
.wb_dcache_en
402 self
.wb_sram_en
= self
.jtag
.wb_sram_en
404 self
.wb_sram_en
= Const(1)
406 # add 4k sram blocks?
407 self
.sram4x4k
= (hasattr(pspec
, "sram4x4kblock") and
408 pspec
.sram4x4kblock
== True)
412 self
.sram4k
.append(SPBlock512W64B8W(name
="sram4k_%d" % i
,
416 # add interrupt controller?
417 self
.xics
= hasattr(pspec
, "xics") and pspec
.xics
== True
419 self
.xics_icp
= XICS_ICP()
420 self
.xics_ics
= XICS_ICS()
421 self
.int_level_i
= self
.xics_ics
.int_level_i
423 # add GPIO peripheral?
424 self
.gpio
= hasattr(pspec
, "gpio") and pspec
.gpio
== True
426 self
.simple_gpio
= SimpleGPIO()
427 self
.gpio_o
= self
.simple_gpio
.gpio_o
429 # main instruction core. suitable for prototyping / demo only
430 self
.core
= core
= NonProductionCore(pspec
)
431 self
.core_rst
= ResetSignal("coresync")
433 # instruction decoder. goes into Trap Record
434 #pdecode = create_pdecode()
435 self
.cur_state
= CoreState("cur") # current state (MSR/PC/SVSTATE)
436 self
.pdecode2
= PowerDecode2(None, state
=self
.cur_state
,
437 opkls
=IssuerDecode2ToOperand
,
438 svp64_en
=self
.svp64_en
,
439 regreduce_en
=self
.regreduce_en
)
440 pdecode
= self
.pdecode2
.dec
443 self
.svp64
= SVP64PrefixDecoder() # for decoding SVP64 prefix
445 # Test Instruction memory
446 if hasattr(core
, "icache"):
447 # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
448 # truly dreadful. needs a huge reorg.
449 pspec
.icache
= core
.icache
450 self
.imem
= ConfigFetchUnit(pspec
).fu
453 self
.dbg
= CoreDebug()
455 # instruction go/monitor
456 self
.pc_o
= Signal(64, reset_less
=True)
457 self
.pc_i
= Data(64, "pc_i") # set "ok" to indicate "please change me"
458 self
.svstate_i
= Data(64, "svstate_i") # ditto
459 self
.core_bigendian_i
= Signal() # TODO: set based on MSR.LE
460 self
.busy_o
= Signal(reset_less
=True)
461 self
.memerr_o
= Signal(reset_less
=True)
463 # STATE regfile read /write ports for PC, MSR, SVSTATE
464 staterf
= self
.core
.regs
.rf
['state']
465 self
.state_r_pc
= staterf
.r_ports
['cia'] # PC rd
466 self
.state_w_pc
= staterf
.w_ports
['d_wr1'] # PC wr
467 self
.state_r_sv
= staterf
.r_ports
['sv'] # SVSTATE rd
468 self
.state_w_sv
= staterf
.w_ports
['sv'] # SVSTATE wr
470 # DMI interface access
471 intrf
= self
.core
.regs
.rf
['int']
472 crrf
= self
.core
.regs
.rf
['cr']
473 xerrf
= self
.core
.regs
.rf
['xer']
474 self
.int_r
= intrf
.r_ports
['dmi'] # INT read
475 self
.cr_r
= crrf
.r_ports
['full_cr_dbg'] # CR read
476 self
.xer_r
= xerrf
.r_ports
['full_xer'] # XER read
480 self
.int_pred
= intrf
.r_ports
['pred'] # INT predicate read
481 self
.cr_pred
= crrf
.r_ports
['cr_pred'] # CR predicate read
483 # hack method of keeping an eye on whether branch/trap set the PC
484 self
.state_nia
= self
.core
.regs
.rf
['state'].w_ports
['nia']
485 self
.state_nia
.wen
.name
= 'state_nia_wen'
487 # pulse to synchronize the simulator at instruction end
488 self
.insn_done
= Signal()
490 # indicate any instruction still outstanding, in execution
491 self
.any_busy
= Signal()
494 # store copies of predicate masks
495 self
.srcmask
= Signal(64)
496 self
.dstmask
= Signal(64)
498 def fetch_predicate_fsm(self
, m
,
499 pred_insn_i_valid
, pred_insn_o_ready
,
500 pred_mask_o_valid
, pred_mask_i_ready
):
501 """fetch_predicate_fsm - obtains (constructs in the case of CR)
502 src/dest predicate masks
504 https://bugs.libre-soc.org/show_bug.cgi?id=617
505 the predicates can be read here, by using IntRegs r_ports['pred']
506 or CRRegs r_ports['pred']. in the case of CRs it will have to
507 be done through multiple reads, extracting one relevant at a time.
508 later, a faster way would be to use the 32-bit-wide CR port but
509 this is more complex decoding, here. equivalent code used in
510 ISACaller is "from openpower.decoder.isa.caller import get_predcr"
512 note: this ENTIRE FSM is not to be called when svp64 is disabled
516 pdecode2
= self
.pdecode2
517 rm_dec
= pdecode2
.rm_dec
# SVP64RMModeDecode
518 predmode
= rm_dec
.predmode
519 srcpred
, dstpred
= rm_dec
.srcpred
, rm_dec
.dstpred
520 cr_pred
, int_pred
= self
.cr_pred
, self
.int_pred
# read regfiles
521 # get src/dst step, so we can skip already used mask bits
522 cur_state
= self
.cur_state
523 srcstep
= cur_state
.svstate
.srcstep
524 dststep
= cur_state
.svstate
.dststep
525 cur_vl
= cur_state
.svstate
.vl
528 sregread
, sinvert
, sunary
, sall1s
= get_predint(m
, srcpred
, 's')
529 dregread
, dinvert
, dunary
, dall1s
= get_predint(m
, dstpred
, 'd')
530 sidx
, scrinvert
= get_predcr(m
, srcpred
, 's')
531 didx
, dcrinvert
= get_predcr(m
, dstpred
, 'd')
533 # store fetched masks, for either intpred or crpred
534 # when src/dst step is not zero, the skipped mask bits need to be
535 # shifted-out, before actually storing them in src/dest mask
536 new_srcmask
= Signal(64, reset_less
=True)
537 new_dstmask
= Signal(64, reset_less
=True)
539 with m
.FSM(name
="fetch_predicate"):
541 with m
.State("FETCH_PRED_IDLE"):
542 comb
+= pred_insn_o_ready
.eq(1)
543 with m
.If(pred_insn_i_valid
):
544 with m
.If(predmode
== SVP64PredMode
.INT
):
545 # skip fetching destination mask register, when zero
547 sync
+= new_dstmask
.eq(-1)
548 # directly go to fetch source mask register
549 # guaranteed not to be zero (otherwise predmode
550 # would be SVP64PredMode.ALWAYS, not INT)
551 comb
+= int_pred
.addr
.eq(sregread
)
552 comb
+= int_pred
.ren
.eq(1)
553 m
.next
= "INT_SRC_READ"
554 # fetch destination predicate register
556 comb
+= int_pred
.addr
.eq(dregread
)
557 comb
+= int_pred
.ren
.eq(1)
558 m
.next
= "INT_DST_READ"
559 with m
.Elif(predmode
== SVP64PredMode
.CR
):
560 # go fetch masks from the CR register file
561 sync
+= new_srcmask
.eq(0)
562 sync
+= new_dstmask
.eq(0)
565 sync
+= self
.srcmask
.eq(-1)
566 sync
+= self
.dstmask
.eq(-1)
567 m
.next
= "FETCH_PRED_DONE"
569 with m
.State("INT_DST_READ"):
570 # store destination mask
571 inv
= Repl(dinvert
, 64)
573 # set selected mask bit for 1<<r3 mode
574 dst_shift
= Signal(range(64))
575 comb
+= dst_shift
.eq(self
.int_pred
.o_data
& 0b111111)
576 sync
+= new_dstmask
.eq(1 << dst_shift
)
578 # invert mask if requested
579 sync
+= new_dstmask
.eq(self
.int_pred
.o_data ^ inv
)
580 # skip fetching source mask register, when zero
582 sync
+= new_srcmask
.eq(-1)
583 m
.next
= "FETCH_PRED_SHIFT_MASK"
584 # fetch source predicate register
586 comb
+= int_pred
.addr
.eq(sregread
)
587 comb
+= int_pred
.ren
.eq(1)
588 m
.next
= "INT_SRC_READ"
590 with m
.State("INT_SRC_READ"):
592 inv
= Repl(sinvert
, 64)
594 # set selected mask bit for 1<<r3 mode
595 src_shift
= Signal(range(64))
596 comb
+= src_shift
.eq(self
.int_pred
.o_data
& 0b111111)
597 sync
+= new_srcmask
.eq(1 << src_shift
)
599 # invert mask if requested
600 sync
+= new_srcmask
.eq(self
.int_pred
.o_data ^ inv
)
601 m
.next
= "FETCH_PRED_SHIFT_MASK"
603 # fetch masks from the CR register file
604 # implements the following loop:
605 # idx, inv = get_predcr(mask)
607 # for cr_idx in range(vl):
608 # cr = crl[cr_idx + SVP64CROffs.CRPred] # takes one cycle
610 # mask |= 1 << cr_idx
612 with m
.State("CR_READ"):
613 # CR index to be read, which will be ready by the next cycle
614 cr_idx
= Signal
.like(cur_vl
, reset_less
=True)
615 # submit the read operation to the regfile
616 with m
.If(cr_idx
!= cur_vl
):
617 # the CR read port is unary ...
619 # ... in MSB0 convention ...
620 # ren = 1 << (7 - cr_idx)
621 # ... and with an offset:
622 # ren = 1 << (7 - off - cr_idx)
623 idx
= SVP64CROffs
.CRPred
+ cr_idx
624 comb
+= cr_pred
.ren
.eq(1 << (7 - idx
))
625 # signal data valid in the next cycle
626 cr_read
= Signal(reset_less
=True)
627 sync
+= cr_read
.eq(1)
628 # load the next index
629 sync
+= cr_idx
.eq(cr_idx
+ 1)
632 sync
+= cr_read
.eq(0)
634 m
.next
= "FETCH_PRED_SHIFT_MASK"
636 # compensate for the one cycle delay on the regfile
637 cur_cr_idx
= Signal
.like(cur_vl
)
638 comb
+= cur_cr_idx
.eq(cr_idx
- 1)
639 # read the CR field, select the appropriate bit
643 comb
+= cr_field
.eq(cr_pred
.o_data
)
644 comb
+= scr_bit
.eq(cr_field
.bit_select(sidx
, 1)
646 comb
+= dcr_bit
.eq(cr_field
.bit_select(didx
, 1)
648 # set the corresponding mask bit
649 bit_to_set
= Signal
.like(self
.srcmask
)
650 comb
+= bit_to_set
.eq(1 << cur_cr_idx
)
652 sync
+= new_srcmask
.eq(new_srcmask | bit_to_set
)
654 sync
+= new_dstmask
.eq(new_dstmask | bit_to_set
)
656 with m
.State("FETCH_PRED_SHIFT_MASK"):
657 # shift-out skipped mask bits
658 sync
+= self
.srcmask
.eq(new_srcmask
>> srcstep
)
659 sync
+= self
.dstmask
.eq(new_dstmask
>> dststep
)
660 m
.next
= "FETCH_PRED_DONE"
662 with m
.State("FETCH_PRED_DONE"):
663 comb
+= pred_mask_o_valid
.eq(1)
664 with m
.If(pred_mask_i_ready
):
665 m
.next
= "FETCH_PRED_IDLE"
667 def issue_fsm(self
, m
, core
, pc_changed
, sv_changed
, nia
,
668 dbg
, core_rst
, is_svp64_mode
,
669 fetch_pc_o_ready
, fetch_pc_i_valid
,
670 fetch_insn_o_valid
, fetch_insn_i_ready
,
671 pred_insn_i_valid
, pred_insn_o_ready
,
672 pred_mask_o_valid
, pred_mask_i_ready
,
673 exec_insn_i_valid
, exec_insn_o_ready
,
674 exec_pc_o_valid
, exec_pc_i_ready
):
677 decode / issue FSM. this interacts with the "fetch" FSM
678 through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
679 (outgoing). also interacts with the "execute" FSM
680 through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
682 SVP64 RM prefixes have already been set up by the
683 "fetch" phase, so execute is fairly straightforward.
688 pdecode2
= self
.pdecode2
689 cur_state
= self
.cur_state
692 dec_opcode_i
= pdecode2
.dec
.raw_opcode_in
# raw opcode
694 # for updating svstate (things like srcstep etc.)
695 update_svstate
= Signal() # set this (below) if updating
696 new_svstate
= SVSTATERec("new_svstate")
697 comb
+= new_svstate
.eq(cur_state
.svstate
)
699 # precalculate srcstep+1 and dststep+1
700 cur_srcstep
= cur_state
.svstate
.srcstep
701 cur_dststep
= cur_state
.svstate
.dststep
702 next_srcstep
= Signal
.like(cur_srcstep
)
703 next_dststep
= Signal
.like(cur_dststep
)
704 comb
+= next_srcstep
.eq(cur_state
.svstate
.srcstep
+1)
705 comb
+= next_dststep
.eq(cur_state
.svstate
.dststep
+1)
707 # note if an exception happened. in a pipelined or OoO design
708 # this needs to be accompanied by "shadowing" (or stalling)
709 exc_happened
= self
.core
.o
.exc_happened
710 # also note instruction fetch failed
711 if hasattr(core
, "icache"):
712 fetch_failed
= core
.icache
.i_out
.fetch_failed
714 # set to fault in decoder
715 # update (highest priority) instruction fault
716 rising_fetch_failed
= rising_edge(m
, fetch_failed
)
717 with m
.If(rising_fetch_failed
):
718 sync
+= pdecode2
.instr_fault
.eq(1)
720 fetch_failed
= Const(0, 1)
723 with m
.FSM(name
="issue_fsm"):
725 # sync with the "fetch" phase which is reading the instruction
726 # at this point, there is no instruction running, that
727 # could inadvertently update the PC.
728 with m
.State("ISSUE_START"):
729 # reset instruction fault
730 sync
+= pdecode2
.instr_fault
.eq(0)
731 # wait on "core stop" release, before next fetch
732 # need to do this here, in case we are in a VL==0 loop
733 with m
.If(~dbg
.core_stop_o
& ~core_rst
):
734 comb
+= fetch_pc_i_valid
.eq(1) # tell fetch to start
735 with m
.If(fetch_pc_o_ready
): # fetch acknowledged us
738 # tell core it's stopped, and acknowledge debug handshake
739 comb
+= dbg
.core_stopped_i
.eq(1)
740 # while stopped, allow updating the PC and SVSTATE
741 with m
.If(self
.pc_i
.ok
):
742 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
743 comb
+= self
.state_w_pc
.i_data
.eq(self
.pc_i
.data
)
744 sync
+= pc_changed
.eq(1)
745 with m
.If(self
.svstate_i
.ok
):
746 comb
+= new_svstate
.eq(self
.svstate_i
.data
)
747 comb
+= update_svstate
.eq(1)
748 sync
+= sv_changed
.eq(1)
750 # wait for an instruction to arrive from Fetch
751 with m
.State("INSN_WAIT"):
752 if self
.allow_overlap
:
753 stopping
= dbg
.stopping_o
757 # stopping: jump back to idle
758 m
.next
= "ISSUE_START"
760 # request the icache to stop asserting "failed"
761 comb
+= core
.icache
.flush_in
.eq(1)
762 # stop instruction fault
763 sync
+= pdecode2
.instr_fault
.eq(0)
765 comb
+= fetch_insn_i_ready
.eq(1)
766 with m
.If(fetch_insn_o_valid
):
767 # loop into ISSUE_START if it's a SVP64 instruction
768 # and VL == 0. this because VL==0 is a for-loop
769 # from 0 to 0 i.e. always, always a NOP.
770 cur_vl
= cur_state
.svstate
.vl
771 with m
.If(is_svp64_mode
& (cur_vl
== 0)):
772 # update the PC before fetching the next instruction
773 # since we are in a VL==0 loop, no instruction was
774 # executed that we could be overwriting
775 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
776 comb
+= self
.state_w_pc
.i_data
.eq(nia
)
777 comb
+= self
.insn_done
.eq(1)
778 m
.next
= "ISSUE_START"
781 m
.next
= "PRED_START" # fetching predicate
783 m
.next
= "DECODE_SV" # skip predication
785 with m
.State("PRED_START"):
786 comb
+= pred_insn_i_valid
.eq(1) # tell fetch_pred to start
787 with m
.If(pred_insn_o_ready
): # fetch_pred acknowledged us
790 with m
.State("MASK_WAIT"):
791 comb
+= pred_mask_i_ready
.eq(1) # ready to receive the masks
792 with m
.If(pred_mask_o_valid
): # predication masks are ready
795 # skip zeros in predicate
796 with m
.State("PRED_SKIP"):
797 with m
.If(~is_svp64_mode
):
798 m
.next
= "DECODE_SV" # nothing to do
801 pred_src_zero
= pdecode2
.rm_dec
.pred_sz
802 pred_dst_zero
= pdecode2
.rm_dec
.pred_dz
804 # new srcstep, after skipping zeros
805 skip_srcstep
= Signal
.like(cur_srcstep
)
806 # value to be added to the current srcstep
807 src_delta
= Signal
.like(cur_srcstep
)
808 # add leading zeros to srcstep, if not in zero mode
809 with m
.If(~pred_src_zero
):
810 # priority encoder (count leading zeros)
811 # append guard bit, in case the mask is all zeros
812 pri_enc_src
= PriorityEncoder(65)
813 m
.submodules
.pri_enc_src
= pri_enc_src
814 comb
+= pri_enc_src
.i
.eq(Cat(self
.srcmask
,
816 comb
+= src_delta
.eq(pri_enc_src
.o
)
817 # apply delta to srcstep
818 comb
+= skip_srcstep
.eq(cur_srcstep
+ src_delta
)
819 # shift-out all leading zeros from the mask
820 # plus the leading "one" bit
821 # TODO count leading zeros and shift-out the zero
822 # bits, in the same step, in hardware
823 sync
+= self
.srcmask
.eq(self
.srcmask
>> (src_delta
+1))
825 # same as above, but for dststep
826 skip_dststep
= Signal
.like(cur_dststep
)
827 dst_delta
= Signal
.like(cur_dststep
)
828 with m
.If(~pred_dst_zero
):
829 pri_enc_dst
= PriorityEncoder(65)
830 m
.submodules
.pri_enc_dst
= pri_enc_dst
831 comb
+= pri_enc_dst
.i
.eq(Cat(self
.dstmask
,
833 comb
+= dst_delta
.eq(pri_enc_dst
.o
)
834 comb
+= skip_dststep
.eq(cur_dststep
+ dst_delta
)
835 sync
+= self
.dstmask
.eq(self
.dstmask
>> (dst_delta
+1))
837 # TODO: initialize mask[VL]=1 to avoid passing past VL
838 with m
.If((skip_srcstep
>= cur_vl
) |
839 (skip_dststep
>= cur_vl
)):
840 # end of VL loop. Update PC and reset src/dst step
841 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
842 comb
+= self
.state_w_pc
.i_data
.eq(nia
)
843 comb
+= new_svstate
.srcstep
.eq(0)
844 comb
+= new_svstate
.dststep
.eq(0)
845 comb
+= update_svstate
.eq(1)
846 # synchronize with the simulator
847 comb
+= self
.insn_done
.eq(1)
849 m
.next
= "ISSUE_START"
851 # update new src/dst step
852 comb
+= new_svstate
.srcstep
.eq(skip_srcstep
)
853 comb
+= new_svstate
.dststep
.eq(skip_dststep
)
854 comb
+= update_svstate
.eq(1)
858 # pass predicate mask bits through to satellite decoders
859 # TODO: for SIMD this will be *multiple* bits
860 sync
+= core
.i
.sv_pred_sm
.eq(self
.srcmask
[0])
861 sync
+= core
.i
.sv_pred_dm
.eq(self
.dstmask
[0])
863 # after src/dst step have been updated, we are ready
864 # to decode the instruction
865 with m
.State("DECODE_SV"):
866 # decode the instruction
867 with m
.If(~fetch_failed
):
868 sync
+= pdecode2
.instr_fault
.eq(0)
869 sync
+= core
.i
.e
.eq(pdecode2
.e
)
870 sync
+= core
.i
.state
.eq(cur_state
)
871 sync
+= core
.i
.raw_insn_i
.eq(dec_opcode_i
)
872 sync
+= core
.i
.bigendian_i
.eq(self
.core_bigendian_i
)
874 sync
+= core
.i
.sv_rm
.eq(pdecode2
.sv_rm
)
875 # set RA_OR_ZERO detection in satellite decoders
876 sync
+= core
.i
.sv_a_nz
.eq(pdecode2
.sv_a_nz
)
877 # and svp64 detection
878 sync
+= core
.i
.is_svp64_mode
.eq(is_svp64_mode
)
879 # and svp64 bit-rev'd ldst mode
880 ldst_dec
= pdecode2
.use_svp64_ldst_dec
881 sync
+= core
.i
.use_svp64_ldst_dec
.eq(ldst_dec
)
882 # after decoding, reset any previous exception condition,
883 # allowing it to be set again during the next execution
884 sync
+= pdecode2
.ldst_exc
.eq(0)
886 m
.next
= "INSN_EXECUTE" # move to "execute"
888 # handshake with execution FSM, move to "wait" once acknowledged
889 with m
.State("INSN_EXECUTE"):
890 comb
+= exec_insn_i_valid
.eq(1) # trigger execute
891 with m
.If(exec_insn_o_ready
): # execute acknowledged us
892 m
.next
= "EXECUTE_WAIT"
894 with m
.State("EXECUTE_WAIT"):
895 # wait on "core stop" release, at instruction end
896 # need to do this here, in case we are in a VL>1 loop
897 with m
.If(~dbg
.core_stop_o
& ~core_rst
):
898 comb
+= exec_pc_i_ready
.eq(1)
899 # see https://bugs.libre-soc.org/show_bug.cgi?id=636
900 # the exception info needs to be blatted into
901 # pdecode.ldst_exc, and the instruction "re-run".
902 # when ldst_exc.happened is set, the PowerDecoder2
903 # reacts very differently: it re-writes the instruction
904 # with a "trap" (calls PowerDecoder2.trap()) which
905 # will *overwrite* whatever was requested and jump the
906 # PC to the exception address, as well as alter MSR.
907 # nothing else needs to be done other than to note
908 # the change of PC and MSR (and, later, SVSTATE)
909 with m
.If(exc_happened
):
910 mmu
= core
.fus
.get_exc("mmu0")
911 ldst
= core
.fus
.get_exc("ldst0")
913 with m
.If(fetch_failed
):
914 # instruction fetch: exception is from MMU
915 # reset instr_fault (highest priority)
916 sync
+= pdecode2
.ldst_exc
.eq(mmu
)
917 sync
+= pdecode2
.instr_fault
.eq(0)
919 # request icache to stop asserting "failed"
920 comb
+= core
.icache
.flush_in
.eq(1)
921 with m
.If(~fetch_failed
):
922 # otherwise assume it was a LDST exception
923 sync
+= pdecode2
.ldst_exc
.eq(ldst
)
925 with m
.If(exec_pc_o_valid
):
927 # was this the last loop iteration?
929 cur_vl
= cur_state
.svstate
.vl
930 comb
+= is_last
.eq(next_srcstep
== cur_vl
)
932 # return directly to Decode if Execute generated an
934 with m
.If(pdecode2
.ldst_exc
.happened
):
937 # if either PC or SVSTATE were changed by the previous
938 # instruction, go directly back to Fetch, without
939 # updating either PC or SVSTATE
940 with m
.Elif(pc_changed | sv_changed
):
941 m
.next
= "ISSUE_START"
943 # also return to Fetch, when no output was a vector
944 # (regardless of SRCSTEP and VL), or when the last
945 # instruction was really the last one of the VL loop
946 with m
.Elif((~pdecode2
.loop_continue
) | is_last
):
947 # before going back to fetch, update the PC state
948 # register with the NIA.
949 # ok here we are not reading the branch unit.
950 # TODO: this just blithely overwrites whatever
951 # pipeline updated the PC
952 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
953 comb
+= self
.state_w_pc
.i_data
.eq(nia
)
954 # reset SRCSTEP before returning to Fetch
956 with m
.If(pdecode2
.loop_continue
):
957 comb
+= new_svstate
.srcstep
.eq(0)
958 comb
+= new_svstate
.dststep
.eq(0)
959 comb
+= update_svstate
.eq(1)
961 comb
+= new_svstate
.srcstep
.eq(0)
962 comb
+= new_svstate
.dststep
.eq(0)
963 comb
+= update_svstate
.eq(1)
964 m
.next
= "ISSUE_START"
966 # returning to Execute? then, first update SRCSTEP
968 comb
+= new_svstate
.srcstep
.eq(next_srcstep
)
969 comb
+= new_svstate
.dststep
.eq(next_dststep
)
970 comb
+= update_svstate
.eq(1)
971 # return to mask skip loop
975 comb
+= dbg
.core_stopped_i
.eq(1)
977 # request the icache to stop asserting "failed"
978 comb
+= core
.icache
.flush_in
.eq(1)
979 # stop instruction fault
980 sync
+= pdecode2
.instr_fault
.eq(0)
982 # request the icache to stop asserting "failed"
983 comb
+= core
.icache
.flush_in
.eq(1)
984 # stop instruction fault
985 sync
+= pdecode2
.instr_fault
.eq(0)
986 # while stopped, allow updating the PC and SVSTATE
987 with m
.If(self
.pc_i
.ok
):
988 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
989 comb
+= self
.state_w_pc
.i_data
.eq(self
.pc_i
.data
)
990 sync
+= pc_changed
.eq(1)
991 with m
.If(self
.svstate_i
.ok
):
992 comb
+= new_svstate
.eq(self
.svstate_i
.data
)
993 comb
+= update_svstate
.eq(1)
994 sync
+= sv_changed
.eq(1)
996 # check if svstate needs updating: if so, write it to State Regfile
997 with m
.If(update_svstate
):
998 comb
+= self
.state_w_sv
.wen
.eq(1 << StateRegs
.SVSTATE
)
999 comb
+= self
.state_w_sv
.i_data
.eq(new_svstate
)
1000 sync
+= cur_state
.svstate
.eq(new_svstate
) # for next clock
def execute_fsm(self, m, core, pc_changed, sv_changed,
                exec_insn_i_valid, exec_insn_o_ready,
                exec_pc_o_valid, exec_pc_i_ready):
    """Execute FSM: pass one instruction to the core and wait for it.

    Talks to the "issue" FSM through two ready/valid handshakes:

    * exec_insn_i_valid / exec_insn_o_ready: incoming instruction
    * exec_pc_o_valid / exec_pc_i_ready: outgoing "instruction finished"

    SVP64 RM prefixes have already been set up by the "issue" phase, so
    execute itself is fairly straightforward.

    pc_changed and sv_changed are cleared whenever an instruction is
    presented (exec_insn_i_valid) and are set again if state_nia.wen
    shows a write to PC or SVSTATE while the instruction is active.
    """
    comb, sync = m.d.comb, m.d.sync
    pdecode2 = self.pdecode2

    # shorthand for the core's handshake signals
    busy = core.n.o_data.busy_o       # core is busy
    issue_valid = core.p.i_valid      # instruction is valid

    # cores without an icache can never report a failed fetch
    if hasattr(core, "icache"):
        fetch_failed = core.icache.i_out.fetch_failed
    else:
        fetch_failed = Const(0, 1)

    # write-enable bits on state_nia that indicate a PC / SVSTATE update
    sv_written = self.state_nia.wen & (1 << StateRegs.SVSTATE)
    pc_written = self.state_nia.wen & (1 << StateRegs.PC)

    with m.FSM(name="exec_fsm"):

        # waiting for instruction bus (stays there until not busy)
        with m.State("INSN_START"):
            comb += exec_insn_o_ready.eq(1)
            with m.If(exec_insn_i_valid):
                comb += issue_valid.eq(1)  # instruction is valid/issued
                sync += pc_changed.eq(0)
                sync += sv_changed.eq(0)
                with m.If(core.p.o_ready):  # only move if accepted
                    m.next = "INSN_ACTIVE"  # move to "wait completion"

        # instruction started: must wait till it finishes
        with m.State("INSN_ACTIVE"):
            # note changes to PC and SVSTATE
            with m.If(sv_written):
                sync += sv_changed.eq(1)
            with m.If(pc_written):
                sync += pc_changed.eq(1)
            with m.If(~busy):  # instruction done!
                comb += exec_pc_o_valid.eq(1)
                with m.If(exec_pc_i_ready):
                    # when finished, indicate "done".
                    # however, if there was an exception, the instruction
                    # is *not* yet done.  this is an implementation
                    # detail: we choose to implement exceptions by
                    # taking the exception information from the LDST
                    # unit, putting that *back* into the PowerDecoder2,
                    # and *re-running the entire instruction*.
                    # if we erroneously indicate "done" here, it is as if
                    # there were *TWO* instructions:
                    # 1) the failed LDST 2) a TRAP.
                    with m.If(~pdecode2.ldst_exc.happened &
                              ~fetch_failed):
                        comb += self.insn_done.eq(1)
                    m.next = "INSN_START"  # back to fetch
def setup_peripherals(self, m):
    """Add the core, debug and optional peripherals to module *m*.

    Registers the core, instruction memory, debug module, optional JTAG,
    SRAM blocks, XICS interrupt controllers, GPIO, decoder and SVP64
    prefix decoder as submodules (renamed into the "coresync" or debug
    clock domain), creates the "por", "sync" and "coresync" clock
    domains, and wires up the power-on-reset delay, the core reset and
    the busy/endianness signals.

    m: the nmigen Module being elaborated (modified in place).
    """
    comb, sync = m.d.comb, m.d.sync

    # okaaaay so the debug module must be in coresync clock domain
    # but NOT its reset signal. to cope with this, set every single
    # submodule explicitly in coresync domain, debug and JTAG
    # in their own one but using *external* reset.
    csd = DomainRenamer("coresync")
    dbd = DomainRenamer(self.dbg_domain)

    m.submodules.core = core = csd(self.core)
    # this _so_ needs sorting out.  ICache is added down inside
    # LoadStore1 and is already a submodule of LoadStore1
    if not isinstance(self.imem, ICache):
        m.submodules.imem = imem = csd(self.imem)
    m.submodules.dbg = dbg = dbd(self.dbg)
    if self.jtag_en:
        m.submodules.jtag = jtag = dbd(self.jtag)
        # TODO: UART2GDB mux, here, from external pin
        # see https://bugs.libre-soc.org/show_bug.cgi?id=499
        sync += dbg.dmi.connect_to(jtag.dmi)

    cur_state = self.cur_state

    # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
    if self.sram4k:
        for i, sram in enumerate(self.sram4k):
            m.submodules["sram4k_%d" % i] = csd(sram)
            comb += sram.enable.eq(self.wb_sram_en)

    # XICS interrupt handler
    if self.xics:
        m.submodules.xics_icp = icp = csd(self.xics_icp)
        m.submodules.xics_ics = ics = csd(self.xics_ics)
        comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
        sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core

    # GPIO test peripheral
    if self.gpio:
        m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)

    # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
    # XXX causes litex ECP5 test to get wrong idea about input and output
    # (but works with verilator sim *sigh*)
    # if self.gpio and self.xics:
    #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

    # instruction decoder
    pdecode = create_pdecode()
    m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
    if self.svp64_en:
        m.submodules.svp64 = svp64 = csd(self.svp64)

    # convenience
    dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
    intrf = self.core.regs.rf['int']

    # clock delay power-on reset
    # (the cd_* variable names matter: nmigen derives the domain name
    # from them, which is how m.d.por below finds its domain)
    cd_por = ClockDomain(reset_less=True)
    cd_sync = ClockDomain()
    core_sync = ClockDomain("coresync")
    m.domains += cd_por, cd_sync, core_sync
    if self.dbg_domain != "sync":
        dbg_sync = ClockDomain(self.dbg_domain)
        m.domains += dbg_sync

    # count down from 3 to 0 after power-on, then stay at 0
    ti_rst = Signal(reset_less=True)
    delay = Signal(range(4), reset=3)
    with m.If(delay != 0):
        m.d.por += delay.eq(delay - 1)
    comb += cd_por.clk.eq(ClockSignal())

    # power-on reset delay: hold the core in reset while the delay
    # counter is running, or on a debug-requested or external reset.
    # the comparison MUST be parenthesised: in Python "|" binds tighter
    # than "!=", so without the brackets this would compare delay
    # against the OR of the two reset sources instead.
    core_rst = ResetSignal("coresync")
    comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
    comb += core_rst.eq(ti_rst)

    # debug clock is same as coresync, but reset is *main external*
    if self.dbg_domain != "sync":
        dbg_rst = ResetSignal(self.dbg_domain)
        comb += dbg_rst.eq(ResetSignal())

    # busy/halted signals from core
    core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
    comb += self.busy_o.eq(core_busy_o)
    comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

    # temporary hack: says "go" immediately for both address gen and ST
    ldst = core.fus.fus['ldst0']
    st_go_edge = rising_edge(m, ldst.st.rel_o)
    # link addr-go direct to rel
    m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
    # link store-go to rising rel
    m.d.comb += ldst.st.go_i.eq(st_go_edge)
def elaborate(self, platform):
    """Build the issuer: peripherals, state reads and the cooperating FSMs.

    Sets up the peripherals and core, clears cur_state on core reset,
    reads PC and SVSTATE (from an incoming override or the state
    regfile, via state_get), connects the debug signals, then creates
    the Fetch FSM and wires the issue, predicate-fetch (SVP64 only) and
    execute FSMs together through ready/valid handshake signals.
    Finally hooks up DMI regfile reads and the DEC/TB update FSM.

    Returns the populated nmigen Module.
    """
    m = Module()
    # convenience
    comb, sync = m.d.comb, m.d.sync
    cur_state = self.cur_state
    pdecode2 = self.pdecode2
    dbg = self.dbg
    core = self.core

    # set up peripherals and core
    core_rst = self.core_rst
    self.setup_peripherals(m)

    # reset current state if core reset requested
    with m.If(core_rst):
        sync += cur_state.eq(0)

    # PC and instruction from I-Memory
    comb += self.pc_o.eq(cur_state.pc)
    pc_changed = Signal()  # note write to PC
    sv_changed = Signal()  # note write to SVSTATE

    # indicate to outside world if any FU is still executing
    comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing

    # read state either from incoming override or from regfile
    # TODO: really should be doing MSR in the same way
    pc = state_get(m, core_rst, self.pc_i,
                   "pc",       # read PC
                   self.state_r_pc, StateRegs.PC)
    svstate = state_get(m, core_rst, self.svstate_i,
                        "svstate",  # read SVSTATE
                        self.state_r_sv, StateRegs.SVSTATE)

    # don't write pc every cycle
    # (these are defaults: the issue FSM overrides them when it
    # actually wants to update the PC, so they must come first)
    comb += self.state_w_pc.wen.eq(0)
    comb += self.state_w_pc.i_data.eq(0)

    # address of the next instruction, in the absence of a branch
    # depends on the instruction size
    nia = Signal(64)

    # connect up debug signals
    # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
    comb += dbg.terminate_i.eq(core.o.core_terminate_o)
    comb += dbg.state.pc.eq(pc)
    comb += dbg.state.svstate.eq(svstate)
    comb += dbg.state.msr.eq(cur_state.msr)

    # pass the prefix mode from Fetch to Issue, so the latter can loop
    is_svp64_mode = Signal()

    # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
    # issue, decode/execute, now joined by "Predicate fetch/calculate".
    # these are the handshake signals between each

    # fetch FSM can run as soon as the PC is valid
    fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
    fetch_pc_o_ready = Signal()  # Fetch tells SVSTATE "proceed"

    # fetch FSM hands over the instruction to be decoded / issued
    fetch_insn_o_valid = Signal()
    fetch_insn_i_ready = Signal()

    # predicate fetch FSM decodes and fetches the predicate
    pred_insn_i_valid = Signal()
    pred_insn_o_ready = Signal()

    # predicate fetch FSM delivers the masks
    pred_mask_o_valid = Signal()
    pred_mask_i_ready = Signal()

    # issue FSM delivers the instruction to be executed
    exec_insn_i_valid = Signal()
    exec_insn_o_ready = Signal()

    # execute FSM, hands over the PC/SVSTATE back to the issue FSM
    exec_pc_o_valid = Signal()
    exec_pc_i_ready = Signal()

    # the FSMs here are perhaps unusual in that they detect conditions
    # then "hold" information, combinatorially, for the core
    # (as opposed to using sync - which would be on a clock's delay)
    # this includes the actual opcode, valid flags and so on.

    # Fetch, then predicate fetch, then Issue, then Execute.
    # Issue is where the VL for-loop lives.  the ready/valid
    # signalling is used to communicate between the four.

    # set up Fetch FSM
    fetch = FetchFSM(self.allow_overlap, self.svp64_en,
                     self.imem, core_rst, pdecode2, cur_state,
                     dbg, core, svstate, nia, is_svp64_mode)
    m.submodules.fetch = fetch
    # connect up in/out data to existing Signals
    comb += fetch.p.i_data.pc.eq(pc)
    # and the ready/valid signalling
    comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
    comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
    comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
    comb += fetch.n.i_ready.eq(fetch_insn_i_ready)

    self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                   dbg, core_rst, is_svp64_mode,
                   fetch_pc_o_ready, fetch_pc_i_valid,
                   fetch_insn_o_valid, fetch_insn_i_ready,
                   pred_insn_i_valid, pred_insn_o_ready,
                   pred_mask_o_valid, pred_mask_i_ready,
                   exec_insn_i_valid, exec_insn_o_ready,
                   exec_pc_o_valid, exec_pc_i_ready)

    # the predicate FSM only exists when SVP64 is enabled
    if self.svp64_en:
        self.fetch_predicate_fsm(m,
                                 pred_insn_i_valid, pred_insn_o_ready,
                                 pred_mask_o_valid, pred_mask_i_ready)

    self.execute_fsm(m, core, pc_changed, sv_changed,
                     exec_insn_i_valid, exec_insn_o_ready,
                     exec_pc_o_valid, exec_pc_i_ready)

    # this bit doesn't have to be in the FSM: connect up to read
    # regfiles on demand from DMI
    self.do_dmi(m, dbg)

    # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
    # (which uses that in PowerDecoder2 to raise 0x900 exception)
    self.tb_dec_fsm(m, cur_state.dec)

    return m
def do_dmi(self, m, dbg):
    """Deal with DMI debug requests.

    Currently only services *read* requests for the INT regfile, CR and
    XER; writing to these regfiles is intended to be added later.

    For each of the three, the read-enable is raised combinatorially
    while the request is asserted, and data plus ack are returned one
    clock later (the request is delayed through a one-bit register).
    """
    comb, sync = m.d.comb, m.d.sync
    d_reg, d_cr, d_xer = dbg.d_gpr, dbg.d_cr, dbg.d_xer
    intrf = self.core.regs.rf['int']

    # INT regfile
    with m.If(d_reg.req):  # request for regfile access being made
        # TODO: error-check this
        # XXX should this be combinatorial?  sync better?
        if intrf.unary:
            # unary-addressed regfile: one read-enable bit per register
            comb += self.int_r.ren.eq(1 << d_reg.addr)
        else:
            # binary-addressed regfile: address plus a single enable
            comb += self.int_r.addr.eq(d_reg.addr)
            comb += self.int_r.ren.eq(1)
    d_reg_delay = Signal()
    sync += d_reg_delay.eq(d_reg.req)
    with m.If(d_reg_delay):
        # data arrives one clock later
        comb += d_reg.data.eq(self.int_r.o_data)
        comb += d_reg.ack.eq(1)

    # sigh same thing for CR debug
    with m.If(d_cr.req):  # request for regfile access being made
        comb += self.cr_r.ren.eq(0b11111111)  # enable all
    d_cr_delay = Signal()
    sync += d_cr_delay.eq(d_cr.req)
    with m.If(d_cr_delay):
        # data arrives one clock later
        comb += d_cr.data.eq(self.cr_r.o_data)
        comb += d_cr.ack.eq(1)

    # and the same again for XER
    with m.If(d_xer.req):  # request for regfile access being made
        comb += self.xer_r.ren.eq(0b111111)  # enable all
    d_xer_delay = Signal()
    sync += d_xer_delay.eq(d_xer.req)
    with m.If(d_xer_delay):
        # data arrives one clock later
        comb += d_xer.data.eq(self.xer_r.o_data)
        comb += d_xer.ack.eq(1)
def tb_dec_fsm(self, m, spr_dec):
    """FSM that updates DEC and TB in the fast regfile.

    Runs alternately DEC, TB, DEC, TB: each is read through the 'issue'
    read port, then written back one state later through the 'issue'
    write port (DEC decremented by one, TB incremented by one).  The new
    DEC value is also copied into *spr_dec* for use by the decoder.

    Note that the SPR pipeline could have written a new value to DEC in
    the meantime; the original author notes that the regfile has
    "passthrough" on it, so this is not expected to be a problem.

    See v3.0B p1097-1099 for Timer Resource and p1065 and p1076.
    """
    comb, sync = m.d.comb, m.d.sync
    fast_rf = self.core.regs.rf['fast']
    rd_port = fast_rf.r_ports['issue']  # DEC/TB
    wr_port = fast_rf.w_ports['issue']  # DEC/TB

    with m.FSM():

        # initiates read of current DEC
        with m.State("DEC_READ"):
            comb += rd_port.addr.eq(FastRegs.DEC)
            comb += rd_port.ren.eq(1)
            m.next = "DEC_WRITE"

        # waits for DEC read to arrive (1 cycle), updates with new value
        with m.State("DEC_WRITE"):
            new_dec = Signal(64)
            # TODO: MSR.LPCR 32-bit decrement mode
            comb += new_dec.eq(rd_port.o_data - 1)
            comb += wr_port.addr.eq(FastRegs.DEC)
            comb += wr_port.wen.eq(1)
            comb += wr_port.i_data.eq(new_dec)
            sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
            m.next = "TB_READ"

        # initiates read of current TB
        with m.State("TB_READ"):
            comb += rd_port.addr.eq(FastRegs.TB)
            comb += rd_port.ren.eq(1)
            m.next = "TB_WRITE"

        # waits for read TB to arrive, initiates write of current TB
        with m.State("TB_WRITE"):
            new_tb = Signal(64)
            comb += new_tb.eq(rd_port.o_data + 1)
            comb += wr_port.addr.eq(FastRegs.TB)
            comb += wr_port.wen.eq(1)
            comb += wr_port.i_data.eq(new_tb)
            m.next = "DEC_READ"

    return m
1388 yield from self
.pc_i
.ports()
1391 yield from self
.core
.ports()
1392 yield from self
.imem
.ports()
1393 yield self
.core_bigendian_i
def external_ports(self):
    """Return the list of signals exposed at the top level.

    Always includes the PC override, pc_o, memerr_o, the endianness
    input, busy_o and the instruction/data wishbone buses.  JTAG (or,
    when JTAG is disabled, DMI), the SRAM buses, the XICS buses plus
    int_level_i, and the GPIO bus plus gpio_o are added only when the
    corresponding feature is enabled.
    """
    ports = self.pc_i.ports()
    ports.extend([self.pc_o, self.memerr_o, self.core_bigendian_i,
                  self.busy_o])

    if self.jtag_en:
        ports.extend(self.jtag.external_ports())
    else:
        # don't add DMI if JTAG is enabled
        ports.extend(self.dbg.dmi.ports())

    ports.extend(self.imem.ibus.fields.values())
    ports.extend(self.core.l0.cmpi.wb_bus().fields.values())

    if self.sram4k:
        for sram in self.sram4k:
            ports.extend(sram.bus.fields.values())

    if self.xics:
        ports.extend(self.xics_icp.bus.fields.values())
        ports.extend(self.xics_ics.bus.fields.values())
        ports.append(self.int_level_i)

    if self.gpio:
        ports.extend(self.simple_gpio.bus.fields.values())
        ports.append(self.gpio_o)

    return ports
class TestIssuer(Elaboratable):
    """Top-level wrapper: a TestIssuerInternal plus (optional) PLL clocking.

    Instantiates the internal issuer and a DummyPLL, and drives the
    "coresync" (and, if separate, debug) clock either from the reference
    clock when the PLL option is enabled or from the default clock
    otherwise.
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not
        self.pll_en = getattr(pspec, "use_pll", False)

        self.pll_test_o = Signal(reset_less=True)
        self.pll_vco_o = Signal(reset_less=True)
        self.clk_sel_i = Signal(2, reset_less=True)
        self.ref_clk = ClockSignal()  # can't rename it but that's ok
        self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """Wire the issuer, PLL and clock/reset domains together."""
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = ti = self.ti

        if self.pll_en:
            # ClockSelect runs at PLL output internal clock rate
            m.submodules.wrappll = pll = self.pll

            # add clock domains from PLL
            cd_pll = ClockDomain("pllclk")
            m.domains += cd_pll

            # PLL clock established.  has the side-effect of running
            # clklsel at the PLL's speed (see DomainRenamer("pllclk") above)
            pllclk = self.pllclk_clk
            comb += pllclk.eq(pll.clk_pll_o)

            # wire up external 24mhz to PLL
            #comb += pll.clk_24_i.eq(self.ref_clk)
            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals.  don't mind them being in this domain
            pll_rst = ResetSignal("pllclk")
            comb += pll_rst.eq(ResetSignal())

        # internal clock is set to selector clock-out.  has the side-effect of
        # running TestIssuer at this speed (see DomainRenamer("intclk") above)
        # debug clock runs at coresync internal clock
        cd_coresync = ClockDomain("coresync")
        #m.domains += cd_coresync
        separate_dbg = self.ti.dbg_domain != 'sync'
        if separate_dbg:
            cd_dbgsync = ClockDomain("dbgsync")
            #m.domains += cd_dbgsync
        intclk = ClockSignal("coresync")
        dbgclk = ClockSignal(self.ti.dbg_domain)
        # XXX BYPASS PLL XXX
        # XXX BYPASS PLL XXX
        # XXX BYPASS PLL XXX
        if self.pll_en:
            comb += intclk.eq(self.ref_clk)
        else:
            comb += intclk.eq(ClockSignal())
        if separate_dbg:
            dbgclk = ClockSignal(self.ti.dbg_domain)
            comb += dbgclk.eq(intclk)

        return m

    def ports(self):
        """All ports of the issuer and PLL, plus main clock and reset."""
        result = list(self.ti.ports())
        result += list(self.pll.ports())
        result += [ClockSignal(), ResetSignal()]
        return result

    def external_ports(self):
        """Top-level pins: issuer externals, clock, reset and PLL pins."""
        ports = self.ti.external_ports()
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        if self.pll_en:
            ports.extend([
                self.clk_sel_i,
                self.pll.clk_24_i,
                self.pll_test_o,
                self.pll_vco_o,
                self.pllclk_clk,
                self.ref_clk,
            ])
        return ports
1521 if __name__
== '__main__':
1522 units
= {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
1528 pspec
= TestMemPspec(ldst_ifacetype
='bare_wb',
1529 imem_ifacetype
='bare_wb',
1534 dut
= TestIssuer(pspec
)
1535 vl
= main(dut
, ports
=dut
.ports(), name
="test_issuer")
1537 if len(sys
.argv
) == 1:
1538 vl
= rtlil
.convert(dut
, ports
=dut
.external_ports(), name
="test_issuer")
1539 with
open("test_issuer.il", "w") as f
: