3 not in any way intended for production use. this runs a FSM that:
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
10 * does it all over again
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to at least provide something that can be further incrementally
18 from nmigen
import (Elaboratable
, Module
, Signal
, ClockSignal
, ResetSignal
,
19 ClockDomain
, DomainRenamer
, Mux
, Const
, Repl
, Cat
)
20 from nmigen
.cli
import rtlil
21 from nmigen
.cli
import main
24 from nmutil
.singlepipe
import ControlBase
25 from soc
.simple
.core_data
import FetchOutput
, FetchInput
27 from nmigen
.lib
.coding
import PriorityEncoder
29 from openpower
.decoder
.power_decoder
import create_pdecode
30 from openpower
.decoder
.power_decoder2
import PowerDecode2
, SVP64PrefixDecoder
31 from openpower
.decoder
.decode2execute1
import IssuerDecode2ToOperand
32 from openpower
.decoder
.decode2execute1
import Data
33 from openpower
.decoder
.power_enums
import (MicrOp
, SVP64PredInt
, SVP64PredCR
,
35 from openpower
.state
import CoreState
36 from openpower
.consts
import (CR
, SVP64CROffs
)
37 from soc
.experiment
.testmem
import TestMemory
# test only for instructions
38 from soc
.regfile
.regfiles
import StateRegs
, FastRegs
39 from soc
.simple
.core
import NonProductionCore
40 from soc
.config
.test
.test_loadstore
import TestMemPspec
41 from soc
.config
.ifetch
import ConfigFetchUnit
42 from soc
.debug
.dmi
import CoreDebug
, DMIInterface
43 from soc
.debug
.jtag
import JTAG
44 from soc
.config
.pinouts
import get_pinspecs
45 from soc
.interrupts
.xics
import XICS_ICP
, XICS_ICS
46 from soc
.bus
.simple_gpio
import SimpleGPIO
47 from soc
.bus
.SPBlock512W64B8W
import SPBlock512W64B8W
48 from soc
.clock
.select
import ClockSelect
49 from soc
.clock
.dummypll
import DummyPLL
50 from openpower
.sv
.svstate
import SVSTATERec
51 from soc
.experiment
.icache
import ICache
53 from nmutil
.util
import rising_edge
def get_insn(f_instr_o, pc):
    """select the 32-bit instruction word from the fetch unit output

    * f_instr_o - instruction output of the fetch unit. only its "width"
                  attribute and "word_select" method are used here
    * pc        - program counter: bit 2 picks the word to use when the
                  fetch output is not exactly 32 bits wide

    returns f_instr_o itself when it is 32 bits wide, otherwise the
    32-bit word of f_instr_o selected by pc[2]
    """
    if f_instr_o.width == 32:
        # already a single instruction word: nothing to select
        return f_instr_o
    # 64-bit: bit 2 of pc decides which word to select
    return f_instr_o.word_select(pc[2], 32)
63 # gets state input or reads from state regfile
def state_get(m, core_rst, state_i, name, regfile, regnum):
    """gets state input or reads from state regfile

    * m        - Module that the generated logic is added to
    * core_rst - core reset signal
    * state_i  - incoming override: "data" is used whenever "ok" is set
    * name     - name of the result Signal (also prefixes the delay flag)
    * regfile  - regfile read port, accessed through "ren" and "o_data"
    * regnum   - register index, turned into a unary read-enable (1<<regnum)

    returns the 64-bit Signal carrying the selected value
    """
    comb = m.d.comb
    sync = m.d.sync
    res = Signal(64, reset_less=True, name=name)
    res_ok_delay = Signal(name="%s_ok_delay" % name)
    # NOTE(review): gating everything on ~core_rst is presumed from the
    # otherwise-unused core_rst argument - confirm
    with m.If(~core_rst):
        # remember, for the next cycle, that no override was supplied
        sync += res_ok_delay.eq(~state_i.ok)
        with m.If(state_i.ok):
            # incoming override (start from pc_i)
            comb += res.eq(state_i.data)
        with m.Else():
            # otherwise read StateRegs regfile for PC...
            comb += regfile.ren.eq(1 << regnum)
        # ... but on a 1-clock delay
        with m.If(res_ok_delay):
            comb += res.eq(regfile.o_data)
    return res
def get_predint(m, mask, name):
    """decode SVP64 predicate integer mask field to reg number and invert
    this is identical to the equivalent function in ISACaller except that
    it doesn't read the INT directly, it just decodes "what needs to be done"
    i.e. which INT reg, whether it is shifted and whether it is bit-inverted.

    * all1s is set to indicate that no mask is to be applied.
    * regread indicates the GPR register number to be read
    * invert is set to indicate that the register value is to be inverted
    * unary indicates that the contents of the register is to be shifted 1<<r3

    returns the tuple of Signals (regread, invert, unary, all1s)
    """
    comb = m.d.comb
    regread = Signal(5, name=name+"regread")
    invert = Signal(name=name+"invert")
    unary = Signal(name=name+"unary")
    all1s = Signal(name=name+"all1s")

    # register-based predicates: (mask value, GPR number, inverted?)
    # NOTE(review): the "_N" variants are presumed to be the ones that
    # request inversion - confirm against ISACaller
    reg_cases = (
        (SVP64PredInt.R3, 3, False),
        (SVP64PredInt.R3_N, 3, True),
        (SVP64PredInt.R10, 10, False),
        (SVP64PredInt.R10_N, 10, True),
        (SVP64PredInt.R30, 30, False),
        (SVP64PredInt.R30_N, 30, True),
    )

    with m.Switch(mask):
        with m.Case(SVP64PredInt.ALWAYS.value):
            comb += all1s.eq(1)      # use 0b1111 (all ones)
        with m.Case(SVP64PredInt.R3_UNARY.value):
            comb += regread.eq(3)
            comb += unary.eq(1)      # 1<<r3 - shift r3 (single bit)
        for pred, gpr, inverted in reg_cases:
            with m.Case(pred.value):
                comb += regread.eq(gpr)
                if inverted:
                    comb += invert.eq(1)
    return regread, invert, unary, all1s
def get_predcr(m, mask, name):
    """decode SVP64 predicate CR to reg number field and invert status
    this is identical to _get_predcr in ISACaller

    * m    - Module that the decode logic is added to
    * mask - SVP64 CR predicate field, matched against SVP64PredCR values
    * name - prefix for the names of the created Signals

    returns the tuple of Signals (idx, invert): idx is the bit within
    the CR field that is to be tested
    """
    comb = m.d.comb
    idx = Signal(2, name=name+"idx")
    invert = Signal(name=name+"crinvert")

    # (mask value, CR field bit to test, inverted?)
    # NOTE(review): inversion polarity (GE/LE/NE/NS treated as the
    # inverse of LT/GT/EQ/SO) is presumed from each pair sharing the
    # same CR bit - confirm against ISACaller
    cr_cases = (
        (SVP64PredCR.LT, CR.LT, 0),
        (SVP64PredCR.GE, CR.LT, 1),
        (SVP64PredCR.GT, CR.GT, 0),
        (SVP64PredCR.LE, CR.GT, 1),
        (SVP64PredCR.EQ, CR.EQ, 0),
        (SVP64PredCR.NE, CR.EQ, 1),
        (SVP64PredCR.SO, CR.SO, 0),
        (SVP64PredCR.NS, CR.SO, 1),
    )

    with m.Switch(mask):
        for pred, field_bit, inverted in cr_cases:
            with m.Case(pred.value):
                comb += idx.eq(field_bit)
                comb += invert.eq(inverted)
    return idx, invert
161 # Fetch Finite State Machine.
162 # WARNING: there are currently DriverConflicts but it's actually working.
163 # TODO, here: everything that is global in nature, information from the
164 # main TestIssuerInternal, needs to move to either ispec() or ospec().
165 # not only that: TestIssuerInternal.imem can entirely move into here
166 # because imem is only ever accessed inside the FetchFSM.
167 class FetchFSM(ControlBase
):
168 def __init__(self
, allow_overlap
, svp64_en
, imem
, core_rst
,
170 dbg
, core
, svstate
, nia
, is_svp64_mode
):
171 self
.allow_overlap
= allow_overlap
172 self
.svp64_en
= svp64_en
174 self
.core_rst
= core_rst
175 self
.pdecode2
= pdecode2
176 self
.cur_state
= cur_state
179 self
.svstate
= svstate
181 self
.is_svp64_mode
= is_svp64_mode
183 # set up pipeline ControlBase and allocate i/o specs
184 # (unusual: normally done by the Pipeline API)
185 super().__init
__(stage
=self
)
186 self
.p
.i_data
, self
.n
.o_data
= self
.new_specs(None)
187 self
.i
, self
.o
= self
.p
.i_data
, self
.n
.o_data
189 # next 3 functions are Stage API Compliance
190 def setup(self
, m
, i
):
199 def elaborate(self
, platform
):
202 this FSM performs fetch of raw instruction data, partial-decodes
203 it 32-bit at a time to detect SVP64 prefixes, and will optionally
204 read a 2nd 32-bit quantity if that occurs.
206 m
= super().elaborate(platform
)
211 svstate
= self
.svstate
213 is_svp64_mode
= self
.is_svp64_mode
214 fetch_pc_o_ready
= self
.p
.o_ready
215 fetch_pc_i_valid
= self
.p
.i_valid
216 fetch_insn_o_valid
= self
.n
.o_valid
217 fetch_insn_i_ready
= self
.n
.i_ready
221 pdecode2
= self
.pdecode2
222 cur_state
= self
.cur_state
223 dec_opcode_o
= pdecode2
.dec
.raw_opcode_in
# raw opcode
225 msr_read
= Signal(reset
=1)
227 # also note instruction fetch failed
228 if hasattr(core
, "icache"):
229 fetch_failed
= core
.icache
.i_out
.fetch_failed
231 fetch_failed
= Const(0, 1)
233 # don't read msr every cycle
234 staterf
= self
.core
.regs
.rf
['state']
235 state_r_msr
= staterf
.r_ports
['msr'] # MSR rd
237 comb
+= state_r_msr
.ren
.eq(0)
239 with m
.FSM(name
='fetch_fsm'):
242 with m
.State("IDLE"):
243 with m
.If(~dbg
.stopping_o
& ~fetch_failed
):
244 comb
+= fetch_pc_o_ready
.eq(1)
245 with m
.If(fetch_pc_i_valid
& ~fetch_failed
):
246 # instruction allowed to go: start by reading the PC
247 # capture the PC and also drop it into Insn Memory
248 # we have joined a pair of combinatorial memory
249 # lookups together. this is Generally Bad.
250 comb
+= self
.imem
.a_pc_i
.eq(pc
)
251 comb
+= self
.imem
.a_i_valid
.eq(1)
252 comb
+= self
.imem
.f_i_valid
.eq(1)
253 sync
+= cur_state
.pc
.eq(pc
)
254 sync
+= cur_state
.svstate
.eq(svstate
) # and svstate
256 # initiate read of MSR. arrives one clock later
257 comb
+= state_r_msr
.ren
.eq(1 << StateRegs
.MSR
)
258 sync
+= msr_read
.eq(0)
260 m
.next
= "INSN_READ" # move to "wait for bus" phase
262 # dummy pause to find out why simulation is not keeping up
263 with m
.State("INSN_READ"):
264 if self
.allow_overlap
:
265 stopping
= dbg
.stopping_o
269 # stopping: jump back to idle
272 # one cycle later, msr/sv read arrives. valid only once.
273 with m
.If(~msr_read
):
274 sync
+= msr_read
.eq(1) # yeah don't read it again
275 sync
+= cur_state
.msr
.eq(state_r_msr
.o_data
)
276 with m
.If(self
.imem
.f_busy_o
& ~fetch_failed
): # zzz...
277 # busy but not fetch failed: stay in wait-read
278 comb
+= self
.imem
.a_i_valid
.eq(1)
279 comb
+= self
.imem
.f_i_valid
.eq(1)
281 # not busy (or fetch failed!): instruction fetched
282 # when fetch failed, the instruction gets ignored
284 insn
= get_insn(self
.imem
.f_instr_o
, cur_state
.pc
)
287 # decode the SVP64 prefix, if any
288 comb
+= svp64
.raw_opcode_in
.eq(insn
)
289 comb
+= svp64
.bigendian
.eq(self
.core_bigendian_i
)
290 # pass the decoded prefix (if any) to PowerDecoder2
291 sync
+= pdecode2
.sv_rm
.eq(svp64
.svp64_rm
)
292 sync
+= pdecode2
.is_svp64_mode
.eq(is_svp64_mode
)
293 # remember whether this is a prefixed instruction,
294 # so the FSM can readily loop when VL==0
295 sync
+= is_svp64_mode
.eq(svp64
.is_svp64_mode
)
296 # calculate the address of the following instruction
297 insn_size
= Mux(svp64
.is_svp64_mode
, 8, 4)
298 sync
+= nia
.eq(cur_state
.pc
+ insn_size
)
299 with m
.If(~svp64
.is_svp64_mode
):
300 # with no prefix, store the instruction
301 # and hand it directly to the next FSM
302 sync
+= dec_opcode_o
.eq(insn
)
303 m
.next
= "INSN_READY"
305 # fetch the rest of the instruction from memory
306 comb
+= self
.imem
.a_pc_i
.eq(cur_state
.pc
+ 4)
307 comb
+= self
.imem
.a_i_valid
.eq(1)
308 comb
+= self
.imem
.f_i_valid
.eq(1)
309 m
.next
= "INSN_READ2"
311 # not SVP64 - 32-bit only
312 sync
+= nia
.eq(cur_state
.pc
+ 4)
313 sync
+= dec_opcode_o
.eq(insn
)
314 m
.next
= "INSN_READY"
316 with m
.State("INSN_READ2"):
317 with m
.If(self
.imem
.f_busy_o
): # zzz...
318 # busy: stay in wait-read
319 comb
+= self
.imem
.a_i_valid
.eq(1)
320 comb
+= self
.imem
.f_i_valid
.eq(1)
322 # not busy: instruction fetched
323 insn
= get_insn(self
.imem
.f_instr_o
, cur_state
.pc
+4)
324 sync
+= dec_opcode_o
.eq(insn
)
325 m
.next
= "INSN_READY"
326 # TODO: probably can start looking at pdecode2.rm_dec
327 # here or maybe even in INSN_READ state, if svp64_mode
328 # detected, in order to trigger - and wait for - the
331 pmode
= pdecode2
.rm_dec
.predmode
333 if pmode != SVP64PredMode.ALWAYS.value:
334 fire predicate loading FSM and wait before
337 sync += self.srcmask.eq(-1) # set to all 1s
338 sync += self.dstmask.eq(-1) # set to all 1s
339 m.next = "INSN_READY"
342 with m
.State("INSN_READY"):
343 # hand over the instruction, to be decoded
344 comb
+= fetch_insn_o_valid
.eq(1)
345 with m
.If(fetch_insn_i_ready
):
348 # whatever was done above, over-ride it if core reset is held
349 with m
.If(self
.core_rst
):
355 class TestIssuerInternal(Elaboratable
):
356 """TestIssuer - reads instructions from TestMemory and issues them
358 efficiency and speed is not the main goal here: functional correctness
359 and code clarity is. optimisations (which almost 100% interfere with
360 easy understanding) come later.
363 def __init__(self
, pspec
):
365 # test if SVP64 is to be enabled
366 self
.svp64_en
= hasattr(pspec
, "svp64") and (pspec
.svp64
== True)
368 # and if regfiles are reduced
369 self
.regreduce_en
= (hasattr(pspec
, "regreduce") and
370 (pspec
.regreduce
== True))
372 # and if overlap requested
373 self
.allow_overlap
= (hasattr(pspec
, "allow_overlap") and
374 (pspec
.allow_overlap
== True))
376 # JTAG interface. add this right at the start because if it's
377 # added it *modifies* the pspec, by adding enable/disable signals
378 # for parts of the rest of the core
379 self
.jtag_en
= hasattr(pspec
, "debug") and pspec
.debug
== 'jtag'
380 self
.dbg_domain
= "sync" # sigh "dbgsunc" too problematic
381 # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
383 # XXX MUST keep this up-to-date with litex, and
384 # soc-cocotb-sim, and err.. all needs sorting out, argh
387 'eint', 'gpio', 'mspi0',
388 # 'mspi1', - disabled for now
389 # 'pwm', 'sd0', - disabled for now
391 self
.jtag
= JTAG(get_pinspecs(subset
=subset
),
392 domain
=self
.dbg_domain
)
393 # add signals to pspec to enable/disable icache and dcache
394 # (or data and instruction wishbone if icache/dcache not included)
395 # https://bugs.libre-soc.org/show_bug.cgi?id=520
396 # TODO: do we actually care if these are not domain-synchronised?
397 # honestly probably not.
398 pspec
.wb_icache_en
= self
.jtag
.wb_icache_en
399 pspec
.wb_dcache_en
= self
.jtag
.wb_dcache_en
400 self
.wb_sram_en
= self
.jtag
.wb_sram_en
402 self
.wb_sram_en
= Const(1)
404 # add 4k sram blocks?
405 self
.sram4x4k
= (hasattr(pspec
, "sram4x4kblock") and
406 pspec
.sram4x4kblock
== True)
410 self
.sram4k
.append(SPBlock512W64B8W(name
="sram4k_%d" % i
,
414 # add interrupt controller?
415 self
.xics
= hasattr(pspec
, "xics") and pspec
.xics
== True
417 self
.xics_icp
= XICS_ICP()
418 self
.xics_ics
= XICS_ICS()
419 self
.int_level_i
= self
.xics_ics
.int_level_i
421 # add GPIO peripheral?
422 self
.gpio
= hasattr(pspec
, "gpio") and pspec
.gpio
== True
424 self
.simple_gpio
= SimpleGPIO()
425 self
.gpio_o
= self
.simple_gpio
.gpio_o
427 # main instruction core. suitable for prototyping / demo only
428 self
.core
= core
= NonProductionCore(pspec
)
429 self
.core_rst
= ResetSignal("coresync")
431 # instruction decoder. goes into Trap Record
432 #pdecode = create_pdecode()
433 self
.cur_state
= CoreState("cur") # current state (MSR/PC/SVSTATE)
434 self
.pdecode2
= PowerDecode2(None, state
=self
.cur_state
,
435 opkls
=IssuerDecode2ToOperand
,
436 svp64_en
=self
.svp64_en
,
437 regreduce_en
=self
.regreduce_en
)
438 pdecode
= self
.pdecode2
.dec
441 self
.svp64
= SVP64PrefixDecoder() # for decoding SVP64 prefix
443 # Test Instruction memory
444 if hasattr(core
, "icache"):
445 # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
446 # truly dreadful. needs a huge reorg.
447 pspec
.icache
= core
.icache
448 self
.imem
= ConfigFetchUnit(pspec
).fu
451 self
.dbg
= CoreDebug()
453 # instruction go/monitor
454 self
.pc_o
= Signal(64, reset_less
=True)
455 self
.pc_i
= Data(64, "pc_i") # set "ok" to indicate "please change me"
456 self
.svstate_i
= Data(64, "svstate_i") # ditto
457 self
.core_bigendian_i
= Signal() # TODO: set based on MSR.LE
458 self
.busy_o
= Signal(reset_less
=True)
459 self
.memerr_o
= Signal(reset_less
=True)
461 # STATE regfile read /write ports for PC, MSR, SVSTATE
462 staterf
= self
.core
.regs
.rf
['state']
463 self
.state_r_pc
= staterf
.r_ports
['cia'] # PC rd
464 self
.state_w_pc
= staterf
.w_ports
['d_wr1'] # PC wr
465 self
.state_r_sv
= staterf
.r_ports
['sv'] # SVSTATE rd
466 self
.state_w_sv
= staterf
.w_ports
['sv'] # SVSTATE wr
468 # DMI interface access
469 intrf
= self
.core
.regs
.rf
['int']
470 crrf
= self
.core
.regs
.rf
['cr']
471 xerrf
= self
.core
.regs
.rf
['xer']
472 self
.int_r
= intrf
.r_ports
['dmi'] # INT read
473 self
.cr_r
= crrf
.r_ports
['full_cr_dbg'] # CR read
474 self
.xer_r
= xerrf
.r_ports
['full_xer'] # XER read
478 self
.int_pred
= intrf
.r_ports
['pred'] # INT predicate read
479 self
.cr_pred
= crrf
.r_ports
['cr_pred'] # CR predicate read
481 # hack method of keeping an eye on whether branch/trap set the PC
482 self
.state_nia
= self
.core
.regs
.rf
['state'].w_ports
['nia']
483 self
.state_nia
.wen
.name
= 'state_nia_wen'
485 # pulse to synchronize the simulator at instruction end
486 self
.insn_done
= Signal()
488 # indicate any instruction still outstanding, in execution
489 self
.any_busy
= Signal()
492 # store copies of predicate masks
493 self
.srcmask
= Signal(64)
494 self
.dstmask
= Signal(64)
496 def fetch_predicate_fsm(self
, m
,
497 pred_insn_i_valid
, pred_insn_o_ready
,
498 pred_mask_o_valid
, pred_mask_i_ready
):
499 """fetch_predicate_fsm - obtains (constructs in the case of CR)
500 src/dest predicate masks
502 https://bugs.libre-soc.org/show_bug.cgi?id=617
503 the predicates can be read here, by using IntRegs r_ports['pred']
504 or CRRegs r_ports['pred']. in the case of CRs it will have to
505 be done through multiple reads, extracting one relevant at a time.
506 later, a faster way would be to use the 32-bit-wide CR port but
507 this is more complex decoding, here. equivalent code used in
508 ISACaller is "from openpower.decoder.isa.caller import get_predcr"
510 note: this ENTIRE FSM is not to be called when svp64 is disabled
514 pdecode2
= self
.pdecode2
515 rm_dec
= pdecode2
.rm_dec
# SVP64RMModeDecode
516 predmode
= rm_dec
.predmode
517 srcpred
, dstpred
= rm_dec
.srcpred
, rm_dec
.dstpred
518 cr_pred
, int_pred
= self
.cr_pred
, self
.int_pred
# read regfiles
519 # get src/dst step, so we can skip already used mask bits
520 cur_state
= self
.cur_state
521 srcstep
= cur_state
.svstate
.srcstep
522 dststep
= cur_state
.svstate
.dststep
523 cur_vl
= cur_state
.svstate
.vl
526 sregread
, sinvert
, sunary
, sall1s
= get_predint(m
, srcpred
, 's')
527 dregread
, dinvert
, dunary
, dall1s
= get_predint(m
, dstpred
, 'd')
528 sidx
, scrinvert
= get_predcr(m
, srcpred
, 's')
529 didx
, dcrinvert
= get_predcr(m
, dstpred
, 'd')
531 # store fetched masks, for either intpred or crpred
532 # when src/dst step is not zero, the skipped mask bits need to be
533 # shifted-out, before actually storing them in src/dest mask
534 new_srcmask
= Signal(64, reset_less
=True)
535 new_dstmask
= Signal(64, reset_less
=True)
537 with m
.FSM(name
="fetch_predicate"):
539 with m
.State("FETCH_PRED_IDLE"):
540 comb
+= pred_insn_o_ready
.eq(1)
541 with m
.If(pred_insn_i_valid
):
542 with m
.If(predmode
== SVP64PredMode
.INT
):
543 # skip fetching destination mask register, when zero
545 sync
+= new_dstmask
.eq(-1)
546 # directly go to fetch source mask register
547 # guaranteed not to be zero (otherwise predmode
548 # would be SVP64PredMode.ALWAYS, not INT)
549 comb
+= int_pred
.addr
.eq(sregread
)
550 comb
+= int_pred
.ren
.eq(1)
551 m
.next
= "INT_SRC_READ"
552 # fetch destination predicate register
554 comb
+= int_pred
.addr
.eq(dregread
)
555 comb
+= int_pred
.ren
.eq(1)
556 m
.next
= "INT_DST_READ"
557 with m
.Elif(predmode
== SVP64PredMode
.CR
):
558 # go fetch masks from the CR register file
559 sync
+= new_srcmask
.eq(0)
560 sync
+= new_dstmask
.eq(0)
563 sync
+= self
.srcmask
.eq(-1)
564 sync
+= self
.dstmask
.eq(-1)
565 m
.next
= "FETCH_PRED_DONE"
567 with m
.State("INT_DST_READ"):
568 # store destination mask
569 inv
= Repl(dinvert
, 64)
571 # set selected mask bit for 1<<r3 mode
572 dst_shift
= Signal(range(64))
573 comb
+= dst_shift
.eq(self
.int_pred
.o_data
& 0b111111)
574 sync
+= new_dstmask
.eq(1 << dst_shift
)
576 # invert mask if requested
577 sync
+= new_dstmask
.eq(self
.int_pred
.o_data ^ inv
)
578 # skip fetching source mask register, when zero
580 sync
+= new_srcmask
.eq(-1)
581 m
.next
= "FETCH_PRED_SHIFT_MASK"
582 # fetch source predicate register
584 comb
+= int_pred
.addr
.eq(sregread
)
585 comb
+= int_pred
.ren
.eq(1)
586 m
.next
= "INT_SRC_READ"
588 with m
.State("INT_SRC_READ"):
590 inv
= Repl(sinvert
, 64)
592 # set selected mask bit for 1<<r3 mode
593 src_shift
= Signal(range(64))
594 comb
+= src_shift
.eq(self
.int_pred
.o_data
& 0b111111)
595 sync
+= new_srcmask
.eq(1 << src_shift
)
597 # invert mask if requested
598 sync
+= new_srcmask
.eq(self
.int_pred
.o_data ^ inv
)
599 m
.next
= "FETCH_PRED_SHIFT_MASK"
601 # fetch masks from the CR register file
602 # implements the following loop:
603 # idx, inv = get_predcr(mask)
605 # for cr_idx in range(vl):
606 # cr = crl[cr_idx + SVP64CROffs.CRPred] # takes one cycle
608 # mask |= 1 << cr_idx
610 with m
.State("CR_READ"):
611 # CR index to be read, which will be ready by the next cycle
612 cr_idx
= Signal
.like(cur_vl
, reset_less
=True)
613 # submit the read operation to the regfile
614 with m
.If(cr_idx
!= cur_vl
):
615 # the CR read port is unary ...
617 # ... in MSB0 convention ...
618 # ren = 1 << (7 - cr_idx)
619 # ... and with an offset:
620 # ren = 1 << (7 - off - cr_idx)
621 idx
= SVP64CROffs
.CRPred
+ cr_idx
622 comb
+= cr_pred
.ren
.eq(1 << (7 - idx
))
623 # signal data valid in the next cycle
624 cr_read
= Signal(reset_less
=True)
625 sync
+= cr_read
.eq(1)
626 # load the next index
627 sync
+= cr_idx
.eq(cr_idx
+ 1)
630 sync
+= cr_read
.eq(0)
632 m
.next
= "FETCH_PRED_SHIFT_MASK"
634 # compensate for the one cycle delay on the regfile
635 cur_cr_idx
= Signal
.like(cur_vl
)
636 comb
+= cur_cr_idx
.eq(cr_idx
- 1)
637 # read the CR field, select the appropriate bit
641 comb
+= cr_field
.eq(cr_pred
.o_data
)
642 comb
+= scr_bit
.eq(cr_field
.bit_select(sidx
, 1)
644 comb
+= dcr_bit
.eq(cr_field
.bit_select(didx
, 1)
646 # set the corresponding mask bit
647 bit_to_set
= Signal
.like(self
.srcmask
)
648 comb
+= bit_to_set
.eq(1 << cur_cr_idx
)
650 sync
+= new_srcmask
.eq(new_srcmask | bit_to_set
)
652 sync
+= new_dstmask
.eq(new_dstmask | bit_to_set
)
654 with m
.State("FETCH_PRED_SHIFT_MASK"):
655 # shift-out skipped mask bits
656 sync
+= self
.srcmask
.eq(new_srcmask
>> srcstep
)
657 sync
+= self
.dstmask
.eq(new_dstmask
>> dststep
)
658 m
.next
= "FETCH_PRED_DONE"
660 with m
.State("FETCH_PRED_DONE"):
661 comb
+= pred_mask_o_valid
.eq(1)
662 with m
.If(pred_mask_i_ready
):
663 m
.next
= "FETCH_PRED_IDLE"
665 def issue_fsm(self
, m
, core
, pc_changed
, sv_changed
, nia
,
666 dbg
, core_rst
, is_svp64_mode
,
667 fetch_pc_o_ready
, fetch_pc_i_valid
,
668 fetch_insn_o_valid
, fetch_insn_i_ready
,
669 pred_insn_i_valid
, pred_insn_o_ready
,
670 pred_mask_o_valid
, pred_mask_i_ready
,
671 exec_insn_i_valid
, exec_insn_o_ready
,
672 exec_pc_o_valid
, exec_pc_i_ready
):
675 decode / issue FSM. this interacts with the "fetch" FSM
676 through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
677 (outgoing). also interacts with the "execute" FSM
678 through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
680 SVP64 RM prefixes have already been set up by the
681 "fetch" phase, so execute is fairly straightforward.
686 pdecode2
= self
.pdecode2
687 cur_state
= self
.cur_state
690 dec_opcode_i
= pdecode2
.dec
.raw_opcode_in
# raw opcode
692 # for updating svstate (things like srcstep etc.)
693 update_svstate
= Signal() # set this (below) if updating
694 new_svstate
= SVSTATERec("new_svstate")
695 comb
+= new_svstate
.eq(cur_state
.svstate
)
697 # precalculate srcstep+1 and dststep+1
698 cur_srcstep
= cur_state
.svstate
.srcstep
699 cur_dststep
= cur_state
.svstate
.dststep
700 next_srcstep
= Signal
.like(cur_srcstep
)
701 next_dststep
= Signal
.like(cur_dststep
)
702 comb
+= next_srcstep
.eq(cur_state
.svstate
.srcstep
+1)
703 comb
+= next_dststep
.eq(cur_state
.svstate
.dststep
+1)
705 # note if an exception happened. in a pipelined or OoO design
706 # this needs to be accompanied by "shadowing" (or stalling)
707 exc_happened
= self
.core
.o
.exc_happened
708 # also note instruction fetch failed
709 if hasattr(core
, "icache"):
710 fetch_failed
= core
.icache
.i_out
.fetch_failed
712 fetch_failed
= Const(0, 1)
713 # set to fault in decoder
714 # update (highest priority) instruction fault
715 rising_fetch_failed
= rising_edge(m
, fetch_failed
)
716 with m
.If(rising_fetch_failed
):
717 sync
+= pdecode2
.instr_fault
.eq(1)
719 with m
.FSM(name
="issue_fsm"):
721 # sync with the "fetch" phase which is reading the instruction
722 # at this point, there is no instruction running, that
723 # could inadvertently update the PC.
724 with m
.State("ISSUE_START"):
725 # reset instruction fault
726 sync
+= pdecode2
.instr_fault
.eq(0)
727 # wait on "core stop" release, before next fetch
728 # need to do this here, in case we are in a VL==0 loop
729 with m
.If(~dbg
.core_stop_o
& ~core_rst
):
730 comb
+= fetch_pc_i_valid
.eq(1) # tell fetch to start
731 with m
.If(fetch_pc_o_ready
): # fetch acknowledged us
734 # tell core it's stopped, and acknowledge debug handshake
735 comb
+= dbg
.core_stopped_i
.eq(1)
736 # while stopped, allow updating the PC and SVSTATE
737 with m
.If(self
.pc_i
.ok
):
738 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
739 comb
+= self
.state_w_pc
.i_data
.eq(self
.pc_i
.data
)
740 sync
+= pc_changed
.eq(1)
741 with m
.If(self
.svstate_i
.ok
):
742 comb
+= new_svstate
.eq(self
.svstate_i
.data
)
743 comb
+= update_svstate
.eq(1)
744 sync
+= sv_changed
.eq(1)
746 # wait for an instruction to arrive from Fetch
747 with m
.State("INSN_WAIT"):
748 if self
.allow_overlap
:
749 stopping
= dbg
.stopping_o
753 # stopping: jump back to idle
754 m
.next
= "ISSUE_START"
756 comb
+= fetch_insn_i_ready
.eq(1)
757 with m
.If(fetch_insn_o_valid
):
758 # loop into ISSUE_START if it's a SVP64 instruction
759 # and VL == 0. this because VL==0 is a for-loop
760 # from 0 to 0 i.e. always, always a NOP.
761 cur_vl
= cur_state
.svstate
.vl
762 with m
.If(is_svp64_mode
& (cur_vl
== 0)):
763 # update the PC before fetching the next instruction
764 # since we are in a VL==0 loop, no instruction was
765 # executed that we could be overwriting
766 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
767 comb
+= self
.state_w_pc
.i_data
.eq(nia
)
768 comb
+= self
.insn_done
.eq(1)
769 m
.next
= "ISSUE_START"
772 m
.next
= "PRED_START" # fetching predicate
774 m
.next
= "DECODE_SV" # skip predication
776 with m
.State("PRED_START"):
777 comb
+= pred_insn_i_valid
.eq(1) # tell fetch_pred to start
778 with m
.If(pred_insn_o_ready
): # fetch_pred acknowledged us
781 with m
.State("MASK_WAIT"):
782 comb
+= pred_mask_i_ready
.eq(1) # ready to receive the masks
783 with m
.If(pred_mask_o_valid
): # predication masks are ready
786 # skip zeros in predicate
787 with m
.State("PRED_SKIP"):
788 with m
.If(~is_svp64_mode
):
789 m
.next
= "DECODE_SV" # nothing to do
792 pred_src_zero
= pdecode2
.rm_dec
.pred_sz
793 pred_dst_zero
= pdecode2
.rm_dec
.pred_dz
795 # new srcstep, after skipping zeros
796 skip_srcstep
= Signal
.like(cur_srcstep
)
797 # value to be added to the current srcstep
798 src_delta
= Signal
.like(cur_srcstep
)
799 # add leading zeros to srcstep, if not in zero mode
800 with m
.If(~pred_src_zero
):
801 # priority encoder (count leading zeros)
802 # append guard bit, in case the mask is all zeros
803 pri_enc_src
= PriorityEncoder(65)
804 m
.submodules
.pri_enc_src
= pri_enc_src
805 comb
+= pri_enc_src
.i
.eq(Cat(self
.srcmask
,
807 comb
+= src_delta
.eq(pri_enc_src
.o
)
808 # apply delta to srcstep
809 comb
+= skip_srcstep
.eq(cur_srcstep
+ src_delta
)
810 # shift-out all leading zeros from the mask
811 # plus the leading "one" bit
812 # TODO count leading zeros and shift-out the zero
813 # bits, in the same step, in hardware
814 sync
+= self
.srcmask
.eq(self
.srcmask
>> (src_delta
+1))
816 # same as above, but for dststep
817 skip_dststep
= Signal
.like(cur_dststep
)
818 dst_delta
= Signal
.like(cur_dststep
)
819 with m
.If(~pred_dst_zero
):
820 pri_enc_dst
= PriorityEncoder(65)
821 m
.submodules
.pri_enc_dst
= pri_enc_dst
822 comb
+= pri_enc_dst
.i
.eq(Cat(self
.dstmask
,
824 comb
+= dst_delta
.eq(pri_enc_dst
.o
)
825 comb
+= skip_dststep
.eq(cur_dststep
+ dst_delta
)
826 sync
+= self
.dstmask
.eq(self
.dstmask
>> (dst_delta
+1))
828 # TODO: initialize mask[VL]=1 to avoid passing past VL
829 with m
.If((skip_srcstep
>= cur_vl
) |
830 (skip_dststep
>= cur_vl
)):
831 # end of VL loop. Update PC and reset src/dst step
832 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
833 comb
+= self
.state_w_pc
.i_data
.eq(nia
)
834 comb
+= new_svstate
.srcstep
.eq(0)
835 comb
+= new_svstate
.dststep
.eq(0)
836 comb
+= update_svstate
.eq(1)
837 # synchronize with the simulator
838 comb
+= self
.insn_done
.eq(1)
840 m
.next
= "ISSUE_START"
842 # update new src/dst step
843 comb
+= new_svstate
.srcstep
.eq(skip_srcstep
)
844 comb
+= new_svstate
.dststep
.eq(skip_dststep
)
845 comb
+= update_svstate
.eq(1)
849 # pass predicate mask bits through to satellite decoders
850 # TODO: for SIMD this will be *multiple* bits
851 sync
+= core
.i
.sv_pred_sm
.eq(self
.srcmask
[0])
852 sync
+= core
.i
.sv_pred_dm
.eq(self
.dstmask
[0])
854 # after src/dst step have been updated, we are ready
855 # to decode the instruction
856 with m
.State("DECODE_SV"):
857 # decode the instruction
858 sync
+= core
.i
.e
.eq(pdecode2
.e
)
859 sync
+= core
.i
.state
.eq(cur_state
)
860 sync
+= core
.i
.raw_insn_i
.eq(dec_opcode_i
)
861 sync
+= core
.i
.bigendian_i
.eq(self
.core_bigendian_i
)
863 sync
+= core
.i
.sv_rm
.eq(pdecode2
.sv_rm
)
864 # set RA_OR_ZERO detection in satellite decoders
865 sync
+= core
.i
.sv_a_nz
.eq(pdecode2
.sv_a_nz
)
866 # and svp64 detection
867 sync
+= core
.i
.is_svp64_mode
.eq(is_svp64_mode
)
868 # and svp64 bit-rev'd ldst mode
869 ldst_dec
= pdecode2
.use_svp64_ldst_dec
870 sync
+= core
.i
.use_svp64_ldst_dec
.eq(ldst_dec
)
871 # after decoding, reset any previous exception condition,
872 # allowing it to be set again during the next execution
873 sync
+= pdecode2
.ldst_exc
.eq(0)
875 m
.next
= "INSN_EXECUTE" # move to "execute"
877 # handshake with execution FSM, move to "wait" once acknowledged
878 with m
.State("INSN_EXECUTE"):
879 comb
+= exec_insn_i_valid
.eq(1) # trigger execute
880 with m
.If(exec_insn_o_ready
): # execute acknowledged us
881 m
.next
= "EXECUTE_WAIT"
883 with m
.State("EXECUTE_WAIT"):
884 # wait on "core stop" release, at instruction end
885 # need to do this here, in case we are in a VL>1 loop
886 with m
.If(~dbg
.core_stop_o
& ~core_rst
):
887 comb
+= exec_pc_i_ready
.eq(1)
888 # see https://bugs.libre-soc.org/show_bug.cgi?id=636
889 # the exception info needs to be blatted into
890 # pdecode.ldst_exc, and the instruction "re-run".
891 # when ldst_exc.happened is set, the PowerDecoder2
892 # reacts very differently: it re-writes the instruction
893 # with a "trap" (calls PowerDecoder2.trap()) which
894 # will *overwrite* whatever was requested and jump the
895 # PC to the exception address, as well as alter MSR.
896 # nothing else needs to be done other than to note
897 # the change of PC and MSR (and, later, SVSTATE)
898 with m
.If(exc_happened
):
899 mmu
= core
.fus
.get_exc("mmu0")
900 ldst
= core
.fus
.get_exc("ldst0")
901 with m
.If(fetch_failed
):
902 # instruction fetch: exception is from MMU
903 # reset instr_fault (highest priority)
904 sync
+= pdecode2
.ldst_exc
.eq(mmu
)
905 sync
+= pdecode2
.instr_fault
.eq(0)
907 # otherwise assume it was a LDST exception
908 sync
+= pdecode2
.ldst_exc
.eq(ldst
)
910 with m
.If(exec_pc_o_valid
):
912 # was this the last loop iteration?
914 cur_vl
= cur_state
.svstate
.vl
915 comb
+= is_last
.eq(next_srcstep
== cur_vl
)
917 # return directly to Decode if Execute generated an
919 with m
.If(pdecode2
.ldst_exc
.happened
):
922 # if either PC or SVSTATE were changed by the previous
923 # instruction, go directly back to Fetch, without
924 # updating either PC or SVSTATE
925 with m
.Elif(pc_changed | sv_changed
):
926 m
.next
= "ISSUE_START"
928 # also return to Fetch, when no output was a vector
929 # (regardless of SRCSTEP and VL), or when the last
930 # instruction was really the last one of the VL loop
931 with m
.Elif((~pdecode2
.loop_continue
) | is_last
):
932 # before going back to fetch, update the PC state
933 # register with the NIA.
934 # ok here we are not reading the branch unit.
935 # TODO: this just blithely overwrites whatever
936 # pipeline updated the PC
937 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
938 comb
+= self
.state_w_pc
.i_data
.eq(nia
)
939 # reset SRCSTEP before returning to Fetch
941 with m
.If(pdecode2
.loop_continue
):
942 comb
+= new_svstate
.srcstep
.eq(0)
943 comb
+= new_svstate
.dststep
.eq(0)
944 comb
+= update_svstate
.eq(1)
946 comb
+= new_svstate
.srcstep
.eq(0)
947 comb
+= new_svstate
.dststep
.eq(0)
948 comb
+= update_svstate
.eq(1)
949 m
.next
= "ISSUE_START"
951 # returning to Execute? then, first update SRCSTEP
953 comb
+= new_svstate
.srcstep
.eq(next_srcstep
)
954 comb
+= new_svstate
.dststep
.eq(next_dststep
)
955 comb
+= update_svstate
.eq(1)
956 # return to mask skip loop
960 comb
+= dbg
.core_stopped_i
.eq(1)
961 # while stopped, allow updating the PC and SVSTATE
962 with m
.If(self
.pc_i
.ok
):
963 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
964 comb
+= self
.state_w_pc
.i_data
.eq(self
.pc_i
.data
)
965 sync
+= pc_changed
.eq(1)
966 with m
.If(self
.svstate_i
.ok
):
967 comb
+= new_svstate
.eq(self
.svstate_i
.data
)
968 comb
+= update_svstate
.eq(1)
969 sync
+= sv_changed
.eq(1)
971 # check if svstate needs updating: if so, write it to State Regfile
972 with m
.If(update_svstate
):
973 comb
+= self
.state_w_sv
.wen
.eq(1 << StateRegs
.SVSTATE
)
974 comb
+= self
.state_w_sv
.i_data
.eq(new_svstate
)
975 sync
+= cur_state
.svstate
.eq(new_svstate
) # for next clock
def execute_fsm(self, m, core, pc_changed, sv_changed,
                exec_insn_i_valid, exec_insn_o_ready,
                exec_pc_o_valid, exec_pc_i_ready):
    """execute FSM

    this interacts with the "issue" FSM through exec_insn_ready/valid
    (incoming) and exec_pc_ready/valid (outgoing).  SVP64 RM prefixes
    have already been set up by the "issue" phase, so execute is fairly
    straightforward.

    * m: the Module that the FSM is added to
    * core: the core.  its p.i_valid / p.o_ready handshake and its
      n.o_data.busy_o flag are used here
    * pc_changed, sv_changed: flags that are cleared when an instruction
      is issued and set when self.state_nia.wen shows a write to PC
      or SVSTATE while the instruction is active
    * exec_insn_i_valid, exec_insn_o_ready: handshake from the issue FSM
    * exec_pc_o_valid, exec_pc_i_ready: handshake back to the issue FSM

    two states: INSN_START (offer "ready" and pass an incoming
    instruction to the core) and INSN_ACTIVE (wait for the core to stop
    being busy, then report completion).
    """
    # combinatorial and clocked statement lists of the module being built
    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2

    # temporaries
    core_busy_o = core.n.o_data.busy_o  # core is busy
    core_ivalid_i = core.p.i_valid      # instruction is valid

    with m.FSM(name="exec_fsm"):

        # waiting for instruction bus (stays there until not busy)
        with m.State("INSN_START"):
            comb += exec_insn_o_ready.eq(1)
            with m.If(exec_insn_i_valid):
                comb += core_ivalid_i.eq(1)  # instruction is valid/issued
                # a new instruction is being issued: forget any PC or
                # SVSTATE write that was noted for the previous one
                sync += sv_changed.eq(0)
                sync += pc_changed.eq(0)
                with m.If(core.p.o_ready):  # only move if accepted
                    m.next = "INSN_ACTIVE"  # move to "wait completion"

        # instruction started: must wait till it finishes
        with m.State("INSN_ACTIVE"):
            # note changes to PC and SVSTATE
            with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
                sync += sv_changed.eq(1)
            with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
                sync += pc_changed.eq(1)
            with m.If(~core_busy_o):  # instruction done!
                comb += exec_pc_o_valid.eq(1)
                with m.If(exec_pc_i_ready):
                    # when finished, indicate "done".
                    # however, if there was an exception, the instruction
                    # is *not* yet done.  this is an implementation
                    # detail: we choose to implement exceptions by
                    # taking the exception information from the LDST
                    # unit, putting that *back* into the PowerDecoder2,
                    # and *re-running the entire instruction*.
                    # if we erroneously indicate "done" here, it is as if
                    # there were *TWO* instructions:
                    # 1) the failed LDST 2) a TRAP.
                    with m.If(~pdecode2.ldst_exc.happened):
                        comb += self.insn_done.eq(1)
                    # NOTE(review): the return to INSN_START is taken
                    # whether or not an exception happened, so that the
                    # re-run instruction can be accepted - confirm
                    m.next = "INSN_START"  # back to fetch
1032 def setup_peripherals(self
, m
):
1033 comb
, sync
= m
.d
.comb
, m
.d
.sync
1035 # okaaaay so the debug module must be in coresync clock domain
1036 # but NOT its reset signal. to cope with this, set every single
1037 # submodule explicitly in coresync domain, debug and JTAG
1038 # in their own one but using *external* reset.
1039 csd
= DomainRenamer("coresync")
1040 dbd
= DomainRenamer(self
.dbg_domain
)
1042 m
.submodules
.core
= core
= csd(self
.core
)
1043 # this _so_ needs sorting out. ICache is added down inside
1044 # LoadStore1 and is already a submodule of LoadStore1
1045 if not isinstance(self
.imem
, ICache
):
1046 m
.submodules
.imem
= imem
= csd(self
.imem
)
1047 m
.submodules
.dbg
= dbg
= dbd(self
.dbg
)
1049 m
.submodules
.jtag
= jtag
= dbd(self
.jtag
)
1050 # TODO: UART2GDB mux, here, from external pin
1051 # see https://bugs.libre-soc.org/show_bug.cgi?id=499
1052 sync
+= dbg
.dmi
.connect_to(jtag
.dmi
)
1054 cur_state
= self
.cur_state
1056 # 4x 4k SRAM blocks. these simply "exist", they get routed in litex
1058 for i
, sram
in enumerate(self
.sram4k
):
1059 m
.submodules
["sram4k_%d" % i
] = csd(sram
)
1060 comb
+= sram
.enable
.eq(self
.wb_sram_en
)
1062 # XICS interrupt handler
1064 m
.submodules
.xics_icp
= icp
= csd(self
.xics_icp
)
1065 m
.submodules
.xics_ics
= ics
= csd(self
.xics_ics
)
1066 comb
+= icp
.ics_i
.eq(ics
.icp_o
) # connect ICS to ICP
1067 sync
+= cur_state
.eint
.eq(icp
.core_irq_o
) # connect ICP to core
1069 # GPIO test peripheral
1071 m
.submodules
.simple_gpio
= simple_gpio
= csd(self
.simple_gpio
)
1073 # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
1074 # XXX causes litex ECP5 test to get wrong idea about input and output
1075 # (but works with verilator sim *sigh*)
1076 # if self.gpio and self.xics:
1077 # comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
1079 # instruction decoder
1080 pdecode
= create_pdecode()
1081 m
.submodules
.dec2
= pdecode2
= csd(self
.pdecode2
)
1083 m
.submodules
.svp64
= svp64
= csd(self
.svp64
)
1086 dmi
, d_reg
, d_cr
, d_xer
, = dbg
.dmi
, dbg
.d_gpr
, dbg
.d_cr
, dbg
.d_xer
1087 intrf
= self
.core
.regs
.rf
['int']
1089 # clock delay power-on reset
1090 cd_por
= ClockDomain(reset_less
=True)
1091 cd_sync
= ClockDomain()
1092 core_sync
= ClockDomain("coresync")
1093 m
.domains
+= cd_por
, cd_sync
, core_sync
1094 if self
.dbg_domain
!= "sync":
1095 dbg_sync
= ClockDomain(self
.dbg_domain
)
1096 m
.domains
+= dbg_sync
1098 ti_rst
= Signal(reset_less
=True)
1099 delay
= Signal(range(4), reset
=3)
1100 with m
.If(delay
!= 0):
1101 m
.d
.por
+= delay
.eq(delay
- 1)
1102 comb
+= cd_por
.clk
.eq(ClockSignal())
# power-on reset delay: the coresync reset is asserted while the
# power-on counter has not yet reached zero, or when either the debug
# module (dbg.core_rst_o) or the main reset requests it.
# the comparison MUST be parenthesised: in python "|" binds tighter
# than "!=", so without the brackets the expression compares delay
# against the OR of the two reset sources instead of OR-ing three
# separate conditions.
core_rst = ResetSignal("coresync")
comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
comb += core_rst.eq(ti_rst)
1109 # debug clock is same as coresync, but reset is *main external*
1110 if self
.dbg_domain
!= "sync":
1111 dbg_rst
= ResetSignal(self
.dbg_domain
)
1112 comb
+= dbg_rst
.eq(ResetSignal())
1114 # busy/halted signals from core
1115 core_busy_o
= ~core
.p
.o_ready | core
.n
.o_data
.busy_o
# core is busy
1116 comb
+= self
.busy_o
.eq(core_busy_o
)
1117 comb
+= pdecode2
.dec
.bigendian
.eq(self
.core_bigendian_i
)
1119 # temporary hack: says "go" immediately for both address gen and ST
1121 ldst
= core
.fus
.fus
['ldst0']
1122 st_go_edge
= rising_edge(m
, ldst
.st
.rel_o
)
1123 # link addr-go direct to rel
1124 m
.d
.comb
+= ldst
.ad
.go_i
.eq(ldst
.ad
.rel_o
)
1125 m
.d
.comb
+= ldst
.st
.go_i
.eq(st_go_edge
) # link store-go to rising rel
1127 def elaborate(self
, platform
):
1130 comb
, sync
= m
.d
.comb
, m
.d
.sync
1131 cur_state
= self
.cur_state
1132 pdecode2
= self
.pdecode2
1136 # set up peripherals and core
1137 core_rst
= self
.core_rst
1138 self
.setup_peripherals(m
)
1140 # reset current state if core reset requested
1141 with m
.If(core_rst
):
1142 m
.d
.sync
+= self
.cur_state
.eq(0)
1144 # PC and instruction from I-Memory
1145 comb
+= self
.pc_o
.eq(cur_state
.pc
)
1146 pc_changed
= Signal() # note write to PC
1147 sv_changed
= Signal() # note write to SVSTATE
1149 # indicate to outside world if any FU is still executing
1150 comb
+= self
.any_busy
.eq(core
.n
.o_data
.any_busy_o
) # any FU executing
1152 # read state either from incoming override or from regfile
1153 # TODO: really should be doing MSR in the same way
1154 pc
= state_get(m
, core_rst
, self
.pc_i
,
1156 self
.state_r_pc
, StateRegs
.PC
)
1157 svstate
= state_get(m
, core_rst
, self
.svstate_i
,
1158 "svstate", # read SVSTATE
1159 self
.state_r_sv
, StateRegs
.SVSTATE
)
1161 # don't write pc every cycle
1162 comb
+= self
.state_w_pc
.wen
.eq(0)
1163 comb
+= self
.state_w_pc
.i_data
.eq(0)
1165 # address of the next instruction, in the absence of a branch
1166 # depends on the instruction size
1169 # connect up debug signals
1170 # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
1171 comb
+= dbg
.terminate_i
.eq(core
.o
.core_terminate_o
)
1172 comb
+= dbg
.state
.pc
.eq(pc
)
1173 comb
+= dbg
.state
.svstate
.eq(svstate
)
1174 comb
+= dbg
.state
.msr
.eq(cur_state
.msr
)
1176 # pass the prefix mode from Fetch to Issue, so the latter can loop
1178 is_svp64_mode
= Signal()
1180 # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
1181 # issue, decode/execute, now joined by "Predicate fetch/calculate".
1182 # these are the handshake signals between each
1184 # fetch FSM can run as soon as the PC is valid
1185 fetch_pc_i_valid
= Signal() # Execute tells Fetch "start next read"
1186 fetch_pc_o_ready
= Signal() # Fetch Tells SVSTATE "proceed"
1188 # fetch FSM hands over the instruction to be decoded / issued
1189 fetch_insn_o_valid
= Signal()
1190 fetch_insn_i_ready
= Signal()
1192 # predicate fetch FSM decodes and fetches the predicate
1193 pred_insn_i_valid
= Signal()
1194 pred_insn_o_ready
= Signal()
1196 # predicate fetch FSM delivers the masks
1197 pred_mask_o_valid
= Signal()
1198 pred_mask_i_ready
= Signal()
1200 # issue FSM delivers the instruction to be executed
1201 exec_insn_i_valid
= Signal()
1202 exec_insn_o_ready
= Signal()
1204 # execute FSM, hands over the PC/SVSTATE back to the issue FSM
1205 exec_pc_o_valid
= Signal()
1206 exec_pc_i_ready
= Signal()
1208 # the FSMs here are perhaps unusual in that they detect conditions
1209 # then "hold" information, combinatorially, for the core
1210 # (as opposed to using sync - which would be on a clock's delay)
1211 # this includes the actual opcode, valid flags and so on.
1213 # Fetch, then predicate fetch, then Issue, then Execute.
1214 # Issue is where the VL for-loop lives. the ready/valid
1215 # signalling is used to communicate between the four.
1218 fetch
= FetchFSM(self
.allow_overlap
, self
.svp64_en
,
1219 self
.imem
, core_rst
, pdecode2
, cur_state
,
1220 dbg
, core
, svstate
, nia
, is_svp64_mode
)
1221 m
.submodules
.fetch
= fetch
1222 # connect up in/out data to existing Signals
1223 comb
+= fetch
.p
.i_data
.pc
.eq(pc
)
1224 # and the ready/valid signalling
1225 comb
+= fetch_pc_o_ready
.eq(fetch
.p
.o_ready
)
1226 comb
+= fetch
.p
.i_valid
.eq(fetch_pc_i_valid
)
1227 comb
+= fetch_insn_o_valid
.eq(fetch
.n
.o_valid
)
1228 comb
+= fetch
.n
.i_ready
.eq(fetch_insn_i_ready
)
1230 self
.issue_fsm(m
, core
, pc_changed
, sv_changed
, nia
,
1231 dbg
, core_rst
, is_svp64_mode
,
1232 fetch_pc_o_ready
, fetch_pc_i_valid
,
1233 fetch_insn_o_valid
, fetch_insn_i_ready
,
1234 pred_insn_i_valid
, pred_insn_o_ready
,
1235 pred_mask_o_valid
, pred_mask_i_ready
,
1236 exec_insn_i_valid
, exec_insn_o_ready
,
1237 exec_pc_o_valid
, exec_pc_i_ready
)
1240 self
.fetch_predicate_fsm(m
,
1241 pred_insn_i_valid
, pred_insn_o_ready
,
1242 pred_mask_o_valid
, pred_mask_i_ready
)
1244 self
.execute_fsm(m
, core
, pc_changed
, sv_changed
,
1245 exec_insn_i_valid
, exec_insn_o_ready
,
1246 exec_pc_o_valid
, exec_pc_i_ready
)
1248 # this bit doesn't have to be in the FSM: connect up to read
1249 # regfiles on demand from DMI
1252 # DEC and TB inc/dec FSM. copy of DEC is put into CoreState,
1253 # (which uses that in PowerDecoder2 to raise 0x900 exception)
1254 self
.tb_dec_fsm(m
, cur_state
.dec
)
1258 def do_dmi(self
, m
, dbg
):
1259 """deals with DMI debug requests
1261 currently only provides read requests for the INT regfile, CR and XER
1262 it will later also deal with *writing* to these regfiles.
1266 dmi
, d_reg
, d_cr
, d_xer
, = dbg
.dmi
, dbg
.d_gpr
, dbg
.d_cr
, dbg
.d_xer
1267 intrf
= self
.core
.regs
.rf
['int']
1269 with m
.If(d_reg
.req
): # request for regfile access being made
1270 # TODO: error-check this
1271 # XXX should this be combinatorial? sync better?
1273 comb
+= self
.int_r
.ren
.eq(1 << d_reg
.addr
)
1275 comb
+= self
.int_r
.addr
.eq(d_reg
.addr
)
1276 comb
+= self
.int_r
.ren
.eq(1)
1277 d_reg_delay
= Signal()
1278 sync
+= d_reg_delay
.eq(d_reg
.req
)
1279 with m
.If(d_reg_delay
):
1280 # data arrives one clock later
1281 comb
+= d_reg
.data
.eq(self
.int_r
.o_data
)
1282 comb
+= d_reg
.ack
.eq(1)
1284 # sigh same thing for CR debug
1285 with m
.If(d_cr
.req
): # request for regfile access being made
1286 comb
+= self
.cr_r
.ren
.eq(0b11111111) # enable all
1287 d_cr_delay
= Signal()
1288 sync
+= d_cr_delay
.eq(d_cr
.req
)
1289 with m
.If(d_cr_delay
):
1290 # data arrives one clock later
1291 comb
+= d_cr
.data
.eq(self
.cr_r
.o_data
)
1292 comb
+= d_cr
.ack
.eq(1)
1295 with m
.If(d_xer
.req
): # request for regfile access being made
1296 comb
+= self
.xer_r
.ren
.eq(0b111111) # enable all
1297 d_xer_delay
= Signal()
1298 sync
+= d_xer_delay
.eq(d_xer
.req
)
1299 with m
.If(d_xer_delay
):
1300 # data arrives one clock later
1301 comb
+= d_xer
.data
.eq(self
.xer_r
.o_data
)
1302 comb
+= d_xer
.ack
.eq(1)
def tb_dec_fsm(self, m, spr_dec):
    """FSM for updating DEC and TB

    this is a FSM for updating either dec or tb.  it runs alternately
    DEC, TB, DEC, TB: each register is read through the FAST regfile
    "issue" read port and written back through the "issue" write port
    one state later (DEC minus one, TB plus one).

    note that SPR pipeline could have written a new value to DEC,
    however the regfile has "passthrough" on it.
    NOTE(review): how that passthrough interacts with the write done
    here is not visible in this function - confirm.

    see v3.0B p1097-1099 for Timer Resource and p1065 and p1076

    * m: the Module that the FSM is added to
    * spr_dec: receives (on the next clock) a copy of the newly
      computed DEC value
    """
    comb, sync = m.d.comb, m.d.sync
    fast_rf = self.core.regs.rf['fast']
    fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
    fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB

    with m.FSM() as fsm:

        # initiates read of current DEC
        with m.State("DEC_READ"):
            comb += fast_r_dectb.addr.eq(FastRegs.DEC)
            comb += fast_r_dectb.ren.eq(1)
            m.next = "DEC_WRITE"

        # waits for DEC read to arrive (1 cycle), updates with new value
        with m.State("DEC_WRITE"):
            new_dec = Signal(64)
            # TODO: MSR.LPCR 32-bit decrement mode
            comb += new_dec.eq(fast_r_dectb.o_data - 1)
            comb += fast_w_dectb.addr.eq(FastRegs.DEC)
            comb += fast_w_dectb.wen.eq(1)
            comb += fast_w_dectb.i_data.eq(new_dec)
            sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
            # NOTE(review): transition follows the DEC, TB, DEC, TB order
            # given in the docstring - confirm
            m.next = "TB_READ"

        # initiates read of current TB
        with m.State("TB_READ"):
            comb += fast_r_dectb.addr.eq(FastRegs.TB)
            comb += fast_r_dectb.ren.eq(1)
            # NOTE(review): mirrors DEC_READ -> DEC_WRITE - confirm
            m.next = "TB_WRITE"

        # waits for read TB to arrive, initiates write of current TB
        with m.State("TB_WRITE"):
            # NOTE(review): width assumed to match new_dec - confirm
            new_tb = Signal(64)
            comb += new_tb.eq(fast_r_dectb.o_data + 1)
            comb += fast_w_dectb.addr.eq(FastRegs.TB)
            comb += fast_w_dectb.wen.eq(1)
            comb += fast_w_dectb.i_data.eq(new_tb)
            # NOTE(review): loops back to start the next DEC update
            # - confirm
            m.next = "DEC_READ"
1357 yield from self
.pc_i
.ports()
1360 yield from self
.core
.ports()
1361 yield from self
.imem
.ports()
1362 yield self
.core_bigendian_i
1368 def external_ports(self
):
1369 ports
= self
.pc_i
.ports()
1370 ports
+= [self
.pc_o
, self
.memerr_o
, self
.core_bigendian_i
, self
.busy_o
,
1374 ports
+= list(self
.jtag
.external_ports())
1376 # don't add DMI if JTAG is enabled
1377 ports
+= list(self
.dbg
.dmi
.ports())
1379 ports
+= list(self
.imem
.ibus
.fields
.values())
1380 ports
+= list(self
.core
.l0
.cmpi
.wb_bus().fields
.values())
1383 for sram
in self
.sram4k
:
1384 ports
+= list(sram
.bus
.fields
.values())
1387 ports
+= list(self
.xics_icp
.bus
.fields
.values())
1388 ports
+= list(self
.xics_ics
.bus
.fields
.values())
1389 ports
.append(self
.int_level_i
)
1392 ports
+= list(self
.simple_gpio
.bus
.fields
.values())
1393 ports
.append(self
.gpio_o
)
1401 class TestIssuer(Elaboratable
):
1402 def __init__(self
, pspec
):
1403 self
.ti
= TestIssuerInternal(pspec
)
1404 self
.pll
= DummyPLL(instance
=True)
1406 # PLL direct clock or not
1407 self
.pll_en
= hasattr(pspec
, "use_pll") and pspec
.use_pll
1409 self
.pll_test_o
= Signal(reset_less
=True)
1410 self
.pll_vco_o
= Signal(reset_less
=True)
1411 self
.clk_sel_i
= Signal(2, reset_less
=True)
1412 self
.ref_clk
= ClockSignal() # can't rename it but that's ok
1413 self
.pllclk_clk
= ClockSignal("pllclk")
1415 def elaborate(self
, platform
):
1419 # TestIssuer nominally runs at main clock, actually it is
1420 # all combinatorial internally except for coresync'd components
1421 m
.submodules
.ti
= ti
= self
.ti
1424 # ClockSelect runs at PLL output internal clock rate
1425 m
.submodules
.wrappll
= pll
= self
.pll
1427 # add clock domains from PLL
1428 cd_pll
= ClockDomain("pllclk")
1431 # PLL clock established. has the side-effect of running clklsel
1432 # at the PLL's speed (see DomainRenamer("pllclk") above)
1433 pllclk
= self
.pllclk_clk
1434 comb
+= pllclk
.eq(pll
.clk_pll_o
)
1436 # wire up external 24mhz to PLL
1437 #comb += pll.clk_24_i.eq(self.ref_clk)
1438 # output 18 mhz PLL test signal, and analog oscillator out
1439 comb
+= self
.pll_test_o
.eq(pll
.pll_test_o
)
1440 comb
+= self
.pll_vco_o
.eq(pll
.pll_vco_o
)
1442 # input to pll clock selection
1443 comb
+= pll
.clk_sel_i
.eq(self
.clk_sel_i
)
1445 # now wire up ResetSignals. don't mind them being in this domain
1446 pll_rst
= ResetSignal("pllclk")
1447 comb
+= pll_rst
.eq(ResetSignal())
1449 # internal clock is set to selector clock-out. has the side-effect of
1450 # running TestIssuer at this speed (see DomainRenamer("intclk") above)
1451 # debug clock runs at coresync internal clock
1452 cd_coresync
= ClockDomain("coresync")
1453 #m.domains += cd_coresync
1454 if self
.ti
.dbg_domain
!= 'sync':
1455 cd_dbgsync
= ClockDomain("dbgsync")
1456 #m.domains += cd_dbgsync
1457 intclk
= ClockSignal("coresync")
1458 dbgclk
= ClockSignal(self
.ti
.dbg_domain
)
1459 # XXX BYPASS PLL XXX
1460 # XXX BYPASS PLL XXX
1461 # XXX BYPASS PLL XXX
1463 comb
+= intclk
.eq(self
.ref_clk
)
1465 comb
+= intclk
.eq(ClockSignal())
1466 if self
.ti
.dbg_domain
!= 'sync':
1467 dbgclk
= ClockSignal(self
.ti
.dbg_domain
)
1468 comb
+= dbgclk
.eq(intclk
)
1473 return list(self
.ti
.ports()) + list(self
.pll
.ports()) + \
1474 [ClockSignal(), ResetSignal()]
1476 def external_ports(self
):
1477 ports
= self
.ti
.external_ports()
1478 ports
.append(ClockSignal())
1479 ports
.append(ResetSignal())
1481 ports
.append(self
.clk_sel_i
)
1482 ports
.append(self
.pll
.clk_24_i
)
1483 ports
.append(self
.pll_test_o
)
1484 ports
.append(self
.pll_vco_o
)
1485 ports
.append(self
.pllclk_clk
)
1486 ports
.append(self
.ref_clk
)
1490 if __name__
== '__main__':
1491 units
= {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
1497 pspec
= TestMemPspec(ldst_ifacetype
='bare_wb',
1498 imem_ifacetype
='bare_wb',
1503 dut
= TestIssuer(pspec
)
1504 vl
= main(dut
, ports
=dut
.ports(), name
="test_issuer")
1506 if len(sys
.argv
) == 1:
1507 vl
= rtlil
.convert(dut
, ports
=dut
.external_ports(), name
="test_issuer")
1508 with
open("test_issuer.il", "w") as f
: