3 not in any way intended for production use. this runs a FSM that:
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
10 * does it all over again
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to at least provide something that can be further incrementally
18 from nmigen
import (Elaboratable
, Module
, Signal
, ClockSignal
, ResetSignal
,
19 ClockDomain
, DomainRenamer
, Mux
, Const
, Repl
)
20 from nmigen
.cli
import rtlil
21 from nmigen
.cli
import main
24 from soc
.decoder
.power_decoder
import create_pdecode
25 from soc
.decoder
.power_decoder2
import PowerDecode2
, SVP64PrefixDecoder
26 from soc
.decoder
.decode2execute1
import IssuerDecode2ToOperand
27 from soc
.decoder
.decode2execute1
import Data
28 from soc
.experiment
.testmem
import TestMemory
# test only for instructions
29 from soc
.regfile
.regfiles
import StateRegs
, FastRegs
30 from soc
.simple
.core
import NonProductionCore
31 from soc
.config
.test
.test_loadstore
import TestMemPspec
32 from soc
.config
.ifetch
import ConfigFetchUnit
33 from soc
.decoder
.power_enums
import (MicrOp
, SVP64PredInt
, SVP64PredCR
,
35 from soc
.debug
.dmi
import CoreDebug
, DMIInterface
36 from soc
.debug
.jtag
import JTAG
37 from soc
.config
.pinouts
import get_pinspecs
38 from soc
.config
.state
import CoreState
39 from soc
.interrupts
.xics
import XICS_ICP
, XICS_ICS
40 from soc
.bus
.simple_gpio
import SimpleGPIO
41 from soc
.bus
.SPBlock512W64B8W
import SPBlock512W64B8W
42 from soc
.clock
.select
import ClockSelect
43 from soc
.clock
.dummypll
import DummyPLL
44 from soc
.sv
.svstate
import SVSTATERec
47 from nmutil
.util
import rising_edge
def get_insn(f_instr_o, pc):
    """Select the 32-bit instruction word from the fetch unit output.

    f_instr_o: fetched instruction data; must expose ``width`` and, when
               it is not 32 bits wide, ``word_select``.
    pc:        program counter; only bit 2 is examined.

    Returns f_instr_o itself when it is exactly 32 bits wide, otherwise
    the 32-bit word of f_instr_o selected by bit 2 of pc.
    """
    if f_instr_o.width == 32:
        # already exactly one instruction wide: nothing to select
        return f_instr_o
    # 64-bit: bit 2 of pc decides which word to select
    return f_instr_o.word_select(pc[2], 32)
# gets state input or reads from state regfile
def state_get(m, state_i, name, regfile, regnum):
    """Return a 64-bit Signal carrying one item of core state.

    m:        Module to which the logic is added
    state_i:  incoming override, read through its ``ok`` / ``data`` fields
    name:     name of the result Signal (also prefixes the delay flag)
    regfile:  regfile read port, driven via ``ren``, read via ``data_o``
    regnum:   bit position raised in the read-enable

    When state_i.ok is set the result is state_i.data.  Otherwise a read
    of the regfile is requested and its data_o is used one clock later,
    gated by a registered copy of "no override present".
    """
    comb = m.d.comb
    sync = m.d.sync
    res = Signal(64, reset_less=True, name=name)
    # registered "override absent" flag: lines up with the regfile data
    res_ok_delay = Signal(name="%s_ok_delay" % name)
    sync += res_ok_delay.eq(~state_i.ok)
    with m.If(state_i.ok):
        # incoming override (start from pc_i)
        comb += res.eq(state_i.data)
    with m.Else():
        # otherwise read StateRegs regfile for PC...
        comb += regfile.ren.eq(1 << regnum)
    # ... but on a 1-clock delay
    with m.If(res_ok_delay):
        comb += res.eq(regfile.data_o)
    return res
def get_predint(m, mask, name):
    """decode SVP64 predicate integer mask field to reg number and invert

    this is identical to the equivalent function in ISACaller except that
    it doesn't read the INT directly, it just decodes "what needs to be done"
    i.e. which INT reg, whether it is shifted and whether it is bit-inverted.

    m:    Module to which the decode logic is added
    mask: predicate mask field, compared against SVP64PredInt values
    name: prefix for the names of the four Signals created here

    Returns the tuple (regread, invert, unary, all1s):

    * all1s is set to indicate that no mask is to be applied.
    * regread indicates the GPR register number to be read
    * invert is set to indicate that the register value is to be inverted
    * unary indicates that the contents of the register is to be shifted 1<<r3
    """
    comb = m.d.comb
    regread = Signal(5, name=name+"regread")
    invert = Signal(name=name+"invert")
    unary = Signal(name=name+"unary")
    all1s = Signal(name=name+"all1s")
    with m.Switch(mask):
        with m.Case(SVP64PredInt.ALWAYS.value):
            comb += all1s.eq(1)      # use 0b1111 (all ones)
        with m.Case(SVP64PredInt.R3_UNARY.value):
            comb += regread.eq(3)
            comb += unary.eq(1)      # 1<<r3 - shift r3 (single bit)
        with m.Case(SVP64PredInt.R3.value):
            comb += regread.eq(3)
        with m.Case(SVP64PredInt.R3_N.value):
            comb += regread.eq(3)
            comb += invert.eq(1)
        with m.Case(SVP64PredInt.R10.value):
            comb += regread.eq(10)
        with m.Case(SVP64PredInt.R10_N.value):
            comb += regread.eq(10)
            comb += invert.eq(1)
        with m.Case(SVP64PredInt.R30.value):
            comb += regread.eq(30)
        with m.Case(SVP64PredInt.R30_N.value):
            comb += regread.eq(30)
            comb += invert.eq(1)
    return regread, invert, unary, all1s
def get_predcr(m, mask, name):
    """decode SVP64 predicate CR to reg number field and invert status

    this is identical to _get_predcr in ISACaller

    m:    Module to which the decode logic is added
    mask: predicate mask field, compared against SVP64PredCR values
    name: prefix for the names of the two Signals created here

    Returns the tuple (idx, invert): a 2-bit index of the bit within a CR
    field, and a flag saying whether that bit is to be inverted.
    """
    comb = m.d.comb
    idx = Signal(2, name=name+"idx")
    invert = Signal(name=name+"crinvert")
    # NOTE(review): the per-case assignments were missing from the copy of
    # this function that was reviewed.  The table below assumes the bit
    # order LT=0, GT=1, EQ=2, SO=3 and invert=1 for the negated tests
    # (GE, LE, NE, NS) - confirm both against ISACaller._get_predcr
    # before relying on CR predication.
    decode = (
        (SVP64PredCR.LT, 0, 0),
        (SVP64PredCR.GE, 0, 1),
        (SVP64PredCR.GT, 1, 0),
        (SVP64PredCR.LE, 1, 1),
        (SVP64PredCR.EQ, 2, 0),
        (SVP64PredCR.NE, 2, 1),
        (SVP64PredCR.SO, 3, 0),
        (SVP64PredCR.NS, 3, 1),
    )
    with m.Switch(mask):
        for cond, bit, inv in decode:
            with m.Case(cond.value):
                comb += idx.eq(bit)
                comb += invert.eq(inv)
    return idx, invert
149 class TestIssuerInternal(Elaboratable
):
150 """TestIssuer - reads instructions from TestMemory and issues them
152 efficiency and speed is not the main goal here: functional correctness
153 and code clarity is. optimisations (which almost 100% interfere with
154 easy understanding) come later.
def __init__(self, pspec):
    """Create the core, decoders, fetch unit, debug block and optional
    peripherals, and pick up the regfile ports used by the FSMs.

    pspec: configuration object.  The optional attributes "svp64",
           "debug", "sram4x4kblock", "xics" and "gpio" are examined here;
           when JTAG is enabled pspec is *modified* (wb_icache_en and
           wb_dcache_en are added) before being handed to the core.
    """
    # test if SVP64 is to be enabled
    self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)

    # JTAG interface.  add this right at the start because if it's
    # added it *modifies* the pspec, by adding enable/disable signals
    # for parts of the rest of the core
    self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
    if self.jtag_en:
        # NOTE(review): the tail of this set ('pwm' onwards) was on a line
        # missing from the reviewed copy - confirm against the pinouts
        subset = {'uart', 'mtwi', 'eint', 'gpio', 'mspi0', 'mspi1',
                  'pwm', 'sd0', 'sdr'}
        self.jtag = JTAG(get_pinspecs(subset=subset))
        # add signals to pspec to enable/disable icache and dcache
        # (or data and instruction wishbone if icache/dcache not included)
        # https://bugs.libre-soc.org/show_bug.cgi?id=520
        # TODO: do we actually care if these are not domain-synchronised?
        # honestly probably not.
        pspec.wb_icache_en = self.jtag.wb_icache_en
        pspec.wb_dcache_en = self.jtag.wb_dcache_en
        self.wb_sram_en = self.jtag.wb_sram_en
    else:
        # no JTAG: SRAM permanently enabled
        self.wb_sram_en = Const(1)

    # add 4k sram blocks?
    self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
                     pspec.sram4x4kblock == True)
    if self.sram4x4k:
        # NOTE(review): block count (4) and the "features" argument were
        # on lines missing from the reviewed copy - confirm
        self.sram4k = []
        for i in range(4):
            self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
                                                features={'err'}))

    # add interrupt controller?
    self.xics = hasattr(pspec, "xics") and pspec.xics == True
    if self.xics:
        self.xics_icp = XICS_ICP()
        self.xics_ics = XICS_ICS()
        self.int_level_i = self.xics_ics.int_level_i

    # add GPIO peripheral?
    self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
    if self.gpio:
        self.simple_gpio = SimpleGPIO()
        self.gpio_o = self.simple_gpio.gpio_o

    # main instruction core.  suitable for prototyping / demo only
    self.core = NonProductionCore(pspec)

    # instruction decoder.  goes into Trap Record
    pdecode = create_pdecode()
    self.cur_state = CoreState("cur")  # current state (MSR/PC/SVSTATE)
    self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state,
                                 opkls=IssuerDecode2ToOperand,
                                 svp64_en=self.svp64_en)
    if self.svp64_en:
        self.svp64 = SVP64PrefixDecoder()  # for decoding SVP64 prefix

    # Test Instruction memory
    self.imem = ConfigFetchUnit(pspec).fu

    # DMI interface
    self.dbg = CoreDebug()

    # instruction go/monitor
    self.pc_o = Signal(64, reset_less=True)
    self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
    self.svstate_i = Data(32, "svstate_i")  # ditto
    self.core_bigendian_i = Signal()  # TODO: set based on MSR.LE
    self.busy_o = Signal(reset_less=True)
    self.memerr_o = Signal(reset_less=True)

    # STATE regfile read /write ports for PC, MSR, SVSTATE
    staterf = self.core.regs.rf['state']
    self.state_r_pc = staterf.r_ports['cia']  # PC rd
    self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
    self.state_r_msr = staterf.r_ports['msr']  # MSR rd
    self.state_r_sv = staterf.r_ports['sv']  # SVSTATE rd
    self.state_w_sv = staterf.w_ports['sv']  # SVSTATE wr

    # DMI interface access
    intrf = self.core.regs.rf['int']
    crrf = self.core.regs.rf['cr']
    xerrf = self.core.regs.rf['xer']
    self.int_r = intrf.r_ports['dmi']  # INT read
    self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read
    self.xer_r = xerrf.r_ports['full_xer']  # XER read

    if self.svp64_en:
        # for predication
        self.int_pred = intrf.r_ports['pred']  # INT predicate read
        self.cr_pred = crrf.r_ports['cr_pred']  # CR predicate read

    # hack method of keeping an eye on whether branch/trap set the PC
    self.state_nia = staterf.w_ports['nia']
    self.state_nia.wen.name = 'state_nia_wen'

    # pulse to synchronize the simulator at instruction end
    self.insn_done = Signal()

    if self.svp64_en:
        # store copies of predicate masks
        self.srcmask = Signal(64)
        self.dstmask = Signal(64)
def fetch_fsm(self, m, core, pc, svstate, nia, is_svp64_mode,
              fetch_pc_ready_o, fetch_pc_valid_i,
              fetch_insn_valid_o, fetch_insn_ready_i):
    """fetch FSM

    this FSM performs fetch of raw instruction data, partial-decodes
    it 32-bit at a time to detect SVP64 prefixes, and will optionally
    read a 2nd 32-bit quantity if that occurs.

    m:              Module to which the FSM is added
    core:           not used by this FSM (kept for a uniform signature)
    pc, svstate:    values captured into cur_state when a fetch starts
    nia:            set (sync) to the address of the following instruction
    is_svp64_mode:  set (sync) when an SVP64 prefix was detected
    fetch_pc_*:     ready/valid handshake that starts a fetch
    fetch_insn_*:   ready/valid handshake that hands the instruction over
    """
    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2
    cur_state = self.cur_state
    imem = self.imem
    dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

    # cleared when an MSR read is started, set again once it is captured
    msr_read = Signal(reset=1)

    with m.FSM(name='fetch_fsm'):

        # waiting (zzz)
        with m.State("IDLE"):
            comb += fetch_pc_ready_o.eq(1)
            with m.If(fetch_pc_valid_i):
                # instruction allowed to go: start by reading the PC
                # capture the PC and also drop it into Insn Memory
                # we have joined a pair of combinatorial memory
                # lookups together.  this is Generally Bad.
                comb += imem.a_pc_i.eq(pc)
                comb += imem.a_valid_i.eq(1)
                comb += imem.f_valid_i.eq(1)
                sync += cur_state.pc.eq(pc)
                sync += cur_state.svstate.eq(svstate)  # and svstate

                # initiate read of MSR. arrives one clock later
                comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
                sync += msr_read.eq(0)

                m.next = "INSN_READ"  # move to "wait for bus" phase

        # dummy pause to find out why simulation is not keeping up
        with m.State("INSN_READ"):
            # one cycle later, msr/sv read arrives.  valid only once.
            with m.If(~msr_read):
                sync += msr_read.eq(1)  # yeah don't read it again
                sync += cur_state.msr.eq(self.state_r_msr.data_o)
            with m.If(imem.f_busy_o):  # zzz...
                # busy: stay in wait-read
                comb += imem.a_valid_i.eq(1)
                comb += imem.f_valid_i.eq(1)
            with m.Else():
                # not busy: instruction fetched
                insn = get_insn(imem.f_instr_o, cur_state.pc)
                if self.svp64_en:
                    svp64 = self.svp64
                    # decode the SVP64 prefix, if any
                    comb += svp64.raw_opcode_in.eq(insn)
                    comb += svp64.bigendian.eq(self.core_bigendian_i)
                    # pass the decoded prefix (if any) to PowerDecoder2
                    sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                    # remember whether this is a prefixed instruction, so
                    # the FSM can readily loop when VL==0
                    sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                    # calculate the address of the following instruction
                    insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                    sync += nia.eq(cur_state.pc + insn_size)
                    with m.If(~svp64.is_svp64_mode):
                        # with no prefix, store the instruction
                        # and hand it directly to the next FSM
                        sync += dec_opcode_i.eq(insn)
                        m.next = "INSN_READY"
                    with m.Else():
                        # fetch the rest of the instruction from memory
                        comb += imem.a_pc_i.eq(cur_state.pc + 4)
                        comb += imem.a_valid_i.eq(1)
                        comb += imem.f_valid_i.eq(1)
                        m.next = "INSN_READ2"
                else:
                    # not SVP64 - 32-bit only
                    sync += nia.eq(cur_state.pc + 4)
                    sync += dec_opcode_i.eq(insn)
                    m.next = "INSN_READY"

        with m.State("INSN_READ2"):
            with m.If(imem.f_busy_o):  # zzz...
                # busy: stay in wait-read
                comb += imem.a_valid_i.eq(1)
                comb += imem.f_valid_i.eq(1)
            with m.Else():
                # not busy: instruction fetched
                insn = get_insn(imem.f_instr_o, cur_state.pc+4)
                sync += dec_opcode_i.eq(insn)
                m.next = "INSN_READY"
                # TODO: probably can start looking at pdecode2.rm_dec
                # here or maybe even in INSN_READ state, if svp64_mode
                # detected, in order to trigger - and wait for - the
                # predicate reading.  sketch (not yet implemented):
                #
                #   pmode = pdecode2.rm_dec.predmode
                #   if pmode != SVP64PredMode.ALWAYS.value:
                #       fire predicate loading FSM and wait before
                #       moving to INSN_READY
                #   else:
                #       sync += self.srcmask.eq(-1) # set to all 1s
                #       sync += self.dstmask.eq(-1) # set to all 1s
                #       m.next = "INSN_READY"

        with m.State("INSN_READY"):
            # hand over the instruction, to be decoded
            comb += fetch_insn_valid_o.eq(1)
            with m.If(fetch_insn_ready_i):
                m.next = "IDLE"
def fetch_predicate_fsm(self, m,
                        pred_insn_valid_i, pred_insn_ready_o,
                        pred_mask_valid_o, pred_mask_ready_i):
    """fetch_predicate_fsm - obtains (constructs in the case of CR)
       src/dest predicate masks

    https://bugs.libre-soc.org/show_bug.cgi?id=617
    the predicates can be read here, by using IntRegs r_ports['pred']
    or CRRegs r_ports['pred'].  in the case of CRs it will have to
    be done through multiple reads, extracting one relevant at a time.
    later, a faster way would be to use the 32-bit-wide CR port but
    this is more complex decoding, here.  equivalent code used in
    ISACaller is "from soc.decoder.isa.caller import get_predcr"

    note: this ENTIRE FSM is not to be called when svp64 is disabled

    m:            Module to which the FSM is added
    pred_insn_*:  ready/valid handshake that starts a predicate fetch
    pred_mask_*:  ready/valid handshake signalling that self.srcmask and
                  self.dstmask hold the masks
    """
    comb = m.d.comb
    sync = m.d.sync
    rm_dec = self.pdecode2.rm_dec  # SVP64RMModeDecode
    predmode = rm_dec.predmode
    srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
    int_pred = self.int_pred  # read regfile

    # CR predication is not implemented yet.  outline:
    # elif predmode == CR:
    #    CR-src sidx, sinvert = get_predcr(m, srcpred)
    #    CR-dst didx, dinvert = get_predcr(m, dstpred)
    #    TODO read CR-src and CR-dst into self.srcmask+dstmask with loop
    #    has to cope with first one then the other
    #    for cr_idx = FSM-state-loop(0..VL-1):
    #       FSM-state-trigger-CR-read:
    #           cr_ren = (1<<7-(cr_idx+SVP64CROffs.CRPred))
    #           comb += cr_pred.ren.eq(cr_ren)
    #       FSM-state-1-clock-later-actual-Read:
    #           cr_field = Signal(4)
    #           cr_bit = Signal(1)
    #           # read the CR field, select the appropriate bit
    #           comb += cr_field.eq(cr_pred.data_o)
    #           comb += cr_bit.eq(cr_field.bit_select(idx))
    #           # just like in branch BO tests
    #           comb += self.srcmask[cr_idx].eq(inv ^ cr_bit)

    # decode predicates.  the CR decode results and the "unary" flags are
    # not consumed by this FSM yet, but the decoders are still instantiated
    sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
    dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
    sidx, scrinvert = get_predcr(m, srcpred, 's')
    didx, dcrinvert = get_predcr(m, dstpred, 'd')

    with m.FSM(name="fetch_predicate"):

        with m.State("FETCH_PRED_IDLE"):
            comb += pred_insn_ready_o.eq(1)
            with m.If(pred_insn_valid_i):
                with m.If(predmode == SVP64PredMode.INT):
                    # skip fetching destination mask register, when zero
                    with m.If(dall1s):
                        sync += self.dstmask.eq(-1)
                        # directly go to fetch source mask register
                        # guaranteed not to be zero (otherwise predmode
                        # would be SVP64PredMode.ALWAYS, not INT)
                        comb += int_pred.addr.eq(sregread)
                        comb += int_pred.ren.eq(1)
                        m.next = "INT_SRC_READ"
                    # fetch destination predicate register
                    with m.Else():
                        comb += int_pred.addr.eq(dregread)
                        comb += int_pred.ren.eq(1)
                        m.next = "INT_DST_READ"
                with m.Else():
                    # no INT predication: both masks all 1s
                    sync += self.srcmask.eq(-1)
                    sync += self.dstmask.eq(-1)
                    m.next = "FETCH_PRED_DONE"

        with m.State("INT_DST_READ"):
            # store destination mask
            inv = Repl(dinvert, 64)
            sync += self.dstmask.eq(int_pred.data_o ^ inv)
            # skip fetching source mask register, when zero
            with m.If(sall1s):
                sync += self.srcmask.eq(-1)
                m.next = "FETCH_PRED_DONE"
            # fetch source predicate register
            with m.Else():
                comb += int_pred.addr.eq(sregread)
                comb += int_pred.ren.eq(1)
                m.next = "INT_SRC_READ"

        with m.State("INT_SRC_READ"):
            # store source mask
            inv = Repl(sinvert, 64)
            sync += self.srcmask.eq(int_pred.data_o ^ inv)
            m.next = "FETCH_PRED_DONE"

        with m.State("FETCH_PRED_DONE"):
            comb += pred_mask_valid_o.eq(1)
            with m.If(pred_mask_ready_i):
                m.next = "FETCH_PRED_IDLE"
def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
              dbg, core_rst, is_svp64_mode,
              fetch_pc_ready_o, fetch_pc_valid_i,
              fetch_insn_valid_o, fetch_insn_ready_i,
              pred_insn_valid_i, pred_insn_ready_o,
              pred_mask_valid_o, pred_mask_ready_i,
              exec_insn_valid_i, exec_insn_ready_o,
              exec_pc_valid_o, exec_pc_ready_i):
    """issue FSM

    decode / issue FSM.  this interacts with the "fetch" FSM
    through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
    (outgoing). also interacts with the "execute" FSM
    through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
    (incoming).
    SVP64 RM prefixes have already been set up by the
    "fetch" phase, so execute is fairly straightforward.

    The SVP64 srcstep/dststep loop lives here: after each execution the
    FSM either writes nia to the PC and returns to fetch, or advances
    the steps and re-decodes the same instruction (DECODE_SV).
    """
    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2
    cur_state = self.cur_state

    # temporaries
    dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

    # for updating svstate (things like srcstep etc.)
    update_svstate = Signal()  # set this (below) if updating
    new_svstate = SVSTATERec("new_svstate")
    comb += new_svstate.eq(cur_state.svstate)

    # precalculate srcstep+1 and dststep+1
    cur_srcstep = cur_state.svstate.srcstep
    cur_dststep = cur_state.svstate.dststep
    next_srcstep = Signal.like(cur_srcstep)
    next_dststep = Signal.like(cur_dststep)
    comb += next_srcstep.eq(cur_srcstep+1)
    comb += next_dststep.eq(cur_dststep+1)

    def core_stopped():
        """shared by both "stopped" branches: report the stop, and
        allow PC / SVSTATE to be overridden from pc_i / svstate_i"""
        # tell core it's stopped, and acknowledge debug handshake
        comb += core.core_stopped_i.eq(1)
        comb += dbg.core_stopped_i.eq(1)
        # while stopped, allow updating the PC and SVSTATE
        with m.If(self.pc_i.ok):
            comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
            comb += self.state_w_pc.data_i.eq(self.pc_i.data)
            sync += pc_changed.eq(1)
        with m.If(self.svstate_i.ok):
            comb += new_svstate.eq(self.svstate_i.data)
            comb += update_svstate.eq(1)
            sync += sv_changed.eq(1)

    with m.FSM(name="issue_fsm"):

        # sync with the "fetch" phase which is reading the instruction
        # at this point, there is no instruction running, that
        # could inadvertently update the PC.
        with m.State("ISSUE_START"):
            # wait on "core stop" release, before next fetch
            # need to do this here, in case we are in a VL==0 loop
            with m.If(~dbg.core_stop_o & ~core_rst):
                comb += fetch_pc_valid_i.eq(1)  # tell fetch to start
                with m.If(fetch_pc_ready_o):   # fetch acknowledged us
                    m.next = "INSN_WAIT"
            with m.Else():
                core_stopped()

        # decode the instruction when it arrives
        with m.State("INSN_WAIT"):
            comb += fetch_insn_ready_i.eq(1)
            with m.If(fetch_insn_valid_o):
                # decode the instruction
                sync += core.e.eq(pdecode2.e)
                sync += core.state.eq(cur_state)
                sync += core.raw_insn_i.eq(dec_opcode_i)
                sync += core.bigendian_i.eq(self.core_bigendian_i)
                # set RA_OR_ZERO detection in satellite decoders
                sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
                # loop into ISSUE_START if it's a SVP64 instruction
                # and VL == 0.  this because VL==0 is a for-loop
                # from 0 to 0 i.e. always, always a NOP.
                cur_vl = cur_state.svstate.vl
                with m.If(is_svp64_mode & (cur_vl == 0)):
                    # update the PC before fetching the next instruction
                    # since we are in a VL==0 loop, no instruction was
                    # executed that we could be overwriting
                    comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                    comb += self.state_w_pc.data_i.eq(nia)
                    comb += self.insn_done.eq(1)
                    m.next = "ISSUE_START"
                with m.Else():
                    if self.svp64_en:
                        m.next = "PRED_START"  # start fetching predicate
                    else:
                        m.next = "INSN_EXECUTE"  # skip predication

        with m.State("PRED_START"):
            comb += pred_insn_valid_i.eq(1)  # tell fetch_pred to start
            with m.If(pred_insn_ready_o):  # fetch_pred acknowledged us
                m.next = "MASK_WAIT"

        with m.State("MASK_WAIT"):
            comb += pred_mask_ready_i.eq(1)  # ready to receive the masks
            with m.If(pred_mask_valid_o):  # predication masks are ready
                m.next = "INSN_EXECUTE"

        # handshake with execution FSM, move to "wait" once acknowledged
        with m.State("INSN_EXECUTE"):
            # TODO (when is_svp64_mode): advance src/dst step to "skip"
            # over predicated-out elements from self.srcmask and
            # self.dstmask, but still without exceeding VL in either case
            # https://bugs.libre-soc.org/show_bug.cgi?id=617#c3
            # IMPORTANT: when changing src/dest step, have to
            # jump to m.next = "DECODE_SV" to deal with the change in
            # SVSTATE.  sketch (not yet implemented; could instead use
            # PriorityEncoder(self.srcmask | (1<<cur_srcstep))):
            #
            #   pred_src_zero = pdecode2.rm_dec.pred_sz
            #   pred_dst_zero = pdecode2.rm_dec.pred_dz
            #   if not pred_src_zero:
            #       if (((1<<cur_srcstep) & self.srcmask) == 0) and
            #             (cur_srcstep != vl):
            #           comb += update_svstate.eq(1)
            #           comb += new_svstate.srcstep.eq(next_srcstep)
            #   if not pred_dst_zero:
            #       if (((1<<cur_dststep) & self.dstmask) == 0) and
            #             (cur_dststep != vl):
            #           comb += new_svstate.dststep.eq(next_dststep)
            #           comb += update_svstate.eq(1)
            #   if update_svstate:
            #       m.next = "DECODE_SV"

            comb += exec_insn_valid_i.eq(1)  # trigger execute
            with m.If(exec_insn_ready_o):   # execute acknowledged us
                m.next = "EXECUTE_WAIT"

        with m.State("EXECUTE_WAIT"):
            # wait on "core stop" release, at instruction end
            # need to do this here, in case we are in a VL>1 loop
            with m.If(~dbg.core_stop_o & ~core_rst):
                comb += exec_pc_ready_i.eq(1)
                with m.If(exec_pc_valid_o):

                    # was this the last loop iteration?
                    is_last = Signal()
                    cur_vl = cur_state.svstate.vl
                    comb += is_last.eq(next_srcstep == cur_vl)

                    # if either PC or SVSTATE were changed by the previous
                    # instruction, go directly back to Fetch, without
                    # updating either PC or SVSTATE
                    with m.If(pc_changed | sv_changed):
                        m.next = "ISSUE_START"

                    # also return to Fetch, when no output was a vector
                    # (regardless of SRCSTEP and VL), or when the last
                    # instruction was really the last one of the VL loop
                    with m.Elif((~pdecode2.loop_continue) | is_last):
                        # before going back to fetch, update the PC state
                        # register with the NIA.
                        # ok here we are not reading the branch unit.
                        # TODO: this just blithely overwrites whatever
                        #       pipeline updated the PC
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.data_i.eq(nia)
                        # reset SRCSTEP before returning to Fetch
                        with m.If(pdecode2.loop_continue):
                            comb += new_svstate.srcstep.eq(0)
                            comb += new_svstate.dststep.eq(0)
                            comb += update_svstate.eq(1)
                        m.next = "ISSUE_START"

                    # returning to Execute? then, first update SRCSTEP
                    with m.Else():
                        comb += new_svstate.srcstep.eq(next_srcstep)
                        comb += new_svstate.dststep.eq(next_dststep)
                        comb += update_svstate.eq(1)
                        m.next = "DECODE_SV"

            with m.Else():
                core_stopped()

        # need to decode the instruction again, after updating SRCSTEP
        # in the previous state.
        # mostly a copy of INSN_WAIT, but without the actual wait
        with m.State("DECODE_SV"):
            # decode the instruction
            sync += core.e.eq(pdecode2.e)
            sync += core.state.eq(cur_state)
            sync += core.bigendian_i.eq(self.core_bigendian_i)
            sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
            m.next = "INSN_EXECUTE"  # move to "execute"

    # check if svstate needs updating: if so, write it to State Regfile
    with m.If(update_svstate):
        comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
        comb += self.state_w_sv.data_i.eq(new_svstate)
        sync += cur_state.svstate.eq(new_svstate)  # for next clock
def execute_fsm(self, m, core, pc_changed, sv_changed,
                exec_insn_valid_i, exec_insn_ready_o,
                exec_pc_valid_o, exec_pc_ready_i):
    """execute FSM

    execute FSM. this interacts with the "issue" FSM
    through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
    (outgoing). SVP64 RM prefixes have already been set up by the
    "issue" phase, so execute is fairly straightforward.

    pc_changed / sv_changed are cleared when an instruction starts and
    set when the state-regfile 'nia' write port is seen writing the PC /
    SVSTATE entry while the instruction is active.
    """
    comb = m.d.comb
    sync = m.d.sync

    # temporaries
    core_busy_o = core.busy_o                 # core is busy
    core_ivalid_i = core.ivalid_i             # instruction is valid
    core_issue_i = core.issue_i               # instruction is issued
    insn_type = core.e.do.insn_type           # instruction MicroOp type

    with m.FSM(name="exec_fsm"):

        # waiting for instruction bus (stays there until not busy)
        with m.State("INSN_START"):
            comb += exec_insn_ready_o.eq(1)
            with m.If(exec_insn_valid_i):
                comb += core_ivalid_i.eq(1)  # instruction is valid
                comb += core_issue_i.eq(1)   # and issued
                sync += sv_changed.eq(0)
                sync += pc_changed.eq(0)
                m.next = "INSN_ACTIVE"  # move to "wait completion"

        # instruction started: must wait till it finishes
        with m.State("INSN_ACTIVE"):
            with m.If(insn_type != MicrOp.OP_NOP):
                comb += core_ivalid_i.eq(1)  # instruction is valid
            # note changes to PC and SVSTATE
            nia_wen = self.state_nia.wen
            with m.If(nia_wen & (1 << StateRegs.SVSTATE)):
                sync += sv_changed.eq(1)
            with m.If(nia_wen & (1 << StateRegs.PC)):
                sync += pc_changed.eq(1)
            with m.If(~core_busy_o):  # instruction done!
                comb += exec_pc_valid_o.eq(1)
                with m.If(exec_pc_ready_i):
                    comb += self.insn_done.eq(1)
                    m.next = "INSN_START"  # back to fetch
def setup_peripherals(self, m):
    """Add the core, fetch unit, debug block, decoders and the optional
    peripherals to m as submodules, and create the clock/reset domains.

    m: Module being elaborated

    Returns the reset signal of the "coresync" domain, which is held
    active during the power-on delay, while the debug block requests a
    core reset, or while the main reset is active.
    """
    comb, sync = m.d.comb, m.d.sync

    m.submodules.core = core = DomainRenamer("coresync")(self.core)
    m.submodules.imem = self.imem
    m.submodules.dbg = dbg = self.dbg
    if self.jtag_en:
        m.submodules.jtag = jtag = self.jtag
        # TODO: UART2GDB mux, here, from external pin
        # see https://bugs.libre-soc.org/show_bug.cgi?id=499
        sync += dbg.dmi.connect_to(jtag.dmi)

    cur_state = self.cur_state

    # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
    if self.sram4x4k:
        for i, sram in enumerate(self.sram4k):
            m.submodules["sram4k_%d" % i] = sram
            comb += sram.enable.eq(self.wb_sram_en)

    # XICS interrupt handler
    if self.xics:
        m.submodules.xics_icp = icp = self.xics_icp
        m.submodules.xics_ics = ics = self.xics_ics
        comb += icp.ics_i.eq(ics.icp_o)            # connect ICS to ICP
        sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core

    # GPIO test peripheral
    if self.gpio:
        m.submodules.simple_gpio = self.simple_gpio

    # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
    # XXX causes litex ECP5 test to get wrong idea about input and output
    # (but works with verilator sim *sigh*)
    # if self.gpio and self.xics:
    #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

    # instruction decoder
    # NOTE(review): the decoder created by this call is not used in this
    # method; the call is kept only in case create_pdecode() has effects
    # that are relied upon - confirm and remove
    create_pdecode()
    m.submodules.dec2 = pdecode2 = self.pdecode2
    if self.svp64_en:
        m.submodules.svp64 = self.svp64

    # clock delay power-on reset
    cd_por = ClockDomain(reset_less=True)
    cd_sync = ClockDomain()
    core_sync = ClockDomain("coresync")
    m.domains += cd_por, cd_sync, core_sync

    ti_rst = Signal(reset_less=True)
    delay = Signal(range(4), reset=3)
    with m.If(delay != 0):
        m.d.por += delay.eq(delay - 1)
    comb += cd_por.clk.eq(ClockSignal())

    # power-on reset delay
    core_rst = ResetSignal("coresync")
    # the comparison must be parenthesised: "|" binds tighter than "!=",
    # so without the brackets delay is compared against the OR of the two
    # reset sources, and the reset drops out if one of them is asserted
    # just as the countdown reaches 1
    comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
    comb += core_rst.eq(ti_rst)

    # busy/halted signals from core
    comb += self.busy_o.eq(core.busy_o)
    comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

    # temporary hack: says "go" immediately for both address gen and ST
    ldst = core.fus.fus['ldst0']
    st_go_edge = rising_edge(m, ldst.st.rel_o)
    comb += ldst.ad.go_i.eq(ldst.ad.rel_o)  # link addr-go direct to rel
    comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel

    return core_rst
def elaborate(self, platform):
    """Build the issuer: peripherals and core, PC/SVSTATE selection, the
    four communicating FSMs (fetch, issue, predicate fetch, execute),
    DMI regfile access and the DEC/TB FSM.

    platform: not used here

    Returns the populated Module.
    """
    m = Module()
    # convenience
    comb = m.d.comb
    cur_state = self.cur_state
    dbg = self.dbg
    core = self.core

    # set up peripherals and core
    core_rst = self.setup_peripherals(m)

    # PC and instruction from I-Memory
    comb += self.pc_o.eq(cur_state.pc)
    pc_changed = Signal()  # note write to PC
    sv_changed = Signal()  # note write to SVSTATE

    # read state either from incoming override or from regfile
    # TODO: really should be doing MSR in the same way
    pc = state_get(m, self.pc_i, "pc",                  # read PC
                   self.state_r_pc, StateRegs.PC)
    svstate = state_get(m, self.svstate_i, "svstate",   # read SVSTATE
                        self.state_r_sv, StateRegs.SVSTATE)

    # don't write pc every cycle
    comb += self.state_w_pc.wen.eq(0)
    comb += self.state_w_pc.data_i.eq(0)

    # don't read msr every cycle
    comb += self.state_r_msr.ren.eq(0)

    # address of the next instruction, in the absence of a branch
    # depends on the instruction size
    nia = Signal(64, reset_less=True)

    # connect up debug signals
    # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
    comb += dbg.terminate_i.eq(core.core_terminate_o)
    comb += dbg.state.pc.eq(pc)
    comb += dbg.state.svstate.eq(svstate)
    comb += dbg.state.msr.eq(cur_state.msr)

    # pass the prefix mode from Fetch to Issue, so the latter can loop
    # on VL==0
    is_svp64_mode = Signal()

    # there are *FOUR* FSMs: fetch (32/64-bit), predicate fetch, issue
    # and decode/execute.  these are the handshake signals between them

    # fetch FSM can run as soon as the PC is valid
    fetch_pc_valid_i = Signal()  # Execute tells Fetch "start next read"
    fetch_pc_ready_o = Signal()  # Fetch Tells SVSTATE "proceed"

    # fetch FSM hands over the instruction to be decoded / issued
    fetch_insn_valid_o = Signal()
    fetch_insn_ready_i = Signal()

    # predicate fetch FSM decodes and fetches the predicate
    pred_insn_valid_i = Signal()
    pred_insn_ready_o = Signal()

    # predicate fetch FSM delivers the masks
    pred_mask_valid_o = Signal()
    pred_mask_ready_i = Signal()

    # issue FSM delivers the instruction to be executed
    exec_insn_valid_i = Signal()
    exec_insn_ready_o = Signal()

    # execute FSM, hands over the PC/SVSTATE back to the issue FSM
    exec_pc_valid_o = Signal()
    exec_pc_ready_i = Signal()

    # the FSMs here are perhaps unusual in that they detect conditions
    # then "hold" information, combinatorially, for the core
    # (as opposed to using sync - which would be on a clock's delay)
    # this includes the actual opcode, valid flags and so on.

    # Fetch, then predicate fetch, then Issue, then Execute.
    # Issue is where the VL for-loop lives.  the ready/valid
    # signalling is used to communicate between the four.

    self.fetch_fsm(m, core, pc, svstate, nia, is_svp64_mode,
                   fetch_pc_ready_o, fetch_pc_valid_i,
                   fetch_insn_valid_o, fetch_insn_ready_i)

    self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                   dbg, core_rst, is_svp64_mode,
                   fetch_pc_ready_o, fetch_pc_valid_i,
                   fetch_insn_valid_o, fetch_insn_ready_i,
                   pred_insn_valid_i, pred_insn_ready_o,
                   pred_mask_valid_o, pred_mask_ready_i,
                   exec_insn_valid_i, exec_insn_ready_o,
                   exec_pc_valid_o, exec_pc_ready_i)

    # the predicate FSM reads ports that only exist with SVP64 enabled
    if self.svp64_en:
        self.fetch_predicate_fsm(m,
                                 pred_insn_valid_i, pred_insn_ready_o,
                                 pred_mask_valid_o, pred_mask_ready_i)

    self.execute_fsm(m, core, pc_changed, sv_changed,
                     exec_insn_valid_i, exec_insn_ready_o,
                     exec_pc_valid_o, exec_pc_ready_i)

    # this bit doesn't have to be in the FSM: connect up to read
    # regfiles on demand from DMI
    self.do_dmi(m, dbg)

    # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
    # (which uses that in PowerDecoder2 to raise 0x900 exception)
    self.tb_dec_fsm(m, cur_state.dec)

    return m
def do_dmi(self, m, dbg):
    """deals with DMI debug requests

    currently only provides read requests for the INT regfile, CR and XER
    it will later also deal with *writing* to these regfiles.

    m:   Module to which the logic is added
    dbg: debug block; its d_gpr, d_cr and d_xer request interfaces
         (req / addr / data / ack) are serviced here

    For each interface a request raises the regfile read-enable; one
    clock later the regfile data_o is returned with ack set.
    """
    comb = m.d.comb
    sync = m.d.sync
    d_reg, d_cr, d_xer = dbg.d_gpr, dbg.d_cr, dbg.d_xer
    intrf = self.core.regs.rf['int']

    with m.If(d_reg.req):  # request for regfile access being made
        # TODO: error-check this
        # XXX should this be combinatorial?  sync better?
        if intrf.unary:
            # unary-addressed regfile: one read-enable bit per register
            comb += self.int_r.ren.eq(1 << d_reg.addr)
        else:
            # binary-addressed regfile: address plus a single enable
            comb += self.int_r.addr.eq(d_reg.addr)
            comb += self.int_r.ren.eq(1)
    d_reg_delay = Signal()
    sync += d_reg_delay.eq(d_reg.req)
    with m.If(d_reg_delay):
        # data arrives one clock later
        comb += d_reg.data.eq(self.int_r.data_o)
        comb += d_reg.ack.eq(1)

    # sigh same thing for CR debug
    with m.If(d_cr.req):  # request for regfile access being made
        comb += self.cr_r.ren.eq(0b11111111)  # enable all
    d_cr_delay = Signal()
    sync += d_cr_delay.eq(d_cr.req)
    with m.If(d_cr_delay):
        # data arrives one clock later
        comb += d_cr.data.eq(self.cr_r.data_o)
        comb += d_cr.ack.eq(1)

    # aaand XER...
    with m.If(d_xer.req):  # request for regfile access being made
        comb += self.xer_r.ren.eq(0b111111)  # enable all
    d_xer_delay = Signal()
    sync += d_xer_delay.eq(d_xer.req)
    with m.If(d_xer_delay):
        # data arrives one clock later
        comb += d_xer.data.eq(self.xer_r.data_o)
        comb += d_xer.ack.eq(1)
def tb_dec_fsm(self, m, spr_dec):
    """tb_dec_fsm

    this is a FSM for updating either dec or tb. it runs alternately
    DEC, TB, DEC, TB. note that SPR pipeline could have written a new
    value to DEC, however the regfile has "passthrough" on it
    (presumably so that the value read back here is the new one -
    TODO confirm).

    see v3.0B p1097-1099 for Timer Resource and p1065 and p1076

    Arguments:

    * m: the Module being built. the FSM is added to it.
    * spr_dec: signal that receives (in the sync domain) a copy of
      each newly-written DEC value.

    both DEC and TB go through the single 'issue' read port and the
    single 'issue' write port of the 'fast' regfile, which is why the
    two updates take turns rather than happening together.
    """
    comb, sync = m.d.comb, m.d.sync
    fast_rf = self.core.regs.rf['fast']
    fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
    fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB

    with m.FSM():

        # initiates read of current DEC
        with m.State("DEC_READ"):
            comb += fast_r_dectb.addr.eq(FastRegs.DEC)
            comb += fast_r_dectb.ren.eq(1)
            m.next = "DEC_WRITE"

        # waits for DEC read to arrive (1 cycle), updates with new value
        with m.State("DEC_WRITE"):
            new_dec = Signal(64)
            # TODO: MSR.LPCR 32-bit decrement mode
            comb += new_dec.eq(fast_r_dectb.data_o - 1)
            comb += fast_w_dectb.addr.eq(FastRegs.DEC)
            comb += fast_w_dectb.wen.eq(1)
            comb += fast_w_dectb.data_i.eq(new_dec)
            sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
            m.next = "TB_READ"

        # initiates read of current TB
        with m.State("TB_READ"):
            comb += fast_r_dectb.addr.eq(FastRegs.TB)
            comb += fast_r_dectb.ren.eq(1)
            m.next = "TB_WRITE"

        # waits for read TB to arrive, initiates write of current TB
        with m.State("TB_WRITE"):
            new_tb = Signal(64)
            comb += new_tb.eq(fast_r_dectb.data_o + 1)
            comb += fast_w_dectb.addr.eq(FastRegs.TB)
            comb += fast_w_dectb.wen.eq(1)
            comb += fast_w_dectb.data_i.eq(new_tb)
            # back round to DEC again: DEC, TB, DEC, TB...
            m.next = "DEC_READ"
def __iter__(self):
    """yields the signals of this module, one at a time

    order: the ports of pc_i, the ports of the core, the ports of the
    instruction memory, then core_bigendian_i.

    NOTE(review): the enclosing def line was not present in this part
    of the file.  __iter__ (with ports() as a list of it) follows the
    usual nmigen convention and satisfies the ports() call made by the
    wrapper class - confirm the intended method names.
    """
    yield from self.pc_i.ports()
    yield from self.core.ports()
    yield from self.imem.ports()
    yield self.core_bigendian_i

def ports(self):
    """returns everything produced by iterating this object, as a list"""
    return list(self)
def external_ports(self):
    """returns the list of signals to be exposed as external ports

    the list is built up in this order:

    * the ports of pc_i
    * pc_o, memerr_o, core_bigendian_i, busy_o
    * JTAG external ports, then the DMI ports
    * the fields of the instruction bus (imem.ibus)
    * the fields of the load/store slave bus
    * the bus fields of every SRAM in sram4k
    * the bus fields of the XICS ICP and ICS, then int_level_i
    * the bus fields of the simple GPIO, then gpio_o

    a new list is returned each time, so callers may append to it.
    """
    # take a copy, so that extending it cannot modify whatever list
    # pc_i.ports() hands back
    ports = list(self.pc_i.ports())
    ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o]

    # NOTE(review): the comment below says DMI is not to be added when
    # JTAG is enabled, however no condition is present here, so both
    # are appended.  confirm which enable flag should choose between
    # them before relying on this port list.
    ports += list(self.jtag.external_ports())
    # don't add DMI if JTAG is enabled
    ports += list(self.dbg.dmi.ports())

    ports += list(self.imem.ibus.fields.values())
    ports += list(self.core.l0.cmpi.lsmem.lsi.slavebus.fields.values())

    # NOTE(review): the SRAMs, XICS and GPIO below are assumed to have
    # been created.  if any of them is optional, guard its section
    # with the matching enable flag.
    for sram in self.sram4k:
        ports += list(sram.bus.fields.values())

    ports += list(self.xics_icp.bus.fields.values())
    ports += list(self.xics_ics.bus.fields.values())
    ports.append(self.int_level_i)

    ports += list(self.simple_gpio.bus.fields.values())
    ports.append(self.gpio_o)

    return ports
class TestIssuer(Elaboratable):
    """wrapper around TestIssuerInternal that also sets up its clock

    contains a TestIssuerInternal (self.ti) and a DummyPLL (self.pll).
    the "coresync" clock is driven either from the PLL output or
    straight from the default clock, selected when the object is
    constructed by pspec.use_pll (absent or false: direct clock).
    """

    def __init__(self, pspec):
        """pspec is handed on to TestIssuerInternal.  its optional
        use_pll attribute selects the PLL clock when true.
        """
        self.ti = TestIssuerInternal(pspec)

        self.pll = DummyPLL()

        # PLL direct clock or not
        self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll

        # 18 mhz PLL test output
        self.pll_18_o = Signal(reset_less=True)

    def elaborate(self, platform):
        """builds the Module: adds ti and pll, wires up clocks and reset"""
        m = Module()
        comb = m.d.comb

        # TestIssuer runs at direct clock
        m.submodules.ti = ti = self.ti
        # NOTE(review): created but not added to m.domains here.
        # presumably "coresync" is declared by a submodule, in which
        # case declaring it again at this level would clash - confirm.
        cd_int = ClockDomain("coresync")

        # ClockSelect runs at PLL output internal clock rate
        m.submodules.pll = pll = self.pll

        # add clock domains from PLL
        cd_pll = ClockDomain("pllclk")
        m.domains += cd_pll

        # PLL clock established: the "pllclk" domain runs at the
        # PLL's output speed
        pllclk = ClockSignal("pllclk")
        comb += pllclk.eq(pll.clk_pll_o)

        # wire up external 24mhz to PLL
        comb += pll.clk_24_i.eq(ClockSignal())

        # output 18 mhz PLL test signal
        comb += self.pll_18_o.eq(pll.pll_18_o)

        # now wire up ResetSignals.  don't mind them being in this domain
        pll_rst = ResetSignal("pllclk")
        comb += pll_rst.eq(ResetSignal())

        # internal clock is set to selector clock-out.  has the
        # side-effect of running everything in the "coresync" domain at
        # this speed.  exactly one driver is allowed, so the choice is
        # made here, at elaboration time, from pll_en.
        intclk = ClockSignal("coresync")
        if self.pll_en:
            comb += intclk.eq(pll.clk_pll_o)
        else:
            comb += intclk.eq(ClockSignal())

        return m

    def ports(self):
        """returns ti's ports, the PLL's ports, and the default clock
        and reset signals, as one list
        """
        return list(self.ti.ports()) + list(self.pll.ports()) + \
            [ClockSignal(), ResetSignal()]

    def external_ports(self):
        """returns ti's external ports followed by the default clock and
        reset, then the PLL's clk_sel_i, the 18 mhz test output and the
        PLL's pll_lck_o
        """
        ports = self.ti.external_ports()
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        # NOTE(review): appended whether or not pll_en is set, matching
        # the PLL always being instantiated above.  confirm whether
        # these pins ought to be left off for direct-clock builds.
        ports.append(self.pll.clk_sel_i)
        ports.append(self.pll_18_o)
        ports.append(self.pll.pll_lck_o)

        return ports
1141 if __name__
== '__main__':
1142 units
= {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
1148 pspec
= TestMemPspec(ldst_ifacetype
='bare_wb',
1149 imem_ifacetype
='bare_wb',
1154 dut
= TestIssuer(pspec
)
1155 vl
= main(dut
, ports
=dut
.ports(), name
="test_issuer")
1157 if len(sys
.argv
) == 1:
1158 vl
= rtlil
.convert(dut
, ports
=dut
.external_ports(), name
="test_issuer")
1159 with
open("test_issuer.il", "w") as f
: