4bcb8d8855bcc7f6f69df7459134faecc41ecb7f
3 not in any way intended for production use. this runs a FSM that:
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
10 * does it all over again
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to provide something that can be further incrementally
18 from nmigen
import (Elaboratable
, Module
, Signal
, ClockSignal
, ResetSignal
,
19 ClockDomain
, DomainRenamer
, Mux
, Const
)
20 from nmigen
.cli
import rtlil
21 from nmigen
.cli
import main
24 from soc
.decoder
.power_decoder
import create_pdecode
25 from soc
.decoder
.power_decoder2
import PowerDecode2
, SVP64PrefixDecoder
26 from soc
.decoder
.decode2execute1
import IssuerDecode2ToOperand
27 from soc
.decoder
.decode2execute1
import Data
28 from soc
.experiment
.testmem
import TestMemory
# test only for instructions
29 from soc
.regfile
.regfiles
import StateRegs
, FastRegs
30 from soc
.simple
.core
import NonProductionCore
31 from soc
.config
.test
.test_loadstore
import TestMemPspec
32 from soc
.config
.ifetch
import ConfigFetchUnit
33 from soc
.decoder
.power_enums
import MicrOp
, SVP64PredInt
, SVP64PredCR
34 from soc
.debug
.dmi
import CoreDebug
, DMIInterface
35 from soc
.debug
.jtag
import JTAG
36 from soc
.config
.pinouts
import get_pinspecs
37 from soc
.config
.state
import CoreState
38 from soc
.interrupts
.xics
import XICS_ICP
, XICS_ICS
39 from soc
.bus
.simple_gpio
import SimpleGPIO
40 from soc
.bus
.SPBlock512W64B8W
import SPBlock512W64B8W
41 from soc
.clock
.select
import ClockSelect
42 from soc
.clock
.dummypll
import DummyPLL
43 from soc
.sv
.svstate
import SVSTATERec
46 from nmutil
.util
import rising_edge
48 def get_insn(f_instr_o
, pc
):
49 if f_instr_o
.width
== 32:
52 # 64-bit: bit 2 of pc decides which word to select
53 return f_instr_o
.word_select(pc
[2], 32)
55 # gets state input or reads from state regfile
56 def state_get(m
, state_i
, name
, regfile
, regnum
):
60 res
= Signal(64, reset_less
=True, name
=name
)
61 res_ok_delay
= Signal(name
="%s_ok_delay" % name
)
62 sync
+= res_ok_delay
.eq(~state_i
.ok
)
63 with m
.If(state_i
.ok
):
64 # incoming override (start from pc_i)
65 comb
+= res
.eq(state_i
.data
)
67 # otherwise read StateRegs regfile for PC...
68 comb
+= regfile
.ren
.eq(1<<regnum
)
69 # ... but on a 1-clock delay
70 with m
.If(res_ok_delay
):
71 comb
+= res
.eq(regfile
.data_o
)
74 def get_predint(m
, mask
, name
):
75 """decode SVP64 predicate integer mask field to reg number and invert
76 this is identical to the equivalent function in ISACaller except that
77 it doesn't read the INT directly, it just decodes "what needs to be done"
78 i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
81 regread
= Signal(5, name
=name
+"regread")
82 invert
= Signal(name
=name
+"invert")
83 unary
= Signal(name
=name
+"unary")
85 with m
.Case(SVP64PredInt
.ALWAYS
.value
):
88 with m
.Case(SVP64PredInt
.R3_UNARY
.value
):
91 with m
.Case(SVP64PredInt
.R3
.value
):
93 with m
.Case(SVP64PredInt
.R3_N
.value
):
96 with m
.Case(SVP64PredInt
.R10
.value
):
97 comb
+= regread
.eq(10)
98 with m
.Case(SVP64PredInt
.R10_N
.value
):
99 comb
+= regread
.eq(10)
101 with m
.Case(SVP64PredInt
.R30
.value
):
102 comb
+= regread
.eq(30)
103 with m
.Case(SVP64PredInt
.R30_N
.value
):
104 comb
+= regread
.eq(30)
106 return regread
, invert
, unary
108 def get_predcr(m
, mask
, name
):
109 """decode SVP64 predicate CR to reg number field and invert status
110 this is identical to _get_predcr in ISACaller
113 idx
= Signal(2, name
=name
+"idx")
114 invert
= Signal(name
=name
+"crinvert")
116 with m
.Case(SVP64PredCR
.LT
.value
):
119 with m
.Case(SVP64PredCR
.GE
.value
):
122 with m
.Case(SVP64PredCR
.GT
.value
):
125 with m
.Case(SVP64PredCR
.LE
.value
):
128 with m
.Case(SVP64PredCR
.EQ
.value
):
131 with m
.Case(SVP64PredCR
.NE
.value
):
134 with m
.Case(SVP64PredCR
.SO
.value
):
137 with m
.Case(SVP64PredCR
.NS
.value
):
143 class TestIssuerInternal(Elaboratable
):
144 """TestIssuer - reads instructions from TestMemory and issues them
146 efficiency and speed is not the main goal here: functional correctness
147 and code clarity is. optimisations (which almost 100% interfere with
148 easy understanding) come later.
150 def __init__(self
, pspec
):
152 # test if SVP64 is to be enabled
153 self
.svp64_en
= hasattr(pspec
, "svp64") and (pspec
.svp64
== True)
155 # JTAG interface. add this right at the start because if it's
156 # added it *modifies* the pspec, by adding enable/disable signals
157 # for parts of the rest of the core
158 self
.jtag_en
= hasattr(pspec
, "debug") and pspec
.debug
== 'jtag'
160 subset
= {'uart', 'mtwi', 'eint', 'gpio', 'mspi0', 'mspi1',
162 self
.jtag
= JTAG(get_pinspecs(subset
=subset
))
163 # add signals to pspec to enable/disable icache and dcache
164 # (or data and instruction wishbone if icache/dcache not included)
165 # https://bugs.libre-soc.org/show_bug.cgi?id=520
166 # TODO: do we actually care if these are not domain-synchronised?
167 # honestly probably not.
168 pspec
.wb_icache_en
= self
.jtag
.wb_icache_en
169 pspec
.wb_dcache_en
= self
.jtag
.wb_dcache_en
170 self
.wb_sram_en
= self
.jtag
.wb_sram_en
172 self
.wb_sram_en
= Const(1)
174 # add 4k sram blocks?
175 self
.sram4x4k
= (hasattr(pspec
, "sram4x4kblock") and
176 pspec
.sram4x4kblock
== True)
180 self
.sram4k
.append(SPBlock512W64B8W(name
="sram4k_%d" % i
,
183 # add interrupt controller?
184 self
.xics
= hasattr(pspec
, "xics") and pspec
.xics
== True
186 self
.xics_icp
= XICS_ICP()
187 self
.xics_ics
= XICS_ICS()
188 self
.int_level_i
= self
.xics_ics
.int_level_i
190 # add GPIO peripheral?
191 self
.gpio
= hasattr(pspec
, "gpio") and pspec
.gpio
== True
193 self
.simple_gpio
= SimpleGPIO()
194 self
.gpio_o
= self
.simple_gpio
.gpio_o
196 # main instruction core. suitable for prototyping / demo only
197 self
.core
= core
= NonProductionCore(pspec
)
199 # instruction decoder. goes into Trap Record
200 pdecode
= create_pdecode()
201 self
.cur_state
= CoreState("cur") # current state (MSR/PC/SVSTATE)
202 self
.pdecode2
= PowerDecode2(pdecode
, state
=self
.cur_state
,
203 opkls
=IssuerDecode2ToOperand
,
204 svp64_en
=self
.svp64_en
)
206 self
.svp64
= SVP64PrefixDecoder() # for decoding SVP64 prefix
208 # Test Instruction memory
209 self
.imem
= ConfigFetchUnit(pspec
).fu
212 self
.dbg
= CoreDebug()
214 # instruction go/monitor
215 self
.pc_o
= Signal(64, reset_less
=True)
216 self
.pc_i
= Data(64, "pc_i") # set "ok" to indicate "please change me"
217 self
.svstate_i
= Data(32, "svstate_i") # ditto
218 self
.core_bigendian_i
= Signal() # TODO: set based on MSR.LE
219 self
.busy_o
= Signal(reset_less
=True)
220 self
.memerr_o
= Signal(reset_less
=True)
222 # STATE regfile read /write ports for PC, MSR, SVSTATE
223 staterf
= self
.core
.regs
.rf
['state']
224 self
.state_r_pc
= staterf
.r_ports
['cia'] # PC rd
225 self
.state_w_pc
= staterf
.w_ports
['d_wr1'] # PC wr
226 self
.state_r_msr
= staterf
.r_ports
['msr'] # MSR rd
227 self
.state_r_sv
= staterf
.r_ports
['sv'] # SVSTATE rd
228 self
.state_w_sv
= staterf
.w_ports
['sv'] # SVSTATE wr
230 # DMI interface access
231 intrf
= self
.core
.regs
.rf
['int']
232 crrf
= self
.core
.regs
.rf
['cr']
233 xerrf
= self
.core
.regs
.rf
['xer']
234 self
.int_r
= intrf
.r_ports
['dmi'] # INT read
235 self
.cr_r
= crrf
.r_ports
['full_cr_dbg'] # CR read
236 self
.xer_r
= xerrf
.r_ports
['full_xer'] # XER read
239 self
.int_pred
= intrf
.r_ports
['pred'] # INT predicate read
240 self
.cr_pred
= crrf
.r_ports
['cr_pred'] # CR predicate read
242 # hack method of keeping an eye on whether branch/trap set the PC
243 self
.state_nia
= self
.core
.regs
.rf
['state'].w_ports
['nia']
244 self
.state_nia
.wen
.name
= 'state_nia_wen'
246 # pulse to synchronize the simulator at instruction end
247 self
.insn_done
= Signal()
250 # store copies of predicate masks
251 self
.srcmask
= Signal(64)
252 self
.dstmask
= Signal(64)
254 def fetch_fsm(self
, m
, core
, pc
, svstate
, nia
, is_svp64_mode
,
255 fetch_pc_ready_o
, fetch_pc_valid_i
,
256 fetch_insn_valid_o
, fetch_insn_ready_i
):
259 this FSM performs fetch of raw instruction data, partial-decodes
260 it 32-bit at a time to detect SVP64 prefixes, and will optionally
261 read a 2nd 32-bit quantity if that occurs.
265 pdecode2
= self
.pdecode2
266 cur_state
= self
.cur_state
267 dec_opcode_i
= pdecode2
.dec
.raw_opcode_in
# raw opcode
269 msr_read
= Signal(reset
=1)
271 with m
.FSM(name
='fetch_fsm'):
274 with m
.State("IDLE"):
275 comb
+= fetch_pc_ready_o
.eq(1)
276 with m
.If(fetch_pc_valid_i
):
277 # instruction allowed to go: start by reading the PC
278 # capture the PC and also drop it into Insn Memory
279 # we have joined a pair of combinatorial memory
280 # lookups together. this is Generally Bad.
281 comb
+= self
.imem
.a_pc_i
.eq(pc
)
282 comb
+= self
.imem
.a_valid_i
.eq(1)
283 comb
+= self
.imem
.f_valid_i
.eq(1)
284 sync
+= cur_state
.pc
.eq(pc
)
285 sync
+= cur_state
.svstate
.eq(svstate
) # and svstate
287 # initiate read of MSR. arrives one clock later
288 comb
+= self
.state_r_msr
.ren
.eq(1 << StateRegs
.MSR
)
289 sync
+= msr_read
.eq(0)
291 m
.next
= "INSN_READ" # move to "wait for bus" phase
293 # dummy pause to find out why simulation is not keeping up
294 with m
.State("INSN_READ"):
295 # one cycle later, msr/sv read arrives. valid only once.
296 with m
.If(~msr_read
):
297 sync
+= msr_read
.eq(1) # yeah don't read it again
298 sync
+= cur_state
.msr
.eq(self
.state_r_msr
.data_o
)
299 with m
.If(self
.imem
.f_busy_o
): # zzz...
300 # busy: stay in wait-read
301 comb
+= self
.imem
.a_valid_i
.eq(1)
302 comb
+= self
.imem
.f_valid_i
.eq(1)
304 # not busy: instruction fetched
305 insn
= get_insn(self
.imem
.f_instr_o
, cur_state
.pc
)
308 # decode the SVP64 prefix, if any
309 comb
+= svp64
.raw_opcode_in
.eq(insn
)
310 comb
+= svp64
.bigendian
.eq(self
.core_bigendian_i
)
311 # pass the decoded prefix (if any) to PowerDecoder2
312 sync
+= pdecode2
.sv_rm
.eq(svp64
.svp64_rm
)
313 # remember whether this is a prefixed instruction, so
314 # the FSM can readily loop when VL==0
315 sync
+= is_svp64_mode
.eq(svp64
.is_svp64_mode
)
316 # calculate the address of the following instruction
317 insn_size
= Mux(svp64
.is_svp64_mode
, 8, 4)
318 sync
+= nia
.eq(cur_state
.pc
+ insn_size
)
319 with m
.If(~svp64
.is_svp64_mode
):
320 # with no prefix, store the instruction
321 # and hand it directly to the next FSM
322 sync
+= dec_opcode_i
.eq(insn
)
323 m
.next
= "INSN_READY"
325 # fetch the rest of the instruction from memory
326 comb
+= self
.imem
.a_pc_i
.eq(cur_state
.pc
+ 4)
327 comb
+= self
.imem
.a_valid_i
.eq(1)
328 comb
+= self
.imem
.f_valid_i
.eq(1)
329 m
.next
= "INSN_READ2"
331 # not SVP64 - 32-bit only
332 sync
+= nia
.eq(cur_state
.pc
+ 4)
333 sync
+= dec_opcode_i
.eq(insn
)
334 m
.next
= "INSN_READY"
336 with m
.State("INSN_READ2"):
337 with m
.If(self
.imem
.f_busy_o
): # zzz...
338 # busy: stay in wait-read
339 comb
+= self
.imem
.a_valid_i
.eq(1)
340 comb
+= self
.imem
.f_valid_i
.eq(1)
342 # not busy: instruction fetched
343 insn
= get_insn(self
.imem
.f_instr_o
, cur_state
.pc
+4)
344 sync
+= dec_opcode_i
.eq(insn
)
345 m
.next
= "INSN_READY"
346 # TODO: probably can start looking at pdecode2.rm_dec
347 # here or maybe even in INSN_READ state, if svp64_mode
348 # detected, in order to trigger - and wait for - the
350 pmode
= pdecode2
.rm_dec
.predmode
352 if pmode != SVP64PredMode.ALWAYS.value:
353 fire predicate loading FSM and wait before
356 sync += self.srcmask.eq(-1) # set to all 1s
357 sync += self.dstmask.eq(-1) # set to all 1s
358 m.next = "INSN_READY"
361 with m
.State("INSN_READY"):
362 # hand over the instruction, to be decoded
363 comb
+= fetch_insn_valid_o
.eq(1)
364 with m
.If(fetch_insn_ready_i
):
367 def fetch_predicate_fsm(self
, m
,
368 pred_insn_valid_i
, pred_insn_ready_o
,
369 pred_mask_valid_o
, pred_mask_ready_i
):
370 """fetch_predicate_fsm - obtains (constructs in the case of CR)
371 src/dest predicate masks
373 https://bugs.libre-soc.org/show_bug.cgi?id=617
374 the predicates can be read here, by using IntRegs r_ports['pred']
375 or CRRegs r_ports['pred']. in the case of CRs it will have to
376 be done through multiple reads, extracting one relevant at a time.
377 later, a faster way would be to use the 32-bit-wide CR port but
378 this is more complex decoding, here. equivalent code used in
379 ISACaller is "from soc.decoder.isa.caller import get_predcr"
383 pdecode2
= self
.pdecode2
384 rm_dec
= pdecode2
.rm_dec
# SVP64RMModeDecode
385 predmode
= rm_dec
.predmode
386 srcpred
, dstpred
= rm_dec
.srcpred
, rm_dec
.dstpred
387 cr_pred
, int_pred
= self
.cr_pred
, self
.int_pred
# read regfiles
388 # if predmode == INT:
389 # INT-src sregread, sinvert, sunary = get_predint(m, srcpred)
390 # INT-dst dregread, dinvert, dunary = get_predint(m, dstpred)
391 # TODO read INT-src and INT-dst into self.srcmask+dstmask
392 # has to cope with first one then the other
393 # FSM-triggered-int-read
394 # comb += int_pred.addr.eq(d_reg.addr)
395 # comb += int_pred.ren.eq(1)
397 # comb += d_reg.data.eq(self.int_r.data_o)
398 # elif predmode == CR:
399 # CR-src sidx, sinvert = get_predcr(m, srcpred)
400 # CR-dst didx, dinvert = get_predcr(m, dstpred)
401 # TODO read CR-src and CR-dst into self.srcmask+dstmask with loop
402 # has to cope with first one then the other
403 # for cr_idx = FSM-state-loop(0..VL-1):
404 # FSM-state-trigger-CR-read:
405 # cr_ren = (1<<7-(cr_idx+SVP64CROffs.CRPred))
406 # comb += cr_pred.ren.eq(cr_ren)
407 # FSM-state-1-clock-later-actual-Read:
408 # cr_field = Signal(4)
410 # # read the CR field, select the appropriate bit
411 # comb += cr_field.eq(cr_pred.data_o)
412 # comb += cr_bit.eq(cr_field.bit_select(idx)))
413 # # just like in branch BO tests
414 # comb += self.srcmask[cr_idx].eq(inv ^ cr_bit)
416 # sync += self.srcmask.eq(-1) # set to all 1s
417 # sync += self.dstmask.eq(-1) # set to all 1s
418 with m
.FSM(name
="fetch_predicate"):
420 with m
.State("FETCH_PRED_IDLE"):
421 comb
+= pred_insn_ready_o
.eq(1)
422 with m
.If(pred_insn_valid_i
):
423 sync
+= self
.srcmask
.eq(-1)
424 sync
+= self
.dstmask
.eq(-1)
425 m
.next
= "FETCH_PRED_DONE"
427 with m
.State("FETCH_PRED_DONE"):
428 comb
+= pred_mask_valid_o
.eq(1)
429 with m
.If(pred_mask_ready_i
):
430 m
.next
= "FETCH_PRED_IDLE"
432 def issue_fsm(self
, m
, core
, pc_changed
, sv_changed
, nia
,
433 dbg
, core_rst
, is_svp64_mode
,
434 fetch_pc_ready_o
, fetch_pc_valid_i
,
435 fetch_insn_valid_o
, fetch_insn_ready_i
,
436 pred_insn_valid_i
, pred_insn_ready_o
,
437 pred_mask_valid_o
, pred_mask_ready_i
,
438 exec_insn_valid_i
, exec_insn_ready_o
,
439 exec_pc_valid_o
, exec_pc_ready_i
):
442 decode / issue FSM. this interacts with the "fetch" FSM
443 through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
444 (outgoing). also interacts with the "execute" FSM
445 through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
447 SVP64 RM prefixes have already been set up by the
448 "fetch" phase, so execute is fairly straightforward.
453 pdecode2
= self
.pdecode2
454 cur_state
= self
.cur_state
457 dec_opcode_i
= pdecode2
.dec
.raw_opcode_in
# raw opcode
459 # for updating svstate (things like srcstep etc.)
460 update_svstate
= Signal() # set this (below) if updating
461 new_svstate
= SVSTATERec("new_svstate")
462 comb
+= new_svstate
.eq(cur_state
.svstate
)
464 # precalculate srcstep+1 and dststep+1
465 cur_srcstep
= cur_state
.svstate
.srcstep
466 cur_dststep
= cur_state
.svstate
.dststep
467 next_srcstep
= Signal
.like(cur_srcstep
)
468 next_dststep
= Signal
.like(cur_dststep
)
469 comb
+= next_srcstep
.eq(cur_state
.svstate
.srcstep
+1)
470 comb
+= next_dststep
.eq(cur_state
.svstate
.dststep
+1)
472 with m
.FSM(name
="issue_fsm"):
474 # sync with the "fetch" phase which is reading the instruction
475 # at this point, there is no instruction running, that
476 # could inadvertently update the PC.
477 with m
.State("ISSUE_START"):
478 # wait on "core stop" release, before next fetch
479 # need to do this here, in case we are in a VL==0 loop
480 with m
.If(~dbg
.core_stop_o
& ~core_rst
):
481 comb
+= fetch_pc_valid_i
.eq(1) # tell fetch to start
482 with m
.If(fetch_pc_ready_o
): # fetch acknowledged us
485 # tell core it's stopped, and acknowledge debug handshake
486 comb
+= core
.core_stopped_i
.eq(1)
487 comb
+= dbg
.core_stopped_i
.eq(1)
488 # while stopped, allow updating the PC and SVSTATE
489 with m
.If(self
.pc_i
.ok
):
490 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
491 comb
+= self
.state_w_pc
.data_i
.eq(self
.pc_i
.data
)
492 sync
+= pc_changed
.eq(1)
493 with m
.If(self
.svstate_i
.ok
):
494 comb
+= new_svstate
.eq(self
.svstate_i
.data
)
495 comb
+= update_svstate
.eq(1)
496 sync
+= sv_changed
.eq(1)
498 # decode the instruction when it arrives
499 with m
.State("INSN_WAIT"):
500 comb
+= fetch_insn_ready_i
.eq(1)
501 with m
.If(fetch_insn_valid_o
):
502 # decode the instruction
503 sync
+= core
.e
.eq(pdecode2
.e
)
504 sync
+= core
.state
.eq(cur_state
)
505 sync
+= core
.raw_insn_i
.eq(dec_opcode_i
)
506 sync
+= core
.bigendian_i
.eq(self
.core_bigendian_i
)
507 # set RA_OR_ZERO detection in satellite decoders
508 sync
+= core
.sv_a_nz
.eq(pdecode2
.sv_a_nz
)
509 # loop into ISSUE_START if it's a SVP64 instruction
510 # and VL == 0. this because VL==0 is a for-loop
511 # from 0 to 0 i.e. always, always a NOP.
512 cur_vl
= cur_state
.svstate
.vl
513 with m
.If(is_svp64_mode
& (cur_vl
== 0)):
514 # update the PC before fetching the next instruction
515 # since we are in a VL==0 loop, no instruction was
516 # executed that we could be overwriting
517 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
518 comb
+= self
.state_w_pc
.data_i
.eq(nia
)
519 comb
+= self
.insn_done
.eq(1)
520 m
.next
= "ISSUE_START"
522 m
.next
= "PRED_START" # start fetching the predicate
524 with m
.State("PRED_START"):
525 comb
+= pred_insn_valid_i
.eq(1) # tell fetch_pred to start
526 with m
.If(pred_insn_ready_o
): # fetch_pred acknowledged us
529 with m
.State("MASK_WAIT"):
530 comb
+= pred_mask_ready_i
.eq(1) # ready to receive the masks
531 with m
.If(pred_mask_valid_o
): # predication masks are ready
532 m
.next
= "INSN_EXECUTE"
534 # handshake with execution FSM, move to "wait" once acknowledged
535 with m
.State("INSN_EXECUTE"):
536 # with m.If(is_svp64_mode):
537 # TODO advance src/dst step to "skip" over predicated-out
538 # from self.srcmask and self.dstmask
539 # https://bugs.libre-soc.org/show_bug.cgi?id=617#c3
540 # but still without exceeding VL in either case
541 # IMPORTANT: when changing src/dest step, have to
542 # jump to m.next = "DECODE_SV" to deal with the change in
545 with m
.If(is_svp64_mode
):
547 pred_src_zero
= pdecode2
.rm_dec
.pred_sz
548 pred_dst_zero
= pdecode2
.rm_dec
.pred_dz
551 if not pred_src_zero:
552 if (((1<<cur_srcstep) & self.srcmask) == 0) and
554 comb += update_svstate.eq(1)
555 comb += new_svstate.srcstep.eq(next_srcstep)
556 sync += sv_changed.eq(1)
558 if not pred_dst_zero:
559 if (((1<<cur_dststep) & self.dstmask) == 0) and
561 comb += new_svstate.dststep.eq(next_dststep)
562 comb += update_svstate.eq(1)
563 sync += sv_changed.eq(1)
569 comb
+= exec_insn_valid_i
.eq(1) # trigger execute
570 with m
.If(exec_insn_ready_o
): # execute acknowledged us
571 m
.next
= "EXECUTE_WAIT"
573 with m
.State("EXECUTE_WAIT"):
574 # wait on "core stop" release, at instruction end
575 # need to do this here, in case we are in a VL>1 loop
576 with m
.If(~dbg
.core_stop_o
& ~core_rst
):
577 comb
+= exec_pc_ready_i
.eq(1)
578 with m
.If(exec_pc_valid_o
):
580 # was this the last loop iteration?
582 cur_vl
= cur_state
.svstate
.vl
583 comb
+= is_last
.eq(next_srcstep
== cur_vl
)
585 # if either PC or SVSTATE were changed by the previous
586 # instruction, go directly back to Fetch, without
587 # updating either PC or SVSTATE
588 with m
.If(pc_changed | sv_changed
):
589 m
.next
= "ISSUE_START"
591 # also return to Fetch, when no output was a vector
592 # (regardless of SRCSTEP and VL), or when the last
593 # instruction was really the last one of the VL loop
594 with m
.Elif((~pdecode2
.loop_continue
) | is_last
):
595 # before going back to fetch, update the PC state
596 # register with the NIA.
597 # ok here we are not reading the branch unit.
598 # TODO: this just blithely overwrites whatever
599 # pipeline updated the PC
600 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
601 comb
+= self
.state_w_pc
.data_i
.eq(nia
)
602 # reset SRCSTEP before returning to Fetch
603 with m
.If(pdecode2
.loop_continue
):
604 comb
+= new_svstate
.srcstep
.eq(0)
605 comb
+= new_svstate
.dststep
.eq(0)
606 comb
+= update_svstate
.eq(1)
607 m
.next
= "ISSUE_START"
609 # returning to Execute? then, first update SRCSTEP
611 comb
+= new_svstate
.srcstep
.eq(next_srcstep
)
612 comb
+= new_svstate
.dststep
.eq(next_dststep
)
613 comb
+= update_svstate
.eq(1)
617 comb
+= core
.core_stopped_i
.eq(1)
618 comb
+= dbg
.core_stopped_i
.eq(1)
619 # while stopped, allow updating the PC and SVSTATE
620 with m
.If(self
.pc_i
.ok
):
621 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
622 comb
+= self
.state_w_pc
.data_i
.eq(self
.pc_i
.data
)
623 sync
+= pc_changed
.eq(1)
624 with m
.If(self
.svstate_i
.ok
):
625 comb
+= new_svstate
.eq(self
.svstate_i
.data
)
626 comb
+= update_svstate
.eq(1)
627 sync
+= sv_changed
.eq(1)
629 # need to decode the instruction again, after updating SRCSTEP
630 # in the previous state.
631 # mostly a copy of INSN_WAIT, but without the actual wait
632 with m
.State("DECODE_SV"):
633 # decode the instruction
634 sync
+= core
.e
.eq(pdecode2
.e
)
635 sync
+= core
.state
.eq(cur_state
)
636 sync
+= core
.bigendian_i
.eq(self
.core_bigendian_i
)
637 sync
+= core
.sv_a_nz
.eq(pdecode2
.sv_a_nz
)
638 m
.next
= "INSN_EXECUTE" # move to "execute"
640 # check if svstate needs updating: if so, write it to State Regfile
641 with m
.If(update_svstate
):
642 comb
+= self
.state_w_sv
.wen
.eq(1<<StateRegs
.SVSTATE
)
643 comb
+= self
.state_w_sv
.data_i
.eq(new_svstate
)
644 sync
+= cur_state
.svstate
.eq(new_svstate
) # for next clock
646 def execute_fsm(self
, m
, core
, pc_changed
, sv_changed
,
647 exec_insn_valid_i
, exec_insn_ready_o
,
648 exec_pc_valid_o
, exec_pc_ready_i
):
651 execute FSM. this interacts with the "issue" FSM
652 through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
653 (outgoing). SVP64 RM prefixes have already been set up by the
654 "issue" phase, so execute is fairly straightforward.
659 pdecode2
= self
.pdecode2
662 core_busy_o
= core
.busy_o
# core is busy
663 core_ivalid_i
= core
.ivalid_i
# instruction is valid
664 core_issue_i
= core
.issue_i
# instruction is issued
665 insn_type
= core
.e
.do
.insn_type
# instruction MicroOp type
667 with m
.FSM(name
="exec_fsm"):
669 # waiting for instruction bus (stays there until not busy)
670 with m
.State("INSN_START"):
671 comb
+= exec_insn_ready_o
.eq(1)
672 with m
.If(exec_insn_valid_i
):
673 comb
+= core_ivalid_i
.eq(1) # instruction is valid
674 comb
+= core_issue_i
.eq(1) # and issued
675 sync
+= sv_changed
.eq(0)
676 sync
+= pc_changed
.eq(0)
677 m
.next
= "INSN_ACTIVE" # move to "wait completion"
679 # instruction started: must wait till it finishes
680 with m
.State("INSN_ACTIVE"):
681 with m
.If(insn_type
!= MicrOp
.OP_NOP
):
682 comb
+= core_ivalid_i
.eq(1) # instruction is valid
683 # note changes to PC and SVSTATE
684 with m
.If(self
.state_nia
.wen
& (1<<StateRegs
.SVSTATE
)):
685 sync
+= sv_changed
.eq(1)
686 with m
.If(self
.state_nia
.wen
& (1<<StateRegs
.PC
)):
687 sync
+= pc_changed
.eq(1)
688 with m
.If(~core_busy_o
): # instruction done!
689 comb
+= exec_pc_valid_o
.eq(1)
690 with m
.If(exec_pc_ready_i
):
691 comb
+= self
.insn_done
.eq(1)
692 m
.next
= "INSN_START" # back to fetch
694 def setup_peripherals(self
, m
):
695 comb
, sync
= m
.d
.comb
, m
.d
.sync
697 m
.submodules
.core
= core
= DomainRenamer("coresync")(self
.core
)
698 m
.submodules
.imem
= imem
= self
.imem
699 m
.submodules
.dbg
= dbg
= self
.dbg
701 m
.submodules
.jtag
= jtag
= self
.jtag
702 # TODO: UART2GDB mux, here, from external pin
703 # see https://bugs.libre-soc.org/show_bug.cgi?id=499
704 sync
+= dbg
.dmi
.connect_to(jtag
.dmi
)
706 cur_state
= self
.cur_state
708 # 4x 4k SRAM blocks. these simply "exist", they get routed in litex
710 for i
, sram
in enumerate(self
.sram4k
):
711 m
.submodules
["sram4k_%d" % i
] = sram
712 comb
+= sram
.enable
.eq(self
.wb_sram_en
)
714 # XICS interrupt handler
716 m
.submodules
.xics_icp
= icp
= self
.xics_icp
717 m
.submodules
.xics_ics
= ics
= self
.xics_ics
718 comb
+= icp
.ics_i
.eq(ics
.icp_o
) # connect ICS to ICP
719 sync
+= cur_state
.eint
.eq(icp
.core_irq_o
) # connect ICP to core
721 # GPIO test peripheral
723 m
.submodules
.simple_gpio
= simple_gpio
= self
.simple_gpio
725 # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
726 # XXX causes litex ECP5 test to get wrong idea about input and output
727 # (but works with verilator sim *sigh*)
728 #if self.gpio and self.xics:
729 # comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
731 # instruction decoder
732 pdecode
= create_pdecode()
733 m
.submodules
.dec2
= pdecode2
= self
.pdecode2
735 m
.submodules
.svp64
= svp64
= self
.svp64
738 dmi
, d_reg
, d_cr
, d_xer
, = dbg
.dmi
, dbg
.d_gpr
, dbg
.d_cr
, dbg
.d_xer
739 intrf
= self
.core
.regs
.rf
['int']
741 # clock delay power-on reset
742 cd_por
= ClockDomain(reset_less
=True)
743 cd_sync
= ClockDomain()
744 core_sync
= ClockDomain("coresync")
745 m
.domains
+= cd_por
, cd_sync
, core_sync
747 ti_rst
= Signal(reset_less
=True)
748 delay
= Signal(range(4), reset
=3)
749 with m
.If(delay
!= 0):
750 m
.d
.por
+= delay
.eq(delay
- 1)
751 comb
+= cd_por
.clk
.eq(ClockSignal())
753 # power-on reset delay
754 core_rst
= ResetSignal("coresync")
# Assert the core reset while the power-on countdown is still running, or
# when either the debug interface or the default-domain reset requests it.
# The comparison is parenthesised because Python's "|" binds tighter than
# "!=": without the brackets, delay would be compared against the OR of the
# two reset sources instead of three separate conditions being OR-ed together.
comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
756 comb
+= core_rst
.eq(ti_rst
)
758 # busy/halted signals from core
759 comb
+= self
.busy_o
.eq(core
.busy_o
)
760 comb
+= pdecode2
.dec
.bigendian
.eq(self
.core_bigendian_i
)
762 # temporary hack: says "go" immediately for both address gen and ST
764 ldst
= core
.fus
.fus
['ldst0']
765 st_go_edge
= rising_edge(m
, ldst
.st
.rel_o
)
766 m
.d
.comb
+= ldst
.ad
.go_i
.eq(ldst
.ad
.rel_o
) # link addr-go direct to rel
767 m
.d
.comb
+= ldst
.st
.go_i
.eq(st_go_edge
) # link store-go to rising rel
771 def elaborate(self
, platform
):
774 comb
, sync
= m
.d
.comb
, m
.d
.sync
775 cur_state
= self
.cur_state
776 pdecode2
= self
.pdecode2
780 # set up peripherals and core
781 core_rst
= self
.setup_peripherals(m
)
783 # PC and instruction from I-Memory
784 comb
+= self
.pc_o
.eq(cur_state
.pc
)
785 pc_changed
= Signal() # note write to PC
786 sv_changed
= Signal() # note write to SVSTATE
788 # read state either from incoming override or from regfile
789 # TODO: really should be doing MSR in the same way
790 pc
= state_get(m
, self
.pc_i
, "pc", # read PC
791 self
.state_r_pc
, StateRegs
.PC
)
792 svstate
= state_get(m
, self
.svstate_i
, "svstate", # read SVSTATE
793 self
.state_r_sv
, StateRegs
.SVSTATE
)
795 # don't write pc every cycle
796 comb
+= self
.state_w_pc
.wen
.eq(0)
797 comb
+= self
.state_w_pc
.data_i
.eq(0)
799 # don't read msr every cycle
800 comb
+= self
.state_r_msr
.ren
.eq(0)
802 # address of the next instruction, in the absence of a branch
803 # depends on the instruction size
804 nia
= Signal(64, reset_less
=True)
806 # connect up debug signals
807 # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
808 comb
+= dbg
.terminate_i
.eq(core
.core_terminate_o
)
809 comb
+= dbg
.state
.pc
.eq(pc
)
810 comb
+= dbg
.state
.svstate
.eq(svstate
)
811 comb
+= dbg
.state
.msr
.eq(cur_state
.msr
)
813 # pass the prefix mode from Fetch to Issue, so the latter can loop
815 is_svp64_mode
= Signal()
817 # there are *FOUR* FSMs: fetch (32/64-bit), predicate fetch, issue, execute.
818 # these are the handshake signals that connect them to each other
820 # fetch FSM can run as soon as the PC is valid
821 fetch_pc_valid_i
= Signal() # Execute tells Fetch "start next read"
822 fetch_pc_ready_o
= Signal() # Fetch Tells SVSTATE "proceed"
824 # fetch FSM hands over the instruction to be decoded / issued
825 fetch_insn_valid_o
= Signal()
826 fetch_insn_ready_i
= Signal()
828 # predicate fetch FSM decodes and fetches the predicate
829 pred_insn_valid_i
= Signal()
830 pred_insn_ready_o
= Signal()
832 # predicate fetch FSM delivers the masks
833 pred_mask_valid_o
= Signal()
834 pred_mask_ready_i
= Signal()
836 # issue FSM delivers the instruction to be executed
837 exec_insn_valid_i
= Signal()
838 exec_insn_ready_o
= Signal()
840 # execute FSM, hands over the PC/SVSTATE back to the issue FSM
841 exec_pc_valid_o
= Signal()
842 exec_pc_ready_i
= Signal()
844 # the FSMs here are perhaps unusual in that they detect conditions
845 # then "hold" information, combinatorially, for the core
846 # (as opposed to using sync - which would be on a clock's delay)
847 # this includes the actual opcode, valid flags and so on.
849 # Fetch, then predicate fetch, then Issue, then Execute.
850 # Issue is where the VL for-loop # lives. the ready/valid
851 # signalling is used to communicate between the four.
853 self
.fetch_fsm(m
, core
, pc
, svstate
, nia
, is_svp64_mode
,
854 fetch_pc_ready_o
, fetch_pc_valid_i
,
855 fetch_insn_valid_o
, fetch_insn_ready_i
)
857 self
.issue_fsm(m
, core
, pc_changed
, sv_changed
, nia
,
858 dbg
, core_rst
, is_svp64_mode
,
859 fetch_pc_ready_o
, fetch_pc_valid_i
,
860 fetch_insn_valid_o
, fetch_insn_ready_i
,
861 pred_insn_valid_i
, pred_insn_ready_o
,
862 pred_mask_valid_o
, pred_mask_ready_i
,
863 exec_insn_valid_i
, exec_insn_ready_o
,
864 exec_pc_valid_o
, exec_pc_ready_i
)
866 self
.fetch_predicate_fsm(m
,
867 pred_insn_valid_i
, pred_insn_ready_o
,
868 pred_mask_valid_o
, pred_mask_ready_i
)
870 self
.execute_fsm(m
, core
, pc_changed
, sv_changed
,
871 exec_insn_valid_i
, exec_insn_ready_o
,
872 exec_pc_valid_o
, exec_pc_ready_i
)
874 # this bit doesn't have to be in the FSM: connect up to read
875 # regfiles on demand from DMI
878 # DEC and TB inc/dec FSM. copy of DEC is put into CoreState,
879 # (which uses that in PowerDecoder2 to raise 0x900 exception)
880 self
.tb_dec_fsm(m
, cur_state
.dec
)
# do_dmi: answers DMI debug read requests for the INT, CR and XER regfiles.
#
# Arguments, as used by the visible statements:
#   m   - module under construction; only m.If(...) is used on it here
#   dbg - debug interface; its dmi, d_gpr, d_cr and d_xer attributes are
#         unpacked into dmi, d_reg, d_cr and d_xer (dmi is not used again
#         in the visible statements)
#
# The same pattern appears three times (d_reg with self.int_r, d_cr with
# self.cr_r, d_xer with self.xer_r): while .req is raised the read port's
# ren is driven; .req is copied through `sync` into a fresh *_delay
# Signal(); when that delayed copy is set, the port's data_o is driven
# onto .data and .ack is raised.
#
# NOTE(review): this listing is a token-per-line extract with the original
# file's line numbers fused into the text, and the numbering skips 886,
# 889-891, 894, 898 and 900.  The docstring terminator and the bindings of
# `comb` and `sync` are therefore not visible here (tb_dec_fsm binds them
# from m.d.comb / m.d.sync - presumably the same applies; confirm against
# the full file).
884 def do_dmi(self
, m
, dbg
):
885 """deals with DMI debug requests
887 currently only provides read requests for the INT regfile, CR and XER
888 it will later also deal with *writing* to these regfiles.
892 dmi
, d_reg
, d_cr
, d_xer
, = dbg
.dmi
, dbg
.d_gpr
, dbg
.d_cr
, dbg
.d_xer
893 intrf
= self
.core
.regs
.rf
['int']
895 with m
.If(d_reg
.req
): # request for regfile access being made
896 # TODO: error-check this
897 # XXX should this be combinatorial? sync better?
# NOTE(review): ren is assigned twice below (1<<addr, then 1) and `intrf`
# is not used anywhere else in the visible text; numbers 898 and 900 are
# skipped, so these look like the two arms of a conditional on the INT
# regfile's addressing mode that the extract has lost - confirm.
899 comb
+= self
.int_r
.ren
.eq(1<<d_reg
.addr
)
901 comb
+= self
.int_r
.addr
.eq(d_reg
.addr
)
902 comb
+= self
.int_r
.ren
.eq(1)
903 d_reg_delay
= Signal()
904 sync
+= d_reg_delay
.eq(d_reg
.req
)
905 with m
.If(d_reg_delay
):
906 # data arrives one clock later
907 comb
+= d_reg
.data
.eq(self
.int_r
.data_o
)
908 comb
+= d_reg
.ack
.eq(1)
910 # sigh same thing for CR debug
911 with m
.If(d_cr
.req
): # request for regfile access being made
912 comb
+= self
.cr_r
.ren
.eq(0b11111111) # enable all
913 d_cr_delay
= Signal()
914 sync
+= d_cr_delay
.eq(d_cr
.req
)
915 with m
.If(d_cr_delay
):
916 # data arrives one clock later
917 comb
+= d_cr
.data
.eq(self
.cr_r
.data_o
)
918 comb
+= d_cr
.ack
.eq(1)
# same request / delayed-acknowledge pattern again, this time for XER
921 with m
.If(d_xer
.req
): # request for regfile access being made
922 comb
+= self
.xer_r
.ren
.eq(0b111111) # enable all
923 d_xer_delay
= Signal()
924 sync
+= d_xer_delay
.eq(d_xer
.req
)
925 with m
.If(d_xer_delay
):
926 # data arrives one clock later
927 comb
+= d_xer
.data
.eq(self
.xer_r
.data_o
)
928 comb
+= d_xer
.ack
.eq(1)
# tb_dec_fsm: FSM states that step DEC down by one and TB up by one through
# the 'issue' read and write ports of the 'fast' regfile.
#
# Arguments, as used by the visible statements:
#   m       - module under construction (m.d.comb, m.d.sync, m.State)
#   spr_dec - receives a copy of every new DEC value through `sync`; the
#             call site in this file passes cur_state.dec
#
# States visible below:
#   DEC_READ  - puts FastRegs.DEC on the read port's addr and sets ren
#   DEC_WRITE - new_dec = read data_o - 1; written back to FastRegs.DEC
#               (wen set) and copied into spr_dec
#   TB_READ   - puts FastRegs.TB on the read port's addr and sets ren
#   TB_WRITE  - new_tb = read data_o + 1; written back to FastRegs.TB
#
# NOTE(review): this listing is a token-per-line extract with the original
# file's line numbers fused into the text, and the numbering skips (among
# others) 931-932, 936-937, 939-940, 945-947, 952-953, 956, 963-964,
# 969-970, 973 and 978 onward.  As a result the docstring delimiters, the
# end of the sentence "...so this", the enclosing FSM context, the
# declarations of new_dec / new_tb and the state-to-state transitions are
# not visible here; take them from the full file rather than from this text.
# NOTE(review): "Timeer" in the docstring text is presumably "Timer".
930 def tb_dec_fsm(self
, m
, spr_dec
):
933 this is a FSM for updating either dec or tb. it runs alternately
934 DEC, TB, DEC, TB. note that SPR pipeline could have written a new
935 value to DEC, however the regfile has "passthrough" on it so this
938 see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
941 comb
, sync
= m
.d
.comb
, m
.d
.sync
942 fast_rf
= self
.core
.regs
.rf
['fast']
943 fast_r_dectb
= fast_rf
.r_ports
['issue'] # DEC/TB
944 fast_w_dectb
= fast_rf
.w_ports
['issue'] # DEC/TB
948 # initiates read of current DEC
949 with m
.State("DEC_READ"):
950 comb
+= fast_r_dectb
.addr
.eq(FastRegs
.DEC
)
951 comb
+= fast_r_dectb
.ren
.eq(1)
954 # waits for DEC read to arrive (1 cycle), updates with new value
955 with m
.State("DEC_WRITE"):
957 # TODO: MSR.LPCR 32-bit decrement mode
958 comb
+= new_dec
.eq(fast_r_dectb
.data_o
- 1)
959 comb
+= fast_w_dectb
.addr
.eq(FastRegs
.DEC
)
960 comb
+= fast_w_dectb
.wen
.eq(1)
961 comb
+= fast_w_dectb
.data_i
.eq(new_dec
)
962 sync
+= spr_dec
.eq(new_dec
) # copy into cur_state for decoder
965 # initiates read of current TB
966 with m
.State("TB_READ"):
967 comb
+= fast_r_dectb
.addr
.eq(FastRegs
.TB
)
968 comb
+= fast_r_dectb
.ren
.eq(1)
971 # waits for read TB to arrive, initiates write of current TB
972 with m
.State("TB_WRITE"):
974 comb
+= new_tb
.eq(fast_r_dectb
.data_o
+ 1)
975 comb
+= fast_w_dectb
.addr
.eq(FastRegs
.TB
)
976 comb
+= fast_w_dectb
.wen
.eq(1)
977 comb
+= fast_w_dectb
.data_i
.eq(new_tb
)
983 yield from self
.pc_i
.ports()
986 yield from self
.core
.ports()
987 yield from self
.imem
.ports()
988 yield self
.core_bigendian_i
# external_ports: accumulates this issuer's external ports in `ports`.
#
# In the order of the visible statements, `ports` starts as
# self.pc_i.ports() and is then extended with:
#   - pc_o, memerr_o, core_bigendian_i and busy_o
#   - self.jtag.external_ports()
#   - self.dbg.dmi.ports()
#   - the field values of the instruction bus (self.imem.ibus)
#   - the field values of the load/store slave bus
#     (self.core.l0.cmpi.lsmem.lsi.slavebus)
#   - the bus field values of every entry in self.sram4k
#   - the bus field values of self.xics_icp and self.xics_ics, followed by
#     self.int_level_i
#   - the bus field values of self.simple_gpio, followed by self.gpio_o
#
# NOTE(review): this listing is a token-per-line extract with the original
# file's line numbers fused into the text, and the numbering skips 997-999,
# 1001, 1004, 1007-1008, 1011-1012, 1016-1017 and 1020 onward.  The closing
# of the list literal, whatever conditions guard the optional groups (the
# "don't add DMI if JTAG is enabled" comment implies at least one) and the
# final return are not visible.  TestIssuer.external_ports appends to this
# method's result, so it presumably returns `ports` - confirm against the
# full file.
994 def external_ports(self
):
995 ports
= self
.pc_i
.ports()
996 ports
+= [self
.pc_o
, self
.memerr_o
, self
.core_bigendian_i
, self
.busy_o
,
1000 ports
+= list(self
.jtag
.external_ports())
1002 # don't add DMI if JTAG is enabled
1003 ports
+= list(self
.dbg
.dmi
.ports())
1005 ports
+= list(self
.imem
.ibus
.fields
.values())
1006 ports
+= list(self
.core
.l0
.cmpi
.lsmem
.lsi
.slavebus
.fields
.values())
1009 for sram
in self
.sram4k
:
1010 ports
+= list(sram
.bus
.fields
.values())
1013 ports
+= list(self
.xics_icp
.bus
.fields
.values())
1014 ports
+= list(self
.xics_ics
.bus
.fields
.values())
1015 ports
.append(self
.int_level_i
)
1018 ports
+= list(self
.simple_gpio
.bus
.fields
.values())
1019 ports
.append(self
.gpio_o
)
# TestIssuer: Elaboratable that pairs a TestIssuerInternal (self.ti) with a
# DummyPLL (self.pll) and wires clocks and resets between them.
#
# __init__(pspec):
#   builds self.ti from pspec and a DummyPLL; self.pll_en is true only when
#   pspec has a truthy `use_pll` attribute; self.pll_18_o is a reset-less
#   Signal that elaborate() drives from the PLL's pll_18_o.
#
# elaborate(platform), per the visible statements:
#   - registers self.ti and self.pll as submodules `ti` and `pll`
#   - creates ClockDomain objects named "coresync" and "pllclk"
#   - drives the "pllclk" clock from pll.clk_pll_o
#   - feeds ClockSignal() (no domain argument) into pll.clk_24_i
#   - copies pll.pll_18_o to self.pll_18_o
#   - drives the "pllclk" reset from ResetSignal() (no domain argument)
#   - drives the "coresync" clock
#
# external_ports():
#   takes self.ti.external_ports() and appends ClockSignal(), ResetSignal(),
#   self.pll.clk_sel_i, self.pll_18_o and self.pll.pll_lck_o.
#
# NOTE(review): this listing is a token-per-line extract with the original
# file's line numbers fused into the text, and the numbering skips (among
# others) 1039-1041, 1045-1046, 1052-1053, 1072, 1074, 1076-1079 and
# 1091-1093.  The bindings of `m` and `comb`, any registration of the two
# ClockDomain objects, any use of self.pll_en, the return of elaborate(),
# the header of the method that owns the `return list(...)` statement and
# the return of external_ports() are not visible here; take them from the
# full file.
1027 class TestIssuer(Elaboratable
):
1028 def __init__(self
, pspec
):
1029 self
.ti
= TestIssuerInternal(pspec
)
1031 self
.pll
= DummyPLL()
1033 # PLL direct clock or not
1034 self
.pll_en
= hasattr(pspec
, "use_pll") and pspec
.use_pll
1036 self
.pll_18_o
= Signal(reset_less
=True)
1038 def elaborate(self
, platform
):
1042 # TestIssuer runs at direct clock
1043 m
.submodules
.ti
= ti
= self
.ti
1044 cd_int
= ClockDomain("coresync")
1047 # ClockSelect runs at PLL output internal clock rate
1048 m
.submodules
.pll
= pll
= self
.pll
1050 # add clock domains from PLL
1051 cd_pll
= ClockDomain("pllclk")
1054 # PLL clock established. has the side-effect of running clklsel
1055 # at the PLL's speed (see DomainRenamer("pllclk") above)
1056 pllclk
= ClockSignal("pllclk")
1057 comb
+= pllclk
.eq(pll
.clk_pll_o
)
1059 # wire up external 24mhz to PLL
1060 comb
+= pll
.clk_24_i
.eq(ClockSignal())
1062 # output 18 mhz PLL test signal
1063 comb
+= self
.pll_18_o
.eq(pll
.pll_18_o
)
1065 # now wire up ResetSignals. don't mind them being in this domain
1066 pll_rst
= ResetSignal("pllclk")
1067 comb
+= pll_rst
.eq(ResetSignal())
1069 # internal clock is set to selector clock-out. has the side-effect of
1070 # running TestIssuer at this speed (see DomainRenamer("intclk") above)
# NOTE(review): the comment above names "intclk", but the domain actually
# used by the statements below is "coresync" - confirm which is intended.
1071 intclk
= ClockSignal("coresync")
# NOTE(review): intclk is assigned twice below (pll.clk_pll_o, then
# ClockSignal()); numbers 1072 and 1074 are skipped and self.pll_en is not
# used in the visible text, so these are presumably the two arms of a
# conditional on self.pll_en - confirm.
1073 comb
+= intclk
.eq(pll
.clk_pll_o
)
1075 comb
+= intclk
.eq(ClockSignal())
# NOTE(review): numbers 1076-1079 are skipped; the return below presumably
# belongs to a separate ports() method (the __main__ section calls
# dut.ports()) whose header is not visible, not to elaborate().
1080 return list(self
.ti
.ports()) + list(self
.pll
.ports()) + \
1081 [ClockSignal(), ResetSignal()]
1083 def external_ports(self
):
1084 ports
= self
.ti
.external_ports()
1085 ports
.append(ClockSignal())
1086 ports
.append(ResetSignal())
1088 ports
.append(self
.pll
.clk_sel_i
)
1089 ports
.append(self
.pll_18_o
)
1090 ports
.append(self
.pll
.pll_lck_o
)
1094 if __name__
== '__main__':
1095 units
= {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
1101 pspec
= TestMemPspec(ldst_ifacetype
='bare_wb',
1102 imem_ifacetype
='bare_wb',
1107 dut
= TestIssuer(pspec
)
1108 vl
= main(dut
, ports
=dut
.ports(), name
="test_issuer")
1110 if len(sys
.argv
) == 1:
1111 vl
= rtlil
.convert(dut
, ports
=dut
.external_ports(), name
="test_issuer")
1112 with
open("test_issuer.il", "w") as f
: