3 not in any way intended for production use. this runs a FSM that:
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
10 * does it all over again
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to at provide something that can be further incrementally
18 from nmigen
import (Elaboratable
, Module
, Signal
, ClockSignal
, ResetSignal
,
19 ClockDomain
, DomainRenamer
, Mux
, Const
, Repl
)
20 from nmigen
.cli
import rtlil
21 from nmigen
.cli
import main
24 from soc
.decoder
.power_decoder
import create_pdecode
25 from soc
.decoder
.power_decoder2
import PowerDecode2
, SVP64PrefixDecoder
26 from soc
.decoder
.decode2execute1
import IssuerDecode2ToOperand
27 from soc
.decoder
.decode2execute1
import Data
28 from soc
.experiment
.testmem
import TestMemory
# test only for instructions
29 from soc
.regfile
.regfiles
import StateRegs
, FastRegs
30 from soc
.simple
.core
import NonProductionCore
31 from soc
.config
.test
.test_loadstore
import TestMemPspec
32 from soc
.config
.ifetch
import ConfigFetchUnit
33 from soc
.decoder
.power_enums
import (MicrOp
, SVP64PredInt
, SVP64PredCR
,
35 from soc
.debug
.dmi
import CoreDebug
, DMIInterface
36 from soc
.debug
.jtag
import JTAG
37 from soc
.config
.pinouts
import get_pinspecs
38 from soc
.config
.state
import CoreState
39 from soc
.interrupts
.xics
import XICS_ICP
, XICS_ICS
40 from soc
.bus
.simple_gpio
import SimpleGPIO
41 from soc
.bus
.SPBlock512W64B8W
import SPBlock512W64B8W
42 from soc
.clock
.select
import ClockSelect
43 from soc
.clock
.dummypll
import DummyPLL
44 from soc
.sv
.svstate
import SVSTATERec
47 from nmutil
.util
import rising_edge
49 def get_insn(f_instr_o
, pc
):
50 if f_instr_o
.width
== 32:
53 # 64-bit: bit 2 of pc decides which word to select
54 return f_instr_o
.word_select(pc
[2], 32)
56 # gets state input or reads from state regfile
57 def state_get(m
, state_i
, name
, regfile
, regnum
):
61 res
= Signal(64, reset_less
=True, name
=name
)
62 res_ok_delay
= Signal(name
="%s_ok_delay" % name
)
63 sync
+= res_ok_delay
.eq(~state_i
.ok
)
64 with m
.If(state_i
.ok
):
65 # incoming override (start from pc_i)
66 comb
+= res
.eq(state_i
.data
)
68 # otherwise read StateRegs regfile for PC...
69 comb
+= regfile
.ren
.eq(1<<regnum
)
70 # ... but on a 1-clock delay
71 with m
.If(res_ok_delay
):
72 comb
+= res
.eq(regfile
.data_o
)
75 def get_predint(m
, mask
, name
):
76 """decode SVP64 predicate integer mask field to reg number and invert
77 this is identical to the equivalent function in ISACaller except that
78 it doesn't read the INT directly, it just decodes "what needs to be done"
79 i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
81 * all1s is set to indicate that no mask is to be applied.
82 * regread indicates the GPR register number to be read
83 * invert is set to indicate that the register value is to be inverted
84 * unary indicates that the contents of the register is to be shifted 1<<r3
87 regread
= Signal(5, name
=name
+"regread")
88 invert
= Signal(name
=name
+"invert")
89 unary
= Signal(name
=name
+"unary")
90 all1s
= Signal(name
=name
+"all1s")
92 with m
.Case(SVP64PredInt
.ALWAYS
.value
):
93 comb
+= all1s
.eq(1) # use 0b1111 (all ones)
94 with m
.Case(SVP64PredInt
.R3_UNARY
.value
):
96 comb
+= unary
.eq(1) # 1<<r3 - shift r3 (single bit)
97 with m
.Case(SVP64PredInt
.R3
.value
):
99 with m
.Case(SVP64PredInt
.R3_N
.value
):
100 comb
+= regread
.eq(3)
102 with m
.Case(SVP64PredInt
.R10
.value
):
103 comb
+= regread
.eq(10)
104 with m
.Case(SVP64PredInt
.R10_N
.value
):
105 comb
+= regread
.eq(10)
107 with m
.Case(SVP64PredInt
.R30
.value
):
108 comb
+= regread
.eq(30)
109 with m
.Case(SVP64PredInt
.R30_N
.value
):
110 comb
+= regread
.eq(30)
112 return regread
, invert
, unary
, all1s
114 def get_predcr(m
, mask
, name
):
115 """decode SVP64 predicate CR to reg number field and invert status
116 this is identical to _get_predcr in ISACaller
119 idx
= Signal(2, name
=name
+"idx")
120 invert
= Signal(name
=name
+"crinvert")
122 with m
.Case(SVP64PredCR
.LT
.value
):
125 with m
.Case(SVP64PredCR
.GE
.value
):
128 with m
.Case(SVP64PredCR
.GT
.value
):
131 with m
.Case(SVP64PredCR
.LE
.value
):
134 with m
.Case(SVP64PredCR
.EQ
.value
):
137 with m
.Case(SVP64PredCR
.NE
.value
):
140 with m
.Case(SVP64PredCR
.SO
.value
):
143 with m
.Case(SVP64PredCR
.NS
.value
):
149 class TestIssuerInternal(Elaboratable
):
150 """TestIssuer - reads instructions from TestMemory and issues them
152 efficiency and speed is not the main goal here: functional correctness
153 and code clarity is. optimisations (which almost 100% interfere with
154 easy understanding) come later.
156 def __init__(self
, pspec
):
158 # test is SVP64 is to be enabled
159 self
.svp64_en
= hasattr(pspec
, "svp64") and (pspec
.svp64
== True)
161 # and if regfiles are reduced
162 self
.regreduce_en
= (hasattr(pspec
, "regreduce") and
163 (pspec
.regreduce
== True))
165 # JTAG interface. add this right at the start because if it's
166 # added it *modifies* the pspec, by adding enable/disable signals
167 # for parts of the rest of the core
168 self
.jtag_en
= hasattr(pspec
, "debug") and pspec
.debug
== 'jtag'
170 subset
= {'uart', 'mtwi', 'eint', 'gpio', 'mspi0', 'mspi1',
172 self
.jtag
= JTAG(get_pinspecs(subset
=subset
))
173 # add signals to pspec to enable/disable icache and dcache
174 # (or data and intstruction wishbone if icache/dcache not included)
175 # https://bugs.libre-soc.org/show_bug.cgi?id=520
176 # TODO: do we actually care if these are not domain-synchronised?
177 # honestly probably not.
178 pspec
.wb_icache_en
= self
.jtag
.wb_icache_en
179 pspec
.wb_dcache_en
= self
.jtag
.wb_dcache_en
180 self
.wb_sram_en
= self
.jtag
.wb_sram_en
182 self
.wb_sram_en
= Const(1)
184 # add 4k sram blocks?
185 self
.sram4x4k
= (hasattr(pspec
, "sram4x4kblock") and
186 pspec
.sram4x4kblock
== True)
190 self
.sram4k
.append(SPBlock512W64B8W(name
="sram4k_%d" % i
,
193 # add interrupt controller?
194 self
.xics
= hasattr(pspec
, "xics") and pspec
.xics
== True
196 self
.xics_icp
= XICS_ICP()
197 self
.xics_ics
= XICS_ICS()
198 self
.int_level_i
= self
.xics_ics
.int_level_i
200 # add GPIO peripheral?
201 self
.gpio
= hasattr(pspec
, "gpio") and pspec
.gpio
== True
203 self
.simple_gpio
= SimpleGPIO()
204 self
.gpio_o
= self
.simple_gpio
.gpio_o
206 # main instruction core. suitable for prototyping / demo only
207 self
.core
= core
= NonProductionCore(pspec
)
209 # instruction decoder. goes into Trap Record
210 pdecode
= create_pdecode()
211 self
.cur_state
= CoreState("cur") # current state (MSR/PC/SVSTATE)
212 self
.pdecode2
= PowerDecode2(pdecode
, state
=self
.cur_state
,
213 opkls
=IssuerDecode2ToOperand
,
214 svp64_en
=self
.svp64_en
,
215 regreduce_en
=self
.regreduce_en
)
217 self
.svp64
= SVP64PrefixDecoder() # for decoding SVP64 prefix
219 # Test Instruction memory
220 self
.imem
= ConfigFetchUnit(pspec
).fu
223 self
.dbg
= CoreDebug()
225 # instruction go/monitor
226 self
.pc_o
= Signal(64, reset_less
=True)
227 self
.pc_i
= Data(64, "pc_i") # set "ok" to indicate "please change me"
228 self
.svstate_i
= Data(32, "svstate_i") # ditto
229 self
.core_bigendian_i
= Signal() # TODO: set based on MSR.LE
230 self
.busy_o
= Signal(reset_less
=True)
231 self
.memerr_o
= Signal(reset_less
=True)
233 # STATE regfile read /write ports for PC, MSR, SVSTATE
234 staterf
= self
.core
.regs
.rf
['state']
235 self
.state_r_pc
= staterf
.r_ports
['cia'] # PC rd
236 self
.state_w_pc
= staterf
.w_ports
['d_wr1'] # PC wr
237 self
.state_r_msr
= staterf
.r_ports
['msr'] # MSR rd
238 self
.state_r_sv
= staterf
.r_ports
['sv'] # SVSTATE rd
239 self
.state_w_sv
= staterf
.w_ports
['sv'] # SVSTATE wr
241 # DMI interface access
242 intrf
= self
.core
.regs
.rf
['int']
243 crrf
= self
.core
.regs
.rf
['cr']
244 xerrf
= self
.core
.regs
.rf
['xer']
245 self
.int_r
= intrf
.r_ports
['dmi'] # INT read
246 self
.cr_r
= crrf
.r_ports
['full_cr_dbg'] # CR read
247 self
.xer_r
= xerrf
.r_ports
['full_xer'] # XER read
251 self
.int_pred
= intrf
.r_ports
['pred'] # INT predicate read
252 self
.cr_pred
= crrf
.r_ports
['cr_pred'] # CR predicate read
254 # hack method of keeping an eye on whether branch/trap set the PC
255 self
.state_nia
= self
.core
.regs
.rf
['state'].w_ports
['nia']
256 self
.state_nia
.wen
.name
= 'state_nia_wen'
258 # pulse to synchronize the simulator at instruction end
259 self
.insn_done
= Signal()
262 # store copies of predicate masks
263 self
.srcmask
= Signal(64)
264 self
.dstmask
= Signal(64)
266 def fetch_fsm(self
, m
, core
, pc
, svstate
, nia
, is_svp64_mode
,
267 fetch_pc_ready_o
, fetch_pc_valid_i
,
268 fetch_insn_valid_o
, fetch_insn_ready_i
):
271 this FSM performs fetch of raw instruction data, partial-decodes
272 it 32-bit at a time to detect SVP64 prefixes, and will optionally
273 read a 2nd 32-bit quantity if that occurs.
277 pdecode2
= self
.pdecode2
278 cur_state
= self
.cur_state
279 dec_opcode_i
= pdecode2
.dec
.raw_opcode_in
# raw opcode
281 msr_read
= Signal(reset
=1)
283 with m
.FSM(name
='fetch_fsm'):
286 with m
.State("IDLE"):
287 comb
+= fetch_pc_ready_o
.eq(1)
288 with m
.If(fetch_pc_valid_i
):
289 # instruction allowed to go: start by reading the PC
290 # capture the PC and also drop it into Insn Memory
291 # we have joined a pair of combinatorial memory
292 # lookups together. this is Generally Bad.
293 comb
+= self
.imem
.a_pc_i
.eq(pc
)
294 comb
+= self
.imem
.a_valid_i
.eq(1)
295 comb
+= self
.imem
.f_valid_i
.eq(1)
296 sync
+= cur_state
.pc
.eq(pc
)
297 sync
+= cur_state
.svstate
.eq(svstate
) # and svstate
299 # initiate read of MSR. arrives one clock later
300 comb
+= self
.state_r_msr
.ren
.eq(1 << StateRegs
.MSR
)
301 sync
+= msr_read
.eq(0)
303 m
.next
= "INSN_READ" # move to "wait for bus" phase
305 # dummy pause to find out why simulation is not keeping up
306 with m
.State("INSN_READ"):
307 # one cycle later, msr/sv read arrives. valid only once.
308 with m
.If(~msr_read
):
309 sync
+= msr_read
.eq(1) # yeah don't read it again
310 sync
+= cur_state
.msr
.eq(self
.state_r_msr
.data_o
)
311 with m
.If(self
.imem
.f_busy_o
): # zzz...
312 # busy: stay in wait-read
313 comb
+= self
.imem
.a_valid_i
.eq(1)
314 comb
+= self
.imem
.f_valid_i
.eq(1)
316 # not busy: instruction fetched
317 insn
= get_insn(self
.imem
.f_instr_o
, cur_state
.pc
)
320 # decode the SVP64 prefix, if any
321 comb
+= svp64
.raw_opcode_in
.eq(insn
)
322 comb
+= svp64
.bigendian
.eq(self
.core_bigendian_i
)
323 # pass the decoded prefix (if any) to PowerDecoder2
324 sync
+= pdecode2
.sv_rm
.eq(svp64
.svp64_rm
)
325 # remember whether this is a prefixed instruction, so
326 # the FSM can readily loop when VL==0
327 sync
+= is_svp64_mode
.eq(svp64
.is_svp64_mode
)
328 # calculate the address of the following instruction
329 insn_size
= Mux(svp64
.is_svp64_mode
, 8, 4)
330 sync
+= nia
.eq(cur_state
.pc
+ insn_size
)
331 with m
.If(~svp64
.is_svp64_mode
):
332 # with no prefix, store the instruction
333 # and hand it directly to the next FSM
334 sync
+= dec_opcode_i
.eq(insn
)
335 m
.next
= "INSN_READY"
337 # fetch the rest of the instruction from memory
338 comb
+= self
.imem
.a_pc_i
.eq(cur_state
.pc
+ 4)
339 comb
+= self
.imem
.a_valid_i
.eq(1)
340 comb
+= self
.imem
.f_valid_i
.eq(1)
341 m
.next
= "INSN_READ2"
343 # not SVP64 - 32-bit only
344 sync
+= nia
.eq(cur_state
.pc
+ 4)
345 sync
+= dec_opcode_i
.eq(insn
)
346 m
.next
= "INSN_READY"
348 with m
.State("INSN_READ2"):
349 with m
.If(self
.imem
.f_busy_o
): # zzz...
350 # busy: stay in wait-read
351 comb
+= self
.imem
.a_valid_i
.eq(1)
352 comb
+= self
.imem
.f_valid_i
.eq(1)
354 # not busy: instruction fetched
355 insn
= get_insn(self
.imem
.f_instr_o
, cur_state
.pc
+4)
356 sync
+= dec_opcode_i
.eq(insn
)
357 m
.next
= "INSN_READY"
358 # TODO: probably can start looking at pdecode2.rm_dec
359 # here or maybe even in INSN_READ state, if svp64_mode
360 # detected, in order to trigger - and wait for - the
363 pmode
= pdecode2
.rm_dec
.predmode
365 if pmode != SVP64PredMode.ALWAYS.value:
366 fire predicate loading FSM and wait before
369 sync += self.srcmask.eq(-1) # set to all 1s
370 sync += self.dstmask.eq(-1) # set to all 1s
371 m.next = "INSN_READY"
374 with m
.State("INSN_READY"):
375 # hand over the instruction, to be decoded
376 comb
+= fetch_insn_valid_o
.eq(1)
377 with m
.If(fetch_insn_ready_i
):
380 def fetch_predicate_fsm(self
, m
,
381 pred_insn_valid_i
, pred_insn_ready_o
,
382 pred_mask_valid_o
, pred_mask_ready_i
):
383 """fetch_predicate_fsm - obtains (constructs in the case of CR)
384 src/dest predicate masks
386 https://bugs.libre-soc.org/show_bug.cgi?id=617
387 the predicates can be read here, by using IntRegs r_ports['pred']
388 or CRRegs r_ports['pred']. in the case of CRs it will have to
389 be done through multiple reads, extracting one relevant at a time.
390 later, a faster way would be to use the 32-bit-wide CR port but
391 this is more complex decoding, here. equivalent code used in
392 ISACaller is "from soc.decoder.isa.caller import get_predcr"
394 note: this ENTIRE FSM is not to be called when svp64 is disabled
398 pdecode2
= self
.pdecode2
399 rm_dec
= pdecode2
.rm_dec
# SVP64RMModeDecode
400 predmode
= rm_dec
.predmode
401 srcpred
, dstpred
= rm_dec
.srcpred
, rm_dec
.dstpred
402 cr_pred
, int_pred
= self
.cr_pred
, self
.int_pred
# read regfiles
404 # elif predmode == CR:
405 # CR-src sidx, sinvert = get_predcr(m, srcpred)
406 # CR-dst didx, dinvert = get_predcr(m, dstpred)
407 # TODO read CR-src and CR-dst into self.srcmask+dstmask with loop
408 # has to cope with first one then the other
409 # for cr_idx = FSM-state-loop(0..VL-1):
410 # FSM-state-trigger-CR-read:
411 # cr_ren = (1<<7-(cr_idx+SVP64CROffs.CRPred))
412 # comb += cr_pred.ren.eq(cr_ren)
413 # FSM-state-1-clock-later-actual-Read:
414 # cr_field = Signal(4)
416 # # read the CR field, select the appropriate bit
417 # comb += cr_field.eq(cr_pred.data_o)
418 # comb += cr_bit.eq(cr_field.bit_select(idx)))
419 # # just like in branch BO tests
420 # comd += self.srcmask[cr_idx].eq(inv ^ cr_bit)
423 sregread
, sinvert
, sunary
, sall1s
= get_predint(m
, srcpred
, 's')
424 dregread
, dinvert
, dunary
, dall1s
= get_predint(m
, dstpred
, 'd')
425 sidx
, scrinvert
= get_predcr(m
, srcpred
, 's')
426 didx
, dcrinvert
= get_predcr(m
, dstpred
, 'd')
428 with m
.FSM(name
="fetch_predicate"):
430 with m
.State("FETCH_PRED_IDLE"):
431 comb
+= pred_insn_ready_o
.eq(1)
432 with m
.If(pred_insn_valid_i
):
433 with m
.If(predmode
== SVP64PredMode
.INT
):
434 # skip fetching destination mask register, when zero
436 sync
+= self
.dstmask
.eq(-1)
437 # directly go to fetch source mask register
438 # guaranteed not to be zero (otherwise predmode
439 # would be SVP64PredMode.ALWAYS, not INT)
440 comb
+= int_pred
.addr
.eq(sregread
)
441 comb
+= int_pred
.ren
.eq(1)
442 m
.next
= "INT_SRC_READ"
443 # fetch destination predicate register
445 comb
+= int_pred
.addr
.eq(dregread
)
446 comb
+= int_pred
.ren
.eq(1)
447 m
.next
= "INT_DST_READ"
449 sync
+= self
.srcmask
.eq(-1)
450 sync
+= self
.dstmask
.eq(-1)
451 m
.next
= "FETCH_PRED_DONE"
453 with m
.State("INT_DST_READ"):
454 # store destination mask
455 inv
= Repl(dinvert
, 64)
456 sync
+= self
.dstmask
.eq(self
.int_pred
.data_o ^ inv
)
457 # skip fetching source mask register, when zero
459 sync
+= self
.srcmask
.eq(-1)
460 m
.next
= "FETCH_PRED_DONE"
461 # fetch source predicate register
463 comb
+= int_pred
.addr
.eq(sregread
)
464 comb
+= int_pred
.ren
.eq(1)
465 m
.next
= "INT_SRC_READ"
467 with m
.State("INT_SRC_READ"):
469 inv
= Repl(sinvert
, 64)
470 sync
+= self
.srcmask
.eq(self
.int_pred
.data_o ^ inv
)
471 m
.next
= "FETCH_PRED_DONE"
473 with m
.State("FETCH_PRED_DONE"):
474 comb
+= pred_mask_valid_o
.eq(1)
475 with m
.If(pred_mask_ready_i
):
476 m
.next
= "FETCH_PRED_IDLE"
478 def issue_fsm(self
, m
, core
, pc_changed
, sv_changed
, nia
,
479 dbg
, core_rst
, is_svp64_mode
,
480 fetch_pc_ready_o
, fetch_pc_valid_i
,
481 fetch_insn_valid_o
, fetch_insn_ready_i
,
482 pred_insn_valid_i
, pred_insn_ready_o
,
483 pred_mask_valid_o
, pred_mask_ready_i
,
484 exec_insn_valid_i
, exec_insn_ready_o
,
485 exec_pc_valid_o
, exec_pc_ready_i
):
488 decode / issue FSM. this interacts with the "fetch" FSM
489 through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
490 (outgoing). also interacts with the "execute" FSM
491 through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
493 SVP64 RM prefixes have already been set up by the
494 "fetch" phase, so execute is fairly straightforward.
499 pdecode2
= self
.pdecode2
500 cur_state
= self
.cur_state
503 dec_opcode_i
= pdecode2
.dec
.raw_opcode_in
# raw opcode
505 # for updating svstate (things like srcstep etc.)
506 update_svstate
= Signal() # set this (below) if updating
507 new_svstate
= SVSTATERec("new_svstate")
508 comb
+= new_svstate
.eq(cur_state
.svstate
)
510 # precalculate srcstep+1 and dststep+1
511 cur_srcstep
= cur_state
.svstate
.srcstep
512 cur_dststep
= cur_state
.svstate
.dststep
513 next_srcstep
= Signal
.like(cur_srcstep
)
514 next_dststep
= Signal
.like(cur_dststep
)
515 comb
+= next_srcstep
.eq(cur_state
.svstate
.srcstep
+1)
516 comb
+= next_dststep
.eq(cur_state
.svstate
.dststep
+1)
518 with m
.FSM(name
="issue_fsm"):
520 # sync with the "fetch" phase which is reading the instruction
521 # at this point, there is no instruction running, that
522 # could inadvertently update the PC.
523 with m
.State("ISSUE_START"):
524 # wait on "core stop" release, before next fetch
525 # need to do this here, in case we are in a VL==0 loop
526 with m
.If(~dbg
.core_stop_o
& ~core_rst
):
527 comb
+= fetch_pc_valid_i
.eq(1) # tell fetch to start
528 with m
.If(fetch_pc_ready_o
): # fetch acknowledged us
531 # tell core it's stopped, and acknowledge debug handshake
532 comb
+= core
.core_stopped_i
.eq(1)
533 comb
+= dbg
.core_stopped_i
.eq(1)
534 # while stopped, allow updating the PC and SVSTATE
535 with m
.If(self
.pc_i
.ok
):
536 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
537 comb
+= self
.state_w_pc
.data_i
.eq(self
.pc_i
.data
)
538 sync
+= pc_changed
.eq(1)
539 with m
.If(self
.svstate_i
.ok
):
540 comb
+= new_svstate
.eq(self
.svstate_i
.data
)
541 comb
+= update_svstate
.eq(1)
542 sync
+= sv_changed
.eq(1)
544 # wait for an instruction to arrive from Fetch
545 with m
.State("INSN_WAIT"):
546 comb
+= fetch_insn_ready_i
.eq(1)
547 with m
.If(fetch_insn_valid_o
):
548 # loop into ISSUE_START if it's a SVP64 instruction
549 # and VL == 0. this because VL==0 is a for-loop
550 # from 0 to 0 i.e. always, always a NOP.
551 cur_vl
= cur_state
.svstate
.vl
552 with m
.If(is_svp64_mode
& (cur_vl
== 0)):
553 # update the PC before fetching the next instruction
554 # since we are in a VL==0 loop, no instruction was
555 # executed that we could be overwriting
556 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
557 comb
+= self
.state_w_pc
.data_i
.eq(nia
)
558 comb
+= self
.insn_done
.eq(1)
559 m
.next
= "ISSUE_START"
562 m
.next
= "PRED_START" # start fetching predicate
564 m
.next
= "DECODE_SV" # skip predication
566 with m
.State("PRED_START"):
567 comb
+= pred_insn_valid_i
.eq(1) # tell fetch_pred to start
568 with m
.If(pred_insn_ready_o
): # fetch_pred acknowledged us
571 with m
.State("MASK_WAIT"):
572 comb
+= pred_mask_ready_i
.eq(1) # ready to receive the masks
573 with m
.If(pred_mask_valid_o
): # predication masks are ready
574 # with m.If(is_svp64_mode):
575 # TODO advance src/dst step to "skip" over predicated-out
576 # from self.srcmask and self.dstmask
577 # https://bugs.libre-soc.org/show_bug.cgi?id=617#c3
578 # but still without exceeding VL in either case
579 # IMPORTANT: when changing src/dest step, have to
580 # jump to m.next = "DECODE_SV" to deal with the change in
583 with m
.If(is_svp64_mode
):
585 pred_src_zero
= pdecode2
.rm_dec
.pred_sz
586 pred_dst_zero
= pdecode2
.rm_dec
.pred_dz
589 TODO: actually, can use
590 PriorityEncoder(self.srcmask | (1<<cur_srcstep))
592 if not pred_src_zero:
593 if (((1<<cur_srcstep) & self.srcmask) == 0) and
595 comb += update_svstate.eq(1)
596 comb += new_svstate.srcstep.eq(next_srcstep)
598 if not pred_dst_zero:
599 if (((1<<cur_dststep) & self.dstmask) == 0) and
601 comb += new_svstate.dststep.eq(next_dststep)
602 comb += update_svstate.eq(1)
610 # after src/dst step have been updated, we are ready
611 # to decode the instruction
612 with m
.State("DECODE_SV"):
613 # decode the instruction
614 sync
+= core
.e
.eq(pdecode2
.e
)
615 sync
+= core
.state
.eq(cur_state
)
616 sync
+= core
.raw_insn_i
.eq(dec_opcode_i
)
617 sync
+= core
.bigendian_i
.eq(self
.core_bigendian_i
)
618 # set RA_OR_ZERO detection in satellite decoders
619 sync
+= core
.sv_a_nz
.eq(pdecode2
.sv_a_nz
)
620 m
.next
= "INSN_EXECUTE" # move to "execute"
622 # handshake with execution FSM, move to "wait" once acknowledged
623 with m
.State("INSN_EXECUTE"):
624 comb
+= exec_insn_valid_i
.eq(1) # trigger execute
625 with m
.If(exec_insn_ready_o
): # execute acknowledged us
626 m
.next
= "EXECUTE_WAIT"
628 with m
.State("EXECUTE_WAIT"):
629 # wait on "core stop" release, at instruction end
630 # need to do this here, in case we are in a VL>1 loop
631 with m
.If(~dbg
.core_stop_o
& ~core_rst
):
632 comb
+= exec_pc_ready_i
.eq(1)
633 with m
.If(exec_pc_valid_o
):
635 # was this the last loop iteration?
637 cur_vl
= cur_state
.svstate
.vl
638 comb
+= is_last
.eq(next_srcstep
== cur_vl
)
640 # if either PC or SVSTATE were changed by the previous
641 # instruction, go directly back to Fetch, without
642 # updating either PC or SVSTATE
643 with m
.If(pc_changed | sv_changed
):
644 m
.next
= "ISSUE_START"
646 # also return to Fetch, when no output was a vector
647 # (regardless of SRCSTEP and VL), or when the last
648 # instruction was really the last one of the VL loop
649 with m
.Elif((~pdecode2
.loop_continue
) | is_last
):
650 # before going back to fetch, update the PC state
651 # register with the NIA.
652 # ok here we are not reading the branch unit.
653 # TODO: this just blithely overwrites whatever
654 # pipeline updated the PC
655 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
656 comb
+= self
.state_w_pc
.data_i
.eq(nia
)
657 # reset SRCSTEP before returning to Fetch
658 with m
.If(pdecode2
.loop_continue
):
659 comb
+= new_svstate
.srcstep
.eq(0)
660 comb
+= new_svstate
.dststep
.eq(0)
661 comb
+= update_svstate
.eq(1)
662 m
.next
= "ISSUE_START"
664 # returning to Execute? then, first update SRCSTEP
666 comb
+= new_svstate
.srcstep
.eq(next_srcstep
)
667 comb
+= new_svstate
.dststep
.eq(next_dststep
)
668 comb
+= update_svstate
.eq(1)
672 comb
+= core
.core_stopped_i
.eq(1)
673 comb
+= dbg
.core_stopped_i
.eq(1)
674 # while stopped, allow updating the PC and SVSTATE
675 with m
.If(self
.pc_i
.ok
):
676 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
677 comb
+= self
.state_w_pc
.data_i
.eq(self
.pc_i
.data
)
678 sync
+= pc_changed
.eq(1)
679 with m
.If(self
.svstate_i
.ok
):
680 comb
+= new_svstate
.eq(self
.svstate_i
.data
)
681 comb
+= update_svstate
.eq(1)
682 sync
+= sv_changed
.eq(1)
684 # check if svstate needs updating: if so, write it to State Regfile
685 with m
.If(update_svstate
):
686 comb
+= self
.state_w_sv
.wen
.eq(1<<StateRegs
.SVSTATE
)
687 comb
+= self
.state_w_sv
.data_i
.eq(new_svstate
)
688 sync
+= cur_state
.svstate
.eq(new_svstate
) # for next clock
690 def execute_fsm(self
, m
, core
, pc_changed
, sv_changed
,
691 exec_insn_valid_i
, exec_insn_ready_o
,
692 exec_pc_valid_o
, exec_pc_ready_i
):
695 execute FSM. this interacts with the "issue" FSM
696 through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
697 (outgoing). SVP64 RM prefixes have already been set up by the
698 "issue" phase, so execute is fairly straightforward.
703 pdecode2
= self
.pdecode2
706 core_busy_o
= core
.busy_o
# core is busy
707 core_ivalid_i
= core
.ivalid_i
# instruction is valid
708 core_issue_i
= core
.issue_i
# instruction is issued
709 insn_type
= core
.e
.do
.insn_type
# instruction MicroOp type
711 with m
.FSM(name
="exec_fsm"):
713 # waiting for instruction bus (stays there until not busy)
714 with m
.State("INSN_START"):
715 comb
+= exec_insn_ready_o
.eq(1)
716 with m
.If(exec_insn_valid_i
):
717 comb
+= core_ivalid_i
.eq(1) # instruction is valid
718 comb
+= core_issue_i
.eq(1) # and issued
719 sync
+= sv_changed
.eq(0)
720 sync
+= pc_changed
.eq(0)
721 m
.next
= "INSN_ACTIVE" # move to "wait completion"
723 # instruction started: must wait till it finishes
724 with m
.State("INSN_ACTIVE"):
725 with m
.If(insn_type
!= MicrOp
.OP_NOP
):
726 comb
+= core_ivalid_i
.eq(1) # instruction is valid
727 # note changes to PC and SVSTATE
728 with m
.If(self
.state_nia
.wen
& (1<<StateRegs
.SVSTATE
)):
729 sync
+= sv_changed
.eq(1)
730 with m
.If(self
.state_nia
.wen
& (1<<StateRegs
.PC
)):
731 sync
+= pc_changed
.eq(1)
732 with m
.If(~core_busy_o
): # instruction done!
733 comb
+= exec_pc_valid_o
.eq(1)
734 with m
.If(exec_pc_ready_i
):
735 comb
+= self
.insn_done
.eq(1)
736 m
.next
= "INSN_START" # back to fetch
738 def setup_peripherals(self
, m
):
739 comb
, sync
= m
.d
.comb
, m
.d
.sync
741 m
.submodules
.core
= core
= DomainRenamer("coresync")(self
.core
)
742 m
.submodules
.imem
= imem
= self
.imem
743 m
.submodules
.dbg
= dbg
= self
.dbg
745 m
.submodules
.jtag
= jtag
= self
.jtag
746 # TODO: UART2GDB mux, here, from external pin
747 # see https://bugs.libre-soc.org/show_bug.cgi?id=499
748 sync
+= dbg
.dmi
.connect_to(jtag
.dmi
)
750 cur_state
= self
.cur_state
752 # 4x 4k SRAM blocks. these simply "exist", they get routed in litex
754 for i
, sram
in enumerate(self
.sram4k
):
755 m
.submodules
["sram4k_%d" % i
] = sram
756 comb
+= sram
.enable
.eq(self
.wb_sram_en
)
758 # XICS interrupt handler
760 m
.submodules
.xics_icp
= icp
= self
.xics_icp
761 m
.submodules
.xics_ics
= ics
= self
.xics_ics
762 comb
+= icp
.ics_i
.eq(ics
.icp_o
) # connect ICS to ICP
763 sync
+= cur_state
.eint
.eq(icp
.core_irq_o
) # connect ICP to core
765 # GPIO test peripheral
767 m
.submodules
.simple_gpio
= simple_gpio
= self
.simple_gpio
769 # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
770 # XXX causes litex ECP5 test to get wrong idea about input and output
771 # (but works with verilator sim *sigh*)
772 #if self.gpio and self.xics:
773 # comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
775 # instruction decoder
776 pdecode
= create_pdecode()
777 m
.submodules
.dec2
= pdecode2
= self
.pdecode2
779 m
.submodules
.svp64
= svp64
= self
.svp64
782 dmi
, d_reg
, d_cr
, d_xer
, = dbg
.dmi
, dbg
.d_gpr
, dbg
.d_cr
, dbg
.d_xer
783 intrf
= self
.core
.regs
.rf
['int']
785 # clock delay power-on reset
786 cd_por
= ClockDomain(reset_less
=True)
787 cd_sync
= ClockDomain()
788 core_sync
= ClockDomain("coresync")
789 m
.domains
+= cd_por
, cd_sync
, core_sync
791 ti_rst
= Signal(reset_less
=True)
792 delay
= Signal(range(4), reset
=3)
793 with m
.If(delay
!= 0):
794 m
.d
.por
+= delay
.eq(delay
- 1)
795 comb
+= cd_por
.clk
.eq(ClockSignal())
797 # power-on reset delay
798 core_rst
= ResetSignal("coresync")
799 comb
+= ti_rst
.eq(delay
!= 0 | dbg
.core_rst_o |
ResetSignal())
800 comb
+= core_rst
.eq(ti_rst
)
802 # busy/halted signals from core
803 comb
+= self
.busy_o
.eq(core
.busy_o
)
804 comb
+= pdecode2
.dec
.bigendian
.eq(self
.core_bigendian_i
)
806 # temporary hack: says "go" immediately for both address gen and ST
808 ldst
= core
.fus
.fus
['ldst0']
809 st_go_edge
= rising_edge(m
, ldst
.st
.rel_o
)
810 m
.d
.comb
+= ldst
.ad
.go_i
.eq(ldst
.ad
.rel_o
) # link addr-go direct to rel
811 m
.d
.comb
+= ldst
.st
.go_i
.eq(st_go_edge
) # link store-go to rising rel
815 def elaborate(self
, platform
):
818 comb
, sync
= m
.d
.comb
, m
.d
.sync
819 cur_state
= self
.cur_state
820 pdecode2
= self
.pdecode2
824 # set up peripherals and core
825 core_rst
= self
.setup_peripherals(m
)
827 # PC and instruction from I-Memory
828 comb
+= self
.pc_o
.eq(cur_state
.pc
)
829 pc_changed
= Signal() # note write to PC
830 sv_changed
= Signal() # note write to SVSTATE
832 # read state either from incoming override or from regfile
833 # TODO: really should be doing MSR in the same way
834 pc
= state_get(m
, self
.pc_i
, "pc", # read PC
835 self
.state_r_pc
, StateRegs
.PC
)
836 svstate
= state_get(m
, self
.svstate_i
, "svstate", # read SVSTATE
837 self
.state_r_sv
, StateRegs
.SVSTATE
)
839 # don't write pc every cycle
840 comb
+= self
.state_w_pc
.wen
.eq(0)
841 comb
+= self
.state_w_pc
.data_i
.eq(0)
843 # don't read msr every cycle
844 comb
+= self
.state_r_msr
.ren
.eq(0)
846 # address of the next instruction, in the absence of a branch
847 # depends on the instruction size
848 nia
= Signal(64, reset_less
=True)
850 # connect up debug signals
851 # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
852 comb
+= dbg
.terminate_i
.eq(core
.core_terminate_o
)
853 comb
+= dbg
.state
.pc
.eq(pc
)
854 comb
+= dbg
.state
.svstate
.eq(svstate
)
855 comb
+= dbg
.state
.msr
.eq(cur_state
.msr
)
857 # pass the prefix mode from Fetch to Issue, so the latter can loop
859 is_svp64_mode
= Signal()
861 # there are *THREE* FSMs, fetch (32/64-bit) issue, decode/execute.
862 # these are the handshake signals between fetch and decode/execute
864 # fetch FSM can run as soon as the PC is valid
865 fetch_pc_valid_i
= Signal() # Execute tells Fetch "start next read"
866 fetch_pc_ready_o
= Signal() # Fetch Tells SVSTATE "proceed"
868 # fetch FSM hands over the instruction to be decoded / issued
869 fetch_insn_valid_o
= Signal()
870 fetch_insn_ready_i
= Signal()
872 # predicate fetch FSM decodes and fetches the predicate
873 pred_insn_valid_i
= Signal()
874 pred_insn_ready_o
= Signal()
876 # predicate fetch FSM delivers the masks
877 pred_mask_valid_o
= Signal()
878 pred_mask_ready_i
= Signal()
880 # issue FSM delivers the instruction to the be executed
881 exec_insn_valid_i
= Signal()
882 exec_insn_ready_o
= Signal()
884 # execute FSM, hands over the PC/SVSTATE back to the issue FSM
885 exec_pc_valid_o
= Signal()
886 exec_pc_ready_i
= Signal()
888 # the FSMs here are perhaps unusual in that they detect conditions
889 # then "hold" information, combinatorially, for the core
890 # (as opposed to using sync - which would be on a clock's delay)
891 # this includes the actual opcode, valid flags and so on.
893 # Fetch, then predicate fetch, then Issue, then Execute.
894 # Issue is where the VL for-loop # lives. the ready/valid
895 # signalling is used to communicate between the four.
897 self
.fetch_fsm(m
, core
, pc
, svstate
, nia
, is_svp64_mode
,
898 fetch_pc_ready_o
, fetch_pc_valid_i
,
899 fetch_insn_valid_o
, fetch_insn_ready_i
)
901 self
.issue_fsm(m
, core
, pc_changed
, sv_changed
, nia
,
902 dbg
, core_rst
, is_svp64_mode
,
903 fetch_pc_ready_o
, fetch_pc_valid_i
,
904 fetch_insn_valid_o
, fetch_insn_ready_i
,
905 pred_insn_valid_i
, pred_insn_ready_o
,
906 pred_mask_valid_o
, pred_mask_ready_i
,
907 exec_insn_valid_i
, exec_insn_ready_o
,
908 exec_pc_valid_o
, exec_pc_ready_i
)
911 self
.fetch_predicate_fsm(m
,
912 pred_insn_valid_i
, pred_insn_ready_o
,
913 pred_mask_valid_o
, pred_mask_ready_i
)
915 self
.execute_fsm(m
, core
, pc_changed
, sv_changed
,
916 exec_insn_valid_i
, exec_insn_ready_o
,
917 exec_pc_valid_o
, exec_pc_ready_i
)
919 # this bit doesn't have to be in the FSM: connect up to read
920 # regfiles on demand from DMI
923 # DEC and TB inc/dec FSM. copy of DEC is put into CoreState,
924 # (which uses that in PowerDecoder2 to raise 0x900 exception)
925 self
.tb_dec_fsm(m
, cur_state
.dec
)
929 def do_dmi(self
, m
, dbg
):
930 """deals with DMI debug requests
932 currently only provides read requests for the INT regfile, CR and XER
933 it will later also deal with *writing* to these regfiles.
937 dmi
, d_reg
, d_cr
, d_xer
, = dbg
.dmi
, dbg
.d_gpr
, dbg
.d_cr
, dbg
.d_xer
938 intrf
= self
.core
.regs
.rf
['int']
940 with m
.If(d_reg
.req
): # request for regfile access being made
941 # TODO: error-check this
942 # XXX should this be combinatorial? sync better?
944 comb
+= self
.int_r
.ren
.eq(1<<d_reg
.addr
)
946 comb
+= self
.int_r
.addr
.eq(d_reg
.addr
)
947 comb
+= self
.int_r
.ren
.eq(1)
948 d_reg_delay
= Signal()
949 sync
+= d_reg_delay
.eq(d_reg
.req
)
950 with m
.If(d_reg_delay
):
951 # data arrives one clock later
952 comb
+= d_reg
.data
.eq(self
.int_r
.data_o
)
953 comb
+= d_reg
.ack
.eq(1)
955 # sigh same thing for CR debug
956 with m
.If(d_cr
.req
): # request for regfile access being made
957 comb
+= self
.cr_r
.ren
.eq(0b11111111) # enable all
958 d_cr_delay
= Signal()
959 sync
+= d_cr_delay
.eq(d_cr
.req
)
960 with m
.If(d_cr_delay
):
961 # data arrives one clock later
962 comb
+= d_cr
.data
.eq(self
.cr_r
.data_o
)
963 comb
+= d_cr
.ack
.eq(1)
966 with m
.If(d_xer
.req
): # request for regfile access being made
967 comb
+= self
.xer_r
.ren
.eq(0b111111) # enable all
968 d_xer_delay
= Signal()
969 sync
+= d_xer_delay
.eq(d_xer
.req
)
970 with m
.If(d_xer_delay
):
971 # data arrives one clock later
972 comb
+= d_xer
.data
.eq(self
.xer_r
.data_o
)
973 comb
+= d_xer
.ack
.eq(1)
975 def tb_dec_fsm(self
, m
, spr_dec
):
978 this is a FSM for updating either dec or tb. it runs alternately
979 DEC, TB, DEC, TB. note that SPR pipeline could have written a new
980 value to DEC, however the regfile has "passthrough" on it so this
983 see v3.0B p1097-1099 for Timeer Resource and p1065 and p1076
986 comb
, sync
= m
.d
.comb
, m
.d
.sync
987 fast_rf
= self
.core
.regs
.rf
['fast']
988 fast_r_dectb
= fast_rf
.r_ports
['issue'] # DEC/TB
989 fast_w_dectb
= fast_rf
.w_ports
['issue'] # DEC/TB
993 # initiates read of current DEC
994 with m
.State("DEC_READ"):
995 comb
+= fast_r_dectb
.addr
.eq(FastRegs
.DEC
)
996 comb
+= fast_r_dectb
.ren
.eq(1)
999 # waits for DEC read to arrive (1 cycle), updates with new value
1000 with m
.State("DEC_WRITE"):
1001 new_dec
= Signal(64)
1002 # TODO: MSR.LPCR 32-bit decrement mode
1003 comb
+= new_dec
.eq(fast_r_dectb
.data_o
- 1)
1004 comb
+= fast_w_dectb
.addr
.eq(FastRegs
.DEC
)
1005 comb
+= fast_w_dectb
.wen
.eq(1)
1006 comb
+= fast_w_dectb
.data_i
.eq(new_dec
)
1007 sync
+= spr_dec
.eq(new_dec
) # copy into cur_state for decoder
1010 # initiates read of current TB
1011 with m
.State("TB_READ"):
1012 comb
+= fast_r_dectb
.addr
.eq(FastRegs
.TB
)
1013 comb
+= fast_r_dectb
.ren
.eq(1)
1016 # waits for read TB to arrive, initiates write of current TB
1017 with m
.State("TB_WRITE"):
1019 comb
+= new_tb
.eq(fast_r_dectb
.data_o
+ 1)
1020 comb
+= fast_w_dectb
.addr
.eq(FastRegs
.TB
)
1021 comb
+= fast_w_dectb
.wen
.eq(1)
1022 comb
+= fast_w_dectb
.data_i
.eq(new_tb
)
1028 yield from self
.pc_i
.ports()
1031 yield from self
.core
.ports()
1032 yield from self
.imem
.ports()
1033 yield self
.core_bigendian_i
1039 def external_ports(self
):
1040 ports
= self
.pc_i
.ports()
1041 ports
+= [self
.pc_o
, self
.memerr_o
, self
.core_bigendian_i
, self
.busy_o
,
1045 ports
+= list(self
.jtag
.external_ports())
1047 # don't add DMI if JTAG is enabled
1048 ports
+= list(self
.dbg
.dmi
.ports())
1050 ports
+= list(self
.imem
.ibus
.fields
.values())
1051 ports
+= list(self
.core
.l0
.cmpi
.lsmem
.lsi
.slavebus
.fields
.values())
1054 for sram
in self
.sram4k
:
1055 ports
+= list(sram
.bus
.fields
.values())
1058 ports
+= list(self
.xics_icp
.bus
.fields
.values())
1059 ports
+= list(self
.xics_ics
.bus
.fields
.values())
1060 ports
.append(self
.int_level_i
)
1063 ports
+= list(self
.simple_gpio
.bus
.fields
.values())
1064 ports
.append(self
.gpio_o
)
1072 class TestIssuer(Elaboratable
):
1073 def __init__(self
, pspec
):
1074 self
.ti
= TestIssuerInternal(pspec
)
1076 self
.pll
= DummyPLL()
1078 # PLL direct clock or not
1079 self
.pll_en
= hasattr(pspec
, "use_pll") and pspec
.use_pll
1081 self
.pll_18_o
= Signal(reset_less
=True)
1083 def elaborate(self
, platform
):
1087 # TestIssuer runs at direct clock
1088 m
.submodules
.ti
= ti
= self
.ti
1089 cd_int
= ClockDomain("coresync")
1092 # ClockSelect runs at PLL output internal clock rate
1093 m
.submodules
.pll
= pll
= self
.pll
1095 # add clock domains from PLL
1096 cd_pll
= ClockDomain("pllclk")
1099 # PLL clock established. has the side-effect of running clklsel
1100 # at the PLL's speed (see DomainRenamer("pllclk") above)
1101 pllclk
= ClockSignal("pllclk")
1102 comb
+= pllclk
.eq(pll
.clk_pll_o
)
1104 # wire up external 24mhz to PLL
1105 comb
+= pll
.clk_24_i
.eq(ClockSignal())
1107 # output 18 mhz PLL test signal
1108 comb
+= self
.pll_18_o
.eq(pll
.pll_18_o
)
1110 # now wire up ResetSignals. don't mind them being in this domain
1111 pll_rst
= ResetSignal("pllclk")
1112 comb
+= pll_rst
.eq(ResetSignal())
1114 # internal clock is set to selector clock-out. has the side-effect of
1115 # running TestIssuer at this speed (see DomainRenamer("intclk") above)
1116 intclk
= ClockSignal("coresync")
1118 comb
+= intclk
.eq(pll
.clk_pll_o
)
1120 comb
+= intclk
.eq(ClockSignal())
1125 return list(self
.ti
.ports()) + list(self
.pll
.ports()) + \
1126 [ClockSignal(), ResetSignal()]
1128 def external_ports(self
):
1129 ports
= self
.ti
.external_ports()
1130 ports
.append(ClockSignal())
1131 ports
.append(ResetSignal())
1133 ports
.append(self
.pll
.clk_sel_i
)
1134 ports
.append(self
.pll_18_o
)
1135 ports
.append(self
.pll
.pll_lck_o
)
1139 if __name__
== '__main__':
1140 units
= {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
1146 pspec
= TestMemPspec(ldst_ifacetype
='bare_wb',
1147 imem_ifacetype
='bare_wb',
1152 dut
= TestIssuer(pspec
)
1153 vl
= main(dut
, ports
=dut
.ports(), name
="test_issuer")
1155 if len(sys
.argv
) == 1:
1156 vl
= rtlil
.convert(dut
, ports
=dut
.external_ports(), name
="test_issuer")
1157 with
open("test_issuer.il", "w") as f
: