# not in any way intended for production use.  this runs a FSM that:
#
# * reads the Program Counter from StateRegs
# * reads an instruction from a fixed-size Test Memory
# * issues it to the Simple Core
# * waits for it to complete
# * does it all over again
#
# the purpose of this module is to verify the functional correctness
# of the Function Units in the absolute simplest and clearest possible
# way, and to at least provide something that can be further
# incrementally improved.
18 from nmigen
import (Elaboratable
, Module
, Signal
, ClockSignal
, ResetSignal
,
19 ClockDomain
, DomainRenamer
, Mux
, Const
, Repl
)
20 from nmigen
.cli
import rtlil
21 from nmigen
.cli
import main
24 from soc
.decoder
.power_decoder
import create_pdecode
25 from soc
.decoder
.power_decoder2
import PowerDecode2
, SVP64PrefixDecoder
26 from soc
.decoder
.decode2execute1
import IssuerDecode2ToOperand
27 from soc
.decoder
.decode2execute1
import Data
28 from soc
.experiment
.testmem
import TestMemory
# test only for instructions
29 from soc
.regfile
.regfiles
import StateRegs
, FastRegs
30 from soc
.simple
.core
import NonProductionCore
31 from soc
.config
.test
.test_loadstore
import TestMemPspec
32 from soc
.config
.ifetch
import ConfigFetchUnit
33 from soc
.decoder
.power_enums
import (MicrOp
, SVP64PredInt
, SVP64PredCR
,
35 from soc
.debug
.dmi
import CoreDebug
, DMIInterface
36 from soc
.debug
.jtag
import JTAG
37 from soc
.config
.pinouts
import get_pinspecs
38 from soc
.config
.state
import CoreState
39 from soc
.interrupts
.xics
import XICS_ICP
, XICS_ICS
40 from soc
.bus
.simple_gpio
import SimpleGPIO
41 from soc
.bus
.SPBlock512W64B8W
import SPBlock512W64B8W
42 from soc
.clock
.select
import ClockSelect
43 from soc
.clock
.dummypll
import DummyPLL
44 from soc
.sv
.svstate
import SVSTATERec
47 from nmutil
.util
import rising_edge
def get_insn(f_instr_o, pc):
    """select the 32-bit instruction word out of the fetch unit output

    * f_instr_o: the fetched instruction data. must provide ``width`` and,
      when it is not 32 bits wide, ``word_select``
    * pc: program counter. only ``pc[2]`` is looked at

    returns f_instr_o unchanged when it is already 32 bits wide, otherwise
    the 32-bit word of f_instr_o indexed by bit 2 of pc.
    """
    if f_instr_o.width == 32:
        # 32-bit fetch: the output already is the instruction
        return f_instr_o
    # 64-bit: bit 2 of pc decides which word to select
    return f_instr_o.word_select(pc[2], 32)
56 # gets state input or reads from state regfile
def state_get(m, state_i, name, regfile, regnum):
    """get a state value from the incoming override or the state regfile

    * m: the Module that the logic is added to
    * state_i: override record, provides ``ok`` and ``data``
    * name: name given to the result Signal (and prefix of its helper)
    * regfile: regfile read port, provides ``ren`` and ``data_o``
    * regnum: bit position raised in ``ren`` to request the read

    returns a 64-bit Signal. it is driven from state_i.data while
    state_i.ok is set, and from regfile.data_o on the clock after a
    cycle in which state_i.ok was clear.
    """
    comb = m.d.comb
    sync = m.d.sync

    res = Signal(64, reset_less=True, name=name)
    # remembers, one clock later, that no override was present
    res_ok_delay = Signal(name="%s_ok_delay" % name)
    sync += res_ok_delay.eq(~state_i.ok)

    with m.If(state_i.ok):
        # incoming override (start from pc_i)
        comb += res.eq(state_i.data)
    with m.Else():
        # otherwise read StateRegs regfile for PC...
        comb += regfile.ren.eq(1 << regnum)
    # ... but on a 1-clock delay
    # NOTE(review): nesting level of this If is reconstructed - confirm
    with m.If(res_ok_delay):
        comb += res.eq(regfile.data_o)

    return res
def get_predint(m, mask, name):
    """decode SVP64 predicate integer mask field to reg number and invert
    this is identical to the equivalent function in ISACaller except that
    it doesn't read the INT directly, it just decodes "what needs to be done"
    i.e. which INT reg, whether it is shifted and whether it is bit-inverted.

    * all1s is set to indicate that no mask is to be applied.
    * regread indicates the GPR register number to be read
    * invert is set to indicate that the register value is to be inverted
    * unary indicates that the contents of the register is to be shifted 1<<r3

    * m: the Module that the decode logic is added to
    * mask: the predicate mask field being decoded
    * name: prefix for the names of the four Signals created here

    returns the tuple (regread, invert, unary, all1s)
    """
    comb = m.d.comb
    regread = Signal(5, name=name+"regread")
    invert = Signal(name=name+"invert")
    unary = Signal(name=name+"unary")
    all1s = Signal(name=name+"all1s")

    with m.Switch(mask):
        with m.Case(SVP64PredInt.ALWAYS.value):
            comb += all1s.eq(1)      # use 0b1111 (all ones)
        with m.Case(SVP64PredInt.R3_UNARY.value):
            # NOTE(review): regread for the R3 / R3_UNARY arms and invert
            # for the _N arms are reconstructed from the docstring and the
            # enum names - confirm against ISACaller
            comb += regread.eq(3)
            comb += unary.eq(1)      # 1<<r3 - shift r3 (single bit)
        with m.Case(SVP64PredInt.R3.value):
            comb += regread.eq(3)
        with m.Case(SVP64PredInt.R3_N.value):
            comb += regread.eq(3)
            comb += invert.eq(1)
        with m.Case(SVP64PredInt.R10.value):
            comb += regread.eq(10)
        with m.Case(SVP64PredInt.R10_N.value):
            comb += regread.eq(10)
            comb += invert.eq(1)
        with m.Case(SVP64PredInt.R30.value):
            comb += regread.eq(30)
        with m.Case(SVP64PredInt.R30_N.value):
            comb += regread.eq(30)
            comb += invert.eq(1)

    return regread, invert, unary, all1s
114 def get_predcr(m
, mask
, name
):
115 """decode SVP64 predicate CR to reg number field and invert status
116 this is identical to _get_predcr in ISACaller
119 idx
= Signal(2, name
=name
+"idx")
120 invert
= Signal(name
=name
+"crinvert")
122 with m
.Case(SVP64PredCR
.LT
.value
):
125 with m
.Case(SVP64PredCR
.GE
.value
):
128 with m
.Case(SVP64PredCR
.GT
.value
):
131 with m
.Case(SVP64PredCR
.LE
.value
):
134 with m
.Case(SVP64PredCR
.EQ
.value
):
137 with m
.Case(SVP64PredCR
.NE
.value
):
140 with m
.Case(SVP64PredCR
.SO
.value
):
143 with m
.Case(SVP64PredCR
.NS
.value
):
149 class TestIssuerInternal(Elaboratable
):
150 """TestIssuer - reads instructions from TestMemory and issues them
152 efficiency and speed is not the main goal here: functional correctness
153 and code clarity is. optimisations (which almost 100% interfere with
154 easy understanding) come later.
156 def __init__(self
, pspec
):
158 # test is SVP64 is to be enabled
159 self
.svp64_en
= hasattr(pspec
, "svp64") and (pspec
.svp64
== True)
161 # and if regfiles are reduced
162 self
.regreduce_en
= (hasattr(pspec
, "regreduce") and
163 (pspec
.regreduce
== True))
165 # JTAG interface. add this right at the start because if it's
166 # added it *modifies* the pspec, by adding enable/disable signals
167 # for parts of the rest of the core
168 self
.jtag_en
= hasattr(pspec
, "debug") and pspec
.debug
== 'jtag'
170 subset
= {'uart', 'mtwi', 'eint', 'gpio', 'mspi0', 'mspi1',
172 self
.jtag
= JTAG(get_pinspecs(subset
=subset
))
173 # add signals to pspec to enable/disable icache and dcache
174 # (or data and intstruction wishbone if icache/dcache not included)
175 # https://bugs.libre-soc.org/show_bug.cgi?id=520
176 # TODO: do we actually care if these are not domain-synchronised?
177 # honestly probably not.
178 pspec
.wb_icache_en
= self
.jtag
.wb_icache_en
179 pspec
.wb_dcache_en
= self
.jtag
.wb_dcache_en
180 self
.wb_sram_en
= self
.jtag
.wb_sram_en
182 self
.wb_sram_en
= Const(1)
184 # add 4k sram blocks?
185 self
.sram4x4k
= (hasattr(pspec
, "sram4x4kblock") and
186 pspec
.sram4x4kblock
== True)
190 self
.sram4k
.append(SPBlock512W64B8W(name
="sram4k_%d" % i
,
193 # add interrupt controller?
194 self
.xics
= hasattr(pspec
, "xics") and pspec
.xics
== True
196 self
.xics_icp
= XICS_ICP()
197 self
.xics_ics
= XICS_ICS()
198 self
.int_level_i
= self
.xics_ics
.int_level_i
200 # add GPIO peripheral?
201 self
.gpio
= hasattr(pspec
, "gpio") and pspec
.gpio
== True
203 self
.simple_gpio
= SimpleGPIO()
204 self
.gpio_o
= self
.simple_gpio
.gpio_o
206 # main instruction core. suitable for prototyping / demo only
207 self
.core
= core
= NonProductionCore(pspec
)
209 # instruction decoder. goes into Trap Record
210 pdecode
= create_pdecode()
211 self
.cur_state
= CoreState("cur") # current state (MSR/PC/SVSTATE)
212 self
.pdecode2
= PowerDecode2(pdecode
, state
=self
.cur_state
,
213 opkls
=IssuerDecode2ToOperand
,
214 svp64_en
=self
.svp64_en
,
215 regreduce_en
=self
.regreduce_en
)
217 self
.svp64
= SVP64PrefixDecoder() # for decoding SVP64 prefix
219 # Test Instruction memory
220 self
.imem
= ConfigFetchUnit(pspec
).fu
223 self
.dbg
= CoreDebug()
225 # instruction go/monitor
226 self
.pc_o
= Signal(64, reset_less
=True)
227 self
.pc_i
= Data(64, "pc_i") # set "ok" to indicate "please change me"
228 self
.svstate_i
= Data(32, "svstate_i") # ditto
229 self
.core_bigendian_i
= Signal() # TODO: set based on MSR.LE
230 self
.busy_o
= Signal(reset_less
=True)
231 self
.memerr_o
= Signal(reset_less
=True)
233 # STATE regfile read /write ports for PC, MSR, SVSTATE
234 staterf
= self
.core
.regs
.rf
['state']
235 self
.state_r_pc
= staterf
.r_ports
['cia'] # PC rd
236 self
.state_w_pc
= staterf
.w_ports
['d_wr1'] # PC wr
237 self
.state_r_msr
= staterf
.r_ports
['msr'] # MSR rd
238 self
.state_r_sv
= staterf
.r_ports
['sv'] # SVSTATE rd
239 self
.state_w_sv
= staterf
.w_ports
['sv'] # SVSTATE wr
241 # DMI interface access
242 intrf
= self
.core
.regs
.rf
['int']
243 crrf
= self
.core
.regs
.rf
['cr']
244 xerrf
= self
.core
.regs
.rf
['xer']
245 self
.int_r
= intrf
.r_ports
['dmi'] # INT read
246 self
.cr_r
= crrf
.r_ports
['full_cr_dbg'] # CR read
247 self
.xer_r
= xerrf
.r_ports
['full_xer'] # XER read
251 self
.int_pred
= intrf
.r_ports
['pred'] # INT predicate read
252 self
.cr_pred
= crrf
.r_ports
['cr_pred'] # CR predicate read
254 # hack method of keeping an eye on whether branch/trap set the PC
255 self
.state_nia
= self
.core
.regs
.rf
['state'].w_ports
['nia']
256 self
.state_nia
.wen
.name
= 'state_nia_wen'
258 # pulse to synchronize the simulator at instruction end
259 self
.insn_done
= Signal()
262 # store copies of predicate masks
263 self
.srcmask
= Signal(64)
264 self
.dstmask
= Signal(64)
266 def fetch_fsm(self
, m
, core
, pc
, svstate
, nia
, is_svp64_mode
,
267 fetch_pc_ready_o
, fetch_pc_valid_i
,
268 fetch_insn_valid_o
, fetch_insn_ready_i
):
271 this FSM performs fetch of raw instruction data, partial-decodes
272 it 32-bit at a time to detect SVP64 prefixes, and will optionally
273 read a 2nd 32-bit quantity if that occurs.
277 pdecode2
= self
.pdecode2
278 cur_state
= self
.cur_state
279 dec_opcode_i
= pdecode2
.dec
.raw_opcode_in
# raw opcode
281 msr_read
= Signal(reset
=1)
283 with m
.FSM(name
='fetch_fsm'):
286 with m
.State("IDLE"):
287 comb
+= fetch_pc_ready_o
.eq(1)
288 with m
.If(fetch_pc_valid_i
):
289 # instruction allowed to go: start by reading the PC
290 # capture the PC and also drop it into Insn Memory
291 # we have joined a pair of combinatorial memory
292 # lookups together. this is Generally Bad.
293 comb
+= self
.imem
.a_pc_i
.eq(pc
)
294 comb
+= self
.imem
.a_valid_i
.eq(1)
295 comb
+= self
.imem
.f_valid_i
.eq(1)
296 sync
+= cur_state
.pc
.eq(pc
)
297 sync
+= cur_state
.svstate
.eq(svstate
) # and svstate
299 # initiate read of MSR. arrives one clock later
300 comb
+= self
.state_r_msr
.ren
.eq(1 << StateRegs
.MSR
)
301 sync
+= msr_read
.eq(0)
303 m
.next
= "INSN_READ" # move to "wait for bus" phase
305 # dummy pause to find out why simulation is not keeping up
306 with m
.State("INSN_READ"):
307 # one cycle later, msr/sv read arrives. valid only once.
308 with m
.If(~msr_read
):
309 sync
+= msr_read
.eq(1) # yeah don't read it again
310 sync
+= cur_state
.msr
.eq(self
.state_r_msr
.data_o
)
311 with m
.If(self
.imem
.f_busy_o
): # zzz...
312 # busy: stay in wait-read
313 comb
+= self
.imem
.a_valid_i
.eq(1)
314 comb
+= self
.imem
.f_valid_i
.eq(1)
316 # not busy: instruction fetched
317 insn
= get_insn(self
.imem
.f_instr_o
, cur_state
.pc
)
320 # decode the SVP64 prefix, if any
321 comb
+= svp64
.raw_opcode_in
.eq(insn
)
322 comb
+= svp64
.bigendian
.eq(self
.core_bigendian_i
)
323 # pass the decoded prefix (if any) to PowerDecoder2
324 sync
+= pdecode2
.sv_rm
.eq(svp64
.svp64_rm
)
325 # remember whether this is a prefixed instruction, so
326 # the FSM can readily loop when VL==0
327 sync
+= is_svp64_mode
.eq(svp64
.is_svp64_mode
)
328 # calculate the address of the following instruction
329 insn_size
= Mux(svp64
.is_svp64_mode
, 8, 4)
330 sync
+= nia
.eq(cur_state
.pc
+ insn_size
)
331 with m
.If(~svp64
.is_svp64_mode
):
332 # with no prefix, store the instruction
333 # and hand it directly to the next FSM
334 sync
+= dec_opcode_i
.eq(insn
)
335 m
.next
= "INSN_READY"
337 # fetch the rest of the instruction from memory
338 comb
+= self
.imem
.a_pc_i
.eq(cur_state
.pc
+ 4)
339 comb
+= self
.imem
.a_valid_i
.eq(1)
340 comb
+= self
.imem
.f_valid_i
.eq(1)
341 m
.next
= "INSN_READ2"
343 # not SVP64 - 32-bit only
344 sync
+= nia
.eq(cur_state
.pc
+ 4)
345 sync
+= dec_opcode_i
.eq(insn
)
346 m
.next
= "INSN_READY"
348 with m
.State("INSN_READ2"):
349 with m
.If(self
.imem
.f_busy_o
): # zzz...
350 # busy: stay in wait-read
351 comb
+= self
.imem
.a_valid_i
.eq(1)
352 comb
+= self
.imem
.f_valid_i
.eq(1)
354 # not busy: instruction fetched
355 insn
= get_insn(self
.imem
.f_instr_o
, cur_state
.pc
+4)
356 sync
+= dec_opcode_i
.eq(insn
)
357 m
.next
= "INSN_READY"
358 # TODO: probably can start looking at pdecode2.rm_dec
359 # here or maybe even in INSN_READ state, if svp64_mode
360 # detected, in order to trigger - and wait for - the
363 pmode
= pdecode2
.rm_dec
.predmode
365 if pmode != SVP64PredMode.ALWAYS.value:
366 fire predicate loading FSM and wait before
369 sync += self.srcmask.eq(-1) # set to all 1s
370 sync += self.dstmask.eq(-1) # set to all 1s
371 m.next = "INSN_READY"
374 with m
.State("INSN_READY"):
375 # hand over the instruction, to be decoded
376 comb
+= fetch_insn_valid_o
.eq(1)
377 with m
.If(fetch_insn_ready_i
):
380 def fetch_predicate_fsm(self
, m
,
381 pred_insn_valid_i
, pred_insn_ready_o
,
382 pred_mask_valid_o
, pred_mask_ready_i
):
383 """fetch_predicate_fsm - obtains (constructs in the case of CR)
384 src/dest predicate masks
386 https://bugs.libre-soc.org/show_bug.cgi?id=617
387 the predicates can be read here, by using IntRegs r_ports['pred']
388 or CRRegs r_ports['pred']. in the case of CRs it will have to
389 be done through multiple reads, extracting one relevant at a time.
390 later, a faster way would be to use the 32-bit-wide CR port but
391 this is more complex decoding, here. equivalent code used in
392 ISACaller is "from soc.decoder.isa.caller import get_predcr"
394 note: this ENTIRE FSM is not to be called when svp64 is disabled
398 pdecode2
= self
.pdecode2
399 rm_dec
= pdecode2
.rm_dec
# SVP64RMModeDecode
400 predmode
= rm_dec
.predmode
401 srcpred
, dstpred
= rm_dec
.srcpred
, rm_dec
.dstpred
402 cr_pred
, int_pred
= self
.cr_pred
, self
.int_pred
# read regfiles
404 # elif predmode == CR:
405 # CR-src sidx, sinvert = get_predcr(m, srcpred)
406 # CR-dst didx, dinvert = get_predcr(m, dstpred)
407 # TODO read CR-src and CR-dst into self.srcmask+dstmask with loop
408 # has to cope with first one then the other
409 # for cr_idx = FSM-state-loop(0..VL-1):
410 # FSM-state-trigger-CR-read:
411 # cr_ren = (1<<7-(cr_idx+SVP64CROffs.CRPred))
412 # comb += cr_pred.ren.eq(cr_ren)
413 # FSM-state-1-clock-later-actual-Read:
414 # cr_field = Signal(4)
416 # # read the CR field, select the appropriate bit
417 # comb += cr_field.eq(cr_pred.data_o)
418 # comb += cr_bit.eq(cr_field.bit_select(idx)))
419 # # just like in branch BO tests
420 # comd += self.srcmask[cr_idx].eq(inv ^ cr_bit)
423 sregread
, sinvert
, sunary
, sall1s
= get_predint(m
, srcpred
, 's')
424 dregread
, dinvert
, dunary
, dall1s
= get_predint(m
, dstpred
, 'd')
425 sidx
, scrinvert
= get_predcr(m
, srcpred
, 's')
426 didx
, dcrinvert
= get_predcr(m
, dstpred
, 'd')
428 with m
.FSM(name
="fetch_predicate"):
430 with m
.State("FETCH_PRED_IDLE"):
431 comb
+= pred_insn_ready_o
.eq(1)
432 with m
.If(pred_insn_valid_i
):
433 with m
.If(predmode
== SVP64PredMode
.INT
):
434 # skip fetching destination mask register, when zero
436 sync
+= self
.dstmask
.eq(-1)
437 # directly go to fetch source mask register
438 # guaranteed not to be zero (otherwise predmode
439 # would be SVP64PredMode.ALWAYS, not INT)
440 comb
+= int_pred
.addr
.eq(sregread
)
441 comb
+= int_pred
.ren
.eq(1)
442 m
.next
= "INT_SRC_READ"
443 # fetch destination predicate register
445 comb
+= int_pred
.addr
.eq(dregread
)
446 comb
+= int_pred
.ren
.eq(1)
447 m
.next
= "INT_DST_READ"
449 sync
+= self
.srcmask
.eq(-1)
450 sync
+= self
.dstmask
.eq(-1)
451 m
.next
= "FETCH_PRED_DONE"
453 with m
.State("INT_DST_READ"):
454 # store destination mask
455 inv
= Repl(dinvert
, 64)
456 sync
+= self
.dstmask
.eq(self
.int_pred
.data_o ^ inv
)
457 # skip fetching source mask register, when zero
459 sync
+= self
.srcmask
.eq(-1)
460 m
.next
= "FETCH_PRED_DONE"
461 # fetch source predicate register
463 comb
+= int_pred
.addr
.eq(sregread
)
464 comb
+= int_pred
.ren
.eq(1)
465 m
.next
= "INT_SRC_READ"
467 with m
.State("INT_SRC_READ"):
469 inv
= Repl(sinvert
, 64)
470 sync
+= self
.srcmask
.eq(self
.int_pred
.data_o ^ inv
)
471 m
.next
= "FETCH_PRED_DONE"
473 with m
.State("FETCH_PRED_DONE"):
474 comb
+= pred_mask_valid_o
.eq(1)
475 with m
.If(pred_mask_ready_i
):
476 m
.next
= "FETCH_PRED_IDLE"
478 def issue_fsm(self
, m
, core
, pc_changed
, sv_changed
, nia
,
479 dbg
, core_rst
, is_svp64_mode
,
480 fetch_pc_ready_o
, fetch_pc_valid_i
,
481 fetch_insn_valid_o
, fetch_insn_ready_i
,
482 pred_insn_valid_i
, pred_insn_ready_o
,
483 pred_mask_valid_o
, pred_mask_ready_i
,
484 exec_insn_valid_i
, exec_insn_ready_o
,
485 exec_pc_valid_o
, exec_pc_ready_i
):
488 decode / issue FSM. this interacts with the "fetch" FSM
489 through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
490 (outgoing). also interacts with the "execute" FSM
491 through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
493 SVP64 RM prefixes have already been set up by the
494 "fetch" phase, so execute is fairly straightforward.
499 pdecode2
= self
.pdecode2
500 cur_state
= self
.cur_state
503 dec_opcode_i
= pdecode2
.dec
.raw_opcode_in
# raw opcode
505 # for updating svstate (things like srcstep etc.)
506 update_svstate
= Signal() # set this (below) if updating
507 new_svstate
= SVSTATERec("new_svstate")
508 comb
+= new_svstate
.eq(cur_state
.svstate
)
510 # precalculate srcstep+1 and dststep+1
511 cur_srcstep
= cur_state
.svstate
.srcstep
512 cur_dststep
= cur_state
.svstate
.dststep
513 next_srcstep
= Signal
.like(cur_srcstep
)
514 next_dststep
= Signal
.like(cur_dststep
)
515 comb
+= next_srcstep
.eq(cur_state
.svstate
.srcstep
+1)
516 comb
+= next_dststep
.eq(cur_state
.svstate
.dststep
+1)
518 with m
.FSM(name
="issue_fsm"):
520 # sync with the "fetch" phase which is reading the instruction
521 # at this point, there is no instruction running, that
522 # could inadvertently update the PC.
523 with m
.State("ISSUE_START"):
524 # wait on "core stop" release, before next fetch
525 # need to do this here, in case we are in a VL==0 loop
526 with m
.If(~dbg
.core_stop_o
& ~core_rst
):
527 comb
+= fetch_pc_valid_i
.eq(1) # tell fetch to start
528 with m
.If(fetch_pc_ready_o
): # fetch acknowledged us
531 # tell core it's stopped, and acknowledge debug handshake
532 comb
+= core
.core_stopped_i
.eq(1)
533 comb
+= dbg
.core_stopped_i
.eq(1)
534 # while stopped, allow updating the PC and SVSTATE
535 with m
.If(self
.pc_i
.ok
):
536 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
537 comb
+= self
.state_w_pc
.data_i
.eq(self
.pc_i
.data
)
538 sync
+= pc_changed
.eq(1)
539 with m
.If(self
.svstate_i
.ok
):
540 comb
+= new_svstate
.eq(self
.svstate_i
.data
)
541 comb
+= update_svstate
.eq(1)
542 sync
+= sv_changed
.eq(1)
544 # decode the instruction when it arrives
545 with m
.State("INSN_WAIT"):
546 comb
+= fetch_insn_ready_i
.eq(1)
547 with m
.If(fetch_insn_valid_o
):
548 # decode the instruction
549 sync
+= core
.e
.eq(pdecode2
.e
)
550 sync
+= core
.state
.eq(cur_state
)
551 sync
+= core
.raw_insn_i
.eq(dec_opcode_i
)
552 sync
+= core
.bigendian_i
.eq(self
.core_bigendian_i
)
553 # set RA_OR_ZERO detection in satellite decoders
554 sync
+= core
.sv_a_nz
.eq(pdecode2
.sv_a_nz
)
555 # loop into ISSUE_START if it's a SVP64 instruction
556 # and VL == 0. this because VL==0 is a for-loop
557 # from 0 to 0 i.e. always, always a NOP.
558 cur_vl
= cur_state
.svstate
.vl
559 with m
.If(is_svp64_mode
& (cur_vl
== 0)):
560 # update the PC before fetching the next instruction
561 # since we are in a VL==0 loop, no instruction was
562 # executed that we could be overwriting
563 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
564 comb
+= self
.state_w_pc
.data_i
.eq(nia
)
565 comb
+= self
.insn_done
.eq(1)
566 m
.next
= "ISSUE_START"
569 m
.next
= "PRED_START" # start fetching predicate
571 m
.next
= "INSN_EXECUTE" # skip predication
573 with m
.State("PRED_START"):
574 comb
+= pred_insn_valid_i
.eq(1) # tell fetch_pred to start
575 with m
.If(pred_insn_ready_o
): # fetch_pred acknowledged us
578 with m
.State("MASK_WAIT"):
579 comb
+= pred_mask_ready_i
.eq(1) # ready to receive the masks
580 with m
.If(pred_mask_valid_o
): # predication masks are ready
581 # with m.If(is_svp64_mode):
582 # TODO advance src/dst step to "skip" over predicated-out
583 # from self.srcmask and self.dstmask
584 # https://bugs.libre-soc.org/show_bug.cgi?id=617#c3
585 # but still without exceeding VL in either case
586 # IMPORTANT: when changing src/dest step, have to
587 # jump to m.next = "DECODE_SV" to deal with the change in
590 with m
.If(is_svp64_mode
):
592 pred_src_zero
= pdecode2
.rm_dec
.pred_sz
593 pred_dst_zero
= pdecode2
.rm_dec
.pred_dz
596 TODO: actually, can use
597 PriorityEncoder(self.srcmask | (1<<cur_srcstep))
599 if not pred_src_zero:
600 if (((1<<cur_srcstep) & self.srcmask) == 0) and
602 comb += update_svstate.eq(1)
603 comb += new_svstate.srcstep.eq(next_srcstep)
605 if not pred_dst_zero:
606 if (((1<<cur_dststep) & self.dstmask) == 0) and
608 comb += new_svstate.dststep.eq(next_dststep)
609 comb += update_svstate.eq(1)
615 m
.next
= "INSN_EXECUTE"
617 # handshake with execution FSM, move to "wait" once acknowledged
618 with m
.State("INSN_EXECUTE"):
619 comb
+= exec_insn_valid_i
.eq(1) # trigger execute
620 with m
.If(exec_insn_ready_o
): # execute acknowledged us
621 m
.next
= "EXECUTE_WAIT"
623 with m
.State("EXECUTE_WAIT"):
624 # wait on "core stop" release, at instruction end
625 # need to do this here, in case we are in a VL>1 loop
626 with m
.If(~dbg
.core_stop_o
& ~core_rst
):
627 comb
+= exec_pc_ready_i
.eq(1)
628 with m
.If(exec_pc_valid_o
):
630 # was this the last loop iteration?
632 cur_vl
= cur_state
.svstate
.vl
633 comb
+= is_last
.eq(next_srcstep
== cur_vl
)
635 # if either PC or SVSTATE were changed by the previous
636 # instruction, go directly back to Fetch, without
637 # updating either PC or SVSTATE
638 with m
.If(pc_changed | sv_changed
):
639 m
.next
= "ISSUE_START"
641 # also return to Fetch, when no output was a vector
642 # (regardless of SRCSTEP and VL), or when the last
643 # instruction was really the last one of the VL loop
644 with m
.Elif((~pdecode2
.loop_continue
) | is_last
):
645 # before going back to fetch, update the PC state
646 # register with the NIA.
647 # ok here we are not reading the branch unit.
648 # TODO: this just blithely overwrites whatever
649 # pipeline updated the PC
650 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
651 comb
+= self
.state_w_pc
.data_i
.eq(nia
)
652 # reset SRCSTEP before returning to Fetch
653 with m
.If(pdecode2
.loop_continue
):
654 comb
+= new_svstate
.srcstep
.eq(0)
655 comb
+= new_svstate
.dststep
.eq(0)
656 comb
+= update_svstate
.eq(1)
657 m
.next
= "ISSUE_START"
659 # returning to Execute? then, first update SRCSTEP
661 comb
+= new_svstate
.srcstep
.eq(next_srcstep
)
662 comb
+= new_svstate
.dststep
.eq(next_dststep
)
663 comb
+= update_svstate
.eq(1)
667 comb
+= core
.core_stopped_i
.eq(1)
668 comb
+= dbg
.core_stopped_i
.eq(1)
669 # while stopped, allow updating the PC and SVSTATE
670 with m
.If(self
.pc_i
.ok
):
671 comb
+= self
.state_w_pc
.wen
.eq(1 << StateRegs
.PC
)
672 comb
+= self
.state_w_pc
.data_i
.eq(self
.pc_i
.data
)
673 sync
+= pc_changed
.eq(1)
674 with m
.If(self
.svstate_i
.ok
):
675 comb
+= new_svstate
.eq(self
.svstate_i
.data
)
676 comb
+= update_svstate
.eq(1)
677 sync
+= sv_changed
.eq(1)
679 # need to decode the instruction again, after updating SRCSTEP
680 # in the previous state.
681 # mostly a copy of INSN_WAIT, but without the actual wait
682 with m
.State("DECODE_SV"):
683 # decode the instruction
684 sync
+= core
.e
.eq(pdecode2
.e
)
685 sync
+= core
.state
.eq(cur_state
)
686 sync
+= core
.bigendian_i
.eq(self
.core_bigendian_i
)
687 sync
+= core
.sv_a_nz
.eq(pdecode2
.sv_a_nz
)
688 m
.next
= "INSN_EXECUTE" # move to "execute"
690 # check if svstate needs updating: if so, write it to State Regfile
691 with m
.If(update_svstate
):
692 comb
+= self
.state_w_sv
.wen
.eq(1<<StateRegs
.SVSTATE
)
693 comb
+= self
.state_w_sv
.data_i
.eq(new_svstate
)
694 sync
+= cur_state
.svstate
.eq(new_svstate
) # for next clock
def execute_fsm(self, m, core, pc_changed, sv_changed,
                exec_insn_valid_i, exec_insn_ready_o,
                exec_pc_valid_o, exec_pc_ready_i):
    """execute FSM

    execute FSM. this interacts with the "issue" FSM
    through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
    (outgoing). SVP64 RM prefixes have already been set up by the
    "issue" phase, so execute is fairly straightforward.

    * m: the Module that the FSM is added to
    * core: the core. busy_o, ivalid_i, issue_i and e.do.insn_type are used
    * pc_changed, sv_changed: cleared when an instruction is accepted,
      set when self.state_nia.wen has the PC / SVSTATE bit raised
    * exec_insn_valid_i / exec_insn_ready_o: incoming handshake
    * exec_pc_valid_o / exec_pc_ready_i: outgoing handshake

    self.insn_done is raised for the cycle in which the outgoing
    handshake completes.
    """
    comb = m.d.comb
    sync = m.d.sync

    # temporaries
    core_busy_o = core.busy_o                 # core is busy
    core_ivalid_i = core.ivalid_i             # instruction is valid
    core_issue_i = core.issue_i               # instruction is issued
    insn_type = core.e.do.insn_type           # instruction MicroOp type

    # NOTE(review): nesting below is reconstructed from statement order
    with m.FSM(name="exec_fsm"):

        # waiting for instruction bus (stays there until not busy)
        with m.State("INSN_START"):
            comb += exec_insn_ready_o.eq(1)
            with m.If(exec_insn_valid_i):
                comb += core_ivalid_i.eq(1)   # instruction is valid
                comb += core_issue_i.eq(1)    # and issued
                # start each instruction with both "changed" flags clear
                sync += sv_changed.eq(0)
                sync += pc_changed.eq(0)
                m.next = "INSN_ACTIVE"        # move to "wait completion"

        # instruction started: must wait till it finishes
        with m.State("INSN_ACTIVE"):
            with m.If(insn_type != MicrOp.OP_NOP):
                comb += core_ivalid_i.eq(1)   # instruction is valid
            # note changes to PC and SVSTATE
            with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
                sync += sv_changed.eq(1)
            with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
                sync += pc_changed.eq(1)
            with m.If(~core_busy_o):          # instruction done!
                comb += exec_pc_valid_o.eq(1)
                with m.If(exec_pc_ready_i):
                    comb += self.insn_done.eq(1)
                    m.next = "INSN_START"     # back to fetch
def setup_peripherals(self, m):
    """add the core, peripherals and clock/reset domains to the module

    registers the core (renamed into the "coresync" domain), instruction
    memory, debug unit and decoder as submodules, plus whichever optional
    peripherals were created, then builds the power-on reset delay.

    * m: the Module being elaborated

    returns the reset signal of the "coresync" domain, which is driven
    here from the power-on delay, the debug core reset and the main reset.
    """
    comb, sync = m.d.comb, m.d.sync

    m.submodules.core = core = DomainRenamer("coresync")(self.core)
    m.submodules.imem = self.imem
    m.submodules.dbg = dbg = self.dbg
    # NOTE(review): the guards on the optional peripherals below are
    # reconstructed from the enable flags set in __init__ - confirm
    if self.jtag_en:
        m.submodules.jtag = jtag = self.jtag
        # TODO: UART2GDB mux, here, from external pin
        # see https://bugs.libre-soc.org/show_bug.cgi?id=499
        sync += dbg.dmi.connect_to(jtag.dmi)

    cur_state = self.cur_state

    # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
    if self.sram4x4k:
        for i, sram in enumerate(self.sram4k):
            m.submodules["sram4k_%d" % i] = sram
            comb += sram.enable.eq(self.wb_sram_en)

    # XICS interrupt handler
    if self.xics:
        m.submodules.xics_icp = icp = self.xics_icp
        m.submodules.xics_ics = ics = self.xics_ics
        comb += icp.ics_i.eq(ics.icp_o)            # connect ICS to ICP
        sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core

    # GPIO test peripheral
    if self.gpio:
        m.submodules.simple_gpio = self.simple_gpio

    # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
    # XXX causes litex ECP5 test to get wrong idea about input and output
    # (but works with verilator sim *sigh*)
    #if self.gpio and self.xics:
    #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

    # instruction decoder
    # NOTE(review): the result is unused in this method; the call is kept
    # in case constructing the decoder has side effects - confirm
    pdecode = create_pdecode()
    m.submodules.dec2 = pdecode2 = self.pdecode2
    if self.svp64_en:
        m.submodules.svp64 = self.svp64

    # clock delay power-on reset
    cd_por = ClockDomain(reset_less=True)
    cd_sync = ClockDomain()
    core_sync = ClockDomain("coresync")
    m.domains += cd_por, cd_sync, core_sync

    ti_rst = Signal(reset_less=True)
    delay = Signal(range(4), reset=3)
    with m.If(delay != 0):
        m.d.por += delay.eq(delay - 1)
    comb += cd_por.clk.eq(ClockSignal())

    # power-on reset delay
    core_rst = ResetSignal("coresync")
    # the comparison must be bracketed: "|" binds tighter than "!=", so
    # without the brackets delay is compared against the OR of the resets
    comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
    comb += core_rst.eq(ti_rst)

    # busy/halted signals from core
    comb += self.busy_o.eq(core.busy_o)
    comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

    # temporary hack: says "go" immediately for both address gen and ST
    ldst = core.fus.fus['ldst0']
    st_go_edge = rising_edge(m, ldst.st.rel_o)
    m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)  # link addr-go direct to rel
    m.d.comb += ldst.st.go_i.eq(st_go_edge)     # link store-go to rising rel

    # elaborate() hands this on to the issue FSM
    return core_rst
821 def elaborate(self
, platform
):
824 comb
, sync
= m
.d
.comb
, m
.d
.sync
825 cur_state
= self
.cur_state
826 pdecode2
= self
.pdecode2
830 # set up peripherals and core
831 core_rst
= self
.setup_peripherals(m
)
833 # PC and instruction from I-Memory
834 comb
+= self
.pc_o
.eq(cur_state
.pc
)
835 pc_changed
= Signal() # note write to PC
836 sv_changed
= Signal() # note write to SVSTATE
838 # read state either from incoming override or from regfile
839 # TODO: really should be doing MSR in the same way
840 pc
= state_get(m
, self
.pc_i
, "pc", # read PC
841 self
.state_r_pc
, StateRegs
.PC
)
842 svstate
= state_get(m
, self
.svstate_i
, "svstate", # read SVSTATE
843 self
.state_r_sv
, StateRegs
.SVSTATE
)
845 # don't write pc every cycle
846 comb
+= self
.state_w_pc
.wen
.eq(0)
847 comb
+= self
.state_w_pc
.data_i
.eq(0)
849 # don't read msr every cycle
850 comb
+= self
.state_r_msr
.ren
.eq(0)
852 # address of the next instruction, in the absence of a branch
853 # depends on the instruction size
854 nia
= Signal(64, reset_less
=True)
856 # connect up debug signals
857 # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
858 comb
+= dbg
.terminate_i
.eq(core
.core_terminate_o
)
859 comb
+= dbg
.state
.pc
.eq(pc
)
860 comb
+= dbg
.state
.svstate
.eq(svstate
)
861 comb
+= dbg
.state
.msr
.eq(cur_state
.msr
)
863 # pass the prefix mode from Fetch to Issue, so the latter can loop
865 is_svp64_mode
= Signal()
867 # there are *THREE* FSMs, fetch (32/64-bit) issue, decode/execute.
868 # these are the handshake signals between fetch and decode/execute
870 # fetch FSM can run as soon as the PC is valid
871 fetch_pc_valid_i
= Signal() # Execute tells Fetch "start next read"
872 fetch_pc_ready_o
= Signal() # Fetch Tells SVSTATE "proceed"
874 # fetch FSM hands over the instruction to be decoded / issued
875 fetch_insn_valid_o
= Signal()
876 fetch_insn_ready_i
= Signal()
878 # predicate fetch FSM decodes and fetches the predicate
879 pred_insn_valid_i
= Signal()
880 pred_insn_ready_o
= Signal()
882 # predicate fetch FSM delivers the masks
883 pred_mask_valid_o
= Signal()
884 pred_mask_ready_i
= Signal()
886 # issue FSM delivers the instruction to the be executed
887 exec_insn_valid_i
= Signal()
888 exec_insn_ready_o
= Signal()
890 # execute FSM, hands over the PC/SVSTATE back to the issue FSM
891 exec_pc_valid_o
= Signal()
892 exec_pc_ready_i
= Signal()
894 # the FSMs here are perhaps unusual in that they detect conditions
895 # then "hold" information, combinatorially, for the core
896 # (as opposed to using sync - which would be on a clock's delay)
897 # this includes the actual opcode, valid flags and so on.
899 # Fetch, then predicate fetch, then Issue, then Execute.
900 # Issue is where the VL for-loop # lives. the ready/valid
901 # signalling is used to communicate between the four.
903 self
.fetch_fsm(m
, core
, pc
, svstate
, nia
, is_svp64_mode
,
904 fetch_pc_ready_o
, fetch_pc_valid_i
,
905 fetch_insn_valid_o
, fetch_insn_ready_i
)
907 self
.issue_fsm(m
, core
, pc_changed
, sv_changed
, nia
,
908 dbg
, core_rst
, is_svp64_mode
,
909 fetch_pc_ready_o
, fetch_pc_valid_i
,
910 fetch_insn_valid_o
, fetch_insn_ready_i
,
911 pred_insn_valid_i
, pred_insn_ready_o
,
912 pred_mask_valid_o
, pred_mask_ready_i
,
913 exec_insn_valid_i
, exec_insn_ready_o
,
914 exec_pc_valid_o
, exec_pc_ready_i
)
917 self
.fetch_predicate_fsm(m
,
918 pred_insn_valid_i
, pred_insn_ready_o
,
919 pred_mask_valid_o
, pred_mask_ready_i
)
921 self
.execute_fsm(m
, core
, pc_changed
, sv_changed
,
922 exec_insn_valid_i
, exec_insn_ready_o
,
923 exec_pc_valid_o
, exec_pc_ready_i
)
925 # this bit doesn't have to be in the FSM: connect up to read
926 # regfiles on demand from DMI
929 # DEC and TB inc/dec FSM. copy of DEC is put into CoreState,
930 # (which uses that in PowerDecoder2 to raise 0x900 exception)
931 self
.tb_dec_fsm(m
, cur_state
.dec
)
935 def do_dmi(self
, m
, dbg
):
936 """deals with DMI debug requests
938 currently only provides read requests for the INT regfile, CR and XER
939 it will later also deal with *writing* to these regfiles.
943 dmi
, d_reg
, d_cr
, d_xer
, = dbg
.dmi
, dbg
.d_gpr
, dbg
.d_cr
, dbg
.d_xer
944 intrf
= self
.core
.regs
.rf
['int']
946 with m
.If(d_reg
.req
): # request for regfile access being made
947 # TODO: error-check this
948 # XXX should this be combinatorial? sync better?
950 comb
+= self
.int_r
.ren
.eq(1<<d_reg
.addr
)
952 comb
+= self
.int_r
.addr
.eq(d_reg
.addr
)
953 comb
+= self
.int_r
.ren
.eq(1)
954 d_reg_delay
= Signal()
955 sync
+= d_reg_delay
.eq(d_reg
.req
)
956 with m
.If(d_reg_delay
):
957 # data arrives one clock later
958 comb
+= d_reg
.data
.eq(self
.int_r
.data_o
)
959 comb
+= d_reg
.ack
.eq(1)
961 # sigh same thing for CR debug
962 with m
.If(d_cr
.req
): # request for regfile access being made
963 comb
+= self
.cr_r
.ren
.eq(0b11111111) # enable all
964 d_cr_delay
= Signal()
965 sync
+= d_cr_delay
.eq(d_cr
.req
)
966 with m
.If(d_cr_delay
):
967 # data arrives one clock later
968 comb
+= d_cr
.data
.eq(self
.cr_r
.data_o
)
969 comb
+= d_cr
.ack
.eq(1)
972 with m
.If(d_xer
.req
): # request for regfile access being made
973 comb
+= self
.xer_r
.ren
.eq(0b111111) # enable all
974 d_xer_delay
= Signal()
975 sync
+= d_xer_delay
.eq(d_xer
.req
)
976 with m
.If(d_xer_delay
):
977 # data arrives one clock later
978 comb
+= d_xer
.data
.eq(self
.xer_r
.data_o
)
979 comb
+= d_xer
.ack
.eq(1)
981 def tb_dec_fsm(self
, m
, spr_dec
):
984 this is a FSM for updating either dec or tb. it runs alternately
985 DEC, TB, DEC, TB. note that SPR pipeline could have written a new
986 value to DEC, however the regfile has "passthrough" on it so this
989 see v3.0B p1097-1099 for Timer Resource and p1065 and p1076
992 comb
, sync
= m
.d
.comb
, m
.d
.sync
993 fast_rf
= self
.core
.regs
.rf
['fast']
994 fast_r_dectb
= fast_rf
.r_ports
['issue'] # DEC/TB
995 fast_w_dectb
= fast_rf
.w_ports
['issue'] # DEC/TB
999 # initiates read of current DEC
1000 with m
.State("DEC_READ"):
1001 comb
+= fast_r_dectb
.addr
.eq(FastRegs
.DEC
)
1002 comb
+= fast_r_dectb
.ren
.eq(1)
1003 m
.next
= "DEC_WRITE"
1005 # waits for DEC read to arrive (1 cycle), updates with new value
1006 with m
.State("DEC_WRITE"):
1007 new_dec
= Signal(64)
1008 # TODO: MSR.LPCR 32-bit decrement mode
1009 comb
+= new_dec
.eq(fast_r_dectb
.data_o
- 1)
1010 comb
+= fast_w_dectb
.addr
.eq(FastRegs
.DEC
)
1011 comb
+= fast_w_dectb
.wen
.eq(1)
1012 comb
+= fast_w_dectb
.data_i
.eq(new_dec
)
1013 sync
+= spr_dec
.eq(new_dec
) # copy into cur_state for decoder
1016 # initiates read of current TB
1017 with m
.State("TB_READ"):
1018 comb
+= fast_r_dectb
.addr
.eq(FastRegs
.TB
)
1019 comb
+= fast_r_dectb
.ren
.eq(1)
1022 # waits for read TB to arrive, initiates write of current TB
1023 with m
.State("TB_WRITE"):
1025 comb
+= new_tb
.eq(fast_r_dectb
.data_o
+ 1)
1026 comb
+= fast_w_dectb
.addr
.eq(FastRegs
.TB
)
1027 comb
+= fast_w_dectb
.wen
.eq(1)
1028 comb
+= fast_w_dectb
.data_i
.eq(new_tb
)
1034 yield from self
.pc_i
.ports()
1037 yield from self
.core
.ports()
1038 yield from self
.imem
.ports()
1039 yield self
.core_bigendian_i
1045 def external_ports(self
):
1046 ports
= self
.pc_i
.ports()
1047 ports
+= [self
.pc_o
, self
.memerr_o
, self
.core_bigendian_i
, self
.busy_o
,
1051 ports
+= list(self
.jtag
.external_ports())
1053 # don't add DMI if JTAG is enabled
1054 ports
+= list(self
.dbg
.dmi
.ports())
1056 ports
+= list(self
.imem
.ibus
.fields
.values())
1057 ports
+= list(self
.core
.l0
.cmpi
.lsmem
.lsi
.slavebus
.fields
.values())
1060 for sram
in self
.sram4k
:
1061 ports
+= list(sram
.bus
.fields
.values())
1064 ports
+= list(self
.xics_icp
.bus
.fields
.values())
1065 ports
+= list(self
.xics_ics
.bus
.fields
.values())
1066 ports
.append(self
.int_level_i
)
1069 ports
+= list(self
.simple_gpio
.bus
.fields
.values())
1070 ports
.append(self
.gpio_o
)
1078 class TestIssuer(Elaboratable
):
1079 def __init__(self
, pspec
):
1080 self
.ti
= TestIssuerInternal(pspec
)
1082 self
.pll
= DummyPLL()
1084 # PLL direct clock or not
1085 self
.pll_en
= hasattr(pspec
, "use_pll") and pspec
.use_pll
1087 self
.pll_18_o
= Signal(reset_less
=True)
1089 def elaborate(self
, platform
):
1093 # TestIssuer runs at direct clock
1094 m
.submodules
.ti
= ti
= self
.ti
1095 cd_int
= ClockDomain("coresync")
1098 # ClockSelect runs at PLL output internal clock rate
1099 m
.submodules
.pll
= pll
= self
.pll
1101 # add clock domains from PLL
1102 cd_pll
= ClockDomain("pllclk")
1105 # PLL clock established. has the side-effect of running clklsel
1106 # at the PLL's speed (see DomainRenamer("pllclk") above)
1107 pllclk
= ClockSignal("pllclk")
1108 comb
+= pllclk
.eq(pll
.clk_pll_o
)
1110 # wire up external 24mhz to PLL
1111 comb
+= pll
.clk_24_i
.eq(ClockSignal())
1113 # output 18 mhz PLL test signal
1114 comb
+= self
.pll_18_o
.eq(pll
.pll_18_o
)
1116 # now wire up ResetSignals. don't mind them being in this domain
1117 pll_rst
= ResetSignal("pllclk")
1118 comb
+= pll_rst
.eq(ResetSignal())
1120 # internal clock is set to selector clock-out. has the side-effect of
1121 # running TestIssuer at this speed (see DomainRenamer("intclk") above)
1122 intclk
= ClockSignal("coresync")
1124 comb
+= intclk
.eq(pll
.clk_pll_o
)
1126 comb
+= intclk
.eq(ClockSignal())
1131 return list(self
.ti
.ports()) + list(self
.pll
.ports()) + \
1132 [ClockSignal(), ResetSignal()]
1134 def external_ports(self
):
1135 ports
= self
.ti
.external_ports()
1136 ports
.append(ClockSignal())
1137 ports
.append(ResetSignal())
1139 ports
.append(self
.pll
.clk_sel_i
)
1140 ports
.append(self
.pll_18_o
)
1141 ports
.append(self
.pll
.pll_lck_o
)
1145 if __name__
== '__main__':
1146 units
= {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
1152 pspec
= TestMemPspec(ldst_ifacetype
='bare_wb',
1153 imem_ifacetype
='bare_wb',
1158 dut
= TestIssuer(pspec
)
1159 vl
= main(dut
, ports
=dut
.ports(), name
="test_issuer")
1161 if len(sys
.argv
) == 1:
1162 vl
= rtlil
.convert(dut
, ports
=dut
.external_ports(), name
="test_issuer")
1163 with
open("test_issuer.il", "w") as f
: