Fix syntax
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer, Mux, Const)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from soc.decoder.power_decoder import create_pdecode
25 from soc.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
26 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
27 from soc.decoder.decode2execute1 import Data
28 from soc.experiment.testmem import TestMemory # test only for instructions
29 from soc.regfile.regfiles import StateRegs, FastRegs
30 from soc.simple.core import NonProductionCore
31 from soc.config.test.test_loadstore import TestMemPspec
32 from soc.config.ifetch import ConfigFetchUnit
33 from soc.decoder.power_enums import MicrOp, SVP64PredInt, SVP64PredCR
34 from soc.debug.dmi import CoreDebug, DMIInterface
35 from soc.debug.jtag import JTAG
36 from soc.config.pinouts import get_pinspecs
37 from soc.config.state import CoreState
38 from soc.interrupts.xics import XICS_ICP, XICS_ICS
39 from soc.bus.simple_gpio import SimpleGPIO
40 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
41 from soc.clock.select import ClockSelect
42 from soc.clock.dummypll import DummyPLL
43 from soc.sv.svstate import SVSTATERec
44
45
46 from nmutil.util import rising_edge
47
def get_insn(f_instr_o, pc):
    """pick the 32-bit instruction word out of the fetch unit output

    f_instr_o: instruction data from the fetch unit (must have a "width"
               attribute and, when not 32 bits wide, "word_select")
    pc:        program counter; only bit 2 is examined

    returns f_instr_o unchanged when it is already 32 bits wide, otherwise
    the 32-bit word of f_instr_o selected by pc[2]
    """
    if f_instr_o.width != 32:
        # wider (64-bit) fetch: bit 2 of the pc picks low or high word
        return f_instr_o.word_select(pc[2], 32)
    # already a single 32-bit word: nothing to select
    return f_instr_o
54
55 # gets state input or reads from state regfile
def state_get(m, state_i, name, regfile, regnum):
    """return a 64-bit signal holding either an override or a regfile value

    m:       module being built (statements are added to m.d.comb/m.d.sync)
    state_i: override record with "ok" and "data" fields
    name:    name given to the result signal (and prefix of the delay flag)
    regfile: regfile read port ("ren" request, "data_o" result)
    regnum:  register number; the read-enable is driven with 1<<regnum

    when state_i.ok is set the result is state_i.data.  otherwise a read
    of the regfile is requested, and regfile.data_o is passed through on
    the clock after state_i.ok was seen low.
    """
    comb, sync = m.d.comb, m.d.sync

    res = Signal(64, reset_less=True, name=name)
    # remembers (one clock later) that no override was present
    res_ok_delay = Signal(name="%s_ok_delay" % name)
    sync += res_ok_delay.eq(~state_i.ok)

    with m.If(state_i.ok):
        # override supplied by the caller: use it directly
        comb += res.eq(state_i.data)
    with m.Else():
        # no override: request a read of the regfile entry
        comb += regfile.ren.eq(1 << regnum)

    # the regfile data is only picked up one clock after the request
    with m.If(res_ok_delay):
        comb += res.eq(regfile.data_o)

    return res
73
def get_predint(m, mask):
    """decode an SVP64 integer-predicate mask field

    does not read any register: it only works out "what needs to be done",
    returning three signals:

    * regread - number of the INT register to read (5 bits)
    * invert  - set when the register value is to be bit-inverted
    * unary   - set for the R3_UNARY encoding

    stated in the original code to match the equivalent function in
    ISACaller, apart from not performing the register read itself.
    """
    comb = m.d.comb
    regread = Signal(5)
    invert = Signal()
    unary = Signal()

    # (mask encoding, INT reg number, drive invert?, drive unary?)
    # flags left False are simply not driven (they stay at reset value)
    decode_table = (
        (SVP64PredInt.ALWAYS,   0,  True,  False),
        (SVP64PredInt.R3_UNARY, 3,  False, True),
        (SVP64PredInt.R3,       3,  False, False),
        (SVP64PredInt.R3_N,     3,  True,  False),
        (SVP64PredInt.R10,      10, False, False),
        (SVP64PredInt.R10_N,    10, True,  False),
        (SVP64PredInt.R30,      30, False, False),
        (SVP64PredInt.R30_N,    30, True,  False),
    )

    with m.Switch(mask):
        for encoding, regnum, set_invert, set_unary in decode_table:
            with m.Case(encoding):
                comb += regread.eq(regnum)
                if set_invert:
                    comb += invert.eq(1)
                if set_unary:
                    comb += unary.eq(1)

    return regread, invert, unary
107
def get_predcr(m, mask):
    """decode SVP64 predicate CR to reg number field and invert status

    returns two signals:

    * idx    - which bit of a 4-bit CR field to test (2 bits)
    * invert - invert flag associated with that test

    each complementary pair of conditions (LT/GE, GT/LE, EQ/NE, SO/NS)
    selects the same CR bit and differs only in the invert flag.  the
    original code states this is intended to match _get_predcr in ISACaller.
    """
    comb = m.d.comb
    idx = Signal(2)
    invert = Signal()

    # (mask encoding, CR field bit index, invert flag)
    decode_table = (
        (SVP64PredCR.LT, 0, 1),
        (SVP64PredCR.GE, 0, 0),
        (SVP64PredCR.GT, 1, 1),
        (SVP64PredCR.LE, 1, 0),
        (SVP64PredCR.EQ, 2, 1),
        # NE is the complement of EQ so must test the same bit (2).
        # it previously used index 1, making it indistinguishable from LE
        (SVP64PredCR.NE, 2, 0),
        (SVP64PredCR.SO, 3, 1),
        (SVP64PredCR.NS, 3, 0),
    )

    with m.Switch(mask):
        for encoding, bit_index, inv in decode_table:
            with m.Case(encoding):
                comb += idx.eq(bit_index)
                comb += invert.eq(inv)

    return idx, invert
141
142
143 class TestIssuerInternal(Elaboratable):
144 """TestIssuer - reads instructions from TestMemory and issues them
145
146 efficiency and speed is not the main goal here: functional correctness
147 and code clarity is. optimisations (which almost 100% interfere with
148 easy understanding) come later.
149 """
def __init__(self, pspec):
    """create the core, decoder, fetch unit, debug and optional peripherals

    pspec is a parameter-specification object.  optional attributes
    examined here: svp64, debug, sram4x4kblock, xics, gpio.  note that
    when JTAG is enabled pspec is *modified* (cache-enable signals are
    added to it) before it is handed on to the core and fetch unit.
    """
    # test if SVP64 is to be enabled
    self.svp64_en = getattr(pspec, "svp64", None) == True

    # JTAG interface.  add this right at the start because if it's
    # added it *modifies* the pspec, by adding enable/disable signals
    # for parts of the rest of the core
    self.jtag_en = getattr(pspec, "debug", None) == 'jtag'
    if self.jtag_en:
        subset = {'uart', 'mtwi', 'eint', 'gpio', 'mspi0', 'mspi1',
                  'pwm', 'sd0', 'sdr'}
        self.jtag = JTAG(get_pinspecs(subset=subset))
        # add signals to pspec to enable/disable icache and dcache
        # (or data and instruction wishbone if icache/dcache not included)
        # https://bugs.libre-soc.org/show_bug.cgi?id=520
        # TODO: do we actually care if these are not domain-synchronised?
        # honestly probably not.
        pspec.wb_icache_en = self.jtag.wb_icache_en
        pspec.wb_dcache_en = self.jtag.wb_dcache_en
        self.wb_sram_en = self.jtag.wb_sram_en
    else:
        # no JTAG: SRAM is permanently enabled
        self.wb_sram_en = Const(1)

    # optional: four 4k SRAM blocks
    self.sram4x4k = getattr(pspec, "sram4x4kblock", None) == True
    if self.sram4x4k:
        self.sram4k = [SPBlock512W64B8W(name="sram4k_%d" % i,
                                        features={'err'})
                       for i in range(4)]

    # optional: XICS interrupt controller (ICP + ICS)
    self.xics = getattr(pspec, "xics", None) == True
    if self.xics:
        self.xics_icp = XICS_ICP()
        self.xics_ics = XICS_ICS()
        self.int_level_i = self.xics_ics.int_level_i

    # optional: GPIO peripheral
    self.gpio = getattr(pspec, "gpio", None) == True
    if self.gpio:
        self.simple_gpio = SimpleGPIO()
        self.gpio_o = self.simple_gpio.gpio_o

    # main instruction core.  suitable for prototyping / demo only
    self.core = NonProductionCore(pspec)

    # instruction decoder.  goes into Trap Record
    pdecode = create_pdecode()
    self.cur_state = CoreState("cur")  # current state (MSR/PC/SVSTATE)
    self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state,
                                 opkls=IssuerDecode2ToOperand,
                                 svp64_en=self.svp64_en)
    if self.svp64_en:
        self.svp64 = SVP64PrefixDecoder()  # for decoding SVP64 prefix

    # Test Instruction memory
    self.imem = ConfigFetchUnit(pspec).fu

    # DMI interface
    self.dbg = CoreDebug()

    # instruction go/monitor
    self.pc_o = Signal(64, reset_less=True)
    self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
    self.svstate_i = Data(32, "svstate_i")  # ditto
    self.core_bigendian_i = Signal()  # TODO: set based on MSR.LE
    self.busy_o = Signal(reset_less=True)
    self.memerr_o = Signal(reset_less=True)

    # all register files live inside the core
    regfiles = self.core.regs.rf

    # STATE regfile read/write ports for PC, MSR, SVSTATE
    staterf = regfiles['state']
    self.state_r_pc = staterf.r_ports['cia']    # PC rd
    self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
    self.state_r_msr = staterf.r_ports['msr']   # MSR rd
    self.state_r_sv = staterf.r_ports['sv']     # SVSTATE rd
    self.state_w_sv = staterf.w_ports['sv']     # SVSTATE wr

    # DMI interface access
    intrf = regfiles['int']
    crrf = regfiles['cr']
    xerrf = regfiles['xer']
    self.int_r = intrf.r_ports['dmi']         # INT read
    self.cr_r = crrf.r_ports['full_cr_dbg']   # CR read
    self.xer_r = xerrf.r_ports['full_xer']    # XER read

    # for predication
    self.int_pred = intrf.r_ports['pred']     # INT predicate read
    self.cr_pred = crrf.r_ports['cr_pred']    # CR predicate read

    # hack method of keeping an eye on whether branch/trap set the PC
    self.state_nia = regfiles['state'].w_ports['nia']
    self.state_nia.wen.name = 'state_nia_wen'

    # pulse to synchronize the simulator at instruction end
    self.insn_done = Signal()

    if self.svp64_en:
        # store copies of predicate masks
        self.srcmask = Signal(64)
        self.dstmask = Signal(64)
253
def fetch_fsm(self, m, core, pc, svstate, nia, is_svp64_mode,
              fetch_pc_ready_o, fetch_pc_valid_i,
              fetch_insn_valid_o, fetch_insn_ready_i):
    """fetch FSM

    reads raw instruction data 32 bits at a time.  when SVP64 is enabled
    the first word is run through the prefix decoder and, if it turns out
    to be a prefix, a second 32-bit word is read from pc+4.

    handshakes: fetch_pc_ready_o/fetch_pc_valid_i start a fetch,
    fetch_insn_valid_o/fetch_insn_ready_i hand the instruction over.
    nia receives the address of the following instruction and
    is_svp64_mode records whether a prefix was seen.  "core" is unused.
    """
    comb = m.d.comb
    sync = m.d.sync
    imem = self.imem
    pdecode2 = self.pdecode2
    cur_state = self.cur_state
    dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

    # cleared when an MSR read is requested, set once it has been captured
    msr_read = Signal(reset=1)

    with m.FSM(name='fetch_fsm'):

        # waiting (zzz)
        with m.State("IDLE"):
            comb += fetch_pc_ready_o.eq(1)
            with m.If(fetch_pc_valid_i):
                # instruction allowed to go: start by reading the PC
                # capture the PC and also drop it into Insn Memory
                # we have joined a pair of combinatorial memory
                # lookups together.  this is Generally Bad.
                comb += imem.a_pc_i.eq(pc)
                comb += imem.a_valid_i.eq(1)
                comb += imem.f_valid_i.eq(1)
                sync += cur_state.pc.eq(pc)
                sync += cur_state.svstate.eq(svstate)  # and svstate

                # initiate read of MSR.  arrives one clock later
                comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
                sync += msr_read.eq(0)

                m.next = "INSN_READ"  # move to "wait for bus" phase

        # dummy pause to find out why simulation is not keeping up
        with m.State("INSN_READ"):
            # one cycle later, msr/sv read arrives.  valid only once.
            with m.If(~msr_read):
                sync += msr_read.eq(1)  # yeah don't read it again
                sync += cur_state.msr.eq(self.state_r_msr.data_o)
            with m.If(imem.f_busy_o):  # zzz...
                # busy: keep the request asserted and stay here
                comb += imem.a_valid_i.eq(1)
                comb += imem.f_valid_i.eq(1)
            with m.Else():
                # not busy: instruction fetched
                insn = get_insn(imem.f_instr_o, cur_state.pc)
                if self.svp64_en:
                    svp64 = self.svp64
                    # decode the SVP64 prefix, if any
                    comb += svp64.raw_opcode_in.eq(insn)
                    comb += svp64.bigendian.eq(self.core_bigendian_i)
                    # pass the decoded prefix (if any) to PowerDecoder2
                    sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                    # remember whether this is a prefixed instruction, so
                    # the FSM can readily loop when VL==0
                    sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                    # next instruction is 8 bytes on if prefixed, else 4
                    insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                    sync += nia.eq(cur_state.pc + insn_size)
                    with m.If(~svp64.is_svp64_mode):
                        # with no prefix, store the instruction
                        # and hand it directly to the next FSM
                        sync += dec_opcode_i.eq(insn)
                        m.next = "INSN_READY"
                    with m.Else():
                        # fetch the rest of the instruction from memory
                        comb += imem.a_pc_i.eq(cur_state.pc + 4)
                        comb += imem.a_valid_i.eq(1)
                        comb += imem.f_valid_i.eq(1)
                        m.next = "INSN_READ2"
                else:
                    # not SVP64 - 32-bit only
                    sync += nia.eq(cur_state.pc + 4)
                    sync += dec_opcode_i.eq(insn)
                    m.next = "INSN_READY"

        # second word of a prefixed (64-bit) instruction
        with m.State("INSN_READ2"):
            with m.If(imem.f_busy_o):  # zzz...
                # busy: keep the request asserted and stay here
                comb += imem.a_valid_i.eq(1)
                comb += imem.f_valid_i.eq(1)
            with m.Else():
                # not busy: instruction fetched
                insn = get_insn(imem.f_instr_o, cur_state.pc + 4)
                sync += dec_opcode_i.eq(insn)
                m.next = "INSN_READY"
                # TODO: probably can start looking at pdecode2.rm_dec
                # here or maybe even in INSN_READ state, if svp64_mode
                # detected, in order to trigger - and wait for - the
                # predicate reading.
                pmode = pdecode2.rm_dec.predmode
                # planned (not yet implemented):
                #   if pmode != SVP64PredMode.ALWAYS.value:
                #       fire predicate loading FSM and wait before
                #       moving to INSN_READY
                #   else:
                #       sync += self.srcmask.eq(-1) # set to all 1s
                #       sync += self.dstmask.eq(-1) # set to all 1s
                #       m.next = "INSN_READY"

        with m.State("INSN_READY"):
            # hand over the instruction, to be decoded
            comb += fetch_insn_valid_o.eq(1)
            with m.If(fetch_insn_ready_i):
                m.next = "IDLE"
366
def fetch_predicate_fsm(self, m,
                        pred_insn_valid_i, pred_insn_ready_o,
                        pred_mask_valid_o, pred_mask_ready_i):
    """fetch_predicate_fsm - obtains (constructs in the case of CR)
    src/dest predicate masks

    https://bugs.libre-soc.org/show_bug.cgi?id=617
    the predicates can be read here, by using IntRegs r_ports['pred']
    or CRRegs r_ports['pred'].  in the case of CRs it will have to
    be done through multiple reads, extracting one relevant at a time.
    later, a faster way would be to use the 32-bit-wide CR port but
    this is more complex decoding, here.  equivalent code used in
    ISACaller is "from soc.decoder.isa.caller import get_predcr"

    at present no predicate is actually read: on request both masks are
    simply set to all 1s, and the result handshake is then raised.

    pred_insn_valid_i/pred_insn_ready_o: request handshake (from issue)
    pred_mask_valid_o/pred_mask_ready_i: result handshake (to issue)
    """
    comb = m.d.comb
    sync = m.d.sync

    # sketch of the intended implementation (not yet written).
    # inputs would be pdecode2.rm_dec (predmode, srcpred, dstpred) and
    # the regfile read ports self.int_pred / self.cr_pred:
    #
    # if predmode == INT:
    #    INT-src sregread, sinvert, sunary = get_predint(m, srcpred)
    #    INT-dst dregread, dinvert, dunary = get_predint(m, dstpred)
    #    TODO read INT-src and INT-dst into self.srcmask+dstmask
    #         has to cope with first one then the other
    #    FSM-triggered-int-read
    #       comb += int_pred.addr.eq(d_reg.addr)
    #       comb += int_pred.ren.eq(1)
    #    FSM-1-clock-later
    #       comb += d_reg.data.eq(self.int_r.data_o)
    # elif predmode == CR:
    #    CR-src sidx, sinvert = get_predcr(m, srcpred)
    #    CR-dst didx, dinvert = get_predcr(m, dstpred)
    #    TODO read CR-src and CR-dst into self.srcmask+dstmask with loop
    #         has to cope with first one then the other
    #    for cr_idx = FSM-state-loop(0..VL-1):
    #       FSM-state-trigger-CR-read:
    #           cr_ren = (1<<7-(cr_idx+SVP64CROffs.CRPred))
    #           comb += cr_pred.ren.eq(cr_ren)
    #       FSM-state-1-clock-later-actual-Read:
    #           cr_field = Signal(4)
    #           cr_bit = Signal(1)
    #           # read the CR field, select the appropriate bit
    #           comb += cr_field.eq(cr_pred.data_o)
    #           comb += cr_bit.eq(cr_field.bit_select(idx))
    #           # just like in branch BO tests
    #           comb += self.srcmask[cr_idx].eq(inv ^ cr_bit)
    # else
    #    sync += self.srcmask.eq(-1) # set to all 1s
    #    sync += self.dstmask.eq(-1) # set to all 1s
    with m.FSM(name="fetch_predicate"):

        with m.State("FETCH_PRED_IDLE"):
            comb += pred_insn_ready_o.eq(1)
            with m.If(pred_insn_valid_i):
                # srcmask/dstmask are only created when SVP64 is enabled,
                # so they must not be touched otherwise.  the handshake
                # itself still has to run because the issue FSM always
                # goes through PRED_START/MASK_WAIT.
                if self.svp64_en:
                    sync += self.srcmask.eq(-1)  # set to all 1s
                    sync += self.dstmask.eq(-1)  # set to all 1s
                m.next = "FETCH_PRED_DONE"

        with m.State("FETCH_PRED_DONE"):
            # masks are available: wait for the issue FSM to take them
            comb += pred_mask_valid_o.eq(1)
            with m.If(pred_mask_ready_i):
                m.next = "FETCH_PRED_IDLE"
431
def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
              dbg, core_rst, is_svp64_mode,
              fetch_pc_ready_o, fetch_pc_valid_i,
              fetch_insn_valid_o, fetch_insn_ready_i,
              pred_insn_valid_i, pred_insn_ready_o,
              pred_mask_valid_o, pred_mask_ready_i,
              exec_insn_valid_i, exec_insn_ready_o,
              exec_pc_valid_o, exec_pc_ready_i):
    """issue FSM

    decode / issue FSM.  this interacts with the "fetch" FSM
    through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
    (outgoing), with the predicate-fetch FSM through pred_insn_* and
    pred_mask_*, and with the "execute" FSM through exec_insn_ready/valid
    (outgoing) and exec_pc_ready/valid (incoming).
    SVP64 RM prefixes have already been set up by the
    "fetch" phase, so execute is fairly straightforward.

    this is also where the VL loop lives: srcstep/dststep are advanced
    here and written back to the SVSTATE entry of the State regfile.
    """
    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2
    cur_state = self.cur_state

    # temporaries
    dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

    # for updating svstate (things like srcstep etc.)
    update_svstate = Signal()  # set this (below) if updating
    new_svstate = SVSTATERec("new_svstate")
    comb += new_svstate.eq(cur_state.svstate)  # default: unchanged

    # precalculate srcstep+1 and dststep+1
    cur_srcstep = cur_state.svstate.srcstep
    cur_dststep = cur_state.svstate.dststep
    next_srcstep = Signal.like(cur_srcstep)
    next_dststep = Signal.like(cur_dststep)
    comb += next_srcstep.eq(cur_state.svstate.srcstep + 1)
    comb += next_dststep.eq(cur_state.svstate.dststep + 1)

    def stopped_allow_overrides():
        # shared by ISSUE_START and EXECUTE_WAIT: report "stopped" to
        # both core and debug module and, while stopped, accept new
        # PC / SVSTATE values from pc_i / svstate_i
        m.d.comb += core.core_stopped_i.eq(1)
        m.d.comb += dbg.core_stopped_i.eq(1)
        with m.If(self.pc_i.ok):
            m.d.comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
            m.d.comb += self.state_w_pc.data_i.eq(self.pc_i.data)
            m.d.sync += pc_changed.eq(1)
        with m.If(self.svstate_i.ok):
            m.d.comb += new_svstate.eq(self.svstate_i.data)
            m.d.comb += update_svstate.eq(1)
            m.d.sync += sv_changed.eq(1)

    with m.FSM(name="issue_fsm"):

        # sync with the "fetch" phase which is reading the instruction
        # at this point, there is no instruction running, that
        # could inadvertently update the PC.
        with m.State("ISSUE_START"):
            # wait on "core stop" release, before next fetch
            # need to do this here, in case we are in a VL==0 loop
            with m.If(~dbg.core_stop_o & ~core_rst):
                comb += fetch_pc_valid_i.eq(1)  # tell fetch to start
                with m.If(fetch_pc_ready_o):    # fetch acknowledged us
                    m.next = "INSN_WAIT"
            with m.Else():
                stopped_allow_overrides()

        # decode the instruction when it arrives
        with m.State("INSN_WAIT"):
            comb += fetch_insn_ready_i.eq(1)
            with m.If(fetch_insn_valid_o):
                # latch decoder output and state into the core
                sync += core.e.eq(pdecode2.e)
                sync += core.state.eq(cur_state)
                sync += core.raw_insn_i.eq(dec_opcode_i)
                sync += core.bigendian_i.eq(self.core_bigendian_i)
                # set RA_OR_ZERO detection in satellite decoders
                sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
                # loop into ISSUE_START if it's a SVP64 instruction
                # and VL == 0.  this because VL==0 is a for-loop
                # from 0 to 0 i.e. always, always a NOP.
                cur_vl = cur_state.svstate.vl
                with m.If(is_svp64_mode & (cur_vl == 0)):
                    # update the PC before fetching the next instruction
                    # since we are in a VL==0 loop, no instruction was
                    # executed that we could be overwriting
                    comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                    comb += self.state_w_pc.data_i.eq(nia)
                    comb += self.insn_done.eq(1)
                    m.next = "ISSUE_START"
                with m.Else():
                    m.next = "PRED_START"  # start fetching the predicate

        with m.State("PRED_START"):
            comb += pred_insn_valid_i.eq(1)  # tell fetch_pred to start
            with m.If(pred_insn_ready_o):    # fetch_pred acknowledged us
                m.next = "MASK_WAIT"

        with m.State("MASK_WAIT"):
            comb += pred_mask_ready_i.eq(1)  # ready to receive the masks
            with m.If(pred_mask_valid_o):    # predication masks are ready
                m.next = "INSN_EXECUTE"

        # handshake with execution FSM, move to "wait" once acknowledged
        with m.State("INSN_EXECUTE"):
            # TODO advance src/dst step to "skip" over predicated-out
            # from self.srcmask and self.dstmask
            # https://bugs.libre-soc.org/show_bug.cgi?id=617#c3
            # but still without exceeding VL in either case
            # IMPORTANT: when changing src/dest step, have to
            # jump to m.next = "DECODE_SV" to deal with the change in
            # SVSTATE

            with m.If(is_svp64_mode):
                # placeholder: nothing is driven here yet
                pred_src_zero = pdecode2.rm_dec.pred_sz
                pred_dst_zero = pdecode2.rm_dec.pred_dz

                # planned (not yet implemented):
                #   if not pred_src_zero:
                #       if (((1<<cur_srcstep) & self.srcmask) == 0) and
                #           (cur_srcstep != vl):
                #           comb += update_svstate.eq(1)
                #           comb += new_svstate.srcstep.eq(next_srcstep)
                #           sync += sv_changed.eq(1)
                #
                #   if not pred_dst_zero:
                #       if (((1<<cur_dststep) & self.dstmask) == 0) and
                #           (cur_dststep != vl):
                #           comb += new_svstate.dststep.eq(next_dststep)
                #           comb += update_svstate.eq(1)
                #           sync += sv_changed.eq(1)
                #
                #   if update_svstate:
                #       m.next = "DECODE_SV"

            comb += exec_insn_valid_i.eq(1)  # trigger execute
            with m.If(exec_insn_ready_o):    # execute acknowledged us
                m.next = "EXECUTE_WAIT"

        with m.State("EXECUTE_WAIT"):
            # wait on "core stop" release, at instruction end
            # need to do this here, in case we are in a VL>1 loop
            with m.If(~dbg.core_stop_o & ~core_rst):
                comb += exec_pc_ready_i.eq(1)
                with m.If(exec_pc_valid_o):

                    # was this the last loop iteration?
                    is_last = Signal()
                    cur_vl = cur_state.svstate.vl
                    comb += is_last.eq(next_srcstep == cur_vl)

                    # if either PC or SVSTATE were changed by the previous
                    # instruction, go directly back to Fetch, without
                    # updating either PC or SVSTATE
                    with m.If(pc_changed | sv_changed):
                        m.next = "ISSUE_START"

                    # also return to Fetch, when no output was a vector
                    # (regardless of SRCSTEP and VL), or when the last
                    # instruction was really the last one of the VL loop
                    with m.Elif((~pdecode2.loop_continue) | is_last):
                        # before going back to fetch, update the PC state
                        # register with the NIA.
                        # ok here we are not reading the branch unit.
                        # TODO: this just blithely overwrites whatever
                        #       pipeline updated the PC
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.data_i.eq(nia)
                        # reset SRCSTEP before returning to Fetch
                        with m.If(pdecode2.loop_continue):
                            comb += new_svstate.srcstep.eq(0)
                            comb += new_svstate.dststep.eq(0)
                            comb += update_svstate.eq(1)
                        m.next = "ISSUE_START"

                    # returning to Execute? then, first update SRCSTEP
                    with m.Else():
                        comb += new_svstate.srcstep.eq(next_srcstep)
                        comb += new_svstate.dststep.eq(next_dststep)
                        comb += update_svstate.eq(1)
                        m.next = "DECODE_SV"

            with m.Else():
                stopped_allow_overrides()

        # need to decode the instruction again, after updating SRCSTEP
        # in the previous state.
        # mostly a copy of INSN_WAIT, but without the actual wait
        with m.State("DECODE_SV"):
            # decode the instruction
            sync += core.e.eq(pdecode2.e)
            sync += core.state.eq(cur_state)
            sync += core.bigendian_i.eq(self.core_bigendian_i)
            sync += core.sv_a_nz.eq(pdecode2.sv_a_nz)
            m.next = "INSN_EXECUTE"  # move to "execute"

    # check if svstate needs updating: if so, write it to State Regfile
    with m.If(update_svstate):
        comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
        comb += self.state_w_sv.data_i.eq(new_svstate)
        sync += cur_state.svstate.eq(new_svstate)  # for next clock
645
def execute_fsm(self, m, core, pc_changed, sv_changed,
                exec_insn_valid_i, exec_insn_ready_o,
                exec_pc_valid_o, exec_pc_ready_i):
    """execute FSM

    interacts with the "issue" FSM through exec_insn_ready/valid
    (incoming) and exec_pc_ready/valid (outgoing).  SVP64 RM prefixes
    have already been set up by the "issue" phase, so all that happens
    here is: start the instruction in the core, keep it marked valid
    while it runs, note any write to PC or SVSTATE, and report completion
    (pulsing insn_done) once the core is no longer busy.
    """
    comb = m.d.comb
    sync = m.d.sync

    # shorthand for the core signals used below
    core_busy_o = core.busy_o        # core is busy
    core_ivalid_i = core.ivalid_i    # instruction is valid
    core_issue_i = core.issue_i      # instruction is issued
    insn_type = core.e.do.insn_type  # instruction MicroOp type

    # write-enable of the State regfile port being monitored
    nia_wen = self.state_nia.wen

    with m.FSM(name="exec_fsm"):

        # waiting for instruction bus (stays there until not busy)
        with m.State("INSN_START"):
            comb += exec_insn_ready_o.eq(1)
            with m.If(exec_insn_valid_i):
                comb += core_ivalid_i.eq(1)  # instruction is valid
                comb += core_issue_i.eq(1)   # and issued
                # start with a clean slate for change-tracking
                sync += sv_changed.eq(0)
                sync += pc_changed.eq(0)
                m.next = "INSN_ACTIVE"  # move to "wait completion"

        # instruction started: must wait till it finishes
        with m.State("INSN_ACTIVE"):
            with m.If(insn_type != MicrOp.OP_NOP):
                comb += core_ivalid_i.eq(1)  # instruction is valid
            # note changes to PC and SVSTATE
            with m.If(nia_wen & (1 << StateRegs.SVSTATE)):
                sync += sv_changed.eq(1)
            with m.If(nia_wen & (1 << StateRegs.PC)):
                sync += pc_changed.eq(1)
            with m.If(~core_busy_o):  # instruction done!
                comb += exec_pc_valid_o.eq(1)
                with m.If(exec_pc_ready_i):
                    comb += self.insn_done.eq(1)
                    m.next = "INSN_START"  # back to fetch
693
def setup_peripherals(self, m):
    """add core, fetch unit, debug and optional peripherals to module m

    also creates the "por", "sync" and "coresync" clock domains, the
    power-on-reset delay counter and the core reset, and wires the
    temporary "go" signals of the ldst0 function unit.

    returns the reset signal of the "coresync" domain.
    """
    comb, sync = m.d.comb, m.d.sync

    # the core runs in its own ("coresync") domain
    m.submodules.core = core = DomainRenamer("coresync")(self.core)
    m.submodules.imem = imem = self.imem
    m.submodules.dbg = dbg = self.dbg
    if self.jtag_en:
        m.submodules.jtag = jtag = self.jtag
        # TODO: UART2GDB mux, here, from external pin
        # see https://bugs.libre-soc.org/show_bug.cgi?id=499
        sync += dbg.dmi.connect_to(jtag.dmi)

    cur_state = self.cur_state

    # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
    if self.sram4x4k:
        for i, sram in enumerate(self.sram4k):
            m.submodules["sram4k_%d" % i] = sram
            comb += sram.enable.eq(self.wb_sram_en)

    # XICS interrupt handler
    if self.xics:
        m.submodules.xics_icp = icp = self.xics_icp
        m.submodules.xics_ics = ics = self.xics_ics
        comb += icp.ics_i.eq(ics.icp_o)            # connect ICS to ICP
        sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core

    # GPIO test peripheral
    if self.gpio:
        m.submodules.simple_gpio = simple_gpio = self.simple_gpio

    # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
    # XXX causes litex ECP5 test to get wrong idea about input and output
    # (but works with verilator sim *sigh*)
    #if self.gpio and self.xics:
    #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

    # instruction decoder
    # NOTE(review): this freshly-created decoder is never used here (the
    # one inside self.pdecode2 is) - kept in case create_pdecode() has
    # side effects; confirm and remove.
    pdecode = create_pdecode()
    m.submodules.dec2 = pdecode2 = self.pdecode2
    if self.svp64_en:
        m.submodules.svp64 = svp64 = self.svp64

    # clock delay power-on reset
    # (variable names matter: nmigen derives the domain names "por" and
    # "sync" from cd_por / cd_sync)
    cd_por = ClockDomain(reset_less=True)
    cd_sync = ClockDomain()
    core_sync = ClockDomain("coresync")
    m.domains += cd_por, cd_sync, core_sync

    ti_rst = Signal(reset_less=True)
    # counts down 3,2,1,0 in the "por" domain and then stops
    delay = Signal(range(4), reset=3)
    with m.If(delay != 0):
        m.d.por += delay.eq(delay - 1)
    comb += cd_por.clk.eq(ClockSignal())

    # power-on reset delay: hold the core in reset while the counter is
    # still running, or when debug or the main reset ask for it.
    # the comparison needs its own parentheses: in Python "|" binds
    # tighter than "!=", so without them this would be evaluated as
    # delay != (0 | dbg.core_rst_o | ResetSignal())
    core_rst = ResetSignal("coresync")
    comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
    comb += core_rst.eq(ti_rst)

    # busy/halted signals from core
    comb += self.busy_o.eq(core.busy_o)
    comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

    # temporary hack: says "go" immediately for both address gen and ST
    ldst = core.fus.fus['ldst0']
    st_go_edge = rising_edge(m, ldst.st.rel_o)
    m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)  # link addr-go direct to rel
    m.d.comb += ldst.st.go_i.eq(st_go_edge)     # link store-go to rising rel

    return core_rst
770
def elaborate(self, platform):
    """build the issuer: peripherals, state reads, four FSMs, DMI, DEC/TB

    statement order matters here: the "default" comb assignments (PC
    write-enable, MSR read-enable) are added before the FSMs so that the
    FSMs' later assignments take priority over them.
    """
    m = Module()
    comb = m.d.comb
    cur_state = self.cur_state
    dbg = self.dbg
    core = self.core

    # set up peripherals and core; get the core reset back
    core_rst = self.setup_peripherals(m)

    # PC and instruction from I-Memory
    comb += self.pc_o.eq(cur_state.pc)
    pc_changed = Signal()  # note write to PC
    sv_changed = Signal()  # note write to SVSTATE

    # read state either from incoming override or from regfile
    # TODO: really should be doing MSR in the same way
    pc = state_get(m, self.pc_i, "pc",                 # read PC
                   self.state_r_pc, StateRegs.PC)
    svstate = state_get(m, self.svstate_i, "svstate",  # read SVSTATE
                        self.state_r_sv, StateRegs.SVSTATE)

    # defaults: don't write pc every cycle...
    comb += self.state_w_pc.wen.eq(0)
    comb += self.state_w_pc.data_i.eq(0)

    # ... and don't read msr every cycle
    comb += self.state_r_msr.ren.eq(0)

    # address of the next instruction, in the absence of a branch
    # depends on the instruction size
    nia = Signal(64, reset_less=True)

    # connect up debug signals
    # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
    comb += dbg.terminate_i.eq(core.core_terminate_o)
    comb += dbg.state.pc.eq(pc)
    comb += dbg.state.svstate.eq(svstate)
    comb += dbg.state.msr.eq(cur_state.msr)

    # pass the prefix mode from Fetch to Issue, so the latter can loop
    # on VL==0
    is_svp64_mode = Signal()

    # four FSMs are created below: fetch (32/64-bit), predicate fetch,
    # issue and execute.  these are the handshake signals between them

    # fetch FSM can run as soon as the PC is valid
    fetch_pc_valid_i = Signal()  # Execute tells Fetch "start next read"
    fetch_pc_ready_o = Signal()  # Fetch Tells SVSTATE "proceed"

    # fetch FSM hands over the instruction to be decoded / issued
    fetch_insn_valid_o = Signal()
    fetch_insn_ready_i = Signal()

    # predicate fetch FSM decodes and fetches the predicate
    pred_insn_valid_i = Signal()
    pred_insn_ready_o = Signal()

    # predicate fetch FSM delivers the masks
    pred_mask_valid_o = Signal()
    pred_mask_ready_i = Signal()

    # issue FSM delivers the instruction to be executed
    exec_insn_valid_i = Signal()
    exec_insn_ready_o = Signal()

    # execute FSM, hands over the PC/SVSTATE back to the issue FSM
    exec_pc_valid_o = Signal()
    exec_pc_ready_i = Signal()

    # the FSMs here are perhaps unusual in that they detect conditions
    # then "hold" information, combinatorially, for the core
    # (as opposed to using sync - which would be on a clock's delay)
    # this includes the actual opcode, valid flags and so on.

    # Fetch, then predicate fetch, then Issue, then Execute.
    # Issue is where the VL for-loop lives.  the ready/valid
    # signalling is used to communicate between the four.

    self.fetch_fsm(m, core, pc, svstate, nia, is_svp64_mode,
                   fetch_pc_ready_o, fetch_pc_valid_i,
                   fetch_insn_valid_o, fetch_insn_ready_i)

    self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                   dbg, core_rst, is_svp64_mode,
                   fetch_pc_ready_o, fetch_pc_valid_i,
                   fetch_insn_valid_o, fetch_insn_ready_i,
                   pred_insn_valid_i, pred_insn_ready_o,
                   pred_mask_valid_o, pred_mask_ready_i,
                   exec_insn_valid_i, exec_insn_ready_o,
                   exec_pc_valid_o, exec_pc_ready_i)

    self.fetch_predicate_fsm(m,
                             pred_insn_valid_i, pred_insn_ready_o,
                             pred_mask_valid_o, pred_mask_ready_i)

    self.execute_fsm(m, core, pc_changed, sv_changed,
                     exec_insn_valid_i, exec_insn_ready_o,
                     exec_pc_valid_o, exec_pc_ready_i)

    # this bit doesn't have to be in the FSM: connect up to read
    # regfiles on demand from DMI
    self.do_dmi(m, dbg)

    # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
    # (which uses that in PowerDecoder2 to raise 0x900 exception)
    self.tb_dec_fsm(m, cur_state.dec)

    return m
883
def do_dmi(self, m, dbg):
    """deals with DMI debug requests

    currently only provides read requests for the INT regfile, CR and XER
    it will later also deal with *writing* to these regfiles.

    m is the Module being built; dbg supplies the d_gpr, d_cr and d_xer
    request ports.  for each of them the regfile read-enable is raised
    combinatorially whilst req is asserted, and data/ack are driven back
    when a registered (one clock delayed) copy of req is set.
    """
    comb = m.d.comb
    d_reg, d_cr, d_xer = dbg.d_gpr, dbg.d_cr, dbg.d_xer
    intrf = self.core.regs.rf['int']

    def reply_one_clock_later(dport, rport, name):
        # registers dport.req and, when the registered copy is set,
        # hands rport's read data back to dport together with an ack.
        # the Signal is named explicitly so that it keeps the name it
        # had when it was created as a local variable of do_dmi
        delay = Signal(name=name)
        m.d.sync += delay.eq(dport.req)
        with m.If(delay):
            # data arrives one clock later
            m.d.comb += dport.data.eq(rport.data_o)
            m.d.comb += dport.ack.eq(1)

    with m.If(d_reg.req):  # request for regfile access being made
        # TODO: error-check this
        # XXX should this be combinatorial?  sync better?
        if intrf.unary:
            # unary-addressed regfile: the read-enable is a one-hot mask
            comb += self.int_r.ren.eq(1 << d_reg.addr)
        else:
            comb += self.int_r.addr.eq(d_reg.addr)
            comb += self.int_r.ren.eq(1)
    reply_one_clock_later(d_reg, self.int_r, "d_reg_delay")

    # sigh same thing for CR debug
    with m.If(d_cr.req):  # request for regfile access being made
        comb += self.cr_r.ren.eq(0b11111111)  # enable all
    reply_one_clock_later(d_cr, self.cr_r, "d_cr_delay")

    # aaand XER...
    with m.If(d_xer.req):  # request for regfile access being made
        comb += self.xer_r.ren.eq(0b111111)  # enable all
    reply_one_clock_later(d_xer, self.xer_r, "d_xer_delay")
929
def tb_dec_fsm(self, m, spr_dec):
    """tb_dec_fsm

    FSM that keeps DEC and TB ticking over.  it loops forever through
    four states: start a read of DEC, write back DEC-1, start a read of
    TB, write back TB+1.  the decremented DEC value is additionally
    copied (sync) into spr_dec.  note that SPR pipeline could have
    written a new value to DEC, however the regfile has "passthrough"
    on it so this *should* be ok.

    see v3.0B p1097-1099 for Timer Resource and p1065 and p1076
    """

    comb, sync = m.d.comb, m.d.sync
    fastregs = self.core.regs.rf['fast']
    rd_port = fastregs.r_ports['issue']  # DEC/TB
    wr_port = fastregs.w_ports['issue']  # DEC/TB

    with m.FSM():

        # kick off the read of the current DEC value
        with m.State("DEC_READ"):
            comb += [rd_port.addr.eq(FastRegs.DEC),
                     rd_port.ren.eq(1)]
            m.next = "DEC_WRITE"

        # DEC read data is available (1 cycle later): write back DEC-1
        with m.State("DEC_WRITE"):
            new_dec = Signal(64)
            # TODO: MSR.LPCR 32-bit decrement mode
            comb += new_dec.eq(rd_port.data_o - 1)
            comb += [wr_port.addr.eq(FastRegs.DEC),
                     wr_port.wen.eq(1),
                     wr_port.data_i.eq(new_dec)]
            # copy into cur_state for decoder
            sync += spr_dec.eq(new_dec)
            m.next = "TB_READ"

        # kick off the read of the current TB value
        with m.State("TB_READ"):
            comb += [rd_port.addr.eq(FastRegs.TB),
                     rd_port.ren.eq(1)]
            m.next = "TB_WRITE"

        # TB read data is available: write back TB+1
        with m.State("TB_WRITE"):
            new_tb = Signal(64)
            comb += new_tb.eq(rd_port.data_o + 1)
            comb += [wr_port.addr.eq(FastRegs.TB),
                     wr_port.wen.eq(1),
                     wr_port.data_i.eq(new_tb)]
            m.next = "DEC_READ"

    return m
981
def __iter__(self):
    """iterates over the ports: those of pc_i, then pc_o and memerr_o,
    then those of core and imem, then core_bigendian_i and busy_o
    """
    for port in self.pc_i.ports():
        yield port
    yield self.pc_o
    yield self.memerr_o
    for port in self.core.ports():
        yield port
    for port in self.imem.ports():
        yield port
    yield self.core_bigendian_i
    yield self.busy_o
990
def ports(self):
    """returns a new list holding everything that iterating self yields"""
    return [*self]
993
def external_ports(self):
    """returns the list of externally-visible ports

    always present: the pc_i ports, pc_o, memerr_o, core_bigendian_i,
    busy_o, the fields of the instruction bus (imem.ibus) and of the
    load/store slave bus.  either the JTAG external ports (jtag_en set)
    or the DMI ports are included, and the sram4x4k, xics and gpio
    options each add their own bus fields (plus int_level_i / gpio_o).
    """
    # take a copy: extending the list returned by pc_i.ports() in place
    # would modify an object that belongs to pc_i
    ports = list(self.pc_i.ports())
    ports.extend([self.pc_o, self.memerr_o,
                  self.core_bigendian_i, self.busy_o])

    if self.jtag_en:
        ports.extend(self.jtag.external_ports())
    else:
        # don't add DMI if JTAG is enabled
        ports.extend(self.dbg.dmi.ports())

    ports.extend(self.imem.ibus.fields.values())
    ports.extend(self.core.l0.cmpi.lsmem.lsi.slavebus.fields.values())

    if self.sram4x4k:
        for sram in self.sram4k:
            ports.extend(sram.bus.fields.values())

    if self.xics:
        ports.extend(self.xics_icp.bus.fields.values())
        ports.extend(self.xics_ics.bus.fields.values())
        ports.append(self.int_level_i)

    if self.gpio:
        ports.extend(self.simple_gpio.bus.fields.values())
        ports.append(self.gpio_o)

    return ports
1022
def ports(self):
    """returns a new list holding everything that iterating self yields"""
    # NOTE: an identical ports() is defined earlier, just before
    # external_ports; if both sit in the same class this later one is
    # the definition that takes effect
    return [port for port in self]
1025
1026
class TestIssuer(Elaboratable):
    """wrapper around TestIssuerInternal that drives its "coresync" clock

    the "coresync" clock is taken from a DummyPLL output when the pspec
    has a true-valued use_pll attribute, otherwise directly from the
    default domain's clock.
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)

        self.pll = DummyPLL()

        # PLL direct clock or not (pspec.use_pll is optional)
        self.pll_en = getattr(pspec, "use_pll", False)
        if self.pll_en:
            self.pll_18_o = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        m.submodules.ti = self.ti

        if self.pll_en:
            # the PLL is only instantiated when it is actually in use
            m.submodules.pll = pll = self.pll

            # add clock domain for the PLL output
            cd_pll = ClockDomain("pllclk")
            m.domains += cd_pll

            # the "pllclk" domain is clocked by the PLL output
            pllclk = ClockSignal("pllclk")
            comb += pllclk.eq(pll.clk_pll_o)

            # wire up external 24mhz to PLL
            comb += pll.clk_24_i.eq(ClockSignal())

            # output 18 mhz PLL test signal
            comb += self.pll_18_o.eq(pll.pll_18_o)

            # now wire up ResetSignals.  don't mind them being in this domain
            pll_rst = ResetSignal("pllclk")
            comb += pll_rst.eq(ResetSignal())

            core_clk = pll.clk_pll_o
        else:
            core_clk = ClockSignal()

        # the "coresync" clock follows the PLL output if enabled,
        # otherwise the default domain's clock
        comb += ClockSignal("coresync").eq(core_clk)

        return m

    def ports(self):
        """ports of the internal issuer and the PLL, plus default clk/rst"""
        return list(self.ti.ports()) + list(self.pll.ports()) + \
               [ClockSignal(), ResetSignal()]

    def external_ports(self):
        """external ports of the internal issuer plus default clk/rst,
        and (only when the PLL is enabled) the PLL select/test/lock pins
        """
        ports = self.ti.external_ports()
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        if self.pll_en:
            ports.append(self.pll.clk_sel_i)
            ports.append(self.pll_18_o)
            ports.append(self.pll.pll_lck_o)
        return ports
1092
1093
if __name__ == '__main__':
    # one of each Function Unit
    units = dict(alu=1, cr=1, branch=1, trap=1, logical=1,
                 spr=1,
                 div=1,
                 mul=1,
                 shiftrot=1)
    pspec = TestMemPspec(ldst_ifacetype='bare_wb',
                         imem_ifacetype='bare_wb',
                         addr_wid=48,
                         mask_wid=8,
                         reg_wid=64,
                         units=units)
    dut = TestIssuer(pspec)

    # hand over to the nmigen command-line front-end
    main(dut, ports=dut.ports(), name="test_issuer")

    # no command-line arguments given: also write out the RTLIL,
    # this time with the external ports only
    if len(sys.argv) == 1:
        il_text = rtlil.convert(dut, ports=dut.external_ports(),
                                name="test_issuer")
        with open("test_issuer.il", "w") as f:
            f.write(il_text)