big split-out of common functions in TestIssuer to TestIssuerBase
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from nmutil.singlepipe import ControlBase
25 from soc.simple.core_data import FetchOutput, FetchInput
26
27 from nmigen.lib.coding import PriorityEncoder
28
29 from openpower.decoder.power_decoder import create_pdecode
30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
32 from openpower.decoder.decode2execute1 import Data
33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
34 SVP64PredMode)
35 from openpower.state import CoreState
36 from openpower.consts import (CR, SVP64CROffs)
37 from soc.experiment.testmem import TestMemory # test only for instructions
38 from soc.regfile.regfiles import StateRegs, FastRegs
39 from soc.simple.core import NonProductionCore
40 from soc.config.test.test_loadstore import TestMemPspec
41 from soc.config.ifetch import ConfigFetchUnit
42 from soc.debug.dmi import CoreDebug, DMIInterface
43 from soc.debug.jtag import JTAG
44 from soc.config.pinouts import get_pinspecs
45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
46 from soc.bus.simple_gpio import SimpleGPIO
47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
48 from soc.clock.select import ClockSelect
49 from soc.clock.dummypll import DummyPLL
50 from openpower.sv.svstate import SVSTATERec
51 from soc.experiment.icache import ICache
52
53 from nmutil.util import rising_edge
54
55
56 def get_insn(f_instr_o, pc):
57 if f_instr_o.width == 32:
58 return f_instr_o
59 else:
60 # 64-bit: bit 2 of pc decides which word to select
61 return f_instr_o.word_select(pc[2], 32)
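
# Software-only equivalent (illustrative, not used by the hardware) of
# get_insn() above: bit 2 of the PC selects which 32-bit half of a 64-bit
# fetch word holds the instruction, mirroring word_select(pc[2], 32):
def _get_insn_sketch(fetch_word, pc, fetch_width=64):
    if fetch_width == 32:
        return fetch_word & 0xFFFFFFFF
    sel = (pc >> 2) & 1                       # bit 2 of the PC
    return (fetch_word >> (32 * sel)) & 0xFFFFFFFF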
62
63 # gets state input or reads from state regfile
64
65
66 def state_get(m, res, core_rst, state_i, name, regfile, regnum):
67 comb = m.d.comb
68 sync = m.d.sync
69 # read the {insert state variable here}
70 res_ok_delay = Signal(name="%s_ok_delay" % name)
71 with m.If(~core_rst):
72 sync += res_ok_delay.eq(~state_i.ok)
73 with m.If(state_i.ok):
74 # incoming override (start from pc_i)
75 comb += res.eq(state_i.data)
76 with m.Else():
77 # otherwise read StateRegs regfile for {insert state here}...
78 comb += regfile.ren.eq(1 << regnum)
79 # ... but on a 1-clock delay
80 with m.If(res_ok_delay):
81 comb += res.eq(regfile.o_data)
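
# Illustrative per-clock software model (not used by the hardware) of
# state_get() above: an incoming override is used in the same cycle,
# otherwise the regfile is read and its data taken one clock later.
# `cycles` is a hypothetical iterable of (ok, override, regfile_data):
def _state_get_model_sketch(cycles):
    prev_ok = True                    # res_ok_delay models ~ok, registered
    for ok, override, regfile_data in cycles:
        if ok:
            yield override            # override wins, combinatorially
        elif not prev_ok:
            yield regfile_data        # regfile data, one clock after request
        else:
            yield None                # no data yet on the first read cycle
        prev_ok = ok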
82
83
84 def get_predint(m, mask, name):
85 """decode SVP64 predicate integer mask field to reg number and invert
86 this is identical to the equivalent function in ISACaller except that
87 it doesn't read the INT directly, it just decodes "what needs to be done"
88 i.e. which INT reg, whether it is shifted and whether it is bit-inverted.
89
90 * all1s is set to indicate that no mask is to be applied.
91 * regread indicates the GPR register number to be read
92 * invert is set to indicate that the register value is to be inverted
93 * unary indicates that the mask is a single bit, 1 shifted by the register contents (1<<r3)
94 """
95 comb = m.d.comb
96 regread = Signal(5, name=name+"regread")
97 invert = Signal(name=name+"invert")
98 unary = Signal(name=name+"unary")
99 all1s = Signal(name=name+"all1s")
100 with m.Switch(mask):
101 with m.Case(SVP64PredInt.ALWAYS.value):
102 comb += all1s.eq(1) # use 0b1111 (all ones)
103 with m.Case(SVP64PredInt.R3_UNARY.value):
104 comb += regread.eq(3)
105 comb += unary.eq(1) # 1<<r3 - shift r3 (single bit)
106 with m.Case(SVP64PredInt.R3.value):
107 comb += regread.eq(3)
108 with m.Case(SVP64PredInt.R3_N.value):
109 comb += regread.eq(3)
110 comb += invert.eq(1)
111 with m.Case(SVP64PredInt.R10.value):
112 comb += regread.eq(10)
113 with m.Case(SVP64PredInt.R10_N.value):
114 comb += regread.eq(10)
115 comb += invert.eq(1)
116 with m.Case(SVP64PredInt.R30.value):
117 comb += regread.eq(30)
118 with m.Case(SVP64PredInt.R30_N.value):
119 comb += regread.eq(30)
120 comb += invert.eq(1)
121 return regread, invert, unary, all1s
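
# The same decode, written out as a lookup table for illustration only
# (not used by the hardware); values are (regread, invert, unary, all1s):
_PREDINT_DECODE_SKETCH = {
    SVP64PredInt.ALWAYS.value:   (0,  0, 0, 1),
    SVP64PredInt.R3_UNARY.value: (3,  0, 1, 0),
    SVP64PredInt.R3.value:       (3,  0, 0, 0),
    SVP64PredInt.R3_N.value:     (3,  1, 0, 0),
    SVP64PredInt.R10.value:      (10, 0, 0, 0),
    SVP64PredInt.R10_N.value:    (10, 1, 0, 0),
    SVP64PredInt.R30.value:      (30, 0, 0, 0),
    SVP64PredInt.R30_N.value:    (30, 1, 0, 0),
}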
122
123
124 def get_predcr(m, mask, name):
125 """decode SVP64 predicate CR to reg number field and invert status
126 this is identical to _get_predcr in ISACaller
127 """
128 comb = m.d.comb
129 idx = Signal(2, name=name+"idx")
130 invert = Signal(name=name+"crinvert")
131 with m.Switch(mask):
132 with m.Case(SVP64PredCR.LT.value):
133 comb += idx.eq(CR.LT)
134 comb += invert.eq(0)
135 with m.Case(SVP64PredCR.GE.value):
136 comb += idx.eq(CR.LT)
137 comb += invert.eq(1)
138 with m.Case(SVP64PredCR.GT.value):
139 comb += idx.eq(CR.GT)
140 comb += invert.eq(0)
141 with m.Case(SVP64PredCR.LE.value):
142 comb += idx.eq(CR.GT)
143 comb += invert.eq(1)
144 with m.Case(SVP64PredCR.EQ.value):
145 comb += idx.eq(CR.EQ)
146 comb += invert.eq(0)
147 with m.Case(SVP64PredCR.NE.value):
148 comb += idx.eq(CR.EQ)
149 comb += invert.eq(1)
150 with m.Case(SVP64PredCR.SO.value):
151 comb += idx.eq(CR.SO)
152 comb += invert.eq(0)
153 with m.Case(SVP64PredCR.NS.value):
154 comb += idx.eq(CR.SO)
155 comb += invert.eq(1)
156 return idx, invert
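
# Likewise for the CR predicate decode above, for illustration only
# (not used by the hardware); values are (CR bit index, invert):
_PREDCR_DECODE_SKETCH = {
    SVP64PredCR.LT.value: (CR.LT, 0), SVP64PredCR.GE.value: (CR.LT, 1),
    SVP64PredCR.GT.value: (CR.GT, 0), SVP64PredCR.LE.value: (CR.GT, 1),
    SVP64PredCR.EQ.value: (CR.EQ, 0), SVP64PredCR.NE.value: (CR.EQ, 1),
    SVP64PredCR.SO.value: (CR.SO, 0), SVP64PredCR.NS.value: (CR.SO, 1),
}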
157
158
159 class TestIssuerBase:
160 """TestIssuerBase - base class for Issuers
161 """
162
163 def __init__(self, pspec):
164
165 # test if SVP64 is to be enabled
166 self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
167
168 # and if regfiles are reduced
169 self.regreduce_en = (hasattr(pspec, "regreduce") and
170 (pspec.regreduce == True))
171
172 # and if overlap requested
173 self.allow_overlap = (hasattr(pspec, "allow_overlap") and
174 (pspec.allow_overlap == True))
175
176 # JTAG interface. add this right at the start because if it's
177 # added it *modifies* the pspec, by adding enable/disable signals
178 # for parts of the rest of the core
179 self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
180 self.dbg_domain = "sync" # sigh "dbgsync" too problematic
181 # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
182 if self.jtag_en:
183 # XXX MUST keep this up-to-date with litex, and
184 # soc-cocotb-sim, and err.. all needs sorting out, argh
185 subset = ['uart',
186 'mtwi',
187 'eint', 'gpio', 'mspi0',
188 # 'mspi1', - disabled for now
189 # 'pwm', 'sd0', - disabled for now
190 'sdr']
191 self.jtag = JTAG(get_pinspecs(subset=subset),
192 domain=self.dbg_domain)
193 # add signals to pspec to enable/disable icache and dcache
194 # (or data and instruction wishbone if icache/dcache not included)
195 # https://bugs.libre-soc.org/show_bug.cgi?id=520
196 # TODO: do we actually care if these are not domain-synchronised?
197 # honestly probably not.
198 pspec.wb_icache_en = self.jtag.wb_icache_en
199 pspec.wb_dcache_en = self.jtag.wb_dcache_en
200 self.wb_sram_en = self.jtag.wb_sram_en
201 else:
202 self.wb_sram_en = Const(1)
203
204 # add 4k sram blocks?
205 self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
206 pspec.sram4x4kblock == True)
207 if self.sram4x4k:
208 self.sram4k = []
209 for i in range(4):
210 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
211 # features={'err'}
212 ))
213
214 # add interrupt controller?
215 self.xics = hasattr(pspec, "xics") and pspec.xics == True
216 if self.xics:
217 self.xics_icp = XICS_ICP()
218 self.xics_ics = XICS_ICS()
219 self.int_level_i = self.xics_ics.int_level_i
220
221 # add GPIO peripheral?
222 self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
223 if self.gpio:
224 self.simple_gpio = SimpleGPIO()
225 self.gpio_o = self.simple_gpio.gpio_o
226
227 # main instruction core. suitable for prototyping / demo only
228 self.core = core = NonProductionCore(pspec)
229 self.core_rst = ResetSignal("coresync")
230
231 # instruction decoder. goes into Trap Record
232 #pdecode = create_pdecode()
233 self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
234 self.pdecode2 = PowerDecode2(None, state=self.cur_state,
235 opkls=IssuerDecode2ToOperand,
236 svp64_en=self.svp64_en,
237 regreduce_en=self.regreduce_en)
238 pdecode = self.pdecode2.dec
239
240 if self.svp64_en:
241 self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
242
243 # Test Instruction memory
244 if hasattr(core, "icache"):
245 # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
246 # truly dreadful. needs a huge reorg.
247 pspec.icache = core.icache
248 self.imem = ConfigFetchUnit(pspec).fu
249
250 # DMI interface
251 self.dbg = CoreDebug()
252
253 # instruction go/monitor
254 self.pc_o = Signal(64, reset_less=True)
255 self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
256 self.msr_i = Data(64, "msr_i") # set "ok" to indicate "please change me"
257 self.svstate_i = Data(64, "svstate_i") # ditto
258 self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
259 self.busy_o = Signal(reset_less=True)
260 self.memerr_o = Signal(reset_less=True)
261
262 # STATE regfile read /write ports for PC, MSR, SVSTATE
263 staterf = self.core.regs.rf['state']
264 self.state_r_msr = staterf.r_ports['msr'] # MSR rd
265 self.state_r_pc = staterf.r_ports['cia'] # PC rd
266 self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
267
268 self.state_w_msr = staterf.w_ports['msr'] # MSR wr
269 self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
270 self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr
271
272 # DMI interface access
273 intrf = self.core.regs.rf['int']
274 crrf = self.core.regs.rf['cr']
275 xerrf = self.core.regs.rf['xer']
276 self.int_r = intrf.r_ports['dmi'] # INT read
277 self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
278 self.xer_r = xerrf.r_ports['full_xer'] # XER read
279
280 if self.svp64_en:
281 # for predication
282 self.int_pred = intrf.r_ports['pred'] # INT predicate read
283 self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
284
285 # hack method of keeping an eye on whether branch/trap set the PC
286 self.state_nia = self.core.regs.rf['state'].w_ports['nia']
287 self.state_nia.wen.name = 'state_nia_wen'
288
289 # pulse to synchronize the simulator at instruction end
290 self.insn_done = Signal()
291
292 # indicate any instruction still outstanding, in execution
293 self.any_busy = Signal()
294
295 if self.svp64_en:
296 # store copies of predicate masks
297 self.srcmask = Signal(64)
298 self.dstmask = Signal(64)
299
300 def setup_peripherals(self, m):
301 comb, sync = m.d.comb, m.d.sync
302
303 # okaaaay so the debug module must be in coresync clock domain
304 # but NOT its reset signal. to cope with this, set every single
305 # submodule explicitly in coresync domain, debug and JTAG
306 # in their own one but using *external* reset.
307 csd = DomainRenamer("coresync")
308 dbd = DomainRenamer(self.dbg_domain)
309
310 m.submodules.core = core = csd(self.core)
311 # this _so_ needs sorting out. ICache is added down inside
312 # LoadStore1 and is already a submodule of LoadStore1
313 if not isinstance(self.imem, ICache):
314 m.submodules.imem = imem = csd(self.imem)
315 m.submodules.dbg = dbg = dbd(self.dbg)
316 if self.jtag_en:
317 m.submodules.jtag = jtag = dbd(self.jtag)
318 # TODO: UART2GDB mux, here, from external pin
319 # see https://bugs.libre-soc.org/show_bug.cgi?id=499
320 sync += dbg.dmi.connect_to(jtag.dmi)
321
322 cur_state = self.cur_state
323
324 # 4x 4k SRAM blocks. these simply "exist", they get routed in litex
325 if self.sram4x4k:
326 for i, sram in enumerate(self.sram4k):
327 m.submodules["sram4k_%d" % i] = csd(sram)
328 comb += sram.enable.eq(self.wb_sram_en)
329
330 # XICS interrupt handler
331 if self.xics:
332 m.submodules.xics_icp = icp = csd(self.xics_icp)
333 m.submodules.xics_ics = ics = csd(self.xics_ics)
334 comb += icp.ics_i.eq(ics.icp_o) # connect ICS to ICP
335 sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core
336
337 # GPIO test peripheral
338 if self.gpio:
339 m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)
340
341 # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
342 # XXX causes litex ECP5 test to get wrong idea about input and output
343 # (but works with verilator sim *sigh*)
344 # if self.gpio and self.xics:
345 # comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])
346
347 # instruction decoder
348 pdecode = create_pdecode()
349 m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
350 if self.svp64_en:
351 m.submodules.svp64 = svp64 = csd(self.svp64)
352
353 # convenience
354 dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
355 intrf = self.core.regs.rf['int']
356
357 # clock delay power-on reset
358 cd_por = ClockDomain(reset_less=True)
359 cd_sync = ClockDomain()
360 core_sync = ClockDomain("coresync")
361 m.domains += cd_por, cd_sync, core_sync
362 if self.dbg_domain != "sync":
363 dbg_sync = ClockDomain(self.dbg_domain)
364 m.domains += dbg_sync
365
366 ti_rst = Signal(reset_less=True)
367 delay = Signal(range(4), reset=3)
368 with m.If(delay != 0):
369 m.d.por += delay.eq(delay - 1)
370 comb += cd_por.clk.eq(ClockSignal())
371
372 # power-on reset delay
373 core_rst = ResetSignal("coresync")
374 comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
375 comb += core_rst.eq(ti_rst)
376
377 # debug clock is same as coresync, but reset is *main external*
378 if self.dbg_domain != "sync":
379 dbg_rst = ResetSignal(self.dbg_domain)
380 comb += dbg_rst.eq(ResetSignal())
381
382 # busy/halted signals from core
383 core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o # core is busy
384 comb += self.busy_o.eq(core_busy_o)
385 comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)
386
387 # temporary hack: says "go" immediately for both address gen and ST
388 l0 = core.l0
389 ldst = core.fus.fus['ldst0']
390 st_go_edge = rising_edge(m, ldst.st.rel_o)
391 # link addr-go direct to rel
392 m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
393 m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel
394
395 def do_dmi(self, m, dbg):
396 """deals with DMI debug requests
397
398 currently only provides read requests for the INT regfile, CR and XER
399 it will later also deal with *writing* to these regfiles.
400 """
401 comb = m.d.comb
402 sync = m.d.sync
403 dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
404 intrf = self.core.regs.rf['int']
405
406 with m.If(d_reg.req): # request for regfile access being made
407 # TODO: error-check this
408 # XXX should this be combinatorial? sync better?
409 if intrf.unary:
410 comb += self.int_r.ren.eq(1 << d_reg.addr)
411 else:
412 comb += self.int_r.addr.eq(d_reg.addr)
413 comb += self.int_r.ren.eq(1)
414 d_reg_delay = Signal()
415 sync += d_reg_delay.eq(d_reg.req)
416 with m.If(d_reg_delay):
417 # data arrives one clock later
418 comb += d_reg.data.eq(self.int_r.o_data)
419 comb += d_reg.ack.eq(1)
420
421 # sigh same thing for CR debug
422 with m.If(d_cr.req): # request for regfile access being made
423 comb += self.cr_r.ren.eq(0b11111111) # enable all
424 d_cr_delay = Signal()
425 sync += d_cr_delay.eq(d_cr.req)
426 with m.If(d_cr_delay):
427 # data arrives one clock later
428 comb += d_cr.data.eq(self.cr_r.o_data)
429 comb += d_cr.ack.eq(1)
430
431 # aaand XER...
432 with m.If(d_xer.req): # request for regfile access being made
433 comb += self.xer_r.ren.eq(0b111111) # enable all
434 d_xer_delay = Signal()
435 sync += d_xer_delay.eq(d_xer.req)
436 with m.If(d_xer_delay):
437 # data arrives one clock later
438 comb += d_xer.data.eq(self.xer_r.o_data)
439 comb += d_xer.ack.eq(1)
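
    # Illustrative software model (not part of the hardware) of the DMI reads
    # above: assert the read-enable now, data and ack arrive one clock later.
    # `regs` is a hypothetical dict-backed regfile, `reqs` a per-cycle stream
    # of (req, addr) tuples; yields (data, ack) per cycle:
    @staticmethod
    def _dmi_read_model_sketch(regs, reqs):
        pending = None
        for req, addr in reqs:
            if pending is not None:
                yield regs[pending], 1    # one-cycle-delayed data plus ack
            else:
                yield 0, 0
            pending = addr if req else None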
440
441 def tb_dec_fsm(self, m, spr_dec):
442 """tb_dec_fsm
443
444 this is a FSM for updating either dec or tb. it runs alternately
445 DEC, TB, DEC, TB. note that SPR pipeline could have written a new
446 value to DEC, however the regfile has "passthrough" on it so this
447 *should* be ok.
448
449 see v3.0B p1097-1099 for Timer Resource and p1065 and p1076
450 """
451
452 comb, sync = m.d.comb, m.d.sync
453 fast_rf = self.core.regs.rf['fast']
454 fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
455 fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB
456
457 with m.FSM() as fsm:
458
459 # initiates read of current DEC
460 with m.State("DEC_READ"):
461 comb += fast_r_dectb.addr.eq(FastRegs.DEC)
462 comb += fast_r_dectb.ren.eq(1)
463 m.next = "DEC_WRITE"
464
465 # waits for DEC read to arrive (1 cycle), updates with new value
466 with m.State("DEC_WRITE"):
467 new_dec = Signal(64)
468 # TODO: MSR.LPCR 32-bit decrement mode
469 comb += new_dec.eq(fast_r_dectb.o_data - 1)
470 comb += fast_w_dectb.addr.eq(FastRegs.DEC)
471 comb += fast_w_dectb.wen.eq(1)
472 comb += fast_w_dectb.i_data.eq(new_dec)
473 sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
474 m.next = "TB_READ"
475
476 # initiates read of current TB
477 with m.State("TB_READ"):
478 comb += fast_r_dectb.addr.eq(FastRegs.TB)
479 comb += fast_r_dectb.ren.eq(1)
480 m.next = "TB_WRITE"
481
482 # waits for read TB to arrive, initiates write of current TB
483 with m.State("TB_WRITE"):
484 new_tb = Signal(64)
485 comb += new_tb.eq(fast_r_dectb.o_data + 1)
486 comb += fast_w_dectb.addr.eq(FastRegs.TB)
487 comb += fast_w_dectb.wen.eq(1)
488 comb += fast_w_dectb.i_data.eq(new_tb)
489 m.next = "DEC_READ"
490
491 return m
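
    # Illustrative software model (not part of the hardware) of the FSM above:
    # DEC and TB are updated alternately, one register per read/write pair of
    # states. `fastregs` is a hypothetical dict standing in for the FAST
    # regfile; `updates` is the number of alternating updates to apply:
    @staticmethod
    def _dec_tb_model_sketch(fastregs, updates):
        mask = (1 << 64) - 1
        for i in range(updates):
            if i % 2 == 0:
                fastregs['DEC'] = (fastregs['DEC'] - 1) & mask   # decrement DEC
            else:
                fastregs['TB'] = (fastregs['TB'] + 1) & mask     # increment TB
        return fastregs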
492
493 def __iter__(self):
494 yield from self.pc_i.ports()
495 yield from self.msr_i.ports()
496 yield self.pc_o
497 yield self.memerr_o
498 yield from self.core.ports()
499 yield from self.imem.ports()
500 yield self.core_bigendian_i
501 yield self.busy_o
502
503 def ports(self):
504 return list(self)
505
506 def external_ports(self):
507 ports = self.pc_i.ports()
508 ports += self.msr_i.ports()
509 ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
510 ]
511
512 if self.jtag_en:
513 ports += list(self.jtag.external_ports())
514 else:
515 # JTAG disabled: expose the DMI interface directly
516 ports += list(self.dbg.dmi.ports())
517
518 ports += list(self.imem.ibus.fields.values())
519 ports += list(self.core.l0.cmpi.wb_bus().fields.values())
520
521 if self.sram4x4k:
522 for sram in self.sram4k:
523 ports += list(sram.bus.fields.values())
524
525 if self.xics:
526 ports += list(self.xics_icp.bus.fields.values())
527 ports += list(self.xics_ics.bus.fields.values())
528 ports.append(self.int_level_i)
529
530 if self.gpio:
531 ports += list(self.simple_gpio.bus.fields.values())
532 ports.append(self.gpio_o)
533
534 return ports
535
538
539
540
541 # Fetch Finite State Machine.
542 # WARNING: there are currently DriverConflicts but it's actually working.
543 # TODO, here: everything that is global in nature, information from the
544 # main TestIssuerInternal, needs to move to either ispec() or ospec().
545 # not only that: TestIssuerInternal.imem can entirely move into here
546 # because imem is only ever accessed inside the FetchFSM.
547 class FetchFSM(ControlBase):
548 def __init__(self, allow_overlap, svp64_en, imem, core_rst,
549 pdecode2, cur_state,
550 dbg, core, svstate, nia, is_svp64_mode):
551 self.allow_overlap = allow_overlap
552 self.svp64_en = svp64_en
553 self.imem = imem
554 self.core_rst = core_rst
555 self.pdecode2 = pdecode2
556 self.cur_state = cur_state
557 self.dbg = dbg
558 self.core = core
559 self.svstate = svstate
560 self.nia = nia
561 self.is_svp64_mode = is_svp64_mode
562
563 # set up pipeline ControlBase and allocate i/o specs
564 # (unusual: normally done by the Pipeline API)
565 super().__init__(stage=self)
566 self.p.i_data, self.n.o_data = self.new_specs(None)
567 self.i, self.o = self.p.i_data, self.n.o_data
568
569 # next 3 functions are Stage API Compliance
570 def setup(self, m, i):
571 pass
572
573 def ispec(self):
574 return FetchInput()
575
576 def ospec(self):
577 return FetchOutput()
578
579 def elaborate(self, platform):
580 """fetch FSM
581
582 this FSM performs fetch of raw instruction data, partial-decodes
583 it 32-bit at a time to detect SVP64 prefixes, and will optionally
584 read a 2nd 32-bit quantity if that occurs.
585 """
586 m = super().elaborate(platform)
587
588 dbg = self.dbg
589 core = self.core
590 pc = self.i.pc
591 msr = self.i.msr
592 svstate = self.svstate
593 nia = self.nia
594 is_svp64_mode = self.is_svp64_mode
595 fetch_pc_o_ready = self.p.o_ready
596 fetch_pc_i_valid = self.p.i_valid
597 fetch_insn_o_valid = self.n.o_valid
598 fetch_insn_i_ready = self.n.i_ready
599
600 comb = m.d.comb
601 sync = m.d.sync
602 pdecode2 = self.pdecode2
603 cur_state = self.cur_state
604 dec_opcode_o = pdecode2.dec.raw_opcode_in # raw opcode
605
606 # also note instruction fetch failed
607 if hasattr(core, "icache"):
608 fetch_failed = core.icache.i_out.fetch_failed
609 flush_needed = True
610 else:
611 fetch_failed = Const(0, 1)
612 flush_needed = False
613
614 with m.FSM(name='fetch_fsm'):
615
616 # waiting (zzz)
617 with m.State("IDLE"):
618 with m.If(~dbg.stopping_o & ~fetch_failed):
619 comb += fetch_pc_o_ready.eq(1)
620 with m.If(fetch_pc_i_valid & ~fetch_failed):
621 # instruction allowed to go: start by reading the PC
622 # capture the PC and also drop it into Insn Memory
623 # we have joined a pair of combinatorial memory
624 # lookups together. this is Generally Bad.
625 comb += self.imem.a_pc_i.eq(pc)
626 comb += self.imem.a_i_valid.eq(1)
627 comb += self.imem.f_i_valid.eq(1)
628 sync += cur_state.pc.eq(pc)
629 sync += cur_state.svstate.eq(svstate) # and svstate
630 sync += cur_state.msr.eq(msr) # and msr
631
632 m.next = "INSN_READ" # move to "wait for bus" phase
633
634 # dummy pause to find out why simulation is not keeping up
635 with m.State("INSN_READ"):
636 if self.allow_overlap:
637 stopping = dbg.stopping_o
638 else:
639 stopping = Const(0)
640 with m.If(stopping):
641 # stopping: jump back to idle
642 m.next = "IDLE"
643 with m.Else():
644 with m.If(self.imem.f_busy_o & ~fetch_failed): # zzz...
645 # busy but not fetch failed: stay in wait-read
646 comb += self.imem.a_i_valid.eq(1)
647 comb += self.imem.f_i_valid.eq(1)
648 with m.Else():
649 # not busy (or fetch failed!): instruction fetched
650 # when fetch failed, the instruction gets ignored
651 # by the decoder
652 insn = get_insn(self.imem.f_instr_o, cur_state.pc)
653 if self.svp64_en:
654 svp64 = self.svp64
655 # decode the SVP64 prefix, if any
656 comb += svp64.raw_opcode_in.eq(insn)
657 comb += svp64.bigendian.eq(self.core_bigendian_i)
658 # pass the decoded prefix (if any) to PowerDecoder2
659 sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
660 sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
661 # remember whether this is a prefixed instruction,
662 # so the FSM can readily loop when VL==0
663 sync += is_svp64_mode.eq(svp64.is_svp64_mode)
664 # calculate the address of the following instruction
665 insn_size = Mux(svp64.is_svp64_mode, 8, 4)
666 sync += nia.eq(cur_state.pc + insn_size)
667 with m.If(~svp64.is_svp64_mode):
668 # with no prefix, store the instruction
669 # and hand it directly to the next FSM
670 sync += dec_opcode_o.eq(insn)
671 m.next = "INSN_READY"
672 with m.Else():
673 # fetch the rest of the instruction from memory
674 comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
675 comb += self.imem.a_i_valid.eq(1)
676 comb += self.imem.f_i_valid.eq(1)
677 m.next = "INSN_READ2"
678 else:
679 # not SVP64 - 32-bit only
680 sync += nia.eq(cur_state.pc + 4)
681 sync += dec_opcode_o.eq(insn)
682 m.next = "INSN_READY"
683
684 with m.State("INSN_READ2"):
685 with m.If(self.imem.f_busy_o): # zzz...
686 # busy: stay in wait-read
687 comb += self.imem.a_i_valid.eq(1)
688 comb += self.imem.f_i_valid.eq(1)
689 with m.Else():
690 # not busy: instruction fetched
691 insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
692 sync += dec_opcode_o.eq(insn)
693 m.next = "INSN_READY"
694 # TODO: probably can start looking at pdecode2.rm_dec
695 # here or maybe even in INSN_READ state, if svp64_mode
696 # detected, in order to trigger - and wait for - the
697 # predicate reading.
698 if self.svp64_en:
699 pmode = pdecode2.rm_dec.predmode
700 """
701 if pmode != SVP64PredMode.ALWAYS.value:
702 fire predicate loading FSM and wait before
703 moving to INSN_READY
704 else:
705 sync += self.srcmask.eq(-1) # set to all 1s
706 sync += self.dstmask.eq(-1) # set to all 1s
707 m.next = "INSN_READY"
708 """
709
710 with m.State("INSN_READY"):
711 # hand over the instruction, to be decoded
712 comb += fetch_insn_o_valid.eq(1)
713 with m.If(fetch_insn_i_ready):
714 m.next = "IDLE"
715
716 # whatever was done above, over-ride it if core reset is held
717 with m.If(self.core_rst):
718 sync += nia.eq(0)
719
720 return m
721
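
# Illustrative software outline (not used by the hardware) of the FetchFSM
# above: fetch 32 bits, check for an SVP64 prefix, and if present fetch the
# 32-bit suffix as well. read32() and is_svp64_prefix() are hypothetical
# stand-ins for the instruction memory and the SVP64PrefixDecoder:
def _fetch_flow_sketch(read32, is_svp64_prefix, pc):
    """returns (opcode, nia, is_svp64_mode) for the instruction at pc"""
    insn = read32(pc)
    if is_svp64_prefix(insn):
        return read32(pc + 4), pc + 8, True    # suffix; next PC is pc+8
    return insn, pc + 4, False                 # plain 32-bit instruction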
722
723 class TestIssuerInternal(TestIssuerBase, Elaboratable):
724 """TestIssuer - reads instructions from TestMemory and issues them
725
726 efficiency and speed are not the main goals here: functional correctness
727 and code clarity are. optimisations (which almost 100% interfere with
728 easy understanding) come later.
729 """
730
731 def fetch_predicate_fsm(self, m,
732 pred_insn_i_valid, pred_insn_o_ready,
733 pred_mask_o_valid, pred_mask_i_ready):
734 """fetch_predicate_fsm - obtains (constructs in the case of CR)
735 src/dest predicate masks
736
737 https://bugs.libre-soc.org/show_bug.cgi?id=617
738 the predicates can be read here, by using IntRegs r_ports['pred']
739 or CRRegs r_ports['pred']. in the case of CRs it will have to
740 be done through multiple reads, extracting one relevant at a time.
741 later, a faster way would be to use the 32-bit-wide CR port but
742 this is more complex decoding, here. equivalent code used in
743 ISACaller is "from openpower.decoder.isa.caller import get_predcr"
744
745 note: this ENTIRE FSM is not to be called when svp64 is disabled
746 """
747 comb = m.d.comb
748 sync = m.d.sync
749 pdecode2 = self.pdecode2
750 rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
751 predmode = rm_dec.predmode
752 srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
753 cr_pred, int_pred = self.cr_pred, self.int_pred # read regfiles
754 # get src/dst step, so we can skip already used mask bits
755 cur_state = self.cur_state
756 srcstep = cur_state.svstate.srcstep
757 dststep = cur_state.svstate.dststep
758 cur_vl = cur_state.svstate.vl
759
760 # decode predicates
761 sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
762 dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
763 sidx, scrinvert = get_predcr(m, srcpred, 's')
764 didx, dcrinvert = get_predcr(m, dstpred, 'd')
765
766 # store fetched masks, for either intpred or crpred
767 # when src/dst step is not zero, the skipped mask bits need to be
768 # shifted-out, before actually storing them in src/dest mask
769 new_srcmask = Signal(64, reset_less=True)
770 new_dstmask = Signal(64, reset_less=True)
771
772 with m.FSM(name="fetch_predicate"):
773
774 with m.State("FETCH_PRED_IDLE"):
775 comb += pred_insn_o_ready.eq(1)
776 with m.If(pred_insn_i_valid):
777 with m.If(predmode == SVP64PredMode.INT):
778 # skip fetching destination mask register, when zero
779 with m.If(dall1s):
780 sync += new_dstmask.eq(-1)
781 # directly go to fetch source mask register
782 # guaranteed not to be zero (otherwise predmode
783 # would be SVP64PredMode.ALWAYS, not INT)
784 comb += int_pred.addr.eq(sregread)
785 comb += int_pred.ren.eq(1)
786 m.next = "INT_SRC_READ"
787 # fetch destination predicate register
788 with m.Else():
789 comb += int_pred.addr.eq(dregread)
790 comb += int_pred.ren.eq(1)
791 m.next = "INT_DST_READ"
792 with m.Elif(predmode == SVP64PredMode.CR):
793 # go fetch masks from the CR register file
794 sync += new_srcmask.eq(0)
795 sync += new_dstmask.eq(0)
796 m.next = "CR_READ"
797 with m.Else():
798 sync += self.srcmask.eq(-1)
799 sync += self.dstmask.eq(-1)
800 m.next = "FETCH_PRED_DONE"
801
802 with m.State("INT_DST_READ"):
803 # store destination mask
804 inv = Repl(dinvert, 64)
805 with m.If(dunary):
806 # set selected mask bit for 1<<r3 mode
807 dst_shift = Signal(range(64))
808 comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
809 sync += new_dstmask.eq(1 << dst_shift)
810 with m.Else():
811 # invert mask if requested
812 sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
813 # skip fetching source mask register, when zero
814 with m.If(sall1s):
815 sync += new_srcmask.eq(-1)
816 m.next = "FETCH_PRED_SHIFT_MASK"
817 # fetch source predicate register
818 with m.Else():
819 comb += int_pred.addr.eq(sregread)
820 comb += int_pred.ren.eq(1)
821 m.next = "INT_SRC_READ"
822
823 with m.State("INT_SRC_READ"):
824 # store source mask
825 inv = Repl(sinvert, 64)
826 with m.If(sunary):
827 # set selected mask bit for 1<<r3 mode
828 src_shift = Signal(range(64))
829 comb += src_shift.eq(self.int_pred.o_data & 0b111111)
830 sync += new_srcmask.eq(1 << src_shift)
831 with m.Else():
832 # invert mask if requested
833 sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
834 m.next = "FETCH_PRED_SHIFT_MASK"
835
836 # fetch masks from the CR register file
837 # implements the following loop:
838 # idx, inv = get_predcr(mask)
839 # mask = 0
840 # for cr_idx in range(vl):
841 # cr = crl[cr_idx + SVP64CROffs.CRPred] # takes one cycle
842 # if cr[idx] ^ inv:
843 # mask |= 1 << cr_idx
844 # return mask
845 with m.State("CR_READ"):
846 # CR index to be read, which will be ready by the next cycle
847 cr_idx = Signal.like(cur_vl, reset_less=True)
848 # submit the read operation to the regfile
849 with m.If(cr_idx != cur_vl):
850 # the CR read port is unary ...
851 # ren = 1 << cr_idx
852 # ... in MSB0 convention ...
853 # ren = 1 << (7 - cr_idx)
854 # ... and with an offset:
855 # ren = 1 << (7 - off - cr_idx)
856 idx = SVP64CROffs.CRPred + cr_idx
857 comb += cr_pred.ren.eq(1 << (7 - idx))
858 # signal data valid in the next cycle
859 cr_read = Signal(reset_less=True)
860 sync += cr_read.eq(1)
861 # load the next index
862 sync += cr_idx.eq(cr_idx + 1)
863 with m.Else():
864 # exit on loop end
865 sync += cr_read.eq(0)
866 sync += cr_idx.eq(0)
867 m.next = "FETCH_PRED_SHIFT_MASK"
868 with m.If(cr_read):
869 # compensate for the one cycle delay on the regfile
870 cur_cr_idx = Signal.like(cur_vl)
871 comb += cur_cr_idx.eq(cr_idx - 1)
872 # read the CR field, select the appropriate bit
873 cr_field = Signal(4)
874 scr_bit = Signal()
875 dcr_bit = Signal()
876 comb += cr_field.eq(cr_pred.o_data)
877 comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
878 ^ scrinvert)
879 comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
880 ^ dcrinvert)
881 # set the corresponding mask bit
882 bit_to_set = Signal.like(self.srcmask)
883 comb += bit_to_set.eq(1 << cur_cr_idx)
884 with m.If(scr_bit):
885 sync += new_srcmask.eq(new_srcmask | bit_to_set)
886 with m.If(dcr_bit):
887 sync += new_dstmask.eq(new_dstmask | bit_to_set)
888
889 with m.State("FETCH_PRED_SHIFT_MASK"):
890 # shift-out skipped mask bits
891 sync += self.srcmask.eq(new_srcmask >> srcstep)
892 sync += self.dstmask.eq(new_dstmask >> dststep)
893 m.next = "FETCH_PRED_DONE"
894
895 with m.State("FETCH_PRED_DONE"):
896 comb += pred_mask_o_valid.eq(1)
897 with m.If(pred_mask_i_ready):
898 m.next = "FETCH_PRED_IDLE"
899
900 def issue_fsm(self, m, core, msr_changed, pc_changed, sv_changed, nia,
901 dbg, core_rst, is_svp64_mode,
902 fetch_pc_o_ready, fetch_pc_i_valid,
903 fetch_insn_o_valid, fetch_insn_i_ready,
904 pred_insn_i_valid, pred_insn_o_ready,
905 pred_mask_o_valid, pred_mask_i_ready,
906 exec_insn_i_valid, exec_insn_o_ready,
907 exec_pc_o_valid, exec_pc_i_ready):
908 """issue FSM
909
910 decode / issue FSM. this interacts with the "fetch" FSM
911 through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
912 (outgoing). also interacts with the "execute" FSM
913 through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
914 (incoming).
915 SVP64 RM prefixes have already been set up by the
916 "fetch" phase, so execute is fairly straightforward.
917 """
918
919 comb = m.d.comb
920 sync = m.d.sync
921 pdecode2 = self.pdecode2
922 cur_state = self.cur_state
923
924 # temporaries
925 dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
926
927 # for updating svstate (things like srcstep etc.)
928 update_svstate = Signal() # set this (below) if updating
929 new_svstate = SVSTATERec("new_svstate")
930 comb += new_svstate.eq(cur_state.svstate)
931
932 # precalculate srcstep+1 and dststep+1
933 cur_srcstep = cur_state.svstate.srcstep
934 cur_dststep = cur_state.svstate.dststep
935 next_srcstep = Signal.like(cur_srcstep)
936 next_dststep = Signal.like(cur_dststep)
937 comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
938 comb += next_dststep.eq(cur_state.svstate.dststep+1)
939
940 # note if an exception happened. in a pipelined or OoO design
941 # this needs to be accompanied by "shadowing" (or stalling)
942 exc_happened = self.core.o.exc_happened
943 # also note instruction fetch failed
944 if hasattr(core, "icache"):
945 fetch_failed = core.icache.i_out.fetch_failed
946 flush_needed = True
947 # set to fault in decoder
948 # update (highest priority) instruction fault
949 rising_fetch_failed = rising_edge(m, fetch_failed)
950 with m.If(rising_fetch_failed):
951 sync += pdecode2.instr_fault.eq(1)
952 else:
953 fetch_failed = Const(0, 1)
954 flush_needed = False
955
956 with m.FSM(name="issue_fsm"):
957
958 # sync with the "fetch" phase which is reading the instruction
959 # at this point, there is no instruction running, that
960 # could inadvertently update the PC.
961 with m.State("ISSUE_START"):
962 # reset instruction fault
963 sync += pdecode2.instr_fault.eq(0)
964 # wait on "core stop" release, before next fetch
965 # need to do this here, in case we are in a VL==0 loop
966 with m.If(~dbg.core_stop_o & ~core_rst):
967 comb += fetch_pc_i_valid.eq(1) # tell fetch to start
968 with m.If(fetch_pc_o_ready): # fetch acknowledged us
969 m.next = "INSN_WAIT"
970 with m.Else():
971 # tell core it's stopped, and acknowledge debug handshake
972 comb += dbg.core_stopped_i.eq(1)
973 # while stopped, allow updating the MSR, PC and SVSTATE
974 with m.If(self.pc_i.ok):
975 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
976 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
977 sync += pc_changed.eq(1)
978 with m.If(self.msr_i.ok):
979 comb += self.state_w_msr.wen.eq(1 << StateRegs.MSR)
980 comb += self.state_w_msr.i_data.eq(self.msr_i.data)
981 sync += msr_changed.eq(1)
982 with m.If(self.svstate_i.ok):
983 comb += new_svstate.eq(self.svstate_i.data)
984 comb += update_svstate.eq(1)
985 sync += sv_changed.eq(1)
986
987 # wait for an instruction to arrive from Fetch
988 with m.State("INSN_WAIT"):
989 if self.allow_overlap:
990 stopping = dbg.stopping_o
991 else:
992 stopping = Const(0)
993 with m.If(stopping):
994 # stopping: jump back to idle
995 m.next = "ISSUE_START"
996 if flush_needed:
997 # request the icache to stop asserting "failed"
998 comb += core.icache.flush_in.eq(1)
999 # stop instruction fault
1000 sync += pdecode2.instr_fault.eq(0)
1001 with m.Else():
1002 comb += fetch_insn_i_ready.eq(1)
1003 with m.If(fetch_insn_o_valid):
1004 # loop into ISSUE_START if it's a SVP64 instruction
1005 # and VL == 0. this because VL==0 is a for-loop
1006 # from 0 to 0 i.e. always, always a NOP.
1007 cur_vl = cur_state.svstate.vl
1008 with m.If(is_svp64_mode & (cur_vl == 0)):
1009 # update the PC before fetching the next instruction
1010 # since we are in a VL==0 loop, no instruction was
1011 # executed that we could be overwriting
1012 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1013 comb += self.state_w_pc.i_data.eq(nia)
1014 comb += self.insn_done.eq(1)
1015 m.next = "ISSUE_START"
1016 with m.Else():
1017 if self.svp64_en:
1018 m.next = "PRED_START" # fetching predicate
1019 else:
1020 m.next = "DECODE_SV" # skip predication
1021
1022 with m.State("PRED_START"):
1023 comb += pred_insn_i_valid.eq(1) # tell fetch_pred to start
1024 with m.If(pred_insn_o_ready): # fetch_pred acknowledged us
1025 m.next = "MASK_WAIT"
1026
1027 with m.State("MASK_WAIT"):
1028 comb += pred_mask_i_ready.eq(1) # ready to receive the masks
1029 with m.If(pred_mask_o_valid): # predication masks are ready
1030 m.next = "PRED_SKIP"
1031
1032 # skip zeros in predicate
1033 with m.State("PRED_SKIP"):
1034 with m.If(~is_svp64_mode):
1035 m.next = "DECODE_SV" # nothing to do
1036 with m.Else():
1037 if self.svp64_en:
1038 pred_src_zero = pdecode2.rm_dec.pred_sz
1039 pred_dst_zero = pdecode2.rm_dec.pred_dz
1040
1041 # new srcstep, after skipping zeros
1042 skip_srcstep = Signal.like(cur_srcstep)
1043 # value to be added to the current srcstep
1044 src_delta = Signal.like(cur_srcstep)
1045 # add leading zeros to srcstep, if not in zero mode
1046 with m.If(~pred_src_zero):
1047 # priority encoder (count leading zeros)
1048 # append guard bit, in case the mask is all zeros
1049 pri_enc_src = PriorityEncoder(65)
1050 m.submodules.pri_enc_src = pri_enc_src
1051 comb += pri_enc_src.i.eq(Cat(self.srcmask,
1052 Const(1, 1)))
1053 comb += src_delta.eq(pri_enc_src.o)
1054 # apply delta to srcstep
1055 comb += skip_srcstep.eq(cur_srcstep + src_delta)
1056 # shift-out all leading zeros from the mask
1057 # plus the leading "one" bit
1058 # TODO count leading zeros and shift-out the zero
1059 # bits, in the same step, in hardware
1060 sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
1061
1062 # same as above, but for dststep
1063 skip_dststep = Signal.like(cur_dststep)
1064 dst_delta = Signal.like(cur_dststep)
1065 with m.If(~pred_dst_zero):
1066 pri_enc_dst = PriorityEncoder(65)
1067 m.submodules.pri_enc_dst = pri_enc_dst
1068 comb += pri_enc_dst.i.eq(Cat(self.dstmask,
1069 Const(1, 1)))
1070 comb += dst_delta.eq(pri_enc_dst.o)
1071 comb += skip_dststep.eq(cur_dststep + dst_delta)
1072 sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
1073
1074 # TODO: initialize mask[VL]=1 to avoid passing past VL
1075 with m.If((skip_srcstep >= cur_vl) |
1076 (skip_dststep >= cur_vl)):
1077 # end of VL loop. Update PC and reset src/dst step
1078 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1079 comb += self.state_w_pc.i_data.eq(nia)
1080 comb += new_svstate.srcstep.eq(0)
1081 comb += new_svstate.dststep.eq(0)
1082 comb += update_svstate.eq(1)
1083 # synchronize with the simulator
1084 comb += self.insn_done.eq(1)
1085 # go back to Issue
1086 m.next = "ISSUE_START"
1087 with m.Else():
1088 # update new src/dst step
1089 comb += new_svstate.srcstep.eq(skip_srcstep)
1090 comb += new_svstate.dststep.eq(skip_dststep)
1091 comb += update_svstate.eq(1)
1092 # proceed to Decode
1093 m.next = "DECODE_SV"
1094
1095 # pass predicate mask bits through to satellite decoders
1096 # TODO: for SIMD this will be *multiple* bits
1097 sync += core.i.sv_pred_sm.eq(self.srcmask[0])
1098 sync += core.i.sv_pred_dm.eq(self.dstmask[0])
1099
1100 # after src/dst step have been updated, we are ready
1101 # to decode the instruction
1102 with m.State("DECODE_SV"):
1103 # decode the instruction
1104 with m.If(~fetch_failed):
1105 sync += pdecode2.instr_fault.eq(0)
1106 sync += core.i.e.eq(pdecode2.e)
1107 sync += core.i.state.eq(cur_state)
1108 sync += core.i.raw_insn_i.eq(dec_opcode_i)
1109 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
1110 if self.svp64_en:
1111 sync += core.i.sv_rm.eq(pdecode2.sv_rm)
1112 # set RA_OR_ZERO detection in satellite decoders
1113 sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
1114 # and svp64 detection
1115 sync += core.i.is_svp64_mode.eq(is_svp64_mode)
1116 # and svp64 bit-rev'd ldst mode
1117 ldst_dec = pdecode2.use_svp64_ldst_dec
1118 sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
1119 # after decoding, reset any previous exception condition,
1120 # allowing it to be set again during the next execution
1121 sync += pdecode2.ldst_exc.eq(0)
1122
1123 m.next = "INSN_EXECUTE" # move to "execute"
1124
1125 # handshake with execution FSM, move to "wait" once acknowledged
1126 with m.State("INSN_EXECUTE"):
1127 comb += exec_insn_i_valid.eq(1) # trigger execute
1128 with m.If(exec_insn_o_ready): # execute acknowledged us
1129 m.next = "EXECUTE_WAIT"
1130
1131 with m.State("EXECUTE_WAIT"):
1132 # wait on "core stop" release, at instruction end
1133 # need to do this here, in case we are in a VL>1 loop
1134 with m.If(~dbg.core_stop_o & ~core_rst):
1135 comb += exec_pc_i_ready.eq(1)
1136 # see https://bugs.libre-soc.org/show_bug.cgi?id=636
1137 # the exception info needs to be blatted into
1138 # pdecode.ldst_exc, and the instruction "re-run".
1139 # when ldst_exc.happened is set, the PowerDecoder2
1140 # reacts very differently: it re-writes the instruction
1141 # with a "trap" (calls PowerDecoder2.trap()) which
1142 # will *overwrite* whatever was requested and jump the
1143 # PC to the exception address, as well as alter MSR.
1144 # nothing else needs to be done other than to note
1145 # the change of PC and MSR (and, later, SVSTATE)
1146 with m.If(exc_happened):
1147 mmu = core.fus.get_exc("mmu0")
1148 ldst = core.fus.get_exc("ldst0")
1149 if mmu is not None:
1150 with m.If(fetch_failed):
1151 # instruction fetch: exception is from MMU
1152 # reset instr_fault (highest priority)
1153 sync += pdecode2.ldst_exc.eq(mmu)
1154 sync += pdecode2.instr_fault.eq(0)
1155 if flush_needed:
1156 # request icache to stop asserting "failed"
1157 comb += core.icache.flush_in.eq(1)
1158 with m.If(~fetch_failed):
1159 # otherwise assume it was a LDST exception
1160 sync += pdecode2.ldst_exc.eq(ldst)
1161
1162 with m.If(exec_pc_o_valid):
1163
1164 # was this the last loop iteration?
1165 is_last = Signal()
1166 cur_vl = cur_state.svstate.vl
1167 comb += is_last.eq(next_srcstep == cur_vl)
1168
1169 # return directly to Decode if Execute generated an
1170 # exception.
1171 with m.If(pdecode2.ldst_exc.happened):
1172 m.next = "DECODE_SV"
1173
1174 # if MSR, PC or SVSTATE were changed by the previous
1175 # instruction, go directly back to Fetch, without
1176 # updating either MSR PC or SVSTATE
1177 with m.Elif(msr_changed | pc_changed | sv_changed):
1178 m.next = "ISSUE_START"
1179
1180 # also return to Fetch, when no output was a vector
1181 # (regardless of SRCSTEP and VL), or when the last
1182 # instruction was really the last one of the VL loop
1183 with m.Elif((~pdecode2.loop_continue) | is_last):
1184 # before going back to fetch, update the PC state
1185 # register with the NIA.
1186 # ok here we are not reading the branch unit.
1187 # TODO: this just blithely overwrites whatever
1188 # pipeline updated the PC
1189 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1190 comb += self.state_w_pc.i_data.eq(nia)
1191 # reset SRCSTEP before returning to Fetch
1192 if self.svp64_en:
1193 with m.If(pdecode2.loop_continue):
1194 comb += new_svstate.srcstep.eq(0)
1195 comb += new_svstate.dststep.eq(0)
1196 comb += update_svstate.eq(1)
1197 else:
1198 comb += new_svstate.srcstep.eq(0)
1199 comb += new_svstate.dststep.eq(0)
1200 comb += update_svstate.eq(1)
1201 m.next = "ISSUE_START"
1202
1203 # returning to Execute? then, first update SRCSTEP
1204 with m.Else():
1205 comb += new_svstate.srcstep.eq(next_srcstep)
1206 comb += new_svstate.dststep.eq(next_dststep)
1207 comb += update_svstate.eq(1)
1208 # return to mask skip loop
1209 m.next = "PRED_SKIP"
1210
1211 with m.Else():
1212 comb += dbg.core_stopped_i.eq(1)
1213 if flush_needed:
1214 # request the icache to stop asserting "failed"
1215 comb += core.icache.flush_in.eq(1)
1216 # stop instruction fault
1217 sync += pdecode2.instr_fault.eq(0)
1223 # while stopped, allow updating the MSR, PC and SVSTATE
1224 with m.If(self.msr_i.ok):
1225 comb += self.state_w_msr.wen.eq(1 << StateRegs.MSR)
1226 comb += self.state_w_msr.i_data.eq(self.msr_i.data)
1227 sync += msr_changed.eq(1)
1228 with m.If(self.pc_i.ok):
1229 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
1230 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
1231 sync += pc_changed.eq(1)
1232 with m.If(self.svstate_i.ok):
1233 comb += new_svstate.eq(self.svstate_i.data)
1234 comb += update_svstate.eq(1)
1235 sync += sv_changed.eq(1)
1236
1237 # check if svstate needs updating: if so, write it to State Regfile
1238 with m.If(update_svstate):
1239 comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
1240 comb += self.state_w_sv.i_data.eq(new_svstate)
1241 sync += cur_state.svstate.eq(new_svstate) # for next clock
1242
1243 def execute_fsm(self, m, core, msr_changed, pc_changed, sv_changed,
1244 exec_insn_i_valid, exec_insn_o_ready,
1245 exec_pc_o_valid, exec_pc_i_ready):
1246 """execute FSM
1247
1248 execute FSM. this interacts with the "issue" FSM
1249 through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
1250 (outgoing). SVP64 RM prefixes have already been set up by the
1251 "issue" phase, so execute is fairly straightforward.
1252 """
1253
1254 comb = m.d.comb
1255 sync = m.d.sync
1256 pdecode2 = self.pdecode2
1257
1258 # temporaries
1259 core_busy_o = core.n.o_data.busy_o # core is busy
1260 core_ivalid_i = core.p.i_valid # instruction is valid
1261
1262 if hasattr(core, "icache"):
1263 fetch_failed = core.icache.i_out.fetch_failed
1264 else:
1265 fetch_failed = Const(0, 1)
1266
1267 with m.FSM(name="exec_fsm"):
1268
1269 # waiting for instruction bus (stays there until not busy)
1270 with m.State("INSN_START"):
1271 comb += exec_insn_o_ready.eq(1)
1272 with m.If(exec_insn_i_valid):
1273 comb += core_ivalid_i.eq(1) # instruction is valid/issued
1274 sync += sv_changed.eq(0)
1275 sync += pc_changed.eq(0)
1276 sync += msr_changed.eq(0)
1277 with m.If(core.p.o_ready): # only move if accepted
1278 m.next = "INSN_ACTIVE" # move to "wait completion"
1279
1280 # instruction started: must wait till it finishes
1281 with m.State("INSN_ACTIVE"):
1282 # note changes to MSR, PC and SVSTATE
1283 with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
1284 sync += sv_changed.eq(1)
1285 with m.If(self.state_nia.wen & (1 << StateRegs.MSR)):
1286 sync += msr_changed.eq(1)
1287 with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
1288 sync += pc_changed.eq(1)
1289 with m.If(~core_busy_o): # instruction done!
1290 comb += exec_pc_o_valid.eq(1)
1291 with m.If(exec_pc_i_ready):
1292 # when finished, indicate "done".
1293 # however, if there was an exception, the instruction
1294 # is *not* yet done. this is an implementation
1295 # detail: we choose to implement exceptions by
1296 # taking the exception information from the LDST
1297 # unit, putting that *back* into the PowerDecoder2,
1298 # and *re-running the entire instruction*.
1299 # if we erroneously indicate "done" here, it is as if
1300 # there were *TWO* instructions:
1301 # 1) the failed LDST 2) a TRAP.
1302 with m.If(~pdecode2.ldst_exc.happened &
1303 ~fetch_failed):
1304 comb += self.insn_done.eq(1)
1305 m.next = "INSN_START" # back to fetch
1306
1307 def elaborate(self, platform):
1308 m = Module()
1309 # convenience
1310 comb, sync = m.d.comb, m.d.sync
1311 cur_state = self.cur_state
1312 pdecode2 = self.pdecode2
1313 dbg = self.dbg
1314 core = self.core
1315
1316 # set up peripherals and core
1317 core_rst = self.core_rst
1318 self.setup_peripherals(m)
1319
1320 # reset current state if core reset requested
1321 with m.If(core_rst):
1322 m.d.sync += self.cur_state.eq(0)
1323
1324 # PC and instruction from I-Memory
1325 comb += self.pc_o.eq(cur_state.pc)
1326 pc_changed = Signal() # note write to PC
1327 msr_changed = Signal() # note write to MSR
1328 sv_changed = Signal() # note write to SVSTATE
1329
1330 # indicate to outside world if any FU is still executing
1331 comb += self.any_busy.eq(core.n.o_data.any_busy_o) # any FU executing
1332
1333 # read state either from incoming override or from regfile
1334 state = CoreState("get") # current state (MSR/PC/SVSTATE)
1335 state_get(m, state.msr, core_rst, self.msr_i,
1336 "msr", # read MSR
1337 self.state_r_msr, StateRegs.MSR)
1338 state_get(m, state.pc, core_rst, self.pc_i,
1339 "pc", # read PC
1340 self.state_r_pc, StateRegs.PC)
1341 state_get(m, state.svstate, core_rst, self.svstate_i,
1342 "svstate", # read SVSTATE
1343 self.state_r_sv, StateRegs.SVSTATE)
1344
1345 # don't write pc every cycle
1346 comb += self.state_w_pc.wen.eq(0)
1347 comb += self.state_w_pc.i_data.eq(0)
1348
1349 # address of the next instruction, in the absence of a branch
1350 # depends on the instruction size
1351 nia = Signal(64)
1352
1353 # connect up debug signals
1354 # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
1355 comb += dbg.terminate_i.eq(core.o.core_terminate_o)
1356 comb += dbg.state.eq(state)
1357
1358 # pass the prefix mode from Fetch to Issue, so the latter can loop
1359 # on VL==0
1360 is_svp64_mode = Signal()
1361
1362 # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
1363 # issue, decode/execute, now joined by "Predicate fetch/calculate".
1364 # these are the handshake signals between each
1365
1366 # fetch FSM can run as soon as the PC is valid
1367 fetch_pc_i_valid = Signal() # Execute tells Fetch "start next read"
1368 fetch_pc_o_ready = Signal() # Fetch Tells SVSTATE "proceed"
1369
1370 # fetch FSM hands over the instruction to be decoded / issued
1371 fetch_insn_o_valid = Signal()
1372 fetch_insn_i_ready = Signal()
1373
1374 # predicate fetch FSM decodes and fetches the predicate
1375 pred_insn_i_valid = Signal()
1376 pred_insn_o_ready = Signal()
1377
1378 # predicate fetch FSM delivers the masks
1379 pred_mask_o_valid = Signal()
1380 pred_mask_i_ready = Signal()
1381
1382 # issue FSM delivers the instruction to be executed
1383 exec_insn_i_valid = Signal()
1384 exec_insn_o_ready = Signal()
1385
1386 # execute FSM, hands over the PC/SVSTATE back to the issue FSM
1387 exec_pc_o_valid = Signal()
1388 exec_pc_i_ready = Signal()
1389
1390 # the FSMs here are perhaps unusual in that they detect conditions
1391 # then "hold" information, combinatorially, for the core
1392 # (as opposed to using sync - which would be on a clock's delay)
1393 # this includes the actual opcode, valid flags and so on.
1394
1395 # Fetch, then predicate fetch, then Issue, then Execute.
1396 # Issue is where the VL for-loop lives. the ready/valid
1397 # signalling is used to communicate between the four.
1398
1399 # set up Fetch FSM
1400 fetch = FetchFSM(self.allow_overlap, self.svp64_en,
1401 self.imem, core_rst, pdecode2, cur_state,
1402 dbg, core, state.svstate, nia, is_svp64_mode)
1403 m.submodules.fetch = fetch
1404 # connect up in/out data to existing Signals
1405 comb += fetch.p.i_data.pc.eq(state.pc)
1406 comb += fetch.p.i_data.msr.eq(state.msr)
1407 # and the ready/valid signalling
1408 comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
1409 comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
1410 comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
1411 comb += fetch.n.i_ready.eq(fetch_insn_i_ready)
1412
1413 self.issue_fsm(m, core, msr_changed, pc_changed, sv_changed, nia,
1414 dbg, core_rst, is_svp64_mode,
1415 fetch_pc_o_ready, fetch_pc_i_valid,
1416 fetch_insn_o_valid, fetch_insn_i_ready,
1417 pred_insn_i_valid, pred_insn_o_ready,
1418 pred_mask_o_valid, pred_mask_i_ready,
1419 exec_insn_i_valid, exec_insn_o_ready,
1420 exec_pc_o_valid, exec_pc_i_ready)
1421
1422 if self.svp64_en:
1423 self.fetch_predicate_fsm(m,
1424 pred_insn_i_valid, pred_insn_o_ready,
1425 pred_mask_o_valid, pred_mask_i_ready)
1426
1427 self.execute_fsm(m, core, msr_changed, pc_changed, sv_changed,
1428 exec_insn_i_valid, exec_insn_o_ready,
1429 exec_pc_o_valid, exec_pc_i_ready)
1430
1431 # this bit doesn't have to be in the FSM: connect up to read
1432 # regfiles on demand from DMI
1433 self.do_dmi(m, dbg)
1434
1435 # DEC and TB inc/dec FSM. copy of DEC is put into CoreState,
1436 # (which uses that in PowerDecoder2 to raise 0x900 exception)
1437 self.tb_dec_fsm(m, cur_state.dec)
1438
1439 return m
1440
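
# Illustrative software model (not used by the hardware) of the predicate
# zero-skipping done in issue_fsm's PRED_SKIP state above: count trailing
# zeros of the remaining mask, advance the step by that amount, and shift
# the consumed bits (including the leading one) out of the mask:
def _pred_skip_sketch(mask, step):
    delta = 0
    while delta < 64 and not ((mask >> delta) & 1):
        delta += 1
    return step + delta, mask >> (delta + 1)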
1441
1442 class TestIssuer(Elaboratable):
1443 def __init__(self, pspec):
1444 self.ti = TestIssuerInternal(pspec)
1445 self.pll = DummyPLL(instance=True)
1446
1447 # PLL direct clock or not
1448 self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
1449 if self.pll_en:
1450 self.pll_test_o = Signal(reset_less=True)
1451 self.pll_vco_o = Signal(reset_less=True)
1452 self.clk_sel_i = Signal(2, reset_less=True)
1453 self.ref_clk = ClockSignal() # can't rename it but that's ok
1454 self.pllclk_clk = ClockSignal("pllclk")
1455
1456 def elaborate(self, platform):
1457 m = Module()
1458 comb = m.d.comb
1459
1460 # TestIssuer nominally runs at main clock, actually it is
1461 # all combinatorial internally except for coresync'd components
1462 m.submodules.ti = ti = self.ti
1463
1464 if self.pll_en:
1465 # ClockSelect runs at PLL output internal clock rate
1466 m.submodules.wrappll = pll = self.pll
1467
1468 # add clock domains from PLL
1469 cd_pll = ClockDomain("pllclk")
1470 m.domains += cd_pll
1471
1472 # PLL clock established. has the side-effect of running clksel
1473 # at the PLL's speed (see DomainRenamer("pllclk") above)
1474 pllclk = self.pllclk_clk
1475 comb += pllclk.eq(pll.clk_pll_o)
1476
1477 # wire up external 24mhz to PLL
1478 #comb += pll.clk_24_i.eq(self.ref_clk)
1479 # output 18 mhz PLL test signal, and analog oscillator out
1480 comb += self.pll_test_o.eq(pll.pll_test_o)
1481 comb += self.pll_vco_o.eq(pll.pll_vco_o)
1482
1483 # input to pll clock selection
1484 comb += pll.clk_sel_i.eq(self.clk_sel_i)
1485
1486 # now wire up ResetSignals. don't mind them being in this domain
1487 pll_rst = ResetSignal("pllclk")
1488 comb += pll_rst.eq(ResetSignal())
1489
1490 # internal clock is set to selector clock-out. has the side-effect of
1491 # running TestIssuer at this speed (see DomainRenamer("intclk") above)
1492 # debug clock runs at coresync internal clock
1493 cd_coresync = ClockDomain("coresync")
1494 #m.domains += cd_coresync
1495 if self.ti.dbg_domain != 'sync':
1496 cd_dbgsync = ClockDomain("dbgsync")
1497 #m.domains += cd_dbgsync
1498 intclk = ClockSignal("coresync")
1499 dbgclk = ClockSignal(self.ti.dbg_domain)
1500 # XXX BYPASS PLL XXX
1501 # XXX BYPASS PLL XXX
1502 # XXX BYPASS PLL XXX
1503 if self.pll_en:
1504 comb += intclk.eq(self.ref_clk)
1505 else:
1506 comb += intclk.eq(ClockSignal())
1507 if self.ti.dbg_domain != 'sync':
1508 dbgclk = ClockSignal(self.ti.dbg_domain)
1509 comb += dbgclk.eq(intclk)
1510
1511 return m
1512
1513 def ports(self):
1514 return list(self.ti.ports()) + list(self.pll.ports()) + \
1515 [ClockSignal(), ResetSignal()]
1516
1517 def external_ports(self):
1518 ports = self.ti.external_ports()
1519 ports.append(ClockSignal())
1520 ports.append(ResetSignal())
1521 if self.pll_en:
1522 ports.append(self.clk_sel_i)
1523 ports.append(self.pll.clk_24_i)
1524 ports.append(self.pll_test_o)
1525 ports.append(self.pll_vco_o)
1526 ports.append(self.pllclk_clk)
1527 ports.append(self.ref_clk)
1528 return ports
1529
1530
1531 if __name__ == '__main__':
1532 units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
1533 'spr': 1,
1534 'div': 1,
1535 'mul': 1,
1536 'shiftrot': 1
1537 }
1538 pspec = TestMemPspec(ldst_ifacetype='bare_wb',
1539 imem_ifacetype='bare_wb',
1540 addr_wid=48,
1541 mask_wid=8,
1542 reg_wid=64,
1543 units=units)
1544 dut = TestIssuer(pspec)
1545 vl = main(dut, ports=dut.ports(), name="test_issuer")
1546
1547 if len(sys.argv) == 1:
1548 vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
1549 with open("test_issuer.il", "w") as f:
1550 f.write(vl)