when an exception happens, if it is a fetch_failed take the
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to at least provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from nmutil.singlepipe import ControlBase
25 from soc.simple.core_data import FetchOutput, FetchInput
26
27 from nmigen.lib.coding import PriorityEncoder
28
29 from openpower.decoder.power_decoder import create_pdecode
30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
32 from openpower.decoder.decode2execute1 import Data
33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
34 SVP64PredMode)
35 from openpower.state import CoreState
36 from openpower.consts import (CR, SVP64CROffs)
37 from soc.experiment.testmem import TestMemory # test only for instructions
38 from soc.regfile.regfiles import StateRegs, FastRegs
39 from soc.simple.core import NonProductionCore
40 from soc.config.test.test_loadstore import TestMemPspec
41 from soc.config.ifetch import ConfigFetchUnit
42 from soc.debug.dmi import CoreDebug, DMIInterface
43 from soc.debug.jtag import JTAG
44 from soc.config.pinouts import get_pinspecs
45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
46 from soc.bus.simple_gpio import SimpleGPIO
47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
48 from soc.clock.select import ClockSelect
49 from soc.clock.dummypll import DummyPLL
50 from openpower.sv.svstate import SVSTATERec
51 from soc.experiment.icache import ICache
52
53 from nmutil.util import rising_edge
54
55
def get_insn(f_instr_o, pc):
    """pick the 32-bit instruction word out of the fetched data

    * f_instr_o: fetched data. only its "width" attribute and its
      "word_select" method are used here
    * pc: address the data was fetched from. only bit 2 is looked at

    returns f_instr_o itself when it is exactly 32 bits wide, otherwise
    the 32-bit word of f_instr_o that is selected by pc[2]
    """
    if f_instr_o.width != 32:
        # wider (64-bit) fetch: bit 2 of pc decides which word to select
        return f_instr_o.word_select(pc[2], 32)
    return f_instr_o
62
63 # gets state input or reads from state regfile
64
65
def state_get(m, core_rst, state_i, name, regfile, regnum):
    """obtain a 64-bit state value, from an override or from a regfile

    * m: Module that the generated logic is added to
    * core_rst: while this is asserted none of the logic below is active
    * state_i: override source, providing "ok" and "data"
    * name: name given to the result Signal (also the prefix of the
      internal "<name>_ok_delay" Signal)
    * regfile: read port, providing "ren" and "o_data"
    * regnum: bit number to set in the read port's "ren"

    returns the Signal that carries the selected value
    """
    value = Signal(64, reset_less=True, name=name)
    delayed_ok = Signal(name="%s_ok_delay" % name)
    with m.If(~core_rst):
        # remember, for one clock, that no override was present
        m.d.sync += delayed_ok.eq(~state_i.ok)
        with m.If(state_i.ok):
            # incoming override: pass it straight through
            m.d.comb += value.eq(state_i.data)
        with m.Else():
            # no override: request a read from the regfile instead...
            m.d.comb += regfile.ren.eq(1 << regnum)
        # ... the data of which is picked up on a 1-clock delay
        with m.If(delayed_ok):
            m.d.comb += value.eq(regfile.o_data)
    return value
84
85
def get_predint(m, mask, name):
    """decode an SVP64 integer-predicate mask field

    nothing is read from the INT regfile here: this only works out
    "what needs to be done", as four Signals (each one named with the
    "name" argument as a prefix) which are returned in this order:

    * regread: the GPR register number to be read
    * invert: set when the register value is to be bit-inverted
    * unary: set when the mask is a single bit, 1<<r3
    * all1s: set when no mask is to be applied (all ones)
    """
    comb = m.d.comb
    regread = Signal(5, name=name+"regread")
    invert = Signal(name=name+"invert")
    unary = Signal(name=name+"unary")
    all1s = Signal(name=name+"all1s")

    # one row per encoding: (encoding, GPR number, unary?, invert?)
    # a GPR number of None means "no register: mask is all ones"
    decode_table = (
        (SVP64PredInt.ALWAYS, None, False, False),
        (SVP64PredInt.R3_UNARY, 3, True, False),
        (SVP64PredInt.R3, 3, False, False),
        (SVP64PredInt.R3_N, 3, False, True),
        (SVP64PredInt.R10, 10, False, False),
        (SVP64PredInt.R10_N, 10, False, True),
        (SVP64PredInt.R30, 30, False, False),
        (SVP64PredInt.R30_N, 30, False, True),
    )

    with m.Switch(mask):
        for encoding, regnum, is_unary, is_inverted in decode_table:
            with m.Case(encoding.value):
                if regnum is None:
                    comb += all1s.eq(1)  # use all ones
                else:
                    comb += regread.eq(regnum)
                    if is_unary:
                        comb += unary.eq(1)  # 1<<r3: single-bit mask
                    if is_inverted:
                        comb += invert.eq(1)
    return regread, invert, unary, all1s
124
125
def get_predcr(m, mask, name):
    """decode an SVP64 CR-predicate mask field

    returns two Signals (each one named with the "name" argument as a
    prefix):

    * idx: which bit of a CR field is to be tested
    * invert: set when the tested bit is to be inverted
    """
    comb = m.d.comb
    idx = Signal(2, name=name+"idx")
    invert = Signal(name=name+"crinvert")

    # one row per encoding: (encoding, CR field bit, inverted?)
    decode_table = (
        (SVP64PredCR.LT, CR.LT, 0),
        (SVP64PredCR.GE, CR.LT, 1),
        (SVP64PredCR.GT, CR.GT, 0),
        (SVP64PredCR.LE, CR.GT, 1),
        (SVP64PredCR.EQ, CR.EQ, 0),
        (SVP64PredCR.NE, CR.EQ, 1),
        (SVP64PredCR.SO, CR.SO, 0),
        (SVP64PredCR.NS, CR.SO, 1),
    )

    with m.Switch(mask):
        for encoding, cr_bit, inverted in decode_table:
            with m.Case(encoding.value):
                comb += idx.eq(cr_bit)
                comb += invert.eq(inverted)
    return idx, invert
159
160
161 # Fetch Finite State Machine.
162 # WARNING: there are currently DriverConflicts but it's actually working.
163 # TODO, here: everything that is global in nature, information from the
164 # main TestIssuerInternal, needs to move to either ispec() or ospec().
165 # not only that: TestIssuerInternal.imem can entirely move into here
166 # because imem is only ever accessed inside the FetchFSM.
class FetchFSM(ControlBase):
    """instruction Fetch, as an FSM with ControlBase ready/valid handshakes

    the incoming side (self.p) receives the PC to fetch from, and the
    outgoing side (self.n) signals that an instruction has been placed
    into pdecode2.dec.raw_opcode_in.

    constructor arguments (all stored as attributes of the same name):

    * allow_overlap: when set, dbg.stopping_o aborts a fetch in progress
    * svp64_en: when set, each instruction is checked for an SVP64 prefix
    * imem: instruction memory / fetch unit
    * core_rst: core reset. nia is cleared whilst it is held
    * pdecode2: decoder that receives the fetched instruction
    * cur_state: receives the captured PC, SVSTATE and MSR
    * dbg: debug interface (stopping_o is used)
    * core: the core (its 'state' regfile and, if present, its icache)
    * svstate: SVSTATE value captured into cur_state at start of fetch
    * nia: receives the address of the next instruction
    * is_svp64_mode: receives whether the instruction was prefixed
    * svp64: SVP64 prefix decoder. required when svp64_en is set
    * core_bigendian_i: endianness passed to the prefix decoder.
      required when svp64_en is set

    the last two may alternatively be attached as attributes after
    construction, which is why they are optional here.
    """

    def __init__(self, allow_overlap, svp64_en, imem, core_rst,
                 pdecode2, cur_state,
                 dbg, core, svstate, nia, is_svp64_mode,
                 svp64=None, core_bigendian_i=None):
        self.allow_overlap = allow_overlap
        self.svp64_en = svp64_en
        self.imem = imem
        self.core_rst = core_rst
        self.pdecode2 = pdecode2
        self.cur_state = cur_state
        self.dbg = dbg
        self.core = core
        self.svstate = svstate
        self.nia = nia
        self.is_svp64_mode = is_svp64_mode
        # elaborate() reads these two when svp64_en is set. they are only
        # assigned when actually given, so that nothing changes for a
        # caller that does not pass them
        if svp64 is not None:
            self.svp64 = svp64
        if core_bigendian_i is not None:
            self.core_bigendian_i = core_bigendian_i

        # set up pipeline ControlBase and allocate i/o specs
        # (unusual: normally done by the Pipeline API)
        super().__init__(stage=self)
        self.p.i_data, self.n.o_data = self.new_specs(None)
        self.i, self.o = self.p.i_data, self.n.o_data

    # next 3 functions are Stage API Compliance
    def setup(self, m, i):
        """Stage API: nothing to set up"""
        pass

    def ispec(self):
        """Stage API: the input data specification"""
        return FetchInput()

    def ospec(self):
        """Stage API: the output data specification"""
        return FetchOutput()

    def elaborate(self, platform):
        """fetch FSM

        this FSM performs fetch of raw instruction data, partial-decodes
        it 32-bit at a time to detect SVP64 prefixes, and will optionally
        read a 2nd 32-bit quantity if that occurs.

        raises AttributeError if svp64_en is set but the SVP64 prefix
        decoder or the endianness signal has not been provided.
        """
        m = super().elaborate(platform)

        dbg = self.dbg
        core = self.core
        pc = self.i.pc
        svstate = self.svstate
        nia = self.nia
        is_svp64_mode = self.is_svp64_mode
        fetch_pc_o_ready = self.p.o_ready
        fetch_pc_i_valid = self.p.i_valid
        fetch_insn_o_valid = self.n.o_valid
        fetch_insn_i_ready = self.n.i_ready

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode

        # the SVP64 path needs the prefix decoder and the endianness:
        # fail with a clear message, rather than half-way through the FSM
        svp64 = None
        bigendian = None
        if self.svp64_en:
            svp64 = getattr(self, "svp64", None)
            bigendian = getattr(self, "core_bigendian_i", None)
            if svp64 is None or bigendian is None:
                raise AttributeError("FetchFSM: svp64_en is set, but svp64 "
                                     "and/or core_bigendian_i was not given")

        msr_read = Signal(reset=1)

        # also note instruction fetch failed
        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
        else:
            fetch_failed = Const(0, 1)

        # don't read msr every cycle
        staterf = core.regs.rf['state']
        state_r_msr = staterf.r_ports['msr']  # MSR rd

        comb += state_r_msr.ren.eq(0)

        with m.FSM(name='fetch_fsm'):

            # waiting (zzz)
            with m.State("IDLE"):
                with m.If(~dbg.stopping_o & ~fetch_failed):
                    comb += fetch_pc_o_ready.eq(1)
                with m.If(fetch_pc_i_valid & ~fetch_failed):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                    sync += cur_state.pc.eq(pc)
                    sync += cur_state.svstate.eq(svstate)  # and svstate

                    # initiate read of MSR.  arrives one clock later
                    comb += state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # wait for the instruction memory to deliver the first word
            with m.State("INSN_READ"):
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "IDLE"
                with m.Else():
                    # one cycle later, msr/sv read arrives.  valid only once.
                    with m.If(~msr_read):
                        sync += msr_read.eq(1)  # yeah don't read it again
                        sync += cur_state.msr.eq(state_r_msr.o_data)
                    with m.If(self.imem.f_busy_o & ~fetch_failed):  # zzz...
                        # busy but not fetch failed: stay in wait-read
                        comb += self.imem.a_i_valid.eq(1)
                        comb += self.imem.f_i_valid.eq(1)
                    with m.Else():
                        # not busy (or fetch failed!): instruction fetched
                        # when fetch failed, the instruction gets ignored
                        # by the decoder
                        insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                        if self.svp64_en:
                            # decode the SVP64 prefix, if any
                            comb += svp64.raw_opcode_in.eq(insn)
                            comb += svp64.bigendian.eq(bigendian)
                            # pass the decoded prefix (if any) to
                            # PowerDecoder2
                            sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                            sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
                            # remember whether this is a prefixed
                            # instruction, so the FSM can readily loop
                            # when VL==0
                            sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                            # calculate the address of the following
                            # instruction
                            insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                            sync += nia.eq(cur_state.pc + insn_size)
                            with m.If(~svp64.is_svp64_mode):
                                # with no prefix, store the instruction
                                # and hand it directly to the next FSM
                                sync += dec_opcode_o.eq(insn)
                                m.next = "INSN_READY"
                            with m.Else():
                                # fetch the rest of the instruction
                                # from memory
                                comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                                comb += self.imem.a_i_valid.eq(1)
                                comb += self.imem.f_i_valid.eq(1)
                                m.next = "INSN_READ2"
                        else:
                            # not SVP64 - 32-bit only
                            sync += nia.eq(cur_state.pc + 4)
                            sync += dec_opcode_o.eq(insn)
                            m.next = "INSN_READY"

            # wait for the second word (the one after an SVP64 prefix)
            with m.State("INSN_READ2"):
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                    sync += dec_opcode_o.eq(insn)
                    m.next = "INSN_READY"
                    # TODO: probably can start looking at pdecode2.rm_dec
                    # here or maybe even in INSN_READ state, if svp64_mode
                    # detected, in order to trigger - and wait for - the
                    # predicate reading.  sketch (svp64_en only):
                    #
                    #   pmode = pdecode2.rm_dec.predmode
                    #   if pmode != SVP64PredMode.ALWAYS.value:
                    #       fire predicate loading FSM and wait before
                    #       moving to INSN_READY
                    #   else:
                    #       sync += self.srcmask.eq(-1) # set to all 1s
                    #       sync += self.dstmask.eq(-1) # set to all 1s
                    #       m.next = "INSN_READY"

            with m.State("INSN_READY"):
                # hand over the instruction, to be decoded
                comb += fetch_insn_o_valid.eq(1)
                with m.If(fetch_insn_i_ready):
                    m.next = "IDLE"

        # whatever was done above, over-ride it if core reset is held
        with m.If(self.core_rst):
            sync += nia.eq(0)

        return m
353
354
355 class TestIssuerInternal(Elaboratable):
356 """TestIssuer - reads instructions from TestMemory and issues them
357
358 efficiency and speed is not the main goal here: functional correctness
359 and code clarity is. optimisations (which almost 100% interfere with
360 easy understanding) come later.
361 """
362
363 def __init__(self, pspec):
364
365 # test is SVP64 is to be enabled
366 self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
367
368 # and if regfiles are reduced
369 self.regreduce_en = (hasattr(pspec, "regreduce") and
370 (pspec.regreduce == True))
371
372 # and if overlap requested
373 self.allow_overlap = (hasattr(pspec, "allow_overlap") and
374 (pspec.allow_overlap == True))
375
376 # JTAG interface. add this right at the start because if it's
377 # added it *modifies* the pspec, by adding enable/disable signals
378 # for parts of the rest of the core
379 self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
380 self.dbg_domain = "sync" # sigh "dbgsunc" too problematic
381 # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
382 if self.jtag_en:
383 # XXX MUST keep this up-to-date with litex, and
384 # soc-cocotb-sim, and err.. all needs sorting out, argh
385 subset = ['uart',
386 'mtwi',
387 'eint', 'gpio', 'mspi0',
388 # 'mspi1', - disabled for now
389 # 'pwm', 'sd0', - disabled for now
390 'sdr']
391 self.jtag = JTAG(get_pinspecs(subset=subset),
392 domain=self.dbg_domain)
393 # add signals to pspec to enable/disable icache and dcache
394 # (or data and intstruction wishbone if icache/dcache not included)
395 # https://bugs.libre-soc.org/show_bug.cgi?id=520
396 # TODO: do we actually care if these are not domain-synchronised?
397 # honestly probably not.
398 pspec.wb_icache_en = self.jtag.wb_icache_en
399 pspec.wb_dcache_en = self.jtag.wb_dcache_en
400 self.wb_sram_en = self.jtag.wb_sram_en
401 else:
402 self.wb_sram_en = Const(1)
403
404 # add 4k sram blocks?
405 self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
406 pspec.sram4x4kblock == True)
407 if self.sram4x4k:
408 self.sram4k = []
409 for i in range(4):
410 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
411 # features={'err'}
412 ))
413
414 # add interrupt controller?
415 self.xics = hasattr(pspec, "xics") and pspec.xics == True
416 if self.xics:
417 self.xics_icp = XICS_ICP()
418 self.xics_ics = XICS_ICS()
419 self.int_level_i = self.xics_ics.int_level_i
420
421 # add GPIO peripheral?
422 self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
423 if self.gpio:
424 self.simple_gpio = SimpleGPIO()
425 self.gpio_o = self.simple_gpio.gpio_o
426
427 # main instruction core. suitable for prototyping / demo only
428 self.core = core = NonProductionCore(pspec)
429 self.core_rst = ResetSignal("coresync")
430
431 # instruction decoder. goes into Trap Record
432 #pdecode = create_pdecode()
433 self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
434 self.pdecode2 = PowerDecode2(None, state=self.cur_state,
435 opkls=IssuerDecode2ToOperand,
436 svp64_en=self.svp64_en,
437 regreduce_en=self.regreduce_en)
438 pdecode = self.pdecode2.dec
439
440 if self.svp64_en:
441 self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
442
443 # Test Instruction memory
444 if hasattr(core, "icache"):
445 # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
446 # truly dreadful. needs a huge reorg.
447 pspec.icache = core.icache
448 self.imem = ConfigFetchUnit(pspec).fu
449
450 # DMI interface
451 self.dbg = CoreDebug()
452
453 # instruction go/monitor
454 self.pc_o = Signal(64, reset_less=True)
455 self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
456 self.svstate_i = Data(64, "svstate_i") # ditto
457 self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
458 self.busy_o = Signal(reset_less=True)
459 self.memerr_o = Signal(reset_less=True)
460
461 # STATE regfile read /write ports for PC, MSR, SVSTATE
462 staterf = self.core.regs.rf['state']
463 self.state_r_pc = staterf.r_ports['cia'] # PC rd
464 self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
465 self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
466 self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr
467
468 # DMI interface access
469 intrf = self.core.regs.rf['int']
470 crrf = self.core.regs.rf['cr']
471 xerrf = self.core.regs.rf['xer']
472 self.int_r = intrf.r_ports['dmi'] # INT read
473 self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
474 self.xer_r = xerrf.r_ports['full_xer'] # XER read
475
476 if self.svp64_en:
477 # for predication
478 self.int_pred = intrf.r_ports['pred'] # INT predicate read
479 self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
480
481 # hack method of keeping an eye on whether branch/trap set the PC
482 self.state_nia = self.core.regs.rf['state'].w_ports['nia']
483 self.state_nia.wen.name = 'state_nia_wen'
484
485 # pulse to synchronize the simulator at instruction end
486 self.insn_done = Signal()
487
488 # indicate any instruction still outstanding, in execution
489 self.any_busy = Signal()
490
491 if self.svp64_en:
492 # store copies of predicate masks
493 self.srcmask = Signal(64)
494 self.dstmask = Signal(64)
495
496 def fetch_predicate_fsm(self, m,
497 pred_insn_i_valid, pred_insn_o_ready,
498 pred_mask_o_valid, pred_mask_i_ready):
499 """fetch_predicate_fsm - obtains (constructs in the case of CR)
500 src/dest predicate masks
501
502 https://bugs.libre-soc.org/show_bug.cgi?id=617
503 the predicates can be read here, by using IntRegs r_ports['pred']
504 or CRRegs r_ports['pred']. in the case of CRs it will have to
505 be done through multiple reads, extracting one relevant at a time.
506 later, a faster way would be to use the 32-bit-wide CR port but
507 this is more complex decoding, here. equivalent code used in
508 ISACaller is "from openpower.decoder.isa.caller import get_predcr"
509
510 note: this ENTIRE FSM is not to be called when svp64 is disabled
511 """
512 comb = m.d.comb
513 sync = m.d.sync
514 pdecode2 = self.pdecode2
515 rm_dec = pdecode2.rm_dec # SVP64RMModeDecode
516 predmode = rm_dec.predmode
517 srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
518 cr_pred, int_pred = self.cr_pred, self.int_pred # read regfiles
519 # get src/dst step, so we can skip already used mask bits
520 cur_state = self.cur_state
521 srcstep = cur_state.svstate.srcstep
522 dststep = cur_state.svstate.dststep
523 cur_vl = cur_state.svstate.vl
524
525 # decode predicates
526 sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
527 dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
528 sidx, scrinvert = get_predcr(m, srcpred, 's')
529 didx, dcrinvert = get_predcr(m, dstpred, 'd')
530
531 # store fetched masks, for either intpred or crpred
532 # when src/dst step is not zero, the skipped mask bits need to be
533 # shifted-out, before actually storing them in src/dest mask
534 new_srcmask = Signal(64, reset_less=True)
535 new_dstmask = Signal(64, reset_less=True)
536
537 with m.FSM(name="fetch_predicate"):
538
539 with m.State("FETCH_PRED_IDLE"):
540 comb += pred_insn_o_ready.eq(1)
541 with m.If(pred_insn_i_valid):
542 with m.If(predmode == SVP64PredMode.INT):
543 # skip fetching destination mask register, when zero
544 with m.If(dall1s):
545 sync += new_dstmask.eq(-1)
546 # directly go to fetch source mask register
547 # guaranteed not to be zero (otherwise predmode
548 # would be SVP64PredMode.ALWAYS, not INT)
549 comb += int_pred.addr.eq(sregread)
550 comb += int_pred.ren.eq(1)
551 m.next = "INT_SRC_READ"
552 # fetch destination predicate register
553 with m.Else():
554 comb += int_pred.addr.eq(dregread)
555 comb += int_pred.ren.eq(1)
556 m.next = "INT_DST_READ"
557 with m.Elif(predmode == SVP64PredMode.CR):
558 # go fetch masks from the CR register file
559 sync += new_srcmask.eq(0)
560 sync += new_dstmask.eq(0)
561 m.next = "CR_READ"
562 with m.Else():
563 sync += self.srcmask.eq(-1)
564 sync += self.dstmask.eq(-1)
565 m.next = "FETCH_PRED_DONE"
566
567 with m.State("INT_DST_READ"):
568 # store destination mask
569 inv = Repl(dinvert, 64)
570 with m.If(dunary):
571 # set selected mask bit for 1<<r3 mode
572 dst_shift = Signal(range(64))
573 comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
574 sync += new_dstmask.eq(1 << dst_shift)
575 with m.Else():
576 # invert mask if requested
577 sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
578 # skip fetching source mask register, when zero
579 with m.If(sall1s):
580 sync += new_srcmask.eq(-1)
581 m.next = "FETCH_PRED_SHIFT_MASK"
582 # fetch source predicate register
583 with m.Else():
584 comb += int_pred.addr.eq(sregread)
585 comb += int_pred.ren.eq(1)
586 m.next = "INT_SRC_READ"
587
588 with m.State("INT_SRC_READ"):
589 # store source mask
590 inv = Repl(sinvert, 64)
591 with m.If(sunary):
592 # set selected mask bit for 1<<r3 mode
593 src_shift = Signal(range(64))
594 comb += src_shift.eq(self.int_pred.o_data & 0b111111)
595 sync += new_srcmask.eq(1 << src_shift)
596 with m.Else():
597 # invert mask if requested
598 sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
599 m.next = "FETCH_PRED_SHIFT_MASK"
600
601 # fetch masks from the CR register file
602 # implements the following loop:
603 # idx, inv = get_predcr(mask)
604 # mask = 0
605 # for cr_idx in range(vl):
606 # cr = crl[cr_idx + SVP64CROffs.CRPred] # takes one cycle
607 # if cr[idx] ^ inv:
608 # mask |= 1 << cr_idx
609 # return mask
610 with m.State("CR_READ"):
611 # CR index to be read, which will be ready by the next cycle
612 cr_idx = Signal.like(cur_vl, reset_less=True)
613 # submit the read operation to the regfile
614 with m.If(cr_idx != cur_vl):
615 # the CR read port is unary ...
616 # ren = 1 << cr_idx
617 # ... in MSB0 convention ...
618 # ren = 1 << (7 - cr_idx)
619 # ... and with an offset:
620 # ren = 1 << (7 - off - cr_idx)
621 idx = SVP64CROffs.CRPred + cr_idx
622 comb += cr_pred.ren.eq(1 << (7 - idx))
623 # signal data valid in the next cycle
624 cr_read = Signal(reset_less=True)
625 sync += cr_read.eq(1)
626 # load the next index
627 sync += cr_idx.eq(cr_idx + 1)
628 with m.Else():
629 # exit on loop end
630 sync += cr_read.eq(0)
631 sync += cr_idx.eq(0)
632 m.next = "FETCH_PRED_SHIFT_MASK"
633 with m.If(cr_read):
634 # compensate for the one cycle delay on the regfile
635 cur_cr_idx = Signal.like(cur_vl)
636 comb += cur_cr_idx.eq(cr_idx - 1)
637 # read the CR field, select the appropriate bit
638 cr_field = Signal(4)
639 scr_bit = Signal()
640 dcr_bit = Signal()
641 comb += cr_field.eq(cr_pred.o_data)
642 comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
643 ^ scrinvert)
644 comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
645 ^ dcrinvert)
646 # set the corresponding mask bit
647 bit_to_set = Signal.like(self.srcmask)
648 comb += bit_to_set.eq(1 << cur_cr_idx)
649 with m.If(scr_bit):
650 sync += new_srcmask.eq(new_srcmask | bit_to_set)
651 with m.If(dcr_bit):
652 sync += new_dstmask.eq(new_dstmask | bit_to_set)
653
654 with m.State("FETCH_PRED_SHIFT_MASK"):
655 # shift-out skipped mask bits
656 sync += self.srcmask.eq(new_srcmask >> srcstep)
657 sync += self.dstmask.eq(new_dstmask >> dststep)
658 m.next = "FETCH_PRED_DONE"
659
660 with m.State("FETCH_PRED_DONE"):
661 comb += pred_mask_o_valid.eq(1)
662 with m.If(pred_mask_i_ready):
663 m.next = "FETCH_PRED_IDLE"
664
665 def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
666 dbg, core_rst, is_svp64_mode,
667 fetch_pc_o_ready, fetch_pc_i_valid,
668 fetch_insn_o_valid, fetch_insn_i_ready,
669 pred_insn_i_valid, pred_insn_o_ready,
670 pred_mask_o_valid, pred_mask_i_ready,
671 exec_insn_i_valid, exec_insn_o_ready,
672 exec_pc_o_valid, exec_pc_i_ready):
673 """issue FSM
674
675 decode / issue FSM. this interacts with the "fetch" FSM
676 through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
677 (outgoing). also interacts with the "execute" FSM
678 through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
679 (incoming).
680 SVP64 RM prefixes have already been set up by the
681 "fetch" phase, so execute is fairly straightforward.
682 """
683
684 comb = m.d.comb
685 sync = m.d.sync
686 pdecode2 = self.pdecode2
687 cur_state = self.cur_state
688
689 # temporaries
690 dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
691
692 # for updating svstate (things like srcstep etc.)
693 update_svstate = Signal() # set this (below) if updating
694 new_svstate = SVSTATERec("new_svstate")
695 comb += new_svstate.eq(cur_state.svstate)
696
697 # precalculate srcstep+1 and dststep+1
698 cur_srcstep = cur_state.svstate.srcstep
699 cur_dststep = cur_state.svstate.dststep
700 next_srcstep = Signal.like(cur_srcstep)
701 next_dststep = Signal.like(cur_dststep)
702 comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
703 comb += next_dststep.eq(cur_state.svstate.dststep+1)
704
705 # note if an exception happened. in a pipelined or OoO design
706 # this needs to be accompanied by "shadowing" (or stalling)
707 exc_happened = self.core.o.exc_happened
708 # also note instruction fetch failed
709 if hasattr(core, "icache"):
710 fetch_failed = core.icache.i_out.fetch_failed
711 else:
712 fetch_failed = Const(0, 1)
713 # set to fault in decoder
714 # update (highest priority) instruction fault
715 comb += pdecode2.instr_fault.eq(fetch_failed)
716
717 with m.FSM(name="issue_fsm"):
718
719 # sync with the "fetch" phase which is reading the instruction
720 # at this point, there is no instruction running, that
721 # could inadvertently update the PC.
722 with m.State("ISSUE_START"):
723 # wait on "core stop" release, before next fetch
724 # need to do this here, in case we are in a VL==0 loop
725 with m.If(~dbg.core_stop_o & ~core_rst):
726 comb += fetch_pc_i_valid.eq(1) # tell fetch to start
727 with m.If(fetch_pc_o_ready): # fetch acknowledged us
728 m.next = "INSN_WAIT"
729 with m.Else():
730 # tell core it's stopped, and acknowledge debug handshake
731 comb += dbg.core_stopped_i.eq(1)
732 # while stopped, allow updating the PC and SVSTATE
733 with m.If(self.pc_i.ok):
734 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
735 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
736 sync += pc_changed.eq(1)
737 with m.If(self.svstate_i.ok):
738 comb += new_svstate.eq(self.svstate_i.data)
739 comb += update_svstate.eq(1)
740 sync += sv_changed.eq(1)
741
742 # wait for an instruction to arrive from Fetch
743 with m.State("INSN_WAIT"):
744 if self.allow_overlap:
745 stopping = dbg.stopping_o
746 else:
747 stopping = Const(0)
748 with m.If(stopping):
749 # stopping: jump back to idle
750 m.next = "ISSUE_START"
751 with m.Else():
752 comb += fetch_insn_i_ready.eq(1)
753 with m.If(fetch_insn_o_valid):
754 # loop into ISSUE_START if it's a SVP64 instruction
755 # and VL == 0. this because VL==0 is a for-loop
756 # from 0 to 0 i.e. always, always a NOP.
757 cur_vl = cur_state.svstate.vl
758 with m.If(is_svp64_mode & (cur_vl == 0)):
759 # update the PC before fetching the next instruction
760 # since we are in a VL==0 loop, no instruction was
761 # executed that we could be overwriting
762 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
763 comb += self.state_w_pc.i_data.eq(nia)
764 comb += self.insn_done.eq(1)
765 m.next = "ISSUE_START"
766 with m.Else():
767 if self.svp64_en:
768 m.next = "PRED_START" # fetching predicate
769 else:
770 m.next = "DECODE_SV" # skip predication
771
772 with m.State("PRED_START"):
773 comb += pred_insn_i_valid.eq(1) # tell fetch_pred to start
774 with m.If(pred_insn_o_ready): # fetch_pred acknowledged us
775 m.next = "MASK_WAIT"
776
777 with m.State("MASK_WAIT"):
778 comb += pred_mask_i_ready.eq(1) # ready to receive the masks
779 with m.If(pred_mask_o_valid): # predication masks are ready
780 m.next = "PRED_SKIP"
781
782 # skip zeros in predicate
783 with m.State("PRED_SKIP"):
784 with m.If(~is_svp64_mode):
785 m.next = "DECODE_SV" # nothing to do
786 with m.Else():
787 if self.svp64_en:
788 pred_src_zero = pdecode2.rm_dec.pred_sz
789 pred_dst_zero = pdecode2.rm_dec.pred_dz
790
791 # new srcstep, after skipping zeros
792 skip_srcstep = Signal.like(cur_srcstep)
793 # value to be added to the current srcstep
794 src_delta = Signal.like(cur_srcstep)
795 # add leading zeros to srcstep, if not in zero mode
796 with m.If(~pred_src_zero):
797 # priority encoder (count leading zeros)
798 # append guard bit, in case the mask is all zeros
799 pri_enc_src = PriorityEncoder(65)
800 m.submodules.pri_enc_src = pri_enc_src
801 comb += pri_enc_src.i.eq(Cat(self.srcmask,
802 Const(1, 1)))
803 comb += src_delta.eq(pri_enc_src.o)
804 # apply delta to srcstep
805 comb += skip_srcstep.eq(cur_srcstep + src_delta)
806 # shift-out all leading zeros from the mask
807 # plus the leading "one" bit
808 # TODO count leading zeros and shift-out the zero
809 # bits, in the same step, in hardware
810 sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
811
812 # same as above, but for dststep
813 skip_dststep = Signal.like(cur_dststep)
814 dst_delta = Signal.like(cur_dststep)
815 with m.If(~pred_dst_zero):
816 pri_enc_dst = PriorityEncoder(65)
817 m.submodules.pri_enc_dst = pri_enc_dst
818 comb += pri_enc_dst.i.eq(Cat(self.dstmask,
819 Const(1, 1)))
820 comb += dst_delta.eq(pri_enc_dst.o)
821 comb += skip_dststep.eq(cur_dststep + dst_delta)
822 sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
823
824 # TODO: initialize mask[VL]=1 to avoid passing past VL
825 with m.If((skip_srcstep >= cur_vl) |
826 (skip_dststep >= cur_vl)):
827 # end of VL loop. Update PC and reset src/dst step
828 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
829 comb += self.state_w_pc.i_data.eq(nia)
830 comb += new_svstate.srcstep.eq(0)
831 comb += new_svstate.dststep.eq(0)
832 comb += update_svstate.eq(1)
833 # synchronize with the simulator
834 comb += self.insn_done.eq(1)
835 # go back to Issue
836 m.next = "ISSUE_START"
837 with m.Else():
838 # update new src/dst step
839 comb += new_svstate.srcstep.eq(skip_srcstep)
840 comb += new_svstate.dststep.eq(skip_dststep)
841 comb += update_svstate.eq(1)
842 # proceed to Decode
843 m.next = "DECODE_SV"
844
845 # pass predicate mask bits through to satellite decoders
846 # TODO: for SIMD this will be *multiple* bits
847 sync += core.i.sv_pred_sm.eq(self.srcmask[0])
848 sync += core.i.sv_pred_dm.eq(self.dstmask[0])
849
850 # after src/dst step have been updated, we are ready
851 # to decode the instruction
852 with m.State("DECODE_SV"):
853 # decode the instruction
854 sync += core.i.e.eq(pdecode2.e)
855 sync += core.i.state.eq(cur_state)
856 sync += core.i.raw_insn_i.eq(dec_opcode_i)
857 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
858 if self.svp64_en:
859 sync += core.i.sv_rm.eq(pdecode2.sv_rm)
860 # set RA_OR_ZERO detection in satellite decoders
861 sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
862 # and svp64 detection
863 sync += core.i.is_svp64_mode.eq(is_svp64_mode)
864 # and svp64 bit-rev'd ldst mode
865 ldst_dec = pdecode2.use_svp64_ldst_dec
866 sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
867 # after decoding, reset any previous exception condition,
868 # allowing it to be set again during the next execution
869 sync += pdecode2.ldst_exc.eq(0)
870
871 m.next = "INSN_EXECUTE" # move to "execute"
872
873 # handshake with execution FSM, move to "wait" once acknowledged
874 with m.State("INSN_EXECUTE"):
875 comb += exec_insn_i_valid.eq(1) # trigger execute
876 with m.If(exec_insn_o_ready): # execute acknowledged us
877 m.next = "EXECUTE_WAIT"
878
879 with m.State("EXECUTE_WAIT"):
880 # wait on "core stop" release, at instruction end
881 # need to do this here, in case we are in a VL>1 loop
882 with m.If(~dbg.core_stop_o & ~core_rst):
883 comb += exec_pc_i_ready.eq(1)
884 # see https://bugs.libre-soc.org/show_bug.cgi?id=636
885 # the exception info needs to be blatted into
886 # pdecode.ldst_exc, and the instruction "re-run".
887 # when ldst_exc.happened is set, the PowerDecoder2
888 # reacts very differently: it re-writes the instruction
889 # with a "trap" (calls PowerDecoder2.trap()) which
890 # will *overwrite* whatever was requested and jump the
891 # PC to the exception address, as well as alter MSR.
892 # nothing else needs to be done other than to note
893 # the change of PC and MSR (and, later, SVSTATE)
894 with m.If(exc_happened):
895 mmu = core.fus.get_exc("mmu0")
896 ldst = core.fus.get_exc("ldst0")
897 with m.If(fetch_failed):
898 # instruction fetch: exception is from MMU
899 sync += pdecode2.ldst_exc.eq(mmu)
900 with m.Else():
901 # otherwise assume it was a LDST exception
902 sync += pdecode2.ldst_exc.eq(ldst)
903
904 with m.If(exec_pc_o_valid):
905
906 # was this the last loop iteration?
907 is_last = Signal()
908 cur_vl = cur_state.svstate.vl
909 comb += is_last.eq(next_srcstep == cur_vl)
910
911 # return directly to Decode if Execute generated an
912 # exception.
913 with m.If(pdecode2.ldst_exc.happened):
914 m.next = "DECODE_SV"
915
916 # if either PC or SVSTATE were changed by the previous
917 # instruction, go directly back to Fetch, without
918 # updating either PC or SVSTATE
919 with m.Elif(pc_changed | sv_changed):
920 m.next = "ISSUE_START"
921
922 # also return to Fetch, when no output was a vector
923 # (regardless of SRCSTEP and VL), or when the last
924 # instruction was really the last one of the VL loop
925 with m.Elif((~pdecode2.loop_continue) | is_last):
926 # before going back to fetch, update the PC state
927 # register with the NIA.
928 # ok here we are not reading the branch unit.
929 # TODO: this just blithely overwrites whatever
930 # pipeline updated the PC
931 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
932 comb += self.state_w_pc.i_data.eq(nia)
933 # reset SRCSTEP before returning to Fetch
934 if self.svp64_en:
935 with m.If(pdecode2.loop_continue):
936 comb += new_svstate.srcstep.eq(0)
937 comb += new_svstate.dststep.eq(0)
938 comb += update_svstate.eq(1)
939 else:
940 comb += new_svstate.srcstep.eq(0)
941 comb += new_svstate.dststep.eq(0)
942 comb += update_svstate.eq(1)
943 m.next = "ISSUE_START"
944
945 # returning to Execute? then, first update SRCSTEP
946 with m.Else():
947 comb += new_svstate.srcstep.eq(next_srcstep)
948 comb += new_svstate.dststep.eq(next_dststep)
949 comb += update_svstate.eq(1)
950 # return to mask skip loop
951 m.next = "PRED_SKIP"
952
953 with m.Else():
954 comb += dbg.core_stopped_i.eq(1)
955 # while stopped, allow updating the PC and SVSTATE
956 with m.If(self.pc_i.ok):
957 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
958 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
959 sync += pc_changed.eq(1)
960 with m.If(self.svstate_i.ok):
961 comb += new_svstate.eq(self.svstate_i.data)
962 comb += update_svstate.eq(1)
963 sync += sv_changed.eq(1)
964
965 # check if svstate needs updating: if so, write it to State Regfile
966 with m.If(update_svstate):
967 comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
968 comb += self.state_w_sv.i_data.eq(new_svstate)
969 sync += cur_state.svstate.eq(new_svstate) # for next clock
970
def execute_fsm(self, m, core, pc_changed, sv_changed,
                exec_insn_i_valid, exec_insn_o_ready,
                exec_pc_o_valid, exec_pc_i_ready):
    """Build the "execute" FSM.

    Talks to the "issue" FSM over two ready/valid pairs:

    * exec_insn_i_valid / exec_insn_o_ready: an instruction coming in
    * exec_pc_o_valid / exec_pc_i_ready: completion being handed back

    SVP64 RM prefixes have already been dealt with by the "issue" phase,
    so all that happens here is: hand the instruction to the core, wait
    until the core is no longer busy, and record whether PC or SVSTATE
    were written while it ran (pc_changed / sv_changed).
    """
    comb, sync = m.d.comb, m.d.sync

    # shorthand (plain aliases, no new Signals are created here)
    busy = core.n.o_data.busy_o                   # core is busy
    nia_wen = self.state_nia.wen                  # state regfile write-enable
    exc_happened = self.pdecode2.ldst_exc.happened

    with m.FSM(name="exec_fsm"):

        # idle: advertise readiness and wait to be handed an instruction
        with m.State("INSN_START"):
            comb += exec_insn_o_ready.eq(1)
            with m.If(exec_insn_i_valid):
                # present the instruction to the core as valid/issued
                comb += core.p.i_valid.eq(1)
                # start each instruction with the "changed" flags cleared
                sync += pc_changed.eq(0)
                sync += sv_changed.eq(0)
                # only advance once the core has actually accepted it
                with m.If(core.p.o_ready):
                    m.next = "INSN_ACTIVE"

        # instruction is running: stay here until the core finishes
        with m.State("INSN_ACTIVE"):
            # make a note of any write to PC or SVSTATE
            with m.If(nia_wen & (1 << StateRegs.PC)):
                sync += pc_changed.eq(1)
            with m.If(nia_wen & (1 << StateRegs.SVSTATE)):
                sync += sv_changed.eq(1)
            with m.If(~busy):
                # core has finished: tell the issue FSM
                comb += exec_pc_o_valid.eq(1)
                with m.If(exec_pc_i_ready):
                    # normally this is where "done" is flagged.  an
                    # exception is the one case where it must NOT be:
                    # exceptions are implemented by copying the exception
                    # information from the LDST unit *back* into
                    # PowerDecoder2 and *re-running the whole
                    # instruction*.  flagging "done" here as well would
                    # make it look like *TWO* instructions:
                    # 1) the failed LDST  2) a TRAP.
                    with m.If(~exc_happened):
                        comb += self.insn_done.eq(1)
                    m.next = "INSN_START"
1025
def setup_peripherals(self, m):
    """Add the core, peripherals and clock/reset plumbing to module `m`.

    * adds core, imem (unless it is an ICache), debug, optional JTAG,
      optional 4k SRAMs, optional XICS and GPIO, PowerDecoder2 and the
      optional SVP64 prefix decoder as submodules
    * creates the "por", "sync" and "coresync" clock domains (plus the
      debug domain when it is not "sync")
    * drives the coresync reset from the power-on-reset countdown, the
      debug core-reset request and the main reset
    * wires up busy_o, the decoder endianness, and the temporary
      LDST "go" hack

    m: the nmigen Module being elaborated.  nothing is returned.
    """
    comb, sync = m.d.comb, m.d.sync

    # okaaaay so the debug module must be in coresync clock domain
    # but NOT its reset signal. to cope with this, set every single
    # submodule explicitly in coresync domain, debug and JTAG
    # in their own one but using *external* reset.
    csd = DomainRenamer("coresync")
    dbd = DomainRenamer(self.dbg_domain)

    m.submodules.core = core = csd(self.core)
    # this _so_ needs sorting out.  ICache is added down inside
    # LoadStore1 and is already a submodule of LoadStore1
    if not isinstance(self.imem, ICache):
        m.submodules.imem = imem = csd(self.imem)
    m.submodules.dbg = dbg = dbd(self.dbg)
    if self.jtag_en:
        m.submodules.jtag = jtag = dbd(self.jtag)
        # TODO: UART2GDB mux, here, from external pin
        # see https://bugs.libre-soc.org/show_bug.cgi?id=499
        sync += dbg.dmi.connect_to(jtag.dmi)

    cur_state = self.cur_state

    # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
    if self.sram4x4k:
        for i, sram in enumerate(self.sram4k):
            m.submodules["sram4k_%d" % i] = csd(sram)
            comb += sram.enable.eq(self.wb_sram_en)

    # XICS interrupt handler
    if self.xics:
        m.submodules.xics_icp = icp = csd(self.xics_icp)
        m.submodules.xics_ics = ics = csd(self.xics_ics)
        comb += icp.ics_i.eq(ics.icp_o)            # connect ICS to ICP
        sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core

    # GPIO test peripheral
    if self.gpio:
        m.submodules.simple_gpio = simple_gpio = csd(self.simple_gpio)

    # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
    # XXX causes litex ECP5 test to get wrong idea about input and output
    # (but works with verilator sim *sigh*)
    # if self.gpio and self.xics:
    #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

    # instruction decoder
    # NOTE(review): the result of create_pdecode() is never used in this
    # method.  the call is kept in case it has side effects that cannot
    # be seen from here - confirm and remove.
    pdecode = create_pdecode()
    m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
    if self.svp64_en:
        m.submodules.svp64 = svp64 = csd(self.svp64)

    # clock delay power-on reset.
    # cd_por and cd_sync are constructed without an explicit name, so
    # these local variable names must not be changed: m.d.por below
    # relies on the first domain being called "por".
    cd_por = ClockDomain(reset_less=True)
    cd_sync = ClockDomain()
    core_sync = ClockDomain("coresync")
    m.domains += cd_por, cd_sync, core_sync
    if self.dbg_domain != "sync":
        dbg_sync = ClockDomain(self.dbg_domain)
        m.domains += dbg_sync

    # countdown from 3 to 0, clocked in the (reset-less) por domain
    ti_rst = Signal(reset_less=True)
    delay = Signal(range(4), reset=3)
    with m.If(delay != 0):
        m.d.por += delay.eq(delay - 1)
    comb += cd_por.clk.eq(ClockSignal())

    # power-on reset delay: hold the core in reset while the countdown
    # is running, OR when debug asks for a core reset, OR on main reset.
    # the parentheses around the comparison are essential: in Python "|"
    # binds tighter than "!=", so without them the expression is parsed
    # as delay != (0 | core_rst_o | rst), which is not a logical OR.
    core_rst = ResetSignal("coresync")
    comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
    comb += core_rst.eq(ti_rst)

    # debug clock is same as coresync, but reset is *main external*
    if self.dbg_domain != "sync":
        dbg_rst = ResetSignal(self.dbg_domain)
        comb += dbg_rst.eq(ResetSignal())

    # busy/halted signals from core
    core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
    comb += self.busy_o.eq(core_busy_o)
    comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

    # temporary hack: says "go" immediately for both address gen and ST
    ldst = core.fus.fus['ldst0']
    st_go_edge = rising_edge(m, ldst.st.rel_o)
    # link addr-go direct to rel
    m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
    m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel
1120
def elaborate(self, platform):
    """Build and return the nmigen Module for the issuer.

    In order, this:

    * calls setup_peripherals() to add the core and peripherals
    * clears cur_state whenever core_rst is asserted
    * reads PC and SVSTATE through state_get() and forwards them
      (plus MSR from cur_state) to the debug module
    * creates the ready/valid handshake Signals shared by the FSMs
    * instantiates FetchFSM as a submodule and connects it up
    * calls issue_fsm(), fetch_predicate_fsm() (only when svp64_en is
      set) and execute_fsm()
    * calls do_dmi() and tb_dec_fsm()

    NOTE: statement order matters here.  the default assignments to
    state_w_pc (wen and i_data set to 0) come *before* the FSM-building
    calls, which are handed the same module and comb domain.

    platform: not used by this method.
    """
    m = Module()
    # convenience
    comb, sync = m.d.comb, m.d.sync
    cur_state = self.cur_state
    pdecode2 = self.pdecode2
    dbg = self.dbg
    core = self.core

    # set up peripherals and core
    core_rst = self.core_rst
    self.setup_peripherals(m)

    # reset current state if core reset requested
    with m.If(core_rst):
        m.d.sync += self.cur_state.eq(0)

    # PC and instruction from I-Memory
    comb += self.pc_o.eq(cur_state.pc)
    pc_changed = Signal()  # note write to PC
    sv_changed = Signal()  # note write to SVSTATE

    # indicate to outside world if any FU is still executing
    comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing

    # read state either from incoming override or from regfile
    # TODO: really should be doing MSR in the same way
    pc = state_get(m, core_rst, self.pc_i,
                   "pc",                  # read PC
                   self.state_r_pc, StateRegs.PC)
    svstate = state_get(m, core_rst, self.svstate_i,
                        "svstate",                   # read SVSTATE
                        self.state_r_sv, StateRegs.SVSTATE)

    # don't write pc every cycle: this is the default, set up before
    # the FSMs below get a chance to assert the write-enable
    comb += self.state_w_pc.wen.eq(0)
    comb += self.state_w_pc.i_data.eq(0)

    # address of the next instruction, in the absence of a branch
    # depends on the instruction size
    nia = Signal(64)

    # connect up debug signals
    # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
    comb += dbg.terminate_i.eq(core.o.core_terminate_o)
    comb += dbg.state.pc.eq(pc)
    comb += dbg.state.svstate.eq(svstate)
    comb += dbg.state.msr.eq(cur_state.msr)

    # pass the prefix mode from Fetch to Issue, so the latter can loop
    # on VL==0
    is_svp64_mode = Signal()

    # there are *THREE^WFOUR-if-SVP64-enabled* FSMs, fetch (32/64-bit)
    # issue, decode/execute, now joined by "Predicate fetch/calculate".
    # these are the handshake signals between each

    # fetch FSM can run as soon as the PC is valid
    fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
    fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"

    # fetch FSM hands over the instruction to be decoded / issued
    fetch_insn_o_valid = Signal()
    fetch_insn_i_ready = Signal()

    # predicate fetch FSM decodes and fetches the predicate
    pred_insn_i_valid = Signal()
    pred_insn_o_ready = Signal()

    # predicate fetch FSM delivers the masks
    pred_mask_o_valid = Signal()
    pred_mask_i_ready = Signal()

    # issue FSM delivers the instruction to be executed
    exec_insn_i_valid = Signal()
    exec_insn_o_ready = Signal()

    # execute FSM, hands over the PC/SVSTATE back to the issue FSM
    exec_pc_o_valid = Signal()
    exec_pc_i_ready = Signal()

    # the FSMs here are perhaps unusual in that they detect conditions
    # then "hold" information, combinatorially, for the core
    # (as opposed to using sync - which would be on a clock's delay)
    # this includes the actual opcode, valid flags and so on.

    # Fetch, then predicate fetch, then Issue, then Execute.
    # Issue is where the VL for-loop lives.  the ready/valid
    # signalling is used to communicate between the four.

    # set up Fetch FSM
    fetch = FetchFSM(self.allow_overlap, self.svp64_en,
                     self.imem, core_rst, pdecode2, cur_state,
                     dbg, core, svstate, nia, is_svp64_mode)
    m.submodules.fetch = fetch
    # connect up in/out data to existing Signals
    comb += fetch.p.i_data.pc.eq(pc)
    # and the ready/valid signalling
    comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
    comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
    comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
    comb += fetch.n.i_ready.eq(fetch_insn_i_ready)

    # issue FSM: is given every handshake pair declared above
    self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                   dbg, core_rst, is_svp64_mode,
                   fetch_pc_o_ready, fetch_pc_i_valid,
                   fetch_insn_o_valid, fetch_insn_i_ready,
                   pred_insn_i_valid, pred_insn_o_ready,
                   pred_mask_o_valid, pred_mask_i_ready,
                   exec_insn_i_valid, exec_insn_o_ready,
                   exec_pc_o_valid, exec_pc_i_ready)

    # predicate fetch FSM is only built when SVP64 is enabled
    if self.svp64_en:
        self.fetch_predicate_fsm(m,
                                 pred_insn_i_valid, pred_insn_o_ready,
                                 pred_mask_o_valid, pred_mask_i_ready)

    # execute FSM: shares pc_changed/sv_changed with the issue FSM
    self.execute_fsm(m, core, pc_changed, sv_changed,
                     exec_insn_i_valid, exec_insn_o_ready,
                     exec_pc_o_valid, exec_pc_i_ready)

    # this bit doesn't have to be in the FSM: connect up to read
    # regfiles on demand from DMI
    self.do_dmi(m, dbg)

    # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
    # (which uses that in PowerDecoder2 to raise 0x900 exception)
    self.tb_dec_fsm(m, cur_state.dec)

    return m
1251
def do_dmi(self, m, dbg):
    """Service DMI debug read requests.

    Read-only for the moment: the INT regfile, CR and XER can be read.
    *Writing* to these regfiles is to be added later.

    Each of the three follows the same pattern: while the request is
    up, the regfile read-enable is asserted combinatorially; a
    one-clock delayed copy of the request then gates the data and
    the ack back to the debug interface.
    """
    comb, sync = m.d.comb, m.d.sync
    d_reg, d_cr, d_xer = dbg.d_gpr, dbg.d_cr, dbg.d_xer
    int_port = self.int_r

    # --- INT regfile ---
    with m.If(d_reg.req):
        # TODO: error-check this
        # XXX should this be combinatorial?  sync better?
        if not self.core.regs.rf['int'].unary:
            # binary-addressed: give the address plus a single enable
            comb += int_port.addr.eq(d_reg.addr)
            comb += int_port.ren.eq(1)
        else:
            # unary-addressed: the read-enable is a one-hot select
            comb += int_port.ren.eq(1 << d_reg.addr)
    d_reg_delay = Signal()
    sync += d_reg_delay.eq(d_reg.req)
    with m.If(d_reg_delay):
        # the data shows up one clock after the request
        comb += d_reg.ack.eq(1)
        comb += d_reg.data.eq(int_port.o_data)

    # --- CR: same again ---
    with m.If(d_cr.req):
        comb += self.cr_r.ren.eq(0xff)  # all eight enables
    d_cr_delay = Signal()
    sync += d_cr_delay.eq(d_cr.req)
    with m.If(d_cr_delay):
        # the data shows up one clock after the request
        comb += d_cr.ack.eq(1)
        comb += d_cr.data.eq(self.cr_r.o_data)

    # --- XER: and again ---
    with m.If(d_xer.req):
        comb += self.xer_r.ren.eq(0x3f)  # all six enables
    d_xer_delay = Signal()
    sync += d_xer_delay.eq(d_xer.req)
    with m.If(d_xer_delay):
        # the data shows up one clock after the request
        comb += d_xer.ack.eq(1)
        comb += d_xer.data.eq(self.xer_r.o_data)
1297
def tb_dec_fsm(self, m, spr_dec):
    """FSM that steps the DEC and TB fast registers.

    Four states run in a loop: read DEC, write back DEC-1, read TB,
    write back TB+1.  Each new DEC value is also copied (sync) into
    spr_dec.  Note that the SPR pipeline could have written a new value
    to DEC in the meantime; the regfile has "passthrough" on it, so
    this *should* be ok.

    see v3.0B p1097-1099 for Timer Resource and p1065 and p1076

    Returns the module m that was passed in.
    """
    comb, sync = m.d.comb, m.d.sync
    fast_regs = self.core.regs.rf['fast']
    rd_port = fast_regs.r_ports['issue']  # DEC/TB read
    wr_port = fast_regs.w_ports['issue']  # DEC/TB write

    with m.FSM():

        # kick off a read of DEC
        with m.State("DEC_READ"):
            comb += rd_port.ren.eq(1)
            comb += rd_port.addr.eq(FastRegs.DEC)
            m.next = "DEC_WRITE"

        # DEC read data is here (1 cycle later): write back DEC-1
        with m.State("DEC_WRITE"):
            new_dec = Signal(64)
            # TODO: MSR.LPCR 32-bit decrement mode
            comb += new_dec.eq(rd_port.o_data - 1)
            comb += wr_port.wen.eq(1)
            comb += wr_port.addr.eq(FastRegs.DEC)
            comb += wr_port.i_data.eq(new_dec)
            # the decoder reads its copy of DEC from here
            sync += spr_dec.eq(new_dec)
            m.next = "TB_READ"

        # kick off a read of TB
        with m.State("TB_READ"):
            comb += rd_port.ren.eq(1)
            comb += rd_port.addr.eq(FastRegs.TB)
            m.next = "TB_WRITE"

        # TB read data is here: write back TB+1
        with m.State("TB_WRITE"):
            new_tb = Signal(64)
            comb += new_tb.eq(rd_port.o_data + 1)
            comb += wr_port.wen.eq(1)
            comb += wr_port.addr.eq(FastRegs.TB)
            comb += wr_port.i_data.eq(new_tb)
            m.next = "DEC_READ"

    return m
1349
def __iter__(self):
    """Yield this object's ports, in a fixed order.

    Order: everything from pc_i.ports(), then pc_o and memerr_o, then
    everything from core.ports() and imem.ports(), and finally
    core_bigendian_i and busy_o.
    """
    yield from self.pc_i.ports()
    yield from (self.pc_o, self.memerr_o)
    yield from self.core.ports()
    yield from self.imem.ports()
    yield from (self.core_bigendian_i, self.busy_o)
1358
def ports(self):
    """Return everything yielded by iterating this object, as a list."""
    return [*self]
1361
def external_ports(self):
    """Return the list of externally-visible ports.

    Always present: pc_i's ports, pc_o, memerr_o, core_bigendian_i,
    busy_o, the instruction bus fields and the core's l0.cmpi bus
    fields.  Either the JTAG external ports (jtag_en set) or the DMI
    ports (jtag_en clear) are included, never both.  SRAM, XICS and
    GPIO ports are appended when the matching option is set.

    The list returned by pc_i.ports() is extended in place and returned.
    """
    def bus_fields(bus):
        # every field of a bus record, as a list
        return list(bus.fields.values())

    ports = self.pc_i.ports()
    ports += [self.pc_o, self.memerr_o]
    ports += [self.core_bigendian_i, self.busy_o]

    # DMI is only exposed directly when JTAG is not enabled
    if not self.jtag_en:
        ports += list(self.dbg.dmi.ports())
    else:
        ports += list(self.jtag.external_ports())

    ports += bus_fields(self.imem.ibus)
    ports += bus_fields(self.core.l0.cmpi.wb_bus())

    if self.sram4x4k:
        for sram in self.sram4k:
            ports += bus_fields(sram.bus)

    if self.xics:
        for xics_unit in (self.xics_icp, self.xics_ics):
            ports += bus_fields(xics_unit.bus)
        ports.append(self.int_level_i)

    if self.gpio:
        ports += bus_fields(self.simple_gpio.bus)
        ports.append(self.gpio_o)

    return ports
1390
def ports(self):
    """Return everything yielded by iterating this object, as a list."""
    # NOTE(review): this is the second definition of ports() in this
    # class; it replaces the identical one defined further up.
    return list(iter(self))
1393
1394
class TestIssuer(Elaboratable):
    """Top-level wrapper: a TestIssuerInternal plus a DummyPLL.

    When pspec has a true "use_pll" attribute the PLL is added as a
    submodule and its test/VCO outputs and clock-select input are
    exposed.  In both cases the "coresync" clock is driven here, and
    the debug clock follows it when the debug domain is not "sync".
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not (a missing attribute counts as "not")
        self.pll_en = getattr(pspec, "use_pll", False)
        if not self.pll_en:
            return
        self.pll_test_o = Signal(reset_less=True)
        self.pll_vco_o = Signal(reset_less=True)
        self.clk_sel_i = Signal(2, reset_less=True)
        self.ref_clk = ClockSignal()  # can't rename it but that's ok
        self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """Build and return the Module wiring up issuer, PLL and clocks."""
        m = Module()
        comb = m.d.comb
        dbg_domain = self.ti.dbg_domain

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            pll = self.pll
            m.submodules.wrappll = pll

            # the PLL brings its own clock domain with it
            m.domains += ClockDomain("pllclk")

            # PLL output drives the pllclk domain clock
            comb += self.pllclk_clk.eq(pll.clk_pll_o)

            # wire up external 24mhz to PLL
            #comb += pll.clk_24_i.eq(self.ref_clk)
            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # pllclk reset simply follows the main reset
            comb += ResetSignal("pllclk").eq(ResetSignal())

        # these two domain objects are created but (as the commented-out
        # lines show) intentionally never added to m.domains
        cd_coresync = ClockDomain("coresync")
        #m.domains += cd_coresync
        if dbg_domain != 'sync':
            cd_dbgsync = ClockDomain("dbgsync")
            #m.domains += cd_dbgsync

        # XXX BYPASS PLL XXX
        # XXX BYPASS PLL XXX
        # XXX BYPASS PLL XXX
        # coresync is clocked straight from ref_clk when the PLL is
        # enabled (not from the PLL output), else from the main clock
        intclk = ClockSignal("coresync")
        comb += intclk.eq(self.ref_clk if self.pll_en else ClockSignal())

        # debug clock runs at coresync internal clock
        if dbg_domain != 'sync':
            comb += ClockSignal(dbg_domain).eq(intclk)

        return m

    def ports(self):
        """Issuer ports, then PLL ports, then main clock and reset."""
        return [*self.ti.ports(), *self.pll.ports(),
                ClockSignal(), ResetSignal()]

    def external_ports(self):
        """Issuer external ports plus clock, reset and any PLL pins."""
        ports = self.ti.external_ports()
        ports.extend([ClockSignal(), ResetSignal()])
        if self.pll_en:
            ports.extend([
                self.clk_sel_i,
                self.pll.clk_24_i,
                self.pll_test_o,
                self.pll_vco_o,
                self.pllclk_clk,
                self.ref_clk,
            ])
        return ports
1482
1483
if __name__ == '__main__':
    # one of each of these function units
    fu_names = ('alu', 'cr', 'branch', 'trap', 'logical',
                'spr',
                'div',
                'mul',
                'shiftrot')
    units = dict.fromkeys(fu_names, 1)

    # bare wishbone for both the load/store and the instruction side
    pspec = TestMemPspec(units=units,
                         reg_wid=64,
                         mask_wid=8,
                         addr_wid=48,
                         imem_ifacetype='bare_wb',
                         ldst_ifacetype='bare_wb')
    dut = TestIssuer(pspec)
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # run with no command-line arguments at all: also write out an
    # RTLIL file, this time using the external ports only
    no_cli_args = len(sys.argv) == 1
    if no_cli_args:
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(vl)