99752a737c7769f242aa13b245c7f40b84a05adf
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from nmutil.singlepipe import ControlBase
25 from soc.simple.core_data import FetchOutput
26
27 from nmigen.lib.coding import PriorityEncoder
28
29 from openpower.decoder.power_decoder import create_pdecode
30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
32 from openpower.decoder.decode2execute1 import Data
33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
34 SVP64PredMode)
35 from openpower.state import CoreState
36 from openpower.consts import (CR, SVP64CROffs)
37 from soc.experiment.testmem import TestMemory # test only for instructions
38 from soc.regfile.regfiles import StateRegs, FastRegs
39 from soc.simple.core import NonProductionCore
40 from soc.config.test.test_loadstore import TestMemPspec
41 from soc.config.ifetch import ConfigFetchUnit
42 from soc.debug.dmi import CoreDebug, DMIInterface
43 from soc.debug.jtag import JTAG
44 from soc.config.pinouts import get_pinspecs
45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
46 from soc.bus.simple_gpio import SimpleGPIO
47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
48 from soc.clock.select import ClockSelect
49 from soc.clock.dummypll import DummyPLL
50 from openpower.sv.svstate import SVSTATERec
51
52
53 from nmutil.util import rising_edge
54
def get_insn(f_instr_o, pc):
    """pick the 32-bit instruction word out of the fetch unit's output

    * f_instr_o: data returned by the instruction fetch unit
    * pc: address of the instruction being fetched

    a 32-bit wide output is handed back untouched. anything else is
    handled as the 64-bit case: bit 2 of pc selects which 32-bit word
    of the fetched data is returned.
    """
    if f_instr_o.width != 32:
        # 64-bit: bit 2 of pc decides which word to select
        return f_instr_o.word_select(pc[2], 32)
    return f_instr_o
61
# gets state input or reads from state regfile
def state_get(m, core_rst, state_i, name, regfile, regnum):
    """return a 64-bit Signal carrying one item of core state

    * m: Module that the logic is added to
    * core_rst: core reset. nothing is driven while it is asserted
    * state_i: override input (has "ok" and "data" fields)
    * name: name given to the result Signal (and prefix of the delay flag)
    * regfile: regfile read port (uses its "ren" and "o_data")
    * regnum: bit position set in the read-enable (ren = 1<<regnum)

    when state_i.ok is set the result comes combinatorially from
    state_i.data. otherwise a regfile read is requested, and the result
    is taken from regfile.o_data on the following clock: the registered
    "<name>_ok_delay" flag records that a read was requested.
    """
    value = Signal(64, reset_less=True, name=name)
    read_pending = Signal(name="%s_ok_delay" % name)
    with m.If(~core_rst):
        # remember (for one clock) that no override was present
        m.d.sync += read_pending.eq(~state_i.ok)
        with m.If(state_i.ok):
            # incoming override wins
            m.d.comb += value.eq(state_i.data)
        with m.Else():
            # no override: ask the regfile for the value...
            m.d.comb += regfile.ren.eq(1 << regnum)
        # ... which turns up one clock after the request
        with m.If(read_pending):
            m.d.comb += value.eq(regfile.o_data)
    return value
81
82
def get_predint(m, mask, name):
    """decode an SVP64 integer-predicate mask field into control signals

    mirrors the equivalent function in ISACaller, except that the INT
    register is not read here: only "what needs to be done" is decoded,
    i.e. which INT reg, whether it is shifted and whether it is inverted.

    * m: Module that the decode logic is added to
    * mask: the predicate mask field to decode
    * name: prefix for the names of the Signals created here

    returns (regread, invert, unary, all1s):

    * regread is the GPR register number to be read
    * invert is set when the register value is to be bit-inverted
    * unary is set when the register contents are to be used as a
      shift amount (1<<r3)
    * all1s is set to indicate that no mask is to be applied
    """
    comb = m.d.comb
    regread = Signal(5, name=name+"regread")
    invert = Signal(name=name+"invert")
    unary = Signal(name=name+"unary")
    all1s = Signal(name=name+"all1s")

    # one row per predicate encoding: the signals that get driven (in
    # order) when that encoding is selected. anything not listed is
    # left at its default of zero.
    decode_table = (
        (SVP64PredInt.ALWAYS, ((all1s, 1),)),  # use 0b1111 (all ones)
        (SVP64PredInt.R3_UNARY, ((regread, 3), (unary, 1))),  # 1<<r3
        (SVP64PredInt.R3, ((regread, 3),)),
        (SVP64PredInt.R3_N, ((regread, 3), (invert, 1))),
        (SVP64PredInt.R10, ((regread, 10),)),
        (SVP64PredInt.R10_N, ((regread, 10), (invert, 1))),
        (SVP64PredInt.R30, ((regread, 30),)),
        (SVP64PredInt.R30_N, ((regread, 30), (invert, 1))),
    )
    with m.Switch(mask):
        for pred, settings in decode_table:
            with m.Case(pred.value):
                for sig, val in settings:
                    comb += sig.eq(val)
    return regread, invert, unary, all1s
121
122
def get_predcr(m, mask, name):
    """decode an SVP64 CR-predicate mask field into bit index and invert

    this is the hardware counterpart of _get_predcr in ISACaller.

    * m: Module that the decode logic is added to
    * mask: the predicate mask field to decode
    * name: prefix for the names of the Signals created here

    returns (idx, invert): idx selects which bit of a CR field is to be
    tested (CR.LT, CR.GT, CR.EQ or CR.SO), invert is set when the sense
    of that bit is to be flipped (GE, LE, NE, NS).
    """
    comb = m.d.comb
    idx = Signal(2, name=name+"idx")
    invert = Signal(name=name+"crinvert")

    # (predicate encoding, CR bit to test, invert flag)
    cr_decode = (
        (SVP64PredCR.LT, CR.LT, 0),
        (SVP64PredCR.GE, CR.LT, 1),
        (SVP64PredCR.GT, CR.GT, 0),
        (SVP64PredCR.LE, CR.GT, 1),
        (SVP64PredCR.EQ, CR.EQ, 0),
        (SVP64PredCR.NE, CR.EQ, 1),
        (SVP64PredCR.SO, CR.SO, 0),
        (SVP64PredCR.NS, CR.SO, 1),
    )
    with m.Switch(mask):
        for pred, cr_bit, inverted in cr_decode:
            with m.Case(pred.value):
                comb += idx.eq(cr_bit)
                comb += invert.eq(inverted)
    return idx, invert
156
157
# Fetch Finite State Machine.
# WARNING: there are currently DriverConflicts but it's actually working.
class FetchFSM(ControlBase):
    """instruction fetch FSM, using the ControlBase ready/valid handshake

    the "previous" side (self.p) is the request to start a fetch, the
    "next" side (self.n) signals that an instruction has been fetched and
    placed into pdecode2.dec.raw_opcode_in.

    constructor arguments are stored as attributes of the same name:

    * allow_overlap: if set, INSN_READ aborts back to IDLE when the
      debug interface asserts stopping_o
    * svp64_en: enables SVP64 prefix detection (and the 2nd 32-bit read)
    * imem: instruction fetch unit
    * core_rst: core reset signal (forces nia to zero)
    * pdecode2: instruction decoder (receives opcode and SVP64 RM)
    * cur_state: current pc / msr / svstate, captured here
    * dbg: debug interface (stopping_o is used)
    * core: the core (its 'state' regfile MSR read port is used)
    * pc, svstate: values to capture at the start of a fetch
    * nia: receives the address of the following instruction
    * is_svp64_mode: receives "this instruction has an SVP64 prefix"

    NOTE(review): when svp64_en is set, elaborate() reads self.svp64 and
    self.core_bigendian_i, neither of which is assigned by this class.
    presumably whoever instantiates FetchFSM attaches them before
    elaboration - TODO confirm.
    """
    def __init__(self, allow_overlap, svp64_en, imem, core_rst,
                 pdecode2, cur_state,
                 dbg, core, pc, svstate, nia, is_svp64_mode):
        self.allow_overlap = allow_overlap
        self.svp64_en = svp64_en
        self.imem = imem
        self.core_rst = core_rst
        self.pdecode2 = pdecode2
        self.cur_state = cur_state
        self.dbg = dbg
        self.core = core
        self.pc = pc
        self.svstate = svstate
        self.nia = nia
        self.is_svp64_mode = is_svp64_mode

        # set up pipeline ControlBase and allocate i/o specs
        # (unusual: normally done by the Pipeline API)
        super().__init__(stage=self)
        self.p.i_data, self.n.o_data = self.new_specs(None)
        self.i, self.o = self.p.i_data, self.n.o_data

        staterf = self.core.regs.rf['state']
        self.state_r_msr = staterf.r_ports['msr']  # MSR rd

    # next 3 functions are Stage API Compliance
    def setup(self, m, i):
        """Stage API: nothing to set up"""
        pass

    def ispec(self):
        """Stage API: input spec (a placeholder Signal for now)"""
        return Signal(name="dummy_for_now", reset_less=True)

    def ospec(self):
        """Stage API: output spec"""
        return FetchOutput()

    def elaborate(self, platform):
        """fetch FSM

        this FSM performs fetch of raw instruction data, partial-decodes
        it 32-bit at a time to detect SVP64 prefixes, and will optionally
        read a 2nd 32-bit quantity if that occurs.

        states: IDLE -> INSN_READ -> (INSN_READ2 ->) INSN_READY -> IDLE
        """
        m = super().elaborate(platform)

        dbg = self.dbg
        # (self.core is only needed in __init__, for the MSR read port.
        # the former "core = self.core," here had a stray trailing comma,
        # binding an unused 1-tuple: it has been removed)
        pc = self.pc
        svstate = self.svstate
        nia = self.nia
        is_svp64_mode = self.is_svp64_mode
        fetch_pc_o_ready = self.p.o_ready
        fetch_pc_i_valid = self.p.i_valid
        fetch_insn_o_valid = self.n.o_valid
        fetch_insn_i_ready = self.n.i_ready

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode

        # set once the MSR has been captured. cleared when a fetch starts
        msr_read = Signal(reset=1)

        # don't read msr every cycle
        comb += self.state_r_msr.ren.eq(0)

        with m.FSM(name='fetch_fsm'):

            # waiting (zzz)
            with m.State("IDLE"):
                with m.If(~dbg.stopping_o):
                    comb += fetch_pc_o_ready.eq(1)
                with m.If(fetch_pc_i_valid):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                    sync += cur_state.pc.eq(pc)
                    sync += cur_state.svstate.eq(svstate)  # and svstate

                    # initiate read of MSR. arrives one clock later
                    comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # dummy pause to find out why simulation is not keeping up
            with m.State("INSN_READ"):
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "IDLE"
                with m.Else():
                    # one cycle later, msr/sv read arrives. valid only once.
                    with m.If(~msr_read):
                        sync += msr_read.eq(1)  # yeah don't read it again
                        sync += cur_state.msr.eq(self.state_r_msr.o_data)
                    with m.If(self.imem.f_busy_o):  # zzz...
                        # busy: stay in wait-read
                        comb += self.imem.a_i_valid.eq(1)
                        comb += self.imem.f_i_valid.eq(1)
                    with m.Else():
                        # not busy: instruction fetched
                        insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                        if self.svp64_en:
                            svp64 = self.svp64
                            # decode the SVP64 prefix, if any
                            comb += svp64.raw_opcode_in.eq(insn)
                            comb += svp64.bigendian.eq(self.core_bigendian_i)
                            # pass the decoded prefix (if any) to PowerDecoder2
                            sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                            sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
                            # remember whether this is a prefixed instruction,
                            # so the FSM can readily loop when VL==0
                            sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                            # calculate the address of the following instruction
                            insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                            sync += nia.eq(cur_state.pc + insn_size)
                            with m.If(~svp64.is_svp64_mode):
                                # with no prefix, store the instruction
                                # and hand it directly to the next FSM
                                sync += dec_opcode_o.eq(insn)
                                m.next = "INSN_READY"
                            with m.Else():
                                # fetch the rest of the instruction from memory
                                comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                                comb += self.imem.a_i_valid.eq(1)
                                comb += self.imem.f_i_valid.eq(1)
                                m.next = "INSN_READ2"
                        else:
                            # not SVP64 - 32-bit only
                            sync += nia.eq(cur_state.pc + 4)
                            sync += dec_opcode_o.eq(insn)
                            m.next = "INSN_READY"

            with m.State("INSN_READ2"):
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                    sync += dec_opcode_o.eq(insn)
                    m.next = "INSN_READY"
                    # TODO: probably can start looking at pdecode2.rm_dec
                    # here or maybe even in INSN_READ state, if svp64_mode
                    # detected, in order to trigger - and wait for - the
                    # predicate reading.  sketch (svp64_en only):
                    #
                    #   pmode = pdecode2.rm_dec.predmode
                    #   if pmode != SVP64PredMode.ALWAYS.value:
                    #       fire predicate loading FSM and wait before
                    #       moving to INSN_READY
                    #   else:
                    #       sync += self.srcmask.eq(-1) # set to all 1s
                    #       sync += self.dstmask.eq(-1) # set to all 1s
                    #       m.next = "INSN_READY"

            with m.State("INSN_READY"):
                # hand over the instruction, to be decoded
                comb += fetch_insn_o_valid.eq(1)
                with m.If(fetch_insn_i_ready):
                    m.next = "IDLE"

        # whatever was done above, over-ride it if core reset is held
        with m.If(self.core_rst):
            sync += nia.eq(0)

        return m
339
340
341 class TestIssuerInternal(Elaboratable):
342 """TestIssuer - reads instructions from TestMemory and issues them
343
344 efficiency and speed is not the main goal here: functional correctness
345 and code clarity is. optimisations (which almost 100% interfere with
346 easy understanding) come later.
347 """
def __init__(self, pspec):
    """create the core, decoder, fetch unit, debug interface and the
    optional peripherals selected by pspec

    optional features are enabled by attributes of pspec: svp64,
    regreduce, allow_overlap, sram4x4kblock, xics and gpio (each must
    compare equal to True), plus debug == 'jtag'. a missing attribute
    means "disabled".
    """

    def enabled(attr, wanted=True):
        # an option is on only if pspec has it *and* it equals "wanted"
        return hasattr(pspec, attr) and getattr(pspec, attr) == wanted

    # test if SVP64 is to be enabled
    self.svp64_en = enabled("svp64")

    # and if regfiles are reduced
    self.regreduce_en = enabled("regreduce")

    # and if overlap requested
    self.allow_overlap = enabled("allow_overlap")

    # JTAG interface.  add this right at the start because if it's
    # added it *modifies* the pspec, by adding enable/disable signals
    # for parts of the rest of the core
    self.jtag_en = enabled("debug", 'jtag')
    self.dbg_domain = "sync"  # sigh "dbgsync" too problematic
    # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
    if not self.jtag_en:
        self.wb_sram_en = Const(1)
    else:
        # XXX MUST keep this up-to-date with litex, and
        # soc-cocotb-sim, and err.. all needs sorting out, argh
        subset = ['uart',
                  'mtwi',
                  'eint', 'gpio', 'mspi0',
                  # 'mspi1', - disabled for now
                  # 'pwm', 'sd0', - disabled for now
                  'sdr']
        self.jtag = JTAG(get_pinspecs(subset=subset),
                         domain=self.dbg_domain)
        # add signals to pspec to enable/disable icache and dcache
        # (or data and instruction wishbone if icache/dcache not included)
        # https://bugs.libre-soc.org/show_bug.cgi?id=520
        # TODO: do we actually care if these are not domain-synchronised?
        # honestly probably not.
        pspec.wb_icache_en = self.jtag.wb_icache_en
        pspec.wb_dcache_en = self.jtag.wb_dcache_en
        self.wb_sram_en = self.jtag.wb_sram_en

    # add 4k sram blocks?
    self.sram4x4k = enabled("sram4x4kblock")
    if self.sram4x4k:
        self.sram4k = [SPBlock512W64B8W(name="sram4k_%d" % i,
                                        # features={'err'}
                                        )
                       for i in range(4)]

    # add interrupt controller?
    self.xics = enabled("xics")
    if self.xics:
        self.xics_icp = XICS_ICP()
        self.xics_ics = XICS_ICS()
        self.int_level_i = self.xics_ics.int_level_i

    # add GPIO peripheral?
    self.gpio = enabled("gpio")
    if self.gpio:
        self.simple_gpio = SimpleGPIO()
        self.gpio_o = self.simple_gpio.gpio_o

    # main instruction core.  suitable for prototyping / demo only
    self.core = NonProductionCore(pspec)
    self.core_rst = ResetSignal("coresync")

    # instruction decoder.  goes into Trap Record
    # pdecode = create_pdecode()
    self.cur_state = CoreState("cur")  # current state (MSR/PC/SVSTATE)
    self.pdecode2 = PowerDecode2(None, state=self.cur_state,
                                 opkls=IssuerDecode2ToOperand,
                                 svp64_en=self.svp64_en,
                                 regreduce_en=self.regreduce_en)
    pdecode = self.pdecode2.dec

    if self.svp64_en:
        self.svp64 = SVP64PrefixDecoder()  # for decoding SVP64 prefix

    # Test Instruction memory
    self.imem = ConfigFetchUnit(pspec).fu

    # DMI interface
    self.dbg = CoreDebug()

    # instruction go/monitor
    self.pc_o = Signal(64, reset_less=True)
    self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
    self.svstate_i = Data(64, "svstate_i")  # ditto
    self.core_bigendian_i = Signal()  # TODO: set based on MSR.LE
    self.busy_o = Signal(reset_less=True)
    self.memerr_o = Signal(reset_less=True)

    # all the regfiles that ports are taken from, below
    regfiles = self.core.regs.rf
    state_rf = regfiles['state']
    int_rf = regfiles['int']
    cr_rf = regfiles['cr']
    xer_rf = regfiles['xer']

    # STATE regfile read /write ports for PC, MSR, SVSTATE
    self.state_r_pc = state_rf.r_ports['cia']  # PC rd
    self.state_w_pc = state_rf.w_ports['d_wr1']  # PC wr
    self.state_r_sv = state_rf.r_ports['sv']  # SVSTATE rd
    self.state_w_sv = state_rf.w_ports['sv']  # SVSTATE wr

    # DMI interface access
    self.int_r = int_rf.r_ports['dmi']  # INT read
    self.cr_r = cr_rf.r_ports['full_cr_dbg']  # CR read
    self.xer_r = xer_rf.r_ports['full_xer']  # XER read

    if self.svp64_en:
        # for predication
        self.int_pred = int_rf.r_ports['pred']  # INT predicate read
        self.cr_pred = cr_rf.r_ports['cr_pred']  # CR predicate read

    # hack method of keeping an eye on whether branch/trap set the PC
    self.state_nia = self.core.regs.rf['state'].w_ports['nia']
    self.state_nia.wen.name = 'state_nia_wen'

    # pulse to synchronize the simulator at instruction end
    self.insn_done = Signal()

    # indicate any instruction still outstanding, in execution
    self.any_busy = Signal()

    if self.svp64_en:
        # store copies of predicate masks
        self.srcmask = Signal(64)
        self.dstmask = Signal(64)
476
def fetch_predicate_fsm(self, m,
                        pred_insn_i_valid, pred_insn_o_ready,
                        pred_mask_o_valid, pred_mask_i_ready):
    """fetch_predicate_fsm - obtains (constructs in the case of CR)
    src/dest predicate masks

    https://bugs.libre-soc.org/show_bug.cgi?id=617
    the predicates are read here through self.int_pred (IntRegs
    r_ports['pred']) or self.cr_pred (CRRegs r_ports['cr_pred']).
    in the case of CRs it is done through multiple reads, extracting
    one relevant bit at a time.  later, a faster way would be to use
    the 32-bit-wide CR port but this is more complex decoding, here.
    equivalent code used in ISACaller is
    "from openpower.decoder.isa.caller import get_predcr"

    arguments:

    * m: Module that the FSM is added to
    * pred_insn_i_valid / pred_insn_o_ready: handshake that starts
      a predicate fetch (valid is an input here, ready is driven here)
    * pred_mask_o_valid / pred_mask_i_ready: handshake that hands
      over the finished masks (valid is driven here, ready is an input)

    results are left in self.srcmask and self.dstmask, already shifted
    right by the current srcstep / dststep.

    note: this ENTIRE FSM is not to be called when svp64 is disabled
    """
    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2
    rm_dec = pdecode2.rm_dec  # SVP64RMModeDecode
    predmode = rm_dec.predmode
    srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
    cr_pred, int_pred = self.cr_pred, self.int_pred  # read regfiles
    # get src/dst step, so we can skip already used mask bits
    cur_state = self.cur_state
    srcstep = cur_state.svstate.srcstep
    dststep = cur_state.svstate.dststep
    cur_vl = cur_state.svstate.vl

    # decode predicates.  both the INT and the CR interpretation of the
    # same fields are decoded: predmode picks which one is acted upon
    sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
    dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
    sidx, scrinvert = get_predcr(m, srcpred, 's')
    didx, dcrinvert = get_predcr(m, dstpred, 'd')

    # store fetched masks, for either intpred or crpred
    # when src/dst step is not zero, the skipped mask bits need to be
    # shifted-out, before actually storing them in src/dest mask
    new_srcmask = Signal(64, reset_less=True)
    new_dstmask = Signal(64, reset_less=True)

    with m.FSM(name="fetch_predicate"):

        with m.State("FETCH_PRED_IDLE"):
            comb += pred_insn_o_ready.eq(1)
            with m.If(pred_insn_i_valid):
                with m.If(predmode == SVP64PredMode.INT):
                    # skip fetching destination mask register, when zero
                    with m.If(dall1s):
                        sync += new_dstmask.eq(-1)
                        # directly go to fetch source mask register
                        # guaranteed not to be zero (otherwise predmode
                        # would be SVP64PredMode.ALWAYS, not INT)
                        comb += int_pred.addr.eq(sregread)
                        comb += int_pred.ren.eq(1)
                        m.next = "INT_SRC_READ"
                    # fetch destination predicate register
                    with m.Else():
                        comb += int_pred.addr.eq(dregread)
                        comb += int_pred.ren.eq(1)
                        m.next = "INT_DST_READ"
                with m.Elif(predmode == SVP64PredMode.CR):
                    # go fetch masks from the CR register file
                    sync += new_srcmask.eq(0)
                    sync += new_dstmask.eq(0)
                    m.next = "CR_READ"
                with m.Else():
                    # neither INT nor CR: no predication, masks all 1s.
                    # written straight to src/dstmask, so the
                    # FETCH_PRED_SHIFT_MASK state is bypassed
                    sync += self.srcmask.eq(-1)
                    sync += self.dstmask.eq(-1)
                    m.next = "FETCH_PRED_DONE"

        with m.State("INT_DST_READ"):
            # store destination mask
            inv = Repl(dinvert, 64)
            with m.If(dunary):
                # set selected mask bit for 1<<r3 mode
                dst_shift = Signal(range(64))
                # only the bottom 6 bits of the register are used
                comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
                sync += new_dstmask.eq(1 << dst_shift)
            with m.Else():
                # invert mask if requested
                sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
            # skip fetching source mask register, when zero
            with m.If(sall1s):
                sync += new_srcmask.eq(-1)
                m.next = "FETCH_PRED_SHIFT_MASK"
            # fetch source predicate register
            with m.Else():
                comb += int_pred.addr.eq(sregread)
                comb += int_pred.ren.eq(1)
                m.next = "INT_SRC_READ"

        with m.State("INT_SRC_READ"):
            # store source mask
            inv = Repl(sinvert, 64)
            with m.If(sunary):
                # set selected mask bit for 1<<r3 mode
                src_shift = Signal(range(64))
                # only the bottom 6 bits of the register are used
                comb += src_shift.eq(self.int_pred.o_data & 0b111111)
                sync += new_srcmask.eq(1 << src_shift)
            with m.Else():
                # invert mask if requested
                sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
            m.next = "FETCH_PRED_SHIFT_MASK"

        # fetch masks from the CR register file
        # implements the following loop:
        # idx, inv = get_predcr(mask)
        # mask = 0
        # for cr_idx in range(vl):
        #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
        #     if cr[idx] ^ inv:
        #         mask |= 1 << cr_idx
        # return mask
        with m.State("CR_READ"):
            # CR index to be read, which will be ready by the next cycle
            cr_idx = Signal.like(cur_vl, reset_less=True)
            # submit the read operation to the regfile
            with m.If(cr_idx != cur_vl):
                # the CR read port is unary ...
                # ren = 1 << cr_idx
                # ... in MSB0 convention ...
                # ren = 1 << (7 - cr_idx)
                # ... and with an offset:
                # ren = 1 << (7 - off - cr_idx)
                idx = SVP64CROffs.CRPred + cr_idx
                comb += cr_pred.ren.eq(1 << (7 - idx))
                # signal data valid in the next cycle
                cr_read = Signal(reset_less=True)
                sync += cr_read.eq(1)
                # load the next index
                sync += cr_idx.eq(cr_idx + 1)
            with m.Else():
                # exit on loop end
                sync += cr_read.eq(0)
                sync += cr_idx.eq(0)
                m.next = "FETCH_PRED_SHIFT_MASK"
            # data from the read requested on the previous cycle.
            # this is also active on the final (exit) cycle, which is
            # where the result of the last read gets accumulated
            with m.If(cr_read):
                # compensate for the one cycle delay on the regfile
                cur_cr_idx = Signal.like(cur_vl)
                comb += cur_cr_idx.eq(cr_idx - 1)
                # read the CR field, select the appropriate bit
                cr_field = Signal(4)
                scr_bit = Signal()
                dcr_bit = Signal()
                comb += cr_field.eq(cr_pred.o_data)
                comb += scr_bit.eq(cr_field.bit_select(sidx, 1) ^ scrinvert)
                comb += dcr_bit.eq(cr_field.bit_select(didx, 1) ^ dcrinvert)
                # set the corresponding mask bit
                bit_to_set = Signal.like(self.srcmask)
                comb += bit_to_set.eq(1 << cur_cr_idx)
                with m.If(scr_bit):
                    sync += new_srcmask.eq(new_srcmask | bit_to_set)
                with m.If(dcr_bit):
                    sync += new_dstmask.eq(new_dstmask | bit_to_set)

        with m.State("FETCH_PRED_SHIFT_MASK"):
            # shift-out skipped mask bits
            sync += self.srcmask.eq(new_srcmask >> srcstep)
            sync += self.dstmask.eq(new_dstmask >> dststep)
            m.next = "FETCH_PRED_DONE"

        with m.State("FETCH_PRED_DONE"):
            # masks are available: hold "valid" until they are accepted
            comb += pred_mask_o_valid.eq(1)
            with m.If(pred_mask_i_ready):
                m.next = "FETCH_PRED_IDLE"
643
def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
              dbg, core_rst, is_svp64_mode,
              fetch_pc_o_ready, fetch_pc_i_valid,
              fetch_insn_o_valid, fetch_insn_i_ready,
              pred_insn_i_valid, pred_insn_o_ready,
              pred_mask_o_valid, pred_mask_i_ready,
              exec_insn_i_valid, exec_insn_o_ready,
              exec_pc_o_valid, exec_pc_i_ready):
    """issue FSM

    decode / issue FSM.  this interacts with the "fetch" FSM
    through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
    (outgoing). also interacts with the "execute" FSM
    through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
    (incoming), and with the predicate-fetch FSM through
    pred_insn_ready/valid (outgoing) and pred_mask_ready/valid
    (incoming).
    SVP64 RM prefixes have already been set up by the
    "fetch" phase, so execute is fairly straightforward.

    other arguments:

    * m: Module that the FSM is added to
    * core: the core.  core.i receives the decoded instruction
    * pc_changed, sv_changed: flags.  set here when pc_i / svstate_i
      are written while stopped, and tested at instruction end to
      decide whether PC / SVSTATE must be left alone
    * nia: address of the following instruction, written to the PC
    * dbg: debug interface (core_stop_o, stopping_o, core_stopped_i)
    * core_rst: core reset
    * is_svp64_mode: set if the instruction has an SVP64 prefix

    states: ISSUE_START -> INSN_WAIT -> (PRED_START -> MASK_WAIT ->
    PRED_SKIP ->) DECODE_SV -> INSN_EXECUTE -> EXECUTE_WAIT, from
    where the FSM returns to ISSUE_START, PRED_SKIP (next element of
    a vector loop) or DECODE_SV (re-run after an exception).
    """

    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2
    cur_state = self.cur_state

    # temporaries
    dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

    # for updating svstate (things like srcstep etc.)
    update_svstate = Signal()  # set this (below) if updating
    new_svstate = SVSTATERec("new_svstate")
    # default is "unchanged": individual fields (or the whole record)
    # are overridden by later comb assignments in the states below
    comb += new_svstate.eq(cur_state.svstate)

    # precalculate srcstep+1 and dststep+1
    cur_srcstep = cur_state.svstate.srcstep
    cur_dststep = cur_state.svstate.dststep
    next_srcstep = Signal.like(cur_srcstep)
    next_dststep = Signal.like(cur_dststep)
    comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
    comb += next_dststep.eq(cur_state.svstate.dststep+1)

    # note if an exception happened.  in a pipelined or OoO design
    # this needs to be accompanied by "shadowing" (or stalling)
    exc_happened = self.core.o.exc_happened

    with m.FSM(name="issue_fsm"):

        # sync with the "fetch" phase which is reading the instruction
        # at this point, there is no instruction running, that
        # could inadvertently update the PC.
        with m.State("ISSUE_START"):
            # wait on "core stop" release, before next fetch
            # need to do this here, in case we are in a VL==0 loop
            with m.If(~dbg.core_stop_o & ~core_rst):
                comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
                with m.If(fetch_pc_o_ready):  # fetch acknowledged us
                    m.next = "INSN_WAIT"
            with m.Else():
                # tell core it's stopped, and acknowledge debug handshake
                comb += dbg.core_stopped_i.eq(1)
                # while stopped, allow updating the PC and SVSTATE
                with m.If(self.pc_i.ok):
                    comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                    comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                    sync += pc_changed.eq(1)
                with m.If(self.svstate_i.ok):
                    comb += new_svstate.eq(self.svstate_i.data)
                    comb += update_svstate.eq(1)
                    sync += sv_changed.eq(1)

        # wait for an instruction to arrive from Fetch
        with m.State("INSN_WAIT"):
            # elaboration-time choice: without allow_overlap this
            # state can never be aborted by the debug interface
            if self.allow_overlap:
                stopping = dbg.stopping_o
            else:
                stopping = Const(0)
            with m.If(stopping):
                # stopping: jump back to idle
                m.next = "ISSUE_START"
            with m.Else():
                comb += fetch_insn_i_ready.eq(1)
                with m.If(fetch_insn_o_valid):
                    # loop into ISSUE_START if it's a SVP64 instruction
                    # and VL == 0.  this because VL==0 is a for-loop
                    # from 0 to 0 i.e. always, always a NOP.
                    cur_vl = cur_state.svstate.vl
                    with m.If(is_svp64_mode & (cur_vl == 0)):
                        # update the PC before fetching the next instruction
                        # since we are in a VL==0 loop, no instruction was
                        # executed that we could be overwriting
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(nia)
                        comb += self.insn_done.eq(1)
                        m.next = "ISSUE_START"
                    with m.Else():
                        if self.svp64_en:
                            m.next = "PRED_START"  # fetching predicate
                        else:
                            m.next = "DECODE_SV"  # skip predication

        with m.State("PRED_START"):
            comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
            with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
                m.next = "MASK_WAIT"

        with m.State("MASK_WAIT"):
            comb += pred_mask_i_ready.eq(1)  # ready to receive the masks
            with m.If(pred_mask_o_valid):  # predication masks are ready
                m.next = "PRED_SKIP"

        # skip zeros in predicate
        with m.State("PRED_SKIP"):
            with m.If(~is_svp64_mode):
                m.next = "DECODE_SV"  # nothing to do
            with m.Else():
                # self.srcmask / self.dstmask only exist with svp64_en
                if self.svp64_en:
                    pred_src_zero = pdecode2.rm_dec.pred_sz
                    pred_dst_zero = pdecode2.rm_dec.pred_dz

                    # new srcstep, after skipping zeros
                    skip_srcstep = Signal.like(cur_srcstep)
                    # value to be added to the current srcstep
                    src_delta = Signal.like(cur_srcstep)
                    # add leading zeros to srcstep, if not in zero mode
                    with m.If(~pred_src_zero):
                        # priority encoder (count leading zeros)
                        # append guard bit, in case the mask is all zeros
                        pri_enc_src = PriorityEncoder(65)
                        m.submodules.pri_enc_src = pri_enc_src
                        comb += pri_enc_src.i.eq(Cat(self.srcmask,
                                                     Const(1, 1)))
                        comb += src_delta.eq(pri_enc_src.o)
                    # apply delta to srcstep
                    # (in zero mode src_delta is left undriven, i.e. 0)
                    comb += skip_srcstep.eq(cur_srcstep + src_delta)
                    # shift-out all leading zeros from the mask
                    # plus the leading "one" bit
                    # TODO count leading zeros and shift-out the zero
                    #      bits, in the same step, in hardware
                    sync += self.srcmask.eq(self.srcmask >> (src_delta+1))

                    # same as above, but for dststep
                    skip_dststep = Signal.like(cur_dststep)
                    dst_delta = Signal.like(cur_dststep)
                    with m.If(~pred_dst_zero):
                        pri_enc_dst = PriorityEncoder(65)
                        m.submodules.pri_enc_dst = pri_enc_dst
                        comb += pri_enc_dst.i.eq(Cat(self.dstmask,
                                                     Const(1, 1)))
                        comb += dst_delta.eq(pri_enc_dst.o)
                    comb += skip_dststep.eq(cur_dststep + dst_delta)
                    sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))

                    # TODO: initialize mask[VL]=1 to avoid passing past VL
                    # (cur_vl is the python variable bound in INSN_WAIT)
                    with m.If((skip_srcstep >= cur_vl) |
                              (skip_dststep >= cur_vl)):
                        # end of VL loop. Update PC and reset src/dst step
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(nia)
                        comb += new_svstate.srcstep.eq(0)
                        comb += new_svstate.dststep.eq(0)
                        comb += update_svstate.eq(1)
                        # synchronize with the simulator
                        comb += self.insn_done.eq(1)
                        # go back to Issue
                        m.next = "ISSUE_START"
                    with m.Else():
                        # update new src/dst step
                        comb += new_svstate.srcstep.eq(skip_srcstep)
                        comb += new_svstate.dststep.eq(skip_dststep)
                        comb += update_svstate.eq(1)
                        # proceed to Decode
                        m.next = "DECODE_SV"

                    # pass predicate mask bits through to satellite decoders
                    # TODO: for SIMD this will be *multiple* bits
                    sync += core.i.sv_pred_sm.eq(self.srcmask[0])
                    sync += core.i.sv_pred_dm.eq(self.dstmask[0])

        # after src/dst step have been updated, we are ready
        # to decode the instruction
        with m.State("DECODE_SV"):
            # decode the instruction
            sync += core.i.e.eq(pdecode2.e)
            sync += core.i.state.eq(cur_state)
            sync += core.i.raw_insn_i.eq(dec_opcode_i)
            sync += core.i.bigendian_i.eq(self.core_bigendian_i)
            if self.svp64_en:
                sync += core.i.sv_rm.eq(pdecode2.sv_rm)
                # set RA_OR_ZERO detection in satellite decoders
                sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
                # and svp64 detection
                sync += core.i.is_svp64_mode.eq(is_svp64_mode)
                # and svp64 bit-rev'd ldst mode
                ldst_dec = pdecode2.use_svp64_ldst_dec
                sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
            # after decoding, reset any previous exception condition,
            # allowing it to be set again during the next execution
            sync += pdecode2.ldst_exc.eq(0)

            m.next = "INSN_EXECUTE"  # move to "execute"

        # handshake with execution FSM, move to "wait" once acknowledged
        with m.State("INSN_EXECUTE"):
            comb += exec_insn_i_valid.eq(1)  # trigger execute
            with m.If(exec_insn_o_ready):  # execute acknowledged us
                m.next = "EXECUTE_WAIT"

        with m.State("EXECUTE_WAIT"):
            # wait on "core stop" release, at instruction end
            # need to do this here, in case we are in a VL>1 loop
            with m.If(~dbg.core_stop_o & ~core_rst):
                comb += exec_pc_i_ready.eq(1)
                # see https://bugs.libre-soc.org/show_bug.cgi?id=636
                # the exception info needs to be blatted into
                # pdecode.ldst_exc, and the instruction "re-run".
                # when ldst_exc.happened is set, the PowerDecoder2
                # reacts very differently: it re-writes the instruction
                # with a "trap" (calls PowerDecoder2.trap()) which
                # will *overwrite* whatever was requested and jump the
                # PC to the exception address, as well as alter MSR.
                # nothing else needs to be done other than to note
                # the change of PC and MSR (and, later, SVSTATE)
                with m.If(exc_happened):
                    sync += pdecode2.ldst_exc.eq(core.fus.get_exc("ldst0"))

                with m.If(exec_pc_o_valid):

                    # was this the last loop iteration?
                    is_last = Signal()
                    cur_vl = cur_state.svstate.vl
                    comb += is_last.eq(next_srcstep == cur_vl)

                    # return directly to Decode if Execute generated an
                    # exception.
                    with m.If(pdecode2.ldst_exc.happened):
                        m.next = "DECODE_SV"

                    # if either PC or SVSTATE were changed by the previous
                    # instruction, go directly back to Fetch, without
                    # updating either PC or SVSTATE
                    with m.Elif(pc_changed | sv_changed):
                        m.next = "ISSUE_START"

                    # also return to Fetch, when no output was a vector
                    # (regardless of SRCSTEP and VL), or when the last
                    # instruction was really the last one of the VL loop
                    with m.Elif((~pdecode2.loop_continue) | is_last):
                        # before going back to fetch, update the PC state
                        # register with the NIA.
                        # ok here we are not reading the branch unit.
                        # TODO: this just blithely overwrites whatever
                        #       pipeline updated the PC
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(nia)
                        # reset SRCSTEP before returning to Fetch
                        if self.svp64_en:
                            # only needed at the end of a vector loop
                            with m.If(pdecode2.loop_continue):
                                comb += new_svstate.srcstep.eq(0)
                                comb += new_svstate.dststep.eq(0)
                                comb += update_svstate.eq(1)
                        else:
                            comb += new_svstate.srcstep.eq(0)
                            comb += new_svstate.dststep.eq(0)
                            comb += update_svstate.eq(1)
                        m.next = "ISSUE_START"

                    # returning to Execute? then, first update SRCSTEP
                    with m.Else():
                        comb += new_svstate.srcstep.eq(next_srcstep)
                        comb += new_svstate.dststep.eq(next_dststep)
                        comb += update_svstate.eq(1)
                        # return to mask skip loop
                        m.next = "PRED_SKIP"

            with m.Else():
                # stopped (or in reset): acknowledge debug handshake
                comb += dbg.core_stopped_i.eq(1)
                # while stopped, allow updating the PC and SVSTATE
                with m.If(self.pc_i.ok):
                    comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                    comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                    sync += pc_changed.eq(1)
                with m.If(self.svstate_i.ok):
                    comb += new_svstate.eq(self.svstate_i.data)
                    comb += update_svstate.eq(1)
                    sync += sv_changed.eq(1)

    # check if svstate needs updating: if so, write it to State Regfile
    # (outside of the FSM: applies to whichever state requested it)
    with m.If(update_svstate):
        comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
        comb += self.state_w_sv.i_data.eq(new_svstate)
        sync += cur_state.svstate.eq(new_svstate)  # for next clock
934
def execute_fsm(self, m, core, pc_changed, sv_changed,
                exec_insn_i_valid, exec_insn_o_ready,
                exec_pc_o_valid, exec_pc_i_ready):
    """build the "execute" FSM (named "exec_fsm") inside module m

    the exec_insn_* pair is the incoming handshake from the "issue" FSM,
    the exec_pc_* pair is the outgoing one back to it.  two states:

    * INSN_START: raises exec_insn_o_ready.  when exec_insn_i_valid is
      seen, core.p.i_valid is raised and pc_changed / sv_changed are
      cleared; the FSM only advances once core.p.o_ready is set.
    * INSN_ACTIVE: records any write-enable for PC or SVSTATE seen on
      self.state_nia.wen, then, once the core is no longer busy, raises
      exec_pc_o_valid and waits for exec_pc_i_ready before going back
      to INSN_START.
    """
    comb, sync = m.d.comb, m.d.sync
    ldst_exc_happened = self.pdecode2.ldst_exc.happened
    state_wen = self.state_nia.wen
    core_finished = ~core.n.o_data.busy_o  # core reports "not busy"

    with m.FSM(name="exec_fsm"):

        # idle: advertise readiness, pass a valid instruction to the core
        with m.State("INSN_START"):
            comb += exec_insn_o_ready.eq(1)
            with m.If(exec_insn_i_valid):
                comb += core.p.i_valid.eq(1)  # instruction is valid/issued
                # start each instruction with clean "changed" markers
                sync += [sv_changed.eq(0),
                         pc_changed.eq(0)]
                # stay here until the core has actually accepted it
                with m.If(core.p.o_ready):
                    m.next = "INSN_ACTIVE"

        # instruction is in flight: wait for the core to finish
        with m.State("INSN_ACTIVE"):
            # remember whether SVSTATE and/or PC got written meanwhile
            with m.If(state_wen & (1<<StateRegs.SVSTATE)):
                sync += sv_changed.eq(1)
            with m.If(state_wen & (1<<StateRegs.PC)):
                sync += pc_changed.eq(1)
            with m.If(core_finished):
                comb += exec_pc_o_valid.eq(1)
                with m.If(exec_pc_i_ready):
                    # insn_done is deliberately *not* raised when an
                    # exception was flagged: exceptions are implemented
                    # by feeding the LDST exception info back into
                    # PowerDecoder2 and re-running the whole instruction
                    # (as a trap).  signalling "done" here as well would
                    # make it look like two instructions: the failed
                    # LDST followed by a TRAP.
                    with m.If(~ldst_exc_happened):
                        comb += self.insn_done.eq(1)
                    # either way, go back and wait for the next issue
                    m.next = "INSN_START"
989
def setup_peripherals(self, m):
    """add core, memories, debug and peripherals to m; set up clocks/resets

    * registers core, imem, the decoder (and SVP64 prefix decoder, SRAMs,
      XICS and GPIO when enabled) as submodules in the "coresync" domain
    * registers the debug module (and JTAG when enabled) in
      self.dbg_domain
    * creates the "por", "sync" and "coresync" clock domains (plus
      self.dbg_domain if that is not "sync") and a 3-cycle power-on
      reset counter
    * drives the coresync reset, self.busy_o, the decoder endianness
      and the LDST "go" signals
    """
    comb, sync = m.d.comb, m.d.sync

    # okaaaay so the debug module must be in coresync clock domain
    # but NOT its reset signal. to cope with this, set every single
    # submodule explicitly in coresync domain, debug and JTAG
    # in their own one but using *external* reset.
    csd = DomainRenamer("coresync")
    dbd = DomainRenamer(self.dbg_domain)

    m.submodules.core = core = csd(self.core)
    m.submodules.imem = csd(self.imem)
    m.submodules.dbg = dbg = dbd(self.dbg)
    if self.jtag_en:
        m.submodules.jtag = jtag = dbd(self.jtag)
        # TODO: UART2GDB mux, here, from external pin
        # see https://bugs.libre-soc.org/show_bug.cgi?id=499
        sync += dbg.dmi.connect_to(jtag.dmi)

    cur_state = self.cur_state

    # 4x 4k SRAM blocks. these simply "exist", they get routed in litex
    if self.sram4x4k:
        for i, sram in enumerate(self.sram4k):
            m.submodules["sram4k_%d" % i] = csd(sram)
            comb += sram.enable.eq(self.wb_sram_en)

    # XICS interrupt handler
    if self.xics:
        m.submodules.xics_icp = icp = csd(self.xics_icp)
        m.submodules.xics_ics = ics = csd(self.xics_ics)
        comb += icp.ics_i.eq(ics.icp_o)  # connect ICS to ICP
        sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core

    # GPIO test peripheral
    if self.gpio:
        m.submodules.simple_gpio = csd(self.simple_gpio)

    # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
    # XXX causes litex ECP5 test to get wrong idea about input and output
    # (but works with verilator sim *sigh*)
    #if self.gpio and self.xics:
    #    comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

    # instruction decoder
    # NOTE(review): the result of create_pdecode() is not used anywhere
    # in this method; the call is kept in case it is relied on elsewhere
    pdecode = create_pdecode()
    m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
    if self.svp64_en:
        m.submodules.svp64 = csd(self.svp64)

    # clock delay power-on reset.  cd_por and cd_sync take their domain
    # names from the variable names, so these must not be renamed
    cd_por = ClockDomain(reset_less=True)
    cd_sync = ClockDomain()
    core_sync = ClockDomain("coresync")
    m.domains += cd_por, cd_sync, core_sync
    if self.dbg_domain != "sync":
        dbg_sync = ClockDomain(self.dbg_domain)
        m.domains += dbg_sync

    # counter starts at 3 and counts down to 0 in the "por" domain,
    # which is clocked from the default clock
    ti_rst = Signal(reset_less=True)
    delay = Signal(range(4), reset=3)
    with m.If(delay != 0):
        m.d.por += delay.eq(delay - 1)
    comb += cd_por.clk.eq(ClockSignal())

    # power-on reset delay: hold the core in reset while the counter is
    # still running, OR when debug requests a core reset, OR on external
    # reset.  the parentheses matter: "|" binds tighter than "!=" in
    # python, so without them this would compare delay against the
    # OR of the other terms instead.
    core_rst = ResetSignal("coresync")
    comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
    comb += core_rst.eq(ti_rst)

    # debug clock is same as coresync, but reset is *main external*
    if self.dbg_domain != "sync":
        dbg_rst = ResetSignal(self.dbg_domain)
        comb += dbg_rst.eq(ResetSignal())

    # busy/halted signals from core
    core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
    comb += self.busy_o.eq(core_busy_o)
    comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

    # temporary hack: says "go" immediately for both address gen and ST
    ldst = core.fus.fus['ldst0']
    st_go_edge = rising_edge(m, ldst.st.rel_o)
    m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)  # link addr-go direct to rel
    m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel
1080
def elaborate(self, platform):
    """create the Module: peripherals, state read-out and all the FSMs

    order of construction: peripherals/clocks (setup_peripherals), PC and
    SVSTATE read-out (state_get), debug hookup, the Fetch FSM submodule,
    then the issue, (optional) predicate-fetch and execute FSMs, DMI
    register access and finally the DEC/TB update FSM.
    """
    m = Module()
    comb, sync = m.d.comb, m.d.sync
    cur_state = self.cur_state
    pdecode2 = self.pdecode2
    dbg = self.dbg
    core = self.core

    # core reset is picked up first, then peripherals and core are added
    core_rst = self.core_rst
    self.setup_peripherals(m)

    # a core reset request clears the whole of the current state
    with m.If(core_rst):
        sync += cur_state.eq(0)

    # PC output, and the "somebody wrote PC / SVSTATE" markers.
    # (signal names are taken from the variable names: do not rename)
    comb += self.pc_o.eq(cur_state.pc)
    pc_changed = Signal()  # note write to PC
    sv_changed = Signal()  # note write to SVSTATE

    # let the outside world know whether any FU is still executing
    comb += self.any_busy.eq(core.n.o_data.any_busy_o)

    # state comes either from the incoming override or from the regfile
    # TODO: really should be doing MSR in the same way
    pc = state_get(m, core_rst, self.pc_i, "pc",
                   self.state_r_pc, StateRegs.PC)
    svstate = state_get(m, core_rst, self.svstate_i, "svstate",
                        self.state_r_sv, StateRegs.SVSTATE)

    # default: PC is not written (the FSMs enable the write when needed)
    comb += [self.state_w_pc.wen.eq(0),
             self.state_w_pc.i_data.eq(0)]

    # next-instruction address when there is no branch: depends on
    # the instruction size
    nia = Signal(64)

    # debug module connections
    # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
    comb += [dbg.terminate_i.eq(core.o.core_terminate_o),
             dbg.state.pc.eq(pc),
             dbg.state.svstate.eq(svstate),
             dbg.state.msr.eq(cur_state.msr)]

    # prefix mode is passed from Fetch to Issue, so that Issue can
    # loop on VL==0
    is_svp64_mode = Signal()

    # handshake signals between the FSMs: fetch (32/64-bit), predicate
    # fetch/calculate (SVP64 only), issue, and decode/execute.
    # Fetch, then predicate fetch, then Issue (where the VL for-loop
    # lives), then Execute.  note that the FSMs "hold" information for
    # the core combinatorially (opcode, valid flags and so on) rather
    # than through sync, which would cost a clock of delay.

    # Fetch can run as soon as the PC is valid
    fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
    fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"
    # Fetch hands the instruction over for decode / issue
    fetch_insn_o_valid = Signal()
    fetch_insn_i_ready = Signal()
    # predicate fetch FSM decodes and fetches the predicate...
    pred_insn_i_valid = Signal()
    pred_insn_o_ready = Signal()
    # ... and delivers the masks
    pred_mask_o_valid = Signal()
    pred_mask_i_ready = Signal()
    # Issue delivers the instruction to be executed
    exec_insn_i_valid = Signal()
    exec_insn_o_ready = Signal()
    # Execute hands PC/SVSTATE back to Issue
    exec_pc_o_valid = Signal()
    exec_pc_i_ready = Signal()

    # grouped in the positional order the FSM builders expect
    fetch_hs = (fetch_pc_o_ready, fetch_pc_i_valid,
                fetch_insn_o_valid, fetch_insn_i_ready)
    pred_hs = (pred_insn_i_valid, pred_insn_o_ready,
               pred_mask_o_valid, pred_mask_i_ready)
    exec_hs = (exec_insn_i_valid, exec_insn_o_ready,
               exec_pc_o_valid, exec_pc_i_ready)

    # Fetch FSM is a submodule: wire its p/n ports to the handshakes
    m.submodules.fetch = fetch = FetchFSM(
        self.allow_overlap, self.svp64_en,
        self.imem, core_rst, pdecode2, cur_state,
        dbg, core, pc, svstate, nia, is_svp64_mode)
    comb += [fetch_pc_o_ready.eq(fetch.p.o_ready),
             fetch.p.i_valid.eq(fetch_pc_i_valid),
             fetch_insn_o_valid.eq(fetch.n.o_valid),
             fetch.n.i_ready.eq(fetch_insn_i_ready)]

    self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                   dbg, core_rst, is_svp64_mode,
                   *fetch_hs, *pred_hs, *exec_hs)

    if self.svp64_en:
        self.fetch_predicate_fsm(m, *pred_hs)

    self.execute_fsm(m, core, pc_changed, sv_changed, *exec_hs)

    # not part of any FSM: read regfiles on demand from DMI
    self.do_dmi(m, dbg)

    # DEC and TB inc/dec FSM. a copy of DEC goes into CoreState
    # (which PowerDecoder2 uses to raise the 0x900 exception)
    self.tb_dec_fsm(m, cur_state.dec)

    return m
1208
def do_dmi(self, m, dbg):
    """service DMI debug read requests for the INT regfile, CR and XER

    for each of the three, a request raises the regfile read-enable
    combinatorially; the request is registered for one clock and, on
    that delayed cycle, the regfile output is returned together with
    an ack.  writing to these regfiles is not handled here.
    """
    comb, sync = m.d.comb, m.d.sync
    d_reg = dbg.d_gpr
    int_is_unary = self.core.regs.rf['int'].unary

    # INT regfile: addressing depends on whether the regfile is
    # unary- or binary-addressed
    with m.If(d_reg.req):
        # TODO: error-check this
        # XXX should this be combinatorial? sync better?
        if int_is_unary:
            comb += self.int_r.ren.eq(1<<d_reg.addr)
        else:
            comb += [self.int_r.addr.eq(d_reg.addr),
                     self.int_r.ren.eq(1)]
    d_reg_delay = Signal()
    sync += d_reg_delay.eq(d_reg.req)
    with m.If(d_reg_delay):
        # data arrives one clock later
        comb += [d_reg.data.eq(self.int_r.o_data),
                 d_reg.ack.eq(1)]

    # CR and XER follow exactly the same pattern, with every read
    # port enabled.  the delay signals are named explicitly so that
    # they keep the names "d_cr_delay" / "d_xer_delay"
    same_pattern = ((dbg.d_cr, self.cr_r, 0b11111111, "d_cr_delay"),
                    (dbg.d_xer, self.xer_r, 0b111111, "d_xer_delay"))
    for d_port, rd_port, enable_all, delay_name in same_pattern:
        with m.If(d_port.req):
            comb += rd_port.ren.eq(enable_all)
        req_delayed = Signal(name=delay_name)
        sync += req_delayed.eq(d_port.req)
        with m.If(req_delayed):
            # data arrives one clock later
            comb += [d_port.data.eq(rd_port.o_data),
                     d_port.ack.eq(1)]
1254
def tb_dec_fsm(self, m, spr_dec):
    """FSM that alternately updates DEC and TB in the FAST regfile

    cycles DEC_READ -> DEC_WRITE -> TB_READ -> TB_WRITE -> DEC_READ,
    using the 'issue' read and write ports.  DEC is decremented by one
    and also copied (sync) into spr_dec; TB is incremented by one.

    the SPR pipeline could have written a new value to DEC in between,
    however the regfile has "passthrough" on it so this *should* be ok.

    see v3.0B p1097-1099 for Timer Resource and p1065 and p1076
    """
    sync = m.d.sync
    issue_ports = self.core.regs.rf['fast']
    rd_port = issue_ports.r_ports['issue']  # DEC/TB
    wr_port = issue_ports.w_ports['issue']  # DEC/TB

    def request_read(regnum):
        # kick off a read of the given FAST register
        m.d.comb += [rd_port.addr.eq(regnum),
                     rd_port.ren.eq(1)]

    def write_back(regnum, value):
        # write value into the given FAST register
        m.d.comb += [wr_port.addr.eq(regnum),
                     wr_port.wen.eq(1),
                     wr_port.i_data.eq(value)]

    with m.FSM():

        # start reading the current DEC
        with m.State("DEC_READ"):
            request_read(FastRegs.DEC)
            m.next = "DEC_WRITE"

        # DEC read data is available (1 cycle later): write DEC-1
        with m.State("DEC_WRITE"):
            new_dec = Signal(64)
            # TODO: MSR.LPCR 32-bit decrement mode
            m.d.comb += new_dec.eq(rd_port.o_data - 1)
            write_back(FastRegs.DEC, new_dec)
            sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
            m.next = "TB_READ"

        # start reading the current TB
        with m.State("TB_READ"):
            request_read(FastRegs.TB)
            m.next = "TB_WRITE"

        # TB read data is available: write TB+1
        with m.State("TB_WRITE"):
            new_tb = Signal(64)
            m.d.comb += new_tb.eq(rd_port.o_data + 1)
            write_back(FastRegs.TB, new_tb)
            m.next = "DEC_READ"

    return m
1306
def __iter__(self):
    """yield, in order: the pc_i ports, pc_o, memerr_o, the core ports,
    the imem ports, core_bigendian_i and busy_o
    """
    yield from self.pc_i.ports()
    yield from (self.pc_o, self.memerr_o)
    yield from self.core.ports()
    yield from self.imem.ports()
    yield from (self.core_bigendian_i, self.busy_o)
1315
def ports(self):
    """return everything yielded by iterating over self, as a list"""
    return list(self)

def external_ports(self):
    """return the list of ports that are exposed externally

    always included: the pc_i ports, pc_o, memerr_o, core_bigendian_i,
    busy_o, the instruction bus fields and the data wishbone bus fields.
    JTAG external ports are added when JTAG is enabled, otherwise the
    DMI ports are.  SRAM, XICS and GPIO bus fields (plus int_level_i
    and gpio_o) are added when the respective option is enabled.
    """
    # copy: the list is extended below and must not alias (or require)
    # a list owned by pc_i
    ports = list(self.pc_i.ports())
    ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
              ]

    if self.jtag_en:
        ports += list(self.jtag.external_ports())
    else:
        # DMI is only exposed directly when JTAG is not enabled
        ports += list(self.dbg.dmi.ports())

    ports += list(self.imem.ibus.fields.values())
    ports += list(self.core.l0.cmpi.wb_bus().fields.values())

    if self.sram4x4k:
        for sram in self.sram4k:
            ports += list(sram.bus.fields.values())

    if self.xics:
        ports += list(self.xics_icp.bus.fields.values())
        ports += list(self.xics_ics.bus.fields.values())
        ports.append(self.int_level_i)

    if self.gpio:
        ports += list(self.simple_gpio.bus.fields.values())
        ports.append(self.gpio_o)

    return ports
1350
1351
class TestIssuer(Elaboratable):
    """wrapper around TestIssuerInternal that also sets up the clocks

    contains a TestIssuerInternal ("ti") and a DummyPLL.  the PLL is only
    added as a submodule when pspec has a true "use_pll" attribute.
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not: the PLL-related signals only exist
        # when the PLL is enabled
        self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
        if self.pll_en:
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk = ClockSignal()  # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """add ti (and the PLL if enabled), drive coresync/debug clocks"""
        m = Module()
        comb = m.d.comb

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            m.submodules.wrappll = pll = self.pll

            # add clock domain for the PLL output, driven from the PLL
            cd_pll = ClockDomain("pllclk")
            m.domains += cd_pll
            comb += self.pllclk_clk.eq(pll.clk_pll_o)

            # wire up external 24mhz to PLL
            #comb += pll.clk_24_i.eq(self.ref_clk)
            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals. don't mind them being in this domain
            pll_rst = ResetSignal("pllclk")
            comb += pll_rst.eq(ResetSignal())

        # the "coresync" clock (and through it everything placed in that
        # domain) is driven here.
        # XXX BYPASS PLL XXX: with the PLL enabled the clock comes from
        # ref_clk, which is itself the default-domain ClockSignal, so
        # the PLL output (pllclk) is not used as the core clock here
        intclk = ClockSignal("coresync")
        if self.pll_en:
            comb += intclk.eq(self.ref_clk)
        else:
            comb += intclk.eq(ClockSignal())

        # debug clock runs at coresync internal clock.  nothing to drive
        # if the debug module simply lives in the "sync" domain
        if self.ti.dbg_domain != 'sync':
            dbgclk = ClockSignal(self.ti.dbg_domain)
            comb += dbgclk.eq(intclk)

        return m

    def ports(self):
        """all ti and PLL ports plus the default clock and reset"""
        return list(self.ti.ports()) + list(self.pll.ports()) + \
            [ClockSignal(), ResetSignal()]

    def external_ports(self):
        """ti's external ports, default clock/reset, and (if the PLL is
        enabled) the PLL select, reference, test and clock signals
        """
        ports = self.ti.external_ports()
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        if self.pll_en:
            ports.append(self.clk_sel_i)
            ports.append(self.pll.clk_24_i)
            ports.append(self.pll_test_o)
            ports.append(self.pll_vco_o)
            ports.append(self.pllclk_clk)
            ports.append(self.ref_clk)
        return ports
1439
1440
if __name__ == '__main__':
    # one Function Unit of each of these kinds
    fu_kinds = ('alu', 'cr', 'branch', 'trap', 'logical',
                'spr', 'div', 'mul', 'shiftrot')
    units = dict.fromkeys(fu_kinds, 1)

    pspec = TestMemPspec(ldst_ifacetype='bare_wb',
                         imem_ifacetype='bare_wb',
                         addr_wid=48,
                         mask_wid=8,
                         reg_wid=64,
                         units=units)
    dut = TestIssuer(pspec)

    # hand over to the nmigen command-line front-end first
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # with no command-line arguments at all: write out RTLIL, using
    # only the external ports
    no_cmdline_args = len(sys.argv) == 1
    if no_cmdline_args:
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as f:
            f.write(vl)