more comments
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from nmutil.singlepipe import ControlBase
25 from soc.simple.core_data import FetchOutput, FetchInput
26
27 from nmigen.lib.coding import PriorityEncoder
28
29 from openpower.decoder.power_decoder import create_pdecode
30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
32 from openpower.decoder.decode2execute1 import Data
33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
34 SVP64PredMode)
35 from openpower.state import CoreState
36 from openpower.consts import (CR, SVP64CROffs)
37 from soc.experiment.testmem import TestMemory # test only for instructions
38 from soc.regfile.regfiles import StateRegs, FastRegs
39 from soc.simple.core import NonProductionCore
40 from soc.config.test.test_loadstore import TestMemPspec
41 from soc.config.ifetch import ConfigFetchUnit
42 from soc.debug.dmi import CoreDebug, DMIInterface
43 from soc.debug.jtag import JTAG
44 from soc.config.pinouts import get_pinspecs
45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
46 from soc.bus.simple_gpio import SimpleGPIO
47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
48 from soc.clock.select import ClockSelect
49 from soc.clock.dummypll import DummyPLL
50 from openpower.sv.svstate import SVSTATERec
51
52
53 from nmutil.util import rising_edge
54
def get_insn(f_instr_o, pc):
    """Pick the 32-bit instruction word out of the fetch output.

    * f_instr_o: raw fetch output (must provide .width and .word_select)
    * pc: address of the fetch; only bit 2 is looked at

    A 32-bit wide output is handed back untouched.  For any other width
    (the 64-bit case) the 32-bit word indexed by pc[2] is selected.
    """
    if f_instr_o.width != 32:
        # 64-bit: bit 2 of pc decides which word to select
        return f_instr_o.word_select(pc[2], 32)
    return f_instr_o
61
62 # gets state input or reads from state regfile
def state_get(m, core_rst, state_i, name, regfile, regnum):
    """Build logic yielding a state value from an override or a regfile.

    * m: Module to which the comb/sync statements are added
    * core_rst: reset signal; all statements below are gated by ~core_rst
    * state_i: override input; .ok selects it, .data is its value
    * name: name of the result Signal (also used for "<name>_ok_delay")
    * regfile: read port providing ren (read-enable) and o_data
    * regnum: bit position set in regfile.ren to request the read

    Returns the 64-bit result Signal.
    """
    comb = m.d.comb
    sync = m.d.sync
    # result: driven from the override or from the regfile read data
    res = Signal(64, reset_less=True, name=name)
    res_ok_delay = Signal(name="%s_ok_delay" % name)
    with m.If(~core_rst):
        # record, for the following clock, that no override was present
        sync += res_ok_delay.eq(~state_i.ok)
        with m.If(state_i.ok):
            # incoming override (start from pc_i)
            comb += res.eq(state_i.data)
        with m.Else():
            # otherwise read StateRegs regfile for PC...
            comb += regfile.ren.eq(1<<regnum)
        # ... but on a 1-clock delay
        # NOTE(review): this If is taken to sit beside the If/Else pair
        # above (not inside the Else) - confirm against upstream
        with m.If(res_ok_delay):
            comb += res.eq(regfile.o_data)
    return res
81
82
def get_predint(m, mask, name):
    """Decode an SVP64 integer-predicate mask field.

    Mirrors the equivalent function in ISACaller, except that no INT
    register is read here: only "what needs to be done" is decoded, i.e.
    which INT reg, whether it is shifted and whether it is bit-inverted.

    Returns the tuple (regread, invert, unary, all1s):

    * regread is the GPR register number to be read
    * invert is set when the register value is to be inverted
    * unary is set when the register contents are to be shifted 1<<r3
    * all1s is set to indicate that no mask is to be applied
    """
    comb = m.d.comb
    regread = Signal(5, name=name+"regread")
    invert = Signal(name=name+"invert")
    unary = Signal(name=name+"unary")
    all1s = Signal(name=name+"all1s")

    # one row per register-based predicate, in Case order:
    # (predicate, GPR number, shift 1<<reg?, invert?)
    reg_preds = (
        (SVP64PredInt.R3_UNARY, 3, True, False),
        (SVP64PredInt.R3, 3, False, False),
        (SVP64PredInt.R3_N, 3, False, True),
        (SVP64PredInt.R10, 10, False, False),
        (SVP64PredInt.R10_N, 10, False, True),
        (SVP64PredInt.R30, 30, False, False),
        (SVP64PredInt.R30_N, 30, False, True),
    )

    with m.Switch(mask):
        with m.Case(SVP64PredInt.ALWAYS.value):
            comb += all1s.eq(1)      # use 0b1111 (all ones)
        for pred, gpr, shifted, negated in reg_preds:
            with m.Case(pred.value):
                comb += regread.eq(gpr)
                if shifted:
                    comb += unary.eq(1)   # 1<<r3 - shift r3 (single bit)
                if negated:
                    comb += invert.eq(1)
    return regread, invert, unary, all1s
121
122
def get_predcr(m, mask, name):
    """Decode an SVP64 CR-predicate mask field.

    Same decoding as _get_predcr in ISACaller.  Returns (idx, invert):
    idx is the bit number within a CR field to test, invert is set when
    the tested bit must be inverted.
    """
    comb = m.d.comb
    idx = Signal(2, name=name+"idx")
    invert = Signal(name=name+"crinvert")

    # (predicate, CR field bit, invert), in Case order
    cr_preds = (
        (SVP64PredCR.LT, CR.LT, 0),
        (SVP64PredCR.GE, CR.LT, 1),
        (SVP64PredCR.GT, CR.GT, 0),
        (SVP64PredCR.LE, CR.GT, 1),
        (SVP64PredCR.EQ, CR.EQ, 0),
        (SVP64PredCR.NE, CR.EQ, 1),
        (SVP64PredCR.SO, CR.SO, 0),
        (SVP64PredCR.NS, CR.SO, 1),
    )

    with m.Switch(mask):
        for pred, cr_bit, negate in cr_preds:
            with m.Case(pred.value):
                comb += idx.eq(cr_bit)
                comb += invert.eq(negate)
    return idx, invert
156
157
158 # Fetch Finite State Machine.
159 # WARNING: there are currently DriverConflicts but it's actually working.
160 # TODO, here: everything that is global in nature, information from the
161 # main TestIssuerInternal, needs to move to either ispec() or ospec().
162 # not only that: TestIssuerInternal.imem can entirely move into here
163 # because imem is only ever accessed inside the FetchFSM.
class FetchFSM(ControlBase):
    """Instruction-fetch FSM, wrapped as a ControlBase pipeline stage.

    Input data (self.i) is a FetchInput, of which the pc field is read;
    output data (self.o) is a FetchOutput.  Handshaking uses the
    ControlBase ports: p.i_valid / p.o_ready on the incoming (PC) side
    and n.o_valid / n.i_ready on the outgoing (instruction) side.

    Constructor arguments are stored as attributes of the same name:

    * allow_overlap: python bool; if set, dbg.stopping_o aborts INSN_READ
    * svp64_en: python bool; enables SVP64 prefix detection
    * imem: instruction fetch unit (a_pc_i, a_i_valid, f_i_valid,
      f_busy_o, f_instr_o are used)
    * core_rst: core reset signal; while held, nia is zeroed
    * pdecode2: PowerDecode2 instance receiving the raw opcode
    * cur_state: CoreState into which pc, svstate and msr are latched
    * dbg: debug interface (stopping_o is read)
    * core: the core; its 'state' regfile is used to read the MSR
    * svstate: SVSTATE value latched into cur_state.svstate
    * nia: signal receiving the address of the following instruction
    * is_svp64_mode: signal recording if the instruction has a prefix
    """

    def __init__(self, allow_overlap, svp64_en, imem, core_rst,
                 pdecode2, cur_state,
                 dbg, core, svstate, nia, is_svp64_mode):
        self.allow_overlap = allow_overlap
        self.svp64_en = svp64_en
        self.imem = imem
        self.core_rst = core_rst
        self.pdecode2 = pdecode2
        self.cur_state = cur_state
        self.dbg = dbg
        self.core = core
        self.svstate = svstate
        self.nia = nia
        self.is_svp64_mode = is_svp64_mode

        # set up pipeline ControlBase and allocate i/o specs
        # (unusual: normally done by the Pipeline API)
        super().__init__(stage=self)
        self.p.i_data, self.n.o_data = self.new_specs(None)
        self.i, self.o = self.p.i_data, self.n.o_data

    # next 3 functions are Stage API Compliance
    def setup(self, m, i):
        """Stage API hook: nothing to set up."""
        pass

    def ispec(self):
        """Stage API hook: input data specification."""
        return FetchInput()

    def ospec(self):
        """Stage API hook: output data specification."""
        return FetchOutput()

    def elaborate(self, platform):
        """fetch FSM

        this FSM performs fetch of raw instruction data, partial-decodes
        it 32-bit at a time to detect SVP64 prefixes, and will optionally
        read a 2nd 32-bit quantity if that occurs.

        States: IDLE -> INSN_READ -> (INSN_READ2 ->) INSN_READY -> IDLE.
        Returns the Module created by ControlBase.elaborate, with the
        FSM added to it.
        """
        m = super().elaborate(platform)

        # (the original bound "core = self.core," here: the trailing comma
        # made it a 1-tuple, and it was never used.  self.core is used
        # directly below instead.)
        dbg = self.dbg
        pc = self.i.pc
        svstate = self.svstate
        nia = self.nia
        is_svp64_mode = self.is_svp64_mode
        fetch_pc_o_ready = self.p.o_ready
        fetch_pc_i_valid = self.p.i_valid
        fetch_insn_o_valid = self.n.o_valid
        fetch_insn_i_ready = self.n.i_ready

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode

        # set once the MSR read data has been captured (starts as "done")
        msr_read = Signal(reset=1)

        # don't read msr every cycle
        staterf = self.core.regs.rf['state']
        state_r_msr = staterf.r_ports['msr']  # MSR rd

        comb += state_r_msr.ren.eq(0)

        with m.FSM(name='fetch_fsm'):

            # waiting (zzz)
            with m.State("IDLE"):
                with m.If(~dbg.stopping_o):
                    comb += fetch_pc_o_ready.eq(1)
                with m.If(fetch_pc_i_valid):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                    sync += cur_state.pc.eq(pc)
                    sync += cur_state.svstate.eq(svstate)  # and svstate

                    # initiate read of MSR. arrives one clock later
                    comb += state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # dummy pause to find out why simulation is not keeping up
            with m.State("INSN_READ"):
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "IDLE"
                with m.Else():
                    # one cycle later, msr/sv read arrives.  valid only once.
                    with m.If(~msr_read):
                        sync += msr_read.eq(1)  # yeah don't read it again
                        sync += cur_state.msr.eq(state_r_msr.o_data)
                    with m.If(self.imem.f_busy_o):  # zzz...
                        # busy: stay in wait-read
                        comb += self.imem.a_i_valid.eq(1)
                        comb += self.imem.f_i_valid.eq(1)
                    with m.Else():
                        # not busy: instruction fetched
                        insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                        if self.svp64_en:
                            # NOTE(review): self.svp64 and
                            # self.core_bigendian_i are not assigned in
                            # __init__ above; presumably the creator of
                            # this object sets them - confirm, otherwise
                            # this raises AttributeError when svp64_en
                            svp64 = self.svp64
                            # decode the SVP64 prefix, if any
                            comb += svp64.raw_opcode_in.eq(insn)
                            comb += svp64.bigendian.eq(self.core_bigendian_i)
                            # pass the decoded prefix (if any) to PowerDecoder2
                            sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                            sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
                            # remember whether this is a prefixed instruction,
                            # so the FSM can readily loop when VL==0
                            sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                            # calculate the address of the following instruction
                            insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                            sync += nia.eq(cur_state.pc + insn_size)
                            with m.If(~svp64.is_svp64_mode):
                                # with no prefix, store the instruction
                                # and hand it directly to the next FSM
                                sync += dec_opcode_o.eq(insn)
                                m.next = "INSN_READY"
                            with m.Else():
                                # fetch the rest of the instruction from memory
                                comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                                comb += self.imem.a_i_valid.eq(1)
                                comb += self.imem.f_i_valid.eq(1)
                                m.next = "INSN_READ2"
                        else:
                            # not SVP64 - 32-bit only
                            sync += nia.eq(cur_state.pc + 4)
                            sync += dec_opcode_o.eq(insn)
                            m.next = "INSN_READY"

            with m.State("INSN_READ2"):
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                    sync += dec_opcode_o.eq(insn)
                    m.next = "INSN_READY"
                    # TODO: probably can start looking at pdecode2.rm_dec
                    # here or maybe even in INSN_READ state, if svp64_mode
                    # detected, in order to trigger - and wait for - the
                    # predicate reading.  sketch (svp64_en only):
                    #
                    #   pmode = pdecode2.rm_dec.predmode
                    #   if pmode != SVP64PredMode.ALWAYS.value:
                    #       fire predicate loading FSM and wait before
                    #       moving to INSN_READY
                    #   else:
                    #       sync += self.srcmask.eq(-1) # set to all 1s
                    #       sync += self.dstmask.eq(-1) # set to all 1s
                    #       m.next = "INSN_READY"

            with m.State("INSN_READY"):
                # hand over the instruction, to be decoded
                comb += fetch_insn_o_valid.eq(1)
                with m.If(fetch_insn_i_ready):
                    m.next = "IDLE"

        # whatever was done above, over-ride it if core reset is held
        with m.If(self.core_rst):
            sync += nia.eq(0)

        return m
342
343
344 class TestIssuerInternal(Elaboratable):
345 """TestIssuer - reads instructions from TestMemory and issues them
346
347 efficiency and speed is not the main goal here: functional correctness
348 and code clarity is. optimisations (which almost 100% interfere with
349 easy understanding) come later.
350 """
def __init__(self, pspec):
    """Create the core, decoder, debug and optional peripheral blocks.

    pspec is the parameter specification object.  Optional attributes
    are looked up on it (svp64, regreduce, allow_overlap, debug,
    sram4x4kblock, xics, gpio); a missing attribute counts as "off".
    When JTAG is enabled, pspec is also *modified*: wb_icache_en and
    wb_dcache_en are added to it before the core is created.
    """

    def enabled(option):
        # on only if pspec has the attribute and it compares equal to True
        return getattr(pspec, option, None) == True

    # test if SVP64 is to be enabled
    self.svp64_en = enabled("svp64")

    # and if regfiles are reduced
    self.regreduce_en = enabled("regreduce")

    # and if overlap requested
    self.allow_overlap = enabled("allow_overlap")

    # JTAG interface.  add this right at the start because if it's
    # added it *modifies* the pspec, by adding enable/disable signals
    # for parts of the rest of the core
    self.jtag_en = getattr(pspec, "debug", None) == 'jtag'
    self.dbg_domain = "sync"  # sigh "dbgsunc" too problematic
    # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
    if not self.jtag_en:
        self.wb_sram_en = Const(1)
    else:
        # XXX MUST keep this up-to-date with litex, and
        # soc-cocotb-sim, and err.. all needs sorting out, argh
        # ('mspi1', 'pwm' and 'sd0' are disabled for now)
        subset = ['uart', 'mtwi', 'eint', 'gpio', 'mspi0', 'sdr']
        self.jtag = JTAG(get_pinspecs(subset=subset),
                         domain=self.dbg_domain)
        # add signals to pspec to enable/disable icache and dcache
        # (or data and instruction wishbone if icache/dcache not included)
        # https://bugs.libre-soc.org/show_bug.cgi?id=520
        # TODO: do we actually care if these are not domain-synchronised?
        # honestly probably not.
        pspec.wb_icache_en = self.jtag.wb_icache_en
        pspec.wb_dcache_en = self.jtag.wb_dcache_en
        self.wb_sram_en = self.jtag.wb_sram_en

    # add 4k sram blocks?
    self.sram4x4k = enabled("sram4x4kblock")
    if self.sram4x4k:
        # (features={'err'} deliberately not passed)
        self.sram4k = [SPBlock512W64B8W(name="sram4k_%d" % blk)
                       for blk in range(4)]

    # add interrupt controller?
    self.xics = enabled("xics")
    if self.xics:
        self.xics_icp = XICS_ICP()
        self.xics_ics = XICS_ICS()
        self.int_level_i = self.xics_ics.int_level_i

    # add GPIO peripheral?
    self.gpio = enabled("gpio")
    if self.gpio:
        self.simple_gpio = SimpleGPIO()
        self.gpio_o = self.simple_gpio.gpio_o

    # main instruction core.  suitable for prototyping / demo only
    self.core = NonProductionCore(pspec)
    self.core_rst = ResetSignal("coresync")

    # instruction decoder.  goes into Trap Record
    self.cur_state = CoreState("cur")  # current state (MSR/PC/SVSTATE)
    self.pdecode2 = PowerDecode2(None, state=self.cur_state,
                                 opkls=IssuerDecode2ToOperand,
                                 svp64_en=self.svp64_en,
                                 regreduce_en=self.regreduce_en)

    if self.svp64_en:
        self.svp64 = SVP64PrefixDecoder()  # for decoding SVP64 prefix

    # Test Instruction memory
    self.imem = ConfigFetchUnit(pspec).fu

    # DMI interface
    self.dbg = CoreDebug()

    # instruction go/monitor
    self.pc_o = Signal(64, reset_less=True)
    self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
    self.svstate_i = Data(64, "svstate_i")  # ditto
    self.core_bigendian_i = Signal()  # TODO: set based on MSR.LE
    self.busy_o = Signal(reset_less=True)
    self.memerr_o = Signal(reset_less=True)

    # all register files of the core, by name
    rfs = self.core.regs.rf

    # STATE regfile read /write ports for PC, MSR, SVSTATE
    state_rf = rfs['state']
    self.state_r_pc = state_rf.r_ports['cia']    # PC rd
    self.state_w_pc = state_rf.w_ports['d_wr1']  # PC wr
    self.state_r_sv = state_rf.r_ports['sv']     # SVSTATE rd
    self.state_w_sv = state_rf.w_ports['sv']     # SVSTATE wr

    # DMI interface access
    int_rf, cr_rf, xer_rf = rfs['int'], rfs['cr'], rfs['xer']
    self.int_r = int_rf.r_ports['dmi']          # INT read
    self.cr_r = cr_rf.r_ports['full_cr_dbg']    # CR read
    self.xer_r = xer_rf.r_ports['full_xer']     # XER read

    if self.svp64_en:
        # for predication
        self.int_pred = int_rf.r_ports['pred']    # INT predicate read
        self.cr_pred = cr_rf.r_ports['cr_pred']   # CR predicate read

    # hack method of keeping an eye on whether branch/trap set the PC
    self.state_nia = rfs['state'].w_ports['nia']
    self.state_nia.wen.name = 'state_nia_wen'

    # pulse to synchronize the simulator at instruction end
    self.insn_done = Signal()

    # indicate any instruction still outstanding, in execution
    self.any_busy = Signal()

    if self.svp64_en:
        # store copies of predicate masks
        self.srcmask = Signal(64)
        self.dstmask = Signal(64)
479
def fetch_predicate_fsm(self, m,
                        pred_insn_i_valid, pred_insn_o_ready,
                        pred_mask_o_valid, pred_mask_i_ready):
    """fetch_predicate_fsm - obtains (constructs in the case of CR)
       src/dest predicate masks

    https://bugs.libre-soc.org/show_bug.cgi?id=617
    the predicates are read here through self.int_pred (INT regfile)
    or self.cr_pred (CR regfile).  in the case of CRs it has to
    be done through multiple reads, extracting one relevant at a time.
    later, a faster way would be to use the 32-bit-wide CR port but
    this is more complex decoding, here.  equivalent code used in
    ISACaller is "from openpower.decoder.isa.caller import get_predcr"

    handshake signals:

    * pred_insn_i_valid: (in) request to start fetching the masks
    * pred_insn_o_ready: (out) set while idle, i.e. able to start
    * pred_mask_o_valid: (out) set once self.srcmask / self.dstmask
      have been stored
    * pred_mask_i_ready: (in) acknowledges the masks; the FSM then
      returns to idle

    note: this ENTIRE FSM is not to be called when svp64 is disabled
    """
    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2
    rm_dec = pdecode2.rm_dec  # SVP64RMModeDecode
    predmode = rm_dec.predmode
    srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
    cr_pred, int_pred = self.cr_pred, self.int_pred  # read regfiles
    # get src/dst step, so we can skip already used mask bits
    cur_state = self.cur_state
    srcstep = cur_state.svstate.srcstep
    dststep = cur_state.svstate.dststep
    cur_vl = cur_state.svstate.vl

    # decode predicates
    # (both the INT and the CR decoding are built; predmode selects
    # which of them is acted upon, in FETCH_PRED_IDLE)
    sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
    dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
    sidx, scrinvert = get_predcr(m, srcpred, 's')
    didx, dcrinvert = get_predcr(m, dstpred, 'd')

    # store fetched masks, for either intpred or crpred
    # when src/dst step is not zero, the skipped mask bits need to be
    # shifted-out, before actually storing them in src/dest mask
    new_srcmask = Signal(64, reset_less=True)
    new_dstmask = Signal(64, reset_less=True)

    with m.FSM(name="fetch_predicate"):

        with m.State("FETCH_PRED_IDLE"):
            comb += pred_insn_o_ready.eq(1)
            with m.If(pred_insn_i_valid):
                with m.If(predmode == SVP64PredMode.INT):
                    # skip fetching destination mask register, when
                    # no destination mask applies (all ones)
                    with m.If(dall1s):
                        sync += new_dstmask.eq(-1)
                        # directly go to fetch source mask register
                        # guaranteed not to be zero (otherwise predmode
                        # would be SVP64PredMode.ALWAYS, not INT)
                        comb += int_pred.addr.eq(sregread)
                        comb += int_pred.ren.eq(1)
                        m.next = "INT_SRC_READ"
                    # fetch destination predicate register
                    with m.Else():
                        comb += int_pred.addr.eq(dregread)
                        comb += int_pred.ren.eq(1)
                        m.next = "INT_DST_READ"
                with m.Elif(predmode == SVP64PredMode.CR):
                    # go fetch masks from the CR register file
                    sync += new_srcmask.eq(0)
                    sync += new_dstmask.eq(0)
                    m.next = "CR_READ"
                with m.Else():
                    # neither INT nor CR: no predication, all ones
                    sync += self.srcmask.eq(-1)
                    sync += self.dstmask.eq(-1)
                    m.next = "FETCH_PRED_DONE"

        with m.State("INT_DST_READ"):
            # store destination mask
            inv = Repl(dinvert, 64)
            with m.If(dunary):
                # set selected mask bit for 1<<r3 mode
                dst_shift = Signal(range(64))
                comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
                sync += new_dstmask.eq(1 << dst_shift)
            with m.Else():
                # invert mask if requested
                sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
            # skip fetching source mask register, when no source
            # mask applies (all ones)
            with m.If(sall1s):
                sync += new_srcmask.eq(-1)
                m.next = "FETCH_PRED_SHIFT_MASK"
            # fetch source predicate register
            with m.Else():
                comb += int_pred.addr.eq(sregread)
                comb += int_pred.ren.eq(1)
                m.next = "INT_SRC_READ"

        with m.State("INT_SRC_READ"):
            # store source mask
            inv = Repl(sinvert, 64)
            with m.If(sunary):
                # set selected mask bit for 1<<r3 mode
                src_shift = Signal(range(64))
                comb += src_shift.eq(self.int_pred.o_data & 0b111111)
                sync += new_srcmask.eq(1 << src_shift)
            with m.Else():
                # invert mask if requested
                sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
            m.next = "FETCH_PRED_SHIFT_MASK"

        # fetch masks from the CR register file
        # implements the following loop:
        # idx, inv = get_predcr(mask)
        # mask = 0
        # for cr_idx in range(vl):
        #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
        #     if cr[idx] ^ inv:
        #         mask |= 1 << cr_idx
        # return mask
        with m.State("CR_READ"):
            # CR index to be read, which will be ready by the next cycle
            cr_idx = Signal.like(cur_vl, reset_less=True)
            # submit the read operation to the regfile
            with m.If(cr_idx != cur_vl):
                # the CR read port is unary ...
                # ren = 1 << cr_idx
                # ... in MSB0 convention ...
                # ren = 1 << (7 - cr_idx)
                # ... and with an offset:
                # ren = 1 << (7 - off - cr_idx)
                idx = SVP64CROffs.CRPred + cr_idx
                comb += cr_pred.ren.eq(1 << (7 - idx))
                # signal data valid in the next cycle
                cr_read = Signal(reset_less=True)
                sync += cr_read.eq(1)
                # load the next index
                sync += cr_idx.eq(cr_idx + 1)
            with m.Else():
                # exit on loop end
                sync += cr_read.eq(0)
                sync += cr_idx.eq(0)
                m.next = "FETCH_PRED_SHIFT_MASK"
            with m.If(cr_read):
                # compensate for the one cycle delay on the regfile
                cur_cr_idx = Signal.like(cur_vl)
                comb += cur_cr_idx.eq(cr_idx - 1)
                # read the CR field, select the appropriate bit
                cr_field = Signal(4)
                scr_bit = Signal()
                dcr_bit = Signal()
                comb += cr_field.eq(cr_pred.o_data)
                comb += scr_bit.eq(cr_field.bit_select(sidx, 1) ^ scrinvert)
                comb += dcr_bit.eq(cr_field.bit_select(didx, 1) ^ dcrinvert)
                # set the corresponding mask bit
                bit_to_set = Signal.like(self.srcmask)
                comb += bit_to_set.eq(1 << cur_cr_idx)
                with m.If(scr_bit):
                    sync += new_srcmask.eq(new_srcmask | bit_to_set)
                with m.If(dcr_bit):
                    sync += new_dstmask.eq(new_dstmask | bit_to_set)

        with m.State("FETCH_PRED_SHIFT_MASK"):
            # shift-out skipped mask bits
            sync += self.srcmask.eq(new_srcmask >> srcstep)
            sync += self.dstmask.eq(new_dstmask >> dststep)
            m.next = "FETCH_PRED_DONE"

        with m.State("FETCH_PRED_DONE"):
            # masks are stored: offer them, wait for acknowledgement
            comb += pred_mask_o_valid.eq(1)
            with m.If(pred_mask_i_ready):
                m.next = "FETCH_PRED_IDLE"
646
def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
              dbg, core_rst, is_svp64_mode,
              fetch_pc_o_ready, fetch_pc_i_valid,
              fetch_insn_o_valid, fetch_insn_i_ready,
              pred_insn_i_valid, pred_insn_o_ready,
              pred_mask_o_valid, pred_mask_i_ready,
              exec_insn_i_valid, exec_insn_o_ready,
              exec_pc_o_valid, exec_pc_i_ready):
    """issue FSM

    decode / issue FSM.  this interacts with the "fetch" FSM
    through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
    (outgoing).  also interacts with the "execute" FSM
    through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
    (incoming).
    SVP64 RM prefixes have already been set up by the
    "fetch" phase, so execute is fairly straightforward.

    arguments, other than the valid/ready handshake pairs:

    * m: Module to which the FSM is added
    * core: the core; core.i.* is loaded in DECODE_SV, and
      core.fus.get_exc("ldst0") is read when an exception happened
    * pc_changed, sv_changed: set here when pc_i / svstate_i are
      written while stopped; read in EXECUTE_WAIT to decide whether
      to go straight back to ISSUE_START
    * nia: value written to the PC state register when moving on
    * dbg: debug interface (core_stop_o, stopping_o are read,
      core_stopped_i is driven)
    * core_rst: core reset signal; treated like "stopped"
    * is_svp64_mode: set if the current instruction is SVP64-prefixed

    the pred_* handshake pairs talk to the predicate-fetch FSM in the
    same fashion (PRED_START / MASK_WAIT states).

    States: ISSUE_START, INSN_WAIT, PRED_START, MASK_WAIT, PRED_SKIP,
    DECODE_SV, INSN_EXECUTE, EXECUTE_WAIT.
    """

    comb = m.d.comb
    sync = m.d.sync
    pdecode2 = self.pdecode2
    cur_state = self.cur_state

    # temporaries
    dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

    # for updating svstate (things like srcstep etc.)
    update_svstate = Signal()  # set this (below) if updating
    new_svstate = SVSTATERec("new_svstate")
    # default: unchanged.  individual fields are overridden below
    comb += new_svstate.eq(cur_state.svstate)

    # precalculate srcstep+1 and dststep+1
    cur_srcstep = cur_state.svstate.srcstep
    cur_dststep = cur_state.svstate.dststep
    next_srcstep = Signal.like(cur_srcstep)
    next_dststep = Signal.like(cur_dststep)
    comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
    comb += next_dststep.eq(cur_state.svstate.dststep+1)

    # note if an exception happened.  in a pipelined or OoO design
    # this needs to be accompanied by "shadowing" (or stalling)
    exc_happened = self.core.o.exc_happened

    with m.FSM(name="issue_fsm"):

        # sync with the "fetch" phase which is reading the instruction
        # at this point, there is no instruction running, that
        # could inadvertently update the PC.
        with m.State("ISSUE_START"):
            # wait on "core stop" release, before next fetch
            # need to do this here, in case we are in a VL==0 loop
            with m.If(~dbg.core_stop_o & ~core_rst):
                comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
                with m.If(fetch_pc_o_ready):   # fetch acknowledged us
                    m.next = "INSN_WAIT"
            with m.Else():
                # tell core it's stopped, and acknowledge debug handshake
                comb += dbg.core_stopped_i.eq(1)
                # while stopped, allow updating the PC and SVSTATE
                with m.If(self.pc_i.ok):
                    comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                    comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                    sync += pc_changed.eq(1)
                with m.If(self.svstate_i.ok):
                    comb += new_svstate.eq(self.svstate_i.data)
                    comb += update_svstate.eq(1)
                    sync += sv_changed.eq(1)

        # wait for an instruction to arrive from Fetch
        with m.State("INSN_WAIT"):
            # only honour the debug "stopping" request when overlap
            # is allowed: otherwise it is tied off to constant zero
            if self.allow_overlap:
                stopping = dbg.stopping_o
            else:
                stopping = Const(0)
            with m.If(stopping):
                # stopping: jump back to idle
                m.next = "ISSUE_START"
            with m.Else():
                comb += fetch_insn_i_ready.eq(1)
                with m.If(fetch_insn_o_valid):
                    # loop into ISSUE_START if it's a SVP64 instruction
                    # and VL == 0.  this because VL==0 is a for-loop
                    # from 0 to 0 i.e. always, always a NOP.
                    cur_vl = cur_state.svstate.vl
                    with m.If(is_svp64_mode & (cur_vl == 0)):
                        # update the PC before fetching the next instruction
                        # since we are in a VL==0 loop, no instruction was
                        # executed that we could be overwriting
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(nia)
                        comb += self.insn_done.eq(1)
                        m.next = "ISSUE_START"
                    with m.Else():
                        if self.svp64_en:
                            m.next = "PRED_START"  # fetching predicate
                        else:
                            m.next = "DECODE_SV"  # skip predication

        with m.State("PRED_START"):
            comb += pred_insn_i_valid.eq(1)  # tell fetch_pred to start
            with m.If(pred_insn_o_ready):  # fetch_pred acknowledged us
                m.next = "MASK_WAIT"

        with m.State("MASK_WAIT"):
            comb += pred_mask_i_ready.eq(1)  # ready to receive the masks
            with m.If(pred_mask_o_valid):  # predication masks are ready
                m.next = "PRED_SKIP"

        # skip zeros in predicate
        with m.State("PRED_SKIP"):
            with m.If(~is_svp64_mode):
                m.next = "DECODE_SV"  # nothing to do
            with m.Else():
                if self.svp64_en:
                    pred_src_zero = pdecode2.rm_dec.pred_sz
                    pred_dst_zero = pdecode2.rm_dec.pred_dz

                    # new srcstep, after skipping zeros
                    skip_srcstep = Signal.like(cur_srcstep)
                    # value to be added to the current srcstep
                    src_delta = Signal.like(cur_srcstep)
                    # add leading zeros to srcstep, if not in zero mode
                    with m.If(~pred_src_zero):
                        # priority encoder (count leading zeros)
                        # append guard bit, in case the mask is all zeros
                        pri_enc_src = PriorityEncoder(65)
                        m.submodules.pri_enc_src = pri_enc_src
                        comb += pri_enc_src.i.eq(Cat(self.srcmask,
                                                     Const(1, 1)))
                        comb += src_delta.eq(pri_enc_src.o)
                    # apply delta to srcstep
                    comb += skip_srcstep.eq(cur_srcstep + src_delta)
                    # shift-out all leading zeros from the mask
                    # plus the leading "one" bit
                    # TODO count leading zeros and shift-out the zero
                    #      bits, in the same step, in hardware
                    sync += self.srcmask.eq(self.srcmask >> (src_delta+1))

                    # same as above, but for dststep
                    skip_dststep = Signal.like(cur_dststep)
                    dst_delta = Signal.like(cur_dststep)
                    with m.If(~pred_dst_zero):
                        pri_enc_dst = PriorityEncoder(65)
                        m.submodules.pri_enc_dst = pri_enc_dst
                        comb += pri_enc_dst.i.eq(Cat(self.dstmask,
                                                     Const(1, 1)))
                        comb += dst_delta.eq(pri_enc_dst.o)
                    comb += skip_dststep.eq(cur_dststep + dst_delta)
                    sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))

                    # TODO: initialize mask[VL]=1 to avoid passing past VL
                    # (cur_vl is the python variable bound in INSN_WAIT
                    # above: it refers to cur_state.svstate.vl)
                    with m.If((skip_srcstep >= cur_vl) |
                              (skip_dststep >= cur_vl)):
                        # end of VL loop. Update PC and reset src/dst step
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(nia)
                        comb += new_svstate.srcstep.eq(0)
                        comb += new_svstate.dststep.eq(0)
                        comb += update_svstate.eq(1)
                        # synchronize with the simulator
                        comb += self.insn_done.eq(1)
                        # go back to Issue
                        m.next = "ISSUE_START"
                    with m.Else():
                        # update new src/dst step
                        comb += new_svstate.srcstep.eq(skip_srcstep)
                        comb += new_svstate.dststep.eq(skip_dststep)
                        comb += update_svstate.eq(1)
                        # proceed to Decode
                        m.next = "DECODE_SV"

                    # pass predicate mask bits through to satellite decoders
                    # TODO: for SIMD this will be *multiple* bits
                    sync += core.i.sv_pred_sm.eq(self.srcmask[0])
                    sync += core.i.sv_pred_dm.eq(self.dstmask[0])

        # after src/dst step have been updated, we are ready
        # to decode the instruction
        with m.State("DECODE_SV"):
            # decode the instruction
            sync += core.i.e.eq(pdecode2.e)
            sync += core.i.state.eq(cur_state)
            sync += core.i.raw_insn_i.eq(dec_opcode_i)
            sync += core.i.bigendian_i.eq(self.core_bigendian_i)
            if self.svp64_en:
                sync += core.i.sv_rm.eq(pdecode2.sv_rm)
                # set RA_OR_ZERO detection in satellite decoders
                sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
                # and svp64 detection
                sync += core.i.is_svp64_mode.eq(is_svp64_mode)
                # and svp64 bit-rev'd ldst mode
                ldst_dec = pdecode2.use_svp64_ldst_dec
                sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
            # after decoding, reset any previous exception condition,
            # allowing it to be set again during the next execution
            sync += pdecode2.ldst_exc.eq(0)

            m.next = "INSN_EXECUTE"  # move to "execute"

        # handshake with execution FSM, move to "wait" once acknowledged
        with m.State("INSN_EXECUTE"):
            comb += exec_insn_i_valid.eq(1)  # trigger execute
            with m.If(exec_insn_o_ready):   # execute acknowledged us
                m.next = "EXECUTE_WAIT"

        with m.State("EXECUTE_WAIT"):
            # wait on "core stop" release, at instruction end
            # need to do this here, in case we are in a VL>1 loop
            with m.If(~dbg.core_stop_o & ~core_rst):
                comb += exec_pc_i_ready.eq(1)
                # see https://bugs.libre-soc.org/show_bug.cgi?id=636
                # the exception info needs to be blatted into
                # pdecode.ldst_exc, and the instruction "re-run".
                # when ldst_exc.happened is set, the PowerDecoder2
                # reacts very differently: it re-writes the instruction
                # with a "trap" (calls PowerDecoder2.trap()) which
                # will *overwrite* whatever was requested and jump the
                # PC to the exception address, as well as alter MSR.
                # nothing else needs to be done other than to note
                # the change of PC and MSR (and, later, SVSTATE)
                with m.If(exc_happened):
                    sync += pdecode2.ldst_exc.eq(core.fus.get_exc("ldst0"))

                with m.If(exec_pc_o_valid):

                    # was this the last loop iteration?
                    is_last = Signal()
                    cur_vl = cur_state.svstate.vl
                    comb += is_last.eq(next_srcstep == cur_vl)

                    # return directly to Decode if Execute generated an
                    # exception.
                    with m.If(pdecode2.ldst_exc.happened):
                        m.next = "DECODE_SV"

                    # if either PC or SVSTATE were changed by the previous
                    # instruction, go directly back to Fetch, without
                    # updating either PC or SVSTATE
                    with m.Elif(pc_changed | sv_changed):
                        m.next = "ISSUE_START"

                    # also return to Fetch, when no output was a vector
                    # (regardless of SRCSTEP and VL), or when the last
                    # instruction was really the last one of the VL loop
                    with m.Elif((~pdecode2.loop_continue) | is_last):
                        # before going back to fetch, update the PC state
                        # register with the NIA.
                        # ok here we are not reading the branch unit.
                        # TODO: this just blithely overwrites whatever
                        #       pipeline updated the PC
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.i_data.eq(nia)
                        # reset SRCSTEP before returning to Fetch
                        if self.svp64_en:
                            with m.If(pdecode2.loop_continue):
                                comb += new_svstate.srcstep.eq(0)
                                comb += new_svstate.dststep.eq(0)
                                comb += update_svstate.eq(1)
                        else:
                            comb += new_svstate.srcstep.eq(0)
                            comb += new_svstate.dststep.eq(0)
                            comb += update_svstate.eq(1)
                        m.next = "ISSUE_START"

                    # returning to Execute? then, first update SRCSTEP
                    with m.Else():
                        comb += new_svstate.srcstep.eq(next_srcstep)
                        comb += new_svstate.dststep.eq(next_dststep)
                        comb += update_svstate.eq(1)
                        # return to mask skip loop
                        m.next = "PRED_SKIP"

            with m.Else():
                # stopped (or in reset): same handling as in ISSUE_START
                comb += dbg.core_stopped_i.eq(1)
                # while stopped, allow updating the PC and SVSTATE
                with m.If(self.pc_i.ok):
                    comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                    comb += self.state_w_pc.i_data.eq(self.pc_i.data)
                    sync += pc_changed.eq(1)
                with m.If(self.svstate_i.ok):
                    comb += new_svstate.eq(self.svstate_i.data)
                    comb += update_svstate.eq(1)
                    sync += sv_changed.eq(1)

    # check if svstate needs updating: if so, write it to State Regfile
    # (outside the FSM: applies whichever state requested the update)
    with m.If(update_svstate):
        comb += self.state_w_sv.wen.eq(1<<StateRegs.SVSTATE)
        comb += self.state_w_sv.i_data.eq(new_svstate)
        sync += cur_state.svstate.eq(new_svstate)  # for next clock
937
def execute_fsm(self, m, core, pc_changed, sv_changed,
                exec_insn_i_valid, exec_insn_o_ready,
                exec_pc_o_valid, exec_pc_i_ready):
    """Build the "execute" FSM.

    Talks to the "issue" FSM over two ready/valid pairs:

    * exec_insn_i_valid / exec_insn_o_ready: issue hands us an instruction
    * exec_pc_o_valid / exec_pc_i_ready: we hand completion back to issue

    pc_changed and sv_changed are cleared when an instruction is offered
    and set if, while the instruction is active, the write-enable of
    self.state_nia selects PC or SVSTATE respectively.
    """
    comb, sync = m.d.comb, m.d.sync

    # shorthand for expressions used below (plain aliases, not new Signals)
    ldst_exc = self.pdecode2.ldst_exc
    core_is_busy = core.n.o_data.busy_o
    state_wen = self.state_nia.wen
    sv_wen_bit = 1 << StateRegs.SVSTATE
    pc_wen_bit = 1 << StateRegs.PC

    with m.FSM(name="exec_fsm"):

        # idle: advertise readiness and wait to be offered an instruction
        with m.State("INSN_START"):
            comb += exec_insn_o_ready.eq(1)
            with m.If(exec_insn_i_valid):
                # forward "valid" to the core and forget any earlier
                # PC/SVSTATE change notes
                comb += core.p.i_valid.eq(1)
                sync += sv_changed.eq(0)
                sync += pc_changed.eq(0)
                # only advance once the core has accepted the instruction
                with m.If(core.p.o_ready):
                    m.next = "INSN_ACTIVE"

        # instruction in flight: stay here until the core is no longer busy
        with m.State("INSN_ACTIVE"):
            # record writes to SVSTATE and to PC
            with m.If(state_wen & sv_wen_bit):
                sync += sv_changed.eq(1)
            with m.If(state_wen & pc_wen_bit):
                sync += pc_changed.eq(1)
            with m.If(~core_is_busy):
                comb += exec_pc_o_valid.eq(1)
                with m.If(exec_pc_i_ready):
                    # exceptions are implemented by feeding the LDST
                    # exception info back into PowerDecoder2 and
                    # re-running the whole instruction.  flagging "done"
                    # here as well would look like two instructions
                    # (the failed LDST, then the TRAP), so insn_done is
                    # suppressed while an exception is pending.
                    with m.If(~ldst_exc.happened):
                        comb += self.insn_done.eq(1)
                    m.next = "INSN_START"
992
def setup_peripherals(self, m):
    """Attach core, memories, debug and peripherals; set up clocks/resets.

    Adds the submodules to ``m`` (each wrapped in a DomainRenamer for
    either "coresync" or the debug domain), declares the clock domains,
    builds the power-on-reset delay counter and drives the "coresync"
    reset, then wires up busy/endian signals and the LD/ST "go" hack.

    :param m: the nmigen Module being elaborated
    """
    comb, sync = m.d.comb, m.d.sync

    # okaaaay so the debug module must be in coresync clock domain
    # but NOT its reset signal. to cope with this, set every single
    # submodule explicitly in coresync domain, debug and JTAG
    # in their own one but using *external* reset.
    csd = DomainRenamer("coresync")
    dbd = DomainRenamer(self.dbg_domain)

    m.submodules.core = core = csd(self.core)
    m.submodules.imem = csd(self.imem)
    m.submodules.dbg = dbg = dbd(self.dbg)
    if self.jtag_en:
        m.submodules.jtag = jtag = dbd(self.jtag)
        # TODO: UART2GDB mux, here, from external pin
        # see https://bugs.libre-soc.org/show_bug.cgi?id=499
        sync += dbg.dmi.connect_to(jtag.dmi)

    cur_state = self.cur_state

    # 4x 4k SRAM blocks. these simply "exist", they get routed in litex
    if self.sram4x4k:
        for i, sram in enumerate(self.sram4k):
            m.submodules["sram4k_%d" % i] = csd(sram)
            comb += sram.enable.eq(self.wb_sram_en)

    # XICS interrupt handler
    if self.xics:
        m.submodules.xics_icp = icp = csd(self.xics_icp)
        m.submodules.xics_ics = ics = csd(self.xics_ics)
        comb += icp.ics_i.eq(ics.icp_o)  # connect ICS to ICP
        sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core

    # GPIO test peripheral
    if self.gpio:
        m.submodules.simple_gpio = csd(self.simple_gpio)

    # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
    # XXX causes litex ECP5 test to get wrong idea about input and output
    # (but works with verilator sim *sigh*)
    # if self.gpio and self.xics:
    #     comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

    # instruction decoder
    # NOTE(review): the result of create_pdecode() is never used in this
    # method; the call is kept only in case it matters elsewhere - confirm
    # and remove.
    pdecode = create_pdecode()
    m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
    if self.svp64_en:
        m.submodules.svp64 = csd(self.svp64)

    # clock delay power-on reset.  the variable names cd_por / cd_sync
    # matter: nmigen derives the domain names from them.
    cd_por = ClockDomain(reset_less=True)
    cd_sync = ClockDomain()
    core_sync = ClockDomain("coresync")
    m.domains += cd_por, cd_sync, core_sync
    if self.dbg_domain != "sync":
        dbg_sync = ClockDomain(self.dbg_domain)
        m.domains += dbg_sync

    # 2-bit down-counter, starts at 3 and sticks at 0
    ti_rst = Signal(reset_less=True)
    delay = Signal(range(4), reset=3)
    with m.If(delay != 0):
        m.d.por += delay.eq(delay - 1)
    comb += cd_por.clk.eq(ClockSignal())

    # power-on reset delay: hold the core in reset while the counter is
    # still running, or when debug or the external reset asks for it.
    # the parentheses are essential: Python's "|" binds tighter than
    # "!=", so without them this would compare delay against the OR of
    # the reset requests rather than OR-ing three conditions together.
    core_rst = ResetSignal("coresync")
    comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
    comb += core_rst.eq(ti_rst)

    # debug clock is same as coresync, but reset is *main external*
    if self.dbg_domain != "sync":
        dbg_rst = ResetSignal(self.dbg_domain)
        comb += dbg_rst.eq(ResetSignal())

    # busy/halted signals from core
    core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
    comb += self.busy_o.eq(core_busy_o)
    comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

    # temporary hack: says "go" immediately for both address gen and ST
    ldst = core.fus.fus['ldst0']
    st_go_edge = rising_edge(m, ldst.st.rel_o)
    m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)  # link addr-go direct to rel
    m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel
1083
def elaborate(self, platform):
    """Assemble the issuer: peripherals, state read-out and the FSMs.

    Creates the Module, attaches peripherals and the core, reads PC and
    SVSTATE, declares the ready/valid handshake signals, then
    instantiates the Fetch, Issue, (optional) Predicate and Execute
    FSMs, the DMI read-out logic and the DEC/TB updater.

    Local variable names that hold a bare ``Signal()`` are deliberately
    left alone: nmigen takes the generated signal name from them.
    """
    m = Module()
    comb = m.d.comb

    # shorthand
    cur_state = self.cur_state
    pdecode2 = self.pdecode2
    dbg = self.dbg
    core = self.core

    # peripherals and core first
    core_rst = self.core_rst
    self.setup_peripherals(m)

    # a core reset request clears the current state
    with m.If(core_rst):
        m.d.sync += self.cur_state.eq(0)

    # expose the current PC
    comb += self.pc_o.eq(cur_state.pc)
    pc_changed = Signal()  # note write to PC
    sv_changed = Signal()  # note write to SVSTATE

    # let the outside world know whether any FU is still executing
    comb += self.any_busy.eq(core.n.o_data.any_busy_o)

    # state comes either from the incoming override or from the regfile
    # TODO: really should be doing MSR in the same way
    pc = state_get(m, core_rst, self.pc_i,
                   "pc",  # read PC
                   self.state_r_pc, StateRegs.PC)
    svstate = state_get(m, core_rst, self.svstate_i,
                        "svstate",  # read SVSTATE
                        self.state_r_sv, StateRegs.SVSTATE)

    # default: PC is not written (FSM states override this when needed)
    comb += self.state_w_pc.wen.eq(0)
    comb += self.state_w_pc.i_data.eq(0)

    # next-instruction address when no branch is taken; how far it is
    # from the PC depends on the instruction size
    nia = Signal(64)

    # debug hookup
    # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
    comb += dbg.terminate_i.eq(core.o.core_terminate_o)
    comb += dbg.state.pc.eq(pc)
    comb += dbg.state.svstate.eq(svstate)
    comb += dbg.state.msr.eq(cur_state.msr)

    # prefix mode is passed from Fetch to Issue so that Issue can loop
    # on VL==0
    is_svp64_mode = Signal()

    # there are three FSMs (four with SVP64 enabled): fetch (32/64-bit),
    # issue, decode/execute, plus "predicate fetch/calculate".
    # below are the handshake signals between each of them.

    # Fetch may start as soon as the PC is valid
    fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
    fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"

    # Fetch -> decode/issue: the instruction itself
    fetch_insn_o_valid = Signal()
    fetch_insn_i_ready = Signal()

    # Issue -> Predicate fetch: decode and fetch the predicate
    pred_insn_i_valid = Signal()
    pred_insn_o_ready = Signal()

    # Predicate fetch -> Issue: the masks
    pred_mask_o_valid = Signal()
    pred_mask_i_ready = Signal()

    # Issue -> Execute: the instruction to be executed
    exec_insn_i_valid = Signal()
    exec_insn_o_ready = Signal()

    # Execute -> Issue: PC/SVSTATE handed back
    exec_pc_o_valid = Signal()
    exec_pc_i_ready = Signal()

    # these FSMs are a little unusual: they detect a condition and then
    # "hold" information combinatorially for the core (opcode, valid
    # flags and so on) rather than registering it with sync, which
    # would cost a clock of delay.

    # order is Fetch, predicate fetch, Issue, Execute.  the VL for-loop
    # lives in Issue; ready/valid signalling links the four together.

    # Fetch FSM
    fetch = FetchFSM(self.allow_overlap, self.svp64_en,
                     self.imem, core_rst, pdecode2, cur_state,
                     dbg, core, svstate, nia, is_svp64_mode)
    m.submodules.fetch = fetch
    # data in
    comb += fetch.p.i_data.pc.eq(pc)
    # ready/valid, both directions
    comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
    comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
    comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
    comb += fetch.n.i_ready.eq(fetch_insn_i_ready)

    # Issue FSM
    self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                   dbg, core_rst, is_svp64_mode,
                   fetch_pc_o_ready, fetch_pc_i_valid,
                   fetch_insn_o_valid, fetch_insn_i_ready,
                   pred_insn_i_valid, pred_insn_o_ready,
                   pred_mask_o_valid, pred_mask_i_ready,
                   exec_insn_i_valid, exec_insn_o_ready,
                   exec_pc_o_valid, exec_pc_i_ready)

    # Predicate FSM only exists when SVP64 is enabled
    if self.svp64_en:
        self.fetch_predicate_fsm(m,
                                 pred_insn_i_valid, pred_insn_o_ready,
                                 pred_mask_o_valid, pred_mask_i_ready)

    # Execute FSM
    self.execute_fsm(m, core, pc_changed, sv_changed,
                     exec_insn_i_valid, exec_insn_o_ready,
                     exec_pc_o_valid, exec_pc_i_ready)

    # not part of any FSM: regfile reads on demand from DMI
    self.do_dmi(m, dbg)

    # DEC and TB inc/dec FSM.  a copy of DEC goes into CoreState
    # (PowerDecoder2 uses that to raise the 0x900 exception)
    self.tb_dec_fsm(m, cur_state.dec)

    return m
1214
def do_dmi(self, m, dbg):
    """Service DMI debug read requests.

    Handles reads of the INT regfile, CR and XER only; writing to these
    regfiles is not implemented here.  Each request raises the read
    enable combinatorially; data and ack are returned on the cycle
    after the request, via a one-clock delayed copy of ``req``.
    """
    comb, sync = m.d.comb, m.d.sync
    d_reg, d_cr, d_xer = dbg.d_gpr, dbg.d_cr, dbg.d_xer
    int_is_unary = self.core.regs.rf['int'].unary

    # INT regfile: the read-enable format depends on regfile addressing
    with m.If(d_reg.req):
        # TODO: error-check this
        # XXX should this be combinatorial? sync better?
        if int_is_unary:
            comb += self.int_r.ren.eq(1 << d_reg.addr)
        else:
            comb += self.int_r.addr.eq(d_reg.addr)
            comb += self.int_r.ren.eq(1)
    d_reg_delay = Signal()
    sync += d_reg_delay.eq(d_reg.req)
    with m.If(d_reg_delay):
        # data arrives one clock later
        comb += d_reg.data.eq(self.int_r.o_data)
        comb += d_reg.ack.eq(1)

    # CR and XER follow the same pattern, with every read-enable bit set.
    # the delay signals are named explicitly so that they keep the names
    # d_cr_delay / d_xer_delay in the generated design.
    whole_regfile_reads = (
        (d_cr, self.cr_r, 0b11111111, "d_cr_delay"),
        (d_xer, self.xer_r, 0b111111, "d_xer_delay"),
    )
    for dbg_port, rd_port, ren_all, delay_name in whole_regfile_reads:
        with m.If(dbg_port.req):
            comb += rd_port.ren.eq(ren_all)  # enable all
        req_delayed = Signal(name=delay_name)
        sync += req_delayed.eq(dbg_port.req)
        with m.If(req_delayed):
            # data arrives one clock later
            comb += dbg_port.data.eq(rd_port.o_data)
            comb += dbg_port.ack.eq(1)
1260
def tb_dec_fsm(self, m, spr_dec):
    """FSM that keeps DEC and TB ticking.

    Cycles forever through four states: read DEC, write DEC-1, read TB,
    write TB+1.  The decremented DEC value is also copied into
    ``spr_dec``.  The SPR pipeline may have written a new DEC in the
    meantime; the regfile has "passthrough", so this *should* be ok.

    See v3.0B p1097-1099 for the Timer Resource, and p1065 and p1076.
    """
    comb, sync = m.d.comb, m.d.sync
    fast_ports = self.core.regs.rf['fast']
    rd = fast_ports.r_ports['issue']  # DEC/TB read port
    wr = fast_ports.w_ports['issue']  # DEC/TB write port

    with m.FSM():

        # kick off the DEC read
        with m.State("DEC_READ"):
            comb += rd.addr.eq(FastRegs.DEC)
            comb += rd.ren.eq(1)
            m.next = "DEC_WRITE"

        # DEC read data is available (1 cycle later): write back DEC-1
        with m.State("DEC_WRITE"):
            new_dec = Signal(64)
            # TODO: MSR.LPCR 32-bit decrement mode
            comb += new_dec.eq(rd.o_data - 1)
            comb += wr.addr.eq(FastRegs.DEC)
            comb += wr.wen.eq(1)
            comb += wr.i_data.eq(new_dec)
            # keep a copy in cur_state for the decoder
            sync += spr_dec.eq(new_dec)
            m.next = "TB_READ"

        # kick off the TB read
        with m.State("TB_READ"):
            comb += rd.addr.eq(FastRegs.TB)
            comb += rd.ren.eq(1)
            m.next = "TB_WRITE"

        # TB read data is available: write back TB+1
        with m.State("TB_WRITE"):
            new_tb = Signal(64)
            comb += new_tb.eq(rd.o_data + 1)
            comb += wr.addr.eq(FastRegs.TB)
            comb += wr.wen.eq(1)
            comb += wr.i_data.eq(new_tb)
            m.next = "DEC_READ"

    return m
1312
def __iter__(self):
    """Yield the ports: PC in/out, memerr, core, imem, endian, busy."""
    yield from self.pc_i.ports()
    yield from (self.pc_o, self.memerr_o)
    for unit in (self.core, self.imem):
        yield from unit.ports()
    yield from (self.core_bigendian_i, self.busy_o)
1321
def ports(self):
    """Return everything this object iterates over, as a new list."""
    return [*self]
1324
def external_ports(self):
    """Collect the ports exposed at the top level.

    Starts from the list returned by ``self.pc_i.ports()`` and extends
    it in place, in this order: fixed status ports, JTAG *or* DMI,
    instruction and data buses, then the optional SRAM, XICS and GPIO
    buses.
    """
    ports = self.pc_i.ports()
    ports += (self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o)

    # JTAG and DMI are mutually exclusive: DMI is only exposed directly
    # when JTAG is not enabled
    debug_ports = (self.jtag.external_ports() if self.jtag_en
                   else self.dbg.dmi.ports())
    ports += debug_ports

    ports += self.imem.ibus.fields.values()
    ports += self.core.l0.cmpi.wb_bus().fields.values()

    if self.sram4x4k:
        for sram in self.sram4k:
            ports += sram.bus.fields.values()

    if self.xics:
        for xics_unit in (self.xics_icp, self.xics_ics):
            ports += xics_unit.bus.fields.values()
        ports.append(self.int_level_i)

    if self.gpio:
        ports += self.simple_gpio.bus.fields.values()
        ports.append(self.gpio_o)

    return ports
1353
def ports(self):
    """Return everything this object iterates over, as a new list.

    NOTE(review): a method with this name and body is already defined
    earlier in the class; this later definition is the one that takes
    effect.
    """
    return [*self]
1356
1357
class TestIssuer(Elaboratable):
    """Wrapper around TestIssuerInternal plus a (dummy) PLL.

    Drives the "coresync" clock (and the debug-domain clock, when that
    domain is not "sync") and, when ``pspec.use_pll`` is set, exposes
    the PLL test/select signals.
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL in use, or direct clock?  missing attribute means "no"
        self.pll_en = getattr(pspec, "use_pll", False)
        if self.pll_en:
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk = ClockSignal()  # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """Instantiate the issuer (and PLL), then wire up the clocks."""
        m = Module()
        comb = m.d.comb
        dbg_domain = self.ti.dbg_domain
        separate_dbg_domain = dbg_domain != 'sync'

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            # ClockSelect runs at PLL output internal clock rate
            m.submodules.wrappll = pll = self.pll

            # clock domain for the PLL output
            cd_pll = ClockDomain("pllclk")
            m.domains += cd_pll

            # the "pllclk" domain clock is the PLL's output clock
            comb += self.pllclk_clk.eq(pll.clk_pll_o)

            # wire up external 24mhz to PLL
            # comb += pll.clk_24_i.eq(self.ref_clk)
            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals. don't mind them being in this domain
            comb += ResetSignal("pllclk").eq(ResetSignal())

        # these domain objects are created but (as in the commented-out
        # lines) never added to the module
        cd_coresync = ClockDomain("coresync")
        # m.domains += cd_coresync
        if separate_dbg_domain:
            cd_dbgsync = ClockDomain("dbgsync")
            # m.domains += cd_dbgsync

        # core clock; the debug clock follows the coresync clock
        intclk = ClockSignal("coresync")
        dbgclk = ClockSignal(dbg_domain)
        # XXX BYPASS PLL XXX: with the PLL enabled the core is still
        # clocked straight from the reference clock
        core_clk_src = self.ref_clk if self.pll_en else ClockSignal()
        comb += intclk.eq(core_clk_src)
        if separate_dbg_domain:
            dbgclk = ClockSignal(dbg_domain)
            comb += dbgclk.eq(intclk)

        return m

    def ports(self):
        """All issuer and PLL ports, plus the main clock and reset."""
        clk_rst = [ClockSignal(), ResetSignal()]
        return list(self.ti.ports()) + list(self.pll.ports()) + clk_rst

    def external_ports(self):
        """Top-level ports: issuer's, clock, reset and (if enabled) PLL."""
        ports = self.ti.external_ports()
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        if self.pll_en:
            for pll_port in (self.clk_sel_i, self.pll.clk_24_i,
                             self.pll_test_o, self.pll_vco_o,
                             self.pllclk_clk, self.ref_clk):
                ports.append(pll_port)
        return ports
1445
1446
if __name__ == '__main__':
    # one of each function unit
    unit_names = ('alu', 'cr', 'branch', 'trap', 'logical',
                  'spr', 'div', 'mul', 'shiftrot')
    units = dict.fromkeys(unit_names, 1)
    pspec = TestMemPspec(ldst_ifacetype='bare_wb',
                         imem_ifacetype='bare_wb',
                         addr_wid=48,
                         mask_wid=8,
                         reg_wid=64,
                         units=units)
    dut = TestIssuer(pspec)
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # run with no command-line arguments: also write out RTLIL
    no_cli_args = len(sys.argv) == 1
    if no_cli_args:
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as f:
            f.write(vl)