set and reset instruction fault so it does not occur twice
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to at least provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer, Mux, Const, Repl, Cat)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from nmutil.singlepipe import ControlBase
25 from soc.simple.core_data import FetchOutput, FetchInput
26
27 from nmigen.lib.coding import PriorityEncoder
28
29 from openpower.decoder.power_decoder import create_pdecode
30 from openpower.decoder.power_decoder2 import PowerDecode2, SVP64PrefixDecoder
31 from openpower.decoder.decode2execute1 import IssuerDecode2ToOperand
32 from openpower.decoder.decode2execute1 import Data
33 from openpower.decoder.power_enums import (MicrOp, SVP64PredInt, SVP64PredCR,
34 SVP64PredMode)
35 from openpower.state import CoreState
36 from openpower.consts import (CR, SVP64CROffs)
37 from soc.experiment.testmem import TestMemory # test only for instructions
38 from soc.regfile.regfiles import StateRegs, FastRegs
39 from soc.simple.core import NonProductionCore
40 from soc.config.test.test_loadstore import TestMemPspec
41 from soc.config.ifetch import ConfigFetchUnit
42 from soc.debug.dmi import CoreDebug, DMIInterface
43 from soc.debug.jtag import JTAG
44 from soc.config.pinouts import get_pinspecs
45 from soc.interrupts.xics import XICS_ICP, XICS_ICS
46 from soc.bus.simple_gpio import SimpleGPIO
47 from soc.bus.SPBlock512W64B8W import SPBlock512W64B8W
48 from soc.clock.select import ClockSelect
49 from soc.clock.dummypll import DummyPLL
50 from openpower.sv.svstate import SVSTATERec
51 from soc.experiment.icache import ICache
52
53 from nmutil.util import rising_edge
54
55
def get_insn(f_instr_o, pc):
    """Pick the 32-bit instruction word out of the fetch unit's output.

    * f_instr_o: instruction data as delivered by the fetch unit
    * pc: program counter the data was fetched for

    A bus that is exactly 32 bits wide is handed back untouched.  For
    anything else (the 64-bit case) pc[2] chooses which 32-bit word of
    the bus is returned.
    """
    if f_instr_o.width != 32:
        # 64-bit: bit 2 of pc decides which word to select
        return f_instr_o.word_select(pc[2], 32)
    return f_instr_o
62
63 # gets state input or reads from state regfile
64
65
def state_get(m, core_rst, state_i, name, regfile, regnum):
    """Provide a 64-bit state value, from an override or from a regfile.

    * m: Module the logic is added to
    * core_rst: core reset; while set, the read-delay flag is not updated
    * state_i: override input with "ok" and "data" fields
    * name: name for the result Signal (also prefixes the delay flag)
    * regfile: read port (ren / o_data) used when there is no override
    * regnum: register number, turned into a unary read-enable

    Returns the result Signal.  When state_i.ok is set the result follows
    state_i.data combinatorially; otherwise the regfile read is requested
    and its o_data is passed through once the delay flag is set, i.e. one
    clock after the request.
    """
    # result, plus a flag that marks "a regfile read was issued last cycle"
    res = Signal(64, reset_less=True, name=name)
    res_ok_delay = Signal(name="%s_ok_delay" % name)

    # the delay flag is only tracked whilst the core is out of reset
    with m.If(~core_rst):
        m.d.sync += res_ok_delay.eq(~state_i.ok)

    with m.If(state_i.ok):
        # incoming override takes priority over the regfile
        m.d.comb += res.eq(state_i.data)
    with m.Else():
        # no override: request the register from the regfile...
        m.d.comb += regfile.ren.eq(1 << regnum)
        # ... whose data only becomes available on a 1-clock delay
        with m.If(res_ok_delay):
            m.d.comb += res.eq(regfile.o_data)

    return res
84
85
def get_predint(m, mask, name):
    """Decode an SVP64 integer-predicate mask field into control signals.

    Mirrors the equivalent ISACaller function, except that no INT register
    is read here: only "what needs to be done" is decoded, i.e. which INT
    reg, whether it is shifted and whether it is bit-inverted.

    Returns the tuple (regread, invert, unary, all1s):

    * regread is the GPR register number to be read
    * invert is set when the register value is to be bit-inverted
    * unary is set when the register contents select a single bit (1<<r3)
    * all1s is set when no mask is to be applied
    """
    comb = m.d.comb
    regread = Signal(5, name=name+"regread")
    invert = Signal(name=name+"invert")
    unary = Signal(name=name+"unary")
    all1s = Signal(name=name+"all1s")

    # one row per register-based mode:
    # (predicate mode, GPR to read, extra flag to raise or None)
    reg_modes = (
        (SVP64PredInt.R3_UNARY, 3, unary),   # 1<<r3 - shift r3 (single bit)
        (SVP64PredInt.R3, 3, None),
        (SVP64PredInt.R3_N, 3, invert),
        (SVP64PredInt.R10, 10, None),
        (SVP64PredInt.R10_N, 10, invert),
        (SVP64PredInt.R30, 30, None),
        (SVP64PredInt.R30_N, 30, invert),
    )

    with m.Switch(mask):
        with m.Case(SVP64PredInt.ALWAYS.value):
            comb += all1s.eq(1)  # use 0b1111 (all ones)
        for mode, gpr, flag in reg_modes:
            with m.Case(mode.value):
                comb += regread.eq(gpr)
                if flag is not None:
                    comb += flag.eq(1)

    return regread, invert, unary, all1s
124
125
def get_predcr(m, mask, name):
    """Decode an SVP64 CR-predicate mask field into a CR bit and invert flag.

    Equivalent of _get_predcr in ISACaller.  Returns (idx, invert) where
    idx selects the bit within a CR field (LT/GT/EQ/SO) and invert says
    whether that bit is to be tested inverted.
    """
    comb = m.d.comb
    idx = Signal(2, name=name+"idx")
    invert = Signal(name=name+"crinvert")

    # (predicate mode, CR bit to test, inversion)
    cr_modes = (
        (SVP64PredCR.LT, CR.LT, 0),
        (SVP64PredCR.GE, CR.LT, 1),
        (SVP64PredCR.GT, CR.GT, 0),
        (SVP64PredCR.LE, CR.GT, 1),
        (SVP64PredCR.EQ, CR.EQ, 0),
        (SVP64PredCR.NE, CR.EQ, 1),
        (SVP64PredCR.SO, CR.SO, 0),
        (SVP64PredCR.NS, CR.SO, 1),
    )

    with m.Switch(mask):
        for mode, cr_bit, inverted in cr_modes:
            with m.Case(mode.value):
                comb += idx.eq(cr_bit)
                comb += invert.eq(inverted)

    return idx, invert
159
160
161 # Fetch Finite State Machine.
162 # WARNING: there are currently DriverConflicts but it's actually working.
163 # TODO, here: everything that is global in nature, information from the
164 # main TestIssuerInternal, needs to move to either ispec() or ospec().
165 # not only that: TestIssuerInternal.imem can entirely move into here
166 # because imem is only ever accessed inside the FetchFSM.
class FetchFSM(ControlBase):
    """Instruction fetch Finite State Machine.

    Built on ControlBase: the PC arrives through the "p" (previous) side
    and the fetched instruction is signalled through the "n" (next) side,
    each with a ready/valid handshake.  The class acts as its own Stage
    (ispec/ospec/setup are provided below).
    """

    def __init__(self, allow_overlap, svp64_en, imem, core_rst,
                 pdecode2, cur_state,
                 dbg, core, svstate, nia, is_svp64_mode):
        """store references to the (externally owned) objects that the
        FSM reads and drives, then set up the ControlBase i/o data.

        NOTE(review): elaborate() also reads self.svp64 and
        self.core_bigendian_i when svp64_en is set; neither is assigned
        here - presumably they are attached by whoever creates the
        FetchFSM.  confirm against the caller.
        """
        self.allow_overlap = allow_overlap
        self.svp64_en = svp64_en
        self.imem = imem
        self.core_rst = core_rst
        self.pdecode2 = pdecode2
        self.cur_state = cur_state
        self.dbg = dbg
        self.core = core
        self.svstate = svstate
        self.nia = nia
        self.is_svp64_mode = is_svp64_mode

        # set up pipeline ControlBase and allocate i/o specs
        # (unusual: normally done by the Pipeline API)
        super().__init__(stage=self)
        self.p.i_data, self.n.o_data = self.new_specs(None)
        self.i, self.o = self.p.i_data, self.n.o_data

    # next 3 functions are Stage API Compliance
    def setup(self, m, i):
        """Stage API: nothing to set up"""
        pass

    def ispec(self):
        """Stage API: input data is a FetchInput"""
        return FetchInput()

    def ospec(self):
        """Stage API: output data is a FetchOutput"""
        return FetchOutput()

    def elaborate(self, platform):
        """fetch FSM

        this FSM performs fetch of raw instruction data, partial-decodes
        it 32-bit at a time to detect SVP64 prefixes, and will optionally
        read a 2nd 32-bit quantity if that occurs.

        states: IDLE -> INSN_READ -> (INSN_READ2, SVP64 only) ->
        INSN_READY -> IDLE
        """
        m = super().elaborate(platform)

        # local aliases, including the ready/valid handshake signals
        dbg = self.dbg
        core = self.core
        pc = self.i.pc
        svstate = self.svstate
        nia = self.nia
        is_svp64_mode = self.is_svp64_mode
        fetch_pc_o_ready = self.p.o_ready
        fetch_pc_i_valid = self.p.i_valid
        fetch_insn_o_valid = self.n.o_valid
        fetch_insn_i_ready = self.n.i_ready

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode

        # cleared when an MSR read is issued (IDLE), set again once the
        # result has been captured (INSN_READ)
        msr_read = Signal(reset=1)

        # also note instruction fetch failed
        # (constant zero when the core has no icache)
        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
        else:
            fetch_failed = Const(0, 1)

        # don't read msr every cycle
        staterf = self.core.regs.rf['state']
        state_r_msr = staterf.r_ports['msr']  # MSR rd

        # default: no MSR read (overridden in IDLE when a fetch starts)
        comb += state_r_msr.ren.eq(0)

        with m.FSM(name='fetch_fsm'):

            # waiting (zzz)
            with m.State("IDLE"):
                # only accept a new PC when not stopping and the previous
                # fetch has not failed
                with m.If(~dbg.stopping_o & ~fetch_failed):
                    comb += fetch_pc_o_ready.eq(1)
                with m.If(fetch_pc_i_valid & ~fetch_failed):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                    sync += cur_state.pc.eq(pc)
                    sync += cur_state.svstate.eq(svstate)  # and svstate

                    # initiate read of MSR.  arrives one clock later
                    comb += state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # dummy pause to find out why simulation is not keeping up
            with m.State("INSN_READ"):
                # debug "stopping" is only honoured here when overlap
                # is enabled
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "IDLE"
                with m.Else():
                    # one cycle later, msr/sv read arrives.  valid only once.
                    with m.If(~msr_read):
                        sync += msr_read.eq(1)  # yeah don't read it again
                        sync += cur_state.msr.eq(state_r_msr.o_data)
                    with m.If(self.imem.f_busy_o & ~fetch_failed):  # zzz...
                        # busy but not fetch failed: stay in wait-read
                        comb += self.imem.a_i_valid.eq(1)
                        comb += self.imem.f_i_valid.eq(1)
                    with m.Else():
                        # not busy (or fetch failed!): instruction fetched
                        # when fetch failed, the instruction gets ignored
                        # by the decoder
                        insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                        if self.svp64_en:
                            svp64 = self.svp64
                            # decode the SVP64 prefix, if any
                            comb += svp64.raw_opcode_in.eq(insn)
                            comb += svp64.bigendian.eq(self.core_bigendian_i)
                            # pass the decoded prefix (if any) to PowerDecoder2
                            sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                            sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
                            # remember whether this is a prefixed instruction,
                            # so the FSM can readily loop when VL==0
                            sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                            # calculate the address of the following instruction
                            insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                            sync += nia.eq(cur_state.pc + insn_size)
                            with m.If(~svp64.is_svp64_mode):
                                # with no prefix, store the instruction
                                # and hand it directly to the next FSM
                                sync += dec_opcode_o.eq(insn)
                                m.next = "INSN_READY"
                            with m.Else():
                                # fetch the rest of the instruction from memory
                                comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                                comb += self.imem.a_i_valid.eq(1)
                                comb += self.imem.f_i_valid.eq(1)
                                m.next = "INSN_READ2"
                        else:
                            # not SVP64 - 32-bit only
                            sync += nia.eq(cur_state.pc + 4)
                            sync += dec_opcode_o.eq(insn)
                            m.next = "INSN_READY"

            # second 32-bit word of an SVP64-prefixed instruction
            with m.State("INSN_READ2"):
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                    sync += dec_opcode_o.eq(insn)
                    m.next = "INSN_READY"
                    # TODO: probably can start looking at pdecode2.rm_dec
                    # here or maybe even in INSN_READ state, if svp64_mode
                    # detected, in order to trigger - and wait for - the
                    # predicate reading.
                    if self.svp64_en:
                        # (pmode is not used yet: the string below is a
                        # placeholder sketch, not executed logic)
                        pmode = pdecode2.rm_dec.predmode
                        """
                        if pmode != SVP64PredMode.ALWAYS.value:
                            fire predicate loading FSM and wait before
                            moving to INSN_READY
                        else:
                            sync += self.srcmask.eq(-1) # set to all 1s
                            sync += self.dstmask.eq(-1) # set to all 1s
                            m.next = "INSN_READY"
                        """

            with m.State("INSN_READY"):
                # hand over the instruction, to be decoded
                comb += fetch_insn_o_valid.eq(1)
                with m.If(fetch_insn_i_ready):
                    m.next = "IDLE"

        # whatever was done above, over-ride it if core reset is held
        with m.If(self.core_rst):
            sync += nia.eq(0)

        return m
353
354
355 class TestIssuerInternal(Elaboratable):
356 """TestIssuer - reads instructions from TestMemory and issues them
357
358 efficiency and speed is not the main goal here: functional correctness
359 and code clarity is. optimisations (which almost 100% interfere with
360 easy understanding) come later.
361 """
362
363 def __init__(self, pspec):
364
365 # test is SVP64 is to be enabled
366 self.svp64_en = hasattr(pspec, "svp64") and (pspec.svp64 == True)
367
368 # and if regfiles are reduced
369 self.regreduce_en = (hasattr(pspec, "regreduce") and
370 (pspec.regreduce == True))
371
372 # and if overlap requested
373 self.allow_overlap = (hasattr(pspec, "allow_overlap") and
374 (pspec.allow_overlap == True))
375
376 # JTAG interface. add this right at the start because if it's
377 # added it *modifies* the pspec, by adding enable/disable signals
378 # for parts of the rest of the core
379 self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
380 self.dbg_domain = "sync" # sigh "dbgsunc" too problematic
381 # self.dbg_domain = "dbgsync" # domain for DMI/JTAG clock
382 if self.jtag_en:
383 # XXX MUST keep this up-to-date with litex, and
384 # soc-cocotb-sim, and err.. all needs sorting out, argh
385 subset = ['uart',
386 'mtwi',
387 'eint', 'gpio', 'mspi0',
388 # 'mspi1', - disabled for now
389 # 'pwm', 'sd0', - disabled for now
390 'sdr']
391 self.jtag = JTAG(get_pinspecs(subset=subset),
392 domain=self.dbg_domain)
393 # add signals to pspec to enable/disable icache and dcache
394 # (or data and intstruction wishbone if icache/dcache not included)
395 # https://bugs.libre-soc.org/show_bug.cgi?id=520
396 # TODO: do we actually care if these are not domain-synchronised?
397 # honestly probably not.
398 pspec.wb_icache_en = self.jtag.wb_icache_en
399 pspec.wb_dcache_en = self.jtag.wb_dcache_en
400 self.wb_sram_en = self.jtag.wb_sram_en
401 else:
402 self.wb_sram_en = Const(1)
403
404 # add 4k sram blocks?
405 self.sram4x4k = (hasattr(pspec, "sram4x4kblock") and
406 pspec.sram4x4kblock == True)
407 if self.sram4x4k:
408 self.sram4k = []
409 for i in range(4):
410 self.sram4k.append(SPBlock512W64B8W(name="sram4k_%d" % i,
411 # features={'err'}
412 ))
413
414 # add interrupt controller?
415 self.xics = hasattr(pspec, "xics") and pspec.xics == True
416 if self.xics:
417 self.xics_icp = XICS_ICP()
418 self.xics_ics = XICS_ICS()
419 self.int_level_i = self.xics_ics.int_level_i
420
421 # add GPIO peripheral?
422 self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
423 if self.gpio:
424 self.simple_gpio = SimpleGPIO()
425 self.gpio_o = self.simple_gpio.gpio_o
426
427 # main instruction core. suitable for prototyping / demo only
428 self.core = core = NonProductionCore(pspec)
429 self.core_rst = ResetSignal("coresync")
430
431 # instruction decoder. goes into Trap Record
432 #pdecode = create_pdecode()
433 self.cur_state = CoreState("cur") # current state (MSR/PC/SVSTATE)
434 self.pdecode2 = PowerDecode2(None, state=self.cur_state,
435 opkls=IssuerDecode2ToOperand,
436 svp64_en=self.svp64_en,
437 regreduce_en=self.regreduce_en)
438 pdecode = self.pdecode2.dec
439
440 if self.svp64_en:
441 self.svp64 = SVP64PrefixDecoder() # for decoding SVP64 prefix
442
443 # Test Instruction memory
444 if hasattr(core, "icache"):
445 # XXX BLECH! use pspec to transfer the I-Cache to ConfigFetchUnit
446 # truly dreadful. needs a huge reorg.
447 pspec.icache = core.icache
448 self.imem = ConfigFetchUnit(pspec).fu
449
450 # DMI interface
451 self.dbg = CoreDebug()
452
453 # instruction go/monitor
454 self.pc_o = Signal(64, reset_less=True)
455 self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
456 self.svstate_i = Data(64, "svstate_i") # ditto
457 self.core_bigendian_i = Signal() # TODO: set based on MSR.LE
458 self.busy_o = Signal(reset_less=True)
459 self.memerr_o = Signal(reset_less=True)
460
461 # STATE regfile read /write ports for PC, MSR, SVSTATE
462 staterf = self.core.regs.rf['state']
463 self.state_r_pc = staterf.r_ports['cia'] # PC rd
464 self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
465 self.state_r_sv = staterf.r_ports['sv'] # SVSTATE rd
466 self.state_w_sv = staterf.w_ports['sv'] # SVSTATE wr
467
468 # DMI interface access
469 intrf = self.core.regs.rf['int']
470 crrf = self.core.regs.rf['cr']
471 xerrf = self.core.regs.rf['xer']
472 self.int_r = intrf.r_ports['dmi'] # INT read
473 self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
474 self.xer_r = xerrf.r_ports['full_xer'] # XER read
475
476 if self.svp64_en:
477 # for predication
478 self.int_pred = intrf.r_ports['pred'] # INT predicate read
479 self.cr_pred = crrf.r_ports['cr_pred'] # CR predicate read
480
481 # hack method of keeping an eye on whether branch/trap set the PC
482 self.state_nia = self.core.regs.rf['state'].w_ports['nia']
483 self.state_nia.wen.name = 'state_nia_wen'
484
485 # pulse to synchronize the simulator at instruction end
486 self.insn_done = Signal()
487
488 # indicate any instruction still outstanding, in execution
489 self.any_busy = Signal()
490
491 if self.svp64_en:
492 # store copies of predicate masks
493 self.srcmask = Signal(64)
494 self.dstmask = Signal(64)
495
    def fetch_predicate_fsm(self, m,
                            pred_insn_i_valid, pred_insn_o_ready,
                            pred_mask_o_valid, pred_mask_i_ready):
        """fetch_predicate_fsm - obtains (constructs in the case of CR)
        src/dest predicate masks

        https://bugs.libre-soc.org/show_bug.cgi?id=617
        the predicates can be read here, by using IntRegs r_ports['pred']
        or CRRegs r_ports['cr_pred'].  in the case of CRs it will have to
        be done through multiple reads, extracting one relevant at a time.
        later, a faster way would be to use the 32-bit-wide CR port but
        this is more complex decoding, here.  equivalent code used in
        ISACaller is "from openpower.decoder.isa.caller import get_predcr"

        note: this ENTIRE FSM is not to be called when svp64 is disabled

        arguments:

        * m: Module to which the FSM is added
        * pred_insn_i_valid / pred_insn_o_ready: handshake that starts a
          predicate fetch (the FSM is ready only in FETCH_PRED_IDLE)
        * pred_mask_o_valid / pred_mask_i_ready: handshake that hands over
          the finished masks (valid only in FETCH_PRED_DONE)

        the results are left in self.srcmask and self.dstmask.
        """
        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        rm_dec = pdecode2.rm_dec  # SVP64RMModeDecode
        predmode = rm_dec.predmode
        srcpred, dstpred = rm_dec.srcpred, rm_dec.dstpred
        cr_pred, int_pred = self.cr_pred, self.int_pred  # read regfiles
        # get src/dst step, so we can skip already used mask bits
        cur_state = self.cur_state
        srcstep = cur_state.svstate.srcstep
        dststep = cur_state.svstate.dststep
        cur_vl = cur_state.svstate.vl

        # decode predicates
        # (both the INT and the CR interpretation of each field are
        # decoded; predmode decides which one is actually used)
        sregread, sinvert, sunary, sall1s = get_predint(m, srcpred, 's')
        dregread, dinvert, dunary, dall1s = get_predint(m, dstpred, 'd')
        sidx, scrinvert = get_predcr(m, srcpred, 's')
        didx, dcrinvert = get_predcr(m, dstpred, 'd')

        # store fetched masks, for either intpred or crpred
        # when src/dst step is not zero, the skipped mask bits need to be
        # shifted-out, before actually storing them in src/dest mask
        new_srcmask = Signal(64, reset_less=True)
        new_dstmask = Signal(64, reset_less=True)

        with m.FSM(name="fetch_predicate"):

            with m.State("FETCH_PRED_IDLE"):
                comb += pred_insn_o_ready.eq(1)
                with m.If(pred_insn_i_valid):
                    with m.If(predmode == SVP64PredMode.INT):
                        # skip fetching destination mask register, when
                        # no destination mask applies (all ones)
                        with m.If(dall1s):
                            sync += new_dstmask.eq(-1)
                            # directly go to fetch source mask register
                            # guaranteed not to be zero (otherwise predmode
                            # would be SVP64PredMode.ALWAYS, not INT)
                            comb += int_pred.addr.eq(sregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_SRC_READ"
                        # fetch destination predicate register
                        with m.Else():
                            comb += int_pred.addr.eq(dregread)
                            comb += int_pred.ren.eq(1)
                            m.next = "INT_DST_READ"
                    with m.Elif(predmode == SVP64PredMode.CR):
                        # go fetch masks from the CR register file
                        # (masks start at zero, bits are OR-ed in one by one)
                        sync += new_srcmask.eq(0)
                        sync += new_dstmask.eq(0)
                        m.next = "CR_READ"
                    with m.Else():
                        # neither INT nor CR: masks are all ones, and
                        # there is nothing to shift
                        sync += self.srcmask.eq(-1)
                        sync += self.dstmask.eq(-1)
                        m.next = "FETCH_PRED_DONE"

            with m.State("INT_DST_READ"):
                # store destination mask
                inv = Repl(dinvert, 64)
                with m.If(dunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the low 6 bits of the register are used)
                    dst_shift = Signal(range(64))
                    comb += dst_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_dstmask.eq(1 << dst_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_dstmask.eq(self.int_pred.o_data ^ inv)
                # skip fetching source mask register, when
                # no source mask applies (all ones)
                with m.If(sall1s):
                    sync += new_srcmask.eq(-1)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # fetch source predicate register
                with m.Else():
                    comb += int_pred.addr.eq(sregread)
                    comb += int_pred.ren.eq(1)
                    m.next = "INT_SRC_READ"

            with m.State("INT_SRC_READ"):
                # store source mask
                inv = Repl(sinvert, 64)
                with m.If(sunary):
                    # set selected mask bit for 1<<r3 mode
                    # (only the low 6 bits of the register are used)
                    src_shift = Signal(range(64))
                    comb += src_shift.eq(self.int_pred.o_data & 0b111111)
                    sync += new_srcmask.eq(1 << src_shift)
                with m.Else():
                    # invert mask if requested
                    sync += new_srcmask.eq(self.int_pred.o_data ^ inv)
                m.next = "FETCH_PRED_SHIFT_MASK"

            # fetch masks from the CR register file
            # implements the following loop:
            # idx, inv = get_predcr(mask)
            # mask = 0
            # for cr_idx in range(vl):
            #     cr = crl[cr_idx + SVP64CROffs.CRPred]  # takes one cycle
            #     if cr[idx] ^ inv:
            #         mask |= 1 << cr_idx
            # return mask
            with m.State("CR_READ"):
                # CR index to be read, which will be ready by the next cycle
                cr_idx = Signal.like(cur_vl, reset_less=True)
                # submit the read operation to the regfile
                with m.If(cr_idx != cur_vl):
                    # the CR read port is unary ...
                    # ren = 1 << cr_idx
                    # ... in MSB0 convention ...
                    # ren = 1 << (7 - cr_idx)
                    # ... and with an offset:
                    # ren = 1 << (7 - off - cr_idx)
                    idx = SVP64CROffs.CRPred + cr_idx
                    comb += cr_pred.ren.eq(1 << (7 - idx))
                    # signal data valid in the next cycle
                    cr_read = Signal(reset_less=True)
                    sync += cr_read.eq(1)
                    # load the next index
                    sync += cr_idx.eq(cr_idx + 1)
                with m.Else():
                    # exit on loop end
                    sync += cr_read.eq(0)
                    sync += cr_idx.eq(0)
                    m.next = "FETCH_PRED_SHIFT_MASK"
                # process the data requested in the previous cycle.  this
                # also runs in the final (exit) cycle, for the last read
                with m.If(cr_read):
                    # compensate for the one cycle delay on the regfile
                    cur_cr_idx = Signal.like(cur_vl)
                    comb += cur_cr_idx.eq(cr_idx - 1)
                    # read the CR field, select the appropriate bit
                    cr_field = Signal(4)
                    scr_bit = Signal()
                    dcr_bit = Signal()
                    comb += cr_field.eq(cr_pred.o_data)
                    comb += scr_bit.eq(cr_field.bit_select(sidx, 1)
                                       ^ scrinvert)
                    comb += dcr_bit.eq(cr_field.bit_select(didx, 1)
                                       ^ dcrinvert)
                    # set the corresponding mask bit
                    bit_to_set = Signal.like(self.srcmask)
                    comb += bit_to_set.eq(1 << cur_cr_idx)
                    with m.If(scr_bit):
                        sync += new_srcmask.eq(new_srcmask | bit_to_set)
                    with m.If(dcr_bit):
                        sync += new_dstmask.eq(new_dstmask | bit_to_set)

            with m.State("FETCH_PRED_SHIFT_MASK"):
                # shift-out skipped mask bits
                sync += self.srcmask.eq(new_srcmask >> srcstep)
                sync += self.dstmask.eq(new_dstmask >> dststep)
                m.next = "FETCH_PRED_DONE"

            with m.State("FETCH_PRED_DONE"):
                # masks are ready: wait for them to be accepted
                comb += pred_mask_o_valid.eq(1)
                with m.If(pred_mask_i_ready):
                    m.next = "FETCH_PRED_IDLE"
664
665 def issue_fsm(self, m, core, pc_changed, sv_changed, nia,
666 dbg, core_rst, is_svp64_mode,
667 fetch_pc_o_ready, fetch_pc_i_valid,
668 fetch_insn_o_valid, fetch_insn_i_ready,
669 pred_insn_i_valid, pred_insn_o_ready,
670 pred_mask_o_valid, pred_mask_i_ready,
671 exec_insn_i_valid, exec_insn_o_ready,
672 exec_pc_o_valid, exec_pc_i_ready):
673 """issue FSM
674
675 decode / issue FSM. this interacts with the "fetch" FSM
676 through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
677 (outgoing). also interacts with the "execute" FSM
678 through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
679 (incoming).
680 SVP64 RM prefixes have already been set up by the
681 "fetch" phase, so execute is fairly straightforward.
682 """
683
684 comb = m.d.comb
685 sync = m.d.sync
686 pdecode2 = self.pdecode2
687 cur_state = self.cur_state
688
689 # temporaries
690 dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode
691
692 # for updating svstate (things like srcstep etc.)
693 update_svstate = Signal() # set this (below) if updating
694 new_svstate = SVSTATERec("new_svstate")
695 comb += new_svstate.eq(cur_state.svstate)
696
697 # precalculate srcstep+1 and dststep+1
698 cur_srcstep = cur_state.svstate.srcstep
699 cur_dststep = cur_state.svstate.dststep
700 next_srcstep = Signal.like(cur_srcstep)
701 next_dststep = Signal.like(cur_dststep)
702 comb += next_srcstep.eq(cur_state.svstate.srcstep+1)
703 comb += next_dststep.eq(cur_state.svstate.dststep+1)
704
705 # note if an exception happened. in a pipelined or OoO design
706 # this needs to be accompanied by "shadowing" (or stalling)
707 exc_happened = self.core.o.exc_happened
708 # also note instruction fetch failed
709 if hasattr(core, "icache"):
710 fetch_failed = core.icache.i_out.fetch_failed
711 else:
712 fetch_failed = Const(0, 1)
713 # set to fault in decoder
714 # update (highest priority) instruction fault
715 rising_fetch_failed = rising_edge(m, fetch_failed)
716 with m.If(rising_fetch_failed):
717 sync += pdecode2.instr_fault.eq(1)
718
719 with m.FSM(name="issue_fsm"):
720
721 # sync with the "fetch" phase which is reading the instruction
722 # at this point, there is no instruction running, that
723 # could inadvertently update the PC.
724 with m.State("ISSUE_START"):
725 # reset instruction fault
726 sync += pdecode2.instr_fault.eq(0)
727 # wait on "core stop" release, before next fetch
728 # need to do this here, in case we are in a VL==0 loop
729 with m.If(~dbg.core_stop_o & ~core_rst):
730 comb += fetch_pc_i_valid.eq(1) # tell fetch to start
731 with m.If(fetch_pc_o_ready): # fetch acknowledged us
732 m.next = "INSN_WAIT"
733 with m.Else():
734 # tell core it's stopped, and acknowledge debug handshake
735 comb += dbg.core_stopped_i.eq(1)
736 # while stopped, allow updating the PC and SVSTATE
737 with m.If(self.pc_i.ok):
738 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
739 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
740 sync += pc_changed.eq(1)
741 with m.If(self.svstate_i.ok):
742 comb += new_svstate.eq(self.svstate_i.data)
743 comb += update_svstate.eq(1)
744 sync += sv_changed.eq(1)
745
746 # wait for an instruction to arrive from Fetch
747 with m.State("INSN_WAIT"):
748 if self.allow_overlap:
749 stopping = dbg.stopping_o
750 else:
751 stopping = Const(0)
752 with m.If(stopping):
753 # stopping: jump back to idle
754 m.next = "ISSUE_START"
755 with m.Else():
756 comb += fetch_insn_i_ready.eq(1)
757 with m.If(fetch_insn_o_valid):
758 # loop into ISSUE_START if it's a SVP64 instruction
759 # and VL == 0. this because VL==0 is a for-loop
760 # from 0 to 0 i.e. always, always a NOP.
761 cur_vl = cur_state.svstate.vl
762 with m.If(is_svp64_mode & (cur_vl == 0)):
763 # update the PC before fetching the next instruction
764 # since we are in a VL==0 loop, no instruction was
765 # executed that we could be overwriting
766 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
767 comb += self.state_w_pc.i_data.eq(nia)
768 comb += self.insn_done.eq(1)
769 m.next = "ISSUE_START"
770 with m.Else():
771 if self.svp64_en:
772 m.next = "PRED_START" # fetching predicate
773 else:
774 m.next = "DECODE_SV" # skip predication
775
776 with m.State("PRED_START"):
777 comb += pred_insn_i_valid.eq(1) # tell fetch_pred to start
778 with m.If(pred_insn_o_ready): # fetch_pred acknowledged us
779 m.next = "MASK_WAIT"
780
781 with m.State("MASK_WAIT"):
782 comb += pred_mask_i_ready.eq(1) # ready to receive the masks
783 with m.If(pred_mask_o_valid): # predication masks are ready
784 m.next = "PRED_SKIP"
785
786 # skip zeros in predicate
787 with m.State("PRED_SKIP"):
788 with m.If(~is_svp64_mode):
789 m.next = "DECODE_SV" # nothing to do
790 with m.Else():
791 if self.svp64_en:
792 pred_src_zero = pdecode2.rm_dec.pred_sz
793 pred_dst_zero = pdecode2.rm_dec.pred_dz
794
795 # new srcstep, after skipping zeros
796 skip_srcstep = Signal.like(cur_srcstep)
797 # value to be added to the current srcstep
798 src_delta = Signal.like(cur_srcstep)
799 # add leading zeros to srcstep, if not in zero mode
800 with m.If(~pred_src_zero):
801 # priority encoder (count leading zeros)
802 # append guard bit, in case the mask is all zeros
803 pri_enc_src = PriorityEncoder(65)
804 m.submodules.pri_enc_src = pri_enc_src
805 comb += pri_enc_src.i.eq(Cat(self.srcmask,
806 Const(1, 1)))
807 comb += src_delta.eq(pri_enc_src.o)
808 # apply delta to srcstep
809 comb += skip_srcstep.eq(cur_srcstep + src_delta)
810 # shift-out all leading zeros from the mask
811 # plus the leading "one" bit
812 # TODO count leading zeros and shift-out the zero
813 # bits, in the same step, in hardware
814 sync += self.srcmask.eq(self.srcmask >> (src_delta+1))
815
816 # same as above, but for dststep
817 skip_dststep = Signal.like(cur_dststep)
818 dst_delta = Signal.like(cur_dststep)
819 with m.If(~pred_dst_zero):
820 pri_enc_dst = PriorityEncoder(65)
821 m.submodules.pri_enc_dst = pri_enc_dst
822 comb += pri_enc_dst.i.eq(Cat(self.dstmask,
823 Const(1, 1)))
824 comb += dst_delta.eq(pri_enc_dst.o)
825 comb += skip_dststep.eq(cur_dststep + dst_delta)
826 sync += self.dstmask.eq(self.dstmask >> (dst_delta+1))
827
828 # TODO: initialize mask[VL]=1 to avoid passing past VL
829 with m.If((skip_srcstep >= cur_vl) |
830 (skip_dststep >= cur_vl)):
831 # end of VL loop. Update PC and reset src/dst step
832 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
833 comb += self.state_w_pc.i_data.eq(nia)
834 comb += new_svstate.srcstep.eq(0)
835 comb += new_svstate.dststep.eq(0)
836 comb += update_svstate.eq(1)
837 # synchronize with the simulator
838 comb += self.insn_done.eq(1)
839 # go back to Issue
840 m.next = "ISSUE_START"
841 with m.Else():
842 # update new src/dst step
843 comb += new_svstate.srcstep.eq(skip_srcstep)
844 comb += new_svstate.dststep.eq(skip_dststep)
845 comb += update_svstate.eq(1)
846 # proceed to Decode
847 m.next = "DECODE_SV"
848
849 # pass predicate mask bits through to satellite decoders
850 # TODO: for SIMD this will be *multiple* bits
851 sync += core.i.sv_pred_sm.eq(self.srcmask[0])
852 sync += core.i.sv_pred_dm.eq(self.dstmask[0])
853
854 # after src/dst step have been updated, we are ready
855 # to decode the instruction
856 with m.State("DECODE_SV"):
857 # decode the instruction
858 sync += core.i.e.eq(pdecode2.e)
859 sync += core.i.state.eq(cur_state)
860 sync += core.i.raw_insn_i.eq(dec_opcode_i)
861 sync += core.i.bigendian_i.eq(self.core_bigendian_i)
862 if self.svp64_en:
863 sync += core.i.sv_rm.eq(pdecode2.sv_rm)
864 # set RA_OR_ZERO detection in satellite decoders
865 sync += core.i.sv_a_nz.eq(pdecode2.sv_a_nz)
866 # and svp64 detection
867 sync += core.i.is_svp64_mode.eq(is_svp64_mode)
868 # and svp64 bit-rev'd ldst mode
869 ldst_dec = pdecode2.use_svp64_ldst_dec
870 sync += core.i.use_svp64_ldst_dec.eq(ldst_dec)
871 # after decoding, reset any previous exception condition,
872 # allowing it to be set again during the next execution
873 sync += pdecode2.ldst_exc.eq(0)
874
875 m.next = "INSN_EXECUTE" # move to "execute"
876
877 # handshake with execution FSM, move to "wait" once acknowledged
878 with m.State("INSN_EXECUTE"):
879 comb += exec_insn_i_valid.eq(1) # trigger execute
880 with m.If(exec_insn_o_ready): # execute acknowledged us
881 m.next = "EXECUTE_WAIT"
882
883 with m.State("EXECUTE_WAIT"):
884 # wait on "core stop" release, at instruction end
885 # need to do this here, in case we are in a VL>1 loop
886 with m.If(~dbg.core_stop_o & ~core_rst):
887 comb += exec_pc_i_ready.eq(1)
888 # see https://bugs.libre-soc.org/show_bug.cgi?id=636
889 # the exception info needs to be blatted into
890 # pdecode.ldst_exc, and the instruction "re-run".
891 # when ldst_exc.happened is set, the PowerDecoder2
892 # reacts very differently: it re-writes the instruction
893 # with a "trap" (calls PowerDecoder2.trap()) which
894 # will *overwrite* whatever was requested and jump the
895 # PC to the exception address, as well as alter MSR.
896 # nothing else needs to be done other than to note
897 # the change of PC and MSR (and, later, SVSTATE)
898 with m.If(exc_happened):
899 mmu = core.fus.get_exc("mmu0")
900 ldst = core.fus.get_exc("ldst0")
901 with m.If(fetch_failed):
902 # instruction fetch: exception is from MMU
903 # reset instr_fault (highest priority)
904 sync += pdecode2.ldst_exc.eq(mmu)
905 sync += pdecode2.instr_fault.eq(0)
906 with m.Else():
907 # otherwise assume it was a LDST exception
908 sync += pdecode2.ldst_exc.eq(ldst)
909
910 with m.If(exec_pc_o_valid):
911
912 # was this the last loop iteration?
913 is_last = Signal()
914 cur_vl = cur_state.svstate.vl
915 comb += is_last.eq(next_srcstep == cur_vl)
916
917 # return directly to Decode if Execute generated an
918 # exception.
919 with m.If(pdecode2.ldst_exc.happened):
920 m.next = "DECODE_SV"
921
922 # if either PC or SVSTATE were changed by the previous
923 # instruction, go directly back to Fetch, without
924 # updating either PC or SVSTATE
925 with m.Elif(pc_changed | sv_changed):
926 m.next = "ISSUE_START"
927
928 # also return to Fetch, when no output was a vector
929 # (regardless of SRCSTEP and VL), or when the last
930 # instruction was really the last one of the VL loop
931 with m.Elif((~pdecode2.loop_continue) | is_last):
932 # before going back to fetch, update the PC state
933 # register with the NIA.
934 # ok here we are not reading the branch unit.
935 # TODO: this just blithely overwrites whatever
936 # pipeline updated the PC
937 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
938 comb += self.state_w_pc.i_data.eq(nia)
939 # reset SRCSTEP before returning to Fetch
940 if self.svp64_en:
941 with m.If(pdecode2.loop_continue):
942 comb += new_svstate.srcstep.eq(0)
943 comb += new_svstate.dststep.eq(0)
944 comb += update_svstate.eq(1)
945 else:
946 comb += new_svstate.srcstep.eq(0)
947 comb += new_svstate.dststep.eq(0)
948 comb += update_svstate.eq(1)
949 m.next = "ISSUE_START"
950
951 # returning to Execute? then, first update SRCSTEP
952 with m.Else():
953 comb += new_svstate.srcstep.eq(next_srcstep)
954 comb += new_svstate.dststep.eq(next_dststep)
955 comb += update_svstate.eq(1)
956 # return to mask skip loop
957 m.next = "PRED_SKIP"
958
959 with m.Else():
960 comb += dbg.core_stopped_i.eq(1)
961 # while stopped, allow updating the PC and SVSTATE
962 with m.If(self.pc_i.ok):
963 comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
964 comb += self.state_w_pc.i_data.eq(self.pc_i.data)
965 sync += pc_changed.eq(1)
966 with m.If(self.svstate_i.ok):
967 comb += new_svstate.eq(self.svstate_i.data)
968 comb += update_svstate.eq(1)
969 sync += sv_changed.eq(1)
970
971 # check if svstate needs updating: if so, write it to State Regfile
972 with m.If(update_svstate):
973 comb += self.state_w_sv.wen.eq(1 << StateRegs.SVSTATE)
974 comb += self.state_w_sv.i_data.eq(new_svstate)
975 sync += cur_state.svstate.eq(new_svstate) # for next clock
976
def execute_fsm(self, m, core, pc_changed, sv_changed,
                exec_insn_i_valid, exec_insn_o_ready,
                exec_pc_o_valid, exec_pc_i_ready):
    """Build the "execute" FSM (named "exec_fsm") inside module ``m``.

    The FSM talks to the "issue" FSM through two ready/valid pairs:

    * ``exec_insn_i_valid`` / ``exec_insn_o_ready`` (incoming): issue
      hands over an instruction to run.
    * ``exec_pc_o_valid`` / ``exec_pc_i_ready`` (outgoing): execute
      reports that the core is no longer busy.

    ``pc_changed`` and ``sv_changed`` are cleared when an instruction is
    handed over, and set while it runs whenever ``self.state_nia.wen``
    has the PC (respectively SVSTATE) bit raised.

    Any SVP64 RM prefix has already been dealt with by the issue phase.
    """
    comb, sync = m.d.comb, m.d.sync
    nia_wen = self.state_nia.wen
    exc_happened = self.pdecode2.ldst_exc.happened

    with m.FSM(name="exec_fsm"):

        # idle: advertise readiness and wait to be handed an instruction
        with m.State("INSN_START"):
            comb += exec_insn_o_ready.eq(1)
            with m.If(exec_insn_i_valid):
                # present the instruction to the core as valid/issued
                comb += core.p.i_valid.eq(1)
                # start with a clean "state was modified" record
                sync += pc_changed.eq(0)
                sync += sv_changed.eq(0)
                # only advance once the core has accepted it
                with m.If(core.p.o_ready):
                    m.next = "INSN_ACTIVE"

        # running: stay here until the core stops being busy
        with m.State("INSN_ACTIVE"):
            # record any write to PC or SVSTATE made in the meantime
            with m.If(nia_wen & (1 << StateRegs.PC)):
                sync += pc_changed.eq(1)
            with m.If(nia_wen & (1 << StateRegs.SVSTATE)):
                sync += sv_changed.eq(1)

            with m.If(~core.n.o_data.busy_o):
                comb += exec_pc_o_valid.eq(1)
                with m.If(exec_pc_i_ready):
                    # An exception is implemented by copying the exception
                    # information back into PowerDecoder2 and re-running
                    # the whole instruction.  Flagging "done" in that case
                    # would make it look like *two* instructions ran
                    # (the failed LDST, then a TRAP), so insn_done is
                    # only raised when no exception is pending.
                    with m.If(~exc_happened):
                        comb += self.insn_done.eq(1)
                    # either way, go back and wait for the next hand-over
                    m.next = "INSN_START"
1031
def setup_peripherals(self, m):
    """Attach core, debug and peripheral submodules; set up clocks/resets.

    Everything except debug/JTAG is renamed into the "coresync" clock
    domain.  Debug and JTAG are renamed into ``self.dbg_domain``: the
    debug module must be clocked like the core but must NOT share its
    reset, so it lives in its own domain driven by the *external* reset.

    Besides adding submodules this method:

    * creates the "por", "sync" and "coresync" clock domains (plus the
      debug domain when it is not "sync");
    * generates a power-on reset that is held while a 2-bit down-counter
      (reset value 3) is non-zero;
    * drives the coresync reset from power-on reset, the debug core-reset
      request and the main reset;
    * wires ``busy_o``, decoder endianness, and the temporary LD/ST
      "go" hack.
    """
    comb, sync = m.d.comb, m.d.sync

    csd = DomainRenamer("coresync")
    dbd = DomainRenamer(self.dbg_domain)

    m.submodules.core = core = csd(self.core)
    # this _so_ needs sorting out.  ICache is added down inside
    # LoadStore1 and is already a submodule of LoadStore1
    if not isinstance(self.imem, ICache):
        m.submodules.imem = csd(self.imem)
    m.submodules.dbg = dbg = dbd(self.dbg)
    if self.jtag_en:
        m.submodules.jtag = jtag = dbd(self.jtag)
        # TODO: UART2GDB mux, here, from external pin
        # see https://bugs.libre-soc.org/show_bug.cgi?id=499
        sync += dbg.dmi.connect_to(jtag.dmi)

    # 4x 4k SRAM blocks.  these simply "exist", they get routed in litex
    if self.sram4x4k:
        for i, sram in enumerate(self.sram4k):
            m.submodules["sram4k_%d" % i] = csd(sram)
            comb += sram.enable.eq(self.wb_sram_en)

    # XICS interrupt handler
    if self.xics:
        m.submodules.xics_icp = icp = csd(self.xics_icp)
        m.submodules.xics_ics = ics = csd(self.xics_ics)
        comb += icp.ics_i.eq(ics.icp_o)  # connect ICS to ICP
        sync += self.cur_state.eint.eq(icp.core_irq_o)  # ICP to core

    # GPIO test peripheral
    if self.gpio:
        m.submodules.simple_gpio = csd(self.simple_gpio)

    # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
    # XXX causes litex ECP5 test to get wrong idea about input and output
    # (but works with verilator sim *sigh*)
    # if self.gpio and self.xics:
    #     comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

    # instruction decoder
    # NOTE(review): the result of create_pdecode() is never used in this
    # method; the call is kept because its side effects (if any) are not
    # visible from here - confirm and remove.
    pdecode = create_pdecode()
    m.submodules.dec2 = pdecode2 = csd(self.pdecode2)
    if self.svp64_en:
        m.submodules.svp64 = csd(self.svp64)

    # clock delay power-on reset.  the local names matter: an unnamed
    # ClockDomain takes its name from the variable it is assigned to
    cd_por = ClockDomain(reset_less=True)
    cd_sync = ClockDomain()
    core_sync = ClockDomain("coresync")
    m.domains += cd_por, cd_sync, core_sync
    if self.dbg_domain != "sync":
        dbg_sync = ClockDomain(self.dbg_domain)
        m.domains += dbg_sync

    ti_rst = Signal(reset_less=True)
    delay = Signal(range(4), reset=3)
    with m.If(delay != 0):
        m.d.por += delay.eq(delay - 1)
    comb += cd_por.clk.eq(ClockSignal())

    # power-on reset delay.
    # the comparison MUST be parenthesised: in Python "|" binds tighter
    # than "!=", so the unparenthesised form compared delay against
    # (core_rst_o | reset) instead of OR-ing three reset sources.
    core_rst = ResetSignal("coresync")
    comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
    comb += core_rst.eq(ti_rst)

    # debug clock is same as coresync, but reset is *main external*
    if self.dbg_domain != "sync":
        dbg_rst = ResetSignal(self.dbg_domain)
        comb += dbg_rst.eq(ResetSignal())

    # busy/halted signals from core
    core_busy_o = ~core.p.o_ready | core.n.o_data.busy_o  # core is busy
    comb += self.busy_o.eq(core_busy_o)
    comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

    # temporary hack: says "go" immediately for both address gen and ST
    ldst = core.fus.fus['ldst0']
    st_go_edge = rising_edge(m, ldst.st.rel_o)
    # link addr-go direct to rel
    comb += ldst.ad.go_i.eq(ldst.ad.rel_o)
    # link store-go to rising rel
    comb += ldst.st.go_i.eq(st_go_edge)
1126
def elaborate(self, platform):
    """Assemble the issuer: peripherals, state reads and the FSMs.

    Creates the Module, lets ``setup_peripherals`` add the submodules and
    clock domains, reads PC/SVSTATE (either from the incoming override or
    from the state regfile), then instantiates the Fetch FSM and wires it
    to the issue, (optional, SVP64 only) predicate-fetch and execute FSMs
    through ready/valid handshake signals.  DMI regfile reads and the
    DEC/TB update FSM are added last.

    Returns the populated Module.
    """
    m = Module()
    comb, sync = m.d.comb, m.d.sync
    cur_state, pdecode2 = self.cur_state, self.pdecode2
    dbg, core = self.dbg, self.core

    # set up peripherals and core
    core_rst = self.core_rst
    self.setup_peripherals(m)

    # a core reset request clears the current state
    with m.If(core_rst):
        sync += cur_state.eq(0)

    # PC and instruction from I-Memory
    comb += self.pc_o.eq(cur_state.pc)
    pc_changed = Signal()  # note write to PC
    sv_changed = Signal()  # note write to SVSTATE

    # tell the outside world whether any FU is still executing
    comb += self.any_busy.eq(core.n.o_data.any_busy_o)

    # read state either from incoming override or from regfile
    # TODO: really should be doing MSR in the same way
    pc = state_get(m, core_rst, self.pc_i, "pc",
                   self.state_r_pc, StateRegs.PC)
    svstate = state_get(m, core_rst, self.svstate_i, "svstate",
                        self.state_r_sv, StateRegs.SVSTATE)

    # default: don't write pc every cycle (the FSMs override this)
    state_w_pc = self.state_w_pc
    comb += [state_w_pc.wen.eq(0),
             state_w_pc.i_data.eq(0)]

    # address of the next instruction, in the absence of a branch
    # depends on the instruction size
    nia = Signal(64)

    # connect up debug signals
    # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
    comb += [dbg.terminate_i.eq(core.o.core_terminate_o),
             dbg.state.pc.eq(pc),
             dbg.state.svstate.eq(svstate),
             dbg.state.msr.eq(cur_state.msr)]

    # pass the prefix mode from Fetch to Issue, so the latter can loop
    # on VL==0
    is_svp64_mode = Signal()

    # There are three FSMs (four when SVP64 is enabled): fetch
    # (32/64-bit), issue, decode/execute and "predicate fetch/calculate".
    # The Signals below are the handshakes between them.  Each one is
    # created on its own line so that it keeps its inferred name.

    # fetch FSM can run as soon as the PC is valid
    fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
    fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"

    # fetch FSM hands over the instruction to be decoded / issued
    fetch_insn_o_valid = Signal()
    fetch_insn_i_ready = Signal()

    # predicate fetch FSM decodes and fetches the predicate
    pred_insn_i_valid = Signal()
    pred_insn_o_ready = Signal()

    # predicate fetch FSM delivers the masks
    pred_mask_o_valid = Signal()
    pred_mask_i_ready = Signal()

    # issue FSM delivers the instruction to be executed
    exec_insn_i_valid = Signal()
    exec_insn_o_ready = Signal()

    # execute FSM, hands over the PC/SVSTATE back to the issue FSM
    exec_pc_o_valid = Signal()
    exec_pc_i_ready = Signal()

    # bundles, in the positional order the FSM builders expect
    fetch_handshake = (fetch_pc_o_ready, fetch_pc_i_valid,
                       fetch_insn_o_valid, fetch_insn_i_ready)
    pred_handshake = (pred_insn_i_valid, pred_insn_o_ready,
                      pred_mask_o_valid, pred_mask_i_ready)
    exec_handshake = (exec_insn_i_valid, exec_insn_o_ready,
                      exec_pc_o_valid, exec_pc_i_ready)

    # the FSMs here are perhaps unusual in that they detect conditions
    # then "hold" information, combinatorially, for the core
    # (as opposed to using sync - which would be on a clock's delay)
    # this includes the actual opcode, valid flags and so on.

    # Fetch, then predicate fetch, then Issue, then Execute.
    # Issue is where the VL for-loop lives.  the ready/valid
    # signalling is used to communicate between the four.

    # set up Fetch FSM
    fetch = FetchFSM(self.allow_overlap, self.svp64_en,
                     self.imem, core_rst, pdecode2, cur_state,
                     dbg, core, svstate, nia, is_svp64_mode)
    m.submodules.fetch = fetch
    # connect up in/out data to existing Signals
    comb += fetch.p.i_data.pc.eq(pc)
    # and the ready/valid signalling
    comb += [fetch_pc_o_ready.eq(fetch.p.o_ready),
             fetch.p.i_valid.eq(fetch_pc_i_valid),
             fetch_insn_o_valid.eq(fetch.n.o_valid),
             fetch.n.i_ready.eq(fetch_insn_i_ready)]

    self.issue_fsm(m, core, pc_changed, sv_changed, nia,
                   dbg, core_rst, is_svp64_mode,
                   *fetch_handshake, *pred_handshake, *exec_handshake)

    if self.svp64_en:
        self.fetch_predicate_fsm(m, *pred_handshake)

    self.execute_fsm(m, core, pc_changed, sv_changed, *exec_handshake)

    # this bit doesn't have to be in the FSM: connect up to read
    # regfiles on demand from DMI
    self.do_dmi(m, dbg)

    # DEC and TB inc/dec FSM.  copy of DEC is put into CoreState,
    # (which uses that in PowerDecoder2 to raise 0x900 exception)
    self.tb_dec_fsm(m, cur_state.dec)

    return m
1257
def do_dmi(self, m, dbg):
    """Serve DMI debug read requests for the INT, CR and XER regfiles.

    For each of the three request interfaces on ``dbg`` (``d_gpr``,
    ``d_cr``, ``d_xer``): while ``req`` is high the matching regfile read
    port is enabled; one clock later the read data is returned together
    with ``ack``.  Only reads are provided; writing to these regfiles is
    expected to be added later.
    """
    comb, sync = m.d.comb, m.d.sync
    d_reg, d_cr, d_xer = dbg.d_gpr, dbg.d_cr, dbg.d_xer
    int_r, cr_r, xer_r = self.int_r, self.cr_r, self.xer_r
    int_is_unary = self.core.regs.rf['int'].unary

    # ---- INT regfile ----
    with m.If(d_reg.req):
        # TODO: error-check this
        # XXX should this be combinatorial?  sync better?
        if int_is_unary:
            # unary-addressed regfile: the read-enable *is* the address
            comb += int_r.ren.eq(1 << d_reg.addr)
        else:
            comb += [int_r.addr.eq(d_reg.addr),
                     int_r.ren.eq(1)]
    d_reg_delay = Signal()
    sync += d_reg_delay.eq(d_reg.req)
    with m.If(d_reg_delay):
        # data arrives one clock later
        comb += [d_reg.data.eq(int_r.o_data),
                 d_reg.ack.eq(1)]

    # ---- CR: same scheme, all 8 read-enable bits set ----
    with m.If(d_cr.req):
        comb += cr_r.ren.eq(0b11111111)
    d_cr_delay = Signal()
    sync += d_cr_delay.eq(d_cr.req)
    with m.If(d_cr_delay):
        # data arrives one clock later
        comb += [d_cr.data.eq(cr_r.o_data),
                 d_cr.ack.eq(1)]

    # ---- XER: same scheme, all 6 read-enable bits set ----
    with m.If(d_xer.req):
        comb += xer_r.ren.eq(0b111111)
    d_xer_delay = Signal()
    sync += d_xer_delay.eq(d_xer.req)
    with m.If(d_xer_delay):
        # data arrives one clock later
        comb += [d_xer.data.eq(xer_r.o_data),
                 d_xer.ack.eq(1)]
1303
def tb_dec_fsm(self, m, spr_dec):
    """Add the FSM that continuously updates DEC and TB.

    The FSM cycles through four states, one per clock:
    read DEC, write DEC-1, read TB, write TB+1, and round again.
    Both registers are accessed through the 'issue' read and write
    ports of the 'fast' regfile.  Each new DEC value is also latched
    into ``spr_dec`` so the decoder can see it.

    Note that the SPR pipeline could have written a new value to DEC;
    however the regfile has "passthrough" on it so this *should* be ok.

    See v3.0B p1097-1099 for Timer Resource and p1065 and p1076.

    Returns the module ``m`` that was passed in.
    """
    comb, sync = m.d.comb, m.d.sync
    fast_ports = self.core.regs.rf['fast']
    rd_port = fast_ports.r_ports['issue']  # shared DEC/TB read port
    wr_port = fast_ports.w_ports['issue']  # shared DEC/TB write port

    with m.FSM():

        # kick off a read of the current DEC
        with m.State("DEC_READ"):
            comb += [rd_port.addr.eq(FastRegs.DEC),
                     rd_port.ren.eq(1)]
            m.next = "DEC_WRITE"

        # DEC read data is available (1 cycle later): write back DEC-1
        with m.State("DEC_WRITE"):
            new_dec = Signal(64)
            # TODO: MSR.LPCR 32-bit decrement mode
            comb += new_dec.eq(rd_port.o_data - 1)
            comb += [wr_port.addr.eq(FastRegs.DEC),
                     wr_port.wen.eq(1),
                     wr_port.i_data.eq(new_dec)]
            # keep a copy in cur_state for the decoder
            sync += spr_dec.eq(new_dec)
            m.next = "TB_READ"

        # kick off a read of the current TB
        with m.State("TB_READ"):
            comb += [rd_port.addr.eq(FastRegs.TB),
                     rd_port.ren.eq(1)]
            m.next = "TB_WRITE"

        # TB read data is available: write back TB+1
        with m.State("TB_WRITE"):
            new_tb = Signal(64)
            comb += new_tb.eq(rd_port.o_data + 1)
            comb += [wr_port.addr.eq(FastRegs.TB),
                     wr_port.wen.eq(1),
                     wr_port.i_data.eq(new_tb)]
            m.next = "DEC_READ"

    return m
1355
def __iter__(self):
    """Yield this object's ports one at a time.

    Order: the ports of ``pc_i``, then ``pc_o`` and ``memerr_o``, then
    the core's ports, the instruction memory's ports, and finally
    ``core_bigendian_i`` and ``busy_o``.
    """
    for port in self.pc_i.ports():
        yield port
    yield self.pc_o
    yield self.memerr_o
    for port in self.core.ports():
        yield port
    for port in self.imem.ports():
        yield port
    yield self.core_bigendian_i
    yield self.busy_o
1364
def ports(self):
    """Return, as a new list, everything produced by iterating ``self``."""
    return [port for port in self]
1367
def external_ports(self):
    """Return the list of ports exposed at the top level.

    Always included: the ``pc_i`` ports, ``pc_o``, ``memerr_o``,
    ``core_bigendian_i``, ``busy_o``, the instruction bus fields and the
    fields of the bus returned by ``core.l0.cmpi.wb_bus()``.

    Optional, depending on configuration flags:

    * ``jtag_en``: the JTAG external ports *instead of* the DMI ports;
    * ``sram4x4k``: the bus fields of every 4k SRAM;
    * ``xics``: the ICP and ICS bus fields plus ``int_level_i``;
    * ``gpio``: the GPIO bus fields plus ``gpio_o``.

    A fresh list is returned on every call.  (A second, identical
    ``ports()`` definition that used to follow this method was redundant
    with the one defined earlier in the class and has been dropped.)
    """
    # copy first: never extend a list owned by pc_i in place
    ports = list(self.pc_i.ports())
    ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o]

    if self.jtag_en:
        ports += list(self.jtag.external_ports())
    else:
        # DMI is only exposed directly when JTAG is not enabled
        ports += list(self.dbg.dmi.ports())

    ports += list(self.imem.ibus.fields.values())
    ports += list(self.core.l0.cmpi.wb_bus().fields.values())

    if self.sram4x4k:
        for sram in self.sram4k:
            ports += list(sram.bus.fields.values())

    if self.xics:
        ports += list(self.xics_icp.bus.fields.values())
        ports += list(self.xics_ics.bus.fields.values())
        ports.append(self.int_level_i)

    if self.gpio:
        ports += list(self.simple_gpio.bus.fields.values())
        ports.append(self.gpio_o)

    return ports
1399
1400
class TestIssuer(Elaboratable):
    """Top-level wrapper: a TestIssuerInternal plus (optional) PLL wiring.

    The PLL object is always created.  It is only added as a submodule
    and wired up when ``pspec.use_pll`` exists and is true.
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL(instance=True)

        # PLL direct clock or not
        self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
        if self.pll_en:
            self.pll_test_o = Signal(reset_less=True)
            self.pll_vco_o = Signal(reset_less=True)
            self.clk_sel_i = Signal(2, reset_less=True)
            self.ref_clk = ClockSignal()  # can't rename it but that's ok
            self.pllclk_clk = ClockSignal("pllclk")

    def elaborate(self, platform):
        """Build the wrapper module and drive the core/debug clocks.

        The "coresync" clock is driven from ``ref_clk`` when the PLL is
        enabled and from the default clock otherwise (the PLL output is
        currently bypassed).  When the debug domain is not "sync", its
        clock is tied to the coresync clock.
        """
        m = Module()
        comb = m.d.comb
        dbg_domain = self.ti.dbg_domain

        # TestIssuer nominally runs at main clock, actually it is
        # all combinatorial internally except for coresync'd components
        m.submodules.ti = self.ti

        if self.pll_en:
            # ClockSelect runs at PLL output internal clock rate
            m.submodules.wrappll = pll = self.pll

            # add clock domain from PLL and drive its clock
            m.domains += ClockDomain("pllclk")
            comb += self.pllclk_clk.eq(pll.clk_pll_o)

            # wiring the external 24mhz reference to the PLL is disabled:
            # comb += pll.clk_24_i.eq(self.ref_clk)

            # output 18 mhz PLL test signal, and analog oscillator out
            comb += self.pll_test_o.eq(pll.pll_test_o)
            comb += self.pll_vco_o.eq(pll.pll_vco_o)

            # input to pll clock selection
            comb += pll.clk_sel_i.eq(self.clk_sel_i)

            # now wire up ResetSignals.  don't mind them being in this domain
            comb += ResetSignal("pllclk").eq(ResetSignal())

        # The "coresync" (and debug) ClockDomains themselves are NOT
        # created here: the objects the original built were never added
        # to m.domains, so they were dead code and have been removed.
        intclk = ClockSignal("coresync")

        # XXX BYPASS PLL XXX
        if self.pll_en:
            comb += intclk.eq(self.ref_clk)
        else:
            comb += intclk.eq(ClockSignal())

        # debug clock runs at coresync internal clock
        if dbg_domain != 'sync':
            comb += ClockSignal(dbg_domain).eq(intclk)

        return m

    def ports(self):
        """Return issuer ports, PLL ports, then the clock and reset."""
        return list(self.ti.ports()) + list(self.pll.ports()) + \
            [ClockSignal(), ResetSignal()]

    def external_ports(self):
        """Return the issuer's external ports plus clock, reset and,
        when the PLL is enabled, the PLL-related pins."""
        # copy, so the list returned by the inner issuer is left untouched
        ports = list(self.ti.external_ports())
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        if self.pll_en:
            ports.append(self.clk_sel_i)
            ports.append(self.pll.clk_24_i)
            ports.append(self.pll_test_o)
            ports.append(self.pll_vco_o)
            ports.append(self.pllclk_clk)
            ports.append(self.ref_clk)
        return ports
1488
1489
if __name__ == '__main__':
    # one Function Unit of each of these kinds
    unit_kinds = ('alu', 'cr', 'branch', 'trap', 'logical',
                  'spr', 'div', 'mul', 'shiftrot')
    units = {kind: 1 for kind in unit_kinds}

    pspec = TestMemPspec(ldst_ifacetype='bare_wb',
                         imem_ifacetype='bare_wb',
                         addr_wid=48,
                         mask_wid=8,
                         reg_wid=64,
                         units=units)
    dut = TestIssuer(pspec)

    # let the nmigen command-line front-end handle any arguments
    main(dut, ports=dut.ports(), name="test_issuer")

    # no arguments given: also write RTLIL using the external port list
    if len(sys.argv) == 1:
        rtlil_text = rtlil.convert(dut, ports=dut.external_ports(),
                                   name="test_issuer")
        with open("test_issuer.il", "w") as f:
            f.write(rtlil_text)