Allow the formal engine to perform a same-cycle result in the ALU
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from soc.decoder.power_decoder import create_pdecode
25 from soc.decoder.power_decoder2 import PowerDecode2
26 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
27 from soc.decoder.decode2execute1 import Data
28 from soc.experiment.testmem import TestMemory # test only for instructions
29 from soc.regfile.regfiles import StateRegs, FastRegs
30 from soc.simple.core import NonProductionCore
31 from soc.config.test.test_loadstore import TestMemPspec
32 from soc.config.ifetch import ConfigFetchUnit
33 from soc.decoder.power_enums import MicrOp
34 from soc.debug.dmi import CoreDebug, DMIInterface
35 from soc.debug.jtag import JTAG
36 from soc.config.pinouts import get_pinspecs
37 from soc.config.state import CoreState
38 from soc.interrupts.xics import XICS_ICP, XICS_ICS
39 from soc.bus.simple_gpio import SimpleGPIO
40 from soc.clock.select import ClockSelect
41 from soc.clock.dummypll import DummyPLL
42
43
44 from nmutil.util import rising_edge
45
46
class TestIssuerInternal(Elaboratable):
    """TestIssuer - reads instructions from TestMemory and issues them

    efficiency and speed is not the main goal here: functional correctness is.

    a four-state FSM (IDLE, INSN_READ, INSN_START, INSN_ACTIVE) fetches
    one instruction at a time, hands the decoded result to the core,
    waits for the core to stop being busy, then writes PC+4 back to the
    state regfile unless a write to the PC was observed on the "nia"
    write port while the instruction was active.
    """

    def __init__(self, pspec):
        """create the core, decoder, fetch unit, debug and optional blocks

        pspec: parameter specification object.  the optional attributes
               "debug" (== 'jtag'), "xics" and "gpio" are examined here,
               and the object is also passed to NonProductionCore and
               ConfigFetchUnit.  NOTE: when JTAG is enabled, pspec is
               *modified*: wb_icache_en and wb_dcache_en are added to it.
        """

        # JTAG interface.  add this right at the start because if it's
        # added it *modifies* the pspec, by adding enable/disable signals
        # for parts of the rest of the core
        self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
        if self.jtag_en:
            subset = {'uart', 'mtwi', 'eint', 'gpio', 'mspi0', 'mspi1',
                      'pwm', 'sd0', 'sdr'}
            self.jtag = JTAG(get_pinspecs(subset=subset))
            # add signals to pspec to enable/disable icache and dcache
            # (or data and instruction wishbone if icache/dcache not included)
            # https://bugs.libre-soc.org/show_bug.cgi?id=520
            # TODO: do we actually care if these are not domain-synchronised?
            # honestly probably not.
            pspec.wb_icache_en = self.jtag.wb_icache_en
            pspec.wb_dcache_en = self.jtag.wb_dcache_en

        # add interrupt controller?
        self.xics = hasattr(pspec, "xics") and pspec.xics == True
        if self.xics:
            self.xics_icp = XICS_ICP()
            self.xics_ics = XICS_ICS()
            self.int_level_i = self.xics_ics.int_level_i

        # add GPIO peripheral?
        self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
        if self.gpio:
            self.simple_gpio = SimpleGPIO()
            self.gpio_o = self.simple_gpio.gpio_o

        # main instruction core
        self.core = core = NonProductionCore(pspec)

        # instruction decoder.  goes into Trap Record
        pdecode = create_pdecode()
        self.cur_state = CoreState("cur")  # current state (MSR/PC/EINT)
        self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state,
                                     opkls=IssuerDecode2ToOperand)

        # Test Instruction memory
        self.imem = ConfigFetchUnit(pspec).fu
        # one-row cache of instruction read
        self.iline = Signal(64)  # one instruction line
        self.iprev_adr = Signal(64)  # previous address: if different, do read

        # DMI interface
        self.dbg = CoreDebug()

        # instruction go/monitor
        self.pc_o = Signal(64, reset_less=True)
        self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
        self.core_bigendian_i = Signal()
        self.busy_o = Signal(reset_less=True)
        self.memerr_o = Signal(reset_less=True)

        # STATE regfile read/write ports for PC and MSR
        staterf = self.core.regs.rf['state']
        self.state_r_pc = staterf.r_ports['cia']  # PC rd
        self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
        self.state_r_msr = staterf.r_ports['msr']  # MSR rd

        # DMI interface access
        intrf = self.core.regs.rf['int']
        crrf = self.core.regs.rf['cr']
        xerrf = self.core.regs.rf['xer']
        self.int_r = intrf.r_ports['dmi']  # INT read
        self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read
        self.xer_r = xerrf.r_ports['full_xer']  # XER read

        # hack method of keeping an eye on whether branch/trap set the PC
        self.state_nia = self.core.regs.rf['state'].w_ports['nia']
        self.state_nia.wen.name = 'state_nia_wen'

    def elaborate(self, platform):
        """build the issuer and return the nmigen Module

        sets up submodules, the "por", "sync" and "coresync" clock
        domains, the power-on reset delay, the fetch/issue FSM, the
        DMI-driven regfile read ports and (via tb_dec_fsm) the DEC/TB
        update FSM.
        """
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        m.submodules.core = core = DomainRenamer("coresync")(self.core)
        m.submodules.imem = imem = self.imem
        m.submodules.dbg = dbg = self.dbg
        if self.jtag_en:
            m.submodules.jtag = jtag = self.jtag
            # TODO: UART2GDB mux, here, from external pin
            # see https://bugs.libre-soc.org/show_bug.cgi?id=499
            sync += dbg.dmi.connect_to(jtag.dmi)

        cur_state = self.cur_state

        # XICS interrupt handler
        if self.xics:
            m.submodules.xics_icp = icp = self.xics_icp
            m.submodules.xics_ics = ics = self.xics_ics
            comb += icp.ics_i.eq(ics.icp_o)  # connect ICS to ICP
            sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core

        # GPIO test peripheral
        if self.gpio:
            m.submodules.simple_gpio = simple_gpio = self.simple_gpio

        # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
        # XXX causes litex ECP5 test to get wrong idea about input and output
        # (but works with verilator sim *sigh*)
        #if self.gpio and self.xics:
        #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

        # instruction decoder (the one created in __init__: do not create
        # a second, unused decoder here)
        m.submodules.dec2 = pdecode2 = self.pdecode2

        # convenience
        dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
        intrf = self.core.regs.rf['int']

        # clock delay power-on reset
        # NOTE: do not rename cd_por / cd_sync: no explicit domain name is
        # given, so the name is taken from the variable (m.d.por is used
        # below and relies on this).
        cd_por = ClockDomain(reset_less=True)
        cd_sync = ClockDomain()
        core_sync = ClockDomain("coresync")
        m.domains += cd_por, cd_sync, core_sync

        ti_rst = Signal(reset_less=True)
        delay = Signal(range(4), reset=3)
        with m.If(delay != 0):
            m.d.por += delay.eq(delay - 1)
        comb += cd_por.clk.eq(ClockSignal())

        # power-on reset delay.  core is held in reset while the delay
        # counter is non-zero, or on debug-requested or external reset.
        # the brackets round the comparison are essential: in python "|"
        # binds tighter than "!=", so without them this would compare
        # delay against (0 | core_rst_o | rst).
        core_rst = ResetSignal("coresync")
        comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
        comb += core_rst.eq(ti_rst)

        # busy/halted signals from core
        comb += self.busy_o.eq(core.busy_o)
        comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

        # temporary hack: says "go" immediately for both address gen and ST
        ldst = core.fus.fus['ldst0']
        st_go_edge = rising_edge(m, ldst.st.rel_o)
        m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)  # link addr-go direct to rel
        m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel

        # PC and instruction from I-Memory
        pc_changed = Signal()  # note write to PC
        comb += self.pc_o.eq(cur_state.pc)
        ilatch = Signal(32)

        # next instruction (+4 on current)
        nia = Signal(64, reset_less=True)
        comb += nia.eq(cur_state.pc + 4)

        # read the PC
        pc = Signal(64, reset_less=True)
        pc_ok_delay = Signal()
        sync += pc_ok_delay.eq(~self.pc_i.ok)
        with m.If(self.pc_i.ok):
            # incoming override (start from pc_i)
            comb += pc.eq(self.pc_i.data)
        with m.Else():
            # otherwise read StateRegs regfile for PC...
            comb += self.state_r_pc.ren.eq(1<<StateRegs.PC)
        # ... but on a 1-clock delay
        with m.If(pc_ok_delay):
            comb += pc.eq(self.state_r_pc.data_o)

        # don't write pc every cycle
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.data_i.eq(0)

        # don't read msr every cycle
        comb += self.state_r_msr.ren.eq(0)
        msr_read = Signal(reset=1)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        #comb += dbg.state.pc.eq(cur_state.pc)
        comb += dbg.state.msr.eq(cur_state.msr)

        # temporaries
        core_busy_o = core.busy_o  # core is busy
        core_ivalid_i = core.ivalid_i  # instruction is valid
        core_issue_i = core.issue_i  # instruction is issued
        dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

        insn_type = core.e.do.insn_type

        # actually use a nmigen FSM for the first time (w00t)
        # this FSM is perhaps unusual in that it detects conditions
        # then "holds" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.
        with m.FSM() as fsm:

            # waiting (zzz)
            with m.State("IDLE"):
                sync += pc_changed.eq(0)
                sync += core.e.eq(0)
                sync += core.raw_insn_i.eq(0)
                sync += core.bigendian_i.eq(0)
                with m.If(~dbg.core_stop_o & ~core_rst):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                    sync += cur_state.pc.eq(pc)

                    # initiate read of MSR.  arrives one clock later
                    comb += self.state_r_msr.ren.eq(1<<StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase
                with m.Else():
                    comb += core.core_stopped_i.eq(1)
                    comb += dbg.core_stopped_i.eq(1)

            # waiting for instruction bus (stays here until not busy)
            with m.State("INSN_READ"):
                # one cycle later, msr read arrives.  valid only once.
                with m.If(~msr_read):
                    sync += msr_read.eq(1)  # yeah don't read it again
                    sync += cur_state.msr.eq(self.state_r_msr.data_o)
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    f_instr_o = self.imem.f_instr_o
                    if f_instr_o.width == 32:
                        insn = f_instr_o
                    else:
                        # wider fetch: PC bit 2 selects the 32-bit word
                        insn = f_instr_o.word_select(cur_state.pc[2], 32)
                    comb += dec_opcode_i.eq(insn)  # actual opcode
                    sync += core.e.eq(pdecode2.e)
                    # also drop PC and MSR into decode "state"
                    sync += core.state.eq(cur_state)
                    sync += core.raw_insn_i.eq(dec_opcode_i)
                    sync += core.bigendian_i.eq(self.core_bigendian_i)
                    sync += ilatch.eq(insn)  # latch current insn
                    m.next = "INSN_START"  # move to "start"

            # one-cycle state: flag the instruction as valid and issue it
            with m.State("INSN_START"):
                comb += core_ivalid_i.eq(1)  # instruction is valid
                comb += core_issue_i.eq(1)  # and issued

                m.next = "INSN_ACTIVE"  # move to "wait completion"

            # instruction started: must wait till it finishes
            with m.State("INSN_ACTIVE"):
                with m.If(insn_type != MicrOp.OP_NOP):
                    comb += core_ivalid_i.eq(1)  # instruction is valid
                with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
                    sync += pc_changed.eq(1)
                with m.If(~core_busy_o):  # instruction done!
                    # ok here we are not reading the branch unit.  TODO
                    # this just blithely overwrites whatever pipeline
                    # updated the PC
                    with m.If(~pc_changed):
                        comb += self.state_w_pc.wen.eq(1<<StateRegs.PC)
                        comb += self.state_w_pc.data_i.eq(nia)
                    sync += core.e.eq(0)
                    sync += core.raw_insn_i.eq(0)
                    sync += core.bigendian_i.eq(0)
                    m.next = "IDLE"  # back to idle

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        with m.If(d_reg.req):  # request for regfile access being made
            # TODO: error-check this
            # XXX should this be combinatorial?  sync better?
            if intrf.unary:
                comb += self.int_r.ren.eq(1<<d_reg.addr)
            else:
                comb += self.int_r.addr.eq(d_reg.addr)
                comb += self.int_r.ren.eq(1)
        d_reg_delay = Signal()
        sync += d_reg_delay.eq(d_reg.req)
        with m.If(d_reg_delay):
            # data arrives one clock later
            comb += d_reg.data.eq(self.int_r.data_o)
            comb += d_reg.ack.eq(1)

        # sigh same thing for CR debug
        with m.If(d_cr.req):  # request for regfile access being made
            comb += self.cr_r.ren.eq(0b11111111)  # enable all
        d_cr_delay = Signal()
        sync += d_cr_delay.eq(d_cr.req)
        with m.If(d_cr_delay):
            # data arrives one clock later
            comb += d_cr.data.eq(self.cr_r.data_o)
            comb += d_cr.ack.eq(1)

        # aaand XER...
        with m.If(d_xer.req):  # request for regfile access being made
            comb += self.xer_r.ren.eq(0b111111)  # enable all
        d_xer_delay = Signal()
        sync += d_xer_delay.eq(d_xer.req)
        with m.If(d_xer_delay):
            # data arrives one clock later
            comb += d_xer.data.eq(self.xer_r.data_o)
            comb += d_xer.ack.eq(1)

        # DEC and TB inc/dec FSM
        self.tb_dec_fsm(m, cur_state.dec)

        return m

    def tb_dec_fsm(self, m, spr_dec):
        """tb_dec_fsm

        this is a FSM for updating either dec or tb.  it runs alternately
        DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
        value to DEC, however the regfile has "passthrough" on it so this
        *should* be ok.

        see v3.0B p1097-1099 for Timer Resource and p1065 and p1076

        m:       the Module to which the FSM is added
        spr_dec: signal that receives a copy of each newly-written DEC
                 value (elaborate passes cur_state.dec)

        returns m
        """

        comb, sync = m.d.comb, m.d.sync
        fast_rf = self.core.regs.rf['fast']
        fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
        fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB

        with m.FSM() as fsm:

            # initiates read of current DEC
            with m.State("DEC_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.DEC)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "DEC_WRITE"

            # waits for DEC read to arrive (1 cycle), updates with new value
            with m.State("DEC_WRITE"):
                new_dec = Signal(64)
                # TODO: MSR.LPCR 32-bit decrement mode
                comb += new_dec.eq(fast_r_dectb.data_o - 1)
                comb += fast_w_dectb.addr.eq(FastRegs.DEC)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_dec)
                sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
                m.next = "TB_READ"

            # initiates read of current TB
            with m.State("TB_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.TB)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "TB_WRITE"

            # waits for read TB to arrive, initiates write of current TB
            with m.State("TB_WRITE"):
                new_tb = Signal(64)
                comb += new_tb.eq(fast_r_dectb.data_o + 1)
                comb += fast_w_dectb.addr.eq(FastRegs.TB)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_tb)
                m.next = "DEC_READ"

        return m

    def __iter__(self):
        """yield the issuer's own signals plus the core and imem ports"""
        yield from self.pc_i.ports()
        yield self.pc_o
        yield self.memerr_o
        yield from self.core.ports()
        yield from self.imem.ports()
        yield self.core_bigendian_i
        yield self.busy_o

    def ports(self):
        """return everything produced by __iter__, as a list"""
        return list(self)

    def external_ports(self):
        """return the list of ports to expose at the top level

        always includes pc_i, pc_o, memerr_o, core_bigendian_i, busy_o,
        the instruction bus and the load/store slave bus.  JTAG external
        ports replace the DMI ports when JTAG is enabled; XICS and GPIO
        buses (plus int_level_i / gpio_o) are added when enabled.
        """
        # take a copy: the list is extended in-place below
        ports = list(self.pc_i.ports())
        ports += [self.pc_o, self.memerr_o, self.core_bigendian_i,
                  self.busy_o,
                  ]

        if self.jtag_en:
            ports += list(self.jtag.external_ports())
        else:
            # don't add DMI if JTAG is enabled
            ports += list(self.dbg.dmi.ports())

        ports += list(self.imem.ibus.fields.values())
        ports += list(self.core.l0.cmpi.lsmem.lsi.slavebus.fields.values())

        if self.xics:
            ports += list(self.xics_icp.bus.fields.values())
            ports += list(self.xics_ics.bus.fields.values())
            ports.append(self.int_level_i)

        if self.gpio:
            ports += list(self.simple_gpio.bus.fields.values())
            ports.append(self.gpio_o)

        return ports
456
457
class TestIssuer(Elaboratable):
    """wrapper round TestIssuerInternal that drives its "coresync" clock

    the "coresync" clock is driven either from the default clock or,
    when pspec.use_pll is set, from the DummyPLL clk_pll_o output.
    """

    def __init__(self, pspec):
        """pspec: parameter specification, passed on to TestIssuerInternal.
        the optional attribute "use_pll" selects the PLL clock.
        """
        self.ti = TestIssuerInternal(pspec)

        self.pll = DummyPLL()

        # PLL direct clock or not
        self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll
        if self.pll_en:
            self.pll_18_o = Signal(reset_less=True)

    def elaborate(self, platform):
        """add the issuer (and optionally the PLL) and wire up the clocks"""
        m = Module()
        comb = m.d.comb

        # TestIssuer runs at direct clock
        m.submodules.ti = ti = self.ti

        if self.pll_en:
            # the PLL is only instantiated when requested
            m.submodules.pll = pll = self.pll

            # add clock domains from PLL
            cd_pll = ClockDomain("pllclk")
            m.domains += cd_pll

            # PLL clock established: the "pllclk" domain is clocked
            # from the PLL output
            pllclk = ClockSignal("pllclk")
            comb += pllclk.eq(pll.clk_pll_o)

            # wire up external 24mhz to PLL
            comb += pll.clk_24_i.eq(ClockSignal())

            # output 18 mhz PLL test signal
            comb += self.pll_18_o.eq(pll.pll_18_o)

            # now wire up ResetSignals.  don't mind them being in this domain
            pll_rst = ResetSignal("pllclk")
            comb += pll_rst.eq(ResetSignal())

        # internal ("coresync") clock is set to the PLL output if enabled,
        # otherwise to the default clock.  ClockSignal("coresync") is only
        # referenced here: this class does not create that domain itself.
        intclk = ClockSignal("coresync")
        if self.pll_en:
            comb += intclk.eq(pll.clk_pll_o)
        else:
            comb += intclk.eq(ClockSignal())

        return m

    def ports(self):
        """return issuer ports, PLL ports and the default clock and reset"""
        return list(self.ti.ports()) + list(self.pll.ports()) + \
               [ClockSignal(), ResetSignal()]

    def external_ports(self):
        """return the issuer's external ports plus clock and reset, and
        (only if the PLL is enabled) the PLL select, test and lock pins
        """
        ports = self.ti.external_ports()
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        if self.pll_en:
            ports.append(self.pll.clk_sel_i)
            ports.append(self.pll_18_o)
            ports.append(self.pll.pll_lck_o)
        return ports
523
524
if __name__ == '__main__':
    # one Function Unit of each kind
    fu_names = ('alu', 'cr', 'branch', 'trap', 'logical',
                'spr',
                'div',
                'mul',
                'shiftrot')
    units = dict.fromkeys(fu_names, 1)

    # bare wishbone for both instruction fetch and load/store
    pspec_args = {
        'ldst_ifacetype': 'bare_wb',
        'imem_ifacetype': 'bare_wb',
        'addr_wid': 48,
        'mask_wid': 8,
        'reg_wid': 64,
        'units': units,
    }
    pspec = TestMemPspec(**pspec_args)
    dut = TestIssuer(pspec)

    # hand over to the nmigen command-line front-end (all ports)
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # no command-line arguments: also write out RTLIL, this time with
    # only the external ports
    if len(sys.argv) == 1:
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as f:
            f.write(vl)