litex sim.py operational
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to at least provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from soc.decoder.power_decoder import create_pdecode
25 from soc.decoder.power_decoder2 import PowerDecode2
26 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
27 from soc.decoder.decode2execute1 import Data
28 from soc.experiment.testmem import TestMemory # test only for instructions
29 from soc.regfile.regfiles import StateRegs, FastRegs
30 from soc.simple.core import NonProductionCore
31 from soc.config.test.test_loadstore import TestMemPspec
32 from soc.config.ifetch import ConfigFetchUnit
33 from soc.decoder.power_enums import MicrOp
34 from soc.debug.dmi import CoreDebug, DMIInterface
35 from soc.debug.jtag import JTAG
36 from soc.config.pinouts import get_pinspecs
37 from soc.config.state import CoreState
38 from soc.interrupts.xics import XICS_ICP, XICS_ICS
39 from soc.bus.simple_gpio import SimpleGPIO
40 from soc.clock.select import ClockSelect, DummyPLL
41
42
43 from nmutil.util import rising_edge
44
45
class TestIssuerInternal(Elaboratable):
    """TestIssuer - reads instructions from TestMemory and issues them

    efficiency and speed is not the main goal here: functional correctness is.

    A four-state FSM (IDLE, INSN_READ, INSN_START, INSN_ACTIVE) reads the
    PC, fetches one instruction through the configured fetch unit, hands
    the decoded instruction to the core, waits for the core to stop being
    busy and then writes PC+4 back (unless something wrote to the PC
    through the state regfile "nia" port in the meantime).

    Optional extras, selected by attributes of pspec: XICS interrupt
    controller, a simple GPIO peripheral and a JTAG interface.
    """
    def __init__(self, pspec):
        """Create the core, decoder, fetch unit, debug and optional extras.

        pspec is passed to NonProductionCore and ConfigFetchUnit.  the
        following optional attributes of pspec are also examined here:

        * xics  - if True, an XICS ICP and ICS are added
        * gpio  - if True, a SimpleGPIO peripheral is added
        * debug - if 'jtag', a JTAG interface is added
        """

        # add interrupt controller?
        self.xics = hasattr(pspec, "xics") and pspec.xics == True
        if self.xics:
            self.xics_icp = XICS_ICP()
            self.xics_ics = XICS_ICS()
            self.int_level_i = self.xics_ics.int_level_i

        # add GPIO peripheral?
        self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
        if self.gpio:
            self.simple_gpio = SimpleGPIO()
            self.gpio_o = self.simple_gpio.gpio_o

        # main instruction core
        self.core = NonProductionCore(pspec)

        # instruction decoder.  goes into Trap Record
        pdecode = create_pdecode()
        self.cur_state = CoreState("cur") # current state (MSR/PC/EINT)
        self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state,
                                     opkls=IssuerDecode2ToOperand)

        # Test Instruction memory
        self.imem = ConfigFetchUnit(pspec).fu
        # one-row cache of instruction read
        self.iline = Signal(64) # one instruction line
        self.iprev_adr = Signal(64) # previous address: if different, do read

        # DMI interface
        self.dbg = CoreDebug()

        # JTAG interface
        self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
        if self.jtag_en:
            subset = {'uart', 'mtwi', 'eint', 'gpio', 'mspi0', 'mspi1',
                      'pwm', 'sd0', 'sdr'}
            self.jtag = JTAG(get_pinspecs(subset=subset))

        # instruction go/monitor
        self.pc_o = Signal(64, reset_less=True)
        self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
        self.core_bigendian_i = Signal()
        self.busy_o = Signal(reset_less=True)
        self.memerr_o = Signal(reset_less=True)

        # STATE regfile read/write ports for PC and MSR
        staterf = self.core.regs.rf['state']
        self.state_r_pc = staterf.r_ports['cia'] # PC rd
        self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
        self.state_r_msr = staterf.r_ports['msr'] # MSR rd

        # DMI interface access
        intrf = self.core.regs.rf['int']
        crrf = self.core.regs.rf['cr']
        xerrf = self.core.regs.rf['xer']
        self.int_r = intrf.r_ports['dmi'] # INT read
        self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
        self.xer_r = xerrf.r_ports['full_xer'] # XER read

        # hack method of keeping an eye on whether branch/trap set the PC
        self.state_nia = self.core.regs.rf['state'].w_ports['nia']
        self.state_nia.wen.name = 'state_nia_wen'

    def elaborate(self, platform):
        """Build the issuer: submodules, reset logic, issue FSM, DMI reads.

        platform is not used.  returns the constructed Module.
        """
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        m.submodules.core = core = self.core
        m.submodules.imem = imem = self.imem
        m.submodules.dbg = dbg = self.dbg
        if self.jtag_en:
            m.submodules.jtag = jtag = self.jtag
            # TODO: UART2GDB mux, here, from external pin
            # see https://bugs.libre-soc.org/show_bug.cgi?id=499
            sync += dbg.dmi.connect_to(jtag.dmi)

        cur_state = self.cur_state

        # XICS interrupt handler
        if self.xics:
            m.submodules.xics_icp = icp = self.xics_icp
            m.submodules.xics_ics = ics = self.xics_ics
            comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
            sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core

        # GPIO test peripheral
        if self.gpio:
            m.submodules.simple_gpio = simple_gpio = self.simple_gpio

        # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
        if self.gpio and self.xics:
            comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

        # instruction decoder (created in __init__: do not create another)
        m.submodules.dec2 = pdecode2 = self.pdecode2

        # convenience
        d_reg, d_cr, d_xer = dbg.d_gpr, dbg.d_cr, dbg.d_xer
        intrf = self.core.regs.rf['int']

        # clock delay power-on reset.  nmigen derives the domain names
        # from these variable names, so they must not be renamed.
        cd_por = ClockDomain(reset_less=True)
        cd_sync = ClockDomain()
        m.domains += cd_por, cd_sync

        ti_rst = Signal(reset_less=True)
        delay = Signal(range(4), reset=3)
        with m.If(delay != 0):
            m.d.por += delay.eq(delay - 1)
        comb += cd_por.clk.eq(ClockSignal())

        # power-on reset delay: hold the core in reset while the delay
        # counter is running, or when debug or the external reset ask.
        # NOTE: the brackets round the comparison are essential.  python's
        # "|" binds tighter than "!=", so without them this would compare
        # delay against the OR of the two reset sources instead.
        core_rst = ResetSignal("coresync")
        comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
        comb += core_rst.eq(ti_rst)

        # busy/halted signals from core
        comb += self.busy_o.eq(core.busy_o)
        comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

        # temporary hack: says "go" immediately for both address gen and ST
        ldst = core.fus.fus['ldst0']
        st_go_edge = rising_edge(m, ldst.st.rel_o)
        m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
        m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel

        # PC and instruction from I-Memory
        pc_changed = Signal() # note write to PC
        comb += self.pc_o.eq(cur_state.pc)
        ilatch = Signal(32)

        # next instruction (+4 on current)
        nia = Signal(64, reset_less=True)
        comb += nia.eq(cur_state.pc + 4)

        # read the PC
        pc = Signal(64, reset_less=True)
        pc_ok_delay = Signal()
        sync += pc_ok_delay.eq(~self.pc_i.ok)
        with m.If(self.pc_i.ok):
            # incoming override (start from pc_i)
            comb += pc.eq(self.pc_i.data)
        with m.Else():
            # otherwise read StateRegs regfile for PC...
            comb += self.state_r_pc.ren.eq(1<<StateRegs.PC)
        # ... but on a 1-clock delay
        with m.If(pc_ok_delay):
            comb += pc.eq(self.state_r_pc.data_o)

        # don't write pc every cycle (defaults: the FSM overrides these)
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.data_i.eq(0)

        # don't read msr every cycle (default: the FSM overrides this)
        comb += self.state_r_msr.ren.eq(0)
        msr_read = Signal(reset=1)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        #comb += dbg.state.pc.eq(cur_state.pc)
        comb += dbg.state.msr.eq(cur_state.msr)

        # temporaries
        core_busy_o = core.busy_o                 # core is busy
        core_ivalid_i = core.ivalid_i             # instruction is valid
        core_issue_i = core.issue_i               # instruction is issued
        dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode

        insn_type = core.e.do.insn_type

        # actually use a nmigen FSM for the first time (w00t)
        # this FSM is perhaps unusual in that it detects conditions
        # then "holds" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.
        with m.FSM() as fsm:

            # waiting (zzz)
            with m.State("IDLE"):
                sync += pc_changed.eq(0)
                sync += core.e.eq(0)
                sync += core.raw_insn_i.eq(0)
                sync += core.bigendian_i.eq(0)
                with m.If(~dbg.core_stop_o & ~core_rst):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                    sync += cur_state.pc.eq(pc)

                    # initiate read of MSR. arrives one clock later
                    comb += self.state_r_msr.ren.eq(1<<StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ" # move to "wait for bus" phase
                with m.Else():
                    comb += core.core_stopped_i.eq(1)
                    comb += dbg.core_stopped_i.eq(1)

            # waiting for instruction bus (stays here until not busy)
            with m.State("INSN_READ"):
                # one cycle later, msr read arrives.  valid only once.
                with m.If(~msr_read):
                    sync += msr_read.eq(1) # yeah don't read it again
                    sync += cur_state.msr.eq(self.state_r_msr.data_o)
                with m.If(self.imem.f_busy_o): # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    f_instr_o = self.imem.f_instr_o
                    if f_instr_o.width == 32:
                        insn = f_instr_o
                    else:
                        # wider fetch: pick the 32-bit word using PC bit 2
                        insn = f_instr_o.word_select(cur_state.pc[2], 32)
                    comb += dec_opcode_i.eq(insn) # actual opcode
                    sync += core.e.eq(pdecode2.e)
                    # also drop PC and MSR into decode "state"
                    sync += core.state.eq(cur_state)
                    sync += core.raw_insn_i.eq(dec_opcode_i)
                    sync += core.bigendian_i.eq(self.core_bigendian_i)
                    sync += ilatch.eq(insn) # latch current insn
                    m.next = "INSN_START" # move to "start"

            # instruction fetched and decoded: issue it to the core
            with m.State("INSN_START"):
                comb += core_ivalid_i.eq(1) # instruction is valid
                comb += core_issue_i.eq(1)  # and issued

                m.next = "INSN_ACTIVE" # move to "wait completion"

            # instruction started: must wait till it finishes
            with m.State("INSN_ACTIVE"):
                with m.If(insn_type != MicrOp.OP_NOP):
                    comb += core_ivalid_i.eq(1) # instruction is valid
                with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
                    sync += pc_changed.eq(1)
                with m.If(~core_busy_o): # instruction done!
                    # ok here we are not reading the branch unit.  TODO
                    # this just blithely overwrites whatever pipeline
                    # updated the PC
                    with m.If(~pc_changed):
                        comb += self.state_w_pc.wen.eq(1<<StateRegs.PC)
                        comb += self.state_w_pc.data_i.eq(nia)
                    sync += core.e.eq(0)
                    sync += core.raw_insn_i.eq(0)
                    sync += core.bigendian_i.eq(0)
                    m.next = "IDLE" # back to idle

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        with m.If(d_reg.req): # request for regfile access being made
            # TODO: error-check this
            # XXX should this be combinatorial?  sync better?
            if intrf.unary:
                comb += self.int_r.ren.eq(1<<d_reg.addr)
            else:
                comb += self.int_r.addr.eq(d_reg.addr)
                comb += self.int_r.ren.eq(1)
        d_reg_delay = Signal()
        sync += d_reg_delay.eq(d_reg.req)
        with m.If(d_reg_delay):
            # data arrives one clock later
            comb += d_reg.data.eq(self.int_r.data_o)
            comb += d_reg.ack.eq(1)

        # sigh same thing for CR debug
        with m.If(d_cr.req): # request for regfile access being made
            comb += self.cr_r.ren.eq(0b11111111) # enable all
        d_cr_delay = Signal()
        sync += d_cr_delay.eq(d_cr.req)
        with m.If(d_cr_delay):
            # data arrives one clock later
            comb += d_cr.data.eq(self.cr_r.data_o)
            comb += d_cr.ack.eq(1)

        # aaand XER...
        with m.If(d_xer.req): # request for regfile access being made
            comb += self.xer_r.ren.eq(0b111111) # enable all
        d_xer_delay = Signal()
        sync += d_xer_delay.eq(d_xer.req)
        with m.If(d_xer_delay):
            # data arrives one clock later
            comb += d_xer.data.eq(self.xer_r.data_o)
            comb += d_xer.ack.eq(1)

        # DEC and TB inc/dec FSM
        self.tb_dec_fsm(m, cur_state.dec)

        return m

    def tb_dec_fsm(self, m, spr_dec):
        """tb_dec_fsm

        this is a FSM for updating either dec or tb.  it runs alternately
        DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
        value to DEC, however the regfile has "passthrough" on it so this
        *should* be ok.

        each of the four states moves unconditionally to the next, so
        DEC is decremented and TB incremented once every four clocks.

        * m       - the Module that the FSM is added to (also returned)
        * spr_dec - receives (sync) a copy of each newly-written DEC value

        see v3.0B p1097-1099 for Timer Resource and p1065 and p1076
        """

        comb, sync = m.d.comb, m.d.sync
        fast_rf = self.core.regs.rf['fast']
        fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
        fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB

        with m.FSM() as fsm:

            # initiates read of current DEC
            with m.State("DEC_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.DEC)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "DEC_WRITE"

            # waits for DEC read to arrive (1 cycle), updates with new value
            with m.State("DEC_WRITE"):
                new_dec = Signal(64)
                # TODO: MSR.LPCR 32-bit decrement mode
                comb += new_dec.eq(fast_r_dectb.data_o - 1)
                comb += fast_w_dectb.addr.eq(FastRegs.DEC)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_dec)
                sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
                m.next = "TB_READ"

            # initiates read of current TB
            with m.State("TB_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.TB)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "TB_WRITE"

            # waits for read TB to arrive, initiates write of current TB
            with m.State("TB_WRITE"):
                new_tb = Signal(64)
                comb += new_tb.eq(fast_r_dectb.data_o + 1)
                comb += fast_w_dectb.addr.eq(FastRegs.TB)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_tb)
                m.next = "DEC_READ"

        return m

    def __iter__(self):
        """Yield the signals used as ports() (internal/test port list)."""
        yield from self.pc_i.ports()
        yield self.pc_o
        yield self.memerr_o
        yield from self.core.ports()
        yield from self.imem.ports()
        yield self.core_bigendian_i
        yield self.busy_o

    def ports(self):
        """Return everything yielded by __iter__, as a list."""
        return list(self)

    def external_ports(self):
        """Return the list of ports for the top-level (external) interface.

        DMI ports are only included when JTAG is *not* enabled; XICS and
        GPIO ports are only included when those options are enabled.
        """
        # take a copy: never extend a list owned by pc_i in-place
        ports = list(self.pc_i.ports())
        ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
                  ]

        if self.jtag_en:
            ports += list(self.jtag.external_ports())
        else:
            # don't add DMI if JTAG is enabled
            ports += list(self.dbg.dmi.ports())

        ports += list(self.imem.ibus.fields.values())
        ports += list(self.core.l0.cmpi.lsmem.lsi.slavebus.fields.values())

        if self.xics:
            ports += list(self.xics_icp.bus.fields.values())
            ports += list(self.xics_ics.bus.fields.values())
            ports.append(self.int_level_i)

        if self.gpio:
            ports += list(self.simple_gpio.bus.fields.values())
            ports.append(self.gpio_o)

        return ports
443
444
class TestIssuer(Elaboratable):
    """TestIssuerInternal plus clocking: a DummyPLL and a ClockSelect.

    TestIssuerInternal is moved into the "intclk" domain, which is driven
    from the ClockSelect core clock output.  ClockSelect itself is moved
    into the "pllclk" domain, which is driven from the PLL output.  the
    default-domain clock is fed to both ClockSelect and PLL as the
    24mhz input, and the default-domain reset is copied to both domains.
    """
    def __init__(self, pspec):
        """pspec is passed straight through to TestIssuerInternal."""
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL()
        self.clksel = ClockSelect()

    def elaborate(self, platform):
        """Wire up issuer, clock selector and PLL.  platform is unused."""
        m = Module()
        comb = m.d.comb

        # TestIssuer runs at internal clock rate
        m.submodules.ti = DomainRenamer("intclk")(self.ti)
        # ClockSelect runs at PLL output internal clock rate
        m.submodules.clksel = clksel = DomainRenamer("pllclk")(self.clksel)
        m.submodules.pll = pll = self.pll

        # only "pllclk" is created and registered here.
        # NOTE(review): "intclk" is not registered in this module.  it
        # presumably comes from TestIssuerInternal's own "sync" domain
        # being renamed by DomainRenamer("intclk") above - confirm before
        # adding a ClockDomain("intclk") here, as that may then clash.
        cd_pll = ClockDomain("pllclk")
        m.domains += cd_pll

        # internal clock is set to selector clock-out.  has the side-effect of
        # running TestIssuer at this speed (see DomainRenamer("intclk") above)
        intclk = ClockSignal("intclk")
        comb += intclk.eq(clksel.core_clk_o)

        # PLL clock established.  has the side-effect of running clksel
        # at the PLL's speed (see DomainRenamer("pllclk") above)
        pllclk = ClockSignal("pllclk")
        comb += pllclk.eq(pll.clk_pll_o)

        # wire up external 24mhz to PLL and clksel
        comb += clksel.clk_24_i.eq(ClockSignal())
        comb += pll.clk_24_i.eq(clksel.clk_24_i)

        # now wire up ResetSignals.  don't mind them all being in this domain
        int_rst = ResetSignal("intclk")
        pll_rst = ResetSignal("pllclk")
        comb += int_rst.eq(ResetSignal())
        comb += pll_rst.eq(ResetSignal())

        return m

    def ports(self):
        """Return issuer, PLL, default clock/reset and clock-select ports."""
        return list(self.ti.ports()) + list(self.pll.ports()) + \
               [ClockSignal(), ResetSignal()] + \
               list(self.clksel.ports())

    def external_ports(self):
        """Return the issuer's external ports plus clock, reset and the
        clock-select input and 48mhz PLL output.
        """
        ports = self.ti.external_ports()
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        ports.append(self.clksel.clk_sel_i)
        ports.append(self.clksel.pll_48_o)
        return ports
500
501
if __name__ == '__main__':
    # one Function Unit of each kind
    fu_names = ('alu', 'cr', 'branch', 'trap', 'logical',
                'spr', 'div', 'mul', 'shiftrot')
    units = dict.fromkeys(fu_names, 1)

    # bare wishbone for both instruction fetch and load/store
    pspec_kwargs = {'ldst_ifacetype': 'bare_wb',
                    'imem_ifacetype': 'bare_wb',
                    'addr_wid': 48,
                    'mask_wid': 8,
                    'reg_wid': 64,
                    'units': units}
    pspec = TestMemPspec(**pspec_kwargs)
    dut = TestIssuer(pspec)

    # let the nmigen command-line handler deal with any arguments
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # run with no arguments at all: write out RTLIL with external ports only
    argc = len(sys.argv)
    if argc == 1:
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as ilfile:
            ilfile.write(vl)