ae6c24bfde03546481aead6c04415b02a17a4554
[soc.git] / src / soc / simple / issuer.py
"""simple core issuer

not in any way intended for production use.  this runs a FSM that:

* reads the Program Counter from StateRegs
* reads an instruction from a fixed-size Test Memory
* issues it to the Simple Core
* waits for it to complete
* increments the PC
* does it all over again

the purpose of this module is to verify the functional correctness
of the Function Units in the absolute simplest and clearest possible
way, and to provide something that can be further incrementally
improved.
"""
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from soc.decoder.power_decoder import create_pdecode
25 from soc.decoder.power_decoder2 import PowerDecode2
26 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
27 from soc.decoder.decode2execute1 import Data
28 from soc.experiment.testmem import TestMemory # test only for instructions
29 from soc.regfile.regfiles import StateRegs, FastRegs
30 from soc.simple.core import NonProductionCore
31 from soc.config.test.test_loadstore import TestMemPspec
32 from soc.config.ifetch import ConfigFetchUnit
33 from soc.decoder.power_enums import MicrOp
34 from soc.debug.dmi import CoreDebug, DMIInterface
35 from soc.debug.jtag import JTAG
36 from soc.config.pinouts import get_pinspecs
37 from soc.config.state import CoreState
38 from soc.interrupts.xics import XICS_ICP, XICS_ICS
39 from soc.bus.simple_gpio import SimpleGPIO
40 from soc.clock.select import ClockSelect, DummyPLL
41
42
43 from nmutil.util import rising_edge
44
45
class TestIssuerInternal(Elaboratable):
    """TestIssuer - reads instructions from TestMemory and issues them

    efficiency and speed is not the main goal here: functional correctness is.

    The issuer owns the core, the instruction decoder, the instruction
    fetch unit and the debug (DMI, optionally JTAG) interface, and drives
    them from a four-state FSM: IDLE -> INSN_READ -> INSN_START ->
    INSN_ACTIVE -> IDLE.  Optional XICS and GPIO peripherals are
    instantiated when the pspec asks for them.
    """

    def __init__(self, pspec):
        """Create the core, decoder, fetch unit and debug interfaces.

        pspec is the parameter specification object.  The optional
        attributes read from it here are ``xics`` (== True adds the
        interrupt controller), ``gpio`` (== True adds the GPIO peripheral)
        and ``debug`` (== 'jtag' adds the JTAG interface).  It is also
        handed on to NonProductionCore and ConfigFetchUnit.
        """

        # add interrupt controller?
        self.xics = hasattr(pspec, "xics") and pspec.xics == True
        if self.xics:
            self.xics_icp = XICS_ICP()
            self.xics_ics = XICS_ICS()
            self.int_level_i = self.xics_ics.int_level_i

        # add GPIO peripheral?
        self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
        if self.gpio:
            self.simple_gpio = SimpleGPIO()
            self.gpio_o = self.simple_gpio.gpio_o

        # main instruction core
        self.core = NonProductionCore(pspec)

        # instruction decoder.  goes into Trap Record
        pdecode = create_pdecode()
        self.cur_state = CoreState("cur")  # current state (MSR/PC/EINT)
        self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state,
                                     opkls=IssuerDecode2ToOperand)

        # Test Instruction memory
        self.imem = ConfigFetchUnit(pspec).fu
        # one-row cache of instruction read
        self.iline = Signal(64)  # one instruction line
        self.iprev_adr = Signal(64)  # previous address: if different, do read

        # DMI interface
        self.dbg = CoreDebug()

        # JTAG interface
        self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
        if self.jtag_en:
            subset = {'uart', 'mtwi', 'eint', 'gpio', 'mspi0', 'mspi1',
                      'pwm', 'sd0', 'sdr'}
            self.jtag = JTAG(get_pinspecs(subset=subset))

        # instruction go/monitor
        self.pc_o = Signal(64, reset_less=True)
        self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
        self.core_bigendian_i = Signal()
        self.busy_o = Signal(reset_less=True)
        self.memerr_o = Signal(reset_less=True)

        # FAST regfile read /write ports for PC, MSR, DEC/TB
        staterf = self.core.regs.rf['state']
        self.state_r_pc = staterf.r_ports['cia']  # PC rd
        self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
        self.state_r_msr = staterf.r_ports['msr']  # MSR rd

        # DMI interface access
        intrf = self.core.regs.rf['int']
        crrf = self.core.regs.rf['cr']
        xerrf = self.core.regs.rf['xer']
        self.int_r = intrf.r_ports['dmi']  # INT read
        self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read
        self.xer_r = xerrf.r_ports['full_xer']  # XER read

        # hack method of keeping an eye on whether branch/trap set the PC
        self.state_nia = self.core.regs.rf['state'].w_ports['nia']
        self.state_nia.wen.name = 'state_nia_wen'

    def elaborate(self, platform):
        """Build the issuer: submodules, power-on reset, issue FSM, DMI reads.

        Returns the nmigen Module.  NOTE: the names of the local variables
        holding Signals and ClockDomains are deliberately left alone, as
        nmigen derives the hardware names from them.
        """
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        # the core runs in its own "coresync" clock domain
        m.submodules.core = core = DomainRenamer("coresync")(self.core)
        m.submodules.imem = imem = self.imem
        m.submodules.dbg = dbg = self.dbg
        if self.jtag_en:
            m.submodules.jtag = jtag = self.jtag
            # TODO: UART2GDB mux, here, from external pin
            # see https://bugs.libre-soc.org/show_bug.cgi?id=499
            sync += dbg.dmi.connect_to(jtag.dmi)

        cur_state = self.cur_state

        # XICS interrupt handler
        if self.xics:
            m.submodules.xics_icp = icp = self.xics_icp
            m.submodules.xics_ics = ics = self.xics_ics
            comb += icp.ics_i.eq(ics.icp_o)  # connect ICS to ICP
            sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core

        # GPIO test peripheral
        if self.gpio:
            m.submodules.simple_gpio = simple_gpio = self.simple_gpio

        # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
        # XXX causes litex ECP5 test to get wrong idea about input and output
        # (but works with verilator sim *sigh*)
        #if self.gpio and self.xics:
        #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

        # instruction decoder (created in __init__)
        m.submodules.dec2 = pdecode2 = self.pdecode2

        # convenience
        dmi, d_reg, d_cr, d_xer, = dbg.dmi, dbg.d_gpr, dbg.d_cr, dbg.d_xer
        intrf = self.core.regs.rf['int']

        # clock delay power-on reset
        cd_por = ClockDomain(reset_less=True)
        cd_sync = ClockDomain()
        core_sync = ClockDomain("coresync")
        m.domains += cd_por, cd_sync, core_sync

        # down-counter in the (reset-less) por domain: starts at 3, stops at 0
        ti_rst = Signal(reset_less=True)
        delay = Signal(range(4), reset=3)
        with m.If(delay != 0):
            m.d.por += delay.eq(delay - 1)
        comb += cd_por.clk.eq(ClockSignal())

        # power-on reset delay: hold the core in reset while the counter is
        # still running, or when debug or the main reset requests it.
        # the comparison must be parenthesised: Python's "|" binds tighter
        # than "!=", so without the brackets this would compare delay
        # against the OR of the two reset signals.
        core_rst = ResetSignal("coresync")
        comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
        comb += core_rst.eq(ti_rst)

        # busy/halted signals from core
        comb += self.busy_o.eq(core.busy_o)
        comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

        # temporary hack: says "go" immediately for both address gen and ST
        ldst = core.fus.fus['ldst0']
        st_go_edge = rising_edge(m, ldst.st.rel_o)
        m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)  # link addr-go direct to rel
        m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel

        # PC and instruction from I-Memory
        pc_changed = Signal()  # note write to PC
        comb += self.pc_o.eq(cur_state.pc)
        ilatch = Signal(32)

        # next instruction (+4 on current)
        nia = Signal(64, reset_less=True)
        comb += nia.eq(cur_state.pc + 4)

        # read the PC
        pc = Signal(64, reset_less=True)
        pc_ok_delay = Signal()
        sync += pc_ok_delay.eq(~self.pc_i.ok)
        with m.If(self.pc_i.ok):
            # incoming override (start from pc_i)
            comb += pc.eq(self.pc_i.data)
        with m.Else():
            # otherwise read StateRegs regfile for PC...
            comb += self.state_r_pc.ren.eq(1<<StateRegs.PC)
        # ... but on a 1-clock delay
        with m.If(pc_ok_delay):
            comb += pc.eq(self.state_r_pc.data_o)

        # don't write pc every cycle (defaults: the FSM overrides these)
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.data_i.eq(0)

        # don't read msr every cycle (default: the FSM overrides this)
        comb += self.state_r_msr.ren.eq(0)
        msr_read = Signal(reset=1)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        #comb += dbg.state.pc.eq(cur_state.pc)
        comb += dbg.state.msr.eq(cur_state.msr)

        # temporaries
        core_busy_o = core.busy_o  # core is busy
        core_ivalid_i = core.ivalid_i  # instruction is valid
        core_issue_i = core.issue_i  # instruction is issued
        dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

        insn_type = core.e.do.insn_type

        # actually use a nmigen FSM for the first time (w00t)
        # this FSM is perhaps unusual in that it detects conditions
        # then "holds" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.
        with m.FSM() as fsm:

            # waiting (zzz)
            with m.State("IDLE"):
                sync += pc_changed.eq(0)
                sync += core.e.eq(0)
                sync += core.raw_insn_i.eq(0)
                sync += core.bigendian_i.eq(0)
                with m.If(~dbg.core_stop_o & ~core_rst):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                    sync += cur_state.pc.eq(pc)

                    # initiate read of MSR.  arrives one clock later
                    comb += self.state_r_msr.ren.eq(1<<StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase
                with m.Else():
                    comb += core.core_stopped_i.eq(1)
                    comb += dbg.core_stopped_i.eq(1)

            # waiting for instruction bus (stays there until not busy)
            with m.State("INSN_READ"):
                # one cycle later, msr read arrives.  valid only once.
                with m.If(~msr_read):
                    sync += msr_read.eq(1)  # yeah don't read it again
                    sync += cur_state.msr.eq(self.state_r_msr.data_o)
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    f_instr_o = self.imem.f_instr_o
                    if f_instr_o.width == 32:
                        insn = f_instr_o
                    else:
                        # wider fetch: pick the 32-bit word using PC bit 2
                        insn = f_instr_o.word_select(cur_state.pc[2], 32)
                    comb += dec_opcode_i.eq(insn)  # actual opcode
                    sync += core.e.eq(pdecode2.e)
                    # also drop PC and MSR into decode "state"
                    sync += core.state.eq(cur_state)
                    sync += core.raw_insn_i.eq(dec_opcode_i)
                    sync += core.bigendian_i.eq(self.core_bigendian_i)
                    sync += ilatch.eq(insn)  # latch current insn
                    m.next = "INSN_START"  # move to "start"

            # one-cycle state: flag the instruction as valid and issue it
            with m.State("INSN_START"):
                comb += core_ivalid_i.eq(1)  # instruction is valid
                comb += core_issue_i.eq(1)  # and issued

                m.next = "INSN_ACTIVE"  # move to "wait completion"

            # instruction started: must wait till it finishes
            with m.State("INSN_ACTIVE"):
                with m.If(insn_type != MicrOp.OP_NOP):
                    comb += core_ivalid_i.eq(1)  # instruction is valid
                with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
                    sync += pc_changed.eq(1)
                with m.If(~core_busy_o):  # instruction done!
                    # ok here we are not reading the branch unit.  TODO
                    # this just blithely overwrites whatever pipeline
                    # updated the PC
                    with m.If(~pc_changed):
                        comb += self.state_w_pc.wen.eq(1<<StateRegs.PC)
                        comb += self.state_w_pc.data_i.eq(nia)
                    sync += core.e.eq(0)
                    sync += core.raw_insn_i.eq(0)
                    sync += core.bigendian_i.eq(0)
                    m.next = "IDLE"  # back to idle

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        with m.If(d_reg.req):  # request for regfile access being made
            # TODO: error-check this
            # XXX should this be combinatorial?  sync better?
            if intrf.unary:
                comb += self.int_r.ren.eq(1<<d_reg.addr)
            else:
                comb += self.int_r.addr.eq(d_reg.addr)
                comb += self.int_r.ren.eq(1)
        d_reg_delay = Signal()
        sync += d_reg_delay.eq(d_reg.req)
        with m.If(d_reg_delay):
            # data arrives one clock later
            comb += d_reg.data.eq(self.int_r.data_o)
            comb += d_reg.ack.eq(1)

        # sigh same thing for CR debug
        with m.If(d_cr.req):  # request for regfile access being made
            comb += self.cr_r.ren.eq(0b11111111)  # enable all
        d_cr_delay = Signal()
        sync += d_cr_delay.eq(d_cr.req)
        with m.If(d_cr_delay):
            # data arrives one clock later
            comb += d_cr.data.eq(self.cr_r.data_o)
            comb += d_cr.ack.eq(1)

        # aaand XER...
        with m.If(d_xer.req):  # request for regfile access being made
            comb += self.xer_r.ren.eq(0b111111)  # enable all
        d_xer_delay = Signal()
        sync += d_xer_delay.eq(d_xer.req)
        with m.If(d_xer_delay):
            # data arrives one clock later
            comb += d_xer.data.eq(self.xer_r.data_o)
            comb += d_xer.ack.eq(1)

        # DEC and TB inc/dec FSM
        self.tb_dec_fsm(m, cur_state.dec)

        return m

    def tb_dec_fsm(self, m, spr_dec):
        """tb_dec_fsm

        this is a FSM for updating either dec or tb.  it runs alternately
        DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
        value to DEC, however the regfile has "passthrough" on it so this
        *should* be ok.

        m is the Module to add the FSM to (it is also returned); spr_dec
        receives a copy of each newly-written DEC value.

        see v3.0B p1097-1099 for Timer Resource and p1065 and p1076
        """

        comb, sync = m.d.comb, m.d.sync
        fast_rf = self.core.regs.rf['fast']
        fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
        fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB

        with m.FSM() as fsm:

            # initiates read of current DEC
            with m.State("DEC_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.DEC)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "DEC_WRITE"

            # waits for DEC read to arrive (1 cycle), updates with new value
            with m.State("DEC_WRITE"):
                new_dec = Signal(64)
                # TODO: MSR.LPCR 32-bit decrement mode
                comb += new_dec.eq(fast_r_dectb.data_o - 1)
                comb += fast_w_dectb.addr.eq(FastRegs.DEC)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_dec)
                sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
                m.next = "TB_READ"

            # initiates read of current TB
            with m.State("TB_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.TB)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "TB_WRITE"

            # waits for read TB to arrive, initiates write of current TB
            with m.State("TB_WRITE"):
                new_tb = Signal(64)
                comb += new_tb.eq(fast_r_dectb.data_o + 1)
                comb += fast_w_dectb.addr.eq(FastRegs.TB)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_tb)
                m.next = "DEC_READ"

        return m

    def __iter__(self):
        """Yield the issuer's own signals plus the core and imem ports."""
        yield from self.pc_i.ports()
        yield self.pc_o
        yield self.memerr_o
        yield from self.core.ports()
        yield from self.imem.ports()
        yield self.core_bigendian_i
        yield self.busy_o

    def ports(self):
        """Return everything produced by __iter__ as a list."""
        return list(self)

    def external_ports(self):
        """Return the list of ports to expose at the top level.

        Includes the JTAG ports when JTAG is enabled (the DMI ports
        otherwise), the instruction and load/store buses, and the XICS and
        GPIO ports when those peripherals were requested.
        """
        # copy, so that the list handed back by pc_i.ports() is not modified
        ports = list(self.pc_i.ports())
        ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
                  ]

        if self.jtag_en:
            ports += list(self.jtag.external_ports())
        else:
            # don't add DMI if JTAG is enabled
            ports += list(self.dbg.dmi.ports())

        ports += list(self.imem.ibus.fields.values())
        ports += list(self.core.l0.cmpi.lsmem.lsi.slavebus.fields.values())

        if self.xics:
            ports += list(self.xics_icp.bus.fields.values())
            ports += list(self.xics_ics.bus.fields.values())
            ports.append(self.int_level_i)

        if self.gpio:
            ports += list(self.simple_gpio.bus.fields.values())
            ports.append(self.gpio_o)

        return ports
446
447
class TestIssuer(Elaboratable):
    """Wraps TestIssuerInternal with a DummyPLL and a ClockSelect.

    The "coresync" clock is driven either from the clock selector output
    or directly from the default clock, depending on pspec.use_pll.
    """

    def __init__(self, pspec):
        """Create the internal issuer, the PLL and the clock selector.

        pspec is passed on to TestIssuerInternal.  If it has a ``use_pll``
        attribute, that value selects whether the core clock comes from
        the clock selector.
        """
        self.ti = TestIssuerInternal(pspec)

        self.pll = DummyPLL()
        self.clksel = ClockSelect()

        # PLL direct clock or not
        self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll

    def elaborate(self, platform):
        """Wire up the clocks and resets between issuer, PLL and selector."""
        m = Module()
        comb = m.d.comb

        # TestIssuerInternal is added without a DomainRenamer.  it declares
        # the "coresync" domain itself and places its core in it.
        m.submodules.ti = self.ti

        # ClockSelect runs at PLL output internal clock rate
        m.submodules.clksel = clksel = DomainRenamer("pllclk")(self.clksel)
        m.submodules.pll = pll = self.pll

        # add the "pllclk" clock domain used by the DomainRenamer above
        cd_pll = ClockDomain("pllclk")
        m.domains += cd_pll

        # internal ("coresync") clock is set to selector clock-out when the
        # PLL is enabled, otherwise to the default clock.
        # NOTE(review): "coresync" is not declared in this module; this
        # relies on the domain declared by TestIssuerInternal being visible
        # here.
        intclk = ClockSignal("coresync")
        if self.pll_en:
            comb += intclk.eq(clksel.core_clk_o)
        else:
            comb += intclk.eq(ClockSignal())

        # PLL clock established.  has the side-effect of running clksel
        # at the PLL's speed (see DomainRenamer("pllclk") above)
        pllclk = ClockSignal("pllclk")
        comb += pllclk.eq(pll.clk_pll_o)

        # wire up external 24mhz to PLL and clksel
        comb += clksel.clk_24_i.eq(ClockSignal())
        comb += pll.clk_24_i.eq(clksel.clk_24_i)

        # now wire up ResetSignals.  don't mind them all being in this domain
        #int_rst = ResetSignal("coresync")
        pll_rst = ResetSignal("pllclk")
        #comb += int_rst.eq(ResetSignal())
        comb += pll_rst.eq(ResetSignal())

        return m

    def ports(self):
        """Return issuer, PLL and selector ports plus default clock/reset."""
        return list(self.ti.ports()) + list(self.pll.ports()) + \
               [ClockSignal(), ResetSignal()] + \
               list(self.clksel.ports())

    def external_ports(self):
        """Return the issuer's external ports plus clock, reset and clksel."""
        ports = self.ti.external_ports()
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        ports.append(self.clksel.clk_sel_i)
        ports.append(self.clksel.pll_48_o)
        return ports
511
512
if __name__ == '__main__':
    # one of each Function Unit type
    fu_names = ['alu', 'cr', 'branch', 'trap', 'logical',
                'spr', 'div', 'mul', 'shiftrot']
    units = dict.fromkeys(fu_names, 1)

    pspec = TestMemPspec(
        ldst_ifacetype='bare_wb',
        imem_ifacetype='bare_wb',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
        units=units,
    )
    dut = TestIssuer(pspec)

    # hand over to the nmigen command-line handler, with the full port list
    main(dut, ports=dut.ports(), name="test_issuer")

    # no command-line arguments: also write out ilang, external ports only
    if len(sys.argv) == 1:
        il_text = rtlil.convert(dut, ports=dut.external_ports(),
                                name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(il_text)