significant reorg of the litex pinspecs to use pinmux JSON files
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to at least provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from soc.decoder.power_decoder import create_pdecode
25 from soc.decoder.power_decoder2 import PowerDecode2
26 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
27 from soc.decoder.decode2execute1 import Data
28 from soc.experiment.testmem import TestMemory # test only for instructions
29 from soc.regfile.regfiles import StateRegs, FastRegs
30 from soc.simple.core import NonProductionCore
31 from soc.config.test.test_loadstore import TestMemPspec
32 from soc.config.ifetch import ConfigFetchUnit
33 from soc.decoder.power_enums import MicrOp
34 from soc.debug.dmi import CoreDebug, DMIInterface
35 from soc.debug.jtag import JTAG
36 from soc.config.pinouts import get_pinspecs
37 from soc.config.state import CoreState
38 from soc.interrupts.xics import XICS_ICP, XICS_ICS
39 from soc.bus.simple_gpio import SimpleGPIO
40 from soc.clock.select import ClockSelect, DummyPLL
41
42
43 from nmutil.util import rising_edge
44
45
class TestIssuerInternal(Elaboratable):
    """TestIssuer - reads instructions from TestMemory and issues them

    efficiency and speed is not the main goal here: functional correctness is.

    Optional blocks, selected by attributes on pspec:

    * pspec.xics == True    : adds XICS_ICP / XICS_ICS interrupt controllers
    * pspec.gpio == True    : adds a SimpleGPIO peripheral
    * pspec.debug == 'jtag' : adds a JTAG block connected to the DMI interface
    """

    def __init__(self, pspec):

        # add interrupt controller?
        self.xics = hasattr(pspec, "xics") and pspec.xics == True
        if self.xics:
            self.xics_icp = XICS_ICP()
            self.xics_ics = XICS_ICS()
            self.int_level_i = self.xics_ics.int_level_i

        # add GPIO peripheral?
        self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
        if self.gpio:
            self.simple_gpio = SimpleGPIO()
            self.gpio_o = self.simple_gpio.gpio_o

        # main instruction core
        self.core = NonProductionCore(pspec)

        # instruction decoder.  goes into Trap Record
        pdecode = create_pdecode()
        self.cur_state = CoreState("cur")  # current state (MSR/PC/EINT)
        self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state,
                                     opkls=IssuerDecode2ToOperand)

        # Test Instruction memory
        self.imem = ConfigFetchUnit(pspec).fu
        # one-row cache of instruction read
        self.iline = Signal(64)      # one instruction line
        self.iprev_adr = Signal(64)  # previous address: if different, do read

        # DMI interface
        self.dbg = CoreDebug()

        # JTAG interface
        self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
        if self.jtag_en:
            subset = {'uart', 'mtwi', 'eint', 'gpio', 'mspi0', 'mspi1',
                      'pwm', 'sd0'}  # , 'sdr'}
            self.jtag = JTAG(get_pinspecs(subset=subset))

        # instruction go/monitor
        self.pc_o = Signal(64, reset_less=True)
        self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
        self.core_bigendian_i = Signal()
        self.busy_o = Signal(reset_less=True)
        self.memerr_o = Signal(reset_less=True)

        # STATE regfile read /write ports for PC, MSR
        staterf = self.core.regs.rf['state']
        self.state_r_pc = staterf.r_ports['cia']     # PC rd
        self.state_w_pc = staterf.w_ports['d_wr1']   # PC wr
        self.state_r_msr = staterf.r_ports['msr']    # MSR rd

        # DMI interface access
        intrf = self.core.regs.rf['int']
        crrf = self.core.regs.rf['cr']
        xerrf = self.core.regs.rf['xer']
        self.int_r = intrf.r_ports['dmi']         # INT read
        self.cr_r = crrf.r_ports['full_cr_dbg']   # CR read
        self.xer_r = xerrf.r_ports['full_xer']    # XER read

        # hack method of keeping an eye on whether branch/trap set the PC
        self.state_nia = self.core.regs.rf['state'].w_ports['nia']
        self.state_nia.wen.name = 'state_nia_wen'

    def elaborate(self, platform):
        """Build the issuer: submodule hookup, power-on reset, the
        fetch/issue FSM, DMI regfile read-back and the DEC/TB FSM.

        NOTE: nmigen derives Signal and ClockDomain names from the local
        variable they are assigned to, so the names of such locals below
        (delay, pc, nia, cd_por, ...) are part of the generated netlist
        and should not be renamed casually.
        """
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        m.submodules.core = core = DomainRenamer("coresync")(self.core)
        m.submodules.imem = self.imem
        m.submodules.dbg = dbg = self.dbg
        if self.jtag_en:
            m.submodules.jtag = jtag = self.jtag
            # TODO: UART2GDB mux, here, from external pin
            # see https://bugs.libre-soc.org/show_bug.cgi?id=499
            sync += dbg.dmi.connect_to(jtag.dmi)

        cur_state = self.cur_state

        # XICS interrupt handler
        if self.xics:
            m.submodules.xics_icp = icp = self.xics_icp
            m.submodules.xics_ics = ics = self.xics_ics
            comb += icp.ics_i.eq(ics.icp_o)            # connect ICS to ICP
            sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core

        # GPIO test peripheral
        if self.gpio:
            m.submodules.simple_gpio = simple_gpio = self.simple_gpio

        # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
        if self.gpio and self.xics:
            comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

        # instruction decoder (constructed once, in __init__)
        m.submodules.dec2 = pdecode2 = self.pdecode2

        # convenience
        d_reg, d_cr, d_xer = dbg.d_gpr, dbg.d_cr, dbg.d_xer
        intrf = self.core.regs.rf['int']

        # clock delay power-on reset
        # (cd_por is the domain used as "m.d.por" below)
        cd_por = ClockDomain(reset_less=True)
        cd_sync = ClockDomain()
        core_sync = ClockDomain("coresync")
        m.domains += cd_por, cd_sync, core_sync

        # counts 3 -> 0 after power-on; core is held in reset meanwhile
        delay = Signal(range(4), reset=3)
        with m.If(delay != 0):
            m.d.por += delay.eq(delay - 1)
        comb += cd_por.clk.eq(ClockSignal())
        comb += core_sync.clk.eq(ClockSignal())
        # core reset: power-on delay still running OR debug-requested reset.
        # the parentheses are essential: Python's "|" binds tighter than
        # "!=", so without them this is "delay != (0 | core_rst_o)"
        comb += core.core_reset_i.eq((delay != 0) | dbg.core_rst_o)

        # busy/halted signals from core
        comb += self.busy_o.eq(core.busy_o)
        comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

        # temporary hack: says "go" immediately for both address gen and ST
        ldst = core.fus.fus['ldst0']
        st_go_edge = rising_edge(m, ldst.st.rel_o)
        comb += ldst.ad.go_i.eq(ldst.ad.rel_o)  # link addr-go direct to rel
        comb += ldst.st.go_i.eq(st_go_edge)     # link store-go to rising rel

        # PC and instruction from I-Memory
        pc_changed = Signal()  # note write to PC
        comb += self.pc_o.eq(cur_state.pc)
        ilatch = Signal(32)

        # next instruction (+4 on current)
        nia = Signal(64, reset_less=True)
        comb += nia.eq(cur_state.pc + 4)

        # read the PC
        pc = Signal(64, reset_less=True)
        pc_ok_delay = Signal()
        sync += pc_ok_delay.eq(~self.pc_i.ok)
        with m.If(self.pc_i.ok):
            # incoming override (start from pc_i)
            comb += pc.eq(self.pc_i.data)
        with m.Else():
            # otherwise read StateRegs regfile for PC...
            comb += self.state_r_pc.ren.eq(1 << StateRegs.PC)
        # ... but on a 1-clock delay
        with m.If(pc_ok_delay):
            comb += pc.eq(self.state_r_pc.data_o)

        # don't write pc every cycle (defaults, overridden inside the FSM)
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.data_i.eq(0)

        # don't read msr every cycle (default, overridden inside the FSM)
        comb += self.state_r_msr.ren.eq(0)
        msr_read = Signal(reset=1)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        comb += dbg.state.msr.eq(cur_state.msr)

        # temporaries
        core_busy_o = core.busy_o                  # core is busy
        core_ivalid_i = core.ivalid_i              # instruction is valid
        core_issue_i = core.issue_i                # instruction is issued
        dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

        insn_type = core.e.do.insn_type

        # actually use a nmigen FSM for the first time (w00t)
        # this FSM is perhaps unusual in that it detects conditions
        # then "holds" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.
        with m.FSM():

            # waiting (zzz)
            with m.State("IDLE"):
                sync += pc_changed.eq(0)
                sync += core.e.eq(0)
                sync += core.raw_insn_i.eq(0)
                sync += core.bigendian_i.eq(0)
                with m.If(~dbg.core_stop_o & ~core.core_reset_i):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                    sync += cur_state.pc.eq(pc)

                    # initiate read of MSR.  arrives one clock later
                    comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase
                with m.Else():
                    comb += core.core_stopped_i.eq(1)
                    comb += dbg.core_stopped_i.eq(1)

            # waiting for instruction bus (stays there until not busy)
            with m.State("INSN_READ"):
                # one cycle later, msr read arrives.  valid only once.
                with m.If(~msr_read):
                    sync += msr_read.eq(1)  # yeah don't read it again
                    sync += cur_state.msr.eq(self.state_r_msr.data_o)
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    f_instr_o = self.imem.f_instr_o
                    if f_instr_o.width == 32:
                        insn = f_instr_o
                    else:
                        # wider fetch: PC bit 2 selects the 32-bit word
                        insn = f_instr_o.word_select(cur_state.pc[2], 32)
                    comb += dec_opcode_i.eq(insn)  # actual opcode
                    sync += core.e.eq(pdecode2.e)
                    # also drop PC and MSR into decode "state"
                    sync += core.state.eq(cur_state)
                    sync += core.raw_insn_i.eq(dec_opcode_i)
                    sync += core.bigendian_i.eq(self.core_bigendian_i)
                    sync += ilatch.eq(insn)  # latch current insn
                    m.next = "INSN_START"  # move to "start"

            # single-cycle state: flag the instruction as valid and issued
            with m.State("INSN_START"):
                comb += core_ivalid_i.eq(1)  # instruction is valid
                comb += core_issue_i.eq(1)   # and issued

                m.next = "INSN_ACTIVE"  # move to "wait completion"

            # instruction started: must wait till it finishes
            with m.State("INSN_ACTIVE"):
                with m.If(insn_type != MicrOp.OP_NOP):
                    comb += core_ivalid_i.eq(1)  # instruction is valid
                with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
                    sync += pc_changed.eq(1)
                with m.If(~core_busy_o):  # instruction done!
                    # ok here we are not reading the branch unit.  TODO
                    # this just blithely overwrites whatever pipeline
                    # updated the PC
                    with m.If(~pc_changed):
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.data_i.eq(nia)
                    sync += core.e.eq(0)
                    sync += core.raw_insn_i.eq(0)
                    sync += core.bigendian_i.eq(0)
                    m.next = "IDLE"  # back to idle

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        with m.If(d_reg.req):  # request for regfile access being made
            # TODO: error-check this
            # XXX should this be combinatorial?  sync better?
            if intrf.unary:
                comb += self.int_r.ren.eq(1 << d_reg.addr)
            else:
                comb += self.int_r.addr.eq(d_reg.addr)
                comb += self.int_r.ren.eq(1)
        d_reg_delay = Signal()
        sync += d_reg_delay.eq(d_reg.req)
        with m.If(d_reg_delay):
            # data arrives one clock later
            comb += d_reg.data.eq(self.int_r.data_o)
            comb += d_reg.ack.eq(1)

        # sigh same thing for CR debug
        with m.If(d_cr.req):  # request for regfile access being made
            comb += self.cr_r.ren.eq(0b11111111)  # enable all
        d_cr_delay = Signal()
        sync += d_cr_delay.eq(d_cr.req)
        with m.If(d_cr_delay):
            # data arrives one clock later
            comb += d_cr.data.eq(self.cr_r.data_o)
            comb += d_cr.ack.eq(1)

        # aaand XER...
        with m.If(d_xer.req):  # request for regfile access being made
            comb += self.xer_r.ren.eq(0b111111)  # enable all
        d_xer_delay = Signal()
        sync += d_xer_delay.eq(d_xer.req)
        with m.If(d_xer_delay):
            # data arrives one clock later
            comb += d_xer.data.eq(self.xer_r.data_o)
            comb += d_xer.ack.eq(1)

        # DEC and TB inc/dec FSM
        self.tb_dec_fsm(m, cur_state.dec)

        return m

    def tb_dec_fsm(self, m, spr_dec):
        """tb_dec_fsm

        this is a FSM for updating either dec or tb.  it runs alternately
        DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
        value to DEC, however the regfile has "passthrough" on it so this
        *should* be ok.

        m is the Module the FSM is added to (also returned); spr_dec
        receives a copy of each newly-written DEC value.

        see v3.0B p1097-1099 for Timer Resource and p1065 and p1076
        """

        comb, sync = m.d.comb, m.d.sync
        fast_rf = self.core.regs.rf['fast']
        fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
        fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB

        with m.FSM():

            # initiates read of current DEC
            with m.State("DEC_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.DEC)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "DEC_WRITE"

            # waits for DEC read to arrive (1 cycle), updates with new value
            with m.State("DEC_WRITE"):
                new_dec = Signal(64)
                # TODO: MSR.LPCR 32-bit decrement mode
                comb += new_dec.eq(fast_r_dectb.data_o - 1)
                comb += fast_w_dectb.addr.eq(FastRegs.DEC)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_dec)
                sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
                m.next = "TB_READ"

            # initiates read of current TB
            with m.State("TB_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.TB)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "TB_WRITE"

            # waits for read TB to arrive, initiates write of current TB
            with m.State("TB_WRITE"):
                new_tb = Signal(64)
                comb += new_tb.eq(fast_r_dectb.data_o + 1)
                comb += fast_w_dectb.addr.eq(FastRegs.TB)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_tb)
                m.next = "DEC_READ"

        return m

    def __iter__(self):
        """Yield the issuer's own ports followed by core and imem ports."""
        yield from self.pc_i.ports()
        yield self.pc_o
        yield self.memerr_o
        yield from self.core.ports()
        yield from self.imem.ports()
        yield self.core_bigendian_i
        yield self.busy_o

    def ports(self):
        """Return everything produced by __iter__, as a list."""
        return list(self)

    def external_ports(self):
        """Return the list of ports to expose when converting to RTLIL.

        Includes either the JTAG external ports or (when JTAG is not
        enabled) the raw DMI ports, plus instruction and load/store bus
        fields and - where enabled - the XICS and GPIO bus fields.
        """
        # take a copy: never extend a list owned by pc_i in place
        ports = list(self.pc_i.ports())
        ports += [self.pc_o, self.memerr_o, self.core_bigendian_i,
                  self.busy_o]

        if self.jtag_en:
            ports += list(self.jtag.external_ports())
        else:
            # don't add DMI if JTAG is enabled
            ports += list(self.dbg.dmi.ports())

        ports += list(self.imem.ibus.fields.values())
        ports += list(self.core.l0.cmpi.lsmem.lsi.slavebus.fields.values())

        if self.xics:
            ports += list(self.xics_icp.bus.fields.values())
            ports += list(self.xics_ics.bus.fields.values())
            ports.append(self.int_level_i)

        if self.gpio:
            ports += list(self.simple_gpio.bus.fields.values())
            ports.append(self.gpio_o)

        return ports
441
442
class TestIssuer(Elaboratable):
    """Wraps TestIssuerInternal together with a DummyPLL and a ClockSelect.

    The issuer is domain-renamed to "intclk" and the clock selector to
    "pllclk"; the external (default-domain) clock and reset feed both
    the PLL and the selector.
    """

    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL()
        self.clksel = ClockSelect()

    def elaborate(self, platform):
        m = Module()

        # TestIssuer runs at internal clock rate
        m.submodules.ti = DomainRenamer("intclk")(self.ti)
        # ClockSelect runs at PLL output internal clock rate
        selector = DomainRenamer("pllclk")(self.clksel)
        m.submodules.clksel = selector
        m.submodules.pll = self.pll
        pll_unit = self.pll

        # the two clock domains referred to by the renamers above.
        # only the PLL domain is registered with the module.
        # NOTE(review): the "intclk" domain object is driven below but is
        # never added to m.domains - confirm this is really intended.
        intclk_domain = ClockDomain("intclk")
        pllclk_domain = ClockDomain("pllclk")
        m.domains += pllclk_domain

        m.d.comb += [
            # internal clock is set to selector clock-out.  has the
            # side-effect of running TestIssuer at this speed
            intclk_domain.clk.eq(selector.core_clk_o),
            # PLL clock established.  has the side-effect of running
            # the clock selector at the PLL's speed
            pllclk_domain.clk.eq(pll_unit.clk_pll_o),
            # wire up external 24mhz to PLL and clksel
            selector.clk_24_i.eq(ClockSignal()),
            pll_unit.clk_24_i.eq(selector.clk_24_i),
            # now wire up ResetSignals.  don't mind them all being in
            # this domain
            pll_unit.rst.eq(ResetSignal()),
            selector.rst.eq(ResetSignal()),
        ]

        return m

    def ports(self):
        """All ports: issuer, PLL, default clock/reset, clock selector."""
        everything = list(self.ti.ports())
        everything.extend(self.pll.ports())
        everything.extend([ClockSignal(), ResetSignal()])
        everything.extend(self.clksel.ports())
        return everything

    def external_ports(self):
        """Issuer external ports plus clock-select input and PLL output.

        The default-domain clock and reset are deliberately not added.
        """
        ports = self.ti.external_ports()
        ports.extend((self.clksel.clk_sel_i, self.clksel.pll_48_o))
        return ports
495
496
if __name__ == '__main__':
    # one of each kind of function unit
    unit_names = ('alu', 'cr', 'branch', 'trap', 'logical',
                  'spr', 'div', 'mul', 'shiftrot')
    units = dict.fromkeys(unit_names, 1)
    pspec = TestMemPspec(ldst_ifacetype='bare_wb',
                         imem_ifacetype='bare_wb',
                         addr_wid=48,
                         mask_wid=8,
                         reg_wid=64,
                         units=units)
    dut = TestIssuer(pspec)

    # hand over to the nmigen command-line front-end first
    main(dut, ports=dut.ports(), name="test_issuer")

    # no command-line arguments at all: write RTLIL using only the
    # external ports
    if len(sys.argv) == 1:
        rtlil_text = rtlil.convert(dut, ports=dut.external_ports(),
                                   name="test_issuer")
        with open("test_issuer.il", "w") as f:
            f.write(rtlil_text)