27afb68ed4546c2751f87f35db7373f845c7464b
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from soc.decoder.power_decoder import create_pdecode
25 from soc.decoder.power_decoder2 import PowerDecode2
26 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
27 from soc.decoder.decode2execute1 import Data
28 from soc.experiment.testmem import TestMemory # test only for instructions
29 from soc.regfile.regfiles import StateRegs, FastRegs
30 from soc.simple.core import NonProductionCore
31 from soc.config.test.test_loadstore import TestMemPspec
32 from soc.config.ifetch import ConfigFetchUnit
33 from soc.decoder.power_enums import MicrOp
34 from soc.debug.dmi import CoreDebug, DMIInterface
35 from soc.debug.jtag import JTAG
36 from soc.config.pinouts import get_pinspecs
37 from soc.config.state import CoreState
38 from soc.interrupts.xics import XICS_ICP, XICS_ICS
39 from soc.bus.simple_gpio import SimpleGPIO
40 from soc.clock.select import ClockSelect, DummyPLL
41
42
43 from nmutil.util import rising_edge
44
45
class TestIssuerInternal(Elaboratable):
    """TestIssuer - reads instructions from TestMemory and issues them

    efficiency and speed is not the main goal here: functional correctness is.

    The issuer FSM cycles through four states:

    * IDLE        - captures the PC, starts the instruction fetch and MSR read
    * INSN_READ   - waits for the fetch unit, then decodes the instruction
    * INSN_START  - flags the instruction as valid and issues it to the core
    * INSN_ACTIVE - waits for the core to go non-busy, then updates the PC
    """
    def __init__(self, pspec):
        """Create the issuer and its sub-components from a parameter spec.

        pspec is inspected with hasattr for the optional attributes
        "debug", "xics" and "gpio".  NOTE: when JTAG is enabled, pspec is
        *modified* (wb_icache_en / wb_dcache_en are added to it).
        """

        # JTAG interface.  add this right at the start because if it's
        # added it *modifies* the pspec, by adding enable/disable signals
        # for parts of the rest of the core
        self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
        if self.jtag_en:
            subset = {'uart', 'mtwi', 'eint', 'gpio', 'mspi0', 'mspi1',
                      'pwm', 'sd0', 'sdr'}
            self.jtag = JTAG(get_pinspecs(subset=subset))
            # add signals to pspec to enable/disable icache and dcache
            # (or data and instruction wishbone if icache/dcache not included)
            # https://bugs.libre-soc.org/show_bug.cgi?id=520
            # TODO: do we actually care if these are not domain-synchronised?
            # honestly probably not.
            pspec.wb_icache_en = self.jtag.wb_icache_en
            pspec.wb_dcache_en = self.jtag.wb_dcache_en

        # add interrupt controller?
        self.xics = hasattr(pspec, "xics") and pspec.xics == True
        if self.xics:
            self.xics_icp = XICS_ICP()
            self.xics_ics = XICS_ICS()
            self.int_level_i = self.xics_ics.int_level_i

        # add GPIO peripheral?
        self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
        if self.gpio:
            self.simple_gpio = SimpleGPIO()
            self.gpio_o = self.simple_gpio.gpio_o

        # main instruction core
        self.core = NonProductionCore(pspec)

        # instruction decoder.  goes into Trap Record
        pdecode = create_pdecode()
        self.cur_state = CoreState("cur") # current state (MSR/PC/EINT)
        self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state,
                                     opkls=IssuerDecode2ToOperand)

        # Test Instruction memory
        self.imem = ConfigFetchUnit(pspec).fu
        # one-row cache of instruction read
        self.iline = Signal(64) # one instruction line
        self.iprev_adr = Signal(64) # previous address: if different, do read

        # DMI interface
        self.dbg = CoreDebug()

        # instruction go/monitor
        self.pc_o = Signal(64, reset_less=True)
        self.pc_i = Data(64, "pc_i") # set "ok" to indicate "please change me"
        self.core_bigendian_i = Signal()
        self.busy_o = Signal(reset_less=True)
        self.memerr_o = Signal(reset_less=True)

        # STATE regfile read/write ports for PC and MSR
        staterf = self.core.regs.rf['state']
        self.state_r_pc = staterf.r_ports['cia'] # PC rd
        self.state_w_pc = staterf.w_ports['d_wr1'] # PC wr
        self.state_r_msr = staterf.r_ports['msr'] # MSR rd

        # DMI interface access
        intrf = self.core.regs.rf['int']
        crrf = self.core.regs.rf['cr']
        xerrf = self.core.regs.rf['xer']
        self.int_r = intrf.r_ports['dmi'] # INT read
        self.cr_r = crrf.r_ports['full_cr_dbg'] # CR read
        self.xer_r = xerrf.r_ports['full_xer'] # XER read

        # hack method of keeping an eye on whether branch/trap set the PC
        self.state_nia = self.core.regs.rf['state'].w_ports['nia']
        self.state_nia.wen.name = 'state_nia_wen'

    def elaborate(self, platform):
        """Build the issuer: submodules, clock domains, reset, issue FSM,
        DMI regfile read-back and the DEC/TB update FSM.  Returns the Module.

        NOTE: local names bound to Signal() / ClockDomain() are significant:
        nmigen infers the hardware names from them, so do not rename them.
        """
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        m.submodules.core = core = DomainRenamer("coresync")(self.core)
        m.submodules.imem = imem = self.imem
        m.submodules.dbg = dbg = self.dbg
        if self.jtag_en:
            m.submodules.jtag = jtag = self.jtag
            # TODO: UART2GDB mux, here, from external pin
            # see https://bugs.libre-soc.org/show_bug.cgi?id=499
            sync += dbg.dmi.connect_to(jtag.dmi)

        cur_state = self.cur_state

        # XICS interrupt handler
        if self.xics:
            m.submodules.xics_icp = icp = self.xics_icp
            m.submodules.xics_ics = ics = self.xics_ics
            comb += icp.ics_i.eq(ics.icp_o)           # connect ICS to ICP
            sync += cur_state.eint.eq(icp.core_irq_o) # connect ICP to core

        # GPIO test peripheral
        if self.gpio:
            m.submodules.simple_gpio = simple_gpio = self.simple_gpio

        # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
        # XXX causes litex ECP5 test to get wrong idea about input and output
        # (but works with verilator sim *sigh*)
        #if self.gpio and self.xics:
        #   comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

        # instruction decoder (created in __init__)
        m.submodules.dec2 = pdecode2 = self.pdecode2

        # convenience
        d_reg, d_cr, d_xer = dbg.d_gpr, dbg.d_cr, dbg.d_xer
        intrf = self.core.regs.rf['int']

        # clock delay power-on reset
        cd_por = ClockDomain(reset_less=True)
        cd_sync = ClockDomain()
        core_sync = ClockDomain("coresync")
        m.domains += cd_por, cd_sync, core_sync

        ti_rst = Signal(reset_less=True)
        delay = Signal(range(4), reset=3)
        with m.If(delay != 0):
            m.d.por += delay.eq(delay - 1)
        comb += cd_por.clk.eq(ClockSignal())

        # power-on reset delay.  the comparison MUST be parenthesised:
        # python's "|" binds tighter than "!=", so without the brackets
        # this would compare delay against (0 | core_rst_o | rst)
        core_rst = ResetSignal("coresync")
        comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
        comb += core_rst.eq(ti_rst)

        # busy/halted signals from core
        comb += self.busy_o.eq(core.busy_o)
        comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

        # temporary hack: says "go" immediately for both address gen and ST
        ldst = core.fus.fus['ldst0']
        st_go_edge = rising_edge(m, ldst.st.rel_o)
        m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o) # link addr-go direct to rel
        m.d.comb += ldst.st.go_i.eq(st_go_edge) # link store-go to rising rel

        # PC and instruction from I-Memory
        pc_changed = Signal() # note write to PC
        comb += self.pc_o.eq(cur_state.pc)
        ilatch = Signal(32)

        # next instruction (+4 on current)
        nia = Signal(64, reset_less=True)
        comb += nia.eq(cur_state.pc + 4)

        # read the PC
        pc = Signal(64, reset_less=True)
        pc_ok_delay = Signal()
        sync += pc_ok_delay.eq(~self.pc_i.ok)
        with m.If(self.pc_i.ok):
            # incoming override (start from pc_i)
            comb += pc.eq(self.pc_i.data)
        with m.Else():
            # otherwise read StateRegs regfile for PC...
            comb += self.state_r_pc.ren.eq(1<<StateRegs.PC)
        # ... but on a 1-clock delay
        with m.If(pc_ok_delay):
            comb += pc.eq(self.state_r_pc.data_o)

        # don't write pc every cycle
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.data_i.eq(0)

        # don't read msr every cycle
        comb += self.state_r_msr.ren.eq(0)
        msr_read = Signal(reset=1)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        #comb += dbg.state.pc.eq(cur_state.pc)
        comb += dbg.state.msr.eq(cur_state.msr)

        # temporaries
        core_busy_o = core.busy_o                 # core is busy
        core_ivalid_i = core.ivalid_i             # instruction is valid
        core_issue_i = core.issue_i               # instruction is issued
        dec_opcode_i = pdecode2.dec.raw_opcode_in # raw opcode

        insn_type = core.e.do.insn_type

        # actually use a nmigen FSM for the first time (w00t)
        # this FSM is perhaps unusual in that it detects conditions
        # then "holds" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.
        with m.FSM():

            # waiting (zzz)
            with m.State("IDLE"):
                sync += pc_changed.eq(0)
                sync += core.e.eq(0)
                sync += core.raw_insn_i.eq(0)
                sync += core.bigendian_i.eq(0)
                with m.If(~dbg.core_stop_o & ~core_rst):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                    sync += cur_state.pc.eq(pc)

                    # initiate read of MSR.  arrives one clock later
                    comb += self.state_r_msr.ren.eq(1<<StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ" # move to "wait for bus" phase
                with m.Else():
                    comb += core.core_stopped_i.eq(1)
                    comb += dbg.core_stopped_i.eq(1)

            # waiting for instruction bus (stays there until not busy)
            with m.State("INSN_READ"):
                # one cycle later, msr read arrives.  valid only once.
                with m.If(~msr_read):
                    sync += msr_read.eq(1) # yeah don't read it again
                    sync += cur_state.msr.eq(self.state_r_msr.data_o)
                with m.If(self.imem.f_busy_o): # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    f_instr_o = self.imem.f_instr_o
                    if f_instr_o.width == 32:
                        insn = f_instr_o
                    else:
                        # wider fetch line: PC bit 2 selects the 32-bit word
                        insn = f_instr_o.word_select(cur_state.pc[2], 32)
                    comb += dec_opcode_i.eq(insn) # actual opcode
                    # also drop PC and MSR (cur_state) into decode "state"
                    sync += core.e.eq(pdecode2.e)
                    sync += core.state.eq(cur_state)
                    sync += core.raw_insn_i.eq(dec_opcode_i)
                    sync += core.bigendian_i.eq(self.core_bigendian_i)
                    sync += ilatch.eq(insn) # latch current insn
                    m.next = "INSN_START" # move to "start"

            # one-cycle pause: flags the instruction as valid and issues it
            with m.State("INSN_START"):
                comb += core_ivalid_i.eq(1) # instruction is valid
                comb += core_issue_i.eq(1)  # and issued

                m.next = "INSN_ACTIVE" # move to "wait completion"

            # instruction started: must wait till it finishes
            with m.State("INSN_ACTIVE"):
                with m.If(insn_type != MicrOp.OP_NOP):
                    comb += core_ivalid_i.eq(1) # instruction is valid
                with m.If(self.state_nia.wen & (1<<StateRegs.PC)):
                    sync += pc_changed.eq(1)
                with m.If(~core_busy_o): # instruction done!
                    # ok here we are not reading the branch unit.  TODO
                    # this just blithely overwrites whatever pipeline
                    # updated the PC
                    with m.If(~pc_changed):
                        comb += self.state_w_pc.wen.eq(1<<StateRegs.PC)
                        comb += self.state_w_pc.data_i.eq(nia)
                    sync += core.e.eq(0)
                    sync += core.raw_insn_i.eq(0)
                    sync += core.bigendian_i.eq(0)
                    m.next = "IDLE" # back to idle

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        with m.If(d_reg.req): # request for regfile access being made
            # TODO: error-check this
            # XXX should this be combinatorial?  sync better?
            if intrf.unary:
                comb += self.int_r.ren.eq(1<<d_reg.addr)
            else:
                comb += self.int_r.addr.eq(d_reg.addr)
                comb += self.int_r.ren.eq(1)
        d_reg_delay = Signal()
        sync += d_reg_delay.eq(d_reg.req)
        with m.If(d_reg_delay):
            # data arrives one clock later
            comb += d_reg.data.eq(self.int_r.data_o)
            comb += d_reg.ack.eq(1)

        # sigh same thing for CR debug
        with m.If(d_cr.req): # request for regfile access being made
            comb += self.cr_r.ren.eq(0b11111111) # enable all
        d_cr_delay = Signal()
        sync += d_cr_delay.eq(d_cr.req)
        with m.If(d_cr_delay):
            # data arrives one clock later
            comb += d_cr.data.eq(self.cr_r.data_o)
            comb += d_cr.ack.eq(1)

        # aaand XER...
        with m.If(d_xer.req): # request for regfile access being made
            comb += self.xer_r.ren.eq(0b111111) # enable all
        d_xer_delay = Signal()
        sync += d_xer_delay.eq(d_xer.req)
        with m.If(d_xer_delay):
            # data arrives one clock later
            comb += d_xer.data.eq(self.xer_r.data_o)
            comb += d_xer.ack.eq(1)

        # DEC and TB inc/dec FSM
        self.tb_dec_fsm(m, cur_state.dec)

        return m

    def tb_dec_fsm(self, m, spr_dec):
        """tb_dec_fsm

        this is a FSM for updating either dec or tb.  it runs alternately
        DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
        value to DEC, however the regfile has "passthrough" on it so this
        *should* be ok.

        m is the Module to add the FSM to; spr_dec receives (on sync) a copy
        of each newly-written DEC value.  returns m.

        see v3.0B p1097-1099 for Timer Resource and p1065 and p1076
        """

        comb, sync = m.d.comb, m.d.sync
        fast_rf = self.core.regs.rf['fast']
        fast_r_dectb = fast_rf.r_ports['issue'] # DEC/TB
        fast_w_dectb = fast_rf.w_ports['issue'] # DEC/TB

        with m.FSM():

            # initiates read of current DEC
            with m.State("DEC_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.DEC)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "DEC_WRITE"

            # waits for DEC read to arrive (1 cycle), updates with new value
            with m.State("DEC_WRITE"):
                new_dec = Signal(64)
                # TODO: MSR.LPCR 32-bit decrement mode
                comb += new_dec.eq(fast_r_dectb.data_o - 1)
                comb += fast_w_dectb.addr.eq(FastRegs.DEC)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_dec)
                sync += spr_dec.eq(new_dec) # copy into cur_state for decoder
                m.next = "TB_READ"

            # initiates read of current TB
            with m.State("TB_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.TB)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "TB_WRITE"

            # waits for read TB to arrive, initiates write of current TB
            with m.State("TB_WRITE"):
                new_tb = Signal(64)
                comb += new_tb.eq(fast_r_dectb.data_o + 1)
                comb += fast_w_dectb.addr.eq(FastRegs.TB)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_tb)
                m.next = "DEC_READ"

        return m

    def __iter__(self):
        """Yield the issuer's own signals plus the core and imem ports."""
        yield from self.pc_i.ports()
        yield self.pc_o
        yield self.memerr_o
        yield from self.core.ports()
        yield from self.imem.ports()
        yield self.core_bigendian_i
        yield self.busy_o

    def ports(self):
        """Return everything yielded by __iter__ as a list."""
        return list(self)

    def external_ports(self):
        """Return the list of ports to expose at the top level.

        includes the JTAG ports when JTAG is enabled (otherwise the DMI
        ports), the instruction and data buses, and the optional XICS and
        GPIO buses.
        """
        # copy: never extend the list handed back by pc_i.ports() in place
        ports = list(self.pc_i.ports())
        ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
                 ]

        if self.jtag_en:
            ports += list(self.jtag.external_ports())
        else:
            # don't add DMI if JTAG is enabled
            ports += list(self.dbg.dmi.ports())

        ports += list(self.imem.ibus.fields.values())
        ports += list(self.core.l0.cmpi.lsmem.lsi.slavebus.fields.values())

        if self.xics:
            ports += list(self.xics_icp.bus.fields.values())
            ports += list(self.xics_ics.bus.fields.values())
            ports.append(self.int_level_i)

        if self.gpio:
            ports += list(self.simple_gpio.bus.fields.values())
            ports.append(self.gpio_o)

        return ports
455
456
class TestIssuer(Elaboratable):
    """Top-level wrapper: TestIssuerInternal plus a DummyPLL and ClockSelect.

    the "coresync" clock is driven either from the clock selector output
    (when pspec.use_pll is set) or directly from the default clock.
    """
    def __init__(self, pspec):
        """Create the internal issuer, PLL and clock selector from pspec."""
        self.ti = TestIssuerInternal(pspec)

        self.pll = DummyPLL()
        self.clksel = ClockSelect()

        # PLL direct clock or not
        self.pll_en = hasattr(pspec, "use_pll") and pspec.use_pll

    def elaborate(self, platform):
        """Wire up the issuer, PLL, clock selector, clocks and resets."""
        m = Module()
        comb = m.d.comb

        # TestIssuer runs at direct clock
        m.submodules.ti = self.ti

        # ClockSelect runs at PLL output internal clock rate
        m.submodules.clksel = clksel = DomainRenamer("pllclk")(self.clksel)
        m.submodules.pll = pll = self.pll

        # add the PLL clock domain used by the DomainRenamer above
        cd_pll = ClockDomain("pllclk")
        m.domains += cd_pll

        # internal ("coresync") clock is set to selector clock-out when the
        # PLL is enabled, otherwise to the default clock
        intclk = ClockSignal("coresync")
        if self.pll_en:
            comb += intclk.eq(clksel.core_clk_o)
        else:
            comb += intclk.eq(ClockSignal())

        # PLL clock established.  has the side-effect of running clksel
        # at the PLL's speed (see DomainRenamer("pllclk") above)
        pllclk = ClockSignal("pllclk")
        comb += pllclk.eq(pll.clk_pll_o)

        # wire up external 24mhz to PLL and clksel
        comb += clksel.clk_24_i.eq(ClockSignal())
        comb += pll.clk_24_i.eq(clksel.clk_24_i)

        # now wire up ResetSignals.  don't mind them all being in this domain
        #int_rst = ResetSignal("coresync")
        pll_rst = ResetSignal("pllclk")
        #comb += int_rst.eq(ResetSignal())
        comb += pll_rst.eq(ResetSignal())

        return m

    def ports(self):
        """Return issuer, PLL, default clock/reset and clock selector ports."""
        return list(self.ti.ports()) + list(self.pll.ports()) + \
               [ClockSignal(), ResetSignal()] + \
               list(self.clksel.ports())

    def external_ports(self):
        """Return the issuer's external ports plus clock, reset and the
        clock selector's clk_sel_i / pll_48_o."""
        ports = self.ti.external_ports()
        ports.append(ClockSignal())
        ports.append(ResetSignal())
        ports.append(self.clksel.clk_sel_i)
        ports.append(self.clksel.pll_48_o)
        return ports
520
521
if __name__ == '__main__':
    # one Function Unit of each kind
    fu_kinds = ('alu', 'cr', 'branch', 'trap', 'logical',
                'spr', 'div', 'mul', 'shiftrot')
    units = dict.fromkeys(fu_kinds, 1)

    pspec = TestMemPspec(ldst_ifacetype='bare_wb',
                         imem_ifacetype='bare_wb',
                         addr_wid=48,
                         mask_wid=8,
                         reg_wid=64,
                         units=units)
    dut = TestIssuer(pspec)

    # hand over to the nmigen command-line front-end (with all ports)
    main(dut, ports=dut.ports(), name="test_issuer")

    # no command-line arguments: also write out RTLIL, external ports only
    if len(sys.argv) == 1:
        rtlil_text = rtlil.convert(dut, ports=dut.external_ports(),
                                   name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(rtlil_text)