reorder / reorganise reset signals slightly
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to at least provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from soc.decoder.power_decoder import create_pdecode
25 from soc.decoder.power_decoder2 import PowerDecode2
26 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
27 from soc.decoder.decode2execute1 import Data
28 from soc.experiment.testmem import TestMemory # test only for instructions
29 from soc.regfile.regfiles import StateRegs, FastRegs
30 from soc.simple.core import NonProductionCore
31 from soc.config.test.test_loadstore import TestMemPspec
32 from soc.config.ifetch import ConfigFetchUnit
33 from soc.decoder.power_enums import MicrOp
34 from soc.debug.dmi import CoreDebug, DMIInterface
35 from soc.debug.jtag import JTAG
36 from soc.config.pinouts import get_pinspecs
37 from soc.config.state import CoreState
38 from soc.interrupts.xics import XICS_ICP, XICS_ICS
39 from soc.bus.simple_gpio import SimpleGPIO
40 from soc.clock.select import ClockSelect, DummyPLL
41
42
43 from nmutil.util import rising_edge
44
45
class TestIssuerInternal(Elaboratable):
    """TestIssuer - reads instructions from TestMemory and issues them

    efficiency and speed is not the main goal here: functional correctness is.

    The issuer is a four-state FSM (IDLE, INSN_READ, INSN_START,
    INSN_ACTIVE) that fetches one instruction at a time, hands it to the
    core, waits for the core to stop being busy and then writes PC+4 back
    to the state regfile unless the instruction itself wrote the PC.
    """
    def __init__(self, pspec):
        """Create the core, decoder, fetch unit and debug interfaces.

        pspec is the parameter specification object.  it is passed straight
        to NonProductionCore and ConfigFetchUnit, and these optional
        attributes are examined here:

        * pspec.xics == True     : add XICS ICP/ICS interrupt controllers
        * pspec.gpio == True     : add a SimpleGPIO peripheral
        * pspec.debug == 'jtag'  : add a JTAG interface
        """

        # add interrupt controller?
        self.xics = hasattr(pspec, "xics") and pspec.xics == True
        if self.xics:
            self.xics_icp = XICS_ICP()
            self.xics_ics = XICS_ICS()
            self.int_level_i = self.xics_ics.int_level_i

        # add GPIO peripheral?
        self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
        if self.gpio:
            self.simple_gpio = SimpleGPIO()
            self.gpio_o = self.simple_gpio.gpio_o

        # main instruction core
        self.core = NonProductionCore(pspec)

        # instruction decoder.  goes into Trap Record
        pdecode = create_pdecode()
        self.cur_state = CoreState("cur")  # current state (MSR/PC/EINT)
        self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state,
                                     opkls=IssuerDecode2ToOperand)

        # Test Instruction memory
        self.imem = ConfigFetchUnit(pspec).fu
        # one-row cache of instruction read
        self.iline = Signal(64)      # one instruction line
        self.iprev_adr = Signal(64)  # previous address: if different, do read

        # DMI interface
        self.dbg = CoreDebug()

        # JTAG interface
        self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
        if self.jtag_en:
            subset = {'uart', 'mtwi', 'eint', 'gpio', 'mspi0', 'mspi1',
                      'pwm', 'sd0', 'sdr'}
            self.jtag = JTAG(get_pinspecs(subset=subset))

        # instruction go/monitor
        self.pc_o = Signal(64, reset_less=True)
        self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
        self.core_bigendian_i = Signal()
        self.busy_o = Signal(reset_less=True)
        self.memerr_o = Signal(reset_less=True)

        # STATE regfile read/write ports for PC and MSR
        staterf = self.core.regs.rf['state']
        self.state_r_pc = staterf.r_ports['cia']    # PC rd
        self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
        self.state_r_msr = staterf.r_ports['msr']   # MSR rd

        # DMI interface access
        intrf = self.core.regs.rf['int']
        crrf = self.core.regs.rf['cr']
        xerrf = self.core.regs.rf['xer']
        self.int_r = intrf.r_ports['dmi']        # INT read
        self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read
        self.xer_r = xerrf.r_ports['full_xer']   # XER read

        # hack method of keeping an eye on whether branch/trap set the PC
        self.state_nia = self.core.regs.rf['state'].w_ports['nia']
        self.state_nia.wen.name = 'state_nia_wen'

    def elaborate(self, platform):
        """Build the issuer: resets, PC read, issue FSM and DMI regfile access.

        NOTE: local variable names of Signals and ClockDomains are
        significant, because nmigen derives the object names from them
        (cd_por -> "por", cd_sync -> "sync").  do not rename them casually.
        """
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        m.submodules.core = core = DomainRenamer("coresync")(self.core)
        m.submodules.imem = self.imem
        m.submodules.dbg = dbg = self.dbg
        if self.jtag_en:
            m.submodules.jtag = jtag = self.jtag
            # TODO: UART2GDB mux, here, from external pin
            # see https://bugs.libre-soc.org/show_bug.cgi?id=499
            sync += dbg.dmi.connect_to(jtag.dmi)

        cur_state = self.cur_state

        # XICS interrupt handler
        if self.xics:
            m.submodules.xics_icp = icp = self.xics_icp
            m.submodules.xics_ics = ics = self.xics_ics
            comb += icp.ics_i.eq(ics.icp_o)            # connect ICS to ICP
            sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core

        # GPIO test peripheral
        if self.gpio:
            m.submodules.simple_gpio = simple_gpio = self.simple_gpio

        # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
        if self.gpio and self.xics:
            comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

        # instruction decoder (created in __init__: a second, unused
        # create_pdecode() call used to be made here and has been dropped)
        m.submodules.dec2 = pdecode2 = self.pdecode2

        # convenience
        d_reg, d_cr, d_xer = dbg.d_gpr, dbg.d_cr, dbg.d_xer
        intrf = self.core.regs.rf['int']

        # clock delay power-on reset
        cd_por = ClockDomain(reset_less=True)
        cd_sync = ClockDomain()
        core_sync = ClockDomain("coresync")
        m.domains += cd_por, cd_sync, core_sync

        ti_rst = Signal(reset_less=True)
        delay = Signal(range(4), reset=3)
        with m.If(delay != 0):
            m.d.por += delay.eq(delay - 1)
        comb += cd_por.clk.eq(ClockSignal())
        comb += core_sync.clk.eq(ClockSignal())

        # power-on reset delay.  the brackets around (delay != 0) are
        # essential: in python "|" binds tighter than "!=", so without them
        # the expression is delay != (0 | core_rst_o | rst), which drops
        # the reset when delay == 1 and a reset source is asserted.
        core_rst = ResetSignal("coresync")
        comb += ti_rst.eq((delay != 0) | dbg.core_rst_o | ResetSignal())
        comb += core_rst.eq(ti_rst)

        # busy/halted signals from core
        comb += self.busy_o.eq(core.busy_o)
        comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

        # temporary hack: says "go" immediately for both address gen and ST
        ldst = core.fus.fus['ldst0']
        st_go_edge = rising_edge(m, ldst.st.rel_o)
        m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)  # link addr-go direct to rel
        m.d.comb += ldst.st.go_i.eq(st_go_edge)     # link store-go to rising rel

        # PC and instruction from I-Memory
        pc_changed = Signal()  # note write to PC
        comb += self.pc_o.eq(cur_state.pc)
        ilatch = Signal(32)

        # next instruction (+4 on current)
        nia = Signal(64, reset_less=True)
        comb += nia.eq(cur_state.pc + 4)

        # read the PC
        pc = Signal(64, reset_less=True)
        pc_ok_delay = Signal()
        sync += pc_ok_delay.eq(~self.pc_i.ok)
        with m.If(self.pc_i.ok):
            # incoming override (start from pc_i)
            comb += pc.eq(self.pc_i.data)
        with m.Else():
            # otherwise read StateRegs regfile for PC...
            comb += self.state_r_pc.ren.eq(1 << StateRegs.PC)
            # ... but on a 1-clock delay
            with m.If(pc_ok_delay):
                comb += pc.eq(self.state_r_pc.data_o)

        # don't write pc every cycle (defaults: overridden inside the FSM)
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.data_i.eq(0)

        # don't read msr every cycle (default: overridden inside the FSM)
        comb += self.state_r_msr.ren.eq(0)
        msr_read = Signal(reset=1)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        #comb += dbg.state.pc.eq(cur_state.pc)
        comb += dbg.state.msr.eq(cur_state.msr)

        # temporaries
        core_busy_o = core.busy_o                  # core is busy
        core_ivalid_i = core.ivalid_i              # instruction is valid
        core_issue_i = core.issue_i                # instruction is issued
        dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

        insn_type = core.e.do.insn_type

        # actually use a nmigen FSM for the first time (w00t)
        # this FSM is perhaps unusual in that it detects conditions
        # then "holds" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.
        with m.FSM():

            # waiting (zzz)
            with m.State("IDLE"):
                sync += pc_changed.eq(0)
                sync += core.e.eq(0)
                sync += core.raw_insn_i.eq(0)
                sync += core.bigendian_i.eq(0)
                with m.If(~dbg.core_stop_o & ~core_rst):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                    sync += cur_state.pc.eq(pc)

                    # initiate read of MSR.  arrives one clock later
                    comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase
                with m.Else():
                    comb += core.core_stopped_i.eq(1)
                    comb += dbg.core_stopped_i.eq(1)

            # waiting for instruction bus (stays here until not busy).
            # the MSR requested in IDLE is also captured here.
            with m.State("INSN_READ"):
                # one cycle later, msr read arrives.  valid only once.
                with m.If(~msr_read):
                    sync += msr_read.eq(1)  # yeah don't read it again
                    sync += cur_state.msr.eq(self.state_r_msr.data_o)
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    f_instr_o = self.imem.f_instr_o
                    if f_instr_o.width == 32:
                        insn = f_instr_o
                    else:
                        # wider fetch: PC bit 2 selects the 32-bit word
                        insn = f_instr_o.word_select(cur_state.pc[2], 32)
                    comb += dec_opcode_i.eq(insn)  # actual opcode
                    sync += core.e.eq(pdecode2.e)
                    # also drop PC and MSR into decode "state"
                    sync += core.state.eq(cur_state)
                    sync += core.raw_insn_i.eq(dec_opcode_i)
                    sync += core.bigendian_i.eq(self.core_bigendian_i)
                    sync += ilatch.eq(insn)  # latch current insn
                    m.next = "INSN_START"  # move to "start"

            # decoded instruction is latched: flag it valid and issue it
            # (this state lasts exactly one cycle)
            with m.State("INSN_START"):
                comb += core_ivalid_i.eq(1)  # instruction is valid
                comb += core_issue_i.eq(1)   # and issued

                m.next = "INSN_ACTIVE"  # move to "wait completion"

            # instruction started: must wait till it finishes
            with m.State("INSN_ACTIVE"):
                with m.If(insn_type != MicrOp.OP_NOP):
                    comb += core_ivalid_i.eq(1)  # instruction is valid
                with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
                    sync += pc_changed.eq(1)
                with m.If(~core_busy_o):  # instruction done!
                    # ok here we are not reading the branch unit.  TODO
                    # this just blithely overwrites whatever pipeline
                    # updated the PC
                    with m.If(~pc_changed):
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.data_i.eq(nia)
                    sync += core.e.eq(0)
                    sync += core.raw_insn_i.eq(0)
                    sync += core.bigendian_i.eq(0)
                    m.next = "IDLE"  # back to idle

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        with m.If(d_reg.req):  # request for regfile access being made
            # TODO: error-check this
            # XXX should this be combinatorial?  sync better?
            if intrf.unary:
                comb += self.int_r.ren.eq(1 << d_reg.addr)
            else:
                comb += self.int_r.addr.eq(d_reg.addr)
                comb += self.int_r.ren.eq(1)
        d_reg_delay = Signal()
        sync += d_reg_delay.eq(d_reg.req)
        with m.If(d_reg_delay):
            # data arrives one clock later
            comb += d_reg.data.eq(self.int_r.data_o)
            comb += d_reg.ack.eq(1)

        # sigh same thing for CR debug
        with m.If(d_cr.req):  # request for regfile access being made
            comb += self.cr_r.ren.eq(0b11111111)  # enable all
        d_cr_delay = Signal()
        sync += d_cr_delay.eq(d_cr.req)
        with m.If(d_cr_delay):
            # data arrives one clock later
            comb += d_cr.data.eq(self.cr_r.data_o)
            comb += d_cr.ack.eq(1)

        # aaand XER...
        with m.If(d_xer.req):  # request for regfile access being made
            comb += self.xer_r.ren.eq(0b111111)  # enable all
        d_xer_delay = Signal()
        sync += d_xer_delay.eq(d_xer.req)
        with m.If(d_xer_delay):
            # data arrives one clock later
            comb += d_xer.data.eq(self.xer_r.data_o)
            comb += d_xer.ack.eq(1)

        # DEC and TB inc/dec FSM
        self.tb_dec_fsm(m, cur_state.dec)

        return m

    def tb_dec_fsm(self, m, spr_dec):
        """tb_dec_fsm

        this is a FSM for updating either dec or tb.  it runs alternately
        DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
        value to DEC, however the regfile has "passthrough" on it so this
        *should* be ok.

        m is the Module to add the FSM to, and spr_dec is the signal which
        receives a copy of each newly-written DEC value.  returns m.

        see v3.0B p1097-1099 for Timer Resource and p1065 and p1076
        """

        comb, sync = m.d.comb, m.d.sync
        fast_rf = self.core.regs.rf['fast']
        fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
        fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB

        with m.FSM():

            # initiates read of current DEC
            with m.State("DEC_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.DEC)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "DEC_WRITE"

            # waits for DEC read to arrive (1 cycle), updates with new value
            with m.State("DEC_WRITE"):
                new_dec = Signal(64)
                # TODO: MSR.LPCR 32-bit decrement mode
                comb += new_dec.eq(fast_r_dectb.data_o - 1)
                comb += fast_w_dectb.addr.eq(FastRegs.DEC)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_dec)
                sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
                m.next = "TB_READ"

            # initiates read of current TB
            with m.State("TB_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.TB)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "TB_WRITE"

            # waits for read TB to arrive, initiates write of current TB
            with m.State("TB_WRITE"):
                new_tb = Signal(64)
                comb += new_tb.eq(fast_r_dectb.data_o + 1)
                comb += fast_w_dectb.addr.eq(FastRegs.TB)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_tb)
                m.next = "DEC_READ"

        return m

    def __iter__(self):
        """Yield pc_i, pc_o, memerr_o, core and imem ports, then the rest."""
        yield from self.pc_i.ports()
        yield self.pc_o
        yield self.memerr_o
        yield from self.core.ports()
        yield from self.imem.ports()
        yield self.core_bigendian_i
        yield self.busy_o

    def ports(self):
        """Return everything produced by __iter__ as a list."""
        return list(self)

    def external_ports(self):
        """Return the list of ports exposed at the top level.

        JTAG ports replace the DMI ports when JTAG is enabled; XICS and
        GPIO bus fields are only included when those peripherals exist.
        """
        # take a copy, so the list handed back by pc_i.ports() is not
        # extended in-place by the "+=" operations below
        ports = list(self.pc_i.ports())
        ports += [self.pc_o, self.memerr_o, self.core_bigendian_i,
                  self.busy_o]

        if self.jtag_en:
            ports += list(self.jtag.external_ports())
        else:
            # don't add DMI if JTAG is enabled
            ports += list(self.dbg.dmi.ports())

        ports += list(self.imem.ibus.fields.values())
        ports += list(self.core.l0.cmpi.lsmem.lsi.slavebus.fields.values())

        if self.xics:
            ports += list(self.xics_icp.bus.fields.values())
            ports += list(self.xics_ics.bus.fields.values())
            ports.append(self.int_level_i)

        if self.gpio:
            ports += list(self.simple_gpio.bus.fields.values())
            ports.append(self.gpio_o)

        return ports
445
446
class TestIssuer(Elaboratable):
    """Top-level wrapper: TestIssuerInternal plus a DummyPLL and ClockSelect.

    The internal issuer is renamed into the "intclk" domain and the clock
    selector into the "pllclk" domain; both resets follow the default
    domain's reset.
    """
    def __init__(self, pspec):
        self.ti = TestIssuerInternal(pspec)
        self.pll = DummyPLL()
        self.clksel = ClockSelect()

    def elaborate(self, platform):
        m = Module()

        # TestIssuer runs at internal clock rate
        m.submodules.ti = DomainRenamer("intclk")(self.ti)
        # ClockSelect runs at PLL output internal clock rate
        clksel = DomainRenamer("pllclk")(self.clksel)
        m.submodules.clksel = clksel
        pll = self.pll
        m.submodules.pll = pll

        # the two clock domains referred to by the DomainRenamers above.
        # only pllclk is registered with the module.
        # NOTE(review): cd_int is deliberately left out of m.domains, as in
        # the original ("probably don't have to add cd_int because of
        # DomainRenamer("coresync")") - confirm this is really intended.
        cd_int = ClockDomain("intclk")
        cd_pll = ClockDomain("pllclk")
        m.domains += cd_pll

        m.d.comb += [
            # internal clock is set to selector clock-out.  has the
            # side-effect of running TestIssuer at this speed
            cd_int.clk.eq(clksel.core_clk_o),
            # PLL clock established.  has the side-effect of running
            # clksel at the PLL's speed
            cd_pll.clk.eq(pll.clk_pll_o),
            # wire up external 24mhz to PLL and clksel
            clksel.clk_24_i.eq(ClockSignal()),
            pll.clk_24_i.eq(clksel.clk_24_i),
        ]

        # now wire up ResetSignals.  don't mind them all being in this domain
        for domain_name in ("intclk", "pllclk"):
            m.d.comb += ResetSignal(domain_name).eq(ResetSignal())

        return m

    def ports(self):
        """All ports: issuer, PLL, default clock/reset, then clock selector."""
        result = list(self.ti.ports())
        result.extend(self.pll.ports())
        result.extend([ClockSignal(), ResetSignal()])
        result.extend(self.clksel.ports())
        return result

    def external_ports(self):
        """Issuer external ports plus clock, reset and clock-select pins."""
        ports = self.ti.external_ports()
        ports.extend([ClockSignal(),
                      ResetSignal(),
                      self.clksel.clk_sel_i,
                      self.clksel.pll_48_o])
        return ports
501
502
if __name__ == '__main__':
    # one Function Unit of each kind
    units = {
        'alu': 1,
        'cr': 1,
        'branch': 1,
        'trap': 1,
        'logical': 1,
        'spr': 1,
        'div': 1,
        'mul': 1,
        'shiftrot': 1,
    }
    pspec = TestMemPspec(ldst_ifacetype='bare_wb',
                         imem_ifacetype='bare_wb',
                         addr_wid=48,
                         mask_wid=8,
                         reg_wid=64,
                         units=units)
    dut = TestIssuer(pspec)

    # hand over to the nmigen command-line front-end first
    main(dut, ports=dut.ports(), name="test_issuer")

    # when run without arguments, also write the RTLIL with external ports
    no_arguments = len(sys.argv) == 1
    if no_arguments:
        rtlil_text = rtlil.convert(dut, ports=dut.external_ports(),
                                   name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(rtlil_text)