add jtag interface to issuer_verilog
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to at least provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from soc.decoder.power_decoder import create_pdecode
25 from soc.decoder.power_decoder2 import PowerDecode2
26 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
27 from soc.decoder.decode2execute1 import Data
28 from soc.experiment.testmem import TestMemory # test only for instructions
29 from soc.regfile.regfiles import StateRegs, FastRegs
30 from soc.simple.core import NonProductionCore
31 from soc.config.test.test_loadstore import TestMemPspec
32 from soc.config.ifetch import ConfigFetchUnit
33 from soc.decoder.power_enums import MicrOp
34 from soc.debug.dmi import CoreDebug, DMIInterface
35 from soc.debug.jtag import JTAG
36 from soc.config.state import CoreState
37 from soc.interrupts.xics import XICS_ICP, XICS_ICS
38 from soc.bus.simple_gpio import SimpleGPIO
39
40 from nmutil.util import rising_edge
41
42
class TestIssuer(Elaboratable):
    """TestIssuer - reads instructions from TestMemory and issues them

    efficiency and speed is not the main goal here: functional correctness is.

    the constructor takes a "pspec" configuration object.  optional
    attributes on it enable extra hardware:

    * pspec.xics == True     adds an XICS ICP/ICS interrupt controller
    * pspec.gpio == True     adds a SimpleGPIO peripheral
    * pspec.debug == 'jtag'  adds a JTAG block connected to the DMI

    elaborate() builds a four-state FSM (IDLE, INSN_READ, INSN_START,
    INSN_ACTIVE) that fetches, decodes and issues one instruction at a
    time, plus a second FSM (see tb_dec_fsm) that updates DEC and TB.
    """
    def __init__(self, pspec):

        # add interrupt controller?
        self.xics = hasattr(pspec, "xics") and pspec.xics == True
        if self.xics:
            self.xics_icp = XICS_ICP()
            self.xics_ics = XICS_ICS()
            self.int_level_i = self.xics_ics.int_level_i

        # add GPIO peripheral?
        self.gpio = hasattr(pspec, "gpio") and pspec.gpio == True
        if self.gpio:
            self.simple_gpio = SimpleGPIO()
            self.gpio_o = self.simple_gpio.gpio_o

        # main instruction core
        self.core = NonProductionCore(pspec)

        # instruction decoder.  goes into Trap Record
        pdecode = create_pdecode()
        self.cur_state = CoreState("cur")  # current state (MSR/PC/EINT)
        self.pdecode2 = PowerDecode2(pdecode, state=self.cur_state,
                                     opkls=IssuerDecode2ToOperand)

        # Test Instruction memory
        self.imem = ConfigFetchUnit(pspec).fu
        # one-row cache of instruction read
        self.iline = Signal(64)  # one instruction line
        self.iprev_adr = Signal(64)  # previous address: if different, do read

        # DMI interface
        self.dbg = CoreDebug()

        # JTAG interface
        self.jtag_en = hasattr(pspec, "debug") and pspec.debug == 'jtag'
        if self.jtag_en:
            self.jtag = JTAG()

        # instruction go/monitor
        self.pc_o = Signal(64, reset_less=True)
        self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
        self.core_bigendian_i = Signal()
        self.busy_o = Signal(reset_less=True)
        self.memerr_o = Signal(reset_less=True)

        # STATE regfile read/write ports for PC and MSR
        staterf = self.core.regs.rf['state']
        self.state_r_pc = staterf.r_ports['cia']  # PC rd
        self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
        self.state_r_msr = staterf.r_ports['msr']  # MSR rd

        # DMI interface access
        intrf = self.core.regs.rf['int']
        crrf = self.core.regs.rf['cr']
        xerrf = self.core.regs.rf['xer']
        self.int_r = intrf.r_ports['dmi']  # INT read
        self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read
        self.xer_r = xerrf.r_ports['full_xer']  # XER read

        # hack method of keeping an eye on whether branch/trap set the PC
        self.state_nia = staterf.w_ports['nia']
        self.state_nia.wen.name = 'state_nia_wen'

    def elaborate(self, platform):
        """builds the issuer: submodules, reset/clock domains, the
        fetch/issue FSM, the DMI register-read paths and the DEC/TB FSM.

        returns the nmigen Module.
        """
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        # the core runs in its own "coresync" domain (created below)
        m.submodules.core = core = DomainRenamer("coresync")(self.core)
        m.submodules.imem = imem = self.imem
        m.submodules.dbg = dbg = self.dbg
        if self.jtag_en:
            m.submodules.jtag = jtag = self.jtag
            comb += dbg.dmi.connect_to(jtag.dmi)

        cur_state = self.cur_state

        # XICS interrupt handler
        if self.xics:
            m.submodules.xics_icp = icp = self.xics_icp
            m.submodules.xics_ics = ics = self.xics_ics
            comb += icp.ics_i.eq(ics.icp_o)  # connect ICS to ICP
            sync += cur_state.eint.eq(icp.core_irq_o)  # connect ICP to core

        # GPIO test peripheral
        if self.gpio:
            m.submodules.simple_gpio = simple_gpio = self.simple_gpio

        # connect one GPIO output to ICS bit 15 (like in microwatt soc.vhdl)
        if self.gpio and self.xics:
            comb += self.int_level_i[15].eq(simple_gpio.gpio_o[0])

        # instruction decoder (constructed in __init__: no need to build
        # a second, unused, decoder here)
        m.submodules.dec2 = pdecode2 = self.pdecode2

        # convenience
        d_reg, d_cr, d_xer = dbg.d_gpr, dbg.d_cr, dbg.d_xer
        intrf = self.core.regs.rf['int']

        # clock delay power-on reset.  note: the local variable names
        # are left as-is because nmigen can derive object names from them
        cd_por = ClockDomain(reset_less=True)
        cd_sync = ClockDomain()
        core_sync = ClockDomain("coresync")
        m.domains += cd_por, cd_sync, core_sync

        # counts 3, 2, 1, 0 in the "por" domain and then stops at zero
        delay = Signal(range(4), reset=3)
        with m.If(delay != 0):
            m.d.por += delay.eq(delay - 1)
        comb += cd_por.clk.eq(ClockSignal())
        comb += core_sync.clk.eq(ClockSignal())
        # core is held in reset during the power-on delay OR when the
        # debug module requests a reset.  the brackets are essential:
        # python's "|" binds tighter than "!=", so without them this
        # would compare delay against (0 | core_rst_o) instead.
        comb += core.core_reset_i.eq((delay != 0) | dbg.core_rst_o)

        # busy/halted signals from core
        comb += self.busy_o.eq(core.busy_o)
        comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

        # temporary hack: says "go" immediately for both address gen and ST
        ldst = core.fus.fus['ldst0']
        st_go_edge = rising_edge(m, ldst.st.rel_o)
        m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)  # link addr-go direct to rel
        m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel

        # PC and instruction from I-Memory
        pc_changed = Signal()  # note write to PC
        comb += self.pc_o.eq(cur_state.pc)
        ilatch = Signal(32)

        # next instruction (+4 on current)
        nia = Signal(64, reset_less=True)
        comb += nia.eq(cur_state.pc + 4)

        # read the PC
        pc = Signal(64, reset_less=True)
        pc_ok_delay = Signal()
        sync += pc_ok_delay.eq(~self.pc_i.ok)
        with m.If(self.pc_i.ok):
            # incoming override (start from pc_i)
            comb += pc.eq(self.pc_i.data)
        with m.Else():
            # otherwise read StateRegs regfile for PC...
            comb += self.state_r_pc.ren.eq(1 << StateRegs.PC)
        # ... but on a 1-clock delay
        with m.If(pc_ok_delay):
            comb += pc.eq(self.state_r_pc.data_o)

        # don't write pc every cycle
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.data_i.eq(0)

        # don't read msr every cycle
        comb += self.state_r_msr.ren.eq(0)
        msr_read = Signal(reset=1)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        #comb += dbg.state.pc.eq(cur_state.pc)
        comb += dbg.state.msr.eq(cur_state.msr)

        # temporaries
        core_busy_o = core.busy_o  # core is busy
        core_ivalid_i = core.ivalid_i  # instruction is valid
        core_issue_i = core.issue_i  # instruction is issued
        dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

        insn_type = core.e.do.insn_type

        # actually use a nmigen FSM for the first time (w00t)
        # this FSM is perhaps unusual in that it detects conditions
        # then "holds" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.
        with m.FSM():

            # waiting (zzz)
            with m.State("IDLE"):
                sync += pc_changed.eq(0)
                sync += core.e.eq(0)
                sync += core.raw_insn_i.eq(0)
                sync += core.bigendian_i.eq(0)
                with m.If(~dbg.core_stop_o & ~core.core_reset_i):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                    sync += cur_state.pc.eq(pc)

                    # initiate read of MSR.  arrives one clock later
                    comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)
                    sync += msr_read.eq(0)

                    m.next = "INSN_READ"  # move to "wait for bus" phase
                with m.Else():
                    # stopped by debug or held in reset: report "stopped"
                    comb += core.core_stopped_i.eq(1)
                    comb += dbg.core_stopped_i.eq(1)

            # waiting for instruction bus (stays here while imem is busy)
            with m.State("INSN_READ"):
                # one cycle later, msr read arrives.  valid only once.
                with m.If(~msr_read):
                    sync += msr_read.eq(1)  # yeah don't read it again
                    sync += cur_state.msr.eq(self.state_r_msr.data_o)
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    f_instr_o = self.imem.f_instr_o
                    if f_instr_o.width == 32:
                        insn = f_instr_o
                    else:
                        # wider fetch: PC bit 2 selects the 32-bit word
                        insn = f_instr_o.word_select(cur_state.pc[2], 32)
                    comb += dec_opcode_i.eq(insn)  # actual opcode
                    sync += core.e.eq(pdecode2.e)
                    # also drop PC and MSR into decode "state"
                    sync += core.state.eq(cur_state)
                    sync += core.raw_insn_i.eq(dec_opcode_i)
                    sync += core.bigendian_i.eq(self.core_bigendian_i)
                    sync += ilatch.eq(insn)  # latch current insn
                    m.next = "INSN_START"  # move to "start"

            # instruction decoded: flag it valid and issued (one cycle)
            with m.State("INSN_START"):
                comb += core_ivalid_i.eq(1)  # instruction is valid
                comb += core_issue_i.eq(1)  # and issued

                m.next = "INSN_ACTIVE"  # move to "wait completion"

            # instruction started: must wait till it finishes
            with m.State("INSN_ACTIVE"):
                with m.If(insn_type != MicrOp.OP_NOP):
                    comb += core_ivalid_i.eq(1)  # instruction is valid
                # remember if anything wrote the PC through the nia port
                with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
                    sync += pc_changed.eq(1)
                with m.If(~core_busy_o):  # instruction done!
                    # ok here we are not reading the branch unit.  TODO
                    # this just blithely overwrites whatever pipeline
                    # updated the PC
                    with m.If(~pc_changed):
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.data_i.eq(nia)
                    sync += core.e.eq(0)
                    sync += core.raw_insn_i.eq(0)
                    sync += core.bigendian_i.eq(0)
                    m.next = "IDLE"  # back to idle

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        with m.If(d_reg.req):  # request for regfile access being made
            # TODO: error-check this
            # XXX should this be combinatorial?  sync better?
            if intrf.unary:
                comb += self.int_r.ren.eq(1 << d_reg.addr)
            else:
                comb += self.int_r.addr.eq(d_reg.addr)
                comb += self.int_r.ren.eq(1)
        d_reg_delay = Signal()
        sync += d_reg_delay.eq(d_reg.req)
        with m.If(d_reg_delay):
            # data arrives one clock later
            comb += d_reg.data.eq(self.int_r.data_o)
            comb += d_reg.ack.eq(1)

        # sigh same thing for CR debug
        with m.If(d_cr.req):  # request for regfile access being made
            comb += self.cr_r.ren.eq(0b11111111)  # enable all
        d_cr_delay = Signal()
        sync += d_cr_delay.eq(d_cr.req)
        with m.If(d_cr_delay):
            # data arrives one clock later
            comb += d_cr.data.eq(self.cr_r.data_o)
            comb += d_cr.ack.eq(1)

        # aaand XER...
        with m.If(d_xer.req):  # request for regfile access being made
            comb += self.xer_r.ren.eq(0b111111)  # enable all
        d_xer_delay = Signal()
        sync += d_xer_delay.eq(d_xer.req)
        with m.If(d_xer_delay):
            # data arrives one clock later
            comb += d_xer.data.eq(self.xer_r.data_o)
            comb += d_xer.ack.eq(1)

        # DEC and TB inc/dec FSM
        self.tb_dec_fsm(m, cur_state.dec)

        return m

    def tb_dec_fsm(self, m, spr_dec):
        """tb_dec_fsm

        this is a FSM for updating either dec or tb.  it runs alternately
        DEC, TB, DEC, TB.  note that SPR pipeline could have written a new
        value to DEC, however the regfile has "passthrough" on it so this
        *should* be ok.

        see v3.0B p1097-1099 for Timer Resource and p1065 and p1076

        arguments:

        * m        the Module to which the FSM is added
        * spr_dec  receives (sync) a copy of each new DEC value written

        returns m (the same Module that was passed in)
        """

        comb, sync = m.d.comb, m.d.sync
        fast_rf = self.core.regs.rf['fast']
        fast_r_dectb = fast_rf.r_ports['issue']  # DEC/TB
        fast_w_dectb = fast_rf.w_ports['issue']  # DEC/TB

        with m.FSM():

            # initiates read of current DEC
            with m.State("DEC_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.DEC)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "DEC_WRITE"

            # waits for DEC read to arrive (1 cycle), updates with new value
            with m.State("DEC_WRITE"):
                new_dec = Signal(64)
                # TODO: MSR.LPCR 32-bit decrement mode
                comb += new_dec.eq(fast_r_dectb.data_o - 1)
                comb += fast_w_dectb.addr.eq(FastRegs.DEC)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_dec)
                sync += spr_dec.eq(new_dec)  # copy into cur_state for decoder
                m.next = "TB_READ"

            # initiates read of current TB
            with m.State("TB_READ"):
                comb += fast_r_dectb.addr.eq(FastRegs.TB)
                comb += fast_r_dectb.ren.eq(1)
                m.next = "TB_WRITE"

            # waits for read TB to arrive, initiates write of current TB
            with m.State("TB_WRITE"):
                new_tb = Signal(64)
                comb += new_tb.eq(fast_r_dectb.data_o + 1)
                comb += fast_w_dectb.addr.eq(FastRegs.TB)
                comb += fast_w_dectb.wen.eq(1)
                comb += fast_w_dectb.data_i.eq(new_tb)
                m.next = "DEC_READ"

        return m

    def __iter__(self):
        """yields pc_i, pc_o, memerr_o, the core and imem ports, then
        core_bigendian_i and busy_o
        """
        yield from self.pc_i.ports()
        yield self.pc_o
        yield self.memerr_o
        yield from self.core.ports()
        yield from self.imem.ports()
        yield self.core_bigendian_i
        yield self.busy_o

    def ports(self):
        """returns everything yielded by __iter__, as a list"""
        return list(self)

    def external_ports(self):
        """returns the list of ports handed to rtlil.convert in the
        __main__ section: issuer signals, clock and reset, JTAG *or* DMI,
        the instruction and load/store buses, and the buses/pins of the
        optional XICS and GPIO peripherals.
        """
        # take a copy: do not extend a list owned by pc_i
        ports = list(self.pc_i.ports())
        ports += [self.pc_o, self.memerr_o, self.core_bigendian_i, self.busy_o,
                  ClockSignal(), ResetSignal(),
                  ]

        if self.jtag_en:
            ports += list(self.jtag.external_ports())
        else:
            # DMI is only added when JTAG is not enabled
            ports += list(self.dbg.dmi.ports())

        ports += list(self.imem.ibus.fields.values())
        ports += list(self.core.l0.cmpi.lsmem.lsi.slavebus.fields.values())

        if self.xics:
            ports += list(self.xics_icp.bus.fields.values())
            ports += list(self.xics_ics.bus.fields.values())
            ports.append(self.int_level_i)

        if self.gpio:
            ports += list(self.simple_gpio.bus.fields.values())
            ports.append(self.gpio_o)

        return ports
435
436
if __name__ == '__main__':
    # one Function Unit of each of these types
    fu_names = ('alu', 'cr', 'branch', 'trap', 'logical',
                'spr', 'div', 'mul', 'shiftrot')
    units = dict.fromkeys(fu_names, 1)

    # bare wishbone for both instruction fetch and load/store
    pspec = TestMemPspec(ldst_ifacetype='bare_wb',
                         imem_ifacetype='bare_wb',
                         addr_wid=48,
                         mask_wid=8,
                         reg_wid=64,
                         units=units)
    dut = TestIssuer(pspec)

    # hand over to the nmigen command-line front-end
    main(dut, ports=dut.ports(), name="test_issuer")

    # no command-line arguments at all: write out RTLIL with the
    # external ports
    if len(sys.argv) == 1:
        rtlil_text = rtlil.convert(dut, ports=dut.external_ports(),
                                   name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(rtlil_text)