remove update of pc, msr and svstate from TestIssuerInOrder
[soc.git] / src / soc / simple / inorder.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal,
19 Mux, Const, Repl, Cat)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from nmutil.singlepipe import ControlBase
25 from soc.simple.core_data import FetchOutput, FetchInput
26
27 from openpower.decoder.power_enums import MicrOp
28 from openpower.state import CoreState
29 from soc.regfile.regfiles import StateRegs
30 from soc.config.test.test_loadstore import TestMemPspec
31 from soc.experiment.icache import ICache
32
33 from nmutil.util import rising_edge
34
35 from soc.simple.issuer import TestIssuerBase
36
def get_insn(f_instr_o, pc):
    """Pick the 32-bit instruction word out of the fetched data.

    A fetch result that is exactly 32 bits wide is handed back untouched.
    Anything else is treated as the 64-bit case, where bit 2 of ``pc``
    chooses which 32-bit word is selected.
    """
    if f_instr_o.width != 32:
        # 64-bit: bit 2 of pc decides which word to select
        return f_instr_o.word_select(pc[2], 32)
    return f_instr_o
43
44
45 # Fetch Finite State Machine.
46 # WARNING: there are currently DriverConflicts but it's actually working.
47 # TODO, here: everything that is global in nature, information from the
48 # main TestIssuerInternal, needs to move to either ispec() or ospec().
49 # not only that: TestIssuerInternal.imem can entirely move into here
50 # because imem is only ever accessed inside the FetchFSM.
class FetchFSM(ControlBase):
    """Instruction Fetch Finite State Machine.

    Receives a PC and MSR on the ControlBase "previous" port (``self.p``),
    presents the PC to the instruction memory, waits for the read to
    complete, stores the fetched word in the PowerDecoder2 raw opcode
    input and finally offers it on the "next" port (``self.n``).

    When ``svp64_en`` is set, the first 32-bit word is run through the
    SVP64 prefix decoder and, if it is a prefix, a second 32-bit word is
    read from ``pc + 4``.
    """

    def __init__(self, allow_overlap, svp64_en, imem, core_rst,
                 pdecode2, cur_state,
                 dbg, core, svstate, nia, is_svp64_mode=None):
        """Store the (externally owned) resources used by the FSM.

        allow_overlap -- if true, INSN_READ aborts back to IDLE while
                         ``dbg.stopping_o`` is asserted
        svp64_en      -- enables SVP64 prefix detection (Python bool)
        imem          -- instruction memory interface (a_pc_i, a_i_valid,
                         f_i_valid, f_busy_o, f_instr_o are used)
        core_rst      -- while asserted, ``nia`` is reset to zero
        pdecode2      -- PowerDecoder2; receives the raw opcode
        cur_state     -- pc / svstate / msr are latched into this
        dbg           -- debug interface (``stopping_o`` is used)
        core          -- the core; only inspected for an ``icache``
        svstate       -- SVSTATE value latched alongside the PC
        nia           -- written with the next instruction address
        is_svp64_mode -- records whether the fetched instruction was
                         SVP64-prefixed.  optional: when omitted a
                         private Signal is allocated, so that callers
                         which do not care need not supply one.
        """
        self.allow_overlap = allow_overlap
        self.svp64_en = svp64_en
        self.imem = imem
        self.core_rst = core_rst
        self.pdecode2 = pdecode2
        self.cur_state = cur_state
        self.dbg = dbg
        self.core = core
        self.svstate = svstate
        self.nia = nia
        if is_svp64_mode is None:
            is_svp64_mode = Signal(name="is_svp64_mode")
        self.is_svp64_mode = is_svp64_mode

        # set up pipeline ControlBase and allocate i/o specs
        # (unusual: normally done by the Pipeline API)
        super().__init__(stage=self)
        self.p.i_data, self.n.o_data = self.new_specs(None)
        self.i, self.o = self.p.i_data, self.n.o_data

    # next 3 functions are Stage API Compliance
    def setup(self, m, i):
        """Stage API: nothing to set up."""
        pass

    def ispec(self):
        """Stage API: input data specification."""
        return FetchInput()

    def ospec(self):
        """Stage API: output data specification."""
        return FetchOutput()

    def elaborate(self, platform):
        """fetch FSM

        this FSM performs fetch of raw instruction data, partial-decodes
        it 32-bit at a time to detect SVP64 prefixes, and will optionally
        read a 2nd 32-bit quantity if that occurs.

        states: IDLE -> INSN_READ -> (INSN_READ2 ->) INSN_READY -> IDLE
        """
        m = super().elaborate(platform)

        dbg = self.dbg
        core = self.core
        pc = self.i.pc
        msr = self.i.msr
        svstate = self.svstate
        nia = self.nia
        is_svp64_mode = self.is_svp64_mode
        fetch_pc_o_ready = self.p.o_ready
        fetch_pc_i_valid = self.p.i_valid
        fetch_insn_o_valid = self.n.o_valid
        fetch_insn_i_ready = self.n.i_ready

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state
        dec_opcode_o = pdecode2.dec.raw_opcode_in  # raw opcode

        # also note instruction fetch failed
        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
        else:
            fetch_failed = Const(0, 1)

        with m.FSM(name='fetch_fsm'):

            # waiting (zzz)
            with m.State("IDLE"):
                with m.If(~dbg.stopping_o & ~fetch_failed):
                    comb += fetch_pc_o_ready.eq(1)
                with m.If(fetch_pc_i_valid & ~fetch_failed):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                    sync += cur_state.pc.eq(pc)
                    sync += cur_state.svstate.eq(svstate)  # and svstate
                    sync += cur_state.msr.eq(msr)  # and msr

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # wait for the instruction memory to stop being busy, then
            # capture the (first) 32-bit word
            with m.State("INSN_READ"):
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "IDLE"
                with m.Else():
                    with m.If(self.imem.f_busy_o & ~fetch_failed):  # zzz...
                        # busy but not fetch failed: stay in wait-read
                        comb += self.imem.a_i_valid.eq(1)
                        comb += self.imem.f_i_valid.eq(1)
                    with m.Else():
                        # not busy (or fetch failed!): instruction fetched
                        # when fetch failed, the instruction gets ignored
                        # by the decoder
                        insn = get_insn(self.imem.f_instr_o, cur_state.pc)
                        if self.svp64_en:
                            # NOTE(review): self.svp64 and
                            # self.core_bigendian_i are not assigned
                            # anywhere in this class: they must be set
                            # on the instance by the owner before
                            # elaboration when svp64_en is true - confirm
                            svp64 = self.svp64
                            # decode the SVP64 prefix, if any
                            comb += svp64.raw_opcode_in.eq(insn)
                            comb += svp64.bigendian.eq(self.core_bigendian_i)
                            # pass the decoded prefix (if any) to PowerDecoder2
                            sync += pdecode2.sv_rm.eq(svp64.svp64_rm)
                            sync += pdecode2.is_svp64_mode.eq(is_svp64_mode)
                            # remember whether this is a prefixed instruction,
                            # so the FSM can readily loop when VL==0
                            sync += is_svp64_mode.eq(svp64.is_svp64_mode)
                            # calculate the address of the following instruction
                            insn_size = Mux(svp64.is_svp64_mode, 8, 4)
                            sync += nia.eq(cur_state.pc + insn_size)
                            with m.If(~svp64.is_svp64_mode):
                                # with no prefix, store the instruction
                                # and hand it directly to the next FSM
                                sync += dec_opcode_o.eq(insn)
                                m.next = "INSN_READY"
                            with m.Else():
                                # fetch the rest of the instruction from memory
                                comb += self.imem.a_pc_i.eq(cur_state.pc + 4)
                                comb += self.imem.a_i_valid.eq(1)
                                comb += self.imem.f_i_valid.eq(1)
                                m.next = "INSN_READ2"
                        else:
                            # not SVP64 - 32-bit only
                            sync += nia.eq(cur_state.pc + 4)
                            sync += dec_opcode_o.eq(insn)
                            m.next = "INSN_READY"

            # wait for the second 32-bit word (SVP64-prefixed only)
            with m.State("INSN_READ2"):
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_i_valid.eq(1)
                    comb += self.imem.f_i_valid.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    insn = get_insn(self.imem.f_instr_o, cur_state.pc+4)
                    sync += dec_opcode_o.eq(insn)
                    m.next = "INSN_READY"

            with m.State("INSN_READY"):
                # hand over the instruction, to be decoded
                comb += fetch_insn_o_valid.eq(1)
                with m.If(fetch_insn_i_ready):
                    m.next = "IDLE"

        # whatever was done above, over-ride it if core reset is held
        with m.If(self.core_rst):
            sync += nia.eq(0)

        return m
210
211
class TestIssuerInternalInOrder(TestIssuerBase):
    """TestIssuer - reads instructions from TestMemory and issues them

    efficiency and speed is not the main goal here: functional correctness
    and code clarity is.  optimisations (which almost 100% interfere with
    easy understanding) come later.

    three cooperating FSMs are built by elaborate(): a FetchFSM submodule,
    the issue FSM (issue_fsm) and the execute FSM (execute_fsm).  they
    talk to each other purely through ready/valid handshake Signals.
    """

    def issue_fsm(self, m, core, nia,
                  dbg, core_rst, is_svp64_mode,
                  fetch_pc_o_ready, fetch_pc_i_valid,
                  fetch_insn_o_valid, fetch_insn_i_ready,
                  exec_insn_i_valid, exec_insn_o_ready,
                  exec_pc_o_valid, exec_pc_i_ready):
        """issue FSM

        decode / issue FSM.  this interacts with the "fetch" FSM
        through fetch_insn_ready/valid (incoming) and fetch_pc_ready/valid
        (outgoing).  also interacts with the "execute" FSM
        through exec_insn_ready/valid (outgoing) and exec_pc_ready/valid
        (incoming).
        SVP64 RM prefixes have already been set up by the
        "fetch" phase, so execute is fairly straightforward.

        nia and is_svp64_mode are accepted for signature compatibility
        but are not read by this in-order implementation.
        """

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2
        cur_state = self.cur_state

        # temporaries
        dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

        # note if an exception happened.  in a pipelined or OoO design
        # this needs to be accompanied by "shadowing" (or stalling)
        exc_happened = self.core.o.exc_happened
        # also note instruction fetch failed
        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
            flush_needed = True
            # set to fault in decoder
            # update (highest priority) instruction fault
            rising_fetch_failed = rising_edge(m, fetch_failed)
            with m.If(rising_fetch_failed):
                sync += pdecode2.instr_fault.eq(1)
        else:
            fetch_failed = Const(0, 1)
            flush_needed = False

        with m.FSM(name="issue_fsm"):

            # sync with the "fetch" phase which is reading the instruction
            # at this point, there is no instruction running, that
            # could inadvertently update the PC.
            with m.State("ISSUE_START"):
                # reset instruction fault
                sync += pdecode2.instr_fault.eq(0)
                # wait on "core stop" release, before next fetch
                # need to do this here, in case we are in a VL==0 loop
                with m.If(~dbg.core_stop_o & ~core_rst):
                    comb += fetch_pc_i_valid.eq(1)  # tell fetch to start
                    with m.If(fetch_pc_o_ready):   # fetch acknowledged us
                        m.next = "INSN_WAIT"
                with m.Else():
                    # tell core it's stopped, and acknowledge debug handshake
                    comb += dbg.core_stopped_i.eq(1)

            # wait for an instruction to arrive from Fetch
            with m.State("INSN_WAIT"):
                if self.allow_overlap:
                    stopping = dbg.stopping_o
                else:
                    stopping = Const(0)
                with m.If(stopping):
                    # stopping: jump back to idle
                    m.next = "ISSUE_START"
                    if flush_needed:
                        # request the icache to stop asserting "failed"
                        comb += core.icache.flush_in.eq(1)
                    # stop instruction fault
                    sync += pdecode2.instr_fault.eq(0)
                with m.Else():
                    comb += fetch_insn_i_ready.eq(1)
                    with m.If(fetch_insn_o_valid):
                        # in this in-order issuer there is no VL==0 or
                        # predication handling: go straight to decode
                        m.next = "DECODE_SV"  # skip predication

            # the instruction has arrived: latch the decoded instruction
            # and current state into the core's input
            with m.State("DECODE_SV"):
                # decode the instruction
                with m.If(~fetch_failed):
                    sync += pdecode2.instr_fault.eq(0)
                sync += core.i.e.eq(pdecode2.e)
                sync += core.i.state.eq(cur_state)
                sync += core.i.raw_insn_i.eq(dec_opcode_i)
                sync += core.i.bigendian_i.eq(self.core_bigendian_i)
                # after decoding, reset any previous exception condition,
                # allowing it to be set again during the next execution
                sync += pdecode2.ldst_exc.eq(0)

                m.next = "INSN_EXECUTE"  # move to "execute"

            # handshake with execution FSM, move to "wait" once acknowledged
            with m.State("INSN_EXECUTE"):
                comb += exec_insn_i_valid.eq(1)  # trigger execute
                with m.If(exec_insn_o_ready):   # execute acknowledged us
                    m.next = "EXECUTE_WAIT"

            with m.State("EXECUTE_WAIT"):
                # wait on "core stop" release, at instruction end
                # need to do this here, in case we are in a VL>1 loop
                with m.If(~dbg.core_stop_o & ~core_rst):
                    comb += exec_pc_i_ready.eq(1)
                    # see https://bugs.libre-soc.org/show_bug.cgi?id=636
                    # the exception info needs to be blatted into
                    # pdecode.ldst_exc, and the instruction "re-run".
                    # when ldst_exc.happened is set, the PowerDecoder2
                    # reacts very differently: it re-writes the instruction
                    # with a "trap" (calls PowerDecoder2.trap()) which
                    # will *overwrite* whatever was requested and jump the
                    # PC to the exception address, as well as alter MSR.
                    # nothing else needs to be done other than to note
                    # the change of PC and MSR (and, later, SVSTATE)
                    with m.If(exc_happened):
                        mmu = core.fus.get_exc("mmu0")
                        ldst = core.fus.get_exc("ldst0")
                        if mmu is not None:
                            with m.If(fetch_failed):
                                # instruction fetch: exception is from MMU
                                # reset instr_fault (highest priority)
                                sync += pdecode2.ldst_exc.eq(mmu)
                                sync += pdecode2.instr_fault.eq(0)
                                if flush_needed:
                                    # request icache to stop asserting "failed"
                                    comb += core.icache.flush_in.eq(1)
                        with m.If(~fetch_failed):
                            # otherwise assume it was a LDST exception
                            sync += pdecode2.ldst_exc.eq(ldst)

                    with m.If(exec_pc_o_valid):

                        # return directly to Decode if Execute generated an
                        # exception.
                        with m.If(pdecode2.ldst_exc.happened):
                            m.next = "DECODE_SV"

                        # if MSR, PC or SVSTATE were changed by the previous
                        # instruction, go directly back to Fetch, without
                        # updating either MSR PC or SVSTATE
                        with m.Elif(self.msr_changed | self.pc_changed |
                                    self.sv_changed):
                            m.next = "ISSUE_START"

                        # otherwise go back to Decode
                        with m.Else():
                            m.next = "DECODE_SV"

                with m.Else():
                    # core is stopped (or in reset): acknowledge the debug
                    # handshake and clear any outstanding fetch fault
                    comb += dbg.core_stopped_i.eq(1)
                    if flush_needed:
                        # request the icache to stop asserting "failed"
                        comb += core.icache.flush_in.eq(1)
                    # stop instruction fault
                    sync += pdecode2.instr_fault.eq(0)

    def execute_fsm(self, m, core,
                    exec_insn_i_valid, exec_insn_o_ready,
                    exec_pc_o_valid, exec_pc_i_ready):
        """execute FSM

        execute FSM.  this interacts with the "issue" FSM
        through exec_insn_ready/valid (incoming) and exec_pc_ready/valid
        (outgoing).  SVP64 RM prefixes have already been set up by the
        "issue" phase, so execute is fairly straightforward.
        """

        comb = m.d.comb
        sync = m.d.sync
        pdecode2 = self.pdecode2

        # temporaries
        core_busy_o = core.n.o_data.busy_o  # core is busy
        core_ivalid_i = core.p.i_valid              # instruction is valid

        if hasattr(core, "icache"):
            fetch_failed = core.icache.i_out.fetch_failed
        else:
            fetch_failed = Const(0, 1)

        with m.FSM(name="exec_fsm"):

            # waiting for instruction bus (stays there until not busy)
            with m.State("INSN_START"):
                comb += exec_insn_o_ready.eq(1)
                with m.If(exec_insn_i_valid):
                    comb += core_ivalid_i.eq(1)  # instruction is valid/issued
                    # a new instruction is starting: forget any state
                    # changes recorded for the previous one
                    sync += self.sv_changed.eq(0)
                    sync += self.pc_changed.eq(0)
                    sync += self.msr_changed.eq(0)
                    with m.If(core.p.o_ready):  # only move if accepted
                        m.next = "INSN_ACTIVE"  # move to "wait completion"

            # instruction started: must wait till it finishes
            with m.State("INSN_ACTIVE"):
                # note changes to MSR, PC and SVSTATE
                # XXX oops, really must monitor *all* State Regfile write
                # ports looking for changes!
                with m.If(self.state_nia.wen & (1 << StateRegs.SVSTATE)):
                    sync += self.sv_changed.eq(1)
                with m.If(self.state_nia.wen & (1 << StateRegs.MSR)):
                    sync += self.msr_changed.eq(1)
                with m.If(self.state_nia.wen & (1 << StateRegs.PC)):
                    sync += self.pc_changed.eq(1)
                with m.If(~core_busy_o):  # instruction done!
                    comb += exec_pc_o_valid.eq(1)
                    with m.If(exec_pc_i_ready):
                        # when finished, indicate "done".
                        # however, if there was an exception, the instruction
                        # is *not* yet done.  this is an implementation
                        # detail: we choose to implement exceptions by
                        # taking the exception information from the LDST
                        # unit, putting that *back* into the PowerDecoder2,
                        # and *re-running the entire instruction*.
                        # if we erroneously indicate "done" here, it is as if
                        # there were *TWO* instructions:
                        # 1) the failed LDST 2) a TRAP.
                        with m.If(~pdecode2.ldst_exc.happened &
                                  ~fetch_failed):
                            comb += self.insn_done.eq(1)
                        m.next = "INSN_START"  # back to fetch

    def elaborate(self, platform):
        """Build the fetch, issue and execute FSMs and wire them together."""
        m = super().elaborate(platform)
        # convenience
        comb = m.d.comb
        cur_state = self.cur_state
        pdecode2 = self.pdecode2
        dbg = self.dbg
        core = self.core

        # set up peripherals and core
        core_rst = self.core_rst

        # indicate to outside world if any FU is still executing
        comb += self.any_busy.eq(core.n.o_data.any_busy_o)  # any FU executing

        # address of the next instruction, in the absence of a branch
        # depends on the instruction size
        nia = Signal(64)

        # records whether the fetched instruction was SVP64-prefixed.
        # both FetchFSM and issue_fsm take this as a required argument
        is_svp64_mode = Signal()

        # connect up debug signals
        comb += dbg.terminate_i.eq(core.o.core_terminate_o)

        # there are three FSMs here: fetch (32/64-bit), issue (decode)
        # and execute.  these are the handshake signals between each

        # fetch FSM can run as soon as the PC is valid
        fetch_pc_i_valid = Signal()  # Execute tells Fetch "start next read"
        fetch_pc_o_ready = Signal()  # Fetch Tells SVSTATE "proceed"

        # fetch FSM hands over the instruction to be decoded / issued
        fetch_insn_o_valid = Signal()
        fetch_insn_i_ready = Signal()

        # issue FSM delivers the instruction to be executed
        exec_insn_i_valid = Signal()
        exec_insn_o_ready = Signal()

        # execute FSM, hands over the PC/SVSTATE back to the issue FSM
        exec_pc_o_valid = Signal()
        exec_pc_i_ready = Signal()

        # the FSMs here are perhaps unusual in that they detect conditions
        # then "hold" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.

        # Fetch, then Issue, then Execute.  the ready/valid
        # signalling is used to communicate between the three.

        # set up Fetch FSM
        fetch = FetchFSM(self.allow_overlap, self.svp64_en,
                         self.imem, core_rst, pdecode2, cur_state,
                         dbg, core,
                         dbg.state.svstate,  # combinatorially same
                         nia, is_svp64_mode)
        m.submodules.fetch = fetch
        # connect up in/out data to existing Signals
        comb += fetch.p.i_data.pc.eq(dbg.state.pc)   # combinatorially same
        comb += fetch.p.i_data.msr.eq(dbg.state.msr)  # combinatorially same
        # and the ready/valid signalling
        comb += fetch_pc_o_ready.eq(fetch.p.o_ready)
        comb += fetch.p.i_valid.eq(fetch_pc_i_valid)
        comb += fetch_insn_o_valid.eq(fetch.n.o_valid)
        comb += fetch.n.i_ready.eq(fetch_insn_i_ready)

        self.issue_fsm(m, core, nia,
                       dbg, core_rst, is_svp64_mode,
                       fetch_pc_o_ready, fetch_pc_i_valid,
                       fetch_insn_o_valid, fetch_insn_i_ready,
                       exec_insn_i_valid, exec_insn_o_ready,
                       exec_pc_o_valid, exec_pc_i_ready)

        self.execute_fsm(m, core,
                         exec_insn_i_valid, exec_insn_o_ready,
                         exec_pc_o_valid, exec_pc_i_ready)

        return m
531
532
if __name__ == '__main__':
    # one of each Function Unit type
    units = {'alu': 1, 'cr': 1, 'branch': 1, 'trap': 1, 'logical': 1,
             'spr': 1,
             'div': 1,
             'mul': 1,
             'shiftrot': 1
             }
    pspec = TestMemPspec(ldst_ifacetype='bare_wb',
                         imem_ifacetype='bare_wb',
                         addr_wid=48,
                         mask_wid=8,
                         reg_wid=64,
                         units=units)
    # "TestIssuer" is neither defined nor imported in this file, so use
    # the issuer class that this module actually provides.
    # NOTE(review): assumes TestIssuerBase takes pspec as its sole
    # constructor argument and provides ports() / external_ports() -
    # confirm against soc.simple.issuer
    dut = TestIssuerInternalInOrder(pspec)
    # let the nmigen command-line front-end act on any arguments given
    main(dut, ports=dut.ports(), name="test_issuer")

    # with no command-line arguments: write out the RTLIL directly
    if len(sys.argv) == 1:
        vl = rtlil.convert(dut, ports=dut.external_ports(), name="test_issuer")
        with open("test_issuer.il", "w") as f:
            f.write(vl)