move instruction decoder out of core
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to at least provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from soc.decoder.power_decoder import create_pdecode
25 from soc.decoder.power_decoder2 import PowerDecode2
26 from soc.decoder.decode2execute1 import Data
27 from soc.experiment.testmem import TestMemory # test only for instructions
28 from soc.regfile.regfiles import StateRegs
29 from soc.simple.core import NonProductionCore
30 from soc.config.test.test_loadstore import TestMemPspec
31 from soc.config.ifetch import ConfigFetchUnit
32 from soc.decoder.power_enums import MicrOp
33 from soc.debug.dmi import CoreDebug, DMIInterface
34 from soc.config.state import CoreState
35
36 from nmutil.util import rising_edge
37
38
class TestIssuer(Elaboratable):
    """TestIssuer - reads instructions from TestMemory and issues them

    efficiency and speed is not the main goal here: functional correctness is.

    The issuer owns the core, the instruction decoder (PowerDecode2), the
    instruction fetch unit and the debug (DMI) block, and sequences them
    with a four-state FSM:

    * IDLE        - unless the debugger requests a stop: present the PC to
                    instruction memory, latch PC (and MSR) into cur_state
    * INSN_READ   - wait until the fetch unit is no longer busy, then latch
                    the 32-bit instruction word
    * INSN_START  - for one cycle present the instruction to the core as
                    both valid and issued
    * INSN_ACTIVE - hold the opcode until the core is no longer busy, then
                    write PC+4 unless the "nia" port was written meanwhile

    Signals of interest:

    * pc_i             - Data(64): when "ok" is set, "data" is used as the
                         PC instead of the value read from the regfile
    * pc_o             - the PC latched for the current instruction
    * core_bigendian_i - forwarded to the decoder's bigendian input
    * busy_o           - copy of the core's busy_o
    * memerr_o         - declared and exported, not driven by this class
    """
    def __init__(self, pspec):
        # main instruction core
        self.core = NonProductionCore(pspec)

        # instruction decoder (lives here, not inside the core)
        pdecode = create_pdecode()
        self.pdecode2 = PowerDecode2(pdecode)   # decoder

        # Test Instruction memory
        self.imem = ConfigFetchUnit(pspec).fu
        # one-row cache of instruction read
        # (declared here; not referenced by elaborate() in this class)
        self.iline = Signal(64)  # one instruction line
        self.iprev_adr = Signal(64)  # previous address: if different, do read

        # DMI interface
        self.dbg = CoreDebug()

        # instruction go/monitor
        self.pc_o = Signal(64, reset_less=True)
        self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
        self.core_bigendian_i = Signal()
        self.busy_o = Signal(reset_less=True)
        self.memerr_o = Signal(reset_less=True)

        # FAST regfile read /write ports for PC and MSR
        self.state_r_pc = self.core.regs.rf['state'].r_ports['cia']  # PC rd
        self.state_w_pc = self.core.regs.rf['state'].w_ports['d_wr1']  # PC wr
        self.state_r_msr = self.core.regs.rf['state'].r_ports['msr']  # MSR rd

        # DMI interface access
        intrf = self.core.regs.rf['int']
        self.int_r = intrf.r_ports['dmi']  # INT read

        # hack method of keeping an eye on whether branch/trap set the PC
        self.state_nia = self.core.regs.rf['state'].w_ports['nia']
        self.state_nia.wen.name = 'state_nia_wen'

    def elaborate(self, platform):
        """Build the issuer: submodules, clock domains, debug hookup and FSM.

        NOTE: the order of the comb/sync statements below is significant
        (later assignments take priority over earlier ones), and the names
        of locals that hold Signals / ClockDomains end up in the generated
        design, so neither should be changed casually.
        """
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        # the core is moved into its own "coresync" clock domain
        m.submodules.core = core = DomainRenamer("coresync")(self.core)
        m.submodules.imem = self.imem
        m.submodules.dbg = dbg = self.dbg

        # instruction decoder: re-use the one created in __init__
        # (no second create_pdecode() call is needed here)
        m.submodules.dec2 = pdecode2 = self.pdecode2

        # convenience
        d_reg = dbg.dbg_gpr
        intrf = self.core.regs.rf['int']

        # clock delay power-on reset
        # NOTE(review): nmigen takes the domain name from the variable name
        # (cd_por -> "por", used as m.d.por below; cd_sync -> "sync"):
        # do not rename these locals.
        cd_por = ClockDomain(reset_less=True)
        cd_sync = ClockDomain()
        core_sync = ClockDomain("coresync")
        m.domains += cd_por, cd_sync, core_sync

        # small down-counter in the "por" domain, stops at zero
        delay = Signal(range(4), reset=1)
        with m.If(delay != 0):
            m.d.por += delay.eq(delay - 1)
        # both extra domains are clocked from the default clock
        comb += cd_por.clk.eq(ClockSignal())
        comb += core_sync.clk.eq(ClockSignal())
        # XXX TODO: power-on reset delay (later)
        # (when enabling this, parenthesise: "|" binds tighter than "!=")
        #comb += core.core_reset_i.eq((delay != 0) | dbg.core_rst_o)

        # busy/halted signals from core
        comb += self.busy_o.eq(core.busy_o)
        comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

        # current state (MSR/PC at the moment)
        cur_state = CoreState("cur")

        # temporary hack: says "go" immediately for both address gen and ST
        ldst = core.fus.fus['ldst0']
        st_go_edge = rising_edge(m, ldst.st.rel_o)
        m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)  # link addr-go direct to rel
        m.d.comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel

        # PC and instruction from I-Memory
        pc_changed = Signal()  # note write to PC
        comb += self.pc_o.eq(cur_state.pc)
        ilatch = Signal(32)  # holds the fetched instruction word

        # next instruction (+4 on current)
        nia = Signal(64, reset_less=True)
        comb += nia.eq(cur_state.pc + 4)

        # read the PC
        pc = Signal(64, reset_less=True)
        with m.If(self.pc_i.ok):
            # incoming override (start from pc_i)
            comb += pc.eq(self.pc_i.data)
        with m.Else():
            # otherwise read StateRegs regfile for PC
            comb += self.state_r_pc.ren.eq(1<<StateRegs.PC)
            comb += pc.eq(self.state_r_pc.data_o)

        # don't write pc every cycle
        # (defaults: overridden by the later sync assignment in INSN_ACTIVE)
        sync += self.state_w_pc.wen.eq(0)
        sync += self.state_w_pc.data_i.eq(0)

        # don't read msr every cycle
        # (default: overridden by the later sync assignment in IDLE)
        sync += self.state_r_msr.ren.eq(0)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += core.core_stopped_i.eq(dbg.core_stop_o)
        comb += core.core_reset_i.eq(dbg.core_rst_o)
        comb += dbg.terminate_i.eq(core.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        comb += dbg.state.msr.eq(cur_state.msr)

        # temporarily connect up core execute decode to pdecode2
        comb += core.e.eq(pdecode2.e)

        # temporaries
        core_busy_o = core.busy_o                   # core is busy
        core_ivalid_i = core.ivalid_i               # instruction is valid
        core_issue_i = core.issue_i                 # instruction is issued
        core_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

        insn_type = pdecode2.e.do.insn_type
        insn_state = pdecode2.state

        # actually use a nmigen FSM for the first time (w00t)
        # this FSM is perhaps unusual in that it detects conditions
        # then "holds" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.
        with m.FSM() as fsm:

            # waiting (zzz)
            with m.State("IDLE"):
                sync += pc_changed.eq(0)
                with m.If(~dbg.core_stop_o):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                    sync += cur_state.pc.eq(pc)

                    # read MSR, latch it, and put it in decode "state"
                    sync += self.state_r_msr.ren.eq(1<<StateRegs.MSR)
                    sync += cur_state.msr.eq(self.state_r_msr.data_o)

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # waiting for instruction bus (stays there until not busy,
            # or drops back to IDLE if the debugger requests a stop)
            with m.State("INSN_READ"):
                with m.If(dbg.core_stop_o):
                    m.next = "IDLE"  # back to idle
                with m.Elif(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    f_instr_o = self.imem.f_instr_o
                    # elaboration-time choice: if the fetch unit returns
                    # more than 32 bits, PC bit 2 picks the 32-bit word
                    if f_instr_o.width == 32:
                        insn = f_instr_o
                    else:
                        insn = f_instr_o.word_select(cur_state.pc[2], 32)
                    comb += core_opcode_i.eq(insn)  # actual opcode
                    sync += ilatch.eq(insn)  # latch current insn
                    m.next = "INSN_START"  # move to "start"

            # instruction fetched: present it to the core as valid and
            # issued (this state unconditionally lasts a single cycle)
            with m.State("INSN_START"):
                comb += core_ivalid_i.eq(1)  # instruction is valid
                comb += core_issue_i.eq(1)   # and issued
                comb += core_opcode_i.eq(ilatch)  # actual opcode

                # also drop PC and MSR into decode "state"
                comb += insn_state.eq(cur_state)

                m.next = "INSN_ACTIVE"  # move to "wait completion"

            # instruction started: must wait till it finishes
            with m.State("INSN_ACTIVE"):
                with m.If(insn_type != MicrOp.OP_NOP):
                    comb += core_ivalid_i.eq(1)  # instruction is valid
                comb += core_opcode_i.eq(ilatch)  # actual opcode
                comb += insn_state.eq(cur_state)  # and MSR and PC
                # remember if anything wrote to the "nia" port meanwhile
                with m.If(self.state_nia.wen):
                    sync += pc_changed.eq(1)
                with m.If(~core_busy_o):  # instruction done!
                    # ok here we are not reading the branch unit.  TODO
                    # this just blithely overwrites whatever pipeline
                    # updated the PC
                    with m.If(~pc_changed):
                        sync += self.state_w_pc.wen.eq(1<<StateRegs.PC)
                        sync += self.state_w_pc.data_i.eq(nia)
                    m.next = "IDLE"  # back to idle

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI

        with m.If(d_reg.req):  # request for regfile access being made
            # TODO: error-check this
            # XXX should this be combinatorial?  sync better?
            # elaboration-time choice between unary and binary addressing
            if intrf.unary:
                comb += self.int_r.ren.eq(1<<d_reg.addr)
            else:
                comb += self.int_r.addr.eq(d_reg.addr)
                comb += self.int_r.ren.eq(1)
            comb += d_reg.data.eq(self.int_r.data_o)
            comb += d_reg.ack.eq(1)

        return m

    def __iter__(self):
        """Yield the issuer's own ports plus those of the core and imem."""
        yield from self.pc_i.ports()
        yield self.pc_o
        yield self.memerr_o
        yield from self.core.ports()
        yield from self.imem.ports()
        yield self.core_bigendian_i
        yield self.busy_o

    def ports(self):
        """Return everything produced by __iter__ as a list."""
        return list(self)

    def external_ports(self):
        """Return the top-level port list: issuer signals, clock, reset,
        the DMI ports and the fields of the instruction and data buses.
        """
        return self.pc_i.ports() + [self.pc_o,
                                    self.memerr_o,
                                    self.core_bigendian_i,
                                    ClockSignal(),
                                    ResetSignal(),
                                    self.busy_o,
                                    ] + \
            list(self.dbg.dmi.ports()) + \
            list(self.imem.ibus.fields.values()) + \
            list(self.core.l0.cmpi.lsmem.lsi.dbus.fields.values())
288
289
if __name__ == '__main__':
    # one Function Unit of each kind (same keys, same order, all set to 1)
    units = dict.fromkeys(('alu', 'cr', 'branch', 'trap', 'logical',
                           'spr',
                           'div',
                           'mul',
                           'shiftrot'), 1)

    # configuration handed to TestMemPspec, keyword order unchanged
    settings = {'ldst_ifacetype': 'bare_wb',
                'imem_ifacetype': 'bare_wb',
                'addr_wid': 48,
                'mask_wid': 8,
                'reg_wid': 64,
                'units': units}
    pspec = TestMemPspec(**settings)
    dut = TestIssuer(pspec)

    # let the nmigen command-line front-end act on any arguments given
    main(dut, ports=dut.ports(), name="test_issuer")

    # with no command-line arguments, also write out the RTLIL of the
    # design using the external (top-level) port list
    no_cli_args = len(sys.argv) == 1
    if no_cli_args:
        rtlil_text = rtlil.convert(dut, ports=dut.external_ports(),
                                   name="test_issuer")
        with open("test_issuer.il", "w") as il_file:
            il_file.write(rtlil_text)