read delay on getting regfile data
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to at least provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from soc.decoder.power_decoder import create_pdecode
25 from soc.decoder.power_decoder2 import PowerDecode2
26 from soc.decoder.decode2execute1 import Data
27 from soc.experiment.testmem import TestMemory # test only for instructions
28 from soc.regfile.regfiles import StateRegs
29 from soc.simple.core import NonProductionCore
30 from soc.config.test.test_loadstore import TestMemPspec
31 from soc.config.ifetch import ConfigFetchUnit
32 from soc.decoder.power_enums import MicrOp
33 from soc.debug.dmi import CoreDebug, DMIInterface
34 from soc.config.state import CoreState
35
36 from nmutil.util import rising_edge
37
38
class TestIssuer(Elaboratable):
    """TestIssuer - reads instructions from TestMemory and issues them

    efficiency and speed is not the main goal here: functional correctness is.

    A four-state FSM (IDLE -> INSN_READ -> INSN_START -> INSN_ACTIVE)
    picks up the PC, fetches one instruction through the fetch unit,
    decodes it, hands it to the core, waits for the core to stop being
    busy and then writes PC+4 back to the State regfile (unless a write
    on the 'nia' port was seen while the instruction was active).

    pspec is passed unchanged to NonProductionCore and ConfigFetchUnit.
    """
    def __init__(self, pspec):
        # main instruction core
        self.core = core = NonProductionCore(pspec)

        # instruction decoder
        pdecode = create_pdecode()
        self.pdecode2 = PowerDecode2(pdecode)   # decoder

        # Test Instruction memory
        self.imem = ConfigFetchUnit(pspec).fu
        # one-row cache of instruction read
        self.iline = Signal(64)      # one instruction line
        self.iprev_adr = Signal(64)  # previous address: if different, do read

        # DMI interface
        self.dbg = CoreDebug()

        # instruction go/monitor
        self.pc_o = Signal(64, reset_less=True)
        self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
        self.core_bigendian_i = Signal()
        self.busy_o = Signal(reset_less=True)
        self.memerr_o = Signal(reset_less=True)  # not driven in elaborate()

        # FAST regfile read /write ports for PC and MSR
        staterf = self.core.regs.rf['state']
        self.state_r_pc = staterf.r_ports['cia']    # PC rd
        self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
        self.state_r_msr = staterf.r_ports['msr']   # MSR rd

        # DMI interface access
        intrf = self.core.regs.rf['int']
        self.int_r = intrf.r_ports['dmi']  # INT read

        # hack method of keeping an eye on whether branch/trap set the PC
        self.state_nia = self.core.regs.rf['state'].w_ports['nia']
        self.state_nia.wen.name = 'state_nia_wen'

    def elaborate(self, platform):
        """Build the issuer: clock domains, reset, PC read, FSM, DMI access.

        platform is accepted for the Elaboratable interface but is not
        used here.  Returns the constructed Module.
        """
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        # the core runs in its own "coresync" domain (clocked below from
        # the default clock)
        m.submodules.core = core = DomainRenamer("coresync")(self.core)
        m.submodules.imem = imem = self.imem
        m.submodules.dbg = dbg = self.dbg

        # instruction decoder (created in __init__)
        m.submodules.dec2 = pdecode2 = self.pdecode2

        # convenience
        d_reg = dbg.dbg_gpr
        intrf = self.core.regs.rf['int']

        # clock delay power-on reset
        cd_por = ClockDomain(reset_less=True)
        cd_sync = ClockDomain()
        core_sync = ClockDomain("coresync")
        m.domains += cd_por, cd_sync, core_sync

        # counts 3 -> 0 in the "por" domain and then stays at zero
        delay = Signal(range(4), reset=3)
        with m.If(delay != 0):
            m.d.por += delay.eq(delay - 1)
        comb += cd_por.clk.eq(ClockSignal())
        comb += core_sync.clk.eq(ClockSignal())
        # power-on reset delay: hold the core in reset while the counter
        # is non-zero OR while the debug module requests a reset.
        # the brackets matter: python's "|" binds tighter than "!=", so
        # without them this would compare delay against (0 | core_rst_o)
        comb += core.core_reset_i.eq((delay != 0) | dbg.core_rst_o)

        # busy/halted signals from core
        comb += self.busy_o.eq(core.busy_o)
        comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

        # current state (MSR/PC at the moment)
        cur_state = CoreState("cur")

        # temporary hack: says "go" immediately for both address gen and ST
        ldst = core.fus.fus['ldst0']
        st_go_edge = rising_edge(m, ldst.st.rel_o)
        m.d.comb += ldst.ad.go_i.eq(ldst.ad.rel_o)  # link addr-go direct to rel
        m.d.comb += ldst.st.go_i.eq(st_go_edge)     # link store-go to rising rel

        # PC and instruction from I-Memory
        pc_changed = Signal()  # note write to PC
        comb += self.pc_o.eq(cur_state.pc)
        ilatch = Signal(32)    # copy of the last fetched instruction

        # next instruction (+4 on current)
        nia = Signal(64, reset_less=True)
        comb += nia.eq(cur_state.pc + 4)

        # read the PC
        pc = Signal(64, reset_less=True)
        pc_ok_delay = Signal()
        # one clock later: "pc_i.ok was *not* set on the previous cycle"
        sync += pc_ok_delay.eq(~self.pc_i.ok)
        with m.If(self.pc_i.ok):
            # incoming override (start from pc_i)
            comb += pc.eq(self.pc_i.data)
        with m.Else():
            # otherwise read StateRegs regfile for PC...
            comb += self.state_r_pc.ren.eq(1 << StateRegs.PC)
        # ... but on a 1-clock delay
        # NOTE(review): this If is placed at the same level as the
        # If/Else above (not nested inside the Else) - the listing this
        # was taken from had lost its indentation, confirm against upstream
        with m.If(pc_ok_delay):
            comb += pc.eq(self.state_r_pc.data_o)

        # don't write pc every cycle
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.data_i.eq(0)

        # don't read msr every cycle
        comb += self.state_r_msr.ren.eq(0)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += core.core_stopped_i.eq(dbg.core_stop_o)
        comb += dbg.terminate_i.eq(core.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        comb += dbg.state.msr.eq(cur_state.msr)

        # temporaries
        core_busy_o = core.busy_o                  # core is busy
        core_ivalid_i = core.ivalid_i              # instruction is valid
        core_issue_i = core.issue_i                # instruction is issued
        dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

        insn_type = core.e.do.insn_type
        dec_state = pdecode2.state

        # actually use a nmigen FSM for the first time (w00t)
        # this FSM is perhaps unusual in that it detects conditions
        # then "holds" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.
        with m.FSM() as fsm:

            # waiting (zzz)
            with m.State("IDLE"):
                sync += pc_changed.eq(0)
                sync += core.e.eq(0)
                with m.If(~dbg.core_stop_o & ~core.core_reset_i):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                    sync += cur_state.pc.eq(pc)

                    # initiate read of MSR
                    comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)

                    m.next = "INSN_READ"  # move to "wait for bus" phase

            # waiting for instruction bus (stays there until not busy)
            with m.State("INSN_READ"):
                # one cycle later, msr read arrives
                sync += cur_state.msr.eq(self.state_r_msr.data_o)
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    f_instr_o = self.imem.f_instr_o
                    # elaboration-time choice: a fetch output wider than
                    # 32 bits has the 32-bit word selected by bit 2 of
                    # the captured PC
                    if f_instr_o.width == 32:
                        insn = f_instr_o
                    else:
                        insn = f_instr_o.word_select(cur_state.pc[2], 32)
                    comb += dec_opcode_i.eq(insn)  # actual opcode
                    # also drop PC and MSR into decode "state"
                    comb += dec_state.eq(cur_state)
                    # register the decoded instruction for the core
                    sync += core.e.eq(pdecode2.e)
                    sync += ilatch.eq(insn)  # latch current insn
                    m.next = "INSN_START"    # move to "start"

            # decoded instruction is now in core.e: flag it as valid and
            # issue it, then move on unconditionally
            with m.State("INSN_START"):
                comb += core_ivalid_i.eq(1)  # instruction is valid
                comb += core_issue_i.eq(1)   # and issued

                m.next = "INSN_ACTIVE"  # move to "wait completion"

            # instruction started: must wait till it finishes
            with m.State("INSN_ACTIVE"):
                with m.If(insn_type != MicrOp.OP_NOP):
                    comb += core_ivalid_i.eq(1)  # instruction is valid
                # remember if anything wrote to the 'nia' port meanwhile
                with m.If(self.state_nia.wen):
                    sync += pc_changed.eq(1)
                with m.If(~core_busy_o):  # instruction done!
                    # ok here we are not reading the branch unit.  TODO
                    # this just blithely overwrites whatever pipeline
                    # updated the PC
                    with m.If(~pc_changed):
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.data_i.eq(nia)
                    sync += core.e.eq(0)
                    m.next = "IDLE"  # back to idle

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        with m.If(d_reg.req):  # request for regfile access being made
            # TODO: error-check this
            # XXX should this be combinatorial?  sync better?
            # (elaboration-time choice on how the regfile is addressed)
            if intrf.unary:
                comb += self.int_r.ren.eq(1 << d_reg.addr)
            else:
                comb += self.int_r.addr.eq(d_reg.addr)
                comb += self.int_r.ren.eq(1)
        d_reg_delay = Signal()
        sync += d_reg_delay.eq(d_reg.req)
        with m.If(d_reg_delay):
            # data arrives one clock later
            comb += d_reg.data.eq(self.int_r.data_o)
            comb += d_reg.ack.eq(1)

        return m

    def __iter__(self):
        """Yield the issuer's own signals plus the core and imem ports."""
        yield from self.pc_i.ports()
        yield self.pc_o
        yield self.memerr_o
        yield from self.core.ports()
        yield from self.imem.ports()
        yield self.core_bigendian_i
        yield self.busy_o

    def ports(self):
        """Return everything produced by __iter__ as a list."""
        return list(self)

    def external_ports(self):
        """Return the port list used for the top-level RTLIL conversion.

        Contains pc_i's ports, pc_o, memerr_o, core_bigendian_i, the
        default-domain clock and reset, busy_o, the DMI ports, and the
        fields of the instruction bus and of the load/store data bus.
        """
        return self.pc_i.ports() + [self.pc_o,
                                    self.memerr_o,
                                    self.core_bigendian_i,
                                    ClockSignal(),
                                    ResetSignal(),
                                    self.busy_o,
                                    ] + \
            list(self.dbg.dmi.ports()) + \
            list(self.imem.ibus.fields.values()) + \
            list(self.core.l0.cmpi.lsmem.lsi.dbus.fields.values())
290
291
if __name__ == '__main__':
    # one Function Unit of each kind
    units = dict.fromkeys(('alu', 'cr', 'branch', 'trap', 'logical',
                           'spr', 'div', 'mul', 'shiftrot'), 1)
    pspec = TestMemPspec(
        ldst_ifacetype='bare_wb',
        imem_ifacetype='bare_wb',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
        units=units,
    )
    dut = TestIssuer(pspec)

    # hand over to the nmigen command-line front-end first
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # when run with no command-line arguments at all, additionally write
    # the design out as RTLIL using the external port list
    if len(sys.argv) == 1:
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as f:
            f.write(vl)