Merge branch 'master' of git.libre-soc.org:soc
[soc.git] / src / soc / simple / issuer.py
1 """simple core issuer
2
3 not in any way intended for production use. this runs a FSM that:
4
5 * reads the Program Counter from StateRegs
6 * reads an instruction from a fixed-size Test Memory
7 * issues it to the Simple Core
8 * waits for it to complete
9 * increments the PC
10 * does it all over again
11
12 the purpose of this module is to verify the functional correctness
13 of the Function Units in the absolute simplest and clearest possible
14 way, and to provide something that can be further incrementally
15 improved.
16 """
17
18 from nmigen import (Elaboratable, Module, Signal, ClockSignal, ResetSignal,
19 ClockDomain, DomainRenamer)
20 from nmigen.cli import rtlil
21 from nmigen.cli import main
22 import sys
23
24 from soc.decoder.power_decoder import create_pdecode
25 from soc.decoder.power_decoder2 import PowerDecode2
26 from soc.decoder.decode2execute1 import Data
27 from soc.experiment.testmem import TestMemory # test only for instructions
28 from soc.regfile.regfiles import StateRegs
29 from soc.simple.core import NonProductionCore
30 from soc.config.test.test_loadstore import TestMemPspec
31 from soc.config.ifetch import ConfigFetchUnit
32 from soc.decoder.power_enums import MicrOp
33 from soc.debug.dmi import CoreDebug, DMIInterface
34 from soc.config.state import CoreState
35
36 from nmutil.util import rising_edge
37
38
class TestIssuer(Elaboratable):
    """TestIssuer - reads instructions from TestMemory and issues them

    efficiency and speed is not the main goal here: functional correctness is.

    A four-state FSM (IDLE, INSN_READ, INSN_START, INSN_ACTIVE) obtains
    the PC, fetches one instruction from the instruction memory, decodes
    it, hands it to the core, waits for the core to stop being busy and
    then writes PC+4 back to the state regfile, unless a write to the
    'nia' port of the state regfile was seen while the instruction ran.

    Externally visible signals:

    * pc_i             - Data record: when pc_i.ok is set, pc_i.data is
                         used as the PC instead of the regfile value
    * pc_o             - PC of the instruction currently being processed
    * core_bigendian_i - forwarded to the decoder's bigendian input
    * busy_o           - copy of the core's busy_o
    * memerr_o         - declared and exported, but not driven by this class
    """

    def __init__(self, pspec):
        """Create the core, decoder, instruction memory and debug interface.

        pspec is passed, unmodified, to NonProductionCore and to
        ConfigFetchUnit.
        """
        # main instruction core
        self.core = NonProductionCore(pspec)

        # instruction decoder
        pdecode = create_pdecode()
        self.pdecode2 = PowerDecode2(pdecode)  # decoder

        # Test Instruction memory
        self.imem = ConfigFetchUnit(pspec).fu
        # one-row cache of instruction read
        # (neither of these two signals is referenced elsewhere in this class)
        self.iline = Signal(64)  # one instruction line
        self.iprev_adr = Signal(64)  # previous address: if different, do read

        # DMI interface
        self.dbg = CoreDebug()

        # instruction go/monitor
        self.pc_o = Signal(64, reset_less=True)
        self.pc_i = Data(64, "pc_i")  # set "ok" to indicate "please change me"
        self.core_bigendian_i = Signal()
        self.busy_o = Signal(reset_less=True)
        self.memerr_o = Signal(reset_less=True)

        # FAST regfile read /write ports for PC and MSR
        staterf = self.core.regs.rf['state']
        self.state_r_pc = staterf.r_ports['cia']  # PC rd
        self.state_w_pc = staterf.w_ports['d_wr1']  # PC wr
        self.state_r_msr = staterf.r_ports['msr']  # MSR rd

        # DMI interface access
        intrf = self.core.regs.rf['int']
        crrf = self.core.regs.rf['cr']
        self.int_r = intrf.r_ports['dmi']  # INT read
        self.cr_r = crrf.r_ports['full_cr_dbg']  # CR read

        # hack method of keeping an eye on whether branch/trap set the PC
        self.state_nia = staterf.w_ports['nia']
        self.state_nia.wen.name = 'state_nia_wen'

    def elaborate(self, platform):
        """Build and return the nmigen Module for the issuer.

        platform is not used.
        """
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        # the core runs in its own "coresync" clock domain
        m.submodules.core = core = DomainRenamer("coresync")(self.core)
        m.submodules.imem = self.imem
        m.submodules.dbg = dbg = self.dbg

        # instruction decoder (created in __init__)
        m.submodules.dec2 = pdecode2 = self.pdecode2

        # convenience
        d_reg, d_cr = dbg.dbg_gpr, dbg.dbg_cr
        intrf = self.core.regs.rf['int']

        # clock delay power-on reset
        # NOTE: do not rename cd_por / cd_sync.  no explicit name is given,
        # so the domain names are taken from these variable names, and
        # "m.d.por" below relies on the first one being called "por".
        cd_por = ClockDomain(reset_less=True)
        cd_sync = ClockDomain()
        core_sync = ClockDomain("coresync")
        m.domains += cd_por, cd_sync, core_sync

        # counts 3, 2, 1, 0 after power-on and then stays at zero
        delay = Signal(range(4), reset=3)
        with m.If(delay != 0):
            m.d.por += delay.eq(delay - 1)
        comb += cd_por.clk.eq(ClockSignal())
        comb += core_sync.clk.eq(ClockSignal())
        # power-on reset delay: hold the core in reset while the counter
        # is running, or whenever the debug interface requests a reset.
        # the brackets are essential: in python "|" binds tighter than "!=",
        # so without them this reads "delay != (0 | dbg.core_rst_o)"
        comb += core.core_reset_i.eq((delay != 0) | dbg.core_rst_o)

        # busy/halted signals from core
        comb += self.busy_o.eq(core.busy_o)
        comb += pdecode2.dec.bigendian.eq(self.core_bigendian_i)

        # current state (MSR/PC at the moment)
        cur_state = CoreState("cur")

        # temporary hack: says "go" immediately for both address gen and ST
        ldst = core.fus.fus['ldst0']
        st_go_edge = rising_edge(m, ldst.st.rel_o)
        comb += ldst.ad.go_i.eq(ldst.ad.rel_o)  # link addr-go direct to rel
        comb += ldst.st.go_i.eq(st_go_edge)  # link store-go to rising rel

        # PC and instruction from I-Memory
        pc_changed = Signal()  # note write to PC
        comb += self.pc_o.eq(cur_state.pc)
        ilatch = Signal(32)

        # next instruction (+4 on current)
        nia = Signal(64, reset_less=True)
        comb += nia.eq(cur_state.pc + 4)

        # read the PC
        pc = Signal(64, reset_less=True)
        pc_ok_delay = Signal()
        sync += pc_ok_delay.eq(~self.pc_i.ok)
        with m.If(self.pc_i.ok):
            # incoming override (start from pc_i)
            comb += pc.eq(self.pc_i.data)
        with m.Else():
            # otherwise read StateRegs regfile for PC...
            comb += self.state_r_pc.ren.eq(1 << StateRegs.PC)
        # ... but on a 1-clock delay
        with m.If(pc_ok_delay):
            comb += pc.eq(self.state_r_pc.data_o)

        # don't write pc every cycle
        comb += self.state_w_pc.wen.eq(0)
        comb += self.state_w_pc.data_i.eq(0)

        # don't read msr every cycle
        comb += self.state_r_msr.ren.eq(0)

        # connect up debug signals
        # TODO comb += core.icache_rst_i.eq(dbg.icache_rst_o)
        comb += dbg.terminate_i.eq(core.core_terminate_o)
        comb += dbg.state.pc.eq(pc)
        #comb += dbg.state.pc.eq(cur_state.pc)
        comb += dbg.state.msr.eq(cur_state.msr)

        # temporaries
        core_busy_o = core.busy_o  # core is busy
        core_ivalid_i = core.ivalid_i  # instruction is valid
        core_issue_i = core.issue_i  # instruction is issued
        dec_opcode_i = pdecode2.dec.raw_opcode_in  # raw opcode

        insn_type = core.e.do.insn_type
        dec_state = pdecode2.state

        # actually use a nmigen FSM for the first time (w00t)
        # this FSM is perhaps unusual in that it detects conditions
        # then "holds" information, combinatorially, for the core
        # (as opposed to using sync - which would be on a clock's delay)
        # this includes the actual opcode, valid flags and so on.
        with m.FSM():

            # waiting (zzz)
            with m.State("IDLE"):
                sync += pc_changed.eq(0)
                sync += core.e.eq(0)
                with m.If(~dbg.core_stop_o & ~core.core_reset_i):
                    # instruction allowed to go: start by reading the PC
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    comb += self.imem.a_pc_i.eq(pc)
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                    sync += cur_state.pc.eq(pc)

                    # initiate read of MSR
                    comb += self.state_r_msr.ren.eq(1 << StateRegs.MSR)

                    m.next = "INSN_READ"  # move to "wait for bus" phase
                with m.Else():
                    # stopped by debug, or held in reset: report it
                    comb += core.core_stopped_i.eq(1)
                    comb += dbg.core_stopped_i.eq(1)

            # waiting for instruction bus (stays there until not busy)
            with m.State("INSN_READ"):
                # one cycle later, msr read arrives
                sync += cur_state.msr.eq(self.state_r_msr.data_o)
                with m.If(self.imem.f_busy_o):  # zzz...
                    # busy: stay in wait-read
                    comb += self.imem.a_valid_i.eq(1)
                    comb += self.imem.f_valid_i.eq(1)
                with m.Else():
                    # not busy: instruction fetched
                    f_instr_o = self.imem.f_instr_o
                    if f_instr_o.width == 32:
                        insn = f_instr_o
                    else:
                        # wider fetch: bit 2 of the PC picks the 32-bit word
                        insn = f_instr_o.word_select(cur_state.pc[2], 32)
                    comb += dec_opcode_i.eq(insn)  # actual opcode
                    # also drop PC and MSR into decode "state"
                    comb += dec_state.eq(cur_state)
                    sync += core.e.eq(pdecode2.e)
                    sync += ilatch.eq(insn)  # latch current insn
                    m.next = "INSN_START"  # move to "start"

            # instruction decoded: issue it to the core (one cycle)
            with m.State("INSN_START"):
                comb += core_ivalid_i.eq(1)  # instruction is valid
                comb += core_issue_i.eq(1)  # and issued

                m.next = "INSN_ACTIVE"  # move to "wait completion"

            # instruction started: must wait till it finishes
            with m.State("INSN_ACTIVE"):
                with m.If(insn_type != MicrOp.OP_NOP):
                    comb += core_ivalid_i.eq(1)  # instruction is valid
                with m.If(self.state_nia.wen):
                    # remember that something wrote to the 'nia' port
                    sync += pc_changed.eq(1)
                with m.If(~core_busy_o):  # instruction done!
                    # ok here we are not reading the branch unit.  TODO
                    # this just blithely overwrites whatever pipeline
                    # updated the PC
                    with m.If(~pc_changed):
                        comb += self.state_w_pc.wen.eq(1 << StateRegs.PC)
                        comb += self.state_w_pc.data_i.eq(nia)
                    sync += core.e.eq(0)
                    m.next = "IDLE"  # back to idle

        # this bit doesn't have to be in the FSM: connect up to read
        # regfiles on demand from DMI
        with m.If(d_reg.req):  # request for regfile access being made
            # TODO: error-check this
            # XXX should this be combinatorial?  sync better?
            if intrf.unary:
                comb += self.int_r.ren.eq(1 << d_reg.addr)
            else:
                comb += self.int_r.addr.eq(d_reg.addr)
                comb += self.int_r.ren.eq(1)
        d_reg_delay = Signal()
        sync += d_reg_delay.eq(d_reg.req)
        with m.If(d_reg_delay):
            # data arrives one clock later
            comb += d_reg.data.eq(self.int_r.data_o)
            comb += d_reg.ack.eq(1)

        # sigh same thing for CR debug
        with m.If(d_cr.req):  # request for regfile access being made
            comb += self.cr_r.ren.eq(0b11111111)  # enable all
        d_cr_delay = Signal()
        sync += d_cr_delay.eq(d_cr.req)
        with m.If(d_cr_delay):
            # data arrives one clock later
            comb += d_cr.data.eq(self.cr_r.data_o)
            comb += d_cr.ack.eq(1)

        return m

    def __iter__(self):
        """Yield the issuer's own signals plus the core and imem ports."""
        yield from self.pc_i.ports()
        yield self.pc_o
        yield self.memerr_o
        yield from self.core.ports()
        yield from self.imem.ports()
        yield self.core_bigendian_i
        yield self.busy_o

    def ports(self):
        """Return everything produced by __iter__, as a list."""
        return list(self)

    def external_ports(self):
        """Return the list of top-level ports.

        this is pc_i, pc_o, memerr_o, core_bigendian_i, clock, reset and
        busy_o, followed by the DMI ports, the fields of the instruction
        memory bus and the fields of the load/store slave bus.
        """
        own_ports = [self.pc_o,
                     self.memerr_o,
                     self.core_bigendian_i,
                     ClockSignal(),
                     ResetSignal(),
                     self.busy_o,
                     ]
        lsbus = self.core.l0.cmpi.lsmem.lsi.slavebus
        return self.pc_i.ports() + own_ports + \
            list(self.dbg.dmi.ports()) + \
            list(self.imem.ibus.fields.values()) + \
            list(lsbus.fields.values())
304
305
if __name__ == '__main__':
    # one Function Unit of each of these kinds
    fu_kinds = ('alu', 'cr', 'branch', 'trap', 'logical',
                'spr',
                'div',
                'mul',
                'shiftrot')
    units = dict.fromkeys(fu_kinds, 1)

    # instruction and load/store memory both use the 'bare_wb' interface
    pspec = TestMemPspec(
        ldst_ifacetype='bare_wb',
        imem_ifacetype='bare_wb',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
        units=units,
    )
    dut = TestIssuer(pspec)

    # hand the design (with its full port list) to the nmigen command-line
    vl = main(dut, ports=dut.ports(), name="test_issuer")

    # run without any arguments: also write out an RTLIL file, this time
    # with only the external ports
    no_arguments = len(sys.argv) == 1
    if no_arguments:
        vl = rtlil.convert(dut, ports=dut.external_ports(),
                           name="test_issuer")
        with open("test_issuer.il", "w") as f:
            f.write(vl)