debugging test_issuer, getting FSM working
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal
23 from nmigen.cli import rtlil
24
25 from nmutil.picker import PriorityPicker
26 from nmutil.util import treereduce
27
28 from soc.fu.compunits.compunits import AllFunctionUnits
29 from soc.regfile.regfiles import RegFiles
30 from soc.decoder.power_decoder import create_pdecode
31 from soc.decoder.power_decoder2 import PowerDecode2
32 from soc.decoder.decode2execute1 import Data
33 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
34 from soc.experiment.testmem import TestMemory # test only for instructions
35 from soc.regfile.regfiles import FastRegs
36 import operator
37
38
def ortreereduce(tree, attr="data_o"):
    """Reduce a list of signals down to a single parallel-ORed value.

    The attribute named `attr` (default "data_o") is read from every item
    of `tree` with getattr, and the values obtained are combined by
    treereduce using operator.or_.
    """
    def pick(item):
        # fetch the attribute that takes part in the OR-reduction
        return getattr(item, attr)

    return treereduce(tree, operator.or_, pick)
43
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs with "full" register names first.

    `fuspecs` is a dict keyed by register name.  The result is a list of
    its items in which every entry whose name starts with "full" precedes
    all the others; within each of the two groups the dict's own
    iteration order is kept.
    """
    entries = list(fuspecs.items())
    full_first = [pair for pair in entries if pair[0].startswith("full")]
    the_rest = [pair for pair in entries if not pair[0].startswith("full")]
    return full_first + the_rest
54
55
class NonProductionCore(Elaboratable):
    """Simple core connecting Function Units to Register Files.

    Every regfile read/write port named by a Function Unit's regspec is
    shared between all the Function Units that name it; access to each
    such port is arbitrated by a dedicated PriorityPicker (see
    connect_rdports and connect_wrports).
    """

    def __init__(self, addrwid=6, idepth=16):
        """Create the LD/ST buffer, Function Units, regfiles and decoder.

        addrwid is handed to TstL0CacheBuffer and AllFunctionUnits.
        idepth is accepted but not used by this class.
        """
        # single LD/ST funnel for memory access
        self.l0 = TstL0CacheBuffer(n_units=1, regwid=64, addrwid=addrwid)
        pi = self.l0.l0.dports[0].pi

        # function units (only one each)
        self.fus = AllFunctionUnits(pilist=[pi], addrwid=addrwid)

        # register files (yes plural)
        self.regs = RegFiles()

        # instruction decoder
        pdecode = create_pdecode()
        self.pdecode2 = PowerDecode2(pdecode)

        # issue/valid/busy signalling
        self.ivalid_i = self.pdecode2.e.valid   # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # instruction input
        self.bigendian_i = self.pdecode2.dec.bigendian
        self.raw_opcode_i = self.pdecode2.dec.raw_opcode_in

    def elaborate(self, platform):
        """Add the submodules, then wire up issue and regfile ports."""
        m = Module()

        m.submodules.pdecode2 = self.pdecode2
        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        return m

    def connect_instruction(self, m):
        """Connect the decoded instruction to the Function Units.

        Returns a dict mapping each Function Unit name to its own bit of
        an enable Signal.  The bit is driven from "instruction valid AND
        the decoder's fn_unit matches this unit's fnunit value".
        """
        comb = m.d.comb
        fus = self.fus.fus
        dec2 = self.pdecode2

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {funame: fu_enable[i] for i, funame in enumerate(fus)}

        # connect up instructions.  only one is enabled at any given time
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            comb += enable.eq(self.ivalid_i &
                              (dec2.e.fn_unit & fnunit).bool())
            with m.If(enable):
                # only the selected FU receives the operation and issue,
                # and only the selected FU drives the core busy output
                comb += fu.oper_i.eq_from_execute1(dec2.e)
                comb += fu.issue_i.eq(self.issue_i)
                comb += self.busy_o.eq(fu.busy_o)
                rdmask = dec2.rdflags(fu)
                comb += fu.rdmaskn.eq(~rdmask)
            comb += fu_bitdict[funame].eq(enable)

        return fu_bitdict

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.

        fu_bitdict is the per-FU enable dict from connect_instruction.
        """
        comb = m.d.comb
        regs = self.regs

        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        for regfile in byregfiles_rd:
            fuspecs = byregfiles_rdspec[regfile]

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print ("connect rd", regname, fspec)
                rpidx = regname
                # get the regfile specs for this regfile port
                (rf, read, write, wid, fuspec) = fspec
                name = "rdflag_%s_%s" % (regfile, regname)
                rdflag = Signal(name=name, reset_less=True)
                comb += rdflag.eq(rf)

                # select the required read port.  these are pre-defined sizes
                print (rpidx, regfile, regs.rf.keys())
                rport = regs.rf[regfile.lower()].r_ports[rpidx]

                # create a priority picker to manage this port
                rdpick = PriorityPicker(len(fuspec))
                setattr(m.submodules,
                        "rdpick_%s_%s" % (regfile, rpidx), rdpick)

                # connect the regspec "reg select" number to this port
                with m.If(rdpick.en_o):
                    comb += rport.ren.eq(read)

                # connect up the FU req/go signals, and the reg-read to the FU
                # and create a Read Broadcast Bus
                for pi, (funame, fu, idx) in enumerate(fuspec):
                    src = fu.src_i[idx]

                    # connect request-read to picker input, and output
                    # to go-rd
                    fu_active = fu_bitdict[funame]
                    pick = fu.rd_rel_o[idx] & fu_active & rdflag
                    comb += rdpick.i[pi].eq(pick)
                    comb += fu.go_rd_i[idx].eq(rdpick.o[pi])

                    # connect regfile port to input, creating a Broadcast Bus
                    print ("reg connect widths",
                           regfile, regname, pi, funame,
                           src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.data_o)

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle.  as there is a combinatorial loop bug
        at the moment, these all use sync.

        fu_bitdict is the per-FU enable dict from connect_instruction.
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication! BLECH!
        for regfile in byregfiles_wr:
            fuspecs = byregfiles_wrspec[regfile]
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print ("connect wr", regname, fspec)
                rpidx = regname
                # get the regfile specs for this regfile port
                (rf, read, write, wid, fuspec) = fspec

                # select the required write port.  these are pre-defined sizes
                print (regfile, regs.rf.keys())
                wport = regs.rf[regfile.lower()].w_ports[rpidx]

                # create a priority picker to manage this port
                wrpick = PriorityPicker(len(fuspec))
                setattr(m.submodules,
                        "wrpick_%s_%s" % (regfile, rpidx), wrpick)

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                with m.If(wrpick.en_o):
                    sync += wport.wen.eq(write)
                with m.Else():
                    sync += wport.wen.eq(0)

                # connect up the FU req/go signals and the reg-read to the FU
                # these are arbitrated by Data.ok signals
                wsigs = []
                for pi, (funame, fu, idx) in enumerate(fuspec):
                    # write-request comes from dest.ok
                    # (wrflag is created and driven, but is currently not
                    # included in the pick expression below)
                    dest = fu.get_out(idx)
                    name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                    wrflag = Signal(name=name, reset_less=True)
                    comb += wrflag.eq(dest.ok)

                    # connect request-read to picker input, and output
                    # to go-wr
                    fu_active = fu_bitdict[funame]
                    pick = fu.wr.rel[idx] & fu_active  # & wrflag
                    comb += wrpick.i[pi].eq(pick)
                    sync += fu.go_wr_i[idx].eq(wrpick.o[pi] & wrpick.en_o)
                    # connect regfile port to input
                    print ("reg connect widths",
                           regfile, regname, pi, funame,
                           dest.shape(), wport.data_i.shape())
                    wsigs.append(dest)

                # here is where we create the Write Broadcast Bus. simple, eh?
                sync += wport.data_i.eq(ortreereduce(wsigs, "data"))

    def get_byregfiles(self, readmode):
        """Group the Function Units' register ports by regfile.

        readmode selects source (True) or destination (False) regspecs.

        Returns a tuple (byregfiles, byregfiles_spec):

        * byregfiles[regfile][idx] is a list of (funame, fu, idx) tuples,
          one per Function Unit port with that index on that regfile
        * byregfiles_spec[regfile][regname] is a list
          [rdflag, read, write, wid, fuspecs] where fuspecs is the list of
          (funame, fu, idx) tuples that use that named port.  rdflag and
          read are None in write mode; write is None in read mode.

        The result is also printed, for convenience.
        """
        mode = "read" if readmode else "write"
        dec2 = self.pdecode2
        fus = self.fus.fus

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print ("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print (" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = dec2.regspecmap_read(regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    # first item of the returned pair is not needed here
                    _, write = dec2.regspecmap_write(regfile, regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                if regname not in byregfiles_spec[regfile]:
                    # the first FU to name this port sets the spec values
                    byregfiles_spec[regfile][regname] = \
                        [rdflag, read, write, wid, []]
                # here we start to create "lanes"
                if idx not in byregfiles[regfile]:
                    byregfiles[regfile][idx] = []
                fuspec = (funame, fu, idx)
                byregfiles[regfile][idx].append(fuspec)
                byregfiles_spec[regfile][regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile in byregfiles:
            print ("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print (" rf %s port %s lane: %s" % (mode, regfile, regname))
                print (" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print (" ", funame, fu, idx, fusig)
                    print ()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """Yield the ports of the Function Units and of the decoder."""
        yield from self.fus.ports()
        yield from self.pdecode2.ports()
        # TODO: regs

    def ports(self):
        """Return everything yielded by __iter__ as a list."""
        return list(self)
311
312
class TestIssuer(Elaboratable):
    """TestIssuer - reads instructions from TestMemory and issues them

    efficiency and speed is not the main goal here: functional correctness is.

    A three-state FSM (IDLE, INSN_READ, INSN_ACTIVE) fetches one
    instruction, issues it to the core, waits for the core to stop being
    busy, then writes the next instruction address to the PC fast register.
    """

    def __init__(self, addrwid=6, idepth=6):
        """Create the core, the instruction memory and the control signals.

        addrwid is passed to NonProductionCore; idepth is passed to
        TestMemory as its second argument.
        """
        # main instruction core
        self.core = core = NonProductionCore(addrwid)

        # Test Instruction memory
        self.imem = TestMemory(32, idepth)
        self.i_rd = self.imem.rdport
        #self.i_wr = self.imem.write_port() errr...

        # instruction go/monitor
        self.go_insn_i = Signal(reset_less=True)
        self.pc_o = Signal(64, reset_less=True)
        self.pc_i = Data(64, "pc")  # set "ok" to indicate "please change me"
        self.busy_o = core.busy_o
        self.memerr_o = Signal(reset_less=True)  # not driven in this class

        # FAST regfile read /write ports
        self.fast_rd1 = self.core.regs.rf['fast'].r_ports['d_rd1']
        self.fast_wr1 = self.core.regs.rf['fast'].w_ports['d_wr1']

    def elaborate(self, platform):
        """Build the fetch/issue/wait FSM around the core."""
        m = Module()
        comb, sync = m.d.comb, m.d.sync

        m.submodules.core = core = self.core
        m.submodules.imem = self.imem

        # temporary hack: says "go" immediately for both address gen and ST
        ldst = core.fus.fus['ldst0']
        comb += ldst.ad.go.eq(ldst.ad.rel)  # link addr-go direct to rel
        comb += ldst.st.go.eq(ldst.st.rel)  # link store-go direct to rel

        # PC and instruction from I-Memory
        current_insn = Signal(32)  # current fetched instruction (note sync)
        current_pc = Signal(64)    # current PC (note it is reset/sync)
        comb += self.pc_o.eq(current_pc)

        # next instruction address (+4 on current PC).  this must be
        # derived from the captured PC, not from the fetched opcode:
        # nia is what gets written back to the PC fast register below.
        nia = Signal(64, reset_less=True)
        comb += nia.eq(current_pc + 4)

        # temporaries
        core_busy_o = core.busy_o          # core is busy
        core_ivalid_i = core.ivalid_i      # instruction is valid
        core_issue_i = core.issue_i        # instruction is issued
        core_be_i = core.bigendian_i       # bigendian mode
        core_opcode_i = core.raw_opcode_i  # raw opcode

        # actually use a nmigen FSM for the first time (w00t)
        with m.FSM():

            # waiting (zzz)
            with m.State("IDLE"):
                with m.If(self.go_insn_i):
                    # instruction allowed to go: start by reading the PC
                    pc = Signal(64, reset_less=True)
                    with m.If(self.pc_i.ok):
                        # incoming override (start from pc_i)
                        comb += pc.eq(self.pc_i.data)
                    with m.Else():
                        # otherwise read FastRegs regfile for PC
                        comb += self.fast_rd1.ren.eq(1 << FastRegs.PC)
                        comb += pc.eq(self.fast_rd1.data_o)
                    # capture the PC and also drop it into Insn Memory
                    # we have joined a pair of combinatorial memory
                    # lookups together.  this is Generally Bad.
                    # NOTE(review): pc is used directly as the memory
                    # address - confirm TestMemory's rdport expects that
                    sync += current_pc.eq(pc)
                    comb += self.i_rd.addr.eq(pc)
                    #comb += self.i_rd.en.eq(1) # comb-read (no need to set)
                    sync += current_insn.eq(self.i_rd.data)
                    m.next = "INSN_READ"  # move to "issue" phase

            # got the instruction: start issue
            with m.State("INSN_READ"):
                comb += core_ivalid_i.eq(1)  # say instruction is valid
                comb += core_issue_i.eq(1)   # and issued (ivalid_i redundant)
                comb += core_be_i.eq(0)      # little-endian mode
                comb += core_opcode_i.eq(current_insn)  # actual opcode
                m.next = "INSN_ACTIVE"  # move to "wait for completion" phase

            # instruction started: must wait till it finishes
            with m.State("INSN_ACTIVE"):
                comb += core_ivalid_i.eq(1)  # say instruction is valid
                comb += core_opcode_i.eq(current_insn)  # actual opcode
                #sync += core_issue_i.eq(0) # issue raises for only one cycle
                with m.If(~core_busy_o):  # instruction done!
                    #sync += core_ivalid_i.eq(0) # say instruction is invalid
                    #sync += core_opcode_i.eq(0) # clear out (no good reason)
                    # ok here we are not reading the branch unit.  TODO
                    # this just blithely overwrites whatever pipeline updated
                    # the PC
                    comb += self.fast_wr1.wen.eq(1 << FastRegs.PC)
                    comb += self.fast_wr1.data_i.eq(nia)
                    m.next = "IDLE"  # back to idle

        return m

    def __iter__(self):
        """Yield the issuer's own ports, then the core and memory ports."""
        yield from self.pc_i.ports()
        yield self.pc_o
        yield self.go_insn_i
        yield self.memerr_o
        yield from self.core.ports()
        yield from self.imem.ports()

    def ports(self):
        """Return everything yielded by __iter__ as a list."""
        return list(self)
426
427
if __name__ == '__main__':
    # convert each top-level design to RTLIL and save it under its own name
    for dut_class, il_filename in ((TestIssuer, "test_issuer.il"),
                                   (NonProductionCore,
                                    "non_production_core.il")):
        dut = dut_class()
        vl = rtlil.convert(dut, ports=dut.ports())
        with open(il_filename, "w") as f:
            f.write(vl)
438