move instruction decoder out of core
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
23 from nmigen.cli import rtlil
24
25 from soc.decoder.power_regspec_map import regspec_decode_read
26 from soc.decoder.power_regspec_map import regspec_decode_write
27
28 from nmutil.picker import PriorityPicker
29 from nmutil.util import treereduce
30
31 from soc.fu.compunits.compunits import AllFunctionUnits
32 from soc.regfile.regfiles import RegFiles
33 from soc.decoder.decode2execute1 import Decode2ToExecute1Type
34 from soc.decoder.power_decoder2 import get_rdflags
35 from soc.decoder.decode2execute1 import Data
36 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
37 from soc.config.test.test_loadstore import TestMemPspec
38 from soc.decoder.power_enums import MicrOp
39 import operator
40
41 from nmutil.util import rising_edge
42
43
44 # helper function for reducing a list of signals down to a parallel
45 # ORed single signal.
def ortreereduce(tree, attr="data_o"):
    """OR together one attribute taken from every item of *tree*.

    Each item contributes ``getattr(item, attr)``; the contributions are
    combined with ``operator.or_`` by ``treereduce``.

    :param tree: collection of objects, each exposing attribute *attr*
    :param attr: name of the attribute to fetch from each item
                 (defaults to ``"data_o"``)
    :returns: whatever ``treereduce`` produces for the OR-combination
    """
    def fetch(item):
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
48
49
def ortreereduce_sig(tree):
    """OR together the items of *tree* directly.

    Unlike ``ortreereduce`` no attribute lookup is performed: every item
    is passed to ``treereduce`` unchanged and combined with
    ``operator.or_``.

    :param tree: collection of values to be ORed together
    :returns: whatever ``treereduce`` produces for the OR-combination
    """
    def identity(item):
        return item

    return treereduce(tree, operator.or_, identity)
52
53
54 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the items of *fuspecs* with the "full" ports listed first.

    Entries whose register-port name starts with ``"full"`` come first,
    followed by all remaining entries.  Within each group the dictionary's
    own iteration order is kept.

    :param fuspecs: dict mapping register-port name to its spec
    :returns: list of ``(regname, fspec)`` tuples
    """
    entries = list(fuspecs.items())
    full_first = [pair for pair in entries if pair[0].startswith("full")]
    the_rest = [pair for pair in entries if not pair[0].startswith("full")]
    return full_first + the_rest
64
65
class NonProductionCore(Elaboratable):
    """Simple core: Function Units wired to Register Files via pickers.

    Groups every Function Unit's register read/write requirements by
    regfile and by regfile port name, then creates one PriorityPicker per
    (regfile, port) to arbitrate a shared broadcast bus to that port.

    Maintenance note: several locals below (``counter``, ``pick``,
    ``wr_pick``, ``addr_en``, ``wp``, ``fu_enable``) hold Signals created
    without an explicit ``name=``.  NOTE(review): nmigen is understood to
    derive such a Signal's name from the variable it is assigned to, so
    renaming those locals may rename signals in the generated design --
    confirm before renaming.
    """

    def __init__(self, pspec):
        """Create the sub-components and the top-level control signals.

        :param pspec: parameter specification, passed unchanged to
                      ``TstL0CacheBuffer`` and ``AllFunctionUnits``
        """
        # single LD/ST funnel for memory access
        self.l0 = TstL0CacheBuffer(pspec, n_units=1)
        pi = self.l0.l0.dports[0]

        # function units (only one each)
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # register files (yes plural)
        self.regs = RegFiles()

        # decoded instruction (the decoder itself lives outside the core)
        self.e = Decode2ToExecute1Type()

        # issue/valid/busy signalling
        self.ivalid_i = Signal(reset_less=True)  # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # start/stop and terminated signalling
        # (core_stopped_i is declared here but not read inside this class)
        self.core_stopped_i = Signal(reset_less=True)
        self.core_reset_i = Signal()
        self.core_terminate_o = Signal(reset=0)  # indicates stopped

    def elaborate(self, platform):
        """Build the module: submodules, FU routing, regfile ports, reset.

        :param platform: passed through to ``RegFiles.elaborate_into``
        :returns: the populated ``Module``
        """
        m = Module()

        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        # connect up reset
        m.d.comb += ResetSignal().eq(self.core_reset_i)

        return m

    def connect_instruction(self, m):
        """Route the decoded instruction to the Function Unit that handles it.

        Uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        Two operations are handled here rather than by a Function Unit:

        * OP_ATTN sets ``core_terminate_o``.
        * OP_NOP loads a 2-bit counter with 2 and raises ``busy_o``; while
          the counter is non-zero it counts down and keeps ``busy_o``
          raised.  This exists because the issuer expects the busy flag to
          be raised and then lowered even for a NOP.

        :param m: the ``Module`` under construction
        :returns: dict mapping Function Unit name to its one-bit enable
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus
        e = self.e  # to execute

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        for i, funame in enumerate(fus.keys()):
            fu_bitdict[funame] = fu_enable[i]

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            comb += enable.eq((e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i):  # run only when valid
            with m.Switch(e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    sync += self.core_terminate_o.eq(1)

                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                with m.Default():
                    # connect up instructions.  only one enabled at a time
                    for funame, fu in fus.items():
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        with m.If(enable):
                            # route op, issue, busy, read flags and mask to FU
                            comb += fu.oper_i.eq_from_execute1(e)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            rdmask = get_rdflags(e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
        """Connect every Function Unit that reads one regfile port.

        Creates a PriorityPicker sized for all requesting FU inputs,
        registers it as submodule ``rdpick_<regfile>_<regname>``, and wires
        request/grant, the port address (or unary enable) and the read data.

        :param m: the ``Module`` under construction
        :param fu_bitdict: dict of FU name to enable bit
        :param rdpickers: dict-of-dicts; the new picker is stored at
                          ``rdpickers[regfile][regname]``
        :param regfile: regfile name (lower-cased to index ``regs.rf``)
        :param regname: read-port name within that regfile
        :param fspec: one ``(rdflag, read, write, wid, fuspec)`` tuple, or a
                      list of them when several specs share the port
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs

        rpidx = regname

        # select the required read port.  these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        print("read regfile", rpidx, regfile, regs.rf.keys(),
              rfile, rfile.unary)

        # normalise to a list so single and merged ports share one code path
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        rdflags = []
        pplen = 0
        reads = []
        ppoffs = []
        for i, spec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, _write, _wid, fuspec) = spec
            print("fpsec", i, spec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        print("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []
        addrs = []
        for i, spec in enumerate(fspecs):
            fuspec = spec[4]
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # picker inputs of later specs follow those of earlier ones
                pi += ppoffs[i]

                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                rp = Signal(name="rp_"+name)
                pick = Signal()

                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i])
                comb += rdpick.i[pi].eq(pick)
                sync += fu.go_rd_i[idx].eq(rising_edge(m, rp))
                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                comb += addr_en.eq(Mux(rp, reads[i], 0))
                if rfile.unary:
                    # unary regfile: the gated select *is* the read-enable
                    rens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    rens.append(rp)

                with m.If(rp):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    sync += src.eq(rport.data_o)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    def connect_rdports(self, m, fu_bitdict):
        """Connect all regfile read ports.

        Orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker (one ``connect_rdport`` call per port).

        :param m: the ``Module`` under construction
        :param fu_bitdict: dict of FU name to enable bit
        """
        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile in byregfiles_rd:
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh.  an experiment to merge RA and RB in the INT regfile
            # (we have too many read/write ports).  currently disabled:
            #if regfile == 'INT':
                #fuspecs['rabc'] = [fuspecs.pop('rb')]
                #fuspecs['rabc'].append(fuspecs.pop('rc'))
                #fuspecs['rabc'].append(fuspecs.pop('ra'))
            #if regfile == 'FAST':
            #    fuspecs['fast1'] = [fuspecs.pop('fast1')]
            #    if 'fast2' in fuspecs:
            #        fuspecs['fast1'].append(fuspecs.pop('fast2'))

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)

    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
        """Connect every Function Unit that writes one regfile port.

        Creates a PriorityPicker sized for all requesting FU outputs,
        registers it as submodule ``wrpick_<regfile>_<regname>``, and wires
        request/grant, the port address (or unary enable) and the write data.
        The write data of all FUs is ORed together onto the port.

        :param m: the ``Module`` under construction
        :param fu_bitdict: dict of FU name to enable bit
        :param wrpickers: dict-of-dicts; the new picker is stored at
                          ``wrpickers[regfile][regname]``
        :param regfile: regfile name (lower-cased to index ``regs.rf``)
        :param regname: write-port name within that regfile
        :param fspec: one ``(rdflag, read, write, wid, fuspec)`` tuple, or a
                      list of them when several specs share the port
        """
        comb = m.d.comb
        regs = self.regs

        print("connect wr", regname, fspec)
        rpidx = regname

        # select the required write port.  these are pre-defined sizes
        print(regfile, regs.rf.keys())
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        # normalise to a list so single and merged ports share one code path
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        pplen = 0
        ppoffs = []
        for i, spec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            fuspec = spec[4]
            print("fpsec", i, spec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []
        wens = []
        addrs = []
        for i, spec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-write to the FU
            # these are arbitrated by Data.ok signals
            (_rf, _read, write, _wid, fuspec) = spec
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # picker inputs of later specs follow those of earlier ones
                pi += ppoffs[i]

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx)  # latched output
                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                # wrflag is driven but deliberately not part of "pick"
                # below (see the commented-out "& wrflag")
                wrflag = Signal(name=name, reset_less=True)
                comb += wrflag.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                wr_pick = Signal()
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                addr_en = Signal.like(write)
                wp = Signal()
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                if rfile.unary:
                    # unary regfile: the gated select *is* the write-enable
                    wens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.data_i.shape())
                wsigs.append(fu_dest_latch)

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

    def connect_wrports(self, m, fu_bitdict):
        """Connect all regfile write ports.

        Orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker (one ``connect_wrport`` call per port).

        Before connecting, some ports are merged so that several specs
        share one physical write port: INT ``o1`` is folded into ``o`` and
        FAST ``fast2`` into ``fast1`` (each only when present).

        Note that the write-port wen, write-port data, and go_wr_i all need
        to be on the exact same clock cycle; ``connect_wrport`` currently
        drives all three combinatorially.

        :param m: the ``Module`` under construction
        :param fu_bitdict: dict of FU name to enable bit
        """
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication! BLECH!
        wrpickers = {}
        for regfile in byregfiles_wr:
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            # argh, more port-merging.  pop-then-reassign is kept as-is:
            # it moves the merged entry to the end of the dict ordering.
            if regfile == 'INT':
                fuspecs['o'] = [fuspecs.pop('o')]
                # guarded like 'fast2' below: not every configuration
                # necessarily declares a second INT output port
                if 'o1' in fuspecs:
                    fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """Group Function Unit register ports by regfile.

        Walks every Function Unit's source (``readmode`` true) or
        destination (``readmode`` false) regspecs and builds two
        dictionaries keyed by regfile name:

        * ``byregfiles[regfile][idx]``: list of ``(funame, fu, idx)``
        * ``byregfiles_spec[regfile][regname]``: tuple
          ``(rdflag, read, write, wid, fuspecs)`` where ``fuspecs`` is the
          list of ``(funame, fu, idx)`` using that named port.  The first
          four fields come from the first FU seen for that port; in read
          mode ``write`` is None, in write mode ``rdflag``/``read`` are None.

        The result is also printed, for convenience.

        :param readmode: True for read ports, False for write ports
        :returns: ``(byregfiles, byregfiles_spec)``
        """
        mode = "read" if readmode else "write"
        fus = self.fus.fus
        e = self.e  # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    _wrport, write = regspec_decode_write(e, regfile, regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                if regname not in byregfiles_spec[regfile]:
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                if idx not in byregfiles[regfile]:
                    byregfiles[regfile][idx] = []
                fuspec = (funame, fu, idx)
                byregfiles[regfile][idx].append(fuspec)
                byregfiles_spec[regfile][regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile in byregfiles:
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)
                    print()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """Yield the ports of the FUs, the decoded instruction and the L0."""
        yield from self.fus.ports()
        yield from self.e.ports()
        yield from self.l0.ports()
        # TODO: regs

    def ports(self):
        """Return everything ``__iter__`` yields, as a list."""
        return list(self)
480
481
if __name__ == '__main__':
    # build a core from a test memory pspec and dump it as RTLIL
    pspec_args = {
        'ldst_ifacetype': 'testpi',
        'imem_ifacetype': '',
        'addr_wid': 48,
        'mask_wid': 8,
        'reg_wid': 64,
    }
    dut = NonProductionCore(TestMemPspec(**pspec_args))
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(il_text)