add option to reduce number of regfile ports (get DFFs down in ls180)
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
23 from nmigen.cli import rtlil
24
25 from soc.decoder.power_decoder2 import PowerDecodeSubset
26 from soc.decoder.power_regspec_map import regspec_decode_read
27 from soc.decoder.power_regspec_map import regspec_decode_write
28
29 from nmutil.picker import PriorityPicker
30 from nmutil.util import treereduce
31
32 from soc.fu.compunits.compunits import AllFunctionUnits
33 from soc.regfile.regfiles import RegFiles
34 from soc.decoder.decode2execute1 import Decode2ToExecute1Type
35 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
36 from soc.decoder.power_decoder2 import get_rdflags
37 from soc.decoder.decode2execute1 import Data
38 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
39 from soc.config.test.test_loadstore import TestMemPspec
40 from soc.decoder.power_enums import MicrOp
41 from soc.config.state import CoreState
42
43 import operator
44
45 from nmutil.util import rising_edge
46
47
def ortreereduce(tree, attr="data_o"):
    """OR-reduce one attribute of every item in *tree* into a single value.

    Each item has the attribute named by *attr* (default ``"data_o"``)
    fetched with ``getattr``; the fetched values are then combined with
    ``operator.or_`` by ``treereduce``.
    """
    def fetch(item):
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
52
53
def ortreereduce_sig(tree):
    """OR-reduce the items of *tree* themselves into a single value.

    Same as ``ortreereduce`` except that the items are combined directly
    (identity accessor) instead of via one of their attributes.
    """
    def identity(item):
        return item

    return treereduce(tree, operator.or_, identity)
56
57
def sort_fuspecs(fuspecs):
    """Return the ``(regname, fspec)`` pairs of *fuspecs*, "full" ones first.

    Entries whose register name starts with ``"full"`` are placed ahead of
    all others; within each group the dictionary's own iteration order is
    kept.  The result is a plain list of 2-tuples.
    """
    full_entries = []
    other_entries = []
    for entry in fuspecs.items():
        # entry[0] is the register (port) name
        if entry[0].startswith("full"):
            full_entries.append(entry)
        else:
            other_entries.append(entry)
    return full_entries + other_entries
68
69
class NonProductionCore(Elaboratable):
    """Simple core: Function Units wired to Register Files via pickers.

    For every regfile read/write port a PriorityPicker arbitrates between
    the Function Units that want that port, and the selected unit is
    connected to the port through an OR-reduced "broadcast bus".  Only the
    Function Unit selected by the decoded instruction is enabled.
    """

    def __init__(self, pspec):
        """Create sub-components and the per-FU subset decoders.

        pspec: parameter object.  The optional attributes ``regreduce``
        (merge regfile ports) and ``nocore`` (see ``elaborate``) are read
        from it here and in ``elaborate``; it is also handed on to the
        L0 cache buffer, the function units and the register files.
        """
        self.pspec = pspec

        # test to see if regfile ports should be reduced
        # (a missing attribute is treated the same as False)
        self.regreduce_en = getattr(pspec, "regreduce", False) == True

        # single LD/ST funnel for memory access
        self.l0 = TstL0CacheBuffer(pspec, n_units=1)
        pi = self.l0.l0.dports[0]

        # function units (only one each)
        # only include mmu if enabled in pspec
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # register files (yes plural)
        self.regs = RegFiles(pspec)

        # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
        self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand)

        # SVP64 RA_OR_ZERO needs to know if the relevant EXTRA2/3 field is zero
        self.sv_a_nz = Signal()

        # state and raw instruction
        self.state = CoreState("core")
        self.raw_insn_i = Signal(32)  # raw instruction
        self.bigendian_i = Signal()  # bigendian - TODO, set by MSR.BE

        # issue/valid/busy signalling
        self.ivalid_i = Signal(reset_less=True)  # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # start/stop and terminated signalling
        self.core_stopped_i = Signal(reset_less=True)
        self.core_terminate_o = Signal(reset=0)  # indicates stopped

        # create per-FU instruction decoders (subsetted)
        self.decoders = {}
        self.des = {}

        # name of the TRAP function unit, or None when there is none.
        # always defined so that elaborate() can test for it safely.
        self.trapunit = None

        for funame, fu in self.fus.fus.items():
            f_name = fu.fnunit.name
            if f_name == 'TRAP':
                # no subset decoder for TRAP: elaborate() points it at
                # the main decoder's output instead
                self.trapunit = funame
                continue
            decoder = PowerDecodeSubset(None, fu.opsubsetkls, f_name,
                                        final=True,
                                        state=self.state)
            self.decoders[funame] = decoder
            self.des[funame] = decoder.do

        if "mmu0" in self.decoders:
            self.decoders["mmu0"].mmu0_spr_dec = self.decoders["spr0"]

    def elaborate(self, platform):
        """Build the Module: submodules, decoders, FU and regfile wiring.

        When ``pspec.nocore`` is present and equal to True only a dummy
        toggling signal is generated and nothing else is connected.
        """
        m = Module()
        # for testing purposes, to cut down on build time in coriolis2
        if getattr(self.pspec, "nocore", False) == True:
            x = Signal()  # dummy signal
            m.d.sync += x.eq(~x)
            return m
        comb = m.d.comb

        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # connect decoders
        for dec in self.decoders.values():
            setattr(m.submodules, "dec_%s" % dec.fn_name, dec)
            comb += dec.dec.raw_opcode_in.eq(self.raw_insn_i)
            comb += dec.dec.bigendian.eq(self.bigendian_i)
            # sigh due to SVP64 RA_OR_ZERO detection connect these too
            comb += dec.sv_a_nz.eq(self.sv_a_nz)

        # ssh, cheat: trap uses the main decoder because of the rewriting
        # (skipped when no TRAP function unit was found in __init__)
        if self.trapunit is not None:
            self.des[self.trapunit] = self.e.do

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        return m

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor.  OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        returns a dict mapping each Function Unit name to its one-bit
        enable signal.
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        for i, funame in enumerate(fus):
            fu_bitdict[funame] = fu_enable[i]

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter: busy is held high while it counts down
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i):  # run only when valid
            with m.Switch(self.e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    m.d.sync += self.core_terminate_o.eq(1)

                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                with m.Default():
                    # connect up instructions.  only one enabled at a time
                    for funame, fu in fus.items():
                        do = self.des[funame]
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        # route op, issue, busy, read flags and mask to FU
                        with m.If(enable):
                            # operand comes from the *local* decoder
                            comb += fu.oper_i.eq_from(do)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            # rdmask, which is for registers, needs to come
                            # from the *main* decoder
                            rdmask = get_rdflags(self.e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    @staticmethod
    def _merge_fuspecs(fuspecs, target, required, optional=()):
        """Merge several named port specs of *fuspecs* into a list, in place.

        The specs named in *required* are popped (KeyError if absent), then
        any of *optional* that are present; the resulting list is stored
        under *target*, which therefore ends up last in the dictionary.
        """
        merged = [fuspecs.pop(name) for name in required]
        merged.extend(fuspecs.pop(name) for name in optional
                      if name in fuspecs)
        fuspecs[target] = merged

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
        """Connect one regfile read port to every FU that reads from it.

        m:          Module being built
        fu_bitdict: FU name -> enable signal (from connect_instruction)
        rdpickers:  dict-of-dicts; the PriorityPicker created here is
                    stored at rdpickers[regfile][regname]
        regfile:    regfile name (lower-cased to index self.regs.rf)
        regname:    read port name within that regfile
        fspec:      one (rdflag, read, write, wid, fuspec) tuple, or a
                    list of them when several ports have been merged
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs

        rpidx = regname

        # select the required read port.  these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        print("read regfile", rpidx, regfile, regs.rf.keys(),
              rfile, rfile.unary)

        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        rdflags = []
        pplen = 0
        reads = []
        ppoffs = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        print("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            (rf, read, write, wid, fuspec) = fspec
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]  # position within the (merged) picker

                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                pick = Signal(name="pick_"+name)  # picker input
                rp = Signal(name="rp_"+name)  # picker output
                delay_pick = Signal(name="dp_"+name)  # read-enable "underway"

                # exclude any currently-enabled read-request (mask out active)
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick)
                comb += rdpick.i[pi].eq(pick)
                comb += fu.go_rd_i[idx].eq(delay_pick)  # pass in *delayed* pick

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp)  # delayed "pick"
                comb += addr_en.eq(Mux(rp, reads[i], 0))

                # the read-enable happens combinatorially (see mux-bus below)
                # but it results in the data coming out on a one-cycle delay.
                if rfile.unary:
                    rens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    rens.append(rp)

                # use the *delayed* pick signal to put requested data onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.data_o)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.
        """
        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile in byregfiles_rd:
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh.  an experiment to merge RA and RB in the INT regfile
            # (we have too many read/write ports)
            if self.regreduce_en:
                if regfile == 'INT':
                    # order (rb, rc, ra) sets the picker positions
                    self._merge_fuspecs(fuspecs, 'rabc', ('rb', 'rc', 'ra'))
                if regfile == 'FAST':
                    self._merge_fuspecs(fuspecs, 'fast1', ('fast1',),
                                        optional=('fast2',))

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)

    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
        """Connect one regfile write port to every FU that writes to it.

        m:          Module being built
        fu_bitdict: FU name -> enable signal (from connect_instruction)
        wrpickers:  dict-of-dicts; the PriorityPicker created here is
                    stored at wrpickers[regfile][regname]
        regfile:    regfile name (lower-cased to index self.regs.rf)
        regname:    write port name within that regfile
        fspec:      one (rdflag, read, write, wid, fuspec) tuple, or a
                    list of them when several ports have been merged
        """
        comb = m.d.comb
        regs = self.regs

        print("connect wr", regname, fspec)
        rpidx = regname

        # select the required write port.  these are pre-defined sizes
        print(regfile, regs.rf.keys())
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        pplen = 0
        ppoffs = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []
        wens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-read to the FU
            # these are arbitrated by Data.ok signals
            (rf, read, write, wid, fuspec) = fspec
            for pi, (funame, fu, idx) in enumerate(fuspec):
                pi += ppoffs[i]  # position within the (merged) picker

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx)  # latched output
                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                # NOTE: wrflag is driven but not (currently) used in the
                # pick expression below - see the commented-out term
                wrflag = Signal(name=name, reset_less=True)
                comb += wrflag.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                wr_pick = Signal()
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                addr_en = Signal.like(write)
                wp = Signal()
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                if rfile.unary:
                    wens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.data_i.shape())
                wsigs.append(fu_dest_latch)

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle.  in connect_wrport they are all
        driven from the combinatorial domain.
        """
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        wrpickers = {}
        for regfile in byregfiles_wr:
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            if self.regreduce_en:
                # argh, more port-merging
                if regfile == 'INT':
                    self._merge_fuspecs(fuspecs, 'o', ('o', 'o1'))
                if regfile == 'FAST':
                    self._merge_fuspecs(fuspecs, 'fast1', ('fast1',),
                                        optional=('fast2',))

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """Group the FUs' register ports by regfile.

        readmode: True to collect source (read) ports, False to collect
        destination (write) ports.

        returns a pair of dicts, both keyed by regfile name:

        * byregfiles[regfile][idx] is a list of (funame, fu, idx)
        * byregfiles_spec[regfile][regname] is a tuple
          (rdflag, read, write, wid, fuspec-list), where the fuspec-list
          holds the (funame, fu, idx) entries using that named port.
          in read mode write is None; in write mode rdflag and read
          are None.
        """
        mode = "read" if readmode else "write"
        fus = self.fus.fus
        e = self.e  # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    # only the write "reg select" is kept
                    _, write = regspec_decode_write(e, regfile, regname)
                lanes = byregfiles.setdefault(regfile, {})
                specs = byregfiles_spec.setdefault(regfile, {})
                if regname not in specs:
                    specs[regname] = (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                fuspec = (funame, fu, idx)
                lanes.setdefault(idx, []).append(fuspec)
                specs[regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile in byregfiles:
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)
                    print()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """Yield the ports of the function units, decoder record and L0."""
        yield from self.fus.ports()
        yield from self.e.ports()
        yield from self.l0.ports()
        # TODO: regs

    def ports(self):
        """Return everything yielded by ``__iter__`` as a list."""
        return list(self)
546
547
if __name__ == '__main__':
    # build a core around the test memory pspec and dump it as RTLIL
    test_pspec = TestMemPspec(
        ldst_ifacetype='testpi',
        imem_ifacetype='',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
    )
    core = NonProductionCore(test_pspec)
    rtlil_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(rtlil_text)