9d115a3dc297e450b5fc02e854a90b9c8b0d28bc
[soc.git] / src / soc / simple / core.py
"""simple core

not in any way intended for production use.  connects up FunctionUnits to
Register Files in a brain-dead fashion that permits one and only one
Function Unit to be operational at a time.

the principle here is to take the Function Units, analyse their regspecs,
and turn their requirements for access to register file read/write ports
into groupings by Register File and Register File Port name.

under each grouping - by regfile/port - a list of Function Units that
need to connect to that port is created.  as these are a contended
resource, a "Broadcast Bus" per read/write port is then also created,
with access to it managed by a PriorityPicker.

the brain-dead part of this module is that, even though there is no
conflict of access, regfile read/write hazards are *not* analysed,
and consequently it is safer to wait for the Function Unit to complete
before allowing a new instruction to proceed.
"""
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
23 from nmigen.cli import rtlil
24
25 from soc.decoder.power_decoder2 import PowerDecodeSubset
26 from soc.decoder.power_regspec_map import regspec_decode_read
27 from soc.decoder.power_regspec_map import regspec_decode_write
28
29 from nmutil.picker import PriorityPicker
30 from nmutil.util import treereduce
31
32 from soc.fu.compunits.compunits import AllFunctionUnits
33 from soc.regfile.regfiles import RegFiles
34 from soc.decoder.decode2execute1 import Decode2ToExecute1Type
35 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
36 from soc.decoder.power_decoder2 import get_rdflags
37 from soc.decoder.decode2execute1 import Data
38 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
39 from soc.config.test.test_loadstore import TestMemPspec
40 from soc.decoder.power_enums import MicrOp
41 from soc.config.state import CoreState
42
43 import operator
44
45 from nmutil.util import rising_edge
46
47
def ortreereduce(tree, attr="data_o"):
    """OR together one attribute taken from every item of tree.

    helper for reducing a list of signals down to a single, parallel
    ORed signal: the attribute named by attr (default "data_o") is read
    from each item and the results are combined with treereduce using
    operator.or_.
    """
    def pick_attr(item):
        return getattr(item, attr)

    return treereduce(tree, operator.or_, pick_attr)
52
53
def ortreereduce_sig(tree):
    """OR together the items of tree themselves (no attribute lookup).

    same reduction as ortreereduce, but each item is used as-is.
    """
    def as_is(item):
        return item

    return treereduce(tree, operator.or_, as_is)
56
57
def sort_fuspecs(fuspecs):
    """list the (regname, fspec) pairs of fuspecs, "full" ports first.

    helper function to place full regs declarations first: entries whose
    register name starts with "full" come before all the others.  within
    each of the two groups the dictionary's own ordering is kept.  the
    dictionary itself is not modified.
    """
    full_ports, other_ports = [], []
    for entry in fuspecs.items():
        bucket = full_ports if entry[0].startswith("full") else other_ports
        bucket.append(entry)
    return full_ports + other_ports
68
69
class NonProductionCore(Elaboratable):
    """simple core: Function Units wired to Register Files via pickers

    __init__ creates the LD/ST buffer, the Function Units, the register
    files, the main decode record and one subset decoder per Function
    Unit (except TRAP).  elaborate() then routes the decoded instruction
    to the enabled Function Unit and connects every FU operand / result
    to its regfile read / write port through a PriorityPicker.
    """

    def __init__(self, pspec):
        """create sub-blocks and the core's external signals

        pspec: configuration object.  it is handed to TstL0CacheBuffer
               and AllFunctionUnits, and elaborate() checks it for an
               optional "nocore" attribute.
        """
        self.pspec = pspec

        # single LD/ST funnel for memory access
        self.l0 = TstL0CacheBuffer(pspec, n_units=1)
        pi = self.l0.l0.dports[0]

        # function units (only one each)
        # only include mmu if enabled in pspec
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # register files (yes plural)
        self.regs = RegFiles()

        # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
        self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand)

        self.state = CoreState("core")
        self.raw_insn_i = Signal(32)  # raw instruction
        self.bigendian_i = Signal()  # bigendian

        # issue/valid/busy signalling
        self.ivalid_i = Signal(reset_less=True)  # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # start/stop and terminated signalling
        self.core_stopped_i = Signal(reset_less=True)
        self.core_terminate_o = Signal(reset=0)  # indicates stopped

        # create per-FU instruction decoders (subsetted)
        self.decoders = {}  # FU name -> PowerDecodeSubset
        self.des = {}       # FU name -> decoded operand record ("do")

        # name of the TRAP Function Unit.  stays None if there is no such
        # unit, so that elaborate() can test for it instead of failing
        # with an AttributeError.
        self.trapunit = None

        for funame, fu in self.fus.fus.items():
            f_name = fu.fnunit.name
            if f_name == 'TRAP':
                # no subset decoder for TRAP: elaborate() points its
                # self.des entry at the main decoder's output instead
                self.trapunit = funame
                continue
            self.decoders[funame] = PowerDecodeSubset(None, fu.opsubsetkls,
                                                      f_name,
                                                      final=True,
                                                      state=self.state)
            self.des[funame] = self.decoders[funame].do

    def elaborate(self, platform):
        """build the Module: sub-blocks, decoders, then FU/regfile wiring

        returns the nmigen Module.  when pspec has a "nocore" attribute
        equal to True only a single toggling dummy signal is generated.
        """
        m = Module()
        # for testing purposes, to cut down on build time in coriolis2
        if hasattr(self.pspec, "nocore") and self.pspec.nocore == True:
            x = Signal()  # dummy signal
            m.d.sync += x.eq(~x)
            return m
        comb = m.d.comb

        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # connect decoders: each one sees the same raw instruction
        for dec in self.decoders.values():
            setattr(m.submodules, "dec_%s" % dec.fn_name, dec)
            comb += dec.dec.raw_opcode_in.eq(self.raw_insn_i)
            comb += dec.dec.bigendian.eq(self.bigendian_i)

        # ssh, cheat: trap uses the main decoder because of the rewriting
        if self.trapunit is not None:
            self.des[self.trapunit] = self.e.do

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        return m

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor.  OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        returns a dictionary, keyed by Function Unit name, of the
        single-bit enable signal for that Function Unit.  self.des must
        hold an entry for every Function Unit by the time this is called.
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        for i, funame in enumerate(fus.keys()):
            fu_bitdict[funame] = fu_enable[i]

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            # enabled when the decoded fn_unit shares a bit with this FU
            comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter.  whilst it is non-zero it counts
        # down by one per clock and holds busy high.
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i):  # run only when valid
            with m.Switch(self.e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    sync += self.core_terminate_o.eq(1)

                with m.Case(MicrOp.OP_NOP):
                    # no FU involved: fake "busy" using the counter
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                with m.Default():
                    # connect up instructions.  only one enabled at a time
                    for funame, fu in fus.items():
                        do = self.des[funame]
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        # route op, issue, busy, read flags and mask to FU
                        with m.If(enable):
                            # operand comes from the *local* decoder
                            comb += fu.oper_i.eq_from(do)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            # rdmask, which is for registers, needs to come
                            # from the *main* decoder
                            rdmask = get_rdflags(self.e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
        """connect one regfile read port to all the FU operands that use it

        m:          Module being built
        fu_bitdict: FU name -> enable bit (from connect_instruction)
        rdpickers:  dict-of-dicts; rdpickers[regfile] must already exist,
                    and the PriorityPicker created here is stored at
                    rdpickers[regfile][regname]
        regfile:    regfile name (lower-cased to index self.regs.rf)
        regname:    name of the read port within that regfile
        fspec:      one (rdflag, read, write, wid, fuspec) tuple or a list
                    of them, where fuspec is a list of (funame, fu, idx)

        a request is only put to the picker when the FU is enabled, its
        rd_rel_o[idx] is raised, the read flag is set and no read is
        already underway for it.  the regfile address / enable is driven
        in the cycle of the pick; go_rd_i and the data are driven one
        cycle later, from a registered copy of the pick.
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs

        rpidx = regname

        # select the required read port.  these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        print("read regfile", rpidx, regfile, regs.rf.keys(),
              rfile, rfile.unary)

        fspecs = fspec if isinstance(fspec, list) else [fspec]

        rdflags = []
        pplen = 0    # total number of picker inputs needed
        reads = []
        ppoffs = []  # first picker input used by each spec
        for i, spec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, _, _, fuspec) = spec
            print ("fpsec", i, spec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        print ("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []
        addrs = []
        for i, spec in enumerate(fspecs):
            fuspec = spec[4]
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec, ppoffs[i]):
                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                pick = Signal(name="pick_"+name)      # picker input
                rp = Signal(name="rp_"+name)          # picker output
                delay_pick = Signal(name="dp_"+name)  # read-enable "underway"

                # exclude any currently-enabled read-request (mask out active)
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick)
                comb += rdpick.i[pi].eq(pick)
                comb += fu.go_rd_i[idx].eq(delay_pick)  # pass in *delayed* pick

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp)  # delayed "pick"
                comb += addr_en.eq(Mux(rp, reads[i], 0))

                # the read-enable happens combinatorially (see mux-bus below)
                # but it results in the data coming out on a one-cycle delay.
                if rfile.unary:
                    # unary: the (masked) register select *is* the enable
                    rens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    rens.append(rp)

                # use the *delayed* pick signal to put requested data onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.data_o)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.
        """
        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile in byregfiles_rd:
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # note: read ports are *not* merged here.  an experiment to
            # merge RA/RB/RC of the INT regfile and fast1/fast2 of the
            # FAST regfile (too many read/write ports) is not enabled.
            # connect_wrports does merge write ports.

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)

    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
        """connect one regfile write port to all the FU results that use it

        m:          Module being built
        fu_bitdict: FU name -> enable bit (from connect_instruction)
        wrpickers:  dict-of-dicts; wrpickers[regfile] must already exist,
                    and the PriorityPicker created here is stored at
                    wrpickers[regfile][regname]
        regfile:    regfile name (lower-cased to index self.regs.rf)
        regname:    name of the write port within that regfile
        fspec:      one (rdflag, read, write, wid, fuspec) tuple or a list
                    of them, where fuspec is a list of (funame, fu, idx)

        the port's data, address and enable are the OR of what each FU
        contributes; address / enable contributions are zero unless that
        FU has been picked.
        """
        comb = m.d.comb
        regs = self.regs

        print("connect wr", regname, fspec)
        rpidx = regname

        # select the required write port.  these are pre-defined sizes
        print(regfile, regs.rf.keys())
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        fspecs = fspec if isinstance(fspec, list) else [fspec]

        pplen = 0    # total number of picker inputs needed
        ppoffs = []  # first picker input used by each spec
        for i, spec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            fuspec = spec[4]
            print ("fpsec", i, spec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []
        wens = []
        addrs = []
        for i, spec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-write to the FU
            (_, _, write, _, fuspec) = spec
            for pi, (funame, fu, idx) in enumerate(fuspec, ppoffs[i]):
                # write-request flag from dest.ok.  it is driven but not
                # (currently) part of the pick condition below.
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx)  # latched output
                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                wrflag = Signal(name=name, reset_less=True)
                comb += wrflag.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                wr_pick = Signal()
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                addr_en = Signal.like(write)
                wp = Signal()
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                if rfile.unary:
                    # unary: the (masked) register select *is* the enable
                    wens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.data_i.shape())
                wsigs.append(fu_dest_latch)

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

    @staticmethod
    def _merge_wrports(fuspecs, dest, extras):
        """fold the specs of ports dest + extras into one list under dest

        ports that are not present in fuspecs are skipped rather than
        raising KeyError.  if none of them is present, fuspecs is left
        untouched.
        """
        names = (dest,) + tuple(extras)
        merged = [fuspecs.pop(name) for name in names if name in fuspecs]
        if merged:
            fuspecs[dest] = merged

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle.
        NOTE(review): an earlier version of this note said that these are
        all driven from sync because of a combinatorial loop bug, but
        connect_wrport assigns them in the comb domain - confirm which
        is intended.
        """
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication! BLECH!
        wrpickers = {}
        for regfile in byregfiles_wr:
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            # argh, more port-merging: INT o+o1 share the "o" port and
            # FAST fast1+fast2 share the "fast1" port
            if regfile == 'INT':
                self._merge_wrports(fuspecs, 'o', ['o1'])
            if regfile == 'FAST':
                self._merge_wrports(fuspecs, 'fast1', ['fast2'])

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """group every FU operand (or result) by regfile and by port

        readmode: True to walk the FU source operands (n_src /
                  get_in_spec), False to walk the FU results (n_dst /
                  get_out_spec).

        returns a tuple (byregfiles, byregfiles_spec):

        * byregfiles[regfile][idx] is a list of (funame, fu, idx)
        * byregfiles_spec[regfile][regname] is a tuple
          (rdflag, read, write, wid, fuspecs) in which fuspecs is a list
          of (funame, fu, idx).  rdflag and read are None in write mode,
          write is None in read mode.  the tuple is created by the first
          FU found using that port; later FUs are only appended to its
          list.

        both dictionaries always have the same regfile keys.  everything
        that is found is also printed out.
        """
        mode = "read" if readmode else "write"
        fus = self.fus.fus
        e = self.e  # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    _, write = regspec_decode_write(e, regfile, regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                if regname not in byregfiles_spec[regfile]:
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                fuspec = (funame, fu, idx)
                byregfiles[regfile].setdefault(idx, []).append(fuspec)
                byregfiles_spec[regfile][regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile in byregfiles:
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)
                    print()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """yield the ports of the Function Units, decode record and l0"""
        yield from self.fus.ports()
        yield from self.e.ports()
        yield from self.l0.ports()
        # TODO: regs

    def ports(self):
        """return everything that __iter__ yields, as a list"""
        return list(self)
531
532
if __name__ == '__main__':
    # build a core from a test memory pspec and dump it as RTLIL
    core_config = dict(ldst_ifacetype='testpi',
                       imem_ifacetype='',
                       addr_wid=48,
                       mask_wid=8,
                       reg_wid=64)
    core = NonProductionCore(TestMemPspec(**core_config))
    rtlil_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(rtlil_text)