a788fb5a132c7cd518dbdfb06214b6f33f2e6110
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
23 from nmigen.cli import rtlil
24
25 from soc.decoder.power_decoder2 import PowerDecodeSubset
26 from soc.decoder.power_regspec_map import regspec_decode_read
27 from soc.decoder.power_regspec_map import regspec_decode_write
28
29 from nmutil.picker import PriorityPicker
30 from nmutil.util import treereduce
31
32 from soc.fu.compunits.compunits import AllFunctionUnits
33 from soc.regfile.regfiles import RegFiles
34 from soc.decoder.decode2execute1 import Decode2ToExecute1Type
35 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
36 from soc.decoder.power_decoder2 import get_rdflags
37 from soc.decoder.decode2execute1 import Data
38 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
39 from soc.config.test.test_loadstore import TestMemPspec
40 from soc.decoder.power_enums import MicrOp
41 from soc.config.state import CoreState
42
43 import operator
44
45 from nmutil.util import rising_edge
46
47
48 # helper function for reducing a list of signals down to a parallel
49 # ORed single signal.
def ortreereduce(tree, attr="data_o"):
    """OR together one attribute taken from every item of ``tree``.

    tree -- sequence of objects that each carry an attribute named ``attr``
    attr -- name of the attribute to pick out (defaults to "data_o")

    returns the result of treereduce combining the picked attributes
    with operator.or_.
    """
    def pick(item):
        return getattr(item, attr)

    return treereduce(tree, operator.or_, pick)
52
53
def ortreereduce_sig(tree):
    """OR together the items of ``tree`` themselves (no attribute lookup).

    returns the result of treereduce combining the items with operator.or_.
    """
    def unchanged(item):
        return item

    return treereduce(tree, operator.or_, unchanged)
56
57
58 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) items of ``fuspecs`` as a list, with
    every port whose name starts with "full" placed ahead of the others.

    relative order inside each of the two groups follows the iteration
    order of the ``fuspecs`` dictionary.
    """
    entries = list(fuspecs.items())
    full_first = [entry for entry in entries
                  if entry[0].startswith("full")]
    the_rest = [entry for entry in entries
                if not entry[0].startswith("full")]
    return full_first + the_rest
68
69
70 class NonProductionCore(Elaboratable):
def __init__(self, pspec):
    """Create the sub-blocks, top-level signals and per-FU decoders.

    pspec -- configuration object.  it is stored as self.pspec and handed
             on to TstL0CacheBuffer and AllFunctionUnits.

    attributes created: l0, fus, regs, e, state, raw_insn_i, bigendian_i,
    ivalid_i, issue_i, busy_o, core_stopped_i, core_terminate_o,
    decoders and des (both keyed by Function Unit name), plus trapunit
    (see note in the loop below).
    """
    self.pspec = pspec

    # single LD/ST funnel for memory access
    self.l0 = TstL0CacheBuffer(pspec, n_units=1)
    pi = self.l0.l0.dports[0]

    # TODO: MMU / DCache are not instantiated here yet

    # function units (only one each)
    self.fus = AllFunctionUnits(pspec, pilist=[pi])

    # register files (yes plural)
    self.regs = RegFiles()

    # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
    self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand)

    self.state = CoreState("core")
    self.raw_insn_i = Signal(32)  # raw instruction
    self.bigendian_i = Signal()  # bigendian

    # issue/valid/busy signalling
    self.ivalid_i = Signal(reset_less=True)  # instruction is valid
    self.issue_i = Signal(reset_less=True)
    self.busy_o = Signal(name="corebusy_o", reset_less=True)

    # start/stop and terminated signalling
    self.core_stopped_i = Signal(reset_less=True)
    self.core_terminate_o = Signal(reset=0)  # indicates stopped

    # create per-FU instruction decoders (subsetted)
    self.decoders = {}
    self.des = {}

    for funame, fu in self.fus.fus.items():
        f_name = fu.fnunit.name
        opkls = fu.opsubsetkls
        if f_name == 'TRAP':
            # the TRAP unit gets no subset decoder: only its name is
            # recorded.  NOTE: self.trapunit is set *only* here, so it
            # does not exist when there is no TRAP unit, yet elaborate()
            # reads it unconditionally.
            self.trapunit = funame
            continue
        self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name,
                                                  final=True,
                                                  state=self.state)
        # the decoder's "do" record is what connect_instruction routes
        # to the Function Unit
        self.des[funame] = self.decoders[funame].do
120
def elaborate(self, platform):
    """Build and return the Module for the core.

    adds the function units and the L0 buffer as submodules, elaborates
    the register files into the module, feeds the raw instruction and
    endianness to every per-FU decoder, then wires up instruction issue
    and the regfile read and write ports.

    when pspec has a "nocore" attribute equal to True an empty Module is
    returned instead.
    """
    m = Module()
    # for testing purposes, to cut down on build time in coriolis2
    if getattr(self.pspec, "nocore", False) == True:
        return m
    comb = m.d.comb

    m.submodules.fus = self.fus
    m.submodules.l0 = self.l0
    self.regs.elaborate_into(m, platform)

    # every subset decoder becomes a submodule and sees the same raw
    # instruction and endianness inputs
    for decoder in self.decoders.values():
        setattr(m.submodules, "dec_%s" % decoder.fn_name, decoder)
        comb += decoder.dec.raw_opcode_in.eq(self.raw_insn_i)
        comb += decoder.dec.bigendian.eq(self.bigendian_i)

    # ssh, cheat: trap uses the main decoder because of the rewriting
    self.des[self.trapunit] = self.e.do

    # Function Units first, then the read ports, then the write ports
    fu_bitdict = self.connect_instruction(m)
    self.connect_rdports(m, fu_bitdict)
    self.connect_wrports(m, fu_bitdict)

    return m
149
def connect_instruction(self, m):
    """connect_instruction

    uses decoded (from PowerOp) function unit information from CSV files
    to ascertain which Function Unit should deal with the current
    instruction.

    some (such as OP_ATTN, OP_NOP) are dealt with here, including
    ignoring it and halting the processor.  OP_NOP is a bit annoying
    because the issuer expects busy flag still to be raised then lowered.
    (this requires a fake counter to be set).

    m -- the Module being built: statements are added to its comb and
         sync domains.

    returns fu_bitdict, a dictionary mapping each Function Unit name to
    its own one-bit slice of the local fu_enable signal.
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus

    # enable-signals for each FU, get one bit for each FU (by name)
    fu_enable = Signal(len(fus), reset_less=True)
    fu_bitdict = {}
    for i, funame in enumerate(fus.keys()):
        fu_bitdict[funame] = fu_enable[i]

    # enable the required Function Unit based on the opcode decode
    # note: this *only* works correctly for simple core when one and
    # *only* one FU is allocated per instruction
    for funame, fu in fus.items():
        fnunit = fu.fnunit.value
        enable = Signal(name="en_%s" % funame, reset_less=True)
        # enabled when the main decoder's fn_unit shares at least one
        # set bit with this FU's fnunit value
        comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
        comb += fu_bitdict[funame].eq(enable)

    # sigh - need a NOP counter
    # while the counter is non-zero it counts down by one per clock and
    # busy_o is held high
    counter = Signal(2)
    with m.If(counter != 0):
        sync += counter.eq(counter - 1)
        comb += self.busy_o.eq(1)

    with m.If(self.ivalid_i): # run only when valid
        with m.Switch(self.e.do.insn_type):
            # check for ATTN: halt if true
            with m.Case(MicrOp.OP_ATTN):
                m.d.sync += self.core_terminate_o.eq(1)

            # NOP: no Function Unit is involved.  load the counter and
            # raise busy_o straight away
            with m.Case(MicrOp.OP_NOP):
                sync += counter.eq(2)
                comb += self.busy_o.eq(1)

            with m.Default():
                # connect up instructions.  only one enabled at a time
                for funame, fu in fus.items():
                    do = self.des[funame]
                    enable = fu_bitdict[funame]

                    # run this FunctionUnit if enabled
                    # route op, issue, busy, read flags and mask to FU
                    with m.If(enable):
                        # operand comes from the *local* decoder
                        comb += fu.oper_i.eq_from(do)
                        #comb += fu.oper_i.eq_from_execute1(e)
                        comb += fu.issue_i.eq(self.issue_i)
                        # the core's busy flag follows the enabled FU
                        comb += self.busy_o.eq(fu.busy_o)
                        # rdmask, which is for registers, needs to come
                        # from the *main* decoder
                        rdmask = get_rdflags(self.e, fu)
                        # the FU takes the mask inverted
                        comb += fu.rdmaskn.eq(~rdmask)

    return fu_bitdict
216
def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
    """connect one regfile read port to every Function Unit that uses it

    m          -- the Module being built
    fu_bitdict -- per-FU enable bits, keyed by Function Unit name (as
                  returned by connect_instruction)
    rdpickers  -- dict-of-dicts: the PriorityPicker created here is
                  stored at rdpickers[regfile][regname]
    regfile    -- register file name (lower-cased to index self.regs.rf)
    regname    -- name of the read port within that register file
    fspec      -- one (rf, read, write, wid, fuspec) tuple, or a list of
                  them, where fuspec is a list of (funame, fu, idx)
                  entries, in the layout built by get_byregfiles

    a single PriorityPicker arbitrates between all the listed Function
    Units.  the winner's register number is put on the port at once;
    the port's data_o is handed to the winner one clock later, together
    with its go_rd_i.
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus
    regs = self.regs

    rpidx = regname

    # select the required read port.  these are pre-defined sizes
    rfile = regs.rf[regfile.lower()]
    rport = rfile.r_ports[rpidx]
    print("read regfile", rpidx, regfile, regs.rf.keys(),
          rfile, rfile.unary)

    # accept either a single spec or a list of (merged) specs
    fspecs = fspec
    if not isinstance(fspecs, list):
        fspecs = [fspecs]

    # first pass: one read-flag signal per spec, plus the offset at which
    # each spec's Function Units start within the shared picker
    rdflags = []
    pplen = 0
    reads = []
    ppoffs = []
    for i, fspec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        (rf, read, write, wid, fuspec) = fspec
        print ("fpsec", i, fspec, len(fuspec))
        ppoffs.append(pplen) # record offset for picker
        pplen += len(fuspec)
        name = "rdflag_%s_%s_%d" % (regfile, regname, i)
        rdflag = Signal(name=name, reset_less=True)
        comb += rdflag.eq(rf)
        rdflags.append(rdflag)
        reads.append(read)

    print ("pplen", pplen)

    # create a priority picker to manage this port
    rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
    setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

    # second pass: per-FU request/grant wiring.  rens and addrs collect
    # the per-FU signals that are OR-ed onto the port at the end
    rens = []
    addrs = []
    for i, fspec in enumerate(fspecs):
        (rf, read, write, wid, fuspec) = fspec
        # connect up the FU req/go signals, and the reg-read to the FU
        # and create a Read Broadcast Bus
        for pi, (funame, fu, idx) in enumerate(fuspec):
            # turn the index within this spec into a picker bit number
            pi += ppoffs[i]

            # connect request-read to picker input, and output to go-rd
            fu_active = fu_bitdict[funame]
            name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
            addr_en = Signal.like(reads[i], name="addr_en_"+name)
            pick = Signal(name="pick_"+name)     # picker input
            rp = Signal(name="rp_"+name)         # picker output
            delay_pick = Signal(name="dp_"+name) # read-enable "underway"

            # exclude any currently-enabled read-request (mask out active)
            comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                            ~delay_pick)
            comb += rdpick.i[pi].eq(pick)
            comb += fu.go_rd_i[idx].eq(delay_pick) # pass in *delayed* pick

            # if picked, select read-port "reg select" number to port
            comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
            sync += delay_pick.eq(rp) # delayed "pick"
            # the register number is only let through while picked:
            # every FU that is not picked contributes zero to the OR
            comb += addr_en.eq(Mux(rp, reads[i], 0))

            # the read-enable happens combinatorially (see mux-bus below)
            # but it results in the data coming out on a one-cycle delay.
            if rfile.unary:
                # unary: the gated register select is itself the enable
                rens.append(addr_en)
            else:
                # binary: gated address, with the pick bit as the enable
                addrs.append(addr_en)
                rens.append(rp)

            # use the *delayed* pick signal to put requested data onto bus
            with m.If(delay_pick):
                # connect regfile port to input, creating fan-out Bus
                src = fu.src_i[idx]
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      src.shape(), rport.data_o.shape())
                # all FUs connect to same port
                comb += src.eq(rport.data_o)

    # or-reduce the muxed read signals
    if rfile.unary:
        # for unary-addressed
        comb += rport.ren.eq(ortreereduce_sig(rens))
    else:
        # for binary-addressed
        comb += rport.addr.eq(ortreereduce_sig(addrs))
        comb += rport.ren.eq(Cat(*rens).bool())
        print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)
311
def connect_rdports(self, m, fu_bitdict):
    """connect read ports

    asks get_byregfiles for the read regspecs, grouped by regfile and
    then by regfile port name, and calls connect_rdport once for each
    port so that every Function Unit wanting that port is attached to it
    through a PriorityPicker.

    m          -- the Module being built
    fu_bitdict -- per-FU enable bits, as returned by connect_instruction
    """
    # dictionary of lists of regfile read ports
    byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

    # one PriorityPicker per regfile per regfile port, stored by
    # connect_rdport into rdpickers[regfile][port name]
    rdpickers = {}
    for regfile in byregfiles_rd:
        rdpickers[regfile] = {}

        # (an experiment to merge the INT ra/rb/rc ports and the FAST
        # fast1/fast2 ports was tried at this point and is disabled:
        # no read-port merging is performed)

        # ports whose name starts with "full" are connected first
        ordered = sort_fuspecs(byregfiles_rdspec[regfile])
        for regname, fspec in ordered:
            print("connect rd", regname, fspec)
            self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                regname, fspec)
349
def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
    """connect one regfile write port to every Function Unit that uses it

    m          -- the Module being built
    fu_bitdict -- per-FU enable bits, keyed by Function Unit name (as
                  returned by connect_instruction)
    wrpickers  -- dict-of-dicts: the PriorityPicker created here is
                  stored at wrpickers[regfile][regname]
    regfile    -- register file name (lower-cased to index self.regs.rf)
    regname    -- name of the write port within that register file
    fspec      -- one (rf, read, write, wid, fuspec) tuple, or a list of
                  them, where fuspec is a list of (funame, fu, idx)
                  entries, in the layout built by get_byregfiles

    a single PriorityPicker arbitrates between all the listed Function
    Units.  the latched outputs of all of them are OR-ed together onto
    the port's data_i, and the per-FU gated enables (and addresses, for
    binary-addressed regfiles) are OR-ed onto wen (and addr).
    """
    comb, sync = m.d.comb, m.d.sync
    fus = self.fus.fus
    regs = self.regs

    print("connect wr", regname, fspec)
    rpidx = regname

    # select the required write port.  these are pre-defined sizes
    print(regfile, regs.rf.keys())
    rfile = regs.rf[regfile.lower()]
    wport = rfile.w_ports[rpidx]

    # accept either a single spec or a list of (merged) specs
    fspecs = fspec
    if not isinstance(fspecs, list):
        fspecs = [fspecs]

    # first pass: total number of Function Units on this port, and the
    # offset at which each spec's units start within the shared picker
    pplen = 0
    writes = []
    ppoffs = []
    for i, fspec in enumerate(fspecs):
        # get the regfile specs for this regfile port
        (rf, read, write, wid, fuspec) = fspec
        print ("fpsec", i, fspec, len(fuspec))
        ppoffs.append(pplen) # record offset for picker
        pplen += len(fuspec)

    # create a priority picker to manage this port
    wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
    setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

    # second pass: per-FU request/grant wiring.  wsigs, wens and addrs
    # collect the per-FU signals that are OR-ed onto the port at the end
    wsigs = []
    wens = []
    addrs = []
    for i, fspec in enumerate(fspecs):
        # connect up the FU req/go signals and the reg-read to the FU
        # these are arbitrated by Data.ok signals
        (rf, read, write, wid, fuspec) = fspec
        for pi, (funame, fu, idx) in enumerate(fuspec):
            # turn the index within this spec into a picker bit number
            pi += ppoffs[i]

            # write-request comes from dest.ok
            dest = fu.get_out(idx)
            fu_dest_latch = fu.get_fu_out(idx)  # latched output
            name = "wrflag_%s_%s_%d" % (funame, regname, idx)
            # NOTE: wrflag is driven here but takes no part in the pick
            # below (the "& wrflag" term is commented out)
            wrflag = Signal(name=name, reset_less=True)
            comb += wrflag.eq(dest.ok & fu.busy_o)

            # connect request-write to picker input, and output to go-wr
            fu_active = fu_bitdict[funame]
            pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
            comb += wrpick.i[pi].eq(pick)
            # create a single-pulse go write from the picker output
            wr_pick = Signal()
            comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
            comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

            # connect the regspec write "reg select" number to this port
            # only if one FU actually requests (and is granted) the port
            # will the write-enable be activated
            addr_en = Signal.like(write)
            wp = Signal()
            # (wr_pick already includes wrpick.en_o, so this second AND
            # with it does not change the value)
            comb += wp.eq(wr_pick & wrpick.en_o)
            comb += addr_en.eq(Mux(wp, write, 0))
            if rfile.unary:
                # unary: the gated register select is itself the enable
                wens.append(addr_en)
            else:
                # binary: gated address, with the pick bit as the enable
                addrs.append(addr_en)
                wens.append(wp)

            # connect regfile port to input
            print("reg connect widths",
                  regfile, regname, pi, funame,
                  dest.shape(), wport.data_i.shape())
            wsigs.append(fu_dest_latch)

    # here is where we create the Write Broadcast Bus. simple, eh?
    comb += wport.data_i.eq(ortreereduce_sig(wsigs))
    if rfile.unary:
        # for unary-addressed
        comb += wport.wen.eq(ortreereduce_sig(wens))
    else:
        # for binary-addressed
        comb += wport.addr.eq(ortreereduce_sig(addrs))
        comb += wport.wen.eq(ortreereduce_sig(wens))
435
def connect_wrports(self, m, fu_bitdict):
    """connect write ports

    orders the write regspecs into a dict-of-dicts, by regfile,
    by regport name, then connects all FUs that want that regport
    by way of a PriorityPicker.

    to save on write ports, INT port "o1" is folded into INT port "o"
    and FAST port "fast2" into FAST port "fast1" before connecting.  a
    merge is skipped (rather than raising KeyError) when the port being
    kept is absent, and a port that has nothing to absorb is simply
    wrapped in a one-entry list.

    note that the write-port wen, write-port data, and go_wr_i all need to
    be on the exact same clock cycle.
    NOTE(review): this docstring used to add "as there is a combinatorial
    loop bug at the moment, these all use sync", but connect_wrport
    drives wen, data_i and go_wr_i through comb - confirm which is
    intended.

    m          -- the Module being built
    fu_bitdict -- per-FU enable bits, as returned by connect_instruction
    """
    def merge_ports(fuspecs, keep, extra):
        # turn fuspecs[keep] into a list of specs, absorbing
        # fuspecs[extra] when that port exists.  "keep" is re-inserted
        # last, which also moves it to the end of the dictionary order.
        if keep not in fuspecs:
            return
        merged = [fuspecs.pop(keep)]
        if extra in fuspecs:
            merged.append(fuspecs.pop(extra))
        fuspecs[keep] = merged

    # dictionary of lists of regfile write ports
    byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

    # same for write ports.
    # BLECH!  complex code-duplication! BLECH!
    wrpickers = {}
    for regfile in byregfiles_wr:
        fuspecs = byregfiles_wrspec[regfile]
        wrpickers[regfile] = {}

        # argh, more port-merging
        if regfile == 'INT':
            merge_ports(fuspecs, 'o', 'o1')
        if regfile == 'FAST':
            merge_ports(fuspecs, 'fast1', 'fast2')

        # ports whose name starts with "full" are connected first
        for (regname, fspec) in sort_fuspecs(fuspecs):
            self.connect_wrport(m, fu_bitdict, wrpickers,
                                regfile, regname, fspec)
472
def get_byregfiles(self, readmode):
    """group the Function Units' register ports by register file

    readmode -- True to walk each FU's source (read) operands, False to
                walk its destination (write) operands

    returns a pair (byregfiles, byregfiles_spec):

    * byregfiles[regfile][idx] is a list of (funame, fu, idx) entries,
      one for each FU whose operand number idx uses that regfile
    * byregfiles_spec[regfile][regname] is a tuple
      (rdflag, read, write, wid, fuspecs) in which fuspecs is the list
      of (funame, fu, idx) entries using the port named regname.
      rdflag and read come from regspec_decode_read (None in write
      mode); write is the second value from regspec_decode_write (None
      in read mode).  the first four fields are those of the first FU
      found to use the port.

    everything collected is also printed, for convenience.
    """
    mode = "read" if readmode else "write"
    e = self.e  # decoded instruction to execute

    # dictionary of lists of regfile ports
    byregfiles = {}
    byregfiles_spec = {}
    for funame, fu in self.fus.fus.items():
        print("%s ports for %s" % (mode, funame))
        n_operands = fu.n_src if readmode else fu.n_dst
        for idx in range(n_operands):
            if readmode:
                regfile, regname, wid = fu.get_in_spec(idx)
            else:
                regfile, regname, wid = fu.get_out_spec(idx)
            print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
            if readmode:
                rdflag, read = regspec_decode_read(e, regfile, regname)
                write = None
            else:
                rdflag, read = None, None
                _, write = regspec_decode_write(e, regfile, regname)

            # both dictionaries gain a regfile entry at the same moment
            lanes = byregfiles.setdefault(regfile, {})
            portspecs = byregfiles_spec.setdefault(regfile, {})
            portspecs.setdefault(regname, (rdflag, read, write, wid, []))

            # here we start to create "lanes"
            entry = (funame, fu, idx)
            lanes.setdefault(idx, []).append(entry)
            portspecs[regname][4].append(entry)

    # ok just print that out, for convenience
    for regfile in byregfiles:
        print("regfile %s ports:" % mode, regfile)
        for regname, fspec in byregfiles_spec[regfile].items():
            rdflag, read, write, wid, users = fspec
            print(" rf %s port %s lane: %s" % (mode, regfile, regname))
            print(" %s" % regname, wid, read, write, rdflag)
            for (funame, fu, idx) in users:
                fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                print(" ", funame, fu, idx, fusig)
                print()

    return byregfiles, byregfiles_spec
524
def __iter__(self):
    """yield the ports of the function units, then those of the decoded
    instruction record, then those of the L0 buffer, in that order.
    """
    for attr in ("fus", "e", "l0"):
        yield from getattr(self, attr).ports()
528 yield from self.l0.ports()
529 # TODO: regs
530
def ports(self):
    """return everything produced by iterating over this object, as a
    list.
    """
    return [*self]
533
534
if __name__ == '__main__':
    # build a core from the test memory configuration, convert it to
    # RTLIL and write the result to test_core.il
    settings = {
        'ldst_ifacetype': 'testpi',
        'imem_ifacetype': '',
        'addr_wid': 48,
        'mask_wid': 8,
        'reg_wid': 64,
    }
    core = NonProductionCore(TestMemPspec(**settings))
    il_text = rtlil.convert(core, ports=core.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(il_text)