missing pspec
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
23 from nmigen.cli import rtlil
24
25 from soc.decoder.power_decoder2 import PowerDecodeSubset
26 from soc.decoder.power_regspec_map import regspec_decode_read
27 from soc.decoder.power_regspec_map import regspec_decode_write
28
29 from nmutil.picker import PriorityPicker
30 from nmutil.util import treereduce
31
32 from soc.fu.compunits.compunits import AllFunctionUnits
33 from soc.regfile.regfiles import RegFiles
34 from soc.decoder.decode2execute1 import Decode2ToExecute1Type
35 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
36 from soc.decoder.power_decoder2 import get_rdflags
37 from soc.decoder.decode2execute1 import Data
38 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
39 from soc.config.test.test_loadstore import TestMemPspec
40 from soc.decoder.power_enums import MicrOp
41 from soc.config.state import CoreState
42
43 import operator
44
45 from nmutil.util import rising_edge
46
47
48 # helper function for reducing a list of signals down to a parallel
49 # ORed single signal.
def ortreereduce(tree, attr="data_o"):
    """OR-reduce one attribute taken from every item of `tree`.

    For each item the attribute called `attr` (default "data_o") is
    fetched with getattr, and the fetched values are combined with the
    `|` operator by nmutil's treereduce.
    """
    def fetch(item):
        return getattr(item, attr)

    return treereduce(tree, operator.or_, fetch)
52
53
def ortreereduce_sig(tree):
    """OR-reduce the items of `tree` themselves.

    Same as ortreereduce, except that each item is used directly rather
    than having an attribute extracted from it first.
    """
    def passthrough(item):
        return item

    return treereduce(tree, operator.or_, passthrough)
56
57
58 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs of `fuspecs`, "full" ports first.

    Entries whose register name starts with "full" come first, followed
    by all the others.  Within each of the two groups the dictionary's
    own iteration order is kept.  The result is a plain list of tuples.
    """
    def is_partial(pair):
        # False sorts before True, so "full*" names end up at the front
        return not pair[0].startswith("full")

    # sorted() is stable: ties keep their original relative order
    return sorted(fuspecs.items(), key=is_partial)
68
69
class NonProductionCore(Elaboratable):
    """Simple core: Function Units wired to Register Files via pickers.

    Each regfile read/write port gets one PriorityPicker; every Function
    Unit that needs that port requests it through the picker and shares
    the port's address/enable/data lines through OR-reduced buses.
    Instruction routing enables a Function Unit according to the decoded
    fn_unit field of the main decoder output (self.e).
    """

    def __init__(self, pspec):
        """Create the memory funnel, Function Units, regfiles and decoders.

        pspec: parameter specification, passed on to TstL0CacheBuffer and
               AllFunctionUnits.  An optional "nocore" attribute on it
               makes elaborate() return an empty Module.
        """
        self.pspec = pspec

        # single LD/ST funnel for memory access
        self.l0 = TstL0CacheBuffer(pspec, n_units=1)
        pi = self.l0.l0.dports[0]

        # NOTE: MMU / DCache are not instantiated by this core (the
        # original placeholder was unreachable and named classes that
        # this file never imports).

        # function units (only one each)
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # register files (yes plural)
        self.regs = RegFiles()

        # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
        self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand)

        self.state = CoreState("core")
        self.raw_insn_i = Signal(32)  # raw instruction
        self.bigendian_i = Signal()  # bigendian

        # issue/valid/busy signalling
        self.ivalid_i = Signal(reset_less=True)  # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # start/stop and terminated signalling
        self.core_stopped_i = Signal(reset_less=True)
        self.core_reset_i = Signal()
        self.core_terminate_o = Signal(reset=0)  # indicates stopped

        # create per-FU instruction decoders (subsetted)
        self.decoders = {}
        self.ees = {}

        # name of the TRAP function unit; stays None when there is none,
        # so that elaborate() does not trip over a missing attribute
        self.trapunit = None

        for funame, fu in self.fus.fus.items():
            f_name = fu.fnunit.name
            opkls = fu.opsubsetkls
            if f_name == 'TRAP':
                # TRAP gets no subset decoder: see elaborate()
                self.trapunit = funame
                continue
            self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name,
                                                      final=True,
                                                      state=self.state)
            self.ees[funame] = self.decoders[funame].e

    def elaborate(self, platform):
        """Build the Module: submodules, decoders, FU routing, reg ports."""
        m = Module()
        # for testing purposes, to cut down on build time in coriolis2
        if hasattr(self.pspec, "nocore") and self.pspec.nocore == True:
            return m
        comb = m.d.comb

        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # connect decoders: all of them see the same raw instruction
        for v in self.decoders.values():
            setattr(m.submodules, "dec_%s" % v.fn_name, v)
            comb += v.dec.raw_opcode_in.eq(self.raw_insn_i)
            comb += v.dec.bigendian.eq(self.bigendian_i)

        # ssh, cheat: trap uses the main decoder because of the rewriting
        if self.trapunit is not None:
            self.ees[self.trapunit] = self.e

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        # connect up reset
        m.d.comb += ResetSignal().eq(self.core_reset_i)

        return m

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor.  OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        returns a dict mapping each Function Unit name to its one-bit
        enable (a slice of a single fu_enable Signal).
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus

        # enable-signals for each FU, get one bit for each FU (by name)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {funame: fu_enable[i] for i, funame in enumerate(fus)}

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter: busy is held high while it counts down
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i):  # run only when valid
            with m.Switch(self.e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    m.d.sync += self.core_terminate_o.eq(1)

                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                with m.Default():
                    # connect up instructions.  only one enabled at a time
                    for funame, fu in fus.items():
                        e = self.ees[funame]
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        # route op, issue, busy, read flags and mask to FU
                        with m.If(enable):
                            # operand comes from the *local* decoder
                            comb += fu.oper_i.eq_from(e.do)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            # rdmask, which is for registers, needs to come
                            # from the *main* decoder
                            rdmask = get_rdflags(self.e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
        """Connect every FU that reads one regfile port to that port.

        m:          Module being built
        fu_bitdict: FU name -> enable bit, from connect_instruction
        rdpickers:  dict-of-dicts; the PriorityPicker created here is
                    stored at rdpickers[regfile][regname]
        regfile:    regfile name (lower-cased to index self.regs.rf)
        regname:    read-port name within that regfile
        fspec:      one (rdflag, read, write, wid, fuspec-list) tuple, or
                    a list of such tuples when ports have been merged
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs

        rpidx = regname

        # select the required read port.  these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        print("read regfile", rpidx, regfile, regs.rf.keys(),
              rfile, rfile.unary)

        # normalise: a single spec is treated as a one-entry list
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        rdflags = []
        pplen = 0
        reads = []
        ppoffs = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        print("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            (rf, read, write, wid, fuspec) = fspec
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # picker index is offset by the FUs of the earlier specs
                pi += ppoffs[i]

                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                pick = Signal(name="pick_"+name)  # picker input
                rp = Signal(name="rp_"+name)  # picker output
                delay_pick = Signal(name="dp_"+name)  # read-enable "underway"

                # exclude any currently-enabled read-request (mask out active)
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick)
                comb += rdpick.i[pi].eq(pick)
                comb += fu.go_rd_i[idx].eq(delay_pick)  # pass in *delayed* pick

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp)  # delayed "pick"
                comb += addr_en.eq(Mux(rp, reads[i], 0))

                # the read-enable happens combinatorially (see mux-bus below)
                # but it results in the data coming out on a one-cycle delay.
                if rfile.unary:
                    rens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    rens.append(rp)

                # use the *delayed* pick signal to put requested data onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.data_o)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.
        """
        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile in byregfiles_rd:
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # NOTE: read ports are not merged (an experiment to merge
            # INT ra/rb/rc and FAST fast1/fast2 is disabled); write
            # ports are merged, see connect_wrports.

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)

    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
        """Connect every FU that writes one regfile port to that port.

        m:          Module being built
        fu_bitdict: FU name -> enable bit, from connect_instruction
        wrpickers:  dict-of-dicts; the PriorityPicker created here is
                    stored at wrpickers[regfile][regname]
        regfile:    regfile name (lower-cased to index self.regs.rf)
        regname:    write-port name within that regfile
        fspec:      one (rdflag, read, write, wid, fuspec-list) tuple, or
                    a list of such tuples when ports have been merged
        """
        comb = m.d.comb
        regs = self.regs

        print("connect wr", regname, fspec)
        rpidx = regname

        # select the required write port.  these are pre-defined sizes
        print(regfile, regs.rf.keys())
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        # normalise: a single spec is treated as a one-entry list
        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        pplen = 0
        ppoffs = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []
        wens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-write to the FU
            # these are arbitrated by Data.ok signals
            (rf, read, write, wid, fuspec) = fspec
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # picker index is offset by the FUs of the earlier specs
                pi += ppoffs[i]

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx)  # latched output
                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                # wrflag is created and driven but is not (currently)
                # part of the pick expression below
                wrflag = Signal(name=name, reset_less=True)
                comb += wrflag.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                wr_pick = Signal()
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                addr_en = Signal.like(write)
                wp = Signal()
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                if rfile.unary:
                    wens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.data_i.shape())
                wsigs.append(fu_dest_latch)

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

    @staticmethod
    def _merge_fuspecs(fuspecs, target, extras):
        """Fold the `extras` ports of `fuspecs` into a list under `target`.

        Does nothing when `target` is absent.  Otherwise fuspecs[target]
        becomes a list starting with its former value, followed by the
        popped value of each name in `extras` that is present.  Absent
        names are skipped rather than raising KeyError.
        """
        if target not in fuspecs:
            return
        merged = [fuspecs.pop(target)]
        for extra in extras:
            if extra in fuspecs:
                merged.append(fuspecs.pop(extra))
        # re-inserted last, i.e. target moves to the end of the dict
        fuspecs[target] = merged

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle.  connect_wrport drives all three
        combinatorially.

        INT ports o/o1 are merged onto "o" and FAST ports fast1/fast2 onto
        "fast1", whenever those ports are present.
        """
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication! BLECH!
        wrpickers = {}
        for regfile in byregfiles_wr:
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            # argh, more port-merging
            if regfile == 'INT':
                self._merge_fuspecs(fuspecs, 'o', ['o1'])
            if regfile == 'FAST':
                self._merge_fuspecs(fuspecs, 'fast1', ['fast2'])

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """Group Function Unit register ports by regfile.

        readmode: True to collect source (read) ports, False to collect
                  destination (write) ports.

        returns a pair (byregfiles, byregfiles_spec):
        * byregfiles[regfile][idx] is a list of (funame, fu, idx) tuples
        * byregfiles_spec[regfile][regname] is a tuple
          (rdflag, read, write, wid, fuspec-list); rdflag/read are None
          in write mode and write is None in read mode.  the first FU
          seen for a given regname determines those leading four fields.
        """
        mode = "read" if readmode else "write"
        fus = self.fus.fus
        e = self.e  # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    # first element of the decode result is not used here
                    _, write = regspec_decode_write(e, regfile, regname)
                lanes = byregfiles.setdefault(regfile, {})
                specs = byregfiles_spec.setdefault(regfile, {})
                if regname not in specs:
                    specs[regname] = (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                fuspec = (funame, fu, idx)
                lanes.setdefault(idx, []).append(fuspec)
                specs[regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile in byregfiles:
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)
                    print()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """Yield the ports of the Function Units, decoder record and L0."""
        yield from self.fus.ports()
        yield from self.e.ports()
        yield from self.l0.ports()
        # TODO: regs

    def ports(self):
        """Return everything __iter__ yields, as a list."""
        return list(self)
537
538
if __name__ == '__main__':
    # standalone build: elaborate the core against the test memory
    # configuration and write the resulting RTLIL to test_core.il
    mem_settings = {
        'ldst_ifacetype': 'testpi',
        'imem_ifacetype': '',
        'addr_wid': 48,
        'mask_wid': 8,
        'reg_wid': 64,
    }
    pspec = TestMemPspec(**mem_settings)
    dut = NonProductionCore(pspec)
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_core.il", "w") as il_file:
        il_file.write(il_text)