icache.py connect up all the sub-functions, fix typos and other small
[soc.git] / src / soc / simple / core.py
1 """simple core
2
3 not in any way intended for production use. connects up FunctionUnits to
4 Register Files in a brain-dead fashion that only permits one and only one
5 Function Unit to be operational.
6
7 the principle here is to take the Function Units, analyse their regspecs,
8 and turn their requirements for access to register file read/write ports
9 into groupings by Register File and Register File Port name.
10
11 under each grouping - by regfile/port - a list of Function Units that
12 need to connect to that port is created. as these are a contended
13 resource a "Broadcast Bus" per read/write port is then also created,
14 with access to it managed by a PriorityPicker.
15
16 the brain-dead part of this module is that even though there is no
17 conflict of access, regfile read/write hazards are *not* analysed,
18 and consequently it is safer to wait for the Function Unit to complete
19 before allowing a new instruction to proceed.
20 """
21
22 from nmigen import Elaboratable, Module, Signal, ResetSignal, Cat, Mux
23 from nmigen.cli import rtlil
24
25 from soc.decoder.power_decoder2 import PowerDecodeSubset
26 from soc.decoder.power_regspec_map import regspec_decode_read
27 from soc.decoder.power_regspec_map import regspec_decode_write
28
29 from nmutil.picker import PriorityPicker
30 from nmutil.util import treereduce
31
32 from soc.fu.compunits.compunits import AllFunctionUnits
33 from soc.regfile.regfiles import RegFiles
34 from soc.decoder.decode2execute1 import Decode2ToExecute1Type
35 from soc.decoder.decode2execute1 import IssuerDecode2ToOperand
36 from soc.decoder.power_decoder2 import get_rdflags
37 from soc.decoder.decode2execute1 import Data
38 from soc.experiment.l0_cache import TstL0CacheBuffer # test only
39 from soc.config.test.test_loadstore import TestMemPspec
40 from soc.decoder.power_enums import MicrOp
41 from soc.config.state import CoreState
42
43 import operator
44
45 from nmutil.util import rising_edge
46
47
48 # helper function for reducing a list of signals down to a parallel
49 # ORed single signal.
def ortreereduce(tree, attr="data_o"):
    """OR together one attribute taken from every item of ``tree``.

    Each item's ``attr`` attribute (``data_o`` by default) is fetched with
    ``getattr`` and the results are combined with ``operator.or_`` by
    ``treereduce``.
    """
    def leaf_value(node):
        # pull the requested attribute off a single item
        return getattr(node, attr)

    return treereduce(tree, operator.or_, leaf_value)
52
53
def ortreereduce_sig(tree):
    """OR together the items of ``tree`` themselves (no attribute lookup).

    The items are handed unchanged to ``treereduce``, which combines them
    with ``operator.or_``.
    """
    def identity(node):
        # items are used as-is
        return node

    return treereduce(tree, operator.or_, identity)
56
57
58 # helper function to place full regs declarations first
def sort_fuspecs(fuspecs):
    """Return the (regname, fspec) pairs of ``fuspecs``, "full" names first.

    Pairs whose register name starts with "full" come first, followed by
    all remaining pairs.  Within each group the dictionary's own iteration
    order is kept.  The result is a plain list of 2-tuples.
    """
    def is_not_full(pair):
        # False (a "full" declaration) sorts ahead of True (anything else)
        return not pair[0].startswith("full")

    # sorted() is stable, so ties keep their original relative order
    return sorted(fuspecs.items(), key=is_not_full)
68
69
class NonProductionCore(Elaboratable):
    """Simple core: Function Units wired to Register Files via pickers.

    Holds a single LD/ST funnel (``l0``), the set of Function Units
    (``fus``), the register files (``regs``), a main decoded-instruction
    record (``e``) plus one subset decoder per non-TRAP Function Unit.
    ``elaborate`` connects instruction issue, then the regfile read ports,
    then the regfile write ports.
    """

    def __init__(self, pspec):
        """Create sub-blocks and the core's interface signals.

        pspec is passed through to TstL0CacheBuffer and AllFunctionUnits.
        """

        # single LD/ST funnel for memory access
        self.l0 = TstL0CacheBuffer(pspec, n_units=1)
        pi = self.l0.l0.dports[0]

        # function units (only one each)
        self.fus = AllFunctionUnits(pspec, pilist=[pi])

        # register files (yes plural)
        self.regs = RegFiles()

        # instruction decoder - needs a Trap-capable Record (captures EINT etc.)
        self.e = Decode2ToExecute1Type("core", opkls=IssuerDecode2ToOperand)

        self.state = CoreState("core")
        self.raw_insn_i = Signal(32)  # raw instruction
        self.bigendian_i = Signal()  # bigendian

        # issue/valid/busy signalling
        self.ivalid_i = Signal(reset_less=True)  # instruction is valid
        self.issue_i = Signal(reset_less=True)
        self.busy_o = Signal(name="corebusy_o", reset_less=True)

        # start/stop and terminated signalling
        self.core_stopped_i = Signal(reset_less=True)
        self.core_reset_i = Signal()
        self.core_terminate_o = Signal(reset=0)  # indicates stopped

        # create per-FU instruction decoders (subsetted)
        self.decoders = {}
        self.ees = {}

        # name of the TRAP function unit, if one exists.  it gets no
        # subset decoder of its own: elaborate() hooks it to the main one.
        self.trapunit = None

        for funame, fu in self.fus.fus.items():
            f_name = fu.fnunit.name
            opkls = fu.opsubsetkls
            if f_name == 'TRAP':
                self.trapunit = funame
                continue
            self.decoders[funame] = PowerDecodeSubset(None, opkls, f_name,
                                                      final=True,
                                                      state=self.state)
            self.ees[funame] = self.decoders[funame].e

    def elaborate(self, platform):
        """Build the Module: submodules, decoders, FU issue, regfile ports."""
        m = Module()
        comb = m.d.comb

        m.submodules.fus = self.fus
        m.submodules.l0 = self.l0
        self.regs.elaborate_into(m, platform)

        # connect decoders: every subset decoder sees the same raw
        # instruction and endianness
        for v in self.decoders.values():
            setattr(m.submodules, "dec_%s" % v.fn_name, v)
            comb += v.dec.raw_opcode_in.eq(self.raw_insn_i)
            comb += v.dec.bigendian.eq(self.bigendian_i)

        # ssh, cheat: trap uses the main decoder because of the rewriting
        # (skipped when no TRAP function unit was found in __init__)
        if self.trapunit is not None:
            self.ees[self.trapunit] = self.e

        # connect up Function Units, then read/write ports
        fu_bitdict = self.connect_instruction(m)
        self.connect_rdports(m, fu_bitdict)
        self.connect_wrports(m, fu_bitdict)

        # connect up reset
        comb += ResetSignal().eq(self.core_reset_i)

        return m

    def connect_instruction(self, m):
        """connect_instruction

        uses decoded (from PowerOp) function unit information from CSV files
        to ascertain which Function Unit should deal with the current
        instruction.

        some (such as OP_ATTN, OP_NOP) are dealt with here, including
        ignoring it and halting the processor.  OP_NOP is a bit annoying
        because the issuer expects busy flag still to be raised then lowered.
        (this requires a fake counter to be set).

        returns a dict mapping each Function Unit name to its one-bit
        enable signal (a slice of a single enable vector).
        """
        comb, sync = m.d.comb, m.d.sync
        fus = self.fus.fus

        # enable-signals for each FU, get one bit for each FU (by name)
        # (local names of unnamed Signals are kept as-is: nmigen derives
        # the signal name from the variable it is assigned to)
        fu_enable = Signal(len(fus), reset_less=True)
        fu_bitdict = {}
        for i, funame in enumerate(fus.keys()):
            fu_bitdict[funame] = fu_enable[i]

        # enable the required Function Unit based on the opcode decode
        # note: this *only* works correctly for simple core when one and
        # *only* one FU is allocated per instruction
        for funame, fu in fus.items():
            fnunit = fu.fnunit.value
            enable = Signal(name="en_%s" % funame, reset_less=True)
            comb += enable.eq((self.e.do.fn_unit & fnunit).bool())
            comb += fu_bitdict[funame].eq(enable)

        # sigh - need a NOP counter
        counter = Signal(2)
        with m.If(counter != 0):
            sync += counter.eq(counter - 1)
            comb += self.busy_o.eq(1)

        with m.If(self.ivalid_i):  # run only when valid
            with m.Switch(self.e.do.insn_type):
                # check for ATTN: halt if true
                with m.Case(MicrOp.OP_ATTN):
                    sync += self.core_terminate_o.eq(1)

                with m.Case(MicrOp.OP_NOP):
                    sync += counter.eq(2)
                    comb += self.busy_o.eq(1)

                with m.Default():
                    # connect up instructions.  only one enabled at a time
                    for funame, fu in fus.items():
                        e = self.ees[funame]
                        enable = fu_bitdict[funame]

                        # run this FunctionUnit if enabled
                        # route op, issue, busy, read flags and mask to FU
                        with m.If(enable):
                            # operand comes from the *local* decoder
                            comb += fu.oper_i.eq_from(e.do)
                            #comb += fu.oper_i.eq_from_execute1(e)
                            comb += fu.issue_i.eq(self.issue_i)
                            comb += self.busy_o.eq(fu.busy_o)
                            # rdmask, which is for registers, needs to come
                            # from the *main* decoder
                            rdmask = get_rdflags(self.e, fu)
                            comb += fu.rdmaskn.eq(~rdmask)

        return fu_bitdict

    def connect_rdport(self, m, fu_bitdict, rdpickers, regfile, regname, fspec):
        """Connect every FU that reads one named regfile port to that port.

        m          -- Module being built
        fu_bitdict -- FU name -> enable bit (from connect_instruction)
        rdpickers  -- dict-of-dicts; the picker created here is stored at
                      rdpickers[regfile][regname]
        regfile    -- regfile name (lower-cased to index self.regs.rf)
        regname    -- read port name within that regfile
        fspec      -- one (rdflag, read, write, wid, fuspec-list) tuple, or
                      a list of such tuples sharing this port
        """
        comb, sync = m.d.comb, m.d.sync
        regs = self.regs

        rpidx = regname

        # select the required read port.  these are pre-defined sizes
        rfile = regs.rf[regfile.lower()]
        rport = rfile.r_ports[rpidx]
        print("read regfile", rpidx, regfile, regs.rf.keys(),
              rfile, rfile.unary)

        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        rdflags = []
        pplen = 0
        reads = []
        ppoffs = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print ("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)
            name = "rdflag_%s_%s_%d" % (regfile, regname, i)
            rdflag = Signal(name=name, reset_less=True)
            comb += rdflag.eq(rf)
            rdflags.append(rdflag)
            reads.append(read)

        print ("pplen", pplen)

        # create a priority picker to manage this port
        rdpickers[regfile][rpidx] = rdpick = PriorityPicker(pplen)
        setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)

        rens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            (rf, read, write, wid, fuspec) = fspec
            # connect up the FU req/go signals, and the reg-read to the FU
            # and create a Read Broadcast Bus
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # picker index: position within this spec plus the offset
                # of all FUs belonging to earlier specs
                pi += ppoffs[i]

                # connect request-read to picker input, and output to go-rd
                fu_active = fu_bitdict[funame]
                name = "%s_%s_%s_%i" % (regfile, rpidx, funame, pi)
                addr_en = Signal.like(reads[i], name="addr_en_"+name)
                pick = Signal(name="pick_"+name)  # picker input
                rp = Signal(name="rp_"+name)  # picker output
                delay_pick = Signal(name="dp_"+name)  # read-enable "underway"

                # exclude any currently-enabled read-request (mask out active)
                comb += pick.eq(fu.rd_rel_o[idx] & fu_active & rdflags[i] &
                                ~delay_pick)
                comb += rdpick.i[pi].eq(pick)
                comb += fu.go_rd_i[idx].eq(delay_pick)  # pass in *delayed* pick

                # if picked, select read-port "reg select" number to port
                comb += rp.eq(rdpick.o[pi] & rdpick.en_o)
                sync += delay_pick.eq(rp)  # delayed "pick"
                comb += addr_en.eq(Mux(rp, reads[i], 0))

                # the read-enable happens combinatorially (see mux-bus below)
                # but it results in the data coming out on a one-cycle delay.
                if rfile.unary:
                    rens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    rens.append(rp)

                # use the *delayed* pick signal to put requested data onto bus
                with m.If(delay_pick):
                    # connect regfile port to input, creating fan-out Bus
                    src = fu.src_i[idx]
                    print("reg connect widths",
                          regfile, regname, pi, funame,
                          src.shape(), rport.data_o.shape())
                    # all FUs connect to same port
                    comb += src.eq(rport.data_o)

        # or-reduce the muxed read signals
        if rfile.unary:
            # for unary-addressed
            comb += rport.ren.eq(ortreereduce_sig(rens))
        else:
            # for binary-addressed
            comb += rport.addr.eq(ortreereduce_sig(addrs))
            comb += rport.ren.eq(Cat(*rens).bool())
            print ("binary", regfile, rpidx, rport, rport.ren, rens, addrs)

    def connect_rdports(self, m, fu_bitdict):
        """connect read ports

        orders the read regspecs into a dict-of-dicts, by regfile, by
        regport name, then connects all FUs that want that regport by
        way of a PriorityPicker.
        """
        # dictionary of lists of regfile read ports
        byregfiles_rd, byregfiles_rdspec = self.get_byregfiles(True)

        # okaay, now we need a PriorityPicker per regfile per regfile port
        # loootta pickers... peter piper picked a pack of pickled peppers...
        rdpickers = {}
        for regfile in byregfiles_rd:
            fuspecs = byregfiles_rdspec[regfile]
            rdpickers[regfile] = {}

            # argh.  an experiment to merge RA and RB in the INT regfile
            # (we have too many read/write ports)
            #if regfile == 'INT':
                #fuspecs['rabc'] = [fuspecs.pop('rb')]
                #fuspecs['rabc'].append(fuspecs.pop('rc'))
                #fuspecs['rabc'].append(fuspecs.pop('ra'))
            #if regfile == 'FAST':
            #    fuspecs['fast1'] = [fuspecs.pop('fast1')]
            #    if 'fast2' in fuspecs:
            #        fuspecs['fast1'].append(fuspecs.pop('fast2'))

            # for each named regfile port, connect up all FUs to that port
            for (regname, fspec) in sort_fuspecs(fuspecs):
                print("connect rd", regname, fspec)
                self.connect_rdport(m, fu_bitdict, rdpickers, regfile,
                                    regname, fspec)

    def connect_wrport(self, m, fu_bitdict, wrpickers, regfile, regname, fspec):
        """Connect every FU that writes one named regfile port to that port.

        m          -- Module being built
        fu_bitdict -- FU name -> enable bit (from connect_instruction)
        wrpickers  -- dict-of-dicts; the picker created here is stored at
                      wrpickers[regfile][regname]
        regfile    -- regfile name (lower-cased to index self.regs.rf)
        regname    -- write port name within that regfile
        fspec      -- one (rdflag, read, write, wid, fuspec-list) tuple, or
                      a list of such tuples sharing this port
        """
        comb = m.d.comb
        regs = self.regs

        print("connect wr", regname, fspec)
        rpidx = regname

        # select the required write port.  these are pre-defined sizes
        print(regfile, regs.rf.keys())
        rfile = regs.rf[regfile.lower()]
        wport = rfile.w_ports[rpidx]

        fspecs = fspec
        if not isinstance(fspecs, list):
            fspecs = [fspecs]

        pplen = 0
        ppoffs = []
        for i, fspec in enumerate(fspecs):
            # get the regfile specs for this regfile port
            (rf, read, write, wid, fuspec) = fspec
            print ("fpsec", i, fspec, len(fuspec))
            ppoffs.append(pplen)  # record offset for picker
            pplen += len(fuspec)

        # create a priority picker to manage this port
        wrpickers[regfile][rpidx] = wrpick = PriorityPicker(pplen)
        setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)

        wsigs = []
        wens = []
        addrs = []
        for i, fspec in enumerate(fspecs):
            # connect up the FU req/go signals and the reg-read to the FU
            # these are arbitrated by Data.ok signals
            (rf, read, write, wid, fuspec) = fspec
            for pi, (funame, fu, idx) in enumerate(fuspec):
                # picker index: position within this spec plus the offset
                # of all FUs belonging to earlier specs
                pi += ppoffs[i]

                # write-request comes from dest.ok
                dest = fu.get_out(idx)
                fu_dest_latch = fu.get_fu_out(idx)  # latched output
                name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                # NOTE: wrflag is driven but not currently part of "pick"
                # (see the commented-out term below).  left in place so the
                # generated design is unchanged.
                wrflag = Signal(name=name, reset_less=True)
                comb += wrflag.eq(dest.ok & fu.busy_o)

                # connect request-write to picker input, and output to go-wr
                fu_active = fu_bitdict[funame]
                pick = fu.wr.rel_o[idx] & fu_active  # & wrflag
                comb += wrpick.i[pi].eq(pick)
                # create a single-pulse go write from the picker output
                wr_pick = Signal()
                comb += wr_pick.eq(wrpick.o[pi] & wrpick.en_o)
                comb += fu.go_wr_i[idx].eq(rising_edge(m, wr_pick))

                # connect the regspec write "reg select" number to this port
                # only if one FU actually requests (and is granted) the port
                # will the write-enable be activated
                addr_en = Signal.like(write)
                wp = Signal()
                comb += wp.eq(wr_pick & wrpick.en_o)
                comb += addr_en.eq(Mux(wp, write, 0))
                if rfile.unary:
                    wens.append(addr_en)
                else:
                    addrs.append(addr_en)
                    wens.append(wp)

                # connect regfile port to input
                print("reg connect widths",
                      regfile, regname, pi, funame,
                      dest.shape(), wport.data_i.shape())
                wsigs.append(fu_dest_latch)

        # here is where we create the Write Broadcast Bus. simple, eh?
        comb += wport.data_i.eq(ortreereduce_sig(wsigs))
        if rfile.unary:
            # for unary-addressed
            comb += wport.wen.eq(ortreereduce_sig(wens))
        else:
            # for binary-addressed
            comb += wport.addr.eq(ortreereduce_sig(addrs))
            comb += wport.wen.eq(ortreereduce_sig(wens))

    def connect_wrports(self, m, fu_bitdict):
        """connect write ports

        orders the write regspecs into a dict-of-dicts, by regfile,
        by regport name, then connects all FUs that want that regport
        by way of a PriorityPicker.

        note that the write-port wen, write-port data, and go_wr_i all need to
        be on the exact same clock cycle.

        NOTE(review): this note used to add that, because of a combinatorial
        loop bug, "these all use sync".  connect_wrport as written assigns
        wport.wen, wport.data_i and fu.go_wr_i through m.d.comb - confirm
        which of the two is intended.
        """
        # dictionary of lists of regfile write ports
        byregfiles_wr, byregfiles_wrspec = self.get_byregfiles(False)

        # same for write ports.
        # BLECH!  complex code-duplication! BLECH!
        wrpickers = {}
        for regfile in byregfiles_wr:
            fuspecs = byregfiles_wrspec[regfile]
            wrpickers[regfile] = {}

            # argh, more port-merging: fold the secondary port's spec into
            # the primary port's list so both share one write port.  the
            # secondary port is only merged if some FU actually declares it.
            if regfile == 'INT':
                fuspecs['o'] = [fuspecs.pop('o')]
                if 'o1' in fuspecs:
                    fuspecs['o'].append(fuspecs.pop('o1'))
            if regfile == 'FAST':
                fuspecs['fast1'] = [fuspecs.pop('fast1')]
                if 'fast2' in fuspecs:
                    fuspecs['fast1'].append(fuspecs.pop('fast2'))

            for (regname, fspec) in sort_fuspecs(fuspecs):
                self.connect_wrport(m, fu_bitdict, wrpickers,
                                    regfile, regname, fspec)

    def get_byregfiles(self, readmode):
        """Group the Function Units' register ports by regfile.

        readmode -- True to walk FU inputs (get_in_spec / n_src), False to
                    walk FU outputs (get_out_spec / n_dst).

        returns (byregfiles, byregfiles_spec):
          byregfiles[regfile][idx]          -> list of (funame, fu, idx)
          byregfiles_spec[regfile][regname] -> (rdflag, read, write, wid,
                                                list of (funame, fu, idx))
        in read mode "write" is None; in write mode "rdflag" and "read"
        are None.  the tuple's first four fields come from the first FU
        seen for that regfile/regname.
        """
        mode = "read" if readmode else "write"
        fus = self.fus.fus
        e = self.e  # decoded instruction to execute

        # dictionary of lists of regfile ports
        byregfiles = {}
        byregfiles_spec = {}
        for (funame, fu) in fus.items():
            print("%s ports for %s" % (mode, funame))
            for idx in range(fu.n_src if readmode else fu.n_dst):
                if readmode:
                    (regfile, regname, wid) = fu.get_in_spec(idx)
                else:
                    (regfile, regname, wid) = fu.get_out_spec(idx)
                print(" %d %s %s %s" % (idx, regfile, regname, str(wid)))
                if readmode:
                    rdflag, read = regspec_decode_read(e, regfile, regname)
                    write = None
                else:
                    rdflag, read = None, None
                    # first element of the pair is not needed here
                    _, write = regspec_decode_write(e, regfile, regname)
                if regfile not in byregfiles:
                    byregfiles[regfile] = {}
                    byregfiles_spec[regfile] = {}
                if regname not in byregfiles_spec[regfile]:
                    byregfiles_spec[regfile][regname] = \
                        (rdflag, read, write, wid, [])
                # here we start to create "lanes"
                fuspec = (funame, fu, idx)
                byregfiles[regfile].setdefault(idx, []).append(fuspec)
                byregfiles_spec[regfile][regname][4].append(fuspec)

        # ok just print that out, for convenience
        for regfile in byregfiles:
            print("regfile %s ports:" % mode, regfile)
            fuspecs = byregfiles_spec[regfile]
            for regname, fspec in fuspecs.items():
                [rdflag, read, write, wid, fuspec] = fspec
                print(" rf %s port %s lane: %s" % (mode, regfile, regname))
                print(" %s" % regname, wid, read, write, rdflag)
                for (funame, fu, idx) in fuspec:
                    fusig = fu.src_i[idx] if readmode else fu.dest[idx]
                    print(" ", funame, fu, idx, fusig)
                    print()

        return byregfiles, byregfiles_spec

    def __iter__(self):
        """Yield the ports of the FUs, the main decoder record and l0."""
        yield from self.fus.ports()
        yield from self.e.ports()
        yield from self.l0.ports()
        # TODO: regs

    def ports(self):
        """Return everything __iter__ yields, as a list."""
        return list(self)
528
529
if __name__ == '__main__':
    # build a core with the test memory configuration, convert it to
    # RTLIL and write the result to test_core.il
    pspec = TestMemPspec(
        ldst_ifacetype='testpi',
        imem_ifacetype='',
        addr_wid=48,
        mask_wid=8,
        reg_wid=64,
    )
    dut = NonProductionCore(pspec)
    core_ports = dut.ports()
    vl = rtlil.convert(dut, ports=core_ports)
    with open("test_core.il", "w") as f:
        f.write(vl)